/*
   drbd_req.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.

 */
25
Philipp Reisnerb411b362009-09-25 16:07:19 -070026#include <linux/module.h>
27
28#include <linux/slab.h>
29#include <linux/drbd.h>
30#include "drbd_int.h"
Philipp Reisnerb411b362009-09-25 16:07:19 -070031#include "drbd_req.h"
32
33
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +020034static bool drbd_may_do_local_read(struct drbd_device *device, sector_t sector, int size);
Philipp Reisner57bcb6c2011-12-03 11:18:56 +010035
Philipp Reisnerb411b362009-09-25 16:07:19 -070036/* Update disk stats at start of I/O request */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +020037static void _drbd_start_io_acct(struct drbd_device *device, struct drbd_request *req)
Philipp Reisnerb411b362009-09-25 16:07:19 -070038{
Jens Axboed62e26b2017-06-30 21:55:08 -060039 struct request_queue *q = device->rq_queue;
40
Michael Callahanddcf35d2018-07-18 04:47:39 -070041 generic_start_io_acct(q, bio_op(req->master_bio),
Jens Axboed62e26b2017-06-30 21:55:08 -060042 req->i.size >> 9, &device->vdisk->part0);
Philipp Reisnerb411b362009-09-25 16:07:19 -070043}
44
45/* Update disk stats when completing request upwards */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +020046static void _drbd_end_io_acct(struct drbd_device *device, struct drbd_request *req)
Philipp Reisnerb411b362009-09-25 16:07:19 -070047{
Jens Axboed62e26b2017-06-30 21:55:08 -060048 struct request_queue *q = device->rq_queue;
49
Michael Callahanddcf35d2018-07-18 04:47:39 -070050 generic_end_io_acct(q, bio_op(req->master_bio),
Gu Zheng24480852014-11-24 11:05:25 +080051 &device->vdisk->part0, req->start_jif);
Philipp Reisnerb411b362009-09-25 16:07:19 -070052}
53
Lars Ellenberg9104d312016-06-14 00:26:31 +020054static struct drbd_request *drbd_req_new(struct drbd_device *device, struct bio *bio_src)
Philipp Reisnerb411b362009-09-25 16:07:19 -070055{
Andreas Gruenbacher9e204cd2011-01-26 18:45:11 +010056 struct drbd_request *req;
57
Kent Overstreet0892fac2018-05-20 18:25:48 -040058 req = mempool_alloc(&drbd_request_mempool, GFP_NOIO);
Andreas Gruenbacher9e204cd2011-01-26 18:45:11 +010059 if (!req)
60 return NULL;
David Rientjes23fe8f82015-03-24 16:22:32 -070061 memset(req, 0, sizeof(*req));
Andreas Gruenbacher9e204cd2011-01-26 18:45:11 +010062
63 drbd_req_make_private_bio(req, bio_src);
Lars Ellenberg9104d312016-06-14 00:26:31 +020064 req->rq_state = (bio_data_dir(bio_src) == WRITE ? RQ_WRITE : 0)
65 | (bio_op(bio_src) == REQ_OP_WRITE_SAME ? RQ_WSAME : 0)
Lars Ellenbergf31e5832018-12-20 17:23:42 +010066 | (bio_op(bio_src) == REQ_OP_WRITE_ZEROES ? RQ_ZEROES : 0)
Lars Ellenberg9104d312016-06-14 00:26:31 +020067 | (bio_op(bio_src) == REQ_OP_DISCARD ? RQ_UNMAP : 0);
68 req->device = device;
69 req->master_bio = bio_src;
70 req->epoch = 0;
Andreas Gruenbacher53840642011-01-28 10:31:04 +010071
Andreas Gruenbacher9e204cd2011-01-26 18:45:11 +010072 drbd_clear_interval(&req->i);
Kent Overstreet4f024f32013-10-11 15:44:27 -070073 req->i.sector = bio_src->bi_iter.bi_sector;
74 req->i.size = bio_src->bi_iter.bi_size;
Andreas Gruenbacher5e472262011-01-27 14:42:51 +010075 req->i.local = true;
Andreas Gruenbacher53840642011-01-28 10:31:04 +010076 req->i.waiting = false;
77
Andreas Gruenbacher9e204cd2011-01-26 18:45:11 +010078 INIT_LIST_HEAD(&req->tl_requests);
79 INIT_LIST_HEAD(&req->w.list);
Lars Ellenberg844a6ae72013-11-22 12:52:03 +010080 INIT_LIST_HEAD(&req->req_pending_master_completion);
81 INIT_LIST_HEAD(&req->req_pending_local);
Andreas Gruenbacher9e204cd2011-01-26 18:45:11 +010082
Lars Ellenberga0d856d2012-01-24 17:19:42 +010083 /* one reference to be put by __drbd_make_request */
Lars Ellenbergb4067772012-01-24 16:58:11 +010084 atomic_set(&req->completion_ref, 1);
Lars Ellenberga0d856d2012-01-24 17:19:42 +010085 /* one kref as long as completion_ref > 0 */
Lars Ellenbergb4067772012-01-24 16:58:11 +010086 kref_init(&req->kref);
Andreas Gruenbacher9e204cd2011-01-26 18:45:11 +010087 return req;
88}
89
Lars Ellenberg08d0dab2014-03-20 11:19:22 +010090static void drbd_remove_request_interval(struct rb_root *root,
91 struct drbd_request *req)
92{
93 struct drbd_device *device = req->device;
94 struct drbd_interval *i = &req->i;
95
96 drbd_remove_interval(root, i);
97
98 /* Wake up any processes waiting for this request to complete. */
99 if (i->waiting)
100 wake_up(&device->misc_wait);
101}
102
Lars Ellenberg9a278a72012-07-24 10:12:36 +0200103void drbd_req_destroy(struct kref *kref)
Andreas Gruenbacher9e204cd2011-01-26 18:45:11 +0100104{
Lars Ellenbergb4067772012-01-24 16:58:11 +0100105 struct drbd_request *req = container_of(kref, struct drbd_request, kref);
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +0200106 struct drbd_device *device = req->device;
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100107 const unsigned s = req->rq_state;
108
109 if ((req->master_bio && !(s & RQ_POSTPONED)) ||
110 atomic_read(&req->completion_ref) ||
111 (s & RQ_LOCAL_PENDING) ||
112 ((s & RQ_NET_MASK) && !(s & RQ_NET_DONE))) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200113 drbd_err(device, "drbd_req_destroy: Logic BUG rq_state = 0x%x, completion_ref = %d\n",
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100114 s, atomic_read(&req->completion_ref));
115 return;
116 }
Philipp Reisner288f4222010-05-27 15:07:43 +0200117
Lars Ellenberg844a6ae72013-11-22 12:52:03 +0100118 /* If called from mod_rq_state (expected normal case) or
119 * drbd_send_and_submit (the less likely normal path), this holds the
120 * req_lock, and req->tl_requests will typicaly be on ->transfer_log,
121 * though it may be still empty (never added to the transfer log).
122 *
123 * If called from do_retry(), we do NOT hold the req_lock, but we are
124 * still allowed to unconditionally list_del(&req->tl_requests),
125 * because it will be on a local on-stack list only. */
Lars Ellenberg2312f0b32011-11-24 10:36:25 +0100126 list_del_init(&req->tl_requests);
Philipp Reisner288f4222010-05-27 15:07:43 +0200127
Lars Ellenberg08d0dab2014-03-20 11:19:22 +0100128 /* finally remove the request from the conflict detection
129 * respective block_id verification interval tree. */
130 if (!drbd_interval_empty(&req->i)) {
131 struct rb_root *root;
132
133 if (s & RQ_WRITE)
134 root = &device->write_requests;
135 else
136 root = &device->read_requests;
137 drbd_remove_request_interval(root, req);
138 } else if (s & (RQ_NET_MASK & ~RQ_NET_DONE) && req->i.size != 0)
139 drbd_err(device, "drbd_req_destroy: Logic BUG: interval empty, but: rq_state=0x%x, sect=%llu, size=%u\n",
140 s, (unsigned long long)req->i.sector, req->i.size);
141
Philipp Reisnerb411b362009-09-25 16:07:19 -0700142 /* if it was a write, we may have to set the corresponding
143 * bit(s) out-of-sync first. If it had a local part, we need to
144 * release the reference to the activity log. */
Lars Ellenbergb4067772012-01-24 16:58:11 +0100145 if (s & RQ_WRITE) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700146 /* Set out-of-sync unless both OK flags are set
147 * (local only or remote failed).
148 * Other places where we set out-of-sync:
149 * READ with local io-error */
Philipp Reisnerb411b362009-09-25 16:07:19 -0700150
Lars Ellenberg70f17b62012-09-03 14:08:35 +0200151 /* There is a special case:
152 * we may notice late that IO was suspended,
153 * and postpone, or schedule for retry, a write,
154 * before it even was submitted or sent.
155 * In that case we do not want to touch the bitmap at all.
156 */
157 if ((s & (RQ_POSTPONED|RQ_LOCAL_MASK|RQ_NET_MASK)) != RQ_POSTPONED) {
Philipp Reisnerd7644012012-08-28 14:39:44 +0200158 if (!(s & RQ_NET_OK) || !(s & RQ_LOCAL_OK))
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200159 drbd_set_out_of_sync(device, req->i.sector, req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700160
Philipp Reisnerd7644012012-08-28 14:39:44 +0200161 if ((s & RQ_NET_OK) && (s & RQ_LOCAL_OK) && (s & RQ_NET_SIS))
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200162 drbd_set_in_sync(device, req->i.sector, req->i.size);
Philipp Reisnerd7644012012-08-28 14:39:44 +0200163 }
Philipp Reisnerb411b362009-09-25 16:07:19 -0700164
165 /* one might be tempted to move the drbd_al_complete_io
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +0100166 * to the local io completion callback drbd_request_endio.
Philipp Reisnerb411b362009-09-25 16:07:19 -0700167 * but, if this was a mirror write, we may only
168 * drbd_al_complete_io after this is RQ_NET_DONE,
169 * otherwise the extent could be dropped from the al
170 * before it has actually been written on the peer.
171 * if we crash before our peer knows about the request,
172 * but after the extent has been dropped from the al,
173 * we would forget to resync the corresponding extent.
174 */
Philipp Reisner76590cd2012-08-29 15:23:14 +0200175 if (s & RQ_IN_ACT_LOG) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200176 if (get_ldev_if_state(device, D_FAILED)) {
177 drbd_al_complete_io(device, &req->i);
178 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700179 } else if (__ratelimit(&drbd_ratelimit_state)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200180 drbd_warn(device, "Should have called drbd_al_complete_io(, %llu, %u), "
Lars Ellenberg181286a2011-03-31 15:18:56 +0200181 "but my Disk seems to have failed :(\n",
182 (unsigned long long) req->i.sector, req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700183 }
184 }
185 }
186
Kent Overstreet0892fac2018-05-20 18:25:48 -0400187 mempool_free(req, &drbd_request_mempool);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700188}
189
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200190static void wake_all_senders(struct drbd_connection *connection)
191{
192 wake_up(&connection->sender_work.q_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700193}
Philipp Reisnerb411b362009-09-25 16:07:19 -0700194
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +0100195/* must hold resource->req_lock */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200196void start_new_tl_epoch(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700197{
Lars Ellenberg99b4d8f2012-08-07 06:42:09 +0200198 /* no point closing an epoch, if it is empty, anyways. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200199 if (connection->current_tle_writes == 0)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700200 return;
201
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200202 connection->current_tle_writes = 0;
203 atomic_inc(&connection->current_tle_nr);
204 wake_all_senders(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700205}
206
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200207void complete_master_bio(struct drbd_device *device,
Philipp Reisnerb411b362009-09-25 16:07:19 -0700208 struct bio_and_error *m)
209{
Christoph Hellwig4e4cbee2017-06-03 09:38:06 +0200210 m->bio->bi_status = errno_to_blk_status(m->error);
Christoph Hellwig4246a0b2015-07-20 15:29:37 +0200211 bio_endio(m->bio);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200212 dec_ap_bio(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700213}
214
Andreas Gruenbacher53840642011-01-28 10:31:04 +0100215
Philipp Reisnerb411b362009-09-25 16:07:19 -0700216/* Helper for __req_mod().
217 * Set m->bio to the master bio, if it is fit to be completed,
218 * or leave it alone (it is initialized to NULL in __req_mod),
219 * if it has already been completed, or cannot be completed yet.
220 * If m->bio is set, the error status to be returned is placed in m->error.
221 */
Lars Ellenberg6870ca62012-03-26 17:02:45 +0200222static
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100223void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700224{
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100225 const unsigned s = req->rq_state;
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +0200226 struct drbd_device *device = req->device;
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100227 int error, ok;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700228
Philipp Reisnerb411b362009-09-25 16:07:19 -0700229 /* we must not complete the master bio, while it is
230 * still being processed by _drbd_send_zc_bio (drbd_send_dblock)
231 * not yet acknowledged by the peer
232 * not yet completed by the local io subsystem
233 * these flags may get cleared in any order by
234 * the worker,
235 * the receiver,
236 * the bio_endio completion callbacks.
237 */
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100238 if ((s & RQ_LOCAL_PENDING && !(s & RQ_LOCAL_ABORTED)) ||
239 (s & RQ_NET_QUEUED) || (s & RQ_NET_PENDING) ||
240 (s & RQ_COMPLETION_SUSP)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200241 drbd_err(device, "drbd_req_complete: Logic BUG rq_state = 0x%x\n", s);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700242 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700243 }
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100244
245 if (!req->master_bio) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200246 drbd_err(device, "drbd_req_complete: Logic BUG, master_bio == NULL!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700247 return;
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100248 }
Philipp Reisnerb411b362009-09-25 16:07:19 -0700249
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100250 /*
251 * figure out whether to report success or failure.
252 *
253 * report success when at least one of the operations succeeded.
254 * or, to put the other way,
255 * only report failure, when both operations failed.
256 *
257 * what to do about the failures is handled elsewhere.
258 * what we need to do here is just: complete the master_bio.
259 *
260 * local completion error, if any, has been stored as ERR_PTR
261 * in private_bio within drbd_request_endio.
262 */
263 ok = (s & RQ_LOCAL_OK) || (s & RQ_NET_OK);
264 error = PTR_ERR(req->private_bio);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700265
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100266 /* Before we can signal completion to the upper layers,
267 * we may need to close the current transfer log epoch.
268 * We are within the request lock, so we can simply compare
269 * the request epoch number with the current transfer log
270 * epoch number. If they match, increase the current_tle_nr,
271 * and reset the transfer log epoch write_cnt.
272 */
Christoph Hellwig70246282016-07-19 11:28:41 +0200273 if (op_is_write(bio_op(req->master_bio)) &&
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +0200274 req->epoch == atomic_read(&first_peer_device(device)->connection->current_tle_nr))
275 start_new_tl_epoch(first_peer_device(device)->connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700276
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100277 /* Update disk stats */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200278 _drbd_end_io_acct(device, req);
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100279
280 /* If READ failed,
281 * have it be pushed back to the retry work queue,
282 * so it will re-enter __drbd_make_request(),
283 * and be re-assigned to a suitable local or remote path,
284 * or failed if we do not have access to good data anymore.
285 *
286 * Unless it was failed early by __drbd_make_request(),
287 * because no path was available, in which case
288 * it was not even added to the transfer_log.
289 *
Christoph Hellwig70246282016-07-19 11:28:41 +0200290 * read-ahead may fail, and will not be retried.
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100291 *
292 * WRITE should have used all available paths already.
293 */
Christoph Hellwig70246282016-07-19 11:28:41 +0200294 if (!ok &&
295 bio_op(req->master_bio) == REQ_OP_READ &&
Jens Axboe1eff9d32016-08-05 15:35:16 -0600296 !(req->master_bio->bi_opf & REQ_RAHEAD) &&
Christoph Hellwig70246282016-07-19 11:28:41 +0200297 !list_empty(&req->tl_requests))
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100298 req->rq_state |= RQ_POSTPONED;
299
300 if (!(req->rq_state & RQ_POSTPONED)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700301 m->error = ok ? 0 : (error ?: -EIO);
302 m->bio = req->master_bio;
303 req->master_bio = NULL;
Lars Ellenberg08d0dab2014-03-20 11:19:22 +0100304 /* We leave it in the tree, to be able to verify later
305 * write-acks in protocol != C during resync.
306 * But we mark it as "complete", so it won't be counted as
307 * conflict in a multi-primary setup. */
308 req->i.completed = true;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700309 }
Lars Ellenberg08d0dab2014-03-20 11:19:22 +0100310
311 if (req->i.waiting)
312 wake_up(&device->misc_wait);
Lars Ellenberg844a6ae72013-11-22 12:52:03 +0100313
314 /* Either we are about to complete to upper layers,
315 * or we will restart this request.
316 * In either case, the request object will be destroyed soon,
317 * so better remove it from all lists. */
318 list_del_init(&req->req_pending_master_completion);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700319}
320
Lars Ellenberg844a6ae72013-11-22 12:52:03 +0100321/* still holds resource->req_lock */
Lars Ellenberga00ebd12017-05-11 10:21:46 +0200322static void drbd_req_put_completion_ref(struct drbd_request *req, struct bio_and_error *m, int put)
Philipp Reisnercfa03412010-06-23 17:18:51 +0200323{
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +0200324 struct drbd_device *device = req->device;
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +0200325 D_ASSERT(device, m || (req->rq_state & RQ_POSTPONED));
Philipp Reisnercfa03412010-06-23 17:18:51 +0200326
Lars Ellenberga00ebd12017-05-11 10:21:46 +0200327 if (!put)
328 return;
329
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100330 if (!atomic_sub_and_test(put, &req->completion_ref))
Lars Ellenberga00ebd12017-05-11 10:21:46 +0200331 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700332
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100333 drbd_req_complete(req, m);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700334
Lars Ellenberga00ebd12017-05-11 10:21:46 +0200335 /* local completion may still come in later,
336 * we need to keep the req object around. */
337 if (req->rq_state & RQ_LOCAL_ABORTED)
338 return;
339
Lars Ellenberg9a278a72012-07-24 10:12:36 +0200340 if (req->rq_state & RQ_POSTPONED) {
341 /* don't destroy the req object just yet,
342 * but queue it for retry */
343 drbd_restart_request(req);
Lars Ellenberga00ebd12017-05-11 10:21:46 +0200344 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700345 }
346
Lars Ellenberga00ebd12017-05-11 10:21:46 +0200347 kref_put(&req->kref, drbd_req_destroy);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700348}
349
Lars Ellenberg7753a4c12013-11-22 13:00:12 +0100350static void set_if_null_req_next(struct drbd_peer_device *peer_device, struct drbd_request *req)
351{
352 struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
353 if (!connection)
354 return;
355 if (connection->req_next == NULL)
356 connection->req_next = req;
357}
358
359static void advance_conn_req_next(struct drbd_peer_device *peer_device, struct drbd_request *req)
360{
361 struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
362 if (!connection)
363 return;
364 if (connection->req_next != req)
365 return;
366 list_for_each_entry_continue(req, &connection->transfer_log, tl_requests) {
367 const unsigned s = req->rq_state;
368 if (s & RQ_NET_QUEUED)
369 break;
370 }
371 if (&req->tl_requests == &connection->transfer_log)
372 req = NULL;
373 connection->req_next = req;
374}
375
376static void set_if_null_req_ack_pending(struct drbd_peer_device *peer_device, struct drbd_request *req)
377{
378 struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
379 if (!connection)
380 return;
381 if (connection->req_ack_pending == NULL)
382 connection->req_ack_pending = req;
383}
384
385static void advance_conn_req_ack_pending(struct drbd_peer_device *peer_device, struct drbd_request *req)
386{
387 struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
388 if (!connection)
389 return;
390 if (connection->req_ack_pending != req)
391 return;
392 list_for_each_entry_continue(req, &connection->transfer_log, tl_requests) {
393 const unsigned s = req->rq_state;
394 if ((s & RQ_NET_SENT) && (s & RQ_NET_PENDING))
395 break;
396 }
397 if (&req->tl_requests == &connection->transfer_log)
398 req = NULL;
399 connection->req_ack_pending = req;
400}
401
402static void set_if_null_req_not_net_done(struct drbd_peer_device *peer_device, struct drbd_request *req)
403{
404 struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
405 if (!connection)
406 return;
407 if (connection->req_not_net_done == NULL)
408 connection->req_not_net_done = req;
409}
410
411static void advance_conn_req_not_net_done(struct drbd_peer_device *peer_device, struct drbd_request *req)
412{
413 struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
414 if (!connection)
415 return;
416 if (connection->req_not_net_done != req)
417 return;
418 list_for_each_entry_continue(req, &connection->transfer_log, tl_requests) {
419 const unsigned s = req->rq_state;
420 if ((s & RQ_NET_SENT) && !(s & RQ_NET_DONE))
421 break;
422 }
423 if (&req->tl_requests == &connection->transfer_log)
424 req = NULL;
425 connection->req_not_net_done = req;
426}
427
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100428/* I'd like this to be the only place that manipulates
429 * req->completion_ref and req->kref. */
430static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m,
431 int clear, int set)
432{
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +0200433 struct drbd_device *device = req->device;
Lars Ellenberg7753a4c12013-11-22 13:00:12 +0100434 struct drbd_peer_device *peer_device = first_peer_device(device);
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100435 unsigned s = req->rq_state;
436 int c_put = 0;
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100437
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200438 if (drbd_suspended(device) && !((s | clear) & RQ_COMPLETION_SUSP))
Philipp Reisner5af2e8c2012-08-14 11:28:52 +0200439 set |= RQ_COMPLETION_SUSP;
440
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100441 /* apply */
442
443 req->rq_state &= ~clear;
444 req->rq_state |= set;
445
446 /* no change? */
447 if (req->rq_state == s)
448 return;
449
450 /* intent: get references */
451
Peter Zijlstrabdfafc42016-11-14 17:34:19 +0100452 kref_get(&req->kref);
453
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100454 if (!(s & RQ_LOCAL_PENDING) && (set & RQ_LOCAL_PENDING))
455 atomic_inc(&req->completion_ref);
456
457 if (!(s & RQ_NET_PENDING) && (set & RQ_NET_PENDING)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200458 inc_ap_pending(device);
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100459 atomic_inc(&req->completion_ref);
460 }
461
Lars Ellenberge5f891b2013-11-22 12:32:01 +0100462 if (!(s & RQ_NET_QUEUED) && (set & RQ_NET_QUEUED)) {
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100463 atomic_inc(&req->completion_ref);
Lars Ellenberg7753a4c12013-11-22 13:00:12 +0100464 set_if_null_req_next(peer_device, req);
Lars Ellenberge5f891b2013-11-22 12:32:01 +0100465 }
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100466
467 if (!(s & RQ_EXP_BARR_ACK) && (set & RQ_EXP_BARR_ACK))
468 kref_get(&req->kref); /* wait for the DONE */
469
Lars Ellenberge5f891b2013-11-22 12:32:01 +0100470 if (!(s & RQ_NET_SENT) && (set & RQ_NET_SENT)) {
Philipp Reisner668700b2015-03-16 16:08:29 +0100471 /* potentially already completed in the ack_receiver thread */
Lars Ellenberg7753a4c12013-11-22 13:00:12 +0100472 if (!(s & RQ_NET_DONE)) {
Lars Ellenberge5f891b2013-11-22 12:32:01 +0100473 atomic_add(req->i.size >> 9, &device->ap_in_flight);
Lars Ellenberg7753a4c12013-11-22 13:00:12 +0100474 set_if_null_req_not_net_done(peer_device, req);
475 }
Lars Ellenbergf85d9f22015-05-18 14:08:46 +0200476 if (req->rq_state & RQ_NET_PENDING)
Lars Ellenberg7753a4c12013-11-22 13:00:12 +0100477 set_if_null_req_ack_pending(peer_device, req);
Lars Ellenberge5f891b2013-11-22 12:32:01 +0100478 }
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100479
Philipp Reisner5af2e8c2012-08-14 11:28:52 +0200480 if (!(s & RQ_COMPLETION_SUSP) && (set & RQ_COMPLETION_SUSP))
481 atomic_inc(&req->completion_ref);
482
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100483 /* progress: put references */
484
485 if ((s & RQ_COMPLETION_SUSP) && (clear & RQ_COMPLETION_SUSP))
486 ++c_put;
487
488 if (!(s & RQ_LOCAL_ABORTED) && (set & RQ_LOCAL_ABORTED)) {
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +0200489 D_ASSERT(device, req->rq_state & RQ_LOCAL_PENDING);
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100490 ++c_put;
491 }
492
493 if ((s & RQ_LOCAL_PENDING) && (clear & RQ_LOCAL_PENDING)) {
494 if (req->rq_state & RQ_LOCAL_ABORTED)
Peter Zijlstrabdfafc42016-11-14 17:34:19 +0100495 kref_put(&req->kref, drbd_req_destroy);
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100496 else
497 ++c_put;
Lars Ellenberg844a6ae72013-11-22 12:52:03 +0100498 list_del_init(&req->req_pending_local);
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100499 }
500
501 if ((s & RQ_NET_PENDING) && (clear & RQ_NET_PENDING)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200502 dec_ap_pending(device);
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100503 ++c_put;
Lars Ellenberge5f891b2013-11-22 12:32:01 +0100504 req->acked_jif = jiffies;
Lars Ellenberg7753a4c12013-11-22 13:00:12 +0100505 advance_conn_req_ack_pending(peer_device, req);
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100506 }
507
Lars Ellenberg7753a4c12013-11-22 13:00:12 +0100508 if ((s & RQ_NET_QUEUED) && (clear & RQ_NET_QUEUED)) {
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100509 ++c_put;
Lars Ellenberg7753a4c12013-11-22 13:00:12 +0100510 advance_conn_req_next(peer_device, req);
511 }
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100512
Lars Ellenberge5f891b2013-11-22 12:32:01 +0100513 if (!(s & RQ_NET_DONE) && (set & RQ_NET_DONE)) {
514 if (s & RQ_NET_SENT)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200515 atomic_sub(req->i.size >> 9, &device->ap_in_flight);
Lars Ellenberge5f891b2013-11-22 12:32:01 +0100516 if (s & RQ_EXP_BARR_ACK)
Peter Zijlstrabdfafc42016-11-14 17:34:19 +0100517 kref_put(&req->kref, drbd_req_destroy);
Lars Ellenberge5f891b2013-11-22 12:32:01 +0100518 req->net_done_jif = jiffies;
Lars Ellenberg7753a4c12013-11-22 13:00:12 +0100519
520 /* in ahead/behind mode, or just in case,
521 * before we finally destroy this request,
522 * the caching pointers must not reference it anymore */
523 advance_conn_req_next(peer_device, req);
524 advance_conn_req_ack_pending(peer_device, req);
525 advance_conn_req_not_net_done(peer_device, req);
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100526 }
527
528 /* potentially complete and destroy */
529
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100530 /* If we made progress, retry conflicting peer requests, if any. */
531 if (req->i.waiting)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200532 wake_up(&device->misc_wait);
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100533
Lars Ellenberga00ebd12017-05-11 10:21:46 +0200534 drbd_req_put_completion_ref(req, m, c_put);
535 kref_put(&req->kref, drbd_req_destroy);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700536}
537
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200538static void drbd_report_io_error(struct drbd_device *device, struct drbd_request *req)
Lars Ellenbergccae7862012-09-26 14:07:04 +0200539{
540 char b[BDEVNAME_SIZE];
541
Lars Ellenberg42839f62012-09-27 15:19:38 +0200542 if (!__ratelimit(&drbd_ratelimit_state))
Lars Ellenbergccae7862012-09-26 14:07:04 +0200543 return;
544
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200545 drbd_warn(device, "local %s IO error sector %llu+%u on %s\n",
Lars Ellenbergccae7862012-09-26 14:07:04 +0200546 (req->rq_state & RQ_WRITE) ? "WRITE" : "READ",
Lars Ellenberg42839f62012-09-27 15:19:38 +0200547 (unsigned long long)req->i.sector,
548 req->i.size >> 9,
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200549 bdevname(device->ldev->backing_bdev, b));
Lars Ellenbergccae7862012-09-26 14:07:04 +0200550}
551
Lars Ellenberge5f891b2013-11-22 12:32:01 +0100552/* Helper for HANDED_OVER_TO_NETWORK.
553 * Is this a protocol A write (neither WRITE_ACK nor RECEIVE_ACK expected)?
554 * Is it also still "PENDING"?
555 * --> If so, clear PENDING and set NET_OK below.
556 * If it is a protocol A write, but not RQ_PENDING anymore, neg-ack was faster
557 * (and we must not set RQ_NET_OK) */
558static inline bool is_pending_write_protocol_A(struct drbd_request *req)
559{
560 return (req->rq_state &
561 (RQ_WRITE|RQ_NET_PENDING|RQ_EXP_WRITE_ACK|RQ_EXP_RECEIVE_ACK))
562 == (RQ_WRITE|RQ_NET_PENDING);
563}
564
/* obviously this could be coded as many single functions
 * instead of one huge switch,
 * or by putting the code directly in the respective locations
 * (as it has been before).
 *
 * but having it this way
 *  enforces that it is all in this one place, where it is easier to audit,
 *  it makes it obvious that whatever "event" "happens" to a request should
 *  happen "atomically" within the req_lock,
 *  and it enforces that we have to think in a very structured manner
 *  about the "events" that may happen to a request during its life time ...
 */
Philipp Reisner2a806992010-06-09 14:07:43 +0200577int __req_mod(struct drbd_request *req, enum drbd_req_event what,
Philipp Reisnerb411b362009-09-25 16:07:19 -0700578 struct bio_and_error *m)
579{
Lars Ellenberg44a4d552013-11-22 12:40:58 +0100580 struct drbd_device *const device = req->device;
581 struct drbd_peer_device *const peer_device = first_peer_device(device);
582 struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
Philipp Reisner44ed1672011-04-19 17:10:19 +0200583 struct net_conf *nc;
Philipp Reisner303d1442011-04-13 16:24:47 -0700584 int p, rv = 0;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +0100585
586 if (m)
587 m->bio = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700588
Philipp Reisnerb411b362009-09-25 16:07:19 -0700589 switch (what) {
590 default:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200591 drbd_err(device, "LOGIC BUG in %s:%u\n", __FILE__ , __LINE__);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700592 break;
593
594 /* does not happen...
595 * initialization done in drbd_req_new
Andreas Gruenbacher8554df12011-01-25 15:37:43 +0100596 case CREATED:
Philipp Reisnerb411b362009-09-25 16:07:19 -0700597 break;
598 */
599
Andreas Gruenbacher8554df12011-01-25 15:37:43 +0100600 case TO_BE_SENT: /* via network */
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +0100601 /* reached via __drbd_make_request
Philipp Reisnerb411b362009-09-25 16:07:19 -0700602 * and from w_read_retry_remote */
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +0200603 D_ASSERT(device, !(req->rq_state & RQ_NET_MASK));
Philipp Reisner44ed1672011-04-19 17:10:19 +0200604 rcu_read_lock();
Lars Ellenberg44a4d552013-11-22 12:40:58 +0100605 nc = rcu_dereference(connection->net_conf);
Philipp Reisner44ed1672011-04-19 17:10:19 +0200606 p = nc->wire_protocol;
607 rcu_read_unlock();
Philipp Reisner303d1442011-04-13 16:24:47 -0700608 req->rq_state |=
609 p == DRBD_PROT_C ? RQ_EXP_WRITE_ACK :
610 p == DRBD_PROT_B ? RQ_EXP_RECEIVE_ACK : 0;
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100611 mod_rq_state(req, m, 0, RQ_NET_PENDING);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700612 break;
613
Andreas Gruenbacher8554df12011-01-25 15:37:43 +0100614 case TO_BE_SUBMITTED: /* locally */
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +0100615 /* reached via __drbd_make_request */
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +0200616 D_ASSERT(device, !(req->rq_state & RQ_LOCAL_MASK));
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100617 mod_rq_state(req, m, 0, RQ_LOCAL_PENDING);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700618 break;
619
Andreas Gruenbacher8554df12011-01-25 15:37:43 +0100620 case COMPLETED_OK:
Philipp Reisner2b4dd362011-03-14 13:01:50 +0100621 if (req->rq_state & RQ_WRITE)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200622 device->writ_cnt += req->i.size >> 9;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700623 else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200624 device->read_cnt += req->i.size >> 9;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700625
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100626 mod_rq_state(req, m, RQ_LOCAL_PENDING,
627 RQ_LOCAL_COMPLETED|RQ_LOCAL_OK);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700628 break;
629
Philipp Reisnercdfda632011-07-05 15:38:59 +0200630 case ABORT_DISK_IO:
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100631 mod_rq_state(req, m, 0, RQ_LOCAL_ABORTED);
Philipp Reisner2b4dd362011-03-14 13:01:50 +0100632 break;
633
Lars Ellenbergedc9f5e2012-09-27 15:18:21 +0200634 case WRITE_COMPLETED_WITH_ERROR:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200635 drbd_report_io_error(device, req);
636 __drbd_chk_io_error(device, DRBD_WRITE_ERROR);
Lars Ellenbergedc9f5e2012-09-27 15:18:21 +0200637 mod_rq_state(req, m, RQ_LOCAL_PENDING, RQ_LOCAL_COMPLETED);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700638 break;
639
Andreas Gruenbacher8554df12011-01-25 15:37:43 +0100640 case READ_COMPLETED_WITH_ERROR:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200641 drbd_set_out_of_sync(device, req->i.sector, req->i.size);
642 drbd_report_io_error(device, req);
643 __drbd_chk_io_error(device, DRBD_READ_ERROR);
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100644 /* fall through. */
645 case READ_AHEAD_COMPLETED_WITH_ERROR:
Christoph Hellwig70246282016-07-19 11:28:41 +0200646 /* it is legal to fail read-ahead, no __drbd_chk_io_error in that case. */
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100647 mod_rq_state(req, m, RQ_LOCAL_PENDING, RQ_LOCAL_COMPLETED);
Lars Ellenberg4439c402012-03-26 17:29:30 +0200648 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700649
Lars Ellenberg2f632ae2014-04-28 18:43:24 +0200650 case DISCARD_COMPLETED_NOTSUPP:
651 case DISCARD_COMPLETED_WITH_ERROR:
652 /* I'd rather not detach from local disk just because it
Bart Van Assche93054552018-10-03 13:56:25 -0700653 * failed a REQ_OP_DISCARD. */
Lars Ellenberg2f632ae2014-04-28 18:43:24 +0200654 mod_rq_state(req, m, RQ_LOCAL_PENDING, RQ_LOCAL_COMPLETED);
655 break;
656
Andreas Gruenbacher8554df12011-01-25 15:37:43 +0100657 case QUEUE_FOR_NET_READ:
Christoph Hellwig70246282016-07-19 11:28:41 +0200658 /* READ, and
Philipp Reisnerb411b362009-09-25 16:07:19 -0700659 * no local disk,
660 * or target area marked as invalid,
661 * or just got an io-error. */
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +0100662 /* from __drbd_make_request
Philipp Reisnerb411b362009-09-25 16:07:19 -0700663 * or from bio_endio during read io-error recovery */
664
Lars Ellenberg6870ca62012-03-26 17:02:45 +0200665 /* So we can verify the handle in the answer packet.
666 * Corresponding drbd_remove_request_interval is in
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100667 * drbd_req_complete() */
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +0200668 D_ASSERT(device, drbd_interval_empty(&req->i));
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200669 drbd_insert_interval(&device->read_requests, &req->i);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700670
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200671 set_bit(UNPLUG_REMOTE, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700672
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +0200673 D_ASSERT(device, req->rq_state & RQ_NET_PENDING);
674 D_ASSERT(device, (req->rq_state & RQ_LOCAL_MASK) == 0);
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100675 mod_rq_state(req, m, 0, RQ_NET_QUEUED);
Lars Ellenberg4439c402012-03-26 17:29:30 +0200676 req->w.cb = w_send_read_req;
Lars Ellenberg44a4d552013-11-22 12:40:58 +0100677 drbd_queue_work(&connection->sender_work,
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +0200678 &req->w);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700679 break;
680
Andreas Gruenbacher8554df12011-01-25 15:37:43 +0100681 case QUEUE_FOR_NET_WRITE:
Philipp Reisnerb411b362009-09-25 16:07:19 -0700682 /* assert something? */
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +0100683 /* from __drbd_make_request only */
Philipp Reisnerb411b362009-09-25 16:07:19 -0700684
Lars Ellenberg6870ca62012-03-26 17:02:45 +0200685 /* Corresponding drbd_remove_request_interval is in
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100686 * drbd_req_complete() */
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +0200687 D_ASSERT(device, drbd_interval_empty(&req->i));
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200688 drbd_insert_interval(&device->write_requests, &req->i);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700689
690 /* NOTE
691 * In case the req ended up on the transfer log before being
692 * queued on the worker, it could lead to this request being
693 * missed during cleanup after connection loss.
694 * So we have to do both operations here,
695 * within the same lock that protects the transfer log.
696 *
697 * _req_add_to_epoch(req); this has to be after the
698 * _maybe_start_new_epoch(req); which happened in
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +0100699 * __drbd_make_request, because we now may set the bit
Philipp Reisnerb411b362009-09-25 16:07:19 -0700700 * again ourselves to close the current epoch.
701 *
702 * Add req to the (now) current epoch (barrier). */
703
Lars Ellenberg83c38832009-11-03 02:22:06 +0100704 /* otherwise we may lose an unplug, which may cause some remote
705 * io-scheduler timeout to expire, increasing maximum latency,
706 * hurting performance. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200707 set_bit(UNPLUG_REMOTE, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700708
709 /* queue work item to send data */
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +0200710 D_ASSERT(device, req->rq_state & RQ_NET_PENDING);
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100711 mod_rq_state(req, m, 0, RQ_NET_QUEUED|RQ_EXP_BARR_ACK);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700712 req->w.cb = w_send_dblock;
Lars Ellenberg44a4d552013-11-22 12:40:58 +0100713 drbd_queue_work(&connection->sender_work,
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +0200714 &req->w);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700715
716 /* close the epoch, in case it outgrew the limit */
Philipp Reisner44ed1672011-04-19 17:10:19 +0200717 rcu_read_lock();
Lars Ellenberg44a4d552013-11-22 12:40:58 +0100718 nc = rcu_dereference(connection->net_conf);
Philipp Reisner44ed1672011-04-19 17:10:19 +0200719 p = nc->max_epoch_size;
720 rcu_read_unlock();
Lars Ellenberg44a4d552013-11-22 12:40:58 +0100721 if (connection->current_tle_writes >= p)
722 start_new_tl_epoch(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700723
724 break;
725
Andreas Gruenbacher8554df12011-01-25 15:37:43 +0100726 case QUEUE_FOR_SEND_OOS:
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100727 mod_rq_state(req, m, 0, RQ_NET_QUEUED);
Andreas Gruenbacher8f7bed72010-12-19 23:53:14 +0100728 req->w.cb = w_send_out_of_sync;
Lars Ellenberg44a4d552013-11-22 12:40:58 +0100729 drbd_queue_work(&connection->sender_work,
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +0200730 &req->w);
Philipp Reisner73a01a12010-10-27 14:33:00 +0200731 break;
732
Lars Ellenbergea9d6722012-03-26 16:46:39 +0200733 case READ_RETRY_REMOTE_CANCELED:
Andreas Gruenbacher8554df12011-01-25 15:37:43 +0100734 case SEND_CANCELED:
Andreas Gruenbacher8554df12011-01-25 15:37:43 +0100735 case SEND_FAILED:
Philipp Reisnerb411b362009-09-25 16:07:19 -0700736 /* real cleanup will be done from tl_clear. just update flags
737 * so it is no longer marked as on the worker queue */
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100738 mod_rq_state(req, m, RQ_NET_QUEUED, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700739 break;
740
Andreas Gruenbacher8554df12011-01-25 15:37:43 +0100741 case HANDED_OVER_TO_NETWORK:
Philipp Reisnerb411b362009-09-25 16:07:19 -0700742 /* assert something? */
Lars Ellenberge5f891b2013-11-22 12:32:01 +0100743 if (is_pending_write_protocol_A(req))
Philipp Reisnerb411b362009-09-25 16:07:19 -0700744 /* this is what is dangerous about protocol A:
745 * pretend it was successfully written on the peer. */
Lars Ellenberge5f891b2013-11-22 12:32:01 +0100746 mod_rq_state(req, m, RQ_NET_QUEUED|RQ_NET_PENDING,
747 RQ_NET_SENT|RQ_NET_OK);
748 else
749 mod_rq_state(req, m, RQ_NET_QUEUED, RQ_NET_SENT);
750 /* It is still not yet RQ_NET_DONE until the
751 * corresponding epoch barrier got acked as well,
752 * so we know what to dirty on connection loss. */
Lars Ellenberg6d49e102012-01-11 09:43:25 +0100753 break;
754
Lars Ellenberg27a434f2012-03-26 16:44:59 +0200755 case OOS_HANDED_TO_NETWORK:
Lars Ellenberg6d49e102012-01-11 09:43:25 +0100756 /* Was not set PENDING, no longer QUEUED, so is now DONE
757 * as far as this connection is concerned. */
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100758 mod_rq_state(req, m, RQ_NET_QUEUED, RQ_NET_DONE);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700759 break;
760
Andreas Gruenbacher8554df12011-01-25 15:37:43 +0100761 case CONNECTION_LOST_WHILE_PENDING:
Philipp Reisnerb411b362009-09-25 16:07:19 -0700762 /* transfer log cleanup after connection loss */
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100763 mod_rq_state(req, m,
764 RQ_NET_OK|RQ_NET_PENDING|RQ_COMPLETION_SUSP,
765 RQ_NET_DONE);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700766 break;
767
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +0200768 case CONFLICT_RESOLVED:
769 /* for superseded conflicting writes of multiple primaries,
Philipp Reisnerb411b362009-09-25 16:07:19 -0700770 * there is no need to keep anything in the tl, potential
Lars Ellenberg934722a2012-07-24 09:31:18 +0200771 * node crashes are covered by the activity log.
772 *
773 * If this request had been marked as RQ_POSTPONED before,
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +0200774 * it will actually not be completed, but "restarted",
Lars Ellenberg934722a2012-07-24 09:31:18 +0200775 * resubmitted from the retry worker context. */
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +0200776 D_ASSERT(device, req->rq_state & RQ_NET_PENDING);
777 D_ASSERT(device, req->rq_state & RQ_EXP_WRITE_ACK);
Lars Ellenberg934722a2012-07-24 09:31:18 +0200778 mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_DONE|RQ_NET_OK);
779 break;
780
Lars Ellenberg0afd5692012-03-26 16:51:11 +0200781 case WRITE_ACKED_BY_PEER_AND_SIS:
Lars Ellenberg934722a2012-07-24 09:31:18 +0200782 req->rq_state |= RQ_NET_SIS;
Andreas Gruenbacher8554df12011-01-25 15:37:43 +0100783 case WRITE_ACKED_BY_PEER:
Lars Ellenberg08d0dab2014-03-20 11:19:22 +0100784 /* Normal operation protocol C: successfully written on peer.
785 * During resync, even in protocol != C,
786 * we requested an explicit write ack anyways.
787 * Which means we cannot even assert anything here.
Lars Ellenbergd64957c2012-03-23 14:42:19 +0100788 * Nothing more to do here.
Philipp Reisnerb411b362009-09-25 16:07:19 -0700789 * We want to keep the tl in place for all protocols, to cater
Lars Ellenbergd64957c2012-03-23 14:42:19 +0100790 * for volatile write-back caches on lower level devices. */
Philipp Reisner303d1442011-04-13 16:24:47 -0700791 goto ack_common;
Andreas Gruenbacher8554df12011-01-25 15:37:43 +0100792 case RECV_ACKED_BY_PEER:
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +0200793 D_ASSERT(device, req->rq_state & RQ_EXP_RECEIVE_ACK);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700794 /* protocol B; pretends to be successfully written on peer.
Andreas Gruenbacher8554df12011-01-25 15:37:43 +0100795 * see also notes above in HANDED_OVER_TO_NETWORK about
Philipp Reisnerb411b362009-09-25 16:07:19 -0700796 * protocol != C */
Philipp Reisner303d1442011-04-13 16:24:47 -0700797 ack_common:
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100798 mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_OK);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700799 break;
800
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +0100801 case POSTPONE_WRITE:
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +0200802 D_ASSERT(device, req->rq_state & RQ_EXP_WRITE_ACK);
Philipp Reisner303d1442011-04-13 16:24:47 -0700803 /* If this node has already detected the write conflict, the
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +0100804 * worker will be waiting on misc_wait. Wake it up once this
805 * request has completed locally.
806 */
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +0200807 D_ASSERT(device, req->rq_state & RQ_NET_PENDING);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +0100808 req->rq_state |= RQ_POSTPONED;
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100809 if (req->i.waiting)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200810 wake_up(&device->misc_wait);
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100811 /* Do not clear RQ_NET_PENDING. This request will make further
812 * progress via restart_conflicting_writes() or
813 * fail_postponed_requests(). Hopefully. */
Philipp Reisnerb411b362009-09-25 16:07:19 -0700814 break;
815
Andreas Gruenbacher8554df12011-01-25 15:37:43 +0100816 case NEG_ACKED:
Lars Ellenberg46e21bb2012-08-07 06:47:14 +0200817 mod_rq_state(req, m, RQ_NET_OK|RQ_NET_PENDING, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700818 break;
819
Andreas Gruenbacher8554df12011-01-25 15:37:43 +0100820 case FAIL_FROZEN_DISK_IO:
Philipp Reisner265be2d2010-05-31 10:14:17 +0200821 if (!(req->rq_state & RQ_LOCAL_COMPLETED))
822 break;
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100823 mod_rq_state(req, m, RQ_COMPLETION_SUSP, 0);
Philipp Reisner265be2d2010-05-31 10:14:17 +0200824 break;
825
Andreas Gruenbacher8554df12011-01-25 15:37:43 +0100826 case RESTART_FROZEN_DISK_IO:
Philipp Reisner265be2d2010-05-31 10:14:17 +0200827 if (!(req->rq_state & RQ_LOCAL_COMPLETED))
828 break;
829
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100830 mod_rq_state(req, m,
831 RQ_COMPLETION_SUSP|RQ_LOCAL_COMPLETED,
832 RQ_LOCAL_PENDING);
Philipp Reisner265be2d2010-05-31 10:14:17 +0200833
834 rv = MR_READ;
835 if (bio_data_dir(req->master_bio) == WRITE)
836 rv = MR_WRITE;
837
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200838 get_ldev(device); /* always succeeds in this call path */
Philipp Reisner265be2d2010-05-31 10:14:17 +0200839 req->w.cb = w_restart_disk_io;
Lars Ellenberg44a4d552013-11-22 12:40:58 +0100840 drbd_queue_work(&connection->sender_work,
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +0200841 &req->w);
Philipp Reisner265be2d2010-05-31 10:14:17 +0200842 break;
843
Andreas Gruenbacher8554df12011-01-25 15:37:43 +0100844 case RESEND:
Philipp Reisner509fc012012-07-31 11:22:58 +0200845 /* Simply complete (local only) READs. */
846 if (!(req->rq_state & RQ_WRITE) && !req->w.cb) {
Philipp Reisner8a0bab22012-08-07 13:28:00 +0200847 mod_rq_state(req, m, RQ_COMPLETION_SUSP, 0);
Philipp Reisner509fc012012-07-31 11:22:58 +0200848 break;
849 }
850
Philipp Reisner11b58e72010-05-12 17:08:26 +0200851 /* If RQ_NET_OK is already set, we got a P_WRITE_ACK or P_RECV_ACK
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100852 before the connection loss (B&C only); only P_BARRIER_ACK
853 (or the local completion?) was missing when we suspended.
Lars Ellenberg6870ca62012-03-26 17:02:45 +0200854 Throwing them out of the TL here by pretending we got a BARRIER_ACK.
855 During connection handshake, we ensure that the peer was not rebooted. */
Philipp Reisner11b58e72010-05-12 17:08:26 +0200856 if (!(req->rq_state & RQ_NET_OK)) {
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +0200857 /* FIXME could this possibly be a req->dw.cb == w_send_out_of_sync?
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100858 * in that case we must not set RQ_NET_PENDING. */
859
860 mod_rq_state(req, m, RQ_COMPLETION_SUSP, RQ_NET_QUEUED|RQ_NET_PENDING);
Philipp Reisner11b58e72010-05-12 17:08:26 +0200861 if (req->w.cb) {
Lars Ellenberg44a4d552013-11-22 12:40:58 +0100862 /* w.cb expected to be w_send_dblock, or w_send_read_req */
863 drbd_queue_work(&connection->sender_work,
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +0200864 &req->w);
Philipp Reisner11b58e72010-05-12 17:08:26 +0200865 rv = req->rq_state & RQ_WRITE ? MR_WRITE : MR_READ;
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100866 } /* else: FIXME can this happen? */
Philipp Reisner11b58e72010-05-12 17:08:26 +0200867 break;
868 }
Gustavo A. R. Silvae16fb3a2019-01-23 00:33:09 -0600869 /* else, fall through - to BARRIER_ACKED */
Philipp Reisner11b58e72010-05-12 17:08:26 +0200870
Andreas Gruenbacher8554df12011-01-25 15:37:43 +0100871 case BARRIER_ACKED:
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100872 /* barrier ack for READ requests does not make sense */
Philipp Reisner288f4222010-05-27 15:07:43 +0200873 if (!(req->rq_state & RQ_WRITE))
874 break;
875
Philipp Reisnerb411b362009-09-25 16:07:19 -0700876 if (req->rq_state & RQ_NET_PENDING) {
Andreas Gruenbachera209b4a2011-08-17 12:43:25 +0200877 /* barrier came in before all requests were acked.
Philipp Reisnerb411b362009-09-25 16:07:19 -0700878 * this is bad, because if the connection is lost now,
879 * we won't be able to clean them up... */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200880 drbd_err(device, "FIXME (BARRIER_ACKED but pending)\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700881 }
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100882 /* Allowed to complete requests, even while suspended.
883 * As this is called for all requests within a matching epoch,
884 * we need to filter, and only set RQ_NET_DONE for those that
885 * have actually been on the wire. */
886 mod_rq_state(req, m, RQ_COMPLETION_SUSP,
887 (req->rq_state & RQ_NET_MASK) ? RQ_NET_DONE : 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700888 break;
889
Andreas Gruenbacher8554df12011-01-25 15:37:43 +0100890 case DATA_RECEIVED:
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +0200891 D_ASSERT(device, req->rq_state & RQ_NET_PENDING);
Lars Ellenberga0d856d2012-01-24 17:19:42 +0100892 mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_OK|RQ_NET_DONE);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700893 break;
Lars Ellenberg7074e4a2013-03-27 14:08:41 +0100894
895 case QUEUE_AS_DRBD_BARRIER:
Lars Ellenberg44a4d552013-11-22 12:40:58 +0100896 start_new_tl_epoch(connection);
Lars Ellenberg7074e4a2013-03-27 14:08:41 +0100897 mod_rq_state(req, m, 0, RQ_NET_OK|RQ_NET_DONE);
898 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700899 };
Philipp Reisner2a806992010-06-09 14:07:43 +0200900
901 return rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700902}
903
904/* we may do a local read if:
905 * - we are consistent (of course),
906 * - or we are generally inconsistent,
907 * BUT we are still/already IN SYNC for this area.
908 * since size may be bigger than BM_BLOCK_SIZE,
909 * we may need to check several bits.
910 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200911static bool drbd_may_do_local_read(struct drbd_device *device, sector_t sector, int size)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700912{
913 unsigned long sbnr, ebnr;
914 sector_t esector, nr_sectors;
915
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200916 if (device->state.disk == D_UP_TO_DATE)
Andreas Gruenbacher0da34df2010-12-19 20:48:29 +0100917 return true;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200918 if (device->state.disk != D_INCONSISTENT)
Andreas Gruenbacher0da34df2010-12-19 20:48:29 +0100919 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700920 esector = sector + (size >> 9) - 1;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200921 nr_sectors = drbd_get_capacity(device->this_bdev);
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +0200922 D_ASSERT(device, sector < nr_sectors);
923 D_ASSERT(device, esector < nr_sectors);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700924
925 sbnr = BM_SECT_TO_BIT(sector);
926 ebnr = BM_SECT_TO_BIT(esector);
927
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200928 return drbd_bm_count_bits(device, sbnr, ebnr) == 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700929}
930
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200931static bool remote_due_to_read_balancing(struct drbd_device *device, sector_t sector,
Lars Ellenberg5da9c832012-03-29 17:04:14 +0200932 enum drbd_read_balancing rbm)
Philipp Reisner380207d2011-11-11 12:31:20 +0100933{
Philipp Reisner380207d2011-11-11 12:31:20 +0100934 struct backing_dev_info *bdi;
Philipp Reisnerd60de032011-11-17 10:12:31 +0100935 int stripe_shift;
Philipp Reisner380207d2011-11-11 12:31:20 +0100936
Philipp Reisner380207d2011-11-11 12:31:20 +0100937 switch (rbm) {
938 case RB_CONGESTED_REMOTE:
Jan Karadc3b17c2017-02-02 15:56:50 +0100939 bdi = device->ldev->backing_bdev->bd_disk->queue->backing_dev_info;
Philipp Reisner380207d2011-11-11 12:31:20 +0100940 return bdi_read_congested(bdi);
941 case RB_LEAST_PENDING:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200942 return atomic_read(&device->local_cnt) >
943 atomic_read(&device->ap_pending_cnt) + atomic_read(&device->rs_pending_cnt);
Philipp Reisnerd60de032011-11-17 10:12:31 +0100944 case RB_32K_STRIPING: /* stripe_shift = 15 */
945 case RB_64K_STRIPING:
946 case RB_128K_STRIPING:
947 case RB_256K_STRIPING:
948 case RB_512K_STRIPING:
949 case RB_1M_STRIPING: /* stripe_shift = 20 */
950 stripe_shift = (rbm - RB_32K_STRIPING + 15);
951 return (sector >> (stripe_shift - 9)) & 1;
Philipp Reisner380207d2011-11-11 12:31:20 +0100952 case RB_ROUND_ROBIN:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200953 return test_and_change_bit(READ_BALANCE_RR, &device->flags);
Philipp Reisner380207d2011-11-11 12:31:20 +0100954 case RB_PREFER_REMOTE:
955 return true;
956 case RB_PREFER_LOCAL:
957 default:
958 return false;
959 }
960}
961
Andreas Gruenbacher6024fec2011-01-28 15:53:51 +0100962/*
963 * complete_conflicting_writes - wait for any conflicting write requests
964 *
965 * The write_requests tree contains all active write requests which we
966 * currently know about. Wait for any requests to complete which conflict with
967 * the new one.
Lars Ellenberg648e46b2012-03-26 20:12:24 +0200968 *
969 * Only way out: remove the conflicting intervals from the tree.
Andreas Gruenbacher6024fec2011-01-28 15:53:51 +0100970 */
Lars Ellenberg648e46b2012-03-26 20:12:24 +0200971static void complete_conflicting_writes(struct drbd_request *req)
Andreas Gruenbacher6024fec2011-01-28 15:53:51 +0100972{
Lars Ellenberg648e46b2012-03-26 20:12:24 +0200973 DEFINE_WAIT(wait);
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +0200974 struct drbd_device *device = req->device;
Lars Ellenberg648e46b2012-03-26 20:12:24 +0200975 struct drbd_interval *i;
976 sector_t sector = req->i.sector;
977 int size = req->i.size;
Andreas Gruenbacher6024fec2011-01-28 15:53:51 +0100978
Lars Ellenberg648e46b2012-03-26 20:12:24 +0200979 for (;;) {
Lars Ellenberg1b228c92016-06-14 00:26:17 +0200980 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
981 /* Ignore, if already completed to upper layers. */
982 if (i->completed)
983 continue;
984 /* Handle the first found overlap. After the schedule
985 * we have to restart the tree walk. */
Lars Ellenberg648e46b2012-03-26 20:12:24 +0200986 break;
Lars Ellenberg1b228c92016-06-14 00:26:17 +0200987 }
988 if (!i) /* if any */
989 break;
990
Lars Ellenberg648e46b2012-03-26 20:12:24 +0200991 /* Indicate to wake up device->misc_wait on progress. */
Lars Ellenberg1b228c92016-06-14 00:26:17 +0200992 prepare_to_wait(&device->misc_wait, &wait, TASK_UNINTERRUPTIBLE);
Lars Ellenberg648e46b2012-03-26 20:12:24 +0200993 i->waiting = true;
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200994 spin_unlock_irq(&device->resource->req_lock);
Lars Ellenberg648e46b2012-03-26 20:12:24 +0200995 schedule();
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200996 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacher6024fec2011-01-28 15:53:51 +0100997 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200998 finish_wait(&device->misc_wait, &wait);
Andreas Gruenbacher6024fec2011-01-28 15:53:51 +0100999}
1000
Fabian Frederick7e5fec32016-06-14 00:26:35 +02001001/* called within req_lock */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001002static void maybe_pull_ahead(struct drbd_device *device)
Lars Ellenberg0d5934e2012-06-08 14:17:36 +02001003{
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001004 struct drbd_connection *connection = first_peer_device(device)->connection;
Lars Ellenberg5da9c832012-03-29 17:04:14 +02001005 struct net_conf *nc;
1006 bool congested = false;
1007 enum drbd_on_congestion on_congestion;
1008
Lars Ellenberg607f25e2013-03-27 14:08:45 +01001009 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001010 nc = rcu_dereference(connection->net_conf);
Lars Ellenberg5da9c832012-03-29 17:04:14 +02001011 on_congestion = nc ? nc->on_congestion : OC_BLOCK;
Lars Ellenberg607f25e2013-03-27 14:08:45 +01001012 rcu_read_unlock();
Lars Ellenberg5da9c832012-03-29 17:04:14 +02001013 if (on_congestion == OC_BLOCK ||
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001014 connection->agreed_pro_version < 96)
Lars Ellenberg3b9ef852012-07-30 09:06:26 +02001015 return;
Lars Ellenberg0d5934e2012-06-08 14:17:36 +02001016
Lars Ellenberg0c066bc2014-03-20 14:04:35 +01001017 if (on_congestion == OC_PULL_AHEAD && device->state.conn == C_AHEAD)
1018 return; /* nothing to do ... */
1019
Lars Ellenberg0d5934e2012-06-08 14:17:36 +02001020 /* If I don't even have good local storage, we can not reasonably try
1021 * to pull ahead of the peer. We also need the local reference to make
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001022 * sure device->act_log is there.
Lars Ellenberg0d5934e2012-06-08 14:17:36 +02001023 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001024 if (!get_ldev_if_state(device, D_UP_TO_DATE))
Lars Ellenberg0d5934e2012-06-08 14:17:36 +02001025 return;
1026
Lars Ellenberg5da9c832012-03-29 17:04:14 +02001027 if (nc->cong_fill &&
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001028 atomic_read(&device->ap_in_flight) >= nc->cong_fill) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001029 drbd_info(device, "Congestion-fill threshold reached\n");
Lars Ellenberg5da9c832012-03-29 17:04:14 +02001030 congested = true;
Lars Ellenberg0d5934e2012-06-08 14:17:36 +02001031 }
1032
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001033 if (device->act_log->used >= nc->cong_extents) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001034 drbd_info(device, "Congestion-extents threshold reached\n");
Lars Ellenberg5da9c832012-03-29 17:04:14 +02001035 congested = true;
Lars Ellenberg0d5934e2012-06-08 14:17:36 +02001036 }
1037
1038 if (congested) {
Lars Ellenberg99b4d8f2012-08-07 06:42:09 +02001039 /* start a new epoch for non-mirrored writes */
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001040 start_new_tl_epoch(first_peer_device(device)->connection);
Lars Ellenberg0d5934e2012-06-08 14:17:36 +02001041
Lars Ellenberg5da9c832012-03-29 17:04:14 +02001042 if (on_congestion == OC_PULL_AHEAD)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001043 _drbd_set_state(_NS(device, conn, C_AHEAD), 0, NULL);
Lars Ellenberg5da9c832012-03-29 17:04:14 +02001044 else /*nc->on_congestion == OC_DISCONNECT */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001045 _drbd_set_state(_NS(device, conn, C_DISCONNECTING), 0, NULL);
Lars Ellenberg0d5934e2012-06-08 14:17:36 +02001046 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001047 put_ldev(device);
Lars Ellenberg0d5934e2012-06-08 14:17:36 +02001048}
1049
Lars Ellenberg5da9c832012-03-29 17:04:14 +02001050/* If this returns false, and req->private_bio is still set,
1051 * this should be submitted locally.
1052 *
1053 * If it returns false, but req->private_bio is not set,
1054 * we do not have access to good data :(
1055 *
1056 * Otherwise, this destroys req->private_bio, if any,
1057 * and returns true.
1058 */
1059static bool do_remote_read(struct drbd_request *req)
1060{
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02001061 struct drbd_device *device = req->device;
Lars Ellenberg5da9c832012-03-29 17:04:14 +02001062 enum drbd_read_balancing rbm;
1063
1064 if (req->private_bio) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001065 if (!drbd_may_do_local_read(device,
Lars Ellenberg5da9c832012-03-29 17:04:14 +02001066 req->i.sector, req->i.size)) {
1067 bio_put(req->private_bio);
1068 req->private_bio = NULL;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001069 put_ldev(device);
Lars Ellenberg5da9c832012-03-29 17:04:14 +02001070 }
1071 }
1072
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001073 if (device->state.pdsk != D_UP_TO_DATE)
Lars Ellenberg5da9c832012-03-29 17:04:14 +02001074 return false;
1075
Lars Ellenberga0d856d2012-01-24 17:19:42 +01001076 if (req->private_bio == NULL)
1077 return true;
1078
Lars Ellenberg5da9c832012-03-29 17:04:14 +02001079 /* TODO: improve read balancing decisions, take into account drbd
1080 * protocol, pending requests etc. */
1081
1082 rcu_read_lock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001083 rbm = rcu_dereference(device->ldev->disk_conf)->read_balancing;
Lars Ellenberg5da9c832012-03-29 17:04:14 +02001084 rcu_read_unlock();
1085
1086 if (rbm == RB_PREFER_LOCAL && req->private_bio)
1087 return false; /* submit locally */
1088
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001089 if (remote_due_to_read_balancing(device, req->i.sector, rbm)) {
Lars Ellenberg5da9c832012-03-29 17:04:14 +02001090 if (req->private_bio) {
1091 bio_put(req->private_bio);
1092 req->private_bio = NULL;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001093 put_ldev(device);
Lars Ellenberg5da9c832012-03-29 17:04:14 +02001094 }
1095 return true;
1096 }
1097
1098 return false;
1099}
1100
Andreas Gruenbacher2e9ffde2014-08-08 17:48:00 +02001101bool drbd_should_do_remote(union drbd_dev_state s)
1102{
1103 return s.pdsk == D_UP_TO_DATE ||
1104 (s.pdsk >= D_INCONSISTENT &&
1105 s.conn >= C_WF_BITMAP_T &&
1106 s.conn < C_AHEAD);
1107 /* Before proto 96 that was >= CONNECTED instead of >= C_WF_BITMAP_T.
1108 That is equivalent since before 96 IO was frozen in the C_WF_BITMAP*
1109 states. */
1110}
1111
1112static bool drbd_should_send_out_of_sync(union drbd_dev_state s)
1113{
1114 return s.conn == C_AHEAD || s.conn == C_WF_BITMAP_S;
1115 /* pdsk = D_INCONSISTENT as a consequence. Protocol 96 check not necessary
1116 since we enter state C_AHEAD only if proto >= 96 */
1117}
1118
Lars Ellenberg5da9c832012-03-29 17:04:14 +02001119/* returns number of connections (== 1, for drbd 8.4)
1120 * expected to actually write this data,
1121 * which does NOT include those that we are L_AHEAD for. */
1122static int drbd_process_write_request(struct drbd_request *req)
1123{
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02001124 struct drbd_device *device = req->device;
Lars Ellenberg5da9c832012-03-29 17:04:14 +02001125 int remote, send_oos;
1126
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001127 remote = drbd_should_do_remote(device->state);
1128 send_oos = drbd_should_send_out_of_sync(device->state);
Lars Ellenberg5da9c832012-03-29 17:04:14 +02001129
Lars Ellenberg519b6d3e2012-08-03 02:19:09 +02001130 /* Need to replicate writes. Unless it is an empty flush,
1131 * which is better mapped to a DRBD P_BARRIER packet,
1132 * also for drbd wire protocol compatibility reasons.
1133 * If this was a flush, just start a new epoch.
1134 * Unless the current epoch was empty anyways, or we are not currently
1135 * replicating, in which case there is no point. */
1136 if (unlikely(req->i.size == 0)) {
1137 /* The only size==0 bios we expect are empty flushes. */
Jens Axboe1eff9d32016-08-05 15:35:16 -06001138 D_ASSERT(device, req->master_bio->bi_opf & REQ_PREFLUSH);
Lars Ellenberg99b4d8f2012-08-07 06:42:09 +02001139 if (remote)
Lars Ellenberg7074e4a2013-03-27 14:08:41 +01001140 _req_mod(req, QUEUE_AS_DRBD_BARRIER);
1141 return remote;
Lars Ellenberg519b6d3e2012-08-03 02:19:09 +02001142 }
1143
Lars Ellenberg5da9c832012-03-29 17:04:14 +02001144 if (!remote && !send_oos)
1145 return 0;
1146
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001147 D_ASSERT(device, !(remote && send_oos));
Lars Ellenberg5da9c832012-03-29 17:04:14 +02001148
1149 if (remote) {
1150 _req_mod(req, TO_BE_SENT);
1151 _req_mod(req, QUEUE_FOR_NET_WRITE);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001152 } else if (drbd_set_out_of_sync(device, req->i.sector, req->i.size))
Lars Ellenberg5da9c832012-03-29 17:04:14 +02001153 _req_mod(req, QUEUE_FOR_SEND_OOS);
1154
1155 return remote;
1156}
1157
Lars Ellenbergf31e5832018-12-20 17:23:42 +01001158static void drbd_process_discard_or_zeroes_req(struct drbd_request *req, int flags)
Lars Ellenberg7435e902016-06-14 00:26:22 +02001159{
Lars Ellenbergf31e5832018-12-20 17:23:42 +01001160 int err = drbd_issue_discard_or_zero_out(req->device,
1161 req->i.sector, req->i.size >> 9, flags);
1162 if (err)
Christoph Hellwig4e4cbee2017-06-03 09:38:06 +02001163 req->private_bio->bi_status = BLK_STS_IOERR;
Lars Ellenberg7435e902016-06-14 00:26:22 +02001164 bio_endio(req->private_bio);
1165}
1166
Lars Ellenberg5da9c832012-03-29 17:04:14 +02001167static void
1168drbd_submit_req_private_bio(struct drbd_request *req)
1169{
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02001170 struct drbd_device *device = req->device;
Lars Ellenberg5da9c832012-03-29 17:04:14 +02001171 struct bio *bio = req->private_bio;
Christoph Hellwig70246282016-07-19 11:28:41 +02001172 unsigned int type;
1173
1174 if (bio_op(bio) != REQ_OP_READ)
1175 type = DRBD_FAULT_DT_WR;
Jens Axboe1eff9d32016-08-05 15:35:16 -06001176 else if (bio->bi_opf & REQ_RAHEAD)
Christoph Hellwig70246282016-07-19 11:28:41 +02001177 type = DRBD_FAULT_DT_RA;
1178 else
1179 type = DRBD_FAULT_DT_RD;
Lars Ellenberg5da9c832012-03-29 17:04:14 +02001180
Christoph Hellwig74d46992017-08-23 19:10:32 +02001181 bio_set_dev(bio, device->ldev->backing_bdev);
Lars Ellenberg5da9c832012-03-29 17:04:14 +02001182
1183 /* State may have changed since we grabbed our reference on the
1184 * ->ldev member. Double check, and short-circuit to endio.
1185 * In case the last activity log transaction failed to get on
1186 * stable storage, and this is a WRITE, we may not even submit
1187 * this bio. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001188 if (get_ldev(device)) {
Christoph Hellwig70246282016-07-19 11:28:41 +02001189 if (drbd_insert_fault(device, type))
Christoph Hellwig4246a0b2015-07-20 15:29:37 +02001190 bio_io_error(bio);
Lars Ellenbergf31e5832018-12-20 17:23:42 +01001191 else if (bio_op(bio) == REQ_OP_WRITE_ZEROES)
1192 drbd_process_discard_or_zeroes_req(req, EE_ZEROOUT |
1193 ((bio->bi_opf & REQ_NOUNMAP) ? 0 : EE_TRIM));
1194 else if (bio_op(bio) == REQ_OP_DISCARD)
1195 drbd_process_discard_or_zeroes_req(req, EE_TRIM);
Lars Ellenberg5da9c832012-03-29 17:04:14 +02001196 else
1197 generic_make_request(bio);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001198 put_ldev(device);
Lars Ellenberg5da9c832012-03-29 17:04:14 +02001199 } else
Christoph Hellwig4246a0b2015-07-20 15:29:37 +02001200 bio_io_error(bio);
Lars Ellenberg5da9c832012-03-29 17:04:14 +02001201}
1202
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001203static void drbd_queue_write(struct drbd_device *device, struct drbd_request *req)
Lars Ellenberg779b3fe2013-03-19 18:16:54 +01001204{
Lars Ellenberg844a6ae72013-11-22 12:52:03 +01001205 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001206 list_add_tail(&req->tl_requests, &device->submit.writes);
Lars Ellenberg844a6ae72013-11-22 12:52:03 +01001207 list_add_tail(&req->req_pending_master_completion,
1208 &device->pending_master_completion[1 /* WRITE */]);
1209 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001210 queue_work(device->submit.wq, &device->submit.worker);
Lars Ellenbergf5b90b62014-05-07 22:41:28 +02001211 /* do_submit() may sleep internally on al_wait, too */
1212 wake_up(&device->al_wait);
Lars Ellenberg779b3fe2013-03-19 18:16:54 +01001213}
1214
Lars Ellenberg6d9febe2013-03-19 18:16:50 +01001215/* returns the new drbd_request pointer, if the caller is expected to
1216 * drbd_send_and_submit() it (to save latency), or NULL if we queued the
1217 * request on the submitter thread.
1218 * Returns ERR_PTR(-ENOMEM) if we cannot allocate a drbd_request.
1219 */
Rashika Kheria01cd2632013-12-19 15:12:27 +05301220static struct drbd_request *
Lars Ellenberge5f891b2013-11-22 12:32:01 +01001221drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long start_jif)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001222{
Lars Ellenberg6d9febe2013-03-19 18:16:50 +01001223 const int rw = bio_data_dir(bio);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001224 struct drbd_request *req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001225
1226 /* allocate outside of all locks; */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001227 req = drbd_req_new(device, bio);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001228 if (!req) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001229 dec_ap_bio(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001230 /* only pass the error to the upper layers.
1231 * if user cannot handle io errors, that's not our business. */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001232 drbd_err(device, "could not kmalloc() req\n");
Christoph Hellwig4e4cbee2017-06-03 09:38:06 +02001233 bio->bi_status = BLK_STS_RESOURCE;
Christoph Hellwig4246a0b2015-07-20 15:29:37 +02001234 bio_endio(bio);
Lars Ellenberg6d9febe2013-03-19 18:16:50 +01001235 return ERR_PTR(-ENOMEM);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001236 }
Lars Ellenberge5f891b2013-11-22 12:32:01 +01001237 req->start_jif = start_jif;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001238
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001239 if (!get_ldev(device)) {
Lars Ellenberg5da9c832012-03-29 17:04:14 +02001240 bio_put(req->private_bio);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001241 req->private_bio = NULL;
1242 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001243
Lars Ellenberg7e8c2882013-03-19 18:16:57 +01001244 /* Update disk stats */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001245 _drbd_start_io_acct(device, req);
Lars Ellenberg7e8c2882013-03-19 18:16:57 +01001246
Lars Ellenberg7435e902016-06-14 00:26:22 +02001247 /* process discards always from our submitter thread */
Bart Van Asschefad2d4e2018-06-25 15:51:30 -07001248 if (bio_op(bio) == REQ_OP_WRITE_ZEROES ||
1249 bio_op(bio) == REQ_OP_DISCARD)
Lars Ellenberg7435e902016-06-14 00:26:22 +02001250 goto queue_for_submitter_thread;
1251
Lars Ellenberg519b6d3e2012-08-03 02:19:09 +02001252 if (rw == WRITE && req->private_bio && req->i.size
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001253 && !test_bit(AL_SUSPENDED, &device->flags)) {
Lars Ellenberg7435e902016-06-14 00:26:22 +02001254 if (!drbd_al_begin_io_fastpath(device, &req->i))
1255 goto queue_for_submitter_thread;
Philipp Reisner07782862010-08-31 12:00:50 +02001256 req->rq_state |= RQ_IN_ACT_LOG;
Lars Ellenberge5f891b2013-11-22 12:32:01 +01001257 req->in_actlog_jif = jiffies;
Philipp Reisner07782862010-08-31 12:00:50 +02001258 }
Lars Ellenberg6d9febe2013-03-19 18:16:50 +01001259 return req;
Lars Ellenberg7435e902016-06-14 00:26:22 +02001260
1261 queue_for_submitter_thread:
1262 atomic_inc(&device->ap_actlog_cnt);
1263 drbd_queue_write(device, req);
1264 return NULL;
Lars Ellenberg6d9febe2013-03-19 18:16:50 +01001265}
1266
Lars Ellenberg0ead5cc2016-06-14 00:26:27 +02001267/* Require at least one path to current data.
1268 * We don't want to allow writes on C_STANDALONE D_INCONSISTENT:
1269 * We would not allow to read what was written,
1270 * we would not have bumped the data generation uuids,
1271 * we would cause data divergence for all the wrong reasons.
1272 *
1273 * If we don't see at least one D_UP_TO_DATE, we will fail this request,
1274 * which either returns EIO, or, if OND_SUSPEND_IO is set, suspends IO,
1275 * and queues for retry later.
1276 */
1277static bool may_do_writes(struct drbd_device *device)
1278{
1279 const union drbd_dev_state s = device->state;
1280 return s.disk == D_UP_TO_DATE || s.pdsk == D_UP_TO_DATE;
1281}
1282
Lars Ellenbergc51a0ef2017-08-29 10:20:32 +02001283struct drbd_plug_cb {
1284 struct blk_plug_cb cb;
1285 struct drbd_request *most_recent_req;
1286 /* do we need more? */
1287};
1288
1289static void drbd_unplug(struct blk_plug_cb *cb, bool from_schedule)
1290{
1291 struct drbd_plug_cb *plug = container_of(cb, struct drbd_plug_cb, cb);
1292 struct drbd_resource *resource = plug->cb.data;
1293 struct drbd_request *req = plug->most_recent_req;
1294
Lars Ellenbergde6978b2017-08-29 10:20:34 +02001295 kfree(cb);
Lars Ellenbergc51a0ef2017-08-29 10:20:32 +02001296 if (!req)
1297 return;
1298
1299 spin_lock_irq(&resource->req_lock);
1300 /* In case the sender did not process it yet, raise the flag to
1301 * have it followed with P_UNPLUG_REMOTE just after. */
1302 req->rq_state |= RQ_UNPLUG;
1303 /* but also queue a generic unplug */
1304 drbd_queue_unplug(req->device);
Lars Ellenbergc51a0ef2017-08-29 10:20:32 +02001305 kref_put(&req->kref, drbd_req_destroy);
Lars Ellenbergde6978b2017-08-29 10:20:34 +02001306 spin_unlock_irq(&resource->req_lock);
Lars Ellenbergc51a0ef2017-08-29 10:20:32 +02001307}
1308
1309static struct drbd_plug_cb* drbd_check_plugged(struct drbd_resource *resource)
1310{
1311 /* A lot of text to say
1312 * return (struct drbd_plug_cb*)blk_check_plugged(); */
1313 struct drbd_plug_cb *plug;
1314 struct blk_plug_cb *cb = blk_check_plugged(drbd_unplug, resource, sizeof(*plug));
1315
1316 if (cb)
1317 plug = container_of(cb, struct drbd_plug_cb, cb);
1318 else
1319 plug = NULL;
1320 return plug;
1321}
1322
1323static void drbd_update_plug(struct drbd_plug_cb *plug, struct drbd_request *req)
1324{
1325 struct drbd_request *tmp = plug->most_recent_req;
1326 /* Will be sent to some peer.
1327 * Remember to tag it with UNPLUG_REMOTE on unplug */
1328 kref_get(&req->kref);
1329 plug->most_recent_req = req;
1330 if (tmp)
1331 kref_put(&tmp->kref, drbd_req_destroy);
1332}
1333
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001334static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request *req)
Lars Ellenberg6d9febe2013-03-19 18:16:50 +01001335{
Lars Ellenberg35b5ed52013-12-04 12:07:09 +01001336 struct drbd_resource *resource = device->resource;
Christoph Hellwig70246282016-07-19 11:28:41 +02001337 const int rw = bio_data_dir(req->master_bio);
Lars Ellenberg6d9febe2013-03-19 18:16:50 +01001338 struct bio_and_error m = { NULL, };
1339 bool no_remote = false;
Lars Ellenberg35b5ed52013-12-04 12:07:09 +01001340 bool submit_private_bio = false;
Lars Ellenberg6d9febe2013-03-19 18:16:50 +01001341
Lars Ellenberg35b5ed52013-12-04 12:07:09 +01001342 spin_lock_irq(&resource->req_lock);
Andreas Gruenbacher6024fec2011-01-28 15:53:51 +01001343 if (rw == WRITE) {
Lars Ellenberg648e46b2012-03-26 20:12:24 +02001344 /* This may temporarily give up the req_lock,
1345 * but will re-aquire it before it returns here.
1346 * Needs to be before the check on drbd_suspended() */
1347 complete_conflicting_writes(req);
Lars Ellenberg607f25e2013-03-27 14:08:45 +01001348 /* no more giving up req_lock from now on! */
1349
1350 /* check for congestion, and potentially stop sending
1351 * full data updates, but start sending "dirty bits" only. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001352 maybe_pull_ahead(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001353 }
1354
Lars Ellenberg5da9c832012-03-29 17:04:14 +02001355
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001356 if (drbd_suspended(device)) {
Lars Ellenberg5da9c832012-03-29 17:04:14 +02001357 /* push back and retry: */
1358 req->rq_state |= RQ_POSTPONED;
1359 if (req->private_bio) {
1360 bio_put(req->private_bio);
1361 req->private_bio = NULL;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001362 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001363 }
Lars Ellenberg5da9c832012-03-29 17:04:14 +02001364 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001365 }
1366
Christoph Hellwig70246282016-07-19 11:28:41 +02001367 /* We fail READ early, if we can not serve it.
Lars Ellenberg5da9c832012-03-29 17:04:14 +02001368 * We must do this before req is registered on any lists.
Lars Ellenberga0d856d2012-01-24 17:19:42 +01001369 * Otherwise, drbd_req_complete() will queue failed READ for retry. */
Lars Ellenberg5da9c832012-03-29 17:04:14 +02001370 if (rw != WRITE) {
1371 if (!do_remote_read(req) && !req->private_bio)
1372 goto nodata;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001373 }
1374
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001375 /* which transfer log epoch does this belong to? */
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001376 req->epoch = atomic_read(&first_peer_device(device)->connection->current_tle_nr);
Philipp Reisner288f4222010-05-27 15:07:43 +02001377
Lars Ellenberg227f0522012-07-31 09:31:11 +02001378 /* no point in adding empty flushes to the transfer log,
1379 * they are mapped to drbd barriers already. */
Lars Ellenberg99b4d8f2012-08-07 06:42:09 +02001380 if (likely(req->i.size!=0)) {
1381 if (rw == WRITE)
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001382 first_peer_device(device)->connection->current_tle_writes++;
Philipp Reisner288f4222010-05-27 15:07:43 +02001383
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001384 list_add_tail(&req->tl_requests, &first_peer_device(device)->connection->transfer_log);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001385 }
Philipp Reisner67531712010-10-27 12:21:30 +02001386
Lars Ellenberg5da9c832012-03-29 17:04:14 +02001387 if (rw == WRITE) {
Lars Ellenberg0ead5cc2016-06-14 00:26:27 +02001388 if (req->private_bio && !may_do_writes(device)) {
1389 bio_put(req->private_bio);
1390 req->private_bio = NULL;
1391 put_ldev(device);
1392 goto nodata;
1393 }
Lars Ellenberg5da9c832012-03-29 17:04:14 +02001394 if (!drbd_process_write_request(req))
1395 no_remote = true;
1396 } else {
1397 /* We either have a private_bio, or we can read from remote.
1398 * Otherwise we had done the goto nodata above. */
1399 if (req->private_bio == NULL) {
1400 _req_mod(req, TO_BE_SENT);
1401 _req_mod(req, QUEUE_FOR_NET_READ);
Lars Ellenberg6719fb02010-10-18 23:04:07 +02001402 } else
Lars Ellenberg5da9c832012-03-29 17:04:14 +02001403 no_remote = true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001404 }
1405
Lars Ellenbergde6978b2017-08-29 10:20:34 +02001406 if (no_remote == false) {
1407 struct drbd_plug_cb *plug = drbd_check_plugged(resource);
1408 if (plug)
1409 drbd_update_plug(plug, req);
1410 }
Lars Ellenbergc51a0ef2017-08-29 10:20:32 +02001411
Lars Ellenberg844a6ae72013-11-22 12:52:03 +01001412 /* If it took the fast path in drbd_request_prepare, add it here.
1413 * The slow path has added it already. */
1414 if (list_empty(&req->req_pending_master_completion))
1415 list_add_tail(&req->req_pending_master_completion,
1416 &device->pending_master_completion[rw == WRITE]);
Lars Ellenberg5da9c832012-03-29 17:04:14 +02001417 if (req->private_bio) {
1418 /* needs to be marked within the same spinlock */
Lars Ellenberg05cbbb32015-01-16 17:41:55 +01001419 req->pre_submit_jif = jiffies;
Lars Ellenberg844a6ae72013-11-22 12:52:03 +01001420 list_add_tail(&req->req_pending_local,
1421 &device->pending_completion[rw == WRITE]);
Lars Ellenberg5da9c832012-03-29 17:04:14 +02001422 _req_mod(req, TO_BE_SUBMITTED);
1423 /* but we need to give up the spinlock to submit */
Lars Ellenberg35b5ed52013-12-04 12:07:09 +01001424 submit_private_bio = true;
Lars Ellenberg5da9c832012-03-29 17:04:14 +02001425 } else if (no_remote) {
1426nodata:
1427 if (__ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001428 drbd_err(device, "IO ERROR: neither local nor remote data, sector %llu+%u\n",
Lars Ellenberg42839f62012-09-27 15:19:38 +02001429 (unsigned long long)req->i.sector, req->i.size >> 9);
Lars Ellenberg5da9c832012-03-29 17:04:14 +02001430 /* A write may have been queued for send_oos, however.
Lars Ellenberga0d856d2012-01-24 17:19:42 +01001431 * So we can not simply free it, we must go through drbd_req_put_completion_ref() */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001432 }
1433
Lars Ellenberg5da9c832012-03-29 17:04:14 +02001434out:
Lars Ellenberga00ebd12017-05-11 10:21:46 +02001435 drbd_req_put_completion_ref(req, &m, 1);
Lars Ellenberg35b5ed52013-12-04 12:07:09 +01001436 spin_unlock_irq(&resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001437
Lars Ellenberg35b5ed52013-12-04 12:07:09 +01001438 /* Even though above is a kref_put(), this is safe.
1439 * As long as we still need to submit our private bio,
1440 * we hold a completion ref, and the request cannot disappear.
1441 * If however this request did not even have a private bio to submit
1442 * (e.g. remote read), req may already be invalid now.
1443 * That's why we cannot check on req->private_bio. */
1444 if (submit_private_bio)
1445 drbd_submit_req_private_bio(req);
Lars Ellenberg5da9c832012-03-29 17:04:14 +02001446 if (m.bio)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001447 complete_master_bio(device, &m);
Lars Ellenberg6d9febe2013-03-19 18:16:50 +01001448}
1449
/* Prepare a request for @bio and, unless preparation failed or handed the
 * request over to the submitter thread, send and submit it right here. */
void __drbd_make_request(struct drbd_device *device, struct bio *bio, unsigned long start_jif)
{
	struct drbd_request *req;

	req = drbd_request_prepare(device, bio, start_jif);
	if (!IS_ERR_OR_NULL(req))
		drbd_send_and_submit(device, req);
}
1457
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001458static void submit_fast_path(struct drbd_device *device, struct list_head *incoming)
Lars Ellenberg113fef92013-03-22 18:14:40 -06001459{
Lars Ellenbergde6978b2017-08-29 10:20:34 +02001460 struct blk_plug plug;
Lars Ellenberg08a1dda2013-03-19 18:16:56 +01001461 struct drbd_request *req, *tmp;
Lars Ellenbergde6978b2017-08-29 10:20:34 +02001462
1463 blk_start_plug(&plug);
Lars Ellenberg08a1dda2013-03-19 18:16:56 +01001464 list_for_each_entry_safe(req, tmp, incoming, tl_requests) {
1465 const int rw = bio_data_dir(req->master_bio);
Lars Ellenberg113fef92013-03-22 18:14:40 -06001466
Lars Ellenberg08a1dda2013-03-19 18:16:56 +01001467 if (rw == WRITE /* rw != WRITE should not even end up here! */
1468 && req->private_bio && req->i.size
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001469 && !test_bit(AL_SUSPENDED, &device->flags)) {
1470 if (!drbd_al_begin_io_fastpath(device, &req->i))
Lars Ellenberg08a1dda2013-03-19 18:16:56 +01001471 continue;
1472
1473 req->rq_state |= RQ_IN_ACT_LOG;
Lars Ellenberge5f891b2013-11-22 12:32:01 +01001474 req->in_actlog_jif = jiffies;
Lars Ellenbergad3fee72013-12-20 11:22:13 +01001475 atomic_dec(&device->ap_actlog_cnt);
Lars Ellenberg08a1dda2013-03-19 18:16:56 +01001476 }
1477
1478 list_del_init(&req->tl_requests);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001479 drbd_send_and_submit(device, req);
Lars Ellenberg113fef92013-03-22 18:14:40 -06001480 }
Lars Ellenbergde6978b2017-08-29 10:20:34 +02001481 blk_finish_plug(&plug);
Lars Ellenberg113fef92013-03-22 18:14:40 -06001482}
1483
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001484static bool prepare_al_transaction_nonblock(struct drbd_device *device,
Lars Ellenberg08a1dda2013-03-19 18:16:56 +01001485 struct list_head *incoming,
Lars Ellenbergf5b90b62014-05-07 22:41:28 +02001486 struct list_head *pending,
1487 struct list_head *later)
Lars Ellenberg08a1dda2013-03-19 18:16:56 +01001488{
Lars Ellenberg9da10e82017-08-29 10:20:33 +02001489 struct drbd_request *req;
Lars Ellenberg08a1dda2013-03-19 18:16:56 +01001490 int wake = 0;
1491 int err;
1492
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001493 spin_lock_irq(&device->al_lock);
Lars Ellenberg9da10e82017-08-29 10:20:33 +02001494 while ((req = list_first_entry_or_null(incoming, struct drbd_request, tl_requests))) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001495 err = drbd_al_begin_io_nonblock(device, &req->i);
Lars Ellenbergf5b90b62014-05-07 22:41:28 +02001496 if (err == -ENOBUFS)
1497 break;
Lars Ellenberg08a1dda2013-03-19 18:16:56 +01001498 if (err == -EBUSY)
1499 wake = 1;
1500 if (err)
Lars Ellenbergf5b90b62014-05-07 22:41:28 +02001501 list_move_tail(&req->tl_requests, later);
1502 else
1503 list_move_tail(&req->tl_requests, pending);
Lars Ellenberg08a1dda2013-03-19 18:16:56 +01001504 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001505 spin_unlock_irq(&device->al_lock);
Lars Ellenberg08a1dda2013-03-19 18:16:56 +01001506 if (wake)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001507 wake_up(&device->al_wait);
Lars Ellenberg08a1dda2013-03-19 18:16:56 +01001508 return !list_empty(pending);
1509}
Lars Ellenberg113fef92013-03-22 18:14:40 -06001510
Lars Ellenbergde6978b2017-08-29 10:20:34 +02001511static void send_and_submit_pending(struct drbd_device *device, struct list_head *pending)
Lars Ellenbergf5b90b62014-05-07 22:41:28 +02001512{
Lars Ellenbergde6978b2017-08-29 10:20:34 +02001513 struct blk_plug plug;
Lars Ellenberg9da10e82017-08-29 10:20:33 +02001514 struct drbd_request *req;
Lars Ellenbergf5b90b62014-05-07 22:41:28 +02001515
Lars Ellenbergde6978b2017-08-29 10:20:34 +02001516 blk_start_plug(&plug);
Lars Ellenberg9da10e82017-08-29 10:20:33 +02001517 while ((req = list_first_entry_or_null(pending, struct drbd_request, tl_requests))) {
Lars Ellenbergf5b90b62014-05-07 22:41:28 +02001518 req->rq_state |= RQ_IN_ACT_LOG;
1519 req->in_actlog_jif = jiffies;
1520 atomic_dec(&device->ap_actlog_cnt);
1521 list_del_init(&req->tl_requests);
1522 drbd_send_and_submit(device, req);
1523 }
Lars Ellenbergde6978b2017-08-29 10:20:34 +02001524 blk_finish_plug(&plug);
Lars Ellenbergf5b90b62014-05-07 22:41:28 +02001525}
1526
Lars Ellenberg113fef92013-03-22 18:14:40 -06001527void do_submit(struct work_struct *ws)
1528{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001529 struct drbd_device *device = container_of(ws, struct drbd_device, submit.worker);
Lars Ellenbergf5b90b62014-05-07 22:41:28 +02001530 LIST_HEAD(incoming); /* from drbd_make_request() */
1531 LIST_HEAD(pending); /* to be submitted after next AL-transaction commit */
1532 LIST_HEAD(busy); /* blocked by resync requests */
1533
1534 /* grab new incoming requests */
1535 spin_lock_irq(&device->resource->req_lock);
1536 list_splice_tail_init(&device->submit.writes, &incoming);
1537 spin_unlock_irq(&device->resource->req_lock);
Lars Ellenberg113fef92013-03-22 18:14:40 -06001538
Lars Ellenberg08a1dda2013-03-19 18:16:56 +01001539 for (;;) {
Lars Ellenbergf5b90b62014-05-07 22:41:28 +02001540 DEFINE_WAIT(wait);
Lars Ellenberg113fef92013-03-22 18:14:40 -06001541
Lars Ellenbergf5b90b62014-05-07 22:41:28 +02001542 /* move used-to-be-busy back to front of incoming */
1543 list_splice_init(&busy, &incoming);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001544 submit_fast_path(device, &incoming);
Lars Ellenberg08a1dda2013-03-19 18:16:56 +01001545 if (list_empty(&incoming))
1546 break;
1547
Lars Ellenberg45ad07b2013-03-19 18:16:58 +01001548 for (;;) {
Lars Ellenbergf5b90b62014-05-07 22:41:28 +02001549 prepare_to_wait(&device->al_wait, &wait, TASK_UNINTERRUPTIBLE);
1550
1551 list_splice_init(&busy, &incoming);
1552 prepare_al_transaction_nonblock(device, &incoming, &pending, &busy);
1553 if (!list_empty(&pending))
1554 break;
1555
1556 schedule();
1557
1558 /* If all currently "hot" activity log extents are kept busy by
1559 * incoming requests, we still must not totally starve new
1560 * requests to "cold" extents.
1561 * Something left on &incoming means there had not been
1562 * enough update slots available, and the activity log
1563 * has been marked as "starving".
1564 *
1565 * Try again now, without looking for new requests,
1566 * effectively blocking all new requests until we made
1567 * at least _some_ progress with what we currently have.
1568 */
1569 if (!list_empty(&incoming))
1570 continue;
1571
1572 /* Nothing moved to pending, but nothing left
1573 * on incoming: all moved to busy!
1574 * Grab new and iterate. */
1575 spin_lock_irq(&device->resource->req_lock);
1576 list_splice_tail_init(&device->submit.writes, &incoming);
1577 spin_unlock_irq(&device->resource->req_lock);
1578 }
1579 finish_wait(&device->al_wait, &wait);
1580
1581 /* If the transaction was full, before all incoming requests
1582 * had been processed, skip ahead to commit, and iterate
1583 * without splicing in more incoming requests from upper layers.
1584 *
1585 * Else, if all incoming have been processed,
1586 * they have become either "pending" (to be submitted after
1587 * next transaction commit) or "busy" (blocked by resync).
1588 *
1589 * Maybe more was queued, while we prepared the transaction?
1590 * Try to stuff those into this transaction as well.
1591 * Be strictly non-blocking here,
1592 * we already have something to commit.
1593 *
1594 * Commit if we don't make any more progres.
1595 */
1596
1597 while (list_empty(&incoming)) {
Lars Ellenberg45ad07b2013-03-19 18:16:58 +01001598 LIST_HEAD(more_pending);
1599 LIST_HEAD(more_incoming);
1600 bool made_progress;
1601
1602 /* It is ok to look outside the lock,
1603 * it's only an optimization anyways */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001604 if (list_empty(&device->submit.writes))
Lars Ellenberg45ad07b2013-03-19 18:16:58 +01001605 break;
1606
Lars Ellenberg844a6ae72013-11-22 12:52:03 +01001607 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001608 list_splice_tail_init(&device->submit.writes, &more_incoming);
Lars Ellenberg844a6ae72013-11-22 12:52:03 +01001609 spin_unlock_irq(&device->resource->req_lock);
Lars Ellenberg45ad07b2013-03-19 18:16:58 +01001610
1611 if (list_empty(&more_incoming))
1612 break;
1613
Lars Ellenbergf5b90b62014-05-07 22:41:28 +02001614 made_progress = prepare_al_transaction_nonblock(device, &more_incoming, &more_pending, &busy);
Lars Ellenberg45ad07b2013-03-19 18:16:58 +01001615
1616 list_splice_tail_init(&more_pending, &pending);
1617 list_splice_tail_init(&more_incoming, &incoming);
Lars Ellenberg45ad07b2013-03-19 18:16:58 +01001618 if (!made_progress)
1619 break;
1620 }
Lars Ellenbergf5b90b62014-05-07 22:41:28 +02001621
Lars Ellenberg4dd726f2014-02-11 11:15:36 +01001622 drbd_al_begin_io_commit(device);
Lars Ellenbergf5b90b62014-05-07 22:41:28 +02001623 send_and_submit_pending(device, &pending);
Lars Ellenberg113fef92013-03-22 18:14:40 -06001624 }
1625}
1626
Jens Axboedece1632015-11-05 10:41:16 -07001627blk_qc_t drbd_make_request(struct request_queue *q, struct bio *bio)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001628{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001629 struct drbd_device *device = (struct drbd_device *) q->queuedata;
Lars Ellenberge5f891b2013-11-22 12:32:01 +01001630 unsigned long start_jif;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001631
NeilBrownaf67c312017-06-18 14:38:57 +10001632 blk_queue_split(q, &bio);
Kent Overstreet54efd502015-04-23 22:37:18 -07001633
Lars Ellenberge5f891b2013-11-22 12:32:01 +01001634 start_jif = jiffies;
Philipp Reisneraeda1cd62010-11-09 17:45:06 +01001635
Philipp Reisnerb411b362009-09-25 16:07:19 -07001636 /*
1637 * what we "blindly" assume:
1638 */
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001639 D_ASSERT(device, IS_ALIGNED(bio->bi_iter.bi_size, 512));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001640
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001641 inc_ap_bio(device);
Lars Ellenberge5f891b2013-11-22 12:32:01 +01001642 __drbd_make_request(device, bio, start_jif);
Jens Axboedece1632015-11-05 10:41:16 -07001643 return BLK_QC_T_NONE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001644}
1645
Lars Ellenberg84d34f22015-02-19 13:54:11 +01001646static bool net_timeout_reached(struct drbd_request *net_req,
1647 struct drbd_connection *connection,
1648 unsigned long now, unsigned long ent,
1649 unsigned int ko_count, unsigned int timeout)
1650{
1651 struct drbd_device *device = net_req->device;
1652
1653 if (!time_after(now, net_req->pre_send_jif + ent))
1654 return false;
1655
1656 if (time_in_range(now, connection->last_reconnect_jif, connection->last_reconnect_jif + ent))
1657 return false;
1658
1659 if (net_req->rq_state & RQ_NET_PENDING) {
1660 drbd_warn(device, "Remote failed to finish a request within %ums > ko-count (%u) * timeout (%u * 0.1s)\n",
1661 jiffies_to_msecs(now - net_req->pre_send_jif), ko_count, timeout);
1662 return true;
1663 }
1664
1665 /* We received an ACK already (or are using protocol A),
1666 * but are waiting for the epoch closing barrier ack.
1667 * Check if we sent the barrier already. We should not blame the peer
1668 * for being unresponsive, if we did not even ask it yet. */
1669 if (net_req->epoch == connection->send.current_epoch_nr) {
1670 drbd_warn(device,
1671 "We did not send a P_BARRIER for %ums > ko-count (%u) * timeout (%u * 0.1s); drbd kernel thread blocked?\n",
1672 jiffies_to_msecs(now - net_req->pre_send_jif), ko_count, timeout);
1673 return false;
1674 }
1675
1676 /* Worst case: we may have been blocked for whatever reason, then
1677 * suddenly are able to send a lot of requests (and epoch separating
1678 * barriers) in quick succession.
1679 * The timestamp of the net_req may be much too old and not correspond
1680 * to the sending time of the relevant unack'ed barrier packet, so
1681 * would trigger a spurious timeout. The latest barrier packet may
1682 * have a too recent timestamp to trigger the timeout, potentially miss
1683 * a timeout. Right now we don't have a place to conveniently store
1684 * these timestamps.
1685 * But in this particular situation, the application requests are still
1686 * completed to upper layers, DRBD should still "feel" responsive.
1687 * No need yet to kill this connection, it may still recover.
1688 * If not, eventually we will have queued enough into the network for
1689 * us to block. From that point of view, the timestamp of the last sent
1690 * barrier packet is relevant enough.
1691 */
1692 if (time_after(now, connection->send.last_sent_barrier_jif + ent)) {
1693 drbd_warn(device, "Remote failed to answer a P_BARRIER (sent at %lu jif; now=%lu jif) within %ums > ko-count (%u) * timeout (%u * 0.1s)\n",
1694 connection->send.last_sent_barrier_jif, now,
1695 jiffies_to_msecs(now - connection->send.last_sent_barrier_jif), ko_count, timeout);
1696 return true;
1697 }
1698 return false;
1699}
1700
/* A request is considered timed out, if
 * - we have some effective timeout from the configuration,
 *   with some state restrictions applied,
 * - the oldest request is waiting for a response from the network
 *   resp. the local disk,
 * - the oldest request is in fact older than the effective timeout,
 * - the connection was established (resp. disk was attached)
 *   for longer than the timeout already.
 * Note that for 32bit jiffies and very stable connections/disks,
 * we may have a wrap around, which is caught by
 *   !time_in_range(now, last_..._jif, last_..._jif + timeout).
 *
 * Side effect: once per 32bit wrap-around interval, which means every
 * ~198 days with 250 HZ, we have a window where the timeout would need
 * to expire twice (worst case) to become effective. Good enough.
 */
1717
Kees Cook2bccef32017-10-17 20:33:01 -07001718void request_timer_fn(struct timer_list *t)
Philipp Reisner7fde2be2011-03-01 11:08:28 +01001719{
Kees Cook2bccef32017-10-17 20:33:01 -07001720 struct drbd_device *device = from_timer(device, t, request_timer);
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001721 struct drbd_connection *connection = first_peer_device(device)->connection;
Lars Ellenberg7753a4c12013-11-22 13:00:12 +01001722 struct drbd_request *req_read, *req_write, *req_peer; /* oldest request */
Philipp Reisner44ed1672011-04-19 17:10:19 +02001723 struct net_conf *nc;
Lars Ellenberg7753a4c12013-11-22 13:00:12 +01001724 unsigned long oldest_submit_jif;
Philipp Reisnerdfa8bed2011-06-29 14:06:08 +02001725 unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */
Lars Ellenbergba280c02012-04-25 11:46:14 +02001726 unsigned long now;
Lars Ellenberg84d34f22015-02-19 13:54:11 +01001727 unsigned int ko_count = 0, timeout = 0;
Philipp Reisner7fde2be2011-03-01 11:08:28 +01001728
Philipp Reisner44ed1672011-04-19 17:10:19 +02001729 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001730 nc = rcu_dereference(connection->net_conf);
Lars Ellenberg84d34f22015-02-19 13:54:11 +01001731 if (nc && device->state.conn >= C_WF_REPORT_PARAMS) {
1732 ko_count = nc->ko_count;
1733 timeout = nc->timeout;
1734 }
Philipp Reisnercdfda632011-07-05 15:38:59 +02001735
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001736 if (get_ldev(device)) { /* implicit state.disk >= D_INCONSISTENT */
1737 dt = rcu_dereference(device->ldev->disk_conf)->disk_timeout * HZ / 10;
1738 put_ldev(device);
Philipp Reisnerdfa8bed2011-06-29 14:06:08 +02001739 }
Philipp Reisner44ed1672011-04-19 17:10:19 +02001740 rcu_read_unlock();
1741
Lars Ellenberg84d34f22015-02-19 13:54:11 +01001742
1743 ent = timeout * HZ/10 * ko_count;
Philipp Reisnerdfa8bed2011-06-29 14:06:08 +02001744 et = min_not_zero(dt, ent);
1745
Lars Ellenbergba280c02012-04-25 11:46:14 +02001746 if (!et)
Philipp Reisner7fde2be2011-03-01 11:08:28 +01001747 return; /* Recurring timer stopped */
1748
Lars Ellenbergba280c02012-04-25 11:46:14 +02001749 now = jiffies;
Lars Ellenberg7753a4c12013-11-22 13:00:12 +01001750 nt = now + et;
Lars Ellenbergba280c02012-04-25 11:46:14 +02001751
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001752 spin_lock_irq(&device->resource->req_lock);
Lars Ellenberg7753a4c12013-11-22 13:00:12 +01001753 req_read = list_first_entry_or_null(&device->pending_completion[0], struct drbd_request, req_pending_local);
1754 req_write = list_first_entry_or_null(&device->pending_completion[1], struct drbd_request, req_pending_local);
Lars Ellenberg84d34f22015-02-19 13:54:11 +01001755
Lars Ellenberg7753a4c12013-11-22 13:00:12 +01001756 /* maybe the oldest request waiting for the peer is in fact still
Lars Ellenberg84d34f22015-02-19 13:54:11 +01001757 * blocking in tcp sendmsg. That's ok, though, that's handled via the
1758 * socket send timeout, requesting a ping, and bumping ko-count in
1759 * we_should_drop_the_connection().
1760 */
1761
1762 /* check the oldest request we did successfully sent,
1763 * but which is still waiting for an ACK. */
1764 req_peer = connection->req_ack_pending;
1765
1766 /* if we don't have such request (e.g. protocoll A)
1767 * check the oldest requests which is still waiting on its epoch
1768 * closing barrier ack. */
1769 if (!req_peer)
1770 req_peer = connection->req_not_net_done;
Lars Ellenberg7753a4c12013-11-22 13:00:12 +01001771
1772 /* evaluate the oldest peer request only in one timer! */
1773 if (req_peer && req_peer->device != device)
1774 req_peer = NULL;
1775
1776 /* do we have something to evaluate? */
1777 if (req_peer == NULL && req_write == NULL && req_read == NULL)
1778 goto out;
1779
1780 oldest_submit_jif =
1781 (req_write && req_read)
1782 ? ( time_before(req_write->pre_submit_jif, req_read->pre_submit_jif)
1783 ? req_write->pre_submit_jif : req_read->pre_submit_jif )
1784 : req_write ? req_write->pre_submit_jif
1785 : req_read ? req_read->pre_submit_jif : now;
Philipp Reisner7fde2be2011-03-01 11:08:28 +01001786
Lars Ellenberg84d34f22015-02-19 13:54:11 +01001787 if (ent && req_peer && net_timeout_reached(req_peer, connection, now, ent, ko_count, timeout))
Philipp Reisner9581f972014-11-10 17:21:14 +01001788 _conn_request_state(connection, NS(conn, C_TIMEOUT), CS_VERBOSE | CS_HARD);
Lars Ellenberg84d34f22015-02-19 13:54:11 +01001789
Lars Ellenberg7753a4c12013-11-22 13:00:12 +01001790 if (dt && oldest_submit_jif != now &&
1791 time_after(now, oldest_submit_jif + dt) &&
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001792 !time_in_range(now, device->last_reattach_jif, device->last_reattach_jif + dt)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001793 drbd_warn(device, "Local backing device failed to meet the disk-timeout\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001794 __drbd_chk_io_error(device, DRBD_FORCE_DETACH);
Philipp Reisnerdfa8bed2011-06-29 14:06:08 +02001795 }
Lars Ellenberg08535462014-04-28 18:43:31 +02001796
1797 /* Reschedule timer for the nearest not already expired timeout.
1798 * Fallback to now + min(effective network timeout, disk timeout). */
Lars Ellenberg7753a4c12013-11-22 13:00:12 +01001799 ent = (ent && req_peer && time_before(now, req_peer->pre_send_jif + ent))
1800 ? req_peer->pre_send_jif + ent : now + et;
1801 dt = (dt && oldest_submit_jif != now && time_before(now, oldest_submit_jif + dt))
1802 ? oldest_submit_jif + dt : now + et;
Lars Ellenberg08535462014-04-28 18:43:31 +02001803 nt = time_before(ent, dt) ? ent : dt;
Lars Ellenberg7753a4c12013-11-22 13:00:12 +01001804out:
Andreas Gruenbacher8d4ba3f2014-09-11 14:29:08 +02001805 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001806 mod_timer(&device->request_timer, nt);
Philipp Reisner7fde2be2011-03-01 11:08:28 +01001807}