drbd: New packet for Ahead/Behind mode: P_OUT_OF_SYNC
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index b4adb58..33f6cc5 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -1007,22 +1007,22 @@
* called by tl_clear and drbd_send_dblock (==drbd_make_request).
* so this can be _any_ process.
*/
-void __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size,
+int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size,
const char *file, const unsigned int line)
{
unsigned long sbnr, ebnr, lbnr, flags;
sector_t esector, nr_sectors;
- unsigned int enr, count;
+ unsigned int enr, count = 0;
struct lc_element *e;
if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) {
dev_err(DEV, "sector: %llus, size: %d\n",
(unsigned long long)sector, size);
- return;
+ return 0;
}
if (!get_ldev(mdev))
- return; /* no disk, no metadata, no bitmap to set bits in */
+ return 0; /* no disk, no metadata, no bitmap to set bits in */
nr_sectors = drbd_get_capacity(mdev->this_bdev);
esector = sector + (size >> 9) - 1;
@@ -1052,6 +1052,8 @@
out:
put_ldev(mdev);
+
+ return count;
}
static
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 21b7439..4713312 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -212,6 +212,7 @@
/* P_CKPT_FENCE_REQ = 0x25, * currently reserved for protocol D */
/* P_CKPT_DISABLE_REQ = 0x26, * currently reserved for protocol D */
P_DELAY_PROBE = 0x27, /* is used on BOTH sockets */
+ P_OUT_OF_SYNC = 0x28, /* Mark as out of sync (Outrunning), data socket */
P_MAX_CMD = 0x28,
P_MAY_IGNORE = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */
@@ -269,6 +270,7 @@
[P_RS_IS_IN_SYNC] = "CsumRSIsInSync",
[P_COMPRESSED_BITMAP] = "CBitmap",
[P_DELAY_PROBE] = "DelayProbe",
+ [P_OUT_OF_SYNC] = "OutOfSync",
[P_MAX_CMD] = NULL,
};
@@ -550,6 +552,13 @@
u32 pad;
} __packed;
+struct p_block_desc {
+ struct p_header80 head;
+ u64 sector;
+ u32 blksize;
+ u32 pad; /* to multiple of 8 Byte */
+} __packed;
+
/* Valid values for the encoding field.
* Bump proto version when changing this. */
enum drbd_bitmap_code {
@@ -647,6 +656,7 @@
struct p_block_req block_req;
struct p_delay_probe93 delay_probe93;
struct p_rs_uuid rs_uuid;
+ struct p_block_desc block_desc;
} __packed;
/**********************************************************************/
@@ -1221,6 +1231,7 @@
struct p_data *dp, int data_size);
extern int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packets cmd,
sector_t sector, int blksize, u64 block_id);
+extern int drbd_send_oos(struct drbd_conf *mdev, struct drbd_request *req);
extern int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
struct drbd_epoch_entry *e);
extern int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req);
@@ -1534,6 +1545,7 @@
extern int w_prev_work_done(struct drbd_conf *, struct drbd_work *, int);
extern int w_e_reissue(struct drbd_conf *, struct drbd_work *, int);
extern int w_restart_disk_io(struct drbd_conf *, struct drbd_work *, int);
+extern int w_send_oos(struct drbd_conf *, struct drbd_work *, int);
extern void resync_timer_fn(unsigned long data);
@@ -1626,7 +1638,7 @@
int size, const char *file, const unsigned int line);
#define drbd_set_in_sync(mdev, sector, size) \
__drbd_set_in_sync(mdev, sector, size, __FILE__, __LINE__)
-extern void __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector,
+extern int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector,
int size, const char *file, const unsigned int line);
#define drbd_set_out_of_sync(mdev, sector, size) \
__drbd_set_out_of_sync(mdev, sector, size, __FILE__, __LINE__)
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 46f27d6..0dc93f4 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2634,6 +2634,16 @@
return ok;
}
+int drbd_send_oos(struct drbd_conf *mdev, struct drbd_request *req)
+{
+ struct p_block_desc p;
+
+ p.sector = cpu_to_be64(req->sector);
+ p.blksize = cpu_to_be32(req->size);
+
+ return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_OUT_OF_SYNC, &p.head, sizeof(p));
+}
+
/*
drbd_send distinguishes two cases:
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index b19e8b2..04a08e7 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -3562,6 +3562,15 @@
return TRUE;
}
+static int receive_out_of_sync(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
+{
+ struct p_block_desc *p = &mdev->data.rbuf.block_desc;
+
+ drbd_set_out_of_sync(mdev, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
+
+ return TRUE;
+}
+
typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, enum drbd_packets cmd, unsigned int to_receive);
struct data_cmd {
@@ -3592,6 +3601,7 @@
[P_OV_REPLY] = { 1, sizeof(struct p_block_req), receive_DataRequest },
[P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
[P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip },
+ [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
/* anything missing from this table is in
* the asender_tbl, see get_asender_cmd */
[P_MAX_CMD] = { 0, 0, NULL },
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 60288fb..a8d1ff2 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -142,7 +142,7 @@
/* before we can signal completion to the upper layers,
* we may need to close the current epoch */
- if (mdev->state.conn >= C_CONNECTED &&
+ if (mdev->state.conn >= C_CONNECTED && mdev->state.conn < C_AHEAD &&
req->epoch == mdev->newest_tle->br_number)
queue_barrier(mdev);
@@ -545,6 +545,14 @@
break;
+ case queue_for_send_oos:
+ req->rq_state |= RQ_NET_QUEUED;
+ req->w.cb = w_send_oos;
+ drbd_queue_work(&mdev->data.work, &req->w);
+ break;
+
+ case oos_handed_to_network:
+ /* actually the same */
case send_canceled:
/* treat it the same */
case send_failed:
@@ -756,7 +764,7 @@
const sector_t sector = bio->bi_sector;
struct drbd_tl_epoch *b = NULL;
struct drbd_request *req;
- int local, remote;
+ int local, remote, send_oos = 0;
int err = -EIO;
int ret = 0;
@@ -820,8 +828,11 @@
}
remote = remote && (mdev->state.pdsk == D_UP_TO_DATE ||
- (mdev->state.pdsk == D_INCONSISTENT &&
- mdev->state.conn >= C_CONNECTED));
+ (mdev->state.pdsk >= D_INCONSISTENT &&
+ mdev->state.conn >= C_CONNECTED &&
+ mdev->state.conn < C_AHEAD));
+ send_oos = (rw == WRITE && mdev->state.conn == C_AHEAD &&
+ mdev->state.pdsk >= D_INCONSISTENT);
if (!(local || remote) && !is_susp(mdev->state)) {
if (__ratelimit(&drbd_ratelimit_state))
@@ -835,7 +846,7 @@
* but there is a race between testing the bit and pointer outside the
* spinlock, and grabbing the spinlock.
* if we lost that race, we retry. */
- if (rw == WRITE && remote &&
+ if (rw == WRITE && (remote || send_oos) &&
mdev->unused_spare_tle == NULL &&
test_bit(CREATE_BARRIER, &mdev->flags)) {
allocate_barrier:
@@ -860,11 +871,15 @@
goto fail_free_complete;
}
- if (remote) {
+ if (remote || send_oos) {
remote = (mdev->state.pdsk == D_UP_TO_DATE ||
- (mdev->state.pdsk == D_INCONSISTENT &&
- mdev->state.conn >= C_CONNECTED));
- if (!remote)
+ (mdev->state.pdsk >= D_INCONSISTENT &&
+ mdev->state.conn >= C_CONNECTED &&
+ mdev->state.conn < C_AHEAD));
+ send_oos = (rw == WRITE && mdev->state.conn == C_AHEAD &&
+ mdev->state.pdsk >= D_INCONSISTENT);
+
+ if (!(remote || send_oos))
dev_warn(DEV, "lost connection while grabbing the req_lock!\n");
if (!(local || remote)) {
dev_err(DEV, "IO ERROR: neither local nor remote disk\n");
@@ -877,7 +892,7 @@
mdev->unused_spare_tle = b;
b = NULL;
}
- if (rw == WRITE && remote &&
+ if (rw == WRITE && (remote || send_oos) &&
mdev->unused_spare_tle == NULL &&
test_bit(CREATE_BARRIER, &mdev->flags)) {
/* someone closed the current epoch
@@ -900,7 +915,7 @@
* barrier packet. To get the write ordering right, we only have to
* make sure that, if this is a write request and it triggered a
* barrier packet, this request is queued within the same spinlock. */
- if (remote && mdev->unused_spare_tle &&
+ if ((remote || send_oos) && mdev->unused_spare_tle &&
test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) {
_tl_add_barrier(mdev, mdev->unused_spare_tle);
mdev->unused_spare_tle = NULL;
@@ -948,8 +963,11 @@
? queue_for_net_write
: queue_for_net_read);
}
+ if (send_oos && drbd_set_out_of_sync(mdev, sector, size))
+ _req_mod(req, queue_for_send_oos);
- if (remote && mdev->net_conf->on_congestion != OC_BLOCK) {
+ if (remote &&
+ mdev->net_conf->on_congestion != OC_BLOCK && mdev->agreed_pro_version >= 96) {
int congested = 0;
if (mdev->net_conf->cong_fill &&
@@ -964,6 +982,8 @@
}
if (congested) {
+ queue_barrier(mdev);
+
if (mdev->net_conf->on_congestion == OC_PULL_AHEAD)
_drbd_set_state(_NS(mdev, conn, C_AHEAD), 0, NULL);
else /*mdev->net_conf->on_congestion == OC_DISCONNECT */
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index 69d350f..40d3dcd 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -82,14 +82,16 @@
to_be_submitted,
/* XXX yes, now I am inconsistent...
- * these two are not "events" but "actions"
+ * these are not "events" but "actions"
* oh, well... */
queue_for_net_write,
queue_for_net_read,
+ queue_for_send_oos,
send_canceled,
send_failed,
handed_over_to_network,
+ oos_handed_to_network,
connection_lost_while_pending,
read_retry_remote_canceled,
recv_acked_by_peer,
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 782d872..6749907 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -1237,6 +1237,22 @@
return drbd_send_short_cmd(mdev, P_UNPLUG_REMOTE);
}
+int w_send_oos(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+ struct drbd_request *req = container_of(w, struct drbd_request, w);
+ int ok;
+
+ if (unlikely(cancel)) {
+ req_mod(req, send_canceled);
+ return 1;
+ }
+
+ ok = drbd_send_oos(mdev, req);
+ req_mod(req, oos_handed_to_network);
+
+ return ok;
+}
+
/**
* w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request
* @mdev: DRBD device.
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 23f31be..41da654 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -56,7 +56,7 @@
#define REL_VERSION "8.3.9"
#define API_VERSION 88
#define PRO_VERSION_MIN 86
-#define PRO_VERSION_MAX 95
+#define PRO_VERSION_MAX 96
enum drbd_io_error_p {