[SCSI] iscsi: fix command requeues during iscsi recovery
Do not flush queues then block session. This will cause commands
to needlessly swing around on us and remove goofy
recovery_failed field and replace with state value.
And do not start recovery from within the host reset function.
This causeis too many problems becuase open-iscsi was desinged to
call out to userspace then have userpscae decide if we should
go into recovery or kill the session.
Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index 580c050..d810aca 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -487,6 +487,11 @@
unsigned long flags;
spin_lock_irqsave(&session->lock, flags);
+ if (session->state == ISCSI_STATE_FAILED) {
+ spin_unlock_irqrestore(&session->lock, flags);
+ return;
+ }
+
if (session->conn_cnt == 1 || session->leadconn == conn)
session->state = ISCSI_STATE_FAILED;
spin_unlock_irqrestore(&session->lock, flags);
@@ -612,6 +617,7 @@
FAILURE_SESSION_FREED,
FAILURE_WINDOW_CLOSED,
FAILURE_SESSION_TERMINATE,
+ FAILURE_SESSION_IN_RECOVERY,
FAILURE_SESSION_RECOVERY_TIMEOUT,
};
@@ -631,18 +637,30 @@
spin_lock(&session->lock);
- if (session->state != ISCSI_STATE_LOGGED_IN) {
- if (session->recovery_failed) {
- reason = FAILURE_SESSION_RECOVERY_TIMEOUT;
- goto fault;
- } else if (session->state == ISCSI_STATE_FAILED) {
- reason = FAILURE_SESSION_FAILED;
- goto reject;
- } else if (session->state == ISCSI_STATE_TERMINATE) {
- reason = FAILURE_SESSION_TERMINATE;
+ /*
+ * ISCSI_STATE_FAILED is a temp. state. The recovery
+ * code will decide what is best to do with command queued
+ * during this time
+ */
+ if (session->state != ISCSI_STATE_LOGGED_IN &&
+ session->state != ISCSI_STATE_FAILED) {
+ /*
+ * to handle the race between when we set the recovery state
+ * and block the session we requeue here (commands could
+ * be entering our queuecommand while a block is starting
+ * up because the block code is not locked)
+ */
+ if (session->state == ISCSI_STATE_IN_RECOVERY) {
+ reason = FAILURE_SESSION_IN_RECOVERY;
goto fault;
}
- reason = FAILURE_SESSION_FREED;
+
+ if (session->state == ISCSI_STATE_RECOVERY_FAILED)
+ reason = FAILURE_SESSION_RECOVERY_TIMEOUT;
+ else if (session->state == ISCSI_STATE_TERMINATE)
+ reason = FAILURE_SESSION_TERMINATE;
+ else
+ reason = FAILURE_SESSION_FREED;
goto fault;
}
@@ -728,8 +746,8 @@
*/
mtask = conn->login_mtask;
else {
- BUG_ON(conn->c_stage == ISCSI_CONN_INITIAL_STAGE);
- BUG_ON(conn->c_stage == ISCSI_CONN_STOPPED);
+ BUG_ON(conn->c_stage == ISCSI_CONN_INITIAL_STAGE);
+ BUG_ON(conn->c_stage == ISCSI_CONN_STOPPED);
nop->exp_statsn = cpu_to_be32(conn->exp_statsn);
if (!__kfifo_get(session->mgmtpool.queue,
@@ -803,7 +821,7 @@
spin_lock_bh(&session->lock);
if (session->state != ISCSI_STATE_LOGGED_IN) {
- session->recovery_failed = 1;
+ session->state = ISCSI_STATE_RECOVERY_FAILED;
if (conn)
wake_up(&conn->ehwait);
}
@@ -838,20 +856,14 @@
* we drop the lock here but the leadconn cannot be destoyed while
* we are in the scsi eh
*/
- if (fail_session) {
+ if (fail_session)
iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
- /*
- * if userspace cannot respond then we must kick this off
- * here for it
- */
- iscsi_start_session_recovery(session, conn, STOP_CONN_RECOVER);
- }
debug_scsi("iscsi_eh_host_reset wait for relogin\n");
wait_event_interruptible(conn->ehwait,
session->state == ISCSI_STATE_TERMINATE ||
session->state == ISCSI_STATE_LOGGED_IN ||
- session->recovery_failed);
+ session->state == ISCSI_STATE_RECOVERY_FAILED);
if (signal_pending(current))
flush_signals(current);
@@ -940,8 +952,7 @@
wait_event_interruptible(conn->ehwait,
sc->SCp.phase != session->age ||
session->state != ISCSI_STATE_LOGGED_IN ||
- conn->tmabort_state != TMABORT_INITIAL ||
- session->recovery_failed);
+ conn->tmabort_state != TMABORT_INITIAL);
if (signal_pending(current))
flush_signals(current);
del_timer_sync(&conn->tmabort_timer);
@@ -1491,7 +1502,6 @@
conn->stop_stage = 0;
conn->tmabort_state = TMABORT_INITIAL;
session->age++;
- session->recovery_failed = 0;
spin_unlock_bh(&session->lock);
iscsi_unblock_session(session_to_cls(session));
@@ -1566,8 +1576,8 @@
conn->ctask = NULL;
}
-void iscsi_start_session_recovery(struct iscsi_session *session,
- struct iscsi_conn *conn, int flag)
+static void iscsi_start_session_recovery(struct iscsi_session *session,
+ struct iscsi_conn *conn, int flag)
{
int old_stop_stage;
@@ -1597,19 +1607,10 @@
set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx);
if (session->conn_cnt == 0 || session->leadconn == conn)
- session->state = ISCSI_STATE_FAILED;
+ session->state = ISCSI_STATE_IN_RECOVERY;
spin_unlock_bh(&session->lock);
- session->tt->terminate_conn(conn);
- /*
- * flush queues.
- */
- spin_lock_bh(&session->lock);
- fail_all_commands(conn);
- flush_control_queues(session, conn);
- spin_unlock_bh(&session->lock);
-
/*
* for connection level recovery we should not calculate
* header digest. conn->hdr_size used for optimization
@@ -1619,18 +1620,26 @@
if (flag == STOP_CONN_RECOVER) {
conn->hdrdgst_en = 0;
conn->datadgst_en = 0;
-
/*
* if this is called from the eh and and from userspace
* then we only need to block once.
*/
- if (session->state == ISCSI_STATE_FAILED &&
+ if (session->state == ISCSI_STATE_IN_RECOVERY &&
old_stop_stage != STOP_CONN_RECOVER)
iscsi_block_session(session_to_cls(session));
}
+
+ session->tt->terminate_conn(conn);
+ /*
+ * flush queues.
+ */
+ spin_lock_bh(&session->lock);
+ fail_all_commands(conn);
+ flush_control_queues(session, conn);
+ spin_unlock_bh(&session->lock);
+
mutex_unlock(&conn->xmitmutex);
}
-EXPORT_SYMBOL_GPL(iscsi_start_session_recovery);
void iscsi_conn_stop(struct iscsi_cls_conn *cls_conn, int flag)
{
diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h
index 2dba929..17b28f0 100644
--- a/include/scsi/libiscsi.h
+++ b/include/scsi/libiscsi.h
@@ -210,7 +210,6 @@
* - mgmtpool, *
* - r2tpool */
int state; /* session state */
- int recovery_failed;
struct list_head item;
int conn_cnt;
int age; /* counts session re-opens */
@@ -241,8 +240,6 @@
int, int, uint32_t, uint32_t *);
extern void iscsi_session_teardown(struct iscsi_cls_session *);
extern struct iscsi_session *class_to_transport_session(struct iscsi_cls_session *);
-extern void iscsi_start_session_recovery(struct iscsi_session *,
- struct iscsi_conn *, int);
extern void iscsi_session_recovery_timedout(struct iscsi_cls_session *);
#define session_to_cls(_sess) \
diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h
index c9e9475..92129b9 100644
--- a/include/scsi/scsi_transport_iscsi.h
+++ b/include/scsi/scsi_transport_iscsi.h
@@ -173,6 +173,8 @@
#define ISCSI_STATE_LOGGED_IN 2
#define ISCSI_STATE_FAILED 3
#define ISCSI_STATE_TERMINATE 4
+#define ISCSI_STATE_IN_RECOVERY 5
+#define ISCSI_STATE_RECOVERY_FAILED 6
struct iscsi_cls_session {
struct list_head sess_list; /* item in session_list */