UHCI: Eliminate asynchronous skeleton Queue Headers

This patch (as856) attempts to improve the performance of uhci-hcd by
removing the asynchronous skeleton Queue Headers.  They don't contain
any useful information but the controller has to read through them at
least once every millisecond, incurring a non-zero DMA overhead.

Now all the asynchronous queues are combined, along with the period-1
interrupt queue, into a single list with a single skeleton QH.  The
start of the low-speed control, full-speed control, and bulk sublists
is determined by linear search.  Since there should rarely be more
than a couple of QHs in the list, the searches should incur a much
smaller total load than keeping the skeleton QHs.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/host/uhci-q.c b/drivers/usb/host/uhci-q.c
index a0c6bf6..f4ebdb3 100644
--- a/drivers/usb/host/uhci-q.c
+++ b/drivers/usb/host/uhci-q.c
@@ -13,7 +13,7 @@
  * (C) Copyright 2000 Yggdrasil Computing, Inc. (port of new PCI interface
  *               support from usb-ohci.c by Adam Richter, adam@yggdrasil.com).
  * (C) Copyright 1999 Gregory P. Smith (from usb-ohci.c)
- * (C) Copyright 2004-2006 Alan Stern, stern@rowland.harvard.edu
+ * (C) Copyright 2004-2007 Alan Stern, stern@rowland.harvard.edu
  */
 
 
@@ -45,14 +45,43 @@
  */
 static void uhci_fsbr_on(struct uhci_hcd *uhci)
 {
+	struct uhci_qh *fsbr_qh, *lqh, *tqh;
+
 	uhci->fsbr_is_on = 1;
-	uhci->skel_term_qh->link = LINK_TO_QH(uhci->skel_fs_control_qh);
+	lqh = list_entry(uhci->skel_async_qh->node.prev,
+			struct uhci_qh, node);
+
+	/* Find the first FSBR QH.  Linear search through the list is
+	 * acceptable because normally FSBR gets turned on as soon as
+	 * one QH needs it. */
+	fsbr_qh = NULL;
+	list_for_each_entry_reverse(tqh, &uhci->skel_async_qh->node, node) {
+		if (tqh->skel < SKEL_FSBR)
+			break;
+		fsbr_qh = tqh;
+	}
+
+	/* No FSBR QH means we must insert the terminating skeleton QH */
+	if (!fsbr_qh) {
+		uhci->skel_term_qh->link = LINK_TO_QH(uhci->skel_term_qh);
+		wmb();
+		lqh->link = uhci->skel_term_qh->link;
+
+	/* Otherwise loop the last QH to the first FSBR QH */
+	} else
+		lqh->link = LINK_TO_QH(fsbr_qh);
 }
 
 static void uhci_fsbr_off(struct uhci_hcd *uhci)
 {
+	struct uhci_qh *lqh;
+
 	uhci->fsbr_is_on = 0;
-	uhci->skel_term_qh->link = UHCI_PTR_TERM;
+	lqh = list_entry(uhci->skel_async_qh->node.prev,
+			struct uhci_qh, node);
+
+	/* End the async list normally and unlink the terminating QH */
+	lqh->link = uhci->skel_term_qh->link = UHCI_PTR_TERM;
 }
 
 static void uhci_add_fsbr(struct uhci_hcd *uhci, struct urb *urb)
@@ -404,12 +433,81 @@
 }
 
 /*
+ * Link an Isochronous QH into its skeleton's list
+ */
+static inline void link_iso(struct uhci_hcd *uhci, struct uhci_qh *qh)
+{
+	list_add_tail(&qh->node, &uhci->skel_iso_qh->node);
+
+	/* Isochronous QHs aren't linked by the hardware */
+}
+
+/*
+ * Link a high-period interrupt QH into the schedule at the end of its
+ * skeleton's list
+ */
+static void link_interrupt(struct uhci_hcd *uhci, struct uhci_qh *qh)
+{
+	struct uhci_qh *pqh;
+
+	list_add_tail(&qh->node, &uhci->skelqh[qh->skel]->node);
+
+	pqh = list_entry(qh->node.prev, struct uhci_qh, node);
+	qh->link = pqh->link;
+	wmb();
+	pqh->link = LINK_TO_QH(qh);
+}
+
+/*
+ * Link a period-1 interrupt or async QH into the schedule at the
+ * correct spot in the async skeleton's list, and update the FSBR link
+ */
+static void link_async(struct uhci_hcd *uhci, struct uhci_qh *qh)
+{
+	struct uhci_qh *pqh, *lqh;
+	__le32 link_to_new_qh;
+	__le32 *extra_link = &link_to_new_qh;
+
+	/* Find the predecessor QH for our new one and insert it in the list.
+	 * The list of QHs is expected to be short, so linear search won't
+	 * take too long. */
+	list_for_each_entry_reverse(pqh, &uhci->skel_async_qh->node, node) {
+		if (pqh->skel <= qh->skel)
+			break;
+	}
+	list_add(&qh->node, &pqh->node);
+	qh->link = pqh->link;
+
+	link_to_new_qh = LINK_TO_QH(qh);
+
+	/* If this is now the first FSBR QH, take special action */
+	if (uhci->fsbr_is_on && pqh->skel < SKEL_FSBR &&
+			qh->skel >= SKEL_FSBR) {
+		lqh = list_entry(uhci->skel_async_qh->node.prev,
+				struct uhci_qh, node);
+
+		/* If the new QH is also the last one, we must unlink
+		 * the terminating skeleton QH and make the new QH point
+		 * back to itself. */
+		if (qh == lqh) {
+			qh->link = link_to_new_qh;
+			extra_link = &uhci->skel_term_qh->link;
+
+		/* Otherwise the last QH must point to the new QH */
+		} else
+			extra_link = &lqh->link;
+	}
+
+	/* Link it into the schedule */
+	wmb();
+	*extra_link = pqh->link = link_to_new_qh;
+}
+
+/*
  * Put a QH on the schedule in both hardware and software
  */
 static void uhci_activate_qh(struct uhci_hcd *uhci, struct uhci_qh *qh)
 {
-	struct uhci_qh *pqh;
-
 	WARN_ON(list_empty(&qh->queue));
 
 	/* Set the element pointer if it isn't set already.
@@ -431,18 +529,64 @@
 		return;
 	qh->state = QH_STATE_ACTIVE;
 
-	/* Move the QH from its old list to the end of the appropriate
+	/* Move the QH from its old list to the correct spot in the appropriate
 	 * skeleton's list */
 	if (qh == uhci->next_qh)
 		uhci->next_qh = list_entry(qh->node.next, struct uhci_qh,
 				node);
-	list_move_tail(&qh->node, &qh->skel->node);
+	list_del(&qh->node);
 
-	/* Link it into the schedule */
+	if (qh->skel == SKEL_ISO)
+		link_iso(uhci, qh);
+	else if (qh->skel < SKEL_ASYNC)
+		link_interrupt(uhci, qh);
+	else
+		link_async(uhci, qh);
+}
+
+/*
+ * Unlink a high-period interrupt QH from the schedule
+ */
+static void unlink_interrupt(struct uhci_hcd *uhci, struct uhci_qh *qh)
+{
+	struct uhci_qh *pqh;
+
 	pqh = list_entry(qh->node.prev, struct uhci_qh, node);
-	qh->link = pqh->link;
-	wmb();
-	pqh->link = LINK_TO_QH(qh);
+	pqh->link = qh->link;
+	mb();
+}
+
+/*
+ * Unlink a period-1 interrupt or async QH from the schedule
+ */
+static void unlink_async(struct uhci_hcd *uhci, struct uhci_qh *qh)
+{
+	struct uhci_qh *pqh, *lqh;
+	__le32 link_to_next_qh = qh->link;
+
+	pqh = list_entry(qh->node.prev, struct uhci_qh, node);
+
+	/* If this is the first FSBQ QH, take special action */
+	if (uhci->fsbr_is_on && pqh->skel < SKEL_FSBR &&
+			qh->skel >= SKEL_FSBR) {
+		lqh = list_entry(uhci->skel_async_qh->node.prev,
+				struct uhci_qh, node);
+
+		/* If this QH is also the last one, we must link in
+		 * the terminating skeleton QH. */
+		if (qh == lqh) {
+			link_to_next_qh = LINK_TO_QH(uhci->skel_term_qh);
+			uhci->skel_term_qh->link = link_to_next_qh;
+			wmb();
+			qh->link = link_to_next_qh;
+
+		/* Otherwise the last QH must point to the new first FSBR QH */
+		} else
+			lqh->link = link_to_next_qh;
+	}
+
+	pqh->link = link_to_next_qh;
+	mb();
 }
 
 /*
@@ -450,17 +594,18 @@
  */
 static void uhci_unlink_qh(struct uhci_hcd *uhci, struct uhci_qh *qh)
 {
-	struct uhci_qh *pqh;
-
 	if (qh->state == QH_STATE_UNLINKING)
 		return;
 	WARN_ON(qh->state != QH_STATE_ACTIVE || !qh->udev);
 	qh->state = QH_STATE_UNLINKING;
 
 	/* Unlink the QH from the schedule and record when we did it */
-	pqh = list_entry(qh->node.prev, struct uhci_qh, node);
-	pqh->link = qh->link;
-	mb();
+	if (qh->skel == SKEL_ISO)
+		;
+	else if (qh->skel < SKEL_ASYNC)
+		unlink_interrupt(uhci, qh);
+	else
+		unlink_async(uhci, qh);
 
 	uhci_get_current_frame_number(uhci);
 	qh->unlink_frame = uhci->frame_number;
@@ -696,6 +841,7 @@
 	dma_addr_t data = urb->transfer_dma;
 	__le32 *plink;
 	struct urb_priv *urbp = urb->hcpriv;
+	int skel;
 
 	/* The "pipe" thing contains the destination in bits 8--18 */
 	destination = (urb->pipe & PIPE_DEVEP_MASK) | USB_PID_SETUP;
@@ -796,11 +942,13 @@
 	 * isn't in the CONFIGURED state. */
 	if (urb->dev->speed == USB_SPEED_LOW ||
 			urb->dev->state != USB_STATE_CONFIGURED)
-		qh->skel = uhci->skel_ls_control_qh;
+		skel = SKEL_LS_CONTROL;
 	else {
-		qh->skel = uhci->skel_fs_control_qh;
+		skel = SKEL_FS_CONTROL;
 		uhci_add_fsbr(uhci, urb);
 	}
+	if (qh->state != QH_STATE_ACTIVE)
+		qh->skel = skel;
 
 	urb->actual_length = -8;	/* Account for the SETUP packet */
 	return 0;
@@ -930,7 +1078,7 @@
 	return -ENOMEM;
 }
 
-static inline int uhci_submit_bulk(struct uhci_hcd *uhci, struct urb *urb,
+static int uhci_submit_bulk(struct uhci_hcd *uhci, struct urb *urb,
 		struct uhci_qh *qh)
 {
 	int ret;
@@ -939,7 +1087,8 @@
 	if (urb->dev->speed == USB_SPEED_LOW)
 		return -EINVAL;
 
-	qh->skel = uhci->skel_bulk_qh;
+	if (qh->state != QH_STATE_ACTIVE)
+		qh->skel = SKEL_BULK;
 	ret = uhci_submit_common(uhci, urb, qh);
 	if (ret == 0)
 		uhci_add_fsbr(uhci, urb);
@@ -967,7 +1116,7 @@
 		if (exponent < 0)
 			return -EINVAL;
 		qh->period = 1 << exponent;
-		qh->skel = uhci->skelqh[UHCI_SKEL_INDEX(exponent)];
+		qh->skel = SKEL_INDEX(exponent);
 
 		/* For now, interrupt phase is fixed by the layout
 		 * of the QH lists. */
@@ -1215,7 +1364,7 @@
 		qh->iso_status = 0;
 	}
 
-	qh->skel = uhci->skel_iso_qh;
+	qh->skel = SKEL_ISO;
 	if (!qh->bandwidth_reserved)
 		uhci_reserve_bandwidth(uhci, qh);
 	return 0;