I/OAT: Add support for version 2 of ioatdma device

Add support for version 2 of the ioatdma device.  This device handles
the descriptor chain and DCA services slightly differently:
 - Instead of moving the dma descriptors between a busy and an idle chain,
   this new version uses a single circular chain so that we don't have
   rewrite the next_descriptor pointers as we add new requests, and the
   device doesn't need to re-read the last descriptor.
 - The new device has the DCA tags defined internally instead of needing
   them defined statically.

Signed-off-by: Shannon Nelson <shannon.nelson@intel.com>
Cc: "Williams, Dan J" <dan.j.williams@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/drivers/dma/ioat_dca.c b/drivers/dma/ioat_dca.c
index ba98571..0fa8a98 100644
--- a/drivers/dma/ioat_dca.c
+++ b/drivers/dma/ioat_dca.c
@@ -261,3 +261,167 @@
 	return dca;
 }
 
+
+static int ioat2_dca_add_requester(struct dca_provider *dca, struct device *dev)
+{
+	struct ioat_dca_priv *ioatdca = dca_priv(dca);
+	struct pci_dev *pdev;
+	int i;
+	u16 id;
+	u16 global_req_table;
+
+	/* This implementation only supports PCI-Express */
+	if (dev->bus != &pci_bus_type)
+		return -ENODEV;
+	pdev = to_pci_dev(dev);
+	id = dcaid_from_pcidev(pdev);
+
+	if (ioatdca->requester_count == ioatdca->max_requesters)
+		return -ENODEV;
+
+	for (i = 0; i < ioatdca->max_requesters; i++) {
+		if (ioatdca->req_slots[i].pdev == NULL) {
+			/* found an empty slot */
+			ioatdca->requester_count++;
+			ioatdca->req_slots[i].pdev = pdev;
+			ioatdca->req_slots[i].rid = id;
+			global_req_table =
+			      readw(ioatdca->dca_base + IOAT_DCA_GREQID_OFFSET);
+			writel(id | IOAT_DCA_GREQID_VALID,
+			       ioatdca->iobase + global_req_table + (i * 4));
+			return i;
+		}
+	}
+	/* Error, ioatdma->requester_count is out of whack */
+	return -EFAULT;
+}
+
+static int ioat2_dca_remove_requester(struct dca_provider *dca,
+				      struct device *dev)
+{
+	struct ioat_dca_priv *ioatdca = dca_priv(dca);
+	struct pci_dev *pdev;
+	int i;
+	u16 global_req_table;
+
+	/* This implementation only supports PCI-Express */
+	if (dev->bus != &pci_bus_type)
+		return -ENODEV;
+	pdev = to_pci_dev(dev);
+
+	for (i = 0; i < ioatdca->max_requesters; i++) {
+		if (ioatdca->req_slots[i].pdev == pdev) {
+			global_req_table =
+			      readw(ioatdca->dca_base + IOAT_DCA_GREQID_OFFSET);
+			writel(0, ioatdca->iobase + global_req_table + (i * 4));
+			ioatdca->req_slots[i].pdev = NULL;
+			ioatdca->req_slots[i].rid = 0;
+			ioatdca->requester_count--;
+			return i;
+		}
+	}
+	return -ENODEV;
+}
+
+static u8 ioat2_dca_get_tag(struct dca_provider *dca, int cpu)
+{
+	u8 tag;
+
+	tag = ioat_dca_get_tag(dca, cpu);
+	tag = (~tag) & 0x1F;
+	return tag;
+}
+
+static struct dca_ops ioat2_dca_ops = {
+	.add_requester		= ioat2_dca_add_requester,
+	.remove_requester	= ioat2_dca_remove_requester,
+	.get_tag		= ioat2_dca_get_tag,
+};
+
+static int ioat2_dca_count_dca_slots(void *iobase, u16 dca_offset)
+{
+	int slots = 0;
+	u32 req;
+	u16 global_req_table;
+
+	global_req_table = readw(iobase + dca_offset + IOAT_DCA_GREQID_OFFSET);
+	if (global_req_table == 0)
+		return 0;
+	do {
+		req = readl(iobase + global_req_table + (slots * sizeof(u32)));
+		slots++;
+	} while ((req & IOAT_DCA_GREQID_LASTID) == 0);
+
+	return slots;
+}
+
+struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase)
+{
+	struct dca_provider *dca;
+	struct ioat_dca_priv *ioatdca;
+	int slots;
+	int i;
+	int err;
+	u32 tag_map;
+	u16 dca_offset;
+	u16 csi_fsb_control;
+	u16 pcie_control;
+	u8 bit;
+
+	if (!system_has_dca_enabled(pdev))
+		return NULL;
+
+	dca_offset = readw(iobase + IOAT_DCAOFFSET_OFFSET);
+	if (dca_offset == 0)
+		return NULL;
+
+	slots = ioat2_dca_count_dca_slots(iobase, dca_offset);
+	if (slots == 0)
+		return NULL;
+
+	dca = alloc_dca_provider(&ioat2_dca_ops,
+				 sizeof(*ioatdca)
+				      + (sizeof(struct ioat_dca_slot) * slots));
+	if (!dca)
+		return NULL;
+
+	ioatdca = dca_priv(dca);
+	ioatdca->iobase = iobase;
+	ioatdca->dca_base = iobase + dca_offset;
+	ioatdca->max_requesters = slots;
+
+	/* some bios might not know to turn these on */
+	csi_fsb_control = readw(ioatdca->dca_base + IOAT_FSB_CAP_ENABLE_OFFSET);
+	if ((csi_fsb_control & IOAT_FSB_CAP_ENABLE_PREFETCH) == 0) {
+		csi_fsb_control |= IOAT_FSB_CAP_ENABLE_PREFETCH;
+		writew(csi_fsb_control,
+		       ioatdca->dca_base + IOAT_FSB_CAP_ENABLE_OFFSET);
+	}
+	pcie_control = readw(ioatdca->dca_base + IOAT_PCI_CAP_ENABLE_OFFSET);
+	if ((pcie_control & IOAT_PCI_CAP_ENABLE_MEMWR) == 0) {
+		pcie_control |= IOAT_PCI_CAP_ENABLE_MEMWR;
+		writew(pcie_control,
+		       ioatdca->dca_base + IOAT_PCI_CAP_ENABLE_OFFSET);
+	}
+
+
+	/* TODO version, compatibility and configuration checks */
+
+	/* copy out the APIC to DCA tag map */
+	tag_map = readl(ioatdca->dca_base + IOAT_APICID_TAG_MAP_OFFSET);
+	for (i = 0; i < 5; i++) {
+		bit = (tag_map >> (4 * i)) & 0x0f;
+		if (bit < 8)
+			ioatdca->tag_map[i] = bit | DCA_TAG_MAP_VALID;
+		else
+			ioatdca->tag_map[i] = 0;
+	}
+
+	err = register_dca_provider(dca, &pdev->dev);
+	if (err) {
+		free_dca_provider(dca);
+		return NULL;
+	}
+
+	return dca;
+}