spi: dw: Locally wait for the DMA transfers completion

In general each DMA-based SPI transfer can be split up into two stages:
DMA data transmission/reception and SPI-bus transmission/reception. DMA
asynchronous transactions completion can be tracked by means of the
DMA async Tx-descriptor completion callback. But that callback being
called indicates that the DMA transfer has been finished, it doesn't
mean that SPI data transmission is also done. Moreover in fact it isn't
for at least Tx-only SPI transfers. Upon DMA transfer completion some
data is left in the Tx FIFO and being pushed out by the SPI controller.
So in order to make sure that an SPI transfer is completely pushed to the
SPI-bus, the driver has to wait for both DMA transaction and the SPI-bus
transmission/reception are finished. Note if there is a way to
asynchronously track the former event by means of the DMA async Tx
callback, there isn't easy one for the later (IRQ-based solution won't
work since SPI controller doesn't notify about Rx FIFO being empty).

The DMA transfer completion callback isn't suitable to wait for the
SPI controller activity finish either. The callback might (in case of DW
DMAC it will) be called in the tasklet context. Waiting for the SPI
controller to complete the transfer might take a considerable amount of
time since SPI-bus might be pretty slow. In this case delaying the
execution in the tasklet atomic context might cause significant system
performance drop.

So to speak the best option we've got to solve the problem is to
consequently wait for both stages being finished in the locally
implemented SPI transfer execution procedure even if it costs us of the
local wait-function re-implementation. In this case we don't need to use
the SPI-core transfer-wait functionality, but we'll make sure that
all DMA and SPI-bus transactions are completely finished before the
SPI-core transfer_one callback returns. In this commit we provide an
implementation of the DMA-transfers completion wait functionality.
The DW APB SSI DMA-specific SPI transfer_one function waits for both
Tx and Rx DMA transfers being finished, and only then exits with zero
returned signalling to the SPI core that the SPI transfer is finished.
This implementation is fully equivalent to the currently used
DMA-execution-SPI-core-wait algorithm. The SPI-bus transmission/reception
wait methods will be added in the follow-up commits.

Signed-off-by: Serge Semin <Sergey.Semin@baikalelectronics.ru>
Cc: Georgy Vlasov <Georgy.Vlasov@baikalelectronics.ru>
Cc: Ramil Zaripov <Ramil.Zaripov@baikalelectronics.ru>
Cc: Alexey Malahov <Alexey.Malahov@baikalelectronics.ru>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Feng Tang <feng.tang@intel.com>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Rob Herring <robh+dt@kernel.org>
Cc: linux-mips@vger.kernel.org
Cc: devicetree@vger.kernel.org
Link: https://lore.kernel.org/r/20200529131205.31838-4-Sergey.Semin@baikalelectronics.ru
Signed-off-by: Mark Brown <broonie@kernel.org>
diff --git a/drivers/spi/spi-dw-mid.c b/drivers/spi/spi-dw-mid.c
index 7ff1aca..355b641 100644
--- a/drivers/spi/spi-dw-mid.c
+++ b/drivers/spi/spi-dw-mid.c
@@ -11,9 +11,11 @@
 #include "spi-dw.h"
 
 #ifdef CONFIG_SPI_DW_MID_DMA
+#include <linux/completion.h>
 #include <linux/dma-mapping.h>
 #include <linux/dmaengine.h>
 #include <linux/irqreturn.h>
+#include <linux/jiffies.h>
 #include <linux/pci.h>
 #include <linux/platform_data/dma-dw.h>
 
@@ -66,6 +68,8 @@ static int mid_spi_dma_init_mfld(struct device *dev, struct dw_spi *dws)
 	dws->master->dma_rx = dws->rxchan;
 	dws->master->dma_tx = dws->txchan;
 
+	init_completion(&dws->dma_completion);
+
 	return 0;
 
 free_rxchan:
@@ -91,6 +95,8 @@ static int mid_spi_dma_init_generic(struct device *dev, struct dw_spi *dws)
 	dws->master->dma_rx = dws->rxchan;
 	dws->master->dma_tx = dws->txchan;
 
+	init_completion(&dws->dma_completion);
+
 	return 0;
 }
 
@@ -121,7 +127,7 @@ static irqreturn_t dma_transfer(struct dw_spi *dws)
 
 	dev_err(&dws->master->dev, "%s: FIFO overrun/underrun\n", __func__);
 	dws->master->cur_msg->status = -EIO;
-	spi_finalize_current_transfer(dws->master);
+	complete(&dws->dma_completion);
 	return IRQ_HANDLED;
 }
 
@@ -142,6 +148,29 @@ static enum dma_slave_buswidth convert_dma_width(u8 n_bytes) {
 	return DMA_SLAVE_BUSWIDTH_UNDEFINED;
 }
 
+static int dw_spi_dma_wait(struct dw_spi *dws, struct spi_transfer *xfer)
+{
+	unsigned long long ms;
+
+	ms = xfer->len * MSEC_PER_SEC * BITS_PER_BYTE;
+	do_div(ms, xfer->effective_speed_hz);
+	ms += ms + 200;
+
+	if (ms > UINT_MAX)
+		ms = UINT_MAX;
+
+	ms = wait_for_completion_timeout(&dws->dma_completion,
+					 msecs_to_jiffies(ms));
+
+	if (ms == 0) {
+		dev_err(&dws->master->cur_msg->spi->dev,
+			"DMA transaction timed out\n");
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
 /*
  * dws->dma_chan_busy is set before the dma transfer starts, callback for tx
  * channel will clear a corresponding bit.
@@ -155,7 +184,7 @@ static void dw_spi_dma_tx_done(void *arg)
 		return;
 
 	dw_writel(dws, DW_SPI_DMACR, 0);
-	spi_finalize_current_transfer(dws->master);
+	complete(&dws->dma_completion);
 }
 
 static struct dma_async_tx_descriptor *dw_spi_dma_prepare_tx(struct dw_spi *dws,
@@ -204,7 +233,7 @@ static void dw_spi_dma_rx_done(void *arg)
 		return;
 
 	dw_writel(dws, DW_SPI_DMACR, 0);
-	spi_finalize_current_transfer(dws->master);
+	complete(&dws->dma_completion);
 }
 
 static struct dma_async_tx_descriptor *dw_spi_dma_prepare_rx(struct dw_spi *dws,
@@ -260,6 +289,8 @@ static int mid_spi_dma_setup(struct dw_spi *dws, struct spi_transfer *xfer)
 	/* Set the interrupt mask */
 	spi_umask_intr(dws, imr);
 
+	reinit_completion(&dws->dma_completion);
+
 	dws->transfer_handler = dma_transfer;
 
 	return 0;
@@ -268,6 +299,7 @@ static int mid_spi_dma_setup(struct dw_spi *dws, struct spi_transfer *xfer)
 static int mid_spi_dma_transfer(struct dw_spi *dws, struct spi_transfer *xfer)
 {
 	struct dma_async_tx_descriptor *txdesc, *rxdesc;
+	int ret;
 
 	/* Prepare the TX dma transfer */
 	txdesc = dw_spi_dma_prepare_tx(dws, xfer);
@@ -288,7 +320,11 @@ static int mid_spi_dma_transfer(struct dw_spi *dws, struct spi_transfer *xfer)
 		dma_async_issue_pending(dws->txchan);
 	}
 
-	return 1;
+	ret = dw_spi_dma_wait(dws, xfer);
+	if (ret)
+		return ret;
+
+	return 0;
 }
 
 static void mid_spi_dma_stop(struct dw_spi *dws)