spi: atmel: Refactor spi-atmel to use SPI framework queue

Replace the deprecated master->transfer with transfer_one_message()
and let the SPI subsystem handle all the queuing of messages.

Signed-off-by: Wenyou Yang <wenyou.yang@atmel.com>
Tested-by: Richard Genoud <richard.genoud@gmail.com>
Signed-off-by: Mark Brown <broonie@linaro.org>
diff --git a/drivers/spi/spi-atmel.c b/drivers/spi/spi-atmel.c
index b96f9a8..b0842f7 100644
--- a/drivers/spi/spi-atmel.c
+++ b/drivers/spi/spi-atmel.c
@@ -189,6 +189,8 @@
  */
 #define DMA_MIN_BYTES	16
 
+#define SPI_DMA_TIMEOUT		(msecs_to_jiffies(1000))
+
 struct atmel_spi_dma {
 	struct dma_chan			*chan_rx;
 	struct dma_chan			*chan_tx;
@@ -220,17 +222,13 @@
 	int			irq;
 	struct clk		*clk;
 	struct platform_device	*pdev;
-	struct spi_device	*stay;
 
-	u8			stopping;
-	struct list_head	queue;
-	struct tasklet_struct	tasklet;
 	struct spi_transfer	*current_transfer;
 	unsigned long		current_remaining_bytes;
-	struct spi_transfer	*next_transfer;
-	unsigned long		next_remaining_bytes;
 	int			done_status;
 
+	struct completion	xfer_completion;
+
 	/* scratch buffer */
 	void			*buffer;
 	dma_addr_t		buffer_dma;
@@ -241,6 +239,9 @@
 	bool			use_pdc;
 	/* dmaengine data */
 	struct atmel_spi_dma	dma;
+
+	bool			keep_cs;
+	bool			cs_active;
 };
 
 /* Controller-specific per-slave state */
@@ -376,17 +377,6 @@
 	return as->use_dma && xfer->len >= DMA_MIN_BYTES;
 }
 
-static inline int atmel_spi_xfer_is_last(struct spi_message *msg,
-					struct spi_transfer *xfer)
-{
-	return msg->transfers.prev == &xfer->transfer_list;
-}
-
-static inline int atmel_spi_xfer_can_be_chained(struct spi_transfer *xfer)
-{
-	return xfer->delay_usecs == 0 && !xfer->cs_change;
-}
-
 static int atmel_spi_dma_slave_config(struct atmel_spi *as,
 				struct dma_slave_config *slave_config,
 				u8 bits_per_word)
@@ -513,23 +503,20 @@
 	struct spi_master	*master = data;
 	struct atmel_spi	*as = spi_master_get_devdata(master);
 
-	/* trigger SPI tasklet */
-	tasklet_schedule(&as->tasklet);
+	complete(&as->xfer_completion);
 }
 
 /*
  * Next transfer using PIO.
- * lock is held, spi tasklet is blocked
  */
 static void atmel_spi_next_xfer_pio(struct spi_master *master,
 				struct spi_transfer *xfer)
 {
 	struct atmel_spi	*as = spi_master_get_devdata(master);
+	unsigned long xfer_pos = xfer->len - as->current_remaining_bytes;
 
 	dev_vdbg(master->dev.parent, "atmel_spi_next_xfer_pio\n");
 
-	as->current_remaining_bytes = xfer->len;
-
 	/* Make sure data is not remaining in RDR */
 	spi_readl(as, RDR);
 	while (spi_readl(as, SR) & SPI_BIT(RDRF)) {
@@ -537,13 +524,14 @@
 		cpu_relax();
 	}
 
-	if (xfer->tx_buf)
+	if (xfer->tx_buf) {
 		if (xfer->bits_per_word > 8)
-			spi_writel(as, TDR, *(u16 *)(xfer->tx_buf));
+			spi_writel(as, TDR, *(u16 *)(xfer->tx_buf + xfer_pos));
 		else
-			spi_writel(as, TDR, *(u8 *)(xfer->tx_buf));
-	else
+			spi_writel(as, TDR, *(u8 *)(xfer->tx_buf + xfer_pos));
+	} else {
 		spi_writel(as, TDR, 0);
+	}
 
 	dev_dbg(master->dev.parent,
 		"  start pio xfer %p: len %u tx %p rx %p bitpw %d\n",
@@ -556,7 +544,6 @@
 
 /*
  * Submit next transfer for DMA.
- * lock is held, spi tasklet is blocked
  */
 static int atmel_spi_next_xfer_dma_submit(struct spi_master *master,
 				struct spi_transfer *xfer,
@@ -747,71 +734,37 @@
  * lock is held, spi irq is blocked
  */
 static void atmel_spi_pdc_next_xfer(struct spi_master *master,
-				struct spi_message *msg)
+					struct spi_message *msg,
+					struct spi_transfer *xfer)
 {
 	struct atmel_spi	*as = spi_master_get_devdata(master);
-	struct spi_transfer	*xfer;
-	u32			len, remaining;
-	u32			ieval;
+	u32			len;
 	dma_addr_t		tx_dma, rx_dma;
 
-	if (!as->current_transfer)
-		xfer = list_entry(msg->transfers.next,
-				struct spi_transfer, transfer_list);
-	else if (!as->next_transfer)
-		xfer = list_entry(as->current_transfer->transfer_list.next,
-				struct spi_transfer, transfer_list);
-	else
-		xfer = NULL;
+	spi_writel(as, PTCR, SPI_BIT(RXTDIS) | SPI_BIT(TXTDIS));
 
-	if (xfer) {
-		spi_writel(as, PTCR, SPI_BIT(RXTDIS) | SPI_BIT(TXTDIS));
+	len = as->current_remaining_bytes;
+	atmel_spi_next_xfer_data(master, xfer, &tx_dma, &rx_dma, &len);
+	as->current_remaining_bytes -= len;
 
-		len = xfer->len;
+	spi_writel(as, RPR, rx_dma);
+	spi_writel(as, TPR, tx_dma);
+
+	if (msg->spi->bits_per_word > 8)
+		len >>= 1;
+	spi_writel(as, RCR, len);
+	spi_writel(as, TCR, len);
+
+	dev_dbg(&msg->spi->dev,
+		"  start xfer %p: len %u tx %p/%08llx rx %p/%08llx\n",
+		xfer, xfer->len, xfer->tx_buf,
+		(unsigned long long)xfer->tx_dma, xfer->rx_buf,
+		(unsigned long long)xfer->rx_dma);
+
+	if (as->current_remaining_bytes) {
+		len = as->current_remaining_bytes;
 		atmel_spi_next_xfer_data(master, xfer, &tx_dma, &rx_dma, &len);
-		remaining = xfer->len - len;
-
-		spi_writel(as, RPR, rx_dma);
-		spi_writel(as, TPR, tx_dma);
-
-		if (msg->spi->bits_per_word > 8)
-			len >>= 1;
-		spi_writel(as, RCR, len);
-		spi_writel(as, TCR, len);
-
-		atmel_spi_set_xfer_speed(as, msg->spi, xfer);
-
-		dev_dbg(&msg->spi->dev,
-			"  start xfer %p: len %u tx %p/%08llx rx %p/%08llx\n",
-			xfer, xfer->len, xfer->tx_buf,
-			(unsigned long long)xfer->tx_dma, xfer->rx_buf,
-			(unsigned long long)xfer->rx_dma);
-	} else {
-		xfer = as->next_transfer;
-		remaining = as->next_remaining_bytes;
-	}
-
-	as->current_transfer = xfer;
-	as->current_remaining_bytes = remaining;
-
-	if (remaining > 0)
-		len = remaining;
-	else if (!atmel_spi_xfer_is_last(msg, xfer)
-			&& atmel_spi_xfer_can_be_chained(xfer)) {
-		xfer = list_entry(xfer->transfer_list.next,
-				struct spi_transfer, transfer_list);
-		len = xfer->len;
-	} else
-		xfer = NULL;
-
-	as->next_transfer = xfer;
-
-	if (xfer) {
-		u32	total;
-
-		total = len;
-		atmel_spi_next_xfer_data(master, xfer, &tx_dma, &rx_dma, &len);
-		as->next_remaining_bytes = total - len;
+		as->current_remaining_bytes -= len;
 
 		spi_writel(as, RNPR, rx_dma);
 		spi_writel(as, TNPR, tx_dma);
@@ -826,11 +779,6 @@
 			xfer, xfer->len, xfer->tx_buf,
 			(unsigned long long)xfer->tx_dma, xfer->rx_buf,
 			(unsigned long long)xfer->rx_dma);
-		ieval = SPI_BIT(ENDRX) | SPI_BIT(OVRES);
-	} else {
-		spi_writel(as, RNCR, 0);
-		spi_writel(as, TNCR, 0);
-		ieval = SPI_BIT(RXBUFF) | SPI_BIT(ENDRX) | SPI_BIT(OVRES);
 	}
 
 	/* REVISIT: We're waiting for ENDRX before we start the next
@@ -843,84 +791,11 @@
 	 *
 	 * It should be doable, though. Just not now...
 	 */
-	spi_writel(as, IER, ieval);
+	spi_writel(as, IER, SPI_BIT(ENDRX) | SPI_BIT(OVRES));
 	spi_writel(as, PTCR, SPI_BIT(TXTEN) | SPI_BIT(RXTEN));
 }
 
 /*
- * Choose way to submit next transfer and start it.
- * lock is held, spi tasklet is blocked
- */
-static void atmel_spi_dma_next_xfer(struct spi_master *master,
-				struct spi_message *msg)
-{
-	struct atmel_spi	*as = spi_master_get_devdata(master);
-	struct spi_transfer	*xfer;
-	u32	remaining, len;
-
-	remaining = as->current_remaining_bytes;
-	if (remaining) {
-		xfer = as->current_transfer;
-		len = remaining;
-	} else {
-		if (!as->current_transfer)
-			xfer = list_entry(msg->transfers.next,
-				struct spi_transfer, transfer_list);
-		else
-			xfer = list_entry(
-				as->current_transfer->transfer_list.next,
-					struct spi_transfer, transfer_list);
-
-		as->current_transfer = xfer;
-		len = xfer->len;
-		atmel_spi_set_xfer_speed(as, msg->spi, xfer);
-	}
-
-	if (atmel_spi_use_dma(as, xfer)) {
-		u32 total = len;
-		if (!atmel_spi_next_xfer_dma_submit(master, xfer, &len)) {
-			as->current_remaining_bytes = total - len;
-			return;
-		} else {
-			dev_err(&msg->spi->dev, "unable to use DMA, fallback to PIO\n");
-		}
-	}
-
-	/* use PIO if error appened using DMA */
-	atmel_spi_next_xfer_pio(master, xfer);
-}
-
-static void atmel_spi_next_message(struct spi_master *master)
-{
-	struct atmel_spi	*as = spi_master_get_devdata(master);
-	struct spi_message	*msg;
-	struct spi_device	*spi;
-
-	BUG_ON(as->current_transfer);
-
-	msg = list_entry(as->queue.next, struct spi_message, queue);
-	spi = msg->spi;
-
-	dev_dbg(master->dev.parent, "start message %p for %s\n",
-			msg, dev_name(&spi->dev));
-
-	/* select chip if it's not still active */
-	if (as->stay) {
-		if (as->stay != spi) {
-			cs_deactivate(as, as->stay);
-			cs_activate(as, spi);
-		}
-		as->stay = NULL;
-	} else
-		cs_activate(as, spi);
-
-	if (as->use_pdc)
-		atmel_spi_pdc_next_xfer(master, msg);
-	else
-		atmel_spi_dma_next_xfer(master, msg);
-}
-
-/*
  * For DMA, tx_buf/tx_dma have the same relationship as rx_buf/rx_dma:
  *  - The buffer is either valid for CPU access, else NULL
  *  - If the buffer is valid, so is its DMA address
@@ -975,41 +850,7 @@
 	spi_writel(as, PTCR, SPI_BIT(RXTDIS) | SPI_BIT(TXTDIS));
 }
 
-static void
-atmel_spi_msg_done(struct spi_master *master, struct atmel_spi *as,
-		struct spi_message *msg, int stay)
-{
-	if (!stay || as->done_status < 0)
-		cs_deactivate(as, msg->spi);
-	else
-		as->stay = msg->spi;
-
-	list_del(&msg->queue);
-	msg->status = as->done_status;
-
-	dev_dbg(master->dev.parent,
-		"xfer complete: %u bytes transferred\n",
-		msg->actual_length);
-
-	atmel_spi_unlock(as);
-	msg->complete(msg->context);
-	atmel_spi_lock(as);
-
-	as->current_transfer = NULL;
-	as->next_transfer = NULL;
-	as->done_status = 0;
-
-	/* continue if needed */
-	if (list_empty(&as->queue) || as->stopping) {
-		if (as->use_pdc)
-			atmel_spi_disable_pdc_transfer(as);
-	} else {
-		atmel_spi_next_message(master);
-	}
-}
-
 /* Called from IRQ
- * lock is held
  *
  * Must update "current_remaining_bytes" to keep track of data
  * to transfer.
@@ -1017,9 +858,7 @@
 static void
 atmel_spi_pump_pio_data(struct atmel_spi *as, struct spi_transfer *xfer)
 {
-	u8		*txp;
 	u8		*rxp;
-	u16		*txp16;
 	u16		*rxp16;
 	unsigned long	xfer_pos = xfer->len - as->current_remaining_bytes;
 
@@ -1041,96 +880,12 @@
 	} else {
 		as->current_remaining_bytes--;
 	}
-
-	if (as->current_remaining_bytes) {
-		if (xfer->tx_buf) {
-			if (xfer->bits_per_word > 8) {
-				txp16 = (u16 *)(((u8 *)xfer->tx_buf)
-							+ xfer_pos + 2);
-				spi_writel(as, TDR, *txp16);
-			} else {
-				txp = ((u8 *)xfer->tx_buf) + xfer_pos + 1;
-				spi_writel(as, TDR, *txp);
-			}
-		} else {
-			spi_writel(as, TDR, 0);
-		}
-	}
-}
-
-/* Tasklet
- * Called from DMA callback + pio transfer and overrun IRQ.
- */
-static void atmel_spi_tasklet_func(unsigned long data)
-{
-	struct spi_master	*master = (struct spi_master *)data;
-	struct atmel_spi	*as = spi_master_get_devdata(master);
-	struct spi_message	*msg;
-	struct spi_transfer	*xfer;
-
-	dev_vdbg(master->dev.parent, "atmel_spi_tasklet_func\n");
-
-	atmel_spi_lock(as);
-
-	xfer = as->current_transfer;
-
-	if (xfer == NULL)
-		/* already been there */
-		goto tasklet_out;
-
-	msg = list_entry(as->queue.next, struct spi_message, queue);
-
-	if (as->current_remaining_bytes == 0) {
-		if (as->done_status < 0) {
-			/* error happened (overrun) */
-			if (atmel_spi_use_dma(as, xfer))
-				atmel_spi_stop_dma(as);
-		} else {
-			/* only update length if no error */
-			msg->actual_length += xfer->len;
-		}
-
-		if (atmel_spi_use_dma(as, xfer))
-			if (!msg->is_dma_mapped)
-				atmel_spi_dma_unmap_xfer(master, xfer);
-
-		if (xfer->delay_usecs)
-			udelay(xfer->delay_usecs);
-
-		if (atmel_spi_xfer_is_last(msg, xfer) || as->done_status < 0) {
-			/* report completed (or erroneous) message */
-			atmel_spi_msg_done(master, as, msg, xfer->cs_change);
-		} else {
-			if (xfer->cs_change) {
-				cs_deactivate(as, msg->spi);
-				udelay(1);
-				cs_activate(as, msg->spi);
-			}
-
-			/*
-			 * Not done yet. Submit the next transfer.
-			 *
-			 * FIXME handle protocol options for xfer
-			 */
-			atmel_spi_dma_next_xfer(master, msg);
-		}
-	} else {
-		/*
-		 * Keep going, we still have data to send in
-		 * the current transfer.
-		 */
-		atmel_spi_dma_next_xfer(master, msg);
-	}
-
-tasklet_out:
-	atmel_spi_unlock(as);
 }
 
 /* Interrupt
  *
  * No need for locking in this Interrupt handler: done_status is the
- * only information modified. What we need is the update of this field
- * before tasklet runs. This is ensured by using barrier.
+ * only information modified.
  */
 static irqreturn_t
 atmel_spi_pio_interrupt(int irq, void *dev_id)
@@ -1158,8 +913,6 @@
 		 *
 		 * We will also not process any remaning transfers in
 		 * the message.
-		 *
-		 * All actions are done in tasklet with done_status indication
 		 */
 		as->done_status = -EIO;
 		smp_wmb();
@@ -1167,7 +920,7 @@
 		/* Clear any overrun happening while cleaning up */
 		spi_readl(as, SR);
 
-		tasklet_schedule(&as->tasklet);
+		complete(&as->xfer_completion);
 
 	} else if (pending & SPI_BIT(RDRF)) {
 		atmel_spi_lock(as);
@@ -1176,11 +929,10 @@
 			ret = IRQ_HANDLED;
 			xfer = as->current_transfer;
 			atmel_spi_pump_pio_data(as, xfer);
-			if (!as->current_remaining_bytes) {
-				/* no more data to xfer, kick tasklet */
+			if (!as->current_remaining_bytes)
 				spi_writel(as, IDR, pending);
-				tasklet_schedule(&as->tasklet);
-			}
+
+			complete(&as->xfer_completion);
 		}
 
 		atmel_spi_unlock(as);
@@ -1198,116 +950,35 @@
 {
 	struct spi_master	*master = dev_id;
 	struct atmel_spi	*as = spi_master_get_devdata(master);
-	struct spi_message	*msg;
-	struct spi_transfer	*xfer;
 	u32			status, pending, imr;
 	int			ret = IRQ_NONE;
 
-	atmel_spi_lock(as);
-
-	xfer = as->current_transfer;
-	msg = list_entry(as->queue.next, struct spi_message, queue);
-
 	imr = spi_readl(as, IMR);
 	status = spi_readl(as, SR);
 	pending = status & imr;
 
 	if (pending & SPI_BIT(OVRES)) {
-		int timeout;
 
 		ret = IRQ_HANDLED;
 
 		spi_writel(as, IDR, (SPI_BIT(RXBUFF) | SPI_BIT(ENDRX)
 				     | SPI_BIT(OVRES)));
 
-		/*
-		 * When we get an overrun, we disregard the current
-		 * transfer. Data will not be copied back from any
-		 * bounce buffer and msg->actual_len will not be
-		 * updated with the last xfer.
-		 *
-		 * We will also not process any remaning transfers in
-		 * the message.
-		 *
-		 * First, stop the transfer and unmap the DMA buffers.
-		 */
-		spi_writel(as, PTCR, SPI_BIT(RXTDIS) | SPI_BIT(TXTDIS));
-		if (!msg->is_dma_mapped)
-			atmel_spi_dma_unmap_xfer(master, xfer);
-
-		/* REVISIT: udelay in irq is unfriendly */
-		if (xfer->delay_usecs)
-			udelay(xfer->delay_usecs);
-
-		dev_warn(master->dev.parent, "overrun (%u/%u remaining)\n",
-			 spi_readl(as, TCR), spi_readl(as, RCR));
-
-		/*
-		 * Clean up DMA registers and make sure the data
-		 * registers are empty.
-		 */
-		spi_writel(as, RNCR, 0);
-		spi_writel(as, TNCR, 0);
-		spi_writel(as, RCR, 0);
-		spi_writel(as, TCR, 0);
-		for (timeout = 1000; timeout; timeout--)
-			if (spi_readl(as, SR) & SPI_BIT(TXEMPTY))
-				break;
-		if (!timeout)
-			dev_warn(master->dev.parent,
-				 "timeout waiting for TXEMPTY");
-		while (spi_readl(as, SR) & SPI_BIT(RDRF))
-			spi_readl(as, RDR);
-
 		/* Clear any overrun happening while cleaning up */
 		spi_readl(as, SR);
 
 		as->done_status = -EIO;
-		atmel_spi_msg_done(master, as, msg, 0);
+
+		complete(&as->xfer_completion);
+
 	} else if (pending & (SPI_BIT(RXBUFF) | SPI_BIT(ENDRX))) {
 		ret = IRQ_HANDLED;
 
 		spi_writel(as, IDR, pending);
 
-		if (as->current_remaining_bytes == 0) {
-			msg->actual_length += xfer->len;
-
-			if (!msg->is_dma_mapped)
-				atmel_spi_dma_unmap_xfer(master, xfer);
-
-			/* REVISIT: udelay in irq is unfriendly */
-			if (xfer->delay_usecs)
-				udelay(xfer->delay_usecs);
-
-			if (atmel_spi_xfer_is_last(msg, xfer)) {
-				/* report completed message */
-				atmel_spi_msg_done(master, as, msg,
-						xfer->cs_change);
-			} else {
-				if (xfer->cs_change) {
-					cs_deactivate(as, msg->spi);
-					udelay(1);
-					cs_activate(as, msg->spi);
-				}
-
-				/*
-				 * Not done yet. Submit the next transfer.
-				 *
-				 * FIXME handle protocol options for xfer
-				 */
-				atmel_spi_pdc_next_xfer(master, msg);
-			}
-		} else {
-			/*
-			 * Keep going, we still have data to send in
-			 * the current transfer.
-			 */
-			atmel_spi_pdc_next_xfer(master, msg);
-		}
+		complete(&as->xfer_completion);
 	}
 
-	atmel_spi_unlock(as);
-
 	return ret;
 }
 
@@ -1322,9 +993,6 @@
 
 	as = spi_master_get_devdata(spi->master);
 
-	if (as->stopping)
-		return -ESHUTDOWN;
-
 	if (spi->chip_select > spi->master->num_chipselect) {
 		dev_dbg(&spi->dev,
 				"setup: invalid chipselect %u (%u defined)\n",
@@ -1376,12 +1044,6 @@
 		asd->npcs_pin = npcs_pin;
 		spi->controller_state = asd;
 		gpio_direction_output(npcs_pin, !(spi->mode & SPI_CS_HIGH));
-	} else {
-		atmel_spi_lock(as);
-		if (as->stay == spi)
-			as->stay = NULL;
-		cs_deactivate(as, spi);
-		atmel_spi_unlock(as);
 	}
 
 	asd->csr = csr;
@@ -1396,97 +1058,237 @@
 	return 0;
 }
 
-static int atmel_spi_transfer(struct spi_device *spi, struct spi_message *msg)
+/*
+ * Validate, DMA-map if needed, and run a single transfer of @msg, sleeping
+ * on xfer_completion until the interrupt handler or DMA callback signals it.
+ * Called with as->lock held; the lock is dropped only while waiting.
+ *
+ * Returns 0 on success or a negative errno; on failure the caller must
+ * not process the remaining transfers of the message.
+ */
+static int atmel_spi_one_transfer(struct spi_master *master,
+					struct spi_message *msg,
+					struct spi_transfer *xfer)
 {
 	struct atmel_spi	*as;
-	struct spi_transfer	*xfer;
-	struct device		*controller = spi->master->dev.parent;
+	struct spi_device	*spi = msg->spi;
 	u8			bits;
+	u32			len;
 	struct atmel_spi_device	*asd;
+	int			timeout;
+	int			ret;
 
-	as = spi_master_get_devdata(spi->master);
+	as = spi_master_get_devdata(master);
 
-	dev_dbg(controller, "new message %p submitted for %s\n",
-			msg, dev_name(&spi->dev));
+	if (!(xfer->tx_buf || xfer->rx_buf) && xfer->len) {
+		dev_dbg(&spi->dev, "missing rx or tx buf\n");
+		return -EINVAL;
+	}
+
+	if (xfer->bits_per_word) {
+		asd = spi->controller_state;
+		bits = (asd->csr >> 4) & 0xf;
+		if (bits != xfer->bits_per_word - 8) {
+			dev_dbg(&spi->dev,
+			"you can't yet change bits_per_word in transfers\n");
+			return -ENOPROTOOPT;
+		}
+	}
+
+	if (xfer->bits_per_word > 8) {
+		if (xfer->len % 2) {
+			dev_dbg(&spi->dev,
+			"buffer len should be 16 bits aligned\n");
+			return -EINVAL;
+		}
+	}
+
+	/*
+	 * DMA map early, for performance (empties dcache ASAP) and
+	 * better fault reporting.
+	 */
+	if ((!msg->is_dma_mapped)
+		&& (atmel_spi_use_dma(as, xfer)	|| as->use_pdc)) {
+		if (atmel_spi_dma_map_xfer(as, xfer) < 0)
+			return -ENOMEM;
+	}
+
+	atmel_spi_set_xfer_speed(as, msg->spi, xfer);
+
+	as->done_status = 0;
+	as->current_transfer = xfer;
+	as->current_remaining_bytes = xfer->len;
+	while (as->current_remaining_bytes) {
+		reinit_completion(&as->xfer_completion);
+
+		if (as->use_pdc) {
+			atmel_spi_pdc_next_xfer(master, msg, xfer);
+		} else if (atmel_spi_use_dma(as, xfer)) {
+			len = as->current_remaining_bytes;
+			ret = atmel_spi_next_xfer_dma_submit(master,
+								xfer, &len);
+			if (ret) {
+				dev_err(&spi->dev,
+					"unable to use DMA, fallback to PIO\n");
+				atmel_spi_next_xfer_pio(master, xfer);
+			} else {
+				as->current_remaining_bytes -= len;
+			}
+		} else {
+			atmel_spi_next_xfer_pio(master, xfer);
+		}
+
+		/*
+		 * as->lock is a spinlock which the interrupt handler may
+		 * also take; drop it while sleeping on the completion.
+		 */
+		atmel_spi_unlock(as);
+		ret = wait_for_completion_timeout(&as->xfer_completion,
+							SPI_DMA_TIMEOUT);
+		atmel_spi_lock(as);
+		if (WARN_ON(ret == 0)) {
+			dev_err(&spi->dev,
+				"spi trasfer timeout, err %d\n", ret);
+			as->done_status = -EIO;
+		} else {
+			ret = 0;
+		}
+
+		if (as->done_status)
+			break;
+	}
+
+	if (as->done_status) {
+		if (as->use_pdc) {
+			dev_warn(master->dev.parent,
+				"overrun (%u/%u remaining)\n",
+				spi_readl(as, TCR), spi_readl(as, RCR));
+
+			/*
+			 * Clean up DMA registers and make sure the data
+			 * registers are empty.
+			 */
+			spi_writel(as, RNCR, 0);
+			spi_writel(as, TNCR, 0);
+			spi_writel(as, RCR, 0);
+			spi_writel(as, TCR, 0);
+			for (timeout = 1000; timeout; timeout--)
+				if (spi_readl(as, SR) & SPI_BIT(TXEMPTY))
+					break;
+			if (!timeout)
+				dev_warn(master->dev.parent,
+					 "timeout waiting for TXEMPTY");
+			while (spi_readl(as, SR) & SPI_BIT(RDRF))
+				spi_readl(as, RDR);
+
+			/* Clear any overrun happening while cleaning up */
+			spi_readl(as, SR);
+
+		} else if (atmel_spi_use_dma(as, xfer)) {
+			atmel_spi_stop_dma(as);
+		}
+
+		if (!msg->is_dma_mapped
+			&& (atmel_spi_use_dma(as, xfer) || as->use_pdc))
+			atmel_spi_dma_unmap_xfer(master, xfer);
+
+		return as->done_status;
+
+	} else {
+		/* only update length if no error */
+		msg->actual_length += xfer->len;
+	}
+
+	if (!msg->is_dma_mapped
+		&& (atmel_spi_use_dma(as, xfer) || as->use_pdc))
+		atmel_spi_dma_unmap_xfer(master, xfer);
+
+	if (xfer->delay_usecs)
+		udelay(xfer->delay_usecs);
+
+	if (xfer->cs_change) {
+		if (list_is_last(&xfer->transfer_list,
+				 &msg->transfers)) {
+			as->keep_cs = true;
+		} else {
+			as->cs_active = !as->cs_active;
+			if (as->cs_active)
+				cs_activate(as, msg->spi);
+			else
+				cs_deactivate(as, msg->spi);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * transfer_one_message() hook: run every transfer of @msg in order,
+ * stop at the first failure, then release chip select (unless the last
+ * transfer asked to keep it) and hand the message back to the SPI core.
+ */
+static int atmel_spi_transfer_one_message(struct spi_master *master,
+						struct spi_message *msg)
+{
+	struct atmel_spi *as;
+	struct spi_transfer *xfer;
+	struct spi_device *spi = msg->spi;
+	int ret = 0;
+
+	as = spi_master_get_devdata(master);
+
+	dev_dbg(&spi->dev, "new message %p submitted for %s\n",
+					msg, dev_name(&spi->dev));
 
 	if (unlikely(list_empty(&msg->transfers)))
 		return -EINVAL;
 
-	if (as->stopping)
-		return -ESHUTDOWN;
+	atmel_spi_lock(as);
+	cs_activate(as, spi);
+
+	as->cs_active = true;
+	as->keep_cs = false;
+
+	msg->status = 0;
+	msg->actual_length = 0;
 
 	list_for_each_entry(xfer, &msg->transfers, transfer_list) {
-		if (!(xfer->tx_buf || xfer->rx_buf) && xfer->len) {
-			dev_dbg(&spi->dev, "missing rx or tx buf\n");
-			return -EINVAL;
-		}
-
-		if (xfer->bits_per_word) {
-			asd = spi->controller_state;
-			bits = (asd->csr >> 4) & 0xf;
-			if (bits != xfer->bits_per_word - 8) {
-				dev_dbg(&spi->dev,
-					"you can't yet change bits_per_word in transfers\n");
-				return -ENOPROTOOPT;
-			}
-		}
-
-		if (xfer->bits_per_word > 8) {
-			if (xfer->len % 2) {
-				dev_dbg(&spi->dev, "buffer len should be 16 bits aligned\n");
-				return -EINVAL;
-			}
-		}
-
-		/*
-		 * DMA map early, for performance (empties dcache ASAP) and
-		 * better fault reporting.
-		 */
-		if ((!msg->is_dma_mapped) && (atmel_spi_use_dma(as, xfer)
-			|| as->use_pdc)) {
-			if (atmel_spi_dma_map_xfer(as, xfer) < 0)
-				return -ENOMEM;
-		}
+		ret = atmel_spi_one_transfer(master, msg, xfer);
+		if (ret)
+			goto msg_done;
 	}
 
-#ifdef VERBOSE
+	if (as->use_pdc)
+		atmel_spi_disable_pdc_transfer(as);
+
 	list_for_each_entry(xfer, &msg->transfers, transfer_list) {
-		dev_dbg(controller,
+		dev_dbg(&spi->dev,
 			"  xfer %p: len %u tx %p/%08x rx %p/%08x\n",
 			xfer, xfer->len,
 			xfer->tx_buf, xfer->tx_dma,
 			xfer->rx_buf, xfer->rx_dma);
 	}
-#endif
 
-	msg->status = -EINPROGRESS;
-	msg->actual_length = 0;
+msg_done:
+	if (!as->keep_cs)
+		cs_deactivate(as, msg->spi);
 
-	atmel_spi_lock(as);
-	list_add_tail(&msg->queue, &as->queue);
-	if (!as->current_transfer)
-		atmel_spi_next_message(spi->master);
 	atmel_spi_unlock(as);
 
-	return 0;
+	msg->status = ret;
+	spi_finalize_current_message(spi->master);
+
+	return ret;
 }
 
 static void atmel_spi_cleanup(struct spi_device *spi)
 {
-	struct atmel_spi	*as = spi_master_get_devdata(spi->master);
 	struct atmel_spi_device	*asd = spi->controller_state;
 	unsigned		gpio = (unsigned) spi->controller_data;
 
 	if (!asd)
 		return;
 
-	atmel_spi_lock(as);
-	if (as->stay == spi) {
-		as->stay = NULL;
-		cs_deactivate(as, spi);
-	}
-	atmel_spi_unlock(as);
-
 	spi->controller_state = NULL;
 	gpio_free(gpio);
 	kfree(asd);
@@ -1545,7 +1328,7 @@
 	master->bus_num = pdev->id;
 	master->num_chipselect = master->dev.of_node ? 0 : 4;
 	master->setup = atmel_spi_setup;
-	master->transfer = atmel_spi_transfer;
+	master->transfer_one_message = atmel_spi_transfer_one_message;
 	master->cleanup = atmel_spi_cleanup;
 	platform_set_drvdata(pdev, master);
 
@@ -1561,7 +1344,6 @@
 		goto out_free;
 
 	spin_lock_init(&as->lock);
-	INIT_LIST_HEAD(&as->queue);
 
 	as->pdev = pdev;
 	as->regs = devm_ioremap_resource(&pdev->dev, regs);
@@ -1573,6 +1355,8 @@
 	as->irq = irq;
 	as->clk = clk;
 
+	init_completion(&as->xfer_completion);
+
 	atmel_get_caps(as);
 
 	as->use_dma = false;
@@ -1591,9 +1375,6 @@
 		ret = devm_request_irq(&pdev->dev, irq, atmel_spi_pdc_interrupt,
 					0, dev_name(&pdev->dev), master);
 	} else {
-		tasklet_init(&as->tasklet, atmel_spi_tasklet_func,
-					(unsigned long)master);
-
 		ret = devm_request_irq(&pdev->dev, irq, atmel_spi_pio_interrupt,
 					0, dev_name(&pdev->dev), master);
 	}
@@ -1637,8 +1418,6 @@
 out_free_irq:
 out_unmap_regs:
 out_free_buffer:
-	if (!as->use_pdc)
-		tasklet_kill(&as->tasklet);
 	dma_free_coherent(&pdev->dev, BUFFER_SIZE, as->buffer,
 			as->buffer_dma);
 out_free:
@@ -1650,12 +1429,9 @@
 {
 	struct spi_master	*master = platform_get_drvdata(pdev);
 	struct atmel_spi	*as = spi_master_get_devdata(master);
-	struct spi_message	*msg;
-	struct spi_transfer	*xfer;
 
 	/* reset the hardware and block queue progress */
 	spin_lock_irq(&as->lock);
-	as->stopping = 1;
 	if (as->use_dma) {
 		atmel_spi_stop_dma(as);
 		atmel_spi_release_dma(as);
@@ -1666,20 +1442,6 @@
 	spi_readl(as, SR);
 	spin_unlock_irq(&as->lock);
 
-	/* Terminate remaining queued transfers */
-	list_for_each_entry(msg, &as->queue, queue) {
-		list_for_each_entry(xfer, &msg->transfers, transfer_list) {
-			if (!msg->is_dma_mapped
-				&& (atmel_spi_use_dma(as, xfer)
-					|| as->use_pdc))
-				atmel_spi_dma_unmap_xfer(master, xfer);
-		}
-		msg->status = -ESHUTDOWN;
-		msg->complete(msg->context);
-	}
-
-	if (!as->use_pdc)
-		tasklet_kill(&as->tasklet);
 	dma_free_coherent(&pdev->dev, BUFFER_SIZE, as->buffer,
 			as->buffer_dma);