Merge series "spi-bcm-qspi spcr3 enahancements" from Kamal Dasu <kdasu.kdev@gmail.com>:

This change set feature enahancements for spcr3 transfer modes as well as
adds support for half-duplex 3-wire mode transfer.

Kamal Dasu (3):
  spi: bcm-qspi: Add mspi spcr3 32/64-bits xfer mode
  spi: bcm-qspi: clear MSPI spifie interrupt during probe
  spi: bcm-qspi: add support for 3-wire mode for half duplex transfer

 drivers/spi/spi-bcm-qspi.c | 262 +++++++++++++++++++++++++++++--------
 1 file changed, 208 insertions(+), 54 deletions(-)

--
2.17.1
diff --git a/drivers/spi/spi-bcm-qspi.c b/drivers/spi/spi-bcm-qspi.c
index 0d95fe5..6cf7cff 100644
--- a/drivers/spi/spi-bcm-qspi.c
+++ b/drivers/spi/spi-bcm-qspi.c
@@ -83,6 +83,9 @@
 /* MSPI register offsets */
 #define MSPI_SPCR0_LSB				0x000
 #define MSPI_SPCR0_MSB				0x004
+#define MSPI_SPCR0_MSB_CPHA			BIT(0)
+#define MSPI_SPCR0_MSB_CPOL			BIT(1)
+#define MSPI_SPCR0_MSB_BITS_SHIFT		0x2
 #define MSPI_SPCR1_LSB				0x008
 #define MSPI_SPCR1_MSB				0x00c
 #define MSPI_NEWQP				0x010
@@ -100,8 +103,10 @@
 #define MSPI_MASTER_BIT			BIT(7)
 
 #define MSPI_NUM_CDRAM				16
+#define MSPI_CDRAM_OUTP				BIT(8)
 #define MSPI_CDRAM_CONT_BIT			BIT(7)
 #define MSPI_CDRAM_BITSE_BIT			BIT(6)
+#define MSPI_CDRAM_DT_BIT			BIT(5)
 #define MSPI_CDRAM_PCS				0xf
 
 #define MSPI_SPCR2_SPE				BIT(6)
@@ -114,6 +119,14 @@
 						 ~(BIT(10) | BIT(11)))
 #define MSPI_SPCR3_SYSCLKSEL_108		(MSPI_SPCR3_SYSCLKSEL_MASK & \
 						 BIT(11))
+#define MSPI_SPCR3_TXRXDAM_MASK			GENMASK(4, 2)
+#define MSPI_SPCR3_DAM_8BYTE			0
+#define MSPI_SPCR3_DAM_16BYTE			(BIT(2) | BIT(4))
+#define MSPI_SPCR3_DAM_32BYTE			(BIT(3) | BIT(5))
+#define MSPI_SPCR3_HALFDUPLEX			BIT(6)
+#define MSPI_SPCR3_HDOUTTYPE			BIT(7)
+#define MSPI_SPCR3_DATA_REG_SZ			BIT(8)
+#define MSPI_SPCR3_CPHARX			BIT(9)
 
 #define MSPI_MSPI_STATUS_SPIF			BIT(0)
 
@@ -153,6 +166,14 @@
 #define TRANS_STATUS_BREAK_DESELECT (TRANS_STATUS_BREAK_EOM |		\
 				     TRANS_STATUS_BREAK_CS_CHANGE)
 
+/*
+ * Used for writing and reading data in the right order
+ * to TXRAM and RXRAM when used as 32-bit registers respectively
+ */
+#define swap4bytes(__val) \
+	((((__val) >> 24) & 0x000000FF) | (((__val) >>  8) & 0x0000FF00) | \
+	 (((__val) <<  8) & 0x00FF0000) | (((__val) << 24) & 0xFF000000))
+
 struct bcm_qspi_parms {
 	u32 speed_hz;
 	u8 mode;
@@ -261,7 +282,7 @@ static inline bool bcm_qspi_has_sysclk_108(struct bcm_qspi *qspi)
 static inline int bcm_qspi_spbr_min(struct bcm_qspi *qspi)
 {
 	if (bcm_qspi_has_fastbr(qspi))
-		return 1;
+		return (bcm_qspi_has_sysclk_108(qspi) ? 4 : 1);
 	else
 		return 8;
 }
@@ -571,23 +592,23 @@ static void bcm_qspi_hw_set_parms(struct bcm_qspi *qspi,
 {
 	u32 spcr, spbr = 0;
 
-	if (xp->speed_hz)
-		spbr = qspi->base_clk / (2 * xp->speed_hz);
-
-	spcr = clamp_val(spbr, bcm_qspi_spbr_min(qspi), QSPI_SPBR_MAX);
-	bcm_qspi_write(qspi, MSPI, MSPI_SPCR0_LSB, spcr);
-
 	if (!qspi->mspi_maj_rev)
 		/* legacy controller */
 		spcr = MSPI_MASTER_BIT;
 	else
 		spcr = 0;
 
-	/* for 16 bit the data should be zero */
-	if (xp->bits_per_word != 16)
-		spcr |= xp->bits_per_word << 2;
-	spcr |= xp->mode & 3;
+	/*
+	 * Bits per transfer.  BITS determines the number of data bits
+	 * transferred if the command control bit (BITSE of a
+	 * CDRAM Register) is equal to 1.
+	 * If CDRAM BITSE is equal to 0, 8 data bits are transferred
+	 * regardless
+	 */
+	if (xp->bits_per_word != 16 && xp->bits_per_word != 64)
+		spcr |= xp->bits_per_word << MSPI_SPCR0_MSB_BITS_SHIFT;
 
+	spcr |= xp->mode & (MSPI_SPCR0_MSB_CPHA | MSPI_SPCR0_MSB_CPOL);
 	bcm_qspi_write(qspi, MSPI, MSPI_SPCR0_MSB, spcr);
 
 	if (bcm_qspi_has_fastbr(qspi)) {
@@ -596,17 +617,44 @@ static void bcm_qspi_hw_set_parms(struct bcm_qspi *qspi,
 		/* enable fastbr */
 		spcr |=	MSPI_SPCR3_FASTBR;
 
+		if (xp->mode & SPI_3WIRE)
+			spcr |= MSPI_SPCR3_HALFDUPLEX |  MSPI_SPCR3_HDOUTTYPE;
+
 		if (bcm_qspi_has_sysclk_108(qspi)) {
 			/* SYSCLK_108 */
 			spcr |= MSPI_SPCR3_SYSCLKSEL_108;
 			qspi->base_clk = MSPI_BASE_FREQ * 4;
-			/* Change spbr as we changed sysclk */
-			bcm_qspi_write(qspi, MSPI, MSPI_SPCR0_LSB, 4);
 		}
 
+		if (xp->bits_per_word > 16) {
+			/* data_reg_size 1 (64bit) */
+			spcr |=	MSPI_SPCR3_DATA_REG_SZ;
+			/* TxRx RAM data access mode 2 for 32B and set fastdt */
+			spcr |=	MSPI_SPCR3_DAM_32BYTE  | MSPI_SPCR3_FASTDT;
+			/*
+			 *  Set length of delay after transfer
+			 *  DTL from 0(256) to 1
+			 */
+			bcm_qspi_write(qspi, MSPI, MSPI_SPCR1_LSB, 1);
+		} else {
+			/* data_reg_size[8] = 0 */
+			spcr &=	~(MSPI_SPCR3_DATA_REG_SZ);
+
+			/*
+			 * TxRx RAM access mode 8B
+			 * and disable fastdt
+			 */
+			spcr &= ~(MSPI_SPCR3_DAM_32BYTE);
+		}
 		bcm_qspi_write(qspi, MSPI, MSPI_SPCR3, spcr);
 	}
 
+	if (xp->speed_hz)
+		spbr = qspi->base_clk / (2 * xp->speed_hz);
+
+	spbr = clamp_val(spbr, bcm_qspi_spbr_min(qspi), QSPI_SPBR_MAX);
+	bcm_qspi_write(qspi, MSPI, MSPI_SPCR0_LSB, spbr);
+
 	qspi->last_parms = *xp;
 }
 
@@ -627,7 +675,7 @@ static int bcm_qspi_setup(struct spi_device *spi)
 {
 	struct bcm_qspi_parms *xp;
 
-	if (spi->bits_per_word > 16)
+	if (spi->bits_per_word > 64)
 		return -EINVAL;
 
 	xp = spi_get_ctldata(spi);
@@ -666,8 +714,12 @@ static int update_qspi_trans_byte_count(struct bcm_qspi *qspi,
 	/* count the last transferred bytes */
 	if (qt->trans->bits_per_word <= 8)
 		qt->byte++;
-	else
+	else if (qt->trans->bits_per_word <= 16)
 		qt->byte += 2;
+	else if (qt->trans->bits_per_word <= 32)
+		qt->byte += 4;
+	else if (qt->trans->bits_per_word <= 64)
+		qt->byte += 8;
 
 	if (qt->byte >= qt->trans->len) {
 		/* we're at the end of the spi_transfer */
@@ -710,6 +762,33 @@ static inline u16 read_rxram_slot_u16(struct bcm_qspi *qspi, int slot)
 		((bcm_qspi_read(qspi, MSPI, msb_offset) & 0xff) << 8);
 }
 
+static inline u32 read_rxram_slot_u32(struct bcm_qspi *qspi, int slot)
+{
+	u32 reg_offset = MSPI_RXRAM;
+	u32 offset = reg_offset + (slot << 3);
+	u32 val;
+
+	val = bcm_qspi_read(qspi, MSPI, offset);
+	val = swap4bytes(val);
+
+	return val;
+}
+
+static inline u64 read_rxram_slot_u64(struct bcm_qspi *qspi, int slot)
+{
+	u32 reg_offset = MSPI_RXRAM;
+	u32 lsb_offset = reg_offset + (slot << 3) + 0x4;
+	u32 msb_offset = reg_offset + (slot << 3);
+	u32 msb, lsb;
+
+	msb = bcm_qspi_read(qspi, MSPI, msb_offset);
+	msb = swap4bytes(msb);
+	lsb = bcm_qspi_read(qspi, MSPI, lsb_offset);
+	lsb = swap4bytes(lsb);
+
+	return ((u64)msb << 32 | lsb);
+}
+
 static void read_from_hw(struct bcm_qspi *qspi, int slots)
 {
 	struct qspi_trans tp;
@@ -733,7 +812,7 @@ static void read_from_hw(struct bcm_qspi *qspi, int slots)
 				buf[tp.byte] = read_rxram_slot_u8(qspi, slot);
 			dev_dbg(&qspi->pdev->dev, "RD %02x\n",
 				buf ? buf[tp.byte] : 0x0);
-		} else {
+		} else if (tp.trans->bits_per_word <= 16) {
 			u16 *buf = tp.trans->rx_buf;
 
 			if (buf)
@@ -741,6 +820,25 @@ static void read_from_hw(struct bcm_qspi *qspi, int slots)
 								      slot);
 			dev_dbg(&qspi->pdev->dev, "RD %04x\n",
 				buf ? buf[tp.byte / 2] : 0x0);
+		} else if (tp.trans->bits_per_word <= 32) {
+			u32 *buf = tp.trans->rx_buf;
+
+			if (buf)
+				buf[tp.byte / 4] = read_rxram_slot_u32(qspi,
+								      slot);
+			dev_dbg(&qspi->pdev->dev, "RD %08x\n",
+				buf ? buf[tp.byte / 4] : 0x0);
+
+		} else if (tp.trans->bits_per_word <= 64) {
+			u64 *buf = tp.trans->rx_buf;
+
+			if (buf)
+				buf[tp.byte / 8] = read_rxram_slot_u64(qspi,
+								      slot);
+			dev_dbg(&qspi->pdev->dev, "RD %llx\n",
+				buf ? buf[tp.byte / 8] : 0x0);
+
+
 		}
 
 		update_qspi_trans_byte_count(qspi, &tp,
@@ -770,6 +868,28 @@ static inline void write_txram_slot_u16(struct bcm_qspi *qspi, int slot,
 	bcm_qspi_write(qspi, MSPI, lsb_offset, (val & 0xff));
 }
 
+static inline void write_txram_slot_u32(struct bcm_qspi *qspi, int slot,
+					u32 val)
+{
+	u32 reg_offset = MSPI_TXRAM;
+	u32 msb_offset = reg_offset + (slot << 3);
+
+	bcm_qspi_write(qspi, MSPI, msb_offset, swap4bytes(val));
+}
+
+static inline void write_txram_slot_u64(struct bcm_qspi *qspi, int slot,
+					u64 val)
+{
+	u32 reg_offset = MSPI_TXRAM;
+	u32 msb_offset = reg_offset + (slot << 3);
+	u32 lsb_offset = reg_offset + (slot << 3) + 0x4;
+	u32 msb = upper_32_bits(val);
+	u32 lsb = lower_32_bits(val);
+
+	bcm_qspi_write(qspi, MSPI, msb_offset, swap4bytes(msb));
+	bcm_qspi_write(qspi, MSPI, lsb_offset, swap4bytes(lsb));
+}
+
 static inline u32 read_cdram_slot(struct bcm_qspi *qspi, int slot)
 {
 	return bcm_qspi_read(qspi, MSPI, MSPI_CDRAM + (slot << 2));
@@ -793,20 +913,43 @@ static int write_to_hw(struct bcm_qspi *qspi, struct spi_device *spi)
 
 	/* Run until end of transfer or reached the max data */
 	while (!tstatus && slot < MSPI_NUM_CDRAM) {
+		mspi_cdram = MSPI_CDRAM_CONT_BIT;
 		if (tp.trans->bits_per_word <= 8) {
 			const u8 *buf = tp.trans->tx_buf;
 			u8 val = buf ? buf[tp.byte] : 0x00;
 
 			write_txram_slot_u8(qspi, slot, val);
 			dev_dbg(&qspi->pdev->dev, "WR %02x\n", val);
-		} else {
+		} else if (tp.trans->bits_per_word <= 16) {
 			const u16 *buf = tp.trans->tx_buf;
 			u16 val = buf ? buf[tp.byte / 2] : 0x0000;
 
 			write_txram_slot_u16(qspi, slot, val);
 			dev_dbg(&qspi->pdev->dev, "WR %04x\n", val);
+		} else if (tp.trans->bits_per_word <= 32) {
+			const u32 *buf = tp.trans->tx_buf;
+			u32 val = buf ? buf[tp.byte/4] : 0x0;
+
+			write_txram_slot_u32(qspi, slot, val);
+			dev_dbg(&qspi->pdev->dev, "WR %08x\n", val);
+		} else if (tp.trans->bits_per_word <= 64) {
+			const u64 *buf = tp.trans->tx_buf;
+			u64 val = (buf ? buf[tp.byte/8] : 0x0);
+
+			/* use the length of delay from SPCR1_LSB */
+			if (bcm_qspi_has_fastbr(qspi))
+				mspi_cdram |= MSPI_CDRAM_DT_BIT;
+
+			write_txram_slot_u64(qspi, slot, val);
+			dev_dbg(&qspi->pdev->dev, "WR %llx\n", val);
 		}
-		mspi_cdram = MSPI_CDRAM_CONT_BIT;
+
+		mspi_cdram |= ((tp.trans->bits_per_word <= 8) ? 0 :
+			       MSPI_CDRAM_BITSE_BIT);
+
+		/* set 3wrire halfduplex mode data from master to slave */
+		if ((spi->mode & SPI_3WIRE) && tp.trans->tx_buf)
+			mspi_cdram |= MSPI_CDRAM_OUTP;
 
 		if (has_bspi(qspi))
 			mspi_cdram &= ~1;
@@ -814,9 +957,6 @@ static int write_to_hw(struct bcm_qspi *qspi, struct spi_device *spi)
 			mspi_cdram |= (~(1 << spi->chip_select) &
 				       MSPI_CDRAM_PCS);
 
-		mspi_cdram |= ((tp.trans->bits_per_word <= 8) ? 0 :
-				MSPI_CDRAM_BITSE_BIT);
-
 		write_cdram_slot(qspi, slot, mspi_cdram);
 
 		tstatus = update_qspi_trans_byte_count(qspi, &tp,
@@ -1251,10 +1391,14 @@ static void bcm_qspi_hw_init(struct bcm_qspi *qspi)
 
 static void bcm_qspi_hw_uninit(struct bcm_qspi *qspi)
 {
+	u32 status = bcm_qspi_read(qspi, MSPI, MSPI_MSPI_STATUS);
+
 	bcm_qspi_write(qspi, MSPI, MSPI_SPCR2, 0);
 	if (has_bspi(qspi))
 		bcm_qspi_write(qspi, MSPI, MSPI_WRITE_LOCK, 0);
 
+	/* clear interrupt */
+	bcm_qspi_write(qspi, MSPI, MSPI_MSPI_STATUS, status & ~1);
 }
 
 static const struct spi_controller_mem_ops bcm_qspi_mem_ops = {
@@ -1347,7 +1491,8 @@ int bcm_qspi_probe(struct platform_device *pdev,
 	qspi->master = master;
 
 	master->bus_num = -1;
-	master->mode_bits = SPI_CPHA | SPI_CPOL | SPI_RX_DUAL | SPI_RX_QUAD;
+	master->mode_bits = SPI_CPHA | SPI_CPOL | SPI_RX_DUAL | SPI_RX_QUAD |
+				SPI_3WIRE;
 	master->setup = bcm_qspi_setup;
 	master->transfer_one = bcm_qspi_transfer_one;
 	master->mem_ops = &bcm_qspi_mem_ops;
@@ -1398,6 +1543,47 @@ int bcm_qspi_probe(struct platform_device *pdev,
 	if (!qspi->dev_ids)
 		return -ENOMEM;
 
+	/*
+	 * Some SoCs integrate spi controller (e.g., its interrupt bits)
+	 * in specific ways
+	 */
+	if (soc_intc) {
+		qspi->soc_intc = soc_intc;
+		soc_intc->bcm_qspi_int_set(soc_intc, MSPI_DONE, true);
+	} else {
+		qspi->soc_intc = NULL;
+	}
+
+	if (qspi->clk) {
+		ret = clk_prepare_enable(qspi->clk);
+		if (ret) {
+			dev_err(dev, "failed to prepare clock\n");
+			goto qspi_probe_err;
+		}
+		qspi->base_clk = clk_get_rate(qspi->clk);
+	} else {
+		qspi->base_clk = MSPI_BASE_FREQ;
+	}
+
+	if (data->has_mspi_rev) {
+		rev = bcm_qspi_read(qspi, MSPI, MSPI_REV);
+		/* some older revs do not have a MSPI_REV register */
+		if ((rev & 0xff) == 0xff)
+			rev = 0;
+	}
+
+	qspi->mspi_maj_rev = (rev >> 4) & 0xf;
+	qspi->mspi_min_rev = rev & 0xf;
+	qspi->mspi_spcr3_sysclk = data->has_spcr3_sysclk;
+
+	qspi->max_speed_hz = qspi->base_clk / (bcm_qspi_spbr_min(qspi) * 2);
+
+	/*
+	 * On SW resets it is possible to have the mask still enabled
+	 * Need to disable the mask and clear the status while we init
+	 */
+	bcm_qspi_hw_uninit(qspi);
+
 	for (val = 0; val < num_irqs; val++) {
 		irq = -1;
 		name = qspi_irq_tab[val].irq_name;
@@ -1434,38 +1620,6 @@ int bcm_qspi_probe(struct platform_device *pdev,
 		goto qspi_probe_err;
 	}
 
-	/*
-	 * Some SoCs integrate spi controller (e.g., its interrupt bits)
-	 * in specific ways
-	 */
-	if (soc_intc) {
-		qspi->soc_intc = soc_intc;
-		soc_intc->bcm_qspi_int_set(soc_intc, MSPI_DONE, true);
-	} else {
-		qspi->soc_intc = NULL;
-	}
-
-	ret = clk_prepare_enable(qspi->clk);
-	if (ret) {
-		dev_err(dev, "failed to prepare clock\n");
-		goto qspi_probe_err;
-	}
-
-	qspi->base_clk = clk_get_rate(qspi->clk);
-
-	if (data->has_mspi_rev) {
-		rev = bcm_qspi_read(qspi, MSPI, MSPI_REV);
-		/* some older revs do not have a MSPI_REV register */
-		if ((rev & 0xff) == 0xff)
-			rev = 0;
-	}
-
-	qspi->mspi_maj_rev = (rev >> 4) & 0xf;
-	qspi->mspi_min_rev = rev & 0xf;
-	qspi->mspi_spcr3_sysclk = data->has_spcr3_sysclk;
-
-	qspi->max_speed_hz = qspi->base_clk / (bcm_qspi_spbr_min(qspi) * 2);
-
 	bcm_qspi_hw_init(qspi);
 	init_completion(&qspi->mspi_done);
 	init_completion(&qspi->bspi_done);