diff mbox

[V2] spi: exynos: Support word transfers

Message ID 1369893156-3954-1-git-send-email-rajeshwari.s@samsung.com
State New
Headers show

Commit Message

Rajeshwari Shinde May 30, 2013, 5:52 a.m. UTC
Since SPI register access is so expensive, it is worth transferring data
a word at a time if we can. This complicates the driver unfortunately.

Use the byte-swapping feature to avoid having to convert to/from big
endian in software.

This change increases speed from about 2MB/s to about 4.5MB/s.

Based on "[PATCH V2] spi: exynos: Minimise access to SPI FIFO level"

Signed-off-by: Simon Glass <sjg@chromium.org>
Signed-off-by: Rajeshwari Shinde <rajeshwari.s@samsung.com>
---
Changes in V2:
	- Rebased on [PATCH V2] spi: exynos: Minimise access to SPI FIFO level
 arch/arm/include/asm/arch-exynos/spi.h |   11 ++++-
 drivers/spi/exynos_spi.c               |   79 ++++++++++++++++++++++++++------
 2 files changed, 74 insertions(+), 16 deletions(-)

Comments

Jagan Teki June 2, 2013, 6:09 p.m. UTC | #1
On Thu, May 30, 2013 at 11:22 AM, Rajeshwari Shinde
<rajeshwari.s@samsung.com> wrote:
> Since SPI register access is so expensive, it is worth transferring data
> a word at a time if we can. This complicates the driver unfortunately.
>
> Use the byte-swapping feature to avoid having to convert to/from big
> endian in software.
>
> This change increases speed from about 2MB/s to about 4.5MB/s.
>
> Based on "[PATCH V2] spi: exynos: Minimise access to SPI FIFO level"
>
> Signed-off-by: Simon Glass <sjg@chromium.org>
> Signed-off-by: Rajeshwari Shinde <rajeshwari.s@samsung.com>
> ---
> Changes in V2:
>         - Rebased on [PATCH V2] spi: exynos: Minimise access to SPI FIFO level
>  arch/arm/include/asm/arch-exynos/spi.h |   11 ++++-
>  drivers/spi/exynos_spi.c               |   79 ++++++++++++++++++++++++++------
>  2 files changed, 74 insertions(+), 16 deletions(-)
>
> diff --git a/arch/arm/include/asm/arch-exynos/spi.h b/arch/arm/include/asm/arch-exynos/spi.h
> index 7cab1e9..e67ad27 100644
> --- a/arch/arm/include/asm/arch-exynos/spi.h
> +++ b/arch/arm/include/asm/arch-exynos/spi.h
> @@ -34,7 +34,7 @@ struct exynos_spi {
>         unsigned int            rx_data;        /* 0x1c */
>         unsigned int            pkt_cnt;        /* 0x20 */
>         unsigned char           reserved2[4];
> -       unsigned char           reserved3[4];
> +       unsigned int            swap_cfg;       /* 0x28 */
>         unsigned int            fb_clk;         /* 0x2c */
>         unsigned char           padding[0xffd0];
>  };
> @@ -74,5 +74,14 @@ struct exynos_spi {
>  /* Packet Count */
>  #define SPI_PACKET_CNT_EN      (1 << 16)
>
> +/* Swap config */
> +#define SPI_TX_SWAP_EN         (1 << 0)
> +#define SPI_TX_BYTE_SWAP       (1 << 2)
> +#define SPI_TX_HWORD_SWAP      (1 << 3)
> +#define SPI_TX_BYTE_SWAP       (1 << 2)
> +#define SPI_RX_SWAP_EN         (1 << 4)
> +#define SPI_RX_BYTE_SWAP       (1 << 6)
> +#define SPI_RX_HWORD_SWAP      (1 << 7)
> +
>  #endif /* __ASSEMBLY__ */
>  #endif
> diff --git a/drivers/spi/exynos_spi.c b/drivers/spi/exynos_spi.c
> index bcca3d6..4dda23b 100644
> --- a/drivers/spi/exynos_spi.c
> +++ b/drivers/spi/exynos_spi.c
> @@ -216,12 +216,29 @@ static void spi_get_fifo_levels(struct exynos_spi *regs,
>   *
>   * @param regs SPI peripheral registers
>   * @param count        Number of bytes to transfer
> + * @param step Number of bytes to transfer in each packet (1 or 4)
>   */
> -static void spi_request_bytes(struct exynos_spi *regs, int count)
> +static void spi_request_bytes(struct exynos_spi *regs, int count, int step)
>  {
> +       /* For word address we need to swap bytes */
> +       if (step == 4) {
> +               setbits_le32(&regs->mode_cfg,
> +                            SPI_MODE_CH_WIDTH_WORD | SPI_MODE_BUS_WIDTH_WORD);
> +               count /= 4;
> +               setbits_le32(&regs->swap_cfg, SPI_TX_SWAP_EN | SPI_RX_SWAP_EN |
> +                       SPI_TX_BYTE_SWAP | SPI_RX_BYTE_SWAP |
> +                       SPI_TX_HWORD_SWAP | SPI_RX_HWORD_SWAP);
> +       } else {
> +               /* Select byte access and clear the swap configuration */
> +               clrbits_le32(&regs->mode_cfg,
> +                            SPI_MODE_CH_WIDTH_WORD | SPI_MODE_BUS_WIDTH_WORD);
> +               writel(0, &regs->swap_cfg);
> +       }
> +
>         assert(count && count < (1 << 16));
>         setbits_le32(&regs->ch_cfg, SPI_CH_RST);
>         clrbits_le32(&regs->ch_cfg, SPI_CH_RST);
> +
>         writel(count | SPI_PACKET_CNT_EN, &regs->pkt_cnt);
>  }
>
> @@ -236,6 +253,7 @@ static int spi_rx_tx(struct exynos_spi_slave *spi_slave, int todo,
>         int toread;
>         unsigned start = get_timer(0);
>         int stopping;
> +       int step;
>
>         out_bytes = in_bytes = todo;
>
> @@ -243,10 +261,19 @@ static int spi_rx_tx(struct exynos_spi_slave *spi_slave, int todo,
>                                         !(spi_slave->mode & SPI_SLAVE);
>
>         /*
> +        * Try to transfer words if we can. This helps read performance at
> +        * SPI clock speeds above about 20MHz.
> +        */
> +       step = 1;
> +       if (!((todo | (uintptr_t)rxp | (uintptr_t)txp) & 3) &&
> +           !spi_slave->skip_preamble)
> +               step = 4;
> +
> +       /*
>          * If there's something to send, do a software reset and set a
>          * transaction size.
>          */
> -       spi_request_bytes(regs, todo);
> +       spi_request_bytes(regs, todo, step);
>
>         /*
>          * Bytes are transmitted/received in pairs. Wait to receive all the
> @@ -259,14 +286,26 @@ static int spi_rx_tx(struct exynos_spi_slave *spi_slave, int todo,
>
>                 /* Keep the fifos full/empty. */
>                 spi_get_fifo_levels(regs, &rx_lvl, &tx_lvl);
> +
> +               /*
> +                * Don't completely fill the txfifo, since we don't want our
> +                * rxfifo to overflow, and it may already contain data.
> +                */
>                 while (tx_lvl < spi_slave->fifo_size/2 && out_bytes) {
> -                       temp = txp ? *txp++ : 0xff;
> +                       if (!txp)
> +                               temp = -1;
> +                       else if (step == 4)
> +                               temp = *(uint32_t *)txp;
> +                       else
> +                               temp = *txp;
>                         writel(temp, &regs->tx_data);
> -                       out_bytes--;
> -                       tx_lvl++;
> +                       out_bytes -= step;
> +                       if (txp)
> +                               txp += step;
> +                       tx_lvl += step;
>                 }
> -               if (rx_lvl > 0) {
> -                       while (rx_lvl > 0) {
> +               if (rx_lvl >= step) {
> +                       while (rx_lvl >= step) {
>                                 temp = readl(&regs->rx_data);
>                                 if (spi_slave->skip_preamble) {
>                                         if (temp == SPI_PREAMBLE_END_BYTE) {
> @@ -274,12 +313,18 @@ static int spi_rx_tx(struct exynos_spi_slave *spi_slave, int todo,
>                                                 stopping = 0;
>                                         }
>                                 } else {
> -                                       if (rxp || stopping)
> -                                               *rxp++ = temp;
> -                                       in_bytes--;

Please see below Tag code, seems like some code style issues with checkpatch.
---------------------- Tag+
> +                                       if (rxp || stopping) {
> +                                               if (step == 4)
> +                                                       *(uint32_t *)rxp = temp;
> +                                               else
> +                                                       *rxp = temp;
> +                                               rxp += step;
> +                                       }
------------------------- Tag-
> +                                       in_bytes -= step;
>                                 }
> -                               toread--;
> -                               rx_lvl--;
> +                               toread -= step;
> +                               rx_lvl -= step;
> +                       }
>                 } else if (!toread) {
>                         /*
>                          * We have run out of input data, but haven't read
> @@ -291,7 +336,7 @@ static int spi_rx_tx(struct exynos_spi_slave *spi_slave, int todo,
>                         out_bytes = in_bytes;
>                         toread = in_bytes;
>                         txp = NULL;
> -                       spi_request_bytes(regs, toread);
> +                       spi_request_bytes(regs, toread, step);
>                 }
>                 if (spi_slave->skip_preamble && get_timer(start) > 100) {
>                         printf("SPI timeout: in_bytes=%d, out_bytes=%d, ",
> @@ -335,10 +380,14 @@ int spi_xfer(struct spi_slave *slave, unsigned int bitlen, const void *dout,
>         if ((flags & SPI_XFER_BEGIN))
>                 spi_cs_activate(slave);
>
> -       /* Exynos SPI limits each transfer to 65535 bytes */
> +       /*
> +        * Exynos SPI limits each transfer to 65535 transfers. To keep
> +        * things simple, allow a maximum of 65532 bytes. We could allow
> +        * more in word mode, but the performance difference is small.
> +        */
>         bytelen =  bitlen / 8;
>         for (upto = 0; !ret && upto < bytelen; upto += todo) {
> -               todo = min(bytelen - upto, (1 << 16) - 1);
> +               todo = min(bytelen - upto, (1 << 16) - 4);
>                 ret = spi_rx_tx(spi_slave, todo, &din, &dout, flags);
>                 if (ret)
>                         break;
> --
> 1.7.4.4
>

--
Thanks,
Jagan.
diff mbox

Patch

diff --git a/arch/arm/include/asm/arch-exynos/spi.h b/arch/arm/include/asm/arch-exynos/spi.h
index 7cab1e9..e67ad27 100644
--- a/arch/arm/include/asm/arch-exynos/spi.h
+++ b/arch/arm/include/asm/arch-exynos/spi.h
@@ -34,7 +34,7 @@  struct exynos_spi {
 	unsigned int		rx_data;	/* 0x1c */
 	unsigned int		pkt_cnt;	/* 0x20 */
 	unsigned char		reserved2[4];
-	unsigned char		reserved3[4];
+	unsigned int		swap_cfg;	/* 0x28 */
 	unsigned int		fb_clk;		/* 0x2c */
 	unsigned char		padding[0xffd0];
 };
@@ -74,5 +74,14 @@  struct exynos_spi {
 /* Packet Count */
 #define SPI_PACKET_CNT_EN	(1 << 16)
 
+/* Swap config */
+#define SPI_TX_SWAP_EN		(1 << 0)
+#define SPI_TX_BYTE_SWAP	(1 << 2)
+#define SPI_TX_HWORD_SWAP	(1 << 3)
+#define SPI_TX_BYTE_SWAP	(1 << 2)
+#define SPI_RX_SWAP_EN		(1 << 4)
+#define SPI_RX_BYTE_SWAP	(1 << 6)
+#define SPI_RX_HWORD_SWAP	(1 << 7)
+
 #endif /* __ASSEMBLY__ */
 #endif
diff --git a/drivers/spi/exynos_spi.c b/drivers/spi/exynos_spi.c
index bcca3d6..4dda23b 100644
--- a/drivers/spi/exynos_spi.c
+++ b/drivers/spi/exynos_spi.c
@@ -216,12 +216,29 @@  static void spi_get_fifo_levels(struct exynos_spi *regs,
  *
  * @param regs	SPI peripheral registers
  * @param count	Number of bytes to transfer
+ * @param step	Number of bytes to transfer in each packet (1 or 4)
  */
-static void spi_request_bytes(struct exynos_spi *regs, int count)
+static void spi_request_bytes(struct exynos_spi *regs, int count, int step)
 {
+	/* For word address we need to swap bytes */
+	if (step == 4) {
+		setbits_le32(&regs->mode_cfg,
+			     SPI_MODE_CH_WIDTH_WORD | SPI_MODE_BUS_WIDTH_WORD);
+		count /= 4;
+		setbits_le32(&regs->swap_cfg, SPI_TX_SWAP_EN | SPI_RX_SWAP_EN |
+			SPI_TX_BYTE_SWAP | SPI_RX_BYTE_SWAP |
+			SPI_TX_HWORD_SWAP | SPI_RX_HWORD_SWAP);
+	} else {
+		/* Select byte access and clear the swap configuration */
+		clrbits_le32(&regs->mode_cfg,
+			     SPI_MODE_CH_WIDTH_WORD | SPI_MODE_BUS_WIDTH_WORD);
+		writel(0, &regs->swap_cfg);
+	}
+
 	assert(count && count < (1 << 16));
 	setbits_le32(&regs->ch_cfg, SPI_CH_RST);
 	clrbits_le32(&regs->ch_cfg, SPI_CH_RST);
+
 	writel(count | SPI_PACKET_CNT_EN, &regs->pkt_cnt);
 }
 
@@ -236,6 +253,7 @@  static int spi_rx_tx(struct exynos_spi_slave *spi_slave, int todo,
 	int toread;
 	unsigned start = get_timer(0);
 	int stopping;
+	int step;
 
 	out_bytes = in_bytes = todo;
 
@@ -243,10 +261,19 @@  static int spi_rx_tx(struct exynos_spi_slave *spi_slave, int todo,
 					!(spi_slave->mode & SPI_SLAVE);
 
 	/*
+	 * Try to transfer words if we can. This helps read performance at
+	 * SPI clock speeds above about 20MHz.
+	 */
+	step = 1;
+	if (!((todo | (uintptr_t)rxp | (uintptr_t)txp) & 3) &&
+	    !spi_slave->skip_preamble)
+		step = 4;
+
+	/*
 	 * If there's something to send, do a software reset and set a
 	 * transaction size.
 	 */
-	spi_request_bytes(regs, todo);
+	spi_request_bytes(regs, todo, step);
 
 	/*
 	 * Bytes are transmitted/received in pairs. Wait to receive all the
@@ -259,14 +286,26 @@  static int spi_rx_tx(struct exynos_spi_slave *spi_slave, int todo,
 
 		/* Keep the fifos full/empty. */
 		spi_get_fifo_levels(regs, &rx_lvl, &tx_lvl);
+
+		/*
+		 * Don't completely fill the txfifo, since we don't want our
+		 * rxfifo to overflow, and it may already contain data.
+		 */
 		while (tx_lvl < spi_slave->fifo_size/2 && out_bytes) {
-			temp = txp ? *txp++ : 0xff;
+			if (!txp)
+				temp = -1;
+			else if (step == 4)
+				temp = *(uint32_t *)txp;
+			else
+				temp = *txp;
 			writel(temp, &regs->tx_data);
-			out_bytes--;
-			tx_lvl++;
+			out_bytes -= step;
+			if (txp)
+				txp += step;
+			tx_lvl += step;
 		}
-		if (rx_lvl > 0) {
-			while (rx_lvl > 0) {
+		if (rx_lvl >= step) {
+			while (rx_lvl >= step) {
 				temp = readl(&regs->rx_data);
 				if (spi_slave->skip_preamble) {
 					if (temp == SPI_PREAMBLE_END_BYTE) {
@@ -274,12 +313,18 @@  static int spi_rx_tx(struct exynos_spi_slave *spi_slave, int todo,
 						stopping = 0;
 					}
 				} else {
-					if (rxp || stopping)
-						*rxp++ = temp;
-					in_bytes--;
+					if (rxp || stopping) {
+						if (step == 4)
+							*(uint32_t *)rxp = temp;
+						else
+							*rxp = temp;
+						rxp += step;
+					}
+					in_bytes -= step;
 				}
-				toread--;
-				rx_lvl--;
+				toread -= step;
+				rx_lvl -= step;
+			}
 		} else if (!toread) {
 			/*
 			 * We have run out of input data, but haven't read
@@ -291,7 +336,7 @@  static int spi_rx_tx(struct exynos_spi_slave *spi_slave, int todo,
 			out_bytes = in_bytes;
 			toread = in_bytes;
 			txp = NULL;
-			spi_request_bytes(regs, toread);
+			spi_request_bytes(regs, toread, step);
 		}
 		if (spi_slave->skip_preamble && get_timer(start) > 100) {
 			printf("SPI timeout: in_bytes=%d, out_bytes=%d, ",
@@ -335,10 +380,14 @@  int spi_xfer(struct spi_slave *slave, unsigned int bitlen, const void *dout,
 	if ((flags & SPI_XFER_BEGIN))
 		spi_cs_activate(slave);
 
-	/* Exynos SPI limits each transfer to 65535 bytes */
+	/*
+	 * Exynos SPI limits each transfer to 65535 transfers. To keep
+	 * things simple, allow a maximum of 65532 bytes. We could allow
+	 * more in word mode, but the performance difference is small.
+	 */
 	bytelen =  bitlen / 8;
 	for (upto = 0; !ret && upto < bytelen; upto += todo) {
-		todo = min(bytelen - upto, (1 << 16) - 1);
+		todo = min(bytelen - upto, (1 << 16) - 4);
 		ret = spi_rx_tx(spi_slave, todo, &din, &dout, flags);
 		if (ret)
 			break;