diff mbox series

[v1,5/8] spi: dw: Speed up transfer loops

Message ID 20200305191925.959494-6-seanga2@gmail.com
State New
Headers show
Series riscv: Add SPI support for Kendryte K210 | expand

Commit Message

Sean Anderson March 5, 2020, 7:19 p.m. UTC
The transfer loops are very tight on some platforms (especially at higher
speeds). If we don't read/write fast enough, we can run into overflow or
underflow problems. This patch removes several divisions and log statements,
and simplifies the read logic.

Signed-off-by: Sean Anderson <seanga2 at gmail.com>
---

 drivers/spi/designware_spi.c | 29 +++++++++--------------------
 1 file changed, 9 insertions(+), 20 deletions(-)

Comments

Marek Vasut March 22, 2020, 1:49 a.m. UTC | #1
On 3/5/20 8:19 PM, Sean Anderson wrote:
> The transfer loops are very tight on some platforms (especially on higher
> speeds). If we don't read/write fast enough we can run into over-/under-
> flow problems. This patch removes several divisions and log statements,
> and simplifies the read logic.
> 
> Signed-off-by: Sean Anderson <seanga2 at gmail.com>
> ---
> 
>  drivers/spi/designware_spi.c | 29 +++++++++--------------------
>  1 file changed, 9 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/spi/designware_spi.c b/drivers/spi/designware_spi.c
> index 38c24fe550..613eb0d0e6 100644
> --- a/drivers/spi/designware_spi.c
> +++ b/drivers/spi/designware_spi.c
> @@ -304,7 +304,7 @@ static inline u32 tx_max(struct dw_spi_priv *priv)
>  {
>  	u32 tx_left, tx_room, rxtx_gap;
>  
> -	tx_left = (priv->tx_end - priv->tx) / (priv->bits_per_word >> 3);
> +	tx_left = priv->tx_end - priv->tx;
>  	tx_room = priv->fifo_len - dw_read(priv, DW_SPI_TXFLR);
>  
>  	/*
> @@ -315,8 +315,7 @@ static inline u32 tx_max(struct dw_spi_priv *priv)
>  	 * shift registers. So a control from sw point of
>  	 * view is taken.
>  	 */
> -	rxtx_gap = ((priv->rx_end - priv->rx) - (priv->tx_end - priv->tx)) /
> -		(priv->bits_per_word >> 3);
> +	rxtx_gap = ((priv->rx_end - priv->rx) - (priv->tx_end - priv->tx));
>  
>  	return min3(tx_left, tx_room, (u32)(priv->fifo_len - rxtx_gap));
>  }
> @@ -324,7 +323,7 @@ static inline u32 tx_max(struct dw_spi_priv *priv)
>  /* Return the max entries we should read out of rx fifo */
>  static inline u32 rx_max(struct dw_spi_priv *priv)
>  {
> -	u32 rx_left = (priv->rx_end - priv->rx) / (priv->bits_per_word >> 3);
> +	u32 rx_left = priv->rx_end - priv->rx;
>  
>  	return min_t(u32, rx_left, dw_read(priv, DW_SPI_RXFLR));
>  }
> @@ -336,15 +335,10 @@ static void dw_writer(struct dw_spi_priv *priv)
>  
>  	while (max--) {
>  		/* Set the tx word if the transfer's original "tx" is not null */
> -		if (priv->tx_end - priv->len) {
> -			if (priv->bits_per_word == 8)
> -				txw = *(u8 *)(priv->tx);
> -			else
> -				txw = *(u16 *)(priv->tx);
> -		}
> +		if (priv->tx_end - priv->len)
> +			txw = *(u8 *)(priv->tx);
>  		dw_write(priv, DW_SPI_DR, txw);
> -		debug("%s: tx=0x%02x\n", __func__, txw);
> -		priv->tx += priv->bits_per_word >> 3;
> +		priv->tx++;

This breaks 16-bits-per-word transfers: the buffer is now always accessed
as u8 and the pointer advanced by a single byte. NAK.

The compiler should be able to figure out the rest of the optimizations
in this patch.
Sean Anderson March 22, 2020, 3:54 a.m. UTC | #2
On 3/21/20 9:49 PM, Marek Vasut wrote:
> On 3/5/20 8:19 PM, Sean Anderson wrote:
>> The transfer loops are very tight on some platforms (especially on higher
>> speeds). If we don't read/write fast enough we can run into over-/under-
>> flow problems. This patch removes several divisions and log statements,
>> and simplifies the read logic.
>>
>> Signed-off-by: Sean Anderson <seanga2 at gmail.com>
>> ---
>>
>>  drivers/spi/designware_spi.c | 29 +++++++++--------------------
>>  1 file changed, 9 insertions(+), 20 deletions(-)
>>
>> diff --git a/drivers/spi/designware_spi.c b/drivers/spi/designware_spi.c
>> index 38c24fe550..613eb0d0e6 100644
>> --- a/drivers/spi/designware_spi.c
>> +++ b/drivers/spi/designware_spi.c
>> @@ -304,7 +304,7 @@ static inline u32 tx_max(struct dw_spi_priv *priv)
>>  {
>>  	u32 tx_left, tx_room, rxtx_gap;
>>  
>> -	tx_left = (priv->tx_end - priv->tx) / (priv->bits_per_word >> 3);
>> +	tx_left = priv->tx_end - priv->tx;
>>  	tx_room = priv->fifo_len - dw_read(priv, DW_SPI_TXFLR);
>>  
>>  	/*
>> @@ -315,8 +315,7 @@ static inline u32 tx_max(struct dw_spi_priv *priv)
>>  	 * shift registers. So a control from sw point of
>>  	 * view is taken.
>>  	 */
>> -	rxtx_gap = ((priv->rx_end - priv->rx) - (priv->tx_end - priv->tx)) /
>> -		(priv->bits_per_word >> 3);
>> +	rxtx_gap = ((priv->rx_end - priv->rx) - (priv->tx_end - priv->tx));
>>  
>>  	return min3(tx_left, tx_room, (u32)(priv->fifo_len - rxtx_gap));
>>  }
>> @@ -324,7 +323,7 @@ static inline u32 tx_max(struct dw_spi_priv *priv)
>>  /* Return the max entries we should read out of rx fifo */
>>  static inline u32 rx_max(struct dw_spi_priv *priv)
>>  {
>> -	u32 rx_left = (priv->rx_end - priv->rx) / (priv->bits_per_word >> 3);
>> +	u32 rx_left = priv->rx_end - priv->rx;
>>  
>>  	return min_t(u32, rx_left, dw_read(priv, DW_SPI_RXFLR));
>>  }
>> @@ -336,15 +335,10 @@ static void dw_writer(struct dw_spi_priv *priv)
>>  
>>  	while (max--) {
>>  		/* Set the tx word if the transfer's original "tx" is not null */
>> -		if (priv->tx_end - priv->len) {
>> -			if (priv->bits_per_word == 8)
>> -				txw = *(u8 *)(priv->tx);
>> -			else
>> -				txw = *(u16 *)(priv->tx);
>> -		}
>> +		if (priv->tx_end - priv->len)
>> +			txw = *(u8 *)(priv->tx);
>>  		dw_write(priv, DW_SPI_DR, txw);
>> -		debug("%s: tx=0x%02x\n", __func__, txw);
>> -		priv->tx += priv->bits_per_word >> 3;
>> +		priv->tx++;
> 
> This breaks 16 bits per word transfers, NAK.
> 
> The compiler should be able to figure out the rest of the optimizations
> in this patch.
> 

Hm, I will try with just the debug statements removed.

--Sean
diff mbox series

Patch

diff --git a/drivers/spi/designware_spi.c b/drivers/spi/designware_spi.c
index 38c24fe550..613eb0d0e6 100644
--- a/drivers/spi/designware_spi.c
+++ b/drivers/spi/designware_spi.c
@@ -304,7 +304,7 @@  static inline u32 tx_max(struct dw_spi_priv *priv)
 {
 	u32 tx_left, tx_room, rxtx_gap;
 
-	tx_left = (priv->tx_end - priv->tx) / (priv->bits_per_word >> 3);
+	tx_left = priv->tx_end - priv->tx;
 	tx_room = priv->fifo_len - dw_read(priv, DW_SPI_TXFLR);
 
 	/*
@@ -315,8 +315,7 @@  static inline u32 tx_max(struct dw_spi_priv *priv)
 	 * shift registers. So a control from sw point of
 	 * view is taken.
 	 */
-	rxtx_gap = ((priv->rx_end - priv->rx) - (priv->tx_end - priv->tx)) /
-		(priv->bits_per_word >> 3);
+	rxtx_gap = ((priv->rx_end - priv->rx) - (priv->tx_end - priv->tx));
 
 	return min3(tx_left, tx_room, (u32)(priv->fifo_len - rxtx_gap));
 }
@@ -324,7 +323,7 @@  static inline u32 tx_max(struct dw_spi_priv *priv)
 /* Return the max entries we should read out of rx fifo */
 static inline u32 rx_max(struct dw_spi_priv *priv)
 {
-	u32 rx_left = (priv->rx_end - priv->rx) / (priv->bits_per_word >> 3);
+	u32 rx_left = priv->rx_end - priv->rx;
 
 	return min_t(u32, rx_left, dw_read(priv, DW_SPI_RXFLR));
 }
@@ -336,15 +335,10 @@  static void dw_writer(struct dw_spi_priv *priv)
 
 	while (max--) {
 		/* Set the tx word if the transfer's original "tx" is not null */
-		if (priv->tx_end - priv->len) {
-			if (priv->bits_per_word == 8)
-				txw = *(u8 *)(priv->tx);
-			else
-				txw = *(u16 *)(priv->tx);
-		}
+		if (priv->tx_end - priv->len)
+			txw = *(u8 *)(priv->tx);
 		dw_write(priv, DW_SPI_DR, txw);
-		debug("%s: tx=0x%02x\n", __func__, txw);
-		priv->tx += priv->bits_per_word >> 3;
+		priv->tx++;
 	}
 }
 
@@ -355,16 +349,11 @@  static void dw_reader(struct dw_spi_priv *priv)
 
 	while (max--) {
 		rxw = dw_read(priv, DW_SPI_DR);
-		debug("%s: rx=0x%02x\n", __func__, rxw);
 
 		/* Care about rx if the transfer's original "rx" is not null */
-		if (priv->rx_end - priv->len) {
-			if (priv->bits_per_word == 8)
-				*(u8 *)(priv->rx) = rxw;
-			else
-				*(u16 *)(priv->rx) = rxw;
-		}
-		priv->rx += priv->bits_per_word >> 3;
+		if (priv->rx_end - priv->len)
+			*(u8 *)(priv->rx) = rxw;
+		priv->rx++;
 	}
 }