diff mbox series

[1/1] tty: serial: fsl_lpuart: optimize the timer based EOP logic

Message ID 20230502190641.657483-1-shenwei.wang@nxp.com
State Superseded
Headers show
Series [1/1] tty: serial: fsl_lpuart: optimize the timer based EOP logic | expand

Commit Message

Shenwei Wang May 2, 2023, 7:06 p.m. UTC
At low baud rates, the DMA transfer may end prematurely due to the timer,
even during an active transfer. This does not accurately simulate an EOP
event as intended. We expect the timer to only complete a DMA transfer
once the idle period satisfies a specified interval.

The patch checks the DMA residue count before copying data to the TTY
buffer. If the residue count remains unchanged since the last interrupt,
that indicates no new data was received. In this case, the DMA should
complete as an EOP event. Instead, the timer restarts.

Signed-off-by: Shenwei Wang <shenwei.wang@nxp.com>
---
 drivers/tty/serial/fsl_lpuart.c | 52 ++++++++++++++++++++++++++++++---
 1 file changed, 48 insertions(+), 4 deletions(-)

Comments

Ilpo Järvinen May 3, 2023, 9:03 a.m. UTC | #1
On Tue, 2 May 2023, Shenwei Wang wrote:

> At low baud rates, the DMA transfer may end prematurely due to the timer,
> even during an active transfer. This does not accurately simulate an EOP
> event as intended. We expect the timer to only complete a DMA transfer
> once the idle period satisfies a specified interval.
> 
> The patch checks the DMA residue count before copying data to the TTY
> buffer. If the residue count remains unchanged since the last interrupt,
> that indicates no new data was received. In this case, the DMA should
> complete as an EOP event. Instead, the timer restarts.

This description is lacking something. It does not explain why the stuff 
in second paragraph is necessary at all as setting a longer timer based on
the (lower) baud rate would avoid the need to do the timer restart.

> Signed-off-by: Shenwei Wang <shenwei.wang@nxp.com>
> ---
>  drivers/tty/serial/fsl_lpuart.c | 52 ++++++++++++++++++++++++++++++---
>  1 file changed, 48 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c
> index c91916e13648..8d21351fb3bd 100644
> --- a/drivers/tty/serial/fsl_lpuart.c
> +++ b/drivers/tty/serial/fsl_lpuart.c
> @@ -238,6 +238,7 @@
>  
>  /* Rx DMA timeout in ms, which is used to calculate Rx ring buffer size */
>  #define DMA_RX_TIMEOUT		(10)
> +#define DMA_RX_IDLE_CHARS	(8)
>  #define UART_AUTOSUSPEND_TIMEOUT	3000
>  
>  #define DRIVER_NAME	"fsl-lpuart"
> @@ -282,6 +283,7 @@ struct lpuart_port {
>  	struct scatterlist	rx_sgl, tx_sgl[2];
>  	struct circ_buf		rx_ring;
>  	int			rx_dma_rng_buf_len;
> +	int                     last_residue;
>  	unsigned int		dma_tx_nents;
>  	wait_queue_head_t	dma_wait;
>  	bool			is_cs7; /* Set to true when character size is 7 */
> @@ -331,7 +333,7 @@ static struct lpuart_soc_data imx8qxp_data = {
>  	.devtype = IMX8QXP_LPUART,
>  	.iotype = UPIO_MEM32,
>  	.reg_off = IMX_REG_OFF,
> -	.rx_watermark = 31,
> +	.rx_watermark = 8, /* A lower watermark is ideal for low baud rates. */
>  };
>  static struct lpuart_soc_data imxrt1050_data = {
>  	.devtype = IMXRT1050_LPUART,
> @@ -1255,6 +1257,8 @@ static void lpuart_copy_rx_to_tty(struct lpuart_port *sport)
>  		sport->port.icount.rx += copied;
>  	}
>  
> +	sport->last_residue = state.residue;
> +
>  exit:
>  	dma_sync_sg_for_device(chan->device->dev, &sport->rx_sgl, 1,
>  			       DMA_FROM_DEVICE);
> @@ -1272,11 +1276,40 @@ static void lpuart_dma_rx_complete(void *arg)
>  	lpuart_copy_rx_to_tty(sport);
>  }
>  
> +/*
> + * Timer function to simulate the hardware EOP(End Of Package) event.

Missing space

> + * The timer callback is to check for new RX data and copy to TTY buffer.
> + * If no new data since last interrupt, restart timer. Otherwise, copy data
> + * and continue normal logic.
> + */
>  static void lpuart_timer_func(struct timer_list *t)
>  {
>  	struct lpuart_port *sport = from_timer(sport, t, lpuart_timer);
> +	struct dma_chan *chan = sport->dma_rx_chan;
> +	struct circ_buf *ring = &sport->rx_ring;
> +	struct dma_tx_state state;
> +	unsigned long flags;
> +	int count = 0;
>  
> -	lpuart_copy_rx_to_tty(sport);
> +	dmaengine_tx_status(chan, sport->dma_rx_cookie, &state);

> +	ring->head = sport->rx_sgl.length - state.residue;
> +
> +	if (ring->head < ring->tail)
> +		count = sport->rx_sgl.length - ring->tail;
> +	else if (ring->tail < ring->head)
> +		count = ring->head - ring->tail;

linux/circ_buf.h has functions which likely handle what you want to do 
here. They will get you true count across wrap too which this above does 
not do.

Given this is essentially duplicates count calculation some refactor 
would seem more useful here rather than recalculating the count again in 
lpuart_copy_rx_to_tty().

Also lpuart_handle_sysrq() duplicates the same calculations.

> +
> +	/* Check if new data received before copying */
> +	if ((count != 0) && (sport->last_residue == state.residue))

I'm unsure about this condition being right.

What will happen when rx_sgl.length (or -1 of that, I'm not sure which way 
"full size" is here) worth of data has been DMA'ed. Does this condition 
end up delaying copy such that it's done only on every other call here?

Also, should you reset last_residue in lpuart_start_rx_dma() ? I think 
that would solve the "full size" problem.

> +		lpuart_copy_rx_to_tty(sport);
> +	else
> +		mod_timer(&sport->lpuart_timer,
> +				jiffies + sport->dma_rx_timeout);
> +
> +	if (spin_trylock_irqsave(&sport->port.lock, flags)) {
> +		sport->last_residue = state.residue;
> +		spin_unlock_irqrestore(&sport->port.lock, flags);
> +	}
>  }
>  
>  static inline int lpuart_start_rx_dma(struct lpuart_port *sport)
> @@ -1297,9 +1330,19 @@ static inline int lpuart_start_rx_dma(struct lpuart_port *sport)
>  	 */
>  	sport->rx_dma_rng_buf_len = (DMA_RX_TIMEOUT * baud /  bits / 1000) * 2;
>  	sport->rx_dma_rng_buf_len = (1 << fls(sport->rx_dma_rng_buf_len));
> +	if (sport->rx_dma_rng_buf_len < sport->rxfifo_size * 2)
> +		sport->rx_dma_rng_buf_len = sport->rxfifo_size * 2;

max_t()

> +
> +	/*
> +	 * Keep this condition check in case rxfifo_size is unavailable
> +	 * for some SoCs.
> +	 */
>  	if (sport->rx_dma_rng_buf_len < 16)
>  		sport->rx_dma_rng_buf_len = 16;
>  
> +	sport->dma_rx_timeout =
> +		msecs_to_jiffies((1000 * 10 * DMA_RX_IDLE_CHARS) / baud + 1);

There's ->frame_time these days in uart_port which you should base frame 
timing related calculations. I wouldn't mind if that existing ->frame_time 
math that is visible in your patch's context is also converted (in a 
separate patch).

I'm assuming that magic 10 is assumed number of bits and 1000 
MSEC_PER_SEC. That +1 seems odd, did you mean DIV_ROUND_UP() ?

> +
>  	ring->buf = kzalloc(sport->rx_dma_rng_buf_len, GFP_ATOMIC);
>  	if (!ring->buf)
>  		return -ENOMEM;
> @@ -1687,12 +1730,13 @@ static void lpuart_rx_dma_startup(struct lpuart_port *sport)
>  	if (!sport->dma_rx_chan)
>  		goto err;
>  
> +	/* set default Rx DMA timeout */
> +	sport->dma_rx_timeout = msecs_to_jiffies(DMA_RX_TIMEOUT);
> +
>  	ret = lpuart_start_rx_dma(sport);
>  	if (ret)
>  		goto err;
>  
> -	/* set Rx DMA timeout */
> -	sport->dma_rx_timeout = msecs_to_jiffies(DMA_RX_TIMEOUT);
>  	if (!sport->dma_rx_timeout)
>  		sport->dma_rx_timeout = 1;
>  
>
Shenwei Wang May 3, 2023, 6:38 p.m. UTC | #2
> -----Original Message-----
> From: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
> Sent: Wednesday, May 3, 2023 4:03 AM
> To: Shenwei Wang <shenwei.wang@nxp.com>
> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>; Jiri Slaby
> <jirislaby@kernel.org>; linux-serial <linux-serial@vger.kernel.org>;
> imx@lists.linux.dev; dl-linux-imx <linux-imx@nxp.com>
> Subject: [EXT] Re: [PATCH 1/1] tty: serial: fsl_lpuart: optimize the timer based
> EOP logic
> 
> Caution: This is an external email. Please take care when clicking links or
> opening attachments. When in doubt, report the message using the 'Report this
> email' button
> 
> 
> On Tue, 2 May 2023, Shenwei Wang wrote:
> 
> > At low baud rates, the DMA transfer may end prematurely due to the
> > timer, even during an active transfer. This does not accurately
> > simulate an EOP event as intended. We expect the timer to only
> > complete a DMA transfer once the idle period satisfies a specified interval.
> >
> > The patch checks the DMA residue count before copying data to the TTY
> > buffer. If the residue count remains unchanged since the last
> > interrupt, that indicates no new data was received. In this case, the
> > DMA should complete as an EOP event. Instead, the timer restarts.
> 
> This description is lacking something. It does not explain why the stuff in second
> paragraph is necessary at all as setting a longer timer based on the (lower) baud
> rate would avoid the need to do the timer restart.
> 

Agree. Would add the following to the last sentence: "if no new characters are received, the timer just restarts". 

> > Signed-off-by: Shenwei Wang <shenwei.wang@nxp.com>
> > ---
> >  drivers/tty/serial/fsl_lpuart.c | 52
> > ++++++++++++++++++++++++++++++---
> >  1 file changed, 48 insertions(+), 4 deletions(-)
> >  }
> >
> > +/*
> > + * Timer function to simulate the hardware EOP(End Of Package) event.
> 
> Missing space
> 

Will fix it in next version.

> > + * The timer callback is to check for new RX data and copy to TTY buffer.
> > + * If no new data since last interrupt, restart timer. Otherwise,
> > + copy data
> > + * and continue normal logic.
> > + */
> >  static void lpuart_timer_func(struct timer_list *t)  {
> >       struct lpuart_port *sport = from_timer(sport, t, lpuart_timer);
> > +     struct dma_chan *chan = sport->dma_rx_chan;
> > +     struct circ_buf *ring = &sport->rx_ring;
> > +     struct dma_tx_state state;
> > +     unsigned long flags;
> > +     int count = 0;
> >
> > -     lpuart_copy_rx_to_tty(sport);
> > +     dmaengine_tx_status(chan, sport->dma_rx_cookie, &state);
> 
> > +     ring->head = sport->rx_sgl.length - state.residue;
> > +
> > +     if (ring->head < ring->tail)
> > +             count = sport->rx_sgl.length - ring->tail;
> > +     else if (ring->tail < ring->head)
> > +             count = ring->head - ring->tail;
> 
> linux/circ_buf.h has functions which likely handle what you want to do here.
> They will get you true count across wrap too which this above does not do.
> 
> Given this is essentially duplicates count calculation some refactor would seem
> more useful here rather than recalculating the count again in
> lpuart_copy_rx_to_tty().
> 
> Also lpuart_handle_sysrq() duplicates the same calculations.
> 

Just had a check with the circ_buf.h, and this piece of codes can be simplified
with the CIRC_CNT() function.

The other part you mentioned should be optimized as well. I will submit a separate
patch to do that after finishing this one.

> > +
> > +     /* Check if new data received before copying */
> > +     if ((count != 0) && (sport->last_residue == state.residue))
> 
> I'm unsure about this condition being right.
> 
> What will happen when rx_sgl.length (or -1 of that, I'm not sure which way "full
> size" is here) worth of data has been DMA'ed. Does this condition end up
> delaying copy such that it's done only on every other call here?
> 

The timer function will only complete a DMA transfer when there is new data in the buffer 
and the data has been idle for the specified interval. 

The "full buffer" situation should be handled by the DMA completion callback itself.  A "full" buffer 
means the DMA transaction has received sufficient data and invoked the completion callback.

> Also, should you reset last_residue in lpuart_start_rx_dma() ? I think that would
> solve the "full size" problem.
> 
> > +             lpuart_copy_rx_to_tty(sport);
> > +     else
> > +             mod_timer(&sport->lpuart_timer,
> > +                             jiffies + sport->dma_rx_timeout);
> > +
> > +     if (spin_trylock_irqsave(&sport->port.lock, flags)) {
> > +             sport->last_residue = state.residue;
> > +             spin_unlock_irqrestore(&sport->port.lock, flags);
> > +     }
> >  }
> >
> >  static inline int lpuart_start_rx_dma(struct lpuart_port *sport) @@
> > -1297,9 +1330,19 @@ static inline int lpuart_start_rx_dma(struct lpuart_port
> *sport)
> >        */
> >       sport->rx_dma_rng_buf_len = (DMA_RX_TIMEOUT * baud /  bits / 1000) * 2;
> >       sport->rx_dma_rng_buf_len = (1 <<
> > fls(sport->rx_dma_rng_buf_len));
> > +     if (sport->rx_dma_rng_buf_len < sport->rxfifo_size * 2)
> > +             sport->rx_dma_rng_buf_len = sport->rxfifo_size * 2;
> 
> max_t()
> 
> > +
> > +     /*
> > +      * Keep this condition check in case rxfifo_size is unavailable
> > +      * for some SoCs.
> > +      */
> >       if (sport->rx_dma_rng_buf_len < 16)
> >               sport->rx_dma_rng_buf_len = 16;
> >
> > +     sport->dma_rx_timeout =
> > +             msecs_to_jiffies((1000 * 10 * DMA_RX_IDLE_CHARS) / baud
> > + + 1);
> 
> There's ->frame_time these days in uart_port which you should base frame
> timing related calculations. I wouldn't mind if that existing ->frame_time math
> that is visible in your patch's context is also converted (in a separate patch).
> 
> I'm assuming that magic 10 is assumed number of bits and 1000 MSEC_PER_SEC.
> That +1 seems odd, did you mean DIV_ROUND_UP() ?

Yes, it is 10 bits and 1000 ms. +1 here is similar to the result of round up. 
And also the ->frame_time could be used for simplicity.

> 
> > +
> >       ring->buf = kzalloc(sport->rx_dma_rng_buf_len, GFP_ATOMIC);
> >       if (!ring->buf)
> >               return -ENOMEM;
> > @@ -1687,12 +1730,13 @@ static void lpuart_rx_dma_startup(struct
> lpuart_port *sport)
> >       if (!sport->dma_rx_chan)
> >               goto err;
> >
> > +     /* set default Rx DMA timeout */
> > +     sport->dma_rx_timeout = msecs_to_jiffies(DMA_RX_TIMEOUT);
> > +
> >       ret = lpuart_start_rx_dma(sport);
> >       if (ret)
> >               goto err;
> >
> > -     /* set Rx DMA timeout */
> > -     sport->dma_rx_timeout = msecs_to_jiffies(DMA_RX_TIMEOUT);
> >       if (!sport->dma_rx_timeout)
> >               sport->dma_rx_timeout = 1;
> >
> >
> 
> --
>  i.
Shenwei Wang May 4, 2023, 2:46 p.m. UTC | #3
> -----Original Message-----
> From: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
> Sent: Thursday, May 4, 2023 8:02 AM
> To: Shenwei Wang <shenwei.wang@nxp.com>
> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>; Jiri Slaby
> <jirislaby@kernel.org>; linux-serial <linux-serial@vger.kernel.org>;
> imx@lists.linux.dev; dl-linux-imx <linux-imx@nxp.com>
> Subject: RE: [EXT] Re: [PATCH 1/1] tty: serial: fsl_lpuart: optimize the timer
> based EOP logic
> 
> Caution: This is an external email. Please take care when clicking links or
> opening attachments. When in doubt, report the message using the 'Report this
> email' button
> 
> 
> On Wed, 3 May 2023, Shenwei Wang wrote:
> > From: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
> > > On Tue, 2 May 2023, Shenwei Wang wrote:
> > >
> > > > At low baud rates, the DMA transfer may end prematurely due to the
> > > > timer, even during an active transfer. This does not accurately
> > > > simulate an EOP event as intended. We expect the timer to only
> > > > complete a DMA transfer once the idle period satisfies a specified interval.
> > > >
> > > > The patch checks the DMA residue count before copying data to the
> > > > TTY buffer. If the residue count remains unchanged since the last
> > > > interrupt, that indicates no new data was received. In this case,
> > > > the DMA should complete as an EOP event. Instead, the timer restarts.
> > >
> > > This description is lacking something. It does not explain why the
> > > stuff in second paragraph is necessary at all as setting a longer
> > > timer based on the (lower) baud rate would avoid the need to do the timer
> restart.
> > >
> >
> > Agree. Would add the following to the last sentence: "if no new
> > characters are received, the timer just restarts".
> 
> That, however, is unfortunately not the case I was interested in here. The code
> does restart the timer if new characters _were received_ (residue changed), no?
> So my request for clarification to the changelong still stands, why is rearming
> the timer necessary instead of simply setting a longer timeout right from the
> start?
> 

Once new characters are received within the timer interval, the "residue" will 
be updated by the DMA driver, but the DMA transfer will only complete once it receives 
the specified number of characters. The purpose of this patch is to prevent the long delay 
of the DMA transfer when it cannot receive a sufficient number of characters.

> (In the first paragraph, you stated the problem is about timer triggering
> prematurely with low baud rates.)
> 
> This is not to say that the new approach is wrong but the changelog fails to
> explain all facets of what is wrong with the old approach adequately.
>

The timer is used here to simulate the EOP behavior. This means that when a new character is received  
within the interval, the timer needs restart. If no new character is received within the specified interval, 
the timer callback will complete the DMA transfer as if an EOP event occurred.
The old approach did not exhibit this expected behavior. It would complete the DMA transfer at the fixed 
interval regardless of whether new characters were received or not. This incorrect behavior can be easily 
observed at low baud rates, but it also for high baud rates.   

> > Just had a check with the circ_buf.h, and this piece of codes can be
> > simplified with the CIRC_CNT() function.
> >
> > The other part you mentioned should be optimized as well. I will
> > submit a separate patch to do that after finishing this one.
> 
> Okay. You might also find CIRC_CNT_TO_END() useful in those inner functions
> to calculate the length before the wrap.
> 
> > > > +
> > > > +     /* Check if new data received before copying */
> > > > +     if ((count != 0) && (sport->last_residue == state.residue))
> > >
> > > I'm unsure about this condition being right.
> > >
> > > What will happen when rx_sgl.length (or -1 of that, I'm not sure
> > > which way "full size" is here) worth of data has been DMA'ed. Does
> > > this condition end up delaying copy such that it's done only on every other
> call here?
> > >
> >
> > The timer function will only complete a DMA transfer when there is new
> > data in the buffer and the data has been idle for the specified interval.
> >
> > The "full buffer" situation should be handled by the DMA completion
> > callback itself.  A "full" buffer means the DMA transaction has received
> sufficient data and invoked the completion callback.
> >
> > > Also, should you reset last_residue in lpuart_start_rx_dma() ? I
> > > think that would solve the "full size" problem.
> 
> What about this part? If the transfer always does n chars, the left over residue
> can match spuriously for the new transfer and trigger the copy because last and
> current residue happen to match (kinda by chance but could be simply due to
> repetitive transfer pattern)?
> 

Yes, there is a chance. Will reset it when start_rx_dma.

Thanks,
Shenwei

> > > > +     sport->dma_rx_timeout =
> > > > +             msecs_to_jiffies((1000 * 10 * DMA_RX_IDLE_CHARS) /
> > > > + baud
> > > > + + 1);
> > >
> > > There's ->frame_time these days in uart_port which you should base
> > > frame timing related calculations. I wouldn't mind if that existing
> > > ->frame_time math that is visible in your patch's context is also converted (in
> a separate patch).
> > >
> > > I'm assuming that magic 10 is assumed number of bits and 1000
> MSEC_PER_SEC.
> > > That +1 seems odd, did you mean DIV_ROUND_UP() ?
> >
> > Yes, it is 10 bits and 1000 ms. +1 here is similar to the result of round up.
> > And also the ->frame_time could be used for simplicity.
> 
> Yes, please use ->frame_time. I added it exactly to allow this kind of calculations
> to be easily based on actual frame timing (the other questions were just to
> gauge if I understood right what is behind your math).
> 
> 
> --
>  i.
diff mbox series

Patch

diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c
index c91916e13648..8d21351fb3bd 100644
--- a/drivers/tty/serial/fsl_lpuart.c
+++ b/drivers/tty/serial/fsl_lpuart.c
@@ -238,6 +238,7 @@ 
 
 /* Rx DMA timeout in ms, which is used to calculate Rx ring buffer size */
 #define DMA_RX_TIMEOUT		(10)
+#define DMA_RX_IDLE_CHARS	(8)
 #define UART_AUTOSUSPEND_TIMEOUT	3000
 
 #define DRIVER_NAME	"fsl-lpuart"
@@ -282,6 +283,7 @@  struct lpuart_port {
 	struct scatterlist	rx_sgl, tx_sgl[2];
 	struct circ_buf		rx_ring;
 	int			rx_dma_rng_buf_len;
+	int                     last_residue;
 	unsigned int		dma_tx_nents;
 	wait_queue_head_t	dma_wait;
 	bool			is_cs7; /* Set to true when character size is 7 */
@@ -331,7 +333,7 @@  static struct lpuart_soc_data imx8qxp_data = {
 	.devtype = IMX8QXP_LPUART,
 	.iotype = UPIO_MEM32,
 	.reg_off = IMX_REG_OFF,
-	.rx_watermark = 31,
+	.rx_watermark = 8, /* A lower watermark is ideal for low baud rates. */
 };
 static struct lpuart_soc_data imxrt1050_data = {
 	.devtype = IMXRT1050_LPUART,
@@ -1255,6 +1257,8 @@  static void lpuart_copy_rx_to_tty(struct lpuart_port *sport)
 		sport->port.icount.rx += copied;
 	}
 
+	sport->last_residue = state.residue;
+
 exit:
 	dma_sync_sg_for_device(chan->device->dev, &sport->rx_sgl, 1,
 			       DMA_FROM_DEVICE);
@@ -1272,11 +1276,40 @@  static void lpuart_dma_rx_complete(void *arg)
 	lpuart_copy_rx_to_tty(sport);
 }
 
+/*
+ * Timer function to simulate the hardware EOP(End Of Package) event.
+ * The timer callback is to check for new RX data and copy to TTY buffer.
+ * If no new data since last interrupt, restart timer. Otherwise, copy data
+ * and continue normal logic.
+ */
 static void lpuart_timer_func(struct timer_list *t)
 {
 	struct lpuart_port *sport = from_timer(sport, t, lpuart_timer);
+	struct dma_chan *chan = sport->dma_rx_chan;
+	struct circ_buf *ring = &sport->rx_ring;
+	struct dma_tx_state state;
+	unsigned long flags;
+	int count = 0;
 
-	lpuart_copy_rx_to_tty(sport);
+	dmaengine_tx_status(chan, sport->dma_rx_cookie, &state);
+	ring->head = sport->rx_sgl.length - state.residue;
+
+	if (ring->head < ring->tail)
+		count = sport->rx_sgl.length - ring->tail;
+	else if (ring->tail < ring->head)
+		count = ring->head - ring->tail;
+
+	/* Check if new data received before copying */
+	if ((count != 0) && (sport->last_residue == state.residue))
+		lpuart_copy_rx_to_tty(sport);
+	else
+		mod_timer(&sport->lpuart_timer,
+				jiffies + sport->dma_rx_timeout);
+
+	if (spin_trylock_irqsave(&sport->port.lock, flags)) {
+		sport->last_residue = state.residue;
+		spin_unlock_irqrestore(&sport->port.lock, flags);
+	}
 }
 
 static inline int lpuart_start_rx_dma(struct lpuart_port *sport)
@@ -1297,9 +1330,19 @@  static inline int lpuart_start_rx_dma(struct lpuart_port *sport)
 	 */
 	sport->rx_dma_rng_buf_len = (DMA_RX_TIMEOUT * baud /  bits / 1000) * 2;
 	sport->rx_dma_rng_buf_len = (1 << fls(sport->rx_dma_rng_buf_len));
+	if (sport->rx_dma_rng_buf_len < sport->rxfifo_size * 2)
+		sport->rx_dma_rng_buf_len = sport->rxfifo_size * 2;
+
+	/*
+	 * Keep this condition check in case rxfifo_size is unavailable
+	 * for some SoCs.
+	 */
 	if (sport->rx_dma_rng_buf_len < 16)
 		sport->rx_dma_rng_buf_len = 16;
 
+	sport->dma_rx_timeout =
+		msecs_to_jiffies((1000 * 10 * DMA_RX_IDLE_CHARS) / baud + 1);
+
 	ring->buf = kzalloc(sport->rx_dma_rng_buf_len, GFP_ATOMIC);
 	if (!ring->buf)
 		return -ENOMEM;
@@ -1687,12 +1730,13 @@  static void lpuart_rx_dma_startup(struct lpuart_port *sport)
 	if (!sport->dma_rx_chan)
 		goto err;
 
+	/* set default Rx DMA timeout */
+	sport->dma_rx_timeout = msecs_to_jiffies(DMA_RX_TIMEOUT);
+
 	ret = lpuart_start_rx_dma(sport);
 	if (ret)
 		goto err;
 
-	/* set Rx DMA timeout */
-	sport->dma_rx_timeout = msecs_to_jiffies(DMA_RX_TIMEOUT);
 	if (!sport->dma_rx_timeout)
 		sport->dma_rx_timeout = 1;