diff mbox series

[1/8] serial: qcom-geni: fix fifo polling timeout

Message ID 20240902152451.862-2-johan+linaro@kernel.org
State New
Headers show
Series [1/8] serial: qcom-geni: fix fifo polling timeout | expand

Commit Message

Johan Hovold Sept. 2, 2024, 3:24 p.m. UTC
The qcom_geni_serial_poll_bit() can be used to wait for events like
command completion and is supposed to wait for the time it takes to
clear a full fifo before timing out.

As noted by Doug, the current implementation does not account for start,
stop and parity bits when determining the timeout. The helper also does
not currently account for the shift register and the two-word
intermediate transfer register.

Instead of determining the fifo timeout on every call, store the timeout
when updating it in set_termios() and wait for up to 19/16 the time it
takes to clear the 16 word fifo to account for the shift and
intermediate registers. Note that serial core has already added a 20 ms
margin to the fifo timeout.

Also note that the current uart_fifo_timeout() interface does
unnecessary calculations on every call and also did not exists in
earlier kernels so only store its result once. This also facilitates
backports as earlier kernels can derive the timeout from uport->timeout,
which has since been removed.

Fixes: c4f528795d1a ("tty: serial: msm_geni_serial: Add serial driver support for GENI based QUP")
Cc: stable@vger.kernel.org	# 4.17
Reported-by: Douglas Anderson <dianders@chromium.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
---
 drivers/tty/serial/qcom_geni_serial.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

Comments

Doug Anderson Sept. 4, 2024, 9:50 p.m. UTC | #1
Hi,

On Mon, Sep 2, 2024 at 8:26 AM Johan Hovold <johan+linaro@kernel.org> wrote:
>
> The qcom_geni_serial_poll_bit() can be used to wait for events like
> command completion and is supposed to wait for the time it takes to
> clear a full fifo before timing out.
>
> As noted by Doug, the current implementation does not account for start,
> stop and parity bits when determining the timeout. The helper also does
> not currently account for the shift register and the two-word
> intermediate transfer register.
>
> Instead of determining the fifo timeout on every call, store the timeout
> when updating it in set_termios() and wait for up to 19/16 the time it
> takes to clear the 16 word fifo to account for the shift and
> intermediate registers. Note that serial core has already added a 20 ms
> margin to the fifo timeout.
>
> Also note that the current uart_fifo_timeout() interface does
> unnecessary calculations on every call and also did not exists in
> earlier kernels so only store its result once. This also facilitates
> backports as earlier kernels can derive the timeout from uport->timeout,
> which has since been removed.
>
> Fixes: c4f528795d1a ("tty: serial: msm_geni_serial: Add serial driver support for GENI based QUP")
> Cc: stable@vger.kernel.org      # 4.17
> Reported-by: Douglas Anderson <dianders@chromium.org>
> Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
> ---
>  drivers/tty/serial/qcom_geni_serial.c | 24 ++++++++++++------------
>  1 file changed, 12 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c
> index 69a632fefc41..e1926124339d 100644
> --- a/drivers/tty/serial/qcom_geni_serial.c
> +++ b/drivers/tty/serial/qcom_geni_serial.c
> @@ -124,7 +124,7 @@ struct qcom_geni_serial_port {
>         dma_addr_t tx_dma_addr;
>         dma_addr_t rx_dma_addr;
>         bool setup;
> -       unsigned int baud;
> +       unsigned long fifo_timeout_us;
>         unsigned long clk_rate;
>         void *rx_buf;
>         u32 loopback;
> @@ -270,22 +270,21 @@ static bool qcom_geni_serial_poll_bit(struct uart_port *uport,
>  {
>         u32 reg;
>         struct qcom_geni_serial_port *port;
> -       unsigned int baud;
> -       unsigned int fifo_bits;
>         unsigned long timeout_us = 20000;
>         struct qcom_geni_private_data *private_data = uport->private_data;
>
>         if (private_data->drv) {
>                 port = to_dev_port(uport);
> -               baud = port->baud;
> -               if (!baud)
> -                       baud = 115200;
> -               fifo_bits = port->tx_fifo_depth * port->tx_fifo_width;
> +
>                 /*
> -                * Total polling iterations based on FIFO worth of bytes to be
> -                * sent at current baud. Add a little fluff to the wait.
> +                * Wait up to 19/16 the time it would take to clear a full
> +                * FIFO, which accounts for the three words in the shift and
> +                * intermediate registers.
> +                *
> +                * Note that fifo_timeout_us already has a 20 ms margin.
>                  */
> -               timeout_us = ((fifo_bits * USEC_PER_SEC) / baud) + 500;
> +               if (port->fifo_timeout_us)
> +                       timeout_us = 19 * port->fifo_timeout_us / 16;

It made me giggle a bit that part of the justification for caching
"fifo_timeout_us" was to avoid calculations each time through the
function. ...but then the code does the "19/16" math here instead of
just including it in the cache. ;-) ;-) ;-)

That being said, I'm not really a fan of the "19 / 16" anyway. The 16
value is calculated elsewhere in the code as:

port->tx_fifo_depth = geni_se_get_tx_fifo_depth(&port->se);
port->tx_fifo_width = geni_se_get_tx_fifo_width(&port->se);
port->rx_fifo_depth = geni_se_get_rx_fifo_depth(&port->se);
uport->fifosize =
  (port->tx_fifo_depth * port->tx_fifo_width) / BITS_PER_BYTE;

...and here you're just hardcoding it to 16. Then there's also the
fact that the "19 / 16" will also multiply the 20 ms "slop" added by
uart_fifo_timeout() which doesn't seem ideal.

How about this: we just change "uport->fifosize" to account for the 3
extra words? So it can be:

((port->tx_fifo_depth + 3) * port->tx_fifo_width) / BITS_PER_BYTE;

...then the cache will be correct and everything will work out. What
do you think?

-Doug
Johan Hovold Sept. 5, 2024, 8:48 a.m. UTC | #2
On Wed, Sep 04, 2024 at 02:50:57PM -0700, Doug Anderson wrote:
> On Mon, Sep 2, 2024 at 8:26 AM Johan Hovold <johan+linaro@kernel.org> wrote:
> >
> > The qcom_geni_serial_poll_bit() can be used to wait for events like
> > command completion and is supposed to wait for the time it takes to
> > clear a full fifo before timing out.
> >
> > As noted by Doug, the current implementation does not account for start,
> > stop and parity bits when determining the timeout. The helper also does
> > not currently account for the shift register and the two-word
> > intermediate transfer register.
> >
> > Instead of determining the fifo timeout on every call, store the timeout
> > when updating it in set_termios() and wait for up to 19/16 the time it
> > takes to clear the 16 word fifo to account for the shift and
> > intermediate registers. Note that serial core has already added a 20 ms
> > margin to the fifo timeout.
> >
> > Also note that the current uart_fifo_timeout() interface does
> > unnecessary calculations on every call and also did not exists in
> > earlier kernels so only store its result once. This also facilitates
> > backports as earlier kernels can derive the timeout from uport->timeout,
> > which has since been removed.

> > @@ -270,22 +270,21 @@ static bool qcom_geni_serial_poll_bit(struct uart_port *uport,
> >  {
> >         u32 reg;
> >         struct qcom_geni_serial_port *port;
> > -       unsigned int baud;
> > -       unsigned int fifo_bits;
> >         unsigned long timeout_us = 20000;
> >         struct qcom_geni_private_data *private_data = uport->private_data;
> >
> >         if (private_data->drv) {
> >                 port = to_dev_port(uport);
> > -               baud = port->baud;
> > -               if (!baud)
> > -                       baud = 115200;
> > -               fifo_bits = port->tx_fifo_depth * port->tx_fifo_width;
> > +
> >                 /*
> > -                * Total polling iterations based on FIFO worth of bytes to be
> > -                * sent at current baud. Add a little fluff to the wait.
> > +                * Wait up to 19/16 the time it would take to clear a full
> > +                * FIFO, which accounts for the three words in the shift and
> > +                * intermediate registers.
> > +                *
> > +                * Note that fifo_timeout_us already has a 20 ms margin.
> >                  */
> > -               timeout_us = ((fifo_bits * USEC_PER_SEC) / baud) + 500;
> > +               if (port->fifo_timeout_us)
> > +                       timeout_us = 19 * port->fifo_timeout_us / 16;
> 
> It made me giggle a bit that part of the justification for caching
> "fifo_timeout_us" was to avoid calculations each time through the
> function. ...but then the code does the "19/16" math here instead of
> just including it in the cache. ;-) ;-) ;-)

Heh, yeah, but I was really talking about uart_fifo_timeout() doing
unnecessary calculations on each call (and that value used to be
calculated once and stored for later use).

I also realised that we need to account for the intermediate register
after I wrote the initial commit message, and before that this was just
a shift and add.

> That being said, I'm not really a fan of the "19 / 16" anyway. The 16
> value is calculated elsewhere in the code as:
> 
> port->tx_fifo_depth = geni_se_get_tx_fifo_depth(&port->se);
> port->tx_fifo_width = geni_se_get_tx_fifo_width(&port->se);
> port->rx_fifo_depth = geni_se_get_rx_fifo_depth(&port->se);
> uport->fifosize =
>   (port->tx_fifo_depth * port->tx_fifo_width) / BITS_PER_BYTE;
> 
> ...and here you're just hardcoding it to 16. Then there's also the
> fact that the "19 / 16" will also multiply the 20 ms "slop" added by
> uart_fifo_timeout() which doesn't seem ideal.

Indeed, and the early console code also hardcodes this to 16.

I don't care about the slop being 20 ms or 23.5, this is just a timeout
for the error case.

This will over count a bit if there is uart hw with 256 B fifos, but
could potentially undercount if there is hw with less than 16 words. I'm
not sure if such hw exists, but I'll see what I can find out.

> How about this: we just change "uport->fifosize" to account for the 3
> extra words? So it can be:
> 
> ((port->tx_fifo_depth + 3) * port->tx_fifo_width) / BITS_PER_BYTE;
> 
> ...then the cache will be correct and everything will work out. What
> do you think?

I don't think uart_fifo_timeout traditionally accounts for the shift
register and we wait up to *twice* the time it takes to clear to fifo
anyway (in wait_until_sent). The intermediate register I found here
could perhaps be considered part of the fifo however.

I'll give this some more thought.

Johan
Johan Hovold Sept. 6, 2024, 1:23 p.m. UTC | #3
On Thu, Sep 05, 2024 at 10:48:13AM +0200, Johan Hovold wrote:
> On Wed, Sep 04, 2024 at 02:50:57PM -0700, Doug Anderson wrote:

> > How about this: we just change "uport->fifosize" to account for the 3
> > extra words? So it can be:
> > 
> > ((port->tx_fifo_depth + 3) * port->tx_fifo_width) / BITS_PER_BYTE;
> > 
> > ...then the cache will be correct and everything will work out. What
> > do you think?
> 
> I don't think uart_fifo_timeout traditionally accounts for the shift
> register and we wait up to *twice* the time it takes to clear to fifo
> anyway (in wait_until_sent). The intermediate register I found here
> could perhaps be considered part of the fifo however.
> 
> I'll give this some more thought.

I decided to keep the fifo size as-is (e.g. as it is exported to user
space) and only account for the shift and intermediate registers in the
driver. I'm using the fifo size reported by the hardware to determine
the timeout in v2.

Johan
diff mbox series

Patch

diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c
index 69a632fefc41..e1926124339d 100644
--- a/drivers/tty/serial/qcom_geni_serial.c
+++ b/drivers/tty/serial/qcom_geni_serial.c
@@ -124,7 +124,7 @@  struct qcom_geni_serial_port {
 	dma_addr_t tx_dma_addr;
 	dma_addr_t rx_dma_addr;
 	bool setup;
-	unsigned int baud;
+	unsigned long fifo_timeout_us;
 	unsigned long clk_rate;
 	void *rx_buf;
 	u32 loopback;
@@ -270,22 +270,21 @@  static bool qcom_geni_serial_poll_bit(struct uart_port *uport,
 {
 	u32 reg;
 	struct qcom_geni_serial_port *port;
-	unsigned int baud;
-	unsigned int fifo_bits;
 	unsigned long timeout_us = 20000;
 	struct qcom_geni_private_data *private_data = uport->private_data;
 
 	if (private_data->drv) {
 		port = to_dev_port(uport);
-		baud = port->baud;
-		if (!baud)
-			baud = 115200;
-		fifo_bits = port->tx_fifo_depth * port->tx_fifo_width;
+
 		/*
-		 * Total polling iterations based on FIFO worth of bytes to be
-		 * sent at current baud. Add a little fluff to the wait.
+		 * Wait up to 19/16 the time it would take to clear a full
+		 * FIFO, which accounts for the three words in the shift and
+		 * intermediate registers.
+		 *
+		 * Note that fifo_timeout_us already has a 20 ms margin.
 		 */
-		timeout_us = ((fifo_bits * USEC_PER_SEC) / baud) + 500;
+		if (port->fifo_timeout_us)
+			timeout_us = 19 * port->fifo_timeout_us / 16;
 	}
 
 	/*
@@ -1248,7 +1247,6 @@  static void qcom_geni_serial_set_termios(struct uart_port *uport,
 	qcom_geni_serial_stop_rx(uport);
 	/* baud rate */
 	baud = uart_get_baud_rate(uport, termios, old, 300, 4000000);
-	port->baud = baud;
 
 	sampling_rate = UART_OVERSAMPLING;
 	/* Sampling rate is halved for IP versions >= 2.5 */
@@ -1326,8 +1324,10 @@  static void qcom_geni_serial_set_termios(struct uart_port *uport,
 	else
 		tx_trans_cfg |= UART_CTS_MASK;
 
-	if (baud)
+	if (baud) {
 		uart_update_timeout(uport, termios->c_cflag, baud);
+		port->fifo_timeout_us = jiffies_to_usecs(uart_fifo_timeout(uport));
+	}
 
 	if (!uart_console(uport))
 		writel(port->loopback,