diff mbox series

[v2] drm/msm/dp: do not complete dp_aux_cmd_fifo_tx() if irq is not for aux transfer

Message ID 1671129159-31105-1-git-send-email-quic_khsieh@quicinc.com
State New
Headers show
Series [v2] drm/msm/dp: do not complete dp_aux_cmd_fifo_tx() if irq is not for aux transfer | expand

Commit Message

Kuogee Hsieh Dec. 15, 2022, 6:32 p.m. UTC
There are 3 possible interrupt sources are handled by DP controller,
HPDstatus, Controller state changes and Aux read/write transaction.
At every irq, DP controller have to check isr status of every interrupt
sources and service the interrupt if its isr status bits shows interrupts
are pending. There is potential race condition may happen at current aux
isr handler implementation since it is always complete dp_aux_cmd_fifo_tx()
even irq is not for aux read or write transaction. This may cause aux read
transaction return premature if host aux data read is in the middle of
waiting for sink to complete transferring data to host while irq happen.
This will cause host's receiving buffer contains unexpected data. This
patch fixes this problem by checking aux isr and return immediately at
aux isr handler if there are no any isr status bits set.

Current there is a bug report regrading eDP edid corruption happen during
system booting up. After lengthy debugging to found that VIDEO_READY
interrupt was continuously firing during system booting up which cause
dp_aux_isr() to complete dp_aux_cmd_fifo_tx() prematurely to retrieve data
from aux hardware buffer which is not yet contains complete data transfer
from sink. This cause edid corruption.

Follows are the signature at kernel logs when problem happen,
EDID has corrupt header
panel-simple-dp-aux aux-aea0000.edp: Couldn't identify panel via EDID
panel-simple-dp-aux aux-aea0000.edp: error -EIO: Couldn't detect panel nor find a fallback

Changes in v2:
-- do complete if (ret == IRQ_HANDLED) ay dp-aux_isr()
-- add more commit text

Fixes: c943b4948b58 ("drm/msm/dp: add displayPort driver support")

Signed-off-by: Kuogee Hsieh <quic_khsieh@quicinc.com>
Tested-by: Douglas Anderson <dianders@chromium.org>
Reviewed-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
---
 drivers/gpu/drm/msm/dp/dp_aux.c | 87 +++++++++++++++++++++++++++++------------
 1 file changed, 63 insertions(+), 24 deletions(-)

Comments

Dmitry Baryshkov Dec. 15, 2022, 6:46 p.m. UTC | #1
On 15/12/2022 20:32, Kuogee Hsieh wrote:
> There are 3 possible interrupt sources are handled by DP controller,
> HPDstatus, Controller state changes and Aux read/write transaction.
> At every irq, DP controller have to check isr status of every interrupt
> sources and service the interrupt if its isr status bits shows interrupts
> are pending. There is potential race condition may happen at current aux
> isr handler implementation since it is always complete dp_aux_cmd_fifo_tx()
> even irq is not for aux read or write transaction. This may cause aux read
> transaction return premature if host aux data read is in the middle of
> waiting for sink to complete transferring data to host while irq happen.
> This will cause host's receiving buffer contains unexpected data. This
> patch fixes this problem by checking aux isr and return immediately at
> aux isr handler if there are no any isr status bits set.
> 
> Current there is a bug report regrading eDP edid corruption happen during
> system booting up. After lengthy debugging to found that VIDEO_READY
> interrupt was continuously firing during system booting up which cause
> dp_aux_isr() to complete dp_aux_cmd_fifo_tx() prematurely to retrieve data
> from aux hardware buffer which is not yet contains complete data transfer
> from sink. This cause edid corruption.
> 
> Follows are the signature at kernel logs when problem happen,
> EDID has corrupt header
> panel-simple-dp-aux aux-aea0000.edp: Couldn't identify panel via EDID
> panel-simple-dp-aux aux-aea0000.edp: error -EIO: Couldn't detect panel nor find a fallback
> 
> Changes in v2:
> -- do complete if (ret == IRQ_HANDLED) ay dp-aux_isr()
> -- add more commit text

Usually it's a single dash.

> 
> Fixes: c943b4948b58 ("drm/msm/dp: add displayPort driver support")
> 
> Signed-off-by: Kuogee Hsieh <quic_khsieh@quicinc.com>

There should be no empty lines between the tags.

> Tested-by: Douglas Anderson <dianders@chromium.org>
> Reviewed-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
> ---
>   drivers/gpu/drm/msm/dp/dp_aux.c | 87 +++++++++++++++++++++++++++++------------
>   1 file changed, 63 insertions(+), 24 deletions(-)
> 
> diff --git a/drivers/gpu/drm/msm/dp/dp_aux.c b/drivers/gpu/drm/msm/dp/dp_aux.c
> index d030a93..f31e5c1 100644
> --- a/drivers/gpu/drm/msm/dp/dp_aux.c
> +++ b/drivers/gpu/drm/msm/dp/dp_aux.c
> @@ -162,45 +162,78 @@ static ssize_t dp_aux_cmd_fifo_rx(struct dp_aux_private *aux,
>   	return i;
>   }
>   
> -static void dp_aux_native_handler(struct dp_aux_private *aux, u32 isr)
> +static irqreturn_t dp_aux_native_handler(struct dp_aux_private *aux, u32 isr)
>   {
> -	if (isr & DP_INTR_AUX_I2C_DONE)
> +	irqreturn_t ret = IRQ_NONE;
> +
> +	if (isr & DP_INTR_AUX_I2C_DONE) {
>   		aux->aux_error_num = DP_AUX_ERR_NONE;
> -	else if (isr & DP_INTR_WRONG_ADDR)
> +		ret = IRQ_HANDLED;
> +	} else if (isr & DP_INTR_WRONG_ADDR) {
>   		aux->aux_error_num = DP_AUX_ERR_ADDR;
> -	else if (isr & DP_INTR_TIMEOUT)
> +		ret = IRQ_HANDLED;
> +	} else if (isr & DP_INTR_TIMEOUT) {
>   		aux->aux_error_num = DP_AUX_ERR_TOUT;
> -	if (isr & DP_INTR_NACK_DEFER)
> +		ret = IRQ_HANDLED;
> +	}
> +
> +	if (isr & DP_INTR_NACK_DEFER) {
>   		aux->aux_error_num = DP_AUX_ERR_NACK;
> +		ret = IRQ_HANDLED;
> +	}
> +
>   	if (isr & DP_INTR_AUX_ERROR) {
>   		aux->aux_error_num = DP_AUX_ERR_PHY;
>   		dp_catalog_aux_clear_hw_interrupts(aux->catalog);
> +		ret = IRQ_HANDLED;
>   	}
> +
> +	return ret;
>   }
>   
> -static void dp_aux_i2c_handler(struct dp_aux_private *aux, u32 isr)
> +static irqreturn_t dp_aux_i2c_handler(struct dp_aux_private *aux, u32 isr)
>   {
> +	irqreturn_t ret = IRQ_NONE;
> +
>   	if (isr & DP_INTR_AUX_I2C_DONE) {
>   		if (isr & (DP_INTR_I2C_NACK | DP_INTR_I2C_DEFER))
>   			aux->aux_error_num = DP_AUX_ERR_NACK;
>   		else
>   			aux->aux_error_num = DP_AUX_ERR_NONE;
> -	} else {
> -		if (isr & DP_INTR_WRONG_ADDR)
> -			aux->aux_error_num = DP_AUX_ERR_ADDR;
> -		else if (isr & DP_INTR_TIMEOUT)
> -			aux->aux_error_num = DP_AUX_ERR_TOUT;
> -		if (isr & DP_INTR_NACK_DEFER)
> -			aux->aux_error_num = DP_AUX_ERR_NACK_DEFER;
> -		if (isr & DP_INTR_I2C_NACK)
> -			aux->aux_error_num = DP_AUX_ERR_NACK;
> -		if (isr & DP_INTR_I2C_DEFER)
> -			aux->aux_error_num = DP_AUX_ERR_DEFER;
> -		if (isr & DP_INTR_AUX_ERROR) {
> -			aux->aux_error_num = DP_AUX_ERR_PHY;
> -			dp_catalog_aux_clear_hw_interrupts(aux->catalog);
> -		}
> +
> +		return IRQ_HANDLED;
> +	}
> +
> +	if (isr & DP_INTR_WRONG_ADDR) {
> +		aux->aux_error_num = DP_AUX_ERR_ADDR;
> +		ret = IRQ_HANDLED;
> +	} else if (isr & DP_INTR_TIMEOUT) {
> +		aux->aux_error_num = DP_AUX_ERR_TOUT;
> +		ret = IRQ_HANDLED;
>   	}
> +
> +	if (isr & DP_INTR_NACK_DEFER) {
> +		aux->aux_error_num = DP_AUX_ERR_NACK_DEFER;
> +		ret = IRQ_HANDLED;
> +	}
> +
> +	if (isr & DP_INTR_I2C_NACK) {
> +		aux->aux_error_num = DP_AUX_ERR_NACK;
> +		ret = IRQ_HANDLED;
> +	}
> +
> +	if (isr & DP_INTR_I2C_DEFER) {
> +		aux->aux_error_num = DP_AUX_ERR_DEFER;
> +		ret = IRQ_HANDLED;
> +	}
> +
> +	if (isr & DP_INTR_AUX_ERROR) {
> +		aux->aux_error_num = DP_AUX_ERR_PHY;
> +		dp_catalog_aux_clear_hw_interrupts(aux->catalog);
> +		ret = IRQ_HANDLED;
> +	}
> +
> +	return ret;
>   }
>   
>   static void dp_aux_update_offset_and_segment(struct dp_aux_private *aux,
> @@ -413,6 +446,7 @@ void dp_aux_isr(struct drm_dp_aux *dp_aux)
>   {
>   	u32 isr;
>   	struct dp_aux_private *aux;
> +	irqreturn_t ret = IRQ_NONE;

No need to assign a value here. It will be overwritten in both of code 
branches.

>   
>   	if (!dp_aux) {
>   		DRM_ERROR("invalid input\n");
> @@ -423,15 +457,20 @@ void dp_aux_isr(struct drm_dp_aux *dp_aux)
>   
>   	isr = dp_catalog_aux_get_irq(aux->catalog);
>   
> +	/* no interrupts pending, return immediately */
> +	if (!isr)
> +		return;
> +

A separate commit please.

>   	if (!aux->cmd_busy)
>   		return;
>   
>   	if (aux->native)
> -		dp_aux_native_handler(aux, isr);
> +		ret = dp_aux_native_handler(aux, isr);
>   	else
> -		dp_aux_i2c_handler(aux, isr);
> +		ret = dp_aux_i2c_handler(aux, isr);
>   
> -	complete(&aux->comp);
> +	if (ret == IRQ_HANDLED)
> +		complete(&aux->comp);

Can you just move the complete() into the individual handling functions? 
Then you won't have to return the error code from dp_aux_*_handler() at 
all. You can check `isr' in that function and call complete if there was 
any error.

Also could you please describe, why is it necessary to complete() 
condition at all? Judging from your commit message the `if (!isr) 
return;' part should be enough.

>   }
>   
>   void dp_aux_reconfig(struct drm_dp_aux *dp_aux)
Stephen Boyd Dec. 15, 2022, 8:10 p.m. UTC | #2
Quoting Dmitry Baryshkov (2022-12-15 10:46:42)
> On 15/12/2022 20:32, Kuogee Hsieh wrote:
> >       if (!aux->cmd_busy)
> >               return;
> >
> >       if (aux->native)
> > -             dp_aux_native_handler(aux, isr);
> > +             ret = dp_aux_native_handler(aux, isr);
> >       else
> > -             dp_aux_i2c_handler(aux, isr);
> > +             ret = dp_aux_i2c_handler(aux, isr);
> >
> > -     complete(&aux->comp);
> > +     if (ret == IRQ_HANDLED)
> > +             complete(&aux->comp);
>
> Can you just move the complete() into the individual handling functions?
> Then you won't have to return the error code from dp_aux_*_handler() at
> all. You can check `isr' in that function and call complete if there was
> any error.

I'd prefer we apply my patch and pass the irqreturn_t variable to the
caller of this function so spurious irqs are shutdown. Should I send it
as a proper patch?
Kuogee Hsieh Dec. 15, 2022, 8:12 p.m. UTC | #3
On 12/15/2022 12:10 PM, Stephen Boyd wrote:
> Quoting Dmitry Baryshkov (2022-12-15 10:46:42)
>> On 15/12/2022 20:32, Kuogee Hsieh wrote:
>>>        if (!aux->cmd_busy)
>>>                return;
>>>
>>>        if (aux->native)
>>> -             dp_aux_native_handler(aux, isr);
>>> +             ret = dp_aux_native_handler(aux, isr);
>>>        else
>>> -             dp_aux_i2c_handler(aux, isr);
>>> +             ret = dp_aux_i2c_handler(aux, isr);
>>>
>>> -     complete(&aux->comp);
>>> +     if (ret == IRQ_HANDLED)
>>> +             complete(&aux->comp);
>> Can you just move the complete() into the individual handling functions?
>> Then you won't have to return the error code from dp_aux_*_handler() at
>> all. You can check `isr' in that function and call complete if there was
>> any error.
> I'd prefer we apply my patch and pass the irqreturn_t variable to the
> caller of this function so spurious irqs are shutdown. Should I send it
> as a proper patch?
yes, please
Kuogee Hsieh Dec. 15, 2022, 9:23 p.m. UTC | #4
On 12/15/2022 1:15 PM, Dmitry Baryshkov wrote:
> On 15/12/2022 22:10, Stephen Boyd wrote:
>> Quoting Dmitry Baryshkov (2022-12-15 10:46:42)
>>> On 15/12/2022 20:32, Kuogee Hsieh wrote:
>>>>        if (!aux->cmd_busy)
>>>>                return;
>>>>
>>>>        if (aux->native)
>>>> -             dp_aux_native_handler(aux, isr);
>>>> +             ret = dp_aux_native_handler(aux, isr);
>>>>        else
>>>> -             dp_aux_i2c_handler(aux, isr);
>>>> +             ret = dp_aux_i2c_handler(aux, isr);
>>>>
>>>> -     complete(&aux->comp);
>>>> +     if (ret == IRQ_HANDLED)
>>>> +             complete(&aux->comp);
>>>
>>> Can you just move the complete() into the individual handling 
>>> functions?
>>> Then you won't have to return the error code from dp_aux_*_handler() at
>>> all. You can check `isr' in that function and call complete if there 
>>> was
>>> any error.
>>
>> I'd prefer we apply my patch and pass the irqreturn_t variable to the
>> caller of this function so spurious irqs are shutdown. Should I send it
>> as a proper patch?
>
> I'm for handling the spurious IRQs in a proper way. However I believe 
> that it's not related to the issue Kuogee is trying to fix.
>
> Thus I think we should have two separate patches: one fixing the EDID 
> corruption issue (for which the proper fix is !isr check, IIUC) and 
> the irqreturn_t. And for the irqreturn_t it might be beneficial to 
> move complete() call to the dp_aux_foo_handler(). Or might be not. 
> That would depend on the patch itself.
>
>
ok, I will split this patch into two.
diff mbox series

Patch

diff --git a/drivers/gpu/drm/msm/dp/dp_aux.c b/drivers/gpu/drm/msm/dp/dp_aux.c
index d030a93..f31e5c1 100644
--- a/drivers/gpu/drm/msm/dp/dp_aux.c
+++ b/drivers/gpu/drm/msm/dp/dp_aux.c
@@ -162,45 +162,78 @@  static ssize_t dp_aux_cmd_fifo_rx(struct dp_aux_private *aux,
 	return i;
 }
 
-static void dp_aux_native_handler(struct dp_aux_private *aux, u32 isr)
+static irqreturn_t dp_aux_native_handler(struct dp_aux_private *aux, u32 isr)
 {
-	if (isr & DP_INTR_AUX_I2C_DONE)
+	irqreturn_t ret = IRQ_NONE;
+
+	if (isr & DP_INTR_AUX_I2C_DONE) {
 		aux->aux_error_num = DP_AUX_ERR_NONE;
-	else if (isr & DP_INTR_WRONG_ADDR)
+		ret = IRQ_HANDLED;
+	} else if (isr & DP_INTR_WRONG_ADDR) {
 		aux->aux_error_num = DP_AUX_ERR_ADDR;
-	else if (isr & DP_INTR_TIMEOUT)
+		ret = IRQ_HANDLED;
+	} else if (isr & DP_INTR_TIMEOUT) {
 		aux->aux_error_num = DP_AUX_ERR_TOUT;
-	if (isr & DP_INTR_NACK_DEFER)
+		ret = IRQ_HANDLED;
+	}
+
+	if (isr & DP_INTR_NACK_DEFER) {
 		aux->aux_error_num = DP_AUX_ERR_NACK;
+		ret = IRQ_HANDLED;
+	}
+
 	if (isr & DP_INTR_AUX_ERROR) {
 		aux->aux_error_num = DP_AUX_ERR_PHY;
 		dp_catalog_aux_clear_hw_interrupts(aux->catalog);
+		ret = IRQ_HANDLED;
 	}
+
+	return ret;
 }
 
-static void dp_aux_i2c_handler(struct dp_aux_private *aux, u32 isr)
+static irqreturn_t dp_aux_i2c_handler(struct dp_aux_private *aux, u32 isr)
 {
+	irqreturn_t ret = IRQ_NONE;
+
 	if (isr & DP_INTR_AUX_I2C_DONE) {
 		if (isr & (DP_INTR_I2C_NACK | DP_INTR_I2C_DEFER))
 			aux->aux_error_num = DP_AUX_ERR_NACK;
 		else
 			aux->aux_error_num = DP_AUX_ERR_NONE;
-	} else {
-		if (isr & DP_INTR_WRONG_ADDR)
-			aux->aux_error_num = DP_AUX_ERR_ADDR;
-		else if (isr & DP_INTR_TIMEOUT)
-			aux->aux_error_num = DP_AUX_ERR_TOUT;
-		if (isr & DP_INTR_NACK_DEFER)
-			aux->aux_error_num = DP_AUX_ERR_NACK_DEFER;
-		if (isr & DP_INTR_I2C_NACK)
-			aux->aux_error_num = DP_AUX_ERR_NACK;
-		if (isr & DP_INTR_I2C_DEFER)
-			aux->aux_error_num = DP_AUX_ERR_DEFER;
-		if (isr & DP_INTR_AUX_ERROR) {
-			aux->aux_error_num = DP_AUX_ERR_PHY;
-			dp_catalog_aux_clear_hw_interrupts(aux->catalog);
-		}
+
+		return IRQ_HANDLED;
+	}
+
+	if (isr & DP_INTR_WRONG_ADDR) {
+		aux->aux_error_num = DP_AUX_ERR_ADDR;
+		ret = IRQ_HANDLED;
+	} else if (isr & DP_INTR_TIMEOUT) {
+		aux->aux_error_num = DP_AUX_ERR_TOUT;
+		ret = IRQ_HANDLED;
 	}
+
+	if (isr & DP_INTR_NACK_DEFER) {
+		aux->aux_error_num = DP_AUX_ERR_NACK_DEFER;
+		ret = IRQ_HANDLED;
+	}
+
+	if (isr & DP_INTR_I2C_NACK) {
+		aux->aux_error_num = DP_AUX_ERR_NACK;
+		ret = IRQ_HANDLED;
+	}
+
+	if (isr & DP_INTR_I2C_DEFER) {
+		aux->aux_error_num = DP_AUX_ERR_DEFER;
+		ret = IRQ_HANDLED;
+	}
+
+	if (isr & DP_INTR_AUX_ERROR) {
+		aux->aux_error_num = DP_AUX_ERR_PHY;
+		dp_catalog_aux_clear_hw_interrupts(aux->catalog);
+		ret = IRQ_HANDLED;
+	}
+
+	return ret;
 }
 
 static void dp_aux_update_offset_and_segment(struct dp_aux_private *aux,
@@ -413,6 +446,7 @@  void dp_aux_isr(struct drm_dp_aux *dp_aux)
 {
 	u32 isr;
 	struct dp_aux_private *aux;
+	irqreturn_t ret = IRQ_NONE;
 
 	if (!dp_aux) {
 		DRM_ERROR("invalid input\n");
@@ -423,15 +457,20 @@  void dp_aux_isr(struct drm_dp_aux *dp_aux)
 
 	isr = dp_catalog_aux_get_irq(aux->catalog);
 
+	/* no interrupts pending, return immediately */
+	if (!isr)
+		return;
+
 	if (!aux->cmd_busy)
 		return;
 
 	if (aux->native)
-		dp_aux_native_handler(aux, isr);
+		ret = dp_aux_native_handler(aux, isr);
 	else
-		dp_aux_i2c_handler(aux, isr);
+		ret = dp_aux_i2c_handler(aux, isr);
 
-	complete(&aux->comp);
+	if (ret == IRQ_HANDLED)
+		complete(&aux->comp);
 }
 
 void dp_aux_reconfig(struct drm_dp_aux *dp_aux)