diff mbox series

[v3,6/7] drm/msm/dp: add pm_runtime_force_suspend()/resume()

Message ID 1694813901-26952-7-git-send-email-quic_khsieh@quicinc.com
State Superseded
Headers show
Series incorporate pm runtime framework and eDP clean up | expand

Commit Message

Kuogee Hsieh Sept. 15, 2023, 9:38 p.m. UTC
Add pm_runtime_force_suspend()/resume() to complete incorporating pm
runtime framework into DP driver. Both dp_pm_prepare() and dp_pm_complete()
are added to set hpd_state to correct state. After resume, DP driver will
re training its main link after .hpd_enable() callback enabled HPD
interrupts and bring up display accordingly.

Changes in v3:
-- replace dp_pm_suspend() with pm_runtime_force_suspend()
-- replace dp_pm_resume() with pm_runtime_force_resume()

Signed-off-by: Kuogee Hsieh <quic_khsieh@quicinc.com>
---
 drivers/gpu/drm/msm/dp/dp_display.c | 87 +++++--------------------------------
 1 file changed, 10 insertions(+), 77 deletions(-)

Comments

Dmitry Baryshkov Sept. 16, 2023, 1:21 a.m. UTC | #1
On Sat, 16 Sept 2023 at 00:38, Kuogee Hsieh <quic_khsieh@quicinc.com> wrote:
>
> Add pm_runtime_force_suspend()/resume() to complete incorporating pm
> runtime framework into DP driver. Both dp_pm_prepare() and dp_pm_complete()
> are added to set hpd_state to correct state. After resume, DP driver will
> re training its main link after .hpd_enable() callback enabled HPD
> interrupts and bring up display accordingly.

How will it re-train the main link? What is the code path for that?

I think this is a misuse for prepare/complete callbacks, at least
judging from their documentation.

>
> Changes in v3:
> -- replace dp_pm_suspend() with pm_runtime_force_suspend()
> -- replace dp_pm_resume() with pm_runtime_force_resume()
>
> Signed-off-by: Kuogee Hsieh <quic_khsieh@quicinc.com>
> ---
>  drivers/gpu/drm/msm/dp/dp_display.c | 87 +++++--------------------------------
>  1 file changed, 10 insertions(+), 77 deletions(-)
>
> diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c
> index b6992202..b58cb02 100644
> --- a/drivers/gpu/drm/msm/dp/dp_display.c
> +++ b/drivers/gpu/drm/msm/dp/dp_display.c
> @@ -1333,101 +1333,35 @@ static int dp_pm_runtime_resume(struct device *dev)
>         return 0;
>  }
>
> -static int dp_pm_resume(struct device *dev)
> +static void dp_pm_complete(struct device *dev)
>  {
> -       struct platform_device *pdev = to_platform_device(dev);
> -       struct msm_dp *dp_display = platform_get_drvdata(pdev);
> -       struct dp_display_private *dp;
> -       int sink_count = 0;
> -
> -       dp = container_of(dp_display, struct dp_display_private, dp_display);
> +       struct dp_display_private *dp = dev_get_dp_display_private(dev);
>
>         mutex_lock(&dp->event_mutex);
>
>         drm_dbg_dp(dp->drm_dev,
> -               "Before, type=%d core_inited=%d phy_inited=%d power_on=%d\n",
> +               "type=%d core_inited=%d phy_inited=%d power_on=%d\n",
>                 dp->dp_display.connector_type, dp->core_initialized,
> -               dp->phy_initialized, dp_display->power_on);
> +               dp->phy_initialized, dp->dp_display.power_on);
>
>         /* start from disconnected state */
>         dp->hpd_state = ST_DISCONNECTED;
>
> -       /* turn on dp ctrl/phy */
> -       dp_display_host_init(dp);
> -
> -       if (dp_display->is_edp)
> -               dp_catalog_ctrl_hpd_enable(dp->catalog);
> -
> -       if (dp_catalog_link_is_connected(dp->catalog)) {
> -               /*
> -                * set sink to normal operation mode -- D0
> -                * before dpcd read
> -                */
> -               dp_display_host_phy_init(dp);
> -               dp_link_psm_config(dp->link, &dp->panel->link_info, false);
> -               sink_count = drm_dp_read_sink_count(dp->aux);
> -               if (sink_count < 0)
> -                       sink_count = 0;
> -
> -               dp_display_host_phy_exit(dp);
> -       }
> -
> -       dp->link->sink_count = sink_count;
> -       /*
> -        * can not declared display is connected unless
> -        * HDMI cable is plugged in and sink_count of
> -        * dongle become 1
> -        * also only signal audio when disconnected
> -        */
> -       if (dp->link->sink_count) {
> -               dp->dp_display.link_ready = true;
> -       } else {
> -               dp->dp_display.link_ready = false;
> -               dp_display_handle_plugged_change(dp_display, false);
> -       }
> -
> -       drm_dbg_dp(dp->drm_dev,
> -               "After, type=%d sink=%d conn=%d core_init=%d phy_init=%d power=%d\n",
> -               dp->dp_display.connector_type, dp->link->sink_count,
> -               dp->dp_display.link_ready, dp->core_initialized,
> -               dp->phy_initialized, dp_display->power_on);
> -
>         mutex_unlock(&dp->event_mutex);
> -
> -       return 0;
>  }
>
> -static int dp_pm_suspend(struct device *dev)
> +static int dp_pm_prepare(struct device *dev)
>  {
> -       struct platform_device *pdev = to_platform_device(dev);
> -       struct msm_dp *dp_display = platform_get_drvdata(pdev);
> -       struct dp_display_private *dp;
> -
> -       dp = container_of(dp_display, struct dp_display_private, dp_display);
> +       struct dp_display_private *dp = dev_get_dp_display_private(dev);
>
>         mutex_lock(&dp->event_mutex);
>
> -       drm_dbg_dp(dp->drm_dev,
> -               "Before, type=%d core_inited=%d  phy_inited=%d power_on=%d\n",
> -               dp->dp_display.connector_type, dp->core_initialized,
> -               dp->phy_initialized, dp_display->power_on);
> -
>         /* mainlink enabled */
>         if (dp_power_clk_status(dp->power, DP_CTRL_PM))
>                 dp_ctrl_off_link_stream(dp->ctrl);
>
> -       dp_display_host_phy_exit(dp);
> -
> -       /* host_init will be called at pm_resume */
> -       dp_display_host_deinit(dp);
> -
>         dp->hpd_state = ST_SUSPENDED;
>
> -       drm_dbg_dp(dp->drm_dev,
> -               "After, type=%d core_inited=%d phy_inited=%d power_on=%d\n",
> -               dp->dp_display.connector_type, dp->core_initialized,
> -               dp->phy_initialized, dp_display->power_on);
> -
>         mutex_unlock(&dp->event_mutex);
>
>         return 0;
> @@ -1435,8 +1369,10 @@ static int dp_pm_suspend(struct device *dev)
>
>  static const struct dev_pm_ops dp_pm_ops = {
>         SET_RUNTIME_PM_OPS(dp_pm_runtime_suspend, dp_pm_runtime_resume, NULL)
> -       .suspend = dp_pm_suspend,
> -       .resume =  dp_pm_resume,
> +       SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
> +                                pm_runtime_force_resume)
> +       .prepare = dp_pm_prepare,
> +       .complete = dp_pm_complete,
>  };
>
>  static struct platform_driver dp_display_driver = {
> @@ -1670,9 +1606,6 @@ void dp_bridge_atomic_post_disable(struct drm_bridge *drm_bridge,
>
>         dp_display = container_of(dp, struct dp_display_private, dp_display);
>
> -       if (dp->is_edp)
> -               dp_hpd_unplug_handle(dp_display, 0);
> -
>         mutex_lock(&dp_display->event_mutex);
>
>         state = dp_display->hpd_state;
> --
> 2.7.4
>
Kuogee Hsieh Sept. 18, 2023, 5:47 p.m. UTC | #2
On 9/15/2023 6:21 PM, Dmitry Baryshkov wrote:
> On Sat, 16 Sept 2023 at 00:38, Kuogee Hsieh <quic_khsieh@quicinc.com> wrote:
>> Add pm_runtime_force_suspend()/resume() to complete incorporating pm
>> runtime framework into DP driver. Both dp_pm_prepare() and dp_pm_complete()
>> are added to set hpd_state to correct state. After resume, DP driver will
>> re training its main link after .hpd_enable() callback enabled HPD
>> interrupts and bring up display accordingly.
> How will it re-train the main link? What is the code path for that?

1) for edp, dp_bridge_atomic_enable(), called from framework, to start 
link training and bring up display.

2) for external DP, HPD_PLUG_INT will be generated to start link 
training and bring up display.

>
> I think this is a misuse for prepare/complete callbacks, at least
> judging from their documentation.

1) dp_pm_prepare() is called to make sure eDP/DP related power/clocks 
are off and set hpd_state  to ST_SUSPENDED and nothing else.

2) dp_pm_completed() is called to set hpd_state to ST_ST_DISCONNECTED 
(default state) and nothing else.

I think both are doing proper action.


>
>> Changes in v3:
>> -- replace dp_pm_suspend() with pm_runtime_force_suspend()
>> -- replace dp_pm_resume() with pm_runtime_force_resume()
>>
>> Signed-off-by: Kuogee Hsieh <quic_khsieh@quicinc.com>
>> ---
>>   drivers/gpu/drm/msm/dp/dp_display.c | 87 +++++--------------------------------
>>   1 file changed, 10 insertions(+), 77 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c
>> index b6992202..b58cb02 100644
>> --- a/drivers/gpu/drm/msm/dp/dp_display.c
>> +++ b/drivers/gpu/drm/msm/dp/dp_display.c
>> @@ -1333,101 +1333,35 @@ static int dp_pm_runtime_resume(struct device *dev)
>>          return 0;
>>   }
>>
>> -static int dp_pm_resume(struct device *dev)
>> +static void dp_pm_complete(struct device *dev)
>>   {
>> -       struct platform_device *pdev = to_platform_device(dev);
>> -       struct msm_dp *dp_display = platform_get_drvdata(pdev);
>> -       struct dp_display_private *dp;
>> -       int sink_count = 0;
>> -
>> -       dp = container_of(dp_display, struct dp_display_private, dp_display);
>> +       struct dp_display_private *dp = dev_get_dp_display_private(dev);
>>
>>          mutex_lock(&dp->event_mutex);
>>
>>          drm_dbg_dp(dp->drm_dev,
>> -               "Before, type=%d core_inited=%d phy_inited=%d power_on=%d\n",
>> +               "type=%d core_inited=%d phy_inited=%d power_on=%d\n",
>>                  dp->dp_display.connector_type, dp->core_initialized,
>> -               dp->phy_initialized, dp_display->power_on);
>> +               dp->phy_initialized, dp->dp_display.power_on);
>>
>>          /* start from disconnected state */
>>          dp->hpd_state = ST_DISCONNECTED;
>>
>> -       /* turn on dp ctrl/phy */
>> -       dp_display_host_init(dp);
>> -
>> -       if (dp_display->is_edp)
>> -               dp_catalog_ctrl_hpd_enable(dp->catalog);
>> -
>> -       if (dp_catalog_link_is_connected(dp->catalog)) {
>> -               /*
>> -                * set sink to normal operation mode -- D0
>> -                * before dpcd read
>> -                */
>> -               dp_display_host_phy_init(dp);
>> -               dp_link_psm_config(dp->link, &dp->panel->link_info, false);
>> -               sink_count = drm_dp_read_sink_count(dp->aux);
>> -               if (sink_count < 0)
>> -                       sink_count = 0;
>> -
>> -               dp_display_host_phy_exit(dp);
>> -       }
>> -
>> -       dp->link->sink_count = sink_count;
>> -       /*
>> -        * can not declared display is connected unless
>> -        * HDMI cable is plugged in and sink_count of
>> -        * dongle become 1
>> -        * also only signal audio when disconnected
>> -        */
>> -       if (dp->link->sink_count) {
>> -               dp->dp_display.link_ready = true;
>> -       } else {
>> -               dp->dp_display.link_ready = false;
>> -               dp_display_handle_plugged_change(dp_display, false);
>> -       }
>> -
>> -       drm_dbg_dp(dp->drm_dev,
>> -               "After, type=%d sink=%d conn=%d core_init=%d phy_init=%d power=%d\n",
>> -               dp->dp_display.connector_type, dp->link->sink_count,
>> -               dp->dp_display.link_ready, dp->core_initialized,
>> -               dp->phy_initialized, dp_display->power_on);
>> -
>>          mutex_unlock(&dp->event_mutex);
>> -
>> -       return 0;
>>   }
>>
>> -static int dp_pm_suspend(struct device *dev)
>> +static int dp_pm_prepare(struct device *dev)
>>   {
>> -       struct platform_device *pdev = to_platform_device(dev);
>> -       struct msm_dp *dp_display = platform_get_drvdata(pdev);
>> -       struct dp_display_private *dp;
>> -
>> -       dp = container_of(dp_display, struct dp_display_private, dp_display);
>> +       struct dp_display_private *dp = dev_get_dp_display_private(dev);
>>
>>          mutex_lock(&dp->event_mutex);
>>
>> -       drm_dbg_dp(dp->drm_dev,
>> -               "Before, type=%d core_inited=%d  phy_inited=%d power_on=%d\n",
>> -               dp->dp_display.connector_type, dp->core_initialized,
>> -               dp->phy_initialized, dp_display->power_on);
>> -
>>          /* mainlink enabled */
>>          if (dp_power_clk_status(dp->power, DP_CTRL_PM))
>>                  dp_ctrl_off_link_stream(dp->ctrl);
>>
>> -       dp_display_host_phy_exit(dp);
>> -
>> -       /* host_init will be called at pm_resume */
>> -       dp_display_host_deinit(dp);
>> -
>>          dp->hpd_state = ST_SUSPENDED;
>>
>> -       drm_dbg_dp(dp->drm_dev,
>> -               "After, type=%d core_inited=%d phy_inited=%d power_on=%d\n",
>> -               dp->dp_display.connector_type, dp->core_initialized,
>> -               dp->phy_initialized, dp_display->power_on);
>> -
>>          mutex_unlock(&dp->event_mutex);
>>
>>          return 0;
>> @@ -1435,8 +1369,10 @@ static int dp_pm_suspend(struct device *dev)
>>
>>   static const struct dev_pm_ops dp_pm_ops = {
>>          SET_RUNTIME_PM_OPS(dp_pm_runtime_suspend, dp_pm_runtime_resume, NULL)
>> -       .suspend = dp_pm_suspend,
>> -       .resume =  dp_pm_resume,
>> +       SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
>> +                                pm_runtime_force_resume)
>> +       .prepare = dp_pm_prepare,
>> +       .complete = dp_pm_complete,
>>   };
>>
>>   static struct platform_driver dp_display_driver = {
>> @@ -1670,9 +1606,6 @@ void dp_bridge_atomic_post_disable(struct drm_bridge *drm_bridge,
>>
>>          dp_display = container_of(dp, struct dp_display_private, dp_display);
>>
>> -       if (dp->is_edp)
>> -               dp_hpd_unplug_handle(dp_display, 0);
>> -
>>          mutex_lock(&dp_display->event_mutex);
>>
>>          state = dp_display->hpd_state;
>> --
>> 2.7.4
>>
>
Kuogee Hsieh Sept. 20, 2023, 7:49 p.m. UTC | #3
On 9/19/2023 2:50 AM, Dmitry Baryshkov wrote:
> On Mon, 18 Sept 2023 at 20:48, Kuogee Hsieh <quic_khsieh@quicinc.com> wrote:
>>
>> On 9/15/2023 6:21 PM, Dmitry Baryshkov wrote:
>>> On Sat, 16 Sept 2023 at 00:38, Kuogee Hsieh <quic_khsieh@quicinc.com> wrote:
>>>> Add pm_runtime_force_suspend()/resume() to complete incorporating pm
>>>> runtime framework into DP driver. Both dp_pm_prepare() and dp_pm_complete()
>>>> are added to set hpd_state to correct state. After resume, DP driver will
>>>> re training its main link after .hpd_enable() callback enabled HPD
>>>> interrupts and bring up display accordingly.
>>> How will it re-train the main link? What is the code path for that?
>> 1) for edp, dp_bridge_atomic_enable(), called from framework, to start
>> link training and bring up display.
> And this path doesn't use .hpd_enable() which you have mentioned in
> the commit message. Please don't try to shorten the commit message.
> You see, I have had questions to several of them, which means that
> they were not verbose enough.

ok, my bad,

I will add more explain to commit text.

>> 2) for external DP, HPD_PLUG_INT will be generated to start link
>> training and bring up display.
> This should be hpd_notify, who starts link training, not some event.
>
>>> I think this is a misuse for prepare/complete callbacks, at least
>>> judging from their documentation.
>> 1) dp_pm_prepare() is called to make sure eDP/DP related power/clocks
>> are off and set hpd_state  to ST_SUSPENDED and nothing else.
>>
>> 2) dp_pm_completed() is called to set hpd_state to ST_ST_DISCONNECTED
>> (default state) and nothing else.
>>
>> I think both are doing proper action.
> Have you read the prepare() / complete() documentation? Does your
> usage follow the documented use case?
I think I can just remove both dp_pm_prepare and dp_pm_complete fro 
this  patch.
>
>>
>>>> Changes in v3:
>>>> -- replace dp_pm_suspend() with pm_runtime_force_suspend()
>>>> -- replace dp_pm_resume() with pm_runtime_force_resume()
>>>>
>>>> Signed-off-by: Kuogee Hsieh <quic_khsieh@quicinc.com>
>>>> ---
>>>>    drivers/gpu/drm/msm/dp/dp_display.c | 87 +++++--------------------------------
>>>>    1 file changed, 10 insertions(+), 77 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c
>>>> index b6992202..b58cb02 100644
>>>> --- a/drivers/gpu/drm/msm/dp/dp_display.c
>>>> +++ b/drivers/gpu/drm/msm/dp/dp_display.c
>>>> @@ -1333,101 +1333,35 @@ static int dp_pm_runtime_resume(struct device *dev)
>>>>           return 0;
>>>>    }
>>>>
>>>> -static int dp_pm_resume(struct device *dev)
>>>> +static void dp_pm_complete(struct device *dev)
>>>>    {
>>>> -       struct platform_device *pdev = to_platform_device(dev);
>>>> -       struct msm_dp *dp_display = platform_get_drvdata(pdev);
>>>> -       struct dp_display_private *dp;
>>>> -       int sink_count = 0;
>>>> -
>>>> -       dp = container_of(dp_display, struct dp_display_private, dp_display);
>>>> +       struct dp_display_private *dp = dev_get_dp_display_private(dev);
>>>>
>>>>           mutex_lock(&dp->event_mutex);
>>>>
>>>>           drm_dbg_dp(dp->drm_dev,
>>>> -               "Before, type=%d core_inited=%d phy_inited=%d power_on=%d\n",
>>>> +               "type=%d core_inited=%d phy_inited=%d power_on=%d\n",
>>>>                   dp->dp_display.connector_type, dp->core_initialized,
>>>> -               dp->phy_initialized, dp_display->power_on);
>>>> +               dp->phy_initialized, dp->dp_display.power_on);
>>>>
>>>>           /* start from disconnected state */
>>>>           dp->hpd_state = ST_DISCONNECTED;
>>>>
>>>> -       /* turn on dp ctrl/phy */
>>>> -       dp_display_host_init(dp);
>>>> -
>>>> -       if (dp_display->is_edp)
>>>> -               dp_catalog_ctrl_hpd_enable(dp->catalog);
>>>> -
>>>> -       if (dp_catalog_link_is_connected(dp->catalog)) {
>>>> -               /*
>>>> -                * set sink to normal operation mode -- D0
>>>> -                * before dpcd read
>>>> -                */
>>>> -               dp_display_host_phy_init(dp);
>>>> -               dp_link_psm_config(dp->link, &dp->panel->link_info, false);
>>>> -               sink_count = drm_dp_read_sink_count(dp->aux);
>>>> -               if (sink_count < 0)
>>>> -                       sink_count = 0;
>>>> -
>>>> -               dp_display_host_phy_exit(dp);
>>>> -       }
>>>> -
>>>> -       dp->link->sink_count = sink_count;
>>>> -       /*
>>>> -        * can not declared display is connected unless
>>>> -        * HDMI cable is plugged in and sink_count of
>>>> -        * dongle become 1
>>>> -        * also only signal audio when disconnected
>>>> -        */
>>>> -       if (dp->link->sink_count) {
>>>> -               dp->dp_display.link_ready = true;
>>>> -       } else {
>>>> -               dp->dp_display.link_ready = false;
>>>> -               dp_display_handle_plugged_change(dp_display, false);
>>>> -       }
>>>> -
>>>> -       drm_dbg_dp(dp->drm_dev,
>>>> -               "After, type=%d sink=%d conn=%d core_init=%d phy_init=%d power=%d\n",
>>>> -               dp->dp_display.connector_type, dp->link->sink_count,
>>>> -               dp->dp_display.link_ready, dp->core_initialized,
>>>> -               dp->phy_initialized, dp_display->power_on);
>>>> -
>>>>           mutex_unlock(&dp->event_mutex);
>>>> -
>>>> -       return 0;
>>>>    }
>>>>
>>>> -static int dp_pm_suspend(struct device *dev)
>>>> +static int dp_pm_prepare(struct device *dev)
>>>>    {
>>>> -       struct platform_device *pdev = to_platform_device(dev);
>>>> -       struct msm_dp *dp_display = platform_get_drvdata(pdev);
>>>> -       struct dp_display_private *dp;
>>>> -
>>>> -       dp = container_of(dp_display, struct dp_display_private, dp_display);
>>>> +       struct dp_display_private *dp = dev_get_dp_display_private(dev);
>>>>
>>>>           mutex_lock(&dp->event_mutex);
>>>>
>>>> -       drm_dbg_dp(dp->drm_dev,
>>>> -               "Before, type=%d core_inited=%d  phy_inited=%d power_on=%d\n",
>>>> -               dp->dp_display.connector_type, dp->core_initialized,
>>>> -               dp->phy_initialized, dp_display->power_on);
>>>> -
>>>>           /* mainlink enabled */
>>>>           if (dp_power_clk_status(dp->power, DP_CTRL_PM))
>>>>                   dp_ctrl_off_link_stream(dp->ctrl);
>>>>
>>>> -       dp_display_host_phy_exit(dp);
>>>> -
>>>> -       /* host_init will be called at pm_resume */
>>>> -       dp_display_host_deinit(dp);
>>>> -
>>>>           dp->hpd_state = ST_SUSPENDED;
>>>>
>>>> -       drm_dbg_dp(dp->drm_dev,
>>>> -               "After, type=%d core_inited=%d phy_inited=%d power_on=%d\n",
>>>> -               dp->dp_display.connector_type, dp->core_initialized,
>>>> -               dp->phy_initialized, dp_display->power_on);
>>>> -
>>>>           mutex_unlock(&dp->event_mutex);
>>>>
>>>>           return 0;
>>>> @@ -1435,8 +1369,10 @@ static int dp_pm_suspend(struct device *dev)
>>>>
>>>>    static const struct dev_pm_ops dp_pm_ops = {
>>>>           SET_RUNTIME_PM_OPS(dp_pm_runtime_suspend, dp_pm_runtime_resume, NULL)
>>>> -       .suspend = dp_pm_suspend,
>>>> -       .resume =  dp_pm_resume,
>>>> +       SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
>>>> +                                pm_runtime_force_resume)
>>>> +       .prepare = dp_pm_prepare,
>>>> +       .complete = dp_pm_complete,
>>>>    };
>>>>
>>>>    static struct platform_driver dp_display_driver = {
>>>> @@ -1670,9 +1606,6 @@ void dp_bridge_atomic_post_disable(struct drm_bridge *drm_bridge,
>>>>
>>>>           dp_display = container_of(dp, struct dp_display_private, dp_display);
>>>>
>>>> -       if (dp->is_edp)
>>>> -               dp_hpd_unplug_handle(dp_display, 0);
>>>> -
>>>>           mutex_lock(&dp_display->event_mutex);
>>>>
>>>>           state = dp_display->hpd_state;
>>>> --
>>>> 2.7.4
>>>>
>
>
Stephen Boyd Sept. 22, 2023, 9:54 p.m. UTC | #4
Quoting Dmitry Baryshkov (2023-09-19 02:50:12)
> On Mon, 18 Sept 2023 at 20:48, Kuogee Hsieh <quic_khsieh@quicinc.com> wrote:
> >
> >
> > On 9/15/2023 6:21 PM, Dmitry Baryshkov wrote:
> > > On Sat, 16 Sept 2023 at 00:38, Kuogee Hsieh <quic_khsieh@quicinc.com> wrote:
> > >> Add pm_runtime_force_suspend()/resume() to complete incorporating pm
> > >> runtime framework into DP driver. Both dp_pm_prepare() and dp_pm_complete()
> > >> are added to set hpd_state to correct state. After resume, DP driver will
> > >> re training its main link after .hpd_enable() callback enabled HPD
> > >> interrupts and bring up display accordingly.
> > > How will it re-train the main link? What is the code path for that?
> >
> > 1) for edp, dp_bridge_atomic_enable(), called from framework, to start
> > link training and bring up display.
>
> And this path doesn't use .hpd_enable() which you have mentioned in
> the commit message. Please don't try to shorten the commit message.
> You see, I have had questions to several of them, which means that
> they were not verbose enough.
>
> >
> > 2) for external DP, HPD_PLUG_INT will be generated to start link
> > training and bring up display.
>
> This should be hpd_notify, who starts link training, not some event.

I think this driver should train the link during atomic_enable(), not
hpd_notify() (or directly from the irq handler). The drm_bridge_funcs
talk a bit about when the clocks and timing signals are supposed to be
enabled. For example, struct drm_bridge_funcs::atomic_pre_enable() says
the "display pipe (i.e.  clocks and timing signals) feeding this bridge
will not yet be running when this callback is called". And struct
drm_bridge_funcs::atomic_enable() says "this callback must enable the
display link feeding the next bridge in the chain if there is one."

That looks to me like link training, i.e. the display link, should
happen in the enable path and not hpd_notify. It looks like link
training could fail, but when that happens I believe the driver should
call drm_connector_set_link_status_property() with
DRM_MODE_LINK_STATUS_BAD. The two callers of that which exist in the
tree also call drm_kms_helper_hotplug_event() or
drm_kms_helper_connector_hotplug_event() after updating the link so that
userspace knows to try again.

It would be nice if there was some drm_bridge_set_link_status_bad() API
that bridge drivers could use to signal that the link status is bad and
call the hotplug helper. Maybe it could also record some diagnostics
about which bridge failed to setup the link and stop the atomic_enable()
chain for that connector.
Abhinav Kumar Sept. 23, 2023, 1:35 a.m. UTC | #5
Hi Stephen

On 9/22/2023 2:54 PM, Stephen Boyd wrote:
> Quoting Dmitry Baryshkov (2023-09-19 02:50:12)
>> On Mon, 18 Sept 2023 at 20:48, Kuogee Hsieh <quic_khsieh@quicinc.com> wrote:
>>>
>>>
>>> On 9/15/2023 6:21 PM, Dmitry Baryshkov wrote:
>>>> On Sat, 16 Sept 2023 at 00:38, Kuogee Hsieh <quic_khsieh@quicinc.com> wrote:
>>>>> Add pm_runtime_force_suspend()/resume() to complete incorporating pm
>>>>> runtime framework into DP driver. Both dp_pm_prepare() and dp_pm_complete()
>>>>> are added to set hpd_state to correct state. After resume, DP driver will
>>>>> re training its main link after .hpd_enable() callback enabled HPD
>>>>> interrupts and bring up display accordingly.
>>>> How will it re-train the main link? What is the code path for that?
>>>
>>> 1) for edp, dp_bridge_atomic_enable(), called from framework, to start
>>> link training and bring up display.
>>
>> And this path doesn't use .hpd_enable() which you have mentioned in
>> the commit message. Please don't try to shorten the commit message.
>> You see, I have had questions to several of them, which means that
>> they were not verbose enough.
>>
>>>
>>> 2) for external DP, HPD_PLUG_INT will be generated to start link
>>> training and bring up display.
>>
>> This should be hpd_notify, who starts link training, not some event.
> 
> I think this driver should train the link during atomic_enable(), not
> hpd_notify() (or directly from the irq handler). The drm_bridge_funcs
> talk a bit about when the clocks and timing signals are supposed to be
> enabled. For example, struct drm_bridge_funcs::atomic_pre_enable() says
> the "display pipe (i.e.  clocks and timing signals) feeding this bridge
> will not yet be running when this callback is called". And struct
> drm_bridge_funcs::atomic_enable() says "this callback must enable the
> display link feeding the next bridge in the chain if there is one."
> 
> That looks to me like link training, i.e. the display link, should
> happen in the enable path and not hpd_notify. It looks like link
> training could fail, but when that happens I believe the driver should
> call drm_connector_set_link_status_property() with
> DRM_MODE_LINK_STATUS_BAD. The two callers of that which exist in the
> tree also call drm_kms_helper_hotplug_event() or
> drm_kms_helper_connector_hotplug_event() after updating the link so that
> userspace knows to try again.
> 
> It would be nice if there was some drm_bridge_set_link_status_bad() API
> that bridge drivers could use to signal that the link status is bad and
> call the hotplug helper. Maybe it could also record some diagnostics
> about which bridge failed to setup the link and stop the atomic_enable()
> chain for that connector.

Doing link training when we get hpd instead of atomic_enable() is a 
design choice we have been following for a while because for the case 
when link training fails in atomic_enable() and setting the link status 
property as you mentioned, the compositor needs to be able to handle 
that and also needs to try with a different resolution or take some 
other corrective action. We have seen many compositors not able to 
handle this complexity. So the design sends the hotplug to usermode only 
after link training succeeds.

I do not think we should change this design unless prototyped with an 
existing compositor such as chrome or android at this point.

Thanks

Abhinav
Kuogee Hsieh Sept. 25, 2023, 4:07 p.m. UTC | #6
On 9/22/2023 6:35 PM, Abhinav Kumar wrote:
> Hi Stephen
>
> On 9/22/2023 2:54 PM, Stephen Boyd wrote:
>> Quoting Dmitry Baryshkov (2023-09-19 02:50:12)
>>> On Mon, 18 Sept 2023 at 20:48, Kuogee Hsieh 
>>> <quic_khsieh@quicinc.com> wrote:
>>>>
>>>>
>>>> On 9/15/2023 6:21 PM, Dmitry Baryshkov wrote:
>>>>> On Sat, 16 Sept 2023 at 00:38, Kuogee Hsieh 
>>>>> <quic_khsieh@quicinc.com> wrote:
>>>>>> Add pm_runtime_force_suspend()/resume() to complete incorporating pm
>>>>>> runtime framework into DP driver. Both dp_pm_prepare() and 
>>>>>> dp_pm_complete()
>>>>>> are added to set hpd_state to correct state. After resume, DP 
>>>>>> driver will
>>>>>> re training its main link after .hpd_enable() callback enabled HPD
>>>>>> interrupts and bring up display accordingly.
>>>>> How will it re-train the main link? What is the code path for that?
>>>>
>>>> 1) for edp, dp_bridge_atomic_enable(), called from framework, to start
>>>> link training and bring up display.
>>>
>>> And this path doesn't use .hpd_enable() which you have mentioned in
>>> the commit message. Please don't try to shorten the commit message.
>>> You see, I have had questions to several of them, which means that
>>> they were not verbose enough.
>>>
>>>>
>>>> 2) for external DP, HPD_PLUG_INT will be generated to start link
>>>> training and bring up display.
>>>
>>> This should be hpd_notify, who starts link training, not some event.
>>
>> I think this driver should train the link during atomic_enable(), not
>> hpd_notify() (or directly from the irq handler). The drm_bridge_funcs
>> talk a bit about when the clocks and timing signals are supposed to be
>> enabled. For example, struct drm_bridge_funcs::atomic_pre_enable() says
>> the "display pipe (i.e.  clocks and timing signals) feeding this bridge
>> will not yet be running when this callback is called". And struct
>> drm_bridge_funcs::atomic_enable() says "this callback must enable the
>> display link feeding the next bridge in the chain if there is one."
>>
>> That looks to me like link training, i.e. the display link, should
>> happen in the enable path and not hpd_notify. It looks like link
>> training could fail, but when that happens I believe the driver should
>> call drm_connector_set_link_status_property() with
>> DRM_MODE_LINK_STATUS_BAD. The two callers of that which exist in the
>> tree also call drm_kms_helper_hotplug_event() or
>> drm_kms_helper_connector_hotplug_event() after updating the link so that
>> userspace knows to try again.
>>
>> It would be nice if there was some drm_bridge_set_link_status_bad() API
>> that bridge drivers could use to signal that the link status is bad and
>> call the hotplug helper. Maybe it could also record some diagnostics
>> about which bridge failed to setup the link and stop the atomic_enable()
>> chain for that connector.
>
> Doing link training when we get hpd instead of atomic_enable() is a 
> design choice we have been following for a while because for the case 
> when link training fails in atomic_enable() and setting the link 
> status property as you mentioned, the compositor needs to be able to 
> handle that and also needs to try with a different resolution or take 
> some other corrective action. We have seen many compositors not able 
> to handle this complexity. So the design sends the hotplug to usermode 
> only after link training succeeds.
>
> I do not think we should change this design unless prototyped with an 
> existing compositor such as chrome or android at this point.
>
> Thanks
>
> Abhinav


We did perform link training at atomic_enable() at eDP case since we can 
assume link training will always success without link rate or link lane 
being reduced.

However for external DP case, link training can not be guarantee always 
success without link rate or lane being reduced as Abhinav mentioned.

In addition,  CTS (compliance test) it required to complete link 
training within 10ms after hpd asserted.

I am not sure do link training at atomic_enable() can meet this timing 
requirement.
Dmitry Baryshkov Sept. 27, 2023, 10:04 p.m. UTC | #7
On Thu, 28 Sept 2023 at 01:01, Stephen Boyd <swboyd@chromium.org> wrote:
>
> Quoting Kuogee Hsieh (2023-09-25 09:07:18)
> >
> > On 9/22/2023 6:35 PM, Abhinav Kumar wrote:
> > >
> > > Doing link training when we get hpd instead of atomic_enable() is a
> > > design choice we have been following for a while because for the case
> > > when link training fails in atomic_enable() and setting the link
> > > status property as you mentioned, the compositor needs to be able to
> > > handle that and also needs to try with a different resolution or take
> > > some other corrective action. We have seen many compositors not able
> > > to handle this complexity. So the design sends the hotplug to usermode
> > > only after link training succeeds.
> > >
> > > I do not think we should change this design unless prototyped with an
> > > existing compositor such as chrome or android at this point.
> > >
> > > Thanks
> > >
> > > Abhinav
> >
> >
> > We did perform link training at atomic_enable() at eDP case since we can
> > assume link training will always success without link rate or link lane
> > being reduced.
> >
> > However for external DP case, link training can not be guarantee always
> > success without link rate or lane being reduced as Abhinav mentioned.
> >
> > In addition,  CTS (compliance test) it required to complete link
> > training within 10ms after hpd asserted.
>
> Is it possible to change that timeout? I have to look around for the CTS
> parameters because I'm pretty confused how it can work. What do we do if
> DP wakes the system from suspend and asserts HPD? We need resume time to
> be < 10ms?  That's not realistic.
>
> >
> > I am not sure do link training at atomic_enable() can meet this timing
> > requirement.
> >
>
> At least in the DP spec itself it doesn't require the link to be trained
> within 10ms of HPD being asserted. Instead it simply recommends that the
> OS start configuring the display promptly after HPD is asserted, e.g.
> within 100ms. There's some strict timing on IRQ_HPD, so the driver must
> read DPCD registers within 100ms of IRQ_HPD rising edge; maybe that is
> what CTS is checking for?
>
> TL;DR: I don't see why CTS should stop us from link training in
> atomic_enable(). It would be beneficial to do so to make eDP and DP the
> same. It would also help to report a drm connector being connected
> _before_ link training so that userspace knows the link itself is the
> bad part of the equation (and not that the DP connector looks
> disconnected to userspace when in fact it really is connected and the
> monitor is asserting HPD, just the link training failed).

Also this will move us closer to i915 user experience: failing link
training in the display enablement part. So that a part of xrandr
calls can retry link training.
Abhinav Kumar Sept. 28, 2023, 10:35 p.m. UTC | #8
On 9/27/2023 2:41 PM, Stephen Boyd wrote:
> Quoting Abhinav Kumar (2023-09-22 18:35:27)
>> On 9/22/2023 2:54 PM, Stephen Boyd wrote:
>>> Quoting Dmitry Baryshkov (2023-09-19 02:50:12)
>>>>
>>>> This should be hpd_notify, who starts link training, not some event.
>>>
>>> I think this driver should train the link during atomic_enable(), not
>>> hpd_notify() (or directly from the irq handler). The drm_bridge_funcs
>>> talk a bit about when the clocks and timing signals are supposed to be
>>> enabled. For example, struct drm_bridge_funcs::atomic_pre_enable() says
>>> the "display pipe (i.e.  clocks and timing signals) feeding this bridge
>>> will not yet be running when this callback is called". And struct
>>> drm_bridge_funcs::atomic_enable() says "this callback must enable the
>>> display link feeding the next bridge in the chain if there is one."
>>>
>>> That looks to me like link training, i.e. the display link, should
>>> happen in the enable path and not hpd_notify. It looks like link
>>> training could fail, but when that happens I believe the driver should
>>> call drm_connector_set_link_status_property() with
>>> DRM_MODE_LINK_STATUS_BAD. The two callers of that which exist in the
>>> tree also call drm_kms_helper_hotplug_event() or
>>> drm_kms_helper_connector_hotplug_event() after updating the link so that
>>> userspace knows to try again.
>>>
>>> It would be nice if there was some drm_bridge_set_link_status_bad() API
>>> that bridge drivers could use to signal that the link status is bad and
>>> call the hotplug helper. Maybe it could also record some diagnostics
>>> about which bridge failed to setup the link and stop the atomic_enable()
>>> chain for that connector.
>>
>> Doing link training when we get hpd instead of atomic_enable() is a
>> design choice we have been following for a while because for the case
>> when link training fails in atomic_enable() and setting the link status
>> property as you mentioned, the compositor needs to be able to handle
>> that and also needs to try with a different resolution or take some
>> other corrective action. We have seen many compositors not able to
>> handle this complexity.
> 
> The chrome compositor handles this case[1]. If the link status is set to
> bad and there are non-zero number of modes on a connected connector it
> resets the status to good to try again.
> 

Thanks for the link. Just resetting the property alone and trying again 
is going to lead to the same failure again. So that alone is 
insufficient and doesn't sound right.

As documented here:

https://www.kernel.org/doc/html/latest/gpu/drm-kms.html#standard-connector-properties

"When user-space receives the hotplug uevent and detects a "BAD" 
link-status, the sink doesn't receive pixels anymore (e.g. the screen 
becomes completely black). The list of available modes may have changed. 
User-space is expected to pick a new mode if the current one has 
disappeared and perform a new modeset with link-status set to "GOOD" to 
re-enable the connector."

Picking a different mode is a reasonable attempt to try again but even 
that might fail again if it picks a mode which falls in the same link rate.

Thats why, to re-iterate what i mentioned, we need to see if some sort 
of a handshake fallback exists or can be implemented. It will need 
compositor support as well as driver change to maybe remove that mode etc.

We prioritized user experience here to make sure display_enable() wont 
fail otherwise the user might keep waiting for the screen to come up 
forever. With the driver ensuring link is trained and then reporting to 
usermode, its a safer option as the driver will train with the highest 
link rate / lane combination supported and also remove modes which dont 
fall in this bucket in dp_bridge_mode_valid.

Till we validate this, I would prefer to keep this change out of this 
series.

>> So the design sends the hotplug to usermode only
>> after link training succeeds.
> 
> I suspect this is why my trogdor device turns off after rebooting when I
> apply a ChromeOS update with the lid closed and DP connected. Userspace
> wants to know that a display is connected, but this driver is still link
> training by the time userspace boots up so we don't see any drm
> connector indicating status is connected. No drm connectors connected
> means the OS should shutdown.
> 

Interesting use case but I am not sure if thats whats happening till we 
debug that. Why should OS shutdown if connectors are not in "connected" 
state? Agreed, display will be off. But shouldnt compositor be alive in 
case it receives hotplug? The user (in this case you) never turned the 
device off so why should the OS shutdown?

>>
>> I do not think we should change this design unless prototyped with an
>> existing compositor such as chrome or android at this point.
> 
> Is this driver used with android?
> 

There are some internal efforts ongoing with prototyping this but I 
cannot comment more on this right now.

> [1] https://source.chromium.org/chromium/chromium/src/+/main:ui/ozone/platform/drm/gpu/hardware_display_plane_manager_atomic.cc;l=114;drc=67520ac99db89395b10f2ab728b540eef0da8292
Abhinav Kumar Sept. 29, 2023, 12:46 a.m. UTC | #9
On 9/27/2023 3:01 PM, Stephen Boyd wrote:
> Quoting Kuogee Hsieh (2023-09-25 09:07:18)
>>
>> On 9/22/2023 6:35 PM, Abhinav Kumar wrote:
>>>
>>> Doing link training when we get hpd instead of atomic_enable() is a
>>> design choice we have been following for a while because for the case
>>> when link training fails in atomic_enable() and setting the link
>>> status property as you mentioned, the compositor needs to be able to
>>> handle that and also needs to try with a different resolution or take
>>> some other corrective action. We have seen many compositors not able
>>> to handle this complexity. So the design sends the hotplug to usermode
>>> only after link training succeeds.
>>>
>>> I do not think we should change this design unless prototyped with an
>>> existing compositor such as chrome or android at this point.
>>>
>>> Thanks
>>>
>>> Abhinav
>>
>>
>> We did perform link training at atomic_enable() at eDP case since we can
>> assume link training will always success without link rate or link lane
>> being reduced.
>>
>> However for external DP case, link training can not be guarantee always
>> success without link rate or lane being reduced as Abhinav mentioned.
>>
>> In addition,  CTS (compliance test) it required to complete link
>> training within 10ms after hpd asserted.
> 
> Is it possible to change that timeout? I have to look around for the CTS
> parameters because I'm pretty confused how it can work. What do we do if
> DP wakes the system from suspend and asserts HPD? We need resume time to
> be < 10ms?  That's not realistic.
> 

No, the CTS doesnt say we need to finish link training within 10ms after 
HPD is asserted. It says it must be completed in 10ms after 
TRAINING_PATTERN_SET dpcd write.

"Wait until the Source DUT writes 00h to the TRAINING_PATTERN_SET byte 
of Reference Sink DPCD Link Configuration Field to indicate the end of 
the link training. Stop the link training timer. Verify that link 
training completed in 10ms or less"

That needs to be done independent of HPD so we can ignore the CTS point.

>>
>> I am not sure do link training at atomic_enable() can meet this timing
>> requirement.
>>
> 
> At least in the DP spec itself it doesn't require the link to be trained
> within 10ms of HPD being asserted. Instead it simply recommends that the
> OS start configuring the display promptly after HPD is asserted, e.g.
> within 100ms. There's some strict timing on IRQ_HPD, so the driver must
> read DPCD registers within 100ms of IRQ_HPD rising edge; maybe that is
> what CTS is checking for?
> 
> TL;DR: I don't see why CTS should stop us from link training in
> atomic_enable(). It would be beneficial to do so to make eDP and DP the
> same. It would also help to report a drm connector being connected
> _before_ link training so that userspace knows the link itself is the
> bad part of the equation (and not that the DP connector looks
> disconnected to userspace when in fact it really is connected and the
> monitor is asserting HPD, just the link training failed).

Its the corrective action of the userspace when it finds link is bad is 
the concern as I highlighted in the other response. Just reading and 
resetting link_status is not enough to recover.
Stephen Boyd Oct. 2, 2023, 10:58 p.m. UTC | #10
Quoting Abhinav Kumar (2023-09-28 17:46:11)
> On 9/27/2023 3:01 PM, Stephen Boyd wrote:
> > Quoting Kuogee Hsieh (2023-09-25 09:07:18)
> >>
> >> However for external DP case, link training can not be guarantee always
> >> success without link rate or lane being reduced as Abhinav mentioned.
> >>
> >> In addition,  CTS (compliance test) it required to complete link
> >> training within 10ms after hpd asserted.
> >
> > Is it possible to change that timeout? I have to look around for the CTS
> > parameters because I'm pretty confused how it can work. What do we do if
> > DP wakes the system from suspend and asserts HPD? We need resume time to
> > be < 10ms?  That's not realistic.
> >
>
> No, the CTS doesnt say we need to finish link training within 10ms after
> HPD is asserted. It says it must be completed in 10ms after
> TRAINING_PATTERN_SET dpcd write.
>
> "Wait until the Source DUT writes 00h to the TRAINING_PATTERN_SET byte
> of Reference Sink DPCD Link Configuration Field to indicate the end of
> the link training. Stop the link training timer. Verify that link
> training completed in 10ms or less"
>
> That needs to be done independent of HPD so we can ignore the CTS point.

Great!

>
> >>
> >> I am not sure do link training at atomic_enable() can meet this timing
> >> requirement.

Why? It's putting some time bound on link training in general to only
take 10ms, right?

> >>
> >
> > At least in the DP spec itself it doesn't require the link to be trained
> > within 10ms of HPD being asserted. Instead it simply recommends that the
> > OS start configuring the display promptly after HPD is asserted, e.g.
> > within 100ms. There's some strict timing on IRQ_HPD, so the driver must
> > read DPCD registers within 100ms of IRQ_HPD rising edge; maybe that is
> > what CTS is checking for?
> >
> > TL;DR: I don't see why CTS should stop us from link training in
> > atomic_enable(). It would be beneficial to do so to make eDP and DP the
> > same. It would also help to report a drm connector being connected
> > _before_ link training so that userspace knows the link itself is the
> > bad part of the equation (and not that the DP connector looks
> > disconnected to userspace when in fact it really is connected and the
> > monitor is asserting HPD, just the link training failed).
>
> Its the corrective action of the userspace when it finds link is bad is
> the concern as I highlighted in the other response. Just reading and
> resetting link_status is not enough to recover.

What needs to be done to recover? Userspace will try to set a mode on
the connector again if the link status is bad and there were some modes
available. If there are zero modes and the link is bad, then it ignores
the connector. I'm not sure what else could be done to recover besides
try again and stop trying if no modes exist.

Acting like the connector isn't connected makes the situation worse for
ChromeOS because userspace thinks there's nothing there so it can't try
to retrain the link again. Instead, userspace has to rely on the kernel
driver to train the link again. The kernel should just tell userspace
the link is bad so userspace can implement the policy to either ignore
the connector entirely or to consider it a display that is having link
training problems.

So again, I see no reason why the kernel driver thinks it can implement
a policy to train the link before indicating the drm connector is
connected. It should stop doing that. Instead it should tell userspace
that the connector is connected and then train the link when there's a
modeset. If the modeset fails then userspace can take action to either
figure out that the link is bad, or notify the user that the cable is
bad, or to try replugging or power cycle the monitor, etc. None of that
can be done if the kernel lies about the state of the connector because
the link training failed.
Abhinav Kumar Oct. 3, 2023, 1:32 a.m. UTC | #11
On 10/2/2023 3:58 PM, Stephen Boyd wrote:
> Quoting Abhinav Kumar (2023-09-28 17:46:11)
>> On 9/27/2023 3:01 PM, Stephen Boyd wrote:
>>> Quoting Kuogee Hsieh (2023-09-25 09:07:18)
>>>>
>>>> However for external DP case, link training can not be guarantee always
>>>> success without link rate or lane being reduced as Abhinav mentioned.
>>>>
>>>> In addition,  CTS (compliance test) it required to complete link
>>>> training within 10ms after hpd asserted.
>>>
>>> Is it possible to change that timeout? I have to look around for the CTS
>>> parameters because I'm pretty confused how it can work. What do we do if
>>> DP wakes the system from suspend and asserts HPD? We need resume time to
>>> be < 10ms?  That's not realistic.
>>>
>>
>> No, the CTS doesnt say we need to finish link training within 10ms after
>> HPD is asserted. It says it must be completed in 10ms after
>> TRAINING_PATTERN_SET dpcd write.
>>
>> "Wait until the Source DUT writes 00h to the TRAINING_PATTERN_SET byte
>> of Reference Sink DPCD Link Configuration Field to indicate the end of
>> the link training. Stop the link training timer. Verify that link
>> training completed in 10ms or less"
>>
>> That needs to be done independent of HPD so we can ignore the CTS point.
> 
> Great!
> 
>>
>>>>
>>>> I am not sure do link training at atomic_enable() can meet this timing
>>>> requirement.
> 
> Why? It's putting some time bound on link training in general to only
> take 10ms, right?
> 

Like I said, CTS is mentioning 10ms to finish link training after the 
DUT writes 00h to the TRAINING_PATTERN_SET byte. So for this discussion 
lets leave out CTS for now.

>>>>
>>>
>>> At least in the DP spec itself it doesn't require the link to be trained
>>> within 10ms of HPD being asserted. Instead it simply recommends that the
>>> OS start configuring the display promptly after HPD is asserted, e.g.
>>> within 100ms. There's some strict timing on IRQ_HPD, so the driver must
>>> read DPCD registers within 100ms of IRQ_HPD rising edge; maybe that is
>>> what CTS is checking for?
>>>
>>> TL;DR: I don't see why CTS should stop us from link training in
>>> atomic_enable(). It would be beneficial to do so to make eDP and DP the
>>> same. It would also help to report a drm connector being connected
>>> _before_ link training so that userspace knows the link itself is the
>>> bad part of the equation (and not that the DP connector looks
>>> disconnected to userspace when in fact it really is connected and the
>>> monitor is asserting HPD, just the link training failed).
>>
>> Its the corrective action of the userspace when it finds link is bad is
>> the concern as I highlighted in the other response. Just reading and
>> resetting link_status is not enough to recover.
> 
> What needs to be done to recover? Userspace will try to set a mode on
> the connector again if the link status is bad and there were some modes
> available. If there are zero modes and the link is bad, then it ignores
> the connector. I'm not sure what else could be done to recover besides
> try again and stop trying if no modes exist.
> 

Let me re-explain if I didnt make this clear last time.

You are right. Thats all the "userspace" can do which is basically retry 
the mode. And like I said, its again only going to fail. All the 
corrective actions you mentioned below like ignoring the connector 
entirely or consider that the display has link training problems are not 
something we decided to go with on a commercial device where we expect 
things to be more reliable.

Let me re-explain what I explained in the prev response.

If driver issues hot-plug after link-training:

It would have implemented all the link training mechanisms such as 
trying lower rates/number of lanes and made sure that when the usermode 
queries the list of modes, only the modes which fit into the link rate 
which was link trained successfully will be exposed and the chances of a 
user ending up with a blank screen on connection are pretty high.

This reduces the dependency on usermodes to be smart enough to implement 
such policies and we would rather not depend on those unless we have 
some reference to a compositor which is more sturdy. I do not think the 
CrOS code you have pointed to is more sturdy than the driver mechanism 
explained above.

As opposed to this, if we just issue hotplug without any of this, 
usermode does not know which mode to retry as we do not remove or edit 
the mode list once link training fails.

> Acting like the connector isn't connected makes the situation worse for
> ChromeOS because userspace thinks there's nothing there so it can't try
> to retrain the link again. Instead, userspace has to rely on the kernel
> driver to train the link again. The kernel should just tell userspace
> the link is bad so userspace can implement the policy to either ignore
> the connector entirely or to consider it a display that is having link
> training problems.
> 

What gain will it give if it retries the same mode blindly as opposed to 
the safer option I have explained above. None of the policies you have 
highlighted seem like something an end user will be satisfied with.

> So again, I see no reason why the kernel driver thinks it can implement
> a policy to train the link before indicating the drm connector is
> connected. It should stop doing that. Instead it should tell userspace
> that the connector is connected and then train the link when there's a
> modeset. If the modeset fails then userspace can take action to either
> figure out that the link is bad, or notify the user that the cable is
> bad, or to try replugging or power cycle the monitor, etc. None of that
> can be done if the kernel lies about the state of the connector because
> the link training failed.

Usermode is unable to take the corrective action without proper support 
from the kernel like removing unsupported modes etc and I dont see other 
drivers taking an action like that. Kernel is not lying. Its delaying 
the status to a point where usermode can safely handle.

Please explain to me how any of the policies you have explained usermode 
can take are safer and have more chance of success than what we have now.
Dmitry Baryshkov Oct. 3, 2023, 8 a.m. UTC | #12
On Tue, 3 Oct 2023 at 04:33, Abhinav Kumar <quic_abhinavk@quicinc.com> wrote:
>
>
>
> On 10/2/2023 3:58 PM, Stephen Boyd wrote:
> > Quoting Abhinav Kumar (2023-09-28 17:46:11)
> >> On 9/27/2023 3:01 PM, Stephen Boyd wrote:
> >>> Quoting Kuogee Hsieh (2023-09-25 09:07:18)
> >>>>
> >>>> However for external DP case, link training can not be guarantee always
> >>>> success without link rate or lane being reduced as Abhinav mentioned.
> >>>>
> >>>> In addition,  CTS (compliance test) it required to complete link
> >>>> training within 10ms after hpd asserted.
> >>>
> >>> Is it possible to change that timeout? I have to look around for the CTS
> >>> parameters because I'm pretty confused how it can work. What do we do if
> >>> DP wakes the system from suspend and asserts HPD? We need resume time to
> >>> be < 10ms?  That's not realistic.
> >>>
> >>
> >> No, the CTS doesnt say we need to finish link training within 10ms after
> >> HPD is asserted. It says it must be completed in 10ms after
> >> TRAINING_PATTERN_SET dpcd write.
> >>
> >> "Wait until the Source DUT writes 00h to the TRAINING_PATTERN_SET byte
> >> of Reference Sink DPCD Link Configuration Field to indicate the end of
> >> the link training. Stop the link training timer. Verify that link
> >> training completed in 10ms or less"
> >>
> >> That needs to be done independent of HPD so we can ignore the CTS point.
> >
> > Great!
> >
> >>
> >>>>
> >>>> I am not sure do link training at atomic_enable() can meet this timing
> >>>> requirement.
> >
> > Why? It's putting some time bound on link training in general to only
> > take 10ms, right?
> >
>
> Like I said, CTS is mentioning 10ms to finish link training after the
> DUT writes 00h to the TRAINING_PATTERN_SET byte. So for this discussion
> lets leave out CTS for now.
>
> >>>>
> >>>
> >>> At least in the DP spec itself it doesn't require the link to be trained
> >>> within 10ms of HPD being asserted. Instead it simply recommends that the
> >>> OS start configuring the display promptly after HPD is asserted, e.g.
> >>> within 100ms. There's some strict timing on IRQ_HPD, so the driver must
> >>> read DPCD registers within 100ms of IRQ_HPD rising edge; maybe that is
> >>> what CTS is checking for?
> >>>
> >>> TL;DR: I don't see why CTS should stop us from link training in
> >>> atomic_enable(). It would be beneficial to do so to make eDP and DP the
> >>> same. It would also help to report a drm connector being connected
> >>> _before_ link training so that userspace knows the link itself is the
> >>> bad part of the equation (and not that the DP connector looks
> >>> disconnected to userspace when in fact it really is connected and the
> >>> monitor is asserting HPD, just the link training failed).
> >>
> >> Its the corrective action of the userspace when it finds link is bad is
> >> the concern as I highlighted in the other response. Just reading and
> >> resetting link_status is not enough to recover.
> >
> > What needs to be done to recover? Userspace will try to set a mode on
> > the connector again if the link status is bad and there were some modes
> > available. If there are zero modes and the link is bad, then it ignores
> > the connector. I'm not sure what else could be done to recover besides
> > try again and stop trying if no modes exist.
> >
>
> Let me re-explain if I didnt make this clear last time.
>
> You are right. Thats all the "userspace" can do which is basically retry
> the mode. And like I said, its again only going to fail. All the
> corrective actions you mentioned below like ignoring the connector
> entirely or consider that the display has link training problems are not
> something we decided to go with on a commercial device where we expect
> things to be more reliable.

I have had link training issues with one of my laptops (x86) and USB-C
dock. Usually switching to lower resolution works in such cases.
Moreover, in some cases after switching to low res, I can successfully
switch to high res.

>
> Let me re-explain what I explained in the prev response.
>
> If driver issues hot-plug after link-training:
>
> It would have implemented all the link training mechanisms such as
> trying lower rates/number of lanes and made sure that when the usermode
> queries the list of modes, only the modes which fit into the link rate
> which was link trained successfully will be exposed and the chances of a
> user ending up with a blank screen on connection are pretty high.
>
> This reduces the dependency on usermodes to be smart enough to implement
> such policies and we would rather not depend on those unless we have
> some reference to a compositor which is more sturdy. I do not think the
> CrOS code you have pointed to is more sturdy than the driver mechanism
> explained above.
>
> As opposed to this, if we just issue hotplug without any of this,
> usermode does not know which mode to retry as we do not remove or edit
> the mode list once link training fails.

I think we are trying to be overprotective here. From my point of
view, there are two kinds of issues:
1) We know that some modes can not be supported (e.g. because of the
amount of lanes available or because of the board link rate
limitations).
Of course the kernel should not present these modes to userspace

2) Modes that pass known limitations, but can not be set e.g. because
of the bad cable or dirty connector.
Neither kernel nor userspace have control here. Judging from my
experience with x86, we should pass all these modes to userspace. Then
the user can select what seems to be working.

>
> > Acting like the connector isn't connected makes the situation worse for
> > ChromeOS because userspace thinks there's nothing there so it can't try
> > to retrain the link again. Instead, userspace has to rely on the kernel
> > driver to train the link again. The kernel should just tell userspace
> > the link is bad so userspace can implement the policy to either ignore
> > the connector entirely or to consider it a display that is having link
> > training problems.
> >
>
> What gain will it give if it retries the same mode blindly as opposed to
> the safer option I have explained above. None of the policies you have
> highlighted seem like something an end user will be satisfied with.
>
> > So again, I see no reason why the kernel driver thinks it can implement
> > a policy to train the link before indicating the drm connector is
> > connected. It should stop doing that. Instead it should tell userspace
> > that the connector is connected and then train the link when there's a
> > modeset. If the modeset fails then userspace can take action to either
> > figure out that the link is bad, or notify the user that the cable is
> > bad, or to try replugging or power cycle the monitor, etc. None of that
> > can be done if the kernel lies about the state of the connector because
> > the link training failed.
>
> Usermode is unable to take the corrective action without proper support
> from the kernel like removing unsupported modes etc and I dont see other
> drivers taking an action like that. Kernel is not lying. Its delaying
> the status to a point where usermode can safely handle.
>
> Please explain to me how any of the policies you have explained usermode
> can take are safer and have more chance of success than what we have now.
diff mbox series

Patch

diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c
index b6992202..b58cb02 100644
--- a/drivers/gpu/drm/msm/dp/dp_display.c
+++ b/drivers/gpu/drm/msm/dp/dp_display.c
@@ -1333,101 +1333,35 @@  static int dp_pm_runtime_resume(struct device *dev)
 	return 0;
 }
 
-static int dp_pm_resume(struct device *dev)
+static void dp_pm_complete(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct msm_dp *dp_display = platform_get_drvdata(pdev);
-	struct dp_display_private *dp;
-	int sink_count = 0;
-
-	dp = container_of(dp_display, struct dp_display_private, dp_display);
+	struct dp_display_private *dp = dev_get_dp_display_private(dev);
 
 	mutex_lock(&dp->event_mutex);
 
 	drm_dbg_dp(dp->drm_dev,
-		"Before, type=%d core_inited=%d phy_inited=%d power_on=%d\n",
+		"type=%d core_inited=%d phy_inited=%d power_on=%d\n",
 		dp->dp_display.connector_type, dp->core_initialized,
-		dp->phy_initialized, dp_display->power_on);
+		dp->phy_initialized, dp->dp_display.power_on);
 
 	/* start from disconnected state */
 	dp->hpd_state = ST_DISCONNECTED;
 
-	/* turn on dp ctrl/phy */
-	dp_display_host_init(dp);
-
-	if (dp_display->is_edp)
-		dp_catalog_ctrl_hpd_enable(dp->catalog);
-
-	if (dp_catalog_link_is_connected(dp->catalog)) {
-		/*
-		 * set sink to normal operation mode -- D0
-		 * before dpcd read
-		 */
-		dp_display_host_phy_init(dp);
-		dp_link_psm_config(dp->link, &dp->panel->link_info, false);
-		sink_count = drm_dp_read_sink_count(dp->aux);
-		if (sink_count < 0)
-			sink_count = 0;
-
-		dp_display_host_phy_exit(dp);
-	}
-
-	dp->link->sink_count = sink_count;
-	/*
-	 * can not declared display is connected unless
-	 * HDMI cable is plugged in and sink_count of
-	 * dongle become 1
-	 * also only signal audio when disconnected
-	 */
-	if (dp->link->sink_count) {
-		dp->dp_display.link_ready = true;
-	} else {
-		dp->dp_display.link_ready = false;
-		dp_display_handle_plugged_change(dp_display, false);
-	}
-
-	drm_dbg_dp(dp->drm_dev,
-		"After, type=%d sink=%d conn=%d core_init=%d phy_init=%d power=%d\n",
-		dp->dp_display.connector_type, dp->link->sink_count,
-		dp->dp_display.link_ready, dp->core_initialized,
-		dp->phy_initialized, dp_display->power_on);
-
 	mutex_unlock(&dp->event_mutex);
-
-	return 0;
 }
 
-static int dp_pm_suspend(struct device *dev)
+static int dp_pm_prepare(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct msm_dp *dp_display = platform_get_drvdata(pdev);
-	struct dp_display_private *dp;
-
-	dp = container_of(dp_display, struct dp_display_private, dp_display);
+	struct dp_display_private *dp = dev_get_dp_display_private(dev);
 
 	mutex_lock(&dp->event_mutex);
 
-	drm_dbg_dp(dp->drm_dev,
-		"Before, type=%d core_inited=%d  phy_inited=%d power_on=%d\n",
-		dp->dp_display.connector_type, dp->core_initialized,
-		dp->phy_initialized, dp_display->power_on);
-
 	/* mainlink enabled */
 	if (dp_power_clk_status(dp->power, DP_CTRL_PM))
 		dp_ctrl_off_link_stream(dp->ctrl);
 
-	dp_display_host_phy_exit(dp);
-
-	/* host_init will be called at pm_resume */
-	dp_display_host_deinit(dp);
-
 	dp->hpd_state = ST_SUSPENDED;
 
-	drm_dbg_dp(dp->drm_dev,
-		"After, type=%d core_inited=%d phy_inited=%d power_on=%d\n",
-		dp->dp_display.connector_type, dp->core_initialized,
-		dp->phy_initialized, dp_display->power_on);
-
 	mutex_unlock(&dp->event_mutex);
 
 	return 0;
@@ -1435,8 +1369,10 @@  static int dp_pm_suspend(struct device *dev)
 
 static const struct dev_pm_ops dp_pm_ops = {
 	SET_RUNTIME_PM_OPS(dp_pm_runtime_suspend, dp_pm_runtime_resume, NULL)
-	.suspend = dp_pm_suspend,
-	.resume =  dp_pm_resume,
+	SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+                                pm_runtime_force_resume)
+	.prepare = dp_pm_prepare,
+	.complete = dp_pm_complete,
 };
 
 static struct platform_driver dp_display_driver = {
@@ -1670,9 +1606,6 @@  void dp_bridge_atomic_post_disable(struct drm_bridge *drm_bridge,
 
 	dp_display = container_of(dp, struct dp_display_private, dp_display);
 
-	if (dp->is_edp)
-		dp_hpd_unplug_handle(dp_display, 0);
-
 	mutex_lock(&dp_display->event_mutex);
 
 	state = dp_display->hpd_state;