[4/7] drm/omap: Implement CTM property for CRTC using OVL managers CPR matrix

Message ID 20190902125359.18001-5-tomi.valkeinen@ti.com
State New
Headers show
Series
  • drm/omap: misc improvements
Related show

Commit Message

Tomi Valkeinen Sept. 2, 2019, 12:53 p.m.
From: Jyri Sarha <jsarha@ti.com>

Implement CTM color management property for OMAP CRTC using DSS
overlay manager's Color Phase Rotation matrix. The CPR matrix does not
exactly match the CTM property documentation. On DSS the CPR matrix is
applied after gamma table look up. However, it seems stupid to add a
custom property just for that.

Signed-off-by: Jyri Sarha <jsarha@ti.com>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
---
 drivers/gpu/drm/omapdrm/omap_crtc.c | 39 +++++++++++++++++++++++++++--
 1 file changed, 37 insertions(+), 2 deletions(-)

Comments

Laurent Pinchart Sept. 3, 2019, 3:24 p.m. | #1
Hi Tomi,

Thank you for the patch.

On Mon, Sep 02, 2019 at 03:53:56PM +0300, Tomi Valkeinen wrote:
> From: Jyri Sarha <jsarha@ti.com>
> 
> Implement CTM color management property for OMAP CRTC using DSS
> overlay manager's Color Phase Rotation matrix. The CPR matrix does not
> exactly match the CTM property documentation. On DSS the CPR matrix is
> applied after gamma table look up. However, it seems stupid to add a
> custom property just for that.

In that case the DRM documentation should be updated to mention that
both options are allowed.

> Signed-off-by: Jyri Sarha <jsarha@ti.com>
> Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
> ---
>  drivers/gpu/drm/omapdrm/omap_crtc.c | 39 +++++++++++++++++++++++++++--
>  1 file changed, 37 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/omapdrm/omap_crtc.c b/drivers/gpu/drm/omapdrm/omap_crtc.c
> index 3c5ddbf30e97..d63213dd7d83 100644
> --- a/drivers/gpu/drm/omapdrm/omap_crtc.c
> +++ b/drivers/gpu/drm/omapdrm/omap_crtc.c
> @@ -391,6 +391,32 @@ static void omap_crtc_manual_display_update(struct work_struct *data)
>  	}
>  }
>  
> +static s16 omap_crtc_S31_32_to_s2_8(s64 coef)
> +{
> +	uint64_t sign_bit = 1ULL << 63;
> +	uint64_t cbits = (uint64_t) coef;

s/uint64_t/u64/ for both lines as we're dealing with kernel code. And
there's no need for a space before coef.

> +	s16 ret = clamp_val(((cbits & ~sign_bit) >> 24), 0, 0x1FF);
> +
> +	if (cbits & sign_bit)
> +		ret = -ret;
> +
> +	return ret;

Can't this be simplified to 

	s16 ret = (coef >> 24) & 0x1ff;

	return coef < 0 ? -ret : ret;

> +}
> +
> +static void omap_crtc_cpr_coefs_from_ctm(const struct drm_color_ctm *ctm,
> +					 struct omap_dss_cpr_coefs *cpr)
> +{
> +	cpr->rr = omap_crtc_S31_32_to_s2_8(ctm->matrix[0]);
> +	cpr->rg = omap_crtc_S31_32_to_s2_8(ctm->matrix[1]);
> +	cpr->rb = omap_crtc_S31_32_to_s2_8(ctm->matrix[2]);
> +	cpr->gr = omap_crtc_S31_32_to_s2_8(ctm->matrix[3]);
> +	cpr->gg = omap_crtc_S31_32_to_s2_8(ctm->matrix[4]);
> +	cpr->gb = omap_crtc_S31_32_to_s2_8(ctm->matrix[5]);
> +	cpr->br = omap_crtc_S31_32_to_s2_8(ctm->matrix[6]);
> +	cpr->bg = omap_crtc_S31_32_to_s2_8(ctm->matrix[7]);
> +	cpr->bb = omap_crtc_S31_32_to_s2_8(ctm->matrix[8]);
> +}
> +
>  static void omap_crtc_write_crtc_properties(struct drm_crtc *crtc)
>  {
>  	struct omap_drm_private *priv = crtc->dev->dev_private;
> @@ -402,7 +428,16 @@ static void omap_crtc_write_crtc_properties(struct drm_crtc *crtc)
>  	info.default_color = 0x000000;
>  	info.trans_enabled = false;
>  	info.partial_alpha_enabled = false;
> -	info.cpr_enable = false;
> +
> +	if (crtc->state->ctm) {
> +		struct drm_color_ctm *ctm =
> +			(struct drm_color_ctm *) crtc->state->ctm->data;
> +
> +		info.cpr_enable = true;
> +		omap_crtc_cpr_coefs_from_ctm(ctm, &info.cpr_coefs);

As an optimisation it would be nice to only write the coefficients when
they actually change. That could be implemented on top of this series.

> +	} else {
> +		info.cpr_enable = false;
> +	}
>  
>  	priv->dispc_ops->mgr_setup(priv->dispc, omap_crtc->channel, &info);
>  }
> @@ -836,7 +871,7 @@ struct drm_crtc *omap_crtc_init(struct drm_device *dev,
>  	if (priv->dispc_ops->mgr_gamma_size(priv->dispc, channel)) {
>  		unsigned int gamma_lut_size = 256;
>  
> -		drm_crtc_enable_color_mgmt(crtc, 0, false, gamma_lut_size);
> +		drm_crtc_enable_color_mgmt(crtc, 0, true, gamma_lut_size);
>  		drm_mode_crtc_set_gamma_size(crtc, gamma_lut_size);
>  	}
>
Jyri Sarha Sept. 4, 2019, 7:17 a.m. | #2
On 03/09/2019 18:24, Laurent Pinchart wrote:
> Hi Tomi,
> 
> Thank you for the patch.
> 
> On Mon, Sep 02, 2019 at 03:53:56PM +0300, Tomi Valkeinen wrote:
>> From: Jyri Sarha <jsarha@ti.com>
>>
>> Implement CTM color management property for OMAP CRTC using DSS
>> overlay manager's Color Phase Rotation matrix. The CPR matrix does not
>> exactly match the CTM property documentation. On DSS the CPR matrix is
>> applied after gamma table look up. However, it seems stupid to add a
>> custom property just for that.
> 
> In that case the DRM documentation should be updated to mention that
> both options are allowed.
> 

Ok, if that is alright. But if we do that, then I guess all the drivers
implementing CTM should document the point where it is applied in the
pipeline.

>> Signed-off-by: Jyri Sarha <jsarha@ti.com>
>> Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
>> ---
>>  drivers/gpu/drm/omapdrm/omap_crtc.c | 39 +++++++++++++++++++++++++++--
>>  1 file changed, 37 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/omapdrm/omap_crtc.c b/drivers/gpu/drm/omapdrm/omap_crtc.c
>> index 3c5ddbf30e97..d63213dd7d83 100644
>> --- a/drivers/gpu/drm/omapdrm/omap_crtc.c
>> +++ b/drivers/gpu/drm/omapdrm/omap_crtc.c
>> @@ -391,6 +391,32 @@ static void omap_crtc_manual_display_update(struct work_struct *data)
>>  	}
>>  }
>>  
>> +static s16 omap_crtc_S31_32_to_s2_8(s64 coef)
>> +{
>> +	uint64_t sign_bit = 1ULL << 63;
>> +	uint64_t cbits = (uint64_t) coef;
> 
> s/uint64_t/u64/ for both lines as we're dealing with kernel code. And
> there's no need for a space before coef.
> 
>> +	s16 ret = clamp_val(((cbits & ~sign_bit) >> 24), 0, 0x1FF);
>> +
>> +	if (cbits & sign_bit)
>> +		ret = -ret;
>> +
>> +	return ret;
> 
> Can't this be simplified to 
> 
> 	s16 ret = (coef >> 24) & 0x1ff;
> 
> 	return coef < 0 ? -ret : ret;
> 

No. Clamping is a different thing. If the original value is greater than
what we can present with our 2 magnitude bit, we want to use the maximum
value, not something that we may have in the LSB end of bits. E.g. if
user-space tries to set the value to 2.0 (= 0x200) we rather present it
as 1.996 (= 0x1FF) than 0.0 (= 0x000).

>> +}
>> +
>> +static void omap_crtc_cpr_coefs_from_ctm(const struct drm_color_ctm *ctm,
>> +					 struct omap_dss_cpr_coefs *cpr)
>> +{
>> +	cpr->rr = omap_crtc_S31_32_to_s2_8(ctm->matrix[0]);
>> +	cpr->rg = omap_crtc_S31_32_to_s2_8(ctm->matrix[1]);
>> +	cpr->rb = omap_crtc_S31_32_to_s2_8(ctm->matrix[2]);
>> +	cpr->gr = omap_crtc_S31_32_to_s2_8(ctm->matrix[3]);
>> +	cpr->gg = omap_crtc_S31_32_to_s2_8(ctm->matrix[4]);
>> +	cpr->gb = omap_crtc_S31_32_to_s2_8(ctm->matrix[5]);
>> +	cpr->br = omap_crtc_S31_32_to_s2_8(ctm->matrix[6]);
>> +	cpr->bg = omap_crtc_S31_32_to_s2_8(ctm->matrix[7]);
>> +	cpr->bb = omap_crtc_S31_32_to_s2_8(ctm->matrix[8]);
>> +}
>> +
>>  static void omap_crtc_write_crtc_properties(struct drm_crtc *crtc)
>>  {
>>  	struct omap_drm_private *priv = crtc->dev->dev_private;
>> @@ -402,7 +428,16 @@ static void omap_crtc_write_crtc_properties(struct drm_crtc *crtc)
>>  	info.default_color = 0x000000;
>>  	info.trans_enabled = false;
>>  	info.partial_alpha_enabled = false;
>> -	info.cpr_enable = false;
>> +
>> +	if (crtc->state->ctm) {
>> +		struct drm_color_ctm *ctm =
>> +			(struct drm_color_ctm *) crtc->state->ctm->data;
>> +
>> +		info.cpr_enable = true;
>> +		omap_crtc_cpr_coefs_from_ctm(ctm, &info.cpr_coefs);
> 
> As an optimisation it would be nice to only write the coefficients when
> they actually change. That could be implemented on top of this series.
> 

E.g. apply this ?

- if (crtc->state->ctm)
+ if (crtc->state->color_mgmt_changed && crtc->state->ctm)


>> +	} else {
>> +		info.cpr_enable = false;
>> +	}
>>  
>>  	priv->dispc_ops->mgr_setup(priv->dispc, omap_crtc->channel, &info);
>>  }
>> @@ -836,7 +871,7 @@ struct drm_crtc *omap_crtc_init(struct drm_device *dev,
>>  	if (priv->dispc_ops->mgr_gamma_size(priv->dispc, channel)) {
>>  		unsigned int gamma_lut_size = 256;
>>  
>> -		drm_crtc_enable_color_mgmt(crtc, 0, false, gamma_lut_size);
>> +		drm_crtc_enable_color_mgmt(crtc, 0, true, gamma_lut_size);
>>  		drm_mode_crtc_set_gamma_size(crtc, gamma_lut_size);
>>  	}
>>  
>
Laurent Pinchart Sept. 4, 2019, 11:11 a.m. | #3
Hi Jyri,

On Wed, Sep 04, 2019 at 10:17:00AM +0300, Jyri Sarha wrote:
> On 03/09/2019 18:24, Laurent Pinchart wrote:
> > On Mon, Sep 02, 2019 at 03:53:56PM +0300, Tomi Valkeinen wrote:
> >> From: Jyri Sarha <jsarha@ti.com>
> >>
> >> Implement CTM color management property for OMAP CRTC using DSS
> >> overlay manager's Color Phase Rotation matrix. The CPR matrix does not
> >> exactly match the CTM property documentation. On DSS the CPR matrix is
> >> applied after gamma table look up. However, it seems stupid to add a
> >> custom property just for that.
> > 
> > In that case the DRM documentation should be updated to mention that
> > both options are allowed.
> 
> Ok, if that is alright. But if we do that, then I guess all the drivers
> implementing CTM should document the point where it is applied in the
> pipeline.

Whatever solution we end up picking, I think it should at least be
discussed with a broader upstream audience and not just swept under the
omapdrm carpet :-)

> >> Signed-off-by: Jyri Sarha <jsarha@ti.com>
> >> Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
> >> ---
> >>  drivers/gpu/drm/omapdrm/omap_crtc.c | 39 +++++++++++++++++++++++++++--
> >>  1 file changed, 37 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/omapdrm/omap_crtc.c b/drivers/gpu/drm/omapdrm/omap_crtc.c
> >> index 3c5ddbf30e97..d63213dd7d83 100644
> >> --- a/drivers/gpu/drm/omapdrm/omap_crtc.c
> >> +++ b/drivers/gpu/drm/omapdrm/omap_crtc.c
> >> @@ -391,6 +391,32 @@ static void omap_crtc_manual_display_update(struct work_struct *data)
> >>  	}
> >>  }
> >>  
> >> +static s16 omap_crtc_S31_32_to_s2_8(s64 coef)
> >> +{
> >> +	uint64_t sign_bit = 1ULL << 63;
> >> +	uint64_t cbits = (uint64_t) coef;
> > 
> > s/uint64_t/u64/ for both lines as we're dealing with kernel code. And
> > there's no need for a space before coef.
> > 
> >> +	s16 ret = clamp_val(((cbits & ~sign_bit) >> 24), 0, 0x1FF);
> >> +
> >> +	if (cbits & sign_bit)
> >> +		ret = -ret;
> >> +
> >> +	return ret;
> > 
> > Can't this be simplified to 
> > 
> > 	s16 ret = (coef >> 24) & 0x1ff;
> > 
> > 	return coef < 0 ? -ret : ret;
> > 
> 
> No. Clamping is different thing. If the original value is greater than
> what we can present with our 2 magnitude bit, we want to use the maximum
> value, not something that we may have in the LSB end of bits. e.g if
> user-space tries to set the value to 2.0 (= 0x200) we rather present it
> as 1.996 (= 0x1FF) than 0.0 (= 0x000).

Of course, my bad.

Perhaps a stupid question, should we reject out of range values at
atomic check time ?

> >> +}
> >> +
> >> +static void omap_crtc_cpr_coefs_from_ctm(const struct drm_color_ctm *ctm,
> >> +					 struct omap_dss_cpr_coefs *cpr)
> >> +{
> >> +	cpr->rr = omap_crtc_S31_32_to_s2_8(ctm->matrix[0]);
> >> +	cpr->rg = omap_crtc_S31_32_to_s2_8(ctm->matrix[1]);
> >> +	cpr->rb = omap_crtc_S31_32_to_s2_8(ctm->matrix[2]);
> >> +	cpr->gr = omap_crtc_S31_32_to_s2_8(ctm->matrix[3]);
> >> +	cpr->gg = omap_crtc_S31_32_to_s2_8(ctm->matrix[4]);
> >> +	cpr->gb = omap_crtc_S31_32_to_s2_8(ctm->matrix[5]);
> >> +	cpr->br = omap_crtc_S31_32_to_s2_8(ctm->matrix[6]);
> >> +	cpr->bg = omap_crtc_S31_32_to_s2_8(ctm->matrix[7]);
> >> +	cpr->bb = omap_crtc_S31_32_to_s2_8(ctm->matrix[8]);
> >> +}
> >> +
> >>  static void omap_crtc_write_crtc_properties(struct drm_crtc *crtc)
> >>  {
> >>  	struct omap_drm_private *priv = crtc->dev->dev_private;
> >> @@ -402,7 +428,16 @@ static void omap_crtc_write_crtc_properties(struct drm_crtc *crtc)
> >>  	info.default_color = 0x000000;
> >>  	info.trans_enabled = false;
> >>  	info.partial_alpha_enabled = false;
> >> -	info.cpr_enable = false;
> >> +
> >> +	if (crtc->state->ctm) {
> >> +		struct drm_color_ctm *ctm =
> >> +			(struct drm_color_ctm *) crtc->state->ctm->data;
> >> +
> >> +		info.cpr_enable = true;
> >> +		omap_crtc_cpr_coefs_from_ctm(ctm, &info.cpr_coefs);
> > 
> > As an optimisation it would be nice to only write the coefficients when
> > they actually change. That could be implemented on top of this series.
> 
> E.g. apply this ?
> 
> - if (crtc->state->ctm)
> + if (crtc->state->color_mgmt_changed && crtc->state->ctm)

Something like that, but .mgr_setup() should then be taught not to write
unchanged CTM tables to registers. Do you think it would be worth it ?

> >> +	} else {
> >> +		info.cpr_enable = false;
> >> +	}
> >>  
> >>  	priv->dispc_ops->mgr_setup(priv->dispc, omap_crtc->channel, &info);
> >>  }
> >> @@ -836,7 +871,7 @@ struct drm_crtc *omap_crtc_init(struct drm_device *dev,
> >>  	if (priv->dispc_ops->mgr_gamma_size(priv->dispc, channel)) {
> >>  		unsigned int gamma_lut_size = 256;
> >>  
> >> -		drm_crtc_enable_color_mgmt(crtc, 0, false, gamma_lut_size);
> >> +		drm_crtc_enable_color_mgmt(crtc, 0, true, gamma_lut_size);
> >>  		drm_mode_crtc_set_gamma_size(crtc, gamma_lut_size);
> >>  	}
> >>
Jyri Sarha Sept. 4, 2019, 8:08 p.m. | #4
On 04/09/2019 14:11, Laurent Pinchart wrote:
> Hi Jyri,
> 
> On Wed, Sep 04, 2019 at 10:17:00AM +0300, Jyri Sarha wrote:
>> On 03/09/2019 18:24, Laurent Pinchart wrote:
>>> On Mon, Sep 02, 2019 at 03:53:56PM +0300, Tomi Valkeinen wrote:
>>>> From: Jyri Sarha <jsarha@ti.com>
>>>>
>>>> Implement CTM color management property for OMAP CRTC using DSS
>>>> overlay manager's Color Phase Rotation matrix. The CPR matrix does not
>>>> exactly match the CTM property documentation. On DSS the CPR matrix is
>>>> applied after gamma table look up. However, it seems stupid to add a
>>>> custom property just for that.
>>>
>>> In that case the DRM documentation should be updated to mention that
>>> both options are allowed.
>>
>> Ok, if that is alright. But if we do that, then I guess all the drivers
>> implementing CTM should document the point where it is applied in the
>> pipeline.
> 
> Whatever solution we end up picking, I think it should at least be
> discussed with a broader upstream audience and not just swept under the
> omapdrm carpet :-)
> 

I'll try to write something and send the next series to a wider audience.
Let's see what the jury says.

>>>> Signed-off-by: Jyri Sarha <jsarha@ti.com>
>>>> Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
>>>> ---
>>>>  drivers/gpu/drm/omapdrm/omap_crtc.c | 39 +++++++++++++++++++++++++++--
>>>>  1 file changed, 37 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/omapdrm/omap_crtc.c b/drivers/gpu/drm/omapdrm/omap_crtc.c
>>>> index 3c5ddbf30e97..d63213dd7d83 100644
>>>> --- a/drivers/gpu/drm/omapdrm/omap_crtc.c
>>>> +++ b/drivers/gpu/drm/omapdrm/omap_crtc.c
>>>> @@ -391,6 +391,32 @@ static void omap_crtc_manual_display_update(struct work_struct *data)
>>>>  	}
>>>>  }
>>>>  
>>>> +static s16 omap_crtc_S31_32_to_s2_8(s64 coef)
>>>> +{
>>>> +	uint64_t sign_bit = 1ULL << 63;
>>>> +	uint64_t cbits = (uint64_t) coef;
>>>
>>> s/uint64_t/u64/ for both lines as we're dealing with kernel code. And
>>> there's no need for a space before coef.
>>>
>>>> +	s16 ret = clamp_val(((cbits & ~sign_bit) >> 24), 0, 0x1FF);
>>>> +
>>>> +	if (cbits & sign_bit)
>>>> +		ret = -ret;
>>>> +
>>>> +	return ret;
>>>
>>> Can't this be simplified to 
>>>
>>> 	s16 ret = (coef >> 24) & 0x1ff;
>>>
>>> 	return coef < 0 ? -ret : ret;
>>>
>>
>> No. Clamping is different thing. If the original value is greater than
>> what we can present with our 2 magnitude bit, we want to use the maximum
>> value, not something that we may have in the LSB end of bits. e.g if
>> user-space tries to set the value to 2.0 (= 0x200) we rather present it
>> as 1.996 (= 0x1FF) than 0.0 (= 0x000).
> 
> Of course, my bad.
> 
> Perhaps a stupid question, should we reject out of range values at
> atomic check time ?
> 

I've at least seen CSC matrices with 2.0 values, so I think we should
accept those and use clamping, but maybe we should reject CTMs with
values far bigger than what we can represent. Such matrices would hardly
work the way they were intended.

>>>> +}
>>>> +
>>>> +static void omap_crtc_cpr_coefs_from_ctm(const struct drm_color_ctm *ctm,
>>>> +					 struct omap_dss_cpr_coefs *cpr)
>>>> +{
>>>> +	cpr->rr = omap_crtc_S31_32_to_s2_8(ctm->matrix[0]);
>>>> +	cpr->rg = omap_crtc_S31_32_to_s2_8(ctm->matrix[1]);
>>>> +	cpr->rb = omap_crtc_S31_32_to_s2_8(ctm->matrix[2]);
>>>> +	cpr->gr = omap_crtc_S31_32_to_s2_8(ctm->matrix[3]);
>>>> +	cpr->gg = omap_crtc_S31_32_to_s2_8(ctm->matrix[4]);
>>>> +	cpr->gb = omap_crtc_S31_32_to_s2_8(ctm->matrix[5]);
>>>> +	cpr->br = omap_crtc_S31_32_to_s2_8(ctm->matrix[6]);
>>>> +	cpr->bg = omap_crtc_S31_32_to_s2_8(ctm->matrix[7]);
>>>> +	cpr->bb = omap_crtc_S31_32_to_s2_8(ctm->matrix[8]);
>>>> +}
>>>> +
>>>>  static void omap_crtc_write_crtc_properties(struct drm_crtc *crtc)
>>>>  {
>>>>  	struct omap_drm_private *priv = crtc->dev->dev_private;
>>>> @@ -402,7 +428,16 @@ static void omap_crtc_write_crtc_properties(struct drm_crtc *crtc)
>>>>  	info.default_color = 0x000000;
>>>>  	info.trans_enabled = false;
>>>>  	info.partial_alpha_enabled = false;
>>>> -	info.cpr_enable = false;
>>>> +
>>>> +	if (crtc->state->ctm) {
>>>> +		struct drm_color_ctm *ctm =
>>>> +			(struct drm_color_ctm *) crtc->state->ctm->data;
>>>> +
>>>> +		info.cpr_enable = true;
>>>> +		omap_crtc_cpr_coefs_from_ctm(ctm, &info.cpr_coefs);
>>>
>>> As an optimisation it would be nice to only write the coefficients when
>>> they actually change. That could be implemented on top of this series.
>>
>> E.g. apply this ?
>>
>> - if (crtc->state->ctm)
>> + if (crtc->state->color_mgmt_changed && crtc->state->ctm)
> 
> Something like that, but .mgr_setup() should then be taught not to write
> unchanged CTM tables to registers. Do you think it would be worth it ?
> 

Hmmm, I guess I should do it like this:
if (crtc->state->color_mgmt_changed) {
	if (crtc->state->ctm) {
...
>>>> +	} else {
>>>> +		info.cpr_enable = false;
>>>> +	}
}

This way the whole CPR functionality is turned off if there is no
CTM in the crtc state.

>>>>  
>>>>  	priv->dispc_ops->mgr_setup(priv->dispc, omap_crtc->channel, &info);
>>>>  }
>>>> @@ -836,7 +871,7 @@ struct drm_crtc *omap_crtc_init(struct drm_device *dev,
>>>>  	if (priv->dispc_ops->mgr_gamma_size(priv->dispc, channel)) {
>>>>  		unsigned int gamma_lut_size = 256;
>>>>  
>>>> -		drm_crtc_enable_color_mgmt(crtc, 0, false, gamma_lut_size);
>>>> +		drm_crtc_enable_color_mgmt(crtc, 0, true, gamma_lut_size);
>>>>  		drm_mode_crtc_set_gamma_size(crtc, gamma_lut_size);
>>>>  	}
>>>>  
>
Ilia Mirkin Sept. 4, 2019, 8:20 p.m. | #5
On Wed, Sep 4, 2019 at 4:08 PM Jyri Sarha <jsarha@ti.com> wrote:
>
> On 04/09/2019 14:11, Laurent Pinchart wrote:
> > Hi Jyri,
> >
> > On Wed, Sep 04, 2019 at 10:17:00AM +0300, Jyri Sarha wrote:
> >> On 03/09/2019 18:24, Laurent Pinchart wrote:
> >>> On Mon, Sep 02, 2019 at 03:53:56PM +0300, Tomi Valkeinen wrote:
> >>>> From: Jyri Sarha <jsarha@ti.com>
> >>>>
> >>>> Implement CTM color management property for OMAP CRTC using DSS
> >>>> overlay manager's Color Phase Rotation matrix. The CPR matrix does not
> >>>> exactly match the CTM property documentation. On DSS the CPR matrix is
> >>>> applied after gamma table look up. However, it seems stupid to add a
> >>>> custom property just for that.
> >>>
> >>> In that case the DRM documentation should be updated to mention that
> >>> both options are allowed.
> >>
> >> Ok, if that is alright. But if we do that, then I guess all the drivers
> >> implementing CTM should document the point where it is applied in the
> >> pipeline.
> >
> > Whatever solution we end up picking, I think it should at least be
> > discussed with a broader upstream audience and not just swept under the
> > omapdrm carpet :-)
> >
>
> I'll try to write something and send the next series to wider audience.
> Let's see what jury says.

In case it's useful ... the pipeline normally goes degamma -> ctm ->
gamma. If your ctm is applied after gamma, perhaps you can just rename
"gamma" to "degamma" and be done? (There's the unfortunate case of
legacy gamma which does end up in "GAMMA" when using atomic helpers.
But in such a case, you won't have a CTM.)

>
> >>>> Signed-off-by: Jyri Sarha <jsarha@ti.com>
> >>>> Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
> >>>> ---
> >>>>  drivers/gpu/drm/omapdrm/omap_crtc.c | 39 +++++++++++++++++++++++++++--
> >>>>  1 file changed, 37 insertions(+), 2 deletions(-)
> >>>>
> >>>> diff --git a/drivers/gpu/drm/omapdrm/omap_crtc.c b/drivers/gpu/drm/omapdrm/omap_crtc.c
> >>>> index 3c5ddbf30e97..d63213dd7d83 100644
> >>>> --- a/drivers/gpu/drm/omapdrm/omap_crtc.c
> >>>> +++ b/drivers/gpu/drm/omapdrm/omap_crtc.c
> >>>> @@ -391,6 +391,32 @@ static void omap_crtc_manual_display_update(struct work_struct *data)
> >>>>    }
> >>>>  }
> >>>>
> >>>> +static s16 omap_crtc_S31_32_to_s2_8(s64 coef)
> >>>> +{
> >>>> +  uint64_t sign_bit = 1ULL << 63;
> >>>> +  uint64_t cbits = (uint64_t) coef;
> >>>
> >>> s/uint64_t/u64/ for both lines as we're dealing with kernel code. And
> >>> there's no need for a space before coef.
> >>>
> >>>> +  s16 ret = clamp_val(((cbits & ~sign_bit) >> 24), 0, 0x1FF);
> >>>> +
> >>>> +  if (cbits & sign_bit)
> >>>> +          ret = -ret;
> >>>> +
> >>>> +  return ret;
> >>>
> >>> Can't this be simplified to
> >>>
> >>>     s16 ret = (coef >> 24) & 0x1ff;
> >>>
> >>>     return coef < 0 ? -ret : ret;
> >>>
> >>
> >> No. Clamping is different thing. If the original value is greater than
> >> what we can present with our 2 magnitude bit, we want to use the maximum
> >> value, not something that we may have in the LSB end of bits. e.g if
> >> user-space tries to set the value to 2.0 (= 0x200) we rather present it
> >> as 1.996 (= 0x1FF) than 0.0 (= 0x000).
> >
> > Of course, my bad.
> >
> > Perhaps a stupid question, should we reject out of range values at
> > atomic check time ?
> >
>
> I've at least seen CSC matrices with 2.0 values, so I think we should
> accept those and use clamping, but maybe we should reject CTMs with
> values far bigger than what we can represent. Such matrices would hardly
> work the way they were intended.

I clamped in nouveau. Not 100% sure it's the right policy. Having
something consistent across drivers is probably good. I don't think I
came up with clamping all by myself, so someone else must have been
doing it. (https://cgit.freedesktop.org/drm/drm/commit/drivers/gpu/drm/nouveau?id=88b703527ba70659365d989f29579f1292ebf9c3
-- see csc_drm_to_base.)

Cheers,

  -ilia
Laurent Pinchart Sept. 4, 2019, 9:52 p.m. | #6
Hi Jyri,

On Wed, Sep 04, 2019 at 11:08:20PM +0300, Jyri Sarha wrote:
> On 04/09/2019 14:11, Laurent Pinchart wrote:
> > On Wed, Sep 04, 2019 at 10:17:00AM +0300, Jyri Sarha wrote:
> >> On 03/09/2019 18:24, Laurent Pinchart wrote:
> >>> On Mon, Sep 02, 2019 at 03:53:56PM +0300, Tomi Valkeinen wrote:
> >>>> From: Jyri Sarha <jsarha@ti.com>
> >>>>
> >>>> Implement CTM color management property for OMAP CRTC using DSS
> >>>> overlay manager's Color Phase Rotation matrix. The CPR matrix does not
> >>>> exactly match the CTM property documentation. On DSS the CPR matrix is
> >>>> applied after gamma table look up. However, it seems stupid to add a
> >>>> custom property just for that.
> >>>
> >>> In that case the DRM documentation should be updated to mention that
> >>> both options are allowed.
> >>
> >> Ok, if that is alright. But if we do that, then I guess all the drivers
> >> implementing CTM should document the point where it is applied in the
> >> pipeline.
> > 
> > Whatever solution we end up picking, I think it should at least be
> > discussed with a broader upstream audience and not just swept under the
> > omapdrm carpet :-)
> 
> I'll try to write something and send the next series to wider audience.
> Let's see what jury says.
> 
> >>>> Signed-off-by: Jyri Sarha <jsarha@ti.com>
> >>>> Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
> >>>> ---
> >>>>  drivers/gpu/drm/omapdrm/omap_crtc.c | 39 +++++++++++++++++++++++++++--
> >>>>  1 file changed, 37 insertions(+), 2 deletions(-)
> >>>>
> >>>> diff --git a/drivers/gpu/drm/omapdrm/omap_crtc.c b/drivers/gpu/drm/omapdrm/omap_crtc.c
> >>>> index 3c5ddbf30e97..d63213dd7d83 100644
> >>>> --- a/drivers/gpu/drm/omapdrm/omap_crtc.c
> >>>> +++ b/drivers/gpu/drm/omapdrm/omap_crtc.c
> >>>> @@ -391,6 +391,32 @@ static void omap_crtc_manual_display_update(struct work_struct *data)
> >>>>  	}
> >>>>  }
> >>>>  
> >>>> +static s16 omap_crtc_S31_32_to_s2_8(s64 coef)
> >>>> +{
> >>>> +	uint64_t sign_bit = 1ULL << 63;
> >>>> +	uint64_t cbits = (uint64_t) coef;
> >>>
> >>> s/uint64_t/u64/ for both lines as we're dealing with kernel code. And
> >>> there's no need for a space before coef.
> >>>
> >>>> +	s16 ret = clamp_val(((cbits & ~sign_bit) >> 24), 0, 0x1FF);
> >>>> +
> >>>> +	if (cbits & sign_bit)
> >>>> +		ret = -ret;
> >>>> +
> >>>> +	return ret;
> >>>
> >>> Can't this be simplified to 
> >>>
> >>> 	s16 ret = (coef >> 24) & 0x1ff;
> >>>
> >>> 	return coef < 0 ? -ret : ret;
> >>>
> >>
> >> No. Clamping is different thing. If the original value is greater than
> >> what we can present with our 2 magnitude bit, we want to use the maximum
> >> value, not something that we may have in the LSB end of bits. e.g if
> >> user-space tries to set the value to 2.0 (= 0x200) we rather present it
> >> as 1.996 (= 0x1FF) than 0.0 (= 0x000).
> > 
> > Of course, my bad.
> > 
> > Perhaps a stupid question, should we reject out of range values at
> > atomic check time ?
> 
> I've at least seen CSC matrices with 2.0 values, so I think we should
> accept those and use clamping, but maybe we should reject CTMs with
> values far bigger than what we can represent. Such matrices would hardly
> work the way they were intended.

I tend to be conservative in such cases and reject invalid values, but
if you think there are users in the wild that would break, then clamping
is fine with me too. If we want to reject values higher than 2.0 and
clamp 2.0 to 0x1ff then that should be done in atomic check, and here we
could convert the values blindly.

> >>>> +}
> >>>> +
> >>>> +static void omap_crtc_cpr_coefs_from_ctm(const struct drm_color_ctm *ctm,
> >>>> +					 struct omap_dss_cpr_coefs *cpr)
> >>>> +{
> >>>> +	cpr->rr = omap_crtc_S31_32_to_s2_8(ctm->matrix[0]);
> >>>> +	cpr->rg = omap_crtc_S31_32_to_s2_8(ctm->matrix[1]);
> >>>> +	cpr->rb = omap_crtc_S31_32_to_s2_8(ctm->matrix[2]);
> >>>> +	cpr->gr = omap_crtc_S31_32_to_s2_8(ctm->matrix[3]);
> >>>> +	cpr->gg = omap_crtc_S31_32_to_s2_8(ctm->matrix[4]);
> >>>> +	cpr->gb = omap_crtc_S31_32_to_s2_8(ctm->matrix[5]);
> >>>> +	cpr->br = omap_crtc_S31_32_to_s2_8(ctm->matrix[6]);
> >>>> +	cpr->bg = omap_crtc_S31_32_to_s2_8(ctm->matrix[7]);
> >>>> +	cpr->bb = omap_crtc_S31_32_to_s2_8(ctm->matrix[8]);
> >>>> +}
> >>>> +
> >>>>  static void omap_crtc_write_crtc_properties(struct drm_crtc *crtc)
> >>>>  {
> >>>>  	struct omap_drm_private *priv = crtc->dev->dev_private;
> >>>> @@ -402,7 +428,16 @@ static void omap_crtc_write_crtc_properties(struct drm_crtc *crtc)
> >>>>  	info.default_color = 0x000000;
> >>>>  	info.trans_enabled = false;
> >>>>  	info.partial_alpha_enabled = false;
> >>>> -	info.cpr_enable = false;
> >>>> +
> >>>> +	if (crtc->state->ctm) {
> >>>> +		struct drm_color_ctm *ctm =
> >>>> +			(struct drm_color_ctm *) crtc->state->ctm->data;
> >>>> +
> >>>> +		info.cpr_enable = true;
> >>>> +		omap_crtc_cpr_coefs_from_ctm(ctm, &info.cpr_coefs);
> >>>
> >>> As an optimisation it would be nice to only write the coefficients when
> >>> they actually change. That could be implemented on top of this series.
> >>
> >> E.g. apply this ?
> >>
> >> - if (crtc->state->ctm)
> >> + if (crtc->state->color_mgmt_changed && crtc->state->ctm)
> > 
> > Something like that, but .mgr_setup() should then be taught not to write
> > unchanged CTM tables to registers. Do you think it would be worth it ?
> 
> Hmmm, jess I should do it like this:
> if (crtc->state->color_mgmt_changed) {
> 	if (crtc->state->ctm) {
> ...
> >>>> +	} else {
> >>>> +		info.cpr_enable = false;
> >>>> +	}
> }
> 
> This way the whole CPR functionality is turned off, if the there is no
> CTM in the crtc state.

Yes, but you would also need to update .mgr_setup() :-) A new
color_mgmt_changed flag would be needed in the info structure too.

> >>>>  
> >>>>  	priv->dispc_ops->mgr_setup(priv->dispc, omap_crtc->channel, &info);
> >>>>  }
> >>>> @@ -836,7 +871,7 @@ struct drm_crtc *omap_crtc_init(struct drm_device *dev,
> >>>>  	if (priv->dispc_ops->mgr_gamma_size(priv->dispc, channel)) {
> >>>>  		unsigned int gamma_lut_size = 256;
> >>>>  
> >>>> -		drm_crtc_enable_color_mgmt(crtc, 0, false, gamma_lut_size);
> >>>> +		drm_crtc_enable_color_mgmt(crtc, 0, true, gamma_lut_size);
> >>>>  		drm_mode_crtc_set_gamma_size(crtc, gamma_lut_size);
> >>>>  	}
> >>>>
Jyri Sarha Sept. 5, 2019, 10 a.m. | #7
On 05/09/2019 00:52, Laurent Pinchart wrote:
>>>>>>  static void omap_crtc_write_crtc_properties(struct drm_crtc *crtc)
>>>>>>  {
>>>>>>  	struct omap_drm_private *priv = crtc->dev->dev_private;
>>>>>> @@ -402,7 +428,16 @@ static void omap_crtc_write_crtc_properties(struct drm_crtc *crtc)
>>>>>>  	info.default_color = 0x000000;
>>>>>>  	info.trans_enabled = false;
>>>>>>  	info.partial_alpha_enabled = false;
>>>>>> -	info.cpr_enable = false;
>>>>>> +
>>>>>> +	if (crtc->state->ctm) {
>>>>>> +		struct drm_color_ctm *ctm =
>>>>>> +			(struct drm_color_ctm *) crtc->state->ctm->data;
>>>>>> +
>>>>>> +		info.cpr_enable = true;
>>>>>> +		omap_crtc_cpr_coefs_from_ctm(ctm, &info.cpr_coefs);
>>>>> As an optimisation it would be nice to only write the coefficients when
>>>>> they actually change. That could be implemented on top of this series.
>>>> E.g. apply this ?
>>>>
>>>> - if (crtc->state->ctm)
>>>> + if (crtc->state->color_mgmt_changed && crtc->state->ctm)
>>> Something like that, but .mgr_setup() should then be taught not to write
>>> unchanged CTM tables to registers. Do you think it would be worth it ?
>> Hmmm, jess I should do it like this:
>> if (crtc->state->color_mgmt_changed) {
>> 	if (crtc->state->ctm) {
>> ...
>>>>>> +	} else {
>>>>>> +		info.cpr_enable = false;
>>>>>> +	}
>> }
>>
>> This way the whole CPR functionality is turned off, if the there is no
>> CTM in the crtc state.
> Yes, but you would also need to update .mgr_setup() :-) A new
> color_mgmt_changed flag would be needed in the info structure too.
> 

I am starting to think that such an "optimization" may not be worth the
added complexity. The arithmetic and writing three registers is not that
costly and we do not commit a new crtc state that often.

If we later consider otherwise we can add the optimization as a separate
patch.

BR,
Jyri
Laurent Pinchart Sept. 5, 2019, 10:05 a.m. | #8
Hi Jyri,

On Thu, Sep 05, 2019 at 01:00:51PM +0300, Jyri Sarha wrote:
> On 05/09/2019 00:52, Laurent Pinchart wrote:
> >>>>>>  static void omap_crtc_write_crtc_properties(struct drm_crtc *crtc)
> >>>>>>  {
> >>>>>>  	struct omap_drm_private *priv = crtc->dev->dev_private;
> >>>>>> @@ -402,7 +428,16 @@ static void omap_crtc_write_crtc_properties(struct drm_crtc *crtc)
> >>>>>>  	info.default_color = 0x000000;
> >>>>>>  	info.trans_enabled = false;
> >>>>>>  	info.partial_alpha_enabled = false;
> >>>>>> -	info.cpr_enable = false;
> >>>>>> +
> >>>>>> +	if (crtc->state->ctm) {
> >>>>>> +		struct drm_color_ctm *ctm =
> >>>>>> +			(struct drm_color_ctm *) crtc->state->ctm->data;
> >>>>>> +
> >>>>>> +		info.cpr_enable = true;
> >>>>>> +		omap_crtc_cpr_coefs_from_ctm(ctm, &info.cpr_coefs);
> >>>>>
> >>>>> As an optimisation it would be nice to only write the coefficients when
> >>>>> they actually change. That could be implemented on top of this series.
> >>>>
> >>>> E.g. apply this ?
> >>>>
> >>>> - if (crtc->state->ctm)
> >>>> + if (crtc->state->color_mgmt_changed && crtc->state->ctm)
> >>>
> >>> Something like that, but .mgr_setup() should then be taught not to write
> >>> unchanged CTM tables to registers. Do you think it would be worth it ?
> >>
> >> Hmmm, jess I should do it like this:
> >> if (crtc->state->color_mgmt_changed) {
> >> 	if (crtc->state->ctm) {
> >> ...
> >>>>>> +	} else {
> >>>>>> +		info.cpr_enable = false;
> >>>>>> +	}
> >> }
> >>
> >> This way the whole CPR functionality is turned off, if there is no
> >> CTM in the crtc state.
> >
> > Yes, but you would also need to update .mgr_setup() :-) A new
> > color_mgmt_changed flag would be needed in the info structure too.
> 
> I am starting to think that such an "optimization" may not be worth the
> added complexity. The arithmetic and writing three registers is not that
> costly and we do not commit a new crtc state that often.

We call omap_crtc_write_crtc_properties() in omap_crtc_atomic_flush(),
so that's at every page flip...

> If we later consider otherwise we can add the optimization as a separate
> patch.
Jyri Sarha Sept. 5, 2019, 1:48 p.m. | #9
On 05/09/2019 13:05, Laurent Pinchart wrote:
> Hi Jyri,
> 
> On Thu, Sep 05, 2019 at 01:00:51PM +0300, Jyri Sarha wrote:
>> On 05/09/2019 00:52, Laurent Pinchart wrote:
>>>>>>>>  static void omap_crtc_write_crtc_properties(struct drm_crtc *crtc)
>>>>>>>>  {
>>>>>>>>  	struct omap_drm_private *priv = crtc->dev->dev_private;
>>>>>>>> @@ -402,7 +428,16 @@ static void omap_crtc_write_crtc_properties(struct drm_crtc *crtc)
>>>>>>>>  	info.default_color = 0x000000;
>>>>>>>>  	info.trans_enabled = false;
>>>>>>>>  	info.partial_alpha_enabled = false;
>>>>>>>> -	info.cpr_enable = false;
>>>>>>>> +
>>>>>>>> +	if (crtc->state->ctm) {
>>>>>>>> +		struct drm_color_ctm *ctm =
>>>>>>>> +			(struct drm_color_ctm *) crtc->state->ctm->data;
>>>>>>>> +
>>>>>>>> +		info.cpr_enable = true;
>>>>>>>> +		omap_crtc_cpr_coefs_from_ctm(ctm, &info.cpr_coefs);
>>>>>>>
>>>>>>> As an optimisation it would be nice to only write the coefficients when
>>>>>>> they actually change. That could be implemented on top of this series.
>>>>>>
>>>>>> E.g. apply this ?
>>>>>>
>>>>>> - if (crtc->state->ctm)
>>>>>> + if (crtc->state->color_mgmt_changed && crtc->state->ctm)
>>>>>
>>>>> Something like that, but .mgr_setup() should then be taught not to write
>>>>> unchanged CTM tables to registers. Do you think it would be worth it ?
>>>>
>>>> Hmmm, jess I should do it like this:
>>>> if (crtc->state->color_mgmt_changed) {
>>>> 	if (crtc->state->ctm) {
>>>> ...
>>>>>>>> +	} else {
>>>>>>>> +		info.cpr_enable = false;
>>>>>>>> +	}
>>>> }
>>>>
>>>> This way the whole CPR functionality is turned off, if there is no
>>>> CTM in the crtc state.
>>>
>>> Yes, but you would also need to update .mgr_setup() :-) A new
>>> color_mgmt_changed flag would be needed in the info structure too.
>>
>> I am starting to think that such an "optimization" may not be worth the
>> added complexity. The arithmetic and writing three registers is not that
>> costly and we do not commit a new crtc state that often.
> 
> We call omap_crtc_write_crtc_properties() in omap_crtc_atomic_flush(),
> so that's at every page flip...
> 

Still, the mgr_setup() accesses five different registers even if we do
not touch CPR settings (another 4 registers). All of those have static
values in the mainline omapdrm (there are custom properties to control
those in ti-linux).

I would rather keep this patch as it is and implement another one with a
cached struct omap_overlay_manager_info, that calls mgr_setup() only if
the info values have changed.

With the cached values in place the unneeded conversion arithmetic can
also be skipped based on color_mgmt_changed.

BR,
Jyri

Patch

diff --git a/drivers/gpu/drm/omapdrm/omap_crtc.c b/drivers/gpu/drm/omapdrm/omap_crtc.c
index 3c5ddbf30e97..d63213dd7d83 100644
--- a/drivers/gpu/drm/omapdrm/omap_crtc.c
+++ b/drivers/gpu/drm/omapdrm/omap_crtc.c
@@ -391,6 +391,32 @@  static void omap_crtc_manual_display_update(struct work_struct *data)
 	}
 }
 
+static s16 omap_crtc_S31_32_to_s2_8(s64 coef)
+{
+	uint64_t sign_bit = 1ULL << 63;
+	uint64_t cbits = (uint64_t) coef;
+	s16 ret = clamp_val(((cbits & ~sign_bit) >> 24), 0, 0x1FF);
+
+	if (cbits & sign_bit)
+		ret = -ret;
+
+	return ret;
+}
+
+static void omap_crtc_cpr_coefs_from_ctm(const struct drm_color_ctm *ctm,
+					 struct omap_dss_cpr_coefs *cpr)
+{
+	cpr->rr = omap_crtc_S31_32_to_s2_8(ctm->matrix[0]);
+	cpr->rg = omap_crtc_S31_32_to_s2_8(ctm->matrix[1]);
+	cpr->rb = omap_crtc_S31_32_to_s2_8(ctm->matrix[2]);
+	cpr->gr = omap_crtc_S31_32_to_s2_8(ctm->matrix[3]);
+	cpr->gg = omap_crtc_S31_32_to_s2_8(ctm->matrix[4]);
+	cpr->gb = omap_crtc_S31_32_to_s2_8(ctm->matrix[5]);
+	cpr->br = omap_crtc_S31_32_to_s2_8(ctm->matrix[6]);
+	cpr->bg = omap_crtc_S31_32_to_s2_8(ctm->matrix[7]);
+	cpr->bb = omap_crtc_S31_32_to_s2_8(ctm->matrix[8]);
+}
+
 static void omap_crtc_write_crtc_properties(struct drm_crtc *crtc)
 {
 	struct omap_drm_private *priv = crtc->dev->dev_private;
@@ -402,7 +428,16 @@  static void omap_crtc_write_crtc_properties(struct drm_crtc *crtc)
 	info.default_color = 0x000000;
 	info.trans_enabled = false;
 	info.partial_alpha_enabled = false;
-	info.cpr_enable = false;
+
+	if (crtc->state->ctm) {
+		struct drm_color_ctm *ctm =
+			(struct drm_color_ctm *) crtc->state->ctm->data;
+
+		info.cpr_enable = true;
+		omap_crtc_cpr_coefs_from_ctm(ctm, &info.cpr_coefs);
+	} else {
+		info.cpr_enable = false;
+	}
 
 	priv->dispc_ops->mgr_setup(priv->dispc, omap_crtc->channel, &info);
 }
@@ -836,7 +871,7 @@  struct drm_crtc *omap_crtc_init(struct drm_device *dev,
 	if (priv->dispc_ops->mgr_gamma_size(priv->dispc, channel)) {
 		unsigned int gamma_lut_size = 256;
 
-		drm_crtc_enable_color_mgmt(crtc, 0, false, gamma_lut_size);
+		drm_crtc_enable_color_mgmt(crtc, 0, true, gamma_lut_size);
 		drm_mode_crtc_set_gamma_size(crtc, gamma_lut_size);
 	}