diff mbox series

[3/5] drm/msm/dsi: Account for DSC's bits_per_pixel having 4 fractional bits

Message ID 20221001190807.358691-4-marijn.suijten@somainline.org
State New
Headers show
Series drm: Fix math issues in MSM DSC implementation | expand

Commit Message

Marijn Suijten Oct. 1, 2022, 7:08 p.m. UTC
drm_dsc_config's bits_per_pixel field holds a fractional value with 4
bits, which all panel drivers should adhere to for
drm_dsc_pps_payload_pack() to generate a valid payload.  All code in the
DSI driver here seems to assume that this field doesn't contain any
fractional bits, hence resulting in the wrong values being computed.
Since none of the calculations leave any room for fractional bits or
seem to indicate any possible area of support, disallow such values
altogether.

Fixes: b9080324d6ca ("drm/msm/dsi: add support for dsc data")
Signed-off-by: Marijn Suijten <marijn.suijten@somainline.org>
---
 drivers/gpu/drm/msm/dsi/dsi_host.c | 34 +++++++++++++++++++++++-------
 1 file changed, 26 insertions(+), 8 deletions(-)

Comments

Marijn Suijten Oct. 1, 2022, 8:37 p.m. UTC | #1
Doing some self-review as these patches accrued some bit-rot while
waiting to be sent.

On 2022-10-01 21:08:05, Marijn Suijten wrote:
> drm_dsc_config's bits_per_pixel field holds a fractional value with 4
> bits, which all panel drivers should adhere to for
> drm_dsc_pps_payload_pack() to generate a valid payload.  All code in the
> DSI driver here seems to assume that this field doesn't contain any
> fractional bits, hence resulting in the wrong values being computed.
> Since none of the calculations leave any room for fractional bits or
> seem to indicate any possible area of support, disallow such values
> altogether.
> 
> Fixes: b9080324d6ca ("drm/msm/dsi: add support for dsc data")
> Signed-off-by: Marijn Suijten <marijn.suijten@somainline.org>
> ---
>  drivers/gpu/drm/msm/dsi/dsi_host.c | 34 +++++++++++++++++++++++-------
>  1 file changed, 26 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c
> index cb6f2fa11f58..42a5c9776f52 100644
> --- a/drivers/gpu/drm/msm/dsi/dsi_host.c
> +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c
> @@ -847,6 +847,11 @@ static void dsi_update_dsc_timing(struct msm_dsi_host *msm_host, bool is_cmd_mod
>  	u32 pkt_per_line;
>  	u32 bytes_in_slice;
>  	u32 eol_byte_num;
> +	int bpp = dsc->bits_per_pixel >> 4;

This should have been u16 instead of int.

> +
> +	if (dsc->bits_per_pixel & 0xf)

Should there be a define for this mask?

> +		/* dsi_populate_dsc_params() already caught this case */
> +		pr_err("DSI does not support fractional bits_per_pixel\n");

This file mostly uses pr_err(), but it's probably cleaner to use
DRM_DEV_ERROR(&msm_host->pdev->dev, ...) even though it's not prevalent
yet?

>  
>  	/* first calculate dsc parameters and then program
>  	 * compress mode registers
> @@ -860,7 +865,7 @@ static void dsi_update_dsc_timing(struct msm_dsi_host *msm_host, bool is_cmd_mod
>  	if (slice_per_intf > dsc->slice_count)
>  		dsc->slice_count = 1;
>  
> -	bytes_in_slice = DIV_ROUND_UP(dsc->slice_width * dsc->bits_per_pixel, 8);
> +	bytes_in_slice = DIV_ROUND_UP(dsc->slice_width * bpp, 8);
>  
>  	dsc->slice_chunk_size = bytes_in_slice;
>  
> @@ -913,6 +918,7 @@ static void dsi_timing_setup(struct msm_dsi_host *msm_host, bool is_bonded_dsi)
>  	u32 va_end = va_start + mode->vdisplay;
>  	u32 hdisplay = mode->hdisplay;
>  	u32 wc;
> +	int ret;
>  
>  	DBG("");
>  
> @@ -948,7 +954,9 @@ static void dsi_timing_setup(struct msm_dsi_host *msm_host, bool is_bonded_dsi)
>  		/* we do the calculations for dsc parameters here so that
>  		 * panel can use these parameters
>  		 */
> -		dsi_populate_dsc_params(dsc);
> +		ret = dsi_populate_dsc_params(dsc);
> +		if (ret)
> +			return;
>  
>  		/* Divide the display by 3 but keep back/font porch and
>  		 * pulse width same
> @@ -1229,6 +1237,10 @@ static int dsi_cmd_dma_add(struct msm_dsi_host *msm_host,
>  	if (packet.size < len)
>  		memset(data + packet.size, 0xff, len - packet.size);
>  
> +	if (msg->type == MIPI_DSI_PICTURE_PARAMETER_SET)
> +		print_hex_dump(KERN_DEBUG, "ALL:", DUMP_PREFIX_NONE,
> +				16, 1, data, len, false);
> +
>  	if (cfg_hnd->ops->tx_buf_put)
>  		cfg_hnd->ops->tx_buf_put(msm_host);
>  
> @@ -1786,6 +1798,12 @@ static int dsi_populate_dsc_params(struct drm_dsc_config *dsc)
>  	int data;
>  	int final_value, final_scale;
>  	int i;
> +	int bpp = dsc->bits_per_pixel >> 4;

Same u16 here.

- Marijn

> +
> +	if (dsc->bits_per_pixel & 0xf) {
> +		pr_err("DSI does not support fractional bits_per_pixel\n");
> +		return -EINVAL;
> +	}
>  
>  	dsc->rc_model_size = 8192;
>  	dsc->first_line_bpg_offset = 12;
> @@ -1807,7 +1825,7 @@ static int dsi_populate_dsc_params(struct drm_dsc_config *dsc)
>  	}
>  
>  	dsc->initial_offset = 6144; /* Not bpp 12 */
> -	if (dsc->bits_per_pixel != 8)
> +	if (bpp != 8)
>  		dsc->initial_offset = 2048;	/* bpp = 12 */
>  
>  	mux_words_size = 48;		/* bpc == 8/10 */
> @@ -1830,16 +1848,16 @@ static int dsi_populate_dsc_params(struct drm_dsc_config *dsc)
>  	 * params are calculated
>  	 */
>  	groups_per_line = DIV_ROUND_UP(dsc->slice_width, 3);
> -	dsc->slice_chunk_size = dsc->slice_width * dsc->bits_per_pixel / 8;
> -	if ((dsc->slice_width * dsc->bits_per_pixel) % 8)
> +	dsc->slice_chunk_size = dsc->slice_width * bpp / 8;
> +	if ((dsc->slice_width * bpp) % 8)
>  		dsc->slice_chunk_size++;
>  
>  	/* rbs-min */
>  	min_rate_buffer_size =  dsc->rc_model_size - dsc->initial_offset +
> -				dsc->initial_xmit_delay * dsc->bits_per_pixel +
> +				dsc->initial_xmit_delay * bpp +
>  				groups_per_line * dsc->first_line_bpg_offset;
>  
> -	hrd_delay = DIV_ROUND_UP(min_rate_buffer_size, dsc->bits_per_pixel);
> +	hrd_delay = DIV_ROUND_UP(min_rate_buffer_size, bpp);
>  
>  	dsc->initial_dec_delay = hrd_delay - dsc->initial_xmit_delay;
>  
> @@ -1862,7 +1880,7 @@ static int dsi_populate_dsc_params(struct drm_dsc_config *dsc)
>  	data = 2048 * (dsc->rc_model_size - dsc->initial_offset + num_extra_mux_bits);
>  	dsc->slice_bpg_offset = DIV_ROUND_UP(data, groups_total);
>  
> -	target_bpp_x16 = dsc->bits_per_pixel * 16;
> +	target_bpp_x16 = bpp * 16;
>  
>  	data = (dsc->initial_xmit_delay * target_bpp_x16) / 16;
>  	final_value =  dsc->rc_model_size - data + num_extra_mux_bits;
> -- 
> 2.37.3
>
Dmitry Baryshkov Oct. 4, 2022, 2:45 p.m. UTC | #2
On Sat, 1 Oct 2022 at 22:08, Marijn Suijten
<marijn.suijten@somainline.org> wrote:
>
> drm_dsc_config's bits_per_pixel field holds a fractional value with 4
> bits, which all panel drivers should adhere to for
> drm_dsc_pps_payload_pack() to generate a valid payload.  All code in the
> DSI driver here seems to assume that this field doesn't contain any
> fractional bits, hence resulting in the wrong values being computed.
> Since none of the calculations leave any room for fractional bits or
> seem to indicate any possible area of support, disallow such values
> altogether.
>
> Fixes: b9080324d6ca ("drm/msm/dsi: add support for dsc data")
> Signed-off-by: Marijn Suijten <marijn.suijten@somainline.org>
> ---
>  drivers/gpu/drm/msm/dsi/dsi_host.c | 34 +++++++++++++++++++++++-------
>  1 file changed, 26 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c
> index cb6f2fa11f58..42a5c9776f52 100644
> --- a/drivers/gpu/drm/msm/dsi/dsi_host.c
> +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c
> @@ -847,6 +847,11 @@ static void dsi_update_dsc_timing(struct msm_dsi_host *msm_host, bool is_cmd_mod
>         u32 pkt_per_line;
>         u32 bytes_in_slice;
>         u32 eol_byte_num;
> +       int bpp = dsc->bits_per_pixel >> 4;
> +
> +       if (dsc->bits_per_pixel & 0xf)
> +               /* dsi_populate_dsc_params() already caught this case */
> +               pr_err("DSI does not support fractional bits_per_pixel\n");
>
>         /* first calculate dsc parameters and then program
>          * compress mode registers
> @@ -860,7 +865,7 @@ static void dsi_update_dsc_timing(struct msm_dsi_host *msm_host, bool is_cmd_mod
>         if (slice_per_intf > dsc->slice_count)
>                 dsc->slice_count = 1;
>
> -       bytes_in_slice = DIV_ROUND_UP(dsc->slice_width * dsc->bits_per_pixel, 8);
> +       bytes_in_slice = DIV_ROUND_UP(dsc->slice_width * bpp, 8);


bytes_in_slice = DIV_ROUND_UP(dsc->slice_width * dsc->bits_per_pixel, 8 * 16); ?

>
>         dsc->slice_chunk_size = bytes_in_slice;
>
> @@ -913,6 +918,7 @@ static void dsi_timing_setup(struct msm_dsi_host *msm_host, bool is_bonded_dsi)
>         u32 va_end = va_start + mode->vdisplay;
>         u32 hdisplay = mode->hdisplay;
>         u32 wc;
> +       int ret;
>
>         DBG("");
>
> @@ -948,7 +954,9 @@ static void dsi_timing_setup(struct msm_dsi_host *msm_host, bool is_bonded_dsi)
>                 /* we do the calculations for dsc parameters here so that
>                  * panel can use these parameters
>                  */
> -               dsi_populate_dsc_params(dsc);
> +               ret = dsi_populate_dsc_params(dsc);
> +               if (ret)
> +                       return;
>
>                 /* Divide the display by 3 but keep back/font porch and
>                  * pulse width same
> @@ -1229,6 +1237,10 @@ static int dsi_cmd_dma_add(struct msm_dsi_host *msm_host,
>         if (packet.size < len)
>                 memset(data + packet.size, 0xff, len - packet.size);
>
> +       if (msg->type == MIPI_DSI_PICTURE_PARAMETER_SET)
> +               print_hex_dump(KERN_DEBUG, "ALL:", DUMP_PREFIX_NONE,
> +                               16, 1, data, len, false);
> +
>         if (cfg_hnd->ops->tx_buf_put)
>                 cfg_hnd->ops->tx_buf_put(msm_host);
>
> @@ -1786,6 +1798,12 @@ static int dsi_populate_dsc_params(struct drm_dsc_config *dsc)
>         int data;
>         int final_value, final_scale;
>         int i;
> +       int bpp = dsc->bits_per_pixel >> 4;
> +
> +       if (dsc->bits_per_pixel & 0xf) {
> +               pr_err("DSI does not support fractional bits_per_pixel\n");
> +               return -EINVAL;
> +       }
>
>         dsc->rc_model_size = 8192;
>         dsc->first_line_bpg_offset = 12;
> @@ -1807,7 +1825,7 @@ static int dsi_populate_dsc_params(struct drm_dsc_config *dsc)
>         }
>
>         dsc->initial_offset = 6144; /* Not bpp 12 */
> -       if (dsc->bits_per_pixel != 8)
> +       if (bpp != 8)
>                 dsc->initial_offset = 2048;     /* bpp = 12 */
>
>         mux_words_size = 48;            /* bpc == 8/10 */
> @@ -1830,16 +1848,16 @@ static int dsi_populate_dsc_params(struct drm_dsc_config *dsc)
>          * params are calculated
>          */
>         groups_per_line = DIV_ROUND_UP(dsc->slice_width, 3);
> -       dsc->slice_chunk_size = dsc->slice_width * dsc->bits_per_pixel / 8;
> -       if ((dsc->slice_width * dsc->bits_per_pixel) % 8)
> +       dsc->slice_chunk_size = dsc->slice_width * bpp / 8;
> +       if ((dsc->slice_width * bpp) % 8)

One can use fixed point math here too:

dsc->slice_chunk_size = (dsc->slice_width * dsc->bits_per_pixel  + 8 *
16 - 1)/ (8 * 16);

>                 dsc->slice_chunk_size++;
>
>         /* rbs-min */
>         min_rate_buffer_size =  dsc->rc_model_size - dsc->initial_offset +
> -                               dsc->initial_xmit_delay * dsc->bits_per_pixel +
> +                               dsc->initial_xmit_delay * bpp +
>                                 groups_per_line * dsc->first_line_bpg_offset;
>
> -       hrd_delay = DIV_ROUND_UP(min_rate_buffer_size, dsc->bits_per_pixel);
> +       hrd_delay = DIV_ROUND_UP(min_rate_buffer_size, bpp);
>
>         dsc->initial_dec_delay = hrd_delay - dsc->initial_xmit_delay;
>
> @@ -1862,7 +1880,7 @@ static int dsi_populate_dsc_params(struct drm_dsc_config *dsc)
>         data = 2048 * (dsc->rc_model_size - dsc->initial_offset + num_extra_mux_bits);
>         dsc->slice_bpg_offset = DIV_ROUND_UP(data, groups_total);
>
> -       target_bpp_x16 = dsc->bits_per_pixel * 16;
> +       target_bpp_x16 = bpp * 16;
>
>         data = (dsc->initial_xmit_delay * target_bpp_x16) / 16;

It looks like this can be replaced with the direct multiplication
instead, maybe with support for overflow/rounding.

>         final_value =  dsc->rc_model_size - data + num_extra_mux_bits;
> --
> 2.37.3
>
Marijn Suijten Oct. 4, 2022, 10:35 p.m. UTC | #3
On 2022-10-04 17:45:50, Dmitry Baryshkov wrote:
> On Sat, 1 Oct 2022 at 22:08, Marijn Suijten
> <marijn.suijten@somainline.org> wrote:
> [..]
> > -       bytes_in_slice = DIV_ROUND_UP(dsc->slice_width * dsc->bits_per_pixel, 8);
> > +       bytes_in_slice = DIV_ROUND_UP(dsc->slice_width * bpp, 8);
> 
> 
> bytes_in_slice = DIV_ROUND_UP(dsc->slice_width * dsc->bits_per_pixel, 8 * 16); ?

Not necessarily a fan of this, it "hides" the fact that we are dealing
with 4 fractional bits (1/16th precision, it is correct though); but
since this is the only use of `bpp` I can change it and document this
fact wiht a comment on top (including referencing the validation pointed
out in dsi_populate_dsc_params()).

Alternatively we can inline the `>> 4` here?

> >
> >         dsc->slice_chunk_size = bytes_in_slice;
> >
> > @@ -913,6 +918,7 @@ static void dsi_timing_setup(struct msm_dsi_host *msm_host, bool is_bonded_dsi)
> >         u32 va_end = va_start + mode->vdisplay;
> >         u32 hdisplay = mode->hdisplay;
> >         u32 wc;
> > +       int ret;
> >
> >         DBG("");
> >
> > @@ -948,7 +954,9 @@ static void dsi_timing_setup(struct msm_dsi_host *msm_host, bool is_bonded_dsi)
> >                 /* we do the calculations for dsc parameters here so that
> >                  * panel can use these parameters
> >                  */
> > -               dsi_populate_dsc_params(dsc);
> > +               ret = dsi_populate_dsc_params(dsc);
> > +               if (ret)
> > +                       return;
> >
> >                 /* Divide the display by 3 but keep back/font porch and
> >                  * pulse width same
> > @@ -1229,6 +1237,10 @@ static int dsi_cmd_dma_add(struct msm_dsi_host *msm_host,
> >         if (packet.size < len)
> >                 memset(data + packet.size, 0xff, len - packet.size);
> >
> > +       if (msg->type == MIPI_DSI_PICTURE_PARAMETER_SET)
> > +               print_hex_dump(KERN_DEBUG, "ALL:", DUMP_PREFIX_NONE,
> > +                               16, 1, data, len, false);
> > +
> >         if (cfg_hnd->ops->tx_buf_put)
> >                 cfg_hnd->ops->tx_buf_put(msm_host);
> >
> > @@ -1786,6 +1798,12 @@ static int dsi_populate_dsc_params(struct drm_dsc_config *dsc)
> >         int data;
> >         int final_value, final_scale;
> >         int i;
> > +       int bpp = dsc->bits_per_pixel >> 4;
> > +
> > +       if (dsc->bits_per_pixel & 0xf) {
> > +               pr_err("DSI does not support fractional bits_per_pixel\n");
> > +               return -EINVAL;
> > +       }
> >
> >         dsc->rc_model_size = 8192;
> >         dsc->first_line_bpg_offset = 12;
> > @@ -1807,7 +1825,7 @@ static int dsi_populate_dsc_params(struct drm_dsc_config *dsc)
> >         }
> >
> >         dsc->initial_offset = 6144; /* Not bpp 12 */
> > -       if (dsc->bits_per_pixel != 8)
> > +       if (bpp != 8)
> >                 dsc->initial_offset = 2048;     /* bpp = 12 */
> >
> >         mux_words_size = 48;            /* bpc == 8/10 */
> > @@ -1830,16 +1848,16 @@ static int dsi_populate_dsc_params(struct drm_dsc_config *dsc)
> >          * params are calculated
> >          */
> >         groups_per_line = DIV_ROUND_UP(dsc->slice_width, 3);
> > -       dsc->slice_chunk_size = dsc->slice_width * dsc->bits_per_pixel / 8;
> > -       if ((dsc->slice_width * dsc->bits_per_pixel) % 8)
> > +       dsc->slice_chunk_size = dsc->slice_width * bpp / 8;
> > +       if ((dsc->slice_width * bpp) % 8)
> 
> One can use fixed point math here too:
> 
> dsc->slice_chunk_size = (dsc->slice_width * dsc->bits_per_pixel  + 8 *
> 16 - 1)/ (8 * 16);

Good catch, this is effectively a DIV_ROUND_UP() that we happened to
call bytes_in_slice above...

Shall I tackle this in the same patch, or insert another cleanup patch?

In fact dsi_populate_dsc_params() is called first (this comment),
followed by dsi_update_dsc_timing(), meaning that slice_chunk_size is
already provided and shouldn't be recomputed.

> >                 dsc->slice_chunk_size++;
> >
> >         /* rbs-min */
> >         min_rate_buffer_size =  dsc->rc_model_size - dsc->initial_offset +
> > -                               dsc->initial_xmit_delay * dsc->bits_per_pixel +
> > +                               dsc->initial_xmit_delay * bpp +
> >                                 groups_per_line * dsc->first_line_bpg_offset;
> >
> > -       hrd_delay = DIV_ROUND_UP(min_rate_buffer_size, dsc->bits_per_pixel);
> > +       hrd_delay = DIV_ROUND_UP(min_rate_buffer_size, bpp);
> >
> >         dsc->initial_dec_delay = hrd_delay - dsc->initial_xmit_delay;
> >
> > @@ -1862,7 +1880,7 @@ static int dsi_populate_dsc_params(struct drm_dsc_config *dsc)
> >         data = 2048 * (dsc->rc_model_size - dsc->initial_offset + num_extra_mux_bits);
> >         dsc->slice_bpg_offset = DIV_ROUND_UP(data, groups_total);
> >
> > -       target_bpp_x16 = dsc->bits_per_pixel * 16;
> > +       target_bpp_x16 = bpp * 16;
> >
> >         data = (dsc->initial_xmit_delay * target_bpp_x16) / 16;
> 
> It looks like this can be replaced with the direct multiplication
> instead, maybe with support for overflow/rounding.

Thanks, Abhinav pointed out the same in patch 1/5 which originally
cleaned up most - but apparently not all! - of the math here.  I don't
think this value should ever overlow, nor does this `* 16 / 16` have any
effect on rounding (that'd be `/ 16 * 16`).

> >         final_value =  dsc->rc_model_size - data + num_extra_mux_bits;
> > --
> > 2.37.3
> >
> 
> 
> -- 
> With best wishes
> Dmitry
Dmitry Baryshkov Oct. 4, 2022, 10:40 p.m. UTC | #4
On 05/10/2022 01:35, Marijn Suijten wrote:
> On 2022-10-04 17:45:50, Dmitry Baryshkov wrote:
>> On Sat, 1 Oct 2022 at 22:08, Marijn Suijten
>> <marijn.suijten@somainline.org> wrote:
>> [..]
>>> -       bytes_in_slice = DIV_ROUND_UP(dsc->slice_width * dsc->bits_per_pixel, 8);
>>> +       bytes_in_slice = DIV_ROUND_UP(dsc->slice_width * bpp, 8);
>>
>>
>> bytes_in_slice = DIV_ROUND_UP(dsc->slice_width * dsc->bits_per_pixel, 8 * 16); ?
> 
> Not necessarily a fan of this, it "hides" the fact that we are dealing
> with 4 fractional bits (1/16th precision, it is correct though); but
> since this is the only use of `bpp` I can change it and document this
> fact wiht a comment on top (including referencing the validation pointed
> out in dsi_populate_dsc_params()).
> 
> Alternatively we can inline the `>> 4` here?

No, I don't think so. If we shift by 4 bits, we'd loose the fractional 
part. DIV_ROUND_UP( .... , 8 * 16) ensures that we round it up rather 
than just dropping it.

> 
>>>
>>>          dsc->slice_chunk_size = bytes_in_slice;
>>>
>>> @@ -913,6 +918,7 @@ static void dsi_timing_setup(struct msm_dsi_host *msm_host, bool is_bonded_dsi)
>>>          u32 va_end = va_start + mode->vdisplay;
>>>          u32 hdisplay = mode->hdisplay;
>>>          u32 wc;
>>> +       int ret;
>>>
>>>          DBG("");
>>>
>>> @@ -948,7 +954,9 @@ static void dsi_timing_setup(struct msm_dsi_host *msm_host, bool is_bonded_dsi)
>>>                  /* we do the calculations for dsc parameters here so that
>>>                   * panel can use these parameters
>>>                   */
>>> -               dsi_populate_dsc_params(dsc);
>>> +               ret = dsi_populate_dsc_params(dsc);
>>> +               if (ret)
>>> +                       return;
>>>
>>>                  /* Divide the display by 3 but keep back/font porch and
>>>                   * pulse width same
>>> @@ -1229,6 +1237,10 @@ static int dsi_cmd_dma_add(struct msm_dsi_host *msm_host,
>>>          if (packet.size < len)
>>>                  memset(data + packet.size, 0xff, len - packet.size);
>>>
>>> +       if (msg->type == MIPI_DSI_PICTURE_PARAMETER_SET)
>>> +               print_hex_dump(KERN_DEBUG, "ALL:", DUMP_PREFIX_NONE,
>>> +                               16, 1, data, len, false);
>>> +
>>>          if (cfg_hnd->ops->tx_buf_put)
>>>                  cfg_hnd->ops->tx_buf_put(msm_host);
>>>
>>> @@ -1786,6 +1798,12 @@ static int dsi_populate_dsc_params(struct drm_dsc_config *dsc)
>>>          int data;
>>>          int final_value, final_scale;
>>>          int i;
>>> +       int bpp = dsc->bits_per_pixel >> 4;
>>> +
>>> +       if (dsc->bits_per_pixel & 0xf) {
>>> +               pr_err("DSI does not support fractional bits_per_pixel\n");
>>> +               return -EINVAL;
>>> +       }
>>>
>>>          dsc->rc_model_size = 8192;
>>>          dsc->first_line_bpg_offset = 12;
>>> @@ -1807,7 +1825,7 @@ static int dsi_populate_dsc_params(struct drm_dsc_config *dsc)
>>>          }
>>>
>>>          dsc->initial_offset = 6144; /* Not bpp 12 */
>>> -       if (dsc->bits_per_pixel != 8)
>>> +       if (bpp != 8)
>>>                  dsc->initial_offset = 2048;     /* bpp = 12 */
>>>
>>>          mux_words_size = 48;            /* bpc == 8/10 */
>>> @@ -1830,16 +1848,16 @@ static int dsi_populate_dsc_params(struct drm_dsc_config *dsc)
>>>           * params are calculated
>>>           */
>>>          groups_per_line = DIV_ROUND_UP(dsc->slice_width, 3);
>>> -       dsc->slice_chunk_size = dsc->slice_width * dsc->bits_per_pixel / 8;
>>> -       if ((dsc->slice_width * dsc->bits_per_pixel) % 8)
>>> +       dsc->slice_chunk_size = dsc->slice_width * bpp / 8;
>>> +       if ((dsc->slice_width * bpp) % 8)
>>
>> One can use fixed point math here too:
>>
>> dsc->slice_chunk_size = (dsc->slice_width * dsc->bits_per_pixel  + 8 *
>> 16 - 1)/ (8 * 16);
> 
> Good catch, this is effectively a DIV_ROUND_UP() that we happened to
> call bytes_in_slice above...
> 
> Shall I tackle this in the same patch, or insert another cleanup patch?

It's up to you. I usually prefer separate patches, even if just to ease 
bisecting between unrelated changes.

> 
> In fact dsi_populate_dsc_params() is called first (this comment),
> followed by dsi_update_dsc_timing(), meaning that slice_chunk_size is
> already provided and shouldn't be recomputed.
> 
>>>                  dsc->slice_chunk_size++;
>>>
>>>          /* rbs-min */
>>>          min_rate_buffer_size =  dsc->rc_model_size - dsc->initial_offset +
>>> -                               dsc->initial_xmit_delay * dsc->bits_per_pixel +
>>> +                               dsc->initial_xmit_delay * bpp +
>>>                                  groups_per_line * dsc->first_line_bpg_offset;
>>>
>>> -       hrd_delay = DIV_ROUND_UP(min_rate_buffer_size, dsc->bits_per_pixel);
>>> +       hrd_delay = DIV_ROUND_UP(min_rate_buffer_size, bpp);
>>>
>>>          dsc->initial_dec_delay = hrd_delay - dsc->initial_xmit_delay;
>>>
>>> @@ -1862,7 +1880,7 @@ static int dsi_populate_dsc_params(struct drm_dsc_config *dsc)
>>>          data = 2048 * (dsc->rc_model_size - dsc->initial_offset + num_extra_mux_bits);
>>>          dsc->slice_bpg_offset = DIV_ROUND_UP(data, groups_total);
>>>
>>> -       target_bpp_x16 = dsc->bits_per_pixel * 16;
>>> +       target_bpp_x16 = bpp * 16;
>>>
>>>          data = (dsc->initial_xmit_delay * target_bpp_x16) / 16;
>>
>> It looks like this can be replaced with the direct multiplication
>> instead, maybe with support for overflow/rounding.
> 
> Thanks, Abhinav pointed out the same in patch 1/5 which originally
> cleaned up most - but apparently not all! - of the math here.  I don't
> think this value should ever overlow, nor does this `* 16 / 16` have any
> effect on rounding (that'd be `/ 16 * 16`).

Ack

> 
>>>          final_value =  dsc->rc_model_size - data + num_extra_mux_bits;
>>> --
>>> 2.37.3
>>>
>>
>>
>> -- 
>> With best wishes
>> Dmitry
Marijn Suijten Oct. 4, 2022, 10:56 p.m. UTC | #5
On 2022-10-05 01:40:12, Dmitry Baryshkov wrote:
> On 05/10/2022 01:35, Marijn Suijten wrote:
> > On 2022-10-04 17:45:50, Dmitry Baryshkov wrote:
> >> On Sat, 1 Oct 2022 at 22:08, Marijn Suijten
> >> <marijn.suijten@somainline.org> wrote:
> >> [..]
> >>> -       bytes_in_slice = DIV_ROUND_UP(dsc->slice_width * dsc->bits_per_pixel, 8);
> >>> +       bytes_in_slice = DIV_ROUND_UP(dsc->slice_width * bpp, 8);
> >>
> >>
> >> bytes_in_slice = DIV_ROUND_UP(dsc->slice_width * dsc->bits_per_pixel, 8 * 16); ?
> > 
> > Not necessarily a fan of this, it "hides" the fact that we are dealing
> > with 4 fractional bits (1/16th precision, it is correct though); but
> > since this is the only use of `bpp` I can change it and document this
> > fact wiht a comment on top (including referencing the validation pointed
> > out in dsi_populate_dsc_params()).
> > 
> > Alternatively we can inline the `>> 4` here?
> 
> No, I don't think so. If we shift by 4 bits, we'd loose the fractional 
> part. DIV_ROUND_UP( .... , 8 * 16) ensures that we round it up rather 
> than just dropping it.

I'd still keep the `-EINVAL` on `if (dsc->bits_per_pixel & 0xf)` to
guarantee that there is no fractional part.
After all, as explained in the patch description, none of this code /
the DSI driver in general seems to be able to handle fractional bits per
pixel.

> >>> [..]
> >>> -       dsc->slice_chunk_size = dsc->slice_width * dsc->bits_per_pixel / 8;
> >>> -       if ((dsc->slice_width * dsc->bits_per_pixel) % 8)
> >>> +       dsc->slice_chunk_size = dsc->slice_width * bpp / 8;
> >>> +       if ((dsc->slice_width * bpp) % 8)
> >>
> >> One can use fixed point math here too:
> >>
> >> dsc->slice_chunk_size = (dsc->slice_width * dsc->bits_per_pixel  + 8 *
> >> 16 - 1)/ (8 * 16);
> > 
> > Good catch, this is effectively a DIV_ROUND_UP() that we happened to
> > call bytes_in_slice above...
> > 
> > Shall I tackle this in the same patch, or insert another cleanup patch?
> 
> It's up to you. I usually prefer separate patches, even if just to ease 
> bisecting between unrelated changes.

Same feeling here, and have already set it up that way; added two extra
patches to 1. replace this with DIV_ROUND_UP() and 2. remove the
recalculation of slice_chunk_size (disguised as bytes_in_slice) above.

- Marijn
diff mbox series

Patch

diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c
index cb6f2fa11f58..42a5c9776f52 100644
--- a/drivers/gpu/drm/msm/dsi/dsi_host.c
+++ b/drivers/gpu/drm/msm/dsi/dsi_host.c
@@ -847,6 +847,11 @@  static void dsi_update_dsc_timing(struct msm_dsi_host *msm_host, bool is_cmd_mod
 	u32 pkt_per_line;
 	u32 bytes_in_slice;
 	u32 eol_byte_num;
+	int bpp = dsc->bits_per_pixel >> 4;
+
+	if (dsc->bits_per_pixel & 0xf)
+		/* dsi_populate_dsc_params() already caught this case */
+		pr_err("DSI does not support fractional bits_per_pixel\n");
 
 	/* first calculate dsc parameters and then program
 	 * compress mode registers
@@ -860,7 +865,7 @@  static void dsi_update_dsc_timing(struct msm_dsi_host *msm_host, bool is_cmd_mod
 	if (slice_per_intf > dsc->slice_count)
 		dsc->slice_count = 1;
 
-	bytes_in_slice = DIV_ROUND_UP(dsc->slice_width * dsc->bits_per_pixel, 8);
+	bytes_in_slice = DIV_ROUND_UP(dsc->slice_width * bpp, 8);
 
 	dsc->slice_chunk_size = bytes_in_slice;
 
@@ -913,6 +918,7 @@  static void dsi_timing_setup(struct msm_dsi_host *msm_host, bool is_bonded_dsi)
 	u32 va_end = va_start + mode->vdisplay;
 	u32 hdisplay = mode->hdisplay;
 	u32 wc;
+	int ret;
 
 	DBG("");
 
@@ -948,7 +954,9 @@  static void dsi_timing_setup(struct msm_dsi_host *msm_host, bool is_bonded_dsi)
 		/* we do the calculations for dsc parameters here so that
 		 * panel can use these parameters
 		 */
-		dsi_populate_dsc_params(dsc);
+		ret = dsi_populate_dsc_params(dsc);
+		if (ret)
+			return;
 
 		/* Divide the display by 3 but keep back/font porch and
 		 * pulse width same
@@ -1229,6 +1237,10 @@  static int dsi_cmd_dma_add(struct msm_dsi_host *msm_host,
 	if (packet.size < len)
 		memset(data + packet.size, 0xff, len - packet.size);
 
+	if (msg->type == MIPI_DSI_PICTURE_PARAMETER_SET)
+		print_hex_dump(KERN_DEBUG, "ALL:", DUMP_PREFIX_NONE,
+				16, 1, data, len, false);
+
 	if (cfg_hnd->ops->tx_buf_put)
 		cfg_hnd->ops->tx_buf_put(msm_host);
 
@@ -1786,6 +1798,12 @@  static int dsi_populate_dsc_params(struct drm_dsc_config *dsc)
 	int data;
 	int final_value, final_scale;
 	int i;
+	int bpp = dsc->bits_per_pixel >> 4;
+
+	if (dsc->bits_per_pixel & 0xf) {
+		pr_err("DSI does not support fractional bits_per_pixel\n");
+		return -EINVAL;
+	}
 
 	dsc->rc_model_size = 8192;
 	dsc->first_line_bpg_offset = 12;
@@ -1807,7 +1825,7 @@  static int dsi_populate_dsc_params(struct drm_dsc_config *dsc)
 	}
 
 	dsc->initial_offset = 6144; /* Not bpp 12 */
-	if (dsc->bits_per_pixel != 8)
+	if (bpp != 8)
 		dsc->initial_offset = 2048;	/* bpp = 12 */
 
 	mux_words_size = 48;		/* bpc == 8/10 */
@@ -1830,16 +1848,16 @@  static int dsi_populate_dsc_params(struct drm_dsc_config *dsc)
 	 * params are calculated
 	 */
 	groups_per_line = DIV_ROUND_UP(dsc->slice_width, 3);
-	dsc->slice_chunk_size = dsc->slice_width * dsc->bits_per_pixel / 8;
-	if ((dsc->slice_width * dsc->bits_per_pixel) % 8)
+	dsc->slice_chunk_size = dsc->slice_width * bpp / 8;
+	if ((dsc->slice_width * bpp) % 8)
 		dsc->slice_chunk_size++;
 
 	/* rbs-min */
 	min_rate_buffer_size =  dsc->rc_model_size - dsc->initial_offset +
-				dsc->initial_xmit_delay * dsc->bits_per_pixel +
+				dsc->initial_xmit_delay * bpp +
 				groups_per_line * dsc->first_line_bpg_offset;
 
-	hrd_delay = DIV_ROUND_UP(min_rate_buffer_size, dsc->bits_per_pixel);
+	hrd_delay = DIV_ROUND_UP(min_rate_buffer_size, bpp);
 
 	dsc->initial_dec_delay = hrd_delay - dsc->initial_xmit_delay;
 
@@ -1862,7 +1880,7 @@  static int dsi_populate_dsc_params(struct drm_dsc_config *dsc)
 	data = 2048 * (dsc->rc_model_size - dsc->initial_offset + num_extra_mux_bits);
 	dsc->slice_bpg_offset = DIV_ROUND_UP(data, groups_total);
 
-	target_bpp_x16 = dsc->bits_per_pixel * 16;
+	target_bpp_x16 = bpp * 16;
 
 	data = (dsc->initial_xmit_delay * target_bpp_x16) / 16;
 	final_value =  dsc->rc_model_size - data + num_extra_mux_bits;