diff mbox series

[2/2] media: v4l2-mem2mem: add a list for buf used by hw

Message ID 20230704040044.681850-3-randy.li@synaptics.com
State New
Headers show
Series Improve V4L2 M2M job scheduler | expand

Commit Message

Hsia-Jun Li July 4, 2023, 4 a.m. UTC
From: "Hsia-Jun(Randy) Li" <randy.li@synaptics.com>

Many drivers have to create its own buf_struct for a
vb2_queue to track such a state. Also driver has to
iterate over rdy_queue every times to find out a buffer
which is not sent to hardware(or firmware), this new
list just offers the driver a place to store the buffer
that hardware(firmware) has acknowledged.

One important advance about this list, it doesn't like
rdy_queue which both bottom half of the user calling
could operate it, while the v4l2 worker would as well.
The v4l2 core could only operate this queue when its
v4l2_context is not running, the driver would only
access this new hw_queue in its own worker.

Signed-off-by: Hsia-Jun(Randy) Li <randy.li@synaptics.com>
---
 drivers/media/v4l2-core/v4l2-mem2mem.c | 25 +++++++++++++++++--------
 include/media/v4l2-mem2mem.h           | 10 +++++++++-
 2 files changed, 26 insertions(+), 9 deletions(-)

Comments

Nicolas Dufresne July 7, 2023, 7:20 p.m. UTC | #1
Le mardi 04 juillet 2023 à 12:00 +0800, Hsia-Jun Li a écrit :
> From: "Hsia-Jun(Randy) Li" <randy.li@synaptics.com>
> 
> Many drivers have to create its own buf_struct for a
> vb2_queue to track such a state. Also driver has to
> iterate over rdy_queue every times to find out a buffer
> which is not sent to hardware(or firmware), this new
> list just offers the driver a place to store the buffer
> that hardware(firmware) has acknowledged.
> 
> One important advance about this list, it doesn't like
> rdy_queue which both bottom half of the user calling
> could operate it, while the v4l2 worker would as well.
> The v4l2 core could only operate this queue when its
> v4l2_context is not running, the driver would only
> access this new hw_queue in its own worker.

That's an interesting proposal. I didn't like leaving decoded frames into the
rdy_queue, but removing them required me to have my own list, so that I can
clean it up if some buffers are never displayed.

We'll see if we can use this into wave5.

> 
> Signed-off-by: Hsia-Jun(Randy) Li <randy.li@synaptics.com>
> ---
>  drivers/media/v4l2-core/v4l2-mem2mem.c | 25 +++++++++++++++++--------
>  include/media/v4l2-mem2mem.h           | 10 +++++++++-
>  2 files changed, 26 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/media/v4l2-core/v4l2-mem2mem.c b/drivers/media/v4l2-core/v4l2-mem2mem.c
> index c771aba42015..b4151147d5bd 100644
> --- a/drivers/media/v4l2-core/v4l2-mem2mem.c
> +++ b/drivers/media/v4l2-core/v4l2-mem2mem.c
> @@ -321,15 +321,21 @@ static void __v4l2_m2m_try_queue(struct v4l2_m2m_dev *m2m_dev,
>  		goto job_unlock;
>  	}
>  
> -	src = v4l2_m2m_next_src_buf(m2m_ctx);
> -	dst = v4l2_m2m_next_dst_buf(m2m_ctx);
> -	if (!src && !m2m_ctx->out_q_ctx.buffered) {
> -		dprintk("No input buffers available\n");
> -		goto job_unlock;
> +	if (list_empty(&m2m_ctx->out_q_ctx.hw_queue)) {
> +		src = v4l2_m2m_next_src_buf(m2m_ctx);
> +
> +		if (!src && !m2m_ctx->out_q_ctx.buffered) {
> +			dprintk("No input buffers available\n");
> +			goto job_unlock;
> +		}
>  	}
> -	if (!dst && !m2m_ctx->cap_q_ctx.buffered) {
> -		dprintk("No output buffers available\n");
> -		goto job_unlock;
> +
> +	if (list_empty(&m2m_ctx->cap_q_ctx.hw_queue)) {
> +		dst = v4l2_m2m_next_dst_buf(m2m_ctx);
> +		if (!dst && !m2m_ctx->cap_q_ctx.buffered) {
> +			dprintk("No output buffers available\n");
> +			goto job_unlock;
> +		}
>  	}
>  
>  	m2m_ctx->new_frame = true;
> @@ -896,6 +902,7 @@ int v4l2_m2m_streamoff(struct file *file, struct v4l2_m2m_ctx *m2m_ctx,
>  	INIT_LIST_HEAD(&q_ctx->rdy_queue);
>  	q_ctx->num_rdy = 0;
>  	spin_unlock_irqrestore(&q_ctx->rdy_spinlock, flags);
> +	INIT_LIST_HEAD(&q_ctx->hw_queue);
>  
>  	if (m2m_dev->curr_ctx == m2m_ctx) {
>  		m2m_dev->curr_ctx = NULL;
> @@ -1234,6 +1241,8 @@ struct v4l2_m2m_ctx *v4l2_m2m_ctx_init(struct v4l2_m2m_dev *m2m_dev,
>  
>  	INIT_LIST_HEAD(&out_q_ctx->rdy_queue);
>  	INIT_LIST_HEAD(&cap_q_ctx->rdy_queue);
> +	INIT_LIST_HEAD(&out_q_ctx->hw_queue);
> +	INIT_LIST_HEAD(&cap_q_ctx->hw_queue);
>  	spin_lock_init(&out_q_ctx->rdy_spinlock);
>  	spin_lock_init(&cap_q_ctx->rdy_spinlock);
>  
> diff --git a/include/media/v4l2-mem2mem.h b/include/media/v4l2-mem2mem.h
> index d6c8eb2b5201..2342656e582d 100644
> --- a/include/media/v4l2-mem2mem.h
> +++ b/include/media/v4l2-mem2mem.h
> @@ -53,9 +53,16 @@ struct v4l2_m2m_dev;
>   *	processed
>   *
>   * @q:		pointer to struct &vb2_queue
> - * @rdy_queue:	List of V4L2 mem-to-mem queues
> + * @rdy_queue:	List of V4L2 mem-to-mem queues. If v4l2_m2m_buf_queue() is
> + *		called in struct vb2_ops->buf_queue(), the buffer enqueued
> + *		by user would be added to this list.
>   * @rdy_spinlock: spin lock to protect the struct usage
>   * @num_rdy:	number of buffers ready to be processed
> + * @hw_queue:	A list for tracking the buffer is occupied by the hardware
> + * 		(or device's firmware). A buffer could only be in either
> + * 		this list or @rdy_queue.
> + * 		Driver may choose not to use this list while uses its own
> + * 		private data to do this work.

What's the threading protection around this one ? Also, would it be possible to
opt-in that the driver cleanup that list automatically after streamoff has been
executed ?

One thing the doc is missing, is that HW buffer are actually flagged as ACTIVE
buffer vb2, there is a strong link between the two concept, and the doc should
take care.

>   * @buffered:	is the queue buffered?
>   *
>   * Queue for buffers ready to be processed as soon as this
> @@ -68,6 +75,7 @@ struct v4l2_m2m_queue_ctx {
>  	struct list_head	rdy_queue;
>  	spinlock_t		rdy_spinlock;
>  	u8			num_rdy;
> +	struct list_head	hw_queue;
>  	bool			buffered;
>  };
>
Hsia-Jun Li July 17, 2023, 7:14 a.m. UTC | #2
On 7/12/23 17:33, Tomasz Figa wrote:
> CAUTION: Email originated externally, do not click links or open attachments unless you recognize the sender and know the content is safe.
>
>
> On Tue, Jul 04, 2023 at 12:00:38PM +0800, Hsia-Jun Li wrote:
>> From: "Hsia-Jun(Randy) Li" <randy.li@synaptics.com>
>>
>> Many drivers have to create its own buf_struct for a
>> vb2_queue to track such a state. Also driver has to
>> iterate over rdy_queue every times to find out a buffer
>> which is not sent to hardware(or firmware), this new
>> list just offers the driver a place to store the buffer
>> that hardware(firmware) has acknowledged.
>>
>> One important advance about this list, it doesn't like
>> rdy_queue which both bottom half of the user calling
>> could operate it, while the v4l2 worker would as well.
>> The v4l2 core could only operate this queue when its
>> v4l2_context is not running, the driver would only
>> access this new hw_queue in its own worker.
> Could you describe in what case such a list would be useful for a
> mem2mem driver?

This list, as its description, just for saving us from creating a 
private buffer struct to track buffer state.

The queue in the kernel is not the queue that hardware(codec firmware) 
are using.


>> Signed-off-by: Hsia-Jun(Randy) Li <randy.li@synaptics.com>
>> ---
>>   drivers/media/v4l2-core/v4l2-mem2mem.c | 25 +++++++++++++++++--------
>>   include/media/v4l2-mem2mem.h           | 10 +++++++++-
>>   2 files changed, 26 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/media/v4l2-core/v4l2-mem2mem.c b/drivers/media/v4l2-core/v4l2-mem2mem.c
>> index c771aba42015..b4151147d5bd 100644
>> --- a/drivers/media/v4l2-core/v4l2-mem2mem.c
>> +++ b/drivers/media/v4l2-core/v4l2-mem2mem.c
>> @@ -321,15 +321,21 @@ static void __v4l2_m2m_try_queue(struct v4l2_m2m_dev *m2m_dev,
>>                goto job_unlock;
>>        }
>>
>> -     src = v4l2_m2m_next_src_buf(m2m_ctx);
>> -     dst = v4l2_m2m_next_dst_buf(m2m_ctx);
>> -     if (!src && !m2m_ctx->out_q_ctx.buffered) {
>> -             dprintk("No input buffers available\n");
>> -             goto job_unlock;
>> +     if (list_empty(&m2m_ctx->out_q_ctx.hw_queue)) {
>> +             src = v4l2_m2m_next_src_buf(m2m_ctx);
>> +
>> +             if (!src && !m2m_ctx->out_q_ctx.buffered) {
>> +                     dprintk("No input buffers available\n");
>> +                     goto job_unlock;
>> +             }
>>        }
>> -     if (!dst && !m2m_ctx->cap_q_ctx.buffered) {
>> -             dprintk("No output buffers available\n");
>> -             goto job_unlock;
>> +
>> +     if (list_empty(&m2m_ctx->cap_q_ctx.hw_queue)) {
>> +             dst = v4l2_m2m_next_dst_buf(m2m_ctx);
>> +             if (!dst && !m2m_ctx->cap_q_ctx.buffered) {
>> +                     dprintk("No output buffers available\n");
>> +                     goto job_unlock;
>> +             }
>>        }
> src and dst would be referenced unitialized below if neither of the
> above ifs hits...
I think they have been initialized at v4l2_m2m_ctx_init()
>
> Best regards,
> Tomasz
>
>>        m2m_ctx->new_frame = true;
>> @@ -896,6 +902,7 @@ int v4l2_m2m_streamoff(struct file *file, struct v4l2_m2m_ctx *m2m_ctx,
>>        INIT_LIST_HEAD(&q_ctx->rdy_queue);
>>        q_ctx->num_rdy = 0;
>>        spin_unlock_irqrestore(&q_ctx->rdy_spinlock, flags);
>> +     INIT_LIST_HEAD(&q_ctx->hw_queue);
>>
>>        if (m2m_dev->curr_ctx == m2m_ctx) {
>>                m2m_dev->curr_ctx = NULL;
>> @@ -1234,6 +1241,8 @@ struct v4l2_m2m_ctx *v4l2_m2m_ctx_init(struct v4l2_m2m_dev *m2m_dev,
>>
>>        INIT_LIST_HEAD(&out_q_ctx->rdy_queue);
>>        INIT_LIST_HEAD(&cap_q_ctx->rdy_queue);
>> +     INIT_LIST_HEAD(&out_q_ctx->hw_queue);
>> +     INIT_LIST_HEAD(&cap_q_ctx->hw_queue);
>>        spin_lock_init(&out_q_ctx->rdy_spinlock);
>>        spin_lock_init(&cap_q_ctx->rdy_spinlock);
>>
>> diff --git a/include/media/v4l2-mem2mem.h b/include/media/v4l2-mem2mem.h
>> index d6c8eb2b5201..2342656e582d 100644
>> --- a/include/media/v4l2-mem2mem.h
>> +++ b/include/media/v4l2-mem2mem.h
>> @@ -53,9 +53,16 @@ struct v4l2_m2m_dev;
>>    *   processed
>>    *
>>    * @q:               pointer to struct &vb2_queue
>> - * @rdy_queue:       List of V4L2 mem-to-mem queues
>> + * @rdy_queue:       List of V4L2 mem-to-mem queues. If v4l2_m2m_buf_queue() is
>> + *           called in struct vb2_ops->buf_queue(), the buffer enqueued
>> + *           by user would be added to this list.
>>    * @rdy_spinlock: spin lock to protect the struct usage
>>    * @num_rdy: number of buffers ready to be processed
>> + * @hw_queue:        A list for tracking the buffer is occupied by the hardware
>> + *           (or device's firmware). A buffer could only be in either
>> + *           this list or @rdy_queue.
>> + *           Driver may choose not to use this list while uses its own
>> + *           private data to do this work.
>>    * @buffered:        is the queue buffered?
>>    *
>>    * Queue for buffers ready to be processed as soon as this
>> @@ -68,6 +75,7 @@ struct v4l2_m2m_queue_ctx {
>>        struct list_head        rdy_queue;
>>        spinlock_t              rdy_spinlock;
>>        u8                      num_rdy;
>> +     struct list_head        hw_queue;
>>        bool                    buffered;
>>   };
>>
>> --
>> 2.17.1
>>
Nicolas Dufresne July 17, 2023, 2:07 p.m. UTC | #3
Le mercredi 12 juillet 2023 à 09:33 +0000, Tomasz Figa a écrit :
> On Tue, Jul 04, 2023 at 12:00:38PM +0800, Hsia-Jun Li wrote:
> > From: "Hsia-Jun(Randy) Li" <randy.li@synaptics.com>
> > 
> > Many drivers have to create its own buf_struct for a
> > vb2_queue to track such a state. Also driver has to
> > iterate over rdy_queue every times to find out a buffer
> > which is not sent to hardware(or firmware), this new
> > list just offers the driver a place to store the buffer
> > that hardware(firmware) has acknowledged.
> > 
> > One important advance about this list, it doesn't like
> > rdy_queue which both bottom half of the user calling
> > could operate it, while the v4l2 worker would as well.
> > The v4l2 core could only operate this queue when its
> > v4l2_context is not running, the driver would only
> > access this new hw_queue in its own worker.
> 
> Could you describe in what case such a list would be useful for a
> mem2mem driver?

Today all driver must track buffers that are "owned by the hardware". This is a
concept dictated by the m2m framework and enforced through the ACTIVE flag. All
buffers from this list must be mark as done/error/queued after streamoff of the
respective queue in order to acknowledge that they are no longer in use by the
HW. Not doing so will warn:

  videobuf2_common: driver bug: stop_streaming operation is leaving buf ...

Though, there is no queue to easily iterate them. All driver endup having their
own queue, or just leaving the buffers in the rdy_queue (which isn't better).

Nicolas
> 
> > 
> > Signed-off-by: Hsia-Jun(Randy) Li <randy.li@synaptics.com>
> > ---
> >  drivers/media/v4l2-core/v4l2-mem2mem.c | 25 +++++++++++++++++--------
> >  include/media/v4l2-mem2mem.h           | 10 +++++++++-
> >  2 files changed, 26 insertions(+), 9 deletions(-)
> > 
> > diff --git a/drivers/media/v4l2-core/v4l2-mem2mem.c b/drivers/media/v4l2-core/v4l2-mem2mem.c
> > index c771aba42015..b4151147d5bd 100644
> > --- a/drivers/media/v4l2-core/v4l2-mem2mem.c
> > +++ b/drivers/media/v4l2-core/v4l2-mem2mem.c
> > @@ -321,15 +321,21 @@ static void __v4l2_m2m_try_queue(struct v4l2_m2m_dev *m2m_dev,
> >  		goto job_unlock;
> >  	}
> >  
> > -	src = v4l2_m2m_next_src_buf(m2m_ctx);
> > -	dst = v4l2_m2m_next_dst_buf(m2m_ctx);
> > -	if (!src && !m2m_ctx->out_q_ctx.buffered) {
> > -		dprintk("No input buffers available\n");
> > -		goto job_unlock;
> > +	if (list_empty(&m2m_ctx->out_q_ctx.hw_queue)) {
> > +		src = v4l2_m2m_next_src_buf(m2m_ctx);
> > +
> > +		if (!src && !m2m_ctx->out_q_ctx.buffered) {
> > +			dprintk("No input buffers available\n");
> > +			goto job_unlock;
> > +		}
> >  	}
> > -	if (!dst && !m2m_ctx->cap_q_ctx.buffered) {
> > -		dprintk("No output buffers available\n");
> > -		goto job_unlock;
> > +
> > +	if (list_empty(&m2m_ctx->cap_q_ctx.hw_queue)) {
> > +		dst = v4l2_m2m_next_dst_buf(m2m_ctx);
> > +		if (!dst && !m2m_ctx->cap_q_ctx.buffered) {
> > +			dprintk("No output buffers available\n");
> > +			goto job_unlock;
> > +		}
> >  	}
> 
> src and dst would be referenced unitialized below if neither of the
> above ifs hits...
> 
> Best regards,
> Tomasz
> 
> >  
> >  	m2m_ctx->new_frame = true;
> > @@ -896,6 +902,7 @@ int v4l2_m2m_streamoff(struct file *file, struct v4l2_m2m_ctx *m2m_ctx,
> >  	INIT_LIST_HEAD(&q_ctx->rdy_queue);
> >  	q_ctx->num_rdy = 0;
> >  	spin_unlock_irqrestore(&q_ctx->rdy_spinlock, flags);
> > +	INIT_LIST_HEAD(&q_ctx->hw_queue);
> >  
> >  	if (m2m_dev->curr_ctx == m2m_ctx) {
> >  		m2m_dev->curr_ctx = NULL;
> > @@ -1234,6 +1241,8 @@ struct v4l2_m2m_ctx *v4l2_m2m_ctx_init(struct v4l2_m2m_dev *m2m_dev,
> >  
> >  	INIT_LIST_HEAD(&out_q_ctx->rdy_queue);
> >  	INIT_LIST_HEAD(&cap_q_ctx->rdy_queue);
> > +	INIT_LIST_HEAD(&out_q_ctx->hw_queue);
> > +	INIT_LIST_HEAD(&cap_q_ctx->hw_queue);
> >  	spin_lock_init(&out_q_ctx->rdy_spinlock);
> >  	spin_lock_init(&cap_q_ctx->rdy_spinlock);
> >  
> > diff --git a/include/media/v4l2-mem2mem.h b/include/media/v4l2-mem2mem.h
> > index d6c8eb2b5201..2342656e582d 100644
> > --- a/include/media/v4l2-mem2mem.h
> > +++ b/include/media/v4l2-mem2mem.h
> > @@ -53,9 +53,16 @@ struct v4l2_m2m_dev;
> >   *	processed
> >   *
> >   * @q:		pointer to struct &vb2_queue
> > - * @rdy_queue:	List of V4L2 mem-to-mem queues
> > + * @rdy_queue:	List of V4L2 mem-to-mem queues. If v4l2_m2m_buf_queue() is
> > + *		called in struct vb2_ops->buf_queue(), the buffer enqueued
> > + *		by user would be added to this list.
> >   * @rdy_spinlock: spin lock to protect the struct usage
> >   * @num_rdy:	number of buffers ready to be processed
> > + * @hw_queue:	A list for tracking the buffer is occupied by the hardware
> > + * 		(or device's firmware). A buffer could only be in either
> > + * 		this list or @rdy_queue.
> > + * 		Driver may choose not to use this list while uses its own
> > + * 		private data to do this work.
> >   * @buffered:	is the queue buffered?
> >   *
> >   * Queue for buffers ready to be processed as soon as this
> > @@ -68,6 +75,7 @@ struct v4l2_m2m_queue_ctx {
> >  	struct list_head	rdy_queue;
> >  	spinlock_t		rdy_spinlock;
> >  	u8			num_rdy;
> > +	struct list_head	hw_queue;
> >  	bool			buffered;
> >  };
> >  
> > -- 
> > 2.17.1
> >
Hsia-Jun Li July 18, 2023, 3:53 a.m. UTC | #4
On 7/17/23 22:07, Nicolas Dufresne wrote:
> CAUTION: Email originated externally, do not click links or open attachments unless you recognize the sender and know the content is safe.
>
>
> Le mercredi 12 juillet 2023 à 09:33 +0000, Tomasz Figa a écrit :
>> On Tue, Jul 04, 2023 at 12:00:38PM +0800, Hsia-Jun Li wrote:
>>> From: "Hsia-Jun(Randy) Li" <randy.li@synaptics.com>
>>>
>>> Many drivers have to create its own buf_struct for a
>>> vb2_queue to track such a state. Also driver has to
>>> iterate over rdy_queue every times to find out a buffer
>>> which is not sent to hardware(or firmware), this new
>>> list just offers the driver a place to store the buffer
>>> that hardware(firmware) has acknowledged.
>>>
>>> One important advance about this list, it doesn't like
>>> rdy_queue which both bottom half of the user calling
>>> could operate it, while the v4l2 worker would as well.
>>> The v4l2 core could only operate this queue when its
>>> v4l2_context is not running, the driver would only
>>> access this new hw_queue in its own worker.
>> Could you describe in what case such a list would be useful for a
>> mem2mem driver?
> Today all driver must track buffers that are "owned by the hardware". This is a
> concept dictated by the m2m framework and enforced through the ACTIVE flag.

I think that flag is confusing, the m2m framework would only set this 
flag when a buffer is enqueue into v4l2 (__enqueue_in_driver()).

The application can't know whether the driver(or hardware) would still 
use it when that buffer is dequeued(__vb2_dqbuf() would override the 
state here).

I was trying to suggest a flag for the new DELETE_BUF ioctl() or it 
could delete a buffer which the hardware could still use in the future, 
if we are not in the case for non-secure stateless codec.

> All
> buffers from this list must be mark as done/error/queued after streamoff of the
> respective queue in order to acknowledge that they are no longer in use by the
> HW. Not doing so will warn:
>
>    videobuf2_common: driver bug: stop_streaming operation is leaving buf ...
>
> Though, there is no queue to easily iterate them. All driver endup having their
> own queue, or just leaving the buffers in the rdy_queue (which isn't better).
>
> Nicolas
>>> Signed-off-by: Hsia-Jun(Randy) Li <randy.li@synaptics.com>
>>> ---
>>>   drivers/media/v4l2-core/v4l2-mem2mem.c | 25 +++++++++++++++++--------
>>>   include/media/v4l2-mem2mem.h           | 10 +++++++++-
>>>   2 files changed, 26 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/drivers/media/v4l2-core/v4l2-mem2mem.c b/drivers/media/v4l2-core/v4l2-mem2mem.c
>>> index c771aba42015..b4151147d5bd 100644
>>> --- a/drivers/media/v4l2-core/v4l2-mem2mem.c
>>> +++ b/drivers/media/v4l2-core/v4l2-mem2mem.c
>>> @@ -321,15 +321,21 @@ static void __v4l2_m2m_try_queue(struct v4l2_m2m_dev *m2m_dev,
>>>              goto job_unlock;
>>>      }
>>>
>>> -   src = v4l2_m2m_next_src_buf(m2m_ctx);
>>> -   dst = v4l2_m2m_next_dst_buf(m2m_ctx);
>>> -   if (!src && !m2m_ctx->out_q_ctx.buffered) {
>>> -           dprintk("No input buffers available\n");
>>> -           goto job_unlock;
>>> +   if (list_empty(&m2m_ctx->out_q_ctx.hw_queue)) {
>>> +           src = v4l2_m2m_next_src_buf(m2m_ctx);
>>> +
>>> +           if (!src && !m2m_ctx->out_q_ctx.buffered) {
>>> +                   dprintk("No input buffers available\n");
>>> +                   goto job_unlock;
>>> +           }
>>>      }
>>> -   if (!dst && !m2m_ctx->cap_q_ctx.buffered) {
>>> -           dprintk("No output buffers available\n");
>>> -           goto job_unlock;
>>> +
>>> +   if (list_empty(&m2m_ctx->cap_q_ctx.hw_queue)) {
>>> +           dst = v4l2_m2m_next_dst_buf(m2m_ctx);
>>> +           if (!dst && !m2m_ctx->cap_q_ctx.buffered) {
>>> +                   dprintk("No output buffers available\n");
>>> +                   goto job_unlock;
>>> +           }
>>>      }
>> src and dst would be referenced unitialized below if neither of the
>> above ifs hits...
>>
>> Best regards,
>> Tomasz
>>
>>>      m2m_ctx->new_frame = true;
>>> @@ -896,6 +902,7 @@ int v4l2_m2m_streamoff(struct file *file, struct v4l2_m2m_ctx *m2m_ctx,
>>>      INIT_LIST_HEAD(&q_ctx->rdy_queue);
>>>      q_ctx->num_rdy = 0;
>>>      spin_unlock_irqrestore(&q_ctx->rdy_spinlock, flags);
>>> +   INIT_LIST_HEAD(&q_ctx->hw_queue);
>>>
>>>      if (m2m_dev->curr_ctx == m2m_ctx) {
>>>              m2m_dev->curr_ctx = NULL;
>>> @@ -1234,6 +1241,8 @@ struct v4l2_m2m_ctx *v4l2_m2m_ctx_init(struct v4l2_m2m_dev *m2m_dev,
>>>
>>>      INIT_LIST_HEAD(&out_q_ctx->rdy_queue);
>>>      INIT_LIST_HEAD(&cap_q_ctx->rdy_queue);
>>> +   INIT_LIST_HEAD(&out_q_ctx->hw_queue);
>>> +   INIT_LIST_HEAD(&cap_q_ctx->hw_queue);
>>>      spin_lock_init(&out_q_ctx->rdy_spinlock);
>>>      spin_lock_init(&cap_q_ctx->rdy_spinlock);
>>>
>>> diff --git a/include/media/v4l2-mem2mem.h b/include/media/v4l2-mem2mem.h
>>> index d6c8eb2b5201..2342656e582d 100644
>>> --- a/include/media/v4l2-mem2mem.h
>>> +++ b/include/media/v4l2-mem2mem.h
>>> @@ -53,9 +53,16 @@ struct v4l2_m2m_dev;
>>>    * processed
>>>    *
>>>    * @q:             pointer to struct &vb2_queue
>>> - * @rdy_queue:     List of V4L2 mem-to-mem queues
>>> + * @rdy_queue:     List of V4L2 mem-to-mem queues. If v4l2_m2m_buf_queue() is
>>> + *         called in struct vb2_ops->buf_queue(), the buffer enqueued
>>> + *         by user would be added to this list.
>>>    * @rdy_spinlock: spin lock to protect the struct usage
>>>    * @num_rdy:       number of buffers ready to be processed
>>> + * @hw_queue:      A list for tracking the buffer is occupied by the hardware
>>> + *                 (or device's firmware). A buffer could only be in either
>>> + *                 this list or @rdy_queue.
>>> + *                 Driver may choose not to use this list while uses its own
>>> + *                 private data to do this work.
>>>    * @buffered:      is the queue buffered?
>>>    *
>>>    * Queue for buffers ready to be processed as soon as this
>>> @@ -68,6 +75,7 @@ struct v4l2_m2m_queue_ctx {
>>>      struct list_head        rdy_queue;
>>>      spinlock_t              rdy_spinlock;
>>>      u8                      num_rdy;
>>> +   struct list_head        hw_queue;
>>>      bool                    buffered;
>>>   };
>>>
>>> --
>>> 2.17.1
>>>
Tomasz Figa July 27, 2023, 7:25 a.m. UTC | #5
On Mon, Jul 17, 2023 at 4:15 PM Hsia-Jun Li <Randy.Li@synaptics.com> wrote:
>
>
> On 7/12/23 17:33, Tomasz Figa wrote:
> > CAUTION: Email originated externally, do not click links or open attachments unless you recognize the sender and know the content is safe.
> >
> >
> > On Tue, Jul 04, 2023 at 12:00:38PM +0800, Hsia-Jun Li wrote:
> >> From: "Hsia-Jun(Randy) Li" <randy.li@synaptics.com>
> >>
> >> Many drivers have to create its own buf_struct for a
> >> vb2_queue to track such a state. Also driver has to
> >> iterate over rdy_queue every times to find out a buffer
> >> which is not sent to hardware(or firmware), this new
> >> list just offers the driver a place to store the buffer
> >> that hardware(firmware) has acknowledged.
> >>
> >> One important advance about this list, it doesn't like
> >> rdy_queue which both bottom half of the user calling
> >> could operate it, while the v4l2 worker would as well.
> >> The v4l2 core could only operate this queue when its
> >> v4l2_context is not running, the driver would only
> >> access this new hw_queue in its own worker.
> > Could you describe in what case such a list would be useful for a
> > mem2mem driver?
>
> This list, as its description, just for saving us from creating a
> private buffer struct to track buffer state.
>
> The queue in the kernel is not the queue that hardware(codec firmware)
> are using.
>

Sorry, I find the description difficult to understand. It might make
sense to have the text proofread by someone experienced in writing
technical documentation in English before posting in the future.
Thanks.

I think I got the point from Nicolas' explanation, though.

>
> >> Signed-off-by: Hsia-Jun(Randy) Li <randy.li@synaptics.com>
> >> ---
> >>   drivers/media/v4l2-core/v4l2-mem2mem.c | 25 +++++++++++++++++--------
> >>   include/media/v4l2-mem2mem.h           | 10 +++++++++-
> >>   2 files changed, 26 insertions(+), 9 deletions(-)
> >>
> >> diff --git a/drivers/media/v4l2-core/v4l2-mem2mem.c b/drivers/media/v4l2-core/v4l2-mem2mem.c
> >> index c771aba42015..b4151147d5bd 100644
> >> --- a/drivers/media/v4l2-core/v4l2-mem2mem.c
> >> +++ b/drivers/media/v4l2-core/v4l2-mem2mem.c
> >> @@ -321,15 +321,21 @@ static void __v4l2_m2m_try_queue(struct v4l2_m2m_dev *m2m_dev,
> >>                goto job_unlock;
> >>        }
> >>
> >> -     src = v4l2_m2m_next_src_buf(m2m_ctx);
> >> -     dst = v4l2_m2m_next_dst_buf(m2m_ctx);
> >> -     if (!src && !m2m_ctx->out_q_ctx.buffered) {
> >> -             dprintk("No input buffers available\n");
> >> -             goto job_unlock;
> >> +     if (list_empty(&m2m_ctx->out_q_ctx.hw_queue)) {
> >> +             src = v4l2_m2m_next_src_buf(m2m_ctx);
> >> +
> >> +             if (!src && !m2m_ctx->out_q_ctx.buffered) {
> >> +                     dprintk("No input buffers available\n");
> >> +                     goto job_unlock;
> >> +             }
> >>        }
> >> -     if (!dst && !m2m_ctx->cap_q_ctx.buffered) {
> >> -             dprintk("No output buffers available\n");
> >> -             goto job_unlock;
> >> +
> >> +     if (list_empty(&m2m_ctx->cap_q_ctx.hw_queue)) {
> >> +             dst = v4l2_m2m_next_dst_buf(m2m_ctx);
> >> +             if (!dst && !m2m_ctx->cap_q_ctx.buffered) {
> >> +                     dprintk("No output buffers available\n");
> >> +                     goto job_unlock;
> >> +             }
> >>        }
> > src and dst would be referenced unitialized below if neither of the
> > above ifs hits...
> I think they have been initialized at v4l2_m2m_ctx_init()

What do you mean? They are local variables in this function.

> >
> > Best regards,
> > Tomasz
> >
> >>        m2m_ctx->new_frame = true;
> >> @@ -896,6 +902,7 @@ int v4l2_m2m_streamoff(struct file *file, struct v4l2_m2m_ctx *m2m_ctx,
> >>        INIT_LIST_HEAD(&q_ctx->rdy_queue);
> >>        q_ctx->num_rdy = 0;
> >>        spin_unlock_irqrestore(&q_ctx->rdy_spinlock, flags);
> >> +     INIT_LIST_HEAD(&q_ctx->hw_queue);
> >>
> >>        if (m2m_dev->curr_ctx == m2m_ctx) {
> >>                m2m_dev->curr_ctx = NULL;
> >> @@ -1234,6 +1241,8 @@ struct v4l2_m2m_ctx *v4l2_m2m_ctx_init(struct v4l2_m2m_dev *m2m_dev,
> >>
> >>        INIT_LIST_HEAD(&out_q_ctx->rdy_queue);
> >>        INIT_LIST_HEAD(&cap_q_ctx->rdy_queue);
> >> +     INIT_LIST_HEAD(&out_q_ctx->hw_queue);
> >> +     INIT_LIST_HEAD(&cap_q_ctx->hw_queue);
> >>        spin_lock_init(&out_q_ctx->rdy_spinlock);
> >>        spin_lock_init(&cap_q_ctx->rdy_spinlock);
> >>
> >> diff --git a/include/media/v4l2-mem2mem.h b/include/media/v4l2-mem2mem.h
> >> index d6c8eb2b5201..2342656e582d 100644
> >> --- a/include/media/v4l2-mem2mem.h
> >> +++ b/include/media/v4l2-mem2mem.h
> >> @@ -53,9 +53,16 @@ struct v4l2_m2m_dev;
> >>    *   processed
> >>    *
> >>    * @q:               pointer to struct &vb2_queue
> >> - * @rdy_queue:       List of V4L2 mem-to-mem queues
> >> + * @rdy_queue:       List of V4L2 mem-to-mem queues. If v4l2_m2m_buf_queue() is
> >> + *           called in struct vb2_ops->buf_queue(), the buffer enqueued
> >> + *           by user would be added to this list.
> >>    * @rdy_spinlock: spin lock to protect the struct usage
> >>    * @num_rdy: number of buffers ready to be processed
> >> + * @hw_queue:        A list for tracking the buffer is occupied by the hardware
> >> + *           (or device's firmware). A buffer could only be in either
> >> + *           this list or @rdy_queue.
> >> + *           Driver may choose not to use this list while uses its own
> >> + *           private data to do this work.
> >>    * @buffered:        is the queue buffered?
> >>    *
> >>    * Queue for buffers ready to be processed as soon as this
> >> @@ -68,6 +75,7 @@ struct v4l2_m2m_queue_ctx {
> >>        struct list_head        rdy_queue;
> >>        spinlock_t              rdy_spinlock;
> >>        u8                      num_rdy;
> >> +     struct list_head        hw_queue;
> >>        bool                    buffered;
> >>   };
> >>
> >> --
> >> 2.17.1
> >>
> --
> Hsia-Jun(Randy) Li
>
Hsia-Jun Li July 27, 2023, 7:33 a.m. UTC | #6
On 7/27/23 15:25, Tomasz Figa wrote:
> CAUTION: Email originated externally, do not click links or open attachments unless you recognize the sender and know the content is safe.
> 
> 
> On Mon, Jul 17, 2023 at 4:15 PM Hsia-Jun Li <Randy.Li@synaptics.com> wrote:
>>
>>
>> On 7/12/23 17:33, Tomasz Figa wrote:
>>> CAUTION: Email originated externally, do not click links or open attachments unless you recognize the sender and know the content is safe.
>>>
>>>
>>> On Tue, Jul 04, 2023 at 12:00:38PM +0800, Hsia-Jun Li wrote:
>>>> From: "Hsia-Jun(Randy) Li" <randy.li@synaptics.com>
>>>>
>>>> Many drivers have to create its own buf_struct for a
>>>> vb2_queue to track such a state. Also driver has to
>>>> iterate over rdy_queue every times to find out a buffer
>>>> which is not sent to hardware(or firmware), this new
>>>> list just offers the driver a place to store the buffer
>>>> that hardware(firmware) has acknowledged.
>>>>
>>>> One important advance about this list, it doesn't like
>>>> rdy_queue which both bottom half of the user calling
>>>> could operate it, while the v4l2 worker would as well.
>>>> The v4l2 core could only operate this queue when its
>>>> v4l2_context is not running, the driver would only
>>>> access this new hw_queue in its own worker.
>>> Could you describe in what case such a list would be useful for a
>>> mem2mem driver?
>>
>> This list, as its description, just for saving us from creating a
>> private buffer struct to track buffer state.
>>
>> The queue in the kernel is not the queue that hardware(codec firmware)
>> are using.
>>
> 
> Sorry, I find the description difficult to understand. It might make
> sense to have the text proofread by someone experienced in writing
> technical documentation in English before posting in the future.
> Thanks.
> 
Sorry, I may not be able to get more resource here. I would try to ask a 
chatbot to fix my description next time.
> I think I got the point from Nicolas' explanation, though.
> 
>>
>>>> Signed-off-by: Hsia-Jun(Randy) Li <randy.li@synaptics.com>
>>>> ---
>>>>    drivers/media/v4l2-core/v4l2-mem2mem.c | 25 +++++++++++++++++--------
>>>>    include/media/v4l2-mem2mem.h           | 10 +++++++++-
>>>>    2 files changed, 26 insertions(+), 9 deletions(-)
>>>>
>>>> diff --git a/drivers/media/v4l2-core/v4l2-mem2mem.c b/drivers/media/v4l2-core/v4l2-mem2mem.c
>>>> index c771aba42015..b4151147d5bd 100644
>>>> --- a/drivers/media/v4l2-core/v4l2-mem2mem.c
>>>> +++ b/drivers/media/v4l2-core/v4l2-mem2mem.c
>>>> @@ -321,15 +321,21 @@ static void __v4l2_m2m_try_queue(struct v4l2_m2m_dev *m2m_dev,
>>>>                 goto job_unlock;
>>>>         }
>>>>
>>>> -     src = v4l2_m2m_next_src_buf(m2m_ctx);
>>>> -     dst = v4l2_m2m_next_dst_buf(m2m_ctx);
>>>> -     if (!src && !m2m_ctx->out_q_ctx.buffered) {
>>>> -             dprintk("No input buffers available\n");
>>>> -             goto job_unlock;
>>>> +     if (list_empty(&m2m_ctx->out_q_ctx.hw_queue)) {
>>>> +             src = v4l2_m2m_next_src_buf(m2m_ctx);
>>>> +
>>>> +             if (!src && !m2m_ctx->out_q_ctx.buffered) {
>>>> +                     dprintk("No input buffers available\n");
>>>> +                     goto job_unlock;
>>>> +             }
>>>>         }
>>>> -     if (!dst && !m2m_ctx->cap_q_ctx.buffered) {
>>>> -             dprintk("No output buffers available\n");
>>>> -             goto job_unlock;
>>>> +
>>>> +     if (list_empty(&m2m_ctx->cap_q_ctx.hw_queue)) {
>>>> +             dst = v4l2_m2m_next_dst_buf(m2m_ctx);
>>>> +             if (!dst && !m2m_ctx->cap_q_ctx.buffered) {
>>>> +                     dprintk("No output buffers available\n");
>>>> +                     goto job_unlock;
>>>> +             }
>>>>         }
>>> src and dst would be referenced unitialized below if neither of the
>>> above ifs hits...
>> I think they have been initialized at v4l2_m2m_ctx_init()
> 
> What do you mean? They are local variables in this function.
> 
Sorry, I didn't notice the sentence after that.
>>>
>>> Best regards,
>>> Tomasz
>>>
>>>>         m2m_ctx->new_frame = true;
>>>> @@ -896,6 +902,7 @@ int v4l2_m2m_streamoff(struct file *file, struct v4l2_m2m_ctx *m2m_ctx,
>>>>         INIT_LIST_HEAD(&q_ctx->rdy_queue);
>>>>         q_ctx->num_rdy = 0;
>>>>         spin_unlock_irqrestore(&q_ctx->rdy_spinlock, flags);
>>>> +     INIT_LIST_HEAD(&q_ctx->hw_queue);
>>>>
>>>>         if (m2m_dev->curr_ctx == m2m_ctx) {
>>>>                 m2m_dev->curr_ctx = NULL;
>>>> @@ -1234,6 +1241,8 @@ struct v4l2_m2m_ctx *v4l2_m2m_ctx_init(struct v4l2_m2m_dev *m2m_dev,
>>>>
>>>>         INIT_LIST_HEAD(&out_q_ctx->rdy_queue);
>>>>         INIT_LIST_HEAD(&cap_q_ctx->rdy_queue);
>>>> +     INIT_LIST_HEAD(&out_q_ctx->hw_queue);
>>>> +     INIT_LIST_HEAD(&cap_q_ctx->hw_queue);
>>>>         spin_lock_init(&out_q_ctx->rdy_spinlock);
>>>>         spin_lock_init(&cap_q_ctx->rdy_spinlock);
>>>>
>>>> diff --git a/include/media/v4l2-mem2mem.h b/include/media/v4l2-mem2mem.h
>>>> index d6c8eb2b5201..2342656e582d 100644
>>>> --- a/include/media/v4l2-mem2mem.h
>>>> +++ b/include/media/v4l2-mem2mem.h
>>>> @@ -53,9 +53,16 @@ struct v4l2_m2m_dev;
>>>>     *   processed
>>>>     *
>>>>     * @q:               pointer to struct &vb2_queue
>>>> - * @rdy_queue:       List of V4L2 mem-to-mem queues
>>>> + * @rdy_queue:       List of V4L2 mem-to-mem queues. If v4l2_m2m_buf_queue() is
>>>> + *           called in struct vb2_ops->buf_queue(), the buffer enqueued
>>>> + *           by user would be added to this list.
>>>>     * @rdy_spinlock: spin lock to protect the struct usage
>>>>     * @num_rdy: number of buffers ready to be processed
>>>> + * @hw_queue:        A list for tracking the buffer is occupied by the hardware
>>>> + *           (or device's firmware). A buffer could only be in either
>>>> + *           this list or @rdy_queue.
>>>> + *           Driver may choose not to use this list while uses its own
>>>> + *           private data to do this work.
>>>>     * @buffered:        is the queue buffered?
>>>>     *
>>>>     * Queue for buffers ready to be processed as soon as this
>>>> @@ -68,6 +75,7 @@ struct v4l2_m2m_queue_ctx {
>>>>         struct list_head        rdy_queue;
>>>>         spinlock_t              rdy_spinlock;
>>>>         u8                      num_rdy;
>>>> +     struct list_head        hw_queue;
>>>>         bool                    buffered;
>>>>    };
>>>>
>>>> --
>>>> 2.17.1
>>>>
>> --
>> Hsia-Jun(Randy) Li
>>
Tomasz Figa July 27, 2023, 7:43 a.m. UTC | #7
On Mon, Jul 17, 2023 at 11:07 PM Nicolas Dufresne <nicolas@ndufresne.ca> wrote:
>
> Le mercredi 12 juillet 2023 à 09:33 +0000, Tomasz Figa a écrit :
> > On Tue, Jul 04, 2023 at 12:00:38PM +0800, Hsia-Jun Li wrote:
> > > From: "Hsia-Jun(Randy) Li" <randy.li@synaptics.com>
> > >
> > > Many drivers have to create its own buf_struct for a
> > > vb2_queue to track such a state. Also driver has to
> > > iterate over rdy_queue every times to find out a buffer
> > > which is not sent to hardware(or firmware), this new
> > > list just offers the driver a place to store the buffer
> > > that hardware(firmware) has acknowledged.
> > >
> > > One important advance about this list, it doesn't like
> > > rdy_queue which both bottom half of the user calling
> > > could operate it, while the v4l2 worker would as well.
> > > The v4l2 core could only operate this queue when its
> > > v4l2_context is not running, the driver would only
> > > access this new hw_queue in its own worker.
> >
> > Could you describe in what case such a list would be useful for a
> > mem2mem driver?
>
> Today all driver must track buffers that are "owned by the hardware". This is a
> concept dictated by the m2m framework and enforced through the ACTIVE flag. All
> buffers from this list must be mark as done/error/queued after streamoff of the
> respective queue in order to acknowledge that they are no longer in use by the
> HW. Not doing so will warn:
>
>   videobuf2_common: driver bug: stop_streaming operation is leaving buf ...
>
> Though, there is no queue to easily iterate them. All driver endup having their
> own queue, or just leaving the buffers in the rdy_queue (which isn't better).
>

Thanks for the explanation. I see how it could be useful now.

Although I guess this is a problem specifically for hardware (or
firmware) which can internally queue more than 1 buffer, right?
Otherwise the current buffer could just stay at the top of the
rdy_queue until it's removed by the driver's completion handler,
timeout/error handler or context destruction.

Best regards,
Tomasz

> Nicolas
> >
> > >
> > > Signed-off-by: Hsia-Jun(Randy) Li <randy.li@synaptics.com>
> > > ---
> > >  drivers/media/v4l2-core/v4l2-mem2mem.c | 25 +++++++++++++++++--------
> > >  include/media/v4l2-mem2mem.h           | 10 +++++++++-
> > >  2 files changed, 26 insertions(+), 9 deletions(-)
> > >
> > > diff --git a/drivers/media/v4l2-core/v4l2-mem2mem.c b/drivers/media/v4l2-core/v4l2-mem2mem.c
> > > index c771aba42015..b4151147d5bd 100644
> > > --- a/drivers/media/v4l2-core/v4l2-mem2mem.c
> > > +++ b/drivers/media/v4l2-core/v4l2-mem2mem.c
> > > @@ -321,15 +321,21 @@ static void __v4l2_m2m_try_queue(struct v4l2_m2m_dev *m2m_dev,
> > >             goto job_unlock;
> > >     }
> > >
> > > -   src = v4l2_m2m_next_src_buf(m2m_ctx);
> > > -   dst = v4l2_m2m_next_dst_buf(m2m_ctx);
> > > -   if (!src && !m2m_ctx->out_q_ctx.buffered) {
> > > -           dprintk("No input buffers available\n");
> > > -           goto job_unlock;
> > > +   if (list_empty(&m2m_ctx->out_q_ctx.hw_queue)) {
> > > +           src = v4l2_m2m_next_src_buf(m2m_ctx);
> > > +
> > > +           if (!src && !m2m_ctx->out_q_ctx.buffered) {
> > > +                   dprintk("No input buffers available\n");
> > > +                   goto job_unlock;
> > > +           }
> > >     }
> > > -   if (!dst && !m2m_ctx->cap_q_ctx.buffered) {
> > > -           dprintk("No output buffers available\n");
> > > -           goto job_unlock;
> > > +
> > > +   if (list_empty(&m2m_ctx->cap_q_ctx.hw_queue)) {
> > > +           dst = v4l2_m2m_next_dst_buf(m2m_ctx);
> > > +           if (!dst && !m2m_ctx->cap_q_ctx.buffered) {
> > > +                   dprintk("No output buffers available\n");
> > > +                   goto job_unlock;
> > > +           }
> > >     }
> >
> > src and dst would be referenced unitialized below if neither of the
> > above ifs hits...
> >
> > Best regards,
> > Tomasz
> >
> > >
> > >     m2m_ctx->new_frame = true;
> > > @@ -896,6 +902,7 @@ int v4l2_m2m_streamoff(struct file *file, struct v4l2_m2m_ctx *m2m_ctx,
> > >     INIT_LIST_HEAD(&q_ctx->rdy_queue);
> > >     q_ctx->num_rdy = 0;
> > >     spin_unlock_irqrestore(&q_ctx->rdy_spinlock, flags);
> > > +   INIT_LIST_HEAD(&q_ctx->hw_queue);
> > >
> > >     if (m2m_dev->curr_ctx == m2m_ctx) {
> > >             m2m_dev->curr_ctx = NULL;
> > > @@ -1234,6 +1241,8 @@ struct v4l2_m2m_ctx *v4l2_m2m_ctx_init(struct v4l2_m2m_dev *m2m_dev,
> > >
> > >     INIT_LIST_HEAD(&out_q_ctx->rdy_queue);
> > >     INIT_LIST_HEAD(&cap_q_ctx->rdy_queue);
> > > +   INIT_LIST_HEAD(&out_q_ctx->hw_queue);
> > > +   INIT_LIST_HEAD(&cap_q_ctx->hw_queue);
> > >     spin_lock_init(&out_q_ctx->rdy_spinlock);
> > >     spin_lock_init(&cap_q_ctx->rdy_spinlock);
> > >
> > > diff --git a/include/media/v4l2-mem2mem.h b/include/media/v4l2-mem2mem.h
> > > index d6c8eb2b5201..2342656e582d 100644
> > > --- a/include/media/v4l2-mem2mem.h
> > > +++ b/include/media/v4l2-mem2mem.h
> > > @@ -53,9 +53,16 @@ struct v4l2_m2m_dev;
> > >   * processed
> > >   *
> > >   * @q:             pointer to struct &vb2_queue
> > > - * @rdy_queue:     List of V4L2 mem-to-mem queues
> > > + * @rdy_queue:     List of V4L2 mem-to-mem queues. If v4l2_m2m_buf_queue() is
> > > + *         called in struct vb2_ops->buf_queue(), the buffer enqueued
> > > + *         by user would be added to this list.
> > >   * @rdy_spinlock: spin lock to protect the struct usage
> > >   * @num_rdy:       number of buffers ready to be processed
> > > + * @hw_queue:      A list for tracking the buffer is occupied by the hardware
> > > + *                 (or device's firmware). A buffer could only be in either
> > > + *                 this list or @rdy_queue.
> > > + *                 Driver may choose not to use this list while uses its own
> > > + *                 private data to do this work.
> > >   * @buffered:      is the queue buffered?
> > >   *
> > >   * Queue for buffers ready to be processed as soon as this
> > > @@ -68,6 +75,7 @@ struct v4l2_m2m_queue_ctx {
> > >     struct list_head        rdy_queue;
> > >     spinlock_t              rdy_spinlock;
> > >     u8                      num_rdy;
> > > +   struct list_head        hw_queue;
> > >     bool                    buffered;
> > >  };
> > >
> > > --
> > > 2.17.1
> > >
>
Tomasz Figa July 27, 2023, 7:54 a.m. UTC | #8
On Thu, Jul 27, 2023 at 4:33 PM Hsia-Jun Li <Randy.Li@synaptics.com> wrote:
>
>
>
> On 7/27/23 15:25, Tomasz Figa wrote:
> > CAUTION: Email originated externally, do not click links or open attachments unless you recognize the sender and know the content is safe.
> >
> >
> > On Mon, Jul 17, 2023 at 4:15 PM Hsia-Jun Li <Randy.Li@synaptics.com> wrote:
> >>
> >>
> >> On 7/12/23 17:33, Tomasz Figa wrote:
> >>> CAUTION: Email originated externally, do not click links or open attachments unless you recognize the sender and know the content is safe.
> >>>
> >>>
> >>> On Tue, Jul 04, 2023 at 12:00:38PM +0800, Hsia-Jun Li wrote:
> >>>> From: "Hsia-Jun(Randy) Li" <randy.li@synaptics.com>
> >>>>
> >>>> Many drivers have to create its own buf_struct for a
> >>>> vb2_queue to track such a state. Also driver has to
> >>>> iterate over rdy_queue every times to find out a buffer
> >>>> which is not sent to hardware(or firmware), this new
> >>>> list just offers the driver a place to store the buffer
> >>>> that hardware(firmware) has acknowledged.
> >>>>
> >>>> One important advance about this list, it doesn't like
> >>>> rdy_queue which both bottom half of the user calling
> >>>> could operate it, while the v4l2 worker would as well.
> >>>> The v4l2 core could only operate this queue when its
> >>>> v4l2_context is not running, the driver would only
> >>>> access this new hw_queue in its own worker.
> >>> Could you describe in what case such a list would be useful for a
> >>> mem2mem driver?
> >>
> >> This list, as its description, just for saving us from creating a
> >> private buffer struct to track buffer state.
> >>
> >> The queue in the kernel is not the queue that hardware(codec firmware)
> >> are using.
> >>
> >
> > Sorry, I find the description difficult to understand. It might make
> > sense to have the text proofread by someone experienced in writing
> > technical documentation in English before posting in the future.
> > Thanks.
> >
> Sorry, I may not be able to get more resource here. I would try to ask a
> chatbot to fix my description next time.

I think people on the linux-media IRC channel (including me) could
also be willing to help with rephrasing things, but if a chatbot can
do it too, it's even better. :)

> > I think I got the point from Nicolas' explanation, though.
> >
> >>
> >>>> Signed-off-by: Hsia-Jun(Randy) Li <randy.li@synaptics.com>
> >>>> ---
> >>>>    drivers/media/v4l2-core/v4l2-mem2mem.c | 25 +++++++++++++++++--------
> >>>>    include/media/v4l2-mem2mem.h           | 10 +++++++++-
> >>>>    2 files changed, 26 insertions(+), 9 deletions(-)
> >>>>
> >>>> diff --git a/drivers/media/v4l2-core/v4l2-mem2mem.c b/drivers/media/v4l2-core/v4l2-mem2mem.c
> >>>> index c771aba42015..b4151147d5bd 100644
> >>>> --- a/drivers/media/v4l2-core/v4l2-mem2mem.c
> >>>> +++ b/drivers/media/v4l2-core/v4l2-mem2mem.c
> >>>> @@ -321,15 +321,21 @@ static void __v4l2_m2m_try_queue(struct v4l2_m2m_dev *m2m_dev,
> >>>>                 goto job_unlock;
> >>>>         }
> >>>>
> >>>> -     src = v4l2_m2m_next_src_buf(m2m_ctx);
> >>>> -     dst = v4l2_m2m_next_dst_buf(m2m_ctx);
> >>>> -     if (!src && !m2m_ctx->out_q_ctx.buffered) {
> >>>> -             dprintk("No input buffers available\n");
> >>>> -             goto job_unlock;
> >>>> +     if (list_empty(&m2m_ctx->out_q_ctx.hw_queue)) {
> >>>> +             src = v4l2_m2m_next_src_buf(m2m_ctx);
> >>>> +
> >>>> +             if (!src && !m2m_ctx->out_q_ctx.buffered) {
> >>>> +                     dprintk("No input buffers available\n");
> >>>> +                     goto job_unlock;
> >>>> +             }
> >>>>         }
> >>>> -     if (!dst && !m2m_ctx->cap_q_ctx.buffered) {
> >>>> -             dprintk("No output buffers available\n");
> >>>> -             goto job_unlock;
> >>>> +
> >>>> +     if (list_empty(&m2m_ctx->cap_q_ctx.hw_queue)) {
> >>>> +             dst = v4l2_m2m_next_dst_buf(m2m_ctx);
> >>>> +             if (!dst && !m2m_ctx->cap_q_ctx.buffered) {
> >>>> +                     dprintk("No output buffers available\n");
> >>>> +                     goto job_unlock;
> >>>> +             }
> >>>>         }
> >>> src and dst would be referenced unitialized below if neither of the
> >>> above ifs hits...
> >> I think they have been initialized at v4l2_m2m_ctx_init()
> >
> > What do you mean? They are local variables in this function.
> >
> Sorry, I didn't notice the sentence after that.
> >>>
> >>> Best regards,
> >>> Tomasz
> >>>
> >>>>         m2m_ctx->new_frame = true;
> >>>> @@ -896,6 +902,7 @@ int v4l2_m2m_streamoff(struct file *file, struct v4l2_m2m_ctx *m2m_ctx,
> >>>>         INIT_LIST_HEAD(&q_ctx->rdy_queue);
> >>>>         q_ctx->num_rdy = 0;
> >>>>         spin_unlock_irqrestore(&q_ctx->rdy_spinlock, flags);
> >>>> +     INIT_LIST_HEAD(&q_ctx->hw_queue);
> >>>>
> >>>>         if (m2m_dev->curr_ctx == m2m_ctx) {
> >>>>                 m2m_dev->curr_ctx = NULL;
> >>>> @@ -1234,6 +1241,8 @@ struct v4l2_m2m_ctx *v4l2_m2m_ctx_init(struct v4l2_m2m_dev *m2m_dev,
> >>>>
> >>>>         INIT_LIST_HEAD(&out_q_ctx->rdy_queue);
> >>>>         INIT_LIST_HEAD(&cap_q_ctx->rdy_queue);
> >>>> +     INIT_LIST_HEAD(&out_q_ctx->hw_queue);
> >>>> +     INIT_LIST_HEAD(&cap_q_ctx->hw_queue);
> >>>>         spin_lock_init(&out_q_ctx->rdy_spinlock);
> >>>>         spin_lock_init(&cap_q_ctx->rdy_spinlock);
> >>>>
> >>>> diff --git a/include/media/v4l2-mem2mem.h b/include/media/v4l2-mem2mem.h
> >>>> index d6c8eb2b5201..2342656e582d 100644
> >>>> --- a/include/media/v4l2-mem2mem.h
> >>>> +++ b/include/media/v4l2-mem2mem.h
> >>>> @@ -53,9 +53,16 @@ struct v4l2_m2m_dev;
> >>>>     *   processed
> >>>>     *
> >>>>     * @q:               pointer to struct &vb2_queue
> >>>> - * @rdy_queue:       List of V4L2 mem-to-mem queues
> >>>> + * @rdy_queue:       List of V4L2 mem-to-mem queues. If v4l2_m2m_buf_queue() is
> >>>> + *           called in struct vb2_ops->buf_queue(), the buffer enqueued
> >>>> + *           by user would be added to this list.
> >>>>     * @rdy_spinlock: spin lock to protect the struct usage
> >>>>     * @num_rdy: number of buffers ready to be processed
> >>>> + * @hw_queue:        A list for tracking the buffer is occupied by the hardware
> >>>> + *           (or device's firmware). A buffer could only be in either
> >>>> + *           this list or @rdy_queue.
> >>>> + *           Driver may choose not to use this list while uses its own
> >>>> + *           private data to do this work.
> >>>>     * @buffered:        is the queue buffered?
> >>>>     *
> >>>>     * Queue for buffers ready to be processed as soon as this
> >>>> @@ -68,6 +75,7 @@ struct v4l2_m2m_queue_ctx {
> >>>>         struct list_head        rdy_queue;
> >>>>         spinlock_t              rdy_spinlock;
> >>>>         u8                      num_rdy;
> >>>> +     struct list_head        hw_queue;
> >>>>         bool                    buffered;
> >>>>    };
> >>>>
> >>>> --
> >>>> 2.17.1
> >>>>
> >> --
> >> Hsia-Jun(Randy) Li
> >>
>
> --
> Hsia-Jun(Randy) Li
Nicolas Dufresne July 27, 2023, 4:58 p.m. UTC | #9
Le jeudi 27 juillet 2023 à 16:43 +0900, Tomasz Figa a écrit :
> On Mon, Jul 17, 2023 at 11:07 PM Nicolas Dufresne <nicolas@ndufresne.ca> wrote:
> > 
> > Le mercredi 12 juillet 2023 à 09:33 +0000, Tomasz Figa a écrit :
> > > On Tue, Jul 04, 2023 at 12:00:38PM +0800, Hsia-Jun Li wrote:
> > > > From: "Hsia-Jun(Randy) Li" <randy.li@synaptics.com>
> > > > 
> > > > Many drivers have to create its own buf_struct for a
> > > > vb2_queue to track such a state. Also driver has to
> > > > iterate over rdy_queue every times to find out a buffer
> > > > which is not sent to hardware(or firmware), this new
> > > > list just offers the driver a place to store the buffer
> > > > that hardware(firmware) has acknowledged.
> > > > 
> > > > One important advance about this list, it doesn't like
> > > > rdy_queue which both bottom half of the user calling
> > > > could operate it, while the v4l2 worker would as well.
> > > > The v4l2 core could only operate this queue when its
> > > > v4l2_context is not running, the driver would only
> > > > access this new hw_queue in its own worker.
> > > 
> > > Could you describe in what case such a list would be useful for a
> > > mem2mem driver?
> > 
> > Today all driver must track buffers that are "owned by the hardware". This is a
> > concept dictated by the m2m framework and enforced through the ACTIVE flag. All
> > buffers from this list must be mark as done/error/queued after streamoff of the
> > respective queue in order to acknowledge that they are no longer in use by the
> > HW. Not doing so will warn:
> > 
> >   videobuf2_common: driver bug: stop_streaming operation is leaving buf ...
> > 
> > Though, there is no queue to easily iterate them. All driver endup having their
> > own queue, or just leaving the buffers in the rdy_queue (which isn't better).
> > 
> 
> Thanks for the explanation. I see how it could be useful now.
> 
> Although I guess this is a problem specifically for hardware (or
> firmware) which can internally queue more than 1 buffer, right?
> Otherwise the current buffer could just stay at the top of the
> rdy_queue until it's removed by the driver's completion handler,
> timeout/error handler or context destruction.

Correct, its only an issue when you need to process multiple src buffers before
producing a dst buffer. If affects stateful decoder, stateful encoders and
deinterlacer as far as I'm aware.

Nicolas

> 
> Best regards,
> Tomasz
> 
> > Nicolas
> > > 
> > > > 
> > > > Signed-off-by: Hsia-Jun(Randy) Li <randy.li@synaptics.com>
> > > > ---
> > > >  drivers/media/v4l2-core/v4l2-mem2mem.c | 25 +++++++++++++++++--------
> > > >  include/media/v4l2-mem2mem.h           | 10 +++++++++-
> > > >  2 files changed, 26 insertions(+), 9 deletions(-)
> > > > 
> > > > diff --git a/drivers/media/v4l2-core/v4l2-mem2mem.c b/drivers/media/v4l2-core/v4l2-mem2mem.c
> > > > index c771aba42015..b4151147d5bd 100644
> > > > --- a/drivers/media/v4l2-core/v4l2-mem2mem.c
> > > > +++ b/drivers/media/v4l2-core/v4l2-mem2mem.c
> > > > @@ -321,15 +321,21 @@ static void __v4l2_m2m_try_queue(struct v4l2_m2m_dev *m2m_dev,
> > > >             goto job_unlock;
> > > >     }
> > > > 
> > > > -   src = v4l2_m2m_next_src_buf(m2m_ctx);
> > > > -   dst = v4l2_m2m_next_dst_buf(m2m_ctx);
> > > > -   if (!src && !m2m_ctx->out_q_ctx.buffered) {
> > > > -           dprintk("No input buffers available\n");
> > > > -           goto job_unlock;
> > > > +   if (list_empty(&m2m_ctx->out_q_ctx.hw_queue)) {
> > > > +           src = v4l2_m2m_next_src_buf(m2m_ctx);
> > > > +
> > > > +           if (!src && !m2m_ctx->out_q_ctx.buffered) {
> > > > +                   dprintk("No input buffers available\n");
> > > > +                   goto job_unlock;
> > > > +           }
> > > >     }
> > > > -   if (!dst && !m2m_ctx->cap_q_ctx.buffered) {
> > > > -           dprintk("No output buffers available\n");
> > > > -           goto job_unlock;
> > > > +
> > > > +   if (list_empty(&m2m_ctx->cap_q_ctx.hw_queue)) {
> > > > +           dst = v4l2_m2m_next_dst_buf(m2m_ctx);
> > > > +           if (!dst && !m2m_ctx->cap_q_ctx.buffered) {
> > > > +                   dprintk("No output buffers available\n");
> > > > +                   goto job_unlock;
> > > > +           }
> > > >     }
> > > 
> > > src and dst would be referenced unitialized below if neither of the
> > > above ifs hits...
> > > 
> > > Best regards,
> > > Tomasz
> > > 
> > > > 
> > > >     m2m_ctx->new_frame = true;
> > > > @@ -896,6 +902,7 @@ int v4l2_m2m_streamoff(struct file *file, struct v4l2_m2m_ctx *m2m_ctx,
> > > >     INIT_LIST_HEAD(&q_ctx->rdy_queue);
> > > >     q_ctx->num_rdy = 0;
> > > >     spin_unlock_irqrestore(&q_ctx->rdy_spinlock, flags);
> > > > +   INIT_LIST_HEAD(&q_ctx->hw_queue);
> > > > 
> > > >     if (m2m_dev->curr_ctx == m2m_ctx) {
> > > >             m2m_dev->curr_ctx = NULL;
> > > > @@ -1234,6 +1241,8 @@ struct v4l2_m2m_ctx *v4l2_m2m_ctx_init(struct v4l2_m2m_dev *m2m_dev,
> > > > 
> > > >     INIT_LIST_HEAD(&out_q_ctx->rdy_queue);
> > > >     INIT_LIST_HEAD(&cap_q_ctx->rdy_queue);
> > > > +   INIT_LIST_HEAD(&out_q_ctx->hw_queue);
> > > > +   INIT_LIST_HEAD(&cap_q_ctx->hw_queue);
> > > >     spin_lock_init(&out_q_ctx->rdy_spinlock);
> > > >     spin_lock_init(&cap_q_ctx->rdy_spinlock);
> > > > 
> > > > diff --git a/include/media/v4l2-mem2mem.h b/include/media/v4l2-mem2mem.h
> > > > index d6c8eb2b5201..2342656e582d 100644
> > > > --- a/include/media/v4l2-mem2mem.h
> > > > +++ b/include/media/v4l2-mem2mem.h
> > > > @@ -53,9 +53,16 @@ struct v4l2_m2m_dev;
> > > >   * processed
> > > >   *
> > > >   * @q:             pointer to struct &vb2_queue
> > > > - * @rdy_queue:     List of V4L2 mem-to-mem queues
> > > > + * @rdy_queue:     List of V4L2 mem-to-mem queues. If v4l2_m2m_buf_queue() is
> > > > + *         called in struct vb2_ops->buf_queue(), the buffer enqueued
> > > > + *         by user would be added to this list.
> > > >   * @rdy_spinlock: spin lock to protect the struct usage
> > > >   * @num_rdy:       number of buffers ready to be processed
> > > > + * @hw_queue:      A list for tracking the buffer is occupied by the hardware
> > > > + *                 (or device's firmware). A buffer could only be in either
> > > > + *                 this list or @rdy_queue.
> > > > + *                 Driver may choose not to use this list while uses its own
> > > > + *                 private data to do this work.
> > > >   * @buffered:      is the queue buffered?
> > > >   *
> > > >   * Queue for buffers ready to be processed as soon as this
> > > > @@ -68,6 +75,7 @@ struct v4l2_m2m_queue_ctx {
> > > >     struct list_head        rdy_queue;
> > > >     spinlock_t              rdy_spinlock;
> > > >     u8                      num_rdy;
> > > > +   struct list_head        hw_queue;
> > > >     bool                    buffered;
> > > >  };
> > > > 
> > > > --
> > > > 2.17.1
> > > > 
> >
Tomasz Figa July 28, 2023, 4:43 a.m. UTC | #10
On Fri, Jul 28, 2023 at 1:58 AM Nicolas Dufresne <nicolas@ndufresne.ca> wrote:
>
> Le jeudi 27 juillet 2023 à 16:43 +0900, Tomasz Figa a écrit :
> > On Mon, Jul 17, 2023 at 11:07 PM Nicolas Dufresne <nicolas@ndufresne.ca> wrote:
> > >
> > > Le mercredi 12 juillet 2023 à 09:33 +0000, Tomasz Figa a écrit :
> > > > On Tue, Jul 04, 2023 at 12:00:38PM +0800, Hsia-Jun Li wrote:
> > > > > From: "Hsia-Jun(Randy) Li" <randy.li@synaptics.com>
> > > > >
> > > > > Many drivers have to create its own buf_struct for a
> > > > > vb2_queue to track such a state. Also driver has to
> > > > > iterate over rdy_queue every times to find out a buffer
> > > > > which is not sent to hardware(or firmware), this new
> > > > > list just offers the driver a place to store the buffer
> > > > > that hardware(firmware) has acknowledged.
> > > > >
> > > > > One important advance about this list, it doesn't like
> > > > > rdy_queue which both bottom half of the user calling
> > > > > could operate it, while the v4l2 worker would as well.
> > > > > The v4l2 core could only operate this queue when its
> > > > > v4l2_context is not running, the driver would only
> > > > > access this new hw_queue in its own worker.
> > > >
> > > > Could you describe in what case such a list would be useful for a
> > > > mem2mem driver?
> > >
> > > Today all driver must track buffers that are "owned by the hardware". This is a
> > > concept dictated by the m2m framework and enforced through the ACTIVE flag. All
> > > buffers from this list must be mark as done/error/queued after streamoff of the
> > > respective queue in order to acknowledge that they are no longer in use by the
> > > HW. Not doing so will warn:
> > >
> > >   videobuf2_common: driver bug: stop_streaming operation is leaving buf ...
> > >
> > > Though, there is no queue to easily iterate them. All driver endup having their
> > > own queue, or just leaving the buffers in the rdy_queue (which isn't better).
> > >
> >
> > Thanks for the explanation. I see how it could be useful now.
> >
> > Although I guess this is a problem specifically for hardware (or
> > firmware) which can internally queue more than 1 buffer, right?
> > Otherwise the current buffer could just stay at the top of the
> > rdy_queue until it's removed by the driver's completion handler,
> > timeout/error handler or context destruction.
>
> Correct, its only an issue when you need to process multiple src buffers before
> producing a dst buffer. If affects stateful decoder, stateful encoders and
> deinterlacer as far as I'm aware.

Is it actually necessary to keep those buffers in a list in that case, though?
I can see that a deinterlacer would indeed need 2 input buffers to
perform the deinterlacing operation, but those would be just known to
the driver, since it's running the task currently.
For a stateful decoder, wouldn't it just consume the bitstream buffer
(producing something partially decoded to its own internal buffers)
and return it shortly?
The most realistic scenario would be for stateful encoders which could
keep some input buffers as reference frames for further encoding, but
then would this patch actually work for them? It would make
__v4l2_m2m_try_queue never add the context to the job_queue if there
are some buffers in that hw_queue list.

Maybe what I need here are actual patches modifying some existing
drivers. Randy, would you be able to include that in the next version?
Thanks.

Best regards,
Tomasz

>
> Nicolas
>
> >
> > Best regards,
> > Tomasz
> >
> > > Nicolas
> > > >
> > > > >
> > > > > Signed-off-by: Hsia-Jun(Randy) Li <randy.li@synaptics.com>
> > > > > ---
> > > > >  drivers/media/v4l2-core/v4l2-mem2mem.c | 25 +++++++++++++++++--------
> > > > >  include/media/v4l2-mem2mem.h           | 10 +++++++++-
> > > > >  2 files changed, 26 insertions(+), 9 deletions(-)
> > > > >
> > > > > diff --git a/drivers/media/v4l2-core/v4l2-mem2mem.c b/drivers/media/v4l2-core/v4l2-mem2mem.c
> > > > > index c771aba42015..b4151147d5bd 100644
> > > > > --- a/drivers/media/v4l2-core/v4l2-mem2mem.c
> > > > > +++ b/drivers/media/v4l2-core/v4l2-mem2mem.c
> > > > > @@ -321,15 +321,21 @@ static void __v4l2_m2m_try_queue(struct v4l2_m2m_dev *m2m_dev,
> > > > >             goto job_unlock;
> > > > >     }
> > > > >
> > > > > -   src = v4l2_m2m_next_src_buf(m2m_ctx);
> > > > > -   dst = v4l2_m2m_next_dst_buf(m2m_ctx);
> > > > > -   if (!src && !m2m_ctx->out_q_ctx.buffered) {
> > > > > -           dprintk("No input buffers available\n");
> > > > > -           goto job_unlock;
> > > > > +   if (list_empty(&m2m_ctx->out_q_ctx.hw_queue)) {
> > > > > +           src = v4l2_m2m_next_src_buf(m2m_ctx);
> > > > > +
> > > > > +           if (!src && !m2m_ctx->out_q_ctx.buffered) {
> > > > > +                   dprintk("No input buffers available\n");
> > > > > +                   goto job_unlock;
> > > > > +           }
> > > > >     }
> > > > > -   if (!dst && !m2m_ctx->cap_q_ctx.buffered) {
> > > > > -           dprintk("No output buffers available\n");
> > > > > -           goto job_unlock;
> > > > > +
> > > > > +   if (list_empty(&m2m_ctx->cap_q_ctx.hw_queue)) {
> > > > > +           dst = v4l2_m2m_next_dst_buf(m2m_ctx);
> > > > > +           if (!dst && !m2m_ctx->cap_q_ctx.buffered) {
> > > > > +                   dprintk("No output buffers available\n");
> > > > > +                   goto job_unlock;
> > > > > +           }
> > > > >     }
> > > >
> > > > src and dst would be referenced unitialized below if neither of the
> > > > above ifs hits...
> > > >
> > > > Best regards,
> > > > Tomasz
> > > >
> > > > >
> > > > >     m2m_ctx->new_frame = true;
> > > > > @@ -896,6 +902,7 @@ int v4l2_m2m_streamoff(struct file *file, struct v4l2_m2m_ctx *m2m_ctx,
> > > > >     INIT_LIST_HEAD(&q_ctx->rdy_queue);
> > > > >     q_ctx->num_rdy = 0;
> > > > >     spin_unlock_irqrestore(&q_ctx->rdy_spinlock, flags);
> > > > > +   INIT_LIST_HEAD(&q_ctx->hw_queue);
> > > > >
> > > > >     if (m2m_dev->curr_ctx == m2m_ctx) {
> > > > >             m2m_dev->curr_ctx = NULL;
> > > > > @@ -1234,6 +1241,8 @@ struct v4l2_m2m_ctx *v4l2_m2m_ctx_init(struct v4l2_m2m_dev *m2m_dev,
> > > > >
> > > > >     INIT_LIST_HEAD(&out_q_ctx->rdy_queue);
> > > > >     INIT_LIST_HEAD(&cap_q_ctx->rdy_queue);
> > > > > +   INIT_LIST_HEAD(&out_q_ctx->hw_queue);
> > > > > +   INIT_LIST_HEAD(&cap_q_ctx->hw_queue);
> > > > >     spin_lock_init(&out_q_ctx->rdy_spinlock);
> > > > >     spin_lock_init(&cap_q_ctx->rdy_spinlock);
> > > > >
> > > > > diff --git a/include/media/v4l2-mem2mem.h b/include/media/v4l2-mem2mem.h
> > > > > index d6c8eb2b5201..2342656e582d 100644
> > > > > --- a/include/media/v4l2-mem2mem.h
> > > > > +++ b/include/media/v4l2-mem2mem.h
> > > > > @@ -53,9 +53,16 @@ struct v4l2_m2m_dev;
> > > > >   * processed
> > > > >   *
> > > > >   * @q:             pointer to struct &vb2_queue
> > > > > - * @rdy_queue:     List of V4L2 mem-to-mem queues
> > > > > + * @rdy_queue:     List of V4L2 mem-to-mem queues. If v4l2_m2m_buf_queue() is
> > > > > + *         called in struct vb2_ops->buf_queue(), the buffer enqueued
> > > > > + *         by user would be added to this list.
> > > > >   * @rdy_spinlock: spin lock to protect the struct usage
> > > > >   * @num_rdy:       number of buffers ready to be processed
> > > > > + * @hw_queue:      A list for tracking the buffer is occupied by the hardware
> > > > > + *                 (or device's firmware). A buffer could only be in either
> > > > > + *                 this list or @rdy_queue.
> > > > > + *                 Driver may choose not to use this list while uses its own
> > > > > + *                 private data to do this work.
> > > > >   * @buffered:      is the queue buffered?
> > > > >   *
> > > > >   * Queue for buffers ready to be processed as soon as this
> > > > > @@ -68,6 +75,7 @@ struct v4l2_m2m_queue_ctx {
> > > > >     struct list_head        rdy_queue;
> > > > >     spinlock_t              rdy_spinlock;
> > > > >     u8                      num_rdy;
> > > > > +   struct list_head        hw_queue;
> > > > >     bool                    buffered;
> > > > >  };
> > > > >
> > > > > --
> > > > > 2.17.1
> > > > >
> > >
>
Hsia-Jun Li July 28, 2023, 7:08 a.m. UTC | #11
On 7/28/23 12:43, Tomasz Figa wrote:
> CAUTION: Email originated externally, do not click links or open attachments unless you recognize the sender and know the content is safe.
> 
> 
> On Fri, Jul 28, 2023 at 1:58 AM Nicolas Dufresne <nicolas@ndufresne.ca> wrote:
>>
>> Le jeudi 27 juillet 2023 à 16:43 +0900, Tomasz Figa a écrit :
>>> On Mon, Jul 17, 2023 at 11:07 PM Nicolas Dufresne <nicolas@ndufresne.ca> wrote:
>>>>
>>>> Le mercredi 12 juillet 2023 à 09:33 +0000, Tomasz Figa a écrit :
>>>>> On Tue, Jul 04, 2023 at 12:00:38PM +0800, Hsia-Jun Li wrote:
>>>>>> From: "Hsia-Jun(Randy) Li" <randy.li@synaptics.com>
>>>>>>
>>>>>> Many drivers have to create its own buf_struct for a
>>>>>> vb2_queue to track such a state. Also driver has to
>>>>>> iterate over rdy_queue every times to find out a buffer
>>>>>> which is not sent to hardware(or firmware), this new
>>>>>> list just offers the driver a place to store the buffer
>>>>>> that hardware(firmware) has acknowledged.
>>>>>>
>>>>>> One important advance about this list, it doesn't like
>>>>>> rdy_queue which both bottom half of the user calling
>>>>>> could operate it, while the v4l2 worker would as well.
>>>>>> The v4l2 core could only operate this queue when its
>>>>>> v4l2_context is not running, the driver would only
>>>>>> access this new hw_queue in its own worker.
>>>>>
>>>>> Could you describe in what case such a list would be useful for a
>>>>> mem2mem driver?
>>>>
>>>> Today all driver must track buffers that are "owned by the hardware". This is a
>>>> concept dictated by the m2m framework and enforced through the ACTIVE flag. All
>>>> buffers from this list must be mark as done/error/queued after streamoff of the
>>>> respective queue in order to acknowledge that they are no longer in use by the
>>>> HW. Not doing so will warn:
>>>>
>>>>    videobuf2_common: driver bug: stop_streaming operation is leaving buf ...
>>>>
>>>> Though, there is no queue to easily iterate them. All driver endup having their
>>>> own queue, or just leaving the buffers in the rdy_queue (which isn't better).
>>>>
>>>
>>> Thanks for the explanation. I see how it could be useful now.
>>>
>>> Although I guess this is a problem specifically for hardware (or
>>> firmware) which can internally queue more than 1 buffer, right?
>>> Otherwise the current buffer could just stay at the top of the
>>> rdy_queue until it's removed by the driver's completion handler,
>>> timeout/error handler or context destruction.
>>
>> Correct, its only an issue when you need to process multiple src buffers before
>> producing a dst buffer. If affects stateful decoder, stateful encoders and
>> deinterlacer as far as I'm aware.
> 
> Is it actually necessary to keep those buffers in a list in that case, though?
> I can see that a deinterlacer would indeed need 2 input buffers to
> perform the deinterlacing operation, but those would be just known to
> the driver, since it's running the task currently.
> For a stateful decoder, wouldn't it just consume the bitstream buffer
> (producing something partially decoded to its own internal buffers)
> and return it shortly?
Display re-order. Firmware could do such batch work, taking a few 
bitstream buffer, then output a list graphics buffer in the display 
order also discard the usage of the non-display buffer when it is 
removed from dpb.

Even in one input and one output mode, firmware need to do redo, let the 
driver know when a graphics buffer could be display, so firmware would 
usually hold the graphics buffer(frame) until its display time.

Besides, I hate the driver occupied a large of memory without user's 
order. I would like to drop those internal buffers.
> The most realistic scenario would be for stateful encoders which could
> keep some input buffers as reference frames for further encoding, but
> then would this patch actually work for them? It would make
> __v4l2_m2m_try_queue never add the context to the job_queue if there
> are some buffers in that hw_queue list.
why?
> 
> Maybe what I need here are actual patches modifying some existing
> drivers. Randy, would you be able to include that in the next version?
May not. The Synaptics VideoSmart is a secure video platform(DRM), I 
could release a snapshot of the driver when I got the permission, that 
would be after the official release of the SDK.
But you may not be able to compile it because we have our own TEE 
interface(not optee), also running it because the trusted app would be 
signed with a per-device key.
> Thanks.
> 
> Best regards,
> Tomasz
> 
>>
>> Nicolas
>>
>>>
>>> Best regards,
>>> Tomasz
>>>
>>>> Nicolas
>>>>>
>>>>>>
>>>>>> Signed-off-by: Hsia-Jun(Randy) Li <randy.li@synaptics.com>
>>>>>> ---
>>>>>>   drivers/media/v4l2-core/v4l2-mem2mem.c | 25 +++++++++++++++++--------
>>>>>>   include/media/v4l2-mem2mem.h           | 10 +++++++++-
>>>>>>   2 files changed, 26 insertions(+), 9 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/media/v4l2-core/v4l2-mem2mem.c b/drivers/media/v4l2-core/v4l2-mem2mem.c
>>>>>> index c771aba42015..b4151147d5bd 100644
>>>>>> --- a/drivers/media/v4l2-core/v4l2-mem2mem.c
>>>>>> +++ b/drivers/media/v4l2-core/v4l2-mem2mem.c
>>>>>> @@ -321,15 +321,21 @@ static void __v4l2_m2m_try_queue(struct v4l2_m2m_dev *m2m_dev,
>>>>>>              goto job_unlock;
>>>>>>      }
>>>>>>
>>>>>> -   src = v4l2_m2m_next_src_buf(m2m_ctx);
>>>>>> -   dst = v4l2_m2m_next_dst_buf(m2m_ctx);
>>>>>> -   if (!src && !m2m_ctx->out_q_ctx.buffered) {
>>>>>> -           dprintk("No input buffers available\n");
>>>>>> -           goto job_unlock;
>>>>>> +   if (list_empty(&m2m_ctx->out_q_ctx.hw_queue)) {
>>>>>> +           src = v4l2_m2m_next_src_buf(m2m_ctx);
>>>>>> +
>>>>>> +           if (!src && !m2m_ctx->out_q_ctx.buffered) {
>>>>>> +                   dprintk("No input buffers available\n");
>>>>>> +                   goto job_unlock;
>>>>>> +           }
>>>>>>      }
>>>>>> -   if (!dst && !m2m_ctx->cap_q_ctx.buffered) {
>>>>>> -           dprintk("No output buffers available\n");
>>>>>> -           goto job_unlock;
>>>>>> +
>>>>>> +   if (list_empty(&m2m_ctx->cap_q_ctx.hw_queue)) {
>>>>>> +           dst = v4l2_m2m_next_dst_buf(m2m_ctx);
>>>>>> +           if (!dst && !m2m_ctx->cap_q_ctx.buffered) {
>>>>>> +                   dprintk("No output buffers available\n");
>>>>>> +                   goto job_unlock;
>>>>>> +           }
>>>>>>      }
>>>>>
>>>>> src and dst would be referenced unitialized below if neither of the
>>>>> above ifs hits...
>>>>>
>>>>> Best regards,
>>>>> Tomasz
>>>>>
>>>>>>
>>>>>>      m2m_ctx->new_frame = true;
>>>>>> @@ -896,6 +902,7 @@ int v4l2_m2m_streamoff(struct file *file, struct v4l2_m2m_ctx *m2m_ctx,
>>>>>>      INIT_LIST_HEAD(&q_ctx->rdy_queue);
>>>>>>      q_ctx->num_rdy = 0;
>>>>>>      spin_unlock_irqrestore(&q_ctx->rdy_spinlock, flags);
>>>>>> +   INIT_LIST_HEAD(&q_ctx->hw_queue);
>>>>>>
>>>>>>      if (m2m_dev->curr_ctx == m2m_ctx) {
>>>>>>              m2m_dev->curr_ctx = NULL;
>>>>>> @@ -1234,6 +1241,8 @@ struct v4l2_m2m_ctx *v4l2_m2m_ctx_init(struct v4l2_m2m_dev *m2m_dev,
>>>>>>
>>>>>>      INIT_LIST_HEAD(&out_q_ctx->rdy_queue);
>>>>>>      INIT_LIST_HEAD(&cap_q_ctx->rdy_queue);
>>>>>> +   INIT_LIST_HEAD(&out_q_ctx->hw_queue);
>>>>>> +   INIT_LIST_HEAD(&cap_q_ctx->hw_queue);
>>>>>>      spin_lock_init(&out_q_ctx->rdy_spinlock);
>>>>>>      spin_lock_init(&cap_q_ctx->rdy_spinlock);
>>>>>>
>>>>>> diff --git a/include/media/v4l2-mem2mem.h b/include/media/v4l2-mem2mem.h
>>>>>> index d6c8eb2b5201..2342656e582d 100644
>>>>>> --- a/include/media/v4l2-mem2mem.h
>>>>>> +++ b/include/media/v4l2-mem2mem.h
>>>>>> @@ -53,9 +53,16 @@ struct v4l2_m2m_dev;
>>>>>>    * processed
>>>>>>    *
>>>>>>    * @q:             pointer to struct &vb2_queue
>>>>>> - * @rdy_queue:     List of V4L2 mem-to-mem queues
>>>>>> + * @rdy_queue:     List of V4L2 mem-to-mem queues. If v4l2_m2m_buf_queue() is
>>>>>> + *         called in struct vb2_ops->buf_queue(), the buffer enqueued
>>>>>> + *         by user would be added to this list.
>>>>>>    * @rdy_spinlock: spin lock to protect the struct usage
>>>>>>    * @num_rdy:       number of buffers ready to be processed
>>>>>> + * @hw_queue:      A list for tracking the buffer is occupied by the hardware
>>>>>> + *                 (or device's firmware). A buffer could only be in either
>>>>>> + *                 this list or @rdy_queue.
>>>>>> + *                 Driver may choose not to use this list while uses its own
>>>>>> + *                 private data to do this work.
>>>>>>    * @buffered:      is the queue buffered?
>>>>>>    *
>>>>>>    * Queue for buffers ready to be processed as soon as this
>>>>>> @@ -68,6 +75,7 @@ struct v4l2_m2m_queue_ctx {
>>>>>>      struct list_head        rdy_queue;
>>>>>>      spinlock_t              rdy_spinlock;
>>>>>>      u8                      num_rdy;
>>>>>> +   struct list_head        hw_queue;
>>>>>>      bool                    buffered;
>>>>>>   };
>>>>>>
>>>>>> --
>>>>>> 2.17.1
>>>>>>
>>>>
>>
Tomasz Figa July 28, 2023, 7:26 a.m. UTC | #12
On Fri, Jul 28, 2023 at 4:09 PM Hsia-Jun Li <Randy.Li@synaptics.com> wrote:
>
>
>
> On 7/28/23 12:43, Tomasz Figa wrote:
> > CAUTION: Email originated externally, do not click links or open attachments unless you recognize the sender and know the content is safe.
> >
> >
> > On Fri, Jul 28, 2023 at 1:58 AM Nicolas Dufresne <nicolas@ndufresne.ca> wrote:
> >>
> >> Le jeudi 27 juillet 2023 à 16:43 +0900, Tomasz Figa a écrit :
> >>> On Mon, Jul 17, 2023 at 11:07 PM Nicolas Dufresne <nicolas@ndufresne.ca> wrote:
> >>>>
> >>>> Le mercredi 12 juillet 2023 à 09:33 +0000, Tomasz Figa a écrit :
> >>>>> On Tue, Jul 04, 2023 at 12:00:38PM +0800, Hsia-Jun Li wrote:
> >>>>>> From: "Hsia-Jun(Randy) Li" <randy.li@synaptics.com>
> >>>>>>
> >>>>>> Many drivers have to create its own buf_struct for a
> >>>>>> vb2_queue to track such a state. Also driver has to
> >>>>>> iterate over rdy_queue every times to find out a buffer
> >>>>>> which is not sent to hardware(or firmware), this new
> >>>>>> list just offers the driver a place to store the buffer
> >>>>>> that hardware(firmware) has acknowledged.
> >>>>>>
> >>>>>> One important advance about this list, it doesn't like
> >>>>>> rdy_queue which both bottom half of the user calling
> >>>>>> could operate it, while the v4l2 worker would as well.
> >>>>>> The v4l2 core could only operate this queue when its
> >>>>>> v4l2_context is not running, the driver would only
> >>>>>> access this new hw_queue in its own worker.
> >>>>>
> >>>>> Could you describe in what case such a list would be useful for a
> >>>>> mem2mem driver?
> >>>>
> >>>> Today all driver must track buffers that are "owned by the hardware". This is a
> >>>> concept dictated by the m2m framework and enforced through the ACTIVE flag. All
> >>>> buffers from this list must be mark as done/error/queued after streamoff of the
> >>>> respective queue in order to acknowledge that they are no longer in use by the
> >>>> HW. Not doing so will warn:
> >>>>
> >>>>    videobuf2_common: driver bug: stop_streaming operation is leaving buf ...
> >>>>
> >>>> Though, there is no queue to easily iterate them. All driver endup having their
> >>>> own queue, or just leaving the buffers in the rdy_queue (which isn't better).
> >>>>
> >>>
> >>> Thanks for the explanation. I see how it could be useful now.
> >>>
> >>> Although I guess this is a problem specifically for hardware (or
> >>> firmware) which can internally queue more than 1 buffer, right?
> >>> Otherwise the current buffer could just stay at the top of the
> >>> rdy_queue until it's removed by the driver's completion handler,
> >>> timeout/error handler or context destruction.
> >>
> >> Correct, its only an issue when you need to process multiple src buffers before
> >> producing a dst buffer. If affects stateful decoder, stateful encoders and
> >> deinterlacer as far as I'm aware.
> >
> > Is it actually necessary to keep those buffers in a list in that case, though?
> > I can see that a deinterlacer would indeed need 2 input buffers to
> > perform the deinterlacing operation, but those would be just known to
> > the driver, since it's running the task currently.
> > For a stateful decoder, wouldn't it just consume the bitstream buffer
> > (producing something partially decoded to its own internal buffers)
> > and return it shortly?
> Display re-order. Firmware could do such batch work, taking a few
> bitstream buffer, then output a list graphics buffer in the display
> order also discard the usage of the non-display buffer when it is
> removed from dpb.
>
> Even in one input and one output mode, firmware need to do redo, let the
> driver know when a graphics buffer could be display, so firmware would
> usually hold the graphics buffer(frame) until its display time.
>

Okay, so that hold would be for frame buffers, not bitstream buffers, right?
But yeah, I see that then it could hold onto those buffers until it's
their turn to display and it could be a bigger number of frames,
depending on the complexity of the codec.

> Besides, I hate the driver occupied a large of memory without user's
> order. I would like to drop those internal buffers.

I think this is one reason to migrate to the stateless decoder design.

> > The most realistic scenario would be for stateful encoders which could
> > keep some input buffers as reference frames for further encoding, but
> > then would this patch actually work for them? It would make
> > __v4l2_m2m_try_queue never add the context to the job_queue if there
> > are some buffers in that hw_queue list.
> why?
> >
> > Maybe what I need here are actual patches modifying some existing
> > drivers. Randy, would you be able to include that in the next version?
> May not. The Synaptics VideoSmart is a secure video platform(DRM), I
> could release a snapshot of the driver when I got the permission, that
> would be after the official release of the SDK.
> But you may not be able to compile it because we have our own TEE
> interface(not optee), also running it because the trusted app would be
> signed with a per-device key.

Could you modify another, already existing driver then?

> > Thanks.
> >
> > Best regards,
> > Tomasz
> >
> >>
> >> Nicolas
> >>
> >>>
> >>> Best regards,
> >>> Tomasz
> >>>
> >>>> Nicolas
> >>>>>
> >>>>>>
> >>>>>> Signed-off-by: Hsia-Jun(Randy) Li <randy.li@synaptics.com>
> >>>>>> ---
> >>>>>>   drivers/media/v4l2-core/v4l2-mem2mem.c | 25 +++++++++++++++++--------
> >>>>>>   include/media/v4l2-mem2mem.h           | 10 +++++++++-
> >>>>>>   2 files changed, 26 insertions(+), 9 deletions(-)
> >>>>>>
> >>>>>> diff --git a/drivers/media/v4l2-core/v4l2-mem2mem.c b/drivers/media/v4l2-core/v4l2-mem2mem.c
> >>>>>> index c771aba42015..b4151147d5bd 100644
> >>>>>> --- a/drivers/media/v4l2-core/v4l2-mem2mem.c
> >>>>>> +++ b/drivers/media/v4l2-core/v4l2-mem2mem.c
> >>>>>> @@ -321,15 +321,21 @@ static void __v4l2_m2m_try_queue(struct v4l2_m2m_dev *m2m_dev,
> >>>>>>              goto job_unlock;
> >>>>>>      }
> >>>>>>
> >>>>>> -   src = v4l2_m2m_next_src_buf(m2m_ctx);
> >>>>>> -   dst = v4l2_m2m_next_dst_buf(m2m_ctx);
> >>>>>> -   if (!src && !m2m_ctx->out_q_ctx.buffered) {
> >>>>>> -           dprintk("No input buffers available\n");
> >>>>>> -           goto job_unlock;
> >>>>>> +   if (list_empty(&m2m_ctx->out_q_ctx.hw_queue)) {
> >>>>>> +           src = v4l2_m2m_next_src_buf(m2m_ctx);
> >>>>>> +
> >>>>>> +           if (!src && !m2m_ctx->out_q_ctx.buffered) {
> >>>>>> +                   dprintk("No input buffers available\n");
> >>>>>> +                   goto job_unlock;
> >>>>>> +           }
> >>>>>>      }
> >>>>>> -   if (!dst && !m2m_ctx->cap_q_ctx.buffered) {
> >>>>>> -           dprintk("No output buffers available\n");
> >>>>>> -           goto job_unlock;
> >>>>>> +
> >>>>>> +   if (list_empty(&m2m_ctx->cap_q_ctx.hw_queue)) {
> >>>>>> +           dst = v4l2_m2m_next_dst_buf(m2m_ctx);
> >>>>>> +           if (!dst && !m2m_ctx->cap_q_ctx.buffered) {
> >>>>>> +                   dprintk("No output buffers available\n");
> >>>>>> +                   goto job_unlock;
> >>>>>> +           }
> >>>>>>      }
> >>>>>
> >>>>> src and dst would be referenced unitialized below if neither of the
> >>>>> above ifs hits...
> >>>>>
> >>>>> Best regards,
> >>>>> Tomasz
> >>>>>
> >>>>>>
> >>>>>>      m2m_ctx->new_frame = true;
> >>>>>> @@ -896,6 +902,7 @@ int v4l2_m2m_streamoff(struct file *file, struct v4l2_m2m_ctx *m2m_ctx,
> >>>>>>      INIT_LIST_HEAD(&q_ctx->rdy_queue);
> >>>>>>      q_ctx->num_rdy = 0;
> >>>>>>      spin_unlock_irqrestore(&q_ctx->rdy_spinlock, flags);
> >>>>>> +   INIT_LIST_HEAD(&q_ctx->hw_queue);
> >>>>>>
> >>>>>>      if (m2m_dev->curr_ctx == m2m_ctx) {
> >>>>>>              m2m_dev->curr_ctx = NULL;
> >>>>>> @@ -1234,6 +1241,8 @@ struct v4l2_m2m_ctx *v4l2_m2m_ctx_init(struct v4l2_m2m_dev *m2m_dev,
> >>>>>>
> >>>>>>      INIT_LIST_HEAD(&out_q_ctx->rdy_queue);
> >>>>>>      INIT_LIST_HEAD(&cap_q_ctx->rdy_queue);
> >>>>>> +   INIT_LIST_HEAD(&out_q_ctx->hw_queue);
> >>>>>> +   INIT_LIST_HEAD(&cap_q_ctx->hw_queue);
> >>>>>>      spin_lock_init(&out_q_ctx->rdy_spinlock);
> >>>>>>      spin_lock_init(&cap_q_ctx->rdy_spinlock);
> >>>>>>
> >>>>>> diff --git a/include/media/v4l2-mem2mem.h b/include/media/v4l2-mem2mem.h
> >>>>>> index d6c8eb2b5201..2342656e582d 100644
> >>>>>> --- a/include/media/v4l2-mem2mem.h
> >>>>>> +++ b/include/media/v4l2-mem2mem.h
> >>>>>> @@ -53,9 +53,16 @@ struct v4l2_m2m_dev;
> >>>>>>    * processed
> >>>>>>    *
> >>>>>>    * @q:             pointer to struct &vb2_queue
> >>>>>> - * @rdy_queue:     List of V4L2 mem-to-mem queues
> >>>>>> + * @rdy_queue:     List of V4L2 mem-to-mem queues. If v4l2_m2m_buf_queue() is
> >>>>>> + *         called in struct vb2_ops->buf_queue(), the buffer enqueued
> >>>>>> + *         by user would be added to this list.
> >>>>>>    * @rdy_spinlock: spin lock to protect the struct usage
> >>>>>>    * @num_rdy:       number of buffers ready to be processed
> >>>>>> + * @hw_queue:      A list for tracking the buffer is occupied by the hardware
> >>>>>> + *                 (or device's firmware). A buffer could only be in either
> >>>>>> + *                 this list or @rdy_queue.
> >>>>>> + *                 Driver may choose not to use this list while uses its own
> >>>>>> + *                 private data to do this work.
> >>>>>>    * @buffered:      is the queue buffered?
> >>>>>>    *
> >>>>>>    * Queue for buffers ready to be processed as soon as this
> >>>>>> @@ -68,6 +75,7 @@ struct v4l2_m2m_queue_ctx {
> >>>>>>      struct list_head        rdy_queue;
> >>>>>>      spinlock_t              rdy_spinlock;
> >>>>>>      u8                      num_rdy;
> >>>>>> +   struct list_head        hw_queue;
> >>>>>>      bool                    buffered;
> >>>>>>   };
> >>>>>>
> >>>>>> --
> >>>>>> 2.17.1
> >>>>>>
> >>>>
> >>
>
> --
> Hsia-Jun(Randy) Li
Hsia-Jun Li July 28, 2023, 7:37 a.m. UTC | #13
On 7/28/23 15:26, Tomasz Figa wrote:
> CAUTION: Email originated externally, do not click links or open attachments unless you recognize the sender and know the content is safe.
> 
> 
> On Fri, Jul 28, 2023 at 4:09 PM Hsia-Jun Li <Randy.Li@synaptics.com> wrote:
>>
>>
>>
>> On 7/28/23 12:43, Tomasz Figa wrote:
>>> CAUTION: Email originated externally, do not click links or open attachments unless you recognize the sender and know the content is safe.
>>>
>>>
>>> On Fri, Jul 28, 2023 at 1:58 AM Nicolas Dufresne <nicolas@ndufresne.ca> wrote:
>>>>
>>>> Le jeudi 27 juillet 2023 à 16:43 +0900, Tomasz Figa a écrit :
>>>>> On Mon, Jul 17, 2023 at 11:07 PM Nicolas Dufresne <nicolas@ndufresne.ca> wrote:
>>>>>>
>>>>>> Le mercredi 12 juillet 2023 à 09:33 +0000, Tomasz Figa a écrit :
>>>>>>> On Tue, Jul 04, 2023 at 12:00:38PM +0800, Hsia-Jun Li wrote:
>>>>>>>> From: "Hsia-Jun(Randy) Li" <randy.li@synaptics.com>
>>>>>>>>
>>>>>>>> Many drivers have to create its own buf_struct for a
>>>>>>>> vb2_queue to track such a state. Also driver has to
>>>>>>>> iterate over rdy_queue every times to find out a buffer
>>>>>>>> which is not sent to hardware(or firmware), this new
>>>>>>>> list just offers the driver a place to store the buffer
>>>>>>>> that hardware(firmware) has acknowledged.
>>>>>>>>
>>>>>>>> One important advance about this list, it doesn't like
>>>>>>>> rdy_queue which both bottom half of the user calling
>>>>>>>> could operate it, while the v4l2 worker would as well.
>>>>>>>> The v4l2 core could only operate this queue when its
>>>>>>>> v4l2_context is not running, the driver would only
>>>>>>>> access this new hw_queue in its own worker.
>>>>>>>
>>>>>>> Could you describe in what case such a list would be useful for a
>>>>>>> mem2mem driver?
>>>>>>
>>>>>> Today all driver must track buffers that are "owned by the hardware". This is a
>>>>>> concept dictated by the m2m framework and enforced through the ACTIVE flag. All
>>>>>> buffers from this list must be mark as done/error/queued after streamoff of the
>>>>>> respective queue in order to acknowledge that they are no longer in use by the
>>>>>> HW. Not doing so will warn:
>>>>>>
>>>>>>     videobuf2_common: driver bug: stop_streaming operation is leaving buf ...
>>>>>>
>>>>>> Though, there is no queue to easily iterate them. All driver endup having their
>>>>>> own queue, or just leaving the buffers in the rdy_queue (which isn't better).
>>>>>>
>>>>>
>>>>> Thanks for the explanation. I see how it could be useful now.
>>>>>
>>>>> Although I guess this is a problem specifically for hardware (or
>>>>> firmware) which can internally queue more than 1 buffer, right?
>>>>> Otherwise the current buffer could just stay at the top of the
>>>>> rdy_queue until it's removed by the driver's completion handler,
>>>>> timeout/error handler or context destruction.
>>>>
>>>> Correct, its only an issue when you need to process multiple src buffers before
>>>> producing a dst buffer. If affects stateful decoder, stateful encoders and
>>>> deinterlacer as far as I'm aware.
>>>
>>> Is it actually necessary to keep those buffers in a list in that case, though?
>>> I can see that a deinterlacer would indeed need 2 input buffers to
>>> perform the deinterlacing operation, but those would be just known to
>>> the driver, since it's running the task currently.
>>> For a stateful decoder, wouldn't it just consume the bitstream buffer
>>> (producing something partially decoded to its own internal buffers)
>>> and return it shortly?
>> Display re-order. Firmware could do such batch work, taking a few
>> bitstream buffer, then output a list graphics buffer in the display
>> order also discard the usage of the non-display buffer when it is
>> removed from dpb.
>>
>> Even in one input and one output mode, firmware need to do redo, let the
>> driver know when a graphics buffer could be display, so firmware would
>> usually hold the graphics buffer(frame) until its display time.
>>
> 
> Okay, so that hold would be for frame buffers, not bitstream buffers, right?
For the 1:1 model, decoder won't hold the input(OUTPUT queue) buffer 
usually.
While for the VP9, we have a super frame and temporal unit packing for 
AV1 which break the current API requirement for an AU in a buffer. The 
hardware would trigger multiple work for that(that means multiple irqs 
ack for a usual devices).
For the encoder, it is a different story.
> But yeah, I see that then it could hold onto those buffers until it's
> their turn to display and it could be a bigger number of frames,
> depending on the complexity of the codec.
> 
>> Besides, I hate the driver occupied a large of memory without user's
>> order. I would like to drop those internal buffers.
> 
> I think this is one reason to migrate to the stateless decoder design.
> 
I didn't know such plan here. I don't think the current stateless API 
could export the reconstruction buffers for encoder or post-processing 
buffer for decoder to us.
>>> The most realistic scenario would be for stateful encoders which could
>>> keep some input buffers as reference frames for further encoding, but
>>> then would this patch actually work for them? It would make
>>> __v4l2_m2m_try_queue never add the context to the job_queue if there
>>> are some buffers in that hw_queue list.
>> why?
>>>
>>> Maybe what I need here are actual patches modifying some existing
>>> drivers. Randy, would you be able to include that in the next version?
>> May not. The Synaptics VideoSmart is a secure video platform(DRM), I
>> could release a snapshot of the driver when I got the permission, that
>> would be after the official release of the SDK.
>> But you may not be able to compile it because we have our own TEE
>> interface(not optee), also running it because the trusted app would be
>> signed with a per-device key.
> 
> Could you modify another, already existing driver then?
> 
>>> Thanks.
>>>
>>> Best regards,
>>> Tomasz
>>>
>>>>
>>>> Nicolas
>>>>
>>>>>
>>>>> Best regards,
>>>>> Tomasz
>>>>>
>>>>>> Nicolas
>>>>>>>
>>>>>>>>
>>>>>>>> Signed-off-by: Hsia-Jun(Randy) Li <randy.li@synaptics.com>
>>>>>>>> ---
>>>>>>>>    drivers/media/v4l2-core/v4l2-mem2mem.c | 25 +++++++++++++++++--------
>>>>>>>>    include/media/v4l2-mem2mem.h           | 10 +++++++++-
>>>>>>>>    2 files changed, 26 insertions(+), 9 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/drivers/media/v4l2-core/v4l2-mem2mem.c b/drivers/media/v4l2-core/v4l2-mem2mem.c
>>>>>>>> index c771aba42015..b4151147d5bd 100644
>>>>>>>> --- a/drivers/media/v4l2-core/v4l2-mem2mem.c
>>>>>>>> +++ b/drivers/media/v4l2-core/v4l2-mem2mem.c
>>>>>>>> @@ -321,15 +321,21 @@ static void __v4l2_m2m_try_queue(struct v4l2_m2m_dev *m2m_dev,
>>>>>>>>               goto job_unlock;
>>>>>>>>       }
>>>>>>>>
>>>>>>>> -   src = v4l2_m2m_next_src_buf(m2m_ctx);
>>>>>>>> -   dst = v4l2_m2m_next_dst_buf(m2m_ctx);
>>>>>>>> -   if (!src && !m2m_ctx->out_q_ctx.buffered) {
>>>>>>>> -           dprintk("No input buffers available\n");
>>>>>>>> -           goto job_unlock;
>>>>>>>> +   if (list_empty(&m2m_ctx->out_q_ctx.hw_queue)) {
>>>>>>>> +           src = v4l2_m2m_next_src_buf(m2m_ctx);
>>>>>>>> +
>>>>>>>> +           if (!src && !m2m_ctx->out_q_ctx.buffered) {
>>>>>>>> +                   dprintk("No input buffers available\n");
>>>>>>>> +                   goto job_unlock;
>>>>>>>> +           }
>>>>>>>>       }
>>>>>>>> -   if (!dst && !m2m_ctx->cap_q_ctx.buffered) {
>>>>>>>> -           dprintk("No output buffers available\n");
>>>>>>>> -           goto job_unlock;
>>>>>>>> +
>>>>>>>> +   if (list_empty(&m2m_ctx->cap_q_ctx.hw_queue)) {
>>>>>>>> +           dst = v4l2_m2m_next_dst_buf(m2m_ctx);
>>>>>>>> +           if (!dst && !m2m_ctx->cap_q_ctx.buffered) {
>>>>>>>> +                   dprintk("No output buffers available\n");
>>>>>>>> +                   goto job_unlock;
>>>>>>>> +           }
>>>>>>>>       }
>>>>>>>
>>>>>>> src and dst would be referenced unitialized below if neither of the
>>>>>>> above ifs hits...
>>>>>>>
>>>>>>> Best regards,
>>>>>>> Tomasz
>>>>>>>
>>>>>>>>
>>>>>>>>       m2m_ctx->new_frame = true;
>>>>>>>> @@ -896,6 +902,7 @@ int v4l2_m2m_streamoff(struct file *file, struct v4l2_m2m_ctx *m2m_ctx,
>>>>>>>>       INIT_LIST_HEAD(&q_ctx->rdy_queue);
>>>>>>>>       q_ctx->num_rdy = 0;
>>>>>>>>       spin_unlock_irqrestore(&q_ctx->rdy_spinlock, flags);
>>>>>>>> +   INIT_LIST_HEAD(&q_ctx->hw_queue);
>>>>>>>>
>>>>>>>>       if (m2m_dev->curr_ctx == m2m_ctx) {
>>>>>>>>               m2m_dev->curr_ctx = NULL;
>>>>>>>> @@ -1234,6 +1241,8 @@ struct v4l2_m2m_ctx *v4l2_m2m_ctx_init(struct v4l2_m2m_dev *m2m_dev,
>>>>>>>>
>>>>>>>>       INIT_LIST_HEAD(&out_q_ctx->rdy_queue);
>>>>>>>>       INIT_LIST_HEAD(&cap_q_ctx->rdy_queue);
>>>>>>>> +   INIT_LIST_HEAD(&out_q_ctx->hw_queue);
>>>>>>>> +   INIT_LIST_HEAD(&cap_q_ctx->hw_queue);
>>>>>>>>       spin_lock_init(&out_q_ctx->rdy_spinlock);
>>>>>>>>       spin_lock_init(&cap_q_ctx->rdy_spinlock);
>>>>>>>>
>>>>>>>> diff --git a/include/media/v4l2-mem2mem.h b/include/media/v4l2-mem2mem.h
>>>>>>>> index d6c8eb2b5201..2342656e582d 100644
>>>>>>>> --- a/include/media/v4l2-mem2mem.h
>>>>>>>> +++ b/include/media/v4l2-mem2mem.h
>>>>>>>> @@ -53,9 +53,16 @@ struct v4l2_m2m_dev;
>>>>>>>>     * processed
>>>>>>>>     *
>>>>>>>>     * @q:             pointer to struct &vb2_queue
>>>>>>>> - * @rdy_queue:     List of V4L2 mem-to-mem queues
>>>>>>>> + * @rdy_queue:     List of V4L2 mem-to-mem queues. If v4l2_m2m_buf_queue() is
>>>>>>>> + *         called in struct vb2_ops->buf_queue(), the buffer enqueued
>>>>>>>> + *         by user would be added to this list.
>>>>>>>>     * @rdy_spinlock: spin lock to protect the struct usage
>>>>>>>>     * @num_rdy:       number of buffers ready to be processed
>>>>>>>> + * @hw_queue:      A list for tracking the buffer is occupied by the hardware
>>>>>>>> + *                 (or device's firmware). A buffer could only be in either
>>>>>>>> + *                 this list or @rdy_queue.
>>>>>>>> + *                 Driver may choose not to use this list while uses its own
>>>>>>>> + *                 private data to do this work.
>>>>>>>>     * @buffered:      is the queue buffered?
>>>>>>>>     *
>>>>>>>>     * Queue for buffers ready to be processed as soon as this
>>>>>>>> @@ -68,6 +75,7 @@ struct v4l2_m2m_queue_ctx {
>>>>>>>>       struct list_head        rdy_queue;
>>>>>>>>       spinlock_t              rdy_spinlock;
>>>>>>>>       u8                      num_rdy;
>>>>>>>> +   struct list_head        hw_queue;
>>>>>>>>       bool                    buffered;
>>>>>>>>    };
>>>>>>>>
>>>>>>>> --
>>>>>>>> 2.17.1
>>>>>>>>
>>>>>>
>>>>
>>
>> --
>> Hsia-Jun(Randy) Li
Nicolas Dufresne July 28, 2023, 4:09 p.m. UTC | #14
Le vendredi 28 juillet 2023 à 13:43 +0900, Tomasz Figa a écrit :
> On Fri, Jul 28, 2023 at 1:58 AM Nicolas Dufresne <nicolas@ndufresne.ca> wrote:
> > 
> > Le jeudi 27 juillet 2023 à 16:43 +0900, Tomasz Figa a écrit :
> > > On Mon, Jul 17, 2023 at 11:07 PM Nicolas Dufresne <nicolas@ndufresne.ca> wrote:
> > > > 
> > > > Le mercredi 12 juillet 2023 à 09:33 +0000, Tomasz Figa a écrit :
> > > > > On Tue, Jul 04, 2023 at 12:00:38PM +0800, Hsia-Jun Li wrote:
> > > > > > From: "Hsia-Jun(Randy) Li" <randy.li@synaptics.com>
> > > > > > 
> > > > > > Many drivers have to create its own buf_struct for a
> > > > > > vb2_queue to track such a state. Also driver has to
> > > > > > iterate over rdy_queue every times to find out a buffer
> > > > > > which is not sent to hardware(or firmware), this new
> > > > > > list just offers the driver a place to store the buffer
> > > > > > that hardware(firmware) has acknowledged.
> > > > > > 
> > > > > > One important advance about this list, it doesn't like
> > > > > > rdy_queue which both bottom half of the user calling
> > > > > > could operate it, while the v4l2 worker would as well.
> > > > > > The v4l2 core could only operate this queue when its
> > > > > > v4l2_context is not running, the driver would only
> > > > > > access this new hw_queue in its own worker.
> > > > > 
> > > > > Could you describe in what case such a list would be useful for a
> > > > > mem2mem driver?
> > > > 
> > > > Today all driver must track buffers that are "owned by the hardware". This is a
> > > > concept dictated by the m2m framework and enforced through the ACTIVE flag. All
> > > > buffers from this list must be mark as done/error/queued after streamoff of the
> > > > respective queue in order to acknowledge that they are no longer in use by the
> > > > HW. Not doing so will warn:
> > > > 
> > > >   videobuf2_common: driver bug: stop_streaming operation is leaving buf ...
> > > > 
> > > > Though, there is no queue to easily iterate them. All driver endup having their
> > > > own queue, or just leaving the buffers in the rdy_queue (which isn't better).
> > > > 
> > > 
> > > Thanks for the explanation. I see how it could be useful now.
> > > 
> > > Although I guess this is a problem specifically for hardware (or
> > > firmware) which can internally queue more than 1 buffer, right?
> > > Otherwise the current buffer could just stay at the top of the
> > > rdy_queue until it's removed by the driver's completion handler,
> > > timeout/error handler or context destruction.
> > 
> > Correct, its only an issue when you need to process multiple src buffers before
> > producing a dst buffer. If affects stateful decoder, stateful encoders and
> > deinterlacer as far as I'm aware.
> 
> Is it actually necessary to keep those buffers in a list in that case, though?
> I can see that a deinterlacer would indeed need 2 input buffers to
> perform the deinterlacing operation, but those would be just known to
> the driver, since it's running the task currently.
> For a stateful decoder, wouldn't it just consume the bitstream buffer
> (producing something partially decoded to its own internal buffers)
> and return it shortly?

In practice, in stateful decoder, we pace the consumption of input buffers,
otherwise we just endup consuming the entire video into a ring buffer, which
makes operation like seeks quite heavy and cause CPU spikes.

That being said, I'm not sure how useful a list would be for bitstream buffers.
At the moment, in my current work, I'm leaving buffers in the ready queue, and
just tagging the one I have already copied into the ring buffer. And I remove
them from the ready list, when the related data has been decoded. This is when I
actually copy the timestamp from src to dst buffer. So in short, I don't use an
extra list, but use some marking on the buffers though, to remember which one
have already been copied. This is specific to ring buffer based codecs of
course.

The one where a second list helps is for display picture buffers. When a buffer
has been filled, if its in the ready queue, I currently remove that buffer and
put it in a custom list. It will then be removed when/if the firmware decides to
display it. It may also never be displayed, and reused by the firmware. I short,
these are the frame "owned" by the firmware and containing valid pixels. The rdy
list contains free pictures buffers, and the pixels are undefined.

Maybe, and I'm ready to try, I could also leave them in ready queue and opt for
marking and a counter. As I'm using a job_ready() function, its my driver that
decides if a device_run() should be executed or not. So what matters is
basically if there is a free buffer for a new decode operation, and a counter of
filled but not displayed buffer could probably do that.

> The most realistic scenario would be for stateful encoders which could
> keep some input buffers as reference frames for further encoding, but
> then would this patch actually work for them? It would make
> __v4l2_m2m_try_queue never add the context to the job_queue if there
> are some buffers in that hw_queue list.

Encoders have 3 set of buffers, despite m2m having two queues. OUTPUT buffers
are the pictures, there is a set of internal reconstruction buffers, and finally
the CAPTURE buffers are the bitstream. Bitstream buffers are subject to
reordering, so conceptually the firmware holds more then 1, and reconstruction
buffers are completely hidden.

> 
> Maybe what I need here are actual patches modifying some existing
> drivers. Randy, would you be able to include that in the next version?
> Thanks.

Agreed.

> 
> Best regards,
> Tomasz
> 
> > 
> > Nicolas
> > 
> > > 
> > > Best regards,
> > > Tomasz
> > > 
> > > > Nicolas
> > > > > 
> > > > > > 
> > > > > > Signed-off-by: Hsia-Jun(Randy) Li <randy.li@synaptics.com>
> > > > > > ---
> > > > > >  drivers/media/v4l2-core/v4l2-mem2mem.c | 25 +++++++++++++++++--------
> > > > > >  include/media/v4l2-mem2mem.h           | 10 +++++++++-
> > > > > >  2 files changed, 26 insertions(+), 9 deletions(-)
> > > > > > 
> > > > > > diff --git a/drivers/media/v4l2-core/v4l2-mem2mem.c b/drivers/media/v4l2-core/v4l2-mem2mem.c
> > > > > > index c771aba42015..b4151147d5bd 100644
> > > > > > --- a/drivers/media/v4l2-core/v4l2-mem2mem.c
> > > > > > +++ b/drivers/media/v4l2-core/v4l2-mem2mem.c
> > > > > > @@ -321,15 +321,21 @@ static void __v4l2_m2m_try_queue(struct v4l2_m2m_dev *m2m_dev,
> > > > > >             goto job_unlock;
> > > > > >     }
> > > > > > 
> > > > > > -   src = v4l2_m2m_next_src_buf(m2m_ctx);
> > > > > > -   dst = v4l2_m2m_next_dst_buf(m2m_ctx);
> > > > > > -   if (!src && !m2m_ctx->out_q_ctx.buffered) {
> > > > > > -           dprintk("No input buffers available\n");
> > > > > > -           goto job_unlock;
> > > > > > +   if (list_empty(&m2m_ctx->out_q_ctx.hw_queue)) {
> > > > > > +           src = v4l2_m2m_next_src_buf(m2m_ctx);
> > > > > > +
> > > > > > +           if (!src && !m2m_ctx->out_q_ctx.buffered) {
> > > > > > +                   dprintk("No input buffers available\n");
> > > > > > +                   goto job_unlock;
> > > > > > +           }
> > > > > >     }
> > > > > > -   if (!dst && !m2m_ctx->cap_q_ctx.buffered) {
> > > > > > -           dprintk("No output buffers available\n");
> > > > > > -           goto job_unlock;
> > > > > > +
> > > > > > +   if (list_empty(&m2m_ctx->cap_q_ctx.hw_queue)) {
> > > > > > +           dst = v4l2_m2m_next_dst_buf(m2m_ctx);
> > > > > > +           if (!dst && !m2m_ctx->cap_q_ctx.buffered) {
> > > > > > +                   dprintk("No output buffers available\n");
> > > > > > +                   goto job_unlock;
> > > > > > +           }
> > > > > >     }
> > > > > 
> > > > > src and dst would be referenced unitialized below if neither of the
> > > > > above ifs hits...
> > > > > 
> > > > > Best regards,
> > > > > Tomasz
> > > > > 
> > > > > > 
> > > > > >     m2m_ctx->new_frame = true;
> > > > > > @@ -896,6 +902,7 @@ int v4l2_m2m_streamoff(struct file *file, struct v4l2_m2m_ctx *m2m_ctx,
> > > > > >     INIT_LIST_HEAD(&q_ctx->rdy_queue);
> > > > > >     q_ctx->num_rdy = 0;
> > > > > >     spin_unlock_irqrestore(&q_ctx->rdy_spinlock, flags);
> > > > > > +   INIT_LIST_HEAD(&q_ctx->hw_queue);
> > > > > > 
> > > > > >     if (m2m_dev->curr_ctx == m2m_ctx) {
> > > > > >             m2m_dev->curr_ctx = NULL;
> > > > > > @@ -1234,6 +1241,8 @@ struct v4l2_m2m_ctx *v4l2_m2m_ctx_init(struct v4l2_m2m_dev *m2m_dev,
> > > > > > 
> > > > > >     INIT_LIST_HEAD(&out_q_ctx->rdy_queue);
> > > > > >     INIT_LIST_HEAD(&cap_q_ctx->rdy_queue);
> > > > > > +   INIT_LIST_HEAD(&out_q_ctx->hw_queue);
> > > > > > +   INIT_LIST_HEAD(&cap_q_ctx->hw_queue);
> > > > > >     spin_lock_init(&out_q_ctx->rdy_spinlock);
> > > > > >     spin_lock_init(&cap_q_ctx->rdy_spinlock);
> > > > > > 
> > > > > > diff --git a/include/media/v4l2-mem2mem.h b/include/media/v4l2-mem2mem.h
> > > > > > index d6c8eb2b5201..2342656e582d 100644
> > > > > > --- a/include/media/v4l2-mem2mem.h
> > > > > > +++ b/include/media/v4l2-mem2mem.h
> > > > > > @@ -53,9 +53,16 @@ struct v4l2_m2m_dev;
> > > > > >   * processed
> > > > > >   *
> > > > > >   * @q:             pointer to struct &vb2_queue
> > > > > > - * @rdy_queue:     List of V4L2 mem-to-mem queues
> > > > > > + * @rdy_queue:     List of V4L2 mem-to-mem queues. If v4l2_m2m_buf_queue() is
> > > > > > + *         called in struct vb2_ops->buf_queue(), the buffer enqueued
> > > > > > + *         by user would be added to this list.
> > > > > >   * @rdy_spinlock: spin lock to protect the struct usage
> > > > > >   * @num_rdy:       number of buffers ready to be processed
> > > > > > + * @hw_queue:      A list for tracking the buffer is occupied by the hardware
> > > > > > + *                 (or device's firmware). A buffer could only be in either
> > > > > > + *                 this list or @rdy_queue.
> > > > > > + *                 Driver may choose not to use this list while uses its own
> > > > > > + *                 private data to do this work.
> > > > > >   * @buffered:      is the queue buffered?
> > > > > >   *
> > > > > >   * Queue for buffers ready to be processed as soon as this
> > > > > > @@ -68,6 +75,7 @@ struct v4l2_m2m_queue_ctx {
> > > > > >     struct list_head        rdy_queue;
> > > > > >     spinlock_t              rdy_spinlock;
> > > > > >     u8                      num_rdy;
> > > > > > +   struct list_head        hw_queue;
> > > > > >     bool                    buffered;
> > > > > >  };
> > > > > > 
> > > > > > --
> > > > > > 2.17.1
> > > > > > 
> > > > 
> >
Nicolas Dufresne July 28, 2023, 4:19 p.m. UTC | #15
Le vendredi 28 juillet 2023 à 15:37 +0800, Hsia-Jun Li a écrit :
> > I think this is one reason to migrate to the stateless decoder design.
> > 
> I didn't know such plan here. I don't think the current stateless API 
> could export the reconstruction buffers for encoder or post-processing 
> buffer for decoder to us.

Someone suggested introduce auxiliary queues in our meeting in Lyon a while ago,
but I bet everyone got too busy with finalizing APIs, fixing fluster tests etc.
The suggestion felt like it would be possible to add it after the fact. This was
also being discussed in the context of supporting multi-scalers (standalone our
inline with the codec, like VC8000D+). It could also cover for primary and
secondary buffers, along with encoder primary, and reconstruction buffers, but
also auxiliary reference data. This would also be needed to properly support
Vulkan Video fwiw, and could also help with a transition to DMABuf Heaps and
memory accounting.

I've also had corridor discussion around having multi-instance media constroller
devices. It wasn't clear how to bind the media instance to the video node
instances, but assuming there is a way, it would be a tad more flexible (but
massively more complex).

Nicolas
Randy Li Aug. 3, 2023, 4:16 p.m. UTC | #16
On 2023/7/29 00:19, Nicolas Dufresne wrote:
> Le vendredi 28 juillet 2023 à 15:37 +0800, Hsia-Jun Li a écrit :
>>> I think this is one reason to migrate to the stateless decoder design.
>>>
>> I didn't know such plan here. I don't think the current stateless API
>> could export the reconstruction buffers for encoder or post-processing
>> buffer for decoder to us.
> Someone suggested introduce auxiliary queues in our meeting in Lyon a while ago,
> but I bet everyone got too busy with finalizing APIs, fixing fluster tests etc.
> The suggestion felt like it would be possible to add it after the fact. This was
> also being discussed in the context of supporting multi-scalers (standalone our
> inline with the codec, like VC8000D+). It could also cover for primary and
> secondary buffers, along with encoder primary, and reconstruction buffers, but
> also auxiliary reference data. This would also be needed to properly support
> Vulkan Video fwiw, and could also help with a transition to DMABuf Heaps and
> memory accounting.
>
> I've also had corridor discussion around having multi-instance media constroller
> devices. It wasn't clear how to bind the media instance to the video node
> instances, but assuming there is a way, it would be a tad more flexible (but
> massively more complex).

I think we should answer to those questions before we decided what we want:

A. Should a queue only has the buffers for the same format and sizes?

B. How does an application handle those drivers requests additional queue?

C. How to sync multiple buffers in a v4l2 job.

I asked the same question A when I discuss this with media: v4l2: Add 
DELETE_BUF ioctl.

If we would not add extra queue here, how does the driver filter out the 
most proper buffer for the current hardware output(CAPTURE) buffer.

If we have multiple queues in a direction, how to make driver select 
between them?


The question B is the debt we made, some applications have gotten used 
to the case they can't control the lifetime of reconstruction buffer in 
encoding or turn the post-processing off when the display pipeline could 
support tile format output.

We know allow the userspace could decide where we allocate those 
buffers, but could the userspace decided not to handle their lifetime?


The question C may more be more related to the complex case like camera 
senor and ISP. With this auxiliary queue, multiple video nodes are not 
necessary anymore.

But ISP may not request all the data finish its path, ex. the ISP are 
not satisfied with the focus point that its senor detected or the light 
level, it may just drop the image data then shot again.

Also the poll event could only tell us which direction could do the 
dequeue/enqueue work, it can't tell us which queue is ready. Should we 
introduce something likes sync point(fence fd) here?


We may lead way to V4L3 as Tomasz suggested although I don't want to 
take the risk to be. If we would make a V4L3 like thing, we have better 
to decide it correct and could handle any future problem.

>
> Nicolas
>
Nicolas Dufresne Aug. 4, 2023, 8:42 p.m. UTC | #17
Hi Randy,

Le vendredi 04 août 2023 à 00:16 +0800, Randy Li a écrit :
> On 2023/7/29 00:19, Nicolas Dufresne wrote:
> > Le vendredi 28 juillet 2023 à 15:37 +0800, Hsia-Jun Li a écrit :
> > > > I think this is one reason to migrate to the stateless decoder design.
> > > > 
> > > I didn't know such plan here. I don't think the current stateless API
> > > could export the reconstruction buffers for encoder or post-processing
> > > buffer for decoder to us.
> > Someone suggested introduce auxiliary queues in our meeting in Lyon a while ago,
> > but I bet everyone got too busy with finalizing APIs, fixing fluster tests etc.
> > The suggestion felt like it would be possible to add it after the fact. This was
> > also being discussed in the context of supporting multi-scalers (standalone our
> > inline with the codec, like VC8000D+). It could also cover for primary and
> > secondary buffers, along with encoder primary, and reconstruction buffers, but
> > also auxiliary reference data. This would also be needed to properly support
> > Vulkan Video fwiw, and could also help with a transition to DMABuf Heaps and
> > memory accounting.
> > 
> > I've also had corridor discussion around having multi-instance media constroller
> > devices. It wasn't clear how to bind the media instance to the video node
> > instances, but assuming there is a way, it would be a tad more flexible (but
> > massively more complex).
> 
> I think we should answer to those questions before we decided what we want:
> 
> A. Should a queue only has the buffers for the same format and sizes?

I initially started answering these, but ended up concluding this is more some
sort of personal notes. I believe the discussion is diverging. Remember that in
an existing API, one cannot fix all theoretical issues in one go. In order to
move forward, you have to tackle very specific use case and find a way forward.
If you are to break compatibility as much as you suggest, you'd rather look into
writing a new API.

[...]
diff mbox series

Patch

diff --git a/drivers/media/v4l2-core/v4l2-mem2mem.c b/drivers/media/v4l2-core/v4l2-mem2mem.c
index c771aba42015..b4151147d5bd 100644
--- a/drivers/media/v4l2-core/v4l2-mem2mem.c
+++ b/drivers/media/v4l2-core/v4l2-mem2mem.c
@@ -321,15 +321,21 @@  static void __v4l2_m2m_try_queue(struct v4l2_m2m_dev *m2m_dev,
 		goto job_unlock;
 	}
 
-	src = v4l2_m2m_next_src_buf(m2m_ctx);
-	dst = v4l2_m2m_next_dst_buf(m2m_ctx);
-	if (!src && !m2m_ctx->out_q_ctx.buffered) {
-		dprintk("No input buffers available\n");
-		goto job_unlock;
+	if (list_empty(&m2m_ctx->out_q_ctx.hw_queue)) {
+		src = v4l2_m2m_next_src_buf(m2m_ctx);
+
+		if (!src && !m2m_ctx->out_q_ctx.buffered) {
+			dprintk("No input buffers available\n");
+			goto job_unlock;
+		}
 	}
-	if (!dst && !m2m_ctx->cap_q_ctx.buffered) {
-		dprintk("No output buffers available\n");
-		goto job_unlock;
+
+	if (list_empty(&m2m_ctx->cap_q_ctx.hw_queue)) {
+		dst = v4l2_m2m_next_dst_buf(m2m_ctx);
+		if (!dst && !m2m_ctx->cap_q_ctx.buffered) {
+			dprintk("No output buffers available\n");
+			goto job_unlock;
+		}
 	}
 
 	m2m_ctx->new_frame = true;
@@ -896,6 +902,7 @@  int v4l2_m2m_streamoff(struct file *file, struct v4l2_m2m_ctx *m2m_ctx,
 	INIT_LIST_HEAD(&q_ctx->rdy_queue);
 	q_ctx->num_rdy = 0;
 	spin_unlock_irqrestore(&q_ctx->rdy_spinlock, flags);
+	INIT_LIST_HEAD(&q_ctx->hw_queue);
 
 	if (m2m_dev->curr_ctx == m2m_ctx) {
 		m2m_dev->curr_ctx = NULL;
@@ -1234,6 +1241,8 @@  struct v4l2_m2m_ctx *v4l2_m2m_ctx_init(struct v4l2_m2m_dev *m2m_dev,
 
 	INIT_LIST_HEAD(&out_q_ctx->rdy_queue);
 	INIT_LIST_HEAD(&cap_q_ctx->rdy_queue);
+	INIT_LIST_HEAD(&out_q_ctx->hw_queue);
+	INIT_LIST_HEAD(&cap_q_ctx->hw_queue);
 	spin_lock_init(&out_q_ctx->rdy_spinlock);
 	spin_lock_init(&cap_q_ctx->rdy_spinlock);
 
diff --git a/include/media/v4l2-mem2mem.h b/include/media/v4l2-mem2mem.h
index d6c8eb2b5201..2342656e582d 100644
--- a/include/media/v4l2-mem2mem.h
+++ b/include/media/v4l2-mem2mem.h
@@ -53,9 +53,16 @@  struct v4l2_m2m_dev;
  *	processed
  *
  * @q:		pointer to struct &vb2_queue
- * @rdy_queue:	List of V4L2 mem-to-mem queues
+ * @rdy_queue:	List of V4L2 mem-to-mem queues. If v4l2_m2m_buf_queue() is
+ *		called in struct vb2_ops->buf_queue(), the buffer enqueued
+ *		by user would be added to this list.
  * @rdy_spinlock: spin lock to protect the struct usage
  * @num_rdy:	number of buffers ready to be processed
+ * @hw_queue:	A list for tracking the buffer is occupied by the hardware
+ * 		(or device's firmware). A buffer could only be in either
+ * 		this list or @rdy_queue.
+ * 		Driver may choose not to use this list while uses its own
+ * 		private data to do this work.
  * @buffered:	is the queue buffered?
  *
  * Queue for buffers ready to be processed as soon as this
@@ -68,6 +75,7 @@  struct v4l2_m2m_queue_ctx {
 	struct list_head	rdy_queue;
 	spinlock_t		rdy_spinlock;
 	u8			num_rdy;
+	struct list_head	hw_queue;
 	bool			buffered;
 };