diff mbox series

media: vb2: unify calling of set_page_dirty_lock

Message ID 20170829112603.32732-1-stanimir.varbanov@linaro.org
State Accepted
Commit c0cb76589c77b9a05f6271691be8707104ff0241
Headers show
Series media: vb2: unify calling of set_page_dirty_lock | expand

Commit Message

Stanimir Varbanov Aug. 29, 2017, 11:26 a.m. UTC
Currently videobuf2-dma-sg checks for dma direction for
every single page and videobuf2-dc lacks any dma direction
checks and calls set_page_dirty_lock unconditionally.

Thus unify and align the invocations of set_page_dirty_lock
for videobuf2-dc, videobuf2-sg  memory allocators with
videobuf2-vmalloc, i.e. the pattern used in vmalloc has been
copied to dc and dma-sg.

Suggested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Stanimir Varbanov <stanimir.varbanov@linaro.org>

---
 drivers/media/v4l2-core/videobuf2-dma-contig.c | 6 ++++--
 drivers/media/v4l2-core/videobuf2-dma-sg.c     | 7 +++----
 2 files changed, 7 insertions(+), 6 deletions(-)

-- 
2.11.0

Comments

Stanimir Varbanov Oct. 10, 2017, 7:42 a.m. UTC | #1
Marek,

Any comments?

On 08/29/2017 02:26 PM, Stanimir Varbanov wrote:
> Currently videobuf2-dma-sg checks for dma direction for

> every single page and videobuf2-dc lacks any dma direction

> checks and calls set_page_dirty_lock unconditionally.

> 

> Thus unify and align the invocations of set_page_dirty_lock

> for videobuf2-dc, videobuf2-sg  memory allocators with

> videobuf2-vmalloc, i.e. the pattern used in vmalloc has been

> copied to dc and dma-sg.

> 

> Suggested-by: Marek Szyprowski <m.szyprowski@samsung.com>

> Signed-off-by: Stanimir Varbanov <stanimir.varbanov@linaro.org>

> ---

>  drivers/media/v4l2-core/videobuf2-dma-contig.c | 6 ++++--

>  drivers/media/v4l2-core/videobuf2-dma-sg.c     | 7 +++----

>  2 files changed, 7 insertions(+), 6 deletions(-)

> 

> diff --git a/drivers/media/v4l2-core/videobuf2-dma-contig.c b/drivers/media/v4l2-core/videobuf2-dma-contig.c

> index 9f389f36566d..696e24f9128d 100644

> --- a/drivers/media/v4l2-core/videobuf2-dma-contig.c

> +++ b/drivers/media/v4l2-core/videobuf2-dma-contig.c

> @@ -434,8 +434,10 @@ static void vb2_dc_put_userptr(void *buf_priv)

>  		pages = frame_vector_pages(buf->vec);

>  		/* sgt should exist only if vector contains pages... */

>  		BUG_ON(IS_ERR(pages));

> -		for (i = 0; i < frame_vector_count(buf->vec); i++)

> -			set_page_dirty_lock(pages[i]);

> +		if (buf->dma_dir == DMA_FROM_DEVICE ||

> +		    buf->dma_dir == DMA_BIDIRECTIONAL)

> +			for (i = 0; i < frame_vector_count(buf->vec); i++)

> +				set_page_dirty_lock(pages[i]);

>  		sg_free_table(sgt);

>  		kfree(sgt);

>  	}

> diff --git a/drivers/media/v4l2-core/videobuf2-dma-sg.c b/drivers/media/v4l2-core/videobuf2-dma-sg.c

> index 6808231a6bdc..753ed3138dcc 100644

> --- a/drivers/media/v4l2-core/videobuf2-dma-sg.c

> +++ b/drivers/media/v4l2-core/videobuf2-dma-sg.c

> @@ -292,11 +292,10 @@ static void vb2_dma_sg_put_userptr(void *buf_priv)

>  	if (buf->vaddr)

>  		vm_unmap_ram(buf->vaddr, buf->num_pages);

>  	sg_free_table(buf->dma_sgt);

> -	while (--i >= 0) {

> -		if (buf->dma_dir == DMA_FROM_DEVICE ||

> -		    buf->dma_dir == DMA_BIDIRECTIONAL)

> +	if (buf->dma_dir == DMA_FROM_DEVICE ||

> +	    buf->dma_dir == DMA_BIDIRECTIONAL)

> +		while (--i >= 0)

>  			set_page_dirty_lock(buf->pages[i]);

> -	}

>  	vb2_destroy_framevec(buf->vec);

>  	kfree(buf);

>  }

> 


-- 
regards,
Stan
Marek Szyprowski Oct. 10, 2017, 8:01 a.m. UTC | #2
Hi Stanimir,

On 2017-10-10 09:42, Stanimir Varbanov wrote:
> Marek,

>

> Any comments?


Oh, I thought that this one has been already merged. If not (yet),
here is my ack.

> On 08/29/2017 02:26 PM, Stanimir Varbanov wrote:

>> Currently videobuf2-dma-sg checks for dma direction for

>> every single page and videobuf2-dc lacks any dma direction

>> checks and calls set_page_dirty_lock unconditionally.

>>

>> Thus unify and align the invocations of set_page_dirty_lock

>> for videobuf2-dc, videobuf2-sg  memory allocators with

>> videobuf2-vmalloc, i.e. the pattern used in vmalloc has been

>> copied to dc and dma-sg.

>>

>> Suggested-by: Marek Szyprowski <m.szyprowski@samsung.com>

>> Signed-off-by: Stanimir Varbanov <stanimir.varbanov@linaro.org>


Acked-by: Marek Szyprowski <m.szyprowski@samsung.com>


>> ---

>>   drivers/media/v4l2-core/videobuf2-dma-contig.c | 6 ++++--

>>   drivers/media/v4l2-core/videobuf2-dma-sg.c     | 7 +++----

>>   2 files changed, 7 insertions(+), 6 deletions(-)

>>

>> diff --git a/drivers/media/v4l2-core/videobuf2-dma-contig.c b/drivers/media/v4l2-core/videobuf2-dma-contig.c

>> index 9f389f36566d..696e24f9128d 100644

>> --- a/drivers/media/v4l2-core/videobuf2-dma-contig.c

>> +++ b/drivers/media/v4l2-core/videobuf2-dma-contig.c

>> @@ -434,8 +434,10 @@ static void vb2_dc_put_userptr(void *buf_priv)

>>   		pages = frame_vector_pages(buf->vec);

>>   		/* sgt should exist only if vector contains pages... */

>>   		BUG_ON(IS_ERR(pages));

>> -		for (i = 0; i < frame_vector_count(buf->vec); i++)

>> -			set_page_dirty_lock(pages[i]);

>> +		if (buf->dma_dir == DMA_FROM_DEVICE ||

>> +		    buf->dma_dir == DMA_BIDIRECTIONAL)

>> +			for (i = 0; i < frame_vector_count(buf->vec); i++)

>> +				set_page_dirty_lock(pages[i]);

>>   		sg_free_table(sgt);

>>   		kfree(sgt);

>>   	}

>> diff --git a/drivers/media/v4l2-core/videobuf2-dma-sg.c b/drivers/media/v4l2-core/videobuf2-dma-sg.c

>> index 6808231a6bdc..753ed3138dcc 100644

>> --- a/drivers/media/v4l2-core/videobuf2-dma-sg.c

>> +++ b/drivers/media/v4l2-core/videobuf2-dma-sg.c

>> @@ -292,11 +292,10 @@ static void vb2_dma_sg_put_userptr(void *buf_priv)

>>   	if (buf->vaddr)

>>   		vm_unmap_ram(buf->vaddr, buf->num_pages);

>>   	sg_free_table(buf->dma_sgt);

>> -	while (--i >= 0) {

>> -		if (buf->dma_dir == DMA_FROM_DEVICE ||

>> -		    buf->dma_dir == DMA_BIDIRECTIONAL)

>> +	if (buf->dma_dir == DMA_FROM_DEVICE ||

>> +	    buf->dma_dir == DMA_BIDIRECTIONAL)

>> +		while (--i >= 0)

>>   			set_page_dirty_lock(buf->pages[i]);

>> -	}

>>   	vb2_destroy_framevec(buf->vec);

>>   	kfree(buf);

>>   }

>>


Best regards
-- 
Marek Szyprowski, PhD
Samsung R&D Institute Poland
Sakari Ailus Oct. 10, 2017, 8:54 a.m. UTC | #3
On Tue, Oct 10, 2017 at 10:01:36AM +0200, Marek Szyprowski wrote:
> Hi Stanimir,

> 

> On 2017-10-10 09:42, Stanimir Varbanov wrote:

> > Marek,

> > 

> > Any comments?

> 

> Oh, I thought that this one has been already merged. If not (yet),

> here is my ack.

> 

> > On 08/29/2017 02:26 PM, Stanimir Varbanov wrote:

> > > Currently videobuf2-dma-sg checks for dma direction for

> > > every single page and videobuf2-dc lacks any dma direction

> > > checks and calls set_page_dirty_lock unconditionally.

> > > 

> > > Thus unify and align the invocations of set_page_dirty_lock

> > > for videobuf2-dc, videobuf2-sg  memory allocators with

> > > videobuf2-vmalloc, i.e. the pattern used in vmalloc has been

> > > copied to dc and dma-sg.

> > > 

> > > Suggested-by: Marek Szyprowski <m.szyprowski@samsung.com>

> > > Signed-off-by: Stanimir Varbanov <stanimir.varbanov@linaro.org>

> 

> Acked-by: Marek Szyprowski <m.szyprowski@samsung.com>


Acked-by: Sakari Ailus <sakari.ailus@linux.intel.com>


-- 
Sakari Ailus
e-mail: sakari.ailus@iki.fi
Nicolas Dufresne Oct. 10, 2017, 3:40 p.m. UTC | #4
Le mardi 29 août 2017 à 14:26 +0300, Stanimir Varbanov a écrit :
> Currently videobuf2-dma-sg checks for dma direction for

> every single page and videobuf2-dc lacks any dma direction

> checks and calls set_page_dirty_lock unconditionally.

> 

> Thus unify and align the invocations of set_page_dirty_lock

> for videobuf2-dc, videobuf2-sg  memory allocators with

> videobuf2-vmalloc, i.e. the pattern used in vmalloc has been

> copied to dc and dma-sg.


Just before we go too far in "doing like vmalloc", I would like to
share this small video that displays coherency issues when rendering
vmalloc-backed DMABufs over various KMS/DRM drivers. I can reproduce this
easily with Intel and MSM display drivers using UVC or Vivid as source.

The following is an HDMI capture of the following GStreamer pipeline
running on Dragonboard 410c.

    gst-launch-1.0 -v v4l2src device=/dev/video2 ! video/x-raw,format=NV16,width=1280,height=720 ! kmssink
    https://people.collabora.com/~nicolas/vmalloc-issue.mov

Feedback on this issue would be more than welcome. It's not clear to me
whose bug this is (v4l2, drm or iommu). The software is unlikely to be
blamed as this same pipeline works fine with non-vmalloc based sources.

regards,
Nicolas

> 

> Suggested-by: Marek Szyprowski <m.szyprowski@samsung.com>

> Signed-off-by: Stanimir Varbanov <stanimir.varbanov@linaro.org>

> ---

>  drivers/media/v4l2-core/videobuf2-dma-contig.c | 6 ++++--

>  drivers/media/v4l2-core/videobuf2-dma-sg.c     | 7 +++----

>  2 files changed, 7 insertions(+), 6 deletions(-)

> 

> diff --git a/drivers/media/v4l2-core/videobuf2-dma-contig.c b/drivers/media/v4l2-core/videobuf2-dma-contig.c

> index 9f389f36566d..696e24f9128d 100644

> --- a/drivers/media/v4l2-core/videobuf2-dma-contig.c

> +++ b/drivers/media/v4l2-core/videobuf2-dma-contig.c

> @@ -434,8 +434,10 @@ static void vb2_dc_put_userptr(void *buf_priv)

>  		pages = frame_vector_pages(buf->vec);

>  		/* sgt should exist only if vector contains pages... */

>  		BUG_ON(IS_ERR(pages));

> -		for (i = 0; i < frame_vector_count(buf->vec); i++)

> -			set_page_dirty_lock(pages[i]);

> +		if (buf->dma_dir == DMA_FROM_DEVICE ||

> +		    buf->dma_dir == DMA_BIDIRECTIONAL)

> +			for (i = 0; i < frame_vector_count(buf->vec); i++)

> +				set_page_dirty_lock(pages[i]);

>  		sg_free_table(sgt);

>  		kfree(sgt);

>  	}

> diff --git a/drivers/media/v4l2-core/videobuf2-dma-sg.c b/drivers/media/v4l2-core/videobuf2-dma-sg.c

> index 6808231a6bdc..753ed3138dcc 100644

> --- a/drivers/media/v4l2-core/videobuf2-dma-sg.c

> +++ b/drivers/media/v4l2-core/videobuf2-dma-sg.c

> @@ -292,11 +292,10 @@ static void vb2_dma_sg_put_userptr(void *buf_priv)

>  	if (buf->vaddr)

>  		vm_unmap_ram(buf->vaddr, buf->num_pages);

>  	sg_free_table(buf->dma_sgt);

> -	while (--i >= 0) {

> -		if (buf->dma_dir == DMA_FROM_DEVICE ||

> -		    buf->dma_dir == DMA_BIDIRECTIONAL)

> +	if (buf->dma_dir == DMA_FROM_DEVICE ||

> +	    buf->dma_dir == DMA_BIDIRECTIONAL)

> +		while (--i >= 0)

>  			set_page_dirty_lock(buf->pages[i]);

> -	}

>  	vb2_destroy_framevec(buf->vec);

>  	kfree(buf);

>  }
Sakari Ailus Oct. 15, 2017, 8:40 p.m. UTC | #5
Hi Nicolas,

On Tue, Oct 10, 2017 at 11:40:10AM -0400, Nicolas Dufresne wrote:
> Le mardi 29 août 2017 à 14:26 +0300, Stanimir Varbanov a écrit :

> > Currently videobuf2-dma-sg checks for dma direction for

> > every single page and videobuf2-dc lacks any dma direction

> > checks and calls set_page_dirty_lock unconditionally.

> > 

> > Thus unify and align the invocations of set_page_dirty_lock

> > for videobuf2-dc, videobuf2-sg  memory allocators with

> > videobuf2-vmalloc, i.e. the pattern used in vmalloc has been

> > copied to dc and dma-sg.

> 

> Just before we go too far in "doing like vmalloc", I would like to

> share this small video that display coherency issues when rendering

> vmalloc backed DMABuf over various KMS/DRM driver. I can reproduce this

> easily with Intel and MSM display drivers using UVC or Vivid as source.

> 

> The following is an HDMI capture of the following GStreamer pipeline

> running on Dragonboard 410c.

> 

>     gst-launch-1.0 -v v4l2src device=/dev/video2 ! video/x-raw,format=NV16,width=1280,height=720 ! kmssink

>     https://people.collabora.com/~nicolas/vmalloc-issue.mov

> 

> Feedback on this issue would be more then welcome. It's not clear to me

> who's bug is this (v4l2, drm or iommu). The software is unlikely to be

> blamed as this same pipeline works fine with non-vmalloc based sources.


Could you elaborate this a little bit more? Which Intel CPU do you have
there?

Where are the buffers allocated for this GStreamer pipeline, is it v4l2src
or another element or somewhere else?

-- 
Kind regards,

Sakari Ailus
e-mail: sakari.ailus@iki.fi
Nicolas Dufresne Oct. 15, 2017, 11:09 p.m. UTC | #6
Le dimanche 15 octobre 2017 à 23:40 +0300, Sakari Ailus a écrit :
> Hi Nicolas,

> 

> On Tue, Oct 10, 2017 at 11:40:10AM -0400, Nicolas Dufresne wrote:

> > Le mardi 29 août 2017 à 14:26 +0300, Stanimir Varbanov a écrit :

> > > Currently videobuf2-dma-sg checks for dma direction for

> > > every single page and videobuf2-dc lacks any dma direction

> > > checks and calls set_page_dirty_lock unconditionally.

> > > 

> > > Thus unify and align the invocations of set_page_dirty_lock

> > > for videobuf2-dc, videobuf2-sg  memory allocators with

> > > videobuf2-vmalloc, i.e. the pattern used in vmalloc has been

> > > copied to dc and dma-sg.

> > 

> > Just before we go too far in "doing like vmalloc", I would like to

> > share this small video that display coherency issues when rendering

> > vmalloc backed DMABuf over various KMS/DRM driver. I can reproduce

> > this

> > easily with Intel and MSM display drivers using UVC or Vivid as

> > source.

> > 

> > The following is an HDMI capture of the following GStreamer

> > pipeline

> > running on Dragonboard 410c.

> > 

> >     gst-launch-1.0 -v v4l2src device=/dev/video2 ! video/x-

> > raw,format=NV16,width=1280,height=720 ! kmssink

> >     https://people.collabora.com/~nicolas/vmalloc-issue.mov

> > 

> > Feedback on this issue would be more then welcome. It's not clear

> > to me

> > who's bug is this (v4l2, drm or iommu). The software is unlikely to

> > be

> > blamed as this same pipeline works fine with non-vmalloc based

> > sources.

> 

> Could you elaborate this a little bit more? Which Intel CPU do you

> have

> there?


I have tested with Skylake and Ivy Bridge and on Dragonboard 410c
(Qualcomm APQ8016 SoC) (same visual artefact)

> 

> Where are the buffers allocated for this GStreamer pipeline, is it

> v4l2src

> or another element or somewhere else?


This is from V4L2 capture driver, exported as DMABuf, drivers are UVC
and VIVID, both are using the vmalloc allocator.

Nicolas
Sakari Ailus Oct. 16, 2017, 11:24 a.m. UTC | #7
On Sun, Oct 15, 2017 at 07:09:24PM -0400, Nicolas Dufresne wrote:
> Le dimanche 15 octobre 2017 à 23:40 +0300, Sakari Ailus a écrit :

> > Hi Nicolas,

> > 

> > On Tue, Oct 10, 2017 at 11:40:10AM -0400, Nicolas Dufresne wrote:

> > > Le mardi 29 août 2017 à 14:26 +0300, Stanimir Varbanov a écrit :

> > > > Currently videobuf2-dma-sg checks for dma direction for

> > > > every single page and videobuf2-dc lacks any dma direction

> > > > checks and calls set_page_dirty_lock unconditionally.

> > > > 

> > > > Thus unify and align the invocations of set_page_dirty_lock

> > > > for videobuf2-dc, videobuf2-sg  memory allocators with

> > > > videobuf2-vmalloc, i.e. the pattern used in vmalloc has been

> > > > copied to dc and dma-sg.

> > > 

> > > Just before we go too far in "doing like vmalloc", I would like to

> > > share this small video that display coherency issues when rendering

> > > vmalloc backed DMABuf over various KMS/DRM driver. I can reproduce

> > > this

> > > easily with Intel and MSM display drivers using UVC or Vivid as

> > > source.

> > > 

> > > The following is an HDMI capture of the following GStreamer

> > > pipeline

> > > running on Dragonboard 410c.

> > > 

> > >     gst-launch-1.0 -v v4l2src device=/dev/video2 ! video/x-

> > > raw,format=NV16,width=1280,height=720 ! kmssink

> > >     https://people.collabora.com/~nicolas/vmalloc-issue.mov

> > > 

> > > Feedback on this issue would be more then welcome. It's not clear

> > > to me

> > > who's bug is this (v4l2, drm or iommu). The software is unlikely to

> > > be

> > > blamed as this same pipeline works fine with non-vmalloc based

> > > sources.

> > 

> > Could you elaborate this a little bit more? Which Intel CPU do you

> > have

> > there?

> 

> I have tested with Skylake and Ivy Bridge and on Dragonboard 410c

> (Qualcomm APQ8016 SoC) (same visual artefact)


Do you still have both corrupted and uncompressed frames around? Those
would be interesting to look at.

-- 
Sakari Ailus
e-mail: sakari.ailus@iki.fi
Sakari Ailus Oct. 17, 2017, 10:14 a.m. UTC | #8
On Sun, Oct 15, 2017 at 07:09:24PM -0400, Nicolas Dufresne wrote:
> Le dimanche 15 octobre 2017 à 23:40 +0300, Sakari Ailus a écrit :

> > Hi Nicolas,

> > 

> > On Tue, Oct 10, 2017 at 11:40:10AM -0400, Nicolas Dufresne wrote:

> > > Le mardi 29 août 2017 à 14:26 +0300, Stanimir Varbanov a écrit :

> > > > Currently videobuf2-dma-sg checks for dma direction for

> > > > every single page and videobuf2-dc lacks any dma direction

> > > > checks and calls set_page_dirty_lock unconditionally.

> > > > 

> > > > Thus unify and align the invocations of set_page_dirty_lock

> > > > for videobuf2-dc, videobuf2-sg  memory allocators with

> > > > videobuf2-vmalloc, i.e. the pattern used in vmalloc has been

> > > > copied to dc and dma-sg.

> > > 

> > > Just before we go too far in "doing like vmalloc", I would like to

> > > share this small video that display coherency issues when rendering

> > > vmalloc backed DMABuf over various KMS/DRM driver. I can reproduce

> > > this

> > > easily with Intel and MSM display drivers using UVC or Vivid as

> > > source.

> > > 

> > > The following is an HDMI capture of the following GStreamer

> > > pipeline

> > > running on Dragonboard 410c.

> > > 

> > >     gst-launch-1.0 -v v4l2src device=/dev/video2 ! video/x-

> > > raw,format=NV16,width=1280,height=720 ! kmssink

> > >     https://people.collabora.com/~nicolas/vmalloc-issue.mov

> > > 

> > > Feedback on this issue would be more then welcome. It's not clear

> > > to me

> > > who's bug is this (v4l2, drm or iommu). The software is unlikely to

> > > be

> > > blamed as this same pipeline works fine with non-vmalloc based

> > > sources.

> > 

> > Could you elaborate this a little bit more? Which Intel CPU do you

> > have

> > there?

> 

> I have tested with Skylake and Ivy Bridge and on Dragonboard 410c

> (Qualcomm APQ8016 SoC) (same visual artefact)


I presume kmssink draws on the display. Which GPU did you use?

-- 
Sakari Ailus
e-mail: sakari.ailus@iki.fi
Nicolas Dufresne Oct. 17, 2017, 2:19 p.m. UTC | #9
Le mardi 17 octobre 2017 à 13:14 +0300, Sakari Ailus a écrit :
> On Sun, Oct 15, 2017 at 07:09:24PM -0400, Nicolas Dufresne wrote:

> > Le dimanche 15 octobre 2017 à 23:40 +0300, Sakari Ailus a écrit :

> > > Hi Nicolas,

> > > 

> > > On Tue, Oct 10, 2017 at 11:40:10AM -0400, Nicolas Dufresne wrote:

> > > > Le mardi 29 août 2017 à 14:26 +0300, Stanimir Varbanov a écrit :

> > > > > Currently videobuf2-dma-sg checks for dma direction for

> > > > > every single page and videobuf2-dc lacks any dma direction

> > > > > checks and calls set_page_dirty_lock unconditionally.

> > > > > 

> > > > > Thus unify and align the invocations of set_page_dirty_lock

> > > > > for videobuf2-dc, videobuf2-sg  memory allocators with

> > > > > videobuf2-vmalloc, i.e. the pattern used in vmalloc has been

> > > > > copied to dc and dma-sg.

> > > > 

> > > > Just before we go too far in "doing like vmalloc", I would like to

> > > > share this small video that display coherency issues when rendering

> > > > vmalloc backed DMABuf over various KMS/DRM driver. I can reproduce

> > > > this

> > > > easily with Intel and MSM display drivers using UVC or Vivid as

> > > > source.

> > > > 

> > > > The following is an HDMI capture of the following GStreamer

> > > > pipeline

> > > > running on Dragonboard 410c.

> > > > 

> > > >     gst-launch-1.0 -v v4l2src device=/dev/video2 ! video/x-

> > > > raw,format=NV16,width=1280,height=720 ! kmssink

> > > >     https://people.collabora.com/~nicolas/vmalloc-issue.mov

> > > > 

> > > > Feedback on this issue would be more then welcome. It's not clear

> > > > to me

> > > > who's bug is this (v4l2, drm or iommu). The software is unlikely to

> > > > be

> > > > blamed as this same pipeline works fine with non-vmalloc based

> > > > sources.

> > > 

> > > Could you elaborate this a little bit more? Which Intel CPU do you

> > > have

> > > there?

> > 

> > I have tested with Skylake and Ivy Bridge and on Dragonboard 410c

> > (Qualcomm APQ8016 SoC) (same visual artefact)

> 

> I presume kmssink draws on the display. Which GPU did you use?


In order, GPU will be Iris Pro 580, Intel® Ivybridge Mobile and an
Adreno (3x ?). Why does it matter ? I'm pretty sure the GPU is not used
on the DB410c for this use case.

regards,
Nicolas
Stanimir Varbanov Oct. 18, 2017, 8:34 a.m. UTC | #10
On 10/17/2017 05:19 PM, Nicolas Dufresne wrote:
> Le mardi 17 octobre 2017 à 13:14 +0300, Sakari Ailus a écrit :

>> On Sun, Oct 15, 2017 at 07:09:24PM -0400, Nicolas Dufresne wrote:

>>> Le dimanche 15 octobre 2017 à 23:40 +0300, Sakari Ailus a écrit :

>>>> Hi Nicolas,

>>>>

>>>> On Tue, Oct 10, 2017 at 11:40:10AM -0400, Nicolas Dufresne wrote:

>>>>> Le mardi 29 août 2017 à 14:26 +0300, Stanimir Varbanov a écrit :

>>>>>> Currently videobuf2-dma-sg checks for dma direction for

>>>>>> every single page and videobuf2-dc lacks any dma direction

>>>>>> checks and calls set_page_dirty_lock unconditionally.

>>>>>>

>>>>>> Thus unify and align the invocations of set_page_dirty_lock

>>>>>> for videobuf2-dc, videobuf2-sg  memory allocators with

>>>>>> videobuf2-vmalloc, i.e. the pattern used in vmalloc has been

>>>>>> copied to dc and dma-sg.

>>>>>

>>>>> Just before we go too far in "doing like vmalloc", I would like to

>>>>> share this small video that display coherency issues when rendering

>>>>> vmalloc backed DMABuf over various KMS/DRM driver. I can reproduce

>>>>> this

>>>>> easily with Intel and MSM display drivers using UVC or Vivid as

>>>>> source.

>>>>>

>>>>> The following is an HDMI capture of the following GStreamer

>>>>> pipeline

>>>>> running on Dragonboard 410c.

>>>>>

>>>>>     gst-launch-1.0 -v v4l2src device=/dev/video2 ! video/x-

>>>>> raw,format=NV16,width=1280,height=720 ! kmssink

>>>>>     https://people.collabora.com/~nicolas/vmalloc-issue.mov

>>>>>

>>>>> Feedback on this issue would be more then welcome. It's not clear

>>>>> to me

>>>>> who's bug is this (v4l2, drm or iommu). The software is unlikely to

>>>>> be

>>>>> blamed as this same pipeline works fine with non-vmalloc based

>>>>> sources.

>>>>

>>>> Could you elaborate this a little bit more? Which Intel CPU do you

>>>> have

>>>> there?

>>>

>>> I have tested with Skylake and Ivy Bridge and on Dragonboard 410c

>>> (Qualcomm APQ8016 SoC) (same visual artefact)

>>

>> I presume kmssink draws on the display. Which GPU did you use?

> 

> In order, GPU will be Iris Pro 580, Intel® Ivybridge Mobile and an

> Adreno (3x ?). Why does it matter ? I'm pretty sure the GPU is not used

> on the DB410c for this use case.


Nicolas, to me this looks like a problem in v4l2. In the case of vivid
the stats overlay (where the coherency issues are observed; most
probably the issue would be observed on the whole image, but fortunately
it is a static image pattern) is filled by the CPU, but I cannot see
where the cache is flushed. Also I'm wondering why the .finish method is
missing for dma-vmalloc mem_ops.

To be sure that the problem is in vmalloc v4l2 allocator, could you
change the allocator to dma-contig, there is a module param for that
called 'allocators'.


-- 
regards,
Stan
Nicolas Dufresne March 14, 2018, 12:44 a.m. UTC | #11
Le mercredi 18 octobre 2017 à 11:34 +0300, Stanimir Varbanov a écrit :
> 

> On 10/17/2017 05:19 PM, Nicolas Dufresne wrote:

> > Le mardi 17 octobre 2017 à 13:14 +0300, Sakari Ailus a écrit :

> > > On Sun, Oct 15, 2017 at 07:09:24PM -0400, Nicolas Dufresne wrote:

> > > > Le dimanche 15 octobre 2017 à 23:40 +0300, Sakari Ailus a écrit

> > > > :

> > > > > Hi Nicolas,

> > > > > 

> > > > > On Tue, Oct 10, 2017 at 11:40:10AM -0400, Nicolas Dufresne

> > > > > wrote:

> > > > > > Le mardi 29 août 2017 à 14:26 +0300, Stanimir Varbanov a

> > > > > > écrit :

> > > > > > > Currently videobuf2-dma-sg checks for dma direction for

> > > > > > > every single page and videobuf2-dc lacks any dma

> > > > > > > direction

> > > > > > > checks and calls set_page_dirty_lock unconditionally.

> > > > > > > 

> > > > > > > Thus unify and align the invocations of

> > > > > > > set_page_dirty_lock

> > > > > > > for videobuf2-dc, videobuf2-sg  memory allocators with

> > > > > > > videobuf2-vmalloc, i.e. the pattern used in vmalloc has

> > > > > > > been

> > > > > > > copied to dc and dma-sg.

> > > > > > 

> > > > > > Just before we go too far in "doing like vmalloc", I would

> > > > > > like to

> > > > > > share this small video that display coherency issues when

> > > > > > rendering

> > > > > > vmalloc backed DMABuf over various KMS/DRM driver. I can

> > > > > > reproduce

> > > > > > this

> > > > > > easily with Intel and MSM display drivers using UVC or

> > > > > > Vivid as

> > > > > > source.

> > > > > > 

> > > > > > The following is an HDMI capture of the following GStreamer

> > > > > > pipeline

> > > > > > running on Dragonboard 410c.

> > > > > > 

> > > > > >     gst-launch-1.0 -v v4l2src device=/dev/video2 ! video/x-

> > > > > > raw,format=NV16,width=1280,height=720 ! kmssink

> > > > > >     https://people.collabora.com/~nicolas/vmalloc-issue.mov

> > > > > > 

> > > > > > Feedback on this issue would be more then welcome. It's not

> > > > > > clear

> > > > > > to me

> > > > > > who's bug is this (v4l2, drm or iommu). The software is

> > > > > > unlikely to

> > > > > > be

> > > > > > blamed as this same pipeline works fine with non-vmalloc

> > > > > > based

> > > > > > sources.

> > > > > 

> > > > > Could you elaborate this a little bit more? Which Intel CPU

> > > > > do you

> > > > > have

> > > > > there?

> > > > 

> > > > I have tested with Skylake and Ivy Bridge and on Dragonboard

> > > > 410c

> > > > (Qualcomm APQ8016 SoC) (same visual artefact)

> > > 

> > > I presume kmssink draws on the display. Which GPU did you use?

> > 

> > In order, GPU will be Iris Pro 580, Intel® Ivybridge Mobile and an

> > Adreno (3x ?). Why does it matter ? I'm pretty sure the GPU is not

> > used

> > on the DB410c for this use case.

> 

> Nicolas, for me this looks like a problem in v4l2. In the case of

> vivid

> the stats overlay (where the coherency issues are observed, and most

> probably the issue will be observed on the whole image but

> fortunately

> it is a static image pattern) are filled by the CPU but I cannot see

> where the cache is flushed. Also I'm wondering why .finish method is

> missing for dma-vmalloc mem_ops.

> 

> To be sure that the problem is in vmalloc v4l2 allocator, could you

> change the allocator to dma-contig, there is a module param for that

> called 'allocators'.


I've looked into this again. I have hit the same issue but with CPU to
DRM, using DMABuf allocated from DRM Dumb buffers. In that case, using
DMA_BUF_IOCTL_SYNC fixes the issues.

This raises a lot of questions around the model used in V4L2. As you
mention, prepare/finish are missing in dma-vmalloc mem_ops. I'll try
implementing that; it should cover my initial use case, but then I
believe it will fail if my pipeline is:

  UVC -> in plane CPU modification -> DRM

Because we don't implement begin/end_cpu_access on our exported DMABuf.
It should also fail for the following use case:

  UVC (importer) -> DRM

The UVC driver won't call the remote dmabuf begin/end_cpu_access methods.
This one is difficult because the UVC driver and vivid don't seem to be
aware of being an importer, an exporter, or simply exporting to the CPU
(through mmap). I believe what we have now pretty much assumes that what
we export as vmalloc is to be used by the CPU only. Also, the usual
direction used by prepare/finish ops won't work for drivers like vivid
and UVC that write into the buffers using the CPU.

To be continued ...

Nicolas
Nicolas Dufresne March 14, 2018, 2:02 a.m. UTC | #12
Le mardi 13 mars 2018 à 21:09 -0400, Nicolas Dufresne a écrit :
> > I've looked into this again. I have hit the same issue but with CPU

> > to

> > DRM, using DMABuf allocated from DRM Dumb buffers. In that case,

> > using

> > DMA_BUF_IOCTL_SYNC fixes the issues.

> > 

> > This raises a lot of question around the model used in V4L2. As you

> > mention, prepare/finish are missing in dma-vmalloc mem_ops. I'll

> > give

> > a

> > try implementing that, it should cover my initial use case, but

> > then

> > I

> > believe it will fail if my pipeline is:

> > 

> >    UVC -> in plane CPU modification -> DRM

> > 

> > Because we don't implement begin/end_cpu_access on our exported

> > DMABuf.

> > It should also fail for the following use case:

> > 

> >    UVC (importer) -> DRM

> > 

> > UVC driver won't call the remote dmabuf being/end_cpu_access

> > method.

> > This one is difficult because UVC driver and vivid don't seem to be

> > aware of being an importer, exported or simply exporting to CPU

> > (through mmap). I believe what we have now pretty much assumes the

> > what

> > we export as vmalloc is to be used by CPU only. Also, the usual

> > direction used by prepare/finish ops won't work for drivers like

> > vivid

> > and UVC that write into the buffers using the cpu.

> > 

> > To be continued ...

> 

> While I was writing that, I was already outdated, as of now, we only

> have one ops, called sync. This implements the to_cpu direction only.


Replying to myself again, obviously looking at the old videobuf code
can only get one confused.

Nicolas
diff mbox series

Patch

diff --git a/drivers/media/v4l2-core/videobuf2-dma-contig.c b/drivers/media/v4l2-core/videobuf2-dma-contig.c
index 9f389f36566d..696e24f9128d 100644
--- a/drivers/media/v4l2-core/videobuf2-dma-contig.c
+++ b/drivers/media/v4l2-core/videobuf2-dma-contig.c
@@ -434,8 +434,10 @@  static void vb2_dc_put_userptr(void *buf_priv)
 		pages = frame_vector_pages(buf->vec);
 		/* sgt should exist only if vector contains pages... */
 		BUG_ON(IS_ERR(pages));
-		for (i = 0; i < frame_vector_count(buf->vec); i++)
-			set_page_dirty_lock(pages[i]);
+		if (buf->dma_dir == DMA_FROM_DEVICE ||
+		    buf->dma_dir == DMA_BIDIRECTIONAL)
+			for (i = 0; i < frame_vector_count(buf->vec); i++)
+				set_page_dirty_lock(pages[i]);
 		sg_free_table(sgt);
 		kfree(sgt);
 	}
diff --git a/drivers/media/v4l2-core/videobuf2-dma-sg.c b/drivers/media/v4l2-core/videobuf2-dma-sg.c
index 6808231a6bdc..753ed3138dcc 100644
--- a/drivers/media/v4l2-core/videobuf2-dma-sg.c
+++ b/drivers/media/v4l2-core/videobuf2-dma-sg.c
@@ -292,11 +292,10 @@  static void vb2_dma_sg_put_userptr(void *buf_priv)
 	if (buf->vaddr)
 		vm_unmap_ram(buf->vaddr, buf->num_pages);
 	sg_free_table(buf->dma_sgt);
-	while (--i >= 0) {
-		if (buf->dma_dir == DMA_FROM_DEVICE ||
-		    buf->dma_dir == DMA_BIDIRECTIONAL)
+	if (buf->dma_dir == DMA_FROM_DEVICE ||
+	    buf->dma_dir == DMA_BIDIRECTIONAL)
+		while (--i >= 0)
 			set_page_dirty_lock(buf->pages[i]);
-	}
 	vb2_destroy_framevec(buf->vec);
 	kfree(buf);
 }