diff mbox

[08/11] v4l: vb2-dma-contig: add support for scatterlist in userptr mode

Message ID 1333634408-4960-9-git-send-email-t.stanislaws@samsung.com
State Superseded, archived
Headers show

Commit Message

Tomasz Stanislawski April 5, 2012, 2 p.m. UTC
From: Andrzej Pietrasiewicz <andrzej.p@samsung.com>

This patch introduces usage of dma_map_sg to map memory behind
a userspace pointer to a device as dma-contiguous mapping.

Signed-off-by: Andrzej Pietrasiewicz <andrzej.p@samsung.com>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
	[bugfixing]
Signed-off-by: Kamil Debski <k.debski@samsung.com>
	[bugfixing]
Signed-off-by: Tomasz Stanislawski <t.stanislaws@samsung.com>
	[add sglist subroutines/code refactoring]
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
---
 drivers/media/video/videobuf2-dma-contig.c |  282 ++++++++++++++++++++++++++--
 1 files changed, 265 insertions(+), 17 deletions(-)

Comments

Laurent Pinchart April 6, 2012, 3:02 p.m. UTC | #1
Hi Tomasz,

On Thursday 05 April 2012 16:00:05 Tomasz Stanislawski wrote:
> From: Andrzej Pietrasiewicz <andrzej.p@samsung.com>
> 
> This patch introduces usage of dma_map_sg to map memory behind
> a userspace pointer to a device as dma-contiguous mapping.
> 
> Signed-off-by: Andrzej Pietrasiewicz <andrzej.p@samsung.com>
> Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
> 	[bugfixing]
> Signed-off-by: Kamil Debski <k.debski@samsung.com>
> 	[bugfixing]
> Signed-off-by: Tomasz Stanislawski <t.stanislaws@samsung.com>
> 	[add sglist subroutines/code refactoring]
> Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
> ---
>  drivers/media/video/videobuf2-dma-contig.c |  282 +++++++++++++++++++++++--
>  1 files changed, 265 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/media/video/videobuf2-dma-contig.c
> b/drivers/media/video/videobuf2-dma-contig.c index 476e536..6ab3165 100644
> --- a/drivers/media/video/videobuf2-dma-contig.c
> +++ b/drivers/media/video/videobuf2-dma-contig.c

[snip]

> @@ -32,6 +36,98 @@ struct vb2_dc_buf {
>  };
> 
>  /*********************************************/
> +/*        scatterlist table functions        */
> +/*********************************************/
> +
> +static struct sg_table *vb2_dc_pages_to_sgt(struct page **pages,
> +	unsigned long n_pages, size_t offset, size_t offset2)

"offset2" isn't very descriptive. I would replace it with the total size of 
the buffer instead (or, alternatively, rename offset to offset_start and 
offset2 to offset_end, but I like the first option better).

> +{
> +	struct sg_table *sgt;
> +	int i, j; /* loop counters */

I don't think the comment is needed.

> +	int cur_page, chunks;

i, j, cur_page and chunks can't be negative. Could you please make them 
unsigned int (and I would order them) ?

Also, Documentation/CodingStyle favors one variable declaration per line, 
without commas for multiple declarations.

> +	int ret;
> +	struct scatterlist *s;
> +
> +	sgt = kzalloc(sizeof *sgt, GFP_KERNEL);
> +	if (!sgt)
> +		return ERR_PTR(-ENOMEM);
> +
> +	/* compute number of chunks */
> +	chunks = 1;
> +	for (i = 1; i < n_pages; ++i)
> +		if (pages[i] != pages[i - 1] + 1)
> +			++chunks;
> +
> +	ret = sg_alloc_table(sgt, chunks, GFP_KERNEL);
> +	if (ret) {
> +		kfree(sgt);
> +		return ERR_PTR(-ENOMEM);
> +	}
> +
> +	/* merging chunks and putting them into the scatterlist */
> +	cur_page = 0;
> +	for_each_sg(sgt->sgl, s, sgt->orig_nents, i) {
> +		size_t size = PAGE_SIZE;
> +
> +		for (j = cur_page + 1; j < n_pages; ++j) {
> +			if (pages[j] != pages[j - 1] + 1)
> +				break;
> +			size += PAGE_SIZE;
> +		}
> +
> +		/* cut offset if chunk starts at the first page */
> +		if (cur_page == 0)
> +			size -= offset;
> +		/* cut offset2 if chunk ends at the last page */
> +		if (j == n_pages)
> +			size -= offset2;
> +
> +		sg_set_page(s, pages[cur_page], size, offset);
> +		offset = 0;
> +		cur_page = j;
> +	}
> +
> +	return sgt;
> +}
> +
> +static void vb2_dc_release_sgtable(struct sg_table *sgt)
> +{
> +	sg_free_table(sgt);
> +	kfree(sgt);
> +}
> +
> +static void vb2_dc_sgt_foreach_page(struct sg_table *sgt,
> +	void (*cb)(struct page *pg))
> +{
> +	struct scatterlist *s;
> +	int i, j;
> +
> +	for_each_sg(sgt->sgl, s, sgt->nents, i) {
> +		struct page *page = sg_page(s);
> +		int n_pages = PAGE_ALIGN(s->offset + s->length) >> PAGE_SHIFT;
> +
> +		for (j = 0; j < n_pages; ++j, ++page)
> +			cb(page);

Same for i, j and n_pages here.

> +	}
> +}
> +
> +static unsigned long vb2_dc_get_contiguous_size(struct sg_table *sgt)
> +{
> +	struct scatterlist *s;
> +	dma_addr_t expected = sg_dma_address(sgt->sgl);
> +	int i;

Same for i here.

> +	unsigned long size = 0;
> +
> +	for_each_sg(sgt->sgl, s, sgt->nents, i) {
> +		if (sg_dma_address(s) != expected)
> +			break;
> +		expected = sg_dma_address(s) + sg_dma_len(s);
> +		size += sg_dma_len(s);
> +	}
> +	return size;
> +}
> +
> +/*********************************************/
>  /*         callbacks for all buffers         */
>  /*********************************************/
> 
> @@ -116,42 +212,194 @@ static int vb2_dc_mmap(void *buf_priv, struct
> vm_area_struct *vma) /*       callbacks for USERPTR buffers       */
>  /*********************************************/
> 
> +static inline int vma_is_io(struct vm_area_struct *vma)
> +{
> +	return !!(vma->vm_flags & (VM_IO | VM_PFNMAP));
> +}
> +
> +static struct vm_area_struct *vb2_dc_get_user_vma(
> +	unsigned long start, unsigned long size)
> +{
> +	struct vm_area_struct *vma;
> +
> +	/* current->mm->mmap_sem is taken by videobuf2 core */
> +	vma = find_vma(current->mm, start);
> +	if (!vma) {
> +		printk(KERN_ERR "no vma for address %lu\n", start);
> +		return ERR_PTR(-EFAULT);
> +	}
> +
> +	if (vma->vm_end - vma->vm_start < size) {
> +		printk(KERN_ERR "vma at %lu is too small for %lu bytes\n",
> +			start, size);
> +		return ERR_PTR(-EFAULT);
> +	}

Should we support multiple VMAs, or do you think that's not worth it ?

> +	vma = vb2_get_vma(vma);
> +	if (!vma) {
> +		printk(KERN_ERR "failed to copy vma\n");
> +		return ERR_PTR(-ENOMEM);
> +	}

I still think there's no need to copy the VMA. get_user_pages() will make sure 
the memory doesn't get paged out, and we don't need to ensure that the 
userspace mapping stays in place as our cache operations use a scatter list. 
Storing the result of vma_is_io() in vb2_dc_buf should be enough.

> +	return vma;
> +}
> +
> +static int vb2_dc_get_user_pages(unsigned long start, struct page **pages,
> +	int n_pages, struct vm_area_struct *vma, int write)
> +{
> +	int n;

n_pages and n can be unsigned (and I would rename n to i, to be coherent with 
the rest of the file).

> +
> +	if (vma_is_io(vma)) {
> +		for (n = 0; n < n_pages; ++n, start += PAGE_SIZE) {
> +			unsigned long pfn;
> +			int ret = follow_pfn(vma, start, &pfn);
> +
> +			if (ret) {
> +				printk(KERN_ERR "no page for address %lu\n",
> +					start);
> +				return ret;
> +			}
> +			pages[n] = pfn_to_page(pfn);
> +		}
> +	} else {
> +		n = get_user_pages(current, current->mm, start & PAGE_MASK,
> +			n_pages, write, 1, pages, NULL);
> +		if (n != n_pages) {
> +			printk(KERN_ERR "got only %d of %d user pages\n",
> +				n, n_pages);
> +			while (n)
> +				put_page(pages[--n]);
> +			return -EFAULT;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +static void vb2_dc_set_page_dirty(struct page *page)
> +{
> +	set_page_dirty_lock(page);
> +}
> +
> +static void vb2_dc_put_userptr(void *buf_priv)
> +{
> +	struct vb2_dc_buf *buf = buf_priv;
> +	struct sg_table *sgt = buf->dma_sgt;
> +
> +	dma_unmap_sg(buf->dev, sgt->sgl, sgt->orig_nents, buf->dma_dir);
> +	if (!vma_is_io(buf->vma)) {
> +		vb2_dc_sgt_foreach_page(sgt, vb2_dc_set_page_dirty);
> +		vb2_dc_sgt_foreach_page(sgt, put_page);

This results in two iterations over the pages. Wouldn't it better to fold the 
vb2_dc_sgt_foreach_page() function into this one, and loop once only ? 
vb2_dc_sgt_foreach_page() is also called in the cleanup path of 
vb2_dc_get_userptr(), but you have the list of pages available in the 
function, so you can iterate over it directly.

> +	}
> +
> +	vb2_dc_release_sgtable(sgt);
> +	vb2_put_vma(buf->vma);
> +	kfree(buf);
> +}
> +
>  static void *vb2_dc_get_userptr(void *alloc_ctx, unsigned long vaddr,
> -					unsigned long size, int write)
> +	unsigned long size, int write)
>  {
>  	struct vb2_dc_buf *buf;
> -	struct vm_area_struct *vma;
> -	dma_addr_t dma_addr = 0;
> -	int ret;
> +	unsigned long start, end, offset, offset2;

If you don't use the buffer size above, please rename offset2 here too (and 
avoid multiple variable declarations per line).

> +	struct page **pages;
> +	int n_pages;
> +	int ret = 0;
> +	struct sg_table *sgt;
> +	unsigned long contig_size;
> 
>  	buf = kzalloc(sizeof *buf, GFP_KERNEL);
>  	if (!buf)
>  		return ERR_PTR(-ENOMEM);
> 
> -	ret = vb2_get_contig_userptr(vaddr, size, &vma, &dma_addr);
> +	buf->dev = alloc_ctx;
> +	buf->dma_dir = write ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
> +
> +	start = (unsigned long)vaddr & PAGE_MASK;
> +	offset = (unsigned long)vaddr & ~PAGE_MASK;
> +	end = PAGE_ALIGN((unsigned long)vaddr + size);
> +	offset2 = end - (unsigned long)vaddr - size;

vaddr is already an unsigned long, there's no need to cast it.

> +	n_pages = (end - start) >> PAGE_SHIFT;
> +
> +	pages = kmalloc(n_pages * sizeof pages[0], GFP_KERNEL);
> +	if (!pages) {
> +		ret = -ENOMEM;
> +		printk(KERN_ERR "failed to allocate pages table\n");
> +		goto fail_buf;
> +	}
> +
> +	buf->vma = vb2_dc_get_user_vma(start, end - start);
> +	if (IS_ERR(buf->vma)) {
> +		printk(KERN_ERR "failed to get VMA\n");
> +		ret = PTR_ERR(buf->vma);
> +		goto fail_pages;
> +	}
> +
> +	/* extract page list from userspace mapping */
> +	ret = vb2_dc_get_user_pages(start, pages, n_pages, buf->vma, write);
>  	if (ret) {
> -		printk(KERN_ERR "Failed acquiring VMA for vaddr 0x%08lx\n",
> -				vaddr);
> -		kfree(buf);
> -		return ERR_PTR(ret);
> +		printk(KERN_ERR "failed to get user pages\n");
> +		goto fail_vma;
> +	}
> +
> +	sgt = vb2_dc_pages_to_sgt(pages, n_pages, offset, offset2);
> +	if (IS_ERR(sgt)) {
> +		printk(KERN_ERR "failed to create scatterlist table\n");
> +		ret = -ENOMEM;
> +		goto fail_get_user_pages;
> +	}
> +
> +	/* pages are no longer needed */
> +	kfree(pages);
> +	pages = NULL;
> +
> +	sgt->nents = dma_map_sg(buf->dev, sgt->sgl, sgt->orig_nents,
> +		buf->dma_dir);
> +	if (sgt->nents <= 0) {
> +		printk(KERN_ERR "failed to map scatterlist\n");
> +		ret = -EIO;
> +		goto fail_sgt;
> +	}
> +
> +	contig_size = vb2_dc_get_contiguous_size(sgt);
> +	if (contig_size < size) {
> +		printk(KERN_ERR "contiguous mapping is too small %lu/%lu\n",
> +			contig_size, size);
> +		ret = -EFAULT;
> +		goto fail_map_sg;
>  	}
> 
> +	buf->dma_addr = sg_dma_address(sgt->sgl);
>  	buf->size = size;
> -	buf->dma_addr = dma_addr;
> -	buf->vma = vma;
> +	buf->dma_sgt = sgt;
> +
> +	atomic_inc(&buf->refcount);

refcount is only used for MMAP buffers as far as I can tell, I don't think you 
need to increment refcount here.
> 
>  	return buf;
> -}
> 
> -static void vb2_dc_put_userptr(void *mem_priv)
> -{
> -	struct vb2_dc_buf *buf = mem_priv;
> +fail_map_sg:
> +	dma_unmap_sg(buf->dev, sgt->sgl, sgt->nents, buf->dma_dir);
> 
> -	if (!buf)
> -		return;
> +fail_sgt:
> +	if (!vma_is_io(buf->vma))
> +		vb2_dc_sgt_foreach_page(sgt, put_page);
> +	vb2_dc_release_sgtable(sgt);
> +
> +fail_get_user_pages:
> +	if (pages && !vma_is_io(buf->vma))
> +		while (n_pages)
> +			put_page(pages[--n_pages]);
> 
> +fail_vma:
>  	vb2_put_vma(buf->vma);
> +
> +fail_pages:
> +	kfree(pages); /* kfree is NULL-proof */
> +
> +fail_buf:
>  	kfree(buf);
> +
> +	return ERR_PTR(ret);
>  }
> 
>  /*********************************************/
Tomasz Stanislawski April 11, 2012, 10:06 a.m. UTC | #2
Hi Laurent,
Thank you for review. Your comments are very helpful :).
Take a look on the comments below.

On 04/06/2012 05:02 PM, Laurent Pinchart wrote:
> Hi Tomasz,
> 
> On Thursday 05 April 2012 16:00:05 Tomasz Stanislawski wrote:
>> From: Andrzej Pietrasiewicz <andrzej.p@samsung.com>
>>
>> This patch introduces usage of dma_map_sg to map memory behind
>> a userspace pointer to a device as dma-contiguous mapping.
>>
>> Signed-off-by: Andrzej Pietrasiewicz <andrzej.p@samsung.com>
>> Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
>> 	[bugfixing]
>> Signed-off-by: Kamil Debski <k.debski@samsung.com>
>> 	[bugfixing]
>> Signed-off-by: Tomasz Stanislawski <t.stanislaws@samsung.com>
>> 	[add sglist subroutines/code refactoring]
>> Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
>> ---
>>  drivers/media/video/videobuf2-dma-contig.c |  282 +++++++++++++++++++++++--
>>  1 files changed, 265 insertions(+), 17 deletions(-)
>>
>> diff --git a/drivers/media/video/videobuf2-dma-contig.c
>> b/drivers/media/video/videobuf2-dma-contig.c index 476e536..6ab3165 100644
>> --- a/drivers/media/video/videobuf2-dma-contig.c
>> +++ b/drivers/media/video/videobuf2-dma-contig.c
> 
> [snip]
> 
>> @@ -32,6 +36,98 @@ struct vb2_dc_buf {
>>  };
>>
>>  /*********************************************/
>> +/*        scatterlist table functions        */
>> +/*********************************************/
>> +
>> +static struct sg_table *vb2_dc_pages_to_sgt(struct page **pages,
>> +	unsigned long n_pages, size_t offset, size_t offset2)
> 
> "offset2" isn't very descriptive. I would replace it with the total size of 
> the buffer instead (or, alternatively, rename offset to offset_start and 
> offset2 to offset_end, but I like the first option better).
> 
>> +{
>> +	struct sg_table *sgt;
>> +	int i, j; /* loop counters */
> 
> I don't think the comment is needed.
> 

ok

>> +	int cur_page, chunks;
> 
> i, j, cur_page and chunks can't be negative. Could you please make them 
> unsigned int (and I would order them) ?

ok

> 
> Also, Documentation/CodingStyle favors one variable declaration per line, 
> without commas for multiple declarations.

does it include loop counters like i and j?

> 
>> +	int ret;
>> +	struct scatterlist *s;
>> +
>> +	sgt = kzalloc(sizeof *sgt, GFP_KERNEL);
>> +	if (!sgt)
>> +		return ERR_PTR(-ENOMEM);
>> +
>> +	/* compute number of chunks */
>> +	chunks = 1;
>> +	for (i = 1; i < n_pages; ++i)
>> +		if (pages[i] != pages[i - 1] + 1)
>> +			++chunks;
>> +
>> +	ret = sg_alloc_table(sgt, chunks, GFP_KERNEL);
>> +	if (ret) {
>> +		kfree(sgt);
>> +		return ERR_PTR(-ENOMEM);
>> +	}
>> +
>> +	/* merging chunks and putting them into the scatterlist */
>> +	cur_page = 0;
>> +	for_each_sg(sgt->sgl, s, sgt->orig_nents, i) {
>> +		size_t size = PAGE_SIZE;
>> +
>> +		for (j = cur_page + 1; j < n_pages; ++j) {
>> +			if (pages[j] != pages[j - 1] + 1)
>> +				break;
>> +			size += PAGE_SIZE;
>> +		}
>> +
>> +		/* cut offset if chunk starts at the first page */
>> +		if (cur_page == 0)
>> +			size -= offset;
>> +		/* cut offset2 if chunk ends at the last page */
>> +		if (j == n_pages)
>> +			size -= offset2;
>> +
>> +		sg_set_page(s, pages[cur_page], size, offset);
>> +		offset = 0;
>> +		cur_page = j;
>> +	}
>> +
>> +	return sgt;
>> +}
>> +
>> +static void vb2_dc_release_sgtable(struct sg_table *sgt)
>> +{
>> +	sg_free_table(sgt);
>> +	kfree(sgt);
>> +}
>> +
>> +static void vb2_dc_sgt_foreach_page(struct sg_table *sgt,
>> +	void (*cb)(struct page *pg))
>> +{
>> +	struct scatterlist *s;
>> +	int i, j;
>> +
>> +	for_each_sg(sgt->sgl, s, sgt->nents, i) {
>> +		struct page *page = sg_page(s);
>> +		int n_pages = PAGE_ALIGN(s->offset + s->length) >> PAGE_SHIFT;
>> +
>> +		for (j = 0; j < n_pages; ++j, ++page)
>> +			cb(page);
> 
> Same for i, j and n_pages here.
> 

ok

>> +	}
>> +}
>> +
>> +static unsigned long vb2_dc_get_contiguous_size(struct sg_table *sgt)
>> +{
>> +	struct scatterlist *s;
>> +	dma_addr_t expected = sg_dma_address(sgt->sgl);
>> +	int i;
> 
> Same for i here.
> 
>> +	unsigned long size = 0;
>> +
>> +	for_each_sg(sgt->sgl, s, sgt->nents, i) {
>> +		if (sg_dma_address(s) != expected)
>> +			break;
>> +		expected = sg_dma_address(s) + sg_dma_len(s);
>> +		size += sg_dma_len(s);
>> +	}
>> +	return size;
>> +}
>> +
>> +/*********************************************/
>>  /*         callbacks for all buffers         */
>>  /*********************************************/
>>
>> @@ -116,42 +212,194 @@ static int vb2_dc_mmap(void *buf_priv, struct
>> vm_area_struct *vma) /*       callbacks for USERPTR buffers       */
>>  /*********************************************/
>>
>> +static inline int vma_is_io(struct vm_area_struct *vma)
>> +{
>> +	return !!(vma->vm_flags & (VM_IO | VM_PFNMAP));
>> +}
>> +
>> +static struct vm_area_struct *vb2_dc_get_user_vma(
>> +	unsigned long start, unsigned long size)
>> +{
>> +	struct vm_area_struct *vma;
>> +
>> +	/* current->mm->mmap_sem is taken by videobuf2 core */
>> +	vma = find_vma(current->mm, start);
>> +	if (!vma) {
>> +		printk(KERN_ERR "no vma for address %lu\n", start);
>> +		return ERR_PTR(-EFAULT);
>> +	}
>> +
>> +	if (vma->vm_end - vma->vm_start < size) {
>> +		printk(KERN_ERR "vma at %lu is too small for %lu bytes\n",
>> +			start, size);
>> +		return ERR_PTR(-EFAULT);
>> +	}
> 
> Should we support multiple VMAs, or do you think that's not worth it ?
> 

What do you mean by multiple VMAs?

>> +	vma = vb2_get_vma(vma);
>> +	if (!vma) {
>> +		printk(KERN_ERR "failed to copy vma\n");
>> +		return ERR_PTR(-ENOMEM);
>> +	}
> 
> I still think there's no need to copy the VMA. get_user_pages() will make sure 
> the memory doesn't get paged out, and we don't need to ensure that the 
> userspace mapping stays in place as our cache operations use a scatter list. 
> Storing the result of vma_is_io() in vb2_dc_buf should be enough.

As I understand calling get_user_pages ensures that pages are not going to be
swapped or freed. I agree that it provides enough protection for the memory.

IO mappings are the problem. As you mentioned few mails ago get_page would
likely crash for such pages. AFAIK increasing reference count for VMA could be
a reliable mechanism for protecting the memory from being freed.
The problem is that VMA has no reference counters in it. Calling open ops
will protect the memory. However it will not protect VMA structure from
being freed!

Analyze following scenario:

- mmap -> returns userptr
- qbuf (userptr)
- unmap (userptr)
- dqbuf

The VMA will be destroyed at unmap but memory will not be released.
The reason is that open ops was called at qbuf.
In order to free the memory the VB2 has to call close ops. This
callback takes pointer to VMA as an argument. The original VMA
cannot be used here because it is not longer valid.

Therefore VMA has to be copied at qbuf operation. The function vb2_get_vma
is used for this purpose.

The workaround could be dropping support for IO mappings in VB2.
However it will handicap all s5p media drivers because mapping
produced by dma_mmap_coherent (aka writecombine) are IO mapping.
As result s5p-fimc could no longer create a pipeline with s5p-mfc.

Introducing DMABUF to V4L is a good alternative but only if exporting
is supported.

For now I think it is better to keep support for IO mappings. At least
until DMA mapping redesign and DMABUF exporting in V4L is merged.

> 
>> +	return vma;
>> +}
>> +
>> +static int vb2_dc_get_user_pages(unsigned long start, struct page **pages,
>> +	int n_pages, struct vm_area_struct *vma, int write)
>> +{
>> +	int n;
> 
> n_pages and n can be unsigned (and I would rename n to i, to be coherent with 
> the rest of the file).
> 

ok

>> +
>> +	if (vma_is_io(vma)) {
>> +		for (n = 0; n < n_pages; ++n, start += PAGE_SIZE) {
>> +			unsigned long pfn;
>> +			int ret = follow_pfn(vma, start, &pfn);
>> +
>> +			if (ret) {
>> +				printk(KERN_ERR "no page for address %lu\n",
>> +					start);
>> +				return ret;
>> +			}
>> +			pages[n] = pfn_to_page(pfn);
>> +		}
>> +	} else {
>> +		n = get_user_pages(current, current->mm, start & PAGE_MASK,
>> +			n_pages, write, 1, pages, NULL);
>> +		if (n != n_pages) {
>> +			printk(KERN_ERR "got only %d of %d user pages\n",
>> +				n, n_pages);
>> +			while (n)
>> +				put_page(pages[--n]);
>> +			return -EFAULT;
>> +		}
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>> +static void vb2_dc_set_page_dirty(struct page *page)
>> +{
>> +	set_page_dirty_lock(page);
>> +}
>> +
>> +static void vb2_dc_put_userptr(void *buf_priv)
>> +{
>> +	struct vb2_dc_buf *buf = buf_priv;
>> +	struct sg_table *sgt = buf->dma_sgt;
>> +
>> +	dma_unmap_sg(buf->dev, sgt->sgl, sgt->orig_nents, buf->dma_dir);
>> +	if (!vma_is_io(buf->vma)) {
>> +		vb2_dc_sgt_foreach_page(sgt, vb2_dc_set_page_dirty);
>> +		vb2_dc_sgt_foreach_page(sgt, put_page);
> 
> This results in two iterations over the pages. Wouldn't it better to fold the 
> vb2_dc_sgt_foreach_page() function into this one, and loop once only ? 
> vb2_dc_sgt_foreach_page() is also called in the cleanup path of 
> vb2_dc_get_userptr(), but you have the list of pages available in the 
> function, so you can iterate over it directly.
> 

what do you think about using function vb2_dc_put_dirty_page which
would call both set_page_dirty_lock and put_page ?

>> +	}
>> +
>> +	vb2_dc_release_sgtable(sgt);
>> +	vb2_put_vma(buf->vma);
>> +	kfree(buf);
>> +}
>> +
>>  static void *vb2_dc_get_userptr(void *alloc_ctx, unsigned long vaddr,
>> -					unsigned long size, int write)
>> +	unsigned long size, int write)
>>  {
>>  	struct vb2_dc_buf *buf;
>> -	struct vm_area_struct *vma;
>> -	dma_addr_t dma_addr = 0;
>> -	int ret;
>> +	unsigned long start, end, offset, offset2;
> 
> If you don't use the buffer size above, please rename offset2 here too (and 
> avoid multiple variable declarations per line).
> 

ok

>> +	struct page **pages;
>> +	int n_pages;
>> +	int ret = 0;
>> +	struct sg_table *sgt;
>> +	unsigned long contig_size;
>>
>>  	buf = kzalloc(sizeof *buf, GFP_KERNEL);
>>  	if (!buf)
>>  		return ERR_PTR(-ENOMEM);
>>
>> -	ret = vb2_get_contig_userptr(vaddr, size, &vma, &dma_addr);
>> +	buf->dev = alloc_ctx;
>> +	buf->dma_dir = write ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
>> +
>> +	start = (unsigned long)vaddr & PAGE_MASK;
>> +	offset = (unsigned long)vaddr & ~PAGE_MASK;
>> +	end = PAGE_ALIGN((unsigned long)vaddr + size);
>> +	offset2 = end - (unsigned long)vaddr - size;
> 
> vaddr is already an unsigned long, there's no need to cast it.
> 

roger that

>> +	n_pages = (end - start) >> PAGE_SHIFT;
>> +
>> +	pages = kmalloc(n_pages * sizeof pages[0], GFP_KERNEL);
>> +	if (!pages) {
>> +		ret = -ENOMEM;
>> +		printk(KERN_ERR "failed to allocate pages table\n");
>> +		goto fail_buf;
>> +	}
>> +
>> +	buf->vma = vb2_dc_get_user_vma(start, end - start);
>> +	if (IS_ERR(buf->vma)) {
>> +		printk(KERN_ERR "failed to get VMA\n");
>> +		ret = PTR_ERR(buf->vma);
>> +		goto fail_pages;
>> +	}
>> +
>> +	/* extract page list from userspace mapping */
>> +	ret = vb2_dc_get_user_pages(start, pages, n_pages, buf->vma, write);
>>  	if (ret) {
>> -		printk(KERN_ERR "Failed acquiring VMA for vaddr 0x%08lx\n",
>> -				vaddr);
>> -		kfree(buf);
>> -		return ERR_PTR(ret);
>> +		printk(KERN_ERR "failed to get user pages\n");
>> +		goto fail_vma;
>> +	}
>> +
>> +	sgt = vb2_dc_pages_to_sgt(pages, n_pages, offset, offset2);
>> +	if (IS_ERR(sgt)) {
>> +		printk(KERN_ERR "failed to create scatterlist table\n");
>> +		ret = -ENOMEM;
>> +		goto fail_get_user_pages;
>> +	}
>> +
>> +	/* pages are no longer needed */
>> +	kfree(pages);
>> +	pages = NULL;
>> +
>> +	sgt->nents = dma_map_sg(buf->dev, sgt->sgl, sgt->orig_nents,
>> +		buf->dma_dir);
>> +	if (sgt->nents <= 0) {
>> +		printk(KERN_ERR "failed to map scatterlist\n");
>> +		ret = -EIO;
>> +		goto fail_sgt;
>> +	}
>> +
>> +	contig_size = vb2_dc_get_contiguous_size(sgt);
>> +	if (contig_size < size) {
>> +		printk(KERN_ERR "contiguous mapping is too small %lu/%lu\n",
>> +			contig_size, size);
>> +		ret = -EFAULT;
>> +		goto fail_map_sg;
>>  	}
>>
>> +	buf->dma_addr = sg_dma_address(sgt->sgl);
>>  	buf->size = size;
>> -	buf->dma_addr = dma_addr;
>> -	buf->vma = vma;
>> +	buf->dma_sgt = sgt;
>> +
>> +	atomic_inc(&buf->refcount);
> 
> refcount is only used for MMAP buffers as far as I can tell, I don't think you 
> need to increment refcount here.

right ... thanks for spotting it.
Fortunately, that put_userptr does not check this counter. Otherwise it would
be a memleak :).

>>
>>  	return buf;
>> -}
>>
>> -static void vb2_dc_put_userptr(void *mem_priv)
>> -{
>> -	struct vb2_dc_buf *buf = mem_priv;
>> +fail_map_sg:
>> +	dma_unmap_sg(buf->dev, sgt->sgl, sgt->nents, buf->dma_dir);
>>
>> -	if (!buf)
>> -		return;
>> +fail_sgt:
>> +	if (!vma_is_io(buf->vma))
>> +		vb2_dc_sgt_foreach_page(sgt, put_page);
>> +	vb2_dc_release_sgtable(sgt);
>> +
>> +fail_get_user_pages:
>> +	if (pages && !vma_is_io(buf->vma))
>> +		while (n_pages)
>> +			put_page(pages[--n_pages]);
>>
>> +fail_vma:
>>  	vb2_put_vma(buf->vma);
>> +
>> +fail_pages:
>> +	kfree(pages); /* kfree is NULL-proof */
>> +
>> +fail_buf:
>>  	kfree(buf);
>> +
>> +	return ERR_PTR(ret);
>>  }
>>
>>  /*********************************************/

Regards,
Tomasz Stanislawski
Laurent Pinchart April 17, 2012, 12:40 a.m. UTC | #3
Hi Tomasz,

On Wednesday 11 April 2012 12:06:59 Tomasz Stanislawski wrote:
> On 04/06/2012 05:02 PM, Laurent Pinchart wrote:
> > On Thursday 05 April 2012 16:00:05 Tomasz Stanislawski wrote:

[snip]

> > Also, Documentation/CodingStyle favors one variable declaration per line,
> > without commas for multiple declarations.
> 
> does it include loop counters like i and j?

According to CodingStyle, yes :-)

[snip]

> >> +static struct vm_area_struct *vb2_dc_get_user_vma(
> >> +	unsigned long start, unsigned long size)
> >> +{
> >> +	struct vm_area_struct *vma;
> >> +
> >> +	/* current->mm->mmap_sem is taken by videobuf2 core */
> >> +	vma = find_vma(current->mm, start);
> >> +	if (!vma) {
> >> +		printk(KERN_ERR "no vma for address %lu\n", start);
> >> +		return ERR_PTR(-EFAULT);
> >> +	}
> >> +
> >> +	if (vma->vm_end - vma->vm_start < size) {
> >> +		printk(KERN_ERR "vma at %lu is too small for %lu bytes\n",
> >> +			start, size);
> >> +		return ERR_PTR(-EFAULT);
> >> +	}
> > 
> > Should we support multiple VMAs, or do you think that's not worth it ?
> 
> What do you mean by multiple VMAs?

I mean multiple VMAs for a single userspace buffer. It's probably overkill, 
but I'm not familiar enough with the memory management code to be sure. Do you 
have more insight ?

> >> +	vma = vb2_get_vma(vma);
> >> +	if (!vma) {
> >> +		printk(KERN_ERR "failed to copy vma\n");
> >> +		return ERR_PTR(-ENOMEM);
> >> +	}
> > 
> > I still think there's no need to copy the VMA. get_user_pages() will make
> > sure the memory doesn't get paged out, and we don't need to ensure that
> > the userspace mapping stays in place as our cache operations use a
> > scatter list. Storing the result of vma_is_io() in vb2_dc_buf should be
> > enough.
> 
> As I understand calling get_user_pages ensures that pages are not going to
> be swapped or freed. I agree that it provides enough protection for the
> memory.
> 
> IO mappings are the problem. As you mentioned few mails ago get_page would
> likely crash for such pages. AFAIK increasing reference count for VMA could
> be a reliable mechanism for protecting the memory from being freed.

The main use case here (which is actually the only use case I have 
encountered) is memory reserved at boot time to be used by specific devices 
such as frame buffers. That memory will never be paged out, so I don't think 
there's an issue here. Regarding freeing, it will likely not be freed either, 
and if it does, I doubt that duplicating the VMA will make any difference. 

> The problem is that VMA has no reference counters in it. Calling open ops
> will protect the memory. However it will not protect VMA structure from
> being freed!
> 
> Analyze following scenario:
> 
> - mmap -> returns userptr
> - qbuf (userptr)
> - unmap (userptr)
> - dqbuf
> 
> The VMA will be destroyed at unmap but memory will not be released.
>
> The reason is that open ops was called at qbuf.

I think I see your point. You want to make sure that the exporter driver (on 
which mmap() has been called) will not release the memory, and to do so you 
call the exporter's open() vm operation when you acquire the memory. To call 
the exporter's close() operation when you release the memory you need a 
pointer to the VMA, but the original VMA might have disappeared. To work 
around the problem you make a copy of the VMA and use it when releasing the 
memory.

That's a pretty dirty hack. Most of the copy VMA fields will be invalid when 
you use it. On a side note, would storing vm_ops and vm_private_data be enough 
? I'm also not sure if we need to call get_file() and put_file().

> In order to free the memory the VB2 has to call close ops. This
> callback takes pointer to VMA as an argument. The original VMA
> cannot be used here because it is not longer valid.
> 
> Therefore VMA has to be copied at qbuf operation. The function vb2_get_vma
> is used for this purpose.
>
> The workaround could be dropping support for IO mappings in VB2.
> However it will handicap all s5p media drivers because mapping
> produced by dma_mmap_coherent (aka writecombine) are IO mapping.
> As result s5p-fimc could no longer create a pipeline with s5p-mfc.

There's no reason to drop that, even if it's currently hackish :-) (at least 
until we have a working replacement).
 
> Introducing DMABUF to V4L is a good alternative but only if exporting
> is supported.
> 
> For now I think it is better to keep support for IO mappings. At least
> until DMA mapping redesign and DMABUF exporting in V4L is merged.

I agree. DMABUF is the way to go, but we need to get it in first.

> >> +	return vma;
> >> +}
Marek Szyprowski April 17, 2012, 11:25 a.m. UTC | #4
Hi Laurent,

On Tuesday, April 17, 2012 2:41 AM Laurent Pinchart wrote:

(snipped)

> > >> +static struct vm_area_struct *vb2_dc_get_user_vma(
> > >> +	unsigned long start, unsigned long size)
> > >> +{
> > >> +	struct vm_area_struct *vma;
> > >> +
> > >> +	/* current->mm->mmap_sem is taken by videobuf2 core */
> > >> +	vma = find_vma(current->mm, start);
> > >> +	if (!vma) {
> > >> +		printk(KERN_ERR "no vma for address %lu\n", start);
> > >> +		return ERR_PTR(-EFAULT);
> > >> +	}
> > >> +
> > >> +	if (vma->vm_end - vma->vm_start < size) {
> > >> +		printk(KERN_ERR "vma at %lu is too small for %lu bytes\n",
> > >> +			start, size);
> > >> +		return ERR_PTR(-EFAULT);
> > >> +	}
> > >
> > > Should we support multiple VMAs, or do you think that's not worth it ?
> >
> > What do you mean by multiple VMAs?
> 
> I mean multiple VMAs for a single userspace buffer. It's probably overkill,
> but I'm not familiar enough with the memory management code to be sure. Do you
> have more insight ?

Multiple VMAs means that userspace did something really hacky in the specified 
address range, I'm really convinced that we should not bother supporting such 
cases.

With user pointer mode You usually want to get access to either anonymous pages
(malloc and friends) or the memory somehow allocated by the other device 
(mmaped to userspace). In both cases it available as a single VMA. VMAs with 
anonymous memory are merged together if they get extended to meet side-by-side 
each other.

> 
> > >> +	vma = vb2_get_vma(vma);
> > >> +	if (!vma) {
> > >> +		printk(KERN_ERR "failed to copy vma\n");
> > >> +		return ERR_PTR(-ENOMEM);
> > >> +	}
> > >
> > > I still think there's no need to copy the VMA. get_user_pages() will make
> > > sure the memory doesn't get paged out, and we don't need to ensure that
> > > the userspace mapping stays in place as our cache operations use a
> > > scatter list. Storing the result of vma_is_io() in vb2_dc_buf should be
> > > enough.
> >
> > As I understand calling get_user_pages ensures that pages are not going to
> > be swapped or freed. I agree that it provides enough protection for the
> > memory.
> >
> > IO mappings are the problem. As you mentioned few mails ago get_page would
> > likely crash for such pages. AFAIK increasing reference count for VMA could
> > be a reliable mechanism for protecting the memory from being freed.
> 
> The main use case here (which is actually the only use case I have
> encountered) is memory reserved at boot time to be used by specific devices
> such as frame buffers. That memory will never be paged out, so I don't think
> there's an issue here. Regarding freeing, it will likely not be freed either,
> and if it does, I doubt that duplicating the VMA will make any difference.

We use user pointer method also to access buffers allocated dynamically by 
other v4l2 devices (we have quite a lot of the in our system). In this case
duplicating VMA is necessary.

> > The problem is that VMA has no reference counters in it. Calling open ops
> > will protect the memory. However it will not protect VMA structure from
> > being freed!
> >
> > Analyze following scenario:
> >
> > - mmap -> returns userptr
> > - qbuf (userptr)
> > - unmap (userptr)
> > - dqbuf
> >
> > The VMA will be destroyed at unmap but memory will not be released.
> >
> > The reason is that open ops was called at qbuf.
> 
> I think I see your point. You want to make sure that the exporter driver (on
> which mmap() has been called) will not release the memory, and to do so you
> call the exporter's open() vm operation when you acquire the memory. To call
> the exporter's close() operation when you release the memory you need a
> pointer to the VMA, but the original VMA might have disappeared. To work
> around the problem you make a copy of the VMA and use it when releasing the
> memory.
> 
> That's a pretty dirty hack. Most of the copy VMA fields will be invalid when
> you use it. On a side note, would storing vm_ops and vm_private_data be enough
> ? I'm also not sure if we need to call get_file() and put_file().

This code is there from the beginning of the videobuf2. The main problem is the
fact that you cannot get a reliable access to user pointer memory which is not 
described with anonymous pages. The hacks/workarounds we use at works really
well with the memory mmaped by the other v4l2 devices (which use vm_open/
vm_close refcounting) and framebuffers which use no refcounting on vma, but we
force them not to release memory by calling get_file() (so the framebuffer 
driver cannot be removed/unloaded once we use its memory, yes, pretty 
theoretical case).

Copying vma was the only solution for the races that usually happen on process 
cleanup or special 'nasty' test case which closed the device before closing the
other v4l2 which used its memory with user pointer method.

The most critical parts of vma are NULLed (vm_mm, vm_next, vm_prev) to catch 
possible issues, but the sane close callback should not touch them anyway. 
Besides vm_private_data, close callback might need to access vm_file, 
vm_start/vm_end and vm_flags. Maybe coping them explicitly while keeping 
everything else NULLed would be a better idea. 

(snipped)

Best regards
Laurent Pinchart April 17, 2012, 5:11 p.m. UTC | #5
Hi Marek,

On Tuesday 17 April 2012 13:25:56 Marek Szyprowski wrote:
> On Tuesday, April 17, 2012 2:41 AM Laurent Pinchart wrote:
> 
> (snipped)
> 
> > > >> +static struct vm_area_struct *vb2_dc_get_user_vma(
> > > >> +	unsigned long start, unsigned long size)
> > > >> +{
> > > >> +	struct vm_area_struct *vma;
> > > >> +
> > > >> +	/* current->mm->mmap_sem is taken by videobuf2 core */
> > > >> +	vma = find_vma(current->mm, start);
> > > >> +	if (!vma) {
> > > >> +		printk(KERN_ERR "no vma for address %lu\n", start);
> > > >> +		return ERR_PTR(-EFAULT);
> > > >> +	}
> > > >> +
> > > >> +	if (vma->vm_end - vma->vm_start < size) {
> > > >> +		printk(KERN_ERR "vma at %lu is too small for %lu bytes\n",
> > > >> +			start, size);
> > > >> +		return ERR_PTR(-EFAULT);
> > > >> +	}
> > > > 
> > > > Should we support multiple VMAs, or do you think that's not worth it ?
> > > 
> > > What do you mean by multiple VMAs?
> > 
> > I mean multiple VMAs for a single userspace buffer. It's probably
> > overkill, but I'm not familiar enough with the memory management code to
> > be sure. Do you have more insight ?
> 
> Multiple VMAs means that userspace did something really hacky in the
> specified address range, I'm really convinced that we should not bother
> supporting such cases.

I agree. I just wanted to make sure that I wasn't forgetting an important use 
case.

> With user pointer mode You usually want to get access to either anonymous
> pages (malloc and friends) or the memory somehow allocated by the other
> device (mmaped to userspace). In both cases it available as a single VMA.
> VMAs with anonymous memory are merged together if they get extended to meet
> side-by-side each other.
> 
> > > >> +	vma = vb2_get_vma(vma);
> > > >> +	if (!vma) {
> > > >> +		printk(KERN_ERR "failed to copy vma\n");
> > > >> +		return ERR_PTR(-ENOMEM);
> > > >> +	}
> > > > 
> > > > I still think there's no need to copy the VMA. get_user_pages() will
> > > > make sure the memory doesn't get paged out, and we don't need to
> > > > ensure that the userspace mapping stays in place as our cache
> > > > operations use a scatter list. Storing the result of vma_is_io() in
> > > > vb2_dc_buf should be enough.
> > > 
> > > As I understand calling get_user_pages ensures that pages are not going
> > > to be swapped or freed. I agree that it provides enough protection for
> > > the memory.
> > > 
> > > IO mappings are the problem. As you mentioned few mails ago get_page
> > > would likely crash for such pages. AFAIK increasing reference count for
> > > VMA could be a reliable mechanism for protecting the memory from being
> > > freed.
> > 
> > The main use case here (which is actually the only use case I have
> > encountered) is memory reserved at boot time to be used by specific
> > devices such as frame buffers. That memory will never be paged out, so I
> > don't think there's an issue here. Regarding freeing, it will likely not
> > be freed either, and if it does, I doubt that duplicating the VMA will
> > make any difference.
>
> We use user pointer method also to access buffers allocated dynamically by
> other v4l2 devices (we have quite a lot of the in our system). In this case
> duplicating VMA is necessary.
> 
> > > The problem is that VMA has no reference counters in it. Calling open
> > > ops will protect the memory. However it will not protect VMA structure
> > > from being freed!
> > > 
> > > Analyze following scenario:
> > > 
> > > - mmap -> returns userptr
> > > - qbuf (userptr)
> > > - unmap (userptr)
> > > - dqbuf
> > > 
> > > The VMA will be destroyed at unmap but memory will not be released.
> > > 
> > > The reason is that open ops was called at qbuf.
> > 
> > I think I see your point. You want to make sure that the exporter driver
> > (on which mmap() has been called) will not release the memory, and to do
> > so you call the exporter's open() vm operation when you acquire the
> > memory. To call the exporter's close() operation when you release the
> > memory you need a pointer to the VMA, but the original VMA might have
> > disappeared. To work around the problem you make a copy of the VMA and
> > use it when releasing the memory.
> > 
> > That's a pretty dirty hack. Most of the copy VMA fields will be invalid
> > when you use it. On a side note, would storing vm_ops and vm_private_data
> > be enough ? I'm also not sure if we need to call get_file() and
> > put_file().
> 
> This code is there from the beginning of the videobuf2. The main problem is
> the fact that you cannot get a reliable access to user pointer memory which
> is not described with anonymous pages. The hacks/workarounds we use at
> works really well with the memory mmaped by the other v4l2 devices (which
> use vm_open/ vm_close refcounting) and framebuffers which use no
> refcounting on vma, but we force them not to release memory by calling
> get_file() (so the framebuffer driver cannot be removed/unloaded once we
> use its memory, yes, pretty theoretical case).
> 
> Copying vma was the only solution for the races that usually happen on
> process cleanup or special 'nasty' test case which closed the device before
> closing the other v4l2 which used its memory with user pointer method.
> 
> The most critical parts of vma are NULLed (vm_mm, vm_next, vm_prev) to catch
> possible issues, but the sane close callback should not touch them anyway.
> Besides vm_private_data, close callback might need to access vm_file,
> vm_start/vm_end and vm_flags. Maybe coping them explicitly while keeping
> everything else NULLed would be a better idea.

Thank you for the clarification. I'm very eager to see DMABUF being adopted 
everywhere, so that we can get rid of those hacks :-)
diff mbox

Patch

diff --git a/drivers/media/video/videobuf2-dma-contig.c b/drivers/media/video/videobuf2-dma-contig.c
index 476e536..6ab3165 100644
--- a/drivers/media/video/videobuf2-dma-contig.c
+++ b/drivers/media/video/videobuf2-dma-contig.c
@@ -11,6 +11,8 @@ 
  */
 
 #include <linux/module.h>
+#include <linux/scatterlist.h>
+#include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/dma-mapping.h>
 
@@ -22,6 +24,8 @@  struct vb2_dc_buf {
 	void				*vaddr;
 	unsigned long			size;
 	dma_addr_t			dma_addr;
+	enum dma_data_direction		dma_dir;
+	struct sg_table			*dma_sgt;
 
 	/* MMAP related */
 	struct vb2_vmarea_handler	handler;
@@ -32,6 +36,98 @@  struct vb2_dc_buf {
 };
 
 /*********************************************/
+/*        scatterlist table functions        */
+/*********************************************/
+
+static struct sg_table *vb2_dc_pages_to_sgt(struct page **pages,
+	unsigned long n_pages, size_t offset, size_t offset2)
+{
+	struct sg_table *sgt;
+	int i, j; /* loop counters */
+	int cur_page, chunks;
+	int ret;
+	struct scatterlist *s;
+
+	sgt = kzalloc(sizeof *sgt, GFP_KERNEL);
+	if (!sgt)
+		return ERR_PTR(-ENOMEM);
+
+	/* compute number of chunks */
+	chunks = 1;
+	for (i = 1; i < n_pages; ++i)
+		if (pages[i] != pages[i - 1] + 1)
+			++chunks;
+
+	ret = sg_alloc_table(sgt, chunks, GFP_KERNEL);
+	if (ret) {
+		kfree(sgt);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	/* merging chunks and putting them into the scatterlist */
+	cur_page = 0;
+	for_each_sg(sgt->sgl, s, sgt->orig_nents, i) {
+		size_t size = PAGE_SIZE;
+
+		for (j = cur_page + 1; j < n_pages; ++j) {
+			if (pages[j] != pages[j - 1] + 1)
+				break;
+			size += PAGE_SIZE;
+		}
+
+		/* cut offset if chunk starts at the first page */
+		if (cur_page == 0)
+			size -= offset;
+		/* cut offset2 if chunk ends at the last page */
+		if (j == n_pages)
+			size -= offset2;
+
+		sg_set_page(s, pages[cur_page], size, offset);
+		offset = 0;
+		cur_page = j;
+	}
+
+	return sgt;
+}
+
+static void vb2_dc_release_sgtable(struct sg_table *sgt)
+{
+	sg_free_table(sgt);
+	kfree(sgt);
+}
+
+static void vb2_dc_sgt_foreach_page(struct sg_table *sgt,
+	void (*cb)(struct page *pg))
+{
+	struct scatterlist *s;
+	int i, j;
+
+	for_each_sg(sgt->sgl, s, sgt->nents, i) {
+		struct page *page = sg_page(s);
+		int n_pages = PAGE_ALIGN(s->offset + s->length) >> PAGE_SHIFT;
+
+		for (j = 0; j < n_pages; ++j, ++page)
+			cb(page);
+	}
+}
+
+static unsigned long vb2_dc_get_contiguous_size(struct sg_table *sgt)
+{
+	struct scatterlist *s;
+	dma_addr_t expected = sg_dma_address(sgt->sgl);
+	int i;
+	unsigned long size = 0;
+
+	for_each_sg(sgt->sgl, s, sgt->nents, i) {
+		if (sg_dma_address(s) != expected)
+			break;
+		expected = sg_dma_address(s) + sg_dma_len(s);
+		size += sg_dma_len(s);
+	}
+	return size;
+}
+
+/*********************************************/
 /*         callbacks for all buffers         */
 /*********************************************/
 
@@ -116,42 +212,194 @@  static int vb2_dc_mmap(void *buf_priv, struct vm_area_struct *vma)
 /*       callbacks for USERPTR buffers       */
 /*********************************************/
 
+static inline int vma_is_io(struct vm_area_struct *vma)
+{
+	return !!(vma->vm_flags & (VM_IO | VM_PFNMAP));
+}
+
+static struct vm_area_struct *vb2_dc_get_user_vma(
+	unsigned long start, unsigned long size)
+{
+	struct vm_area_struct *vma;
+
+	/* current->mm->mmap_sem is taken by videobuf2 core */
+	vma = find_vma(current->mm, start);
+	if (!vma) {
+		printk(KERN_ERR "no vma for address %lu\n", start);
+		return ERR_PTR(-EFAULT);
+	}
+
+	if (vma->vm_end - vma->vm_start < size) {
+		printk(KERN_ERR "vma at %lu is too small for %lu bytes\n",
+			start, size);
+		return ERR_PTR(-EFAULT);
+	}
+
+	vma = vb2_get_vma(vma);
+	if (!vma) {
+		printk(KERN_ERR "failed to copy vma\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	return vma;
+}
+
+static int vb2_dc_get_user_pages(unsigned long start, struct page **pages,
+	int n_pages, struct vm_area_struct *vma, int write)
+{
+	int n;
+
+	if (vma_is_io(vma)) {
+		for (n = 0; n < n_pages; ++n, start += PAGE_SIZE) {
+			unsigned long pfn;
+			int ret = follow_pfn(vma, start, &pfn);
+
+			if (ret) {
+				printk(KERN_ERR "no page for address %lu\n",
+					start);
+				return ret;
+			}
+			pages[n] = pfn_to_page(pfn);
+		}
+	} else {
+		n = get_user_pages(current, current->mm, start & PAGE_MASK,
+			n_pages, write, 1, pages, NULL);
+		if (n != n_pages) {
+			printk(KERN_ERR "got only %d of %d user pages\n",
+				n, n_pages);
+			while (n)
+				put_page(pages[--n]);
+			return -EFAULT;
+		}
+	}
+
+	return 0;
+}
+
+static void vb2_dc_set_page_dirty(struct page *page)
+{
+	set_page_dirty_lock(page);
+}
+
+static void vb2_dc_put_userptr(void *buf_priv)
+{
+	struct vb2_dc_buf *buf = buf_priv;
+	struct sg_table *sgt = buf->dma_sgt;
+
+	dma_unmap_sg(buf->dev, sgt->sgl, sgt->orig_nents, buf->dma_dir);
+	if (!vma_is_io(buf->vma)) {
+		vb2_dc_sgt_foreach_page(sgt, vb2_dc_set_page_dirty);
+		vb2_dc_sgt_foreach_page(sgt, put_page);
+	}
+
+	vb2_dc_release_sgtable(sgt);
+	vb2_put_vma(buf->vma);
+	kfree(buf);
+}
+
 static void *vb2_dc_get_userptr(void *alloc_ctx, unsigned long vaddr,
-					unsigned long size, int write)
+	unsigned long size, int write)
 {
 	struct vb2_dc_buf *buf;
-	struct vm_area_struct *vma;
-	dma_addr_t dma_addr = 0;
-	int ret;
+	unsigned long start, end, offset, offset2;
+	struct page **pages;
+	int n_pages;
+	int ret = 0;
+	struct sg_table *sgt;
+	unsigned long contig_size;
 
 	buf = kzalloc(sizeof *buf, GFP_KERNEL);
 	if (!buf)
 		return ERR_PTR(-ENOMEM);
 
-	ret = vb2_get_contig_userptr(vaddr, size, &vma, &dma_addr);
+	buf->dev = alloc_ctx;
+	buf->dma_dir = write ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
+
+	start = (unsigned long)vaddr & PAGE_MASK;
+	offset = (unsigned long)vaddr & ~PAGE_MASK;
+	end = PAGE_ALIGN((unsigned long)vaddr + size);
+	offset2 = end - (unsigned long)vaddr - size;
+	n_pages = (end - start) >> PAGE_SHIFT;
+
+	pages = kmalloc(n_pages * sizeof pages[0], GFP_KERNEL);
+	if (!pages) {
+		ret = -ENOMEM;
+		printk(KERN_ERR "failed to allocate pages table\n");
+		goto fail_buf;
+	}
+
+	buf->vma = vb2_dc_get_user_vma(start, end - start);
+	if (IS_ERR(buf->vma)) {
+		printk(KERN_ERR "failed to get VMA\n");
+		ret = PTR_ERR(buf->vma);
+		goto fail_pages;
+	}
+
+	/* extract page list from userspace mapping */
+	ret = vb2_dc_get_user_pages(start, pages, n_pages, buf->vma, write);
 	if (ret) {
-		printk(KERN_ERR "Failed acquiring VMA for vaddr 0x%08lx\n",
-				vaddr);
-		kfree(buf);
-		return ERR_PTR(ret);
+		printk(KERN_ERR "failed to get user pages\n");
+		goto fail_vma;
+	}
+
+	sgt = vb2_dc_pages_to_sgt(pages, n_pages, offset, offset2);
+	if (IS_ERR(sgt)) {
+		printk(KERN_ERR "failed to create scatterlist table\n");
+		ret = -ENOMEM;
+		goto fail_get_user_pages;
+	}
+
+	/* pages are no longer needed */
+	kfree(pages);
+	pages = NULL;
+
+	sgt->nents = dma_map_sg(buf->dev, sgt->sgl, sgt->orig_nents,
+		buf->dma_dir);
+	if (sgt->nents <= 0) {
+		printk(KERN_ERR "failed to map scatterlist\n");
+		ret = -EIO;
+		goto fail_sgt;
+	}
+
+	contig_size = vb2_dc_get_contiguous_size(sgt);
+	if (contig_size < size) {
+		printk(KERN_ERR "contiguous mapping is too small %lu/%lu\n",
+			contig_size, size);
+		ret = -EFAULT;
+		goto fail_map_sg;
 	}
 
+	buf->dma_addr = sg_dma_address(sgt->sgl);
 	buf->size = size;
-	buf->dma_addr = dma_addr;
-	buf->vma = vma;
+	buf->dma_sgt = sgt;
+
+	atomic_inc(&buf->refcount);
 
 	return buf;
-}
 
-static void vb2_dc_put_userptr(void *mem_priv)
-{
-	struct vb2_dc_buf *buf = mem_priv;
+fail_map_sg:
+	dma_unmap_sg(buf->dev, sgt->sgl, sgt->nents, buf->dma_dir);
 
-	if (!buf)
-		return;
+fail_sgt:
+	if (!vma_is_io(buf->vma))
+		vb2_dc_sgt_foreach_page(sgt, put_page);
+	vb2_dc_release_sgtable(sgt);
+
+fail_get_user_pages:
+	if (pages && !vma_is_io(buf->vma))
+		while (n_pages)
+			put_page(pages[--n_pages]);
 
+fail_vma:
 	vb2_put_vma(buf->vma);
+
+fail_pages:
+	kfree(pages); /* kfree is NULL-proof */
+
+fail_buf:
 	kfree(buf);
+
+	return ERR_PTR(ret);
 }
 
 /*********************************************/