diff mbox

[PATCHv9,10/10] ARM: dma-mapping: add support for IOMMU mapper

Message ID 1334756652-30830-11-git-send-email-m.szyprowski@samsung.com
State New
Headers show

Commit Message

Marek Szyprowski April 18, 2012, 1:44 p.m. UTC
This patch adds a complete implementation of the DMA-mapping API for
devices which have IOMMU support.

This implementation tries to optimize dma address space usage by remapping
all possible physical memory chunks into a single dma address space chunk.

DMA address space is managed on top of the bitmap stored in the
dma_iommu_mapping structure stored in device->archdata. Platform setup
code has to initialize parameters of the dma address space (base address,
size, allocation precision order) with arm_iommu_create_mapping() function.
To reduce the size of the bitmap, all allocations are aligned to the
specified order of base 4 KiB pages.

The dma_alloc_* functions allocate physical memory in chunks, each obtained
with the alloc_pages() function, to avoid failing if the physical memory gets
fragmented. In the worst case the allocated buffer is composed of 4 KiB page
chunks.

The dma_map_sg() function minimizes the total number of dma address space
chunks by merging physical memory chunks into one larger dma address
space chunk. If the requested chunk (scatter list entry) boundaries
match physical page boundaries, most calls to dma_map_sg() will
result in creating only one chunk in dma address space.

dma_map_page() simply creates a mapping for the given page(s) in the dma
address space.

All dma functions also perform the required cache operations, like their
counterparts from the arm linear physical memory mapping version.

This patch contains code and fixes kindly provided by:
- Krishna Reddy <vdumpa@nvidia.com>,
- Andrzej Pietrasiewicz <andrzej.p@samsung.com>,
- Hiroshi DOYU <hdoyu@nvidia.com>

Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Acked-by: Kyungmin Park <kyungmin.park@samsung.com>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Tested-By: Subash Patel <subash.ramaswamy@linaro.org>
---
 arch/arm/Kconfig                 |    8 +
 arch/arm/include/asm/device.h    |    3 +
 arch/arm/include/asm/dma-iommu.h |   34 ++
 arch/arm/mm/dma-mapping.c        |  727 +++++++++++++++++++++++++++++++++++++-
 arch/arm/mm/vmregion.h           |    2 +-
 5 files changed, 759 insertions(+), 15 deletions(-)
 create mode 100644 arch/arm/include/asm/dma-iommu.h

Comments

Abhinav Kochhar April 20, 2012, 1:44 a.m. UTC | #1
Hi Marek,

The dma_addr_t dma_addr argument passed to the function
arm_iommu_mmap_attrs() is unused:

+static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct
*vma,
+                   void *cpu_addr, dma_addr_t dma_addr, size_t size,
+                   struct dma_attrs *attrs)
+{
+       struct arm_vmregion *c;
+
+       vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot);
+       c = arm_vmregion_find(&consistent_
head, (unsigned long)cpu_addr);
+
+       if (c) {
+               struct page **pages = c->priv;
+
+               unsigned long uaddr = vma->vm_start;
+               unsigned long usize = vma->vm_end - vma->vm_start;
+               int i = 0;
+
+               do {
+                       int ret;
+
+                       ret = vm_insert_page(vma, uaddr, pages[i++]);
+                       if (ret) {
+                               pr_err("Remapping memory, error: %d\n",
ret);
+                               return ret;
+                       }
+
+                       uaddr += PAGE_SIZE;
+                       usize -= PAGE_SIZE;
+               } while (usize > 0);
+       }
+       return 0;
+}


On Wed, Apr 18, 2012 at 10:44 PM, Marek Szyprowski <m.szyprowski@samsung.com
> wrote:

> This patch add a complete implementation of DMA-mapping API for
> devices which have IOMMU support.
>
> This implementation tries to optimize dma address space usage by remapping
> all possible physical memory chunks into a single dma address space chunk.
>
> DMA address space is managed on top of the bitmap stored in the
> dma_iommu_mapping structure stored in device->archdata. Platform setup
> code has to initialize parameters of the dma address space (base address,
> size, allocation precision order) with arm_iommu_create_mapping() function.
> To reduce the size of the bitmap, all allocations are aligned to the
> specified order of base 4 KiB pages.
>
> dma_alloc_* functions allocate physical memory in chunks, each with
> alloc_pages() function to avoid failing if the physical memory gets
> fragmented. In worst case the allocated buffer is composed of 4 KiB page
> chunks.
>
> dma_map_sg() function minimizes the total number of dma address space
> chunks by merging of physical memory chunks into one larger dma address
> space chunk. If requested chunk (scatter list entry) boundaries
> match physical page boundaries, most calls to dma_map_sg() requests will
> result in creating only one chunk in dma address space.
>
> dma_map_page() simply creates a mapping for the given page(s) in the dma
> address space.
>
> All dma functions also perform required cache operation like their
> counterparts from the arm linear physical memory mapping version.
>
> This patch contains code and fixes kindly provided by:
> - Krishna Reddy <vdumpa@nvidia.com>,
> - Andrzej Pietrasiewicz <andrzej.p@samsung.com>,
> - Hiroshi DOYU <hdoyu@nvidia.com>
>
> Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
> Acked-by: Kyungmin Park <kyungmin.park@samsung.com>
> Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
> Tested-By: Subash Patel <subash.ramaswamy@linaro.org>
> ---
>  arch/arm/Kconfig                 |    8 +
>  arch/arm/include/asm/device.h    |    3 +
>  arch/arm/include/asm/dma-iommu.h |   34 ++
>  arch/arm/mm/dma-mapping.c        |  727
> +++++++++++++++++++++++++++++++++++++-
>  arch/arm/mm/vmregion.h           |    2 +-
>  5 files changed, 759 insertions(+), 15 deletions(-)
>  create mode 100644 arch/arm/include/asm/dma-iommu.h
>
> diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
> index 0fd27d4..874e519 100644
> --- a/arch/arm/Kconfig
> +++ b/arch/arm/Kconfig
> @@ -46,6 +46,14 @@ config ARM
>  config ARM_HAS_SG_CHAIN
>        bool
>
> +config NEED_SG_DMA_LENGTH
> +       bool
> +
> +config ARM_DMA_USE_IOMMU
> +       select NEED_SG_DMA_LENGTH
> +       select ARM_HAS_SG_CHAIN
> +       bool
> +
>  config HAVE_PWM
>        bool
>
> diff --git a/arch/arm/include/asm/device.h b/arch/arm/include/asm/device.h
> index 6e2cb0e..b69c0d3 100644
> --- a/arch/arm/include/asm/device.h
> +++ b/arch/arm/include/asm/device.h
> @@ -14,6 +14,9 @@ struct dev_archdata {
>  #ifdef CONFIG_IOMMU_API
>        void *iommu; /* private IOMMU data */
>  #endif
> +#ifdef CONFIG_ARM_DMA_USE_IOMMU
> +       struct dma_iommu_mapping        *mapping;
> +#endif
>  };
>
>  struct omap_device;
> diff --git a/arch/arm/include/asm/dma-iommu.h
> b/arch/arm/include/asm/dma-iommu.h
> new file mode 100644
> index 0000000..799b094
> --- /dev/null
> +++ b/arch/arm/include/asm/dma-iommu.h
> @@ -0,0 +1,34 @@
> +#ifndef ASMARM_DMA_IOMMU_H
> +#define ASMARM_DMA_IOMMU_H
> +
> +#ifdef __KERNEL__
> +
> +#include <linux/mm_types.h>
> +#include <linux/scatterlist.h>
> +#include <linux/dma-debug.h>
> +#include <linux/kmemcheck.h>
> +
> +struct dma_iommu_mapping {
> +       /* iommu specific data */
> +       struct iommu_domain     *domain;
> +
> +       void                    *bitmap;
> +       size_t                  bits;
> +       unsigned int            order;
> +       dma_addr_t              base;
> +
> +       spinlock_t              lock;
> +       struct kref             kref;
> +};
> +
> +struct dma_iommu_mapping *
> +arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t
> size,
> +                        int order);
> +
> +void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping);
> +
> +int arm_iommu_attach_device(struct device *dev,
> +                                       struct dma_iommu_mapping *mapping);
> +
> +#endif /* __KERNEL__ */
> +#endif
> diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
> index d4aad65..2d11aa0 100644
> --- a/arch/arm/mm/dma-mapping.c
> +++ b/arch/arm/mm/dma-mapping.c
> @@ -19,6 +19,8 @@
>  #include <linux/dma-mapping.h>
>  #include <linux/highmem.h>
>  #include <linux/slab.h>
> +#include <linux/iommu.h>
> +#include <linux/vmalloc.h>
>
>  #include <asm/memory.h>
>  #include <asm/highmem.h>
> @@ -26,6 +28,7 @@
>  #include <asm/tlbflush.h>
>  #include <asm/sizes.h>
>  #include <asm/mach/arch.h>
> +#include <asm/dma-iommu.h>
>
>  #include "mm.h"
>
> @@ -155,6 +158,21 @@ static u64 get_coherent_dma_mask(struct device *dev)
>        return mask;
>  }
>
> +static void __dma_clear_buffer(struct page *page, size_t size)
> +{
> +       void *ptr;
> +       /*
> +        * Ensure that the allocated pages are zeroed, and that any data
> +        * lurking in the kernel direct-mapped region is invalidated.
> +        */
> +       ptr = page_address(page);
> +       if (ptr) {
> +               memset(ptr, 0, size);
> +               dmac_flush_range(ptr, ptr + size);
> +               outer_flush_range(__pa(ptr), __pa(ptr) + size);
> +       }
> +}
> +
>  /*
>  * Allocate a DMA buffer for 'dev' of size 'size' using the
>  * specified gfp mask.  Note that 'size' must be page aligned.
> @@ -163,7 +181,6 @@ static struct page *__dma_alloc_buffer(struct device
> *dev, size_t size, gfp_t gf
>  {
>        unsigned long order = get_order(size);
>        struct page *page, *p, *e;
> -       void *ptr;
>        u64 mask = get_coherent_dma_mask(dev);
>
>  #ifdef CONFIG_DMA_API_DEBUG
> @@ -192,14 +209,7 @@ static struct page *__dma_alloc_buffer(struct device
> *dev, size_t size, gfp_t gf
>        for (p = page + (size >> PAGE_SHIFT), e = page + (1 << order); p <
> e; p++)
>                __free_page(p);
>
> -       /*
> -        * Ensure that the allocated pages are zeroed, and that any data
> -        * lurking in the kernel direct-mapped region is invalidated.
> -        */
> -       ptr = page_address(page);
> -       memset(ptr, 0, size);
> -       dmac_flush_range(ptr, ptr + size);
> -       outer_flush_range(__pa(ptr), __pa(ptr) + size);
> +       __dma_clear_buffer(page, size);
>
>        return page;
>  }
> @@ -348,7 +358,7 @@ __dma_alloc_remap(struct page *page, size_t size,
> gfp_t gfp, pgprot_t prot,
>                u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1);
>
>                pte = consistent_pte[idx] + off;
> -               c->vm_pages = page;
> +               c->priv = page;
>
>                do {
>                        BUG_ON(!pte_none(*pte));
> @@ -461,6 +471,14 @@ __dma_alloc(struct device *dev, size_t size,
> dma_addr_t *handle, gfp_t gfp,
>        return addr;
>  }
>
> +static inline pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t
> prot)
> +{
> +       prot = dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs) ?
> +                           pgprot_writecombine(prot) :
> +                           pgprot_dmacoherent(prot);
> +       return prot;
> +}
> +
>  /*
>  * Allocate DMA-coherent memory space and return both the kernel remapped
>  * virtual and bus address for that space.
> @@ -468,9 +486,7 @@ __dma_alloc(struct device *dev, size_t size,
> dma_addr_t *handle, gfp_t gfp,
>  void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
>                    gfp_t gfp, struct dma_attrs *attrs)
>  {
> -       pgprot_t prot = dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs) ?
> -                       pgprot_writecombine(pgprot_kernel) :
> -                       pgprot_dmacoherent(pgprot_kernel);
> +       pgprot_t prot = __get_dma_pgprot(attrs, pgprot_kernel);
>        void *memory;
>
>        if (dma_alloc_from_coherent(dev, size, handle, &memory))
> @@ -497,16 +513,20 @@ int arm_dma_mmap(struct device *dev, struct
> vm_area_struct *vma,
>                            pgprot_writecombine(vma->vm_page_prot) :
>                            pgprot_dmacoherent(vma->vm_page_prot);
>
> +       if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
> +               return ret;
> +
>        c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr);
>        if (c) {
>                unsigned long off = vma->vm_pgoff;
> +               struct page *pages = c->priv;
>
>                kern_size = (c->vm_end - c->vm_start) >> PAGE_SHIFT;
>
>                if (off < kern_size &&
>                    user_size <= (kern_size - off)) {
>                        ret = remap_pfn_range(vma, vma->vm_start,
> -                                             page_to_pfn(c->vm_pages) +
> off,
> +                                             page_to_pfn(pages) + off,
>                                              user_size << PAGE_SHIFT,
>                                              vma->vm_page_prot);
>                }
> @@ -645,6 +665,9 @@ int arm_dma_map_sg(struct device *dev, struct
> scatterlist *sg, int nents,
>        int i, j;
>
>        for_each_sg(sg, s, nents, i) {
> +#ifdef CONFIG_NEED_SG_DMA_LENGTH
> +               s->dma_length = s->length;
> +#endif
>                s->dma_address = ops->map_page(dev, sg_page(s), s->offset,
>                                                s->length, dir, attrs);
>                if (dma_mapping_error(dev, s->dma_address))
> @@ -753,3 +776,679 @@ static int __init dma_debug_do_init(void)
>        return 0;
>  }
>  fs_initcall(dma_debug_do_init);
> +
> +#ifdef CONFIG_ARM_DMA_USE_IOMMU
> +
> +/* IOMMU */
> +
> +static inline dma_addr_t __alloc_iova(struct dma_iommu_mapping *mapping,
> +                                     size_t size)
> +{
> +       unsigned int order = get_order(size);
> +       unsigned int align = 0;
> +       unsigned int count, start;
> +       unsigned long flags;
> +
> +       count = ((PAGE_ALIGN(size) >> PAGE_SHIFT) +
> +                (1 << mapping->order) - 1) >> mapping->order;
> +
> +       if (order > mapping->order)
> +               align = (1 << (order - mapping->order)) - 1;
> +
> +       spin_lock_irqsave(&mapping->lock, flags);
> +       start = bitmap_find_next_zero_area(mapping->bitmap, mapping->bits,
> 0,
> +                                          count, align);
> +       if (start > mapping->bits) {
> +               spin_unlock_irqrestore(&mapping->lock, flags);
> +               return DMA_ERROR_CODE;
> +       }
> +
> +       bitmap_set(mapping->bitmap, start, count);
> +       spin_unlock_irqrestore(&mapping->lock, flags);
> +
> +       return mapping->base + (start << (mapping->order + PAGE_SHIFT));
> +}
> +
> +static inline void __free_iova(struct dma_iommu_mapping *mapping,
> +                              dma_addr_t addr, size_t size)
> +{
> +       unsigned int start = (addr - mapping->base) >>
> +                            (mapping->order + PAGE_SHIFT);
> +       unsigned int count = ((size >> PAGE_SHIFT) +
> +                             (1 << mapping->order) - 1) >> mapping->order;
> +       unsigned long flags;
> +
> +       spin_lock_irqsave(&mapping->lock, flags);
> +       bitmap_clear(mapping->bitmap, start, count);
> +       spin_unlock_irqrestore(&mapping->lock, flags);
> +}
> +
> +static struct page **__iommu_alloc_buffer(struct device *dev, size_t
> size, gfp_t gfp)
> +{
> +       struct page **pages;
> +       int count = size >> PAGE_SHIFT;
> +       int array_size = count * sizeof(struct page *);
> +       int i = 0;
> +
> +       if (array_size <= PAGE_SIZE)
> +               pages = kzalloc(array_size, gfp);
> +       else
> +               pages = vzalloc(array_size);
> +       if (!pages)
> +               return NULL;
> +
> +       while (count) {
> +               int j, order = __ffs(count);
> +
> +               pages[i] = alloc_pages(gfp | __GFP_NOWARN, order);
> +               while (!pages[i] && order)
> +                       pages[i] = alloc_pages(gfp | __GFP_NOWARN,
> --order);
> +               if (!pages[i])
> +                       goto error;
> +
> +               if (order)
> +                       split_page(pages[i], order);
> +               j = 1 << order;
> +               while (--j)
> +                       pages[i + j] = pages[i] + j;
> +
> +               __dma_clear_buffer(pages[i], PAGE_SIZE << order);
> +               i += 1 << order;
> +               count -= 1 << order;
> +       }
> +
> +       return pages;
> +error:
> +       while (--i)
> +               if (pages[i])
> +                       __free_pages(pages[i], 0);
> +       if (array_size < PAGE_SIZE)
> +               kfree(pages);
> +       else
> +               vfree(pages);
> +       return NULL;
> +}
> +
> +static int __iommu_free_buffer(struct device *dev, struct page **pages,
> size_t size)
> +{
> +       int count = size >> PAGE_SHIFT;
> +       int array_size = count * sizeof(struct page *);
> +       int i;
> +       for (i = 0; i < count; i++)
> +               if (pages[i])
> +                       __free_pages(pages[i], 0);
> +       if (array_size < PAGE_SIZE)
> +               kfree(pages);
> +       else
> +               vfree(pages);
> +       return 0;
> +}
> +
> +/*
> + * Create a CPU mapping for a specified pages
> + */
> +static void *
> +__iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t
> prot)
> +{
> +       struct arm_vmregion *c;
> +       size_t align;
> +       size_t count = size >> PAGE_SHIFT;
> +       int bit;
> +
> +       if (!consistent_pte[0]) {
> +               pr_err("%s: not initialised\n", __func__);
> +               dump_stack();
> +               return NULL;
> +       }
> +
> +       /*
> +        * Align the virtual region allocation - maximum alignment is
> +        * a section size, minimum is a page size.  This helps reduce
> +        * fragmentation of the DMA space, and also prevents allocations
> +        * smaller than a section from crossing a section boundary.
> +        */
> +       bit = fls(size - 1);
> +       if (bit > SECTION_SHIFT)
> +               bit = SECTION_SHIFT;
> +       align = 1 << bit;
> +
> +       /*
> +        * Allocate a virtual address in the consistent mapping region.
> +        */
> +       c = arm_vmregion_alloc(&consistent_head, align, size,
> +                           gfp & ~(__GFP_DMA | __GFP_HIGHMEM), NULL);
> +       if (c) {
> +               pte_t *pte;
> +               int idx = CONSISTENT_PTE_INDEX(c->vm_start);
> +               int i = 0;
> +               u32 off = CONSISTENT_OFFSET(c->vm_start) &
> (PTRS_PER_PTE-1);
> +
> +               pte = consistent_pte[idx] + off;
> +               c->priv = pages;
> +
> +               do {
> +                       BUG_ON(!pte_none(*pte));
> +
> +                       set_pte_ext(pte, mk_pte(pages[i], prot), 0);
> +                       pte++;
> +                       off++;
> +                       i++;
> +                       if (off >= PTRS_PER_PTE) {
> +                               off = 0;
> +                               pte = consistent_pte[++idx];
> +                       }
> +               } while (i < count);
> +
> +               dsb();
> +
> +               return (void *)c->vm_start;
> +       }
> +       return NULL;
> +}
> +
> +/*
> + * Create a mapping in device IO address space for specified pages
> + */
> +static dma_addr_t
> +__iommu_create_mapping(struct device *dev, struct page **pages, size_t
> size)
> +{
> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
> +       unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
> +       dma_addr_t dma_addr, iova;
> +       int i, ret = DMA_ERROR_CODE;
> +
> +       dma_addr = __alloc_iova(mapping, size);
> +       if (dma_addr == DMA_ERROR_CODE)
> +               return dma_addr;
> +
> +       iova = dma_addr;
> +       for (i = 0; i < count; ) {
> +               unsigned int next_pfn = page_to_pfn(pages[i]) + 1;
> +               phys_addr_t phys = page_to_phys(pages[i]);
> +               unsigned int len, j;
> +
> +               for (j = i + 1; j < count; j++, next_pfn++)
> +                       if (page_to_pfn(pages[j]) != next_pfn)
> +                               break;
> +
> +               len = (j - i) << PAGE_SHIFT;
> +               ret = iommu_map(mapping->domain, iova, phys, len, 0);
> +               if (ret < 0)
> +                       goto fail;
> +               iova += len;
> +               i = j;
> +       }
> +       return dma_addr;
> +fail:
> +       iommu_unmap(mapping->domain, dma_addr, iova-dma_addr);
> +       __free_iova(mapping, dma_addr, size);
> +       return DMA_ERROR_CODE;
> +}
> +
> +static int __iommu_remove_mapping(struct device *dev, dma_addr_t iova,
> size_t size)
> +{
> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
> +
> +       /*
> +        * add optional in-page offset from iova to size and align
> +        * result to page size
> +        */
> +       size = PAGE_ALIGN((iova & ~PAGE_MASK) + size);
> +       iova &= PAGE_MASK;
> +
> +       iommu_unmap(mapping->domain, iova, size);
> +       __free_iova(mapping, iova, size);
> +       return 0;
> +}
> +
> +static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
> +           dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs)
> +{
> +       pgprot_t prot = __get_dma_pgprot(attrs, pgprot_kernel);
> +       struct page **pages;
> +       void *addr = NULL;
> +
> +       *handle = DMA_ERROR_CODE;
> +       size = PAGE_ALIGN(size);
> +
> +       pages = __iommu_alloc_buffer(dev, size, gfp);
> +       if (!pages)
> +               return NULL;
> +
> +       *handle = __iommu_create_mapping(dev, pages, size);
> +       if (*handle == DMA_ERROR_CODE)
> +               goto err_buffer;
> +
> +       addr = __iommu_alloc_remap(pages, size, gfp, prot);
> +       if (!addr)
> +               goto err_mapping;
> +
> +       return addr;
> +
> +err_mapping:
> +       __iommu_remove_mapping(dev, *handle, size);
> +err_buffer:
> +       __iommu_free_buffer(dev, pages, size);
> +       return NULL;
> +}
> +
> +static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct
> *vma,
> +                   void *cpu_addr, dma_addr_t dma_addr, size_t size,
> +                   struct dma_attrs *attrs)
> +{
> +       struct arm_vmregion *c;
> +
> +       vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot);
> +       c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr);
> +
> +       if (c) {
> +               struct page **pages = c->priv;
> +
> +               unsigned long uaddr = vma->vm_start;
> +               unsigned long usize = vma->vm_end - vma->vm_start;
> +               int i = 0;
> +
> +               do {
> +                       int ret;
> +
> +                       ret = vm_insert_page(vma, uaddr, pages[i++]);
> +                       if (ret) {
> +                               pr_err("Remapping memory, error: %d\n",
> ret);
> +                               return ret;
> +                       }
> +
> +                       uaddr += PAGE_SIZE;
> +                       usize -= PAGE_SIZE;
> +               } while (usize > 0);
> +       }
> +       return 0;
> +}
> +
> +/*
> + * free a page as defined by the above mapping.
> + * Must not be called with IRQs disabled.
> + */
> +void arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
> +                         dma_addr_t handle, struct dma_attrs *attrs)
> +{
> +       struct arm_vmregion *c;
> +       size = PAGE_ALIGN(size);
> +
> +       c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr);
> +       if (c) {
> +               struct page **pages = c->priv;
> +               __dma_free_remap(cpu_addr, size);
> +               __iommu_remove_mapping(dev, handle, size);
> +               __iommu_free_buffer(dev, pages, size);
> +       }
> +}
> +
> +/*
> + * Map a part of the scatter-gather list into contiguous io address space
> + */
> +static int __map_sg_chunk(struct device *dev, struct scatterlist *sg,
> +                         size_t size, dma_addr_t *handle,
> +                         enum dma_data_direction dir)
> +{
> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
> +       dma_addr_t iova, iova_base;
> +       int ret = 0;
> +       unsigned int count;
> +       struct scatterlist *s;
> +
> +       size = PAGE_ALIGN(size);
> +       *handle = DMA_ERROR_CODE;
> +
> +       iova_base = iova = __alloc_iova(mapping, size);
> +       if (iova == DMA_ERROR_CODE)
> +               return -ENOMEM;
> +
> +       for (count = 0, s = sg; count < (size >> PAGE_SHIFT); s =
> sg_next(s)) {
> +               phys_addr_t phys = page_to_phys(sg_page(s));
> +               unsigned int len = PAGE_ALIGN(s->offset + s->length);
> +
> +               if (!arch_is_coherent())
> +                       __dma_page_cpu_to_dev(sg_page(s), s->offset,
> s->length, dir);
> +
> +               ret = iommu_map(mapping->domain, iova, phys, len, 0);
> +               if (ret < 0)
> +                       goto fail;
> +               count += len >> PAGE_SHIFT;
> +               iova += len;
> +       }
> +       *handle = iova_base;
> +
> +       return 0;
> +fail:
> +       iommu_unmap(mapping->domain, iova_base, count * PAGE_SIZE);
> +       __free_iova(mapping, iova_base, size);
> +       return ret;
> +}
> +
> +/**
> + * arm_iommu_map_sg - map a set of SG buffers for streaming mode DMA
> + * @dev: valid struct device pointer
> + * @sg: list of buffers
> + * @nents: number of buffers to map
> + * @dir: DMA transfer direction
> + *
> + * Map a set of buffers described by scatterlist in streaming mode for
> DMA.
> + * The scatter gather list elements are merged together (if possible) and
> + * tagged with the appropriate dma address and length. They are obtained
> via
> + * sg_dma_{address,length}.
> + */
> +int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, int
> nents,
> +                    enum dma_data_direction dir, struct dma_attrs *attrs)
> +{
> +       struct scatterlist *s = sg, *dma = sg, *start = sg;
> +       int i, count = 0;
> +       unsigned int offset = s->offset;
> +       unsigned int size = s->offset + s->length;
> +       unsigned int max = dma_get_max_seg_size(dev);
> +
> +       for (i = 1; i < nents; i++) {
> +               s = sg_next(s);
> +
> +               s->dma_address = DMA_ERROR_CODE;
> +               s->dma_length = 0;
> +
> +               if (s->offset || (size & ~PAGE_MASK) || size + s->length >
> max) {
> +                       if (__map_sg_chunk(dev, start, size,
> &dma->dma_address,
> +                           dir) < 0)
> +                               goto bad_mapping;
> +
> +                       dma->dma_address += offset;
> +                       dma->dma_length = size - offset;
> +
> +                       size = offset = s->offset;
> +                       start = s;
> +                       dma = sg_next(dma);
> +                       count += 1;
> +               }
> +               size += s->length;
> +       }
> +       if (__map_sg_chunk(dev, start, size, &dma->dma_address, dir) < 0)
> +               goto bad_mapping;
> +
> +       dma->dma_address += offset;
> +       dma->dma_length = size - offset;
> +
> +       return count+1;
> +
> +bad_mapping:
> +       for_each_sg(sg, s, count, i)
> +               __iommu_remove_mapping(dev, sg_dma_address(s),
> sg_dma_len(s));
> +       return 0;
> +}
> +
> +/**
> + * arm_iommu_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg
> + * @dev: valid struct device pointer
> + * @sg: list of buffers
> + * @nents: number of buffers to unmap (same as was passed to dma_map_sg)
> + * @dir: DMA transfer direction (same as was passed to dma_map_sg)
> + *
> + * Unmap a set of streaming mode DMA translations.  Again, CPU access
> + * rules concerning calls here are the same as for dma_unmap_single().
> + */
> +void arm_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, int
> nents,
> +                       enum dma_data_direction dir, struct dma_attrs
> *attrs)
> +{
> +       struct scatterlist *s;
> +       int i;
> +
> +       for_each_sg(sg, s, nents, i) {
> +               if (sg_dma_len(s))
> +                       __iommu_remove_mapping(dev, sg_dma_address(s),
> +                                              sg_dma_len(s));
> +               if (!arch_is_coherent())
> +                       __dma_page_dev_to_cpu(sg_page(s), s->offset,
> +                                             s->length, dir);
> +       }
> +}
> +
> +/**
> + * arm_iommu_sync_sg_for_cpu
> + * @dev: valid struct device pointer
> + * @sg: list of buffers
> + * @nents: number of buffers to map (returned from dma_map_sg)
> + * @dir: DMA transfer direction (same as was passed to dma_map_sg)
> + */
> +void arm_iommu_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
> +                       int nents, enum dma_data_direction dir)
> +{
> +       struct scatterlist *s;
> +       int i;
> +
> +       for_each_sg(sg, s, nents, i)
> +               if (!arch_is_coherent())
> +                       __dma_page_dev_to_cpu(sg_page(s), s->offset,
> s->length, dir);
> +
> +}
> +
> +/**
> + * arm_iommu_sync_sg_for_device
> + * @dev: valid struct device pointer
> + * @sg: list of buffers
> + * @nents: number of buffers to map (returned from dma_map_sg)
> + * @dir: DMA transfer direction (same as was passed to dma_map_sg)
> + */
> +void arm_iommu_sync_sg_for_device(struct device *dev, struct scatterlist
> *sg,
> +                       int nents, enum dma_data_direction dir)
> +{
> +       struct scatterlist *s;
> +       int i;
> +
> +       for_each_sg(sg, s, nents, i)
> +               if (!arch_is_coherent())
> +                       __dma_page_cpu_to_dev(sg_page(s), s->offset,
> s->length, dir);
> +}
> +
> +
> +/**
> + * arm_iommu_map_page
> + * @dev: valid struct device pointer
> + * @page: page that buffer resides in
> + * @offset: offset into page for start of buffer
> + * @size: size of buffer to map
> + * @dir: DMA transfer direction
> + *
> + * IOMMU aware version of arm_dma_map_page()
> + */
> +static dma_addr_t arm_iommu_map_page(struct device *dev, struct page
> *page,
> +            unsigned long offset, size_t size, enum dma_data_direction
> dir,
> +            struct dma_attrs *attrs)
> +{
> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
> +       dma_addr_t dma_addr;
> +       int ret, len = PAGE_ALIGN(size + offset);
> +
> +       if (!arch_is_coherent())
> +               __dma_page_cpu_to_dev(page, offset, size, dir);
> +
> +       dma_addr = __alloc_iova(mapping, len);
> +       if (dma_addr == DMA_ERROR_CODE)
> +               return dma_addr;
> +
> +       ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page),
> len, 0);
> +       if (ret < 0)
> +               goto fail;
> +
> +       return dma_addr + offset;
> +fail:
> +       __free_iova(mapping, dma_addr, len);
> +       return DMA_ERROR_CODE;
> +}
> +
> +/**
> + * arm_iommu_unmap_page
> + * @dev: valid struct device pointer
> + * @handle: DMA address of buffer
> + * @size: size of buffer (same as passed to dma_map_page)
> + * @dir: DMA transfer direction (same as passed to dma_map_page)
> + *
> + * IOMMU aware version of arm_dma_unmap_page()
> + */
> +static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle,
> +               size_t size, enum dma_data_direction dir,
> +               struct dma_attrs *attrs)
> +{
> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
> +       dma_addr_t iova = handle & PAGE_MASK;
> +       struct page *page =
> phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
> +       int offset = handle & ~PAGE_MASK;
> +       int len = PAGE_ALIGN(size + offset);
> +
> +       if (!iova)
> +               return;
> +
> +       if (!arch_is_coherent())
> +               __dma_page_dev_to_cpu(page, offset, size, dir);
> +
> +       iommu_unmap(mapping->domain, iova, len);
> +       __free_iova(mapping, iova, len);
> +}
> +
> +static void arm_iommu_sync_single_for_cpu(struct device *dev,
> +               dma_addr_t handle, size_t size, enum dma_data_direction
> dir)
> +{
> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
> +       dma_addr_t iova = handle & PAGE_MASK;
> +       struct page *page =
> phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
> +       unsigned int offset = handle & ~PAGE_MASK;
> +
> +       if (!iova)
> +               return;
> +
> +       if (!arch_is_coherent())
> +               __dma_page_dev_to_cpu(page, offset, size, dir);
> +}
> +
> +static void arm_iommu_sync_single_for_device(struct device *dev,
> +               dma_addr_t handle, size_t size, enum dma_data_direction
> dir)
> +{
> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
> +       dma_addr_t iova = handle & PAGE_MASK;
> +       struct page *page =
> phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
> +       unsigned int offset = handle & ~PAGE_MASK;
> +
> +       if (!iova)
> +               return;
> +
> +       __dma_page_cpu_to_dev(page, offset, size, dir);
> +}
> +
> +struct dma_map_ops iommu_ops = {
> +       .alloc          = arm_iommu_alloc_attrs,
> +       .free           = arm_iommu_free_attrs,
> +       .mmap           = arm_iommu_mmap_attrs,
> +
> +       .map_page               = arm_iommu_map_page,
> +       .unmap_page             = arm_iommu_unmap_page,
> +       .sync_single_for_cpu    = arm_iommu_sync_single_for_cpu,
> +       .sync_single_for_device = arm_iommu_sync_single_for_device,
> +
> +       .map_sg                 = arm_iommu_map_sg,
> +       .unmap_sg               = arm_iommu_unmap_sg,
> +       .sync_sg_for_cpu        = arm_iommu_sync_sg_for_cpu,
> +       .sync_sg_for_device     = arm_iommu_sync_sg_for_device,
> +};
> +
> +/**
> + * arm_iommu_create_mapping
> + * @bus: pointer to the bus holding the client device (for IOMMU calls)
> + * @base: start address of the valid IO address space
> + * @size: size of the valid IO address space
> + * @order: accuracy of the IO addresses allocations
> + *
> + * Creates a mapping structure which holds information about used/unused
> + * IO address ranges, which is required to perform memory allocation and
> + * mapping with IOMMU aware functions.
> + *
> + * The client device needs to be attached to the mapping with the
> + * arm_iommu_attach_device() function.
> + */
> +struct dma_iommu_mapping *
> +arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t
> size,
> +                        int order)
> +{
> +       unsigned int count = size >> (PAGE_SHIFT + order);
> +       unsigned int bitmap_size = BITS_TO_LONGS(count) * sizeof(long);
> +       struct dma_iommu_mapping *mapping;
> +       int err = -ENOMEM;
> +
> +       if (!count)
> +               return ERR_PTR(-EINVAL);
> +
> +       mapping = kzalloc(sizeof(struct dma_iommu_mapping), GFP_KERNEL);
> +       if (!mapping)
> +               goto err;
> +
> +       mapping->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
> +       if (!mapping->bitmap)
> +               goto err2;
> +
> +       mapping->base = base;
> +       mapping->bits = BITS_PER_BYTE * bitmap_size;
> +       mapping->order = order;
> +       spin_lock_init(&mapping->lock);
> +
> +       mapping->domain = iommu_domain_alloc(bus);
> +       if (!mapping->domain)
> +               goto err3;
> +
> +       kref_init(&mapping->kref);
> +       return mapping;
> +err3:
> +       kfree(mapping->bitmap);
> +err2:
> +       kfree(mapping);
> +err:
> +       return ERR_PTR(err);
> +}
> +
> +static void release_iommu_mapping(struct kref *kref)
> +{
> +       struct dma_iommu_mapping *mapping =
> +               container_of(kref, struct dma_iommu_mapping, kref);
> +
> +       iommu_domain_free(mapping->domain);
> +       kfree(mapping->bitmap);
> +       kfree(mapping);
> +}
> +
> +void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping)
> +{
> +       if (mapping)
> +               kref_put(&mapping->kref, release_iommu_mapping);
> +}
> +
> +/**
> + * arm_iommu_attach_device
> + * @dev: valid struct device pointer
> + * @mapping: io address space mapping structure (returned from
> + *     arm_iommu_create_mapping)
> + *
> + * Attaches specified io address space mapping to the provided device,
> + * this replaces the dma operations (dma_map_ops pointer) with the
> + * IOMMU aware version. More than one client might be attached to
> + * the same io address space mapping.
> + */
> +int arm_iommu_attach_device(struct device *dev,
> +                           struct dma_iommu_mapping *mapping)
> +{
> +       int err;
> +
> +       err = iommu_attach_device(mapping->domain, dev);
> +       if (err)
> +               return err;
> +
> +       kref_get(&mapping->kref);
> +       dev->archdata.mapping = mapping;
> +       set_dma_ops(dev, &iommu_ops);
> +
> +       pr_info("Attached IOMMU controller to %s device.\n",
> dev_name(dev));
> +       return 0;
> +}
> +
> +#endif
> diff --git a/arch/arm/mm/vmregion.h b/arch/arm/mm/vmregion.h
> index 162be66..bf312c3 100644
> --- a/arch/arm/mm/vmregion.h
> +++ b/arch/arm/mm/vmregion.h
> @@ -17,7 +17,7 @@ struct arm_vmregion {
>        struct list_head        vm_list;
>        unsigned long           vm_start;
>        unsigned long           vm_end;
> -       struct page             *vm_pages;
> +       void                    *priv;
>        int                     vm_active;
>        const void              *caller;
>  };
> --
> 1.7.1.569.g6f426
>
>
> _______________________________________________
> Linaro-mm-sig mailing list
> Linaro-mm-sig@lists.linaro.org
> http://lists.linaro.org/mailman/listinfo/linaro-mm-sig
>
Abhinav Kochhar April 20, 2012, 1:48 a.m. UTC | #2
The "size_t size" argument is unused as well.

On Fri, Apr 20, 2012 at 10:44 AM, Abhinav Kochhar <kochhar.abhinav@gmail.com
> wrote:

> Hi Marek,
>
> dma_addr_t dma_addr is an unused argument passed to the function
> arm_iommu_mmap_attrs
>
>
> +static int arm_iommu_mmap_attrs(struct device *dev, struct
> vm_area_struct *vma,
> +                   void *cpu_addr, dma_addr_t dma_addr, size_t size,
> +                   struct dma_attrs *attrs)
> +{
> +       struct arm_vmregion *c;
> +
> +       vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot);
> +       c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr);
> +
> +       if (c) {
> +               struct page **pages = c->priv;
> +
> +               unsigned long uaddr = vma->vm_start;
> +               unsigned long usize = vma->vm_end - vma->vm_start;
> +               int i = 0;
> +
> +               do {
> +                       int ret;
> +
> +                       ret = vm_insert_page(vma, uaddr, pages[i++]);
> +                       if (ret) {
> +                               pr_err("Remapping memory, error: %d\n",
> ret);
> +                               return ret;
> +                       }
> +
> +                       uaddr += PAGE_SIZE;
> +                       usize -= PAGE_SIZE;
> +               } while (usize > 0);
> +       }
> +       return 0;
> +}
>
>
> On Wed, Apr 18, 2012 at 10:44 PM, Marek Szyprowski <
> m.szyprowski@samsung.com> wrote:
>
>> This patch adds a complete implementation of the DMA-mapping API for
>> devices which have IOMMU support.
>>
>> This implementation tries to optimize dma address space usage by remapping
>> all possible physical memory chunks into a single dma address space chunk.
>>
>> DMA address space is managed on top of the bitmap stored in the
>> dma_iommu_mapping structure stored in device->archdata. Platform setup
>> code has to initialize parameters of the dma address space (base address,
>> size, allocation precision order) with arm_iommu_create_mapping()
>> function.
>> To reduce the size of the bitmap, all allocations are aligned to the
>> specified order of base 4 KiB pages.
>>
>> dma_alloc_* functions allocate physical memory in chunks, each obtained
>> with the alloc_pages() function, to avoid failing if the physical memory
>> gets fragmented. In the worst case the allocated buffer is composed of
>> 4 KiB page chunks.
>>
>> The dma_map_sg() function minimizes the total number of dma address space
>> chunks by merging physical memory chunks into one larger dma address
>> space chunk. If the requested chunk (scatter list entry) boundaries
>> match physical page boundaries, most calls to dma_map_sg() will
>> result in creating only one chunk in dma address space.
>>
>> dma_map_page() simply creates a mapping for the given page(s) in the dma
>> address space.
>>
>> All dma functions also perform the required cache operations, like their
>> counterparts in the ARM linear physical memory mapping version.
>>
>> This patch contains code and fixes kindly provided by:
>> - Krishna Reddy <vdumpa@nvidia.com>,
>> - Andrzej Pietrasiewicz <andrzej.p@samsung.com>,
>> - Hiroshi DOYU <hdoyu@nvidia.com>
>>
>> Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
>> Acked-by: Kyungmin Park <kyungmin.park@samsung.com>
>> Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
>> Tested-By: Subash Patel <subash.ramaswamy@linaro.org>
>> ---
>>  arch/arm/Kconfig                 |    8 +
>>  arch/arm/include/asm/device.h    |    3 +
>>  arch/arm/include/asm/dma-iommu.h |   34 ++
>>  arch/arm/mm/dma-mapping.c        |  727
>> +++++++++++++++++++++++++++++++++++++-
>>  arch/arm/mm/vmregion.h           |    2 +-
>>  5 files changed, 759 insertions(+), 15 deletions(-)
>>  create mode 100644 arch/arm/include/asm/dma-iommu.h
>>
>> diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
>> index 0fd27d4..874e519 100644
>> --- a/arch/arm/Kconfig
>> +++ b/arch/arm/Kconfig
>> @@ -46,6 +46,14 @@ config ARM
>>  config ARM_HAS_SG_CHAIN
>>        bool
>>
>> +config NEED_SG_DMA_LENGTH
>> +       bool
>> +
>> +config ARM_DMA_USE_IOMMU
>> +       select NEED_SG_DMA_LENGTH
>> +       select ARM_HAS_SG_CHAIN
>> +       bool
>> +
>>  config HAVE_PWM
>>        bool
>>
>> diff --git a/arch/arm/include/asm/device.h b/arch/arm/include/asm/device.h
>> index 6e2cb0e..b69c0d3 100644
>> --- a/arch/arm/include/asm/device.h
>> +++ b/arch/arm/include/asm/device.h
>> @@ -14,6 +14,9 @@ struct dev_archdata {
>>  #ifdef CONFIG_IOMMU_API
>>        void *iommu; /* private IOMMU data */
>>  #endif
>> +#ifdef CONFIG_ARM_DMA_USE_IOMMU
>> +       struct dma_iommu_mapping        *mapping;
>> +#endif
>>  };
>>
>>  struct omap_device;
>> diff --git a/arch/arm/include/asm/dma-iommu.h
>> b/arch/arm/include/asm/dma-iommu.h
>> new file mode 100644
>> index 0000000..799b094
>> --- /dev/null
>> +++ b/arch/arm/include/asm/dma-iommu.h
>> @@ -0,0 +1,34 @@
>> +#ifndef ASMARM_DMA_IOMMU_H
>> +#define ASMARM_DMA_IOMMU_H
>> +
>> +#ifdef __KERNEL__
>> +
>> +#include <linux/mm_types.h>
>> +#include <linux/scatterlist.h>
>> +#include <linux/dma-debug.h>
>> +#include <linux/kmemcheck.h>
>> +
>> +struct dma_iommu_mapping {
>> +       /* iommu specific data */
>> +       struct iommu_domain     *domain;
>> +
>> +       void                    *bitmap;
>> +       size_t                  bits;
>> +       unsigned int            order;
>> +       dma_addr_t              base;
>> +
>> +       spinlock_t              lock;
>> +       struct kref             kref;
>> +};
>> +
>> +struct dma_iommu_mapping *
>> +arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t
>> size,
>> +                        int order);
>> +
>> +void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping);
>> +
>> +int arm_iommu_attach_device(struct device *dev,
>> +                                       struct dma_iommu_mapping
>> *mapping);
>> +
>> +#endif /* __KERNEL__ */
>> +#endif
>> diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
>> index d4aad65..2d11aa0 100644
>> --- a/arch/arm/mm/dma-mapping.c
>> +++ b/arch/arm/mm/dma-mapping.c
>> @@ -19,6 +19,8 @@
>>  #include <linux/dma-mapping.h>
>>  #include <linux/highmem.h>
>>  #include <linux/slab.h>
>> +#include <linux/iommu.h>
>> +#include <linux/vmalloc.h>
>>
>>  #include <asm/memory.h>
>>  #include <asm/highmem.h>
>> @@ -26,6 +28,7 @@
>>  #include <asm/tlbflush.h>
>>  #include <asm/sizes.h>
>>  #include <asm/mach/arch.h>
>> +#include <asm/dma-iommu.h>
>>
>>  #include "mm.h"
>>
>> @@ -155,6 +158,21 @@ static u64 get_coherent_dma_mask(struct device *dev)
>>        return mask;
>>  }
>>
>> +static void __dma_clear_buffer(struct page *page, size_t size)
>> +{
>> +       void *ptr;
>> +       /*
>> +        * Ensure that the allocated pages are zeroed, and that any data
>> +        * lurking in the kernel direct-mapped region is invalidated.
>> +        */
>> +       ptr = page_address(page);
>> +       if (ptr) {
>> +               memset(ptr, 0, size);
>> +               dmac_flush_range(ptr, ptr + size);
>> +               outer_flush_range(__pa(ptr), __pa(ptr) + size);
>> +       }
>> +}
>> +
>>  /*
>>  * Allocate a DMA buffer for 'dev' of size 'size' using the
>>  * specified gfp mask.  Note that 'size' must be page aligned.
>> @@ -163,7 +181,6 @@ static struct page *__dma_alloc_buffer(struct device
>> *dev, size_t size, gfp_t gf
>>  {
>>        unsigned long order = get_order(size);
>>        struct page *page, *p, *e;
>> -       void *ptr;
>>        u64 mask = get_coherent_dma_mask(dev);
>>
>>  #ifdef CONFIG_DMA_API_DEBUG
>> @@ -192,14 +209,7 @@ static struct page *__dma_alloc_buffer(struct device
>> *dev, size_t size, gfp_t gf
>>        for (p = page + (size >> PAGE_SHIFT), e = page + (1 << order); p <
>> e; p++)
>>                __free_page(p);
>>
>> -       /*
>> -        * Ensure that the allocated pages are zeroed, and that any data
>> -        * lurking in the kernel direct-mapped region is invalidated.
>> -        */
>> -       ptr = page_address(page);
>> -       memset(ptr, 0, size);
>> -       dmac_flush_range(ptr, ptr + size);
>> -       outer_flush_range(__pa(ptr), __pa(ptr) + size);
>> +       __dma_clear_buffer(page, size);
>>
>>        return page;
>>  }
>> @@ -348,7 +358,7 @@ __dma_alloc_remap(struct page *page, size_t size,
>> gfp_t gfp, pgprot_t prot,
>>                u32 off = CONSISTENT_OFFSET(c->vm_start) &
>> (PTRS_PER_PTE-1);
>>
>>                pte = consistent_pte[idx] + off;
>> -               c->vm_pages = page;
>> +               c->priv = page;
>>
>>                do {
>>                        BUG_ON(!pte_none(*pte));
>> @@ -461,6 +471,14 @@ __dma_alloc(struct device *dev, size_t size,
>> dma_addr_t *handle, gfp_t gfp,
>>        return addr;
>>  }
>>
>> +static inline pgprot_t __get_dma_pgprot(struct dma_attrs *attrs,
>> pgprot_t prot)
>> +{
>> +       prot = dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs) ?
>> +                           pgprot_writecombine(prot) :
>> +                           pgprot_dmacoherent(prot);
>> +       return prot;
>> +}
>> +
>>  /*
>>  * Allocate DMA-coherent memory space and return both the kernel remapped
>>  * virtual and bus address for that space.
>> @@ -468,9 +486,7 @@ __dma_alloc(struct device *dev, size_t size,
>> dma_addr_t *handle, gfp_t gfp,
>>  void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
>>                    gfp_t gfp, struct dma_attrs *attrs)
>>  {
>> -       pgprot_t prot = dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs) ?
>> -                       pgprot_writecombine(pgprot_kernel) :
>> -                       pgprot_dmacoherent(pgprot_kernel);
>> +       pgprot_t prot = __get_dma_pgprot(attrs, pgprot_kernel);
>>        void *memory;
>>
>>        if (dma_alloc_from_coherent(dev, size, handle, &memory))
>> @@ -497,16 +513,20 @@ int arm_dma_mmap(struct device *dev, struct
>> vm_area_struct *vma,
>>                            pgprot_writecombine(vma->vm_page_prot) :
>>                            pgprot_dmacoherent(vma->vm_page_prot);
>>
>> +       if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
>> +               return ret;
>> +
>>        c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr);
>>        if (c) {
>>                unsigned long off = vma->vm_pgoff;
>> +               struct page *pages = c->priv;
>>
>>                kern_size = (c->vm_end - c->vm_start) >> PAGE_SHIFT;
>>
>>                if (off < kern_size &&
>>                    user_size <= (kern_size - off)) {
>>                        ret = remap_pfn_range(vma, vma->vm_start,
>> -                                             page_to_pfn(c->vm_pages) +
>> off,
>> +                                             page_to_pfn(pages) + off,
>>                                              user_size << PAGE_SHIFT,
>>                                              vma->vm_page_prot);
>>                }
>> @@ -645,6 +665,9 @@ int arm_dma_map_sg(struct device *dev, struct
>> scatterlist *sg, int nents,
>>        int i, j;
>>
>>        for_each_sg(sg, s, nents, i) {
>> +#ifdef CONFIG_NEED_SG_DMA_LENGTH
>> +               s->dma_length = s->length;
>> +#endif
>>                s->dma_address = ops->map_page(dev, sg_page(s), s->offset,
>>                                                s->length, dir, attrs);
>>                if (dma_mapping_error(dev, s->dma_address))
>> @@ -753,3 +776,679 @@ static int __init dma_debug_do_init(void)
>>        return 0;
>>  }
>>  fs_initcall(dma_debug_do_init);
>> +
>> +#ifdef CONFIG_ARM_DMA_USE_IOMMU
>> +
>> +/* IOMMU */
>> +
>> +static inline dma_addr_t __alloc_iova(struct dma_iommu_mapping *mapping,
>> +                                     size_t size)
>> +{
>> +       unsigned int order = get_order(size);
>> +       unsigned int align = 0;
>> +       unsigned int count, start;
>> +       unsigned long flags;
>> +
>> +       count = ((PAGE_ALIGN(size) >> PAGE_SHIFT) +
>> +                (1 << mapping->order) - 1) >> mapping->order;
>> +
>> +       if (order > mapping->order)
>> +               align = (1 << (order - mapping->order)) - 1;
>> +
>> +       spin_lock_irqsave(&mapping->lock, flags);
>> +       start = bitmap_find_next_zero_area(mapping->bitmap,
>> mapping->bits, 0,
>> +                                          count, align);
>> +       if (start > mapping->bits) {
>> +               spin_unlock_irqrestore(&mapping->lock, flags);
>> +               return DMA_ERROR_CODE;
>> +       }
>> +
>> +       bitmap_set(mapping->bitmap, start, count);
>> +       spin_unlock_irqrestore(&mapping->lock, flags);
>> +
>> +       return mapping->base + (start << (mapping->order + PAGE_SHIFT));
>> +}
>> +
>> +static inline void __free_iova(struct dma_iommu_mapping *mapping,
>> +                              dma_addr_t addr, size_t size)
>> +{
>> +       unsigned int start = (addr - mapping->base) >>
>> +                            (mapping->order + PAGE_SHIFT);
>> +       unsigned int count = ((size >> PAGE_SHIFT) +
>> +                             (1 << mapping->order) - 1) >>
>> mapping->order;
>> +       unsigned long flags;
>> +
>> +       spin_lock_irqsave(&mapping->lock, flags);
>> +       bitmap_clear(mapping->bitmap, start, count);
>> +       spin_unlock_irqrestore(&mapping->lock, flags);
>> +}
>> +
>> +static struct page **__iommu_alloc_buffer(struct device *dev, size_t
>> size, gfp_t gfp)
>> +{
>> +       struct page **pages;
>> +       int count = size >> PAGE_SHIFT;
>> +       int array_size = count * sizeof(struct page *);
>> +       int i = 0;
>> +
>> +       if (array_size <= PAGE_SIZE)
>> +               pages = kzalloc(array_size, gfp);
>> +       else
>> +               pages = vzalloc(array_size);
>> +       if (!pages)
>> +               return NULL;
>> +
>> +       while (count) {
>> +               int j, order = __ffs(count);
>> +
>> +               pages[i] = alloc_pages(gfp | __GFP_NOWARN, order);
>> +               while (!pages[i] && order)
>> +                       pages[i] = alloc_pages(gfp | __GFP_NOWARN,
>> --order);
>> +               if (!pages[i])
>> +                       goto error;
>> +
>> +               if (order)
>> +                       split_page(pages[i], order);
>> +               j = 1 << order;
>> +               while (--j)
>> +                       pages[i + j] = pages[i] + j;
>> +
>> +               __dma_clear_buffer(pages[i], PAGE_SIZE << order);
>> +               i += 1 << order;
>> +               count -= 1 << order;
>> +       }
>> +
>> +       return pages;
>> +error:
>> +       while (--i)
>> +               if (pages[i])
>> +                       __free_pages(pages[i], 0);
>> +       if (array_size < PAGE_SIZE)
>> +               kfree(pages);
>> +       else
>> +               vfree(pages);
>> +       return NULL;
>> +}
>> +
>> +static int __iommu_free_buffer(struct device *dev, struct page **pages,
>> size_t size)
>> +{
>> +       int count = size >> PAGE_SHIFT;
>> +       int array_size = count * sizeof(struct page *);
>> +       int i;
>> +       for (i = 0; i < count; i++)
>> +               if (pages[i])
>> +                       __free_pages(pages[i], 0);
>> +       if (array_size < PAGE_SIZE)
>> +               kfree(pages);
>> +       else
>> +               vfree(pages);
>> +       return 0;
>> +}
>> +
>> +/*
>> + * Create a CPU mapping for the specified pages
>> + */
>> +static void *
>> +__iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp,
>> pgprot_t prot)
>> +{
>> +       struct arm_vmregion *c;
>> +       size_t align;
>> +       size_t count = size >> PAGE_SHIFT;
>> +       int bit;
>> +
>> +       if (!consistent_pte[0]) {
>> +               pr_err("%s: not initialised\n", __func__);
>> +               dump_stack();
>> +               return NULL;
>> +       }
>> +
>> +       /*
>> +        * Align the virtual region allocation - maximum alignment is
>> +        * a section size, minimum is a page size.  This helps reduce
>> +        * fragmentation of the DMA space, and also prevents allocations
>> +        * smaller than a section from crossing a section boundary.
>> +        */
>> +       bit = fls(size - 1);
>> +       if (bit > SECTION_SHIFT)
>> +               bit = SECTION_SHIFT;
>> +       align = 1 << bit;
>> +
>> +       /*
>> +        * Allocate a virtual address in the consistent mapping region.
>> +        */
>> +       c = arm_vmregion_alloc(&consistent_head, align, size,
>> +                           gfp & ~(__GFP_DMA | __GFP_HIGHMEM), NULL);
>> +       if (c) {
>> +               pte_t *pte;
>> +               int idx = CONSISTENT_PTE_INDEX(c->vm_start);
>> +               int i = 0;
>> +               u32 off = CONSISTENT_OFFSET(c->vm_start) &
>> (PTRS_PER_PTE-1);
>> +
>> +               pte = consistent_pte[idx] + off;
>> +               c->priv = pages;
>> +
>> +               do {
>> +                       BUG_ON(!pte_none(*pte));
>> +
>> +                       set_pte_ext(pte, mk_pte(pages[i], prot), 0);
>> +                       pte++;
>> +                       off++;
>> +                       i++;
>> +                       if (off >= PTRS_PER_PTE) {
>> +                               off = 0;
>> +                               pte = consistent_pte[++idx];
>> +                       }
>> +               } while (i < count);
>> +
>> +               dsb();
>> +
>> +               return (void *)c->vm_start;
>> +       }
>> +       return NULL;
>> +}
>> +
>> +/*
>> + * Create a mapping in device IO address space for specified pages
>> + */
>> +static dma_addr_t
>> +__iommu_create_mapping(struct device *dev, struct page **pages, size_t
>> size)
>> +{
>> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
>> +       unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
>> +       dma_addr_t dma_addr, iova;
>> +       int i, ret = DMA_ERROR_CODE;
>> +
>> +       dma_addr = __alloc_iova(mapping, size);
>> +       if (dma_addr == DMA_ERROR_CODE)
>> +               return dma_addr;
>> +
>> +       iova = dma_addr;
>> +       for (i = 0; i < count; ) {
>> +               unsigned int next_pfn = page_to_pfn(pages[i]) + 1;
>> +               phys_addr_t phys = page_to_phys(pages[i]);
>> +               unsigned int len, j;
>> +
>> +               for (j = i + 1; j < count; j++, next_pfn++)
>> +                       if (page_to_pfn(pages[j]) != next_pfn)
>> +                               break;
>> +
>> +               len = (j - i) << PAGE_SHIFT;
>> +               ret = iommu_map(mapping->domain, iova, phys, len, 0);
>> +               if (ret < 0)
>> +                       goto fail;
>> +               iova += len;
>> +               i = j;
>> +       }
>> +       return dma_addr;
>> +fail:
>> +       iommu_unmap(mapping->domain, dma_addr, iova-dma_addr);
>> +       __free_iova(mapping, dma_addr, size);
>> +       return DMA_ERROR_CODE;
>> +}
>> +
>> +static int __iommu_remove_mapping(struct device *dev, dma_addr_t iova,
>> size_t size)
>> +{
>> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
>> +
>> +       /*
>> +        * add optional in-page offset from iova to size and align
>> +        * result to page size
>> +        */
>> +       size = PAGE_ALIGN((iova & ~PAGE_MASK) + size);
>> +       iova &= PAGE_MASK;
>> +
>> +       iommu_unmap(mapping->domain, iova, size);
>> +       __free_iova(mapping, iova, size);
>> +       return 0;
>> +}
>> +
>> +static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
>> +           dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs)
>> +{
>> +       pgprot_t prot = __get_dma_pgprot(attrs, pgprot_kernel);
>> +       struct page **pages;
>> +       void *addr = NULL;
>> +
>> +       *handle = DMA_ERROR_CODE;
>> +       size = PAGE_ALIGN(size);
>> +
>> +       pages = __iommu_alloc_buffer(dev, size, gfp);
>> +       if (!pages)
>> +               return NULL;
>> +
>> +       *handle = __iommu_create_mapping(dev, pages, size);
>> +       if (*handle == DMA_ERROR_CODE)
>> +               goto err_buffer;
>> +
>> +       addr = __iommu_alloc_remap(pages, size, gfp, prot);
>> +       if (!addr)
>> +               goto err_mapping;
>> +
>> +       return addr;
>> +
>> +err_mapping:
>> +       __iommu_remove_mapping(dev, *handle, size);
>> +err_buffer:
>> +       __iommu_free_buffer(dev, pages, size);
>> +       return NULL;
>> +}
>> +
>> +static int arm_iommu_mmap_attrs(struct device *dev, struct
>> vm_area_struct *vma,
>> +                   void *cpu_addr, dma_addr_t dma_addr, size_t size,
>> +                   struct dma_attrs *attrs)
>> +{
>> +       struct arm_vmregion *c;
>> +
>> +       vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot);
>> +       c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr);
>> +
>> +       if (c) {
>> +               struct page **pages = c->priv;
>> +
>> +               unsigned long uaddr = vma->vm_start;
>> +               unsigned long usize = vma->vm_end - vma->vm_start;
>> +               int i = 0;
>> +
>> +               do {
>> +                       int ret;
>> +
>> +                       ret = vm_insert_page(vma, uaddr, pages[i++]);
>> +                       if (ret) {
>> +                               pr_err("Remapping memory, error: %d\n",
>> ret);
>> +                               return ret;
>> +                       }
>> +
>> +                       uaddr += PAGE_SIZE;
>> +                       usize -= PAGE_SIZE;
>> +               } while (usize > 0);
>> +       }
>> +       return 0;
>> +}
>> +
>> +/*
>> + * free a page as defined by the above mapping.
>> + * Must not be called with IRQs disabled.
>> + */
>> +void arm_iommu_free_attrs(struct device *dev, size_t size, void
>> *cpu_addr,
>> +                         dma_addr_t handle, struct dma_attrs *attrs)
>> +{
>> +       struct arm_vmregion *c;
>> +       size = PAGE_ALIGN(size);
>> +
>> +       c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr);
>> +       if (c) {
>> +               struct page **pages = c->priv;
>> +               __dma_free_remap(cpu_addr, size);
>> +               __iommu_remove_mapping(dev, handle, size);
>> +               __iommu_free_buffer(dev, pages, size);
>> +       }
>> +}
>> +
>> +/*
>> + * Map a part of the scatter-gather list into contiguous io address space
>> + */
>> +static int __map_sg_chunk(struct device *dev, struct scatterlist *sg,
>> +                         size_t size, dma_addr_t *handle,
>> +                         enum dma_data_direction dir)
>> +{
>> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
>> +       dma_addr_t iova, iova_base;
>> +       int ret = 0;
>> +       unsigned int count;
>> +       struct scatterlist *s;
>> +
>> +       size = PAGE_ALIGN(size);
>> +       *handle = DMA_ERROR_CODE;
>> +
>> +       iova_base = iova = __alloc_iova(mapping, size);
>> +       if (iova == DMA_ERROR_CODE)
>> +               return -ENOMEM;
>> +
>> +       for (count = 0, s = sg; count < (size >> PAGE_SHIFT); s =
>> sg_next(s)) {
>> +               phys_addr_t phys = page_to_phys(sg_page(s));
>> +               unsigned int len = PAGE_ALIGN(s->offset + s->length);
>> +
>> +               if (!arch_is_coherent())
>> +                       __dma_page_cpu_to_dev(sg_page(s), s->offset,
>> s->length, dir);
>> +
>> +               ret = iommu_map(mapping->domain, iova, phys, len, 0);
>> +               if (ret < 0)
>> +                       goto fail;
>> +               count += len >> PAGE_SHIFT;
>> +               iova += len;
>> +       }
>> +       *handle = iova_base;
>> +
>> +       return 0;
>> +fail:
>> +       iommu_unmap(mapping->domain, iova_base, count * PAGE_SIZE);
>> +       __free_iova(mapping, iova_base, size);
>> +       return ret;
>> +}
>> +
>> +/**
>> + * arm_iommu_map_sg - map a set of SG buffers for streaming mode DMA
>> + * @dev: valid struct device pointer
>> + * @sg: list of buffers
>> + * @nents: number of buffers to map
>> + * @dir: DMA transfer direction
>> + *
>> + * Map a set of buffers described by scatterlist in streaming mode for
>> DMA.
>> + * The scatter gather list elements are merged together (if possible) and
>> + * tagged with the appropriate dma address and length. They are obtained
>> via
>> + * sg_dma_{address,length}.
>> + */
>> +int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, int
>> nents,
>> +                    enum dma_data_direction dir, struct dma_attrs *attrs)
>> +{
>> +       struct scatterlist *s = sg, *dma = sg, *start = sg;
>> +       int i, count = 0;
>> +       unsigned int offset = s->offset;
>> +       unsigned int size = s->offset + s->length;
>> +       unsigned int max = dma_get_max_seg_size(dev);
>> +
>> +       for (i = 1; i < nents; i++) {
>> +               s = sg_next(s);
>> +
>> +               s->dma_address = DMA_ERROR_CODE;
>> +               s->dma_length = 0;
>> +
>> +               if (s->offset || (size & ~PAGE_MASK) || size + s->length
>> > max) {
>> +                       if (__map_sg_chunk(dev, start, size,
>> &dma->dma_address,
>> +                           dir) < 0)
>> +                               goto bad_mapping;
>> +
>> +                       dma->dma_address += offset;
>> +                       dma->dma_length = size - offset;
>> +
>> +                       size = offset = s->offset;
>> +                       start = s;
>> +                       dma = sg_next(dma);
>> +                       count += 1;
>> +               }
>> +               size += s->length;
>> +       }
>> +       if (__map_sg_chunk(dev, start, size, &dma->dma_address, dir) < 0)
>> +               goto bad_mapping;
>> +
>> +       dma->dma_address += offset;
>> +       dma->dma_length = size - offset;
>> +
>> +       return count+1;
>> +
>> +bad_mapping:
>> +       for_each_sg(sg, s, count, i)
>> +               __iommu_remove_mapping(dev, sg_dma_address(s),
>> sg_dma_len(s));
>> +       return 0;
>> +}
>> +
>> +/**
>> + * arm_iommu_unmap_sg - tear down the IO mappings created for an SG list
>> + * @dev: valid struct device pointer
>> + * @sg: list of buffers
>> + * @nents: number of buffers to unmap (same as was passed to dma_map_sg)
>> + * @dir: DMA transfer direction (same as was passed to dma_map_sg)
>> + * @attrs: DMA attributes (not used here)
>> + *
>> + * Every entry that carries a non-zero DMA length has its IO address range
>> + * removed.  On non-coherent systems each entry is additionally passed to
>> + * __dma_page_dev_to_cpu().  CPU access rules are the same as for
>> + * dma_unmap_single().
>> + */
>> +void arm_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
>> +                       enum dma_data_direction dir, struct dma_attrs *attrs)
>> +{
>> +       struct scatterlist *entry;
>> +       int idx;
>> +
>> +       for_each_sg(sg, entry, nents, idx) {
>> +               /* entries merged into an earlier chunk have length 0 */
>> +               if (sg_dma_len(entry) != 0)
>> +                       __iommu_remove_mapping(dev, sg_dma_address(entry),
>> +                                              sg_dma_len(entry));
>> +
>> +               if (arch_is_coherent())
>> +                       continue;
>> +
>> +               __dma_page_dev_to_cpu(sg_page(entry), entry->offset,
>> +                                     entry->length, dir);
>> +       }
>> +}
>> +
>> +/**
>> + * arm_iommu_sync_sg_for_cpu - hand the buffers of an SG list to the CPU
>> + * @dev: valid struct device pointer
>> + * @sg: list of buffers
>> + * @nents: number of list entries to process
>> + * @dir: DMA transfer direction (same as was passed to dma_map_sg)
>> + *
>> + * On non-coherent systems __dma_page_dev_to_cpu() is run on every entry;
>> + * nothing is done when the architecture is cache coherent.
>> + */
>> +void arm_iommu_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
>> +                       int nents, enum dma_data_direction dir)
>> +{
>> +       struct scatterlist *entry;
>> +       int idx;
>> +
>> +       for_each_sg(sg, entry, nents, idx) {
>> +               if (arch_is_coherent())
>> +                       continue;
>> +
>> +               __dma_page_dev_to_cpu(sg_page(entry), entry->offset,
>> +                                     entry->length, dir);
>> +       }
>> +}
>> +
>> +/**
>> + * arm_iommu_sync_sg_for_device - hand the buffers of an SG list to the device
>> + * @dev: valid struct device pointer
>> + * @sg: list of buffers
>> + * @nents: number of list entries to process
>> + * @dir: DMA transfer direction (same as was passed to dma_map_sg)
>> + *
>> + * On non-coherent systems __dma_page_cpu_to_dev() is run on every entry;
>> + * nothing is done when the architecture is cache coherent.
>> + */
>> +void arm_iommu_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
>> +                       int nents, enum dma_data_direction dir)
>> +{
>> +       struct scatterlist *entry;
>> +       int idx;
>> +
>> +       for_each_sg(sg, entry, nents, idx) {
>> +               if (arch_is_coherent())
>> +                       continue;
>> +
>> +               __dma_page_cpu_to_dev(sg_page(entry), entry->offset,
>> +                                     entry->length, dir);
>> +       }
>> +}
>> +
>> +
>> +/**
>> + * arm_iommu_map_page - map part of a page into the device's IO address space
>> + * @dev: valid struct device pointer
>> + * @page: page that buffer resides in
>> + * @offset: offset into page for start of buffer
>> + * @size: size of buffer to map
>> + * @dir: DMA transfer direction
>> + * @attrs: DMA attributes (not used here)
>> + *
>> + * IOMMU aware version of arm_dma_map_page().  Returns the IO address of the
>> + * buffer (including @offset), or DMA_ERROR_CODE when no IO address range
>> + * could be reserved or the IOMMU mapping failed.
>> + */
>> +static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page,
>> +            unsigned long offset, size_t size, enum dma_data_direction dir,
>> +            struct dma_attrs *attrs)
>> +{
>> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
>> +       /* whole pages needed to cover [offset, offset + size) */
>> +       int len = PAGE_ALIGN(size + offset);
>> +       dma_addr_t iova;
>> +
>> +       if (!arch_is_coherent())
>> +               __dma_page_cpu_to_dev(page, offset, size, dir);
>> +
>> +       iova = __alloc_iova(mapping, len);
>> +       if (iova == DMA_ERROR_CODE)
>> +               return DMA_ERROR_CODE;
>> +
>> +       if (iommu_map(mapping->domain, iova, page_to_phys(page), len, 0) < 0) {
>> +               /* give the reserved IO address range back */
>> +               __free_iova(mapping, iova, len);
>> +               return DMA_ERROR_CODE;
>> +       }
>> +
>> +       return iova + offset;
>> +}
>> +
>> +/**
>> + * arm_iommu_unmap_page - undo a mapping created by arm_iommu_map_page()
>> + * @dev: valid struct device pointer
>> + * @handle: DMA address of buffer
>> + * @size: size of buffer (same as passed to dma_map_page)
>> + * @dir: DMA transfer direction (same as passed to dma_map_page)
>> + * @attrs: DMA attributes (not used here)
>> + *
>> + * IOMMU aware version of arm_dma_unmap_page().  A handle whose page-aligned
>> + * part is zero is ignored.
>> + */
>> +static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle,
>> +               size_t size, enum dma_data_direction dir,
>> +               struct dma_attrs *attrs)
>> +{
>> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
>> +       dma_addr_t page_iova = handle & PAGE_MASK;
>> +       int in_page = handle & ~PAGE_MASK;
>> +       int span = PAGE_ALIGN(size + in_page);
>> +       struct page *page;
>> +
>> +       page = phys_to_page(iommu_iova_to_phys(mapping->domain, page_iova));
>> +       if (page_iova) {
>> +               if (!arch_is_coherent())
>> +                       __dma_page_dev_to_cpu(page, in_page, size, dir);
>> +
>> +               iommu_unmap(mapping->domain, page_iova, span);
>> +               __free_iova(mapping, page_iova, span);
>> +       }
>> +}
>> +
>> +/*
>> + * Hand a buffer mapped with arm_iommu_map_page() back to the CPU.  The
>> + * backing page is looked up through the IOMMU domain; a handle whose
>> + * page-aligned part is zero is ignored.
>> + */
>> +static void arm_iommu_sync_single_for_cpu(struct device *dev,
>> +               dma_addr_t handle, size_t size, enum dma_data_direction dir)
>> +{
>> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
>> +       dma_addr_t page_iova = handle & PAGE_MASK;
>> +       unsigned int in_page = handle & ~PAGE_MASK;
>> +       struct page *page;
>> +
>> +       page = phys_to_page(iommu_iova_to_phys(mapping->domain, page_iova));
>> +       if (page_iova && !arch_is_coherent())
>> +               __dma_page_dev_to_cpu(page, in_page, size, dir);
>> +}
>> +
>> +/*
>> + * Hand a buffer mapped with arm_iommu_map_page() over to the device.  The
>> + * backing page is looked up through the IOMMU domain; a handle whose
>> + * page-aligned part is zero is ignored.
>> + */
>> +static void arm_iommu_sync_single_for_device(struct device *dev,
>> +               dma_addr_t handle, size_t size, enum dma_data_direction dir)
>> +{
>> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
>> +       dma_addr_t iova = handle & PAGE_MASK;
>> +       struct page *page =
>> +               phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
>> +       unsigned int offset = handle & ~PAGE_MASK;
>> +
>> +       if (!iova)
>> +               return;
>> +
>> +       /*
>> +        * Skip the cache maintenance on coherent systems, as every other
>> +        * map/unmap/sync operation in this file does.
>> +        */
>> +       if (!arch_is_coherent())
>> +               __dma_page_cpu_to_dev(page, offset, size, dir);
>> +}
>> +
>> +/*
>> + * DMA operations for devices behind an IOMMU; installed on a device by
>> + * arm_iommu_attach_device().
>> + */
>> +struct dma_map_ops iommu_ops = {
>> +       /* buffer allocation and user space mapping */
>> +       .alloc                  = arm_iommu_alloc_attrs,
>> +       .free                   = arm_iommu_free_attrs,
>> +       .mmap                   = arm_iommu_mmap_attrs,
>> +
>> +       /* scatter-gather list operations */
>> +       .map_sg                 = arm_iommu_map_sg,
>> +       .unmap_sg               = arm_iommu_unmap_sg,
>> +       .sync_sg_for_cpu        = arm_iommu_sync_sg_for_cpu,
>> +       .sync_sg_for_device     = arm_iommu_sync_sg_for_device,
>> +
>> +       /* single page operations */
>> +       .map_page               = arm_iommu_map_page,
>> +       .unmap_page             = arm_iommu_unmap_page,
>> +       .sync_single_for_cpu    = arm_iommu_sync_single_for_cpu,
>> +       .sync_single_for_device = arm_iommu_sync_single_for_device,
>> +};
>> +
>> +/**
>> + * arm_iommu_create_mapping
>> + * @bus: pointer to the bus holding the client device (for IOMMU calls)
>> + * @base: start address of the valid IO address space
>> + * @size: size of the valid IO address space
>> + * @order: accuracy of the IO addresses allocations; one bit of the
>> + *     allocation bitmap covers (1 << @order) pages
>> + *
>> + * Creates a mapping structure which holds information about used/unused
>> + * IO address ranges, which is required to perform memory allocation and
>> + * mapping with IOMMU aware functions.
>> + *
>> + * The client device need to be attached to the mapping with
>> + * arm_iommu_attach_device function.
>> + *
>> + * Returns the new mapping holding one reference, ERR_PTR(-EINVAL) when
>> + * @order is negative or @size is too small to hold a single allocation
>> + * unit, or ERR_PTR(-ENOMEM) when an allocation (including the IOMMU
>> + * domain) fails.
>> + */
>> +struct dma_iommu_mapping *
>> +arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size,
>> +                        int order)
>> +{
>> +       unsigned int count;
>> +       unsigned int bitmap_size;
>> +       struct dma_iommu_mapping *mapping;
>> +       int err = -ENOMEM;
>> +
>> +       /* a negative order would be used as a shift count below */
>> +       if (order < 0)
>> +               return ERR_PTR(-EINVAL);
>> +
>> +       count = size >> (PAGE_SHIFT + order);
>> +       if (!count)
>> +               return ERR_PTR(-EINVAL);
>> +
>> +       /* storage is rounded up to whole longs */
>> +       bitmap_size = BITS_TO_LONGS(count) * sizeof(long);
>> +
>> +       mapping = kzalloc(sizeof(*mapping), GFP_KERNEL);
>> +       if (!mapping)
>> +               goto err;
>> +
>> +       mapping->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
>> +       if (!mapping->bitmap)
>> +               goto err2;
>> +
>> +       mapping->base = base;
>> +       /*
>> +        * Only the first count bits describe addresses inside
>> +        * [base, base + size); exposing the rounded-up storage size would
>> +        * let the allocator hand out IO addresses beyond the requested
>> +        * space.
>> +        */
>> +       mapping->bits = count;
>> +       mapping->order = order;
>> +       spin_lock_init(&mapping->lock);
>> +
>> +       mapping->domain = iommu_domain_alloc(bus);
>> +       if (!mapping->domain)
>> +               goto err3;
>> +
>> +       kref_init(&mapping->kref);
>> +       return mapping;
>> +err3:
>> +       kfree(mapping->bitmap);
>> +err2:
>> +       kfree(mapping);
>> +err:
>> +       return ERR_PTR(err);
>> +}
>> +
>> +/*
>> + * kref release callback: frees the IOMMU domain, the allocation bitmap and
>> + * the mapping structure that embeds @kref.
>> + */
>> +static void release_iommu_mapping(struct kref *kref)
>> +{
>> +       struct dma_iommu_mapping *map;
>> +
>> +       map = container_of(kref, struct dma_iommu_mapping, kref);
>> +
>> +       iommu_domain_free(map->domain);
>> +       kfree(map->bitmap);
>> +       kfree(map);
>> +}
>> +
>> +/*
>> + * Drop one reference to @mapping; a NULL @mapping is accepted and ignored.
>> + * The mapping is freed by release_iommu_mapping() when the last reference
>> + * goes away.
>> + */
>> +void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping)
>> +{
>> +       if (!mapping)
>> +               return;
>> +
>> +       kref_put(&mapping->kref, release_iommu_mapping);
>> +}
>> +
>> +/**
>> + * arm_iommu_attach_device
>> + * @dev: valid struct device pointer
>> + * @mapping: io address space mapping structure (returned from
>> + *     arm_iommu_create_mapping)
>> + *
>> + * Attaches @dev to the IOMMU domain of @mapping, takes a reference on
>> + * @mapping, stores it in the device's archdata and switches the device to
>> + * the IOMMU aware dma operations.  More than one client might be attached
>> + * to the same io address space mapping.
>> + *
>> + * Returns 0 on success or the error reported by iommu_attach_device().
>> + */
>> +int arm_iommu_attach_device(struct device *dev,
>> +                           struct dma_iommu_mapping *mapping)
>> +{
>> +       int ret = iommu_attach_device(mapping->domain, dev);
>> +
>> +       if (ret)
>> +               return ret;
>> +
>> +       kref_get(&mapping->kref);
>> +       dev->archdata.mapping = mapping;
>> +       set_dma_ops(dev, &iommu_ops);
>> +
>> +       pr_info("Attached IOMMU controller to %s device.\n", dev_name(dev));
>> +       return 0;
>> +}
>> +
>> +#endif
>> diff --git a/arch/arm/mm/vmregion.h b/arch/arm/mm/vmregion.h
>> index 162be66..bf312c3 100644
>> --- a/arch/arm/mm/vmregion.h
>> +++ b/arch/arm/mm/vmregion.h
>> @@ -17,7 +17,7 @@ struct arm_vmregion {
>>        struct list_head        vm_list;
>>        unsigned long           vm_start;
>>        unsigned long           vm_end;
>> -       struct page             *vm_pages;
>> +       void                    *priv;
>>        int                     vm_active;
>>        const void              *caller;
>>  };
>> --
>> 1.7.1.569.g6f426
>>
>>
>> _______________________________________________
>> Linaro-mm-sig mailing list
>> Linaro-mm-sig@lists.linaro.org
>> http://lists.linaro.org/mailman/listinfo/linaro-mm-sig
>>
>
>
Kyungmin Park April 20, 2012, 1:51 a.m. UTC | #3
On 4/20/12, Abhinav Kochhar <kochhar.abhinav@gmail.com> wrote:
> Hi Marek,
>
> dma_addr_t dma_addr is an unused argument passed to the function
> arm_iommu_mmap_attrs

Even though it is not used here, the function is installed in the .mmap field
of dma_map_ops, so the argument is required to match that callback's type.

struct dma_map_ops iommu_ops = {
       .alloc          = arm_iommu_alloc_attrs,
       .free           = arm_iommu_free_attrs,
       .mmap           = arm_iommu_mmap_attrs,

Thank you,
Kyungmin Park
>
> +/*
> + * Map a buffer allocated by arm_iommu_alloc_attrs() into user space.
> + *
> + * The buffer's page array is taken from the consistent region that holds
> + * @cpu_addr and its pages are inserted one by one starting at
> + * vma->vm_start.  Returns 0 on success (also when no region is found for
> + * @cpu_addr, in which case nothing is mapped), -ENXIO when the VMA is larger
> + * than the buffer, or the error reported by vm_insert_page().
> + * @dma_addr and @size are unused; they are present to match the mmap
> + * callback of struct dma_map_ops.
> + */
> +static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
> +                   void *cpu_addr, dma_addr_t dma_addr, size_t size,
> +                   struct dma_attrs *attrs)
> +{
> +       struct arm_vmregion *c;
> +
> +       vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot);
> +       c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr);
> +
> +       if (c) {
> +               struct page **pages = c->priv;
> +               unsigned long kern_pages = (c->vm_end - c->vm_start) >> PAGE_SHIFT;
> +               unsigned long user_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
> +               unsigned long uaddr = vma->vm_start;
> +               unsigned long i;
> +
> +               /* never walk past the end of the buffer's page array */
> +               if (user_pages > kern_pages)
> +                       return -ENXIO;
> +
> +               for (i = 0; i < user_pages; i++) {
> +                       int ret = vm_insert_page(vma, uaddr, pages[i]);
> +
> +                       if (ret) {
> +                               pr_err("Remapping memory, error: %d\n", ret);
> +                               return ret;
> +                       }
> +
> +                       uaddr += PAGE_SIZE;
> +               }
> +       }
> +       return 0;
> +}
>
>
> On Wed, Apr 18, 2012 at 10:44 PM, Marek Szyprowski <m.szyprowski@samsung.com
>> wrote:
>
>> This patch adds a complete implementation of the DMA-mapping API for
>> devices which have IOMMU support.
>>
>> This implementation tries to optimize dma address space usage by remapping
>> all possible physical memory chunks into a single dma address space chunk.
>>
>> DMA address space is managed on top of the bitmap stored in the
>> dma_iommu_mapping structure stored in device->archdata. Platform setup
>> code has to initialize parameters of the dma address space (base address,
>> size, allocation precision order) with arm_iommu_create_mapping()
>> function.
>> To reduce the size of the bitmap, all allocations are aligned to the
>> specified order of base 4 KiB pages.
>>
>> dma_alloc_* functions allocate physical memory in chunks, each with
>> alloc_pages() function to avoid failing if the physical memory gets
>> fragmented. In worst case the allocated buffer is composed of 4 KiB page
>> chunks.
>>
>> dma_map_sg() function minimizes the total number of dma address space
>> chunks by merging of physical memory chunks into one larger dma address
>> space chunk. If requested chunk (scatter list entry) boundaries
>> match physical page boundaries, most calls to dma_map_sg() requests will
>> result in creating only one chunk in dma address space.
>>
>> dma_map_page() simply creates a mapping for the given page(s) in the dma
>> address space.
>>
>> All dma functions also perform required cache operation like their
>> counterparts from the arm linear physical memory mapping version.
>>
>> This patch contains code and fixes kindly provided by:
>> - Krishna Reddy <vdumpa@nvidia.com>,
>> - Andrzej Pietrasiewicz <andrzej.p@samsung.com>,
>> - Hiroshi DOYU <hdoyu@nvidia.com>
>>
>> Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
>> Acked-by: Kyungmin Park <kyungmin.park@samsung.com>
>> Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
>> Tested-By: Subash Patel <subash.ramaswamy@linaro.org>
>> ---
>>  arch/arm/Kconfig                 |    8 +
>>  arch/arm/include/asm/device.h    |    3 +
>>  arch/arm/include/asm/dma-iommu.h |   34 ++
>>  arch/arm/mm/dma-mapping.c        |  727
>> +++++++++++++++++++++++++++++++++++++-
>>  arch/arm/mm/vmregion.h           |    2 +-
>>  5 files changed, 759 insertions(+), 15 deletions(-)
>>  create mode 100644 arch/arm/include/asm/dma-iommu.h
>>
>> diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
>> index 0fd27d4..874e519 100644
>> --- a/arch/arm/Kconfig
>> +++ b/arch/arm/Kconfig
>> @@ -46,6 +46,14 @@ config ARM
>>  config ARM_HAS_SG_CHAIN
>>        bool
>>
>> +config NEED_SG_DMA_LENGTH
>> +       bool
>> +
>> +config ARM_DMA_USE_IOMMU
>> +       select NEED_SG_DMA_LENGTH
>> +       select ARM_HAS_SG_CHAIN
>> +       bool
>> +
>>  config HAVE_PWM
>>        bool
>>
>> diff --git a/arch/arm/include/asm/device.h b/arch/arm/include/asm/device.h
>> index 6e2cb0e..b69c0d3 100644
>> --- a/arch/arm/include/asm/device.h
>> +++ b/arch/arm/include/asm/device.h
>> @@ -14,6 +14,9 @@ struct dev_archdata {
>>  #ifdef CONFIG_IOMMU_API
>>        void *iommu; /* private IOMMU data */
>>  #endif
>> +#ifdef CONFIG_ARM_DMA_USE_IOMMU
>> +       struct dma_iommu_mapping        *mapping;
>> +#endif
>>  };
>>
>>  struct omap_device;
>> diff --git a/arch/arm/include/asm/dma-iommu.h
>> b/arch/arm/include/asm/dma-iommu.h
>> new file mode 100644
>> index 0000000..799b094
>> --- /dev/null
>> +++ b/arch/arm/include/asm/dma-iommu.h
>> @@ -0,0 +1,34 @@
>> +#ifndef ASMARM_DMA_IOMMU_H
>> +#define ASMARM_DMA_IOMMU_H
>> +
>> +#ifdef __KERNEL__
>> +
>> +#include <linux/mm_types.h>
>> +#include <linux/scatterlist.h>
>> +#include <linux/dma-debug.h>
>> +#include <linux/kmemcheck.h>
>> +
>> +/*
>> + * Describes one device IO address space together with the bitmap used to
>> + * track which parts of it are in use.
>> + */
>> +struct dma_iommu_mapping {
>> +       /* iommu specific data */
>> +       struct iommu_domain     *domain;
>> +
>> +       /* allocation bitmap; a set bit marks a used allocation unit */
>> +       void                    *bitmap;
>> +       /* number of bits of the bitmap searched by the allocator */
>> +       size_t                  bits;
>> +       /* one bitmap bit covers (1 << order) pages */
>> +       unsigned int            order;
>> +       /* IO address corresponding to bit 0 of the bitmap */
>> +       dma_addr_t              base;
>> +
>> +       /* taken around every search/set/clear of the bitmap */
>> +       spinlock_t              lock;
>> +       /* reference count; the mapping is freed when it drops to zero */
>> +       struct kref             kref;
>> +};
>> +
>> +/* Allocate a mapping covering size bytes of IO address space at base. */
>> +struct dma_iommu_mapping *
>> +arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t
>> size,
>> +                        int order);
>> +
>> +/* Drop a reference to the mapping; NULL is accepted. */
>> +void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping);
>> +
>> +/* Attach dev to the mapping and switch it to the IOMMU aware dma ops. */
>> +int arm_iommu_attach_device(struct device *dev,
>> +                                       struct dma_iommu_mapping
>> *mapping);
>> +
>> +#endif /* __KERNEL__ */
>> +#endif
>> diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
>> index d4aad65..2d11aa0 100644
>> --- a/arch/arm/mm/dma-mapping.c
>> +++ b/arch/arm/mm/dma-mapping.c
>> @@ -19,6 +19,8 @@
>>  #include <linux/dma-mapping.h>
>>  #include <linux/highmem.h>
>>  #include <linux/slab.h>
>> +#include <linux/iommu.h>
>> +#include <linux/vmalloc.h>
>>
>>  #include <asm/memory.h>
>>  #include <asm/highmem.h>
>> @@ -26,6 +28,7 @@
>>  #include <asm/tlbflush.h>
>>  #include <asm/sizes.h>
>>  #include <asm/mach/arch.h>
>> +#include <asm/dma-iommu.h>
>>
>>  #include "mm.h"
>>
>> @@ -155,6 +158,21 @@ static u64 get_coherent_dma_mask(struct device *dev)
>>        return mask;
>>  }
>>
>> +/*
>> + * Zero @size bytes starting at @page and flush them from the caches, so
>> + * that no stale data lurking in the kernel direct-mapped region survives.
>> + * Nothing is done when page_address() yields no kernel address for @page.
>> + */
>> +static void __dma_clear_buffer(struct page *page, size_t size)
>> +{
>> +       void *vaddr = page_address(page);
>> +
>> +       if (!vaddr)
>> +               return;
>> +
>> +       memset(vaddr, 0, size);
>> +       dmac_flush_range(vaddr, vaddr + size);
>> +       outer_flush_range(__pa(vaddr), __pa(vaddr) + size);
>> +}
>> +
>>  /*
>>  * Allocate a DMA buffer for 'dev' of size 'size' using the
>>  * specified gfp mask.  Note that 'size' must be page aligned.
>> @@ -163,7 +181,6 @@ static struct page *__dma_alloc_buffer(struct device
>> *dev, size_t size, gfp_t gf
>>  {
>>        unsigned long order = get_order(size);
>>        struct page *page, *p, *e;
>> -       void *ptr;
>>        u64 mask = get_coherent_dma_mask(dev);
>>
>>  #ifdef CONFIG_DMA_API_DEBUG
>> @@ -192,14 +209,7 @@ static struct page *__dma_alloc_buffer(struct device
>> *dev, size_t size, gfp_t gf
>>        for (p = page + (size >> PAGE_SHIFT), e = page + (1 << order); p <
>> e; p++)
>>                __free_page(p);
>>
>> -       /*
>> -        * Ensure that the allocated pages are zeroed, and that any data
>> -        * lurking in the kernel direct-mapped region is invalidated.
>> -        */
>> -       ptr = page_address(page);
>> -       memset(ptr, 0, size);
>> -       dmac_flush_range(ptr, ptr + size);
>> -       outer_flush_range(__pa(ptr), __pa(ptr) + size);
>> +       __dma_clear_buffer(page, size);
>>
>>        return page;
>>  }
>> @@ -348,7 +358,7 @@ __dma_alloc_remap(struct page *page, size_t size,
>> gfp_t gfp, pgprot_t prot,
>>                u32 off = CONSISTENT_OFFSET(c->vm_start) &
>> (PTRS_PER_PTE-1);
>>
>>                pte = consistent_pte[idx] + off;
>> -               c->vm_pages = page;
>> +               c->priv = page;
>>
>>                do {
>>                        BUG_ON(!pte_none(*pte));
>> @@ -461,6 +471,14 @@ __dma_alloc(struct device *dev, size_t size,
>> dma_addr_t *handle, gfp_t gfp,
>>        return addr;
>>  }
>>
>> +/*
>> + * Derive the page protection for a DMA buffer from @prot: write-combining
>> + * when DMA_ATTR_WRITE_COMBINE is set in @attrs, DMA-coherent otherwise.
>> + */
>> +static inline pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot)
>> +{
>> +       if (dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs))
>> +               return pgprot_writecombine(prot);
>> +
>> +       return pgprot_dmacoherent(prot);
>> +}
>> +
>>  /*
>>  * Allocate DMA-coherent memory space and return both the kernel remapped
>>  * virtual and bus address for that space.
>> @@ -468,9 +486,7 @@ __dma_alloc(struct device *dev, size_t size,
>> dma_addr_t *handle, gfp_t gfp,
>>  void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
>>                    gfp_t gfp, struct dma_attrs *attrs)
>>  {
>> -       pgprot_t prot = dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs) ?
>> -                       pgprot_writecombine(pgprot_kernel) :
>> -                       pgprot_dmacoherent(pgprot_kernel);
>> +       pgprot_t prot = __get_dma_pgprot(attrs, pgprot_kernel);
>>        void *memory;
>>
>>        if (dma_alloc_from_coherent(dev, size, handle, &memory))
>> @@ -497,16 +513,20 @@ int arm_dma_mmap(struct device *dev, struct
>> vm_area_struct *vma,
>>                            pgprot_writecombine(vma->vm_page_prot) :
>>                            pgprot_dmacoherent(vma->vm_page_prot);
>>
>> +       if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
>> +               return ret;
>> +
>>        c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr);
>>        if (c) {
>>                unsigned long off = vma->vm_pgoff;
>> +               struct page *pages = c->priv;
>>
>>                kern_size = (c->vm_end - c->vm_start) >> PAGE_SHIFT;
>>
>>                if (off < kern_size &&
>>                    user_size <= (kern_size - off)) {
>>                        ret = remap_pfn_range(vma, vma->vm_start,
>> -                                             page_to_pfn(c->vm_pages) +
>> off,
>> +                                             page_to_pfn(pages) + off,
>>                                              user_size << PAGE_SHIFT,
>>                                              vma->vm_page_prot);
>>                }
>> @@ -645,6 +665,9 @@ int arm_dma_map_sg(struct device *dev, struct
>> scatterlist *sg, int nents,
>>        int i, j;
>>
>>        for_each_sg(sg, s, nents, i) {
>> +#ifdef CONFIG_NEED_SG_DMA_LENGTH
>> +               s->dma_length = s->length;
>> +#endif
>>                s->dma_address = ops->map_page(dev, sg_page(s), s->offset,
>>                                                s->length, dir, attrs);
>>                if (dma_mapping_error(dev, s->dma_address))
>> @@ -753,3 +776,679 @@ static int __init dma_debug_do_init(void)
>>        return 0;
>>  }
>>  fs_initcall(dma_debug_do_init);
>> +
>> +#ifdef CONFIG_ARM_DMA_USE_IOMMU
>> +
>> +/* IOMMU */
>> +
>> +/*
>> + * Reserve a range of IO addresses large enough for @size bytes.
>> + *
>> + * The mapping's bitmap is searched from bit 0 while holding mapping->lock;
>> + * each bit stands for (1 << mapping->order) pages.  Requests whose page
>> + * order is larger than mapping->order are aligned to their own order.
>> + * Returns the first IO address of the range, or DMA_ERROR_CODE when no
>> + * free range was found.
>> + */
>> +static inline dma_addr_t __alloc_iova(struct dma_iommu_mapping *mapping,
>> +                                     size_t size)
>> +{
>> +       unsigned int req_order = get_order(size);
>> +       unsigned int align_mask = 0;
>> +       unsigned int nr_bits, first_bit;
>> +       unsigned long flags;
>> +       int found;
>> +
>> +       /* pages needed, rounded up to whole allocation units */
>> +       nr_bits = ((PAGE_ALIGN(size) >> PAGE_SHIFT) +
>> +                  (1 << mapping->order) - 1) >> mapping->order;
>> +
>> +       if (req_order > mapping->order)
>> +               align_mask = (1 << (req_order - mapping->order)) - 1;
>> +
>> +       spin_lock_irqsave(&mapping->lock, flags);
>> +       first_bit = bitmap_find_next_zero_area(mapping->bitmap, mapping->bits,
>> +                                              0, nr_bits, align_mask);
>> +       found = !(first_bit > mapping->bits);
>> +       if (found)
>> +               bitmap_set(mapping->bitmap, first_bit, nr_bits);
>> +       spin_unlock_irqrestore(&mapping->lock, flags);
>> +
>> +       if (!found)
>> +               return DMA_ERROR_CODE;
>> +
>> +       return mapping->base + (first_bit << (mapping->order + PAGE_SHIFT));
>> +}
>> +
>> +/*
>> + * Return the IO address range [@addr, @addr + @size) to the mapping's
>> + * bitmap.  @size is rounded up to whole pages and then to whole allocation
>> + * units exactly as __alloc_iova() does, so that a range is always released
>> + * with the same number of bits it was reserved with.
>> + */
>> +static inline void __free_iova(struct dma_iommu_mapping *mapping,
>> +                              dma_addr_t addr, size_t size)
>> +{
>> +       unsigned int start = (addr - mapping->base) >>
>> +                            (mapping->order + PAGE_SHIFT);
>> +       unsigned int count = ((PAGE_ALIGN(size) >> PAGE_SHIFT) +
>> +                             (1 << mapping->order) - 1) >> mapping->order;
>> +       unsigned long flags;
>> +
>> +       spin_lock_irqsave(&mapping->lock, flags);
>> +       bitmap_clear(mapping->bitmap, start, count);
>> +       spin_unlock_irqrestore(&mapping->lock, flags);
>> +}
>> +
>> +/*
>> + * Allocate the physical pages backing a buffer of @size bytes.
>> + *
>> + * The pages are obtained in chunks that are as large as the remaining page
>> + * count allows, falling back to smaller orders when an allocation fails.
>> + * Every chunk is split into single pages and cleared.  Returns an array
>> + * with one struct page pointer per page of the buffer, or NULL on failure
>> + * (in which case everything allocated so far has been freed again).
>> + *
>> + * The array lives in kmalloc memory when it fits into one page and in
>> + * vmalloc memory otherwise; __iommu_free_buffer() has to make the same
>> + * distinction.
>> + */
>> +static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, gfp_t gfp)
>> +{
>> +       struct page **pages;
>> +       int count = size >> PAGE_SHIFT;
>> +       int array_size = count * sizeof(struct page *);
>> +       int i = 0;
>> +
>> +       if (array_size <= PAGE_SIZE)
>> +               pages = kzalloc(array_size, gfp);
>> +       else
>> +               pages = vzalloc(array_size);
>> +       if (!pages)
>> +               return NULL;
>> +
>> +       while (count) {
>> +               /* largest power of two that divides the remaining count */
>> +               int j, order = __ffs(count);
>> +
>> +               pages[i] = alloc_pages(gfp | __GFP_NOWARN, order);
>> +               while (!pages[i] && order)
>> +                       pages[i] = alloc_pages(gfp | __GFP_NOWARN, --order);
>> +               if (!pages[i])
>> +                       goto error;
>> +
>> +               /* split into single pages so each can be freed on its own */
>> +               if (order)
>> +                       split_page(pages[i], order);
>> +               j = 1 << order;
>> +               while (--j)
>> +                       pages[i + j] = pages[i] + j;
>> +
>> +               __dma_clear_buffer(pages[i], PAGE_SIZE << order);
>> +               i += 1 << order;
>> +               count -= 1 << order;
>> +       }
>> +
>> +       return pages;
>> +error:
>> +       /*
>> +        * Entries 0 .. i-1 are populated.  Free all of them, including
>> +        * entry 0, and do nothing when the very first allocation failed.
>> +        */
>> +       while (i--)
>> +               if (pages[i])
>> +                       __free_pages(pages[i], 0);
>> +       /* must mirror the kzalloc/vzalloc decision made above */
>> +       if (array_size <= PAGE_SIZE)
>> +               kfree(pages);
>> +       else
>> +               vfree(pages);
>> +       return NULL;
>> +}
>> +
>> +/*
>> + * Free the pages of a buffer created by __iommu_alloc_buffer() together
>> + * with the page pointer array itself.  @size must be the size the buffer
>> + * was allocated with.  Always returns 0.
>> + */
>> +static int __iommu_free_buffer(struct device *dev, struct page **pages, size_t size)
>> +{
>> +       int count = size >> PAGE_SHIFT;
>> +       int array_size = count * sizeof(struct page *);
>> +       int i;
>> +
>> +       for (i = 0; i < count; i++)
>> +               if (pages[i])
>> +                       __free_pages(pages[i], 0);
>> +       /*
>> +        * __iommu_alloc_buffer() uses kzalloc() for arrays of up to and
>> +        * including PAGE_SIZE bytes, so the comparison has to be "<=" here
>> +        * as well; otherwise a kmalloc'ed array would be passed to vfree().
>> +        */
>> +       if (array_size <= PAGE_SIZE)
>> +               kfree(pages);
>> +       else
>> +               vfree(pages);
>> +       return 0;
>> +}
>> +
>> +/*
>> + * Create a CPU mapping for the specified pages.
>> + *
>> + * A virtual region of @size bytes is taken from the consistent mapping
>> + * area and one page table entry with protection @prot is installed for
>> + * each entry of @pages.  The @pages array is remembered in the region's
>> + * priv field.  Returns the start address of the region, or NULL when the
>> + * consistent area is not initialised or no region could be allocated.
>> + */
>> +static void *
>> +__iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t
>> prot)
>> +{
>> +       struct arm_vmregion *c;
>> +       size_t align;
>> +       size_t count = size >> PAGE_SHIFT;
>> +       int bit;
>> +
>> +       if (!consistent_pte[0]) {
>> +               pr_err("%s: not initialised\n", __func__);
>> +               dump_stack();
>> +               return NULL;
>> +       }
>> +
>> +       /*
>> +        * Align the virtual region allocation - maximum alignment is
>> +        * a section size, minimum is a page size.  This helps reduce
>> +        * fragmentation of the DMA space, and also prevents allocations
>> +        * smaller than a section from crossing a section boundary.
>> +        */
>> +       bit = fls(size - 1);
>> +       if (bit > SECTION_SHIFT)
>> +               bit = SECTION_SHIFT;
>> +       align = 1 << bit;
>> +
>> +       /*
>> +        * Allocate a virtual address in the consistent mapping region.
>> +        */
>> +       c = arm_vmregion_alloc(&consistent_head, align, size,
>> +                           gfp & ~(__GFP_DMA | __GFP_HIGHMEM), NULL);
>> +       if (c) {
>> +               pte_t *pte;
>> +               int idx = CONSISTENT_PTE_INDEX(c->vm_start);
>> +               int i = 0;
>> +               u32 off = CONSISTENT_OFFSET(c->vm_start) &
>> (PTRS_PER_PTE-1);
>> +
>> +               pte = consistent_pte[idx] + off;
>> +               /* keep the page array so it can be found via the region */
>> +               c->priv = pages;
>> +
>> +               do {
>> +                       BUG_ON(!pte_none(*pte));
>> +
>> +                       set_pte_ext(pte, mk_pte(pages[i], prot), 0);
>> +                       pte++;
>> +                       off++;
>> +                       i++;
>> +                       /* continue in the next page table once this one is full */
>> +                       if (off >= PTRS_PER_PTE) {
>> +                               off = 0;
>> +                               pte = consistent_pte[++idx];
>> +                       }
>> +               } while (i < count);
>> +
>> +               dsb();
>> +
>> +               return (void *)c->vm_start;
>> +       }
>> +       return NULL;
>> +}
>> +
>> +/*
>> + * Create a mapping in device IO address space for the specified pages.
>> + *
>> + * One contiguous IO address range is reserved for the whole buffer.  Runs
>> + * of physically consecutive pages are passed to iommu_map() in one call.
>> + * Returns the IO address of the buffer, or DMA_ERROR_CODE on failure, in
>> + * which case the part mapped so far is unmapped and the range released.
>> + */
>> +static dma_addr_t
>> +__iommu_create_mapping(struct device *dev, struct page **pages, size_t size)
>> +{
>> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
>> +       unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
>> +       unsigned int first = 0;
>> +       dma_addr_t base, cursor;
>> +
>> +       base = __alloc_iova(mapping, size);
>> +       if (base == DMA_ERROR_CODE)
>> +               return DMA_ERROR_CODE;
>> +
>> +       cursor = base;
>> +       while (first < nr_pages) {
>> +               unsigned int expect_pfn = page_to_pfn(pages[first]) + 1;
>> +               phys_addr_t phys = page_to_phys(pages[first]);
>> +               unsigned int end = first + 1;
>> +               unsigned int run_bytes;
>> +
>> +               /* extend the run while the pages stay physically adjacent */
>> +               while (end < nr_pages &&
>> +                      page_to_pfn(pages[end]) == expect_pfn) {
>> +                       end++;
>> +                       expect_pfn++;
>> +               }
>> +
>> +               run_bytes = (end - first) << PAGE_SHIFT;
>> +               if (iommu_map(mapping->domain, cursor, phys, run_bytes, 0) < 0) {
>> +                       iommu_unmap(mapping->domain, base, cursor - base);
>> +                       __free_iova(mapping, base, size);
>> +                       return DMA_ERROR_CODE;
>> +               }
>> +
>> +               cursor += run_bytes;
>> +               first = end;
>> +       }
>> +
>> +       return base;
>> +}
>> +
>> +/*
>> + * Unmap @size bytes at IO address @iova and release the address range.
>> + * @iova may point into the middle of a page: the range is widened to whole
>> + * pages before it is unmapped and freed.  Always returns 0.
>> + */
>> +static int __iommu_remove_mapping(struct device *dev, dma_addr_t iova, size_t size)
>> +{
>> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
>> +       /* length including the in-page offset, rounded up to whole pages */
>> +       size_t span = PAGE_ALIGN((iova & ~PAGE_MASK) + size);
>> +       dma_addr_t page_iova = iova & PAGE_MASK;
>> +
>> +       iommu_unmap(mapping->domain, page_iova, span);
>> +       __free_iova(mapping, page_iova, span);
>> +
>> +       return 0;
>> +}
>> +
>> +/*
>> + * Allocate a DMA buffer for a device behind an IOMMU.
>> + *
>> + * The buffer is built in three steps: physical pages are allocated, mapped
>> + * into the device's IO address space and finally mapped into the kernel's
>> + * consistent region.  On success the kernel address is returned and the IO
>> + * address is stored in *@handle.  On failure NULL is returned and the steps
>> + * completed so far are undone.
>> + */
>> +static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
>> +           dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs)
>> +{
>> +       pgprot_t prot = __get_dma_pgprot(attrs, pgprot_kernel);
>> +       struct page **pages;
>> +       void *cpu_addr;
>> +
>> +       *handle = DMA_ERROR_CODE;
>> +       size = PAGE_ALIGN(size);
>> +
>> +       pages = __iommu_alloc_buffer(dev, size, gfp);
>> +       if (!pages)
>> +               return NULL;
>> +
>> +       *handle = __iommu_create_mapping(dev, pages, size);
>> +       if (*handle != DMA_ERROR_CODE) {
>> +               cpu_addr = __iommu_alloc_remap(pages, size, gfp, prot);
>> +               if (cpu_addr)
>> +                       return cpu_addr;
>> +
>> +               /* no kernel mapping: drop the IO mapping again */
>> +               __iommu_remove_mapping(dev, *handle, size);
>> +       }
>> +
>> +       __iommu_free_buffer(dev, pages, size);
>> +       return NULL;
>> +}
>> +
>> +/*
>> + * Map a buffer allocated by arm_iommu_alloc_attrs() into user space.
>> + *
>> + * The buffer's page array is taken from the consistent region that holds
>> + * @cpu_addr and its pages are inserted one by one starting at
>> + * vma->vm_start.  Returns 0 on success (also when no region is found for
>> + * @cpu_addr, in which case nothing is mapped), -ENXIO when the VMA is larger
>> + * than the buffer, or the error reported by vm_insert_page().
>> + * @dma_addr and @size are unused; they are present to match the mmap
>> + * callback of struct dma_map_ops.
>> + */
>> +static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
>> +                   void *cpu_addr, dma_addr_t dma_addr, size_t size,
>> +                   struct dma_attrs *attrs)
>> +{
>> +       struct arm_vmregion *c;
>> +
>> +       vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot);
>> +       c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr);
>> +
>> +       if (c) {
>> +               struct page **pages = c->priv;
>> +               unsigned long kern_pages = (c->vm_end - c->vm_start) >> PAGE_SHIFT;
>> +               unsigned long user_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
>> +               unsigned long uaddr = vma->vm_start;
>> +               unsigned long i;
>> +
>> +               /* never walk past the end of the buffer's page array */
>> +               if (user_pages > kern_pages)
>> +                       return -ENXIO;
>> +
>> +               for (i = 0; i < user_pages; i++) {
>> +                       int ret = vm_insert_page(vma, uaddr, pages[i]);
>> +
>> +                       if (ret) {
>> +                               pr_err("Remapping memory, error: %d\n", ret);
>> +                               return ret;
>> +                       }
>> +
>> +                       uaddr += PAGE_SIZE;
>> +               }
>> +       }
>> +       return 0;
>> +}
>> +
>> +/*
>> + * Free a buffer obtained from arm_iommu_alloc_attrs(): the kernel mapping,
>> + * the IO mapping and the pages themselves are released.  Nothing is done
>> + * when @cpu_addr does not belong to a consistent region.
>> + * Must not be called with IRQs disabled.
>> + */
>> +void arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
>> +                         dma_addr_t handle, struct dma_attrs *attrs)
>> +{
>> +       struct arm_vmregion *region;
>> +       struct page **pages;
>> +
>> +       size = PAGE_ALIGN(size);
>> +
>> +       region = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr);
>> +       if (!region)
>> +               return;
>> +
>> +       /* fetch the page array before the kernel mapping is torn down */
>> +       pages = region->priv;
>> +       __dma_free_remap(cpu_addr, size);
>> +       __iommu_remove_mapping(dev, handle, size);
>> +       __iommu_free_buffer(dev, pages, size);
>> +}
>> +
>> +/*
>> + * Map a part of the scatter-gather list into contiguous io address space
>> + */
>> +static int __map_sg_chunk(struct device *dev, struct scatterlist *sg,
>> +                         size_t size, dma_addr_t *handle,
>> +                         enum dma_data_direction dir)
>> +{
>> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
>> +       dma_addr_t iova, iova_base;
>> +       int ret = 0;
>> +       unsigned int count;
>> +       struct scatterlist *s;
>> +
>> +       size = PAGE_ALIGN(size);
>> +       *handle = DMA_ERROR_CODE;
>> +
>> +       iova_base = iova = __alloc_iova(mapping, size);
>> +       if (iova == DMA_ERROR_CODE)
>> +               return -ENOMEM;
>> +
>> +       for (count = 0, s = sg; count < (size >> PAGE_SHIFT); s =
>> sg_next(s)) {
>> +               phys_addr_t phys = page_to_phys(sg_page(s));
>> +               unsigned int len = PAGE_ALIGN(s->offset + s->length);
>> +
>> +               if (!arch_is_coherent())
>> +                       __dma_page_cpu_to_dev(sg_page(s), s->offset,
>> s->length, dir);
>> +
>> +               ret = iommu_map(mapping->domain, iova, phys, len, 0);
>> +               if (ret < 0)
>> +                       goto fail;
>> +               count += len >> PAGE_SHIFT;
>> +               iova += len;
>> +       }
>> +       *handle = iova_base;
>> +
>> +       return 0;
>> +fail:
>> +       iommu_unmap(mapping->domain, iova_base, count * PAGE_SIZE);
>> +       __free_iova(mapping, iova_base, size);
>> +       return ret;
>> +}
>> +
>> +/**
>> + * arm_iommu_map_sg - map a set of SG buffers for streaming mode DMA
>> + * @dev: valid struct device pointer
>> + * @sg: list of buffers
>> + * @nents: number of buffers to map
>> + * @dir: DMA transfer direction
>> + *
>> + * Map a set of buffers described by scatterlist in streaming mode for
>> DMA.
>> + * The scatter gather list elements are merged together (if possible) and
>> + * tagged with the appropriate dma address and length. They are obtained
>> via
>> + * sg_dma_{address,length}.
>> + */
>> +int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, int
>> nents,
>> +                    enum dma_data_direction dir, struct dma_attrs *attrs)
>> +{
>> +       struct scatterlist *s = sg, *dma = sg, *start = sg;
>> +       int i, count = 0;
>> +       unsigned int offset = s->offset;
>> +       unsigned int size = s->offset + s->length;
>> +       unsigned int max = dma_get_max_seg_size(dev);
>> +
>> +       for (i = 1; i < nents; i++) {
>> +               s = sg_next(s);
>> +
>> +               s->dma_address = DMA_ERROR_CODE;
>> +               s->dma_length = 0;
>> +
>> +               if (s->offset || (size & ~PAGE_MASK) || size + s->length >
>> max) {
>> +                       if (__map_sg_chunk(dev, start, size,
>> &dma->dma_address,
>> +                           dir) < 0)
>> +                               goto bad_mapping;
>> +
>> +                       dma->dma_address += offset;
>> +                       dma->dma_length = size - offset;
>> +
>> +                       size = offset = s->offset;
>> +                       start = s;
>> +                       dma = sg_next(dma);
>> +                       count += 1;
>> +               }
>> +               size += s->length;
>> +       }
>> +       if (__map_sg_chunk(dev, start, size, &dma->dma_address, dir) < 0)
>> +               goto bad_mapping;
>> +
>> +       dma->dma_address += offset;
>> +       dma->dma_length = size - offset;
>> +
>> +       return count+1;
>> +
>> +bad_mapping:
>> +       for_each_sg(sg, s, count, i)
>> +               __iommu_remove_mapping(dev, sg_dma_address(s),
>> sg_dma_len(s));
>> +       return 0;
>> +}
>> +
>> +/**
>> + * arm_iommu_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg
>> + * @dev: valid struct device pointer
>> + * @sg: list of buffers
>> + * @nents: number of buffers to unmap (same as was passed to dma_map_sg)
>> + * @dir: DMA transfer direction (same as was passed to dma_map_sg)
>> + *
>> + * Unmap a set of streaming mode DMA translations.  Again, CPU access
>> + * rules concerning calls here are the same as for dma_unmap_single().
>> + */
>> +void arm_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, int
>> nents,
>> +                       enum dma_data_direction dir, struct dma_attrs
>> *attrs)
>> +{
>> +       struct scatterlist *s;
>> +       int i;
>> +
>> +       for_each_sg(sg, s, nents, i) {
>> +               if (sg_dma_len(s))
>> +                       __iommu_remove_mapping(dev, sg_dma_address(s),
>> +                                              sg_dma_len(s));
>> +               if (!arch_is_coherent())
>> +                       __dma_page_dev_to_cpu(sg_page(s), s->offset,
>> +                                             s->length, dir);
>> +       }
>> +}
>> +
>> +/**
>> + * arm_iommu_sync_sg_for_cpu
>> + * @dev: valid struct device pointer
>> + * @sg: list of buffers
>> + * @nents: number of buffers to map (returned from dma_map_sg)
>> + * @dir: DMA transfer direction (same as was passed to dma_map_sg)
>> + */
>> +void arm_iommu_sync_sg_for_cpu(struct device *dev, struct scatterlist
>> *sg,
>> +                       int nents, enum dma_data_direction dir)
>> +{
>> +       struct scatterlist *s;
>> +       int i;
>> +
>> +       for_each_sg(sg, s, nents, i)
>> +               if (!arch_is_coherent())
>> +                       __dma_page_dev_to_cpu(sg_page(s), s->offset,
>> s->length, dir);
>> +
>> +}
>> +
>> +/**
>> + * arm_iommu_sync_sg_for_device
>> + * @dev: valid struct device pointer
>> + * @sg: list of buffers
>> + * @nents: number of buffers to map (returned from dma_map_sg)
>> + * @dir: DMA transfer direction (same as was passed to dma_map_sg)
>> + */
>> +void arm_iommu_sync_sg_for_device(struct device *dev, struct scatterlist
>> *sg,
>> +                       int nents, enum dma_data_direction dir)
>> +{
>> +       struct scatterlist *s;
>> +       int i;
>> +
>> +       for_each_sg(sg, s, nents, i)
>> +               if (!arch_is_coherent())
>> +                       __dma_page_cpu_to_dev(sg_page(s), s->offset,
>> s->length, dir);
>> +}
>> +
>> +
>> +/**
>> + * arm_iommu_map_page
>> + * @dev: valid struct device pointer
>> + * @page: page that buffer resides in
>> + * @offset: offset into page for start of buffer
>> + * @size: size of buffer to map
>> + * @dir: DMA transfer direction
>> + *
>> + * IOMMU aware version of arm_dma_map_page()
>> + */
>> +static dma_addr_t arm_iommu_map_page(struct device *dev, struct page
>> *page,
>> +            unsigned long offset, size_t size, enum dma_data_direction
>> dir,
>> +            struct dma_attrs *attrs)
>> +{
>> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
>> +       dma_addr_t dma_addr;
>> +       int ret, len = PAGE_ALIGN(size + offset);
>> +
>> +       if (!arch_is_coherent())
>> +               __dma_page_cpu_to_dev(page, offset, size, dir);
>> +
>> +       dma_addr = __alloc_iova(mapping, len);
>> +       if (dma_addr == DMA_ERROR_CODE)
>> +               return dma_addr;
>> +
>> +       ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page),
>> len, 0);
>> +       if (ret < 0)
>> +               goto fail;
>> +
>> +       return dma_addr + offset;
>> +fail:
>> +       __free_iova(mapping, dma_addr, len);
>> +       return DMA_ERROR_CODE;
>> +}
>> +
>> +/**
>> + * arm_iommu_unmap_page
>> + * @dev: valid struct device pointer
>> + * @handle: DMA address of buffer
>> + * @size: size of buffer (same as passed to dma_map_page)
>> + * @dir: DMA transfer direction (same as passed to dma_map_page)
>> + *
>> + * IOMMU aware version of arm_dma_unmap_page()
>> + */
>> +static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle,
>> +               size_t size, enum dma_data_direction dir,
>> +               struct dma_attrs *attrs)
>> +{
>> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
>> +       dma_addr_t iova = handle & PAGE_MASK;
>> +       struct page *page =
>> phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
>> +       int offset = handle & ~PAGE_MASK;
>> +       int len = PAGE_ALIGN(size + offset);
>> +
>> +       if (!iova)
>> +               return;
>> +
>> +       if (!arch_is_coherent())
>> +               __dma_page_dev_to_cpu(page, offset, size, dir);
>> +
>> +       iommu_unmap(mapping->domain, iova, len);
>> +       __free_iova(mapping, iova, len);
>> +}
>> +
>> +static void arm_iommu_sync_single_for_cpu(struct device *dev,
>> +               dma_addr_t handle, size_t size, enum dma_data_direction
>> dir)
>> +{
>> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
>> +       dma_addr_t iova = handle & PAGE_MASK;
>> +       struct page *page =
>> phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
>> +       unsigned int offset = handle & ~PAGE_MASK;
>> +
>> +       if (!iova)
>> +               return;
>> +
>> +       if (!arch_is_coherent())
>> +               __dma_page_dev_to_cpu(page, offset, size, dir);
>> +}
>> +
>> +static void arm_iommu_sync_single_for_device(struct device *dev,
>> +               dma_addr_t handle, size_t size, enum dma_data_direction
>> dir)
>> +{
>> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
>> +       dma_addr_t iova = handle & PAGE_MASK;
>> +       struct page *page =
>> phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
>> +       unsigned int offset = handle & ~PAGE_MASK;
>> +
>> +       if (!iova)
>> +               return;
>> +
>> +       __dma_page_cpu_to_dev(page, offset, size, dir);
>> +}
>> +
>> +struct dma_map_ops iommu_ops = {
>> +       .alloc          = arm_iommu_alloc_attrs,
>> +       .free           = arm_iommu_free_attrs,
>> +       .mmap           = arm_iommu_mmap_attrs,
>> +
>> +       .map_page               = arm_iommu_map_page,
>> +       .unmap_page             = arm_iommu_unmap_page,
>> +       .sync_single_for_cpu    = arm_iommu_sync_single_for_cpu,
>> +       .sync_single_for_device = arm_iommu_sync_single_for_device,
>> +
>> +       .map_sg                 = arm_iommu_map_sg,
>> +       .unmap_sg               = arm_iommu_unmap_sg,
>> +       .sync_sg_for_cpu        = arm_iommu_sync_sg_for_cpu,
>> +       .sync_sg_for_device     = arm_iommu_sync_sg_for_device,
>> +};
>> +
>> +/**
>> + * arm_iommu_create_mapping
>> + * @bus: pointer to the bus holding the client device (for IOMMU calls)
>> + * @base: start address of the valid IO address space
>> + * @size: size of the valid IO address space
>> + * @order: accuracy of the IO addresses allocations
>> + *
>> + * Creates a mapping structure which holds information about used/unused
>> + * IO address ranges, which is required to perform memory allocation and
>> + * mapping with IOMMU aware functions.
>> + *
>> + * The client device need to be attached to the mapping with
>> + * arm_iommu_attach_device function.
>> + */
>> +struct dma_iommu_mapping *
>> +arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t
>> size,
>> +                        int order)
>> +{
>> +       unsigned int count = size >> (PAGE_SHIFT + order);
>> +       unsigned int bitmap_size = BITS_TO_LONGS(count) * sizeof(long);
>> +       struct dma_iommu_mapping *mapping;
>> +       int err = -ENOMEM;
>> +
>> +       if (!count)
>> +               return ERR_PTR(-EINVAL);
>> +
>> +       mapping = kzalloc(sizeof(struct dma_iommu_mapping), GFP_KERNEL);
>> +       if (!mapping)
>> +               goto err;
>> +
>> +       mapping->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
>> +       if (!mapping->bitmap)
>> +               goto err2;
>> +
>> +       mapping->base = base;
>> +       mapping->bits = BITS_PER_BYTE * bitmap_size;
>> +       mapping->order = order;
>> +       spin_lock_init(&mapping->lock);
>> +
>> +       mapping->domain = iommu_domain_alloc(bus);
>> +       if (!mapping->domain)
>> +               goto err3;
>> +
>> +       kref_init(&mapping->kref);
>> +       return mapping;
>> +err3:
>> +       kfree(mapping->bitmap);
>> +err2:
>> +       kfree(mapping);
>> +err:
>> +       return ERR_PTR(err);
>> +}
>> +
>> +static void release_iommu_mapping(struct kref *kref)
>> +{
>> +       struct dma_iommu_mapping *mapping =
>> +               container_of(kref, struct dma_iommu_mapping, kref);
>> +
>> +       iommu_domain_free(mapping->domain);
>> +       kfree(mapping->bitmap);
>> +       kfree(mapping);
>> +}
>> +
>> +void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping)
>> +{
>> +       if (mapping)
>> +               kref_put(&mapping->kref, release_iommu_mapping);
>> +}
>> +
>> +/**
>> + * arm_iommu_attach_device
>> + * @dev: valid struct device pointer
>> + * @mapping: io address space mapping structure (returned from
>> + *     arm_iommu_create_mapping)
>> + *
>> + * Attaches specified io address space mapping to the provided device,
>> + * this replaces the dma operations (dma_map_ops pointer) with the
>> + * IOMMU aware version. More than one client might be attached to
>> + * the same io address space mapping.
>> + */
>> +int arm_iommu_attach_device(struct device *dev,
>> +                           struct dma_iommu_mapping *mapping)
>> +{
>> +       int err;
>> +
>> +       err = iommu_attach_device(mapping->domain, dev);
>> +       if (err)
>> +               return err;
>> +
>> +       kref_get(&mapping->kref);
>> +       dev->archdata.mapping = mapping;
>> +       set_dma_ops(dev, &iommu_ops);
>> +
>> +       pr_info("Attached IOMMU controller to %s device.\n",
>> dev_name(dev));
>> +       return 0;
>> +}
>> +
>> +#endif
>> diff --git a/arch/arm/mm/vmregion.h b/arch/arm/mm/vmregion.h
>> index 162be66..bf312c3 100644
>> --- a/arch/arm/mm/vmregion.h
>> +++ b/arch/arm/mm/vmregion.h
>> @@ -17,7 +17,7 @@ struct arm_vmregion {
>>        struct list_head        vm_list;
>>        unsigned long           vm_start;
>>        unsigned long           vm_end;
>> -       struct page             *vm_pages;
>> +       void                    *priv;
>>        int                     vm_active;
>>        const void              *caller;
>>  };
>> --
>> 1.7.1.569.g6f426
>>
>>
>> _______________________________________________
>> Linaro-mm-sig mailing list
>> Linaro-mm-sig@lists.linaro.org
>> http://lists.linaro.org/mailman/listinfo/linaro-mm-sig
>>
>
Abhinav Kochhar April 23, 2012, 10:42 a.m. UTC | #4
Hi,

I see a bottleneck with the current dma-mapping framework.
The issue seems to be with the virtual memory allocation for access in the
kernel address space.

1. In "arch/arm/mm/dma-mapping.c" there is an initialization call to
"consistent_init". It reserves 32MB of kernel address space.
2. "consistent_init" allocates memory for kernel page directory and page
tables.

3. The "__iommu_alloc_remap" function allocates a virtual memory region in the
kernel address space reserved in step 1.

4. The "__iommu_alloc_remap" function then maps the allocated pages into the
virtual memory region allocated in step 3.

Since the virtual memory area allocated for mapping these pages in kernel
address space is only 32MB,

eventually the calls for allocation and mapping new pages into kernel
address space are going to fail once 32 MB is exhausted.

For example, on the Exynos 5 platform each framebuffer for 1280x800 resolution
consumes around 4MB.

We have a scenario where X11 DRI driver would allocate Non-contig pages for
all "Pixmaps" through "exynos_drm_gem_create" function which will follow
the path given above in steps 1 - 4.

Now the problem is the size limitation of 32MB. We may want to allocate
more than 8 such buffers when X11 DRI driver is integrated.

Possible solutions:

1. Why do we need to create a kernel virtual address space? Are we going to
access these pages in kernel using this address?

If we are not going to access anything in the kernel, then why do we need to map
these pages into the kernel address space? If we can avoid this, then the problem
can be solved.

OR

2. Is it used only for book-keeping, to retrieve "struct pages" later on for
passing/mapping to different devices?

If yes, then we have to find another way.

For "dmabuf" framework one solution could be to add a new member variable
"pages" in the exporting driver's local object and use that for
passing/mapping to different devices.

Moreover, even if we increase it to, say, 64 MB, that would not be enough for our
use; we never know how many graphics applications would be spawned by the
user.
Let me know your opinion on this.

Regards,
Abhinav

On Fri, Apr 20, 2012 at 10:51 AM, Kyungmin Park
<kyungmin.park@samsung.com> wrote:

> On 4/20/12, Abhinav Kochhar <kochhar.abhinav@gmail.com> wrote:
> > Hi Marek,
> >
> > dma_addr_t dma_addr is an unused argument passed to the function
> > arm_iommu_mmap_attrs
>
> Even though it's not used here, it's the mmap function field in dma_map_ops.
> It's required to match the type.
>
> struct dma_map_ops iommu_ops = {
>       .alloc          = arm_iommu_alloc_attrs,
>       .free           = arm_iommu_free_attrs,
>       .mmap           = arm_iommu_mmap_attrs,
>
> Thank you,
> Kyungmin Park
> >
> > +static int arm_iommu_mmap_attrs(struct device *dev, struct
> vm_area_struct
> > *vma,
> > +                   void *cpu_addr, dma_addr_t dma_addr, size_t size,
> > +                   struct dma_attrs *attrs)
> > +{
> > +       struct arm_vmregion *c;
> > +
> > +       vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot);
> > +       c = arm_vmregion_find(&consistent_
> > head, (unsigned long)cpu_addr);
> > +
> > +       if (c) {
> > +               struct page **pages = c->priv;
> > +
> > +               unsigned long uaddr = vma->vm_start;
> > +               unsigned long usize = vma->vm_end - vma->vm_start;
> > +               int i = 0;
> > +
> > +               do {
> > +                       int ret;
> > +
> > +                       ret = vm_insert_page(vma, uaddr, pages[i++]);
> > +                       if (ret) {
> > +                               pr_err("Remapping memory, error: %d\n",
> > ret);
> > +                               return ret;
> > +                       }
> > +
> > +                       uaddr += PAGE_SIZE;
> > +                       usize -= PAGE_SIZE;
> > +               } while (usize > 0);
> > +       }
> > +       return 0;
> > +}
> >
> >
> > On Wed, Apr 18, 2012 at 10:44 PM, Marek Szyprowski <
> m.szyprowski@samsung.com
> >> wrote:
> >
> >> This patch add a complete implementation of DMA-mapping API for
> >> devices which have IOMMU support.
> >>
> >> This implementation tries to optimize dma address space usage by
> remapping
> >> all possible physical memory chunks into a single dma address space
> chunk.
> >>
> >> DMA address space is managed on top of the bitmap stored in the
> >> dma_iommu_mapping structure stored in device->archdata. Platform setup
> >> code has to initialize parameters of the dma address space (base
> address,
> >> size, allocation precision order) with arm_iommu_create_mapping()
> >> function.
> >> To reduce the size of the bitmap, all allocations are aligned to the
> >> specified order of base 4 KiB pages.
> >>
> >> dma_alloc_* functions allocate physical memory in chunks, each with
> >> alloc_pages() function to avoid failing if the physical memory gets
> >> fragmented. In worst case the allocated buffer is composed of 4 KiB page
> >> chunks.
> >>
> >> dma_map_sg() function minimizes the total number of dma address space
> >> chunks by merging of physical memory chunks into one larger dma address
> >> space chunk. If requested chunk (scatter list entry) boundaries
> >> match physical page boundaries, most calls to dma_map_sg() requests will
> >> result in creating only one chunk in dma address space.
> >>
> >> dma_map_page() simply creates a mapping for the given page(s) in the dma
> >> address space.
> >>
> >> All dma functions also perform required cache operation like their
> >> counterparts from the arm linear physical memory mapping version.
> >>
> >> This patch contains code and fixes kindly provided by:
> >> - Krishna Reddy <vdumpa@nvidia.com>,
> >> - Andrzej Pietrasiewicz <andrzej.p@samsung.com>,
> >> - Hiroshi DOYU <hdoyu@nvidia.com>
> >>
> >> Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
> >> Acked-by: Kyungmin Park <kyungmin.park@samsung.com>
> >> Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
> >> Tested-By: Subash Patel <subash.ramaswamy@linaro.org>
> >> ---
> >>  arch/arm/Kconfig                 |    8 +
> >>  arch/arm/include/asm/device.h    |    3 +
> >>  arch/arm/include/asm/dma-iommu.h |   34 ++
> >>  arch/arm/mm/dma-mapping.c        |  727
> >> +++++++++++++++++++++++++++++++++++++-
> >>  arch/arm/mm/vmregion.h           |    2 +-
> >>  5 files changed, 759 insertions(+), 15 deletions(-)
> >>  create mode 100644 arch/arm/include/asm/dma-iommu.h
> >>
> >> diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
> >> index 0fd27d4..874e519 100644
> >> --- a/arch/arm/Kconfig
> >> +++ b/arch/arm/Kconfig
> >> @@ -46,6 +46,14 @@ config ARM
> >>  config ARM_HAS_SG_CHAIN
> >>        bool
> >>
> >> +config NEED_SG_DMA_LENGTH
> >> +       bool
> >> +
> >> +config ARM_DMA_USE_IOMMU
> >> +       select NEED_SG_DMA_LENGTH
> >> +       select ARM_HAS_SG_CHAIN
> >> +       bool
> >> +
> >>  config HAVE_PWM
> >>        bool
> >>
> >> diff --git a/arch/arm/include/asm/device.h
> b/arch/arm/include/asm/device.h
> >> index 6e2cb0e..b69c0d3 100644
> >> --- a/arch/arm/include/asm/device.h
> >> +++ b/arch/arm/include/asm/device.h
> >> @@ -14,6 +14,9 @@ struct dev_archdata {
> >>  #ifdef CONFIG_IOMMU_API
> >>        void *iommu; /* private IOMMU data */
> >>  #endif
> >> +#ifdef CONFIG_ARM_DMA_USE_IOMMU
> >> +       struct dma_iommu_mapping        *mapping;
> >> +#endif
> >>  };
> >>
> >>  struct omap_device;
> >> diff --git a/arch/arm/include/asm/dma-iommu.h
> >> b/arch/arm/include/asm/dma-iommu.h
> >> new file mode 100644
> >> index 0000000..799b094
> >> --- /dev/null
> >> +++ b/arch/arm/include/asm/dma-iommu.h
> >> @@ -0,0 +1,34 @@
> >> +#ifndef ASMARM_DMA_IOMMU_H
> >> +#define ASMARM_DMA_IOMMU_H
> >> +
> >> +#ifdef __KERNEL__
> >> +
> >> +#include <linux/mm_types.h>
> >> +#include <linux/scatterlist.h>
> >> +#include <linux/dma-debug.h>
> >> +#include <linux/kmemcheck.h>
> >> +
> >> +struct dma_iommu_mapping {
> >> +       /* iommu specific data */
> >> +       struct iommu_domain     *domain;
> >> +
> >> +       void                    *bitmap;
> >> +       size_t                  bits;
> >> +       unsigned int            order;
> >> +       dma_addr_t              base;
> >> +
> >> +       spinlock_t              lock;
> >> +       struct kref             kref;
> >> +};
> >> +
> >> +struct dma_iommu_mapping *
> >> +arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t
> >> size,
> >> +                        int order);
> >> +
> >> +void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping);
> >> +
> >> +int arm_iommu_attach_device(struct device *dev,
> >> +                                       struct dma_iommu_mapping
> >> *mapping);
> >> +
> >> +#endif /* __KERNEL__ */
> >> +#endif
> >> diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
> >> index d4aad65..2d11aa0 100644
> >> --- a/arch/arm/mm/dma-mapping.c
> >> +++ b/arch/arm/mm/dma-mapping.c
> >> @@ -19,6 +19,8 @@
> >>  #include <linux/dma-mapping.h>
> >>  #include <linux/highmem.h>
> >>  #include <linux/slab.h>
> >> +#include <linux/iommu.h>
> >> +#include <linux/vmalloc.h>
> >>
> >>  #include <asm/memory.h>
> >>  #include <asm/highmem.h>
> >> @@ -26,6 +28,7 @@
> >>  #include <asm/tlbflush.h>
> >>  #include <asm/sizes.h>
> >>  #include <asm/mach/arch.h>
> >> +#include <asm/dma-iommu.h>
> >>
> >>  #include "mm.h"
> >>
> >> @@ -155,6 +158,21 @@ static u64 get_coherent_dma_mask(struct device
> *dev)
> >>        return mask;
> >>  }
> >>
> >> +static void __dma_clear_buffer(struct page *page, size_t size)
> >> +{
> >> +       void *ptr;
> >> +       /*
> >> +        * Ensure that the allocated pages are zeroed, and that any data
> >> +        * lurking in the kernel direct-mapped region is invalidated.
> >> +        */
> >> +       ptr = page_address(page);
> >> +       if (ptr) {
> >> +               memset(ptr, 0, size);
> >> +               dmac_flush_range(ptr, ptr + size);
> >> +               outer_flush_range(__pa(ptr), __pa(ptr) + size);
> >> +       }
> >> +}
> >> +
> >>  /*
> >>  * Allocate a DMA buffer for 'dev' of size 'size' using the
> >>  * specified gfp mask.  Note that 'size' must be page aligned.
> >> @@ -163,7 +181,6 @@ static struct page *__dma_alloc_buffer(struct device
> >> *dev, size_t size, gfp_t gf
> >>  {
> >>        unsigned long order = get_order(size);
> >>        struct page *page, *p, *e;
> >> -       void *ptr;
> >>        u64 mask = get_coherent_dma_mask(dev);
> >>
> >>  #ifdef CONFIG_DMA_API_DEBUG
> >> @@ -192,14 +209,7 @@ static struct page *__dma_alloc_buffer(struct
> device
> >> *dev, size_t size, gfp_t gf
> >>        for (p = page + (size >> PAGE_SHIFT), e = page + (1 << order); p
> <
> >> e; p++)
> >>                __free_page(p);
> >>
> >> -       /*
> >> -        * Ensure that the allocated pages are zeroed, and that any data
> >> -        * lurking in the kernel direct-mapped region is invalidated.
> >> -        */
> >> -       ptr = page_address(page);
> >> -       memset(ptr, 0, size);
> >> -       dmac_flush_range(ptr, ptr + size);
> >> -       outer_flush_range(__pa(ptr), __pa(ptr) + size);
> >> +       __dma_clear_buffer(page, size);
> >>
> >>        return page;
> >>  }
> >> @@ -348,7 +358,7 @@ __dma_alloc_remap(struct page *page, size_t size,
> >> gfp_t gfp, pgprot_t prot,
> >>                u32 off = CONSISTENT_OFFSET(c->vm_start) &
> >> (PTRS_PER_PTE-1);
> >>
> >>                pte = consistent_pte[idx] + off;
> >> -               c->vm_pages = page;
> >> +               c->priv = page;
> >>
> >>                do {
> >>                        BUG_ON(!pte_none(*pte));
> >> @@ -461,6 +471,14 @@ __dma_alloc(struct device *dev, size_t size,
> >> dma_addr_t *handle, gfp_t gfp,
> >>        return addr;
> >>  }
> >>
> >> +static inline pgprot_t __get_dma_pgprot(struct dma_attrs *attrs,
> pgprot_t
> >> prot)
> >> +{
> >> +       prot = dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs) ?
> >> +                           pgprot_writecombine(prot) :
> >> +                           pgprot_dmacoherent(prot);
> >> +       return prot;
> >> +}
> >> +
> >>  /*
> >>  * Allocate DMA-coherent memory space and return both the kernel
> remapped
> >>  * virtual and bus address for that space.
> >> @@ -468,9 +486,7 @@ __dma_alloc(struct device *dev, size_t size,
> >> dma_addr_t *handle, gfp_t gfp,
> >>  void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t
> *handle,
> >>                    gfp_t gfp, struct dma_attrs *attrs)
> >>  {
> >> -       pgprot_t prot = dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs) ?
> >> -                       pgprot_writecombine(pgprot_kernel) :
> >> -                       pgprot_dmacoherent(pgprot_kernel);
> >> +       pgprot_t prot = __get_dma_pgprot(attrs, pgprot_kernel);
> >>        void *memory;
> >>
> >>        if (dma_alloc_from_coherent(dev, size, handle, &memory))
> >> @@ -497,16 +513,20 @@ int arm_dma_mmap(struct device *dev, struct
> >> vm_area_struct *vma,
> >>                            pgprot_writecombine(vma->vm_page_prot) :
> >>                            pgprot_dmacoherent(vma->vm_page_prot);
> >>
> >> +       if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
> >> +               return ret;
> >> +
> >>        c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr);
> >>        if (c) {
> >>                unsigned long off = vma->vm_pgoff;
> >> +               struct page *pages = c->priv;
> >>
> >>                kern_size = (c->vm_end - c->vm_start) >> PAGE_SHIFT;
> >>
> >>                if (off < kern_size &&
> >>                    user_size <= (kern_size - off)) {
> >>                        ret = remap_pfn_range(vma, vma->vm_start,
> >> -                                             page_to_pfn(c->vm_pages) +
> >> off,
> >> +                                             page_to_pfn(pages) + off,
> >>                                              user_size << PAGE_SHIFT,
> >>                                              vma->vm_page_prot);
> >>                }
> >> @@ -645,6 +665,9 @@ int arm_dma_map_sg(struct device *dev, struct
> >> scatterlist *sg, int nents,
> >>        int i, j;
> >>
> >>        for_each_sg(sg, s, nents, i) {
> >> +#ifdef CONFIG_NEED_SG_DMA_LENGTH
> >> +               s->dma_length = s->length;
> >> +#endif
> >>                s->dma_address = ops->map_page(dev, sg_page(s),
> s->offset,
> >>                                                s->length, dir, attrs);
> >>                if (dma_mapping_error(dev, s->dma_address))
> >> @@ -753,3 +776,679 @@ static int __init dma_debug_do_init(void)
> >>        return 0;
> >>  }
> >>  fs_initcall(dma_debug_do_init);
> >> +
> >> +#ifdef CONFIG_ARM_DMA_USE_IOMMU
> >> +
> >> +/* IOMMU */
> >> +
> >> +static inline dma_addr_t __alloc_iova(struct dma_iommu_mapping
> *mapping,
> >> +                                     size_t size)
> >> +{
> >> +       unsigned int order = get_order(size);
> >> +       unsigned int align = 0;
> >> +       unsigned int count, start;
> >> +       unsigned long flags;
> >> +
> >> +       count = ((PAGE_ALIGN(size) >> PAGE_SHIFT) +
> >> +                (1 << mapping->order) - 1) >> mapping->order;
> >> +
> >> +       if (order > mapping->order)
> >> +               align = (1 << (order - mapping->order)) - 1;
> >> +
> >> +       spin_lock_irqsave(&mapping->lock, flags);
> >> +       start = bitmap_find_next_zero_area(mapping->bitmap,
> mapping->bits,
> >> 0,
> >> +                                          count, align);
> >> +       if (start > mapping->bits) {
> >> +               spin_unlock_irqrestore(&mapping->lock, flags);
> >> +               return DMA_ERROR_CODE;
> >> +       }
> >> +
> >> +       bitmap_set(mapping->bitmap, start, count);
> >> +       spin_unlock_irqrestore(&mapping->lock, flags);
> >> +
> >> +       return mapping->base + (start << (mapping->order + PAGE_SHIFT));
> >> +}
> >> +
> >> +static inline void __free_iova(struct dma_iommu_mapping *mapping,
> >> +                              dma_addr_t addr, size_t size)
> >> +{
> >> +       unsigned int start = (addr - mapping->base) >>
> >> +                            (mapping->order + PAGE_SHIFT);
> >> +       unsigned int count = ((size >> PAGE_SHIFT) +
> >> +                             (1 << mapping->order) - 1) >>
> >> mapping->order;
> >> +       unsigned long flags;
> >> +
> >> +       spin_lock_irqsave(&mapping->lock, flags);
> >> +       bitmap_clear(mapping->bitmap, start, count);
> >> +       spin_unlock_irqrestore(&mapping->lock, flags);
> >> +}
> >> +
> >> +static struct page **__iommu_alloc_buffer(struct device *dev, size_t
> >> size, gfp_t gfp)
> >> +{
> >> +       struct page **pages;
> >> +       int count = size >> PAGE_SHIFT;
> >> +       int array_size = count * sizeof(struct page *);
> >> +       int i = 0;
> >> +
> >> +       if (array_size <= PAGE_SIZE)
> >> +               pages = kzalloc(array_size, gfp);
> >> +       else
> >> +               pages = vzalloc(array_size);
> >> +       if (!pages)
> >> +               return NULL;
> >> +
> >> +       while (count) {
> >> +               int j, order = __ffs(count);
> >> +
> >> +               pages[i] = alloc_pages(gfp | __GFP_NOWARN, order);
> >> +               while (!pages[i] && order)
> >> +                       pages[i] = alloc_pages(gfp | __GFP_NOWARN,
> >> --order);
> >> +               if (!pages[i])
> >> +                       goto error;
> >> +
> >> +               if (order)
> >> +                       split_page(pages[i], order);
> >> +               j = 1 << order;
> >> +               while (--j)
> >> +                       pages[i + j] = pages[i] + j;
> >> +
> >> +               __dma_clear_buffer(pages[i], PAGE_SIZE << order);
> >> +               i += 1 << order;
> >> +               count -= 1 << order;
> >> +       }
> >> +
> >> +       return pages;
> >> +error:
> >> +       while (--i)
> >> +               if (pages[i])
> >> +                       __free_pages(pages[i], 0);
> >> +       if (array_size < PAGE_SIZE)
> >> +               kfree(pages);
> >> +       else
> >> +               vfree(pages);
> >> +       return NULL;
> >> +}
> >> +
> >> +static int __iommu_free_buffer(struct device *dev, struct page **pages,
> >> size_t size)
> >> +{
> >> +       int count = size >> PAGE_SHIFT;
> >> +       int array_size = count * sizeof(struct page *);
> >> +       int i;
> >> +       for (i = 0; i < count; i++)
> >> +               if (pages[i])
> >> +                       __free_pages(pages[i], 0);
> >> +       if (array_size < PAGE_SIZE)
> >> +               kfree(pages);
> >> +       else
> >> +               vfree(pages);
> >> +       return 0;
> >> +}
> >> +
> >> +/*
> >> + * Create a CPU mapping for the specified pages
> >> + */
> >> +static void *
> >> +__iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp,
> pgprot_t
> >> prot)
> >> +{
> >> +       struct arm_vmregion *c;
> >> +       size_t align;
> >> +       size_t count = size >> PAGE_SHIFT;
> >> +       int bit;
> >> +
> >> +       if (!consistent_pte[0]) {
> >> +               pr_err("%s: not initialised\n", __func__);
> >> +               dump_stack();
> >> +               return NULL;
> >> +       }
> >> +
> >> +       /*
> >> +        * Align the virtual region allocation - maximum alignment is
> >> +        * a section size, minimum is a page size.  This helps reduce
> >> +        * fragmentation of the DMA space, and also prevents allocations
> >> +        * smaller than a section from crossing a section boundary.
> >> +        */
> >> +       bit = fls(size - 1);
> >> +       if (bit > SECTION_SHIFT)
> >> +               bit = SECTION_SHIFT;
> >> +       align = 1 << bit;
> >> +
> >> +       /*
> >> +        * Allocate a virtual address in the consistent mapping region.
> >> +        */
> >> +       c = arm_vmregion_alloc(&consistent_head, align, size,
> >> +                           gfp & ~(__GFP_DMA | __GFP_HIGHMEM), NULL);
> >> +       if (c) {
> >> +               pte_t *pte;
> >> +               int idx = CONSISTENT_PTE_INDEX(c->vm_start);
> >> +               int i = 0;
> >> +               u32 off = CONSISTENT_OFFSET(c->vm_start) &
> >> (PTRS_PER_PTE-1);
> >> +
> >> +               pte = consistent_pte[idx] + off;
> >> +               c->priv = pages;
> >> +
> >> +               do {
> >> +                       BUG_ON(!pte_none(*pte));
> >> +
> >> +                       set_pte_ext(pte, mk_pte(pages[i], prot), 0);
> >> +                       pte++;
> >> +                       off++;
> >> +                       i++;
> >> +                       if (off >= PTRS_PER_PTE) {
> >> +                               off = 0;
> >> +                               pte = consistent_pte[++idx];
> >> +                       }
> >> +               } while (i < count);
> >> +
> >> +               dsb();
> >> +
> >> +               return (void *)c->vm_start;
> >> +       }
> >> +       return NULL;
> >> +}
> >> +
> >> +/*
> >> + * Create a mapping in device IO address space for specified pages
> >> + */
> >> +static dma_addr_t
> >> +__iommu_create_mapping(struct device *dev, struct page **pages, size_t
> >> size)
> >> +{
> >> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
> >> +       unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
> >> +       dma_addr_t dma_addr, iova;
> >> +       int i, ret = DMA_ERROR_CODE;
> >> +
> >> +       dma_addr = __alloc_iova(mapping, size);
> >> +       if (dma_addr == DMA_ERROR_CODE)
> >> +               return dma_addr;
> >> +
> >> +       iova = dma_addr;
> >> +       for (i = 0; i < count; ) {
> >> +               unsigned int next_pfn = page_to_pfn(pages[i]) + 1;
> >> +               phys_addr_t phys = page_to_phys(pages[i]);
> >> +               unsigned int len, j;
> >> +
> >> +               for (j = i + 1; j < count; j++, next_pfn++)
> >> +                       if (page_to_pfn(pages[j]) != next_pfn)
> >> +                               break;
> >> +
> >> +               len = (j - i) << PAGE_SHIFT;
> >> +               ret = iommu_map(mapping->domain, iova, phys, len, 0);
> >> +               if (ret < 0)
> >> +                       goto fail;
> >> +               iova += len;
> >> +               i = j;
> >> +       }
> >> +       return dma_addr;
> >> +fail:
> >> +       iommu_unmap(mapping->domain, dma_addr, iova-dma_addr);
> >> +       __free_iova(mapping, dma_addr, size);
> >> +       return DMA_ERROR_CODE;
> >> +}
> >> +
> >> +static int __iommu_remove_mapping(struct device *dev, dma_addr_t iova,
> >> size_t size)
> >> +{
> >> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
> >> +
> >> +       /*
> >> +        * add optional in-page offset from iova to size and align
> >> +        * result to page size
> >> +        */
> >> +       size = PAGE_ALIGN((iova & ~PAGE_MASK) + size);
> >> +       iova &= PAGE_MASK;
> >> +
> >> +       iommu_unmap(mapping->domain, iova, size);
> >> +       __free_iova(mapping, iova, size);
> >> +       return 0;
> >> +}
> >> +
> >> +static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
> >> +           dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs)
> >> +{
> >> +       pgprot_t prot = __get_dma_pgprot(attrs, pgprot_kernel);
> >> +       struct page **pages;
> >> +       void *addr = NULL;
> >> +
> >> +       *handle = DMA_ERROR_CODE;
> >> +       size = PAGE_ALIGN(size);
> >> +
> >> +       pages = __iommu_alloc_buffer(dev, size, gfp);
> >> +       if (!pages)
> >> +               return NULL;
> >> +
> >> +       *handle = __iommu_create_mapping(dev, pages, size);
> >> +       if (*handle == DMA_ERROR_CODE)
> >> +               goto err_buffer;
> >> +
> >> +       addr = __iommu_alloc_remap(pages, size, gfp, prot);
> >> +       if (!addr)
> >> +               goto err_mapping;
> >> +
> >> +       return addr;
> >> +
> >> +err_mapping:
> >> +       __iommu_remove_mapping(dev, *handle, size);
> >> +err_buffer:
> >> +       __iommu_free_buffer(dev, pages, size);
> >> +       return NULL;
> >> +}
> >> +
> >> +static int arm_iommu_mmap_attrs(struct device *dev, struct
> vm_area_struct
> >> *vma,
> >> +                   void *cpu_addr, dma_addr_t dma_addr, size_t size,
> >> +                   struct dma_attrs *attrs)
> >> +{
> >> +       struct arm_vmregion *c;
> >> +
> >> +       vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot);
> >> +       c = arm_vmregion_find(&consistent_head, (unsigned
> long)cpu_addr);
> >> +
> >> +       if (c) {
> >> +               struct page **pages = c->priv;
> >> +
> >> +               unsigned long uaddr = vma->vm_start;
> >> +               unsigned long usize = vma->vm_end - vma->vm_start;
> >> +               int i = 0;
> >> +
> >> +               do {
> >> +                       int ret;
> >> +
> >> +                       ret = vm_insert_page(vma, uaddr, pages[i++]);
> >> +                       if (ret) {
> >> +                               pr_err("Remapping memory, error: %d\n",
> >> ret);
> >> +                               return ret;
> >> +                       }
> >> +
> >> +                       uaddr += PAGE_SIZE;
> >> +                       usize -= PAGE_SIZE;
> >> +               } while (usize > 0);
> >> +       }
> >> +       return 0;
> >> +}
> >> +
> >> +/*
> >> + * free a page as defined by the above mapping.
> >> + * Must not be called with IRQs disabled.
> >> + */
> >> +void arm_iommu_free_attrs(struct device *dev, size_t size, void
> >> *cpu_addr,
> >> +                         dma_addr_t handle, struct dma_attrs *attrs)
> >> +{
> >> +       struct arm_vmregion *c;
> >> +       size = PAGE_ALIGN(size);
> >> +
> >> +       c = arm_vmregion_find(&consistent_head, (unsigned
> long)cpu_addr);
> >> +       if (c) {
> >> +               struct page **pages = c->priv;
> >> +               __dma_free_remap(cpu_addr, size);
> >> +               __iommu_remove_mapping(dev, handle, size);
> >> +               __iommu_free_buffer(dev, pages, size);
> >> +       }
> >> +}
> >> +
> >> +/*
> >> + * Map a part of the scatter-gather list into contiguous io address
> space
> >> + */
> >> +static int __map_sg_chunk(struct device *dev, struct scatterlist *sg,
> >> +                         size_t size, dma_addr_t *handle,
> >> +                         enum dma_data_direction dir)
> >> +{
> >> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
> >> +       dma_addr_t iova, iova_base;
> >> +       int ret = 0;
> >> +       unsigned int count;
> >> +       struct scatterlist *s;
> >> +
> >> +       size = PAGE_ALIGN(size);
> >> +       *handle = DMA_ERROR_CODE;
> >> +
> >> +       iova_base = iova = __alloc_iova(mapping, size);
> >> +       if (iova == DMA_ERROR_CODE)
> >> +               return -ENOMEM;
> >> +
> >> +       for (count = 0, s = sg; count < (size >> PAGE_SHIFT); s =
> >> sg_next(s)) {
> >> +               phys_addr_t phys = page_to_phys(sg_page(s));
> >> +               unsigned int len = PAGE_ALIGN(s->offset + s->length);
> >> +
> >> +               if (!arch_is_coherent())
> >> +                       __dma_page_cpu_to_dev(sg_page(s), s->offset,
> >> s->length, dir);
> >> +
> >> +               ret = iommu_map(mapping->domain, iova, phys, len, 0);
> >> +               if (ret < 0)
> >> +                       goto fail;
> >> +               count += len >> PAGE_SHIFT;
> >> +               iova += len;
> >> +       }
> >> +       *handle = iova_base;
> >> +
> >> +       return 0;
> >> +fail:
> >> +       iommu_unmap(mapping->domain, iova_base, count * PAGE_SIZE);
> >> +       __free_iova(mapping, iova_base, size);
> >> +       return ret;
> >> +}
> >> +
> >> +/**
> >> + * arm_iommu_map_sg - map a set of SG buffers for streaming mode DMA
> >> + * @dev: valid struct device pointer
> >> + * @sg: list of buffers
> >> + * @nents: number of buffers to map
> >> + * @dir: DMA transfer direction
> >> + *
> >> + * Map a set of buffers described by scatterlist in streaming mode for
> >> DMA.
> >> + * The scatter gather list elements are merged together (if possible)
> and
> >> + * tagged with the appropriate dma address and length. They are
> obtained
> >> via
> >> + * sg_dma_{address,length}.
> >> + */
> >> +int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, int
> >> nents,
> >> +                    enum dma_data_direction dir, struct dma_attrs
> *attrs)
> >> +{
> >> +       struct scatterlist *s = sg, *dma = sg, *start = sg;
> >> +       int i, count = 0;
> >> +       unsigned int offset = s->offset;
> >> +       unsigned int size = s->offset + s->length;
> >> +       unsigned int max = dma_get_max_seg_size(dev);
> >> +
> >> +       for (i = 1; i < nents; i++) {
> >> +               s = sg_next(s);
> >> +
> >> +               s->dma_address = DMA_ERROR_CODE;
> >> +               s->dma_length = 0;
> >> +
> >> +               if (s->offset || (size & ~PAGE_MASK) || size +
> s->length >
> >> max) {
> >> +                       if (__map_sg_chunk(dev, start, size,
> >> &dma->dma_address,
> >> +                           dir) < 0)
> >> +                               goto bad_mapping;
> >> +
> >> +                       dma->dma_address += offset;
> >> +                       dma->dma_length = size - offset;
> >> +
> >> +                       size = offset = s->offset;
> >> +                       start = s;
> >> +                       dma = sg_next(dma);
> >> +                       count += 1;
> >> +               }
> >> +               size += s->length;
> >> +       }
> >> +       if (__map_sg_chunk(dev, start, size, &dma->dma_address, dir) <
> 0)
> >> +               goto bad_mapping;
> >> +
> >> +       dma->dma_address += offset;
> >> +       dma->dma_length = size - offset;
> >> +
> >> +       return count+1;
> >> +
> >> +bad_mapping:
> >> +       for_each_sg(sg, s, count, i)
> >> +               __iommu_remove_mapping(dev, sg_dma_address(s),
> >> sg_dma_len(s));
> >> +       return 0;
> >> +}
> >> +
> >> +/**
> >> + * arm_iommu_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg
> >> + * @dev: valid struct device pointer
> >> + * @sg: list of buffers
> >> + * @nents: number of buffers to unmap (same as was passed to
> dma_map_sg)
> >> + * @dir: DMA transfer direction (same as was passed to dma_map_sg)
> >> + *
> >> + * Unmap a set of streaming mode DMA translations.  Again, CPU access
> >> + * rules concerning calls here are the same as for dma_unmap_single().
> >> + */
> >> +void arm_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, int
> >> nents,
> >> +                       enum dma_data_direction dir, struct dma_attrs
> >> *attrs)
> >> +{
> >> +       struct scatterlist *s;
> >> +       int i;
> >> +
> >> +       for_each_sg(sg, s, nents, i) {
> >> +               if (sg_dma_len(s))
> >> +                       __iommu_remove_mapping(dev, sg_dma_address(s),
> >> +                                              sg_dma_len(s));
> >> +               if (!arch_is_coherent())
> >> +                       __dma_page_dev_to_cpu(sg_page(s), s->offset,
> >> +                                             s->length, dir);
> >> +       }
> >> +}
> >> +
> >> +/**
> >> + * arm_iommu_sync_sg_for_cpu
> >> + * @dev: valid struct device pointer
> >> + * @sg: list of buffers
> >> + * @nents: number of buffers to map (returned from dma_map_sg)
> >> + * @dir: DMA transfer direction (same as was passed to dma_map_sg)
> >> + */
> >> +void arm_iommu_sync_sg_for_cpu(struct device *dev, struct scatterlist
> >> *sg,
> >> +                       int nents, enum dma_data_direction dir)
> >> +{
> >> +       struct scatterlist *s;
> >> +       int i;
> >> +
> >> +       for_each_sg(sg, s, nents, i)
> >> +               if (!arch_is_coherent())
> >> +                       __dma_page_dev_to_cpu(sg_page(s), s->offset,
> >> s->length, dir);
> >> +
> >> +}
> >> +
> >> +/**
> >> + * arm_iommu_sync_sg_for_device
> >> + * @dev: valid struct device pointer
> >> + * @sg: list of buffers
> >> + * @nents: number of buffers to map (returned from dma_map_sg)
> >> + * @dir: DMA transfer direction (same as was passed to dma_map_sg)
> >> + */
> >> +void arm_iommu_sync_sg_for_device(struct device *dev, struct
> scatterlist
> >> *sg,
> >> +                       int nents, enum dma_data_direction dir)
> >> +{
> >> +       struct scatterlist *s;
> >> +       int i;
> >> +
> >> +       for_each_sg(sg, s, nents, i)
> >> +               if (!arch_is_coherent())
> >> +                       __dma_page_cpu_to_dev(sg_page(s), s->offset,
> >> s->length, dir);
> >> +}
> >> +
> >> +
> >> +/**
> >> + * arm_iommu_map_page
> >> + * @dev: valid struct device pointer
> >> + * @page: page that buffer resides in
> >> + * @offset: offset into page for start of buffer
> >> + * @size: size of buffer to map
> >> + * @dir: DMA transfer direction
> >> + *
> >> + * IOMMU aware version of arm_dma_map_page()
> >> + */
> >> +static dma_addr_t arm_iommu_map_page(struct device *dev, struct page
> >> *page,
> >> +            unsigned long offset, size_t size, enum dma_data_direction
> >> dir,
> >> +            struct dma_attrs *attrs)
> >> +{
> >> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
> >> +       dma_addr_t dma_addr;
> >> +       int ret, len = PAGE_ALIGN(size + offset);
> >> +
> >> +       if (!arch_is_coherent())
> >> +               __dma_page_cpu_to_dev(page, offset, size, dir);
> >> +
> >> +       dma_addr = __alloc_iova(mapping, len);
> >> +       if (dma_addr == DMA_ERROR_CODE)
> >> +               return dma_addr;
> >> +
> >> +       ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page),
> >> len, 0);
> >> +       if (ret < 0)
> >> +               goto fail;
> >> +
> >> +       return dma_addr + offset;
> >> +fail:
> >> +       __free_iova(mapping, dma_addr, len);
> >> +       return DMA_ERROR_CODE;
> >> +}
> >> +
> >> +/**
> >> + * arm_iommu_unmap_page
> >> + * @dev: valid struct device pointer
> >> + * @handle: DMA address of buffer
> >> + * @size: size of buffer (same as passed to dma_map_page)
> >> + * @dir: DMA transfer direction (same as passed to dma_map_page)
> >> + *
> >> + * IOMMU aware version of arm_dma_unmap_page()
> >> + */
> >> +static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle,
> >> +               size_t size, enum dma_data_direction dir,
> >> +               struct dma_attrs *attrs)
> >> +{
> >> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
> >> +       dma_addr_t iova = handle & PAGE_MASK;
> >> +       struct page *page =
> >> phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
> >> +       int offset = handle & ~PAGE_MASK;
> >> +       int len = PAGE_ALIGN(size + offset);
> >> +
> >> +       if (!iova)
> >> +               return;
> >> +
> >> +       if (!arch_is_coherent())
> >> +               __dma_page_dev_to_cpu(page, offset, size, dir);
> >> +
> >> +       iommu_unmap(mapping->domain, iova, len);
> >> +       __free_iova(mapping, iova, len);
> >> +}
> >> +
> >> +static void arm_iommu_sync_single_for_cpu(struct device *dev,
> >> +               dma_addr_t handle, size_t size, enum dma_data_direction
> >> dir)
> >> +{
> >> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
> >> +       dma_addr_t iova = handle & PAGE_MASK;
> >> +       struct page *page =
> >> phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
> >> +       unsigned int offset = handle & ~PAGE_MASK;
> >> +
> >> +       if (!iova)
> >> +               return;
> >> +
> >> +       if (!arch_is_coherent())
> >> +               __dma_page_dev_to_cpu(page, offset, size, dir);
> >> +}
> >> +
> >> +static void arm_iommu_sync_single_for_device(struct device *dev,
> >> +               dma_addr_t handle, size_t size, enum dma_data_direction
> >> dir)
> >> +{
> >> +       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
> >> +       dma_addr_t iova = handle & PAGE_MASK;
> >> +       struct page *page =
> >> phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
> >> +       unsigned int offset = handle & ~PAGE_MASK;
> >> +
> >> +       if (!iova)
> >> +               return;
> >> +
> >> +       __dma_page_cpu_to_dev(page, offset, size, dir);
> >> +}
> >> +
> >> +struct dma_map_ops iommu_ops = {
> >> +       .alloc          = arm_iommu_alloc_attrs,
> >> +       .free           = arm_iommu_free_attrs,
> >> +       .mmap           = arm_iommu_mmap_attrs,
> >> +
> >> +       .map_page               = arm_iommu_map_page,
> >> +       .unmap_page             = arm_iommu_unmap_page,
> >> +       .sync_single_for_cpu    = arm_iommu_sync_single_for_cpu,
> >> +       .sync_single_for_device = arm_iommu_sync_single_for_device,
> >> +
> >> +       .map_sg                 = arm_iommu_map_sg,
> >> +       .unmap_sg               = arm_iommu_unmap_sg,
> >> +       .sync_sg_for_cpu        = arm_iommu_sync_sg_for_cpu,
> >> +       .sync_sg_for_device     = arm_iommu_sync_sg_for_device,
> >> +};
> >> +
> >> +/**
> >> + * arm_iommu_create_mapping
> >> + * @bus: pointer to the bus holding the client device (for IOMMU calls)
> >> + * @base: start address of the valid IO address space
> >> + * @size: size of the valid IO address space
> >> + * @order: accuracy of the IO addresses allocations
> >> + *
> >> + * Creates a mapping structure which holds information about
> used/unused
> >> + * IO address ranges, which is required to perform memory allocation
> and
> >> + * mapping with IOMMU aware functions.
> >> + *
> >> + * The client device needs to be attached to the mapping with
> >> + * arm_iommu_attach_device function.
> >> + */
> >> +struct dma_iommu_mapping *
> >> +arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t
> >> size,
> >> +                        int order)
> >> +{
> >> +       unsigned int count = size >> (PAGE_SHIFT + order);
> >> +       unsigned int bitmap_size = BITS_TO_LONGS(count) * sizeof(long);
> >> +       struct dma_iommu_mapping *mapping;
> >> +       int err = -ENOMEM;
> >> +
> >> +       if (!count)
> >> +               return ERR_PTR(-EINVAL);
> >> +
> >> +       mapping = kzalloc(sizeof(struct dma_iommu_mapping), GFP_KERNEL);
> >> +       if (!mapping)
> >> +               goto err;
> >> +
> >> +       mapping->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
> >> +       if (!mapping->bitmap)
> >> +               goto err2;
> >> +
> >> +       mapping->base = base;
> >> +       mapping->bits = BITS_PER_BYTE * bitmap_size;
> >> +       mapping->order = order;
> >> +       spin_lock_init(&mapping->lock);
> >> +
> >> +       mapping->domain = iommu_domain_alloc(bus);
> >> +       if (!mapping->domain)
> >> +               goto err3;
> >> +
> >> +       kref_init(&mapping->kref);
> >> +       return mapping;
> >> +err3:
> >> +       kfree(mapping->bitmap);
> >> +err2:
> >> +       kfree(mapping);
> >> +err:
> >> +       return ERR_PTR(err);
> >> +}
> >> +
> >> +static void release_iommu_mapping(struct kref *kref)
> >> +{
> >> +       struct dma_iommu_mapping *mapping =
> >> +               container_of(kref, struct dma_iommu_mapping, kref);
> >> +
> >> +       iommu_domain_free(mapping->domain);
> >> +       kfree(mapping->bitmap);
> >> +       kfree(mapping);
> >> +}
> >> +
> >> +void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping)
> >> +{
> >> +       if (mapping)
> >> +               kref_put(&mapping->kref, release_iommu_mapping);
> >> +}
> >> +
> >> +/**
> >> + * arm_iommu_attach_device
> >> + * @dev: valid struct device pointer
> >> + * @mapping: io address space mapping structure (returned from
> >> + *     arm_iommu_create_mapping)
> >> + *
> >> + * Attaches specified io address space mapping to the provided device,
> >> + * this replaces the dma operations (dma_map_ops pointer) with the
> >> + * IOMMU aware version. More than one client might be attached to
> >> + * the same io address space mapping.
> >> + */
> >> +int arm_iommu_attach_device(struct device *dev,
> >> +                           struct dma_iommu_mapping *mapping)
> >> +{
> >> +       int err;
> >> +
> >> +       err = iommu_attach_device(mapping->domain, dev);
> >> +       if (err)
> >> +               return err;
> >> +
> >> +       kref_get(&mapping->kref);
> >> +       dev->archdata.mapping = mapping;
> >> +       set_dma_ops(dev, &iommu_ops);
> >> +
> >> +       pr_info("Attached IOMMU controller to %s device.\n",
> >> dev_name(dev));
> >> +       return 0;
> >> +}
> >> +
> >> +#endif
> >> diff --git a/arch/arm/mm/vmregion.h b/arch/arm/mm/vmregion.h
> >> index 162be66..bf312c3 100644
> >> --- a/arch/arm/mm/vmregion.h
> >> +++ b/arch/arm/mm/vmregion.h
> >> @@ -17,7 +17,7 @@ struct arm_vmregion {
> >>        struct list_head        vm_list;
> >>        unsigned long           vm_start;
> >>        unsigned long           vm_end;
> >> -       struct page             *vm_pages;
> >> +       void                    *priv;
> >>        int                     vm_active;
> >>        const void              *caller;
> >>  };
> >> --
> >> 1.7.1.569.g6f426
> >>
> >>
> >> _______________________________________________
> >> Linaro-mm-sig mailing list
> >> Linaro-mm-sig@lists.linaro.org
> >> http://lists.linaro.org/mailman/listinfo/linaro-mm-sig
> >>
> >
>
Paul Gortmaker May 11, 2012, 2:08 a.m. UTC | #5
On Wed, Apr 18, 2012 at 9:44 AM, Marek Szyprowski
<m.szyprowski@samsung.com> wrote:
> This patch adds a complete implementation of the DMA-mapping API for
> devices which have IOMMU support.

Hi Marek,

It looks like this patch breaks no-MMU builds on ARM, at least
according to git bisect.  Here is a link to a linux-next failure:

http://kisskb.ellerman.id.au/kisskb/buildresult/6291233/

arch/arm/mm/dma-mapping.c:726:42: error: 'pgprot_kernel' undeclared
(first use in this function)
make[2]: *** [arch/arm/mm/dma-mapping.o] Error 1

Please have a look, thanks.

Paul.
---


>
> This implementation tries to optimize dma address space usage by remapping
> all possible physical memory chunks into a single dma address space chunk.
>
> DMA address space is managed on top of the bitmap stored in the
> dma_iommu_mapping structure stored in device->archdata. Platform setup
> code has to initialize parameters of the dma address space (base address,
> size, allocation precision order) with arm_iommu_create_mapping()
> function.
> To reduce the size of the bitmap, all allocations are aligned to the
> specified order of base 4 KiB pages.
>
> dma_alloc_* functions allocate physical memory in chunks, each with
> alloc_pages() function to avoid failing if the physical memory gets
> fragmented. In worst case the allocated buffer is composed of 4 KiB page
> chunks.
>
> The dma_map_sg() function minimizes the total number of dma address space
> chunks by merging physical memory chunks into one larger dma address
> space chunk. If the requested chunk (scatter list entry) boundaries
> match physical page boundaries, most dma_map_sg() requests will
> result in creating only one chunk in the dma address space.
>
> dma_map_page() simply creates a mapping for the given page(s) in the dma
> address space.
>
> All dma functions also perform the required cache operations, like their
> counterparts from the ARM linear physical memory mapping version.
>
> This patch contains code and fixes kindly provided by:
> - Krishna Reddy <vdumpa@nvidia.com>,
> - Andrzej Pietrasiewicz <andrzej.p@samsung.com>,
> - Hiroshi DOYU <hdoyu@nvidia.com>
>
> Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
> Acked-by: Kyungmin Park <kyungmin.park@samsung.com>
> Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
> Tested-By: Subash Patel <subash.ramaswamy@linaro.org>
> ---
>  arch/arm/Kconfig                 |    8 +
>  arch/arm/include/asm/device.h    |    3 +
>  arch/arm/include/asm/dma-iommu.h |   34 ++
>  arch/arm/mm/dma-mapping.c        |  727
> +++++++++++++++++++++++++++++++++++++-
>  arch/arm/mm/vmregion.h           |    2 +-
>  5 files changed, 759 insertions(+), 15 deletions(-)
>  create mode 100644 arch/arm/include/asm/dma-iommu.h
>
Marek Szyprowski May 11, 2012, 7:52 a.m. UTC | #6
Hello,

On Friday, May 11, 2012 4:09 AM Paul Gortmaker wrote:

> On Wed, Apr 18, 2012 at 9:44 AM, Marek Szyprowski
> <m.szyprowski@samsung.com> wrote:
> > This patch adds a complete implementation of the DMA-mapping API for
> > devices which have IOMMU support.
> 
> Hi Marek,
> 
> It looks like this patch breaks no-MMU builds on ARM, at least
> according to git bisect.  Here is a link to a linux-next failure:
> 
> http://kisskb.ellerman.id.au/kisskb/buildresult/6291233/
> 
> arch/arm/mm/dma-mapping.c:726:42: error: 'pgprot_kernel' undeclared
> (first use in this function)
> make[2]: *** [arch/arm/mm/dma-mapping.o] Error 1
> 
> Please have a look, thanks.

Thanks for reporting this issue, I will send a fix in a minute.

Best regards
diff mbox

Patch

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 0fd27d4..874e519 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -46,6 +46,14 @@  config ARM
 config ARM_HAS_SG_CHAIN
 	bool
 
+config NEED_SG_DMA_LENGTH
+	bool
+
+config ARM_DMA_USE_IOMMU
+	select NEED_SG_DMA_LENGTH
+	select ARM_HAS_SG_CHAIN
+	bool
+
 config HAVE_PWM
 	bool
 
diff --git a/arch/arm/include/asm/device.h b/arch/arm/include/asm/device.h
index 6e2cb0e..b69c0d3 100644
--- a/arch/arm/include/asm/device.h
+++ b/arch/arm/include/asm/device.h
@@ -14,6 +14,9 @@  struct dev_archdata {
 #ifdef CONFIG_IOMMU_API
 	void *iommu; /* private IOMMU data */
 #endif
+#ifdef CONFIG_ARM_DMA_USE_IOMMU
+	struct dma_iommu_mapping	*mapping;
+#endif
 };
 
 struct omap_device;
diff --git a/arch/arm/include/asm/dma-iommu.h b/arch/arm/include/asm/dma-iommu.h
new file mode 100644
index 0000000..799b094
--- /dev/null
+++ b/arch/arm/include/asm/dma-iommu.h
@@ -0,0 +1,34 @@ 
+#ifndef ASMARM_DMA_IOMMU_H
+#define ASMARM_DMA_IOMMU_H
+
+#ifdef __KERNEL__
+
+#include <linux/mm_types.h>
+#include <linux/scatterlist.h>
+#include <linux/dma-debug.h>
+#include <linux/kmemcheck.h>
+
+struct dma_iommu_mapping {
+	/* iommu specific data */
+	struct iommu_domain	*domain;
+
+	void			*bitmap;
+	size_t			bits;
+	unsigned int		order;
+	dma_addr_t		base;
+
+	spinlock_t		lock;
+	struct kref		kref;
+};
+
+struct dma_iommu_mapping *
+arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size,
+			 int order);
+
+void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping);
+
+int arm_iommu_attach_device(struct device *dev,
+					struct dma_iommu_mapping *mapping);
+
+#endif /* __KERNEL__ */
+#endif
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index d4aad65..2d11aa0 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -19,6 +19,8 @@ 
 #include <linux/dma-mapping.h>
 #include <linux/highmem.h>
 #include <linux/slab.h>
+#include <linux/iommu.h>
+#include <linux/vmalloc.h>
 
 #include <asm/memory.h>
 #include <asm/highmem.h>
@@ -26,6 +28,7 @@ 
 #include <asm/tlbflush.h>
 #include <asm/sizes.h>
 #include <asm/mach/arch.h>
+#include <asm/dma-iommu.h>
 
 #include "mm.h"
 
@@ -155,6 +158,21 @@  static u64 get_coherent_dma_mask(struct device *dev)
 	return mask;
 }
 
+static void __dma_clear_buffer(struct page *page, size_t size)
+{
+	void *ptr;
+	/*
+	 * Ensure that the allocated pages are zeroed, and that any data
+	 * lurking in the kernel direct-mapped region is invalidated.
+	 */
+	ptr = page_address(page);
+	if (ptr) {
+		memset(ptr, 0, size);
+		dmac_flush_range(ptr, ptr + size);
+		outer_flush_range(__pa(ptr), __pa(ptr) + size);
+	}
+}
+
 /*
  * Allocate a DMA buffer for 'dev' of size 'size' using the
  * specified gfp mask.  Note that 'size' must be page aligned.
@@ -163,7 +181,6 @@  static struct page *__dma_alloc_buffer(struct device *dev, size_t size, gfp_t gf
 {
 	unsigned long order = get_order(size);
 	struct page *page, *p, *e;
-	void *ptr;
 	u64 mask = get_coherent_dma_mask(dev);
 
 #ifdef CONFIG_DMA_API_DEBUG
@@ -192,14 +209,7 @@  static struct page *__dma_alloc_buffer(struct device *dev, size_t size, gfp_t gf
 	for (p = page + (size >> PAGE_SHIFT), e = page + (1 << order); p < e; p++)
 		__free_page(p);
 
-	/*
-	 * Ensure that the allocated pages are zeroed, and that any data
-	 * lurking in the kernel direct-mapped region is invalidated.
-	 */
-	ptr = page_address(page);
-	memset(ptr, 0, size);
-	dmac_flush_range(ptr, ptr + size);
-	outer_flush_range(__pa(ptr), __pa(ptr) + size);
+	__dma_clear_buffer(page, size);
 
 	return page;
 }
@@ -348,7 +358,7 @@  __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot,
 		u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1);
 
 		pte = consistent_pte[idx] + off;
-		c->vm_pages = page;
+		c->priv = page;
 
 		do {
 			BUG_ON(!pte_none(*pte));
@@ -461,6 +471,14 @@  __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp,
 	return addr;
 }
 
+static inline pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot)
+{
+	prot = dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs) ?
+			    pgprot_writecombine(prot) :
+			    pgprot_dmacoherent(prot);
+	return prot;
+}
+
 /*
  * Allocate DMA-coherent memory space and return both the kernel remapped
  * virtual and bus address for that space.
@@ -468,9 +486,7 @@  __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp,
 void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
 		    gfp_t gfp, struct dma_attrs *attrs)
 {
-	pgprot_t prot = dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs) ?
-			pgprot_writecombine(pgprot_kernel) :
-			pgprot_dmacoherent(pgprot_kernel);
+	pgprot_t prot = __get_dma_pgprot(attrs, pgprot_kernel);
 	void *memory;
 
 	if (dma_alloc_from_coherent(dev, size, handle, &memory))
@@ -497,16 +513,20 @@  int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma,
 			    pgprot_writecombine(vma->vm_page_prot) :
 			    pgprot_dmacoherent(vma->vm_page_prot);
 
+	if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
+		return ret;
+
 	c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr);
 	if (c) {
 		unsigned long off = vma->vm_pgoff;
+		struct page *pages = c->priv;
 
 		kern_size = (c->vm_end - c->vm_start) >> PAGE_SHIFT;
 
 		if (off < kern_size &&
 		    user_size <= (kern_size - off)) {
 			ret = remap_pfn_range(vma, vma->vm_start,
-					      page_to_pfn(c->vm_pages) + off,
+					      page_to_pfn(pages) + off,
 					      user_size << PAGE_SHIFT,
 					      vma->vm_page_prot);
 		}
@@ -645,6 +665,9 @@  int arm_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
 	int i, j;
 
 	for_each_sg(sg, s, nents, i) {
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+		s->dma_length = s->length;
+#endif
 		s->dma_address = ops->map_page(dev, sg_page(s), s->offset,
 						s->length, dir, attrs);
 		if (dma_mapping_error(dev, s->dma_address))
@@ -753,3 +776,679 @@  static int __init dma_debug_do_init(void)
 	return 0;
 }
 fs_initcall(dma_debug_do_init);
+
+#ifdef CONFIG_ARM_DMA_USE_IOMMU
+
+/* IOMMU */
+
+/*
+ * Reserve a range of IO virtual address space large enough for @size bytes.
+ *
+ * @mapping: IO address space to allocate from
+ * @size: number of bytes needed (rounded up to whole pages here)
+ *
+ * Returns the bus address of the reserved range, or DMA_ERROR_CODE when
+ * the bitmap search does not find a suitable free area.
+ */
+static inline dma_addr_t __alloc_iova(struct dma_iommu_mapping *mapping,
+				      size_t size)
+{
+	unsigned int order = get_order(size);
+	unsigned int align = 0;
+	unsigned int count, start;
+	unsigned long flags;
+
+	/* One bitmap bit stands for 2^mapping->order pages; round up. */
+	count = ((PAGE_ALIGN(size) >> PAGE_SHIFT) +
+		 (1 << mapping->order) - 1) >> mapping->order;
+
+	/*
+	 * Requests bigger than one bitmap unit get an alignment mask derived
+	 * from their own order, expressed in bitmap units.
+	 */
+	if (order > mapping->order)
+		align = (1 << (order - mapping->order)) - 1;
+
+	/* Search and mark under the lock so the two steps are atomic. */
+	spin_lock_irqsave(&mapping->lock, flags);
+	start = bitmap_find_next_zero_area(mapping->bitmap, mapping->bits, 0,
+					   count, align);
+	if (start > mapping->bits) {
+		spin_unlock_irqrestore(&mapping->lock, flags);
+		return DMA_ERROR_CODE;
+	}
+
+	bitmap_set(mapping->bitmap, start, count);
+	spin_unlock_irqrestore(&mapping->lock, flags);
+
+	/* Convert the bitmap index back into a bus address. */
+	return mapping->base + (start << (mapping->order + PAGE_SHIFT));
+}
+
+/*
+ * Return a range of IO virtual address space to the mapping's bitmap.
+ *
+ * @mapping: IO address space the range belongs to
+ * @addr: start of the range
+ * @size: length of the range in bytes
+ */
+static inline void __free_iova(struct dma_iommu_mapping *mapping,
+			       dma_addr_t addr, size_t size)
+{
+	unsigned long flags;
+	unsigned int first_bit, nr_bits;
+
+	/* Index of the first bitmap unit covered by the range. */
+	first_bit = (addr - mapping->base) >> (mapping->order + PAGE_SHIFT);
+
+	/* Pages in the range, rounded up to whole bitmap units. */
+	nr_bits = ((size >> PAGE_SHIFT) +
+		   (1 << mapping->order) - 1) >> mapping->order;
+
+	spin_lock_irqsave(&mapping->lock, flags);
+	bitmap_clear(mapping->bitmap, first_bit, nr_bits);
+	spin_unlock_irqrestore(&mapping->lock, flags);
+}
+
+/*
+ * Allocate the physical pages backing an IOMMU-mapped buffer.
+ *
+ * @dev: device the buffer is allocated for (not used here)
+ * @size: buffer size in bytes
+ * @gfp: allocation flags
+ *
+ * The buffer is assembled from power-of-two chunks; each chunk is split
+ * into order-0 pages so the pages can be freed individually later. When a
+ * chunk cannot be allocated, progressively smaller orders are tried.
+ *
+ * Returns an array holding one struct page pointer per page of the buffer
+ * (kzalloc'ed when it fits into one page, vzalloc'ed otherwise), or NULL
+ * on failure.
+ */
+static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, gfp_t gfp)
+{
+	struct page **pages;
+	int count = size >> PAGE_SHIFT;
+	int array_size = count * sizeof(struct page *);
+	int i = 0;
+
+	if (array_size <= PAGE_SIZE)
+		pages = kzalloc(array_size, gfp);
+	else
+		pages = vzalloc(array_size);
+	if (!pages)
+		return NULL;
+
+	while (count) {
+		/* Largest power of two that divides the remaining count. */
+		int j, order = __ffs(count);
+
+		pages[i] = alloc_pages(gfp | __GFP_NOWARN, order);
+		while (!pages[i] && order)
+			pages[i] = alloc_pages(gfp | __GFP_NOWARN, --order);
+		if (!pages[i])
+			goto error;
+
+		if (order)
+			split_page(pages[i], order);
+		/* Fill in the entries for the remaining pages of the chunk. */
+		j = 1 << order;
+		while (--j)
+			pages[i + j] = pages[i] + j;
+
+		__dma_clear_buffer(pages[i], PAGE_SIZE << order);
+		i += 1 << order;
+		count -= 1 << order;
+	}
+
+	return pages;
+error:
+	/*
+	 * Free pages[i - 1] down to pages[0]. The post-decrement is
+	 * essential: it terminates immediately when the very first
+	 * allocation failed (i == 0) instead of indexing pages[-1], and it
+	 * does not leave pages[0] behind.
+	 */
+	while (i--)
+		if (pages[i])
+			__free_pages(pages[i], 0);
+	/* Must mirror the kzalloc/vzalloc decision taken above. */
+	if (array_size <= PAGE_SIZE)
+		kfree(pages);
+	else
+		vfree(pages);
+	return NULL;
+}
+
+/*
+ * Free a buffer obtained from __iommu_alloc_buffer(): every page and then
+ * the page pointer array itself.
+ *
+ * @dev: device the buffer was allocated for (not used here)
+ * @pages: page pointer array returned by __iommu_alloc_buffer()
+ * @size: buffer size in bytes, as passed to __iommu_alloc_buffer()
+ *
+ * Always returns 0.
+ */
+static int __iommu_free_buffer(struct device *dev, struct page **pages, size_t size)
+{
+	int count = size >> PAGE_SHIFT;
+	int array_size = count * sizeof(struct page *);
+	int i;
+	for (i = 0; i < count; i++)
+		if (pages[i])
+			__free_pages(pages[i], 0);
+	/*
+	 * __iommu_alloc_buffer() uses kzalloc() for arrays of up to and
+	 * including PAGE_SIZE bytes, so the same boundary has to be used
+	 * here; otherwise a PAGE_SIZE array would be handed to vfree().
+	 */
+	if (array_size <= PAGE_SIZE)
+		kfree(pages);
+	else
+		vfree(pages);
+	return 0;
+}
+
+/*
+ * Create a kernel virtual mapping, inside the consistent DMA region, for
+ * the given array of pages.
+ *
+ * @pages: one entry per page of the buffer
+ * @size: buffer size in bytes
+ * @gfp: flags passed on to arm_vmregion_alloc() (__GFP_DMA and
+ *       __GFP_HIGHMEM are masked off)
+ * @prot: page protection used for the new mapping
+ *
+ * The page array is stored in the region's priv field. Returns the kernel
+ * virtual address of the mapping, or NULL on failure.
+ */
+static void *
+__iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t prot)
+{
+	struct arm_vmregion *c;
+	size_t align;
+	size_t count = size >> PAGE_SHIFT;
+	int bit;
+
+	if (!consistent_pte[0]) {
+		pr_err("%s: not initialised\n", __func__);
+		dump_stack();
+		return NULL;
+	}
+
+	/*
+	 * Align the virtual region allocation - maximum alignment is
+	 * a section size, minimum is a page size.  This helps reduce
+	 * fragmentation of the DMA space, and also prevents allocations
+	 * smaller than a section from crossing a section boundary.
+	 */
+	bit = fls(size - 1);
+	if (bit > SECTION_SHIFT)
+		bit = SECTION_SHIFT;
+	align = 1 << bit;
+
+	/*
+	 * Allocate a virtual address in the consistent mapping region.
+	 */
+	c = arm_vmregion_alloc(&consistent_head, align, size,
+			    gfp & ~(__GFP_DMA | __GFP_HIGHMEM), NULL);
+	if (c) {
+		pte_t *pte;
+		int idx = CONSISTENT_PTE_INDEX(c->vm_start);
+		int i = 0;
+		u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1);
+
+		pte = consistent_pte[idx] + off;
+		/* Remember the page array for later lookups of this region. */
+		c->priv = pages;
+
+		/* Install one PTE per page of the buffer. */
+		do {
+			BUG_ON(!pte_none(*pte));
+
+			set_pte_ext(pte, mk_pte(pages[i], prot), 0);
+			pte++;
+			off++;
+			i++;
+			/* End of this PTE table: continue in the next one. */
+			if (off >= PTRS_PER_PTE) {
+				off = 0;
+				pte = consistent_pte[++idx];
+			}
+		} while (i < count);
+
+		dsb();
+
+		return (void *)c->vm_start;
+	}
+	return NULL;
+}
+
+/*
+ * Create a mapping in the device IO address space for the given pages.
+ *
+ * @dev: device whose archdata.mapping describes the IO address space
+ * @pages: one entry per page of the buffer
+ * @size: buffer size in bytes
+ *
+ * Returns the IO virtual address of the start of the buffer, or
+ * DMA_ERROR_CODE when either the address space allocation or one of the
+ * iommu_map() calls fails.
+ */
+static dma_addr_t
+__iommu_create_mapping(struct device *dev, struct page **pages, size_t size)
+{
+	struct dma_iommu_mapping *mapping = dev->archdata.mapping;
+	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+	dma_addr_t dma_addr, iova;
+	int i, ret = DMA_ERROR_CODE;
+
+	dma_addr = __alloc_iova(mapping, size);
+	if (dma_addr == DMA_ERROR_CODE)
+		return dma_addr;
+
+	iova = dma_addr;
+	for (i = 0; i < count; ) {
+		unsigned int next_pfn = page_to_pfn(pages[i]) + 1;
+		phys_addr_t phys = page_to_phys(pages[i]);
+		unsigned int len, j;
+
+		/*
+		 * Extend the run while the following pages are physically
+		 * contiguous, so the run is mapped with one iommu_map() call.
+		 */
+		for (j = i + 1; j < count; j++, next_pfn++)
+			if (page_to_pfn(pages[j]) != next_pfn)
+				break;
+
+		len = (j - i) << PAGE_SHIFT;
+		ret = iommu_map(mapping->domain, iova, phys, len, 0);
+		if (ret < 0)
+			goto fail;
+		iova += len;
+		i = j;
+	}
+	return dma_addr;
+fail:
+	/* Undo the part mapped so far and release the address range. */
+	iommu_unmap(mapping->domain, dma_addr, iova-dma_addr);
+	__free_iova(mapping, dma_addr, size);
+	return DMA_ERROR_CODE;
+}
+
+/*
+ * Unmap a range from the device IO address space and release the
+ * addresses it occupied.
+ *
+ * @dev: device whose archdata.mapping describes the IO address space
+ * @iova: IO virtual address of the range; may carry an in-page offset
+ * @size: length of the range in bytes
+ *
+ * Always returns 0.
+ */
+static int __iommu_remove_mapping(struct device *dev, dma_addr_t iova, size_t size)
+{
+	struct dma_iommu_mapping *mapping = dev->archdata.mapping;
+	dma_addr_t page_iova;
+	size_t page_len;
+
+	/*
+	 * Widen the range to whole pages: the in-page offset of iova is
+	 * added to the length, then the start is rounded down.
+	 */
+	page_len = PAGE_ALIGN((iova & ~PAGE_MASK) + size);
+	page_iova = iova & PAGE_MASK;
+
+	iommu_unmap(mapping->domain, page_iova, page_len);
+	__free_iova(mapping, page_iova, page_len);
+
+	return 0;
+}
+
+/*
+ * Allocate a DMA buffer for a device behind an IOMMU.
+ *
+ * @dev: device to allocate for
+ * @size: requested size in bytes (rounded up to whole pages)
+ * @handle: receives the IO virtual address, or DMA_ERROR_CODE on failure
+ * @gfp: allocation flags
+ * @attrs: DMA attributes, used to select the page protection
+ *
+ * Three steps are performed: allocate the pages, map them into the IO
+ * address space, and map them into the kernel. Returns the kernel virtual
+ * address of the buffer, or NULL on failure.
+ */
+static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
+	    dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs)
+{
+	pgprot_t prot = __get_dma_pgprot(attrs, pgprot_kernel);
+	struct page **pages;
+	void *addr = NULL;
+
+	*handle = DMA_ERROR_CODE;
+	size = PAGE_ALIGN(size);
+
+	pages = __iommu_alloc_buffer(dev, size, gfp);
+	if (!pages)
+		return NULL;
+
+	*handle = __iommu_create_mapping(dev, pages, size);
+	if (*handle == DMA_ERROR_CODE)
+		goto err_buffer;
+
+	addr = __iommu_alloc_remap(pages, size, gfp, prot);
+	if (!addr)
+		goto err_mapping;
+
+	return addr;
+
+	/* Unwind in reverse order of setup. */
+err_mapping:
+	__iommu_remove_mapping(dev, *handle, size);
+err_buffer:
+	__iommu_free_buffer(dev, pages, size);
+	return NULL;
+}
+
+/*
+ * Map a buffer allocated by arm_iommu_alloc_attrs() into user space.
+ *
+ * @dev: device the buffer belongs to
+ * @vma: user VMA to populate; vm_pgoff selects the first buffer page
+ * @cpu_addr: kernel address returned by the allocation
+ * @dma_addr: IO virtual address of the buffer (not used here)
+ * @size: buffer size (not used here; the region size is authoritative)
+ * @attrs: DMA attributes, used to select the page protection
+ *
+ * Returns 0 on success (or when @cpu_addr matches no consistent region),
+ * -ENXIO when the requested window does not fit inside the buffer, or
+ * the error reported by vm_insert_page().
+ */
+static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
+		    void *cpu_addr, dma_addr_t dma_addr, size_t size,
+		    struct dma_attrs *attrs)
+{
+	struct arm_vmregion *c;
+
+	vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot);
+	c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr);
+
+	if (c) {
+		struct page **pages = c->priv;
+
+		unsigned long uaddr = vma->vm_start;
+		unsigned long usize = vma->vm_end - vma->vm_start;
+		unsigned long kern_size = (c->vm_end - c->vm_start) >> PAGE_SHIFT;
+		unsigned long off = vma->vm_pgoff;
+
+		/*
+		 * The page array has exactly one entry per page of the
+		 * region. Reject any window reaching past it, otherwise
+		 * the loop below would read beyond the end of pages[].
+		 * This is the same check arm_dma_mmap() applies.
+		 */
+		if (off >= kern_size ||
+		    (usize >> PAGE_SHIFT) > kern_size - off)
+			return -ENXIO;
+
+		pages += off;
+
+		do {
+			int ret;
+
+			ret = vm_insert_page(vma, uaddr, *pages++);
+			if (ret) {
+				pr_err("Remapping memory, error: %d\n", ret);
+				return ret;
+			}
+
+			uaddr += PAGE_SIZE;
+			usize -= PAGE_SIZE;
+		} while (usize > 0);
+	}
+	return 0;
+}
+
+/*
+ * Release a buffer identified by its kernel address: remove the kernel
+ * remapping, the IO address space mapping and finally the pages.
+ * Nothing is done when @cpu_addr matches no consistent region.
+ * Must not be called with IRQs disabled.
+ */
+void arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
+			  dma_addr_t handle, struct dma_attrs *attrs)
+{
+	struct arm_vmregion *region;
+	struct page **pages;
+
+	size = PAGE_ALIGN(size);
+
+	region = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr);
+	if (!region)
+		return;
+
+	/* Read the page array before anything is unmapped. */
+	pages = region->priv;
+
+	__dma_free_remap(cpu_addr, size);
+	__iommu_remove_mapping(dev, handle, size);
+	__iommu_free_buffer(dev, pages, size);
+}
+
+/*
+ * Map a part of the scatter-gather list into contiguous io address space
+ *
+ * @dev: device whose archdata.mapping describes the IO address space
+ * @sg: first scatterlist entry of the chunk
+ * @size: total length of the chunk in bytes (rounded up to whole pages)
+ * @handle: receives the IO virtual address of the chunk; set to
+ *          DMA_ERROR_CODE on failure
+ * @dir: DMA transfer direction, used for cache maintenance
+ *
+ * Returns 0 on success, -ENOMEM when no IO address range is available, or
+ * the negative value returned by iommu_map().
+ */
+static int __map_sg_chunk(struct device *dev, struct scatterlist *sg,
+			  size_t size, dma_addr_t *handle,
+			  enum dma_data_direction dir)
+{
+	struct dma_iommu_mapping *mapping = dev->archdata.mapping;
+	dma_addr_t iova, iova_base;
+	int ret = 0;
+	unsigned int count;
+	struct scatterlist *s;
+
+	size = PAGE_ALIGN(size);
+	*handle = DMA_ERROR_CODE;
+
+	iova_base = iova = __alloc_iova(mapping, size);
+	if (iova == DMA_ERROR_CODE)
+		return -ENOMEM;
+
+	/* Walk entries until 'size' worth of pages has been mapped. */
+	for (count = 0, s = sg; count < (size >> PAGE_SHIFT); s = sg_next(s)) {
+		phys_addr_t phys = page_to_phys(sg_page(s));
+		unsigned int len = PAGE_ALIGN(s->offset + s->length);
+
+		if (!arch_is_coherent())
+			__dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir);
+
+		ret = iommu_map(mapping->domain, iova, phys, len, 0);
+		if (ret < 0)
+			goto fail;
+		count += len >> PAGE_SHIFT;
+		iova += len;
+	}
+	*handle = iova_base;
+
+	return 0;
+fail:
+	/* 'count' pages were mapped before the failure; undo them. */
+	iommu_unmap(mapping->domain, iova_base, count * PAGE_SIZE);
+	__free_iova(mapping, iova_base, size);
+	return ret;
+}
+
+/**
+ * arm_iommu_map_sg - map a set of SG buffers for streaming mode DMA
+ * @dev: valid struct device pointer
+ * @sg: list of buffers
+ * @nents: number of buffers to map
+ * @dir: DMA transfer direction
+ * @attrs: DMA attributes (not used by this function)
+ *
+ * Map a set of buffers described by scatterlist in streaming mode for DMA.
+ * The scatter gather list elements are merged together (if possible) and
+ * tagged with the appropriate dma address and length. They are obtained via
+ * sg_dma_{address,length}.
+ *
+ * Returns the number of DMA chunks created, or 0 if a chunk could not be
+ * mapped.
+ */
+int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents,
+		     enum dma_data_direction dir, struct dma_attrs *attrs)
+{
+	/*
+	 * s walks the input entries, start marks the first entry of the
+	 * chunk being gathered, and dma is the entry that receives the
+	 * address and length of the next finished chunk.
+	 */
+	struct scatterlist *s = sg, *dma = sg, *start = sg;
+	int i, count = 0;
+	unsigned int offset = s->offset;
+	unsigned int size = s->offset + s->length;
+	unsigned int max = dma_get_max_seg_size(dev);
+
+	for (i = 1; i < nents; i++) {
+		s = sg_next(s);
+
+		s->dma_address = DMA_ERROR_CODE;
+		s->dma_length = 0;
+
+		/*
+		 * Close the current chunk when this entry does not start on
+		 * a page boundary, the data gathered so far does not end on
+		 * one, or adding the entry would exceed the maximum segment
+		 * size of the device.
+		 */
+		if (s->offset || (size & ~PAGE_MASK) || size + s->length > max) {
+			if (__map_sg_chunk(dev, start, size, &dma->dma_address,
+			    dir) < 0)
+				goto bad_mapping;
+
+			dma->dma_address += offset;
+			dma->dma_length = size - offset;
+
+			size = offset = s->offset;
+			start = s;
+			dma = sg_next(dma);
+			count += 1;
+		}
+		size += s->length;
+	}
+	/* Map whatever is left as the final chunk. */
+	if (__map_sg_chunk(dev, start, size, &dma->dma_address, dir) < 0)
+		goto bad_mapping;
+
+	dma->dma_address += offset;
+	dma->dma_length = size - offset;
+
+	return count+1;
+
+bad_mapping:
+	/* Tear down the 'count' chunks that were mapped successfully. */
+	for_each_sg(sg, s, count, i)
+		__iommu_remove_mapping(dev, sg_dma_address(s), sg_dma_len(s));
+	return 0;
+}
+
+/**
+ * arm_iommu_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg
+ * @dev: valid struct device pointer
+ * @sg: list of buffers
+ * @nents: number of buffers to unmap (same as was passed to dma_map_sg)
+ * @dir: DMA transfer direction (same as was passed to dma_map_sg)
+ * @attrs: DMA attributes (not used by this function)
+ *
+ * Unmap a set of streaming mode DMA translations.  Again, CPU access
+ * rules concerning calls here are the same as for dma_unmap_single().
+ */
+void arm_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
+			enum dma_data_direction dir, struct dma_attrs *attrs)
+{
+	struct scatterlist *s;
+	int i;
+
+	for_each_sg(sg, s, nents, i) {
+		/* Only entries with a non-zero DMA length carry a mapping. */
+		if (sg_dma_len(s))
+			__iommu_remove_mapping(dev, sg_dma_address(s),
+					       sg_dma_len(s));
+		/* Cache maintenance is done for every entry, mapped or not. */
+		if (!arch_is_coherent())
+			__dma_page_dev_to_cpu(sg_page(s), s->offset,
+					      s->length, dir);
+	}
+}
+
+/**
+ * arm_iommu_sync_sg_for_cpu - make SG buffers ready for CPU access
+ * @dev: valid struct device pointer
+ * @sg: list of buffers
+ * @nents: number of buffers to map (returned from dma_map_sg)
+ * @dir: DMA transfer direction (same as was passed to dma_map_sg)
+ *
+ * Performs device-to-CPU cache maintenance on every entry of the list,
+ * unless the architecture is coherent.
+ */
+void arm_iommu_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
+			int nents, enum dma_data_direction dir)
+{
+	struct scatterlist *entry;
+	int idx;
+
+	for_each_sg(sg, entry, nents, idx) {
+		if (arch_is_coherent())
+			continue;
+
+		__dma_page_dev_to_cpu(sg_page(entry), entry->offset,
+				      entry->length, dir);
+	}
+}
+
+/**
+ * arm_iommu_sync_sg_for_device - make SG buffers ready for device access
+ * @dev: valid struct device pointer
+ * @sg: list of buffers
+ * @nents: number of buffers to map (returned from dma_map_sg)
+ * @dir: DMA transfer direction (same as was passed to dma_map_sg)
+ *
+ * Performs CPU-to-device cache maintenance on every entry of the list,
+ * unless the architecture is coherent.
+ */
+void arm_iommu_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
+			int nents, enum dma_data_direction dir)
+{
+	struct scatterlist *entry;
+	int idx;
+
+	for_each_sg(sg, entry, nents, idx) {
+		if (arch_is_coherent())
+			continue;
+
+		__dma_page_cpu_to_dev(sg_page(entry), entry->offset,
+				      entry->length, dir);
+	}
+}
+
+
+/**
+ * arm_iommu_map_page
+ * @dev: valid struct device pointer
+ * @page: page that buffer resides in
+ * @offset: offset into page for start of buffer
+ * @size: size of buffer to map
+ * @dir: DMA transfer direction
+ * @attrs: DMA attributes (not used by this function)
+ *
+ * IOMMU aware version of arm_dma_map_page()
+ *
+ * Returns the IO virtual address of the buffer (including @offset), or
+ * DMA_ERROR_CODE on failure.
+ */
+static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page,
+	     unsigned long offset, size_t size, enum dma_data_direction dir,
+	     struct dma_attrs *attrs)
+{
+	struct dma_iommu_mapping *mapping = dev->archdata.mapping;
+	dma_addr_t dma_addr;
+	/* Whole pages are mapped, so the length covers the leading offset. */
+	int ret, len = PAGE_ALIGN(size + offset);
+
+	if (!arch_is_coherent())
+		__dma_page_cpu_to_dev(page, offset, size, dir);
+
+	dma_addr = __alloc_iova(mapping, len);
+	if (dma_addr == DMA_ERROR_CODE)
+		return dma_addr;
+
+	ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page), len, 0);
+	if (ret < 0)
+		goto fail;
+
+	return dma_addr + offset;
+fail:
+	__free_iova(mapping, dma_addr, len);
+	return DMA_ERROR_CODE;
+}
+
+/**
+ * arm_iommu_unmap_page
+ * @dev: valid struct device pointer
+ * @handle: DMA address of buffer
+ * @size: size of buffer (same as passed to dma_map_page)
+ * @dir: DMA transfer direction (same as passed to dma_map_page)
+ * @attrs: DMA attributes (not used by this function)
+ *
+ * IOMMU aware version of arm_dma_unmap_page()
+ */
+static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir,
+		struct dma_attrs *attrs)
+{
+	struct dma_iommu_mapping *mapping = dev->archdata.mapping;
+	/* Split the handle into its page-aligned base and in-page offset. */
+	dma_addr_t iova = handle & PAGE_MASK;
+	/* The page is recovered by asking the IOMMU to translate the base. */
+	struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
+	int offset = handle & ~PAGE_MASK;
+	int len = PAGE_ALIGN(size + offset);
+
+	/* A handle in the first page (base 0) is not unmapped. */
+	if (!iova)
+		return;
+
+	if (!arch_is_coherent())
+		__dma_page_dev_to_cpu(page, offset, size, dir);
+
+	iommu_unmap(mapping->domain, iova, len);
+	__free_iova(mapping, iova, len);
+}
+
+/*
+ * Make a single streaming mapping ready for CPU access.
+ *
+ * @dev: device whose archdata.mapping describes the IO address space
+ * @handle: DMA address of the buffer
+ * @size: size of the buffer in bytes
+ * @dir: DMA transfer direction
+ *
+ * The page behind @handle is looked up through the IOMMU and
+ * device-to-CPU cache maintenance is performed on it, unless the
+ * architecture is coherent.
+ */
+static void arm_iommu_sync_single_for_cpu(struct device *dev,
+		dma_addr_t handle, size_t size, enum dma_data_direction dir)
+{
+	struct dma_iommu_mapping *mapping = dev->archdata.mapping;
+	dma_addr_t iova = handle & PAGE_MASK;
+	struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
+	unsigned int offset = handle & ~PAGE_MASK;
+
+	/* A handle in the first page (base 0) is ignored. */
+	if (!iova)
+		return;
+
+	if (!arch_is_coherent())
+		__dma_page_dev_to_cpu(page, offset, size, dir);
+}
+
+/*
+ * Make a single streaming mapping ready for device access.
+ *
+ * @dev: device whose archdata.mapping describes the IO address space
+ * @handle: DMA address of the buffer
+ * @size: size of the buffer in bytes
+ * @dir: DMA transfer direction
+ *
+ * The page behind @handle is looked up through the IOMMU and
+ * CPU-to-device cache maintenance is performed on it, unless the
+ * architecture is coherent.
+ */
+static void arm_iommu_sync_single_for_device(struct device *dev,
+		dma_addr_t handle, size_t size, enum dma_data_direction dir)
+{
+	struct dma_iommu_mapping *mapping = dev->archdata.mapping;
+	dma_addr_t iova = handle & PAGE_MASK;
+	struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
+	unsigned int offset = handle & ~PAGE_MASK;
+
+	/* A handle in the first page (base 0) is ignored. */
+	if (!iova)
+		return;
+
+	/*
+	 * Skip cache maintenance on coherent architectures, like
+	 * arm_iommu_map_page() and the other sync helpers do.
+	 */
+	if (!arch_is_coherent())
+		__dma_page_cpu_to_dev(page, offset, size, dir);
+}
+
+/*
+ * DMA mapping operations for devices behind an IOMMU. This table is
+ * installed on a device by arm_iommu_attach_device().
+ */
+struct dma_map_ops iommu_ops = {
+	/* coherent buffer allocation, release and user space mapping */
+	.alloc		= arm_iommu_alloc_attrs,
+	.free		= arm_iommu_free_attrs,
+	.mmap		= arm_iommu_mmap_attrs,
+
+	/* streaming mappings of a single buffer */
+	.map_page		= arm_iommu_map_page,
+	.unmap_page		= arm_iommu_unmap_page,
+	.sync_single_for_cpu	= arm_iommu_sync_single_for_cpu,
+	.sync_single_for_device	= arm_iommu_sync_single_for_device,
+
+	/* streaming mappings of scatter-gather lists */
+	.map_sg			= arm_iommu_map_sg,
+	.unmap_sg		= arm_iommu_unmap_sg,
+	.sync_sg_for_cpu	= arm_iommu_sync_sg_for_cpu,
+	.sync_sg_for_device	= arm_iommu_sync_sg_for_device,
+};
+
+/**
+ * arm_iommu_create_mapping
+ * @bus: pointer to the bus holding the client device (for IOMMU calls)
+ * @base: start address of the valid IO address space
+ * @size: size of the valid IO address space
+ * @order: accuracy of the IO addresses allocations
+ *
+ * Creates a mapping structure which holds information about used/unused
+ * IO address ranges, which is required to perform memory allocation and
+ * mapping with IOMMU aware functions.
+ *
+ * The client device needs to be attached to the mapping with the
+ * arm_iommu_attach_device function.
+ *
+ * Returns the new mapping holding one reference, ERR_PTR(-EINVAL) when
+ * @size is smaller than one allocation unit, or ERR_PTR(-ENOMEM) when an
+ * allocation (including the IOMMU domain) fails.
+ */
+struct dma_iommu_mapping *
+arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size,
+			 int order)
+{
+	/* Number of allocation units (2^order pages each) within @size. */
+	unsigned int count = size >> (PAGE_SHIFT + order);
+	unsigned int bitmap_size = BITS_TO_LONGS(count) * sizeof(long);
+	struct dma_iommu_mapping *mapping;
+	int err = -ENOMEM;
+
+	if (!count)
+		return ERR_PTR(-EINVAL);
+
+	mapping = kzalloc(sizeof(struct dma_iommu_mapping), GFP_KERNEL);
+	if (!mapping)
+		goto err;
+
+	mapping->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
+	if (!mapping->bitmap)
+		goto err2;
+
+	mapping->base = base;
+	/*
+	 * Expose exactly 'count' units to the allocator. The bitmap storage
+	 * is rounded up to whole longs; the padding bits correspond to IO
+	 * addresses beyond base + size and must never be handed out.
+	 */
+	mapping->bits = count;
+	mapping->order = order;
+	spin_lock_init(&mapping->lock);
+
+	mapping->domain = iommu_domain_alloc(bus);
+	if (!mapping->domain)
+		goto err3;
+
+	kref_init(&mapping->kref);
+	return mapping;
+err3:
+	kfree(mapping->bitmap);
+err2:
+	kfree(mapping);
+err:
+	return ERR_PTR(err);
+}
+
+/*
+ * kref release callback for a dma_iommu_mapping: frees the IOMMU domain,
+ * the allocation bitmap and the mapping structure itself.
+ */
+static void release_iommu_mapping(struct kref *kref)
+{
+	struct dma_iommu_mapping *m;
+
+	m = container_of(kref, struct dma_iommu_mapping, kref);
+
+	iommu_domain_free(m->domain);
+	kfree(m->bitmap);
+	kfree(m);
+}
+
+/*
+ * Drop one reference to @mapping; the mapping is destroyed by
+ * release_iommu_mapping() when the last reference goes away.
+ * A NULL @mapping is accepted and ignored.
+ */
+void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping)
+{
+	if (!mapping)
+		return;
+
+	kref_put(&mapping->kref, release_iommu_mapping);
+}
+
+/**
+ * arm_iommu_attach_device
+ * @dev: valid struct device pointer
+ * @mapping: io address space mapping structure (returned from
+ *	arm_iommu_create_mapping)
+ *
+ * Attaches the given io address space mapping to the device and switches
+ * the device's dma operations (dma_map_ops pointer) to the IOMMU aware
+ * version. Several client devices may share one io address space
+ * mapping; each successful attach takes a reference on it.
+ *
+ * Returns 0 on success or the error reported by iommu_attach_device().
+ */
+int arm_iommu_attach_device(struct device *dev,
+			    struct dma_iommu_mapping *mapping)
+{
+	int ret = iommu_attach_device(mapping->domain, dev);
+
+	if (ret != 0)
+		return ret;
+
+	/* The device now keeps the mapping alive. */
+	kref_get(&mapping->kref);
+	dev->archdata.mapping = mapping;
+	set_dma_ops(dev, &iommu_ops);
+
+	pr_info("Attached IOMMU controller to %s device.\n", dev_name(dev));
+
+	return 0;
+}
+
+#endif
diff --git a/arch/arm/mm/vmregion.h b/arch/arm/mm/vmregion.h
index 162be66..bf312c3 100644
--- a/arch/arm/mm/vmregion.h
+++ b/arch/arm/mm/vmregion.h
@@ -17,7 +17,7 @@  struct arm_vmregion {
 	struct list_head	vm_list;
 	unsigned long		vm_start;
 	unsigned long		vm_end;
-	struct page		*vm_pages;
+	void			*priv;
 	int			vm_active;
 	const void		*caller;
 };