diff mbox series

[v2,1/3] iommu: io-pgtable: add DART pagetable format

Message ID 20210328074009.95932-2-sven@svenpeter.dev
State New
Headers show
Series Apple M1 DART IOMMU driver | expand

Commit Message

Sven Peter March 28, 2021, 7:40 a.m. UTC
Apple's DART iommu uses a pagetable format that shares some
similarities with the ones already implemented by io-pgtable.c.
Add a new format variant to support the required differences
so that we don't have to duplicate the pagetable handling code.

Signed-off-by: Sven Peter <sven@svenpeter.dev>
---
 drivers/iommu/io-pgtable-arm.c | 59 ++++++++++++++++++++++++++++++++++
 drivers/iommu/io-pgtable.c     |  1 +
 include/linux/io-pgtable.h     |  6 ++++
 3 files changed, 66 insertions(+)

Comments

kernel test robot March 28, 2021, 10:04 a.m. UTC | #1
Hi Sven,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on arm-perf/for-next/perf]
[also build test ERROR on linus/master v5.12-rc4 next-20210326]
[cannot apply to iommu/next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Sven-Peter/Apple-M1-DART-IOMMU-driver/20210328-154437
base:   https://git.kernel.org/pub/scm/linux/kernel/git/will/linux.git for-next/perf
config: m68k-allmodconfig (attached as .config)
compiler: m68k-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/58778f8d813f5d98aedff621f0b236f60fe1dd2b
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Sven-Peter/Apple-M1-DART-IOMMU-driver/20210328-154437
        git checkout 58778f8d813f5d98aedff621f0b236f60fe1dd2b
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=m68k 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

>> m68k-linux-ld: drivers/iommu/io-pgtable.o:(.rodata+0x1c): undefined reference to `io_pgtable_apple_dart_init_fns'

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Sven Peter March 28, 2021, 10:13 a.m. UTC | #2
Hi,

On Sun, Mar 28, 2021, at 11:42, kernel test robot wrote:
> 
> All errors (new ones prefixed by >>):
> 
> >> nios2-linux-ld: drivers/iommu/io-pgtable.o:(.rodata+0x1c): undefined reference to `io_pgtable_apple_dart_init_fns'
> 
>

That one was me not being careful enough when removing the Kconfig flag
compared to v1.

Fixed by moving

	[ARM_APPLE_DART] = &io_pgtable_apple_dart_init_fns,

into the #ifdef CONFIG_IOMMU_IO_PGTABLE_LPAE region in drivers/iommu/io-pgtable.c
a few lines above.


Thanks,

Sven
Will Deacon April 7, 2021, 10:44 a.m. UTC | #3
On Sun, Mar 28, 2021 at 09:40:07AM +0200, Sven Peter wrote:
> Apple's DART iommu uses a pagetable format that shares some

> similarities with the ones already implemented by io-pgtable.c.

> Add a new format variant to support the required differences

> so that we don't have to duplicate the pagetable handling code.

> 

> Signed-off-by: Sven Peter <sven@svenpeter.dev>

> ---

>  drivers/iommu/io-pgtable-arm.c | 59 ++++++++++++++++++++++++++++++++++

>  drivers/iommu/io-pgtable.c     |  1 +

>  include/linux/io-pgtable.h     |  6 ++++

>  3 files changed, 66 insertions(+)

> 

> diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c

> index 87def58e79b5..2f63443fd115 100644

> --- a/drivers/iommu/io-pgtable-arm.c

> +++ b/drivers/iommu/io-pgtable-arm.c

> @@ -127,6 +127,9 @@

>  #define ARM_MALI_LPAE_MEMATTR_IMP_DEF	0x88ULL

>  #define ARM_MALI_LPAE_MEMATTR_WRITE_ALLOC 0x8DULL

>  

> +#define APPLE_DART_PTE_PROT_NO_WRITE (1<<7)

> +#define APPLE_DART_PTE_PROT_NO_READ (1<<8)

> +

>  /* IOPTE accessors */

>  #define iopte_deref(pte,d) __va(iopte_to_paddr(pte, d))

>  

> @@ -381,6 +384,15 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data,

>  {

>  	arm_lpae_iopte pte;

>  

> +	if (data->iop.fmt == ARM_APPLE_DART) {

> +		pte = 0;

> +		if (!(prot & IOMMU_WRITE))

> +			pte |= APPLE_DART_PTE_PROT_NO_WRITE;

> +		if (!(prot & IOMMU_READ))

> +			pte |= APPLE_DART_PTE_PROT_NO_READ;

> +		return pte;

> +	}

> +

>  	if (data->iop.fmt == ARM_64_LPAE_S1 ||

>  	    data->iop.fmt == ARM_32_LPAE_S1) {

>  		pte = ARM_LPAE_PTE_nG;

> @@ -1043,6 +1055,48 @@ arm_mali_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)

>  	return NULL;

>  }

>  

> +static struct io_pgtable *

> +apple_dart_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)

> +{

> +	struct arm_lpae_io_pgtable *data;

> +

> +	if (cfg->ias > 36)

> +		return NULL;

> +	if (cfg->oas > 36)

> +		return NULL;

> +

> +	if (!cfg->coherent_walk)

> +		return NULL;


This all feels like IOMMU-specific limitations leaking into the page-table
code here; it doesn't feel so unlikely that future implementations of this
IP might have greater addressing capabilities, for example, and so I don't
see why the page-table code needs to police this.

> +	cfg->pgsize_bitmap &= SZ_16K;

> +	if (!cfg->pgsize_bitmap)

> +		return NULL;


This is worrying (and again, I don't think this belongs here). How is this
thing supposed to work if the CPU is using 4k pages?

Will
Sven Peter April 9, 2021, 4:55 p.m. UTC | #4
On Wed, Apr 7, 2021, at 12:44, Will Deacon wrote:
> On Sun, Mar 28, 2021 at 09:40:07AM +0200, Sven Peter wrote:

[...]
> >  

> > +static struct io_pgtable *

> > +apple_dart_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)

> > +{

> > +	struct arm_lpae_io_pgtable *data;

> > +

> > +	if (cfg->ias > 36)

> > +		return NULL;

> > +	if (cfg->oas > 36)

> > +		return NULL;

> > +

> > +	if (!cfg->coherent_walk)

> > +		return NULL;

> 

> This all feels like IOMMU-specific limitations leaking into the page-table

> code here; it doesn't feel so unlikely that future implementations of this

> IP might have greater addressing capabilities, for example, and so I don't

> see why the page-table code needs to police this.


That's true, this really doesn't belong here.
I'll fix it for the next version and make sure to keep iommu-specific
limitations inside the driver itself.


> 

> > +	cfg->pgsize_bitmap &= SZ_16K;

> > +	if (!cfg->pgsize_bitmap)

> > +		return NULL;

> 

> This is worrying (and again, I don't think this belongs here). How is this

> thing supposed to work if the CPU is using 4k pages?


This SoC is just full of fun surprises!
I didn't even think about that case since I've always been using 16k pages so far.

I've checked again and wasn't able to find any way to configure the pagesize
of the IOMMU. There seem to be variants of this IP in older iPhones which
support a 4k pagesize but to the best of my knowledge this is hard wired
and not configurable in software.

When booting with 4k pages I hit the BUG_ON in iova.c that ensures that the
iommu pagesize has to be <= the cpu page size.

I see two options here and I'm not sure I like either of them:

1) Just don't support 4k CPU pages together with IOMMU translations and only
   allow full bypass mode there.
   This would however mean that PCIe (i.e. ethernet, usb ports on the Mac
   mini) and possibly Thunderbolt support would not be possible since these
   devices don't seem to like iommu bypass mode at all.

2) I've had a brief discussion on IRC with Arnd about this [1] and he pointed
   out that the dma_map_sg API doesn't make any guarantees about the returned
   iovas and that it might be possible to make this work at least for devices
   that go through the normal DMA API.

   I've then replaced the page size check with a WARN_ON in iova.c just to see
   what happens. At least normal devices that go through the DMA API seem to
   work with my configuration. iommu_dma_alloc took the iommu_dma_alloc_remap
   path which was called with the cpu page size but then used
   domain->pgsize_bitmap to increase that to 16k. So this kinda works out, but
   there are other functions in dma-iommu.c that I believe rely on the fact that
   the iommu can map single cpu pages. This feels very fragile right now and
   would probably require some rather invasive changes.

   Any driver that tries to use the iommu API directly could be trouble
   as well if they make similar assumptions.

   Is this something you would even want to support in the iommu subsystem
   and is it even possible to do this in a sane way?


Best,


Sven


[1] https://freenode.irclog.whitequark.org/asahi/2021-04-07#29609786;
Arnd Bergmann April 9, 2021, 7:38 p.m. UTC | #5
On Fri, Apr 9, 2021 at 6:56 PM Sven Peter <sven@svenpeter.dev> wrote:
> On Wed, Apr 7, 2021, at 12:44, Will Deacon wrote:

> > On Sun, Mar 28, 2021 at 09:40:07AM +0200, Sven Peter wrote:

> >

> > > +   cfg->pgsize_bitmap &= SZ_16K;

> > > +   if (!cfg->pgsize_bitmap)

> > > +           return NULL;

> >

> > This is worrying (and again, I don't think this belongs here). How is this

> > thing supposed to work if the CPU is using 4k pages?

>

> This SoC is just full of fun surprises!

> I didn't even think about that case since I've always been using 16k pages so far.

>

> I've checked again and wasn't able to find any way to configure the pagesize

> of the IOMMU. There seem to be variants of this IP in older iPhones which

> support a 4k pagesize but to the best of my knowledge this is hard wired

> and not configurable in software.

>

> When booting with 4k pages I hit the BUG_ON in iova.c that ensures that the

> iommu pagesize has to be <= the cpu page size.

>

> I see two options here and I'm not sure I like either of them:

>

> 1) Just don't support 4k CPU pages together with IOMMU translations and only

>    allow full bypass mode there.

>    This would however mean that PCIe (i.e. ethernet, usb ports on the Mac

>    mini) and possibly Thunderbolt support would not be possible since these

>    devices don't seem to like iommu bypass mode at all.


It should be possible to do a fake bypass mode by just programming a
static page table for as much address space as you can, and then
use swiotlb to address any memory beyond that. This won't perform
well because it requires bounce buffers for any high memory, but it
can be a last resort if a dart instance cannot do normal bypass mode.

> 2) I've had a brief discussion on IRC with Arnd about this [1] and he pointed

>    out that the dma_map_sg API doesn't make any guarantees about the returned

>    iovas and that it might be possible to make this work at least for devices

>    that go through the normal DMA API.

>

>    I've then replaced the page size check with a WARN_ON in iova.c just to see

>    what happens. At least normal devices that go through the DMA API seem to

>    work with my configuration. iommu_dma_alloc took the iommu_dma_alloc_remap

>    path which was called with the cpu page size but then used

>    domain->pgsize_bitmap to increase that to 16k. So this kinda works out, but

>    there are other functions in dma-iommu.c that I believe rely on the fact that

>    the iommu can map single cpu pages. This feels very fragile right now and

>    would probably require some rather invasive changes.


The other second-to-last resort here would be to duplicate the code from
the dma-iommu code and implement the dma-mapping API directly on
top of the dart hardware instead of the iommu layer. This would probably
be much faster than the swiotlb on top of a bypass or a linear map,
but it's a really awful abstraction that would require adding special cases
into a lot of generic code.

>    Any driver that tries to use the iommu API directly could be trouble

>    as well if they make similar assumptions.


I think pretty much all drivers using the iommu API directly already
depends on having a matching page size.  I don't see any way to use
e.g. PCI device assignment using vfio, or a GPU driver with per-process
contexts when the iotlb page size is larger than the CPU's.

> >    Is this something you would even want to support in the iommu subsystem

> >    and is it even possible to do this in a sane way?


I don't know how hard it is to do adjust the dma-iommu implementation
to allow this, but as long as we can work out the DT binding to support
both normal dma-iommu mode with 16KB pages and some kind of
passthrough mode (emulated or not) with 4KB pages, it can be left
as a possible optimization for later.

        Arnd
Will Deacon April 19, 2021, 4:31 p.m. UTC | #6
On Fri, Apr 09, 2021 at 09:38:15PM +0200, Arnd Bergmann wrote:
> On Fri, Apr 9, 2021 at 6:56 PM Sven Peter <sven@svenpeter.dev> wrote:

> > On Wed, Apr 7, 2021, at 12:44, Will Deacon wrote:

> > > On Sun, Mar 28, 2021 at 09:40:07AM +0200, Sven Peter wrote:

> > >

> > > > +   cfg->pgsize_bitmap &= SZ_16K;

> > > > +   if (!cfg->pgsize_bitmap)

> > > > +           return NULL;

> > >

> > > This is worrying (and again, I don't think this belongs here). How is this

> > > thing supposed to work if the CPU is using 4k pages?

> >

> > This SoC is just full of fun surprises!

> > I didn't even think about that case since I've always been using 16k pages so far.

> >

> > I've checked again and wasn't able to find any way to configure the pagesize

> > of the IOMMU. There seem to be variants of this IP in older iPhones which

> > support a 4k pagesize but to the best of my knowledge this is hard wired

> > and not configurable in software.

> >

> > When booting with 4k pages I hit the BUG_ON in iova.c that ensures that the

> > iommu pagesize has to be <= the cpu page size.

> >

> > I see two options here and I'm not sure I like either of them:

> >

> > 1) Just don't support 4k CPU pages together with IOMMU translations and only

> >    allow full bypass mode there.

> >    This would however mean that PCIe (i.e. ethernet, usb ports on the Mac

> >    mini) and possibly Thunderbolt support would not be possible since these

> >    devices don't seem to like iommu bypass mode at all.

> 

> It should be possible to do a fake bypass mode by just programming a

> static page table for as much address space as you can, and then

> use swiotlb to address any memory beyond that. This won't perform

> well because it requires bounce buffers for any high memory, but it

> can be a last resort if a dart instance cannot do normal bypass mode.

> 

> > 2) I've had a brief discussion on IRC with Arnd about this [1] and he pointed

> >    out that the dma_map_sg API doesn't make any guarantees about the returned

> >    iovas and that it might be possible to make this work at least for devices

> >    that go through the normal DMA API.

> >

> >    I've then replaced the page size check with a WARN_ON in iova.c just to see

> >    what happens. At least normal devices that go through the DMA API seem to

> >    work with my configuration. iommu_dma_alloc took the iommu_dma_alloc_remap

> >    path which was called with the cpu page size but then used

> >    domain->pgsize_bitmap to increase that to 16k. So this kinda works out, but

> >    there are other functions in dma-iommu.c that I believe rely on the fact that

> >    the iommu can map single cpu pages. This feels very fragile right now and

> >    would probably require some rather invasive changes.

> 

> The other second-to-last resort here would be to duplicate the code from

> the dma-iommu code and implement the dma-mapping API directly on

> top of the dart hardware instead of the iommu layer. This would probably

> be much faster than the swiotlb on top of a bypass or a linear map,

> but it's a really awful abstraction that would require adding special cases

> into a lot of generic code.

> 

> >    Any driver that tries to use the iommu API directly could be trouble

> >    as well if they make similar assumptions.

> 

> I think pretty much all drivers using the iommu API directly already

> depends on having a matching page size.  I don't see any way to use

> e.g. PCI device assignment using vfio, or a GPU driver with per-process

> contexts when the iotlb page size is larger than the CPU's.

> 

> > >    Is this something you would even want to support in the iommu subsystem

> > >    and is it even possible to do this in a sane way?

> 

> I don't know how hard it is to do adjust the dma-iommu implementation

> to allow this, but as long as we can work out the DT binding to support

> both normal dma-iommu mode with 16KB pages and some kind of

> passthrough mode (emulated or not) with 4KB pages, it can be left

> as a possible optimization for later.


I think one of the main things to modify is the IOVA allocator
(drivers/iommu/iova.c). Once that is happy with pages bigger than the CPU
page size, then you could probably fake everything else in the DMA layer by
offsetting the returned DMA addresses into the 16K page which got mapped.

Will
diff mbox series

Patch

diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 87def58e79b5..2f63443fd115 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -127,6 +127,9 @@ 
 #define ARM_MALI_LPAE_MEMATTR_IMP_DEF	0x88ULL
 #define ARM_MALI_LPAE_MEMATTR_WRITE_ALLOC 0x8DULL
 
+#define APPLE_DART_PTE_PROT_NO_WRITE (1<<7)
+#define APPLE_DART_PTE_PROT_NO_READ (1<<8)
+
 /* IOPTE accessors */
 #define iopte_deref(pte,d) __va(iopte_to_paddr(pte, d))
 
@@ -381,6 +384,15 @@  static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data,
 {
 	arm_lpae_iopte pte;
 
+	if (data->iop.fmt == ARM_APPLE_DART) {
+		pte = 0;
+		if (!(prot & IOMMU_WRITE))
+			pte |= APPLE_DART_PTE_PROT_NO_WRITE;
+		if (!(prot & IOMMU_READ))
+			pte |= APPLE_DART_PTE_PROT_NO_READ;
+		return pte;
+	}
+
 	if (data->iop.fmt == ARM_64_LPAE_S1 ||
 	    data->iop.fmt == ARM_32_LPAE_S1) {
 		pte = ARM_LPAE_PTE_nG;
@@ -1043,6 +1055,48 @@  arm_mali_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
 	return NULL;
 }
 
+static struct io_pgtable *
+apple_dart_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
+{
+	struct arm_lpae_io_pgtable *data;
+
+	if (cfg->ias > 36)
+		return NULL;
+	if (cfg->oas > 36)
+		return NULL;
+
+	if (!cfg->coherent_walk)
+		return NULL;
+
+	cfg->pgsize_bitmap &= SZ_16K;
+	if (!cfg->pgsize_bitmap)
+		return NULL;
+
+	if (cfg->quirks)
+		return NULL;
+
+	data = arm_lpae_alloc_pgtable(cfg);
+	if (!data)
+		return NULL;
+
+	data->start_level = 2;
+	data->pgd_bits = 11;
+	data->bits_per_level = 11;
+
+	data->pgd = __arm_lpae_alloc_pages(ARM_LPAE_PGD_SIZE(data), GFP_KERNEL,
+					   cfg);
+	if (!data->pgd)
+		goto out_free_data;
+
+	cfg->apple_dart_cfg.ttbr = virt_to_phys(data->pgd);
+
+	return &data->iop;
+
+out_free_data:
+	kfree(data);
+	return NULL;
+}
+
 struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s1_init_fns = {
 	.alloc	= arm_64_lpae_alloc_pgtable_s1,
 	.free	= arm_lpae_free_pgtable,
@@ -1068,6 +1122,11 @@  struct io_pgtable_init_fns io_pgtable_arm_mali_lpae_init_fns = {
 	.free	= arm_lpae_free_pgtable,
 };
 
+struct io_pgtable_init_fns io_pgtable_apple_dart_init_fns = {
+	.alloc	= apple_dart_alloc_pgtable,
+	.free	= arm_lpae_free_pgtable,
+};
+
 #ifdef CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST
 
 static struct io_pgtable_cfg *cfg_cookie __initdata;
diff --git a/drivers/iommu/io-pgtable.c b/drivers/iommu/io-pgtable.c
index 6e9917ce980f..6ec75f3e9c3b 100644
--- a/drivers/iommu/io-pgtable.c
+++ b/drivers/iommu/io-pgtable.c
@@ -27,6 +27,7 @@  io_pgtable_init_table[IO_PGTABLE_NUM_FMTS] = {
 #ifdef CONFIG_AMD_IOMMU
 	[AMD_IOMMU_V1] = &io_pgtable_amd_iommu_v1_init_fns,
 #endif
+	[ARM_APPLE_DART] = &io_pgtable_apple_dart_init_fns,
 };
 
 struct io_pgtable_ops *alloc_io_pgtable_ops(enum io_pgtable_fmt fmt,
diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index a4c9ca2c31f1..b06925eb20d3 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -16,6 +16,7 @@  enum io_pgtable_fmt {
 	ARM_V7S,
 	ARM_MALI_LPAE,
 	AMD_IOMMU_V1,
+	ARM_APPLE_DART,
 	IO_PGTABLE_NUM_FMTS,
 };
 
@@ -136,6 +137,10 @@  struct io_pgtable_cfg {
 			u64	transtab;
 			u64	memattr;
 		} arm_mali_lpae_cfg;
+
+		struct {
+			u64 ttbr;
+		} apple_dart_cfg;
 	};
 };
 
@@ -250,5 +255,6 @@  extern struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s2_init_fns;
 extern struct io_pgtable_init_fns io_pgtable_arm_v7s_init_fns;
 extern struct io_pgtable_init_fns io_pgtable_arm_mali_lpae_init_fns;
 extern struct io_pgtable_init_fns io_pgtable_amd_iommu_v1_init_fns;
+extern struct io_pgtable_init_fns io_pgtable_apple_dart_init_fns;
 
 #endif /* __IO_PGTABLE_H */