Message ID | 20200409233350.6343-3-jcrouse@codeaurora.org |
---|---|
State | New |
Headers | show |
Series | iommu/arm-smmu: Split pagetable support for arm-smmu-v2 | expand |
On Thu, Apr 09, 2020 at 05:33:47PM -0600, Jordan Crouse wrote: > Add support to enable TTBR1 if the domain requests it via the > DOMAIN_ATTR_SPLIT_TABLES attribute. If enabled by the hardware > and pagetable configuration the driver will configure the TTBR1 region > and program the domain pagetable on TTBR1. TTBR0 will be disabled. > > After attaching the device the value of he domain attribute can > be queried to see if the split pagetables were successfully programmed. > The domain geometry will be updated as well so that the caller can > determine the active region for the pagetable that was programmed. > > Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org> > --- > > drivers/iommu/arm-smmu.c | 48 ++++++++++++++++++++++++++++++++++------ > drivers/iommu/arm-smmu.h | 24 +++++++++++++++----- > 2 files changed, 59 insertions(+), 13 deletions(-) > > diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c > index a6a5796e9c41..db6d503c1673 100644 > --- a/drivers/iommu/arm-smmu.c > +++ b/drivers/iommu/arm-smmu.c > @@ -555,11 +555,16 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, > cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr; > cb->ttbr[1] = 0; > } else { > - cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr; > - cb->ttbr[0] |= FIELD_PREP(ARM_SMMU_TTBRn_ASID, > - cfg->asid); > - cb->ttbr[1] = FIELD_PREP(ARM_SMMU_TTBRn_ASID, > - cfg->asid); > + cb->ttbr[0] = FIELD_PREP(ARM_SMMU_TTBRn_ASID, > + cfg->asid); > + > + if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) { > + cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr; > + } else { > + cb->ttbr[0] |= pgtbl_cfg->arm_lpae_s1_cfg.ttbr; > + cb->ttbr[1] = FIELD_PREP(ARM_SMMU_TTBRn_ASID, > + cfg->asid); > + } This looks odd to me. As I mentioned before, the SMMU driver absolutely has to manage the ASID space, so we should be setting it in both TTBRs here. > diff --git a/drivers/iommu/arm-smmu.h b/drivers/iommu/arm-smmu.h > index 8d1cd54d82a6..5f6d0af7c8c8 100644 > --- a/drivers/iommu/arm-smmu.h > +++ b/drivers/iommu/arm-smmu.h > @@ -172,6 +172,7 @@ enum arm_smmu_cbar_type { > #define ARM_SMMU_TCR_SH0 GENMASK(13, 12) > #define ARM_SMMU_TCR_ORGN0 GENMASK(11, 10) > #define ARM_SMMU_TCR_IRGN0 GENMASK(9, 8) > +#define ARM_SMMU_TCR_EPD0 BIT(7) > #define ARM_SMMU_TCR_T0SZ GENMASK(5, 0) > > #define ARM_SMMU_VTCR_RES1 BIT(31) > @@ -343,16 +344,27 @@ struct arm_smmu_domain { > struct mutex init_mutex; /* Protects smmu pointer */ > spinlock_t cb_lock; /* Serialises ATS1* ops and TLB syncs */ > struct iommu_domain domain; > + bool split_pagetables; > }; > > static inline u32 arm_smmu_lpae_tcr(struct io_pgtable_cfg *cfg) > { > - return ARM_SMMU_TCR_EPD1 | > - FIELD_PREP(ARM_SMMU_TCR_TG0, cfg->arm_lpae_s1_cfg.tcr.tg) | > - FIELD_PREP(ARM_SMMU_TCR_SH0, cfg->arm_lpae_s1_cfg.tcr.sh) | > - FIELD_PREP(ARM_SMMU_TCR_ORGN0, cfg->arm_lpae_s1_cfg.tcr.orgn) | > - FIELD_PREP(ARM_SMMU_TCR_IRGN0, cfg->arm_lpae_s1_cfg.tcr.irgn) | > - FIELD_PREP(ARM_SMMU_TCR_T0SZ, cfg->arm_lpae_s1_cfg.tcr.tsz); > + u32 tcr = FIELD_PREP(ARM_SMMU_TCR_TG0, cfg->arm_lpae_s1_cfg.tcr.tg) | > + FIELD_PREP(ARM_SMMU_TCR_SH0, cfg->arm_lpae_s1_cfg.tcr.sh) | > + FIELD_PREP(ARM_SMMU_TCR_ORGN0, cfg->arm_lpae_s1_cfg.tcr.orgn) | > + FIELD_PREP(ARM_SMMU_TCR_IRGN0, cfg->arm_lpae_s1_cfg.tcr.irgn) | > + FIELD_PREP(ARM_SMMU_TCR_T0SZ, cfg->arm_lpae_s1_cfg.tcr.tsz); > + > + /* > + * When TTBR1 is selected shift the TCR fields by 16 bits and disable > + * translation in TTBR0 > + */ > + if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) > + tcr = (tcr << 16) | ARM_SMMU_TCR_EPD0; This looks reasonably dodgy to me, as you copy a RESERVED bit into the A1 field. Furthermore, for 32-bit context banks you've got the EAE bit to contend with as well. Perhaps we shouldn't expose DOMAIN_ATTR_SPLIT_TABLES for anything other than the 64-bit page table format. Will
On Tue, May 19, 2020 at 07:53:26PM -0600, Jordan Crouse wrote: > On Mon, May 18, 2020 at 03:59:59PM +0100, Will Deacon wrote: > > On Thu, Apr 09, 2020 at 05:33:47PM -0600, Jordan Crouse wrote: > > > Add support to enable TTBR1 if the domain requests it via the > > > DOMAIN_ATTR_SPLIT_TABLES attribute. If enabled by the hardware > > > and pagetable configuration the driver will configure the TTBR1 region > > > and program the domain pagetable on TTBR1. TTBR0 will be disabled. > > > > > > After attaching the device the value of he domain attribute can > > > be queried to see if the split pagetables were successfully programmed. > > > The domain geometry will be updated as well so that the caller can > > > determine the active region for the pagetable that was programmed. > > > > > > Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org> > > > --- > > > > > > drivers/iommu/arm-smmu.c | 48 ++++++++++++++++++++++++++++++++++------ > > > drivers/iommu/arm-smmu.h | 24 +++++++++++++++----- > > > 2 files changed, 59 insertions(+), 13 deletions(-) > > > > > > diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c > > > index a6a5796e9c41..db6d503c1673 100644 > > > --- a/drivers/iommu/arm-smmu.c > > > +++ b/drivers/iommu/arm-smmu.c > > > @@ -555,11 +555,16 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, > > > cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr; > > > cb->ttbr[1] = 0; > > > } else { > > > - cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr; > > > - cb->ttbr[0] |= FIELD_PREP(ARM_SMMU_TTBRn_ASID, > > > - cfg->asid); > > > - cb->ttbr[1] = FIELD_PREP(ARM_SMMU_TTBRn_ASID, > > > - cfg->asid); > > > + cb->ttbr[0] = FIELD_PREP(ARM_SMMU_TTBRn_ASID, > > > + cfg->asid); > > > + > > > + if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) { > > > + cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr; > > > + } else { > > > + cb->ttbr[0] |= pgtbl_cfg->arm_lpae_s1_cfg.ttbr; > > > + cb->ttbr[1] = FIELD_PREP(ARM_SMMU_TTBRn_ASID, > > > + cfg->asid); > > > + } > > > > This looks odd to me. As I mentioned before, the SMMU driver absolutely has > > to manage the ASID space, so we should be setting it in both TTBRs here. > > Somebody had suggested a while back to only do TTBR0 but I agree that it makes > more sense for it to be on both. Sorry if it was me, I've been having a hard time piecing together how this works with the GPU. > > > diff --git a/drivers/iommu/arm-smmu.h b/drivers/iommu/arm-smmu.h > > > index 8d1cd54d82a6..5f6d0af7c8c8 100644 > > > --- a/drivers/iommu/arm-smmu.h > > > +++ b/drivers/iommu/arm-smmu.h > > > @@ -172,6 +172,7 @@ enum arm_smmu_cbar_type { > > > #define ARM_SMMU_TCR_SH0 GENMASK(13, 12) > > > #define ARM_SMMU_TCR_ORGN0 GENMASK(11, 10) > > > #define ARM_SMMU_TCR_IRGN0 GENMASK(9, 8) > > > +#define ARM_SMMU_TCR_EPD0 BIT(7) > > > #define ARM_SMMU_TCR_T0SZ GENMASK(5, 0) > > > > > > #define ARM_SMMU_VTCR_RES1 BIT(31) > > > @@ -343,16 +344,27 @@ struct arm_smmu_domain { > > > struct mutex init_mutex; /* Protects smmu pointer */ > > > spinlock_t cb_lock; /* Serialises ATS1* ops and TLB syncs */ > > > struct iommu_domain domain; > > > + bool split_pagetables; > > > }; > > > > > > static inline u32 arm_smmu_lpae_tcr(struct io_pgtable_cfg *cfg) > > > { > > > - return ARM_SMMU_TCR_EPD1 | > > > - FIELD_PREP(ARM_SMMU_TCR_TG0, cfg->arm_lpae_s1_cfg.tcr.tg) | > > > - FIELD_PREP(ARM_SMMU_TCR_SH0, cfg->arm_lpae_s1_cfg.tcr.sh) | > > > - FIELD_PREP(ARM_SMMU_TCR_ORGN0, cfg->arm_lpae_s1_cfg.tcr.orgn) | > > > - FIELD_PREP(ARM_SMMU_TCR_IRGN0, cfg->arm_lpae_s1_cfg.tcr.irgn) | > > > - FIELD_PREP(ARM_SMMU_TCR_T0SZ, cfg->arm_lpae_s1_cfg.tcr.tsz); > > > + u32 tcr = FIELD_PREP(ARM_SMMU_TCR_TG0, cfg->arm_lpae_s1_cfg.tcr.tg) | > > > + FIELD_PREP(ARM_SMMU_TCR_SH0, cfg->arm_lpae_s1_cfg.tcr.sh) | > > > + FIELD_PREP(ARM_SMMU_TCR_ORGN0, cfg->arm_lpae_s1_cfg.tcr.orgn) | > > > + FIELD_PREP(ARM_SMMU_TCR_IRGN0, cfg->arm_lpae_s1_cfg.tcr.irgn) | > > > + FIELD_PREP(ARM_SMMU_TCR_T0SZ, cfg->arm_lpae_s1_cfg.tcr.tsz); > > > + > > > + /* > > > + * When TTBR1 is selected shift the TCR fields by 16 bits and disable > > > + * translation in TTBR0 > > > + */ > > > + if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) > > > + tcr = (tcr << 16) | ARM_SMMU_TCR_EPD0; > > > > This looks reasonably dodgy to me, as you copy a RESERVED bit into the A1 > > field. Furthermore, for 32-bit context banks you've got the EAE bit to > > contend with as well. > > I can swizzle it more if we need to. I think Robin's main objection was that we > didn't want to construct the whole half of the TCR twice and have a bunch of > field definitions for the T1 space that are only used in this special case. > > > Perhaps we shouldn't expose DOMAIN_ATTR_SPLIT_TABLES for anything other than > > the 64-bit page table format. > > We kind of enforce that in arm_smmu_init_domain_context [1] by only allowing the > quirk to be set when cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64 and stage 1. I'm not > sure if that is formal enough or if we should bake something in to > io-pgtable-arm to warn against it too. Sorry again, I missed that this was predicated on 'cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64' Please can you add that to the comment above the code shifting the TCR around? I think you should also say that this clears A1. Will
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index a6a5796e9c41..db6d503c1673 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -555,11 +555,16 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr; cb->ttbr[1] = 0; } else { - cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr; - cb->ttbr[0] |= FIELD_PREP(ARM_SMMU_TTBRn_ASID, - cfg->asid); - cb->ttbr[1] = FIELD_PREP(ARM_SMMU_TTBRn_ASID, - cfg->asid); + cb->ttbr[0] = FIELD_PREP(ARM_SMMU_TTBRn_ASID, + cfg->asid); + + if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) { + cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr; + } else { + cb->ttbr[0] |= pgtbl_cfg->arm_lpae_s1_cfg.ttbr; + cb->ttbr[1] = FIELD_PREP(ARM_SMMU_TTBRn_ASID, + cfg->asid); + } } } else { cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr; @@ -673,6 +678,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, enum io_pgtable_fmt fmt; struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct arm_smmu_cfg *cfg = &smmu_domain->cfg; + unsigned long quirks = 0; mutex_lock(&smmu_domain->init_mutex); if (smmu_domain->smmu) @@ -741,6 +747,14 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, oas = smmu->ipa_size; if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) { fmt = ARM_64_LPAE_S1; + + /* + * We are assuming that split pagetables will always use + * SEP_UPSTREAM so we don't need to reduce the size of + * the ias to account for the sign extension bit + */ + if (smmu_domain->split_pagetables) + quirks |= IO_PGTABLE_QUIRK_ARM_TTBR1; } else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) { fmt = ARM_32_LPAE_S1; ias = min(ias, 32UL); @@ -810,6 +824,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK, .tlb = smmu_domain->flush_ops, .iommu_dev = smmu->dev, + .quirks = quirks, }; if (smmu_domain->non_strict) @@ -823,8 +838,16 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, /* Update the domain's page sizes to reflect the page table format */ domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap; - domain->geometry.aperture_end = (1UL << ias) - 1; - domain->geometry.force_aperture = true; + + if (pgtbl_cfg.quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) { + domain->geometry.aperture_start = ~0UL << ias; + domain->geometry.aperture_end = ~0UL; + domain->geometry.force_aperture = true; + } else { + domain->geometry.aperture_end = (1UL << ias) - 1; + domain->geometry.force_aperture = true; + smmu_domain->split_pagetables = false; + } /* Initialise the context bank with our page table cfg */ arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg); @@ -1526,6 +1549,9 @@ static int arm_smmu_domain_get_attr(struct iommu_domain *domain, case DOMAIN_ATTR_NESTING: *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED); return 0; + case DOMAIN_ATTR_SPLIT_TABLES: + *(int *)data = smmu_domain->split_pagetables; + return 0; default: return -ENODEV; } @@ -1566,6 +1592,14 @@ static int arm_smmu_domain_set_attr(struct iommu_domain *domain, else smmu_domain->stage = ARM_SMMU_DOMAIN_S1; break; + case DOMAIN_ATTR_SPLIT_TABLES: + if (smmu_domain->smmu) { + ret = -EPERM; + goto out_unlock; + } + if (*(int *)data) + smmu_domain->split_pagetables = true; + break; default: ret = -ENODEV; } diff --git a/drivers/iommu/arm-smmu.h b/drivers/iommu/arm-smmu.h index 8d1cd54d82a6..5f6d0af7c8c8 100644 --- a/drivers/iommu/arm-smmu.h +++ b/drivers/iommu/arm-smmu.h @@ -172,6 +172,7 @@ enum arm_smmu_cbar_type { #define ARM_SMMU_TCR_SH0 GENMASK(13, 12) #define ARM_SMMU_TCR_ORGN0 GENMASK(11, 10) #define ARM_SMMU_TCR_IRGN0 GENMASK(9, 8) +#define ARM_SMMU_TCR_EPD0 BIT(7) #define ARM_SMMU_TCR_T0SZ GENMASK(5, 0) #define ARM_SMMU_VTCR_RES1 BIT(31) @@ -343,16 +344,27 @@ struct arm_smmu_domain { struct mutex init_mutex; /* Protects smmu pointer */ spinlock_t cb_lock; /* Serialises ATS1* ops and TLB syncs */ struct iommu_domain domain; + bool split_pagetables; }; static inline u32 arm_smmu_lpae_tcr(struct io_pgtable_cfg *cfg) { - return ARM_SMMU_TCR_EPD1 | - FIELD_PREP(ARM_SMMU_TCR_TG0, cfg->arm_lpae_s1_cfg.tcr.tg) | - FIELD_PREP(ARM_SMMU_TCR_SH0, cfg->arm_lpae_s1_cfg.tcr.sh) | - FIELD_PREP(ARM_SMMU_TCR_ORGN0, cfg->arm_lpae_s1_cfg.tcr.orgn) | - FIELD_PREP(ARM_SMMU_TCR_IRGN0, cfg->arm_lpae_s1_cfg.tcr.irgn) | - FIELD_PREP(ARM_SMMU_TCR_T0SZ, cfg->arm_lpae_s1_cfg.tcr.tsz); + u32 tcr = FIELD_PREP(ARM_SMMU_TCR_TG0, cfg->arm_lpae_s1_cfg.tcr.tg) | + FIELD_PREP(ARM_SMMU_TCR_SH0, cfg->arm_lpae_s1_cfg.tcr.sh) | + FIELD_PREP(ARM_SMMU_TCR_ORGN0, cfg->arm_lpae_s1_cfg.tcr.orgn) | + FIELD_PREP(ARM_SMMU_TCR_IRGN0, cfg->arm_lpae_s1_cfg.tcr.irgn) | + FIELD_PREP(ARM_SMMU_TCR_T0SZ, cfg->arm_lpae_s1_cfg.tcr.tsz); + + /* + * When TTBR1 is selected shift the TCR fields by 16 bits and disable + * translation in TTBR0 + */ + if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) + tcr = (tcr << 16) | ARM_SMMU_TCR_EPD0; + else + tcr |= ARM_SMMU_TCR_EPD1; + + return tcr; } static inline u32 arm_smmu_lpae_tcr2(struct io_pgtable_cfg *cfg)
Add support to enable TTBR1 if the domain requests it via the DOMAIN_ATTR_SPLIT_TABLES attribute. If enabled by the hardware and pagetable configuration the driver will configure the TTBR1 region and program the domain pagetable on TTBR1. TTBR0 will be disabled. After attaching the device the value of he domain attribute can be queried to see if the split pagetables were successfully programmed. The domain geometry will be updated as well so that the caller can determine the active region for the pagetable that was programmed. Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org> --- drivers/iommu/arm-smmu.c | 48 ++++++++++++++++++++++++++++++++++------ drivers/iommu/arm-smmu.h | 24 +++++++++++++++----- 2 files changed, 59 insertions(+), 13 deletions(-)