Message ID | 20250319-msm-gpu-fault-fixes-next-v5-1-97561209dd8c@gmail.com |
---|---|
State | New |
Headers | show |
Series | iommu/arm-smmu, drm/msm: Fixes for stall-on-fault | expand |
On Wed, Mar 19, 2025 at 10:44:00AM -0400, Connor Abbott wrote: > This will be used by drm/msm for GPU page faults, replacing the manual > register reading it does. > > Signed-off-by: Connor Abbott <cwabbott0@gmail.com> > Reviewed-by: Rob Clark <robdclark@gmail.com> > --- > drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c | 6 ++-- > drivers/iommu/arm/arm-smmu/arm-smmu.c | 35 ++++++++++++++---------- > drivers/iommu/arm/arm-smmu/arm-smmu.h | 7 +++-- > 3 files changed, 29 insertions(+), 19 deletions(-) [...] > diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c > index ade4684c14c9b2724a71e2457288dbfaf7562c83..a02078eb968b81a35c1c086ed7007ea2a453ef94 100644 > --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c > +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c > @@ -405,13 +405,20 @@ static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v1 = { > }; > > > -void arm_smmu_read_context_fault_info(struct arm_smmu_device *smmu, int idx, > +void arm_smmu_read_context_fault_info(struct arm_smmu_domain *smmu_domain, > struct arm_smmu_context_fault_info *cfi) > { > + struct arm_smmu_device *smmu = smmu_domain->smmu; > + int idx = smmu_domain->cfg.cbndx; > + > cfi->iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR); > + cfi->ttbr0 = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_TTBR0); > cfi->fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR); > - cfi->fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0); > + cfi->fsynr0 = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0); > + cfi->fsynr1 = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR1); > cfi->cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx)); > + if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) > + cfi->contextidr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_CONTEXTIDR); I think this leaves 'cfi->contextidr' uninitialised for stage-2 domains. 
We should probably either zero it here or just zero-initialise the whole 'cfi' struct in arm_smmu_context_fault() with a:

    struct arm_smmu_context_fault_info cfi = {};

line.

Will
On Tue, May 6, 2025 at 7:32 AM Will Deacon <will@kernel.org> wrote: > > On Wed, Mar 19, 2025 at 10:44:00AM -0400, Connor Abbott wrote: > > This will be used by drm/msm for GPU page faults, replacing the manual > > register reading it does. > > > > Signed-off-by: Connor Abbott <cwabbott0@gmail.com> > > Reviewed-by: Rob Clark <robdclark@gmail.com> > > --- > > drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c | 6 ++-- > > drivers/iommu/arm/arm-smmu/arm-smmu.c | 35 ++++++++++++++---------- > > drivers/iommu/arm/arm-smmu/arm-smmu.h | 7 +++-- > > 3 files changed, 29 insertions(+), 19 deletions(-) > > [...] > > > diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c > > index ade4684c14c9b2724a71e2457288dbfaf7562c83..a02078eb968b81a35c1c086ed7007ea2a453ef94 100644 > > --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c > > +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c > > @@ -405,13 +405,20 @@ static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v1 = { > > }; > > > > > > -void arm_smmu_read_context_fault_info(struct arm_smmu_device *smmu, int idx, > > +void arm_smmu_read_context_fault_info(struct arm_smmu_domain *smmu_domain, > > struct arm_smmu_context_fault_info *cfi) > > { > > + struct arm_smmu_device *smmu = smmu_domain->smmu; > > + int idx = smmu_domain->cfg.cbndx; > > + > > cfi->iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR); > > + cfi->ttbr0 = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_TTBR0); > > cfi->fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR); > > - cfi->fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0); > > + cfi->fsynr0 = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0); > > + cfi->fsynr1 = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR1); > > cfi->cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx)); > > + if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) > > + cfi->contextidr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_CONTEXTIDR); > > I think this leaves 'cfi->contextidr' uninitialised for stage-2 domains. 
> We should probably either zero it here or just zero-initialise the whole
> 'cfi' struct in arm_smmu_context_fault() with a:
>
>     struct arm_smmu_context_fault_info cfi = {};
>
> line.
>
> Will

The next patch moves the context fault info into arm_smmu_domain, which is already zero-initialized.

Connor
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c index 548783f3f8e89fd978367afa65c473002f66e2e7..5bf1aa4aa941962710f1f14260e133d560aee86f 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c @@ -393,14 +393,14 @@ irqreturn_t qcom_smmu_context_fault(int irq, void *dev) DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); - arm_smmu_read_context_fault_info(smmu, idx, &cfi); + arm_smmu_read_context_fault_info(smmu_domain, &cfi); if (!(cfi.fsr & ARM_SMMU_CB_FSR_FAULT)) return IRQ_NONE; if (list_empty(&tbu_list)) { ret = report_iommu_fault(&smmu_domain->domain, NULL, cfi.iova, - cfi.fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ); + cfi.fsynr0 & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ); if (ret == -ENOSYS) arm_smmu_print_context_fault_info(smmu, idx, &cfi); @@ -412,7 +412,7 @@ irqreturn_t qcom_smmu_context_fault(int irq, void *dev) phys_soft = ops->iova_to_phys(ops, cfi.iova); tmp = report_iommu_fault(&smmu_domain->domain, NULL, cfi.iova, - cfi.fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ); + cfi.fsynr0 & ARM_SMMU_CB_FSYNR0_WNR ? 
IOMMU_FAULT_WRITE : IOMMU_FAULT_READ); if (!tmp || tmp == -EBUSY) { ret = IRQ_HANDLED; resume = ARM_SMMU_RESUME_TERMINATE; diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index ade4684c14c9b2724a71e2457288dbfaf7562c83..a02078eb968b81a35c1c086ed7007ea2a453ef94 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -405,13 +405,20 @@ static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v1 = { }; -void arm_smmu_read_context_fault_info(struct arm_smmu_device *smmu, int idx, +void arm_smmu_read_context_fault_info(struct arm_smmu_domain *smmu_domain, struct arm_smmu_context_fault_info *cfi) { + struct arm_smmu_device *smmu = smmu_domain->smmu; + int idx = smmu_domain->cfg.cbndx; + cfi->iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR); + cfi->ttbr0 = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_TTBR0); cfi->fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR); - cfi->fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0); + cfi->fsynr0 = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0); + cfi->fsynr1 = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR1); cfi->cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx)); + if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) + cfi->contextidr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_CONTEXTIDR); } void arm_smmu_print_context_fault_info(struct arm_smmu_device *smmu, int idx, @@ -419,7 +426,7 @@ void arm_smmu_print_context_fault_info(struct arm_smmu_device *smmu, int idx, { dev_err(smmu->dev, "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n", - cfi->fsr, cfi->iova, cfi->fsynr, cfi->cbfrsynra, idx); + cfi->fsr, cfi->iova, cfi->fsynr0, cfi->cbfrsynra, idx); dev_err(smmu->dev, "FSR = %08x [%s%sFormat=%u%s%s%s%s%s%s%s%s], SID=0x%x\n", cfi->fsr, @@ -437,15 +444,15 @@ void arm_smmu_print_context_fault_info(struct arm_smmu_device *smmu, int idx, cfi->cbfrsynra); dev_err(smmu->dev, "FSYNR0 = %08x 
[S1CBNDX=%u%s%s%s%s%s%s PLVL=%u]\n", - cfi->fsynr, - (u32)FIELD_GET(ARM_SMMU_CB_FSYNR0_S1CBNDX, cfi->fsynr), - (cfi->fsynr & ARM_SMMU_CB_FSYNR0_AFR) ? " AFR" : "", - (cfi->fsynr & ARM_SMMU_CB_FSYNR0_PTWF) ? " PTWF" : "", - (cfi->fsynr & ARM_SMMU_CB_FSYNR0_NSATTR) ? " NSATTR" : "", - (cfi->fsynr & ARM_SMMU_CB_FSYNR0_IND) ? " IND" : "", - (cfi->fsynr & ARM_SMMU_CB_FSYNR0_PNU) ? " PNU" : "", - (cfi->fsynr & ARM_SMMU_CB_FSYNR0_WNR) ? " WNR" : "", - (u32)FIELD_GET(ARM_SMMU_CB_FSYNR0_PLVL, cfi->fsynr)); + cfi->fsynr0, + (u32)FIELD_GET(ARM_SMMU_CB_FSYNR0_S1CBNDX, cfi->fsynr0), + (cfi->fsynr0 & ARM_SMMU_CB_FSYNR0_AFR) ? " AFR" : "", + (cfi->fsynr0 & ARM_SMMU_CB_FSYNR0_PTWF) ? " PTWF" : "", + (cfi->fsynr0 & ARM_SMMU_CB_FSYNR0_NSATTR) ? " NSATTR" : "", + (cfi->fsynr0 & ARM_SMMU_CB_FSYNR0_IND) ? " IND" : "", + (cfi->fsynr0 & ARM_SMMU_CB_FSYNR0_PNU) ? " PNU" : "", + (cfi->fsynr0 & ARM_SMMU_CB_FSYNR0_WNR) ? " WNR" : "", + (u32)FIELD_GET(ARM_SMMU_CB_FSYNR0_PLVL, cfi->fsynr0)); } static irqreturn_t arm_smmu_context_fault(int irq, void *dev) @@ -458,13 +465,13 @@ static irqreturn_t arm_smmu_context_fault(int irq, void *dev) int idx = smmu_domain->cfg.cbndx; int ret; - arm_smmu_read_context_fault_info(smmu, idx, &cfi); + arm_smmu_read_context_fault_info(smmu_domain, &cfi); if (!(cfi.fsr & ARM_SMMU_CB_FSR_FAULT)) return IRQ_NONE; ret = report_iommu_fault(&smmu_domain->domain, NULL, cfi.iova, - cfi.fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ); + cfi.fsynr0 & ARM_SMMU_CB_FSYNR0_WNR ? 
IOMMU_FAULT_WRITE : IOMMU_FAULT_READ); if (ret == -ENOSYS && __ratelimit(&rs)) arm_smmu_print_context_fault_info(smmu, idx, &cfi); diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h index e2aeb511ae903302e3c15d2cf5f22e2a26ac2346..ef6915a0d9f62b0a1734a3ee57ea422615036094 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h @@ -543,12 +543,15 @@ int arm_mmu500_reset(struct arm_smmu_device *smmu); struct arm_smmu_context_fault_info { unsigned long iova; + u64 ttbr0; u32 fsr; - u32 fsynr; + u32 fsynr0; + u32 fsynr1; u32 cbfrsynra; + u32 contextidr; }; -void arm_smmu_read_context_fault_info(struct arm_smmu_device *smmu, int idx, +void arm_smmu_read_context_fault_info(struct arm_smmu_domain *smmu_domain, struct arm_smmu_context_fault_info *cfi); void arm_smmu_print_context_fault_info(struct arm_smmu_device *smmu, int idx,