@@ -131,6 +131,8 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
struct msm_ringbuffer *ring = submit->ring;
unsigned int i, ibs = 0;
+ adreno_check_and_reenable_stall(adreno_gpu);
+
if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) {
ring->cur_ctx_seqno = 0;
a5xx_submit_in_rb(gpu, submit);
@@ -212,6 +212,8 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
struct msm_ringbuffer *ring = submit->ring;
unsigned int i, ibs = 0;
+ adreno_check_and_reenable_stall(adreno_gpu);
+
a6xx_set_pagetable(a6xx_gpu, ring, submit);
get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP(0),
@@ -335,6 +337,8 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
struct msm_ringbuffer *ring = submit->ring;
unsigned int i, ibs = 0;
+ adreno_check_and_reenable_stall(adreno_gpu);
+
/*
* Toggle concurrent binning for pagetable switch and set the thread to
* BR since only it can execute the pagetable switch packets.
@@ -246,16 +246,54 @@ u64 adreno_private_address_space_size(struct msm_gpu *gpu)
return SZ_4G;
}
+void adreno_check_and_reenable_stall(struct adreno_gpu *adreno_gpu)
+{
+ struct msm_gpu *gpu = &adreno_gpu->base;
+ struct msm_drm_private *priv = gpu->dev->dev_private;
+ unsigned long flags;
+
+ /*
+ * Wait until the cooldown period has passed and we would actually
+ * collect a crashdump to re-enable stall-on-fault.
+ */
+ spin_lock_irqsave(&priv->fault_stall_lock, flags);
+ if (!priv->stall_enabled &&
+ ktime_after(ktime_get(), priv->stall_reenable_time) &&
+ !READ_ONCE(gpu->crashstate)) {
+ priv->stall_enabled = true;
+
+ gpu->aspace->mmu->funcs->set_stall(gpu->aspace->mmu, true);
+ }
+ spin_unlock_irqrestore(&priv->fault_stall_lock, flags);
+}
+
#define ARM_SMMU_FSR_TF BIT(1)
#define ARM_SMMU_FSR_PF BIT(3)
#define ARM_SMMU_FSR_EF BIT(4)
+#define ARM_SMMU_FSR_SS BIT(30)
int adreno_fault_handler(struct msm_gpu *gpu, unsigned long iova, int flags,
struct adreno_smmu_fault_info *info, const char *block,
u32 scratch[4])
{
+ struct msm_drm_private *priv = gpu->dev->dev_private;
const char *type = "UNKNOWN";
- bool do_devcoredump = info && !READ_ONCE(gpu->crashstate);
+ bool do_devcoredump = info && (info->fsr & ARM_SMMU_FSR_SS) &&
+ !READ_ONCE(gpu->crashstate);
+ unsigned long irq_flags;
+
+ /*
+ * In case there is a subsequent storm of pagefaults, disable
+ * stall-on-fault for at least half a second.
+ */
+ spin_lock_irqsave(&priv->fault_stall_lock, irq_flags);
+ if (priv->stall_enabled) {
+ priv->stall_enabled = false;
+
+ gpu->aspace->mmu->funcs->set_stall(gpu->aspace->mmu, false);
+ }
+ priv->stall_reenable_time = ktime_add_ms(ktime_get(), 500);
+ spin_unlock_irqrestore(&priv->fault_stall_lock, irq_flags);
/*
* Print a default message if we couldn't get the data from the
@@ -634,6 +634,8 @@ int adreno_fault_handler(struct msm_gpu *gpu, unsigned long iova, int flags,
struct adreno_smmu_fault_info *info, const char *block,
u32 scratch[4]);
+void adreno_check_and_reenable_stall(struct adreno_gpu *gpu);
+
int adreno_read_speedbin(struct device *dev, u32 *speedbin);
/*
@@ -319,6 +319,9 @@ static void msm_debugfs_gpu_init(struct drm_minor *minor)
debugfs_create_bool("disable_err_irq", 0600, minor->debugfs_root,
&priv->disable_err_irq);
+ debugfs_create_bool("stall_on_fault_enabled", 0400, minor->debugfs_root,
+ &priv->stall_enabled);
+
gpu_devfreq = debugfs_create_dir("devfreq", minor->debugfs_root);
debugfs_create_bool("idle_clamp",0600, gpu_devfreq,
@@ -245,6 +245,10 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv)
drm_gem_lru_init(&priv->lru.willneed, &priv->lru.lock);
drm_gem_lru_init(&priv->lru.dontneed, &priv->lru.lock);
+ /* Initialize stall-on-fault */
+ spin_lock_init(&priv->fault_stall_lock);
+ priv->stall_enabled = true;
+
/* Teach lockdep about lock ordering wrt. shrinker: */
fs_reclaim_acquire(GFP_KERNEL);
might_lock(&priv->lru.lock);
@@ -222,6 +222,29 @@ struct msm_drm_private {
* the sw hangcheck mechanism.
*/
bool disable_err_irq;
+
+ /**
+ * @fault_stall_lock:
+ *
+ * Serialize changes to stall-on-fault state.
+ */
+ spinlock_t fault_stall_lock;
+
+ /**
+ * @fault_stall_reenable_time:
+ *
+ * If stall_enabled is false, when to reenable stall-on-fault.
+ * Protected by @fault_stall_lock.
+ */
+ ktime_t stall_reenable_time;
+
+ /**
+ * @stall_enabled:
+ *
+ * Whether stall-on-fault is currently enabled. Protected by
+ * @fault_stall_lock.
+ */
+ bool stall_enabled;
};
const struct msm_format *mdp_get_format(struct msm_kms *kms, uint32_t format, uint64_t modifier);
@@ -372,6 +372,14 @@ static int msm_disp_fault_handler(struct iommu_domain *domain, struct device *de
return -ENOSYS;
}
+static void msm_iommu_set_stall(struct msm_mmu *mmu, bool enable)
+{
+ struct adreno_smmu_priv *adreno_smmu = dev_get_drvdata(mmu->dev);
+
+ if (adreno_smmu->set_stall)
+ adreno_smmu->set_stall(adreno_smmu->cookie, enable);
+}
+
static void msm_iommu_detach(struct msm_mmu *mmu)
{
struct msm_iommu *iommu = to_msm_iommu(mmu);
@@ -419,6 +427,7 @@ static const struct msm_mmu_funcs funcs = {
.map = msm_iommu_map,
.unmap = msm_iommu_unmap,
.destroy = msm_iommu_destroy,
+ .set_stall = msm_iommu_set_stall,
};
struct msm_mmu *msm_iommu_new(struct device *dev, unsigned long quirks)
@@ -15,6 +15,7 @@ struct msm_mmu_funcs {
size_t len, int prot);
int (*unmap)(struct msm_mmu *mmu, uint64_t iova, size_t len);
void (*destroy)(struct msm_mmu *mmu);
+ void (*set_stall)(struct msm_mmu *mmu, bool enable);
};
enum msm_mmu_type {