Message ID | 20240426-a750-raytracing-v2-4-562ac9866d63@gmail.com |
---|---|
State | Superseded |
Headers | show |
Series | drm/msm: Support a750 "software fuse" for raytracing | expand |
On 26.04.2024 8:34 PM, Connor Abbott wrote: > On all Qualcomm platforms with a7xx GPUs, qcom_scm provides a method to > initialize cx_mem. Copy this from downstream (minus BCL which we > currently don't support). On a750, this includes a new "fuse" register > which can be used by qcom_scm to fuse off certain features like > raytracing in software. The fuse is default off, and is initialized by > calling the method. Afterwards we have to read it to find out which > features were enabled. > > Signed-off-by: Connor Abbott <cwabbott0@gmail.com> > --- [...] > +static void a7xx_sw_fuse_violation_irq(struct msm_gpu *gpu) > +{ > + u32 status; > + > + status = gpu_read(gpu, REG_A7XX_RBBM_SW_FUSE_INT_STATUS); > + gpu_write(gpu, REG_A7XX_RBBM_SW_FUSE_INT_MASK, 0); > + > + dev_err_ratelimited(&gpu->pdev->dev, "SW fuse violation status=%8.8x\n", status); > + > + /* Ignore FASTBLEND violations, because the HW will silently fall back > + * to legacy blending. /* * foo > + */ > + if (status & (A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING | > + A7XX_CX_MISC_SW_FUSE_VALUE_LPAC)) { > + del_timer(&gpu->hangcheck_timer); > + > + kthread_queue_work(gpu->worker, &gpu->recover_work); > + } > +} > + > static irqreturn_t a6xx_irq(struct msm_gpu *gpu) > { > struct msm_drm_private *priv = gpu->dev->dev_private; > @@ -2384,6 +2406,9 @@ static irqreturn_t a6xx_irq(struct msm_gpu *gpu) > if (status & A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS) > dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Out of bounds access\n"); > > + if (status & A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION) Does this field actualy exist on a6 too? > + a7xx_sw_fuse_violation_irq(gpu); > + > if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) > msm_gpu_retire(gpu); > > @@ -2525,6 +2550,59 @@ static void a6xx_llc_slices_init(struct platform_device *pdev, > a6xx_gpu->llc_mmio = ERR_PTR(-EINVAL); > } > > +static int a7xx_cx_mem_init(struct a6xx_gpu *a6xx_gpu) > +{ > + struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; > + struct msm_gpu *gpu = &adreno_gpu->base; > + u32 fuse_val; > + int ret = 0; > + > + if (adreno_is_a750(adreno_gpu)) { > + /* Assume that if qcom scm isn't available, that whatever > + * replacement allows writing the fuse register ourselves. > + * Users of alternative firmware need to make sure this > + * register is writeable or indicate that it's not somehow. > + * Print a warning because if you mess this up you're about to > + * crash horribly. > + */ > + if (!qcom_scm_is_available()) { > + dev_warn_once(gpu->dev->dev, > + "SCM is not available, poking fuse register\n"); > + a6xx_llc_write(a6xx_gpu, REG_A7XX_CX_MISC_SW_FUSE_VALUE, > + A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING | > + A7XX_CX_MISC_SW_FUSE_VALUE_FASTBLEND | > + A7XX_CX_MISC_SW_FUSE_VALUE_LPAC); > + adreno_gpu->has_ray_tracing = true; I'm not 100% sure. I'm afraid there may be SKUs with RT cores fused off (as in, cut off from the rest, not "indicated unavailable") or otherwise dysfunctional.. My guess would be that TZ probably has some sort of a LUT/match table based on other SoC identifiers > + return 0; > + } > + > + ret = qcom_scm_gpu_init_regs(QCOM_SCM_GPU_ALWAYS_EN_REQ | > + QCOM_SCM_GPU_TSENSE_EN_REQ); > + if (ret) > + return ret; > + > + /* On a750 raytracing may be disabled by the firmware, find out whether > + * that's the case. The scm call above sets the fuse register. > + */ > + fuse_val = a6xx_llc_read(a6xx_gpu, REG_A7XX_CX_MISC_SW_FUSE_VALUE); > + adreno_gpu->has_ray_tracing = > + !!(fuse_val & A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING); > + } else { > + if (adreno_is_a740(adreno_gpu)) { > + /* Raytracing is always enabled on a740 */ > + adreno_gpu->has_ray_tracing = true; > + } > + > + if (!qcom_scm_is_available()) > + return 0; > + > + ret = qcom_scm_gpu_init_regs(QCOM_SCM_GPU_ALWAYS_EN_REQ); > + } > + > + return ret; if (qcom_scm_is_available()) return qcom_scm_gpu_init_regs(QCOM_SCM_GPU_ALWAYS_EN_REQ); } return 0; ? Konrad
On Sat, Apr 27, 2024 at 1:19 PM Konrad Dybcio <konrad.dybcio@linaro.org> wrote: > > On 26.04.2024 8:34 PM, Connor Abbott wrote: > > On all Qualcomm platforms with a7xx GPUs, qcom_scm provides a method to > > initialize cx_mem. Copy this from downstream (minus BCL which we > > currently don't support). On a750, this includes a new "fuse" register > > which can be used by qcom_scm to fuse off certain features like > > raytracing in software. The fuse is default off, and is initialized by > > calling the method. Afterwards we have to read it to find out which > > features were enabled. > > > > Signed-off-by: Connor Abbott <cwabbott0@gmail.com> > > --- > > [...] > > > +static void a7xx_sw_fuse_violation_irq(struct msm_gpu *gpu) > > +{ > > + u32 status; > > + > > + status = gpu_read(gpu, REG_A7XX_RBBM_SW_FUSE_INT_STATUS); > > + gpu_write(gpu, REG_A7XX_RBBM_SW_FUSE_INT_MASK, 0); > > + > > + dev_err_ratelimited(&gpu->pdev->dev, "SW fuse violation status=%8.8x\n", status); > > + > > + /* Ignore FASTBLEND violations, because the HW will silently fall back > > + * to legacy blending. > > /* > * foo > > > > > + */ > > + if (status & (A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING | > > + A7XX_CX_MISC_SW_FUSE_VALUE_LPAC)) { > > + del_timer(&gpu->hangcheck_timer); > > + > > + kthread_queue_work(gpu->worker, &gpu->recover_work); > > + } > > +} > > + > > static irqreturn_t a6xx_irq(struct msm_gpu *gpu) > > { > > struct msm_drm_private *priv = gpu->dev->dev_private; > > @@ -2384,6 +2406,9 @@ static irqreturn_t a6xx_irq(struct msm_gpu *gpu) > > if (status & A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS) > > dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Out of bounds access\n"); > > > > + if (status & A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION) > > Does this field actualy exist on a6 too? No, it doesn't. It's correctly marked as an a7xx-only field in the XML, and we only enable the mask on a7xx so we will never call a7xx_sw_fuse_violation_irq() on a6xx, but the XML parser still puts the A6XX_ prefix here. > > > + a7xx_sw_fuse_violation_irq(gpu); > > + > > if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) > > msm_gpu_retire(gpu); > > > > @@ -2525,6 +2550,59 @@ static void a6xx_llc_slices_init(struct platform_device *pdev, > > a6xx_gpu->llc_mmio = ERR_PTR(-EINVAL); > > } > > > > +static int a7xx_cx_mem_init(struct a6xx_gpu *a6xx_gpu) > > +{ > > + struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; > > + struct msm_gpu *gpu = &adreno_gpu->base; > > + u32 fuse_val; > > + int ret = 0; > > + > > + if (adreno_is_a750(adreno_gpu)) { > > + /* Assume that if qcom scm isn't available, that whatever > > + * replacement allows writing the fuse register ourselves. > > + * Users of alternative firmware need to make sure this > > + * register is writeable or indicate that it's not somehow. > > + * Print a warning because if you mess this up you're about to > > + * crash horribly. > > + */ > > + if (!qcom_scm_is_available()) { > > + dev_warn_once(gpu->dev->dev, > > + "SCM is not available, poking fuse register\n"); > > + a6xx_llc_write(a6xx_gpu, REG_A7XX_CX_MISC_SW_FUSE_VALUE, > > + A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING | > > + A7XX_CX_MISC_SW_FUSE_VALUE_FASTBLEND | > > + A7XX_CX_MISC_SW_FUSE_VALUE_LPAC); > > + adreno_gpu->has_ray_tracing = true; > > I'm not 100% sure. I'm afraid there may be SKUs with RT cores fused > off (as in, cut off from the rest, not "indicated unavailable") or > otherwise dysfunctional.. > > My guess would be that TZ probably has some sort of a LUT/match table > based on other SoC identifiers I don't think so. The entire point of this register AFAIK is to make it possible for the firmware to block access to features, not to describe the HW. If they want to fuse something off in the HW, as they have done before, then they will typically give the GPU a different chip_id. If they do happen to use this register in that way, which I think is unlikely, and someone ships this on a platform like Chromebooks without qcom_scm and with the RTU fused off, then it's on them to come up with an alternative way to describe it. There's nothing more we can do here without further information. > > > + return 0; > > + } > > + > > + ret = qcom_scm_gpu_init_regs(QCOM_SCM_GPU_ALWAYS_EN_REQ | > > + QCOM_SCM_GPU_TSENSE_EN_REQ); > > + if (ret) > > + return ret; > > + > > + /* On a750 raytracing may be disabled by the firmware, find out whether > > + * that's the case. The scm call above sets the fuse register. > > + */ > > + fuse_val = a6xx_llc_read(a6xx_gpu, REG_A7XX_CX_MISC_SW_FUSE_VALUE); > > + adreno_gpu->has_ray_tracing = > > + !!(fuse_val & A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING); > > + } else { > > + if (adreno_is_a740(adreno_gpu)) { > > + /* Raytracing is always enabled on a740 */ > > + adreno_gpu->has_ray_tracing = true; > > + } > > + > > + if (!qcom_scm_is_available()) > > + return 0; > > + > > + ret = qcom_scm_gpu_init_regs(QCOM_SCM_GPU_ALWAYS_EN_REQ); > > + } > > + > > + return ret; > > if (qcom_scm_is_available()) > return qcom_scm_gpu_init_regs(QCOM_SCM_GPU_ALWAYS_EN_REQ); Ok. > } > > return 0; > > ? > > Konrad
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index cf0b1de1c071..4a3b12b20802 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -10,6 +10,7 @@ #include <linux/bitfield.h> #include <linux/devfreq.h> +#include <linux/firmware/qcom/qcom_scm.h> #include <linux/pm_domain.h> #include <linux/soc/qcom/llcc-qcom.h> @@ -1686,7 +1687,8 @@ static int a6xx_zap_shader_init(struct msm_gpu *gpu) A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \ A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \ A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR | \ - A6XX_RBBM_INT_0_MASK_TSBWRITEERROR) + A6XX_RBBM_INT_0_MASK_TSBWRITEERROR | \ + A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION) #define A7XX_APRIV_MASK (A6XX_CP_APRIV_CNTL_ICACHE | \ A6XX_CP_APRIV_CNTL_RBFETCH | \ @@ -2356,6 +2358,26 @@ static void a6xx_fault_detect_irq(struct msm_gpu *gpu) kthread_queue_work(gpu->worker, &gpu->recover_work); } +static void a7xx_sw_fuse_violation_irq(struct msm_gpu *gpu) +{ + u32 status; + + status = gpu_read(gpu, REG_A7XX_RBBM_SW_FUSE_INT_STATUS); + gpu_write(gpu, REG_A7XX_RBBM_SW_FUSE_INT_MASK, 0); + + dev_err_ratelimited(&gpu->pdev->dev, "SW fuse violation status=%8.8x\n", status); + + /* Ignore FASTBLEND violations, because the HW will silently fall back + * to legacy blending. + */ + if (status & (A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING | + A7XX_CX_MISC_SW_FUSE_VALUE_LPAC)) { + del_timer(&gpu->hangcheck_timer); + + kthread_queue_work(gpu->worker, &gpu->recover_work); + } +} + static irqreturn_t a6xx_irq(struct msm_gpu *gpu) { struct msm_drm_private *priv = gpu->dev->dev_private; @@ -2384,6 +2406,9 @@ static irqreturn_t a6xx_irq(struct msm_gpu *gpu) if (status & A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS) dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Out of bounds access\n"); + if (status & A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION) + a7xx_sw_fuse_violation_irq(gpu); + if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) msm_gpu_retire(gpu); @@ -2525,6 +2550,59 @@ static void a6xx_llc_slices_init(struct platform_device *pdev, a6xx_gpu->llc_mmio = ERR_PTR(-EINVAL); } +static int a7xx_cx_mem_init(struct a6xx_gpu *a6xx_gpu) +{ + struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; + struct msm_gpu *gpu = &adreno_gpu->base; + u32 fuse_val; + int ret = 0; + + if (adreno_is_a750(adreno_gpu)) { + /* Assume that if qcom scm isn't available, that whatever + * replacement allows writing the fuse register ourselves. + * Users of alternative firmware need to make sure this + * register is writeable or indicate that it's not somehow. + * Print a warning because if you mess this up you're about to + * crash horribly. + */ + if (!qcom_scm_is_available()) { + dev_warn_once(gpu->dev->dev, + "SCM is not available, poking fuse register\n"); + a6xx_llc_write(a6xx_gpu, REG_A7XX_CX_MISC_SW_FUSE_VALUE, + A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING | + A7XX_CX_MISC_SW_FUSE_VALUE_FASTBLEND | + A7XX_CX_MISC_SW_FUSE_VALUE_LPAC); + adreno_gpu->has_ray_tracing = true; + return 0; + } + + ret = qcom_scm_gpu_init_regs(QCOM_SCM_GPU_ALWAYS_EN_REQ | + QCOM_SCM_GPU_TSENSE_EN_REQ); + if (ret) + return ret; + + /* On a750 raytracing may be disabled by the firmware, find out whether + * that's the case. The scm call above sets the fuse register. + */ + fuse_val = a6xx_llc_read(a6xx_gpu, REG_A7XX_CX_MISC_SW_FUSE_VALUE); + adreno_gpu->has_ray_tracing = + !!(fuse_val & A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING); + } else { + if (adreno_is_a740(adreno_gpu)) { + /* Raytracing is always enabled on a740 */ + adreno_gpu->has_ray_tracing = true; + } + + if (!qcom_scm_is_available()) + return 0; + + ret = qcom_scm_gpu_init_regs(QCOM_SCM_GPU_ALWAYS_EN_REQ); + } + + return ret; +} + + #define GBIF_CLIENT_HALT_MASK BIT(0) #define GBIF_ARB_HALT_MASK BIT(1) #define VBIF_XIN_HALT_CTRL0_MASK GENMASK(3, 0) @@ -3094,6 +3172,14 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) return ERR_PTR(ret); } + if (adreno_is_a7xx(adreno_gpu)) { + ret = a7xx_cx_mem_init(a6xx_gpu); + if (ret) { + a6xx_destroy(&(a6xx_gpu->base.base)); + return ERR_PTR(ret); + } + } + if (gpu->aspace) msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a6xx_fault_handler); diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h index 77526892eb8c..4180f3149dd8 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h @@ -182,6 +182,8 @@ struct adreno_gpu { */ const unsigned int *reg_offsets; bool gmu_is_wrapper; + + bool has_ray_tracing; }; #define to_adreno_gpu(x) container_of(x, struct adreno_gpu, base)
On all Qualcomm platforms with a7xx GPUs, qcom_scm provides a method to initialize cx_mem. Copy this from downstream (minus BCL which we currently don't support). On a750, this includes a new "fuse" register which can be used by qcom_scm to fuse off certain features like raytracing in software. The fuse is default off, and is initialized by calling the method. Afterwards we have to read it to find out which features were enabled. Signed-off-by: Connor Abbott <cwabbott0@gmail.com> --- drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 88 ++++++++++++++++++++++++++++++++- drivers/gpu/drm/msm/adreno/adreno_gpu.h | 2 + 2 files changed, 89 insertions(+), 1 deletion(-)