Message ID | 20250319145425.51935-15-robdclark@gmail.com
---|---
State | Superseded
Series | drm/msm: sparse / "VM_BIND" support
On 3/19/2025 8:22 PM, Rob Clark wrote:
> From: Rob Clark <robdclark@chromium.org>
>
> In the next commit, a way for userspace to opt-in to userspace managed
> VM is added. For this to work, we need to defer creation of the VM
> until it is needed.
>
> Signed-off-by: Rob Clark <robdclark@chromium.org>
> ---
>  drivers/gpu/drm/msm/adreno/a6xx_gpu.c   |  3 ++-
>  drivers/gpu/drm/msm/adreno/adreno_gpu.c | 14 +++++++-----
>  drivers/gpu/drm/msm/msm_drv.c           | 29 ++++++++++++++++++++-----
>  drivers/gpu/drm/msm/msm_gem_submit.c    |  2 +-
>  drivers/gpu/drm/msm/msm_gpu.h           |  9 +++++++-
>  5 files changed, 43 insertions(+), 14 deletions(-)

[...]

> +struct drm_gpuvm *msm_context_vm(struct drm_device *dev, struct msm_context *ctx)
> +{
> +	struct msm_drm_private *priv = dev->dev_private;
> +	if (!ctx->vm)

hmm. This is racy and it is in a userspace accessible path!

-Akhil

> +		ctx->vm = msm_gpu_create_private_vm(priv->gpu, current);
> +	return ctx->vm;
> +}

[...]
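The race Akhil is pointing at is an unlocked check-then-create: two ioctls on the same drm_file can both observe `ctx->vm == NULL` and each call `msm_gpu_create_private_vm()`, leaking one VM or handing out two different ones. A minimal sketch of one way to close it, assuming a driver-local init lock (the `vm_init_lock` name and the double-checked pattern are illustrative, not taken from this series):

```c
struct drm_gpuvm *msm_context_vm(struct drm_device *dev, struct msm_context *ctx)
{
	/* Illustrative lock; the driver would pick an appropriate existing
	 * or new lock.  A function-scope static DEFINE_MUTEX() is valid,
	 * the same pattern load_gpu() in msm_drv.c already uses.
	 */
	static DEFINE_MUTEX(vm_init_lock);
	struct msm_drm_private *priv = dev->dev_private;

	/* Fast path: once created, ctx->vm is stable for the lifetime of
	 * the context, so an already-set pointer can be returned directly.
	 */
	if (ctx->vm)
		return ctx->vm;

	mutex_lock(&vm_init_lock);
	if (!ctx->vm)
		ctx->vm = msm_gpu_create_private_vm(priv->gpu, current);
	mutex_unlock(&vm_init_lock);

	return ctx->vm;
}
```

A fully rigorous version would pair the unlocked fast-path read with READ_ONCE()/smp_load_acquire() against the store made under the lock, or simply always take the lock, since creation happens at most once per context.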
```diff
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index 4811be5a7c29..0b1e2ba3539e 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -112,6 +112,7 @@ static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu,
 {
 	bool sysprof = refcount_read(&a6xx_gpu->base.base.sysprof_active) > 1;
 	struct msm_context *ctx = submit->queue->ctx;
+	struct drm_gpuvm *vm = msm_context_vm(submit->dev, ctx);
 	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
 	phys_addr_t ttbr;
 	u32 asid;
@@ -120,7 +121,7 @@ static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu,
 	if (ctx->seqno == ring->cur_ctx_seqno)
 		return;
 
-	if (msm_iommu_pagetable_params(to_msm_vm(ctx->vm)->mmu, &ttbr, &asid))
+	if (msm_iommu_pagetable_params(to_msm_vm(vm)->mmu, &ttbr, &asid))
 		return;
 
 	if (adreno_gpu->info->family >= ADRENO_7XX_GEN1) {
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index 0f71703f6ec7..e4d895dda051 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -351,6 +351,8 @@ int adreno_get_param(struct msm_gpu *gpu, struct msm_context *ctx,
 {
 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 	struct drm_device *drm = gpu->dev;
+	/* Note ctx can be NULL when called from rd_open(): */
+	struct drm_gpuvm *vm = ctx ? msm_context_vm(drm, ctx) : NULL;
 
 	/* No pointer params yet */
 	if (*len != 0)
@@ -396,8 +398,8 @@ int adreno_get_param(struct msm_gpu *gpu, struct msm_context *ctx,
 		*value = 0;
 		return 0;
 	case MSM_PARAM_FAULTS:
-		if (ctx->vm)
-			*value = gpu->global_faults + to_msm_vm(ctx->vm)->faults;
+		if (vm)
+			*value = gpu->global_faults + to_msm_vm(vm)->faults;
 		else
 			*value = gpu->global_faults;
 		return 0;
@@ -405,14 +407,14 @@ int adreno_get_param(struct msm_gpu *gpu, struct msm_context *ctx,
 		*value = gpu->suspend_count;
 		return 0;
 	case MSM_PARAM_VA_START:
-		if (ctx->vm == gpu->vm)
+		if (vm == gpu->vm)
 			return UERR(EINVAL, drm, "requires per-process pgtables");
-		*value = ctx->vm->mm_start;
+		*value = vm->mm_start;
 		return 0;
 	case MSM_PARAM_VA_SIZE:
-		if (ctx->vm == gpu->vm)
+		if (vm == gpu->vm)
 			return UERR(EINVAL, drm, "requires per-process pgtables");
-		*value = ctx->vm->mm_range;
+		*value = vm->mm_range;
 		return 0;
 	case MSM_PARAM_HIGHEST_BANK_BIT:
 		*value = adreno_gpu->ubwc_config.highest_bank_bit;
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index 6ef29bc48bb0..6fd981ee6aee 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -214,10 +214,29 @@ static void load_gpu(struct drm_device *dev)
 	mutex_unlock(&init_lock);
 }
 
+/**
+ * msm_context_vm - lazily create the context's VM
+ *
+ * @dev: the drm device
+ * @ctx: the context
+ *
+ * The VM is lazily created, so that userspace has a chance to opt-in to having
+ * a userspace managed VM before the VM is created.
+ *
+ * Note that this does not return a reference to the VM.  Once the VM is created,
+ * it exists for the lifetime of the context.
+ */
+struct drm_gpuvm *msm_context_vm(struct drm_device *dev, struct msm_context *ctx)
+{
+	struct msm_drm_private *priv = dev->dev_private;
+	if (!ctx->vm)
+		ctx->vm = msm_gpu_create_private_vm(priv->gpu, current);
+	return ctx->vm;
+}
+
 static int context_init(struct drm_device *dev, struct drm_file *file)
 {
 	static atomic_t ident = ATOMIC_INIT(0);
-	struct msm_drm_private *priv = dev->dev_private;
 	struct msm_context *ctx;
 
 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
@@ -230,7 +249,6 @@ static int context_init(struct drm_device *dev, struct drm_file *file)
 	kref_init(&ctx->ref);
 	msm_submitqueue_init(dev, ctx);
 
-	ctx->vm = msm_gpu_create_private_vm(priv->gpu, current);
 	file->driver_priv = ctx;
 
 	ctx->seqno = atomic_inc_return(&ident);
@@ -408,7 +426,7 @@ static int msm_ioctl_gem_info_iova(struct drm_device *dev,
 	 * Don't pin the memory here - just get an address so that userspace can
 	 * be productive
 	 */
-	return msm_gem_get_iova(obj, ctx->vm, iova);
+	return msm_gem_get_iova(obj, msm_context_vm(dev, ctx), iova);
 }
 
 static int msm_ioctl_gem_info_set_iova(struct drm_device *dev,
@@ -417,18 +435,19 @@ static int msm_ioctl_gem_info_set_iova(struct drm_device *dev,
 {
 	struct msm_drm_private *priv = dev->dev_private;
 	struct msm_context *ctx = file->driver_priv;
+	struct drm_gpuvm *vm = msm_context_vm(dev, ctx);
 
 	if (!priv->gpu)
 		return -EINVAL;
 
 	/* Only supported if per-process address space is supported: */
-	if (priv->gpu->vm == ctx->vm)
+	if (priv->gpu->vm == vm)
 		return UERR(EOPNOTSUPP, dev, "requires per-process pgtables");
 
 	if (should_fail(&fail_gem_iova, obj->size))
 		return -ENOMEM;
 
-	return msm_gem_set_iova(obj, ctx->vm, iova);
+	return msm_gem_set_iova(obj, vm, iova);
 }
 
 static int msm_ioctl_gem_info_set_metadata(struct drm_gem_object *obj,
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index c65f3a6a5256..9731ad7993cf 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -63,7 +63,7 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev,
 
 	kref_init(&submit->ref);
 	submit->dev = dev;
-	submit->vm = queue->ctx->vm;
+	submit->vm = msm_context_vm(dev, queue->ctx);
 	submit->gpu = gpu;
 	submit->cmd = (void *)&submit->bos[nr_bos];
 	submit->queue = queue;
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index d8425e6d7f5a..c15aad288552 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -362,7 +362,12 @@ struct msm_context {
 	 */
 	int queueid;
 
-	/** @vm: the per-process GPU address-space */
+	/**
+	 * @vm:
+	 *
+	 * The per-process GPU address-space.  Do not access directly, use
+	 * msm_context_vm().
+	 */
 	struct drm_gpuvm *vm;
 
 	/** @kref: the reference count */
@@ -447,6 +452,8 @@ struct msm_context {
 	atomic64_t ctx_mem;
 };
 
+struct drm_gpuvm *msm_context_vm(struct drm_device *dev, struct msm_context *ctx);
+
 /**
  * msm_gpu_convert_priority - Map userspace priority to ring # and sched priority
  *
```
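To see why the deferral matters: the VM's flavor (kernel-managed vs. userspace-managed) is fixed at creation time, and the opt-in arrives in the next commit of the series, so userspace must issue it before the first ioctl that reaches msm_context_vm(). A hedged sketch of the userspace side, where MSM_PARAM_EN_VM_BIND is a hypothetical name standing in for whatever knob that next commit actually adds:

```c
#include <drm/msm_drm.h>
#include <xf86drm.h>

/*
 * Sketch only: MSM_PARAM_EN_VM_BIND is a hypothetical param name; the
 * real opt-in is defined by the following commit in the series.
 */
static int opt_in_to_userspace_vm(int fd)
{
	struct drm_msm_param req = {
		.pipe  = MSM_PIPE_3D0,
		.param = MSM_PARAM_EN_VM_BIND,	/* hypothetical */
		.value = 1,
	};

	/*
	 * Must run before any submit or GEM_INFO iova ioctl: the first of
	 * those now calls msm_context_vm(), which creates the VM and fixes
	 * its type for the remaining lifetime of the drm_file context.
	 */
	return drmCommandWrite(fd, DRM_MSM_SET_PARAM, &req, sizeof(req));
}
```

This ordering constraint is exactly what the patch buys by removing the eager msm_gpu_create_private_vm() call from context_init().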