diff mbox series

[RFC,2/3] drm/msm: Rework get_comm_cmdline() helper

Message ID 20230417201215.448099-3-robdclark@gmail.com
State New
Headers show
Series drm: Add comm/cmdline fdinfo fields | expand

Commit Message

Rob Clark April 17, 2023, 8:12 p.m. UTC
From: Rob Clark <robdclark@chromium.org>

Make it work in terms of ctx so that it can be re-used for fdinfo.

Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/adreno/adreno_gpu.c |  4 ++--
 drivers/gpu/drm/msm/msm_drv.c           |  2 ++
 drivers/gpu/drm/msm/msm_gpu.c           | 13 ++++++-------
 drivers/gpu/drm/msm/msm_gpu.h           | 12 ++++++++++--
 drivers/gpu/drm/msm/msm_submitqueue.c   |  1 +
 5 files changed, 21 insertions(+), 11 deletions(-)

Comments

Tvrtko Ursulin April 18, 2023, 8:27 a.m. UTC | #1
On 17/04/2023 21:12, Rob Clark wrote:
> From: Rob Clark <robdclark@chromium.org>
> 
> Make it work in terms of ctx so that it can be re-used for fdinfo.
> 
> Signed-off-by: Rob Clark <robdclark@chromium.org>
> ---
>   drivers/gpu/drm/msm/adreno/adreno_gpu.c |  4 ++--
>   drivers/gpu/drm/msm/msm_drv.c           |  2 ++
>   drivers/gpu/drm/msm/msm_gpu.c           | 13 ++++++-------
>   drivers/gpu/drm/msm/msm_gpu.h           | 12 ++++++++++--
>   drivers/gpu/drm/msm/msm_submitqueue.c   |  1 +
>   5 files changed, 21 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> index bb38e728864d..43c4e1fea83f 100644
> --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> @@ -412,7 +412,7 @@ int adreno_set_param(struct msm_gpu *gpu, struct msm_file_private *ctx,
>   		/* Ensure string is null terminated: */
>   		str[len] = '\0';
>   
> -		mutex_lock(&gpu->lock);
> +		mutex_lock(&ctx->lock);
>   
>   		if (param == MSM_PARAM_COMM) {
>   			paramp = &ctx->comm;
> @@ -423,7 +423,7 @@ int adreno_set_param(struct msm_gpu *gpu, struct msm_file_private *ctx,
>   		kfree(*paramp);
>   		*paramp = str;
>   
> -		mutex_unlock(&gpu->lock);
> +		mutex_unlock(&ctx->lock);
>   
>   		return 0;
>   	}
> diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
> index 3d73b98d6a9c..ca0e89e46e13 100644
> --- a/drivers/gpu/drm/msm/msm_drv.c
> +++ b/drivers/gpu/drm/msm/msm_drv.c
> @@ -581,6 +581,8 @@ static int context_init(struct drm_device *dev, struct drm_file *file)
>   	rwlock_init(&ctx->queuelock);
>   
>   	kref_init(&ctx->ref);
> +	ctx->pid = get_pid(task_pid(current));

Would it simplify things for msm if DRM core had an up-to-date file->pid, 
as proposed in 
https://patchwork.freedesktop.org/patch/526752/?series=109902&rev=4 ? It 
gets updated if the ioctl issuer is different from the fd opener, and 
seeing context_init here reminded me of it. Maybe you wouldn't have to 
track the pid in msm?

Regards,

Tvrtko

> +	mutex_init(&ctx->lock);
>   	msm_submitqueue_init(dev, ctx);
>   
>   	ctx->aspace = msm_gpu_create_private_address_space(priv->gpu, current);
> diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
> index c403912d13ab..f0f4f845c32d 100644
> --- a/drivers/gpu/drm/msm/msm_gpu.c
> +++ b/drivers/gpu/drm/msm/msm_gpu.c
> @@ -327,18 +327,17 @@ find_submit(struct msm_ringbuffer *ring, uint32_t fence)
>   
>   static void retire_submits(struct msm_gpu *gpu);
>   
> -static void get_comm_cmdline(struct msm_gem_submit *submit, char **comm, char **cmd)
> +static void get_comm_cmdline(struct msm_file_private *ctx, char **comm, char **cmd)
>   {
> -	struct msm_file_private *ctx = submit->queue->ctx;
>   	struct task_struct *task;
>   
> -	WARN_ON(!mutex_is_locked(&submit->gpu->lock));
> -
>   	/* Note that kstrdup will return NULL if argument is NULL: */
> +	mutex_lock(&ctx->lock);
>   	*comm = kstrdup(ctx->comm, GFP_KERNEL);
>   	*cmd  = kstrdup(ctx->cmdline, GFP_KERNEL);
> +	mutex_unlock(&ctx->lock);
>   
> -	task = get_pid_task(submit->pid, PIDTYPE_PID);
> +	task = get_pid_task(ctx->pid, PIDTYPE_PID);
>   	if (!task)
>   		return;
>   
> @@ -372,7 +371,7 @@ static void recover_worker(struct kthread_work *work)
>   		if (submit->aspace)
>   			submit->aspace->faults++;
>   
> -		get_comm_cmdline(submit, &comm, &cmd);
> +		get_comm_cmdline(submit->queue->ctx, &comm, &cmd);
>   
>   		if (comm && cmd) {
>   			DRM_DEV_ERROR(dev->dev, "%s: offending task: %s (%s)\n",
> @@ -460,7 +459,7 @@ static void fault_worker(struct kthread_work *work)
>   		goto resume_smmu;
>   
>   	if (submit) {
> -		get_comm_cmdline(submit, &comm, &cmd);
> +		get_comm_cmdline(submit->queue->ctx, &comm, &cmd);
>   
>   		/*
>   		 * When we get GPU iova faults, we can get 1000s of them,
> diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
> index 7a4fa1b8655b..b2023a42116b 100644
> --- a/drivers/gpu/drm/msm/msm_gpu.h
> +++ b/drivers/gpu/drm/msm/msm_gpu.h
> @@ -377,17 +377,25 @@ struct msm_file_private {
>   	 */
>   	int sysprof;
>   
> +	/** @pid: Process that opened this file. */
> +	struct pid *pid;
> +
> +	/**
> +	 * lock: Protects comm and cmdline
> +	 */
> +	struct mutex lock;
> +
>   	/**
>   	 * comm: Overridden task comm, see MSM_PARAM_COMM
>   	 *
> -	 * Accessed under msm_gpu::lock
> +	 * Accessed under msm_file_private::lock
>   	 */
>   	char *comm;
>   
>   	/**
>   	 * cmdline: Overridden task cmdline, see MSM_PARAM_CMDLINE
>   	 *
> -	 * Accessed under msm_gpu::lock
> +	 * Accessed under msm_file_private::lock
>   	 */
>   	char *cmdline;
>   
> diff --git a/drivers/gpu/drm/msm/msm_submitqueue.c b/drivers/gpu/drm/msm/msm_submitqueue.c
> index 0e803125a325..0444ba04fa06 100644
> --- a/drivers/gpu/drm/msm/msm_submitqueue.c
> +++ b/drivers/gpu/drm/msm/msm_submitqueue.c
> @@ -61,6 +61,7 @@ void __msm_file_private_destroy(struct kref *kref)
>   	}
>   
>   	msm_gem_address_space_put(ctx->aspace);
> +	put_pid(ctx->pid);
>   	kfree(ctx->comm);
>   	kfree(ctx->cmdline);
>   	kfree(ctx);
Daniel Vetter April 18, 2023, 8:34 a.m. UTC | #2
On Tue, Apr 18, 2023 at 09:27:49AM +0100, Tvrtko Ursulin wrote:
> 
> On 17/04/2023 21:12, Rob Clark wrote:
> > From: Rob Clark <robdclark@chromium.org>
> > 
> > Make it work in terms of ctx so that it can be re-used for fdinfo.
> > 
> > Signed-off-by: Rob Clark <robdclark@chromium.org>
> > ---
> >   drivers/gpu/drm/msm/adreno/adreno_gpu.c |  4 ++--
> >   drivers/gpu/drm/msm/msm_drv.c           |  2 ++
> >   drivers/gpu/drm/msm/msm_gpu.c           | 13 ++++++-------
> >   drivers/gpu/drm/msm/msm_gpu.h           | 12 ++++++++++--
> >   drivers/gpu/drm/msm/msm_submitqueue.c   |  1 +
> >   5 files changed, 21 insertions(+), 11 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> > index bb38e728864d..43c4e1fea83f 100644
> > --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> > +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> > @@ -412,7 +412,7 @@ int adreno_set_param(struct msm_gpu *gpu, struct msm_file_private *ctx,
> >   		/* Ensure string is null terminated: */
> >   		str[len] = '\0';
> > -		mutex_lock(&gpu->lock);
> > +		mutex_lock(&ctx->lock);
> >   		if (param == MSM_PARAM_COMM) {
> >   			paramp = &ctx->comm;
> > @@ -423,7 +423,7 @@ int adreno_set_param(struct msm_gpu *gpu, struct msm_file_private *ctx,
> >   		kfree(*paramp);
> >   		*paramp = str;
> > -		mutex_unlock(&gpu->lock);
> > +		mutex_unlock(&ctx->lock);
> >   		return 0;
> >   	}
> > diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
> > index 3d73b98d6a9c..ca0e89e46e13 100644
> > --- a/drivers/gpu/drm/msm/msm_drv.c
> > +++ b/drivers/gpu/drm/msm/msm_drv.c
> > @@ -581,6 +581,8 @@ static int context_init(struct drm_device *dev, struct drm_file *file)
> >   	rwlock_init(&ctx->queuelock);
> >   	kref_init(&ctx->ref);
> > +	ctx->pid = get_pid(task_pid(current));
> 
> Would it simplify things for msm if DRM core had an up to date file->pid as
> proposed in
> https://patchwork.freedesktop.org/patch/526752/?series=109902&rev=4 ? It
> gets updated if ioctl issuer is different than fd opener and this being
> context_init here reminded me of it. Maybe you wouldn't have to track the
> pid in msm?

Can we go one step further and let the drm fdinfo stuff print these new
additions? Consistency across drivers and all that.

Also for a generic trigger I think any driver ioctl is good enough (we
only really need to avoid the auth dance when you're not on a render
node).
-Daniel

> 
> Regards,
> 
> Tvrtko
> 
> > +	mutex_init(&ctx->lock);
> >   	msm_submitqueue_init(dev, ctx);
> >   	ctx->aspace = msm_gpu_create_private_address_space(priv->gpu, current);
> > diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
> > index c403912d13ab..f0f4f845c32d 100644
> > --- a/drivers/gpu/drm/msm/msm_gpu.c
> > +++ b/drivers/gpu/drm/msm/msm_gpu.c
> > @@ -327,18 +327,17 @@ find_submit(struct msm_ringbuffer *ring, uint32_t fence)
> >   static void retire_submits(struct msm_gpu *gpu);
> > -static void get_comm_cmdline(struct msm_gem_submit *submit, char **comm, char **cmd)
> > +static void get_comm_cmdline(struct msm_file_private *ctx, char **comm, char **cmd)
> >   {
> > -	struct msm_file_private *ctx = submit->queue->ctx;
> >   	struct task_struct *task;
> > -	WARN_ON(!mutex_is_locked(&submit->gpu->lock));
> > -
> >   	/* Note that kstrdup will return NULL if argument is NULL: */
> > +	mutex_lock(&ctx->lock);
> >   	*comm = kstrdup(ctx->comm, GFP_KERNEL);
> >   	*cmd  = kstrdup(ctx->cmdline, GFP_KERNEL);
> > +	mutex_unlock(&ctx->lock);
> > -	task = get_pid_task(submit->pid, PIDTYPE_PID);
> > +	task = get_pid_task(ctx->pid, PIDTYPE_PID);
> >   	if (!task)
> >   		return;
> > @@ -372,7 +371,7 @@ static void recover_worker(struct kthread_work *work)
> >   		if (submit->aspace)
> >   			submit->aspace->faults++;
> > -		get_comm_cmdline(submit, &comm, &cmd);
> > +		get_comm_cmdline(submit->queue->ctx, &comm, &cmd);
> >   		if (comm && cmd) {
> >   			DRM_DEV_ERROR(dev->dev, "%s: offending task: %s (%s)\n",
> > @@ -460,7 +459,7 @@ static void fault_worker(struct kthread_work *work)
> >   		goto resume_smmu;
> >   	if (submit) {
> > -		get_comm_cmdline(submit, &comm, &cmd);
> > +		get_comm_cmdline(submit->queue->ctx, &comm, &cmd);
> >   		/*
> >   		 * When we get GPU iova faults, we can get 1000s of them,
> > diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
> > index 7a4fa1b8655b..b2023a42116b 100644
> > --- a/drivers/gpu/drm/msm/msm_gpu.h
> > +++ b/drivers/gpu/drm/msm/msm_gpu.h
> > @@ -377,17 +377,25 @@ struct msm_file_private {
> >   	 */
> >   	int sysprof;
> > +	/** @pid: Process that opened this file. */
> > +	struct pid *pid;
> > +
> > +	/**
> > +	 * lock: Protects comm and cmdline
> > +	 */
> > +	struct mutex lock;
> > +
> >   	/**
> >   	 * comm: Overridden task comm, see MSM_PARAM_COMM
> >   	 *
> > -	 * Accessed under msm_gpu::lock
> > +	 * Accessed under msm_file_private::lock
> >   	 */
> >   	char *comm;
> >   	/**
> >   	 * cmdline: Overridden task cmdline, see MSM_PARAM_CMDLINE
> >   	 *
> > -	 * Accessed under msm_gpu::lock
> > +	 * Accessed under msm_file_private::lock
> >   	 */
> >   	char *cmdline;
> > diff --git a/drivers/gpu/drm/msm/msm_submitqueue.c b/drivers/gpu/drm/msm/msm_submitqueue.c
> > index 0e803125a325..0444ba04fa06 100644
> > --- a/drivers/gpu/drm/msm/msm_submitqueue.c
> > +++ b/drivers/gpu/drm/msm/msm_submitqueue.c
> > @@ -61,6 +61,7 @@ void __msm_file_private_destroy(struct kref *kref)
> >   	}
> >   	msm_gem_address_space_put(ctx->aspace);
> > +	put_pid(ctx->pid);
> >   	kfree(ctx->comm);
> >   	kfree(ctx->cmdline);
> >   	kfree(ctx);
Rob Clark April 18, 2023, 2:31 p.m. UTC | #3
On Tue, Apr 18, 2023 at 1:34 AM Daniel Vetter <daniel@ffwll.ch> wrote:
>
> On Tue, Apr 18, 2023 at 09:27:49AM +0100, Tvrtko Ursulin wrote:
> >
> > On 17/04/2023 21:12, Rob Clark wrote:
> > > From: Rob Clark <robdclark@chromium.org>
> > >
> > > Make it work in terms of ctx so that it can be re-used for fdinfo.
> > >
> > > Signed-off-by: Rob Clark <robdclark@chromium.org>
> > > ---
> > >   drivers/gpu/drm/msm/adreno/adreno_gpu.c |  4 ++--
> > >   drivers/gpu/drm/msm/msm_drv.c           |  2 ++
> > >   drivers/gpu/drm/msm/msm_gpu.c           | 13 ++++++-------
> > >   drivers/gpu/drm/msm/msm_gpu.h           | 12 ++++++++++--
> > >   drivers/gpu/drm/msm/msm_submitqueue.c   |  1 +
> > >   5 files changed, 21 insertions(+), 11 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> > > index bb38e728864d..43c4e1fea83f 100644
> > > --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> > > +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> > > @@ -412,7 +412,7 @@ int adreno_set_param(struct msm_gpu *gpu, struct msm_file_private *ctx,
> > >             /* Ensure string is null terminated: */
> > >             str[len] = '\0';
> > > -           mutex_lock(&gpu->lock);
> > > +           mutex_lock(&ctx->lock);
> > >             if (param == MSM_PARAM_COMM) {
> > >                     paramp = &ctx->comm;
> > > @@ -423,7 +423,7 @@ int adreno_set_param(struct msm_gpu *gpu, struct msm_file_private *ctx,
> > >             kfree(*paramp);
> > >             *paramp = str;
> > > -           mutex_unlock(&gpu->lock);
> > > +           mutex_unlock(&ctx->lock);
> > >             return 0;
> > >     }
> > > diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
> > > index 3d73b98d6a9c..ca0e89e46e13 100644
> > > --- a/drivers/gpu/drm/msm/msm_drv.c
> > > +++ b/drivers/gpu/drm/msm/msm_drv.c
> > > @@ -581,6 +581,8 @@ static int context_init(struct drm_device *dev, struct drm_file *file)
> > >     rwlock_init(&ctx->queuelock);
> > >     kref_init(&ctx->ref);
> > > +   ctx->pid = get_pid(task_pid(current));
> >
> > Would it simplify things for msm if DRM core had an up to date file->pid as
> > proposed in
> > https://patchwork.freedesktop.org/patch/526752/?series=109902&rev=4 ? It
> > gets updated if ioctl issuer is different than fd opener and this being
> > context_init here reminded me of it. Maybe you wouldn't have to track the
> > pid in msm?

The problem is that we also need this for gpu devcore dumps, which
could happen after the drm_file is closed.  The ctx can outlive the
file.

But the ctx->pid has the same problem as the existing file->pid when
it comes to Xorg.. hopefully over time that problem just goes away.  I
guess I could do a similar dance to your patch to update the pid
whenever (for ex) a submitqueue is created.

> Can we go one step further and let the drm fdinfo stuff print these new
> additions? Consistency across drivers and all that.

Hmm, I guess I could _also_ store the overridden comm/cmdline in
drm_file.  I still need to track it in ctx (msm_file_private) because
I could need it after the file is closed.

Maybe it could be useful to have a gl extension to let the app set a
name on the context so that this is useful beyond native-ctx (ie.
maybe it would be nice to see that "chrome: lwn.net" is using less gpu
memory than "chrome: phoronix.com", etc)

BR,
-R

> Also for a generic trigger I think any driver ioctl is good enough (we
> only really need to avoid the auth dance when you're not on a render
> node).
> -Daniel
>
> >
> > Regards,
> >
> > Tvrtko
> >
> > > +   mutex_init(&ctx->lock);
> > >     msm_submitqueue_init(dev, ctx);
> > >     ctx->aspace = msm_gpu_create_private_address_space(priv->gpu, current);
> > > diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
> > > index c403912d13ab..f0f4f845c32d 100644
> > > --- a/drivers/gpu/drm/msm/msm_gpu.c
> > > +++ b/drivers/gpu/drm/msm/msm_gpu.c
> > > @@ -327,18 +327,17 @@ find_submit(struct msm_ringbuffer *ring, uint32_t fence)
> > >   static void retire_submits(struct msm_gpu *gpu);
> > > -static void get_comm_cmdline(struct msm_gem_submit *submit, char **comm, char **cmd)
> > > +static void get_comm_cmdline(struct msm_file_private *ctx, char **comm, char **cmd)
> > >   {
> > > -   struct msm_file_private *ctx = submit->queue->ctx;
> > >     struct task_struct *task;
> > > -   WARN_ON(!mutex_is_locked(&submit->gpu->lock));
> > > -
> > >     /* Note that kstrdup will return NULL if argument is NULL: */
> > > +   mutex_lock(&ctx->lock);
> > >     *comm = kstrdup(ctx->comm, GFP_KERNEL);
> > >     *cmd  = kstrdup(ctx->cmdline, GFP_KERNEL);
> > > +   mutex_unlock(&ctx->lock);
> > > -   task = get_pid_task(submit->pid, PIDTYPE_PID);
> > > +   task = get_pid_task(ctx->pid, PIDTYPE_PID);
> > >     if (!task)
> > >             return;
> > > @@ -372,7 +371,7 @@ static void recover_worker(struct kthread_work *work)
> > >             if (submit->aspace)
> > >                     submit->aspace->faults++;
> > > -           get_comm_cmdline(submit, &comm, &cmd);
> > > +           get_comm_cmdline(submit->queue->ctx, &comm, &cmd);
> > >             if (comm && cmd) {
> > >                     DRM_DEV_ERROR(dev->dev, "%s: offending task: %s (%s)\n",
> > > @@ -460,7 +459,7 @@ static void fault_worker(struct kthread_work *work)
> > >             goto resume_smmu;
> > >     if (submit) {
> > > -           get_comm_cmdline(submit, &comm, &cmd);
> > > +           get_comm_cmdline(submit->queue->ctx, &comm, &cmd);
> > >             /*
> > >              * When we get GPU iova faults, we can get 1000s of them,
> > > diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
> > > index 7a4fa1b8655b..b2023a42116b 100644
> > > --- a/drivers/gpu/drm/msm/msm_gpu.h
> > > +++ b/drivers/gpu/drm/msm/msm_gpu.h
> > > @@ -377,17 +377,25 @@ struct msm_file_private {
> > >      */
> > >     int sysprof;
> > > +   /** @pid: Process that opened this file. */
> > > +   struct pid *pid;
> > > +
> > > +   /**
> > > +    * lock: Protects comm and cmdline
> > > +    */
> > > +   struct mutex lock;
> > > +
> > >     /**
> > >      * comm: Overridden task comm, see MSM_PARAM_COMM
> > >      *
> > > -    * Accessed under msm_gpu::lock
> > > +    * Accessed under msm_file_private::lock
> > >      */
> > >     char *comm;
> > >     /**
> > >      * cmdline: Overridden task cmdline, see MSM_PARAM_CMDLINE
> > >      *
> > > -    * Accessed under msm_gpu::lock
> > > +    * Accessed under msm_file_private::lock
> > >      */
> > >     char *cmdline;
> > > diff --git a/drivers/gpu/drm/msm/msm_submitqueue.c b/drivers/gpu/drm/msm/msm_submitqueue.c
> > > index 0e803125a325..0444ba04fa06 100644
> > > --- a/drivers/gpu/drm/msm/msm_submitqueue.c
> > > +++ b/drivers/gpu/drm/msm/msm_submitqueue.c
> > > @@ -61,6 +61,7 @@ void __msm_file_private_destroy(struct kref *kref)
> > >     }
> > >     msm_gem_address_space_put(ctx->aspace);
> > > +   put_pid(ctx->pid);
> > >     kfree(ctx->comm);
> > >     kfree(ctx->cmdline);
> > >     kfree(ctx);
>
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch
Emil Velikov April 21, 2023, 9:33 a.m. UTC | #4
Greetings all,

Sorry for the delay - Easter Holidays, food coma and all that :-)

On Tue, 18 Apr 2023 at 15:31, Rob Clark <robdclark@gmail.com> wrote:
>
> On Tue, Apr 18, 2023 at 1:34 AM Daniel Vetter <daniel@ffwll.ch> wrote:
> >
> > On Tue, Apr 18, 2023 at 09:27:49AM +0100, Tvrtko Ursulin wrote:
> > >
> > > On 17/04/2023 21:12, Rob Clark wrote:
> > > > From: Rob Clark <robdclark@chromium.org>
> > > >
> > > > Make it work in terms of ctx so that it can be re-used for fdinfo.
> > > >
> > > > Signed-off-by: Rob Clark <robdclark@chromium.org>
> > > > ---
> > > >   drivers/gpu/drm/msm/adreno/adreno_gpu.c |  4 ++--
> > > >   drivers/gpu/drm/msm/msm_drv.c           |  2 ++
> > > >   drivers/gpu/drm/msm/msm_gpu.c           | 13 ++++++-------
> > > >   drivers/gpu/drm/msm/msm_gpu.h           | 12 ++++++++++--
> > > >   drivers/gpu/drm/msm/msm_submitqueue.c   |  1 +
> > > >   5 files changed, 21 insertions(+), 11 deletions(-)
> > > >
> > > > diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> > > > index bb38e728864d..43c4e1fea83f 100644
> > > > --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> > > > +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> > > > @@ -412,7 +412,7 @@ int adreno_set_param(struct msm_gpu *gpu, struct msm_file_private *ctx,
> > > >             /* Ensure string is null terminated: */
> > > >             str[len] = '\0';
> > > > -           mutex_lock(&gpu->lock);
> > > > +           mutex_lock(&ctx->lock);
> > > >             if (param == MSM_PARAM_COMM) {
> > > >                     paramp = &ctx->comm;
> > > > @@ -423,7 +423,7 @@ int adreno_set_param(struct msm_gpu *gpu, struct msm_file_private *ctx,
> > > >             kfree(*paramp);
> > > >             *paramp = str;
> > > > -           mutex_unlock(&gpu->lock);
> > > > +           mutex_unlock(&ctx->lock);
> > > >             return 0;
> > > >     }
> > > > diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
> > > > index 3d73b98d6a9c..ca0e89e46e13 100644
> > > > --- a/drivers/gpu/drm/msm/msm_drv.c
> > > > +++ b/drivers/gpu/drm/msm/msm_drv.c
> > > > @@ -581,6 +581,8 @@ static int context_init(struct drm_device *dev, struct drm_file *file)
> > > >     rwlock_init(&ctx->queuelock);
> > > >     kref_init(&ctx->ref);
> > > > +   ctx->pid = get_pid(task_pid(current));
> > >
> > > Would it simplify things for msm if DRM core had an up to date file->pid as
> > > proposed in
> > > https://patchwork.freedesktop.org/patch/526752/?series=109902&rev=4 ? It
> > > gets updated if ioctl issuer is different than fd opener and this being
> > > context_init here reminded me of it. Maybe you wouldn't have to track the
> > > pid in msm?
>
> The problem is that we also need this for gpu devcore dumps, which
> could happen after the drm_file is closed.  The ctx can outlive the
> file.
>
I think we all kept forgetting about that. MSM has had support for ages,
while AMDGPU is the second driver to land support - just a release
ago.

> But the ctx->pid has the same problem as the existing file->pid when
> it comes to Xorg.. hopefully over time that problem just goes away.

Out of curiosity: what do you mean by "when it comes to Xorg" - the
"was_master" handling or something else?

> guess I could do a similar dance to your patch to update the pid
> whenever (for ex) a submitqueue is created.
>
> > Can we go one step further and let the drm fdinfo stuff print these new
> > additions? Consistency across drivers and all that.
>
> Hmm, I guess I could _also_ store the overridden comm/cmdline in
> drm_file.  I still need to track it in ctx (msm_file_private) because
> I could need it after the file is closed.
>
> Maybe it could be useful to have a gl extension to let the app set a
> name on the context so that this is useful beyond native-ctx (ie.
> maybe it would be nice to see that "chrome: lwn.net" is using less gpu
> memory than "chrome: phoronix.com", etc)
>

/me awaits the series hitting the respective websites ;-)

But seriously - the series from Tvrtko (thanks for the link, will
check in a moment) makes sense. Although given the lifespan issue
mentioned above, I don't think it's applicable here.

So if it were me, I would consider the two orthogonal for the
short/mid term. Fwiw this and patch 1/3 are:
Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>

HTH
-Emil
Rob Clark April 21, 2023, 2:47 p.m. UTC | #5
On Fri, Apr 21, 2023 at 2:33 AM Emil Velikov <emil.l.velikov@gmail.com> wrote:
>
> Greeting all,
>
> Sorry for the delay - Easter Holidays, food coma and all that :-)
>
> On Tue, 18 Apr 2023 at 15:31, Rob Clark <robdclark@gmail.com> wrote:
> >
> > On Tue, Apr 18, 2023 at 1:34 AM Daniel Vetter <daniel@ffwll.ch> wrote:
> > >
> > > On Tue, Apr 18, 2023 at 09:27:49AM +0100, Tvrtko Ursulin wrote:
> > > >
> > > > On 17/04/2023 21:12, Rob Clark wrote:
> > > > > From: Rob Clark <robdclark@chromium.org>
> > > > >
> > > > > Make it work in terms of ctx so that it can be re-used for fdinfo.
> > > > >
> > > > > Signed-off-by: Rob Clark <robdclark@chromium.org>
> > > > > ---
> > > > >   drivers/gpu/drm/msm/adreno/adreno_gpu.c |  4 ++--
> > > > >   drivers/gpu/drm/msm/msm_drv.c           |  2 ++
> > > > >   drivers/gpu/drm/msm/msm_gpu.c           | 13 ++++++-------
> > > > >   drivers/gpu/drm/msm/msm_gpu.h           | 12 ++++++++++--
> > > > >   drivers/gpu/drm/msm/msm_submitqueue.c   |  1 +
> > > > >   5 files changed, 21 insertions(+), 11 deletions(-)
> > > > >
> > > > > diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> > > > > index bb38e728864d..43c4e1fea83f 100644
> > > > > --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> > > > > +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> > > > > @@ -412,7 +412,7 @@ int adreno_set_param(struct msm_gpu *gpu, struct msm_file_private *ctx,
> > > > >             /* Ensure string is null terminated: */
> > > > >             str[len] = '\0';
> > > > > -           mutex_lock(&gpu->lock);
> > > > > +           mutex_lock(&ctx->lock);
> > > > >             if (param == MSM_PARAM_COMM) {
> > > > >                     paramp = &ctx->comm;
> > > > > @@ -423,7 +423,7 @@ int adreno_set_param(struct msm_gpu *gpu, struct msm_file_private *ctx,
> > > > >             kfree(*paramp);
> > > > >             *paramp = str;
> > > > > -           mutex_unlock(&gpu->lock);
> > > > > +           mutex_unlock(&ctx->lock);
> > > > >             return 0;
> > > > >     }
> > > > > diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
> > > > > index 3d73b98d6a9c..ca0e89e46e13 100644
> > > > > --- a/drivers/gpu/drm/msm/msm_drv.c
> > > > > +++ b/drivers/gpu/drm/msm/msm_drv.c
> > > > > @@ -581,6 +581,8 @@ static int context_init(struct drm_device *dev, struct drm_file *file)
> > > > >     rwlock_init(&ctx->queuelock);
> > > > >     kref_init(&ctx->ref);
> > > > > +   ctx->pid = get_pid(task_pid(current));
> > > >
> > > > Would it simplify things for msm if DRM core had an up to date file->pid as
> > > > proposed in
> > > > https://patchwork.freedesktop.org/patch/526752/?series=109902&rev=4 ? It
> > > > gets updated if ioctl issuer is different than fd opener and this being
> > > > context_init here reminded me of it. Maybe you wouldn't have to track the
> > > > pid in msm?
> >
> > The problem is that we also need this for gpu devcore dumps, which
> > could happen after the drm_file is closed.  The ctx can outlive the
> > file.
> >
> I think we all kept forgetting about that. MSM had support for ages,
> while AMDGPU is the second driver to land support - just a release
> ago.
>
> > But the ctx->pid has the same problem as the existing file->pid when
> > it comes to Xorg.. hopefully over time that problem just goes away.
>
> Out of curiosity: what do you mean with "when it comes to Xorg" - the
> "was_master" handling or something else?

The problem is that Xorg is the one to open the drm fd, and then
passes the fd to the client.. so the pid of drm_file is the Xorg pid,
not the client.  Making it not terribly informative.

Tvrtko's patch he linked above would address that for drm_file, but
not for other driver internal usages.  Maybe it could be wired up as a
helper so that drivers don't have to re-invent that dance.  Idk, I
have to think about it.

Btw, with my WIP drm sched fence signalling patch lockdep is unhappy
when gpu devcore dumps are triggered.  I'm still pondering how to
decouple the locking so that anything coming from fs (ie.
show_fdinfo()) is decoupled from anything that happens in the fence
signaling path.  But will repost this series once I get that sorted
out.

BR,
-R

>
> > guess I could do a similar dance to your patch to update the pid
> > whenever (for ex) a submitqueue is created.
> >
> > > Can we go one step further and let the drm fdinfo stuff print these new
> > > additions? Consistency across drivers and all that.
> >
> > Hmm, I guess I could _also_ store the overridden comm/cmdline in
> > drm_file.  I still need to track it in ctx (msm_file_private) because
> > I could need it after the file is closed.
> >
> > Maybe it could be useful to have a gl extension to let the app set a
> > name on the context so that this is useful beyond native-ctx (ie.
> > maybe it would be nice to see that "chrome: lwn.net" is using less gpu
> > memory than "chrome: phoronix.com", etc)
> >
>
> /me awaits for the series to hit the respective websites ;-)
>
> But seriously - the series from Tvrtko (thanks for the link, will
> check in a moment) makes sense. Although given the livespan issue
> mentioned above, I don't think it's applicable here.
>
> So if it were me, I would consider the two orthogonal for the
> short/mid term. Fwiw this and patch 1/3 are:
> Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
>
> HTH
> -Emil
Daniel Vetter April 27, 2023, 9:39 a.m. UTC | #6
On Fri, Apr 21, 2023 at 07:47:26AM -0700, Rob Clark wrote:
> On Fri, Apr 21, 2023 at 2:33 AM Emil Velikov <emil.l.velikov@gmail.com> wrote:
> >
> > Greeting all,
> >
> > Sorry for the delay - Easter Holidays, food coma and all that :-)
> >
> > On Tue, 18 Apr 2023 at 15:31, Rob Clark <robdclark@gmail.com> wrote:
> > >
> > > On Tue, Apr 18, 2023 at 1:34 AM Daniel Vetter <daniel@ffwll.ch> wrote:
> > > >
> > > > On Tue, Apr 18, 2023 at 09:27:49AM +0100, Tvrtko Ursulin wrote:
> > > > >
> > > > > On 17/04/2023 21:12, Rob Clark wrote:
> > > > > > From: Rob Clark <robdclark@chromium.org>
> > > > > >
> > > > > > Make it work in terms of ctx so that it can be re-used for fdinfo.
> > > > > >
> > > > > > Signed-off-by: Rob Clark <robdclark@chromium.org>
> > > > > > ---
> > > > > >   drivers/gpu/drm/msm/adreno/adreno_gpu.c |  4 ++--
> > > > > >   drivers/gpu/drm/msm/msm_drv.c           |  2 ++
> > > > > >   drivers/gpu/drm/msm/msm_gpu.c           | 13 ++++++-------
> > > > > >   drivers/gpu/drm/msm/msm_gpu.h           | 12 ++++++++++--
> > > > > >   drivers/gpu/drm/msm/msm_submitqueue.c   |  1 +
> > > > > >   5 files changed, 21 insertions(+), 11 deletions(-)
> > > > > >
> > > > > > diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> > > > > > index bb38e728864d..43c4e1fea83f 100644
> > > > > > --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> > > > > > +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> > > > > > @@ -412,7 +412,7 @@ int adreno_set_param(struct msm_gpu *gpu, struct msm_file_private *ctx,
> > > > > >             /* Ensure string is null terminated: */
> > > > > >             str[len] = '\0';
> > > > > > -           mutex_lock(&gpu->lock);
> > > > > > +           mutex_lock(&ctx->lock);
> > > > > >             if (param == MSM_PARAM_COMM) {
> > > > > >                     paramp = &ctx->comm;
> > > > > > @@ -423,7 +423,7 @@ int adreno_set_param(struct msm_gpu *gpu, struct msm_file_private *ctx,
> > > > > >             kfree(*paramp);
> > > > > >             *paramp = str;
> > > > > > -           mutex_unlock(&gpu->lock);
> > > > > > +           mutex_unlock(&ctx->lock);
> > > > > >             return 0;
> > > > > >     }
> > > > > > diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
> > > > > > index 3d73b98d6a9c..ca0e89e46e13 100644
> > > > > > --- a/drivers/gpu/drm/msm/msm_drv.c
> > > > > > +++ b/drivers/gpu/drm/msm/msm_drv.c
> > > > > > @@ -581,6 +581,8 @@ static int context_init(struct drm_device *dev, struct drm_file *file)
> > > > > >     rwlock_init(&ctx->queuelock);
> > > > > >     kref_init(&ctx->ref);
> > > > > > +   ctx->pid = get_pid(task_pid(current));
> > > > >
> > > > > Would it simplify things for msm if DRM core had an up to date file->pid as
> > > > > proposed in
> > > > > https://patchwork.freedesktop.org/patch/526752/?series=109902&rev=4 ? It
> > > > > gets updated if ioctl issuer is different than fd opener and this being
> > > > > context_init here reminded me of it. Maybe you wouldn't have to track the
> > > > > pid in msm?
> > >
> > > The problem is that we also need this for gpu devcore dumps, which
> > > could happen after the drm_file is closed.  The ctx can outlive the
> > > file.
> > >
> > I think we all kept forgetting about that. MSM had support for ages,
> > while AMDGPU is the second driver to land support - just a release
> > ago.
> >
> > > But the ctx->pid has the same problem as the existing file->pid when
> > > it comes to Xorg.. hopefully over time that problem just goes away.
> >
> > Out of curiosity: what do you mean with "when it comes to Xorg" - the
> > "was_master" handling or something else?
> 
> The problem is that Xorg is the one to open the drm fd, and then
> passes the fd to the client.. so the pid of drm_file is the Xorg pid,
> not the client.  Making it not terribly informative.
> 
> Tvrtko's patch he linked above would address that for drm_file, but
> not for other driver internal usages.  Maybe it could be wired up as a
> helper so that drivers don't have to re-invent that dance.  Idk, I
> have to think about it.
> 
> Btw, with my WIP drm sched fence signalling patch lockdep is unhappy
> when gpu devcore dumps are triggered.  I'm still pondering how to
> decouple the locking so that anything coming from fs (ie.
> show_fdinfo()) is decoupled from anything that happens in the fence
> signaling path.  But will repost this series once I get that sorted
> out.

So the cleanest imo is that you push most of the capturing into a worker
that's entirely decoupled. If you have terminal context (i.e. on first
hang they stop all further cmd submission, which is anyway what
vk/arb_robustness want), then you don't have to capture at tdr time,
because there's no subsequent batch that will wreck the state.

But it only works if your gpu ctxs don't have recoverable semantics.

If you can't do that it's a _lot_ of GFP_ATOMIC and trylock and bailing
out if any fails :-/
-Daniel

> 
> BR,
> -R
> 
> >
> > > guess I could do a similar dance to your patch to update the pid
> > > whenever (for ex) a submitqueue is created.
> > >
> > > > Can we go one step further and let the drm fdinfo stuff print these new
> > > > additions? Consistency across drivers and all that.
> > >
> > > Hmm, I guess I could _also_ store the overridden comm/cmdline in
> > > drm_file.  I still need to track it in ctx (msm_file_private) because
> > > I could need it after the file is closed.
> > >
> > > Maybe it could be useful to have a gl extension to let the app set a
> > > name on the context so that this is useful beyond native-ctx (ie.
> > > maybe it would be nice to see that "chrome: lwn.net" is using less gpu
> > > memory than "chrome: phoronix.com", etc)
> > >
> >
> > /me awaits for the series to hit the respective websites ;-)
> >
> > But seriously - the series from Tvrtko (thanks for the link, will
> > check in a moment) makes sense. Although given the lifespan issue
> > mentioned above, I don't think it's applicable here.
> >
> > So if it were me, I would consider the two orthogonal for the
> > short/mid term. Fwiw this and patch 1/3 are:
> > Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
> >
> > HTH
> > -Emil
Rob Clark April 27, 2023, 2:31 p.m. UTC | #7
On Thu, Apr 27, 2023 at 2:39 AM Daniel Vetter <daniel@ffwll.ch> wrote:
>
> On Fri, Apr 21, 2023 at 07:47:26AM -0700, Rob Clark wrote:
> > On Fri, Apr 21, 2023 at 2:33 AM Emil Velikov <emil.l.velikov@gmail.com> wrote:
> > >
> > > Greeting all,
> > >
> > > Sorry for the delay - Easter Holidays, food coma and all that :-)
> > >
> > > On Tue, 18 Apr 2023 at 15:31, Rob Clark <robdclark@gmail.com> wrote:
> > > >
> > > > On Tue, Apr 18, 2023 at 1:34 AM Daniel Vetter <daniel@ffwll.ch> wrote:
> > > > >
> > > > > On Tue, Apr 18, 2023 at 09:27:49AM +0100, Tvrtko Ursulin wrote:
> > > > > >
> > > > > > On 17/04/2023 21:12, Rob Clark wrote:
> > > > > > > From: Rob Clark <robdclark@chromium.org>
> > > > > > >
> > > > > > > Make it work in terms of ctx so that it can be re-used for fdinfo.
> > > > > > >
> > > > > > > Signed-off-by: Rob Clark <robdclark@chromium.org>
> > > > > > > ---
> > > > > > >   drivers/gpu/drm/msm/adreno/adreno_gpu.c |  4 ++--
> > > > > > >   drivers/gpu/drm/msm/msm_drv.c           |  2 ++
> > > > > > >   drivers/gpu/drm/msm/msm_gpu.c           | 13 ++++++-------
> > > > > > >   drivers/gpu/drm/msm/msm_gpu.h           | 12 ++++++++++--
> > > > > > >   drivers/gpu/drm/msm/msm_submitqueue.c   |  1 +
> > > > > > >   5 files changed, 21 insertions(+), 11 deletions(-)
> > > > > > >
> > > > > > > diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> > > > > > > index bb38e728864d..43c4e1fea83f 100644
> > > > > > > --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> > > > > > > +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> > > > > > > @@ -412,7 +412,7 @@ int adreno_set_param(struct msm_gpu *gpu, struct msm_file_private *ctx,
> > > > > > >             /* Ensure string is null terminated: */
> > > > > > >             str[len] = '\0';
> > > > > > > -           mutex_lock(&gpu->lock);
> > > > > > > +           mutex_lock(&ctx->lock);
> > > > > > >             if (param == MSM_PARAM_COMM) {
> > > > > > >                     paramp = &ctx->comm;
> > > > > > > @@ -423,7 +423,7 @@ int adreno_set_param(struct msm_gpu *gpu, struct msm_file_private *ctx,
> > > > > > >             kfree(*paramp);
> > > > > > >             *paramp = str;
> > > > > > > -           mutex_unlock(&gpu->lock);
> > > > > > > +           mutex_unlock(&ctx->lock);
> > > > > > >             return 0;
> > > > > > >     }
> > > > > > > diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
> > > > > > > index 3d73b98d6a9c..ca0e89e46e13 100644
> > > > > > > --- a/drivers/gpu/drm/msm/msm_drv.c
> > > > > > > +++ b/drivers/gpu/drm/msm/msm_drv.c
> > > > > > > @@ -581,6 +581,8 @@ static int context_init(struct drm_device *dev, struct drm_file *file)
> > > > > > >     rwlock_init(&ctx->queuelock);
> > > > > > >     kref_init(&ctx->ref);
> > > > > > > +   ctx->pid = get_pid(task_pid(current));
> > > > > >
> > > > > > Would it simplify things for msm if DRM core had an up to date file->pid as
> > > > > > proposed in
> > > > > > https://patchwork.freedesktop.org/patch/526752/?series=109902&rev=4 ? It
> > > > > > gets updated if ioctl issuer is different than fd opener and this being
> > > > > > context_init here reminded me of it. Maybe you wouldn't have to track the
> > > > > > pid in msm?
> > > >
> > > > The problem is that we also need this for gpu devcore dumps, which
> > > > could happen after the drm_file is closed.  The ctx can outlive the
> > > > file.
> > > >
> > > I think we all kept forgetting about that. MSM had support for ages,
> > > while AMDGPU is the second driver to land support - just a release
> > > ago.
> > >
> > > > But the ctx->pid has the same problem as the existing file->pid when
> > > > it comes to Xorg.. hopefully over time that problem just goes away.
> > >
> > > Out of curiosity: what do you mean with "when it comes to Xorg" - the
> > > "was_master" handling or something else?
> >
> > The problem is that Xorg is the one to open the drm fd, and then
> > passes the fd to the client.. so the pid of drm_file is the Xorg pid,
> > not the client.  Making it not terribly informative.
> >
> > Tvrtko's patch he linked above would address that for drm_file, but
> > not for other driver internal usages.  Maybe it could be wired up as a
> > helper so that drivers don't have to re-invent that dance.  Idk, I
> > have to think about it.
> >
> > Btw, with my WIP drm sched fence signalling patch lockdep is unhappy
> > when gpu devcore dumps are triggered.  I'm still pondering how to
> > decouple the locking so that anything coming from fs (ie.
> > show_fdinfo()) is decoupled from anything that happens in the fence
> > signaling path.  But will repost this series once I get that sorted
> > out.
>
> So the cleanest imo is that you push most of the capturing into a worker
> that's entirely decoupled. If you have terminal context (i.e. on first
> hang they stop all further cmd submission, which is anyway what
> vk/arb_robustness want), then you don't have to capture at tdr time,
> because there's no subsequent batch that will wreck the state.

It is already in a worker, but we (a) need to block other contexts
from submitting while at the same time (b) using the GPU itself to
capture its state.. (yes, the way the hw works is overly complicated
in this regard)

> But it only works if your gpu ctx don't have recoverable semantics.

We do have recoverable semantics.. but that is pretty orthogonal.  We
just need a different lock.. I have a plan to move (a copy of) the
override strings to drm_file with its own locking decoupled from what
we need in the recovery path.. and hopefully will finally have time to
type it up today and post it (just before disappearing off into the
woods to go backpacking ;-))

BR,
-R

> If you can't do that it's a _lot_ of GFP_ATOMIC and trylock and bailing
> out if any fails :-/
> -Daniel
>
> >
> > BR,
> > -R
> >
> > >
> > > > guess I could do a similar dance to your patch to update the pid
> > > > whenever (for ex) a submitqueue is created.
> > > >
> > > > > Can we go one step further and let the drm fdinfo stuff print these new
> > > > > additions? Consistency across drivers and all that.
> > > >
> > > > Hmm, I guess I could _also_ store the overridden comm/cmdline in
> > > > drm_file.  I still need to track it in ctx (msm_file_private) because
> > > > I could need it after the file is closed.
> > > >
> > > > Maybe it could be useful to have a gl extension to let the app set a
> > > > name on the context so that this is useful beyond native-ctx (ie.
> > > > maybe it would be nice to see that "chrome: lwn.net" is using less gpu
> > > > memory than "chrome: phoronix.com", etc)
> > > >
> > >
> > > /me awaits for the series to hit the respective websites ;-)
> > >
> > > But seriously - the series from Tvrtko (thanks for the link, will
> > > > check in a moment) makes sense. Although given the lifespan issue
> > > mentioned above, I don't think it's applicable here.
> > >
> > > So if it were me, I would consider the two orthogonal for the
> > > short/mid term. Fwiw this and patch 1/3 are:
> > > Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
> > >
> > > HTH
> > > -Emil
>
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch
diff mbox series

Patch

diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index bb38e728864d..43c4e1fea83f 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -412,7 +412,7 @@  int adreno_set_param(struct msm_gpu *gpu, struct msm_file_private *ctx,
 		/* Ensure string is null terminated: */
 		str[len] = '\0';
 
-		mutex_lock(&gpu->lock);
+		mutex_lock(&ctx->lock);
 
 		if (param == MSM_PARAM_COMM) {
 			paramp = &ctx->comm;
@@ -423,7 +423,7 @@  int adreno_set_param(struct msm_gpu *gpu, struct msm_file_private *ctx,
 		kfree(*paramp);
 		*paramp = str;
 
-		mutex_unlock(&gpu->lock);
+		mutex_unlock(&ctx->lock);
 
 		return 0;
 	}
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index 3d73b98d6a9c..ca0e89e46e13 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -581,6 +581,8 @@  static int context_init(struct drm_device *dev, struct drm_file *file)
 	rwlock_init(&ctx->queuelock);
 
 	kref_init(&ctx->ref);
+	ctx->pid = get_pid(task_pid(current));
+	mutex_init(&ctx->lock);
 	msm_submitqueue_init(dev, ctx);
 
 	ctx->aspace = msm_gpu_create_private_address_space(priv->gpu, current);
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index c403912d13ab..f0f4f845c32d 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -327,18 +327,17 @@  find_submit(struct msm_ringbuffer *ring, uint32_t fence)
 
 static void retire_submits(struct msm_gpu *gpu);
 
-static void get_comm_cmdline(struct msm_gem_submit *submit, char **comm, char **cmd)
+static void get_comm_cmdline(struct msm_file_private *ctx, char **comm, char **cmd)
 {
-	struct msm_file_private *ctx = submit->queue->ctx;
 	struct task_struct *task;
 
-	WARN_ON(!mutex_is_locked(&submit->gpu->lock));
-
 	/* Note that kstrdup will return NULL if argument is NULL: */
+	mutex_lock(&ctx->lock);
 	*comm = kstrdup(ctx->comm, GFP_KERNEL);
 	*cmd  = kstrdup(ctx->cmdline, GFP_KERNEL);
+	mutex_unlock(&ctx->lock);
 
-	task = get_pid_task(submit->pid, PIDTYPE_PID);
+	task = get_pid_task(ctx->pid, PIDTYPE_PID);
 	if (!task)
 		return;
 
@@ -372,7 +371,7 @@  static void recover_worker(struct kthread_work *work)
 		if (submit->aspace)
 			submit->aspace->faults++;
 
-		get_comm_cmdline(submit, &comm, &cmd);
+		get_comm_cmdline(submit->queue->ctx, &comm, &cmd);
 
 		if (comm && cmd) {
 			DRM_DEV_ERROR(dev->dev, "%s: offending task: %s (%s)\n",
@@ -460,7 +459,7 @@  static void fault_worker(struct kthread_work *work)
 		goto resume_smmu;
 
 	if (submit) {
-		get_comm_cmdline(submit, &comm, &cmd);
+		get_comm_cmdline(submit->queue->ctx, &comm, &cmd);
 
 		/*
 		 * When we get GPU iova faults, we can get 1000s of them,
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index 7a4fa1b8655b..b2023a42116b 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -377,17 +377,25 @@  struct msm_file_private {
 	 */
 	int sysprof;
 
+	/** @pid: Process that opened this file. */
+	struct pid *pid;
+
+	/**
+	 * lock: Protects comm and cmdline
+	 */
+	struct mutex lock;
+
 	/**
 	 * comm: Overridden task comm, see MSM_PARAM_COMM
 	 *
-	 * Accessed under msm_gpu::lock
+	 * Accessed under msm_file_private::lock
 	 */
 	char *comm;
 
 	/**
 	 * cmdline: Overridden task cmdline, see MSM_PARAM_CMDLINE
 	 *
-	 * Accessed under msm_gpu::lock
+	 * Accessed under msm_file_private::lock
 	 */
 	char *cmdline;
 
diff --git a/drivers/gpu/drm/msm/msm_submitqueue.c b/drivers/gpu/drm/msm/msm_submitqueue.c
index 0e803125a325..0444ba04fa06 100644
--- a/drivers/gpu/drm/msm/msm_submitqueue.c
+++ b/drivers/gpu/drm/msm/msm_submitqueue.c
@@ -61,6 +61,7 @@  void __msm_file_private_destroy(struct kref *kref)
 	}
 
 	msm_gem_address_space_put(ctx->aspace);
+	put_pid(ctx->pid);
 	kfree(ctx->comm);
 	kfree(ctx->cmdline);
 	kfree(ctx);