diff mbox series

[3/4] drm/msm: Add SYSPROF param

Message ID 20220303194758.710358-4-robdclark@gmail.com
State New
Headers show
Series drm/msm: Clear perf counters across context switch | expand

Commit Message

Rob Clark March 3, 2022, 7:46 p.m. UTC
From: Rob Clark <robdclark@chromium.org>

Add a SYSPROF param for system profiling tools like Mesa's pps-producer
(perfetto) to control behavior related to system-wide performance
counter collection.  In particular, for profiling, one wants to ensure
that GPU context switches do not effect perfcounter state, and might
want to suppress suspend (which would cause counters to lose state).

Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/adreno/adreno_gpu.c |  4 +++
 drivers/gpu/drm/msm/msm_drv.c           |  8 ++++++
 drivers/gpu/drm/msm/msm_gpu.h           | 27 ++++++++++++++++++++
 drivers/gpu/drm/msm/msm_submitqueue.c   | 34 +++++++++++++++++++++++++
 include/uapi/drm/msm_drm.h              |  1 +
 5 files changed, 74 insertions(+)

Comments

Rob Clark March 3, 2022, 9:47 p.m. UTC | #1
On Thu, Mar 3, 2022 at 1:17 PM Rob Clark <robdclark@gmail.com> wrote:
>
> On Thu, Mar 3, 2022 at 12:47 PM Stephen Boyd <swboyd@chromium.org> wrote:
> >
> > Quoting Rob Clark (2022-03-03 11:46:47)
> > > diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
> > > index fde9a29f884e..0ba1dbd4e50f 100644
> > > --- a/drivers/gpu/drm/msm/msm_gpu.h
> > > +++ b/drivers/gpu/drm/msm/msm_gpu.h
> > > @@ -330,6 +337,24 @@ struct msm_file_private {
> > >         struct kref ref;
> > >         int seqno;
> > >
> > > +       /**
> > > +        * sysprof:
> > > +        *
> > > +        * The value of MSM_PARAM_SYSPROF set by userspace.  This is
> > > +        * intended to be used by system profiling tools like Mesa's
> > > +        * pps-producer (perfetto), and restricted to CAP_SYS_ADMIN.
> > > +        *
> > > +        * Setting a value of 1 will preserve performance counters across
> > > +        * context switches.  Setting a value of 2 will in addition
> > > +        * suppress suspend.  (Performance counters loose  state across
> >
> > s/loose  /lose/
>
> fixed locally
>
> > > +        * power collapse, which is undesirable for profiling in some
> > > +        * cases.)
> > > +        *
> > > +        * The value automatically reverts to zero when the drm device
> > > +        * file is closed.
> > > +        */
> > > +       int sysprof;
> > > +
> > >         /**
> > >          * elapsed:
> > >          *
> > > diff --git a/drivers/gpu/drm/msm/msm_submitqueue.c b/drivers/gpu/drm/msm/msm_submitqueue.c
> > > index 7cb158bcbcf6..4179db54ac93 100644
> > > --- a/drivers/gpu/drm/msm/msm_submitqueue.c
> > > +++ b/drivers/gpu/drm/msm/msm_submitqueue.c
> > > @@ -7,6 +7,40 @@
> > >
> > >  #include "msm_gpu.h"
> > >
> > > +int msm_file_private_set_sysprof(struct msm_file_private *ctx,
> > > +                                struct msm_gpu *gpu, int sysprof)
> > > +{
> > > +       /* unwind old value first: */
> > > +       switch (ctx->sysprof) {
> > > +       case 2:
> > > +               pm_runtime_put_autosuspend(&gpu->pdev->dev);
> > > +               fallthrough;
> > > +       case 1:
> > > +               refcount_dec(&gpu->sysprof_active);
> > > +               fallthrough;
> > > +       case 0:
> > > +               break;
> > > +       }
> > > +
> > > +       /* then apply new value: */
> >
> > It would be safer to swap this. Otherwise a set when the values are at
> > "1" would drop to "zero" here and potentially trigger some glitch,
> > whereas incrementing one more time and then dropping the previous state
> > would avoid that short blip.
> >
> > > +       switch (sysprof) {
> > > +       default:
> > > +               return -EINVAL;
> >
> > This will become more complicated though.
>
> Right, that is why I took the "unwind first and then re-apply"
> approach.. in practice I expect userspace to set the value before it
> starts sampling counter values, so I wasn't too concerned about this
> racing with a submit and clearing the counters.  (Plus any glitch if
> userspace did decide to change it dynamically would just be transient
> and not really a big deal.)

Actually I could just swap the two switch's.. the result would be that
an EINVAL would not change the state instead of dropping the state to
zero.  Maybe that is better anyways

BR,
-R


> BR,
> -R
>
> > > +       case 2:
> > > +               pm_runtime_get_sync(&gpu->pdev->dev);
> > > +               fallthrough;
> > > +       case 1:
> > > +               refcount_inc(&gpu->sysprof_active);
> > > +               fallthrough;
> > > +       case 0:
> > > +               break;
> > > +       }
> > > +
> > > +       ctx->sysprof = sysprof;
> > > +
> > > +       return 0;
> > > +}
Stephen Boyd March 3, 2022, 10:36 p.m. UTC | #2
Quoting Rob Clark (2022-03-03 13:47:14)
> On Thu, Mar 3, 2022 at 1:17 PM Rob Clark <robdclark@gmail.com> wrote:
> >
> > On Thu, Mar 3, 2022 at 12:47 PM Stephen Boyd <swboyd@chromium.org> wrote:
> > >
> > > Quoting Rob Clark (2022-03-03 11:46:47)
> > > > +
> > > > +       /* then apply new value: */
> > >
> > > It would be safer to swap this. Otherwise a set when the values are at
> > > "1" would drop to "zero" here and potentially trigger some glitch,
> > > whereas incrementing one more time and then dropping the previous state
> > > would avoid that short blip.
> > >
> > > > +       switch (sysprof) {
> > > > +       default:
> > > > +               return -EINVAL;
> > >
> > > This will become more complicated though.
> >
> > Right, that is why I took the "unwind first and then re-apply"
> > approach.. in practice I expect userspace to set the value before it
> > starts sampling counter values, so I wasn't too concerned about this
> > racing with a submit and clearing the counters.  (Plus any glitch if
> > userspace did decide to change it dynamically would just be transient
> > and not really a big deal.)
>
> Actually I could just swap the two switch's.. the result would be that
> an EINVAL would not change the state instead of dropping the state to
> zero.  Maybe that is better anyways
>

Yeah it isn't clear to me what should happen if the new state is
invalid. Outright rejection is probably better than replacing the
previous state with an invalid state.
diff mbox series

Patch

diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index 6a37d409653b..c91ea363c373 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -287,6 +287,10 @@  int adreno_set_param(struct msm_gpu *gpu, struct msm_file_private *ctx,
 		     uint32_t param, uint64_t value)
 {
 	switch (param) {
+	case MSM_PARAM_SYSPROF:
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+		return msm_file_private_set_sysprof(ctx, gpu, value);
 	default:
 		DBG("%s: invalid param: %u", gpu->name, param);
 		return -EINVAL;
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index ca9a8a866292..780f9748aaaf 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -559,8 +559,16 @@  static void context_close(struct msm_file_private *ctx)
 
 static void msm_postclose(struct drm_device *dev, struct drm_file *file)
 {
+	struct msm_drm_private *priv = dev->dev_private;
 	struct msm_file_private *ctx = file->driver_priv;
 
+	/*
+	 * It is not possible to set sysprof param to non-zero if gpu
+	 * is not initialized:
+	 */
+	if (priv->gpu)
+		msm_file_private_set_sysprof(ctx, priv->gpu, 0);
+
 	context_close(ctx);
 }
 
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index fde9a29f884e..0ba1dbd4e50f 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -160,6 +160,13 @@  struct msm_gpu {
 	struct msm_ringbuffer *rb[MSM_GPU_MAX_RINGS];
 	int nr_rings;
 
+	/**
+	 * sysprof_active:
+	 *
+	 * The count of contexts that have enabled system profiling.
+	 */
+	refcount_t sysprof_active;
+
 	/**
 	 * cur_ctx_seqno:
 	 *
@@ -330,6 +337,24 @@  struct msm_file_private {
 	struct kref ref;
 	int seqno;
 
+	/**
+	 * sysprof:
+	 *
+	 * The value of MSM_PARAM_SYSPROF set by userspace.  This is
+	 * intended to be used by system profiling tools like Mesa's
+	 * pps-producer (perfetto), and restricted to CAP_SYS_ADMIN.
+	 *
+	 * Setting a value of 1 will preserve performance counters across
+	 * context switches.  Setting a value of 2 will in addition
+	 * suppress suspend.  (Performance counters loose  state across
+	 * power collapse, which is undesirable for profiling in some
+	 * cases.)
+	 *
+	 * The value automatically reverts to zero when the drm device
+	 * file is closed.
+	 */
+	int sysprof;
+
 	/**
 	 * elapsed:
 	 *
@@ -545,6 +570,8 @@  void msm_submitqueue_close(struct msm_file_private *ctx);
 
 void msm_submitqueue_destroy(struct kref *kref);
 
+int msm_file_private_set_sysprof(struct msm_file_private *ctx,
+				 struct msm_gpu *gpu, int sysprof);
 void __msm_file_private_destroy(struct kref *kref);
 
 static inline void msm_file_private_put(struct msm_file_private *ctx)
diff --git a/drivers/gpu/drm/msm/msm_submitqueue.c b/drivers/gpu/drm/msm/msm_submitqueue.c
index 7cb158bcbcf6..4179db54ac93 100644
--- a/drivers/gpu/drm/msm/msm_submitqueue.c
+++ b/drivers/gpu/drm/msm/msm_submitqueue.c
@@ -7,6 +7,40 @@ 
 
 #include "msm_gpu.h"
 
+int msm_file_private_set_sysprof(struct msm_file_private *ctx,
+				 struct msm_gpu *gpu, int sysprof)
+{
+	/* unwind old value first: */
+	switch (ctx->sysprof) {
+	case 2:
+		pm_runtime_put_autosuspend(&gpu->pdev->dev);
+		fallthrough;
+	case 1:
+		refcount_dec(&gpu->sysprof_active);
+		fallthrough;
+	case 0:
+		break;
+	}
+
+	/* then apply new value: */
+	switch (sysprof) {
+	default:
+		return -EINVAL;
+	case 2:
+		pm_runtime_get_sync(&gpu->pdev->dev);
+		fallthrough;
+	case 1:
+		refcount_inc(&gpu->sysprof_active);
+		fallthrough;
+	case 0:
+		break;
+	}
+
+	ctx->sysprof = sysprof;
+
+	return 0;
+}
+
 void __msm_file_private_destroy(struct kref *kref)
 {
 	struct msm_file_private *ctx = container_of(kref,
diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h
index a1bb2a17a8b9..07efc8033492 100644
--- a/include/uapi/drm/msm_drm.h
+++ b/include/uapi/drm/msm_drm.h
@@ -81,6 +81,7 @@  struct drm_msm_timespec {
 #define MSM_PARAM_PP_PGTABLE 0x08  /* RO: Deprecated, always returns zero */
 #define MSM_PARAM_FAULTS     0x09  /* RO */
 #define MSM_PARAM_SUSPENDS   0x0a  /* RO */
+#define MSM_PARAM_SYSPROF    0x0b  /* WO: 1 preserves perfcntrs, 2 also disables suspend */
 
 /* For backwards compat.  The original support for preemption was based on
  * a single ring per priority level so # of priority levels equals the #