diff mbox series

[5/6] perf/core: Use ioctl to communicate driver configuration to kernel

Message ID 1530570810-28929-6-git-send-email-mathieu.poirier@linaro.org
State New
Headers show
Series perf: Add ioctl for PMU driver configuration | expand

Commit Message

Mathieu Poirier July 2, 2018, 10:33 p.m. UTC
This patch follows what has been done for filters by adding an ioctl()
option to communicate to the kernel arbitrary PMU specific configuration
that don't fit in the conventional struct perf_event_attr to the kernel.

Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>

---
 include/linux/perf_event.h |  54 ++++++++++++++++++++++
 kernel/events/core.c       | 110 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 164 insertions(+)

-- 
2.7.4

Comments

Alexander Shishkin July 3, 2018, 10:03 a.m. UTC | #1
On Mon, Jul 02, 2018 at 04:33:29PM -0600, Mathieu Poirier wrote:
> This patch follows what has been done for filters by adding an ioctl()

> option to communicate to the kernel arbitrary PMU specific configuration

> that don't fit in the conventional struct perf_event_attr to the kernel.


Ok, so what *is* the PMU specific configuration that doesn't fit in the
attribute and needs to be re-configured by the driver using the generation
tracking?

> Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>

> ---

>  include/linux/perf_event.h |  54 ++++++++++++++++++++++

>  kernel/events/core.c       | 110 +++++++++++++++++++++++++++++++++++++++++++++

>  2 files changed, 164 insertions(+)

> 

> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h

> index 4d9c8f30ca6c..6e06b63c262f 100644

> --- a/include/linux/perf_event.h

> +++ b/include/linux/perf_event.h

> @@ -178,6 +178,12 @@ struct hw_perf_event {

>  	/* Last sync'ed generation of filters */

>  	unsigned long			addr_filters_gen;

>  

> +	/* PMU driver configuration */

> +	void				*drv_config;

> +

> +	/* Last sync'ed generation of driver config */

> +	unsigned long			drv_config_gen;

> +

>  /*

>   * hw_perf_event::state flags; used to track the PERF_EF_* state.

>   */

> @@ -447,6 +453,26 @@ struct pmu {

>  	 * Filter events for PMU-specific reasons.

>  	 */

>  	int (*filter_match)		(struct perf_event *event); /* optional */

> +

> +	/*

> +	 * Valiate complex PMU configuration that don't fit in the

> +	 * perf_event_attr struct.  Returns a PMU specific pointer or an error

> +	 * value < 0.

> +	 *

> +	 * As with addr_filters_validate(), runs in the context of the ioctl()

> +	 * process and is not serialized with the rest of the PMU callbacks.


Yes, but what is it? I get it that it's probably in one of the other
patches, but we still need to mention it somewhere here.

> +	 */

> +	void *(*drv_config_validate)	(struct perf_event *event,

> +					 char *config_str);

> +

> +	/* Synchronize PMU driver configuration */

> +	void (*drv_config_sync)		(struct perf_event *event);

> +

> +	/*

> +	 * Release PMU specific configuration acquired by

> +	 * drv_config_validate()

> +	 */

> +	void (*drv_config_free)		(void *drv_data);

>  };

>  

>  enum perf_addr_filter_action_t {

> @@ -489,6 +515,11 @@ struct perf_addr_filters_head {

>  	unsigned int		nr_file_filters;

>  };

>  

> +struct perf_drv_config {

> +	void		*drv_config;

> +	raw_spinlock_t	lock;

> +};

> +

>  /**

>   * enum perf_event_state - the states of a event

>   */

> @@ -668,6 +699,10 @@ struct perf_event {

>  	unsigned long			*addr_filters_offs;

>  	unsigned long			addr_filters_gen;

>  

> +	/* PMU driver specific configuration */

> +	struct perf_drv_config		drv_config;

> +	unsigned long			drv_config_gen;

> +

>  	void (*destroy)(struct perf_event *);

>  	struct rcu_head			rcu_head;

>  

> @@ -1234,6 +1269,13 @@ static inline bool has_addr_filter(struct perf_event *event)

>  	return event->pmu->nr_addr_filters;

>  }

>  

> +static inline bool has_drv_config(struct perf_event *event)

> +{

> +	return event->pmu->drv_config_validate &&

> +	       event->pmu->drv_config_sync &&

> +	       event->pmu->drv_config_free;

> +}

> +

>  /*

>   * An inherited event uses parent's filters

>   */

> @@ -1248,7 +1290,19 @@ perf_event_addr_filters(struct perf_event *event)

>  	return ifh;

>  }

>  

> +static inline struct perf_drv_config *

> +perf_event_get_drv_config(struct perf_event *event)

> +{

> +	struct perf_drv_config *cfg = &event->drv_config;

> +

> +	if (event->parent)

> +		cfg = &event->parent->drv_config;

> +

> +	return cfg;

> +}

> +

>  extern void perf_event_addr_filters_sync(struct perf_event *event);

> +extern void perf_event_drv_config_sync(struct perf_event *event);

>  

>  extern int perf_output_begin(struct perf_output_handle *handle,

>  			     struct perf_event *event, unsigned int size);

> diff --git a/kernel/events/core.c b/kernel/events/core.c

> index 8f0434a9951a..701839866789 100644

> --- a/kernel/events/core.c

> +++ b/kernel/events/core.c

> @@ -2829,6 +2829,29 @@ void perf_event_addr_filters_sync(struct perf_event *event)

>  }

>  EXPORT_SYMBOL_GPL(perf_event_addr_filters_sync);

>  

> +/*

> + * PMU driver configuration works the same way as filter management above,

> + * but without the need to deal with memory mapping.  Driver configuration

> + * arrives through the SET_DRV_CONFIG ioctl() where it is validated and applied

> + * to the event.  When the PMU is ready it calls perf_event_drv_config_sync() to

> + * bring the configuration information within reach of the PMU.


Wait a second. The reason why we dance around with the generations of filters
is the locking order of ctx::mutex vs mmap_sem. In an mmap path, where we're
notified about mapping changes, we're called under the latter, and we'd need
to grab the former to update the event configuration. In your case, the
update comes in via perf_ioctl(), where we're already holding the ctx::mutex,
so you can just kick the PMU right there, via an event_function_call() or
perf_event_stop(restart=1). In the latter case, your pmu::start() would just
grab the new configuration. Should also be about 90% less code. :)

Would this work for you or am I misunderstanding something about your
requirements?

> + */

> +void perf_event_drv_config_sync(struct perf_event *event)

> +{

> +	struct perf_drv_config *drv_config = perf_event_get_drv_config(event);

> +

> +	if (!has_drv_config(event))

> +		return;

> +

> +	raw_spin_lock(&drv_config->lock);

> +	if (event->drv_config_gen != event->hw.drv_config_gen) {

> +		event->pmu->drv_config_sync(event);

> +		event->hw.drv_config_gen = event->drv_config_gen;

> +	}

> +	raw_spin_unlock(&drv_config->lock);

> +}

> +EXPORT_SYMBOL_GPL(perf_event_drv_config_sync);

> +

>  static int _perf_event_refresh(struct perf_event *event, int refresh)

>  {

>  	/*

> @@ -4410,6 +4433,7 @@ static bool exclusive_event_installable(struct perf_event *event,

>  

>  static void perf_addr_filters_splice(struct perf_event *event,

>  				       struct list_head *head);

> +static void perf_drv_config_splice(struct perf_event *event, void *drv_data);

>  

>  static void _free_event(struct perf_event *event)

>  {

> @@ -4440,6 +4464,7 @@ static void _free_event(struct perf_event *event)

>  	perf_event_free_bpf_prog(event);

>  	perf_addr_filters_splice(event, NULL);

>  	kfree(event->addr_filters_offs);

> +	perf_drv_config_splice(event, NULL);

>  

>  	if (event->destroy)

>  		event->destroy(event);

> @@ -5002,6 +5027,8 @@ static inline int perf_fget_light(int fd, struct fd *p)

>  static int perf_event_set_output(struct perf_event *event,

>  				 struct perf_event *output_event);

>  static int perf_event_set_filter(struct perf_event *event, void __user *arg);

> +static int perf_event_set_drv_config(struct perf_event *event,

> +				     void __user *arg);

>  static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd);

>  static int perf_copy_attr(struct perf_event_attr __user *uattr,

>  			  struct perf_event_attr *attr);

> @@ -5088,6 +5115,10 @@ static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned lon

>  

>  		return perf_event_modify_attr(event,  &new_attr);

>  	}

> +

> +	case PERF_EVENT_IOC_SET_DRV_CONFIG:

> +		return perf_event_set_drv_config(event, (void __user *)arg);

> +

>  	default:

>  		return -ENOTTY;

>  	}

> @@ -9086,6 +9117,85 @@ static int perf_event_set_filter(struct perf_event *event, void __user *arg)

>  	return ret;

>  }

>  

> +static void perf_drv_config_splice(struct perf_event *event, void *drv_data)


I think the address filter counterpart is called "splice" because it takes
a list_head as a parameter and splices that list into the list of filters.
I'd suggest that this one is more like "replace", but up to you.

> +{

> +	unsigned long flags;

> +	void *old_drv_data;

> +

> +	if (!has_drv_config(event))

> +		return;

> +

> +	/* Children take their configuration from their parent */

> +	if (event->parent)

> +		return;

> +

> +	raw_spin_lock_irqsave(&event->drv_config.lock, flags);

> +

> +	old_drv_data = event->drv_config.drv_config;

> +	event->drv_config.drv_config = drv_data;


Now I'm thinking: should we reset the generation here (and also in the
address filters bit)? At least, it deserves a comment.

> +

> +	raw_spin_unlock_irqrestore(&event->drv_config.lock, flags);

> +

> +	event->pmu->drv_config_free(old_drv_data);

> +}

> +

> +static void perf_event_drv_config_apply(struct perf_event *event)

> +{

> +	unsigned long flags;

> +	struct perf_drv_config *drv_config = perf_event_get_drv_config(event);

> +

> +	/* Notify event that a new configuration is available */

> +	raw_spin_lock_irqsave(&drv_config->lock, flags);

> +	event->drv_config_gen++;

> +	raw_spin_unlock_irqrestore(&drv_config->lock, flags);


Should we also mention how this new locks fits into the existing locking
order?

Regards,
--
Alex
Alexander Shishkin July 3, 2018, 10:56 a.m. UTC | #2
On Tue, Jul 03, 2018 at 01:03:48PM +0300, Alexander Shishkin wrote:
> On Mon, Jul 02, 2018 at 04:33:29PM -0600, Mathieu Poirier wrote:

> > +/*

> > + * PMU driver configuration works the same way as filter management above,

> > + * but without the need to deal with memory mapping.  Driver configuration

> > + * arrives through the SET_DRV_CONFIG ioctl() where it is validated and applied

> > + * to the event.  When the PMU is ready it calls perf_event_drv_config_sync() to

> > + * bring the configuration information within reach of the PMU.

> 

> Wait a second. The reason why we dance around with the generations of filters

> is the locking order of ctx::mutex vs mmap_sem. In an mmap path, where we're

> notified about mapping changes, we're called under the latter, and we'd need

> to grab the former to update the event configuration. In your case, the

> update comes in via perf_ioctl(), where we're already holding the ctx::mutex,

> so you can just kick the PMU right there, via an event_function_call() or

> perf_event_stop(restart=1). In the latter case, your pmu::start() would just

> grab the new configuration. Should also be about 90% less code. :)


Also, since it affects the AUX buffer configuration, it is probably a one
time ioctl command that you issue before you mmap the buffer. If that's
the case, you don't even have to worry about stopping the event, as it
shouldn't be running, because without the buffer perf_aux_output_begin()
should fail and so should the pmu::add() iirc.

Regards,
--
Alex
Jiri Olsa July 3, 2018, 1:40 p.m. UTC | #3
On Mon, Jul 02, 2018 at 04:33:29PM -0600, Mathieu Poirier wrote:

SNIP

> +static int

> +perf_event_process_drv_config(struct perf_event *event, char *config_str)

> +{

> +	int ret = -EINVAL;

> +	void *drv_data;

> +

> +	/* Make sure ctx.mutex it held */

> +	lockdep_assert_held(&event->ctx->mutex);

> +

> +	/* Children take their configuration from their parent */

> +	if (WARN_ON_ONCE(event->parent))

> +		goto out;

> +

> +	drv_data = event->pmu->drv_config_validate(event, config_str);

> +	if (!drv_data)

> +		goto out;


what's this check for? doc does not mention that,
also the coresight callback does not return NULL

jirka
Jiri Olsa July 3, 2018, 1:41 p.m. UTC | #4
On Mon, Jul 02, 2018 at 04:33:29PM -0600, Mathieu Poirier wrote:

SNIP

> +	ret = 0;

> +out:

> +	return ret;

> +}

> +

> +static int perf_event_set_drv_config(struct perf_event *event, void __user *arg)

> +{

> +	int ret = -EINVAL;

> +	char *config_str;

> +

> +	config_str = strndup_user(arg, PAGE_SIZE);

> +	if (IS_ERR(config_str))

> +		return PTR_ERR(config_str);

> +

> +	if (has_drv_config(event))


would it be better to make this check before
the alloc/copy data from user takes place?

jirka
Mathieu Poirier July 3, 2018, 5:47 p.m. UTC | #5
On Tue, 3 Jul 2018 at 07:40, Jiri Olsa <jolsa@redhat.com> wrote:
>

> On Mon, Jul 02, 2018 at 04:33:29PM -0600, Mathieu Poirier wrote:

>

> SNIP

>

> > +static int

> > +perf_event_process_drv_config(struct perf_event *event, char *config_str)

> > +{

> > +     int ret = -EINVAL;

> > +     void *drv_data;

> > +

> > +     /* Make sure ctx.mutex it held */

> > +     lockdep_assert_held(&event->ctx->mutex);

> > +

> > +     /* Children take their configuration from their parent */

> > +     if (WARN_ON_ONCE(event->parent))

> > +             goto out;

> > +

> > +     drv_data = event->pmu->drv_config_validate(event, config_str);

> > +     if (!drv_data)

> > +             goto out;

>

> what's this check for? doc does not mention that,

> also the coresight callback does not return NULL

>


Very true - thanks for pointing this out.

Mathieu

> jirka
Mathieu Poirier July 3, 2018, 8:30 p.m. UTC | #6
Hi Alex,

On Tue, 3 Jul 2018 at 04:03, Alexander Shishkin
<alexander.shishkin@linux.intel.com> wrote:
>

> On Mon, Jul 02, 2018 at 04:33:29PM -0600, Mathieu Poirier wrote:

> > This patch follows what has been done for filters by adding an ioctl()

> > option to communicate to the kernel arbitrary PMU specific configuration

> > that don't fit in the conventional struct perf_event_attr to the kernel.

>

> Ok, so what *is* the PMU specific configuration that doesn't fit in the

> attribute and needs to be re-configured by the driver using the generation

> tracking?

>


In this patchset I'm am after the specification of sink information
for each event, i.e what sink a CPU is supposed to use for the
session.  I simply don't see putting something that PMU specific in
the generic perf_event_attr structure.  I also intend to use the same
ioctl mechanism to communicate complex tracer configuration for
sequencers, counters and input events.  I don't see a nice way of
doing that from the perf_event_attr, and that is even without thinking
about the different flavours of tracers out there, all with their own
features.

I've looked around and the only clean way I found to support this is
via an ioctl().  That way each tracer can easily identify the sink it
should be using without smearing the perf_event_attr structure.  I
would be happy to explore a different avenue should you think of
something.

Thanks,
Mathieu

> > Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>

> > ---

> >  include/linux/perf_event.h |  54 ++++++++++++++++++++++

> >  kernel/events/core.c       | 110 +++++++++++++++++++++++++++++++++++++++++++++

> >  2 files changed, 164 insertions(+)

> >

> > diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h

> > index 4d9c8f30ca6c..6e06b63c262f 100644

> > --- a/include/linux/perf_event.h

> > +++ b/include/linux/perf_event.h

> > @@ -178,6 +178,12 @@ struct hw_perf_event {

> >       /* Last sync'ed generation of filters */

> >       unsigned long                   addr_filters_gen;

> >

> > +     /* PMU driver configuration */

> > +     void                            *drv_config;

> > +

> > +     /* Last sync'ed generation of driver config */

> > +     unsigned long                   drv_config_gen;

> > +

> >  /*

> >   * hw_perf_event::state flags; used to track the PERF_EF_* state.

> >   */

> > @@ -447,6 +453,26 @@ struct pmu {

> >        * Filter events for PMU-specific reasons.

> >        */

> >       int (*filter_match)             (struct perf_event *event); /* optional */

> > +

> > +     /*

> > +      * Valiate complex PMU configuration that don't fit in the

> > +      * perf_event_attr struct.  Returns a PMU specific pointer or an error

> > +      * value < 0.

> > +      *

> > +      * As with addr_filters_validate(), runs in the context of the ioctl()

> > +      * process and is not serialized with the rest of the PMU callbacks.

>

> Yes, but what is it? I get it that it's probably in one of the other

> patches, but we still need to mention it somewhere here.

>

> > +      */

> > +     void *(*drv_config_validate)    (struct perf_event *event,

> > +                                      char *config_str);

> > +

> > +     /* Synchronize PMU driver configuration */

> > +     void (*drv_config_sync)         (struct perf_event *event);

> > +

> > +     /*

> > +      * Release PMU specific configuration acquired by

> > +      * drv_config_validate()

> > +      */

> > +     void (*drv_config_free)         (void *drv_data);

> >  };

> >

> >  enum perf_addr_filter_action_t {

> > @@ -489,6 +515,11 @@ struct perf_addr_filters_head {

> >       unsigned int            nr_file_filters;

> >  };

> >

> > +struct perf_drv_config {

> > +     void            *drv_config;

> > +     raw_spinlock_t  lock;

> > +};

> > +

> >  /**

> >   * enum perf_event_state - the states of a event

> >   */

> > @@ -668,6 +699,10 @@ struct perf_event {

> >       unsigned long                   *addr_filters_offs;

> >       unsigned long                   addr_filters_gen;

> >

> > +     /* PMU driver specific configuration */

> > +     struct perf_drv_config          drv_config;

> > +     unsigned long                   drv_config_gen;

> > +

> >       void (*destroy)(struct perf_event *);

> >       struct rcu_head                 rcu_head;

> >

> > @@ -1234,6 +1269,13 @@ static inline bool has_addr_filter(struct perf_event *event)

> >       return event->pmu->nr_addr_filters;

> >  }

> >

> > +static inline bool has_drv_config(struct perf_event *event)

> > +{

> > +     return event->pmu->drv_config_validate &&

> > +            event->pmu->drv_config_sync &&

> > +            event->pmu->drv_config_free;

> > +}

> > +

> >  /*

> >   * An inherited event uses parent's filters

> >   */

> > @@ -1248,7 +1290,19 @@ perf_event_addr_filters(struct perf_event *event)

> >       return ifh;

> >  }

> >

> > +static inline struct perf_drv_config *

> > +perf_event_get_drv_config(struct perf_event *event)

> > +{

> > +     struct perf_drv_config *cfg = &event->drv_config;

> > +

> > +     if (event->parent)

> > +             cfg = &event->parent->drv_config;

> > +

> > +     return cfg;

> > +}

> > +

> >  extern void perf_event_addr_filters_sync(struct perf_event *event);

> > +extern void perf_event_drv_config_sync(struct perf_event *event);

> >

> >  extern int perf_output_begin(struct perf_output_handle *handle,

> >                            struct perf_event *event, unsigned int size);

> > diff --git a/kernel/events/core.c b/kernel/events/core.c

> > index 8f0434a9951a..701839866789 100644

> > --- a/kernel/events/core.c

> > +++ b/kernel/events/core.c

> > @@ -2829,6 +2829,29 @@ void perf_event_addr_filters_sync(struct perf_event *event)

> >  }

> >  EXPORT_SYMBOL_GPL(perf_event_addr_filters_sync);

> >

> > +/*

> > + * PMU driver configuration works the same way as filter management above,

> > + * but without the need to deal with memory mapping.  Driver configuration

> > + * arrives through the SET_DRV_CONFIG ioctl() where it is validated and applied

> > + * to the event.  When the PMU is ready it calls perf_event_drv_config_sync() to

> > + * bring the configuration information within reach of the PMU.

>

> Wait a second. The reason why we dance around with the generations of filters

> is the locking order of ctx::mutex vs mmap_sem. In an mmap path, where we're

> notified about mapping changes, we're called under the latter, and we'd need

> to grab the former to update the event configuration. In your case, the

> update comes in via perf_ioctl(), where we're already holding the ctx::mutex,

> so you can just kick the PMU right there, via an event_function_call() or

> perf_event_stop(restart=1). In the latter case, your pmu::start() would just

> grab the new configuration. Should also be about 90% less code. :)

>

> Would this work for you or am I misunderstanding something about your

> requirements?

>

> > + */

> > +void perf_event_drv_config_sync(struct perf_event *event)

> > +{

> > +     struct perf_drv_config *drv_config = perf_event_get_drv_config(event);

> > +

> > +     if (!has_drv_config(event))

> > +             return;

> > +

> > +     raw_spin_lock(&drv_config->lock);

> > +     if (event->drv_config_gen != event->hw.drv_config_gen) {

> > +             event->pmu->drv_config_sync(event);

> > +             event->hw.drv_config_gen = event->drv_config_gen;

> > +     }

> > +     raw_spin_unlock(&drv_config->lock);

> > +}

> > +EXPORT_SYMBOL_GPL(perf_event_drv_config_sync);

> > +

> >  static int _perf_event_refresh(struct perf_event *event, int refresh)

> >  {

> >       /*

> > @@ -4410,6 +4433,7 @@ static bool exclusive_event_installable(struct perf_event *event,

> >

> >  static void perf_addr_filters_splice(struct perf_event *event,

> >                                      struct list_head *head);

> > +static void perf_drv_config_splice(struct perf_event *event, void *drv_data);

> >

> >  static void _free_event(struct perf_event *event)

> >  {

> > @@ -4440,6 +4464,7 @@ static void _free_event(struct perf_event *event)

> >       perf_event_free_bpf_prog(event);

> >       perf_addr_filters_splice(event, NULL);

> >       kfree(event->addr_filters_offs);

> > +     perf_drv_config_splice(event, NULL);

> >

> >       if (event->destroy)

> >               event->destroy(event);

> > @@ -5002,6 +5027,8 @@ static inline int perf_fget_light(int fd, struct fd *p)

> >  static int perf_event_set_output(struct perf_event *event,

> >                                struct perf_event *output_event);

> >  static int perf_event_set_filter(struct perf_event *event, void __user *arg);

> > +static int perf_event_set_drv_config(struct perf_event *event,

> > +                                  void __user *arg);

> >  static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd);

> >  static int perf_copy_attr(struct perf_event_attr __user *uattr,

> >                         struct perf_event_attr *attr);

> > @@ -5088,6 +5115,10 @@ static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned lon

> >

> >               return perf_event_modify_attr(event,  &new_attr);

> >       }

> > +

> > +     case PERF_EVENT_IOC_SET_DRV_CONFIG:

> > +             return perf_event_set_drv_config(event, (void __user *)arg);

> > +

> >       default:

> >               return -ENOTTY;

> >       }

> > @@ -9086,6 +9117,85 @@ static int perf_event_set_filter(struct perf_event *event, void __user *arg)

> >       return ret;

> >  }

> >

> > +static void perf_drv_config_splice(struct perf_event *event, void *drv_data)

>

> I think the address filter counterpart is called "splice" because it takes

> a list_head as a parameter and splices that list into the list of filters.

> I'd suggest that this one is more like "replace", but up to you.

>

> > +{

> > +     unsigned long flags;

> > +     void *old_drv_data;

> > +

> > +     if (!has_drv_config(event))

> > +             return;

> > +

> > +     /* Children take their configuration from their parent */

> > +     if (event->parent)

> > +             return;

> > +

> > +     raw_spin_lock_irqsave(&event->drv_config.lock, flags);

> > +

> > +     old_drv_data = event->drv_config.drv_config;

> > +     event->drv_config.drv_config = drv_data;

>

> Now I'm thinking: should we reset the generation here (and also in the

> address filters bit)? At least, it deserves a comment.

>

> > +

> > +     raw_spin_unlock_irqrestore(&event->drv_config.lock, flags);

> > +

> > +     event->pmu->drv_config_free(old_drv_data);

> > +}

> > +

> > +static void perf_event_drv_config_apply(struct perf_event *event)

> > +{

> > +     unsigned long flags;

> > +     struct perf_drv_config *drv_config = perf_event_get_drv_config(event);

> > +

> > +     /* Notify event that a new configuration is available */

> > +     raw_spin_lock_irqsave(&drv_config->lock, flags);

> > +     event->drv_config_gen++;

> > +     raw_spin_unlock_irqrestore(&drv_config->lock, flags);

>

> Should we also mention how this new locks fits into the existing locking

> order?

>

> Regards,

> --

> Alex
Mathieu Poirier July 3, 2018, 10 p.m. UTC | #7
On Tue, 3 Jul 2018 at 04:57, Alexander Shishkin
<alexander.shishkin@linux.intel.com> wrote:
>

> On Tue, Jul 03, 2018 at 01:03:48PM +0300, Alexander Shishkin wrote:

> > On Mon, Jul 02, 2018 at 04:33:29PM -0600, Mathieu Poirier wrote:

> > > +/*

> > > + * PMU driver configuration works the same way as filter management above,

> > > + * but without the need to deal with memory mapping.  Driver configuration

> > > + * arrives through the SET_DRV_CONFIG ioctl() where it is validated and applied

> > > + * to the event.  When the PMU is ready it calls perf_event_drv_config_sync() to

> > > + * bring the configuration information within reach of the PMU.

> >

> > Wait a second. The reason why we dance around with the generations of filters

> > is the locking order of ctx::mutex vs mmap_sem. In an mmap path, where we're

> > notified about mapping changes, we're called under the latter, and we'd need

> > to grab the former to update the event configuration. In your case, the

> > update comes in via perf_ioctl(), where we're already holding the ctx::mutex,

> > so you can just kick the PMU right there, via an event_function_call() or

> > perf_event_stop(restart=1). In the latter case, your pmu::start() would just

> > grab the new configuration. Should also be about 90% less code. :)

>

> Also, since it affects the AUX buffer configuration, it is probably a one

> time ioctl command that you issue before you mmap the buffer. If that's

> the case, you don't even have to worry about stopping the event, as it

> shouldn't be running, because without the buffer perf_aux_output_begin()

> should fail and so should the pmu::add() iirc.

>


The idea behind the current approach was to make the SET_DRV_CONFIG
ioctl() usable by other drivers where multiple ioctl() calls could be
performed while a session in ongoing.  I also opted to introduce a
_sync() function to let the PMU refresh its configuration at the time
of its own choosing rather than having to interrupt the session.

But all I need for coresight is to have available the sink information
and PMU configuration (in an upcoming patchset) by the time
setup_aux() is called.  You are correct, this is a one time
configuration and since the event isn't running there is no need for
locking - I should be able to access the PMU when the ioctl is called.

If you are fine with this bare-bone scenario and don't care much about
usability in different situation, I'll do a respin with minimal
functionality that cover my needs.

Thanks for the review,
Mathieu

> Regards,

> --

> Alex
Mathieu Poirier July 3, 2018, 10:03 p.m. UTC | #8
On Tue, 3 Jul 2018 at 04:03, Alexander Shishkin
<alexander.shishkin@linux.intel.com> wrote:
>

> On Mon, Jul 02, 2018 at 04:33:29PM -0600, Mathieu Poirier wrote:

> > This patch follows what has been done for filters by adding an ioctl()

> > option to communicate to the kernel arbitrary PMU specific configuration

> > that don't fit in the conventional struct perf_event_attr to the kernel.

>

> Ok, so what *is* the PMU specific configuration that doesn't fit in the

> attribute and needs to be re-configured by the driver using the generation

> tracking?

>

> > Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>

> > ---

> >  include/linux/perf_event.h |  54 ++++++++++++++++++++++

> >  kernel/events/core.c       | 110 +++++++++++++++++++++++++++++++++++++++++++++

> >  2 files changed, 164 insertions(+)

> >

> > diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h

> > index 4d9c8f30ca6c..6e06b63c262f 100644

> > --- a/include/linux/perf_event.h

> > +++ b/include/linux/perf_event.h

> > @@ -178,6 +178,12 @@ struct hw_perf_event {

> >       /* Last sync'ed generation of filters */

> >       unsigned long                   addr_filters_gen;

> >

> > +     /* PMU driver configuration */

> > +     void                            *drv_config;

> > +

> > +     /* Last sync'ed generation of driver config */

> > +     unsigned long                   drv_config_gen;

> > +

> >  /*

> >   * hw_perf_event::state flags; used to track the PERF_EF_* state.

> >   */

> > @@ -447,6 +453,26 @@ struct pmu {

> >        * Filter events for PMU-specific reasons.

> >        */

> >       int (*filter_match)             (struct perf_event *event); /* optional */

> > +

> > +     /*

> > +      * Valiate complex PMU configuration that don't fit in the

> > +      * perf_event_attr struct.  Returns a PMU specific pointer or an error

> > +      * value < 0.

> > +      *

> > +      * As with addr_filters_validate(), runs in the context of the ioctl()

> > +      * process and is not serialized with the rest of the PMU callbacks.

>

> Yes, but what is it? I get it that it's probably in one of the other

> patches, but we still need to mention it somewhere here.


I could write a more detailed description here, something about the
specification of sink and configuration of coresight specific features
(sequencers, counters, input/output) but I decided to keep things
generic.  Rather than doing that I thought it best to leave things
generic and let people look at the code for more details should they
want to.  Let me know what you think is best.

>

> > +      */

> > +     void *(*drv_config_validate)    (struct perf_event *event,

> > +                                      char *config_str);

> > +

> > +     /* Synchronize PMU driver configuration */

> > +     void (*drv_config_sync)         (struct perf_event *event);

> > +

> > +     /*

> > +      * Release PMU specific configuration acquired by

> > +      * drv_config_validate()

> > +      */

> > +     void (*drv_config_free)         (void *drv_data);

> >  };

> >

> >  enum perf_addr_filter_action_t {

> > @@ -489,6 +515,11 @@ struct perf_addr_filters_head {

> >       unsigned int            nr_file_filters;

> >  };

> >

> > +struct perf_drv_config {

> > +     void            *drv_config;

> > +     raw_spinlock_t  lock;

> > +};

> > +

> >  /**

> >   * enum perf_event_state - the states of a event

> >   */

> > @@ -668,6 +699,10 @@ struct perf_event {

> >       unsigned long                   *addr_filters_offs;

> >       unsigned long                   addr_filters_gen;

> >

> > +     /* PMU driver specific configuration */

> > +     struct perf_drv_config          drv_config;

> > +     unsigned long                   drv_config_gen;

> > +

> >       void (*destroy)(struct perf_event *);

> >       struct rcu_head                 rcu_head;

> >

> > @@ -1234,6 +1269,13 @@ static inline bool has_addr_filter(struct perf_event *event)

> >       return event->pmu->nr_addr_filters;

> >  }

> >

> > +static inline bool has_drv_config(struct perf_event *event)

> > +{

> > +     return event->pmu->drv_config_validate &&

> > +            event->pmu->drv_config_sync &&

> > +            event->pmu->drv_config_free;

> > +}

> > +

> >  /*

> >   * An inherited event uses parent's filters

> >   */

> > @@ -1248,7 +1290,19 @@ perf_event_addr_filters(struct perf_event *event)

> >       return ifh;

> >  }

> >

> > +static inline struct perf_drv_config *

> > +perf_event_get_drv_config(struct perf_event *event)

> > +{

> > +     struct perf_drv_config *cfg = &event->drv_config;

> > +

> > +     if (event->parent)

> > +             cfg = &event->parent->drv_config;

> > +

> > +     return cfg;

> > +}

> > +

> >  extern void perf_event_addr_filters_sync(struct perf_event *event);

> > +extern void perf_event_drv_config_sync(struct perf_event *event);

> >

> >  extern int perf_output_begin(struct perf_output_handle *handle,

> >                            struct perf_event *event, unsigned int size);

> > diff --git a/kernel/events/core.c b/kernel/events/core.c

> > index 8f0434a9951a..701839866789 100644

> > --- a/kernel/events/core.c

> > +++ b/kernel/events/core.c

> > @@ -2829,6 +2829,29 @@ void perf_event_addr_filters_sync(struct perf_event *event)

> >  }

> >  EXPORT_SYMBOL_GPL(perf_event_addr_filters_sync);

> >

> > +/*

> > + * PMU driver configuration works the same way as filter management above,

> > + * but without the need to deal with memory mapping.  Driver configuration

> > + * arrives through the SET_DRV_CONFIG ioctl() where it is validated and applied

> > + * to the event.  When the PMU is ready it calls perf_event_drv_config_sync() to

> > + * bring the configuration information within reach of the PMU.

>

> Wait a second. The reason why we dance around with the generations of filters

> is the locking order of ctx::mutex vs mmap_sem. In an mmap path, where we're

> notified about mapping changes, we're called under the latter, and we'd need

> to grab the former to update the event configuration. In your case, the

> update comes in via perf_ioctl(), where we're already holding the ctx::mutex,

> so you can just kick the PMU right there, via an event_function_call() or

> perf_event_stop(restart=1). In the latter case, your pmu::start() would just

> grab the new configuration. Should also be about 90% less code. :)

>

> Would this work for you or am I misunderstanding something about your

> requirements?

>

> > + */

> > +void perf_event_drv_config_sync(struct perf_event *event)

> > +{

> > +     struct perf_drv_config *drv_config = perf_event_get_drv_config(event);

> > +

> > +     if (!has_drv_config(event))

> > +             return;

> > +

> > +     raw_spin_lock(&drv_config->lock);

> > +     if (event->drv_config_gen != event->hw.drv_config_gen) {

> > +             event->pmu->drv_config_sync(event);

> > +             event->hw.drv_config_gen = event->drv_config_gen;

> > +     }

> > +     raw_spin_unlock(&drv_config->lock);

> > +}

> > +EXPORT_SYMBOL_GPL(perf_event_drv_config_sync);

> > +

> >  static int _perf_event_refresh(struct perf_event *event, int refresh)

> >  {

> >       /*

> > @@ -4410,6 +4433,7 @@ static bool exclusive_event_installable(struct perf_event *event,

> >

> >  static void perf_addr_filters_splice(struct perf_event *event,

> >                                      struct list_head *head);

> > +static void perf_drv_config_splice(struct perf_event *event, void *drv_data);

> >

> >  static void _free_event(struct perf_event *event)

> >  {

> > @@ -4440,6 +4464,7 @@ static void _free_event(struct perf_event *event)

> >       perf_event_free_bpf_prog(event);

> >       perf_addr_filters_splice(event, NULL);

> >       kfree(event->addr_filters_offs);

> > +     perf_drv_config_splice(event, NULL);

> >

> >       if (event->destroy)

> >               event->destroy(event);

> > @@ -5002,6 +5027,8 @@ static inline int perf_fget_light(int fd, struct fd *p)

> >  static int perf_event_set_output(struct perf_event *event,

> >                                struct perf_event *output_event);

> >  static int perf_event_set_filter(struct perf_event *event, void __user *arg);

> > +static int perf_event_set_drv_config(struct perf_event *event,

> > +                                  void __user *arg);

> >  static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd);

> >  static int perf_copy_attr(struct perf_event_attr __user *uattr,

> >                         struct perf_event_attr *attr);

> > @@ -5088,6 +5115,10 @@ static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned lon

> >

> >               return perf_event_modify_attr(event,  &new_attr);

> >       }

> > +

> > +     case PERF_EVENT_IOC_SET_DRV_CONFIG:

> > +             return perf_event_set_drv_config(event, (void __user *)arg);

> > +

> >       default:

> >               return -ENOTTY;

> >       }

> > @@ -9086,6 +9117,85 @@ static int perf_event_set_filter(struct perf_event *event, void __user *arg)

> >       return ret;

> >  }

> >

> > +static void perf_drv_config_splice(struct perf_event *event, void *drv_data)

>

> I think the address filter counterpart is called "splice" because it takes

> a list_head as a parameter and splices that list into the list of filters.

> I'd suggest that this one is more like "replace", but up to you.


I was on the fence about the naming convention...  I wanted the
drv_config mechanism to be as close as possible to the filters, that
way if someone understands the filters they'll easily understand the
drv_config.  But it may be more confusing than anything else - I'll
change it.

>

> > +{

> > +     unsigned long flags;

> > +     void *old_drv_data;

> > +

> > +     if (!has_drv_config(event))

> > +             return;

> > +

> > +     /* Children take their configuration from their parent */

> > +     if (event->parent)

> > +             return;

> > +

> > +     raw_spin_lock_irqsave(&event->drv_config.lock, flags);

> > +

> > +     old_drv_data = event->drv_config.drv_config;

> > +     event->drv_config.drv_config = drv_data;

>

> Now I'm thinking: should we reset the generation here (and also in the

> address filters bit)? At least, it deserves a comment.


I thought your way of doing things was quite nice, hence doing the
same...  xyz_splice() does exactly that, it replaces the value but
downstream won't see it for as long as xyz_gen isn't updated - and
that is exactly what xyz_apply() does.  I can add a comment to clarify
how things work but I think we should keep the current scheme.

>

> > +

> > +     raw_spin_unlock_irqrestore(&event->drv_config.lock, flags);

> > +

> > +     event->pmu->drv_config_free(old_drv_data);

> > +}

> > +

> > +static void perf_event_drv_config_apply(struct perf_event *event)

> > +{

> > +     unsigned long flags;

> > +     struct perf_drv_config *drv_config = perf_event_get_drv_config(event);

> > +

> > +     /* Notify event that a new configuration is available */

> > +     raw_spin_lock_irqsave(&drv_config->lock, flags);

> > +     event->drv_config_gen++;

> > +     raw_spin_unlock_irqrestore(&drv_config->lock, flags);

>

> Should we also mention how this new locks fits into the existing locking

> order?

>

> Regards,

> --

> Alex
Alexander Shishkin July 4, 2018, 10:34 a.m. UTC | #9
Mathieu Poirier <mathieu.poirier@linaro.org> writes:

> On Tue, 3 Jul 2018 at 04:57, Alexander Shishkin

> <alexander.shishkin@linux.intel.com> wrote:

>>

>> On Tue, Jul 03, 2018 at 01:03:48PM +0300, Alexander Shishkin wrote:

>> > On Mon, Jul 02, 2018 at 04:33:29PM -0600, Mathieu Poirier wrote:

>> > > +/*

>> > > + * PMU driver configuration works the same way as filter management above,

>> > > + * but without the need to deal with memory mapping.  Driver configuration

>> > > + * arrives through the SET_DRV_CONFIG ioctl() where it is validated and applied

>> > > + * to the event.  When the PMU is ready it calls perf_event_drv_config_sync() to

>> > > + * bring the configuration information within reach of the PMU.

>> >

>> > Wait a second. The reason why we dance around with the generations of filters

>> > is the locking order of ctx::mutex vs mmap_sem. In an mmap path, where we're

>> > notified about mapping changes, we're called under the latter, and we'd need

>> > to grab the former to update the event configuration. In your case, the

>> > update comes in via perf_ioctl(), where we're already holding the ctx::mutex,

>> > so you can just kick the PMU right there, via an event_function_call() or

>> > perf_event_stop(restart=1). In the latter case, your pmu::start() would just

>> > grab the new configuration. Should also be about 90% less code. :)

>>

>> Also, since it affects the AUX buffer configuration, it is probably a one

>> time ioctl command that you issue before you mmap the buffer. If that's

>> the case, you don't even have to worry about stopping the event, as it

>> shouldn't be running, because without the buffer perf_aux_output_begin()

>> should fail and so should the pmu::add() iirc.

>>

>

> The idea behind the current approach was to make the SET_DRV_CONFIG

> ioctl() usable by other drivers where multiple ioctl() calls could be

> performed while a session in ongoing.  I also opted to introduce a

> _sync() function to let the PMU refresh its configuration at the time

> of its own choosing rather than having to interrupt the session.


Yes, but the times of PMU's own choosing would still be more or less
limited to ->start()/->stop().

You can also do an event_function_call(), which would call
->config_sync(), which would be free to decide what to do with the new
information, up to and including doing a ->stop()/->start() sequence. My
guess is that you'd want to do either of the following:
 * decide to apply the new configuration immediately, and do the
 start-stop thing,
 * decide to defer the new configuration until the next ->start().

Both should work via cross call directly from the ioctl() call.

> But all I need for coresight is to have available the sink information

> and PMU configuration (in an upcoming patchset) by the time

> setup_aux() is called.  You are correct, this is a one time

> configuration and since the event isn't running there is no need for

> locking - I should be able to access the PMU when the ioctl is called.

>

> If you are fine with this bare-bone scenario and don't care much about

> usability in different situation, I'll do a respin with minimal

> functionality that cover my needs.


It doesn't have to be bare-bones, what I'm saying is that you shouldn't
need the event->drv_config, as you can directly call
pmu::config(new_config) (or config_sync(), but I'm guessing the _sync
part is redundant if you don't keep the configuration in 2 parts) from
the ioctl() and it should cover all your bases.

Regards,
--
Alex
Alexander Shishkin July 4, 2018, 10:51 a.m. UTC | #10
Mathieu Poirier <mathieu.poirier@linaro.org> writes:

> Hi Alex,


Hi Mathieu,

> On Tue, 3 Jul 2018 at 04:03, Alexander Shishkin

> <alexander.shishkin@linux.intel.com> wrote:

>>

>> On Mon, Jul 02, 2018 at 04:33:29PM -0600, Mathieu Poirier wrote:

>> > This patch follows what has been done for filters by adding an ioctl()

>> > option to communicate to the kernel arbitrary PMU specific configuration

>> > that don't fit in the conventional struct perf_event_attr to the kernel.

>>

>> Ok, so what *is* the PMU specific configuration that doesn't fit in the

>> attribute and needs to be re-configured by the driver using the generation

>> tracking?

>>

>

> In this patchset I'm am after the specification of sink information

> for each event, i.e what sink a CPU is supposed to use for the

> session.  I simply don't see putting something that PMU specific in

> the generic perf_event_attr structure.  I also intend to use the same

> ioctl mechanism to communicate complex tracer configuration for

> sequencers, counters and input events.  I don't see a nice way of

> doing that from the perf_event_attr, and that is even without thinking

> about the different flavours of tracers out there, all with their own

> features.


Yes, the sequencers and counters seem tricky. Here's a wild idea: can
the sequencer/counter configuration be expressed as an eBPF program? Or,
can an eBPF program be used to program those?

> I've looked around and the only clean way I found to support this is

> via an ioctl().  That way each tracer can easily identify the sink it

> should be using without smearing the perf_event_attr structure.  I

> would be happy to explore a different avenue should you think of

> something.


Yes, I also have something similar on my todo list and I was previously
thinking along the lines of pipe()/splice(). As in, you take the AUX
event file descriptor and feed it to the sink, at which point the trace
path is configured. I need to dig up the notes that I made back in the
day to continue this conversation in more concrete terms.

Regards,
--
Alex
Mathieu Poirier July 4, 2018, 9:39 p.m. UTC | #11
On Wed, 4 Jul 2018 at 04:35, Alexander Shishkin
<alexander.shishkin@linux.intel.com> wrote:
>

> Mathieu Poirier <mathieu.poirier@linaro.org> writes:

>

> > On Tue, 3 Jul 2018 at 04:57, Alexander Shishkin

> > <alexander.shishkin@linux.intel.com> wrote:

> >>

> >> On Tue, Jul 03, 2018 at 01:03:48PM +0300, Alexander Shishkin wrote:

> >> > On Mon, Jul 02, 2018 at 04:33:29PM -0600, Mathieu Poirier wrote:

> >> > > +/*

> >> > > + * PMU driver configuration works the same way as filter management above,

> >> > > + * but without the need to deal with memory mapping.  Driver configuration

> >> > > + * arrives through the SET_DRV_CONFIG ioctl() where it is validated and applied

> >> > > + * to the event.  When the PMU is ready it calls perf_event_drv_config_sync() to

> >> > > + * bring the configuration information within reach of the PMU.

> >> >

> >> > Wait a second. The reason why we dance around with the generations of filters

> >> > is the locking order of ctx::mutex vs mmap_sem. In an mmap path, where we're

> >> > notified about mapping changes, we're called under the latter, and we'd need

> >> > to grab the former to update the event configuration. In your case, the

> >> > update comes in via perf_ioctl(), where we're already holding the ctx::mutex,

> >> > so you can just kick the PMU right there, via an event_function_call() or

> >> > perf_event_stop(restart=1). In the latter case, your pmu::start() would just

> >> > grab the new configuration. Should also be about 90% less code. :)

> >>

> >> Also, since it affects the AUX buffer configuration, it is probably a one

> >> time ioctl command that you issue before you mmap the buffer. If that's

> >> the case, you don't even have to worry about stopping the event, as it

> >> shouldn't be running, because without the buffer perf_aux_output_begin()

> >> should fail and so should the pmu::add() iirc.

> >>

> >

> > The idea behind the current approach was to make the SET_DRV_CONFIG

> > ioctl() usable by other drivers where multiple ioctl() calls could be

> > performed while a session in ongoing.  I also opted to introduce a

> > _sync() function to let the PMU refresh its configuration at the time

> > of its own choosing rather than having to interrupt the session.

>

> Yes, but the times of PMU's own choosing would still be more or less

> limited to ->start()/->stop().

>

> You can also do an event_function_call(), which would call

> ->config_sync(), which would be free to decide what to do with the new

> information, up to and including doing a ->stop()/->start() sequence. My

> guess is that you'd want to do either of the following:

>  * decide to apply the new configuration immediately, and do the

>  start-stop thing,

>  * decide to defer the new configuration until the next ->start().

>

> Both should work via cross call directly from the ioctl() call.


Other than doing a cross call, do you see any advantage of using
event_function_call()?  For what I currently need the cross call is
not necessary.

>

> > But all I need for coresight is to have available the sink information

> > and PMU configuration (in an upcoming patchset) by the time

> > setup_aux() is called.  You are correct, this is a one time

> > configuration and since the event isn't running there is no need for

> > locking - I should be able to access the PMU when the ioctl is called.

> >

> > If you are fine with this bare-bone scenario and don't care much about

> > usability in different situation, I'll do a respin with minimal

> > functionality that cover my needs.

>

> It doesn't have to be bare-bones, what I'm saying is that you shouldn't

> need the event->drv_config, as you can directly call

> pmu::config(new_config) (or config_sync(), but I'm guessing the _sync

> part is redundant if you don't keep the configuration in 2 parts) from

> the ioctl() and it should cover all your bases.


I think I get your point - I will do another respin and we can go from there.

Thanks,
Mathieu

>

> Regards,

> --

> Alex
diff mbox series

Patch

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 4d9c8f30ca6c..6e06b63c262f 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -178,6 +178,12 @@  struct hw_perf_event {
 	/* Last sync'ed generation of filters */
 	unsigned long			addr_filters_gen;
 
+	/* PMU driver configuration */
+	void				*drv_config;
+
+	/* Last sync'ed generation of driver config */
+	unsigned long			drv_config_gen;
+
 /*
  * hw_perf_event::state flags; used to track the PERF_EF_* state.
  */
@@ -447,6 +453,26 @@  struct pmu {
 	 * Filter events for PMU-specific reasons.
 	 */
 	int (*filter_match)		(struct perf_event *event); /* optional */
+
+	/*
+	 * Valiate complex PMU configuration that don't fit in the
+	 * perf_event_attr struct.  Returns a PMU specific pointer or an error
+	 * value < 0.
+	 *
+	 * As with addr_filters_validate(), runs in the context of the ioctl()
+	 * process and is not serialized with the rest of the PMU callbacks.
+	 */
+	void *(*drv_config_validate)	(struct perf_event *event,
+					 char *config_str);
+
+	/* Synchronize PMU driver configuration */
+	void (*drv_config_sync)		(struct perf_event *event);
+
+	/*
+	 * Release PMU specific configuration acquired by
+	 * drv_config_validate()
+	 */
+	void (*drv_config_free)		(void *drv_data);
 };
 
 enum perf_addr_filter_action_t {
@@ -489,6 +515,11 @@  struct perf_addr_filters_head {
 	unsigned int		nr_file_filters;
 };
 
+struct perf_drv_config {
+	void		*drv_config;
+	raw_spinlock_t	lock;
+};
+
 /**
  * enum perf_event_state - the states of a event
  */
@@ -668,6 +699,10 @@  struct perf_event {
 	unsigned long			*addr_filters_offs;
 	unsigned long			addr_filters_gen;
 
+	/* PMU driver specific configuration */
+	struct perf_drv_config		drv_config;
+	unsigned long			drv_config_gen;
+
 	void (*destroy)(struct perf_event *);
 	struct rcu_head			rcu_head;
 
@@ -1234,6 +1269,13 @@  static inline bool has_addr_filter(struct perf_event *event)
 	return event->pmu->nr_addr_filters;
 }
 
+static inline bool has_drv_config(struct perf_event *event)
+{
+	return event->pmu->drv_config_validate &&
+	       event->pmu->drv_config_sync &&
+	       event->pmu->drv_config_free;
+}
+
 /*
  * An inherited event uses parent's filters
  */
@@ -1248,7 +1290,19 @@  perf_event_addr_filters(struct perf_event *event)
 	return ifh;
 }
 
+static inline struct perf_drv_config *
+perf_event_get_drv_config(struct perf_event *event)
+{
+	struct perf_drv_config *cfg = &event->drv_config;
+
+	if (event->parent)
+		cfg = &event->parent->drv_config;
+
+	return cfg;
+}
+
 extern void perf_event_addr_filters_sync(struct perf_event *event);
+extern void perf_event_drv_config_sync(struct perf_event *event);
 
 extern int perf_output_begin(struct perf_output_handle *handle,
 			     struct perf_event *event, unsigned int size);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 8f0434a9951a..701839866789 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2829,6 +2829,29 @@  void perf_event_addr_filters_sync(struct perf_event *event)
 }
 EXPORT_SYMBOL_GPL(perf_event_addr_filters_sync);
 
+/*
+ * PMU driver configuration works the same way as filter management above,
+ * but without the need to deal with memory mapping.  Driver configuration
+ * arrives through the SET_DRV_CONFIG ioctl() where it is validated and applied
+ * to the event.  When the PMU is ready it calls perf_event_drv_config_sync() to
+ * bring the configuration information within reach of the PMU.
+ */
+void perf_event_drv_config_sync(struct perf_event *event)
+{
+	struct perf_drv_config *drv_config = perf_event_get_drv_config(event);
+
+	if (!has_drv_config(event))
+		return;
+
+	raw_spin_lock(&drv_config->lock);
+	if (event->drv_config_gen != event->hw.drv_config_gen) {
+		event->pmu->drv_config_sync(event);
+		event->hw.drv_config_gen = event->drv_config_gen;
+	}
+	raw_spin_unlock(&drv_config->lock);
+}
+EXPORT_SYMBOL_GPL(perf_event_drv_config_sync);
+
 static int _perf_event_refresh(struct perf_event *event, int refresh)
 {
 	/*
@@ -4410,6 +4433,7 @@  static bool exclusive_event_installable(struct perf_event *event,
 
 static void perf_addr_filters_splice(struct perf_event *event,
 				       struct list_head *head);
+static void perf_drv_config_splice(struct perf_event *event, void *drv_data);
 
 static void _free_event(struct perf_event *event)
 {
@@ -4440,6 +4464,7 @@  static void _free_event(struct perf_event *event)
 	perf_event_free_bpf_prog(event);
 	perf_addr_filters_splice(event, NULL);
 	kfree(event->addr_filters_offs);
+	perf_drv_config_splice(event, NULL);
 
 	if (event->destroy)
 		event->destroy(event);
@@ -5002,6 +5027,8 @@  static inline int perf_fget_light(int fd, struct fd *p)
 static int perf_event_set_output(struct perf_event *event,
 				 struct perf_event *output_event);
 static int perf_event_set_filter(struct perf_event *event, void __user *arg);
+static int perf_event_set_drv_config(struct perf_event *event,
+				     void __user *arg);
 static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd);
 static int perf_copy_attr(struct perf_event_attr __user *uattr,
 			  struct perf_event_attr *attr);
@@ -5088,6 +5115,10 @@  static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned lon
 
 		return perf_event_modify_attr(event,  &new_attr);
 	}
+
+	case PERF_EVENT_IOC_SET_DRV_CONFIG:
+		return perf_event_set_drv_config(event, (void __user *)arg);
+
 	default:
 		return -ENOTTY;
 	}
@@ -9086,6 +9117,85 @@  static int perf_event_set_filter(struct perf_event *event, void __user *arg)
 	return ret;
 }
 
+static void perf_drv_config_splice(struct perf_event *event, void *drv_data)
+{
+	unsigned long flags;
+	void *old_drv_data;
+
+	if (!has_drv_config(event))
+		return;
+
+	/* Children take their configuration from their parent */
+	if (event->parent)
+		return;
+
+	raw_spin_lock_irqsave(&event->drv_config.lock, flags);
+
+	old_drv_data = event->drv_config.drv_config;
+	event->drv_config.drv_config = drv_data;
+
+	raw_spin_unlock_irqrestore(&event->drv_config.lock, flags);
+
+	event->pmu->drv_config_free(old_drv_data);
+}
+
+static void perf_event_drv_config_apply(struct perf_event *event)
+{
+	unsigned long flags;
+	struct perf_drv_config *drv_config = perf_event_get_drv_config(event);
+
+	/* Notify event that a new configuration is available */
+	raw_spin_lock_irqsave(&drv_config->lock, flags);
+	event->drv_config_gen++;
+	raw_spin_unlock_irqrestore(&drv_config->lock, flags);
+}
+
+static int
+perf_event_process_drv_config(struct perf_event *event, char *config_str)
+{
+	int ret = -EINVAL;
+	void *drv_data;
+
+	/* Make sure ctx.mutex it held */
+	lockdep_assert_held(&event->ctx->mutex);
+
+	/* Children take their configuration from their parent */
+	if (WARN_ON_ONCE(event->parent))
+		goto out;
+
+	drv_data = event->pmu->drv_config_validate(event, config_str);
+	if (!drv_data)
+		goto out;
+	if (IS_ERR(drv_data)) {
+		ret = PTR_ERR(drv_data);
+		goto out;
+	}
+
+	perf_drv_config_splice(event, drv_data);
+
+	perf_event_for_each_child(event, perf_event_drv_config_apply);
+
+	ret = 0;
+out:
+	return ret;
+}
+
+static int perf_event_set_drv_config(struct perf_event *event, void __user *arg)
+{
+	int ret = -EINVAL;
+	char *config_str;
+
+	config_str = strndup_user(arg, PAGE_SIZE);
+	if (IS_ERR(config_str))
+		return PTR_ERR(config_str);
+
+	if (has_drv_config(event))
+		ret = perf_event_process_drv_config(event, config_str);
+
+	kfree(config_str);
+	return ret;
+}
+
 /*
  * hrtimer based swevent callback
  */