diff mbox series

[RESEND,v5,4/6] coresight: Use PMU driver configuration for sink selection

Message ID 1545067306-31687-5-git-send-email-mathieu.poirier@linaro.org
State New
Headers show
Series perf: Add ioctl for PMU driver configuration | expand

Commit Message

Mathieu Poirier Dec. 17, 2018, 5:21 p.m. UTC
This patch uses the PMU driver configuration held in event::hw::drv_config
to select a sink for each event that is created (the old sysFS way of
working is kept around for backward compatibility).

By proceeding in this way a sink can be used by multiple sessions
without having to play games with entries in sysFS.

Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>

---
 drivers/hwtracing/coresight/coresight-etm-perf.c | 74 ++++++++++++++++++++----
 1 file changed, 62 insertions(+), 12 deletions(-)

-- 
2.7.4

Comments

Suzuki K Poulose Dec. 18, 2018, 2:14 p.m. UTC | #1
Hi Mathieu,

On 17/12/2018 17:21, Mathieu Poirier wrote:
> This patch uses the PMU driver configuration held in event::hw::drv_config

> to select a sink for each event that is created (the old sysFS way of

> working is kept around for backward compatibility).

> 

> By proceeding in this way a sink can be used by multiple sessions

> without having to play games with entries in sysFS.

> 

> Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>

> ---

>   drivers/hwtracing/coresight/coresight-etm-perf.c | 74 ++++++++++++++++++++----

>   1 file changed, 62 insertions(+), 12 deletions(-)

> 

> diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c

> index f21eb28b6782..a7e1fdef07f2 100644

> --- a/drivers/hwtracing/coresight/coresight-etm-perf.c

> +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c

> @@ -4,6 +4,7 @@

>    * Author: Mathieu Poirier <mathieu.poirier@linaro.org>

>    */

>   

> +#include <linux/amba/bus.h>

>   #include <linux/coresight.h>

>   #include <linux/coresight-pmu.h>

>   #include <linux/cpumask.h>

> @@ -11,6 +12,7 @@

>   #include <linux/list.h>

>   #include <linux/mm.h>

>   #include <linux/init.h>

> +#include <linux/ioport.h>

>   #include <linux/perf_event.h>

>   #include <linux/percpu-defs.h>

>   #include <linux/slab.h>

> @@ -177,6 +179,26 @@ static void etm_free_aux(void *data)

>   	schedule_work(&event_data->work);

>   }

>   

> +static struct coresight_device *etm_drv_config_sync(struct perf_event *event)


minor nit: The name doesn't quite imply what we do here. Did you mean
s/sync/sink ?

> +{

> +	struct coresight_device *sink = NULL;

> +	struct pmu_drv_config *drv_config = perf_event_get_drv_config(event);

> +

> +	/*

> +	 * Make sure we don't race with perf_drv_config_replace() in

> +	 * kernel/events/core.c.

> +	 */

> +	raw_spin_lock(&drv_config->lock);

> +

> +	/* Copy what we got from user space if applicable. */

> +	if (drv_config->config)

> +		sink = drv_config->config;

> +

> +	raw_spin_unlock(&drv_config->lock);

> +

> +	return sink;

> +}

> +

>   static void *etm_setup_aux(struct perf_event *event, void **pages,

>   			   int nr_pages, bool overwrite)

>   {

> @@ -190,18 +212,11 @@ static void *etm_setup_aux(struct perf_event *event, void **pages,

>   		return NULL;

>   	INIT_WORK(&event_data->work, free_event_data);

>   

> -	/*

> -	 * In theory nothing prevent tracers in a trace session from being

> -	 * associated with different sinks, nor having a sink per tracer.  But

> -	 * until we have HW with this kind of topology we need to assume tracers

> -	 * in a trace session are using the same sink.  Therefore go through

> -	 * the coresight bus and pick the first enabled sink.

> -	 *

> -	 * When operated from sysFS users are responsible to enable the sink

> -	 * while from perf, the perf tools will do it based on the choice made

> -	 * on the cmd line.  As such the "enable_sink" flag in sysFS is reset.

> -	 */

> -	sink = coresight_get_enabled_sink(true);

> +	/* First get the sink config from user space. */

> +	sink = etm_drv_config_sync(event);

> +	if (!sink)

> +		sink = coresight_get_enabled_sink(true);

> +

>   	if (!sink || !sink_ops(sink)->alloc_buffer)

>   		goto err;

>   

> @@ -454,6 +469,40 @@ static void etm_addr_filters_sync(struct perf_event *event)

>   	filters->nr_filters = i;

>   }

>   

> +static int etm_drv_config_find_sink(struct device *dev, void *data)

> +{

> +	struct amba_device *adev = to_amba_device(dev->parent);

> +	struct resource *res = &adev->res;

> +	u64 value = *((u64 *)data);

> +

> +	/*

> +	 * The HW mapping of a component is unique.  If the value we've been

> +	 * given matches the component's start address, then we must have found

> +	 * the device we are looking for.

> +	 */


To be frank, I don't quite like the idea of passing the base address of the
component as the key to locate a device, (even though that is unique and readily
available). I would rather prefer a programmable way to map the keys to the
"sink" devices, one which works in a platform-agnostic way (e.g., ACPI support, where the base
address is not obvious from the name). Also, if we decide to use a
platform-agnostic naming scheme, it becomes even more complex.

We could assign a static "id/key" exported either via the device sysfs dir or
the "pmu" dir. I prefer the latter.

Thoughts ?

And whatever we decide to choose, needs to be clearly documented under the 
Documentation/perf/cs_etm.txt.

Cheers
Suzuki
Alexander Shishkin Dec. 18, 2018, 3:21 p.m. UTC | #2
Suzuki K Poulose <suzuki.poulose@arm.com> writes:

>> +	/*

>> +	 * The HW mapping of a component is unique.  If the value we've been

>> +	 * given matches the component's start address, then we must have found

>> +	 * the device we are looking for.

>> +	 */

>

> To be frank, I don't quite like the idea of passing the base address of the

> component as the key to locate a device, (even though that is unique and readily

> available). I would rather prefer a programmable way to map the keys to the

> "sink" devices, which works platform agnostic (e.g, ACPI support, where the base

> address is not obvious from the name). Also if we decide to use a platform

> agnostic naming scheme, it becomes even more complex.

>

> We could assign a static "id/key" exported either via the device sysfs dir or

> the "pmu" dir. I prefer the latter.

>

> Thoughts ?


So, my understanding is that we have a bunch of trace sources and a
bunch of trace sinks to choose from when we set up the perf event. The
current model basically treats trace sources as PMUs and relies on the
sink configuration process to be done via sysfs, which is not ideal as
an API.

The first thing that comes to mind is: can then the sinks be made their
own PMUs, so the above can be done via the existing SET_OUTPUT ioctl?

Regards,
--
Alex
Mathieu Poirier Dec. 18, 2018, 5:34 p.m. UTC | #3
Good day Suzuki,

On Tue, 18 Dec 2018 at 07:14, Suzuki K Poulose <suzuki.poulose@arm.com> wrote:
>

> Hi Mathieu,

>

> On 17/12/2018 17:21, Mathieu Poirier wrote:

> > This patch uses the PMU driver configuration held in event::hw::drv_config

> > to select a sink for each event that is created (the old sysFS way of

> > working is kept around for backward compatibility).

> >

> > By proceeding in this way a sink can be used by multiple sessions

> > without having to play games with entries in sysFS.

> >

> > Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>

> > ---

> >   drivers/hwtracing/coresight/coresight-etm-perf.c | 74 ++++++++++++++++++++----

> >   1 file changed, 62 insertions(+), 12 deletions(-)

> >

> > diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c

> > index f21eb28b6782..a7e1fdef07f2 100644

> > --- a/drivers/hwtracing/coresight/coresight-etm-perf.c

> > +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c

> > @@ -4,6 +4,7 @@

> >    * Author: Mathieu Poirier <mathieu.poirier@linaro.org>

> >    */

> >

> > +#include <linux/amba/bus.h>

> >   #include <linux/coresight.h>

> >   #include <linux/coresight-pmu.h>

> >   #include <linux/cpumask.h>

> > @@ -11,6 +12,7 @@

> >   #include <linux/list.h>

> >   #include <linux/mm.h>

> >   #include <linux/init.h>

> > +#include <linux/ioport.h>

> >   #include <linux/perf_event.h>

> >   #include <linux/percpu-defs.h>

> >   #include <linux/slab.h>

> > @@ -177,6 +179,26 @@ static void etm_free_aux(void *data)

> >       schedule_work(&event_data->work);

> >   }

> >

> > +static struct coresight_device *etm_drv_config_sync(struct perf_event *event)

>

> minor nit: The name doesn't quite imply what we do here. Did you mean

> s/sync/sink ?

>


I chose "sync" with "synchronisation" in mind.  I tried to keep things
generic since we could potentially use the same interface to convey
complex PMU configuration.  Arguably we could go with "sink" for now
and change it to "sync" in the future - I'm not strongly opinionated
on that part.

> > +{

> > +     struct coresight_device *sink = NULL;

> > +     struct pmu_drv_config *drv_config = perf_event_get_drv_config(event);

> > +

> > +     /*

> > +      * Make sure we don't race with perf_drv_config_replace() in

> > +      * kernel/events/core.c.

> > +      */

> > +     raw_spin_lock(&drv_config->lock);

> > +

> > +     /* Copy what we got from user space if applicable. */

> > +     if (drv_config->config)

> > +             sink = drv_config->config;

> > +

> > +     raw_spin_unlock(&drv_config->lock);

> > +

> > +     return sink;

> > +}

> > +

> >   static void *etm_setup_aux(struct perf_event *event, void **pages,

> >                          int nr_pages, bool overwrite)

> >   {

> > @@ -190,18 +212,11 @@ static void *etm_setup_aux(struct perf_event *event, void **pages,

> >               return NULL;

> >       INIT_WORK(&event_data->work, free_event_data);

> >

> > -     /*

> > -      * In theory nothing prevent tracers in a trace session from being

> > -      * associated with different sinks, nor having a sink per tracer.  But

> > -      * until we have HW with this kind of topology we need to assume tracers

> > -      * in a trace session are using the same sink.  Therefore go through

> > -      * the coresight bus and pick the first enabled sink.

> > -      *

> > -      * When operated from sysFS users are responsible to enable the sink

> > -      * while from perf, the perf tools will do it based on the choice made

> > -      * on the cmd line.  As such the "enable_sink" flag in sysFS is reset.

> > -      */

> > -     sink = coresight_get_enabled_sink(true);

> > +     /* First get the sink config from user space. */

> > +     sink = etm_drv_config_sync(event);

> > +     if (!sink)

> > +             sink = coresight_get_enabled_sink(true);

> > +

> >       if (!sink || !sink_ops(sink)->alloc_buffer)

> >               goto err;

> >

> > @@ -454,6 +469,40 @@ static void etm_addr_filters_sync(struct perf_event *event)

> >       filters->nr_filters = i;

> >   }

> >

> > +static int etm_drv_config_find_sink(struct device *dev, void *data)

> > +{

> > +     struct amba_device *adev = to_amba_device(dev->parent);

> > +     struct resource *res = &adev->res;

> > +     u64 value = *((u64 *)data);

> > +

> > +     /*

> > +      * The HW mapping of a component is unique.  If the value we've been

> > +      * given matches the component's start address, then we must have found

> > +      * the device we are looking for.

> > +      */

>

> To be frank, I don't quite like the idea of passing the base address of the

> component as the key to locate a device, (even though that is unique and readily

> available). I would rather prefer a programmable way to map the keys to the

> "sink" devices, which works platform agnostic (e.g, ACPI support, where the base

> address is not obvious from the name). Also if we decide to use a platform

> agnostic naming scheme, it becomes even more complex.


This mechanism doesn't rely on the naming scheme - it exploits the
"resource" interface exported for each amba device [1].  As such
whether the component is discovered using ACPI or DT, we end up on the
same amba bus and using the same interface.

[1]. https://elixir.bootlin.com/linux/latest/source/drivers/amba/bus.c#L128

>

> We could assign a static "id/key" exported either via the device sysfs dir or

> the "pmu" dir. I prefer the latter.


Not sure what you mean by "pmu" directory - would you mind expanding
on that?  Using sysfs would be quite easy but I am reluctant to create
a new id/key mechanism and introduce another entry when we have the
component address that is unique and already available in the amba
directory structure.

Thanks for taking a look,
Mathieu

>

> Thoughts ?

>

> And whatever we decide to choose, needs to be clearly documented under the

> Documentation/perf/cs_etm.txt.

>

> Cheers

> Suzuki
Mathieu Poirier Dec. 18, 2018, 6:20 p.m. UTC | #4
Hi Alex,

On Tue, 18 Dec 2018 at 08:21, Alexander Shishkin
<alexander.shishkin@linux.intel.com> wrote:
>

> Suzuki K Poulose <suzuki.poulose@arm.com> writes:

>

> >> +    /*

> >> +     * The HW mapping of a component is unique.  If the value we've been

> >> +     * given matches the component's start address, then we must have found

> >> +     * the device we are looking for.

> >> +     */

> >

> > To be frank, I don't quite like the idea of passing the base address of the

> > component as the key to locate a device, (even though that is unique and readily

> > available). I would rather prefer a programmable way to map the keys to the

> > "sink" devices, which works platform agnostic (e.g, ACPI support, where the base

> > address is not obvious from the name). Also if we decide to use a platform

> > agnostic naming scheme, it becomes even more complex.

> >

> > We could assign a static "id/key" exported either via the device sysfs dir or

> > the "pmu" dir. I prefer the latter.

> >

> > Thoughts ?

>

> So, my understanding is that we have a bunch of trace sources and a

> bunch of trace sinks to choose from when we set up the perf event. The

> current model basically treats trace sources as PMUs and relies on the

> sink configuration process to be done via sysfs, which is not ideal as

> an API.


That is correct.  Most of the grief comes from the fact that, when
tracing CPU-wide sessions, sinks are concurrently used by more than one
CPU.

>

> The first thing that comes to mind is: can then the sinks be made their

> own PMUs, so the above can be done via the existing SET_OUTPUT ioctl?


I had a serious look at the SET_OUTPUT function as part of the
research that pre-dated implementing CPU-wide support for coresight.
The core does not allow events assigned to different CPUs to use the
same mmap'ed area, which is perfectly understandable.

To me the problem of sharing a sink between CPUs is inherent to
coresight and should be fixed within that framework (see full work
here [1] if interested).  The implementation associates a sink with an
mmap'ed area, just like PT and coresight --per-thread.  Where things
differ is that for coresight CPU-wide the sink is kept in operation for
as long as a CPU is using it, ignoring other requests for updates or to
switch it off.  So the first CPU to use it turns the sink on and the
last turns it off after collecting trace data from it.

Thanks,
Mathieu

[1]. https://git.linaro.org/people/mathieu.poirier/coresight.git/log/?h=cpu-wide-coresight

>

> Regards,

> --

> Alex
Suzuki K Poulose Dec. 19, 2018, 9:40 a.m. UTC | #5
On 18/12/2018 17:34, Mathieu Poirier wrote:
> Good day Suzuki,

> 

> On Tue, 18 Dec 2018 at 07:14, Suzuki K Poulose <suzuki.poulose@arm.com> wrote:

>>

>> Hi Mathieu,

>>

>> On 17/12/2018 17:21, Mathieu Poirier wrote:

>>> This patch uses the PMU driver configuration held in event::hw::drv_config

>>> to select a sink for each event that is created (the old sysFS way of

>>> working is kept around for backward compatibility).

>>>

>>> By proceeding in this way a sink can be used by multiple sessions

>>> without having to play games with entries in sysFS.

>>>

>>> Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>

>>> ---

>>>    drivers/hwtracing/coresight/coresight-etm-perf.c | 74 ++++++++++++++++++++----

>>>    1 file changed, 62 insertions(+), 12 deletions(-)

>>>

>>> diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c

>>> index f21eb28b6782..a7e1fdef07f2 100644

>>> --- a/drivers/hwtracing/coresight/coresight-etm-perf.c

>>> +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c

>>> @@ -4,6 +4,7 @@

>>>     * Author: Mathieu Poirier <mathieu.poirier@linaro.org>

>>>     */

>>>

>>> +#include <linux/amba/bus.h>

>>>    #include <linux/coresight.h>

>>>    #include <linux/coresight-pmu.h>

>>>    #include <linux/cpumask.h>

>>> @@ -11,6 +12,7 @@

>>>    #include <linux/list.h>

>>>    #include <linux/mm.h>

>>>    #include <linux/init.h>

>>> +#include <linux/ioport.h>

>>>    #include <linux/perf_event.h>

>>>    #include <linux/percpu-defs.h>

>>>    #include <linux/slab.h>

>>> @@ -177,6 +179,26 @@ static void etm_free_aux(void *data)

>>>        schedule_work(&event_data->work);

>>>    }

>>>

>>> +static struct coresight_device *etm_drv_config_sync(struct perf_event *event)

>>

>> minor nit: The name doesn't quite imply what we do here. Did you mean

>> s/sync/sink ?

>>

> 

> I chose "sync" with "synchronisation" in mind.  I tried to keep things

> generic since we could potentially use the same interface to convey

> complex PMU configuration.  Arguably we could go with "sink" for now

> and change it to "sync" in the future - I'm not strongly opinionated

> on that part.


Ok. I thought we were trying to grab the sink information from the event
drv_config, hence something that implies that would be slightly more
reader friendly. Again, I am not too keen on it.

> 

>>> +{

>>> +     struct coresight_device *sink = NULL;

>>> +     struct pmu_drv_config *drv_config = perf_event_get_drv_config(event);

>>> +

>>> +     /*

>>> +      * Make sure we don't race with perf_drv_config_replace() in

>>> +      * kernel/events/core.c.

>>> +      */

>>> +     raw_spin_lock(&drv_config->lock);

>>> +

>>> +     /* Copy what we got from user space if applicable. */

>>> +     if (drv_config->config)

>>> +             sink = drv_config->config;

>>> +

>>> +     raw_spin_unlock(&drv_config->lock);

>>> +

>>> +     return sink;

>>> +}

>>> +

>>>    static void *etm_setup_aux(struct perf_event *event, void **pages,

>>>                           int nr_pages, bool overwrite)

>>>    {

>>> @@ -190,18 +212,11 @@ static void *etm_setup_aux(struct perf_event *event, void **pages,

>>>                return NULL;

>>>        INIT_WORK(&event_data->work, free_event_data);

>>>

>>> -     /*

>>> -      * In theory nothing prevent tracers in a trace session from being

>>> -      * associated with different sinks, nor having a sink per tracer.  But

>>> -      * until we have HW with this kind of topology we need to assume tracers

>>> -      * in a trace session are using the same sink.  Therefore go through

>>> -      * the coresight bus and pick the first enabled sink.

>>> -      *

>>> -      * When operated from sysFS users are responsible to enable the sink

>>> -      * while from perf, the perf tools will do it based on the choice made

>>> -      * on the cmd line.  As such the "enable_sink" flag in sysFS is reset.

>>> -      */

>>> -     sink = coresight_get_enabled_sink(true);

>>> +     /* First get the sink config from user space. */

>>> +     sink = etm_drv_config_sync(event);

>>> +     if (!sink)

>>> +             sink = coresight_get_enabled_sink(true);

>>> +

>>>        if (!sink || !sink_ops(sink)->alloc_buffer)

>>>                goto err;

>>>

>>> @@ -454,6 +469,40 @@ static void etm_addr_filters_sync(struct perf_event *event)

>>>        filters->nr_filters = i;

>>>    }

>>>

>>> +static int etm_drv_config_find_sink(struct device *dev, void *data)

>>> +{

>>> +     struct amba_device *adev = to_amba_device(dev->parent);

>>> +     struct resource *res = &adev->res;

>>> +     u64 value = *((u64 *)data);

>>> +

>>> +     /*

>>> +      * The HW mapping of a component is unique.  If the value we've been

>>> +      * given matches the component's start address, then we must have found

>>> +      * the device we are looking for.

>>> +      */

>>

>> To be frank, I don't quite like the idea of passing the base address of the

>> component as the key to locate a device, (even though that is unique and readily

>> available). I would rather prefer a programmable way to map the keys to the

>> "sink" devices, which works platform agnostic (e.g, ACPI support, where the base

>> address is not obvious from the name). Also if we decide to use a platform

>> agnostic naming scheme, it becomes even more complex.

> 

> This mechanism doesn't rely on the naming scheme - it exploits the

> "resource" interface exported for each amba device [1].  As such

> whether the component is discovered using ACPI or DT, we end up on the

> same amba bus and using the same interface.

> 

> [1]. https://elixir.bootlin.com/linux/latest/source/drivers/amba/bus.c#L128


Ok. The only problem with this approach would be if the devices don't appear
on the AMBA bus (btw, which is not true for the existing IPs).

> 

>>

>> We could assign a static "id/key" exported either via the device sysfs dir or

>> the "pmu" dir. I prefer the latter.

> 

> Not sure what you mean by "pmu" directory - would you mind expanding

> on that?  Using sysfs would be quite easy but I am reluctant to create

> a new id/key mechanism and introduce another entry when we have the

> component address that is unique and already available in the amba

> directory structure.


We could add another directory under :

/sys/bus/event_source/devices/<PMU>/
				\_ events/
				\_ format/
say :
		\_ drv_config/
		Or
		\_ sinks/

and list the sinks, eg:
# cd $sysfs_pmu_dir/sinks
# cat <name_of_the_sink>
ID_of_the_sink

Btw, I am always inclined to using some bits off one of the "config" fields
("config1" or "config2") for the sink configuration. But I understand that
you have explored that avenue and chose this approach as we have further
configurations required for complex ETM settings.

Cheers
Suzuki
Mathieu Poirier Jan. 7, 2019, 6:18 p.m. UTC | #6
On Wed, 19 Dec 2018 at 02:40, Suzuki K Poulose <suzuki.poulose@arm.com> wrote:
>

>

>

> On 18/12/2018 17:34, Mathieu Poirier wrote:

> > Good day Suzuki,

> >

> > On Tue, 18 Dec 2018 at 07:14, Suzuki K Poulose <suzuki.poulose@arm.com> wrote:

> >>

> >> Hi Mathieu,

> >>

> >> On 17/12/2018 17:21, Mathieu Poirier wrote:

> >>> This patch uses the PMU driver configuration held in event::hw::drv_config

> >>> to select a sink for each event that is created (the old sysFS way of

> >>> working is kept around for backward compatibility).

> >>>

> >>> By proceeding in this way a sink can be used by multiple sessions

> >>> without having to play games with entries in sysFS.

> >>>

> >>> Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>

> >>> ---

> >>>    drivers/hwtracing/coresight/coresight-etm-perf.c | 74 ++++++++++++++++++++----

> >>>    1 file changed, 62 insertions(+), 12 deletions(-)

> >>>

> >>> diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c

> >>> index f21eb28b6782..a7e1fdef07f2 100644

> >>> --- a/drivers/hwtracing/coresight/coresight-etm-perf.c

> >>> +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c

> >>> @@ -4,6 +4,7 @@

> >>>     * Author: Mathieu Poirier <mathieu.poirier@linaro.org>

> >>>     */

> >>>

> >>> +#include <linux/amba/bus.h>

> >>>    #include <linux/coresight.h>

> >>>    #include <linux/coresight-pmu.h>

> >>>    #include <linux/cpumask.h>

> >>> @@ -11,6 +12,7 @@

> >>>    #include <linux/list.h>

> >>>    #include <linux/mm.h>

> >>>    #include <linux/init.h>

> >>> +#include <linux/ioport.h>

> >>>    #include <linux/perf_event.h>

> >>>    #include <linux/percpu-defs.h>

> >>>    #include <linux/slab.h>

> >>> @@ -177,6 +179,26 @@ static void etm_free_aux(void *data)

> >>>        schedule_work(&event_data->work);

> >>>    }

> >>>

> >>> +static struct coresight_device *etm_drv_config_sync(struct perf_event *event)

> >>

> >> minor nit: The name doesn't quite imply what we do here. Did you mean

> >> s/sync/sink ?

> >>

> >

> > I chose "sync" with "synchronisation" in mind.  I tried to keep things

> > generic since we could potentially use the same interface to convey

> > complex PMU configuration.  Arguably we could go with "sink" for now

> > and change it to "sync" in the future - I'm not strongly opinionated

> > on that part.

>

> Ok. I thought we were trying to grab the sink information from the event

> drv_config, hence something that implies that would be slightly more

> reader friendly. Again, I am not too keen on it.

>

> >

> >>> +{

> >>> +     struct coresight_device *sink = NULL;

> >>> +     struct pmu_drv_config *drv_config = perf_event_get_drv_config(event);

> >>> +

> >>> +     /*

> >>> +      * Make sure we don't race with perf_drv_config_replace() in

> >>> +      * kernel/events/core.c.

> >>> +      */

> >>> +     raw_spin_lock(&drv_config->lock);

> >>> +

> >>> +     /* Copy what we got from user space if applicable. */

> >>> +     if (drv_config->config)

> >>> +             sink = drv_config->config;

> >>> +

> >>> +     raw_spin_unlock(&drv_config->lock);

> >>> +

> >>> +     return sink;

> >>> +}

> >>> +

> >>>    static void *etm_setup_aux(struct perf_event *event, void **pages,

> >>>                           int nr_pages, bool overwrite)

> >>>    {

> >>> @@ -190,18 +212,11 @@ static void *etm_setup_aux(struct perf_event *event, void **pages,

> >>>                return NULL;

> >>>        INIT_WORK(&event_data->work, free_event_data);

> >>>

> >>> -     /*

> >>> -      * In theory nothing prevent tracers in a trace session from being

> >>> -      * associated with different sinks, nor having a sink per tracer.  But

> >>> -      * until we have HW with this kind of topology we need to assume tracers

> >>> -      * in a trace session are using the same sink.  Therefore go through

> >>> -      * the coresight bus and pick the first enabled sink.

> >>> -      *

> >>> -      * When operated from sysFS users are responsible to enable the sink

> >>> -      * while from perf, the perf tools will do it based on the choice made

> >>> -      * on the cmd line.  As such the "enable_sink" flag in sysFS is reset.

> >>> -      */

> >>> -     sink = coresight_get_enabled_sink(true);

> >>> +     /* First get the sink config from user space. */

> >>> +     sink = etm_drv_config_sync(event);

> >>> +     if (!sink)

> >>> +             sink = coresight_get_enabled_sink(true);

> >>> +

> >>>        if (!sink || !sink_ops(sink)->alloc_buffer)

> >>>                goto err;

> >>>

> >>> @@ -454,6 +469,40 @@ static void etm_addr_filters_sync(struct perf_event *event)

> >>>        filters->nr_filters = i;

> >>>    }

> >>>

> >>> +static int etm_drv_config_find_sink(struct device *dev, void *data)

> >>> +{

> >>> +     struct amba_device *adev = to_amba_device(dev->parent);

> >>> +     struct resource *res = &adev->res;

> >>> +     u64 value = *((u64 *)data);

> >>> +

> >>> +     /*

> >>> +      * The HW mapping of a component is unique.  If the value we've been

> >>> +      * given matches the component's start address, then we must have found

> >>> +      * the device we are looking for.

> >>> +      */

> >>

> >> To be frank, I don't quite like the idea of passing the base address of the

> >> component as the key to locate a device, (even though that is unique and readily

> >> available). I would rather prefer a programmable way to map the keys to the

> >> "sink" devices, which works platform agnostic (e.g, ACPI support, where the base

> >> address is not obvious from the name). Also if we decide to use a platform

> >> agnostic naming scheme, it becomes even more complex.

> >

> > This mechanism doesn't rely on the naming scheme - it exploits the

> > "resource" interface exported for each amba device [1].  As such

> > whether the component is discovered using ACPI or DT, we end up on the

> > same amba bus and using the same interface.

> >

> > [1]. https://elixir.bootlin.com/linux/latest/source/drivers/amba/bus.c#L128

>

> Ok. The only problem with this approach would be if the devices doesn't appear

> on the AMBA bus (btw, which is not true for the existing IPs).


This is news to me and definitely requires further attention.  I'd
rather come up with a design that accounts for those right away than
overhaul the whole thing a year from now.

>

> >

> >>

> >> We could assign a static "id/key" exported either via the device sysfs dir or

> >> the "pmu" dir. I prefer the latter.

> >

> > Not sure what you mean by "pmu" directory - would you mind expanding

> > on that?  Using sysfs would be quite easy but I am reluctant to create

> > a new id/key mechanism and introduce another entry when we have the

> > component address that is unique and already available in the amba

> > directory structure.

>

> We could add another directory under :

>

> /sys/bus/event_source/devices/<PMU>/

>                                 \_ events/

>                                 \_ format/

> say :

>                 \_ drv_config/

>                 Or

>                 \_ sinks/

>

> and list the sinks, eg:

> # cd $sysfs_pmu_dir/sinks

> # cat <name_of_the_sink>

> ID_of_the_sink


There is merit to this idea.  Thinking along those lines simply using
"sinks" is probably a better approach since we don't yet know how PMU
configuration will unfold.  I'm also wondering if we have to
explicitly list the ID of the sink.  The ID itself should be fetched
from the device-specific entries in sysfs, like the "resource" property
of sinks that show up on the AMBA bus.  Sinks that don't show up on
the AMBA bus will likely have a "reg" property or something similar,
and that is where the ID should be taken from.

Lastly it may be tricky to add a new directory structure to the PMU
entry as it is generic for all PMUs in the system.  But that is up for
investigation and I will look into doing something like that.

>

> Btw, I am always inclined to using some bits off one of the "config" fields

> ("config1" or "config2") for the sink configuration. But I understand that

> you have explored that avenue and chose this approach as we have further

> configurations required for complex ETM settings.


I've had a good conversation with myself over the holidays on that
topic (hence the late-ish reply).  My original approach was to use the
same ioctl() mechanism to do sink selection and complex PMU
configuration.  But as Greg confirmed with his comment on the previous
patch, doing so requires ironing out how the latter will be done on ARM
(and probably on Intel too), something that is a different deal
altogether.

Taking a step back and forgetting about complex PMU configuration for
a minute, the ID of a sink can easily be stuffed in one of the
"config" fields of the perf_event_attr structure, something I alluded
to in the patchset's cover letter.  That way we can move along with
this feature and leave the discussion on complex PMU configuration for
another day.

I'll do a respin with the above in mind.

A happy new year to you,
Mathieu

>

> Cheers

> Suzuki
Suzuki K Poulose Jan. 9, 2019, 2:55 p.m. UTC | #7
Hi Mathieu,

On 07/01/2019 18:18, Mathieu Poirier wrote:
> On Wed, 19 Dec 2018 at 02:40, Suzuki K Poulose <suzuki.poulose@arm.com> wrote:

>>

>>

>>

>> On 18/12/2018 17:34, Mathieu Poirier wrote:

>>> Good day Suzuki,

>>>

>>> On Tue, 18 Dec 2018 at 07:14, Suzuki K Poulose <suzuki.poulose@arm.com> wrote:


>>>> We could assign a static "id/key" exported either via the device sysfs dir or

>>>> the "pmu" dir. I prefer the latter.

>>>

>>> Not sure what you mean by "pmu" directory - would you mind expanding

>>> on that?  Using sysfs would be quite easy but I am reluctant to create

>>> a new id/key mechanism and introduce another entry when we have the

>>> component address that is unique and already available in the amba

>>> directory structure.

>>

>> We could add another directory under :

>>

>> /sys/bus/event_source/devices/<PMU>/

>>                                  \_ events/

>>                                  \_ format/

>> say :

>>                  \_ drv_config/

>>                  Or

>>                  \_ sinks/

>>

>> and list the sinks, eg:

>> # cd $sysfs_pmu_dir/sinks

>> # cat <name_of_the_sink>

>> ID_of_the_sink

> 

> There is merit to this idea.  Thinking along those lines simply using

> "sinks" is probably a better approach since we don't yet know how PMU

> configuration will unfold.  I'm also wondering if we have to

> explicitly list the ID of the sink.  The ID itself should be fetch

> from the device specific entries in sysfs like the "resource" property

> of sinks that show up on the AMBA bus.  Sinks that don't show up on

> the AMBA bus will likely have a "reg" property of something similar

> and that is where the ID should be taken from.


I would recommend this be done by the PMU driver and expose it, rather
than specifying what is expected. If we keep it simple like an integer,
which is then mapped to the sink-device, we could save some bits in the
config field for further use and avoid a complex set of rules for the ID.

> 

> Lastly it may be tricky to add a new directory structure to the PMU

> entry as it is generic for all PMUs in the system.  But that is up for

> investigation and I will look into doing something like that.


I don't think this is particularly problematic. The PMU driver can provide
a list of attribute_groups which should eventually appear in the device
directory under the "bus/event_source/devices/<PMU>/". And that provides
a central place for finding the SINK id for all the available sinks, rather
than scanning for a sink under all the buses (e.g, platform, amba) wherever
that might show up.

> 

>>

>> Btw, I am always inclined to using some bits off one of the "config" fields

>> ("config1" or "config2") for the sink configuration. But I understand that

>> you have explored that avenue and chose this approach as we have further

>> configurations required for complex ETM settings.

> 

> I've had a good conversation with myself over the holidays on that

> topic (hence the late-ish reply).  My original approach was to use the

> same ioctl() mechanism to do sink selection and complex PMU

> configuration.  But as Greg confirmed with his comment on the previous

> patch doing so requires to iron out how the latter will be done on ARM

> (and probably on Intel too), something that is a different deal

> altogether.

> 

> Taking a step back and forgetting about complex PMU configuration for

> a minute, the ID of a sink can easily be stuffed in one of the

> "config" fields of the perf_event_attr structure, something I alluded

> to in the patchset's cover letter.  That way we can move along with

> this feature and leave the discussion on complex PMU configuration for

> another day.


Cool ! I completely agree with this.

> 

> I'll do a respin with the above in mind.

> 

> A happy new year to you,


You too.

Cheers
Suzuki
diff mbox series

Patch

diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c
index f21eb28b6782..a7e1fdef07f2 100644
--- a/drivers/hwtracing/coresight/coresight-etm-perf.c
+++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
@@ -4,6 +4,7 @@ 
  * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
  */
 
+#include <linux/amba/bus.h>
 #include <linux/coresight.h>
 #include <linux/coresight-pmu.h>
 #include <linux/cpumask.h>
@@ -11,6 +12,7 @@ 
 #include <linux/list.h>
 #include <linux/mm.h>
 #include <linux/init.h>
+#include <linux/ioport.h>
 #include <linux/perf_event.h>
 #include <linux/percpu-defs.h>
 #include <linux/slab.h>
@@ -177,6 +179,26 @@  static void etm_free_aux(void *data)
 	schedule_work(&event_data->work);
 }
 
+static struct coresight_device *etm_drv_config_sync(struct perf_event *event)
+{
+	struct coresight_device *sink = NULL;
+	struct pmu_drv_config *drv_config = perf_event_get_drv_config(event);
+
+	/*
+	 * Make sure we don't race with perf_drv_config_replace() in
+	 * kernel/events/core.c.
+	 */
+	raw_spin_lock(&drv_config->lock);
+
+	/* Copy what we got from user space if applicable. */
+	if (drv_config->config)
+		sink = drv_config->config;
+
+	raw_spin_unlock(&drv_config->lock);
+
+	return sink;
+}
+
 static void *etm_setup_aux(struct perf_event *event, void **pages,
 			   int nr_pages, bool overwrite)
 {
@@ -190,18 +212,11 @@  static void *etm_setup_aux(struct perf_event *event, void **pages,
 		return NULL;
 	INIT_WORK(&event_data->work, free_event_data);
 
-	/*
-	 * In theory nothing prevent tracers in a trace session from being
-	 * associated with different sinks, nor having a sink per tracer.  But
-	 * until we have HW with this kind of topology we need to assume tracers
-	 * in a trace session are using the same sink.  Therefore go through
-	 * the coresight bus and pick the first enabled sink.
-	 *
-	 * When operated from sysFS users are responsible to enable the sink
-	 * while from perf, the perf tools will do it based on the choice made
-	 * on the cmd line.  As such the "enable_sink" flag in sysFS is reset.
-	 */
-	sink = coresight_get_enabled_sink(true);
+	/* First get the sink config from user space. */
+	sink = etm_drv_config_sync(event);
+	if (!sink)
+		sink = coresight_get_enabled_sink(true);
+
 	if (!sink || !sink_ops(sink)->alloc_buffer)
 		goto err;
 
@@ -454,6 +469,40 @@  static void etm_addr_filters_sync(struct perf_event *event)
 	filters->nr_filters = i;
 }
 
+static int etm_drv_config_find_sink(struct device *dev, void *data)
+{
+	struct amba_device *adev = to_amba_device(dev->parent);
+	struct resource *res = &adev->res;
+	u64 value = *((u64 *)data);
+
+	/*
+	 * The HW mapping of a component is unique.  If the value we've been
+	 * given matches the component's start address, then we must have found
+	 * the device we are looking for.
+	 */
+	if (res->start == value)
+		return 1;
+
+	return 0;
+}
+
+static void *etm_drv_config_validate(struct perf_event *event, u64 value)
+{
+	struct device *dev;
+	struct coresight_device *sink;
+
+	/* Look for the device with a res->start equal to @value. */
+	dev = bus_find_device(&coresight_bustype, NULL,
+			      &value, etm_drv_config_find_sink);
+	if (!dev)
+		return ERR_PTR(-EINVAL);
+
+	sink = to_coresight_device(dev);
+	put_device(dev);
+
+	return sink;
+}
+
 int etm_perf_symlink(struct coresight_device *csdev, bool link)
 {
 	char entry[sizeof("cpu9999999")];
@@ -498,6 +547,7 @@  static int __init etm_perf_init(void)
 	etm_pmu.addr_filters_sync	= etm_addr_filters_sync;
 	etm_pmu.addr_filters_validate	= etm_addr_filters_validate;
 	etm_pmu.nr_addr_filters		= ETM_ADDR_CMP_MAX;
+	etm_pmu.drv_config_validate	= etm_drv_config_validate;
 
 	ret = perf_pmu_register(&etm_pmu, CORESIGHT_ETM_PMU_NAME, -1);
 	if (ret == 0)