diff mbox

[Initial,Commit] API for Direct access of perf counters from Userspace

Message ID 1411724805-6304-2-git-send-email-yogesh.tillu@linaro.org
State New
Headers show

Commit Message

Yogesh Tillu Sept. 26, 2014, 9:46 a.m. UTC
Signed-off-by: Yogesh Tillu <yogesh.tillu@linaro.org>
---
 helper/include/odph_perf.h | 118 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 118 insertions(+)
 create mode 100644 helper/include/odph_perf.h

Comments

Bill Fischofer Sept. 26, 2014, 10:31 a.m. UTC | #1
To be consistent with ODP naming conventions, I'd suggest changing the
names of these routines:

s/odph_perf_open_counter/odph_perf_counter_open/
s/odph_perf_read_counter/odph_perf_counter_read/
s/odph_perf_write_counter/odph_perf_counter_write/
s/odph_perf_close_counter/odph_perf_counter_close/

Also, is the intent here to only support access to a single perf counter
(PMCNTENSET_EL0)?  I'd expect these routines to include an argument that
references the specific counter of interest rather than a void argument.

Bill


On Fri, Sep 26, 2014 at 4:46 AM, Yogesh Tillu <yogesh.tillu@linaro.org>
wrote:

> Signed-off-by: Yogesh Tillu <yogesh.tillu@linaro.org>
> ---
>  helper/include/odph_perf.h | 118
> +++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 118 insertions(+)
>  create mode 100644 helper/include/odph_perf.h
>
> diff --git a/helper/include/odph_perf.h b/helper/include/odph_perf.h
> new file mode 100644
> index 0000000..3560924
> --- /dev/null
> +++ b/helper/include/odph_perf.h
> @@ -0,0 +1,118 @@
> +/* Copyright (c) 2014, Linaro Limited
> + * All rights reserved.
> + *
> + * SPDX-License-Identifier:     BSD-3-Clause
> + */
> +
> +
> +/**
> + * @file
> + *
> + * ODP Performance Counter Direct access from Userspace Header
> + */
> +
> +#ifndef ODPH_PERF_H_
> +#define ODPH_PERF_H_
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +#if __aarch64__ /**< Check for ArmV8 */
> +#define ARMV8_PMCNTENSET_EL0_ENABLE (1<<31) /**< Enable Perf count reg */
> +#endif
> +
> +/**
> + * Open Performance counter
> + *
> + * @note api to enable performance counters in system, this function does
> + * enable sequence for respective arm versions
> + *
> + * @param void
> + *
> + * @return 0 if open successfully, otherwise -1
> + */
> +static inline int odph_perf_open_counter(void)
> +{
> +#if __aarch64__
> +/* Performance Monitors Count Enable Set register bit 31:0 disable,
> +1 enable */
> +asm volatile("msr pmcntenset_el0, %0" : : "r"
> (ARMV8_PMCNTENSET_EL0_ENABLE));
> +return 0;
> +#elif defined(__ARM_ARCH_7A__)
> +       return 0;
> +#else
> +       #error Unsupported Architecture
> +       return -1;
> +#endif
> +}
> +
> +/**
> + * Read Performance counter
> + *
> + * @note api to read performance cycle counters in system
> + *
> + * @param void
> + *
> + * @return cycle counter value if read successfully, otherwise -1
> + */
> +static inline uint64_t
> +odph_perf_read_counter(void)
> +{
> +uint64_t ret;
> +#if  defined __aarch64__
> +       asm volatile("mrs %0, pmccntr_el0" : "=r" (ret));
> +       return ret;
> +#elif defined(__ARM_ARCH_7A__)
> +       asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(ret));
> +       return ret;
> +#else
> +       #error Unsupported architecture/compiler!
> +       return -1;
> +#endif
> +}
> +
> +/**
> + * Write Performance counter
> + *
> + * @note api to write value to Performance counter,
> + * NA for now
> + *
> + * @param void
> + *
> + * @return 0 if written successfully, otherwise -1
> + */
> +static inline int odph_perf_write_counter(void)
> +{
> +/* TODO: Stub, might be populated based on requirnment */
> +}
> +
> +/**
> + * Close Performance counter
> + *
> + * @note api to perform close sequnce for cycle counters in system
> + *
> + * @param void
> + *
> + * @return 0 if close successfully, otherwise -1
> + */
> +static inline int odph_perf_close_counter(void)
> +{
> +#if  defined __aarch64__
> +/* Performance Monitors Count Enable Set register bit 31:0 disable,
> +1 enable */
> +       asm volatile("msr pmcntenset_el0, %0" : : "r" (0<<31));
> +       /* Note above statement does not really clearing register. TODO */
> +       return 0;
> +#elif defined(__ARM_ARCH_7A__)
> +       return 0;
> +#else
> +       #error Unsupported architecture/compiler!
> +       return -1;
> +#endif
> +}
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +#endif /* ODPH_PERF_H_ */
> --
> 1.9.1
>
>
> _______________________________________________
> lng-odp mailing list
> lng-odp@lists.linaro.org
> http://lists.linaro.org/mailman/listinfo/lng-odp
>
Mike Holmes Sept. 26, 2014, 10:52 a.m. UTC | #2
I assume that this is an initial prototype,.

The open/close semantics suggest to me that I should open the name of the
counter I want and it returns a handle that I can read and write to access
the counter.
This also addresses Bills point that we need to be able to access any one
of potentially hundreds of counters in a specific SoC fastpath HW.

Mike

On 26 September 2014 06:31, Bill Fischofer <bill.fischofer@linaro.org>
wrote:

> To be consistent with ODP naming conventions, I'd suggest changing the
> names of these routines:
>
> s/odph_perf_open_counter/odph_perf_counter_open/
> s/odph_perf_read_counter/odph_perf_counter_read/
> s/odph_perf_write_counter/odph_perf_counter_write/
> s/odph_perf_close_counter/odph_perf_counter_close/
>
> Also, is the intent here to only support access to a single perf counter
> (PMCNTENSET_EL0)?  I'd expect these routines to include an argument that
> references the specific counter of interest rather than a void argument.
>
> Bill
>
>
> On Fri, Sep 26, 2014 at 4:46 AM, Yogesh Tillu <yogesh.tillu@linaro.org>
> wrote:
>
>> Signed-off-by: Yogesh Tillu <yogesh.tillu@linaro.org>
>> ---
>>  helper/include/odph_perf.h | 118
>> +++++++++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 118 insertions(+)
>>  create mode 100644 helper/include/odph_perf.h
>>
>> diff --git a/helper/include/odph_perf.h b/helper/include/odph_perf.h
>> new file mode 100644
>> index 0000000..3560924
>> --- /dev/null
>> +++ b/helper/include/odph_perf.h
>> @@ -0,0 +1,118 @@
>> +/* Copyright (c) 2014, Linaro Limited
>> + * All rights reserved.
>> + *
>> + * SPDX-License-Identifier:     BSD-3-Clause
>> + */
>> +
>> +
>> +/**
>> + * @file
>> + *
>> + * ODP Performance Counter Direct access from Userspace Header
>> + */
>> +
>> +#ifndef ODPH_PERF_H_
>> +#define ODPH_PERF_H_
>> +#ifdef __cplusplus
>> +extern "C" {
>> +#endif
>> +
>> +#if __aarch64__ /**< Check for ArmV8 */
>> +#define ARMV8_PMCNTENSET_EL0_ENABLE (1<<31) /**< Enable Perf count reg */
>> +#endif
>> +
>> +/**
>> + * Open Performance counter
>> + *
>> + * @note api to enable performance counters in system, this function does
>> + * enable sequence for respective arm versions
>> + *
>> + * @param void
>> + *
>> + * @return 0 if open successfully, otherwise -1
>> + */
>> +static inline int odph_perf_open_counter(void)
>> +{
>> +#if __aarch64__
>> +/* Performance Monitors Count Enable Set register bit 31:0 disable,
>> +1 enable */
>> +asm volatile("msr pmcntenset_el0, %0" : : "r"
>> (ARMV8_PMCNTENSET_EL0_ENABLE));
>> +return 0;
>> +#elif defined(__ARM_ARCH_7A__)
>> +       return 0;
>> +#else
>> +       #error Unsupported Architecture
>> +       return -1;
>> +#endif
>> +}
>> +
>> +/**
>> + * Read Performance counter
>> + *
>> + * @note api to read performance cycle counters in system
>> + *
>> + * @param void
>> + *
>> + * @return cycle counter value if read successfully, otherwise -1
>> + */
>> +static inline uint64_t
>> +odph_perf_read_counter(void)
>> +{
>> +uint64_t ret;
>> +#if  defined __aarch64__
>> +       asm volatile("mrs %0, pmccntr_el0" : "=r" (ret));
>> +       return ret;
>> +#elif defined(__ARM_ARCH_7A__)
>> +       asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(ret));
>> +       return ret;
>> +#else
>> +       #error Unsupported architecture/compiler!
>> +       return -1;
>> +#endif
>> +}
>> +
>> +/**
>> + * Write Performance counter
>> + *
>> + * @note api to write value to Performance counter,
>> + * NA for now
>> + *
>> + * @param void
>> + *
>> + * @return 0 if written successfully, otherwise -1
>> + */
>> +static inline int odph_perf_write_counter(void)
>> +{
>> +/* TODO: Stub, might be populated based on requirnment */
>> +}
>> +
>> +/**
>> + * Close Performance counter
>> + *
>> + * @note api to perform close sequnce for cycle counters in system
>> + *
>> + * @param void
>> + *
>> + * @return 0 if close successfully, otherwise -1
>> + */
>> +static inline int odph_perf_close_counter(void)
>> +{
>> +#if  defined __aarch64__
>> +/* Performance Monitors Count Enable Set register bit 31:0 disable,
>> +1 enable */
>> +       asm volatile("msr pmcntenset_el0, %0" : : "r" (0<<31));
>> +       /* Note above statement does not really clearing register. TODO */
>> +       return 0;
>> +#elif defined(__ARM_ARCH_7A__)
>> +       return 0;
>> +#else
>> +       #error Unsupported architecture/compiler!
>> +       return -1;
>> +#endif
>> +}
>> +
>> +#ifdef __cplusplus
>> +}
>> +#endif
>> +
>> +#endif /* ODPH_PERF_H_ */
>> --
>> 1.9.1
>>
>>
>> _______________________________________________
>> lng-odp mailing list
>> lng-odp@lists.linaro.org
>> http://lists.linaro.org/mailman/listinfo/lng-odp
>>
>
>
> _______________________________________________
> lng-odp mailing list
> lng-odp@lists.linaro.org
> http://lists.linaro.org/mailman/listinfo/lng-odp
>
>
Ola Liljedahl Sept. 26, 2014, 11:24 a.m. UTC | #3
Use cases, prerequisites, requirements, limitations, proposed design...
We don't have to write a twenty page document for this but we need to have
some clarity and agreement here.

Application running in user space wants to measure how many HW events (such
as cache misses or CPU cycles) have occurred between different points in
the execution. This is useful for profiling specific code segments (e.g.
from ODP dispatch to return) and is not used only for optimizing throughput
but also in order to verify real-time (latency and jitter) characteristics
as ODP in bare-metal-like environments may be used to replace dedicated
real-time executives. Describing the use cases is important as Linux
beliebers always will have problems understanding why the current solutions
don't work for us and will be sceptical to accepting our patches.

Must be able to enable user space access to PMU counters (as is needed on
ARM). This is probably separate from the user space library. Preferably
done without a kernel module, I can't understand why the kernel should not
have this feature built in.
Must be able to allocate and configure one *or several* HW PMU counters
with CPU architecture-defined or CPU implementation specific event types.
OK to be limited by physical resources (event types, number of PMU
counters).
Must be able to read those counters from user space with minimal SW
overhead (i.e. no system calls as this introduces too much overhead as has
been demonstrated by Yogesh).
Expect application threads to be CPU affine and not be subject to CPU
migration. No need to support migration of PMU configuration or counter
values between CPU's. CPU migration of a thread to some other CPU which
does not have user space access enable will lead to a crash (on ARM).
Perhaps the library should automatically configure CPU affinity or warn if
the thread is allowed to be migrated?
Probably PMU setup is per thread and not per process. Different threads
(running on different CPU's) could be doing event counting (with different
configurations) simultaneously. Or possible the setup is per-process (i.e.
shared between threads) but each thread/CPU has its own set of counters?
Discuss.
Stop counting and free resources, allowing other applications or threads to
reuse the HW resources.
We probably want the kernel to manage the HW resources (as the kernel is
also a user of these resources), the user space library will have to
allocate (or pre-allocate) PMU counters from the kernel. This can be done
using perf_event_open() but has problems today since the kernel just
allocates a virtual counter for you, not a physical counter. The virtual
counter may be remapped to different physical counters.

If we want to support PMU's outside of the CPU (e.g. on the interconnect or
memory controller or accelerators), we probably want a thread-agnostic API
as these PMU's have nothing to do with specific threads. I am not sure we
even need a "fastpath" user space API for such PMU's, the kernel support
and API might be sufficient.


On 26 September 2014 12:52, Mike Holmes <mike.holmes@linaro.org> wrote:

> I assume that this is an initial prototype,.
>
> The open/close semantics suggest to me that I should open the name of the
> counter I want and it returns a handle that I can read and write to access
> the counter.
> This also addresses Bills point that we need to be able to access any one
> of potentially hundreds of counters in a specific SoC fastpath HW.
>
> Mike
>
> On 26 September 2014 06:31, Bill Fischofer <bill.fischofer@linaro.org>
> wrote:
>
>> To be consistent with ODP naming conventions, I'd suggest changing the
>> names of these routines:
>>
>> s/odph_perf_open_counter/odph_perf_counter_open/
>> s/odph_perf_read_counter/odph_perf_counter_read/
>> s/odph_perf_write_counter/odph_perf_counter_write/
>> s/odph_perf_close_counter/odph_perf_counter_close/
>>
>> Also, is the intent here to only support access to a single perf counter
>> (PMCNTENSET_EL0)?  I'd expect these routines to include an argument that
>> references the specific counter of interest rather than a void argument.
>>
>> Bill
>>
>>
>> On Fri, Sep 26, 2014 at 4:46 AM, Yogesh Tillu <yogesh.tillu@linaro.org>
>> wrote:
>>
>>> Signed-off-by: Yogesh Tillu <yogesh.tillu@linaro.org>
>>> ---
>>>  helper/include/odph_perf.h | 118
>>> +++++++++++++++++++++++++++++++++++++++++++++
>>>  1 file changed, 118 insertions(+)
>>>  create mode 100644 helper/include/odph_perf.h
>>>
>>> diff --git a/helper/include/odph_perf.h b/helper/include/odph_perf.h
>>> new file mode 100644
>>> index 0000000..3560924
>>> --- /dev/null
>>> +++ b/helper/include/odph_perf.h
>>> @@ -0,0 +1,118 @@
>>> +/* Copyright (c) 2014, Linaro Limited
>>> + * All rights reserved.
>>> + *
>>> + * SPDX-License-Identifier:     BSD-3-Clause
>>> + */
>>> +
>>> +
>>> +/**
>>> + * @file
>>> + *
>>> + * ODP Performance Counter Direct access from Userspace Header
>>> + */
>>> +
>>> +#ifndef ODPH_PERF_H_
>>> +#define ODPH_PERF_H_
>>> +#ifdef __cplusplus
>>> +extern "C" {
>>> +#endif
>>> +
>>> +#if __aarch64__ /**< Check for ArmV8 */
>>> +#define ARMV8_PMCNTENSET_EL0_ENABLE (1<<31) /**< Enable Perf count reg
>>> */
>>> +#endif
>>> +
>>> +/**
>>> + * Open Performance counter
>>> + *
>>> + * @note api to enable performance counters in system, this function
>>> does
>>> + * enable sequence for respective arm versions
>>> + *
>>> + * @param void
>>> + *
>>> + * @return 0 if open successfully, otherwise -1
>>> + */
>>> +static inline int odph_perf_open_counter(void)
>>> +{
>>> +#if __aarch64__
>>> +/* Performance Monitors Count Enable Set register bit 31:0 disable,
>>> +1 enable */
>>> +asm volatile("msr pmcntenset_el0, %0" : : "r"
>>> (ARMV8_PMCNTENSET_EL0_ENABLE));
>>> +return 0;
>>> +#elif defined(__ARM_ARCH_7A__)
>>> +       return 0;
>>> +#else
>>> +       #error Unsupported Architecture
>>> +       return -1;
>>> +#endif
>>> +}
>>> +
>>> +/**
>>> + * Read Performance counter
>>> + *
>>> + * @note api to read performance cycle counters in system
>>> + *
>>> + * @param void
>>> + *
>>> + * @return cycle counter value if read successfully, otherwise -1
>>> + */
>>> +static inline uint64_t
>>> +odph_perf_read_counter(void)
>>> +{
>>> +uint64_t ret;
>>> +#if  defined __aarch64__
>>> +       asm volatile("mrs %0, pmccntr_el0" : "=r" (ret));
>>> +       return ret;
>>> +#elif defined(__ARM_ARCH_7A__)
>>> +       asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(ret));
>>> +       return ret;
>>> +#else
>>> +       #error Unsupported architecture/compiler!
>>> +       return -1;
>>> +#endif
>>> +}
>>> +
>>> +/**
>>> + * Write Performance counter
>>> + *
>>> + * @note api to write value to Performance counter,
>>> + * NA for now
>>> + *
>>> + * @param void
>>> + *
>>> + * @return 0 if written successfully, otherwise -1
>>> + */
>>> +static inline int odph_perf_write_counter(void)
>>> +{
>>> +/* TODO: Stub, might be populated based on requirnment */
>>> +}
>>> +
>>> +/**
>>> + * Close Performance counter
>>> + *
>>> + * @note api to perform close sequnce for cycle counters in system
>>> + *
>>> + * @param void
>>> + *
>>> + * @return 0 if close successfully, otherwise -1
>>> + */
>>> +static inline int odph_perf_close_counter(void)
>>> +{
>>> +#if  defined __aarch64__
>>> +/* Performance Monitors Count Enable Set register bit 31:0 disable,
>>> +1 enable */
>>> +       asm volatile("msr pmcntenset_el0, %0" : : "r" (0<<31));
>>> +       /* Note above statement does not really clearing register. TODO
>>> */
>>> +       return 0;
>>> +#elif defined(__ARM_ARCH_7A__)
>>> +       return 0;
>>> +#else
>>> +       #error Unsupported architecture/compiler!
>>> +       return -1;
>>> +#endif
>>> +}
>>> +
>>> +#ifdef __cplusplus
>>> +}
>>> +#endif
>>> +
>>> +#endif /* ODPH_PERF_H_ */
>>> --
>>> 1.9.1
>>>
>>>
>>> _______________________________________________
>>> lng-odp mailing list
>>> lng-odp@lists.linaro.org
>>> http://lists.linaro.org/mailman/listinfo/lng-odp
>>>
>>
>>
>> _______________________________________________
>> lng-odp mailing list
>> lng-odp@lists.linaro.org
>> http://lists.linaro.org/mailman/listinfo/lng-odp
>>
>>
>
>
> --
> *Mike Holmes*
> Linaro Technical Manager / Lead
> LNG - ODP
>
> _______________________________________________
> lng-odp mailing list
> lng-odp@lists.linaro.org
> http://lists.linaro.org/mailman/listinfo/lng-odp
>
>
diff mbox

Patch

diff --git a/helper/include/odph_perf.h b/helper/include/odph_perf.h
new file mode 100644
index 0000000..3560924
--- /dev/null
+++ b/helper/include/odph_perf.h
@@ -0,0 +1,118 @@ 
+/* Copyright (c) 2014, Linaro Limited
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier:     BSD-3-Clause
+ */
+
+
+/**
+ * @file
+ *
+ * ODP Performance Counter Direct access from Userspace Header
+ */
+
+#ifndef ODPH_PERF_H_
+#define ODPH_PERF_H_
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if __aarch64__ /**< Check for ArmV8 */
+#define ARMV8_PMCNTENSET_EL0_ENABLE (1<<31) /**< Enable Perf count reg */
+#endif
+
+/**
+ * Open Performance counter
+ *
+ * @note api to enable performance counters in system, this function does
+ * enable sequence for respective arm versions
+ *
+ * @param void
+ *
+ * @return 0 if open successfully, otherwise -1
+ */
+static inline int odph_perf_open_counter(void)
+{
+#if __aarch64__
+/* Performance Monitors Count Enable Set register bit 31:0 disable,
+1 enable */
+asm volatile("msr pmcntenset_el0, %0" : : "r" (ARMV8_PMCNTENSET_EL0_ENABLE));
+return 0;
+#elif defined(__ARM_ARCH_7A__)
+	return 0;
+#else
+	#error Unsupported Architecture
+	return -1;
+#endif
+}
+
+/**
+ * Read Performance counter
+ *
+ * @note api to read performance cycle counters in system
+ *
+ * @param void
+ *
+ * @return cycle counter value if read successfully, otherwise -1
+ */
+static inline uint64_t
+odph_perf_read_counter(void)
+{
+uint64_t ret;
+#if  defined __aarch64__
+	asm volatile("mrs %0, pmccntr_el0" : "=r" (ret));
+	return ret;
+#elif defined(__ARM_ARCH_7A__)
+	asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(ret));
+	return ret;
+#else
+	#error Unsupported architecture/compiler!
+	return -1;
+#endif
+}
+
+/**
+ * Write Performance counter
+ *
+ * @note api to write value to Performance counter,
+ * NA for now
+ *
+ * @param void
+ *
+ * @return 0 if written successfully, otherwise -1
+ */
+static inline int odph_perf_write_counter(void)
+{
+/* TODO: Stub, might be populated based on requirnment */
+}
+
+/**
+ * Close Performance counter
+ *
+ * @note api to perform close sequnce for cycle counters in system
+ *
+ * @param void
+ *
+ * @return 0 if close successfully, otherwise -1
+ */
+static inline int odph_perf_close_counter(void)
+{
+#if  defined __aarch64__
+/* Performance Monitors Count Enable Set register bit 31:0 disable,
+1 enable */
+	asm volatile("msr pmcntenset_el0, %0" : : "r" (0<<31));
+	/* Note above statement does not really clearing register. TODO */
+	return 0;
+#elif defined(__ARM_ARCH_7A__)
+	return 0;
+#else
+	#error Unsupported architecture/compiler!
+	return -1;
+#endif
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* ODPH_PERF_H_ */