diff mbox

[v13,2/8] perf evlist: Introduce aux evlist

Message ID 1467613209-191781-3-git-send-email-wangnan0@huawei.com
State Superseded
Headers show

Commit Message

Wang Nan July 4, 2016, 6:20 a.m. UTC
An auxiliary evlist is created by perf_evlist__new_aux() using an
existing evlist as its parent. An auxiliary evlist can have its own
'struct perf_mmap', but can't have any other data. User should use its
parent instead when accessing other data.

Auxiliary evlists are containers of 'struct perf_mmap'. They are introduced
to allow a parent evlist to map different events into separate mmaps.

Following commits create an auxiliary evlist for overwritable
events, because overwritable events need a read only and backwards ring
buffer, which is different from normal events.

To achieve this goal, this patch carefully changes 'evlist' to
'evlist->parent' in all functions in the path of 'perf_evlist__mmap_ex',
except for 'evlist->mmap' related operations, to make sure all evlist
modifications (like pollfd and event id hash tables) go to the original
evlist.

An 'evlist->parent' pointer is added to 'struct perf_evlist'; it points to
the evlist itself for normal evlists.

Children of one evlist are linked into it so one can find all children
from its parent.

To avoid potential complexity, forbid creating an aux evlist from another
aux evlist.

Improve perf_evlist__munmap_filtered() so that during recording, if an event
is terminated, its mmaps are unmapped from both the parent and the children.

Signed-off-by: Wang Nan <wangnan0@huawei.com>

Cc: He Kuang <hekuang@huawei.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Zefan Li <lizefan@huawei.com>
Cc: Nilay Vaish <nilayvaish@gmail.com>
Cc: pi3orama@163.com
---
 tools/perf/util/evlist.c | 49 +++++++++++++++++++++++++++++++++++++-----------
 tools/perf/util/evlist.h | 12 ++++++++++++
 2 files changed, 50 insertions(+), 11 deletions(-)

-- 
1.8.3.4

Comments

Wang Nan July 6, 2016, 12:16 p.m. UTC | #1
On 2016/7/6 19:36, Jiri Olsa wrote:
> On Mon, Jul 04, 2016 at 06:20:03AM +0000, Wang Nan wrote:

>

> SNIP

>

>> +struct perf_evlist *perf_evlist__new_aux(struct perf_evlist *parent)

>> +{

>> +	struct perf_evlist *evlist;

>> +

>> +	if (perf_evlist__is_aux(parent)) {

>> +		pr_err("Internal error: create aux evlist from another aux evlist\n");

>> +		return NULL;

>> +	}

>> +

>> +	evlist = zalloc(sizeof(*evlist));

>> +	if (!evlist)

>> +		return NULL;

>> +

>> +	perf_evlist__init(evlist, parent->cpus, parent->threads);

>> +	evlist->parent = parent;

>> +	INIT_LIST_HEAD(&evlist->list);

>> +	list_add(&evlist->list, &parent->children);

> I understand there's some reason for separating maps with and

> without overwrite set, but I'm missing it.. why is that?


Are you asking about overwrite, not write_backward?

An overwrite mapping needs to be mapped without PROT_WRITE, so its
control page is also read only. As a result, perf_evlist__mmap_consume()
cannot be used, and there's no way to tell the kernel up to where we have
read: the kernel overwrites old records when the buffer is full. Compare
this with a normal mapping: perf uses perf_evlist__mmap_consume() to tell
the kernel the last byte it has read, so the kernel stops writing data
when the buffer is full and issues a LOST event. This is the reason we
need to separate maps with and without the overwrite setting.

For write backward: the kernel writes data in a different direction, so it
also requires map separation.

Thank you.

> thanks,

> jirka
Wang Nan July 11, 2016, 10:20 a.m. UTC | #2
On 2016/7/8 22:46, Jiri Olsa wrote:
> On Wed, Jul 06, 2016 at 08:16:52PM +0800, Wangnan (F) wrote:

>>

>> On 2016/7/6 19:36, Jiri Olsa wrote:

>>> On Mon, Jul 04, 2016 at 06:20:03AM +0000, Wang Nan wrote:

>>>

>>> SNIP

>>>

>>>> +struct perf_evlist *perf_evlist__new_aux(struct perf_evlist *parent)

>>>> +{

>>>> +	struct perf_evlist *evlist;

>>>> +

>>>> +	if (perf_evlist__is_aux(parent)) {

>>>> +		pr_err("Internal error: create aux evlist from another aux evlist\n");

>>>> +		return NULL;

>>>> +	}

>>>> +

>>>> +	evlist = zalloc(sizeof(*evlist));

>>>> +	if (!evlist)

>>>> +		return NULL;

>>>> +

>>>> +	perf_evlist__init(evlist, parent->cpus, parent->threads);

>>>> +	evlist->parent = parent;

>>>> +	INIT_LIST_HEAD(&evlist->list);

>>>> +	list_add(&evlist->list, &parent->children);

>>> I understand there's some reason for separating maps with and

>>> without overwrite set, but I'm missing it.. why is that?

>> You are asking overwrite, not write_backward?

>>

>> Overwrite mapping needs to be mapped without PROT_WRITE, so its

>> control page is also read only, so perf_evlist__mmap_consume() is

>> not able to use, and there's no way to tell kernel to where we have

>> read. Kernel overwrite old records when its full. Compare with normal

>> mapping: perf uses perf_evlist__mmap_consume() to tell kernel the

>> last byte it has read, so kernel stop writing data to it when it full,

>> and issues LOST event. This is the reason we need to separate maps

>> with and without overwrite set.

>>

>> For write backward: kernel write data in different direction, so

>> requires map separation.

> I dont like the idea of duplicating whole perf_evlist

> in order just to map some events with overwrite/backward

>

> perf_evlist carries all the other info about events,

> not just memory maping..

>

> I think it'd be better to do it some other way, like:

>

>    - we have mmaps for events/evsels, so you're able to map

>      it differently with or without PROT_WRITE even in current

>      design.. there's struct perf_mmap that can carry that info

>      then it's the matter of reading/processing those maps

>      that needs to change.. new perf_evlist interface

>

>    - we could keep separate struct perf_mmap arrays for forward

>      and backward/overwrite maps

>

>    - ...

>

> I understand both mapping need different treatment,

> but I think that should be encapsulated within the

> struct perf_evlist interface


I don't like it either, but aux_evlist is the easiest way to
do this work. Other potential solutions require heavy API changes.

Thank you.
diff mbox

Patch

diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 7228596..7000fe2 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -41,10 +41,12 @@  void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus,
 	for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
 		INIT_HLIST_HEAD(&evlist->heads[i]);
 	INIT_LIST_HEAD(&evlist->entries);
+	INIT_LIST_HEAD(&evlist->children);
 	perf_evlist__set_maps(evlist, cpus, threads);
 	fdarray__init(&evlist->pollfd, 64);
 	evlist->workload.pid = -1;
 	evlist->backward = false;
+	evlist->parent = evlist;
 }
 
 struct perf_evlist *perf_evlist__new(void)
@@ -490,13 +492,17 @@  static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd,
 					 void *arg __maybe_unused)
 {
 	struct perf_evlist *evlist = container_of(fda, struct perf_evlist, pollfd);
+	struct perf_evlist *child;
 
 	perf_evlist__mmap_put(evlist, fda->priv[fd].idx);
+	list_for_each_entry(child, &evlist->children, list)
+		perf_evlist__mmap_put(child, fda->priv[fd].idx);
+
 }
 
 int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask)
 {
-	return fdarray__filter(&evlist->pollfd, revents_and_mask,
+	return fdarray__filter(&evlist->parent->pollfd, revents_and_mask,
 			       perf_evlist__munmap_filtered, NULL);
 }
 
@@ -1015,7 +1021,7 @@  static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
 	struct perf_evsel *evsel;
 	int revent;
 
-	evlist__for_each_entry(evlist, evsel) {
+	evlist__for_each_entry(evlist->parent, evsel) {
 		int fd;
 
 		if (!!evsel->attr.write_backward != (evlist->overwrite && evlist->backward))
@@ -1047,16 +1053,16 @@  static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
 		 * Therefore don't add it for polling.
 		 */
 		if (!evsel->system_wide &&
-		    __perf_evlist__add_pollfd(evlist, fd, idx, revent) < 0) {
+		    __perf_evlist__add_pollfd(evlist->parent, fd, idx, revent) < 0) {
 			perf_evlist__mmap_put(evlist, idx);
 			return -1;
 		}
 
 		if (evsel->attr.read_format & PERF_FORMAT_ID) {
-			if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread,
+			if (perf_evlist__id_add_fd(evlist->parent, evsel, cpu, thread,
 						   fd) < 0)
 				return -1;
-			perf_evlist__set_sid_idx(evlist, evsel, idx, cpu,
+			perf_evlist__set_sid_idx(evlist->parent, evsel, idx, cpu,
 						 thread);
 		}
 	}
@@ -1097,13 +1103,13 @@  static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist,
 					struct mmap_params *mp)
 {
 	int thread;
-	int nr_threads = thread_map__nr(evlist->threads);
+	int nr_threads = thread_map__nr(evlist->parent->threads);
 
 	pr_debug2("perf event ring buffer mmapped per thread\n");
 	for (thread = 0; thread < nr_threads; thread++) {
 		int output = -1;
 
-		auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread,
+		auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist->parent, thread,
 					      false);
 
 		if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread,
@@ -1242,8 +1248,8 @@  int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
 			 bool auxtrace_overwrite)
 {
 	struct perf_evsel *evsel;
-	const struct cpu_map *cpus = evlist->cpus;
-	const struct thread_map *threads = evlist->threads;
+	const struct cpu_map *cpus = evlist->parent->cpus;
+	const struct thread_map *threads = evlist->parent->threads;
 	struct mmap_params mp = {
 		.prot = PROT_READ | (overwrite ? 0 : PROT_WRITE),
 	};
@@ -1251,7 +1257,7 @@  int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
 	if (evlist->mmap == NULL && perf_evlist__alloc_mmap(evlist) < 0)
 		return -ENOMEM;
 
-	if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
+	if (evlist->parent->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist->parent) < 0)
 		return -ENOMEM;
 
 	evlist->overwrite = overwrite;
@@ -1262,7 +1268,7 @@  int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
 	auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->mmap_len,
 				   auxtrace_pages, auxtrace_overwrite);
 
-	evlist__for_each_entry(evlist, evsel) {
+	evlist__for_each_entry(evlist->parent, evsel) {
 		if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
 		    evsel->sample_id == NULL &&
 		    perf_evsel__alloc_id(evsel, cpu_map__nr(cpus), threads->nr) < 0)
@@ -1919,3 +1925,24 @@  perf_evlist__find_evsel_by_str(struct perf_evlist *evlist,
 
 	return NULL;
 }
+
+struct perf_evlist *perf_evlist__new_aux(struct perf_evlist *parent)
+{
+	struct perf_evlist *evlist;
+
+	if (perf_evlist__is_aux(parent)) {
+		pr_err("Internal error: create aux evlist from another aux evlist\n");
+		return NULL;
+	}
+
+	evlist = zalloc(sizeof(*evlist));
+	if (!evlist)
+		return NULL;
+
+	perf_evlist__init(evlist, parent->cpus, parent->threads);
+	evlist->parent = parent;
+	INIT_LIST_HEAD(&evlist->list);
+	list_add(&evlist->list, &parent->children);
+
+	return evlist;
+}
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 872912b..99bcc02 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -37,6 +37,10 @@  struct perf_mmap {
 
 struct perf_evlist {
 	struct list_head entries;
+	union {
+		struct list_head children;
+		struct list_head list;
+	};
 	struct hlist_head heads[PERF_EVLIST__HLIST_SIZE];
 	int		 nr_entries;
 	int		 nr_groups;
@@ -60,8 +64,14 @@  struct perf_evlist {
 	struct perf_evsel *selected;
 	struct events_stats stats;
 	struct perf_env	*env;
+	struct perf_evlist *parent;
 };
 
+static inline bool perf_evlist__is_aux(struct perf_evlist *evlist)
+{
+	return evlist->parent != evlist;
+}
+
 struct perf_evsel_str_handler {
 	const char *name;
 	void	   *handler;
@@ -70,6 +80,8 @@  struct perf_evsel_str_handler {
 struct perf_evlist *perf_evlist__new(void);
 struct perf_evlist *perf_evlist__new_default(void);
 struct perf_evlist *perf_evlist__new_dummy(void);
+struct perf_evlist *perf_evlist__new_aux(struct perf_evlist *);
+
 void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus,
 		       struct thread_map *threads);
 void perf_evlist__exit(struct perf_evlist *evlist);