[v13,5/8] perf record: Read from overwritable ring buffer

Message ID 1467613209-191781-6-git-send-email-wangnan0@huawei.com
State New
Headers show

Commit Message

Wang Nan July 4, 2016, 6:20 a.m.
overwrite_evt_state is introduced to reflect the state of overwritable
ring buffers. It is a state machine with 3 states:

    .________________(forbid)_____________.
    |                                     |
    |                                     V
 RUNNING --(1)--> DATA_PENDING --(2)--> EMPTY
    ^  ^              |   ^               |
    |  |__(forbid)____/   |___(forbid)___/|
    |                                     |
     \_________________(3)_______________/

 RUNNING      : Overwritable ring buffers are recording
 DATA_PENDING : We are required to collect overwritable ring buffers
 EMPTY        : We have collected data from those ring buffers.

 (1): Pause ring buffers for reading
 (2): Read from ring buffers
 (3): Resume ring buffers for recording

We can't avoid this complexity. Since we deliberately drop records from
the overwritable ring buffer, there's no way for us to check what remains
by looking at the ring buffer itself (by checking the head and old
pointers). Therefore, we need the DATA_PENDING and EMPTY states to help
us record what we have done to the ring buffer.

With the above state machine, this patch improves record__mmap_read_all()
to read from the overwritable ring buffer when the DATA_PENDING state is
observed.

Signed-off-by: Wang Nan <wangnan0@huawei.com>

Signed-off-by: He Kuang <hekuang@huawei.com>

Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Zefan Li <lizefan@huawei.com>
Cc: Nilay Vaish <nilayvaish@gmail.com>
Cc: pi3orama@163.com
---
 tools/perf/builtin-record.c | 137 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 136 insertions(+), 1 deletion(-)

-- 
1.8.3.4

Comments

Wang Nan July 6, 2016, 12:03 p.m. | #1
On 2016/7/6 19:38, Jiri Olsa wrote:
> On Mon, Jul 04, 2016 at 06:20:06AM +0000, Wang Nan wrote:

>

> SNIP

>

>> +static void

>> +record__toggle_overwrite_evsels(struct record *rec,

>> +				enum overwrite_evt_state state)

>> +{

>> +	struct perf_evlist *evlist = rec->overwrite_evlist;

>> +	enum overwrite_evt_state old_state = rec->overwrite_evt_state;

>> +	enum action {

>> +		NONE,

>> +		PAUSE,

>> +		RESUME,

>> +	} action = NONE;

>> +

>> +	switch (old_state) {

>> +	case OVERWRITE_EVT_RUNNING: {

>> +		switch (state) {

>> +		case OVERWRITE_EVT_DATA_PENDING:

>> +			action = PAUSE;

>> +			break;

>> +		case OVERWRITE_EVT_RUNNING:

>> +		case OVERWRITE_EVT_EMPTY:

>> +		default:

>> +			goto state_err;

>> +		}

>> +		break;

>> +	}

>> +	case OVERWRITE_EVT_DATA_PENDING: {

>> +		switch (state) {

>> +		case OVERWRITE_EVT_EMPTY:

>> +			break;

>> +		case OVERWRITE_EVT_RUNNING:

>> +		case OVERWRITE_EVT_DATA_PENDING:

>> +		default:

>> +			goto state_err;

>> +		}

>> +		break;

>> +	}

>> +	case OVERWRITE_EVT_EMPTY: {

>> +		switch (state) {

>> +		case OVERWRITE_EVT_RUNNING:

>> +			action = RESUME;

>> +			break;

>> +		case OVERWRITE_EVT_EMPTY:

>> +		case OVERWRITE_EVT_DATA_PENDING:

>> +		default:

>> +			goto state_err;

>> +		}

>> +		break;

>> +	}

>> +	default:

>> +		WARN_ONCE(1, "Shouldn't get there\n");

>> +	}

>> +

>> +	rec->overwrite_evt_state = state;

>> +

>> +	if (!evlist)

>> +		return;

> I'd expect this check at the begining


I think that even if evlist is NULL, the state change is still required.
Actually, the state machine is independent of the aux evlist. Even
without overwritable evsels the state machine is still valid.
So let the state machine run unconditionally.

> jirka
Wang Nan July 7, 2016, 4:59 a.m. | #2
On 2016/7/6 20:34, Jiri Olsa wrote:
> On Wed, Jul 06, 2016 at 08:03:28PM +0800, Wangnan (F) wrote:

>>

>> On 2016/7/6 19:38, Jiri Olsa wrote:

>>> On Mon, Jul 04, 2016 at 06:20:06AM +0000, Wang Nan wrote:

>>>

>>> SNIP

>>>

>>>> +static void

>>>> +record__toggle_overwrite_evsels(struct record *rec,

>>>> +				enum overwrite_evt_state state)

>>>> +{

>>>> +	struct perf_evlist *evlist = rec->overwrite_evlist;

>>>> +	enum overwrite_evt_state old_state = rec->overwrite_evt_state;

>>>> +	enum action {

>>>> +		NONE,

>>>> +		PAUSE,

>>>> +		RESUME,

>>>> +	} action = NONE;

>>>> +

>>>> +	switch (old_state) {

>>>> +	case OVERWRITE_EVT_RUNNING: {

>>>> +		switch (state) {

>>>> +		case OVERWRITE_EVT_DATA_PENDING:

>>>> +			action = PAUSE;

>>>> +			break;

>>>> +		case OVERWRITE_EVT_RUNNING:

>>>> +		case OVERWRITE_EVT_EMPTY:

>>>> +		default:

>>>> +			goto state_err;

>>>> +		}

>>>> +		break;

>>>> +	}

>>>> +	case OVERWRITE_EVT_DATA_PENDING: {

>>>> +		switch (state) {

>>>> +		case OVERWRITE_EVT_EMPTY:

>>>> +			break;

>>>> +		case OVERWRITE_EVT_RUNNING:

>>>> +		case OVERWRITE_EVT_DATA_PENDING:

>>>> +		default:

>>>> +			goto state_err;

>>>> +		}

>>>> +		break;

>>>> +	}

>>>> +	case OVERWRITE_EVT_EMPTY: {

>>>> +		switch (state) {

>>>> +		case OVERWRITE_EVT_RUNNING:

>>>> +			action = RESUME;

>>>> +			break;

>>>> +		case OVERWRITE_EVT_EMPTY:

>>>> +		case OVERWRITE_EVT_DATA_PENDING:

>>>> +		default:

>>>> +			goto state_err;

>>>> +		}

>>>> +		break;

>>>> +	}

>>>> +	default:

>>>> +		WARN_ONCE(1, "Shouldn't get there\n");

>>>> +	}

>>>> +

>>>> +	rec->overwrite_evt_state = state;

>>>> +

>>>> +	if (!evlist)

>>>> +		return;

>>> I'd expect this check at the begining

>> I think even evlist is NULL the state changing is still required.

>> Actually, the state machine is independent with aux evlist. Even

>> we without overwritable evsels the state machine is still valid.

>> So let the state machine runs unconditionally.

> hum, can't see that.. it's state machine to govern overwrite evlist, right?

> if there's no overwrite evlist we should keep the current processing


Not as easy as I thought. Look at the following code:

>@@ -1006,8 +1122,27 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)

> 		}

>

> 		if (trigger_is_hit(&switch_output_trigger)) {

>+			/*

>+			 * If switch_output_trigger is hit, the data in

>+			 * overwritable ring buffer should have been collected,

>+			 * so overwrite_evt_state should be set to

>+			 * OVERWRITE_EVT_EMPTY.

>+			 *

>+			 * If SIGUSR2 raise after or during record__mmap_read_all(),

>+			 * record__mmap_read_all() didn't collect data from

>+			 * overwritable ring buffer. Read again.

>+			 */

>+			if (rec->overwrite_evt_state == OVERWRITE_EVT_RUNNING)

>+				continue;

> 			trigger_ready(&switch_output_trigger);

>

>+			/*

>+			 * Reenable events in overwrite ring buffer after

>+			 * record__mmap_read_all(): we should have collected

>+			 * data from it.

>+			 */

>+			record__toggle_overwrite_evsels(rec, OVERWRITE_EVT_RUNNING);

>+

> 			if (!quiet)

> 				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",

> 					waking);


Here perf tests whether reading from the overwritable ring buffer is
required. If SIGUSR2 is received just before the above trigger_is_hit,
we should read from the overwrite ring buffer again. The
OVERWRITE_EVT_RUNNING check is there for this reason.

Now if we stop the state machine, the state stays at
OVERWRITE_EVT_RUNNING, which causes perf to loop forever.

We can check rec->overwrite_evlist first, but it is ugly, since I
believe the overwritable state is independent of the overwrite evlist.
So I decided to introduce a new state to indicate that the overwrite
evlist is not ready.

Thank you.


> if it's meant to govern the mmap reading in general

> we should at least rename it

> jirka

Patch

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 3b62295..2a1b3c0 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -43,6 +43,30 @@ 
 #include <sys/mman.h>
 #include <asm/bug.h>
 
+/*
+ * State machine of overwrite_evt_state:
+ *
+ *    .________________(forbid)_____________.
+ *    |                                     V
+ * RUNNING --(1)--> DATA_PENDING --(2)--> EMPTY
+ *    ^  ^              |   ^               |
+ *    |  |__(forbid)____/   |___(forbid)___/|
+ *    |                                     |
+ *     \_________________(3)_______________/
+ *
+ * RUNNING      : Overwritable ring buffers are recording
+ * DATA_PENDING : We are required to collect overwritable ring buffers
+ * EMPTY        : We have collected data from those ring buffers.
+ *
+ * (1): Pause ring buffers for reading
+ * (2): Read from ring buffers
+ * (3): Resume ring buffers for recording
+ */
+enum overwrite_evt_state {
+	OVERWRITE_EVT_RUNNING,
+	OVERWRITE_EVT_DATA_PENDING,
+	OVERWRITE_EVT_EMPTY,
+};
 
 struct record {
 	struct perf_tool	tool;
@@ -62,6 +86,7 @@  struct record {
 	bool			buildid_all;
 	bool			timestamp_filename;
 	bool			switch_output;
+	enum overwrite_evt_state overwrite_evt_state;
 	unsigned long long	samples;
 };
 
@@ -463,6 +488,7 @@  try_again:
 
 	session->evlist = evlist;
 	perf_session__set_id_hdr_size(session);
+	rec->overwrite_evt_state = OVERWRITE_EVT_RUNNING;
 out:
 	return rc;
 }
@@ -543,6 +569,79 @@  static struct perf_event_header finished_round_event = {
 	.type = PERF_RECORD_FINISHED_ROUND,
 };
 
+static void
+record__toggle_overwrite_evsels(struct record *rec,
+				enum overwrite_evt_state state)
+{
+	struct perf_evlist *evlist = rec->overwrite_evlist;
+	enum overwrite_evt_state old_state = rec->overwrite_evt_state;
+	enum action {
+		NONE,
+		PAUSE,
+		RESUME,
+	} action = NONE;
+
+	switch (old_state) {
+	case OVERWRITE_EVT_RUNNING: {
+		switch (state) {
+		case OVERWRITE_EVT_DATA_PENDING:
+			action = PAUSE;
+			break;
+		case OVERWRITE_EVT_RUNNING:
+		case OVERWRITE_EVT_EMPTY:
+		default:
+			goto state_err;
+		}
+		break;
+	}
+	case OVERWRITE_EVT_DATA_PENDING: {
+		switch (state) {
+		case OVERWRITE_EVT_EMPTY:
+			break;
+		case OVERWRITE_EVT_RUNNING:
+		case OVERWRITE_EVT_DATA_PENDING:
+		default:
+			goto state_err;
+		}
+		break;
+	}
+	case OVERWRITE_EVT_EMPTY: {
+		switch (state) {
+		case OVERWRITE_EVT_RUNNING:
+			action = RESUME;
+			break;
+		case OVERWRITE_EVT_EMPTY:
+		case OVERWRITE_EVT_DATA_PENDING:
+		default:
+			goto state_err;
+		}
+		break;
+	}
+	default:
+		WARN_ONCE(1, "Shouldn't get there\n");
+	}
+
+	rec->overwrite_evt_state = state;
+
+	if (!evlist)
+		return;
+
+	switch (action) {
+	case PAUSE:
+		perf_evlist__pause(evlist);
+		break;
+	case RESUME:
+		perf_evlist__resume(evlist);
+		break;
+	case NONE:
+	default:
+		break;
+	}
+
+state_err:
+	return;
+}
+
 static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist)
 {
 	u64 bytes_written = rec->bytes_written;
@@ -588,7 +687,13 @@  static int record__mmap_read_all(struct record *rec)
 	if (err)
 		return err;
 
-	return err;
+	if (rec->overwrite_evt_state == OVERWRITE_EVT_DATA_PENDING) {
+		err = record__mmap_read_evlist(rec, rec->overwrite_evlist);
+		if (err)
+			return err;
+		record__toggle_overwrite_evsels(rec, OVERWRITE_EVT_EMPTY);
+	}
+	return 0;
 }
 
 static void record__init_features(struct record *rec)
@@ -987,6 +1092,17 @@  static int __cmd_record(struct record *rec, int argc, const char **argv)
 	for (;;) {
 		unsigned long long hits = rec->samples;
 
+		/*
+		 * rec->overwrite_evt_state is possible to be
+		 * OVERWRITE_EVT_EMPTY here: when done == true and
+		 * hits != rec->samples in previous round.
+		 *
+		 * record__toggle_overwrite_evsels ensure we never
+		 * convert OVERWRITE_EVT_EMPTY to OVERWRITE_EVT_DATA_PENDING.
+		 */
+		if (trigger_is_hit(&switch_output_trigger) || done || draining)
+			record__toggle_overwrite_evsels(rec, OVERWRITE_EVT_DATA_PENDING);
+
 		if (record__mmap_read_all(rec) < 0) {
 			trigger_error(&auxtrace_snapshot_trigger);
 			trigger_error(&switch_output_trigger);
@@ -1006,8 +1122,27 @@  static int __cmd_record(struct record *rec, int argc, const char **argv)
 		}
 
 		if (trigger_is_hit(&switch_output_trigger)) {
+			/*
+			 * If switch_output_trigger is hit, the data in
+			 * overwritable ring buffer should have been collected,
+			 * so overwrite_evt_state should be set to
+			 * OVERWRITE_EVT_EMPTY.
+			 *
+			 * If SIGUSR2 raise after or during record__mmap_read_all(),
+			 * record__mmap_read_all() didn't collect data from
+			 * overwritable ring buffer. Read again.
+			 */
+			if (rec->overwrite_evt_state == OVERWRITE_EVT_RUNNING)
+				continue;
 			trigger_ready(&switch_output_trigger);
 
+			/*
+			 * Reenable events in overwrite ring buffer after
+			 * record__mmap_read_all(): we should have collected
+			 * data from it.
+			 */
+			record__toggle_overwrite_evsels(rec, OVERWRITE_EVT_RUNNING);
+
 			if (!quiet)
 				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
 					waking);