diff mbox series

[11/12] trace-cmd analyze: Account for dropped events

Message ID 20220324025726.1727204-12-rostedt@goodmis.org
State New
Headers show
Series trace-cmd: Add trace-cmd analyze command | expand

Commit Message

Steven Rostedt March 24, 2022, 2:57 a.m. UTC
From: "Steven Rostedt (Google)" <rostedt@goodmis.org>

If the trace.dat file contains dropped events due to the reader not being
able to keep up with the writer, account for it. Update the CPUs where
there are dropped events and tag the tasks that are on those CPUs.

Add to the output the fact that events were dropped.

Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 tracecmd/trace-analyze.c | 87 ++++++++++++++++++++++++++++++++++++++--
 1 file changed, 84 insertions(+), 3 deletions(-)
diff mbox series

Patch

diff --git a/tracecmd/trace-analyze.c b/tracecmd/trace-analyze.c
index 2603c360c404..2f1b2720810e 100644
--- a/tracecmd/trace-analyze.c
+++ b/tracecmd/trace-analyze.c
@@ -19,6 +19,8 @@  struct task_item;
 struct cpu_data {
 	unsigned long long	start_ts;
 	unsigned long long	last_ts;
+	unsigned long long	missed_events;
+	unsigned long long	unknown_missed_events;
 	struct task_item	*idle_task;
 	struct analysis_data	*data;
 	struct trace_hash	tasks;
@@ -27,6 +29,7 @@  struct cpu_data {
 	int			current_pid;
 	int			first_pid;
 	bool			last_idle;
+	bool			keep_start;
 };
 
 struct analysis_data {
@@ -45,6 +48,7 @@  struct analysis_data {
 	int			nr_tasks;
 	int			allocated_cpus;
 	int			cpus;
+	bool			missed_events;
 };
 
 struct sched_timings {
@@ -71,6 +75,7 @@  struct task_item {
 	int			last_cpu;
 	int			last_state;
 	bool			woken;
+	bool			dropped_events;
 };
 
 struct task_cpu_item {
@@ -306,6 +311,9 @@  static void update_cpu_times(struct cpu_data *cpu_data,
 		update_idle_task(cpu_data, task, record->ts);
 		if (record->ts < cpu_data->data->start_ts)
 			cpu_data->data->start_ts = record->ts;
+		/* If we lost events before here, keep the current start time */
+		if (record->missed_events)
+			cpu_data->keep_start = true;
 		return;
 	}
 
@@ -326,6 +334,10 @@  static void update_first_pid(struct cpu_data *cpu_data)
 	unsigned long long start_ts = cpu_data->data->start_ts;
 	unsigned long long delta;
 
+	/* If the CPU started with dropped events, do not update */
+	if (cpu_data->keep_start)
+		return;
+
 	cpu_task = get_cpu_task(cpu_data, cpu_data->first_pid);
 	task = cpu_task->task;
 	delta = cpu_data->start_ts - start_ts;
@@ -464,6 +476,50 @@  static void process_page_fault(struct analysis_data *data,
 	task->faulted++;
 }
 
+static void handle_missed_events(struct analysis_data *data,
+				 struct tep_record *record)
+{
+	struct trace_hash_item **bucket;
+	struct trace_hash_item *item;
+	struct task_cpu_item *cpu_task;
+	struct cpu_data *cpu_data;
+	struct task_item *task;
+	long long missed_events = record->missed_events;
+	int cpu;
+
+	/* If this is the first event for the CPU do nothing here */
+	if (record->cpu >= data->allocated_cpus ||
+	    !data->cpu_data[record->cpu].data)
+		return;
+
+	data->missed_events = true;
+
+	cpu_data = &data->cpu_data[record->cpu];
+	cpu = cpu_data->cpu;
+
+	if (missed_events > 0)
+		cpu_data->missed_events += missed_events;
+	else
+		cpu_data->unknown_missed_events++;
+
+	/* Need to reset all timings */
+	trace_hash_for_each_bucket(bucket, &cpu_data->tasks) {
+		trace_hash_for_each_item(item, bucket) {
+			cpu_task = task_cpu_from_hash(item);
+			task = cpu_task->task;
+			if (task->last_cpu != cpu)
+				continue;
+			task->preempt.last = record->ts;
+			task->sleep.last = record->ts;
+			task->blocked.last = record->ts;
+			task->other.last = record->ts;
+			task->woken = false;
+			task->last_state = -1;
+			task->dropped_events = true;
+		}
+	}
+}
+
 static bool match_type(int type, struct tep_event *event)
 {
 	return event && type == event->id;
@@ -477,6 +533,9 @@  static void process_cpu(struct analysis_data *data,
 	int type;
 	int pid;
 
+	if (record->missed_events)
+		handle_missed_events(data, record);
+
 	pid = tep_data_pid(tep, record);
 	if (pid < 0) /* Warn? */
 		return;
@@ -624,12 +683,29 @@  static void print_cpu_data(struct tep_handle *tep, struct cpu_data *cpu_data)
 	int nr_tasks;
 	int i = 0;
 
-	data = cpu_data->data;
-	total_time = data->last_ts - data->start_ts;
-
 	printf("\nCPU %d\n", cpu_data->cpu);
 	printf("-------\n");
 
+	if (cpu_data->missed_events || cpu_data->unknown_missed_events) {
+		printf("Missed: %llu events", cpu_data->missed_events);
+		if (cpu_data->unknown_missed_events)
+			printf(" + %llu drops of unknown events",
+			       cpu_data->unknown_missed_events);
+		printf("\n");
+	}
+
+	data = cpu_data->data;
+
+	if (cpu_data->keep_start) {
+		total_time = data->last_ts - cpu_data->start_ts;
+		printf("[ Events dropped before start of this CPU ]\n");
+		printf("Total time: ");
+		print_time(total_time, '_');
+		printf("\n");
+	} else {
+		total_time = data->last_ts - data->start_ts;
+	}
+
 	cpu_tasks = malloc(sizeof(*cpu_tasks) * cpu_data->nr_tasks);
 
 	if (!cpu_tasks)
@@ -678,6 +754,8 @@  static void print_task(struct tep_handle *tep, struct task_item *task)
 {
 	printf("\nTask: %d : %s:\n",
 	       task->pid , task->comm ? : tep_data_comm_from_pid(tep, task->pid));
+	if (task->dropped_events)
+		printf("[ Events dropped for this task's CPU, may be missing data ]\n");
 	printf("Runtime:    ");
 	print_time(task->runtime, '_');
 	printf("\n");
@@ -706,6 +784,9 @@  static void print_total(struct tep_handle *tep, struct analysis_data *data)
 	int cpu;
 	int i = 0;
 
+	if (data->missed_events)
+		printf("\n[ Missed events, some data may be missing ]\n");
+
 	total_time = data->last_ts - data->start_ts;
 	printf("\nTotal time: ");
 	print_time(total_time, '_');