diff mbox series

[06/12] trace-cmd analyze: Add tracing of tasks and their states

Message ID 20220324025726.1727204-7-rostedt@goodmis.org
State New
Headers show
Series trace-cmd: Add trace-cmd analyze command | expand

Commit Message

Steven Rostedt March 24, 2022, 2:57 a.m. UTC
From: "Steven Rostedt (Google)" <rostedt@goodmis.org>

If sched_switch is found and has the prev_state field, then record them
and keep track of the time the task was in the state, the number of times
it was scheduled out of the state, and print the total time, average time,
and number of times the task was scheduled out of that state. Also show
the time stamp of where the longest time it went into that state.

Currently the only states that are supported are "preempted" (which was in
state running), "blocked" (state uninterruptible), "sleeping" (state
interruptible), and "other" (all other states).

Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 tracecmd/trace-analyze.c | 143 +++++++++++++++++++++++++++++++++++++--
 1 file changed, 139 insertions(+), 4 deletions(-)

Comments

John Keeping March 25, 2022, 7:37 p.m. UTC | #1
On Wed, Mar 23, 2022 at 10:57:20PM -0400, Steven Rostedt wrote:
> From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
> 
> If sched_switch is found and has the prev_state field, then record them
> and keep track of the time the task was in the state, the number of times
> it was scheduled out of the state, and print the total time, average time,
> and number of times the task was scheduled out of that state. Also show
> the time stamp of where the longest time it went into that state.
> 
> Currently the only states that are supported are "preempted" (which was in
> state running), "blocked" (state uninterruptible), "sleeping" (state
> interruptible), and "other" (all other states).
> 
> Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
> ---
> @@ -288,6 +337,24 @@ static void process_switch(struct analysis_data *data,
>  		task->start_ts = record->ts;
>  		cpu_data->current_pid = pid;
>  
> +		switch (task->last_state) {
> +		case -1:
> +			/* First time seen */
> +			break;
> +		case 0:
> +			update_sched_timings(&task->preempt, record->ts);
> +			break;
> +		case 0x1:
> +			update_sched_timings(&task->sleep, record->ts);
> +			break;
> +		case 0x2:
> +			update_sched_timings(&task->blocked, record->ts);
> +			break;
> +		default:
> +			update_sched_timings(&task->other, record->ts);
> +		}
> +		task->last_state = val & 0x1f;

val is next_pid here, does last_state even need to be updated for the
switched-to task?
Steven Rostedt March 25, 2022, 8:19 p.m. UTC | #2
On Fri, 25 Mar 2022 19:37:03 +0000
John Keeping <john@metanate.com> wrote:

> > @@ -288,6 +337,24 @@ static void process_switch(struct analysis_data *data,
> >  		task->start_ts = record->ts;
> >  		cpu_data->current_pid = pid;
> >  
> > +		switch (task->last_state) {
> > +		case -1:
> > +			/* First time seen */
> > +			break;
> > +		case 0:
> > +			update_sched_timings(&task->preempt, record->ts);
> > +			break;
> > +		case 0x1:
> > +			update_sched_timings(&task->sleep, record->ts);
> > +			break;
> > +		case 0x2:
> > +			update_sched_timings(&task->blocked, record->ts);
> > +			break;
> > +		default:
> > +			update_sched_timings(&task->other, record->ts);
> > +		}
> > +		task->last_state = val & 0x1f;  
> 
> val is next_pid here, does last_state even need to be updated for the
> switched-to task?

-ECUTANDPASTE :-p

Thanks,

-- Steve
diff mbox series

Patch

diff --git a/tracecmd/trace-analyze.c b/tracecmd/trace-analyze.c
index 56123f9b4f86..ee4f71a7da26 100644
--- a/tracecmd/trace-analyze.c
+++ b/tracecmd/trace-analyze.c
@@ -31,6 +31,7 @@  struct analysis_data {
 	unsigned long long	last_ts;
 	struct tep_event	*switch_event;
 	struct tep_format_field	*prev_comm;
+	struct tep_format_field	*prev_state;
 	struct tep_format_field	*next_comm;
 	struct tep_format_field	*next_pid;
 	struct cpu_data		*cpu_data;
@@ -40,12 +41,25 @@  struct analysis_data {
 	int			cpus;
 };
 
+struct sched_timings {
+	unsigned long long	last;
+	unsigned long long	total;
+	unsigned long long	nr;
+	unsigned long long	longest;
+	unsigned long long	longest_ts;
+};
+
 struct task_item {
 	unsigned long long	runtime;
 	unsigned long long	start_ts;
+	struct sched_timings	preempt;
+	struct sched_timings	sleep;
+	struct sched_timings	blocked;
+	struct sched_timings	other;
 	char			*comm;
 	struct trace_hash_item	hash;
 	int			pid;
+	int			last_state;
 };
 
 struct task_cpu_item {
@@ -132,6 +146,7 @@  static struct task_item *get_task(struct cpu_data *cpu_data, int pid)
 		hash->key = find_pid;
 		cpu_data->data->nr_tasks++;
 		trace_hash_add(&cpu_data->data->tasks, hash);
+		task->last_state = -1;
 	}
 
 	return task_from_hash(hash);
@@ -157,6 +172,19 @@  static struct task_cpu_item *get_cpu_task(struct cpu_data *cpu_data, int pid)
 	return task_cpu_from_hash(hash);
 };
 
+static void update_sched_timings (struct sched_timings *time, unsigned long long ts)
+{
+	unsigned long long delta;
+
+	delta = ts - time->last;
+	time->total += delta;
+	if (delta > time->longest) {
+		time->longest = delta;
+		time->longest_ts = time->last;
+	}
+	time->nr++;
+}
+
 /* Update times for a task scheduling off the CPU */
 static void update_cpu_task_times(struct cpu_data *cpu_data,
 				  struct task_cpu_item *cpu_task,
@@ -265,6 +293,7 @@  static void process_switch(struct analysis_data *data,
 	struct cpu_data *cpu_data = &data->cpu_data[record->cpu];
 	struct task_cpu_item *cpu_task;
 	struct task_item *task;
+	unsigned long long val;
 	const char *comm;
 
 	cpu_task = get_cpu_task(cpu_data, pid);
@@ -273,9 +302,29 @@  static void process_switch(struct analysis_data *data,
 	update_cpu_task_times(cpu_data, cpu_task, record->ts);
 
 	/* Fill in missing comms */
-	if (pid && data->prev_comm && !task->comm) {
-		comm = (char *)(record->data + data->prev_comm->offset);
-		task->comm = strdup(comm);
+	if (pid) {
+		if (data->prev_state) {
+			tep_read_number_field(data->prev_state, record->data, &val);
+			switch (val & 0x1f) {
+			case 0:
+				task->preempt.last = record->ts;
+				break;
+			case 0x1:
+				task->sleep.last = record->ts;
+				break;
+			case 0x2:
+				task->blocked.last = record->ts;
+				break;
+			default:
+				task->other.last = record->ts;
+			}
+			task->last_state = val & 0x1f;
+		}
+
+		if (data->prev_comm && !task->comm) {
+			comm = (char *)(record->data + data->prev_comm->offset);
+			task->comm = strdup(comm);
+		}
 	}
 
 	if (data->next_pid) {
@@ -288,6 +337,24 @@  static void process_switch(struct analysis_data *data,
 		task->start_ts = record->ts;
 		cpu_data->current_pid = pid;
 
+		switch (task->last_state) {
+		case -1:
+			/* First time seen */
+			break;
+		case 0:
+			update_sched_timings(&task->preempt, record->ts);
+			break;
+		case 0x1:
+			update_sched_timings(&task->sleep, record->ts);
+			break;
+		case 0x2:
+			update_sched_timings(&task->blocked, record->ts);
+			break;
+		default:
+			update_sched_timings(&task->other, record->ts);
+		}
+		task->last_state = val & 0x1f;
+
 		/* Fill in missing comms */
 		if (pid && data->next_comm && !task->comm) {
 			comm = (char *)(record->data + data->next_comm->offset);
@@ -343,6 +410,17 @@  static int cmp_cpu_tasks(const void *A, const void *B)
 	return (*a)->runtime < (*b)->runtime;
 }
 
+static int cmp_task_pids(const void *A, const void *B)
+{
+	struct task_item * const *a = A;
+	struct task_item * const *b = B;
+
+	if ((*a)->pid < (*b)->pid)
+		return -1;
+
+	return (*a)->pid > (*b)->pid;
+}
+
 static void print_time(unsigned long long ts, char delim)
 {
 	unsigned long long secs;
@@ -374,6 +452,34 @@  static void print_time(unsigned long long ts, char delim)
 	}
 }
 
+static void print_sched_timings(const char *label, struct sched_timings *time)
+{
+	unsigned long long avg;
+	int n;
+
+	if (!time->nr)
+		return;
+
+	printf("%s:\t", label);
+	print_time(time->total, 0);
+	n = printf(" (%llu)", time->nr);
+	if (n < 8)
+		printf("%*.s", 8 - n, "");
+	avg = time->total / time->nr;
+	print_time(avg, 0);
+	if (time_in_nsecs)
+		printf(" ");
+	else
+		printf("    ");
+	print_time(time->longest, 0);
+	if (time_in_nsecs)
+		printf(" ");
+	else
+		printf("    ");
+	print_time(time->longest_ts, 0);
+	printf("\n");
+}
+
 static void print_cpu_data(struct tep_handle *tep, struct cpu_data *cpu_data)
 {
 	unsigned long long total_time;
@@ -435,6 +541,27 @@  static void print_cpu_data(struct tep_handle *tep, struct cpu_data *cpu_data)
 	free(cpu_tasks);
 }
 
+static void print_task(struct tep_handle *tep, struct task_item *task)
+{
+	const char *a = "";
+
+	if (time_in_nsecs)
+		a = "   ";
+
+	printf("\nTask: %d : %s:\n",
+	       task->pid , task->comm ? : tep_data_comm_from_pid(tep, task->pid));
+	printf("Runtime: ");
+	print_time(task->runtime, '_');
+	printf("\nType                 Total    %s(cnt)       Avg              Longest       Where\n",
+		a);
+	printf("----                 -----    %s-----       ---              -------       -----\n",
+		a);
+	print_sched_timings("Preempted", &task->preempt);
+	print_sched_timings("Blocked", &task->blocked);
+	print_sched_timings("Sleeping", &task->sleep);
+	print_sched_timings("Other", &task->other);
+}
+
 static void print_total(struct tep_handle *tep, struct analysis_data *data)
 {
 	unsigned long long total_time;
@@ -502,7 +629,6 @@  static void print_total(struct tep_handle *tep, struct analysis_data *data)
 		print_time(tasks[i]->runtime, '_');
 		printf(" (%%%lld)\n", (tasks[i]->runtime * 100) / total_time);
 	}
-	free(tasks);
 
 	printf("\n");
 
@@ -511,6 +637,14 @@  static void print_total(struct tep_handle *tep, struct analysis_data *data)
 			continue;
 		print_cpu_data(tep, &data->cpu_data[i]);
 	}
+
+	qsort(tasks, nr_tasks, sizeof(*tasks), cmp_task_pids);
+
+	printf("\n");
+	for (i = 0; i < nr_tasks; i++) {
+		print_task(tep, tasks[i]);
+	}
+	free(tasks);
 }
 
 static void free_tasks(struct trace_hash *hash)
@@ -581,6 +715,7 @@  static void do_trace_analyze(struct tracecmd_input *handle)
 		data.next_pid = tep_find_field(data.switch_event, "next_pid");
 		data.next_comm = tep_find_field(data.switch_event, "next_comm");
 		data.prev_comm = tep_find_field(data.switch_event, "prev_comm");
+		data.prev_state = tep_find_field(data.switch_event, "prev_state");
 	}
 
 	do {