@@ -31,6 +31,7 @@ struct analysis_data {
unsigned long long last_ts;
struct tep_event *switch_event;
struct tep_format_field *prev_comm;
+ struct tep_format_field *prev_state;
struct tep_format_field *next_comm;
struct tep_format_field *next_pid;
struct cpu_data *cpu_data;
@@ -40,12 +41,25 @@ struct analysis_data {
int cpus;
};
+struct sched_timings {
+ unsigned long long last;
+ unsigned long long total;
+ unsigned long long nr;
+ unsigned long long longest;
+ unsigned long long longest_ts;
+};
+
struct task_item {
unsigned long long runtime;
unsigned long long start_ts;
+ struct sched_timings preempt;
+ struct sched_timings sleep;
+ struct sched_timings blocked;
+ struct sched_timings other;
char *comm;
struct trace_hash_item hash;
int pid;
+ int last_state;
};
struct task_cpu_item {
@@ -132,6 +146,7 @@ static struct task_item *get_task(struct cpu_data *cpu_data, int pid)
hash->key = find_pid;
cpu_data->data->nr_tasks++;
trace_hash_add(&cpu_data->data->tasks, hash);
+ task->last_state = -1;
}
return task_from_hash(hash);
@@ -157,6 +172,19 @@ static struct task_cpu_item *get_cpu_task(struct cpu_data *cpu_data, int pid)
return task_cpu_from_hash(hash);
};
+static void update_sched_timings (struct sched_timings *time, unsigned long long ts)
+{
+ unsigned long long delta;
+
+ delta = ts - time->last;
+ time->total += delta;
+ if (delta > time->longest) {
+ time->longest = delta;
+ time->longest_ts = time->last;
+ }
+ time->nr++;
+}
+
/* Update times for a task scheduling off the CPU */
static void update_cpu_task_times(struct cpu_data *cpu_data,
struct task_cpu_item *cpu_task,
@@ -265,6 +293,7 @@ static void process_switch(struct analysis_data *data,
struct cpu_data *cpu_data = &data->cpu_data[record->cpu];
struct task_cpu_item *cpu_task;
struct task_item *task;
+ unsigned long long val;
const char *comm;
cpu_task = get_cpu_task(cpu_data, pid);
@@ -273,9 +302,29 @@ static void process_switch(struct analysis_data *data,
update_cpu_task_times(cpu_data, cpu_task, record->ts);
/* Fill in missing comms */
- if (pid && data->prev_comm && !task->comm) {
- comm = (char *)(record->data + data->prev_comm->offset);
- task->comm = strdup(comm);
+ if (pid) {
+ if (data->prev_state) {
+ tep_read_number_field(data->prev_state, record->data, &val);
+ switch (val & 0x1f) {
+ case 0:
+ task->preempt.last = record->ts;
+ break;
+ case 0x1:
+ task->sleep.last = record->ts;
+ break;
+ case 0x2:
+ task->blocked.last = record->ts;
+ break;
+ default:
+ task->other.last = record->ts;
+ }
+ task->last_state = val & 0x1f;
+ }
+
+ if (data->prev_comm && !task->comm) {
+ comm = (char *)(record->data + data->prev_comm->offset);
+ task->comm = strdup(comm);
+ }
}
if (data->next_pid) {
@@ -288,6 +337,24 @@ static void process_switch(struct analysis_data *data,
task->start_ts = record->ts;
cpu_data->current_pid = pid;
+ switch (task->last_state) {
+ case -1:
+ /* First time seen */
+ break;
+ case 0:
+ update_sched_timings(&task->preempt, record->ts);
+ break;
+ case 0x1:
+ update_sched_timings(&task->sleep, record->ts);
+ break;
+ case 0x2:
+ update_sched_timings(&task->blocked, record->ts);
+ break;
+ default:
+ update_sched_timings(&task->other, record->ts);
+ }
+ task->last_state = val & 0x1f;
+
/* Fill in missing comms */
if (pid && data->next_comm && !task->comm) {
comm = (char *)(record->data + data->next_comm->offset);
@@ -343,6 +410,17 @@ static int cmp_cpu_tasks(const void *A, const void *B)
return (*a)->runtime < (*b)->runtime;
}
+static int cmp_task_pids(const void *A, const void *B)
+{
+ struct task_item * const *a = A;
+ struct task_item * const *b = B;
+
+ if ((*a)->pid < (*b)->pid)
+ return -1;
+
+ return (*a)->pid > (*b)->pid;
+}
+
static void print_time(unsigned long long ts, char delim)
{
unsigned long long secs;
@@ -374,6 +452,34 @@ static void print_time(unsigned long long ts, char delim)
}
}
+static void print_sched_timings(const char *label, struct sched_timings *time)
+{
+ unsigned long long avg;
+ int n;
+
+ if (!time->nr)
+ return;
+
+ printf("%s:\t", label);
+ print_time(time->total, 0);
+ n = printf(" (%llu)", time->nr);
+ if (n < 8)
+ printf("%*.s", 8 - n, "");
+ avg = time->total / time->nr;
+ print_time(avg, 0);
+ if (time_in_nsecs)
+ printf(" ");
+ else
+ printf(" ");
+ print_time(time->longest, 0);
+ if (time_in_nsecs)
+ printf(" ");
+ else
+ printf(" ");
+ print_time(time->longest_ts, 0);
+ printf("\n");
+}
+
static void print_cpu_data(struct tep_handle *tep, struct cpu_data *cpu_data)
{
unsigned long long total_time;
@@ -435,6 +541,27 @@ static void print_cpu_data(struct tep_handle *tep, struct cpu_data *cpu_data)
free(cpu_tasks);
}
+static void print_task(struct tep_handle *tep, struct task_item *task)
+{
+ const char *a = "";
+
+ if (time_in_nsecs)
+ a = " ";
+
+ printf("\nTask: %d : %s:\n",
+ task->pid , task->comm ? : tep_data_comm_from_pid(tep, task->pid));
+ printf("Runtime: ");
+ print_time(task->runtime, '_');
+ printf("\nType Total %s(cnt) Avg Longest Where\n",
+ a);
+ printf("---- ----- %s----- --- ------- -----\n",
+ a);
+ print_sched_timings("Preempted", &task->preempt);
+ print_sched_timings("Blocked", &task->blocked);
+ print_sched_timings("Sleeping", &task->sleep);
+ print_sched_timings("Other", &task->other);
+}
+
static void print_total(struct tep_handle *tep, struct analysis_data *data)
{
unsigned long long total_time;
@@ -502,7 +629,6 @@ static void print_total(struct tep_handle *tep, struct analysis_data *data)
print_time(tasks[i]->runtime, '_');
printf(" (%%%lld)\n", (tasks[i]->runtime * 100) / total_time);
}
- free(tasks);
printf("\n");
@@ -511,6 +637,14 @@ static void print_total(struct tep_handle *tep, struct analysis_data *data)
continue;
print_cpu_data(tep, &data->cpu_data[i]);
}
+
+ qsort(tasks, nr_tasks, sizeof(*tasks), cmp_task_pids);
+
+ printf("\n");
+ for (i = 0; i < nr_tasks; i++) {
+ print_task(tep, tasks[i]);
+ }
+ free(tasks);
}
static void free_tasks(struct trace_hash *hash)
@@ -581,6 +715,7 @@ static void do_trace_analyze(struct tracecmd_input *handle)
data.next_pid = tep_find_field(data.switch_event, "next_pid");
data.next_comm = tep_find_field(data.switch_event, "next_comm");
data.prev_comm = tep_find_field(data.switch_event, "prev_comm");
+ data.prev_state = tep_find_field(data.switch_event, "prev_state");
}
do {