diff mbox series

[v6,1/3] tracing/hist: Add poll(POLLIN) support on hist file

Message ID 172907576461.470540.1954983717302445787.stgit@mhiramat.roam.corp.google.com
State New
Headers show
Series tracing: Support poll on event hist file | expand

Commit Message

Masami Hiramatsu (Google) Oct. 16, 2024, 10:49 a.m. UTC
From: Masami Hiramatsu (Google) <mhiramat@kernel.org>

Add poll syscall support on the `hist` file. The waiter will be woken
up with POLLIN when the histogram is updated.

Currently, there is no way to wait for a specific event in userspace.
So the user needs to peek at the `trace` file periodically, or wait on
`trace_pipe`. But it is not a good idea to peek at the `trace` for an
event that randomly happens. And `trace_pipe` is not coming back until
a page is filled with events.

This allows a user to wait for a specific event on the `hist` file. The
user can set a histogram trigger on the event which they want to monitor
and poll() on its `hist` file. Since this poll() returns POLLIN, the next
poll() will return immediately unless a read() is done on that hist file.

NOTE: To read the hist file again, you must set the file offset to 0,
but just for monitoring the event, you may not need to read the
histogram.

Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Reviewed-by: Tom Zanussi <zanussi@kernel.org>
---
 Changes in v6:
   - Use a global poll irq_work and wait_queue.
---
 include/linux/trace_events.h     |   14 +++++++
 kernel/trace/trace_events.c      |   14 +++++++
 kernel/trace/trace_events_hist.c |   75 ++++++++++++++++++++++++++++++++++++--
 3 files changed, 100 insertions(+), 3 deletions(-)

Comments

Steven Rostedt Dec. 19, 2024, 4:23 p.m. UTC | #1
On Wed, 16 Oct 2024 19:49:24 +0900
"Masami Hiramatsu (Google)" <mhiramat@kernel.org> wrote:

> From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
> 
> Add poll syscall support on the `hist` file. The Waiter will be waken
> up when the histogram is updated with POLLIN.
> 
> Currently, there is no way to wait for a specific event in userspace.
> So user needs to peek the `trace` periodicaly, or wait on `trace_pipe`.


> But that is not good idea to peek the `trace` for the event randomely
> happens. And `trace_pipe` is not coming back until a page is filled
> with events.

I would reword the above to:

    But it is not a good idea to peek at the `trace` for an event that
    randomly happens. And `trace_pipe` is not coming back until a page is
    filled with events.

> 
> This allows user to wait for a specific events on `hist` file. User
> can set a histogram trigger on the event which they want to monitor.
> And poll() on its `hist` file. Since this poll() returns POLLIN,
> the next poll() will return soon unless you do read() on hist file.

And that to:

    This allows a user to wait for a specific event on the `hist` file. User
    can set a histogram trigger on the event which they want to monitor
    and poll() on its `hist` file. Since this poll() returns POLLIN, the next
    poll() will return soon unless a read() happens on that hist file.

> 
> NOTE: To read the hist file again, you must set the file offset to 0,
> but just for monitoring the event, you may not need to read the
> histogram.
> 
> Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
> Reviewed-by: Tom Zanussi <zanussi@kernel.org>
> ---
>  Changes in v6:
>    - Use a global poll irq_work and wait_queue.
> ---
>  include/linux/trace_events.h     |   14 +++++++
>  kernel/trace/trace_events.c      |   14 +++++++
>  kernel/trace/trace_events_hist.c |   75 ++++++++++++++++++++++++++++++++++++--
>  3 files changed, 100 insertions(+), 3 deletions(-)
> 
> diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
> index 42bedcddd511..46c771a61f2a 100644
> --- a/include/linux/trace_events.h
> +++ b/include/linux/trace_events.h
> @@ -685,6 +685,20 @@ struct trace_event_file {
>  	atomic_t		tm_ref;	/* trigger-mode reference counter */
>  };
>  
> +#ifdef CONFIG_HIST_TRIGGERS
> +extern struct irq_work hist_poll_work;
> +extern wait_queue_head_t hist_poll_wq;
> +
> +static inline void hist_poll_wakeup(void)
> +{
> +	if (wq_has_sleeper(&hist_poll_wq))
> +		irq_work_queue(&hist_poll_work);
> +}
> +
> +#define hist_poll_wait(file, wait)	\
> +	poll_wait(file, &hist_poll_wq, wait)
> +#endif
> +
>  #define __TRACE_EVENT_FLAGS(name, value)				\
>  	static int __init trace_init_flags_##name(void)			\
>  	{								\
> diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
> index 7266ec2a4eea..0a7cb30043ef 100644
> --- a/kernel/trace/trace_events.c
> +++ b/kernel/trace/trace_events.c
> @@ -2972,6 +2972,20 @@ static bool event_in_systems(struct trace_event_call *call,
>  	return !*p || isspace(*p) || *p == ',';
>  }
>  
> +#ifdef CONFIG_HIST_TRIGGERS
> +/*
> + * Wake up waiter on the hist_poll_wq from irq_work because the hist trigger
> + * may happen in any context.
> + */
> +static void hist_poll_event_irq_work(struct irq_work *work)
> +{
> +	wake_up_all(&hist_poll_wq);
> +}
> +
> +DEFINE_IRQ_WORK(hist_poll_work, hist_poll_event_irq_work);
> +DECLARE_WAIT_QUEUE_HEAD(hist_poll_wq);
> +#endif
> +
>  static struct trace_event_file *
>  trace_create_new_event(struct trace_event_call *call,
>  		       struct trace_array *tr)
> diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
> index 5f9119eb7c67..107eaa0f40f1 100644
> --- a/kernel/trace/trace_events_hist.c
> +++ b/kernel/trace/trace_events_hist.c
> @@ -5314,6 +5314,8 @@ static void event_hist_trigger(struct event_trigger_data *data,
>  
>  	if (resolve_var_refs(hist_data, key, var_ref_vals, true))
>  		hist_trigger_actions(hist_data, elt, buffer, rec, rbe, key, var_ref_vals);
> +
> +	hist_poll_wakeup();
>  }
>  
>  static void hist_trigger_stacktrace_print(struct seq_file *m,
> @@ -5593,15 +5595,36 @@ static void hist_trigger_show(struct seq_file *m,
>  		   n_entries, (u64)atomic64_read(&hist_data->map->drops));
>  }
>  
> +struct hist_file_data {
> +	struct file *file;
> +	u64 last_read;
> +};
> +
> +static u64 get_hist_hit_count(struct trace_event_file *event_file)
> +{
> +	struct hist_trigger_data *hist_data;
> +	struct event_trigger_data *data;
> +	u64 ret = 0;
> +
> +	list_for_each_entry(data, &event_file->triggers, list) {
> +		if (data->cmd_ops->trigger_type == ETT_EVENT_HIST) {
> +			hist_data = data->private_data;
> +			ret += atomic64_read(&hist_data->map->hits);
> +		}
> +	}
> +	return ret;
> +}
> +
>  static int hist_show(struct seq_file *m, void *v)
>  {
> +	struct hist_file_data *hist_file = m->private;
>  	struct event_trigger_data *data;
>  	struct trace_event_file *event_file;
>  	int n = 0, ret = 0;
>  
>  	mutex_lock(&event_mutex);
>  
> -	event_file = event_file_file(m->private);
> +	event_file = event_file_file(hist_file->file);
>  	if (unlikely(!event_file)) {
>  		ret = -ENODEV;
>  		goto out_unlock;
> @@ -5611,6 +5634,7 @@ static int hist_show(struct seq_file *m, void *v)
>  		if (data->cmd_ops->trigger_type == ETT_EVENT_HIST)
>  			hist_trigger_show(m, data, n++);
>  	}
> +	hist_file->last_read = get_hist_hit_count(event_file);
>  
>   out_unlock:
>  	mutex_unlock(&event_mutex);
> @@ -5618,24 +5642,69 @@ static int hist_show(struct seq_file *m, void *v)
>  	return ret;
>  }
>  
> +static __poll_t event_hist_poll(struct file *file, struct poll_table_struct *wait)
> +{
> +	struct trace_event_file *event_file;
> +	struct seq_file *m = file->private_data;
> +	struct hist_file_data *hist_file = m->private;
> +	__poll_t ret = 0;
> +
> +	mutex_lock(&event_mutex);

Let's start using guard(mutex)(&event_mutex);

I'm working on changing the other locations in this file with a separate
patch. I don't want to add new ones.

-- Steve

> +
> +	event_file = event_file_data(file);
> +	if (!event_file) {
> +		ret = EPOLLERR;
> +		goto out_unlock;
> +	}
> +
> +	hist_poll_wait(file, wait);
> +
> +	if (hist_file->last_read != get_hist_hit_count(event_file))
> +		ret = EPOLLIN | EPOLLRDNORM;
> +
> +out_unlock:
> +	mutex_unlock(&event_mutex);
> +
> +	return ret;
> +}
> +
> +static int event_hist_release(struct inode *inode, struct file *file)
> +{
> +	struct seq_file *m = file->private_data;
> +	struct hist_file_data *hist_file = m->private;
> +
> +	kfree(hist_file);
> +	return tracing_single_release_file_tr(inode, file);
> +}
> +
>  static int event_hist_open(struct inode *inode, struct file *file)
>  {
> +	struct hist_file_data *hist_file;
>  	int ret;
>  
>  	ret = tracing_open_file_tr(inode, file);
>  	if (ret)
>  		return ret;
>  
> +	hist_file = kzalloc(sizeof(*hist_file), GFP_KERNEL);
> +	if (!hist_file)
> +		return -ENOMEM;
> +	hist_file->file = file;
> +
>  	/* Clear private_data to avoid warning in single_open() */
>  	file->private_data = NULL;
> -	return single_open(file, hist_show, file);
> +	ret = single_open(file, hist_show, hist_file);
> +	if (ret)
> +		kfree(hist_file);
> +	return ret;
>  }
>  
>  const struct file_operations event_hist_fops = {
>  	.open = event_hist_open,
>  	.read = seq_read,
>  	.llseek = seq_lseek,
> -	.release = tracing_single_release_file_tr,
> +	.release = event_hist_release,
> +	.poll = event_hist_poll,
>  };
>  
>  #ifdef CONFIG_HIST_TRIGGERS_DEBUG
diff mbox series

Patch

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 42bedcddd511..46c771a61f2a 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -685,6 +685,20 @@  struct trace_event_file {
 	atomic_t		tm_ref;	/* trigger-mode reference counter */
 };
 
+#ifdef CONFIG_HIST_TRIGGERS
+extern struct irq_work hist_poll_work;
+extern wait_queue_head_t hist_poll_wq;
+
+static inline void hist_poll_wakeup(void)
+{
+	if (wq_has_sleeper(&hist_poll_wq))
+		irq_work_queue(&hist_poll_work);
+}
+
+#define hist_poll_wait(file, wait)	\
+	poll_wait(file, &hist_poll_wq, wait)
+#endif
+
 #define __TRACE_EVENT_FLAGS(name, value)				\
 	static int __init trace_init_flags_##name(void)			\
 	{								\
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 7266ec2a4eea..0a7cb30043ef 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -2972,6 +2972,20 @@  static bool event_in_systems(struct trace_event_call *call,
 	return !*p || isspace(*p) || *p == ',';
 }
 
+#ifdef CONFIG_HIST_TRIGGERS
+/*
+ * Wake up waiter on the hist_poll_wq from irq_work because the hist trigger
+ * may happen in any context.
+ */
+static void hist_poll_event_irq_work(struct irq_work *work)
+{
+	wake_up_all(&hist_poll_wq);
+}
+
+DEFINE_IRQ_WORK(hist_poll_work, hist_poll_event_irq_work);
+DECLARE_WAIT_QUEUE_HEAD(hist_poll_wq);
+#endif
+
 static struct trace_event_file *
 trace_create_new_event(struct trace_event_call *call,
 		       struct trace_array *tr)
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 5f9119eb7c67..107eaa0f40f1 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -5314,6 +5314,8 @@  static void event_hist_trigger(struct event_trigger_data *data,
 
 	if (resolve_var_refs(hist_data, key, var_ref_vals, true))
 		hist_trigger_actions(hist_data, elt, buffer, rec, rbe, key, var_ref_vals);
+
+	hist_poll_wakeup();
 }
 
 static void hist_trigger_stacktrace_print(struct seq_file *m,
@@ -5593,15 +5595,36 @@  static void hist_trigger_show(struct seq_file *m,
 		   n_entries, (u64)atomic64_read(&hist_data->map->drops));
 }
 
+struct hist_file_data {
+	struct file *file;
+	u64 last_read;
+};
+
+static u64 get_hist_hit_count(struct trace_event_file *event_file)
+{
+	struct hist_trigger_data *hist_data;
+	struct event_trigger_data *data;
+	u64 ret = 0;
+
+	list_for_each_entry(data, &event_file->triggers, list) {
+		if (data->cmd_ops->trigger_type == ETT_EVENT_HIST) {
+			hist_data = data->private_data;
+			ret += atomic64_read(&hist_data->map->hits);
+		}
+	}
+	return ret;
+}
+
 static int hist_show(struct seq_file *m, void *v)
 {
+	struct hist_file_data *hist_file = m->private;
 	struct event_trigger_data *data;
 	struct trace_event_file *event_file;
 	int n = 0, ret = 0;
 
 	mutex_lock(&event_mutex);
 
-	event_file = event_file_file(m->private);
+	event_file = event_file_file(hist_file->file);
 	if (unlikely(!event_file)) {
 		ret = -ENODEV;
 		goto out_unlock;
@@ -5611,6 +5634,7 @@  static int hist_show(struct seq_file *m, void *v)
 		if (data->cmd_ops->trigger_type == ETT_EVENT_HIST)
 			hist_trigger_show(m, data, n++);
 	}
+	hist_file->last_read = get_hist_hit_count(event_file);
 
  out_unlock:
 	mutex_unlock(&event_mutex);
@@ -5618,24 +5642,69 @@  static int hist_show(struct seq_file *m, void *v)
 	return ret;
 }
 
+static __poll_t event_hist_poll(struct file *file, struct poll_table_struct *wait)
+{
+	struct trace_event_file *event_file;
+	struct seq_file *m = file->private_data;
+	struct hist_file_data *hist_file = m->private;
+	__poll_t ret = 0;
+
+	mutex_lock(&event_mutex);
+
+	event_file = event_file_data(file);
+	if (!event_file) {
+		ret = EPOLLERR;
+		goto out_unlock;
+	}
+
+	hist_poll_wait(file, wait);
+
+	if (hist_file->last_read != get_hist_hit_count(event_file))
+		ret = EPOLLIN | EPOLLRDNORM;
+
+out_unlock:
+	mutex_unlock(&event_mutex);
+
+	return ret;
+}
+
+static int event_hist_release(struct inode *inode, struct file *file)
+{
+	struct seq_file *m = file->private_data;
+	struct hist_file_data *hist_file = m->private;
+
+	kfree(hist_file);
+	return tracing_single_release_file_tr(inode, file);
+}
+
 static int event_hist_open(struct inode *inode, struct file *file)
 {
+	struct hist_file_data *hist_file;
 	int ret;
 
 	ret = tracing_open_file_tr(inode, file);
 	if (ret)
 		return ret;
 
+	hist_file = kzalloc(sizeof(*hist_file), GFP_KERNEL);
+	if (!hist_file)
+		return -ENOMEM;
+	hist_file->file = file;
+
 	/* Clear private_data to avoid warning in single_open() */
 	file->private_data = NULL;
-	return single_open(file, hist_show, file);
+	ret = single_open(file, hist_show, hist_file);
+	if (ret)
+		kfree(hist_file);
+	return ret;
 }
 
 const struct file_operations event_hist_fops = {
 	.open = event_hist_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
-	.release = tracing_single_release_file_tr,
+	.release = event_hist_release,
+	.poll = event_hist_poll,
 };
 
 #ifdef CONFIG_HIST_TRIGGERS_DEBUG