diff mbox

perf core: Introduce new ioctl options to pause and resume ring buffer

Message ID 1453117921-122482-1-git-send-email-wangnan0@huawei.com
State Superseded
Headers show

Commit Message

Wang Nan Jan. 18, 2016, 11:52 a.m. UTC
Add an extra ioctl() to pause/resume ring-buffer output.

In some situations we want to read from the ring buffer only after
ensuring that nothing can write to it while we are reading. Without
this patch we have to turn off all events attached to the ring buffer.
This patch is needed to support overwritable ring buffers with TAILSIZE
selected.

Signed-off-by: Wang Nan <wangnan0@huawei.com>

Cc: He Kuang <hekuang@huawei.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Zefan Li <lizefan@huawei.com>
Cc: pi3orama@163.com
---
 include/uapi/linux/perf_event.h |  2 ++
 kernel/events/core.c            | 14 ++++++++++++++
 kernel/events/internal.h        | 11 +++++++++++
 kernel/events/ring_buffer.c     |  4 +++-
 4 files changed, 30 insertions(+), 1 deletion(-)

-- 
1.8.3.4

Comments

Wang Nan Jan. 19, 2016, 2:55 a.m. UTC | #1
On 2016/1/18 20:02, Peter Zijlstra wrote:
> On Mon, Jan 18, 2016 at 11:52:01AM +0000, Wang Nan wrote:

>

>> +#define PERF_EVENT_IOC_PAUSE_OUTPUT	_IO ('$', 9)

>> +#define PERF_EVENT_IOC_RESUME_OUTPUT	_IO ('$', 10)

> Would not a single IOCTL with a 'boolean' parameter make more sense?


Good suggestion.

>> +++ b/kernel/events/ring_buffer.c

>> @@ -125,7 +125,7 @@ int perf_output_begin(struct perf_output_handle *handle,

>>   	if (unlikely(!rb))

>>   		goto out;

>>   

>> -	if (unlikely(!rb->nr_pages))

>> +	if (unlikely(rb->paused))

>>   		goto out;

> Should we increment rb->lost in this case?


Not sure about this. The ring buffer is paused deliberately; should we
consider the events we miss as lost events? However, I'll try it in the
next version.

Thank you.
diff mbox

Patch

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 4e8dde8..9508070 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -402,6 +402,8 @@  struct perf_event_attr {
 #define PERF_EVENT_IOC_SET_FILTER	_IOW('$', 6, char *)
 #define PERF_EVENT_IOC_ID		_IOR('$', 7, __u64 *)
 #define PERF_EVENT_IOC_SET_BPF		_IOW('$', 8, __u32)
+#define PERF_EVENT_IOC_PAUSE_OUTPUT	_IO ('$', 9)
+#define PERF_EVENT_IOC_RESUME_OUTPUT	_IO ('$', 10)
 
 enum perf_event_ioc_flags {
 	PERF_IOC_FLAG_GROUP		= 1U << 0,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 2d59b59..d5a0c34 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4241,6 +4241,20 @@  static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned lon
 	case PERF_EVENT_IOC_SET_BPF:
 		return perf_event_set_bpf_prog(event, arg);
 
+	case PERF_EVENT_IOC_PAUSE_OUTPUT:
+	case PERF_EVENT_IOC_RESUME_OUTPUT: {
+		struct ring_buffer *rb;
+
+		rcu_read_lock();
+		rb = rcu_dereference(event->rb);
+		if (!event->rb) {
+			rcu_read_unlock();
+			return -EINVAL;
+		}
+		rb_toggle_paused(rb, cmd == PERF_EVENT_IOC_PAUSE_OUTPUT);
+		rcu_read_unlock();
+		return 0;
+	}
 	default:
 		return -ENOTTY;
 	}
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index 2bbad9c..6a93d1b 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -18,6 +18,7 @@  struct ring_buffer {
 #endif
 	int				nr_pages;	/* nr of data pages  */
 	int				overwrite;	/* can overwrite itself */
+	int				paused;		/* can write into ring buffer */
 
 	atomic_t			poll;		/* POLL_ for wakeups */
 
@@ -65,6 +66,16 @@  static inline void rb_free_rcu(struct rcu_head *rcu_head)
 	rb_free(rb);
 }
 
+static inline void
+rb_toggle_paused(struct ring_buffer *rb,
+		 bool pause)
+{
+	if (!pause && rb->nr_pages)
+		rb->paused = 0;
+	else
+		rb->paused = 1;
+}
+
 extern struct ring_buffer *
 rb_alloc(int nr_pages, long watermark, int cpu, int flags);
 extern void perf_event_wakeup(struct perf_event *event);
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 5f8bd89..11a1676 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -125,7 +125,7 @@  int perf_output_begin(struct perf_output_handle *handle,
 	if (unlikely(!rb))
 		goto out;
 
-	if (unlikely(!rb->nr_pages))
+	if (unlikely(rb->paused))
 		goto out;
 
 	handle->rb    = rb;
@@ -245,6 +245,8 @@  ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)
 	INIT_LIST_HEAD(&rb->event_list);
 	spin_lock_init(&rb->event_lock);
 	init_irq_work(&rb->irq_work, rb_irq_work);
+
+	rb->paused = rb->nr_pages ? 0 : 1;
 }
 
 static void ring_buffer_put_async(struct ring_buffer *rb)