diff mbox series

[1/2] perf mmap: Fix perf backward recording

Message ID 20171101055327.141281-2-wangnan0@huawei.com
State Superseded
Headers show
Series perf record: Fix --overwrite and clarify concepts | expand

Commit Message

Wang Nan Nov. 1, 2017, 5:53 a.m. UTC
perf record backward recording doesn't work as we expected: it never
overwrite when ring buffer full.

Test:

(Run a busy printing python task background like this:

 while True:
     print 123

send SIGUSR2 to perf to capture snapshot.)

 # ./perf record --overwrite -e raw_syscalls:sys_enter -e raw_syscalls:sys_exit --exclude-perf -a --switch-output
 [ perf record: dump data: Woken up 1 times ]
 [ perf record: Dump perf.data.2017110101520743 ]
 [ perf record: dump data: Woken up 1 times ]
 [ perf record: Dump perf.data.2017110101521251 ]
 [ perf record: dump data: Woken up 1 times ]
 [ perf record: Dump perf.data.2017110101521692 ]
 ^C[ perf record: Woken up 1 times to write data ]
 [ perf record: Dump perf.data.2017110101521936 ]
 [ perf record: Captured and wrote 0.826 MB perf.data.<timestamp> ]

 # ./perf script -i ./perf.data.2017110101520743 | head -n3
             perf  2717 [000] 12449.310785: raw_syscalls:sys_enter: NR 16 (5, 2400, 0, 59, 100, 0)
             perf  2717 [000] 12449.310790: raw_syscalls:sys_enter: NR 7 (4112340, 2, ffffffff, 3df, 100, 0)
           python  2545 [000] 12449.310800:  raw_syscalls:sys_exit: NR 1 = 4
 # ./perf script -i ./perf.data.2017110101521251 | head -n3
             perf  2717 [000] 12449.310785: raw_syscalls:sys_enter: NR 16 (5, 2400, 0, 59, 100, 0)
             perf  2717 [000] 12449.310790: raw_syscalls:sys_enter: NR 7 (4112340, 2, ffffffff, 3df, 100, 0)
           python  2545 [000] 12449.310800:  raw_syscalls:sys_exit: NR 1 = 4
 # ./perf script -i ./perf.data.2017110101521692 | head -n3
             perf  2717 [000] 12449.310785: raw_syscalls:sys_enter: NR 16 (5, 2400, 0, 59, 100, 0)
             perf  2717 [000] 12449.310790: raw_syscalls:sys_enter: NR 7 (4112340, 2, ffffffff, 3df, 100, 0)
           python  2545 [000] 12449.310800:  raw_syscalls:sys_exit: NR 1 = 4

Timestamps are never change, but my background task is a dead loop, can
easily overwhelme the ring buffer.

This patch fix it by force unsetting PROT_WRITE for backward ring
buffer, so all backward ring buffer become overwrite ring buffer.

Test result:

 # ./perf record --overwrite -e raw_syscalls:sys_enter -e raw_syscalls:sys_exit --exclude-perf -a --switch-output
 [ perf record: dump data: Woken up 1 times ]
 [ perf record: Dump perf.data.2017110101285323 ]
 [ perf record: dump data: Woken up 1 times ]
 [ perf record: Dump perf.data.2017110101290053 ]
 [ perf record: dump data: Woken up 1 times ]
 [ perf record: Dump perf.data.2017110101290446 ]
 ^C[ perf record: Woken up 1 times to write data ]
 [ perf record: Dump perf.data.2017110101290837 ]
 [ perf record: Captured and wrote 0.826 MB perf.data.<timestamp> ]
 # ./perf script -i ./perf.data.2017110101285323 | head -n3
           python  2545 [000] 11064.268083:  raw_syscalls:sys_exit: NR 1 = 4
           python  2545 [000] 11064.268084: raw_syscalls:sys_enter: NR 1 (1, 12cc330, 4, 7fc237280370, 7fc2373d0700, 2c7b0)
           python  2545 [000] 11064.268086:  raw_syscalls:sys_exit: NR 1 = 4
 # ./perf script -i ./perf.data.2017110101290 | head -n3
 failed to open ./perf.data.2017110101290: No such file or directory
 # ./perf script -i ./perf.data.2017110101290053 | head -n3
           python  2545 [000] 11071.564062: raw_syscalls:sys_enter: NR 1 (1, 12cc330, 4, 7fc237280370, 7fc2373d0700, 2c7b0)
           python  2545 [000] 11071.564064:  raw_syscalls:sys_exit: NR 1 = 4
           python  2545 [000] 11071.564066: raw_syscalls:sys_enter: NR 1 (1, 12cc330, 4, 7fc237280370, 7fc2373d0700, 2c7b0)
 # ./perf script -i ./perf.data.2017110101290 | head -n3
 perf.data.2017110101290053  perf.data.2017110101290446  perf.data.2017110101290837
 # ./perf script -i ./perf.data.2017110101290446 | head -n3
             sshd  1321 [000] 11075.499473:  raw_syscalls:sys_exit: NR 14 = 0
             sshd  1321 [000] 11075.499474: raw_syscalls:sys_enter: NR 14 (2, 7ffe98899490, 0, 8, 0, 3000)
             sshd  1321 [000] 11075.499474:  raw_syscalls:sys_exit: NR 14 = 0
 # ./perf script -i ./perf.data.2017110101290837 | head -n3
           python  2545 [000] 11079.280844:  raw_syscalls:sys_exit: NR 1 = 4
           python  2545 [000] 11079.280847: raw_syscalls:sys_enter: NR 1 (1, 12cc330, 4, 7fc237280370, 7fc2373d0700, 2c7b0)
           python  2545 [000] 11079.280850:  raw_syscalls:sys_exit: NR 1 = 4

Signed-off-by: Wang Nan <wangnan0@huawei.com>

---
 tools/perf/util/evlist.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

-- 
2.10.1

Comments

Namhyung Kim Nov. 1, 2017, 9:49 a.m. UTC | #1
Hi,

On Wed, Nov 01, 2017 at 05:53:26AM +0000, Wang Nan wrote:
> perf record backward recording doesn't work as we expected: it never

> overwrite when ring buffer full.

> 

> Test:

> 

> (Run a busy printing python task background like this:

> 

>  while True:

>      print 123

> 

> send SIGUSR2 to perf to capture snapshot.)

> 

>  # ./perf record --overwrite -e raw_syscalls:sys_enter -e raw_syscalls:sys_exit --exclude-perf -a --switch-output

>  [ perf record: dump data: Woken up 1 times ]

>  [ perf record: Dump perf.data.2017110101520743 ]

>  [ perf record: dump data: Woken up 1 times ]

>  [ perf record: Dump perf.data.2017110101521251 ]

>  [ perf record: dump data: Woken up 1 times ]

>  [ perf record: Dump perf.data.2017110101521692 ]

>  ^C[ perf record: Woken up 1 times to write data ]

>  [ perf record: Dump perf.data.2017110101521936 ]

>  [ perf record: Captured and wrote 0.826 MB perf.data.<timestamp> ]

> 

>  # ./perf script -i ./perf.data.2017110101520743 | head -n3

>              perf  2717 [000] 12449.310785: raw_syscalls:sys_enter: NR 16 (5, 2400, 0, 59, 100, 0)

>              perf  2717 [000] 12449.310790: raw_syscalls:sys_enter: NR 7 (4112340, 2, ffffffff, 3df, 100, 0)

>            python  2545 [000] 12449.310800:  raw_syscalls:sys_exit: NR 1 = 4

>  # ./perf script -i ./perf.data.2017110101521251 | head -n3

>              perf  2717 [000] 12449.310785: raw_syscalls:sys_enter: NR 16 (5, 2400, 0, 59, 100, 0)

>              perf  2717 [000] 12449.310790: raw_syscalls:sys_enter: NR 7 (4112340, 2, ffffffff, 3df, 100, 0)

>            python  2545 [000] 12449.310800:  raw_syscalls:sys_exit: NR 1 = 4

>  # ./perf script -i ./perf.data.2017110101521692 | head -n3

>              perf  2717 [000] 12449.310785: raw_syscalls:sys_enter: NR 16 (5, 2400, 0, 59, 100, 0)

>              perf  2717 [000] 12449.310790: raw_syscalls:sys_enter: NR 7 (4112340, 2, ffffffff, 3df, 100, 0)

>            python  2545 [000] 12449.310800:  raw_syscalls:sys_exit: NR 1 = 4

> 

> Timestamps are never change, but my background task is a dead loop, can

> easily overwhelme the ring buffer.

> 

> This patch fix it by force unsetting PROT_WRITE for backward ring

> buffer, so all backward ring buffer become overwrite ring buffer.

> 

> Test result:

> 

>  # ./perf record --overwrite -e raw_syscalls:sys_enter -e raw_syscalls:sys_exit --exclude-perf -a --switch-output

>  [ perf record: dump data: Woken up 1 times ]

>  [ perf record: Dump perf.data.2017110101285323 ]

>  [ perf record: dump data: Woken up 1 times ]

>  [ perf record: Dump perf.data.2017110101290053 ]

>  [ perf record: dump data: Woken up 1 times ]

>  [ perf record: Dump perf.data.2017110101290446 ]

>  ^C[ perf record: Woken up 1 times to write data ]

>  [ perf record: Dump perf.data.2017110101290837 ]

>  [ perf record: Captured and wrote 0.826 MB perf.data.<timestamp> ]

>  # ./perf script -i ./perf.data.2017110101285323 | head -n3

>            python  2545 [000] 11064.268083:  raw_syscalls:sys_exit: NR 1 = 4

>            python  2545 [000] 11064.268084: raw_syscalls:sys_enter: NR 1 (1, 12cc330, 4, 7fc237280370, 7fc2373d0700, 2c7b0)

>            python  2545 [000] 11064.268086:  raw_syscalls:sys_exit: NR 1 = 4

>  # ./perf script -i ./perf.data.2017110101290 | head -n3

>  failed to open ./perf.data.2017110101290: No such file or directory

>  # ./perf script -i ./perf.data.2017110101290053 | head -n3

>            python  2545 [000] 11071.564062: raw_syscalls:sys_enter: NR 1 (1, 12cc330, 4, 7fc237280370, 7fc2373d0700, 2c7b0)

>            python  2545 [000] 11071.564064:  raw_syscalls:sys_exit: NR 1 = 4

>            python  2545 [000] 11071.564066: raw_syscalls:sys_enter: NR 1 (1, 12cc330, 4, 7fc237280370, 7fc2373d0700, 2c7b0)

>  # ./perf script -i ./perf.data.2017110101290 | head -n3

>  perf.data.2017110101290053  perf.data.2017110101290446  perf.data.2017110101290837

>  # ./perf script -i ./perf.data.2017110101290446 | head -n3

>              sshd  1321 [000] 11075.499473:  raw_syscalls:sys_exit: NR 14 = 0

>              sshd  1321 [000] 11075.499474: raw_syscalls:sys_enter: NR 14 (2, 7ffe98899490, 0, 8, 0, 3000)

>              sshd  1321 [000] 11075.499474:  raw_syscalls:sys_exit: NR 14 = 0

>  # ./perf script -i ./perf.data.2017110101290837 | head -n3

>            python  2545 [000] 11079.280844:  raw_syscalls:sys_exit: NR 1 = 4

>            python  2545 [000] 11079.280847: raw_syscalls:sys_enter: NR 1 (1, 12cc330, 4, 7fc237280370, 7fc2373d0700, 2c7b0)

>            python  2545 [000] 11079.280850:  raw_syscalls:sys_exit: NR 1 = 4

> 

> Signed-off-by: Wang Nan <wangnan0@huawei.com>

> ---

>  tools/perf/util/evlist.c | 8 +++++++-

>  1 file changed, 7 insertions(+), 1 deletion(-)

> 

> diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c

> index c6c891e..4c5daba 100644

> --- a/tools/perf/util/evlist.c

> +++ b/tools/perf/util/evlist.c

> @@ -799,22 +799,28 @@ perf_evlist__should_poll(struct perf_evlist *evlist __maybe_unused,

>  }

>  

>  static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,

> -				       struct mmap_params *mp, int cpu_idx,

> +				       struct mmap_params *_mp, int cpu_idx,

>  				       int thread, int *_output, int *_output_backward)

>  {

>  	struct perf_evsel *evsel;

>  	int revent;

>  	int evlist_cpu = cpu_map__cpu(evlist->cpus, cpu_idx);

> +	struct mmap_params *mp;

>  

>  	evlist__for_each_entry(evlist, evsel) {

>  		struct perf_mmap *maps = evlist->mmap;

> +		struct mmap_params rdonly_mp;

>  		int *output = _output;

>  		int fd;

>  		int cpu;

>  

> +		mp = _mp;

>  		if (evsel->attr.write_backward) {

>  			output = _output_backward;

>  			maps = evlist->backward_mmap;

> +			rdonly_mp = *_mp;

> +			rdonly_mp.prot &= ~PROT_WRITE;

> +			mp = &rdonly_mp;

>  

>  			if (!maps) {

>  				maps = perf_evlist__alloc_mmap(evlist);

> -- 


What about this instead (not tested)?


diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index c6c891e154a6..27ebe355e794 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -838,6 +838,11 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
                if (*output == -1) {
                        *output = fd;
 
+                       if (evsel->attr.write_backward)
+                               mp->prot = PROT_READ;
+                       else
+                               mp->prot = PROT_READ | PROT_WRITE;
+
                        if (perf_mmap__mmap(&maps[idx], mp, *output)  < 0)
                                return -1;
                } else {
@@ -1058,9 +1063,7 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
        struct perf_evsel *evsel;
        const struct cpu_map *cpus = evlist->cpus;
        const struct thread_map *threads = evlist->threads;
-       struct mmap_params mp = {
-               .prot = PROT_READ | (overwrite ? 0 : PROT_WRITE),
-       };
+       struct mmap_params mp;
 
        if (!evlist->mmap)
                evlist->mmap = perf_evlist__alloc_mmap(evlist);
Wang Nan Nov. 1, 2017, 10:32 a.m. UTC | #2
On 2017/11/1 17:49, Namhyung Kim wrote:
> Hi,

>

> On Wed, Nov 01, 2017 at 05:53:26AM +0000, Wang Nan wrote:

>> perf record backward recording doesn't work as we expected: it never

>> overwrite when ring buffer full.

>>

>> Test:

>>

>> (Run a busy printing python task background like this:

>>

>>   while True:

>>       print 123

>>

>> send SIGUSR2 to perf to capture snapshot.)


[SNIP]

>>

>> Signed-off-by: Wang Nan <wangnan0@huawei.com>

>> ---

>>   tools/perf/util/evlist.c | 8 +++++++-

>>   1 file changed, 7 insertions(+), 1 deletion(-)

>>

>> diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c

>> index c6c891e..4c5daba 100644

>> --- a/tools/perf/util/evlist.c

>> +++ b/tools/perf/util/evlist.c

>> @@ -799,22 +799,28 @@ perf_evlist__should_poll(struct perf_evlist *evlist __maybe_unused,

>>   }

>>   

>>   static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,

>> -				       struct mmap_params *mp, int cpu_idx,

>> +				       struct mmap_params *_mp, int cpu_idx,

>>   				       int thread, int *_output, int *_output_backward)

>>   {

>>   	struct perf_evsel *evsel;

>>   	int revent;

>>   	int evlist_cpu = cpu_map__cpu(evlist->cpus, cpu_idx);

>> +	struct mmap_params *mp;

>>   

>>   	evlist__for_each_entry(evlist, evsel) {

>>   		struct perf_mmap *maps = evlist->mmap;

>> +		struct mmap_params rdonly_mp;

>>   		int *output = _output;

>>   		int fd;

>>   		int cpu;

>>   

>> +		mp = _mp;

>>   		if (evsel->attr.write_backward) {

>>   			output = _output_backward;

>>   			maps = evlist->backward_mmap;

>> +			rdonly_mp = *_mp;

>> +			rdonly_mp.prot &= ~PROT_WRITE;

>> +			mp = &rdonly_mp;

>>   

>>   			if (!maps) {

>>   				maps = perf_evlist__alloc_mmap(evlist);

>> -- 

> What about this instead (not tested)?

>

>

> diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c

> index c6c891e154a6..27ebe355e794 100644

> --- a/tools/perf/util/evlist.c

> +++ b/tools/perf/util/evlist.c

> @@ -838,6 +838,11 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,

>                  if (*output == -1) {

>                          *output = fd;

>   

> +                       if (evsel->attr.write_backward)

> +                               mp->prot = PROT_READ;

> +                       else

> +                               mp->prot = PROT_READ | PROT_WRITE;

> +


If evlist->overwrite is true, PROT_WRITE should be unset even if 
write_backward is
not set. If you want to delay the setting of mp->prot, you need to consider
both evlist->overwrite and evsel->attr.write_backward.

Then why not removing mp->prot totally, and add a prot argument to 
perf_mmap__mmap,
since prot setting is always decided independently?

>                          if (perf_mmap__mmap(&maps[idx], mp, *output)  < 0)

>                                  return -1;

>                  } else {

> @@ -1058,9 +1063,7 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,

>          struct perf_evsel *evsel;

>          const struct cpu_map *cpus = evlist->cpus;

>          const struct thread_map *threads = evlist->threads;

> -       struct mmap_params mp = {

> -               .prot = PROT_READ | (overwrite ? 0 : PROT_WRITE),

> -       };

> +       struct mmap_params mp;

>   

>          if (!evlist->mmap)

>                  evlist->mmap = perf_evlist__alloc_mmap(evlist);


The 'overwrite' argument in perf_evlist__mmap_ex() controls 
evlist->overwrite.

Thank you.
Namhyung Kim Nov. 1, 2017, noon UTC | #3
On Wed, Nov 01, 2017 at 06:32:50PM +0800, Wangnan (F) wrote:
> 

> 

> On 2017/11/1 17:49, Namhyung Kim wrote:

> > Hi,

> > 

> > On Wed, Nov 01, 2017 at 05:53:26AM +0000, Wang Nan wrote:

> > > perf record backward recording doesn't work as we expected: it never

> > > overwrite when ring buffer full.

> > > 

> > > Test:

> > > 

> > > (Run a busy printing python task background like this:

> > > 

> > >   while True:

> > >       print 123

> > > 

> > > send SIGUSR2 to perf to capture snapshot.)

> 

> [SNIP]

> 

> > > 

> > > Signed-off-by: Wang Nan <wangnan0@huawei.com>

> > > ---

> > >   tools/perf/util/evlist.c | 8 +++++++-

> > >   1 file changed, 7 insertions(+), 1 deletion(-)

> > > 

> > > diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c

> > > index c6c891e..4c5daba 100644

> > > --- a/tools/perf/util/evlist.c

> > > +++ b/tools/perf/util/evlist.c

> > > @@ -799,22 +799,28 @@ perf_evlist__should_poll(struct perf_evlist *evlist __maybe_unused,

> > >   }

> > >   static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,

> > > -				       struct mmap_params *mp, int cpu_idx,

> > > +				       struct mmap_params *_mp, int cpu_idx,

> > >   				       int thread, int *_output, int *_output_backward)

> > >   {

> > >   	struct perf_evsel *evsel;

> > >   	int revent;

> > >   	int evlist_cpu = cpu_map__cpu(evlist->cpus, cpu_idx);

> > > +	struct mmap_params *mp;

> > >   	evlist__for_each_entry(evlist, evsel) {

> > >   		struct perf_mmap *maps = evlist->mmap;

> > > +		struct mmap_params rdonly_mp;

> > >   		int *output = _output;

> > >   		int fd;

> > >   		int cpu;

> > > +		mp = _mp;

> > >   		if (evsel->attr.write_backward) {

> > >   			output = _output_backward;

> > >   			maps = evlist->backward_mmap;

> > > +			rdonly_mp = *_mp;

> > > +			rdonly_mp.prot &= ~PROT_WRITE;

> > > +			mp = &rdonly_mp;

> > >   			if (!maps) {

> > >   				maps = perf_evlist__alloc_mmap(evlist);

> > > -- 

> > What about this instead (not tested)?

> > 

> > 

> > diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c

> > index c6c891e154a6..27ebe355e794 100644

> > --- a/tools/perf/util/evlist.c

> > +++ b/tools/perf/util/evlist.c

> > @@ -838,6 +838,11 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,

> >                  if (*output == -1) {

> >                          *output = fd;

> > +                       if (evsel->attr.write_backward)

> > +                               mp->prot = PROT_READ;

> > +                       else

> > +                               mp->prot = PROT_READ | PROT_WRITE;

> > +

> 

> If evlist->overwrite is true, PROT_WRITE should be unset even if

> write_backward is

> not set. If you want to delay the setting of mp->prot, you need to consider

> both evlist->overwrite and evsel->attr.write_backward.


I thought evsel->attr.write_backward should be set when
evlist->overwrite is set.  Do you mean following case?

  perf record --overwrite -e 'cycles/no-overwrite/'


> 

> Then why not removing mp->prot totally, and add a prot argument to

> perf_mmap__mmap,

> since prot setting is always decided independently?


I'm ok with it.


> 

> >                          if (perf_mmap__mmap(&maps[idx], mp, *output)  < 0)

> >                                  return -1;

> >                  } else {

> > @@ -1058,9 +1063,7 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,

> >          struct perf_evsel *evsel;

> >          const struct cpu_map *cpus = evlist->cpus;

> >          const struct thread_map *threads = evlist->threads;

> > -       struct mmap_params mp = {

> > -               .prot = PROT_READ | (overwrite ? 0 : PROT_WRITE),

> > -       };

> > +       struct mmap_params mp;

> >          if (!evlist->mmap)

> >                  evlist->mmap = perf_evlist__alloc_mmap(evlist);

> 

> The 'overwrite' argument in perf_evlist__mmap_ex() controls

> evlist->overwrite.


Right, and it's always "false" for recording in the current code.

Thanks,
Namhyung
Wang Nan Nov. 1, 2017, 12:10 p.m. UTC | #4
On 2017/11/1 20:00, Namhyung Kim wrote:
> On Wed, Nov 01, 2017 at 06:32:50PM +0800, Wangnan (F) wrote:

>>

>> On 2017/11/1 17:49, Namhyung Kim wrote:

>>> Hi,

>>>

>>> On Wed, Nov 01, 2017 at 05:53:26AM +0000, Wang Nan wrote:

>>>> perf record backward recording doesn't work as we expected: it never

>>>> overwrite when ring buffer full.

>>>>

>>>> Test:

>>>>

>>>> (Run a busy printing python task background like this:

>>>>

>>>>    while True:

>>>>        print 123

>>>>

>>>> send SIGUSR2 to perf to capture snapshot.)

>> [SNIP]

>>

>>>> Signed-off-by: Wang Nan <wangnan0@huawei.com>

>>>> ---

>>>>    tools/perf/util/evlist.c | 8 +++++++-

>>>>    1 file changed, 7 insertions(+), 1 deletion(-)

>>>>

>>>> diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c

>>>> index c6c891e..4c5daba 100644

>>>> --- a/tools/perf/util/evlist.c

>>>> +++ b/tools/perf/util/evlist.c

>>>> @@ -799,22 +799,28 @@ perf_evlist__should_poll(struct perf_evlist *evlist __maybe_unused,

>>>>    }

>>>>    static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,

>>>> -				       struct mmap_params *mp, int cpu_idx,

>>>> +				       struct mmap_params *_mp, int cpu_idx,

>>>>    				       int thread, int *_output, int *_output_backward)

>>>>    {

>>>>    	struct perf_evsel *evsel;

>>>>    	int revent;

>>>>    	int evlist_cpu = cpu_map__cpu(evlist->cpus, cpu_idx);

>>>> +	struct mmap_params *mp;

>>>>    	evlist__for_each_entry(evlist, evsel) {

>>>>    		struct perf_mmap *maps = evlist->mmap;

>>>> +		struct mmap_params rdonly_mp;

>>>>    		int *output = _output;

>>>>    		int fd;

>>>>    		int cpu;

>>>> +		mp = _mp;

>>>>    		if (evsel->attr.write_backward) {

>>>>    			output = _output_backward;

>>>>    			maps = evlist->backward_mmap;

>>>> +			rdonly_mp = *_mp;

>>>> +			rdonly_mp.prot &= ~PROT_WRITE;

>>>> +			mp = &rdonly_mp;

>>>>    			if (!maps) {

>>>>    				maps = perf_evlist__alloc_mmap(evlist);

>>>> -- 

>>> What about this instead (not tested)?

>>>

>>>

>>> diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c

>>> index c6c891e154a6..27ebe355e794 100644

>>> --- a/tools/perf/util/evlist.c

>>> +++ b/tools/perf/util/evlist.c

>>> @@ -838,6 +838,11 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,

>>>                   if (*output == -1) {

>>>                           *output = fd;

>>> +                       if (evsel->attr.write_backward)

>>> +                               mp->prot = PROT_READ;

>>> +                       else

>>> +                               mp->prot = PROT_READ | PROT_WRITE;

>>> +

>> If evlist->overwrite is true, PROT_WRITE should be unset even if

>> write_backward is

>> not set. If you want to delay the setting of mp->prot, you need to consider

>> both evlist->overwrite and evsel->attr.write_backward.

> I thought evsel->attr.write_backward should be set when

> evlist->overwrite is set.  Do you mean following case?

>

>    perf record --overwrite -e 'cycles/no-overwrite/'

>


No. evlist->overwrite is unrelated to '--overwrite'. This is why I
said the concept of 'overwrite' and 'backward' is ambiguous.

perf record never sets 'evlist->overwrite'. What '--overwrite' actually
does is setting write_backward. Some testcases needs overwrite evlist.

Thank you.
Jiri Olsa Nov. 1, 2017, 12:39 p.m. UTC | #5
On Wed, Nov 01, 2017 at 08:10:49PM +0800, Wangnan (F) wrote:

SNIP

> > > > diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c

> > > > index c6c891e154a6..27ebe355e794 100644

> > > > --- a/tools/perf/util/evlist.c

> > > > +++ b/tools/perf/util/evlist.c

> > > > @@ -838,6 +838,11 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,

> > > >                   if (*output == -1) {

> > > >                           *output = fd;

> > > > +                       if (evsel->attr.write_backward)

> > > > +                               mp->prot = PROT_READ;

> > > > +                       else

> > > > +                               mp->prot = PROT_READ | PROT_WRITE;

> > > > +

> > > If evlist->overwrite is true, PROT_WRITE should be unset even if

> > > write_backward is

> > > not set. If you want to delay the setting of mp->prot, you need to consider

> > > both evlist->overwrite and evsel->attr.write_backward.

> > I thought evsel->attr.write_backward should be set when

> > evlist->overwrite is set.  Do you mean following case?

> > 

> >    perf record --overwrite -e 'cycles/no-overwrite/'

> > 

> 

> No. evlist->overwrite is unrelated to '--overwrite'. This is why I

> said the concept of 'overwrite' and 'backward' is ambiguous.

> 

> perf record never sets 'evlist->overwrite'. What '--overwrite' actually

> does is setting write_backward. Some testcases needs overwrite evlist.


did not check deeply, but so why can't we do the below?

jirka


---
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index f4d9fc54b382..4643c487bd29 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -300,7 +300,7 @@ static int record__mmap_evlist(struct record *rec,
 	struct record_opts *opts = &rec->opts;
 	char msg[512];
 
-	if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false,
+	if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, opts->overwrite,
 				 opts->auxtrace_mmap_pages,
 				 opts->auxtrace_snapshot_mode) < 0) {
 		if (errno == EPERM) {
Wang Nan Nov. 1, 2017, 12:56 p.m. UTC | #6
On 2017/11/1 20:39, Jiri Olsa wrote:
> On Wed, Nov 01, 2017 at 08:10:49PM +0800, Wangnan (F) wrote:

>

> SNIP

>

>>>>> diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c

>>>>> index c6c891e154a6..27ebe355e794 100644

>>>>> --- a/tools/perf/util/evlist.c

>>>>> +++ b/tools/perf/util/evlist.c

>>>>> @@ -838,6 +838,11 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,

>>>>>                    if (*output == -1) {

>>>>>                            *output = fd;

>>>>> +                       if (evsel->attr.write_backward)

>>>>> +                               mp->prot = PROT_READ;

>>>>> +                       else

>>>>> +                               mp->prot = PROT_READ | PROT_WRITE;

>>>>> +

>>>> If evlist->overwrite is true, PROT_WRITE should be unset even if

>>>> write_backward is

>>>> not set. If you want to delay the setting of mp->prot, you need to consider

>>>> both evlist->overwrite and evsel->attr.write_backward.

>>> I thought evsel->attr.write_backward should be set when

>>> evlist->overwrite is set.  Do you mean following case?

>>>

>>>     perf record --overwrite -e 'cycles/no-overwrite/'

>>>

>> No. evlist->overwrite is unrelated to '--overwrite'. This is why I

>> said the concept of 'overwrite' and 'backward' is ambiguous.

>>

>> perf record never sets 'evlist->overwrite'. What '--overwrite' actually

>> does is setting write_backward. Some testcases needs overwrite evlist.

> did not check deeply, but so why can't we do the below?

>

> jirka

>

>

> ---

> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c

> index f4d9fc54b382..4643c487bd29 100644

> --- a/tools/perf/builtin-record.c

> +++ b/tools/perf/builtin-record.c

> @@ -300,7 +300,7 @@ static int record__mmap_evlist(struct record *rec,

>   	struct record_opts *opts = &rec->opts;

>   	char msg[512];

>   

> -	if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false,

> +	if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, opts->overwrite,

>   				 opts->auxtrace_mmap_pages,

>   				 opts->auxtrace_snapshot_mode) < 0) {


perf_evlist__mmap_ex's overwrite argument is used to control evlist->mmap.

Consider Namhyung's example:

   perf record --overwrite -e 'cycles/no-overwrite/'

In this case both evlist->mmap and evlist->backward_mmap would be set to overwrite.
'cycles' will be put into evlist->mmap, so it will be set to overwrite incorrectly.

Patch 2/2 clarifies these concepts. What we want is the flight recorder mode,
not only a read only ring buffer. Even flight recorder mode is selected, we can
still have a normal ring buffer which keep output data.

Thank you.
Liang, Kan Nov. 1, 2017, 1:57 p.m. UTC | #7
> On 2017/11/1 20:00, Namhyung Kim wrote:

> > On Wed, Nov 01, 2017 at 06:32:50PM +0800, Wangnan (F) wrote:

> >>

> >> On 2017/11/1 17:49, Namhyung Kim wrote:

> >>> Hi,

> >>>

> >>> On Wed, Nov 01, 2017 at 05:53:26AM +0000, Wang Nan wrote:

> >>>> perf record backward recording doesn't work as we expected: it never

> >>>> overwrite when ring buffer full.

> >>>>

> >>>> Test:

> >>>>

> >>>> (Run a busy printing python task background like this:

> >>>>

> >>>>    while True:

> >>>>        print 123

> >>>>

> >>>> send SIGUSR2 to perf to capture snapshot.)

> >> [SNIP]

> >>

> >>>> Signed-off-by: Wang Nan <wangnan0@huawei.com>

> >>>> ---

> >>>>    tools/perf/util/evlist.c | 8 +++++++-

> >>>>    1 file changed, 7 insertions(+), 1 deletion(-)

> >>>>

> >>>> diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c

> >>>> index c6c891e..4c5daba 100644

> >>>> --- a/tools/perf/util/evlist.c

> >>>> +++ b/tools/perf/util/evlist.c

> >>>> @@ -799,22 +799,28 @@ perf_evlist__should_poll(struct perf_evlist

> *evlist __maybe_unused,

> >>>>    }

> >>>>    static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int

> idx,

> >>>> -				       struct mmap_params *mp, int cpu_idx,

> >>>> +				       struct mmap_params *_mp, int cpu_idx,

> >>>>    				       int thread, int *_output, int

> *_output_backward)

> >>>>    {

> >>>>    	struct perf_evsel *evsel;

> >>>>    	int revent;

> >>>>    	int evlist_cpu = cpu_map__cpu(evlist->cpus, cpu_idx);

> >>>> +	struct mmap_params *mp;

> >>>>    	evlist__for_each_entry(evlist, evsel) {

> >>>>    		struct perf_mmap *maps = evlist->mmap;

> >>>> +		struct mmap_params rdonly_mp;

> >>>>    		int *output = _output;

> >>>>    		int fd;

> >>>>    		int cpu;

> >>>> +		mp = _mp;

> >>>>    		if (evsel->attr.write_backward) {

> >>>>    			output = _output_backward;

> >>>>    			maps = evlist->backward_mmap;

> >>>> +			rdonly_mp = *_mp;

> >>>> +			rdonly_mp.prot &= ~PROT_WRITE;

> >>>> +			mp = &rdonly_mp;

> >>>>    			if (!maps) {

> >>>>    				maps = perf_evlist__alloc_mmap(evlist);

> >>>> --

> >>> What about this instead (not tested)?

> >>>

> >>>

> >>> diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c

> >>> index c6c891e154a6..27ebe355e794 100644

> >>> --- a/tools/perf/util/evlist.c

> >>> +++ b/tools/perf/util/evlist.c

> >>> @@ -838,6 +838,11 @@ static int perf_evlist__mmap_per_evsel(struct

> perf_evlist *evlist, int idx,

> >>>                   if (*output == -1) {

> >>>                           *output = fd;

> >>> +                       if (evsel->attr.write_backward)

> >>> +                               mp->prot = PROT_READ;

> >>> +                       else

> >>> +                               mp->prot = PROT_READ | PROT_WRITE;

> >>> +

> >> If evlist->overwrite is true, PROT_WRITE should be unset even if

> >> write_backward is

> >> not set. If you want to delay the setting of mp->prot, you need to consider

> >> both evlist->overwrite and evsel->attr.write_backward.

> > I thought evsel->attr.write_backward should be set when

> > evlist->overwrite is set.  Do you mean following case?

> >

> >    perf record --overwrite -e 'cycles/no-overwrite/'

> >

> 

> No. evlist->overwrite is unrelated to '--overwrite'. This is why I

> said the concept of 'overwrite' and 'backward' is ambiguous.

>


Yes, I think we should make it clear.

As we discussed previously, there are four possible combinations
to access ring buffer , 'forward non-overwrite', 'forward overwrite',
'backward non-overwrite' and 'backward overwrite'.

Actually, not all of the combinations are necessary.
- 'forward overwrite' mode brings some problems which were mentioned
  in commit ID 9ecda41acb97 ("perf/core: Add ::write_backward attribute
  to perf event").
- 'backward non-overwrite' mode is very similar as 'forward non-overwrite'.
   There is no extra advantage. Only keep one non-overwrite mode is enough.
So 'forward non-overwrite' and 'backward overwrite' are enough for all perf tools.

Furthermore, 'forward' and 'backward' only indicate the direction of the
ring buffer. They don't impact the result and performance. It is not
important as the concept of overwrite/non-overwrite.

To simplify the concept, only 'non-overwrite' mode and 'overwrite' mode should
be kept. 'non-overwrite' mode indicates the forward ring buffer. 'overwrite' mode
indicates the backward ring buffer.

> perf record never sets 'evlist->overwrite'. What '--overwrite' actually

> does is setting write_backward. Some testcases needs overwrite evlist.

> 


There are only four test cases which set overwrite, sw-clock,task-exit,
mmap-basic, backward-ring-buffer.
Only backward-ring-buffer is 'backward overwrite'.
The rest three are all 'forward overwrite'. We just need to set write_backward
to convert them to 'backward overwrite'.
I think it's not hard to clean up.

Thanks,
Kan
Wang Nan Nov. 1, 2017, 4:12 p.m. UTC | #8
On 2017/11/1 21:57, Liang, Kan wrote:
>> On 2017/11/1 20:00, Namhyung Kim wrote:

>>> On Wed, Nov 01, 2017 at 06:32:50PM +0800, Wangnan (F) wrote:

>>>> On 2017/11/1 17:49, Namhyung Kim wrote:

>>>>> Hi,

>>>>>

>>>>> On Wed, Nov 01, 2017 at 05:53:26AM +0000, Wang Nan wrote:

>>>>>> perf record backward recording doesn't work as we expected: it never

>>>>>> overwrite when ring buffer full.

>>>>>>

>>>>>> Test:

>>>>>>

>>>>>> (Run a busy printing python task background like this:

>>>>>>

>>>>>>     while True:

>>>>>>         print 123

>>>>>>

>>>>>> send SIGUSR2 to perf to capture snapshot.)

>>>> [SNIP]

>>>>

>>>>>> Signed-off-by: Wang Nan <wangnan0@huawei.com>

>>>>>> ---

>>>>>>     tools/perf/util/evlist.c | 8 +++++++-

>>>>>>     1 file changed, 7 insertions(+), 1 deletion(-)

>>>>>>

>>>>>> diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c

>>>>>> index c6c891e..4c5daba 100644

>>>>>> --- a/tools/perf/util/evlist.c

>>>>>> +++ b/tools/perf/util/evlist.c

>>>>>> @@ -799,22 +799,28 @@ perf_evlist__should_poll(struct perf_evlist

>> *evlist __maybe_unused,

>>>>>>     }

>>>>>>     static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int

>> idx,

>>>>>> -				       struct mmap_params *mp, int cpu_idx,

>>>>>> +				       struct mmap_params *_mp, int cpu_idx,

>>>>>>     				       int thread, int *_output, int

>> *_output_backward)

>>>>>>     {

>>>>>>     	struct perf_evsel *evsel;

>>>>>>     	int revent;

>>>>>>     	int evlist_cpu = cpu_map__cpu(evlist->cpus, cpu_idx);

>>>>>> +	struct mmap_params *mp;

>>>>>>     	evlist__for_each_entry(evlist, evsel) {

>>>>>>     		struct perf_mmap *maps = evlist->mmap;

>>>>>> +		struct mmap_params rdonly_mp;

>>>>>>     		int *output = _output;

>>>>>>     		int fd;

>>>>>>     		int cpu;

>>>>>> +		mp = _mp;

>>>>>>     		if (evsel->attr.write_backward) {

>>>>>>     			output = _output_backward;

>>>>>>     			maps = evlist->backward_mmap;

>>>>>> +			rdonly_mp = *_mp;

>>>>>> +			rdonly_mp.prot &= ~PROT_WRITE;

>>>>>> +			mp = &rdonly_mp;

>>>>>>     			if (!maps) {

>>>>>>     				maps = perf_evlist__alloc_mmap(evlist);

>>>>>> --

>>>>> What about this instead (not tested)?

>>>>>

>>>>>

>>>>> diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c

>>>>> index c6c891e154a6..27ebe355e794 100644

>>>>> --- a/tools/perf/util/evlist.c

>>>>> +++ b/tools/perf/util/evlist.c

>>>>> @@ -838,6 +838,11 @@ static int perf_evlist__mmap_per_evsel(struct

>> perf_evlist *evlist, int idx,

>>>>>                    if (*output == -1) {

>>>>>                            *output = fd;

>>>>> +                       if (evsel->attr.write_backward)

>>>>> +                               mp->prot = PROT_READ;

>>>>> +                       else

>>>>> +                               mp->prot = PROT_READ | PROT_WRITE;

>>>>> +

>>>> If evlist->overwrite is true, PROT_WRITE should be unset even if

>>>> write_backward is

>>>> not set. If you want to delay the setting of mp->prot, you need to consider

>>>> both evlist->overwrite and evsel->attr.write_backward.

>>> I thought evsel->attr.write_backward should be set when

>>> evlist->overwrite is set.  Do you mean following case?

>>>

>>>     perf record --overwrite -e 'cycles/no-overwrite/'

>>>

>> No. evlist->overwrite is unrelated to '--overwrite'. This is why I

>> said the concept of 'overwrite' and 'backward' is ambiguous.

>>

> Yes, I think we should make it clear.

>

> As we discussed previously, there are four possible combinations

> to access ring buffer , 'forward non-overwrite', 'forward overwrite',

> 'backward non-overwrite' and 'backward overwrite'.

>

> Actually, not all of the combinations are necessary.

> - 'forward overwrite' mode brings some problems which were mentioned

>    in commit ID 9ecda41acb97 ("perf/core: Add ::write_backward attribute

>    to perf event").

> - 'backward non-overwrite' mode is very similar as 'forward non-overwrite'.

>     There is no extra advantage. Only keep one non-overwrite mode is enough.

> So 'forward non-overwrite' and 'backward overwrite' are enough for all perf tools.

>

> Furthermore, 'forward' and 'backward' only indicate the direction of the

> ring buffer. They don't impact the result and performance. It is not

> important as the concept of overwrite/non-overwrite.

>

> To simplify the concept, only 'non-overwrite' mode and 'overwrite' mode should

> be kept. 'non-overwrite' mode indicates the forward ring buffer. 'overwrite' mode

> indicates the backward ring buffer.

>

>> perf record never sets 'evlist->overwrite'. What '--overwrite' actually

>> does is setting write_backward. Some testcases needs overwrite evlist.

>>

> There are only four test cases which set overwrite, sw-clock,task-exit,

> mmap-basic, backward-ring-buffer.

> Only backward-ring-buffer is 'backward overwrite'.

> The rest three are all 'forward overwrite'. We just need to set write_backward

> to convert them to 'backward overwrite'.

> I think it's not hard to clean up.


If we add a new rule that overwrite ring buffers are always backward
then it is not hard to cleanup. However, the support of forward
overwrite ring buffer has a long history and the code is not written
by me. I'd like to check if there is some reason to keep support this
configuration?

Thank you.
Liang, Kan Nov. 1, 2017, 4:22 p.m. UTC | #9
> On 2017/11/1 21:57, Liang, Kan wrote:

> >> On 2017/11/1 20:00, Namhyung Kim wrote:

> >>> On Wed, Nov 01, 2017 at 06:32:50PM +0800, Wangnan (F) wrote:

> >>>> On 2017/11/1 17:49, Namhyung Kim wrote:

> >>>>> Hi,

> >>>>>

> >>>>> On Wed, Nov 01, 2017 at 05:53:26AM +0000, Wang Nan wrote:

> >>>>>> perf record backward recording doesn't work as we expected: it

> >>>>>> never overwrite when ring buffer full.

> >>>>>>

> >>>>>> Test:

> >>>>>>

> >>>>>> (Run a busy printing python task background like this:

> >>>>>>

> >>>>>>     while True:

> >>>>>>         print 123

> >>>>>>

> >>>>>> send SIGUSR2 to perf to capture snapshot.)

> >>>> [SNIP]

> >>>>

> >>>>>> Signed-off-by: Wang Nan <wangnan0@huawei.com>

> >>>>>> ---

> >>>>>>     tools/perf/util/evlist.c | 8 +++++++-

> >>>>>>     1 file changed, 7 insertions(+), 1 deletion(-)

> >>>>>>

> >>>>>> diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c

> >>>>>> index c6c891e..4c5daba 100644

> >>>>>> --- a/tools/perf/util/evlist.c

> >>>>>> +++ b/tools/perf/util/evlist.c

> >>>>>> @@ -799,22 +799,28 @@ perf_evlist__should_poll(struct perf_evlist

> >> *evlist __maybe_unused,

> >>>>>>     }

> >>>>>>     static int perf_evlist__mmap_per_evsel(struct perf_evlist

> >>>>>> *evlist, int

> >> idx,

> >>>>>> -				       struct mmap_params *mp, int

> cpu_idx,

> >>>>>> +				       struct mmap_params *_mp, int

> cpu_idx,

> >>>>>>     				       int thread, int *_output, int

> >> *_output_backward)

> >>>>>>     {

> >>>>>>     	struct perf_evsel *evsel;

> >>>>>>     	int revent;

> >>>>>>     	int evlist_cpu = cpu_map__cpu(evlist->cpus, cpu_idx);

> >>>>>> +	struct mmap_params *mp;

> >>>>>>     	evlist__for_each_entry(evlist, evsel) {

> >>>>>>     		struct perf_mmap *maps = evlist->mmap;

> >>>>>> +		struct mmap_params rdonly_mp;

> >>>>>>     		int *output = _output;

> >>>>>>     		int fd;

> >>>>>>     		int cpu;

> >>>>>> +		mp = _mp;

> >>>>>>     		if (evsel->attr.write_backward) {

> >>>>>>     			output = _output_backward;

> >>>>>>     			maps = evlist->backward_mmap;

> >>>>>> +			rdonly_mp = *_mp;

> >>>>>> +			rdonly_mp.prot &= ~PROT_WRITE;

> >>>>>> +			mp = &rdonly_mp;

> >>>>>>     			if (!maps) {

> >>>>>>     				maps =

> perf_evlist__alloc_mmap(evlist);

> >>>>>> --

> >>>>> What about this instead (not tested)?

> >>>>>

> >>>>>

> >>>>> diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c

> >>>>> index c6c891e154a6..27ebe355e794 100644

> >>>>> --- a/tools/perf/util/evlist.c

> >>>>> +++ b/tools/perf/util/evlist.c

> >>>>> @@ -838,6 +838,11 @@ static int

> perf_evlist__mmap_per_evsel(struct

> >> perf_evlist *evlist, int idx,

> >>>>>                    if (*output == -1) {

> >>>>>                            *output = fd;

> >>>>> +                       if (evsel->attr.write_backward)

> >>>>> +                               mp->prot = PROT_READ;

> >>>>> +                       else

> >>>>> +                               mp->prot = PROT_READ | PROT_WRITE;

> >>>>> +

> >>>> If evlist->overwrite is true, PROT_WRITE should be unset even if

> >>>> write_backward is not set. If you want to delay the setting of

> >>>> mp->prot, you need to consider both evlist->overwrite and

> >>>> evsel->attr.write_backward.

> >>> I thought evsel->attr.write_backward should be set when

> >>> evlist->overwrite is set.  Do you mean following case?

> >>>

> >>>     perf record --overwrite -e 'cycles/no-overwrite/'

> >>>

> >> No. evlist->overwrite is unrelated to '--overwrite'. This is why I

> >> said the concept of 'overwrite' and 'backward' is ambiguous.

> >>

> > Yes, I think we should make it clear.

> >

> > As we discussed previously, there are four possible combinations to

> > access ring buffer , 'forward non-overwrite', 'forward overwrite',

> > 'backward non-overwrite' and 'backward overwrite'.

> >

> > Actually, not all of the combinations are necessary.

> > - 'forward overwrite' mode brings some problems which were mentioned

> >    in commit ID 9ecda41acb97 ("perf/core: Add ::write_backward attribute

> >    to perf event").

> > - 'backward non-overwrite' mode is very similar as 'forward non-overwrite'.

> >     There is no extra advantage. Only keep one non-overwrite mode is

> enough.

> > So 'forward non-overwrite' and 'backward overwrite' are enough for all

> perf tools.

> >

> > Furthermore, 'forward' and 'backward' only indicate the direction of

> > the ring buffer. They don't impact the result and performance. It is

> > not important as the concept of overwrite/non-overwrite.

> >

> > To simplify the concept, only 'non-overwrite' mode and 'overwrite'

> > mode should be kept. 'non-overwrite' mode indicates the forward ring

> > buffer. 'overwrite' mode indicates the backward ring buffer.

> >

> >> perf record never sets 'evlist->overwrite'. What '--overwrite'

> >> actually does is setting write_backward. Some testcases needs overwrite

> evlist.

> >>

> > There are only four test cases which set overwrite,

> > sw-clock,task-exit, mmap-basic, backward-ring-buffer.

> > Only backward-ring-buffer is 'backward overwrite'.

> > The rest three are all 'forward overwrite'. We just need to set

> > write_backward to convert them to 'backward overwrite'.

> > I think it's not hard to clean up.

> 

> If we add a new rule that overwrite ring buffers are always backward then it

> is not hard to cleanup. However, the support of forward overwrite ring

> buffer has a long history and the code is not written by me. I'd like to check if

> there is some reason to keep support this configuration?

> 


As my observation, currently, there are no perf tools which support
'forward overwrite'.
There are only three test cases (sw-clock, task-exit, mmap-basic) which
is set to 'forward overwrite'. I don’t see any reason it cannot be changed to
'backward overwrite'

Arnaldo? Jirka? Kim?

What do you think?

Thanks,
Kan
Namhyung Kim Nov. 2, 2017, 5:34 a.m. UTC | #10
Hi Kan,

On Wed, Nov 01, 2017 at 04:22:53PM +0000, Liang, Kan wrote:
> > On 2017/11/1 21:57, Liang, Kan wrote:

> > >> On 2017/11/1 20:00, Namhyung Kim wrote:

> > >>> On Wed, Nov 01, 2017 at 06:32:50PM +0800, Wangnan (F) wrote:

> > > There are only four test cases which set overwrite,

> > > sw-clock,task-exit, mmap-basic, backward-ring-buffer.

> > > Only backward-ring-buffer is 'backward overwrite'.

> > > The rest three are all 'forward overwrite'. We just need to set

> > > write_backward to convert them to 'backward overwrite'.

> > > I think it's not hard to clean up.

> > 

> > If we add a new rule that overwrite ring buffers are always backward then it

> > is not hard to cleanup. However, the support of forward overwrite ring

> > buffer has a long history and the code is not written by me. I'd like to check if

> > there is some reason to keep support this configuration?

> > 

> 

> As my observation, currently, there are no perf tools which support

> 'forward overwrite'.

> There are only three test cases (sw-clock, task-exit, mmap-basic) which

> is set to 'forward overwrite'. I don’t see any reason it cannot be changed to

> 'backward overwrite'

> 

> Arnaldo? Jirka? Kim?

> 

> What do you think?


I think sw-clock, task-exit and mmap-basic test cases can be changed
to use the forward non-overwrite mode.

The forward overwrite might be used by externel applications accessing
the ring buffer directly but not needed for perf tools IMHO.  Let's
keep the code simpler as much as possible.

Thanks,
Namhyung
Liang, Kan Nov. 2, 2017, 1:25 p.m. UTC | #11
Hi Namhyung,

> On Wed, Nov 01, 2017 at 04:22:53PM +0000, Liang, Kan wrote:

> > > On 2017/11/1 21:57, Liang, Kan wrote:

> > > >> On 2017/11/1 20:00, Namhyung Kim wrote:

> > > >>> On Wed, Nov 01, 2017 at 06:32:50PM +0800, Wangnan (F) wrote:

> > > > There are only four test cases which set overwrite,

> > > > sw-clock,task-exit, mmap-basic, backward-ring-buffer.

> > > > Only backward-ring-buffer is 'backward overwrite'.

> > > > The rest three are all 'forward overwrite'. We just need to set

> > > > write_backward to convert them to 'backward overwrite'.

> > > > I think it's not hard to clean up.

> > >

> > > If we add a new rule that overwrite ring buffers are always backward

> > > then it is not hard to cleanup. However, the support of forward

> > > overwrite ring buffer has a long history and the code is not written

> > > by me. I'd like to check if there is some reason to keep support this

> configuration?

> > >

> >

> > As my observation, currently, there are no perf tools which support

> > 'forward overwrite'.

> > There are only three test cases (sw-clock, task-exit, mmap-basic)

> > which is set to 'forward overwrite'. I don’t see any reason it cannot

> > be changed to 'backward overwrite'

> >

> > Arnaldo? Jirka? Kim?

> >

> > What do you think?

> 

> I think sw-clock, task-exit and mmap-basic test cases can be changed to use

> the forward non-overwrite mode.

> 

> The forward overwrite might be used by externel applications accessing the

> ring buffer directly but not needed for perf tools IMHO.  


The proposal is only for perf tool, not kernel. So external applications can still
use forward overwrite to access the ring buffer.

> Let's keep the code simpler as much as possible.


Agree.
Now, there are too many options to access the ring buffer. Not all of them are
supported.
I think we should only keep the crucial options (overwrite/non-overwrite), clearly
define them in the document and cleanup the code.

Also, perf record doesn't use the generic interface (e.g. perf_evlist__mmap*) as other
tools to access ring buffer. Because the generic interface is hardcoded to only support
forward non-overwrite. We should cleanup it as well. But that could be done later
separately.

Thanks,
Kan
Jiri Olsa Nov. 2, 2017, 2:59 p.m. UTC | #12
On Thu, Nov 02, 2017 at 01:25:08PM +0000, Liang, Kan wrote:
> Hi Namhyung,

> 

> > On Wed, Nov 01, 2017 at 04:22:53PM +0000, Liang, Kan wrote:

> > > > On 2017/11/1 21:57, Liang, Kan wrote:

> > > > >> On 2017/11/1 20:00, Namhyung Kim wrote:

> > > > >>> On Wed, Nov 01, 2017 at 06:32:50PM +0800, Wangnan (F) wrote:

> > > > > There are only four test cases which set overwrite,

> > > > > sw-clock,task-exit, mmap-basic, backward-ring-buffer.

> > > > > Only backward-ring-buffer is 'backward overwrite'.

> > > > > The rest three are all 'forward overwrite'. We just need to set

> > > > > write_backward to convert them to 'backward overwrite'.

> > > > > I think it's not hard to clean up.

> > > >

> > > > If we add a new rule that overwrite ring buffers are always backward

> > > > then it is not hard to cleanup. However, the support of forward

> > > > overwrite ring buffer has a long history and the code is not written

> > > > by me. I'd like to check if there is some reason to keep support this

> > configuration?

> > > >

> > >

> > > As my observation, currently, there are no perf tools which support

> > > 'forward overwrite'.

> > > There are only three test cases (sw-clock, task-exit, mmap-basic)

> > > which is set to 'forward overwrite'. I don’t see any reason it cannot

> > > be changed to 'backward overwrite'

> > >

> > > Arnaldo? Jirka? Kim?

> > >

> > > What do you think?

> > 

> > I think sw-clock, task-exit and mmap-basic test cases can be changed to use

> > the forward non-overwrite mode.


agreed, we can change them to forward non-overwrite mode

> > The forward overwrite might be used by externel applications accessing the

> > ring buffer directly but not needed for perf tools IMHO.  

> 

> The proposal is only for perf tool, not kernel. So external applications can still

> use forward overwrite to access the ring buffer.

> 

> > Let's keep the code simpler as much as possible.

> 

> Agree.

> Now, there are too many options to access the ring buffer. Not all of them are

> supported.

> I think we should only keep the crucial options (overwrite/non-overwrite), clearly

> define them in the document and cleanup the code.


as you said earlier only 2 modes make sense, so I think perf record should have:

  - forward non-overwrite mode by default
  - backward overwrite mode when '--overwrite' option is set

and make it clear in the docs, maybe in special perf-record man page section

so far I still like the '--overwrite' option more than --flight-recorder' ;-)
also it's been out there for some time now

jirka
Jiri Olsa Nov. 2, 2017, 3:12 p.m. UTC | #13
On Wed, Nov 01, 2017 at 08:56:32PM +0800, Wangnan (F) wrote:
> 

> 

> On 2017/11/1 20:39, Jiri Olsa wrote:

> > On Wed, Nov 01, 2017 at 08:10:49PM +0800, Wangnan (F) wrote:

> > 

> > SNIP

> > 

> > > > > > diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c

> > > > > > index c6c891e154a6..27ebe355e794 100644

> > > > > > --- a/tools/perf/util/evlist.c

> > > > > > +++ b/tools/perf/util/evlist.c

> > > > > > @@ -838,6 +838,11 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,

> > > > > >                    if (*output == -1) {

> > > > > >                            *output = fd;

> > > > > > +                       if (evsel->attr.write_backward)

> > > > > > +                               mp->prot = PROT_READ;

> > > > > > +                       else

> > > > > > +                               mp->prot = PROT_READ | PROT_WRITE;

> > > > > > +

> > > > > If evlist->overwrite is true, PROT_WRITE should be unset even if

> > > > > write_backward is

> > > > > not set. If you want to delay the setting of mp->prot, you need to consider

> > > > > both evlist->overwrite and evsel->attr.write_backward.

> > > > I thought evsel->attr.write_backward should be set when

> > > > evlist->overwrite is set.  Do you mean following case?

> > > > 

> > > >     perf record --overwrite -e 'cycles/no-overwrite/'

> > > > 

> > > No. evlist->overwrite is unrelated to '--overwrite'. This is why I

> > > said the concept of 'overwrite' and 'backward' is ambiguous.

> > > 

> > > perf record never sets 'evlist->overwrite'. What '--overwrite' actually

> > > does is setting write_backward. Some testcases needs overwrite evlist.

> > did not check deeply, but so why can't we do the below?

> > 

> > jirka

> > 

> > 

> > ---

> > diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c

> > index f4d9fc54b382..4643c487bd29 100644

> > --- a/tools/perf/builtin-record.c

> > +++ b/tools/perf/builtin-record.c

> > @@ -300,7 +300,7 @@ static int record__mmap_evlist(struct record *rec,

> >   	struct record_opts *opts = &rec->opts;

> >   	char msg[512];

> > -	if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false,

> > +	if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, opts->overwrite,

> >   				 opts->auxtrace_mmap_pages,

> >   				 opts->auxtrace_snapshot_mode) < 0) {

> 

> perf_evlist__mmap_ex's overwrite argument is used to control evlist->mmap.

> 

> Consider Namhyung's example:

> 

>   perf record --overwrite -e 'cycles/no-overwrite/'

> 

> In this case both evlist->mmap and evlist->backward_mmap would be set to overwrite.

> 'cycles' will be put into evlist->mmap, so it will be set to overwrite incorrectly.


right, missed the separate mmaps..

so we have some code that uses evlist->overwrite, which is always
set to 'false' in perf record.. but in the crucial checks like
for perf_mmap__consume we use the 'backward' bool to save the day

that might need some consolidation as well.. we could keep the
overwrite flag in the struct perf_mmap.. that could simplify the code

jirka
diff mbox series

Patch

diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index c6c891e..4c5daba 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -799,22 +799,28 @@  perf_evlist__should_poll(struct perf_evlist *evlist __maybe_unused,
 }
 
 static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
-				       struct mmap_params *mp, int cpu_idx,
+				       struct mmap_params *_mp, int cpu_idx,
 				       int thread, int *_output, int *_output_backward)
 {
 	struct perf_evsel *evsel;
 	int revent;
 	int evlist_cpu = cpu_map__cpu(evlist->cpus, cpu_idx);
+	struct mmap_params *mp;
 
 	evlist__for_each_entry(evlist, evsel) {
 		struct perf_mmap *maps = evlist->mmap;
+		struct mmap_params rdonly_mp;
 		int *output = _output;
 		int fd;
 		int cpu;
 
+		mp = _mp;
 		if (evsel->attr.write_backward) {
 			output = _output_backward;
 			maps = evlist->backward_mmap;
+			rdonly_mp = *_mp;
+			rdonly_mp.prot &= ~PROT_WRITE;
+			mp = &rdonly_mp;
 
 			if (!maps) {
 				maps = perf_evlist__alloc_mmap(evlist);