diff mbox series

perf cs-etm: Improve completeness for kernel address space

Message ID 20190617150024.11787-1-leo.yan@linaro.org
State New
Headers show
Series perf cs-etm: Improve completeness for kernel address space | expand

Commit Message

Leo Yan June 17, 2019, 3 p.m. UTC
Arm and arm64 architecture reserve some memory regions prior to the
symbol '_stext' and these memory regions later will be used by device
module and BPF jit.  The current code misses to consider these memory
regions thus any address in the regions will be taken as user space
mode, but perf cannot find the corresponding dso with the wrong CPU
mode so we misses to generate samples for device module and BPF
related trace data.

This patch parse the link scripts to get the memory size prior to start
address and reduce this size from 'etmq->etm->kernel_start', then can
get a fixed up kernel start address which contain memory regions for
device module and BPF.  Finally, cs_etm__cpu_mode() can return right
mode for these memory regions and perf can successfully generate
samples.

The reason for parsing the link scripts is Arm architecture changes text
offset dependent on different platforms, which define multiple text
offsets in $kernel/arch/arm/Makefile.  This offset is decided when build
kernel and the final value is extended in the link script, so we can
extract the used value from the link script.  We use the same way to
parse arm64 link script as well.  If fail to find the link script, the
pre start memory size is assumed as zero, in this case it has no any
change caused with this patch.

Below is detailed info for testing this patch:

- Build LLVM/Clang 8.0 or later version;

- Configure perf with ~/.perfconfig:

  root@debian:~# cat ~/.perfconfig
  # this file is auto-generated.
  [llvm]
          clang-path = /mnt/build/llvm-build/build/install/bin/clang
          kbuild-dir = /mnt/linux-kernel/linux-cs-dev/
          clang-opt = "-DLINUX_VERSION_CODE=0x50200 -g"
          dump-obj = true

  [trace]
          show_zeros = yes
          show_duration = no
          no_inherit = yes
          show_timestamp = no
          show_arg_names = no
          args_alignment = 40
          show_prefix = yes

- Run 'perf trace' command with eBPF event:

  root@debian:~# perf trace -e string \
      -e $kernel/tools/perf/examples/bpf/augmented_raw_syscalls.c

- Read eBPF program memory mapping in kernel:

  root@debian:~# echo 1 > /proc/sys/net/core/bpf_jit_kallsyms
  root@debian:~# cat /proc/kallsyms | grep -E "bpf_prog_.+_sys_[enter|exit]"
  ffff000000086a84 t bpf_prog_f173133dc38ccf87_sys_enter  [bpf]
  ffff000000088618 t bpf_prog_c1bd85c092d6e4aa_sys_exit   [bpf]

- Launch any program which accesses file system frequently so can hit
  the system calls trace flow with eBPF event;

- Capture CoreSight trace data with filtering eBPF program:

  root@debian:~# perf record -e cs_etm/@20070000.etr/ \
	  --filter 'filter 0xffff000000086a84/0x800' -a sleep 5s

- Annotate for symbol 'bpf_prog_f173133dc38ccf87_sys_enter':

  root@debian:~# perf report
  Then select 'branches' samples and press 'a' to annotate symbol
  'bpf_prog_f173133dc38ccf87_sys_enter', press 'P' to print to the
  bpf_prog_f173133dc38ccf87_sys_enter.annotation file:

  root@debian:~# cat bpf_prog_f173133dc38ccf87_sys_enter.annotation

  bpf_prog_f173133dc38ccf87_sys_enter() bpf_prog_f173133dc38ccf87_sys_enter
  Event: branches

  Percent      int sys_enter(struct syscall_enter_args *args)
                 stp  x29, x30, [sp, #-16]!

               	int key = 0;
                 mov  x29, sp

                       augmented_args = bpf_map_lookup_elem(&augmented_filename_map, &key);
                 stp  x19, x20, [sp, #-16]!

                       augmented_args = bpf_map_lookup_elem(&augmented_filename_map, &key);
                 stp  x21, x22, [sp, #-16]!

                 stp  x25, x26, [sp, #-16]!

               	return bpf_get_current_pid_tgid();
                 mov  x25, sp

               	return bpf_get_current_pid_tgid();
                 mov  x26, #0x0                   	// #0

                 sub  sp, sp, #0x10

               	return bpf_map_lookup_elem(pids, &pid) != NULL;
                 add  x19, x0, #0x0

                 mov  x0, #0x0                   	// #0

                 mov  x10, #0xfffffffffffffff8    	// #-8

               	if (pid_filter__has(&pids_filtered, getpid()))
                 str  w0, [x25, x10]

               	probe_read(&augmented_args->args, sizeof(augmented_args->args), args);
                 add  x1, x25, #0x0

               	probe_read(&augmented_args->args, sizeof(augmented_args->args), args);
                 mov  x10, #0xfffffffffffffff8    	// #-8

               	syscall = bpf_map_lookup_elem(&syscalls, &augmented_args->args.syscall_nr);
                 add  x1, x1, x10

               	syscall = bpf_map_lookup_elem(&syscalls, &augmented_args->args.syscall_nr);
                 mov  x0, #0xffff8009ffffffff    	// #-140694538682369

                 movk x0, #0x6698, lsl #16

                 movk x0, #0x3e00

                 mov  x10, #0xffffffffffff1040    	// #-61376

               	if (syscall == NULL || !syscall->enabled)
                 movk x10, #0x1023, lsl #16

               	if (syscall == NULL || !syscall->enabled)
                 movk x10, #0x0, lsl #32

               	loop_iter_first()
    3.69       → blr  bpf_prog_f173133dc38ccf87_sys_enter
               	loop_iter_first()
                 add  x7, x0, #0x0

               	loop_iter_first()
                 add  x20, x7, #0x0

               	int size = probe_read_str(&augmented_filename->value, filename_len, filename_arg);
                 mov  x0, #0x1                   	// #1

  [...]

Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Suzuki Poulouse <suzuki.poulose@arm.com>
Cc: coresight@lists.linaro.org
Cc: linux-arm-kernel@lists.infradead.org
Signed-off-by: Leo Yan <leo.yan@linaro.org>

---
 tools/perf/Makefile.config | 24 ++++++++++++++++++++++++
 tools/perf/util/cs-etm.c   | 26 +++++++++++++++++++++++++-
 2 files changed, 49 insertions(+), 1 deletion(-)

-- 
2.17.1

Comments

Mathieu Poirier June 19, 2019, 5:49 p.m. UTC | #1
Hi Leo,

On Mon, 17 Jun 2019 at 09:00, Leo Yan <leo.yan@linaro.org> wrote:
>

> Arm and arm64 architecture reserve some memory regions prior to the

> symbol '_stext' and these memory regions later will be used by device

> module and BPF jit.  The current code misses to consider these memory

> regions thus any address in the regions will be taken as user space

> mode, but perf cannot find the corresponding dso with the wrong CPU

> mode so we misses to generate samples for device module and BPF

> related trace data.

>

> This patch parse the link scripts to get the memory size prior to start

> address and reduce this size from 'etmq->etm->kernel_start', then can

> get a fixed up kernel start address which contain memory regions for

> device module and BPF.  Finally, cs_etm__cpu_mode() can return right

> mode for these memory regions and perf can successfully generate

> samples.

>

> The reason for parsing the link scripts is Arm architecture changes text

> offset dependent on different platforms, which define multiple text

> offsets in $kernel/arch/arm/Makefile.  This offset is decided when build

> kernel and the final value is extended in the link script, so we can

> extract the used value from the link script.  We use the same way to

> parse arm64 link script as well.  If fail to find the link script, the

> pre start memory size is assumed as zero, in this case it has no any

> change caused with this patch.

>

> Below is detailed info for testing this patch:

>

> - Build LLVM/Clang 8.0 or later version;

>

> - Configure perf with ~/.perfconfig:

>

>   root@debian:~# cat ~/.perfconfig

>   # this file is auto-generated.

>   [llvm]

>           clang-path = /mnt/build/llvm-build/build/install/bin/clang

>           kbuild-dir = /mnt/linux-kernel/linux-cs-dev/

>           clang-opt = "-DLINUX_VERSION_CODE=0x50200 -g"

>           dump-obj = true

>

>   [trace]

>           show_zeros = yes

>           show_duration = no

>           no_inherit = yes

>           show_timestamp = no

>           show_arg_names = no

>           args_alignment = 40

>           show_prefix = yes

>

> - Run 'perf trace' command with eBPF event:

>

>   root@debian:~# perf trace -e string \

>       -e $kernel/tools/perf/examples/bpf/augmented_raw_syscalls.c

>

> - Read eBPF program memory mapping in kernel:

>

>   root@debian:~# echo 1 > /proc/sys/net/core/bpf_jit_kallsyms

>   root@debian:~# cat /proc/kallsyms | grep -E "bpf_prog_.+_sys_[enter|exit]"

>   ffff000000086a84 t bpf_prog_f173133dc38ccf87_sys_enter  [bpf]

>   ffff000000088618 t bpf_prog_c1bd85c092d6e4aa_sys_exit   [bpf]

>

> - Launch any program which accesses file system frequently so can hit

>   the system calls trace flow with eBPF event;

>

> - Capture CoreSight trace data with filtering eBPF program:

>

>   root@debian:~# perf record -e cs_etm/@20070000.etr/ \

>           --filter 'filter 0xffff000000086a84/0x800' -a sleep 5s

>

> - Annotate for symbol 'bpf_prog_f173133dc38ccf87_sys_enter':

>

>   root@debian:~# perf report

>   Then select 'branches' samples and press 'a' to annotate symbol

>   'bpf_prog_f173133dc38ccf87_sys_enter', press 'P' to print to the

>   bpf_prog_f173133dc38ccf87_sys_enter.annotation file:

>

>   root@debian:~# cat bpf_prog_f173133dc38ccf87_sys_enter.annotation

>

>   bpf_prog_f173133dc38ccf87_sys_enter() bpf_prog_f173133dc38ccf87_sys_enter

>   Event: branches

>

>   Percent      int sys_enter(struct syscall_enter_args *args)

>                  stp  x29, x30, [sp, #-16]!

>

>                 int key = 0;

>                  mov  x29, sp

>

>                        augmented_args = bpf_map_lookup_elem(&augmented_filename_map, &key);

>                  stp  x19, x20, [sp, #-16]!

>

>                        augmented_args = bpf_map_lookup_elem(&augmented_filename_map, &key);

>                  stp  x21, x22, [sp, #-16]!

>

>                  stp  x25, x26, [sp, #-16]!

>

>                 return bpf_get_current_pid_tgid();

>                  mov  x25, sp

>

>                 return bpf_get_current_pid_tgid();

>                  mov  x26, #0x0                         // #0

>

>                  sub  sp, sp, #0x10

>

>                 return bpf_map_lookup_elem(pids, &pid) != NULL;

>                  add  x19, x0, #0x0

>

>                  mov  x0, #0x0                          // #0

>

>                  mov  x10, #0xfffffffffffffff8          // #-8

>

>                 if (pid_filter__has(&pids_filtered, getpid()))

>                  str  w0, [x25, x10]

>

>                 probe_read(&augmented_args->args, sizeof(augmented_args->args), args);

>                  add  x1, x25, #0x0

>

>                 probe_read(&augmented_args->args, sizeof(augmented_args->args), args);

>                  mov  x10, #0xfffffffffffffff8          // #-8

>

>                 syscall = bpf_map_lookup_elem(&syscalls, &augmented_args->args.syscall_nr);

>                  add  x1, x1, x10

>

>                 syscall = bpf_map_lookup_elem(&syscalls, &augmented_args->args.syscall_nr);

>                  mov  x0, #0xffff8009ffffffff           // #-140694538682369

>

>                  movk x0, #0x6698, lsl #16

>

>                  movk x0, #0x3e00

>

>                  mov  x10, #0xffffffffffff1040          // #-61376

>

>                 if (syscall == NULL || !syscall->enabled)

>                  movk x10, #0x1023, lsl #16

>

>                 if (syscall == NULL || !syscall->enabled)

>                  movk x10, #0x0, lsl #32

>

>                 loop_iter_first()

>     3.69       → blr  bpf_prog_f173133dc38ccf87_sys_enter

>                 loop_iter_first()

>                  add  x7, x0, #0x0

>

>                 loop_iter_first()

>                  add  x20, x7, #0x0

>

>                 int size = probe_read_str(&augmented_filename->value, filename_len, filename_arg);

>                  mov  x0, #0x1                          // #1

>

>   [...]

>

> Cc: Mathieu Poirier <mathieu.poirier@linaro.org>

> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>

> Cc: Jiri Olsa <jolsa@redhat.com>

> Cc: Namhyung Kim <namhyung@kernel.org>

> Cc: Peter Zijlstra <peterz@infradead.org>

> Cc: Suzuki Poulouse <suzuki.poulose@arm.com>

> Cc: coresight@lists.linaro.org

> Cc: linux-arm-kernel@lists.infradead.org

> Signed-off-by: Leo Yan <leo.yan@linaro.org>

> ---

>  tools/perf/Makefile.config | 24 ++++++++++++++++++++++++

>  tools/perf/util/cs-etm.c   | 26 +++++++++++++++++++++++++-

>  2 files changed, 49 insertions(+), 1 deletion(-)

>

> diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config

> index 51dd00f65709..4776c2c1fb6d 100644

> --- a/tools/perf/Makefile.config

> +++ b/tools/perf/Makefile.config

> @@ -418,6 +418,30 @@ ifdef CORESIGHT

>      endif

>      LDFLAGS += $(LIBOPENCSD_LDFLAGS)

>      EXTLIBS += $(OPENCSDLIBS)

> +    ifneq ($(wildcard $(srctree)/arch/arm64/kernel/vmlinux.lds),)

> +      # Extract info from lds:

> +      #  . = ((((((((0xffffffffffffffff)) - (((1)) << (48)) + 1) + (0)) + (0x08000000))) + (0x08000000))) + 0x00080000;

> +      # ARM64_PRE_START_SIZE := (0x08000000 + 0x08000000 + 0x00080000)

> +      ARM64_PRE_START_SIZE := $(shell egrep ' \. \= \({8}0x[0-9a-fA-F]+\){2}' \

> +        $(srctree)/arch/arm64/kernel/vmlinux.lds | \

> +        sed -e 's/[(|)|.|=|+|<|;|-]//g' -e 's/ \+/ /g' -e 's/^[ \t]*//' | \

> +        awk -F' ' '{print "("$$6 "+"  $$7 "+" $$8")"}' 2>/dev/null)

> +    else

> +      ARM64_PRE_START_SIZE := 0

> +    endif

> +    CFLAGS += -DARM64_PRE_START_SIZE="$(ARM64_PRE_START_SIZE)"

> +    ifneq ($(wildcard $(srctree)/arch/arm/kernel/vmlinux.lds),)

> +      # Extract info from lds:

> +      #   . = ((0xC0000000)) + 0x00208000;

> +      # ARM_PRE_START_SIZE := 0x00208000

> +      ARM_PRE_START_SIZE := $(shell egrep ' \. \= \({2}0x[0-9a-fA-F]+\){2}' \

> +        $(srctree)/arch/arm/kernel/vmlinux.lds | \

> +        sed -e 's/[(|)|.|=|+|<|;|-]//g' -e 's/ \+/ /g' -e 's/^[ \t]*//' | \

> +        awk -F' ' '{print "("$$2")"}' 2>/dev/null)

> +    else

> +      ARM_PRE_START_SIZE := 0

> +    endif

> +    CFLAGS += -DARM_PRE_START_SIZE="$(ARM_PRE_START_SIZE)"

>      $(call detected,CONFIG_LIBOPENCSD)

>      ifdef CSTRACE_RAW

>        CFLAGS += -DCS_DEBUG_RAW

> diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c

> index 0c7776b51045..ae831f836c70 100644

> --- a/tools/perf/util/cs-etm.c

> +++ b/tools/perf/util/cs-etm.c

> @@ -613,10 +613,34 @@ static void cs_etm__free(struct perf_session *session)

>  static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address)

>  {

>         struct machine *machine;

> +       u64 fixup_kernel_start = 0;

> +       const char *arch;

>

>         machine = etmq->etm->machine;

> +       arch = perf_env__arch(machine->env);

>

> -       if (address >= etmq->etm->kernel_start) {

> +       /*

> +        * Since arm and arm64 specify some memory regions prior to

> +        * 'kernel_start', kernel addresses can be less than 'kernel_start'.

> +        *

> +        * For arm architecture, the 16MB virtual memory space prior to

> +        * 'kernel_start' is allocated to device modules, a PMD table if

> +        * CONFIG_HIGHMEM is enabled and a PGD table.

> +        *

> +        * For arm64 architecture, the root PGD table, device module memory

> +        * region and BPF jit region are prior to 'kernel_start'.

> +        *

> +        * To reflect the complete kernel address space, compensate these

> +        * pre-defined regions for kernel start address.

> +        */

> +       if (!strcmp(arch, "arm64"))

> +               fixup_kernel_start = etmq->etm->kernel_start -

> +                                    ARM64_PRE_START_SIZE;

> +       else if (!strcmp(arch, "arm"))

> +               fixup_kernel_start = etmq->etm->kernel_start -

> +                                    ARM_PRE_START_SIZE;


I will test your work but from a quick look wouldn't it be better to
have a single define name here?  From looking at the modifications you
did to Makefile.config there doesn't seem to be a reason to have two.

Thanks,
Mathieu

> +

> +       if (address >= fixup_kernel_start) {

>                 if (machine__is_host(machine))

>                         return PERF_RECORD_MISC_KERNEL;

>                 else

> --

> 2.17.1

>
Leo Yan June 20, 2019, 12:58 a.m. UTC | #2
Hi Mathieu,

On Wed, Jun 19, 2019 at 11:49:44AM -0600, Mathieu Poirier wrote:

[...]

> > diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config

> > index 51dd00f65709..4776c2c1fb6d 100644

> > --- a/tools/perf/Makefile.config

> > +++ b/tools/perf/Makefile.config

> > @@ -418,6 +418,30 @@ ifdef CORESIGHT

> >      endif

> >      LDFLAGS += $(LIBOPENCSD_LDFLAGS)

> >      EXTLIBS += $(OPENCSDLIBS)

> > +    ifneq ($(wildcard $(srctree)/arch/arm64/kernel/vmlinux.lds),)

> > +      # Extract info from lds:

> > +      #  . = ((((((((0xffffffffffffffff)) - (((1)) << (48)) + 1) + (0)) + (0x08000000))) + (0x08000000))) + 0x00080000;

> > +      # ARM64_PRE_START_SIZE := (0x08000000 + 0x08000000 + 0x00080000)

> > +      ARM64_PRE_START_SIZE := $(shell egrep ' \. \= \({8}0x[0-9a-fA-F]+\){2}' \

> > +        $(srctree)/arch/arm64/kernel/vmlinux.lds | \

> > +        sed -e 's/[(|)|.|=|+|<|;|-]//g' -e 's/ \+/ /g' -e 's/^[ \t]*//' | \

> > +        awk -F' ' '{print "("$$6 "+"  $$7 "+" $$8")"}' 2>/dev/null)

> > +    else

> > +      ARM64_PRE_START_SIZE := 0

> > +    endif

> > +    CFLAGS += -DARM64_PRE_START_SIZE="$(ARM64_PRE_START_SIZE)"

> > +    ifneq ($(wildcard $(srctree)/arch/arm/kernel/vmlinux.lds),)

> > +      # Extract info from lds:

> > +      #   . = ((0xC0000000)) + 0x00208000;

> > +      # ARM_PRE_START_SIZE := 0x00208000

> > +      ARM_PRE_START_SIZE := $(shell egrep ' \. \= \({2}0x[0-9a-fA-F]+\){2}' \

> > +        $(srctree)/arch/arm/kernel/vmlinux.lds | \

> > +        sed -e 's/[(|)|.|=|+|<|;|-]//g' -e 's/ \+/ /g' -e 's/^[ \t]*//' | \

> > +        awk -F' ' '{print "("$$2")"}' 2>/dev/null)

> > +    else

> > +      ARM_PRE_START_SIZE := 0

> > +    endif

> > +    CFLAGS += -DARM_PRE_START_SIZE="$(ARM_PRE_START_SIZE)"

> >      $(call detected,CONFIG_LIBOPENCSD)

> >      ifdef CSTRACE_RAW

> >        CFLAGS += -DCS_DEBUG_RAW

> > diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c

> > index 0c7776b51045..ae831f836c70 100644

> > --- a/tools/perf/util/cs-etm.c

> > +++ b/tools/perf/util/cs-etm.c

> > @@ -613,10 +613,34 @@ static void cs_etm__free(struct perf_session *session)

> >  static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address)

> >  {

> >         struct machine *machine;

> > +       u64 fixup_kernel_start = 0;

> > +       const char *arch;

> >

> >         machine = etmq->etm->machine;

> > +       arch = perf_env__arch(machine->env);

> >

> > -       if (address >= etmq->etm->kernel_start) {

> > +       /*

> > +        * Since arm and arm64 specify some memory regions prior to

> > +        * 'kernel_start', kernel addresses can be less than 'kernel_start'.

> > +        *

> > +        * For arm architecture, the 16MB virtual memory space prior to

> > +        * 'kernel_start' is allocated to device modules, a PMD table if

> > +        * CONFIG_HIGHMEM is enabled and a PGD table.

> > +        *

> > +        * For arm64 architecture, the root PGD table, device module memory

> > +        * region and BPF jit region are prior to 'kernel_start'.

> > +        *

> > +        * To reflect the complete kernel address space, compensate these

> > +        * pre-defined regions for kernel start address.

> > +        */

> > +       if (!strcmp(arch, "arm64"))

> > +               fixup_kernel_start = etmq->etm->kernel_start -

> > +                                    ARM64_PRE_START_SIZE;

> > +       else if (!strcmp(arch, "arm"))

> > +               fixup_kernel_start = etmq->etm->kernel_start -

> > +                                    ARM_PRE_START_SIZE;

> 

> I will test your work but from a quick look wouldn't it be better to

> have a single define name here?  From looking at the modifications you

> did to Makefile.config there doesn't seem to be a reason to have two.


Thanks for suggestion.  I changed to use single define
ARM_PRE_START_SIZE and sent patch v2 [1].

If possible, please test patch v2.

Thanks,
Leo Yan

[1] https://lore.kernel.org/linux-arm-kernel/20190620005428.20883-1-leo.yan@linaro.org/T/#u

> > +

> > +       if (address >= fixup_kernel_start) {

> >                 if (machine__is_host(machine))

> >                         return PERF_RECORD_MISC_KERNEL;

> >                 else

> > --

> > 2.17.1

> >
Arnaldo Carvalho de Melo June 24, 2019, 7 p.m. UTC | #3
Em Thu, Jun 20, 2019 at 08:58:29AM +0800, Leo Yan escreveu:
> Hi Mathieu,

> 

> On Wed, Jun 19, 2019 at 11:49:44AM -0600, Mathieu Poirier wrote:

> 

> [...]

> 

> > > diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config

> > > index 51dd00f65709..4776c2c1fb6d 100644

> > > --- a/tools/perf/Makefile.config

> > > +++ b/tools/perf/Makefile.config

> > > @@ -418,6 +418,30 @@ ifdef CORESIGHT

> > >      endif

> > >      LDFLAGS += $(LIBOPENCSD_LDFLAGS)

> > >      EXTLIBS += $(OPENCSDLIBS)

> > > +    ifneq ($(wildcard $(srctree)/arch/arm64/kernel/vmlinux.lds),)

> > > +      # Extract info from lds:

> > > +      #  . = ((((((((0xffffffffffffffff)) - (((1)) << (48)) + 1) + (0)) + (0x08000000))) + (0x08000000))) + 0x00080000;

> > > +      # ARM64_PRE_START_SIZE := (0x08000000 + 0x08000000 + 0x00080000)

> > > +      ARM64_PRE_START_SIZE := $(shell egrep ' \. \= \({8}0x[0-9a-fA-F]+\){2}' \

> > > +        $(srctree)/arch/arm64/kernel/vmlinux.lds | \

> > > +        sed -e 's/[(|)|.|=|+|<|;|-]//g' -e 's/ \+/ /g' -e 's/^[ \t]*//' | \

> > > +        awk -F' ' '{print "("$$6 "+"  $$7 "+" $$8")"}' 2>/dev/null)

> > > +    else

> > > +      ARM64_PRE_START_SIZE := 0

> > > +    endif

> > > +    CFLAGS += -DARM64_PRE_START_SIZE="$(ARM64_PRE_START_SIZE)"

> > > +    ifneq ($(wildcard $(srctree)/arch/arm/kernel/vmlinux.lds),)

> > > +      # Extract info from lds:

> > > +      #   . = ((0xC0000000)) + 0x00208000;

> > > +      # ARM_PRE_START_SIZE := 0x00208000

> > > +      ARM_PRE_START_SIZE := $(shell egrep ' \. \= \({2}0x[0-9a-fA-F]+\){2}' \

> > > +        $(srctree)/arch/arm/kernel/vmlinux.lds | \

> > > +        sed -e 's/[(|)|.|=|+|<|;|-]//g' -e 's/ \+/ /g' -e 's/^[ \t]*//' | \

> > > +        awk -F' ' '{print "("$$2")"}' 2>/dev/null)

> > > +    else

> > > +      ARM_PRE_START_SIZE := 0

> > > +    endif

> > > +    CFLAGS += -DARM_PRE_START_SIZE="$(ARM_PRE_START_SIZE)"

> > >      $(call detected,CONFIG_LIBOPENCSD)

> > >      ifdef CSTRACE_RAW

> > >        CFLAGS += -DCS_DEBUG_RAW

> > > diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c

> > > index 0c7776b51045..ae831f836c70 100644

> > > --- a/tools/perf/util/cs-etm.c

> > > +++ b/tools/perf/util/cs-etm.c

> > > @@ -613,10 +613,34 @@ static void cs_etm__free(struct perf_session *session)

> > >  static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address)

> > >  {

> > >         struct machine *machine;

> > > +       u64 fixup_kernel_start = 0;

> > > +       const char *arch;

> > >

> > >         machine = etmq->etm->machine;

> > > +       arch = perf_env__arch(machine->env);

> > >

> > > -       if (address >= etmq->etm->kernel_start) {

> > > +       /*

> > > +        * Since arm and arm64 specify some memory regions prior to

> > > +        * 'kernel_start', kernel addresses can be less than 'kernel_start'.

> > > +        *

> > > +        * For arm architecture, the 16MB virtual memory space prior to

> > > +        * 'kernel_start' is allocated to device modules, a PMD table if

> > > +        * CONFIG_HIGHMEM is enabled and a PGD table.

> > > +        *

> > > +        * For arm64 architecture, the root PGD table, device module memory

> > > +        * region and BPF jit region are prior to 'kernel_start'.

> > > +        *

> > > +        * To reflect the complete kernel address space, compensate these

> > > +        * pre-defined regions for kernel start address.

> > > +        */

> > > +       if (!strcmp(arch, "arm64"))

> > > +               fixup_kernel_start = etmq->etm->kernel_start -

> > > +                                    ARM64_PRE_START_SIZE;

> > > +       else if (!strcmp(arch, "arm"))

> > > +               fixup_kernel_start = etmq->etm->kernel_start -

> > > +                                    ARM_PRE_START_SIZE;

> > 

> > I will test your work but from a quick look wouldn't it be better to

> > have a single define name here?  From looking at the modifications you

> > did to Makefile.config there doesn't seem to be a reason to have two.

> 

> Thanks for suggestion.  I changed to use single define

> ARM_PRE_START_SIZE and sent patch v2 [1].

> 

> If possible, please test patch v2.

> 

> Thanks,

> Leo Yan


So just for the record, I'm waiting for Mathieu on this one, i.e. for
him to test/ack v3.

- Arnaldo
 
> [1] https://lore.kernel.org/linux-arm-kernel/20190620005428.20883-1-leo.yan@linaro.org/T/#u

> 

> > > +

> > > +       if (address >= fixup_kernel_start) {

> > >                 if (machine__is_host(machine))

> > >                         return PERF_RECORD_MISC_KERNEL;

> > >                 else

> > > --

> > > 2.17.1

> > >


-- 

- Arnaldo
Leo Yan June 25, 2019, 4:51 a.m. UTC | #4
Hi Arnaldo,

On Mon, Jun 24, 2019 at 04:00:09PM -0300, Arnaldo Carvalho de Melo wrote:

[...]

> > > > diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c

> > > > index 0c7776b51045..ae831f836c70 100644

> > > > --- a/tools/perf/util/cs-etm.c

> > > > +++ b/tools/perf/util/cs-etm.c

> > > > @@ -613,10 +613,34 @@ static void cs_etm__free(struct perf_session *session)

> > > >  static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address)

> > > >  {

> > > >         struct machine *machine;

> > > > +       u64 fixup_kernel_start = 0;

> > > > +       const char *arch;

> > > >

> > > >         machine = etmq->etm->machine;

> > > > +       arch = perf_env__arch(machine->env);

> > > >

> > > > -       if (address >= etmq->etm->kernel_start) {

> > > > +       /*

> > > > +        * Since arm and arm64 specify some memory regions prior to

> > > > +        * 'kernel_start', kernel addresses can be less than 'kernel_start'.

> > > > +        *

> > > > +        * For arm architecture, the 16MB virtual memory space prior to

> > > > +        * 'kernel_start' is allocated to device modules, a PMD table if

> > > > +        * CONFIG_HIGHMEM is enabled and a PGD table.

> > > > +        *

> > > > +        * For arm64 architecture, the root PGD table, device module memory

> > > > +        * region and BPF jit region are prior to 'kernel_start'.

> > > > +        *

> > > > +        * To reflect the complete kernel address space, compensate these

> > > > +        * pre-defined regions for kernel start address.

> > > > +        */

> > > > +       if (!strcmp(arch, "arm64"))

> > > > +               fixup_kernel_start = etmq->etm->kernel_start -

> > > > +                                    ARM64_PRE_START_SIZE;

> > > > +       else if (!strcmp(arch, "arm"))

> > > > +               fixup_kernel_start = etmq->etm->kernel_start -

> > > > +                                    ARM_PRE_START_SIZE;

> > > 

> > > I will test your work but from a quick look wouldn't it be better to

> > > have a single define name here?  From looking at the modifications you

> > > did to Makefile.config there doesn't seem to be a reason to have two.

> > 

> > Thanks for suggestion.  I changed to use single define

> > ARM_PRE_START_SIZE and sent patch v2 [1].

> > 

> > If possible, please test patch v2.

> > 

> > Thanks,

> > Leo Yan

> 

> So just for the record, I'm waiting for Mathieu on this one, i.e. for

> him to test/ack v3.


Yes, this makes sense.  I'd like to get Mathieu's green light as well,
it needs to take much time to build llvm/clang on SBC, so it's no rush.

Thanks,
Leo Yan
Mathieu Poirier June 25, 2019, 5:14 p.m. UTC | #5
On Mon, 24 Jun 2019 at 13:00, Arnaldo Carvalho de Melo
<arnaldo.melo@gmail.com> wrote:
>

> Em Thu, Jun 20, 2019 at 08:58:29AM +0800, Leo Yan escreveu:

> > Hi Mathieu,

> >

> > On Wed, Jun 19, 2019 at 11:49:44AM -0600, Mathieu Poirier wrote:

> >

> > [...]

> >

> > > > diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config

> > > > index 51dd00f65709..4776c2c1fb6d 100644

> > > > --- a/tools/perf/Makefile.config

> > > > +++ b/tools/perf/Makefile.config

> > > > @@ -418,6 +418,30 @@ ifdef CORESIGHT

> > > >      endif

> > > >      LDFLAGS += $(LIBOPENCSD_LDFLAGS)

> > > >      EXTLIBS += $(OPENCSDLIBS)

> > > > +    ifneq ($(wildcard $(srctree)/arch/arm64/kernel/vmlinux.lds),)

> > > > +      # Extract info from lds:

> > > > +      #  . = ((((((((0xffffffffffffffff)) - (((1)) << (48)) + 1) + (0)) + (0x08000000))) + (0x08000000))) + 0x00080000;

> > > > +      # ARM64_PRE_START_SIZE := (0x08000000 + 0x08000000 + 0x00080000)

> > > > +      ARM64_PRE_START_SIZE := $(shell egrep ' \. \= \({8}0x[0-9a-fA-F]+\){2}' \

> > > > +        $(srctree)/arch/arm64/kernel/vmlinux.lds | \

> > > > +        sed -e 's/[(|)|.|=|+|<|;|-]//g' -e 's/ \+/ /g' -e 's/^[ \t]*//' | \

> > > > +        awk -F' ' '{print "("$$6 "+"  $$7 "+" $$8")"}' 2>/dev/null)

> > > > +    else

> > > > +      ARM64_PRE_START_SIZE := 0

> > > > +    endif

> > > > +    CFLAGS += -DARM64_PRE_START_SIZE="$(ARM64_PRE_START_SIZE)"

> > > > +    ifneq ($(wildcard $(srctree)/arch/arm/kernel/vmlinux.lds),)

> > > > +      # Extract info from lds:

> > > > +      #   . = ((0xC0000000)) + 0x00208000;

> > > > +      # ARM_PRE_START_SIZE := 0x00208000

> > > > +      ARM_PRE_START_SIZE := $(shell egrep ' \. \= \({2}0x[0-9a-fA-F]+\){2}' \

> > > > +        $(srctree)/arch/arm/kernel/vmlinux.lds | \

> > > > +        sed -e 's/[(|)|.|=|+|<|;|-]//g' -e 's/ \+/ /g' -e 's/^[ \t]*//' | \

> > > > +        awk -F' ' '{print "("$$2")"}' 2>/dev/null)

> > > > +    else

> > > > +      ARM_PRE_START_SIZE := 0

> > > > +    endif

> > > > +    CFLAGS += -DARM_PRE_START_SIZE="$(ARM_PRE_START_SIZE)"

> > > >      $(call detected,CONFIG_LIBOPENCSD)

> > > >      ifdef CSTRACE_RAW

> > > >        CFLAGS += -DCS_DEBUG_RAW

> > > > diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c

> > > > index 0c7776b51045..ae831f836c70 100644

> > > > --- a/tools/perf/util/cs-etm.c

> > > > +++ b/tools/perf/util/cs-etm.c

> > > > @@ -613,10 +613,34 @@ static void cs_etm__free(struct perf_session *session)

> > > >  static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address)

> > > >  {

> > > >         struct machine *machine;

> > > > +       u64 fixup_kernel_start = 0;

> > > > +       const char *arch;

> > > >

> > > >         machine = etmq->etm->machine;

> > > > +       arch = perf_env__arch(machine->env);

> > > >

> > > > -       if (address >= etmq->etm->kernel_start) {

> > > > +       /*

> > > > +        * Since arm and arm64 specify some memory regions prior to

> > > > +        * 'kernel_start', kernel addresses can be less than 'kernel_start'.

> > > > +        *

> > > > +        * For arm architecture, the 16MB virtual memory space prior to

> > > > +        * 'kernel_start' is allocated to device modules, a PMD table if

> > > > +        * CONFIG_HIGHMEM is enabled and a PGD table.

> > > > +        *

> > > > +        * For arm64 architecture, the root PGD table, device module memory

> > > > +        * region and BPF jit region are prior to 'kernel_start'.

> > > > +        *

> > > > +        * To reflect the complete kernel address space, compensate these

> > > > +        * pre-defined regions for kernel start address.

> > > > +        */

> > > > +       if (!strcmp(arch, "arm64"))

> > > > +               fixup_kernel_start = etmq->etm->kernel_start -

> > > > +                                    ARM64_PRE_START_SIZE;

> > > > +       else if (!strcmp(arch, "arm"))

> > > > +               fixup_kernel_start = etmq->etm->kernel_start -

> > > > +                                    ARM_PRE_START_SIZE;

> > >

> > > I will test your work but from a quick look wouldn't it be better to

> > > have a single define name here?  From looking at the modifications you

> > > did to Makefile.config there doesn't seem to be a reason to have two.

> >

> > Thanks for suggestion.  I changed to use single define

> > ARM_PRE_START_SIZE and sent patch v2 [1].

> >

> > If possible, please test patch v2.

> >

> > Thanks,

> > Leo Yan

>

> So just for the record, I'm waiting for Mathieu on this one, i.e. for

> him to test/ack v3.


Right, please give me some time to test this.  As Leo indicated the
procedure is time consuming.

Thanks,
Mathieu

>

> - Arnaldo

>

> > [1] https://lore.kernel.org/linux-arm-kernel/20190620005428.20883-1-leo.yan@linaro.org/T/#u

> >

> > > > +

> > > > +       if (address >= fixup_kernel_start) {

> > > >                 if (machine__is_host(machine))

> > > >                         return PERF_RECORD_MISC_KERNEL;

> > > >                 else

> > > > --

> > > > 2.17.1

> > > >

>

> --

>

> - Arnaldo
diff mbox series

Patch

diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 51dd00f65709..4776c2c1fb6d 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -418,6 +418,30 @@  ifdef CORESIGHT
     endif
     LDFLAGS += $(LIBOPENCSD_LDFLAGS)
     EXTLIBS += $(OPENCSDLIBS)
+    ifneq ($(wildcard $(srctree)/arch/arm64/kernel/vmlinux.lds),)
+      # Extract info from lds:
+      #  . = ((((((((0xffffffffffffffff)) - (((1)) << (48)) + 1) + (0)) + (0x08000000))) + (0x08000000))) + 0x00080000;
+      # ARM64_PRE_START_SIZE := (0x08000000 + 0x08000000 + 0x00080000)
+      ARM64_PRE_START_SIZE := $(shell egrep ' \. \= \({8}0x[0-9a-fA-F]+\){2}' \
+        $(srctree)/arch/arm64/kernel/vmlinux.lds | \
+        sed -e 's/[(|)|.|=|+|<|;|-]//g' -e 's/ \+/ /g' -e 's/^[ \t]*//' | \
+        awk -F' ' '{print "("$$6 "+"  $$7 "+" $$8")"}' 2>/dev/null)
+    else
+      ARM64_PRE_START_SIZE := 0
+    endif
+    CFLAGS += -DARM64_PRE_START_SIZE="$(ARM64_PRE_START_SIZE)"
+    ifneq ($(wildcard $(srctree)/arch/arm/kernel/vmlinux.lds),)
+      # Extract info from lds:
+      #   . = ((0xC0000000)) + 0x00208000;
+      # ARM_PRE_START_SIZE := 0x00208000
+      ARM_PRE_START_SIZE := $(shell egrep ' \. \= \({2}0x[0-9a-fA-F]+\){2}' \
+        $(srctree)/arch/arm/kernel/vmlinux.lds | \
+        sed -e 's/[(|)|.|=|+|<|;|-]//g' -e 's/ \+/ /g' -e 's/^[ \t]*//' | \
+        awk -F' ' '{print "("$$2")"}' 2>/dev/null)
+    else
+      ARM_PRE_START_SIZE := 0
+    endif
+    CFLAGS += -DARM_PRE_START_SIZE="$(ARM_PRE_START_SIZE)"
     $(call detected,CONFIG_LIBOPENCSD)
     ifdef CSTRACE_RAW
       CFLAGS += -DCS_DEBUG_RAW
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 0c7776b51045..ae831f836c70 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -613,10 +613,34 @@  static void cs_etm__free(struct perf_session *session)
 static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address)
 {
 	struct machine *machine;
+	u64 fixup_kernel_start = 0;
+	const char *arch;
 
 	machine = etmq->etm->machine;
+	arch = perf_env__arch(machine->env);
 
-	if (address >= etmq->etm->kernel_start) {
+	/*
+	 * Since arm and arm64 specify some memory regions prior to
+	 * 'kernel_start', kernel addresses can be less than 'kernel_start'.
+	 *
+	 * For arm architecture, the 16MB virtual memory space prior to
+	 * 'kernel_start' is allocated to device modules, a PMD table if
+	 * CONFIG_HIGHMEM is enabled and a PGD table.
+	 *
+	 * For arm64 architecture, the root PGD table, device module memory
+	 * region and BPF jit region are prior to 'kernel_start'.
+	 *
+	 * To reflect the complete kernel address space, compensate these
+	 * pre-defined regions for kernel start address.
+	 */
+	if (!strcmp(arch, "arm64"))
+		fixup_kernel_start = etmq->etm->kernel_start -
+				     ARM64_PRE_START_SIZE;
+	else if (!strcmp(arch, "arm"))
+		fixup_kernel_start = etmq->etm->kernel_start -
+				     ARM_PRE_START_SIZE;
+
+	if (address >= fixup_kernel_start) {
 		if (machine__is_host(machine))
 			return PERF_RECORD_MISC_KERNEL;
 		else