diff mbox series

[RFC,bpf-next,07/16] kallsyms: Use rb tree for kallsyms name search

Message ID 20201022082138.2322434-8-jolsa@kernel.org
State New
Headers show
Series bpf: Speed up trampoline attach | expand

Commit Message

Jiri Olsa Oct. 22, 2020, 8:21 a.m. UTC
The kallsyms_expand_symbol function showed in several bpf related
profiles, because it's doing linear search.

Before:

 Performance counter stats for './src/bpftrace -ve kfunc:__x64_sys_s* \
   { printf("test\n"); } i:ms:10 { printf("exit\n"); exit();}' (5 runs):

     2,535,458,767      cycles:k                         ( +-  0.55% )
       940,046,382      cycles:u                         ( +-  0.27% )

             33.60 +- 3.27 seconds time elapsed  ( +-  9.73% )

Loading all the vmlinux symbols in rbtree and and switch to rbtree
search in kallsyms_lookup_name function to save few cycles and time.

After:

 Performance counter stats for './src/bpftrace -ve kfunc:__x64_sys_s* \
   { printf("test\n"); } i:ms:10 { printf("exit\n"); exit();}' (5 runs):

     2,199,433,771      cycles:k                         ( +-  0.55% )
       936,105,469      cycles:u                         ( +-  0.37% )

             26.48 +- 3.57 seconds time elapsed  ( +- 13.49% )

Each symbol takes 160 bytes, so for my .config I've got about 18 MBs
used for 115285 symbols.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
 kernel/kallsyms.c | 95 ++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 86 insertions(+), 9 deletions(-)

Comments

Jiri Olsa Oct. 28, 2020, 6:25 p.m. UTC | #1
On Thu, Oct 22, 2020 at 10:21:29AM +0200, Jiri Olsa wrote:
> The kallsyms_expand_symbol function showed in several bpf related
> profiles, because it's doing linear search.
> 
> Before:
> 
>  Performance counter stats for './src/bpftrace -ve kfunc:__x64_sys_s* \
>    { printf("test\n"); } i:ms:10 { printf("exit\n"); exit();}' (5 runs):
> 
>      2,535,458,767      cycles:k                         ( +-  0.55% )
>        940,046,382      cycles:u                         ( +-  0.27% )
> 
>              33.60 +- 3.27 seconds time elapsed  ( +-  9.73% )
> 
> Loading all the vmlinux symbols in rbtree and and switch to rbtree
> search in kallsyms_lookup_name function to save few cycles and time.
> 
> After:
> 
>  Performance counter stats for './src/bpftrace -ve kfunc:__x64_sys_s* \
>    { printf("test\n"); } i:ms:10 { printf("exit\n"); exit();}' (5 runs):
> 
>      2,199,433,771      cycles:k                         ( +-  0.55% )
>        936,105,469      cycles:u                         ( +-  0.37% )
> 
>              26.48 +- 3.57 seconds time elapsed  ( +- 13.49% )
> 
> Each symbol takes 160 bytes, so for my .config I've got about 18 MBs
> used for 115285 symbols.
> 
> Signed-off-by: Jiri Olsa <jolsa@kernel.org>

FYI there's init_kprobes dependency on kallsyms_lookup_name in early
init call, so this won't work as it is :-\ will address this in v2

also I'll switch to sorted array and bsearch, because kallsyms is not
dynamically updated

jirka

> ---
>  kernel/kallsyms.c | 95 ++++++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 86 insertions(+), 9 deletions(-)
> 
> diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
> index 4fb15fa96734..107c8284170e 100644
> --- a/kernel/kallsyms.c
> +++ b/kernel/kallsyms.c
> @@ -50,6 +50,36 @@ extern const u16 kallsyms_token_index[] __weak;
>  
>  extern const unsigned int kallsyms_markers[] __weak;
>  
> +static struct kmem_cache *symbol_cachep;
> +
> +struct symbol {
> +	char		name[KSYM_NAME_LEN];
> +	unsigned long	addr;
> +	struct rb_node	rb_node;
> +};
> +
> +static struct rb_root symbols_root = RB_ROOT;
> +
> +static struct symbol *find_symbol(const char *name)
> +{
> +	struct symbol *sym;
> +	struct rb_node *n;
> +	int err;
> +
> +	n = symbols_root.rb_node;
> +	while (n) {
> +		sym = rb_entry(n, struct symbol, rb_node);
> +		err = strcmp(name, sym->name);
> +		if (err < 0)
> +			n = n->rb_left;
> +		else if (err > 0)
> +			n = n->rb_right;
> +		else
> +			return sym;
> +	}
> +	return NULL;
> +}
> +
>  /*
>   * Expand a compressed symbol data into the resulting uncompressed string,
>   * if uncompressed string is too long (>= maxlen), it will be truncated,
> @@ -164,16 +194,12 @@ static unsigned long kallsyms_sym_address(int idx)
>  /* Lookup the address for this symbol. Returns 0 if not found. */
>  unsigned long kallsyms_lookup_name(const char *name)
>  {
> -	char namebuf[KSYM_NAME_LEN];
> -	unsigned long i;
> -	unsigned int off;
> +	struct symbol *sym;
>  
> -	for (i = 0, off = 0; i < kallsyms_num_syms; i++) {
> -		off = kallsyms_expand_symbol(off, namebuf, ARRAY_SIZE(namebuf));
> +	sym = find_symbol(name);
> +	if (sym)
> +		return sym->addr;
>  
> -		if (strcmp(namebuf, name) == 0)
> -			return kallsyms_sym_address(i);
> -	}
>  	return module_kallsyms_lookup_name(name);
>  }
>  
> @@ -743,9 +769,60 @@ static const struct proc_ops kallsyms_proc_ops = {
>  	.proc_release	= seq_release_private,
>  };
>  
> +static bool __init add_symbol(struct symbol *new)
> +{
> +	struct rb_node *parent = NULL;
> +	struct rb_node **p;
> +	struct symbol *sym;
> +	int err;
> +
> +	p = &symbols_root.rb_node;
> +
> +	while (*p != NULL) {
> +		parent = *p;
> +		sym = rb_entry(parent, struct symbol, rb_node);
> +		err = strcmp(new->name, sym->name);
> +		if (err < 0)
> +			p = &(*p)->rb_left;
> +		else if (err > 0)
> +			p = &(*p)->rb_right;
> +		else
> +			return false;
> +	}
> +
> +	rb_link_node(&new->rb_node, parent, p);
> +	rb_insert_color(&new->rb_node, &symbols_root);
> +	return true;
> +}
> +
> +static int __init kallsyms_name_search_init(void)
> +{
> +	bool sym_added = true;
> +	struct symbol *sym;
> +	unsigned int off;
> +	unsigned long i;
> +
> +	symbol_cachep = KMEM_CACHE(symbol, SLAB_PANIC|SLAB_ACCOUNT);
> +
> +	for (i = 0, off = 0; i < kallsyms_num_syms; i++) {
> +		if (sym_added) {
> +			sym = kmem_cache_alloc(symbol_cachep, GFP_KERNEL);
> +			if (!sym)
> +				return -ENOMEM;
> +		}
> +		off = kallsyms_expand_symbol(off, sym->name, ARRAY_SIZE(sym->name));
> +		sym->addr = kallsyms_sym_address(i);
> +		sym_added = add_symbol(sym);
> +	}
> +
> +	if (!sym_added)
> +		kmem_cache_free(symbol_cachep, sym);
> +	return 0;
> +}
> +
>  static int __init kallsyms_init(void)
>  {
>  	proc_create("kallsyms", 0444, NULL, &kallsyms_proc_ops);
> -	return 0;
> +	return kallsyms_name_search_init();
>  }
>  device_initcall(kallsyms_init);
> -- 
> 2.26.2
>
Alexei Starovoitov Oct. 28, 2020, 9:15 p.m. UTC | #2
On Wed, Oct 28, 2020 at 07:25:34PM +0100, Jiri Olsa wrote:
> On Thu, Oct 22, 2020 at 10:21:29AM +0200, Jiri Olsa wrote:
> > The kallsyms_expand_symbol function showed in several bpf related
> > profiles, because it's doing linear search.
> > 
> > Before:
> > 
> >  Performance counter stats for './src/bpftrace -ve kfunc:__x64_sys_s* \
> >    { printf("test\n"); } i:ms:10 { printf("exit\n"); exit();}' (5 runs):
> > 
> >      2,535,458,767      cycles:k                         ( +-  0.55% )
> >        940,046,382      cycles:u                         ( +-  0.27% )
> > 
> >              33.60 +- 3.27 seconds time elapsed  ( +-  9.73% )
> > 
> > Loading all the vmlinux symbols in rbtree and and switch to rbtree
> > search in kallsyms_lookup_name function to save few cycles and time.
> > 
> > After:
> > 
> >  Performance counter stats for './src/bpftrace -ve kfunc:__x64_sys_s* \
> >    { printf("test\n"); } i:ms:10 { printf("exit\n"); exit();}' (5 runs):
> > 
> >      2,199,433,771      cycles:k                         ( +-  0.55% )
> >        936,105,469      cycles:u                         ( +-  0.37% )
> > 
> >              26.48 +- 3.57 seconds time elapsed  ( +- 13.49% )
> > 
> > Each symbol takes 160 bytes, so for my .config I've got about 18 MBs
> > used for 115285 symbols.
> > 
> > Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> 
> FYI there's init_kprobes dependency on kallsyms_lookup_name in early
> init call, so this won't work as it is :-\ will address this in v2
> 
> also I'll switch to sorted array and bsearch, because kallsyms is not
> dynamically updated

wait wat? kallsyms are dynamically updated. bpf adds and removes from it.
You even worked on some of those patches :)
Andrii Nakryiko Oct. 28, 2020, 10:40 p.m. UTC | #3
On Wed, Oct 28, 2020 at 3:29 PM Jiri Olsa <jolsa@redhat.com> wrote:
>
> On Thu, Oct 22, 2020 at 10:21:29AM +0200, Jiri Olsa wrote:
> > The kallsyms_expand_symbol function showed in several bpf related
> > profiles, because it's doing linear search.
> >
> > Before:
> >
> >  Performance counter stats for './src/bpftrace -ve kfunc:__x64_sys_s* \
> >    { printf("test\n"); } i:ms:10 { printf("exit\n"); exit();}' (5 runs):
> >
> >      2,535,458,767      cycles:k                         ( +-  0.55% )
> >        940,046,382      cycles:u                         ( +-  0.27% )
> >
> >              33.60 +- 3.27 seconds time elapsed  ( +-  9.73% )
> >
> > Loading all the vmlinux symbols in rbtree and and switch to rbtree
> > search in kallsyms_lookup_name function to save few cycles and time.
> >
> > After:
> >
> >  Performance counter stats for './src/bpftrace -ve kfunc:__x64_sys_s* \
> >    { printf("test\n"); } i:ms:10 { printf("exit\n"); exit();}' (5 runs):
> >
> >      2,199,433,771      cycles:k                         ( +-  0.55% )
> >        936,105,469      cycles:u                         ( +-  0.37% )
> >
> >              26.48 +- 3.57 seconds time elapsed  ( +- 13.49% )
> >
> > Each symbol takes 160 bytes, so for my .config I've got about 18 MBs
> > used for 115285 symbols.
> >
> > Signed-off-by: Jiri Olsa <jolsa@kernel.org>
>
> FYI there's init_kprobes dependency on kallsyms_lookup_name in early
> init call, so this won't work as it is :-\ will address this in v2
>
> also I'll switch to sorted array and bsearch, because kallsyms is not
> dynamically updated

what about kernel modules then?

>
> jirka
>
> > ---
> >  kernel/kallsyms.c | 95 ++++++++++++++++++++++++++++++++++++++++++-----
> >  1 file changed, 86 insertions(+), 9 deletions(-)
> >

[...]
Jiri Olsa Oct. 29, 2020, 9:29 a.m. UTC | #4
On Wed, Oct 28, 2020 at 02:15:02PM -0700, Alexei Starovoitov wrote:
> On Wed, Oct 28, 2020 at 07:25:34PM +0100, Jiri Olsa wrote:
> > On Thu, Oct 22, 2020 at 10:21:29AM +0200, Jiri Olsa wrote:
> > > The kallsyms_expand_symbol function showed in several bpf related
> > > profiles, because it's doing linear search.
> > > 
> > > Before:
> > > 
> > >  Performance counter stats for './src/bpftrace -ve kfunc:__x64_sys_s* \
> > >    { printf("test\n"); } i:ms:10 { printf("exit\n"); exit();}' (5 runs):
> > > 
> > >      2,535,458,767      cycles:k                         ( +-  0.55% )
> > >        940,046,382      cycles:u                         ( +-  0.27% )
> > > 
> > >              33.60 +- 3.27 seconds time elapsed  ( +-  9.73% )
> > > 
> > > Loading all the vmlinux symbols in rbtree and and switch to rbtree
> > > search in kallsyms_lookup_name function to save few cycles and time.
> > > 
> > > After:
> > > 
> > >  Performance counter stats for './src/bpftrace -ve kfunc:__x64_sys_s* \
> > >    { printf("test\n"); } i:ms:10 { printf("exit\n"); exit();}' (5 runs):
> > > 
> > >      2,199,433,771      cycles:k                         ( +-  0.55% )
> > >        936,105,469      cycles:u                         ( +-  0.37% )
> > > 
> > >              26.48 +- 3.57 seconds time elapsed  ( +- 13.49% )
> > > 
> > > Each symbol takes 160 bytes, so for my .config I've got about 18 MBs
> > > used for 115285 symbols.
> > > 
> > > Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> > 
> > FYI there's init_kprobes dependency on kallsyms_lookup_name in early
> > init call, so this won't work as it is :-\ will address this in v2
> > 
> > also I'll switch to sorted array and bsearch, because kallsyms is not
> > dynamically updated
> 
> wait wat? kallsyms are dynamically updated. bpf adds and removes from it.
> You even worked on some of those patches :)

yes, it's tricky ;-) kallsyms_lookup_name function goes through builtin
(compiled in) symbols and "standard modules" symbols

we add bpf symbols as "pseudo module" symbol, which is not covered by
this function search, it is covered when displaying /proc/kallsyms
(check get_ksymbol_bpf function), same for ftrace and kprobe symbols

AFAICS we use kallsyms_lookup_name only to search builtin kernel symbols,
so we don't care it does not cover "pseudo modules"

now.. what's even more funny, is that if I switch to sort/bsearch,
performance is back on the same numbers as the current code :-\

jirka
Jiri Olsa Oct. 29, 2020, 9:33 a.m. UTC | #5
On Wed, Oct 28, 2020 at 03:40:46PM -0700, Andrii Nakryiko wrote:
> On Wed, Oct 28, 2020 at 3:29 PM Jiri Olsa <jolsa@redhat.com> wrote:
> >
> > On Thu, Oct 22, 2020 at 10:21:29AM +0200, Jiri Olsa wrote:
> > > The kallsyms_expand_symbol function showed in several bpf related
> > > profiles, because it's doing linear search.
> > >
> > > Before:
> > >
> > >  Performance counter stats for './src/bpftrace -ve kfunc:__x64_sys_s* \
> > >    { printf("test\n"); } i:ms:10 { printf("exit\n"); exit();}' (5 runs):
> > >
> > >      2,535,458,767      cycles:k                         ( +-  0.55% )
> > >        940,046,382      cycles:u                         ( +-  0.27% )
> > >
> > >              33.60 +- 3.27 seconds time elapsed  ( +-  9.73% )
> > >
> > > Loading all the vmlinux symbols in rbtree and and switch to rbtree
> > > search in kallsyms_lookup_name function to save few cycles and time.
> > >
> > > After:
> > >
> > >  Performance counter stats for './src/bpftrace -ve kfunc:__x64_sys_s* \
> > >    { printf("test\n"); } i:ms:10 { printf("exit\n"); exit();}' (5 runs):
> > >
> > >      2,199,433,771      cycles:k                         ( +-  0.55% )
> > >        936,105,469      cycles:u                         ( +-  0.37% )
> > >
> > >              26.48 +- 3.57 seconds time elapsed  ( +- 13.49% )
> > >
> > > Each symbol takes 160 bytes, so for my .config I've got about 18 MBs
> > > used for 115285 symbols.
> > >
> > > Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> >
> > FYI there's init_kprobes dependency on kallsyms_lookup_name in early
> > init call, so this won't work as it is :-\ will address this in v2
> >
> > also I'll switch to sorted array and bsearch, because kallsyms is not
> > dynamically updated
> 
> what about kernel modules then?

please check my answer to Alexei, I just answered it there

thanks,
jirka

> 
> >
> > jirka
> >
> > > ---
> > >  kernel/kallsyms.c | 95 ++++++++++++++++++++++++++++++++++++++++++-----
> > >  1 file changed, 86 insertions(+), 9 deletions(-)
> > >
> 
> [...]
>
Andrii Nakryiko Oct. 29, 2020, 10:45 p.m. UTC | #6
On Thu, Oct 29, 2020 at 2:31 AM Jiri Olsa <jolsa@redhat.com> wrote:
>

> On Wed, Oct 28, 2020 at 02:15:02PM -0700, Alexei Starovoitov wrote:

> > On Wed, Oct 28, 2020 at 07:25:34PM +0100, Jiri Olsa wrote:

> > > On Thu, Oct 22, 2020 at 10:21:29AM +0200, Jiri Olsa wrote:

> > > > The kallsyms_expand_symbol function showed in several bpf related

> > > > profiles, because it's doing linear search.

> > > >

> > > > Before:

> > > >

> > > >  Performance counter stats for './src/bpftrace -ve kfunc:__x64_sys_s* \

> > > >    { printf("test\n"); } i:ms:10 { printf("exit\n"); exit();}' (5 runs):

> > > >

> > > >      2,535,458,767      cycles:k                         ( +-  0.55% )

> > > >        940,046,382      cycles:u                         ( +-  0.27% )

> > > >

> > > >              33.60 +- 3.27 seconds time elapsed  ( +-  9.73% )

> > > >

> > > > Loading all the vmlinux symbols in rbtree and and switch to rbtree

> > > > search in kallsyms_lookup_name function to save few cycles and time.

> > > >

> > > > After:

> > > >

> > > >  Performance counter stats for './src/bpftrace -ve kfunc:__x64_sys_s* \

> > > >    { printf("test\n"); } i:ms:10 { printf("exit\n"); exit();}' (5 runs):

> > > >

> > > >      2,199,433,771      cycles:k                         ( +-  0.55% )

> > > >        936,105,469      cycles:u                         ( +-  0.37% )

> > > >

> > > >              26.48 +- 3.57 seconds time elapsed  ( +- 13.49% )

> > > >

> > > > Each symbol takes 160 bytes, so for my .config I've got about 18 MBs

> > > > used for 115285 symbols.

> > > >

> > > > Signed-off-by: Jiri Olsa <jolsa@kernel.org>

> > >

> > > FYI there's init_kprobes dependency on kallsyms_lookup_name in early

> > > init call, so this won't work as it is :-\ will address this in v2

> > >

> > > also I'll switch to sorted array and bsearch, because kallsyms is not

> > > dynamically updated

> >

> > wait wat? kallsyms are dynamically updated. bpf adds and removes from it.

> > You even worked on some of those patches :)

>

> yes, it's tricky ;-) kallsyms_lookup_name function goes through builtin

> (compiled in) symbols and "standard modules" symbols

>

> we add bpf symbols as "pseudo module" symbol, which is not covered by

> this function search, it is covered when displaying /proc/kallsyms

> (check get_ksymbol_bpf function), same for ftrace and kprobe symbols

>

> AFAICS we use kallsyms_lookup_name only to search builtin kernel symbols,

> so we don't care it does not cover "pseudo modules"

>

> now.. what's even more funny, is that if I switch to sort/bsearch,

> performance is back on the same numbers as the current code :-\


If you do hashmap instead of RB tree or sort+bsearch, it will beat
both (assuming you have an adequate number of hash buckets, of
course).

>

> jirka

>
diff mbox series

Patch

diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 4fb15fa96734..107c8284170e 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -50,6 +50,36 @@  extern const u16 kallsyms_token_index[] __weak;
 
 extern const unsigned int kallsyms_markers[] __weak;
 
+static struct kmem_cache *symbol_cachep;
+
+struct symbol {
+	char		name[KSYM_NAME_LEN];
+	unsigned long	addr;
+	struct rb_node	rb_node;
+};
+
+static struct rb_root symbols_root = RB_ROOT;
+
+static struct symbol *find_symbol(const char *name)
+{
+	struct symbol *sym;
+	struct rb_node *n;
+	int err;
+
+	n = symbols_root.rb_node;
+	while (n) {
+		sym = rb_entry(n, struct symbol, rb_node);
+		err = strcmp(name, sym->name);
+		if (err < 0)
+			n = n->rb_left;
+		else if (err > 0)
+			n = n->rb_right;
+		else
+			return sym;
+	}
+	return NULL;
+}
+
 /*
  * Expand a compressed symbol data into the resulting uncompressed string,
  * if uncompressed string is too long (>= maxlen), it will be truncated,
@@ -164,16 +194,12 @@  static unsigned long kallsyms_sym_address(int idx)
 /* Lookup the address for this symbol. Returns 0 if not found. */
 unsigned long kallsyms_lookup_name(const char *name)
 {
-	char namebuf[KSYM_NAME_LEN];
-	unsigned long i;
-	unsigned int off;
+	struct symbol *sym;
 
-	for (i = 0, off = 0; i < kallsyms_num_syms; i++) {
-		off = kallsyms_expand_symbol(off, namebuf, ARRAY_SIZE(namebuf));
+	sym = find_symbol(name);
+	if (sym)
+		return sym->addr;
 
-		if (strcmp(namebuf, name) == 0)
-			return kallsyms_sym_address(i);
-	}
 	return module_kallsyms_lookup_name(name);
 }
 
@@ -743,9 +769,60 @@  static const struct proc_ops kallsyms_proc_ops = {
 	.proc_release	= seq_release_private,
 };
 
+static bool __init add_symbol(struct symbol *new)
+{
+	struct rb_node *parent = NULL;
+	struct rb_node **p;
+	struct symbol *sym;
+	int err;
+
+	p = &symbols_root.rb_node;
+
+	while (*p != NULL) {
+		parent = *p;
+		sym = rb_entry(parent, struct symbol, rb_node);
+		err = strcmp(new->name, sym->name);
+		if (err < 0)
+			p = &(*p)->rb_left;
+		else if (err > 0)
+			p = &(*p)->rb_right;
+		else
+			return false;
+	}
+
+	rb_link_node(&new->rb_node, parent, p);
+	rb_insert_color(&new->rb_node, &symbols_root);
+	return true;
+}
+
+static int __init kallsyms_name_search_init(void)
+{
+	bool sym_added = true;
+	struct symbol *sym;
+	unsigned int off;
+	unsigned long i;
+
+	symbol_cachep = KMEM_CACHE(symbol, SLAB_PANIC|SLAB_ACCOUNT);
+
+	for (i = 0, off = 0; i < kallsyms_num_syms; i++) {
+		if (sym_added) {
+			sym = kmem_cache_alloc(symbol_cachep, GFP_KERNEL);
+			if (!sym)
+				return -ENOMEM;
+		}
+		off = kallsyms_expand_symbol(off, sym->name, ARRAY_SIZE(sym->name));
+		sym->addr = kallsyms_sym_address(i);
+		sym_added = add_symbol(sym);
+	}
+
+	if (!sym_added)
+		kmem_cache_free(symbol_cachep, sym);
+	return 0;
+}
+
 static int __init kallsyms_init(void)
 {
 	proc_create("kallsyms", 0444, NULL, &kallsyms_proc_ops);
-	return 0;
+	return kallsyms_name_search_init();
 }
 device_initcall(kallsyms_init);