diff mbox series

[2/3] bpf_encoder: Translate SHN_XINDEX in symbol's st_shndx values

Message ID 20210119221220.1745061-3-jolsa@kernel.org
State New
Headers show
Series dwarves,libbpf: Add support to use optional extended section index table | expand

Commit Message

Jiri Olsa Jan. 19, 2021, 10:12 p.m. UTC
For very large ELF objects (with many sections), we could
get special value SHN_XINDEX (65535) for symbol's st_shndx.

This patch is adding code to detect the optional extended
section index table and use it to resolve symbol's section
index id needed.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
 btf_encoder.c | 18 ++++++++++++++++++
 elf_symtab.c  | 31 ++++++++++++++++++++++++++++++-
 elf_symtab.h  |  1 +
 3 files changed, 49 insertions(+), 1 deletion(-)

Comments

Andrii Nakryiko Jan. 20, 2021, 2:03 a.m. UTC | #1
On Tue, Jan 19, 2021 at 2:16 PM Jiri Olsa <jolsa@kernel.org> wrote:
>

> For very large ELF objects (with many sections), we could

> get special value SHN_XINDEX (65535) for symbol's st_shndx.

>

> This patch is adding code to detect the optional extended

> section index table and use it to resolve symbol's section

> index id needed.

>

> Signed-off-by: Jiri Olsa <jolsa@kernel.org>

> ---

>  btf_encoder.c | 18 ++++++++++++++++++

>  elf_symtab.c  | 31 ++++++++++++++++++++++++++++++-

>  elf_symtab.h  |  1 +

>  3 files changed, 49 insertions(+), 1 deletion(-)

>

> diff --git a/btf_encoder.c b/btf_encoder.c

> index 5557c9efd365..2ab6815dc2b3 100644

> --- a/btf_encoder.c

> +++ b/btf_encoder.c

> @@ -609,6 +609,24 @@ static int collect_symbols(struct btf_elf *btfe, bool collect_percpu_vars)

>

>         /* search within symtab for percpu variables */

>         elf_symtab__for_each_symbol(btfe->symtab, core_id, sym) {


How about we introduce elf_symtab__for_each_symbol variant that uses
gelf_getsymshndx undercover and returns a real section index as the
4th macro argument?

> +               struct elf_symtab *symtab = btfe->symtab;

> +

> +               /*

> +                * Symbol's st_shndx needs to be translated with the

> +                * extended section index table.

> +                */

> +               if (sym.st_shndx == SHN_XINDEX) {

> +                       Elf32_Word xndx;

> +

> +                       if (!symtab->syms_shndx) {

> +                               fprintf(stderr, "SHN_XINDEX found, but no symtab shndx section.\n");

> +                               return -1;

> +                       }

> +

> +                       if (gelf_getsymshndx(symtab->syms, symtab->syms_shndx, core_id, &sym, &xndx))

> +                               sym.st_shndx = xndx;


This bit makes me really nervous and it looks very unclean, which is
why I think providing correct section index as part of iterator macro
is better approach. And all this code would just go away.

> +               }

> +

>                 if (collect_percpu_vars && collect_percpu_var(btfe, &sym))

>                         return -1;

>                 if (collect_function(btfe, &sym))

> diff --git a/elf_symtab.c b/elf_symtab.c

> index 741990ea3ed9..c1def0189652 100644

> --- a/elf_symtab.c

> +++ b/elf_symtab.c

> @@ -17,11 +17,13 @@

>

>  struct elf_symtab *elf_symtab__new(const char *name, Elf *elf, GElf_Ehdr *ehdr)

>  {

> +       size_t index;

> +

>         if (name == NULL)

>                 name = ".symtab";

>

>         GElf_Shdr shdr;

> -       Elf_Scn *sec = elf_section_by_name(elf, ehdr, &shdr, name, NULL);

> +       Elf_Scn *sec = elf_section_by_name(elf, ehdr, &shdr, name, &index);

>

>         if (sec == NULL)

>                 return NULL;

> @@ -29,6 +31,8 @@ struct elf_symtab *elf_symtab__new(const char *name, Elf *elf, GElf_Ehdr *ehdr)

>         if (gelf_getshdr(sec, &shdr) == NULL)

>                 return NULL;

>

> +       int xindex = elf_scnshndx(sec);


move this closer to where you check (xindex > 0)? Can you please leave
a small comment that this returns extended section index table's
section index (I know, this is horrible), if it exists. It's
notoriously hard to find anything about libelf's API, especially for
obscure APIs like this.

> +

>         struct elf_symtab *symtab = malloc(sizeof(*symtab));

>         if (symtab == NULL)

>                 return NULL;

> @@ -49,6 +53,31 @@ struct elf_symtab *elf_symtab__new(const char *name, Elf *elf, GElf_Ehdr *ehdr)

>         if (symtab->symstrs == NULL)

>                 goto out_free_name;

>

> +       /*

> +        * The .symtab section has optional extended section index

> +        * table, load its data so it can be used to resolve symbol's

> +        * section index.

> +        **/

> +       if (xindex > 0) {

> +               GElf_Shdr shdr_shndx;

> +               Elf_Scn *sec_shndx;

> +

> +               sec_shndx = elf_getscn(elf, xindex);

> +               if (sec_shndx == NULL)

> +                       goto out_free_name;

> +

> +               if (gelf_getshdr(sec_shndx, &shdr_shndx) == NULL)

> +                       goto out_free_name;

> +

> +               /* Extra check to verify it belongs to the .symtab */

> +               if (index != shdr_shndx.sh_link)

> +                       goto out_free_name;


you can also verify that section type is SHT_SYMTAB_SHNDX

> +

> +               symtab->syms_shndx = elf_getdata(elf_getscn(elf, xindex), NULL);


my mind breaks looking at all those shndxs, especially in this case
where it's not a section number, but rather data. How about we call it
something like symtab->syms_sec_idx_table or something similar but
human-comprehensible?

> +               if (symtab->syms_shndx == NULL)

> +                       goto out_free_name;

> +       }

> +

>         symtab->nr_syms = shdr.sh_size / shdr.sh_entsize;

>

>         return symtab;

> diff --git a/elf_symtab.h b/elf_symtab.h

> index 359add69c8ab..f9277a48ed4c 100644

> --- a/elf_symtab.h

> +++ b/elf_symtab.h

> @@ -16,6 +16,7 @@ struct elf_symtab {

>         uint32_t  nr_syms;

>         Elf_Data  *syms;

>         Elf_Data  *symstrs;

> +       Elf_Data  *syms_shndx;


please add comment mentioning that it's data of SHT_SYMTAB_SHNDX
section, it will make it a bit easier to Google what that is

>         char      *name;

>  };

>

> --

> 2.27.0

>
Jiri Olsa Jan. 20, 2021, 12:25 p.m. UTC | #2
On Tue, Jan 19, 2021 at 06:03:28PM -0800, Andrii Nakryiko wrote:
> On Tue, Jan 19, 2021 at 2:16 PM Jiri Olsa <jolsa@kernel.org> wrote:

> >

> > For very large ELF objects (with many sections), we could

> > get special value SHN_XINDEX (65535) for symbol's st_shndx.

> >

> > This patch is adding code to detect the optional extended

> > section index table and use it to resolve symbol's section

> > index id needed.

> >

> > Signed-off-by: Jiri Olsa <jolsa@kernel.org>

> > ---

> >  btf_encoder.c | 18 ++++++++++++++++++

> >  elf_symtab.c  | 31 ++++++++++++++++++++++++++++++-

> >  elf_symtab.h  |  1 +

> >  3 files changed, 49 insertions(+), 1 deletion(-)

> >

> > diff --git a/btf_encoder.c b/btf_encoder.c

> > index 5557c9efd365..2ab6815dc2b3 100644

> > --- a/btf_encoder.c

> > +++ b/btf_encoder.c

> > @@ -609,6 +609,24 @@ static int collect_symbols(struct btf_elf *btfe, bool collect_percpu_vars)

> >

> >         /* search within symtab for percpu variables */

> >         elf_symtab__for_each_symbol(btfe->symtab, core_id, sym) {

> 

> How about we introduce elf_symtab__for_each_symbol variant that uses

> gelf_getsymshndx undercover and returns a real section index as the

> 4th macro argument?


ok, that's good idea

> 

> > +               struct elf_symtab *symtab = btfe->symtab;

> > +

> > +               /*

> > +                * Symbol's st_shndx needs to be translated with the

> > +                * extended section index table.

> > +                */

> > +               if (sym.st_shndx == SHN_XINDEX) {

> > +                       Elf32_Word xndx;

> > +

> > +                       if (!symtab->syms_shndx) {

> > +                               fprintf(stderr, "SHN_XINDEX found, but no symtab shndx section.\n");

> > +                               return -1;

> > +                       }

> > +

> > +                       if (gelf_getsymshndx(symtab->syms, symtab->syms_shndx, core_id, &sym, &xndx))

> > +                               sym.st_shndx = xndx;

> 

> This bit makes me really nervous and it looks very unclean, which is

> why I think providing correct section index as part of iterator macro

> is better approach. And all this code would just go away.


ok

> 

> > +               }

> > +

> >                 if (collect_percpu_vars && collect_percpu_var(btfe, &sym))

> >                         return -1;

> >                 if (collect_function(btfe, &sym))

> > diff --git a/elf_symtab.c b/elf_symtab.c

> > index 741990ea3ed9..c1def0189652 100644

> > --- a/elf_symtab.c

> > +++ b/elf_symtab.c

> > @@ -17,11 +17,13 @@

> >

> >  struct elf_symtab *elf_symtab__new(const char *name, Elf *elf, GElf_Ehdr *ehdr)

> >  {

> > +       size_t index;

> > +

> >         if (name == NULL)

> >                 name = ".symtab";

> >

> >         GElf_Shdr shdr;

> > -       Elf_Scn *sec = elf_section_by_name(elf, ehdr, &shdr, name, NULL);

> > +       Elf_Scn *sec = elf_section_by_name(elf, ehdr, &shdr, name, &index);

> >

> >         if (sec == NULL)

> >                 return NULL;

> > @@ -29,6 +31,8 @@ struct elf_symtab *elf_symtab__new(const char *name, Elf *elf, GElf_Ehdr *ehdr)

> >         if (gelf_getshdr(sec, &shdr) == NULL)

> >                 return NULL;

> >

> > +       int xindex = elf_scnshndx(sec);

> 

> move this closer to where you check (xindex > 0)? Can you please leave

> a small comment that this returns extended section index table's

> section index (I know, this is horrible), if it exists. It's

> notoriously hard to find anything about libelf's API, especially for

> obscure APIs like this.


it's here because 'sec' gets overwitten shortly on with string table
I'll add some comment

> 

> > +

> >         struct elf_symtab *symtab = malloc(sizeof(*symtab));

> >         if (symtab == NULL)

> >                 return NULL;

> > @@ -49,6 +53,31 @@ struct elf_symtab *elf_symtab__new(const char *name, Elf *elf, GElf_Ehdr *ehdr)

> >         if (symtab->symstrs == NULL)

> >                 goto out_free_name;

> >

> > +       /*

> > +        * The .symtab section has optional extended section index

> > +        * table, load its data so it can be used to resolve symbol's

> > +        * section index.

> > +        **/

> > +       if (xindex > 0) {

> > +               GElf_Shdr shdr_shndx;

> > +               Elf_Scn *sec_shndx;

> > +

> > +               sec_shndx = elf_getscn(elf, xindex);

> > +               if (sec_shndx == NULL)

> > +                       goto out_free_name;

> > +

> > +               if (gelf_getshdr(sec_shndx, &shdr_shndx) == NULL)

> > +                       goto out_free_name;

> > +

> > +               /* Extra check to verify it belongs to the .symtab */

> > +               if (index != shdr_shndx.sh_link)

> > +                       goto out_free_name;

> 

> you can also verify that section type is SHT_SYMTAB_SHNDX


ok

> 

> > +

> > +               symtab->syms_shndx = elf_getdata(elf_getscn(elf, xindex), NULL);

> 

> my mind breaks looking at all those shndxs, especially in this case


you better not look at elfutils code then ;-)

> where it's not a section number, but rather data. How about we call it

> something like symtab->syms_sec_idx_table or something similar but

> human-comprehensible?


sure, will change

> 

> > +               if (symtab->syms_shndx == NULL)

> > +                       goto out_free_name;

> > +       }

> > +

> >         symtab->nr_syms = shdr.sh_size / shdr.sh_entsize;

> >

> >         return symtab;

> > diff --git a/elf_symtab.h b/elf_symtab.h

> > index 359add69c8ab..f9277a48ed4c 100644

> > --- a/elf_symtab.h

> > +++ b/elf_symtab.h

> > @@ -16,6 +16,7 @@ struct elf_symtab {

> >         uint32_t  nr_syms;

> >         Elf_Data  *syms;

> >         Elf_Data  *symstrs;

> > +       Elf_Data  *syms_shndx;

> 

> please add comment mentioning that it's data of SHT_SYMTAB_SHNDX

> section, it will make it a bit easier to Google what that is


ok

thanks,
jirka
diff mbox series

Patch

diff --git a/btf_encoder.c b/btf_encoder.c
index 5557c9efd365..2ab6815dc2b3 100644
--- a/btf_encoder.c
+++ b/btf_encoder.c
@@ -609,6 +609,24 @@  static int collect_symbols(struct btf_elf *btfe, bool collect_percpu_vars)
 
 	/* search within symtab for percpu variables */
 	elf_symtab__for_each_symbol(btfe->symtab, core_id, sym) {
+		struct elf_symtab *symtab = btfe->symtab;
+
+		/*
+		 * Symbol's st_shndx needs to be translated with the
+		 * extended section index table.
+		 */
+		if (sym.st_shndx == SHN_XINDEX) {
+			Elf32_Word xndx;
+
+			if (!symtab->syms_shndx) {
+				fprintf(stderr, "SHN_XINDEX found, but no symtab shndx section.\n");
+				return -1;
+			}
+
+			if (gelf_getsymshndx(symtab->syms, symtab->syms_shndx, core_id, &sym, &xndx))
+				sym.st_shndx = xndx;
+		}
+
 		if (collect_percpu_vars && collect_percpu_var(btfe, &sym))
 			return -1;
 		if (collect_function(btfe, &sym))
diff --git a/elf_symtab.c b/elf_symtab.c
index 741990ea3ed9..c1def0189652 100644
--- a/elf_symtab.c
+++ b/elf_symtab.c
@@ -17,11 +17,13 @@ 
 
 struct elf_symtab *elf_symtab__new(const char *name, Elf *elf, GElf_Ehdr *ehdr)
 {
+	size_t index;
+
 	if (name == NULL)
 		name = ".symtab";
 
 	GElf_Shdr shdr;
-	Elf_Scn *sec = elf_section_by_name(elf, ehdr, &shdr, name, NULL);
+	Elf_Scn *sec = elf_section_by_name(elf, ehdr, &shdr, name, &index);
 
 	if (sec == NULL)
 		return NULL;
@@ -29,6 +31,8 @@  struct elf_symtab *elf_symtab__new(const char *name, Elf *elf, GElf_Ehdr *ehdr)
 	if (gelf_getshdr(sec, &shdr) == NULL)
 		return NULL;
 
+	int xindex = elf_scnshndx(sec);
+
 	struct elf_symtab *symtab = malloc(sizeof(*symtab));
 	if (symtab == NULL)
 		return NULL;
@@ -49,6 +53,31 @@  struct elf_symtab *elf_symtab__new(const char *name, Elf *elf, GElf_Ehdr *ehdr)
 	if (symtab->symstrs == NULL)
 		goto out_free_name;
 
+	/*
+	 * The .symtab section has optional extended section index
+	 * table, load its data so it can be used to resolve symbol's
+	 * section index.
+	 **/
+	if (xindex > 0) {
+		GElf_Shdr shdr_shndx;
+		Elf_Scn *sec_shndx;
+
+		sec_shndx = elf_getscn(elf, xindex);
+		if (sec_shndx == NULL)
+			goto out_free_name;
+
+		if (gelf_getshdr(sec_shndx, &shdr_shndx) == NULL)
+			goto out_free_name;
+
+		/* Extra check to verify it belongs to the .symtab */
+		if (index != shdr_shndx.sh_link)
+			goto out_free_name;
+
+		symtab->syms_shndx = elf_getdata(elf_getscn(elf, xindex), NULL);
+		if (symtab->syms_shndx == NULL)
+			goto out_free_name;
+	}
+
 	symtab->nr_syms = shdr.sh_size / shdr.sh_entsize;
 
 	return symtab;
diff --git a/elf_symtab.h b/elf_symtab.h
index 359add69c8ab..f9277a48ed4c 100644
--- a/elf_symtab.h
+++ b/elf_symtab.h
@@ -16,6 +16,7 @@  struct elf_symtab {
 	uint32_t  nr_syms;
 	Elf_Data  *syms;
 	Elf_Data  *symstrs;
+	Elf_Data  *syms_shndx;
 	char	  *name;
 };