diff mbox series

libbpf hashmap: Fix undefined behavior in hash_bits

Message ID 20201029160938.154084-1-irogers@google.com
State New
Headers show
Series libbpf hashmap: Fix undefined behavior in hash_bits | expand

Commit Message

Ian Rogers Oct. 29, 2020, 4:09 p.m. UTC
If bits is 0, which is the case when the map is empty, then the >> shifts
by the full width of the register, which is undefined behavior - on x86 it
behaves the same as a shift by 0. Fix by handling the 0 case explicitly
when running with address sanitizer.

A variant of this patch was posted previously as:
https://lore.kernel.org/lkml/20200508063954.256593-1-irogers@google.com/

Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/lib/bpf/hashmap.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

Comments

Song Liu Oct. 29, 2020, 5:45 p.m. UTC | #1
> On Oct 29, 2020, at 9:09 AM, Ian Rogers <irogers@google.com> wrote:

> 

> If bits is 0, the case when the map is empty, then the >> is the size of

> the register which is undefined behavior - on x86 it is the same as a

> shift by 0. Fix by handling the 0 case explicitly when running with

> address sanitizer.

> 

> A variant of this patch was posted previously as:

> https://lore.kernel.org/lkml/20200508063954.256593-1-irogers@google.com/

> 

> Signed-off-by: Ian Rogers <irogers@google.com>

> ---

> tools/lib/bpf/hashmap.h | 14 ++++++++++++++

> 1 file changed, 14 insertions(+)

> 

> diff --git a/tools/lib/bpf/hashmap.h b/tools/lib/bpf/hashmap.h

> index d9b385fe808c..27d0556527d3 100644

> --- a/tools/lib/bpf/hashmap.h

> +++ b/tools/lib/bpf/hashmap.h

> @@ -12,9 +12,23 @@

> #include <stddef.h>

> #include <limits.h>

> 

> +#ifdef __has_feature

> +#define HAVE_FEATURE(f) __has_feature(f)

> +#else

> +#define HAVE_FEATURE(f) 0

> +#endif

> +

> static inline size_t hash_bits(size_t h, int bits)

> {

> 	/* shuffle bits and return requested number of upper bits */

> +#if defined(ADDRESS_SANITIZER) || HAVE_FEATURE(address_sanitizer)


I am not very familiar with these features. Is address sanitizer the same
as the undefined behavior sanitizer (mentioned in the previous version)?

> +	/*

> +	 * If the requested bits == 0 avoid undefined behavior from a

> +	 * greater-than bit width shift right (aka invalid-shift-exponent).

> +	 */

> +	if (bits == 0)

> +		return -1;


Shall we return 0 or -1 (0xffffffff) here? 

Also, we have HASHMAP_MIN_CAP_BITS == 2. Shall we just make sure we
never feed bits == 0 into hash_bits()?

Thanks,
Song


> +#endif

> #if (__SIZEOF_SIZE_T__ == __SIZEOF_LONG_LONG__)

> 	/* LP64 case */

> 	return (h * 11400714819323198485llu) >> (__SIZEOF_LONG_LONG__ * 8 - bits);

> -- 

> 2.29.1.341.ge80a0c044ae-goog

>
Ian Rogers Oct. 29, 2020, 7:37 p.m. UTC | #2
On Thu, Oct 29, 2020 at 10:45 AM Song Liu <songliubraving@fb.com> wrote:
>
> > On Oct 29, 2020, at 9:09 AM, Ian Rogers <irogers@google.com> wrote:
> >
> > If bits is 0, the case when the map is empty, then the >> is the size of
> > the register which is undefined behavior - on x86 it is the same as a
> > shift by 0. Fix by handling the 0 case explicitly when running with
> > address sanitizer.
> >
> > A variant of this patch was posted previously as:
> > https://lore.kernel.org/lkml/20200508063954.256593-1-irogers@google.com/
> >
> > Signed-off-by: Ian Rogers <irogers@google.com>
> > ---
> > tools/lib/bpf/hashmap.h | 14 ++++++++++++++
> > 1 file changed, 14 insertions(+)
> >
> > diff --git a/tools/lib/bpf/hashmap.h b/tools/lib/bpf/hashmap.h
> > index d9b385fe808c..27d0556527d3 100644
> > --- a/tools/lib/bpf/hashmap.h
> > +++ b/tools/lib/bpf/hashmap.h
> > @@ -12,9 +12,23 @@
> > #include <stddef.h>
> > #include <limits.h>
> >
> > +#ifdef __has_feature
> > +#define HAVE_FEATURE(f) __has_feature(f)
> > +#else
> > +#define HAVE_FEATURE(f) 0
> > +#endif
> > +
> > static inline size_t hash_bits(size_t h, int bits)
> > {
> >       /* shuffle bits and return requested number of upper bits */
> > +#if defined(ADDRESS_SANITIZER) || HAVE_FEATURE(address_sanitizer)
>
> I am not very familiar with these features. Is address sanitizer same
> as undefined behavior sanitizer (mentioned in previous version)?

My preference would be to special-case bits == 0 without the feature
guards, as per the original change; this is the most correct. There is
some feature support for detecting ubsan:
https://github.com/google/sanitizers/issues/765
In my case I see this with address sanitizer, and older versions of
clang don't expose ubsan as a feature.

> > +     /*
> > +      * If the requested bits == 0 avoid undefined behavior from a
> > +      * greater-than bit width shift right (aka invalid-shift-exponent).
> > +      */
> > +     if (bits == 0)
> > +             return -1;
>
> Shall we return 0 or -1 (0xffffffff) here?

The value isn't used and so doesn't matter. -1 seemed less likely to
silently succeed.

> Also, we have HASHMAP_MIN_CAP_BITS == 2. Shall we just make sure we
> never feed bits == 0 into hash_bits()?

I think that'd be a different change. I'd be happy to see it.

Thanks,
Ian

> Thanks,
> Song
>
>
> > +#endif
> > #if (__SIZEOF_SIZE_T__ == __SIZEOF_LONG_LONG__)
> >       /* LP64 case */
> >       return (h * 11400714819323198485llu) >> (__SIZEOF_LONG_LONG__ * 8 - bits);
> > --
> > 2.29.1.341.ge80a0c044ae-goog
> >
>
Andrii Nakryiko Oct. 29, 2020, 8:16 p.m. UTC | #3
On Thu, Oct 29, 2020 at 9:11 AM Ian Rogers <irogers@google.com> wrote:
>

> If bits is 0, the case when the map is empty, then the >> is the size of

> the register which is undefined behavior - on x86 it is the same as a

> shift by 0. Fix by handling the 0 case explicitly when running with

> address sanitizer.

>

> A variant of this patch was posted previously as:

> https://lore.kernel.org/lkml/20200508063954.256593-1-irogers@google.com/

>

> Signed-off-by: Ian Rogers <irogers@google.com>

> ---

>  tools/lib/bpf/hashmap.h | 14 ++++++++++++++

>  1 file changed, 14 insertions(+)

>

> diff --git a/tools/lib/bpf/hashmap.h b/tools/lib/bpf/hashmap.h

> index d9b385fe808c..27d0556527d3 100644

> --- a/tools/lib/bpf/hashmap.h

> +++ b/tools/lib/bpf/hashmap.h

> @@ -12,9 +12,23 @@

>  #include <stddef.h>

>  #include <limits.h>

>

> +#ifdef __has_feature

> +#define HAVE_FEATURE(f) __has_feature(f)

> +#else

> +#define HAVE_FEATURE(f) 0

> +#endif

> +

>  static inline size_t hash_bits(size_t h, int bits)

>  {

>         /* shuffle bits and return requested number of upper bits */

> +#if defined(ADDRESS_SANITIZER) || HAVE_FEATURE(address_sanitizer)

> +       /*

> +        * If the requested bits == 0 avoid undefined behavior from a

> +        * greater-than bit width shift right (aka invalid-shift-exponent).

> +        */

> +       if (bits == 0)

> +               return -1;

> +#endif


Oh, just too much # magic here :(... If we want to prevent hash_bits()
from being called with bits == 0 (despite the result never being used),
let's just adjust the hashmap__for_each_key_entry and
hashmap__for_each_key_entry_safe macros:

diff --git a/tools/lib/bpf/hashmap.h b/tools/lib/bpf/hashmap.h
index d9b385fe808c..488e0ef236cb 100644
--- a/tools/lib/bpf/hashmap.h
+++ b/tools/lib/bpf/hashmap.h
@@ -174,9 +174,9 @@ bool hashmap__find(const struct hashmap *map, const void *key, void **value);
  * @key: key to iterate entries for
  */
 #define hashmap__for_each_key_entry(map, cur, _key)                        \
-       for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\
-                                            map->cap_bits);                \
-                    map->buckets ? map->buckets[bkt] : NULL; });           \
+       for (cur = map->buckets                                             \
+                  ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \
+                  : NULL;                                                  \
             cur;                                                           \
             cur = cur->next)                                               \
                if (map->equal_fn(cur->key, (_key), map->ctx))

Either way it's a bit ugly and long, but at least we don't have extra
#-driven ugliness.


>  #if (__SIZEOF_SIZE_T__ == __SIZEOF_LONG_LONG__)

>         /* LP64 case */

>         return (h * 11400714819323198485llu) >> (__SIZEOF_LONG_LONG__ * 8 - bits);

> --

> 2.29.1.341.ge80a0c044ae-goog

>
Ian Rogers Oct. 29, 2020, 8:58 p.m. UTC | #4
On Thu, Oct 29, 2020 at 1:16 PM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Thu, Oct 29, 2020 at 9:11 AM Ian Rogers <irogers@google.com> wrote:
> >
> > If bits is 0, the case when the map is empty, then the >> is the size of
> > the register which is undefined behavior - on x86 it is the same as a
> > shift by 0. Fix by handling the 0 case explicitly when running with
> > address sanitizer.
> >
> > A variant of this patch was posted previously as:
> > https://lore.kernel.org/lkml/20200508063954.256593-1-irogers@google.com/
> >
> > Signed-off-by: Ian Rogers <irogers@google.com>
> > ---
> >  tools/lib/bpf/hashmap.h | 14 ++++++++++++++
> >  1 file changed, 14 insertions(+)
> >
> > diff --git a/tools/lib/bpf/hashmap.h b/tools/lib/bpf/hashmap.h
> > index d9b385fe808c..27d0556527d3 100644
> > --- a/tools/lib/bpf/hashmap.h
> > +++ b/tools/lib/bpf/hashmap.h
> > @@ -12,9 +12,23 @@
> >  #include <stddef.h>
> >  #include <limits.h>
> >
> > +#ifdef __has_feature
> > +#define HAVE_FEATURE(f) __has_feature(f)
> > +#else
> > +#define HAVE_FEATURE(f) 0
> > +#endif
> > +
> >  static inline size_t hash_bits(size_t h, int bits)
> >  {
> >         /* shuffle bits and return requested number of upper bits */
> > +#if defined(ADDRESS_SANITIZER) || HAVE_FEATURE(address_sanitizer)
> > +       /*
> > +        * If the requested bits == 0 avoid undefined behavior from a
> > +        * greater-than bit width shift right (aka invalid-shift-exponent).
> > +        */
> > +       if (bits == 0)
> > +               return -1;
> > +#endif
>
> Oh, just too much # magic here :(... If we want to prevent hash_bits()
> from being called with bits == 0 (despite the result never used),
> let's just adjust hashmap__for_each_key_entry and
> hashmap__for_each_key_entry_safe macros:
>
> diff --git a/tools/lib/bpf/hashmap.h b/tools/lib/bpf/hashmap.h
> index d9b385fe808c..488e0ef236cb 100644
> --- a/tools/lib/bpf/hashmap.h
> +++ b/tools/lib/bpf/hashmap.h
> @@ -174,9 +174,9 @@ bool hashmap__find(const struct hashmap *map,
> const void *key, void **value);
>   * @key: key to iterate entries for
>   */
>  #define hashmap__for_each_key_entry(map, cur, _key)                        \
> -       for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\
> -                                            map->cap_bits);                \
> -                    map->buckets ? map->buckets[bkt] : NULL; });           \
> +       for (cur = map->buckets                                             \
> +                  ? map->buckets[hash_bits(map->hash_fn((_key),
> map->ctx), map->cap_bits)] \
> +                  : NULL;                                                  \
>              cur;                                                           \
>              cur = cur->next)                                               \
>                 if (map->equal_fn(cur->key, (_key), map->ctx))
>
> Either way it's a bit ugly and long, but at least we don't have extra
> #-driven ugliness.


This can work with the following changes in hashmap.c. I'll resend
this as a whole patch.

Thanks,
Ian

--- a/tools/lib/bpf/hashmap.c
+++ b/tools/lib/bpf/hashmap.c
@@ -156,7 +156,7 @@ int hashmap__insert(struct hashmap *map,
                    const void **old_key, void **old_value)
 {
        struct hashmap_entry *entry;
-       size_t h;
+       size_t h = 0;
        int err;

        if (old_key)
@@ -164,7 +164,9 @@ int hashmap__insert(struct hashmap *map,
        if (old_value)
                *old_value = NULL;

-       h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
+        if (map->buckets)
+          h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
+
        if (strategy != HASHMAP_APPEND &&
            hashmap_find_entry(map, key, h, NULL, &entry)) {
                if (old_key)
@@ -208,6 +210,9 @@ bool hashmap__find(const struct hashmap
        struct hashmap_entry *entry;
        size_t h;

+        if (!map->buckets)
+          return false;
+
        h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
        if (!hashmap_find_entry(map, key, h, NULL, &entry))
                return false;
@@ -223,6 +228,9 @@ bool hashmap__delete(struct hashmap *map
        struct hashmap_entry **pprev, *entry;
        size_t h;

+        if (!map->buckets)
+          return false;
+
        h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
        if (!hashmap_find_entry(map, key, h, &pprev, &entry))
                return false;


> >  #if (__SIZEOF_SIZE_T__ == __SIZEOF_LONG_LONG__)
> >         /* LP64 case */
> >         return (h * 11400714819323198485llu) >> (__SIZEOF_LONG_LONG__ * 8 - bits);
> > --
> > 2.29.1.341.ge80a0c044ae-goog
> >
diff mbox series

Patch

diff --git a/tools/lib/bpf/hashmap.h b/tools/lib/bpf/hashmap.h
index d9b385fe808c..27d0556527d3 100644
--- a/tools/lib/bpf/hashmap.h
+++ b/tools/lib/bpf/hashmap.h
@@ -12,9 +12,23 @@ 
 #include <stddef.h>
 #include <limits.h>
 
+#ifdef __has_feature
+#define HAVE_FEATURE(f) __has_feature(f)
+#else
+#define HAVE_FEATURE(f) 0
+#endif
+
 static inline size_t hash_bits(size_t h, int bits)
 {
 	/* shuffle bits and return requested number of upper bits */
+#if defined(ADDRESS_SANITIZER) || HAVE_FEATURE(address_sanitizer)
+	/*
+	 * If the requested bits == 0 avoid undefined behavior from a
+	 * greater-than bit width shift right (aka invalid-shift-exponent).
+	 */
+	if (bits == 0)
+		return -1;
+#endif
 #if (__SIZEOF_SIZE_T__ == __SIZEOF_LONG_LONG__)
 	/* LP64 case */
 	return (h * 11400714819323198485llu) >> (__SIZEOF_LONG_LONG__ * 8 - bits);