diff mbox

[RFC,5/7] perf tools: Support setting different slots in a BPF map separately

Message ID 1445078910-73699-6-git-send-email-wangnan0@huawei.com
State New
Headers show

Commit Message

Wang Nan Oct. 17, 2015, 10:48 a.m. UTC
This patch introduces basic facilities to support config different
slots in a BPF map one by one.

nr_indics and indics are introduced into 'struct parse_events_term',
where indics is an array of indics which will be configured by this
config term, nr_indics is the size of the array. The array is passed
to 'struct bpf_map_priv'. To indicate the new type of configuration,
BPF_MAP_PRIV_KEY_INDICS is added as a new key type.
bpf_map_config_foreach_key() is extended to iterate over those indics
instead of all possible keys.

Signed-off-by: Wang Nan <wangnan0@huawei.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: David Ahern <dsahern@gmail.com>
Cc: He Kuang <hekuang@huawei.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kaixu Xia <xiakaixu@huawei.com>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Zefan Li <lizefan@huawei.com>
Cc: pi3orama@163.com
---
 tools/perf/util/bpf-loader.c   | 68 +++++++++++++++++++++++++++++++++++++++++-
 tools/perf/util/parse-events.c |  4 ++-
 tools/perf/util/parse-events.h |  2 ++
 3 files changed, 72 insertions(+), 2 deletions(-)

Comments

Wang Nan Nov. 20, 2015, 1:25 p.m. UTC | #1
On 2015/10/17 18:48, Wang Nan wrote:
> This patch introduces basic facilities to support config different

> slots in a BPF map one by one.

>

> nr_indics and indics are introduced into 'struct parse_events_term',

> where indics is an array of indics which will be configured by this

> config term, nr_indics is the size of the array. The array is passed

> to 'struct bpf_map_priv'. To indicate the new type of configuration,

> BPF_MAP_PRIV_KEY_INDICS is added as a new key type.

> bpf_map_config_foreach_key() is extended to iterate over those indics

> instead of all possible keys.

>

> Signed-off-by: Wang Nan <wangnan0@huawei.com>

> Cc: Arnaldo Carvalho de Melo <acme@redhat.com>

> Cc: Alexei Starovoitov <ast@plumgrid.com>

> Cc: Brendan Gregg <brendan.d.gregg@gmail.com>

> Cc: Daniel Borkmann <daniel@iogearbox.net>

> Cc: David Ahern <dsahern@gmail.com>

> Cc: He Kuang <hekuang@huawei.com>

> Cc: Jiri Olsa <jolsa@kernel.org>

> Cc: Kaixu Xia <xiakaixu@huawei.com>

> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>

> Cc: Namhyung Kim <namhyung@kernel.org>

> Cc: Paul Mackerras <paulus@samba.org>

> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>

> Cc: Zefan Li <lizefan@huawei.com>

> Cc: pi3orama@163.com

> ---

>   tools/perf/util/bpf-loader.c   | 68 +++++++++++++++++++++++++++++++++++++++++-

>   tools/perf/util/parse-events.c |  4 ++-

>   tools/perf/util/parse-events.h |  2 ++

>   3 files changed, 72 insertions(+), 2 deletions(-)

>

> diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c

> index 15cf27a..023fc12 100644

> --- a/tools/perf/util/bpf-loader.c

> +++ b/tools/perf/util/bpf-loader.c

> @@ -638,6 +638,7 @@ int bpf__foreach_tev(struct bpf_object *obj,

>   

>   enum bpf_map_priv_key_type {

>   	BPF_MAP_PRIV_KEY_ALL,

> +	BPF_MAP_PRIV_KEY_INDICS,

>   };

>   

>   enum bpf_map_priv_value_type {

> @@ -647,6 +648,12 @@ enum bpf_map_priv_value_type {

>   struct bpf_map_priv {

>   	struct {

>   		enum bpf_map_priv_key_type type;

> +		union {

> +			struct {

> +				size_t nr_indics;

> +				u64 *indics;

> +			} indics;

> +		};

>   	} key;

>   

>   	struct {

> @@ -663,6 +670,8 @@ bpf_map_priv__clear(struct bpf_map *map __maybe_unused,

>   {

>   	struct bpf_map_priv *priv = _priv;

>   

> +	if (priv->key.type == BPF_MAP_PRIV_KEY_INDICS)

> +		zfree(&priv->key.indics.indics);

>   	free(priv);

>   }

>   

> @@ -718,6 +727,20 @@ bpf_map_config_foreach_key(struct bpf_map *map,

>   				}

>   			}

>   			return 0;

> +		case BPF_MAP_PRIV_KEY_INDICS:

> +			for (i = 0; i < priv->key.indics.nr_indics; i++) {

> +				u64 _idx = priv->key.indics.indics[i];

> +				unsigned int idx = (unsigned int)(_idx);

> +

> +				err = (*func)(name, map_fd, &def,

> +					      priv, &idx, arg);

> +				if (err) {

> +					pr_debug("ERROR: failed to insert value to %s[%u]\n",

> +						 name, idx);

> +					return err;

> +				}

> +			}


This for-loop has a potential problem that, if perf's user want to
set a very big array using indices, for example:

  # perf record -e 
mybpf.c/maps:mymap:values[1,2,3,10-100000,200000-400000]=3/ 
mybpf.c/maps:mymap:values[100000-200000]=3/ ...

Perf would alloc nearly 300000 slots for indices array, consume too much 
memory.

I will fix this problem by reinterprete indices array, makes negative
value represent range start and use next slot to store range size. For
example, the above perf cmdline can be converted to:

{1,2,3,-10, 99991,-200000,200001} and {-100000,100001}.

Thank you.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
Wang Nan Nov. 23, 2015, 2:01 a.m. UTC | #2
On 2015/11/20 23:34, Arnaldo Carvalho de Melo wrote:
> Em Fri, Nov 20, 2015 at 09:25:36PM +0800, Wangnan (F) escreveu:

>>> +		case BPF_MAP_PRIV_KEY_INDICS:

>>> +			for (i = 0; i < priv->key.indics.nr_indics; i++) {

>>> +				u64 _idx = priv->key.indics.indics[i];

>>> +				unsigned int idx = (unsigned int)(_idx);

>>> +

>>> +				err = (*func)(name, map_fd, &def,

>>> +					      priv, &idx, arg);

>>> +				if (err) {

>>> +					pr_debug("ERROR: failed to insert value to %s[%u]\n",

>>> +						 name, idx);

>>> +					return err;

>>> +				}

>>> +			}

>> This for-loop has a potential problem that, if perf's user want to

>> set a very big array using indices, for example:

>>

>>   # perf record -e

>> mybpf.c/maps:mymap:values[1,2,3,10-100000,200000-400000]=3/

>> mybpf.c/maps:mymap:values[100000-200000]=3/ ...

>>

>> Perf would alloc nearly 300000 slots for indices array, consume too much

>> memory.

>>

>> I will fix this problem by reinterprete indices array, makes negative

>> value represent range start and use next slot to store range size. For

>> example, the above perf cmdline can be converted to:

>>

>> {1,2,3,-10, 99991,-200000,200001} and {-100000,100001}.

> Why is that changing the way you specify what entries should be set to

> a value will make it not allocate too much memory?


It is actually a problem in the next patch, in which it expand all range
into a series of indices. If user wants 1-10000, it creates an array as
[1,2,3,4,...10000], so user is possible to use a simple cmdline to consume
all of available memory.

However, the method I described above is not the best way to solve this 
probelm.
I thought yesterday that we should not insist on indices array. We can
make parser always return ranges. For example, [1,2,3-5] can be represent
using [(1,1), (2,1), (3,3)], so we don't need the above ugly negative
indicators.

> I found the first form of representing ( start-end ) to be better than (

> -start, size ), but I would use what the C language uses for expressing

> ranges in switch case ranges, which is familiar and doesn't reuses the

> minus arithmetic operator to express a range, i.e.:

>

>   # perf record -e \

>     mybpf.c/maps:mymap:values[1,2,3,10..100000,200000..400000]=3/

>

>   # perf record -e \

>     mybpf.c/maps:mymap:values[100000..200000]=3/ ...


'..' is better.

Thank you.

> - Arnaldo



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
Wang Nan Nov. 23, 2015, 5:45 a.m. UTC | #3
On 2015/11/23 10:01, Wangnan (F) wrote:
>

>

> On 2015/11/20 23:34, Arnaldo Carvalho de Melo wrote:

>> Em Fri, Nov 20, 2015 at 09:25:36PM +0800, Wangnan (F) escreveu:

>>>> +        case BPF_MAP_PRIV_KEY_INDICS:

>>>> +            for (i = 0; i < priv->key.indics.nr_indics; i++) {

>>>> +                u64 _idx = priv->key.indics.indics[i];

>>>> +                unsigned int idx = (unsigned int)(_idx);

>>>> +

>>>> +                err = (*func)(name, map_fd, &def,

>>>> +                          priv, &idx, arg);

>>>> +                if (err) {

>>>> +                    pr_debug("ERROR: failed to insert value to 

>>>> %s[%u]\n",

>>>> +                         name, idx);

>>>> +                    return err;

>>>> +                }

>>>> +            }

>>> This for-loop has a potential problem that, if perf's user want to

>>> set a very big array using indices, for example:

>>>

>>>   # perf record -e

>>> mybpf.c/maps:mymap:values[1,2,3,10-100000,200000-400000]=3/

>>> mybpf.c/maps:mymap:values[100000-200000]=3/ ...

>>>

>>> Perf would alloc nearly 300000 slots for indices array, consume too 

>>> much

>>> memory.

>>>

>>> I will fix this problem by reinterprete indices array, makes negative

>>> value represent range start and use next slot to store range size. For

>>> example, the above perf cmdline can be converted to:

>>>

>>> {1,2,3,-10, 99991,-200000,200001} and {-100000,100001}.

>> Why is that changing the way you specify what entries should be set to

>> a value will make it not allocate too much memory?

>

> It is actually a problem in the next patch, in which it expand all range

> into a series of indices. If user wants 1-10000, it creates an array as

> [1,2,3,4,...10000], so user is possible to use a simple cmdline to 

> consume

> all of available memory.

>

> However, the method I described above is not the best way to solve 

> this probelm.

> I thought yesterday that we should not insist on indices array. We can

> make parser always return ranges. For example, [1,2,3-5] can be represent

> using [(1,1), (2,1), (3,3)], so we don't need the above ugly negative

> indicators.

>

>> I found the first form of representing ( start-end ) to be better than (

>> -start, size ), but I would use what the C language uses for expressing

>> ranges in switch case ranges, which is familiar and doesn't reuses the

>> minus arithmetic operator to express a range, i.e.:

>>

>>   # perf record -e \

>> mybpf.c/maps:mymap:values[1,2,3,10..100000,200000..400000]=3/

>>

>>   # perf record -e \

>>     mybpf.c/maps:mymap:values[100000..200000]=3/ ...

>

> '..' is better.

>


One problem: the case range syntax is introduced by gcc extension, not a
part of standard, and should be '...'.

Please see: https://gcc.gnu.org/onlinedocs/gcc/Case-Ranges.html

So I'll use '...' also.

Thank you.



> Thank you.

>

>> - Arnaldo

>

>

> -- 

> To unsubscribe from this list: send the line "unsubscribe 

> linux-kernel" in

> the body of a message to majordomo@vger.kernel.org

> More majordomo info at  http://vger.kernel.org/majordomo-info.html

> Please read the FAQ at  http://www.tux.org/lkml/



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
diff mbox

Patch

diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index 15cf27a..023fc12 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -638,6 +638,7 @@  int bpf__foreach_tev(struct bpf_object *obj,
 
 enum bpf_map_priv_key_type {
 	BPF_MAP_PRIV_KEY_ALL,
+	BPF_MAP_PRIV_KEY_INDICS,
 };
 
 enum bpf_map_priv_value_type {
@@ -647,6 +648,12 @@  enum bpf_map_priv_value_type {
 struct bpf_map_priv {
 	struct {
 		enum bpf_map_priv_key_type type;
+		union {
+			struct {
+				size_t nr_indics;
+				u64 *indics;
+			} indics;
+		};
 	} key;
 
 	struct {
@@ -663,6 +670,8 @@  bpf_map_priv__clear(struct bpf_map *map __maybe_unused,
 {
 	struct bpf_map_priv *priv = _priv;
 
+	if (priv->key.type == BPF_MAP_PRIV_KEY_INDICS)
+		zfree(&priv->key.indics.indics);
 	free(priv);
 }
 
@@ -718,6 +727,20 @@  bpf_map_config_foreach_key(struct bpf_map *map,
 				}
 			}
 			return 0;
+		case BPF_MAP_PRIV_KEY_INDICS:
+			for (i = 0; i < priv->key.indics.nr_indics; i++) {
+				u64 _idx = priv->key.indics.indics[i];
+				unsigned int idx = (unsigned int)(_idx);
+
+				err = (*func)(name, map_fd, &def,
+					      priv, &idx, arg);
+				if (err) {
+					pr_debug("ERROR: failed to insert value to %s[%u]\n",
+						 name, idx);
+					return err;
+				}
+			}
+			return 0;
 		default:
 			pr_debug("ERROR: keytype for map '%s' invalid\n", name);
 			return -EINVAL;
@@ -728,6 +751,28 @@  bpf_map_config_foreach_key(struct bpf_map *map,
 	}
 }
 
+static int
+bpf_map_priv_setkey(struct bpf_map_priv *priv,
+		    struct parse_events_term *term,
+		    const char *map_name)
+{
+	if (!term->nr_indics)
+		priv->key.type = BPF_MAP_PRIV_KEY_ALL;
+	else {
+		size_t memsz = term->nr_indics * sizeof(term->indics[0]);
+
+		priv->key.indics.indics = malloc(memsz);
+		if (!priv->key.indics.indics) {
+			pr_debug("No enough memory to alloc indics for %s\n",
+				 map_name);
+			return -ENOMEM;
+		}
+		memcpy(priv->key.indics.indics, term->indics, memsz);
+		priv->key.type = BPF_MAP_PRIV_KEY_INDICS;
+		priv->key.indics.nr_indics = term->nr_indics;
+	}
+	return 0;
+}
 
 static int
 bpf__config_obj_map_array_value(struct bpf_map *map,
@@ -773,7 +818,9 @@  bpf__config_obj_map_array_value(struct bpf_map *map,
 		return -ENOMEM;
 	}
 
-	priv->key.type = BPF_MAP_PRIV_KEY_ALL;
+	err = bpf_map_priv_setkey(priv, term, map_name);
+	if (err)
+		return err;
 	priv->value.type = BPF_MAP_PRIV_VAL_VALUE;
 	priv->value.val = term->val.num;
 	return bpf_map__set_private(map, priv, bpf_map_priv__clear);
@@ -834,6 +881,24 @@  bpf__config_obj_map(struct bpf_object *obj,
 		goto out;
 	}
 
+	if (term->nr_indics) {
+		struct bpf_map_def def;
+
+		err = bpf_map__get_def(map, &def);
+		if (err) {
+			pr_debug("ERROR: Unable to get map definition from '%s'\n",
+				 map_name);
+			goto out;
+		}
+		for (i = 0; i < term->nr_indics; i++)
+			if (term->indics[i] >= def.max_entries) {
+				pr_debug("ERROR: index %d too large\n",
+					 (int)term->indics[i]);
+				err = -E2BIG;
+				goto out;
+			}
+	}
+
 	for (i = 0; i < ARRAY_SIZE(bpf_config_map_funcs); i++) {
 		struct bpf_config_map_func *func = &bpf_config_map_funcs[i];
 
@@ -1004,6 +1069,7 @@  int bpf__strerror_config_obj(struct bpf_object *obj __maybe_unused,
 {
 	bpf__strerror_head(err, buf, size);
 	bpf__strerror_entry(ENODEV, "Invalid config option: '%s'", term->config)
+	bpf__strerror_entry(E2BIG,  "Index in '%s' too big", term->config)
 	bpf__strerror_entry(ENOENT, "Config target in '%s' is invalid", term->config)
 	bpf__strerror_entry(EBADF,  "Map type mismatch in '%s'", term->config)
 	bpf__strerror_entry(EINVAL, "Invalid config value")
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 9f081a1..42ac1cb 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -2123,8 +2123,10 @@  void parse_events__free_terms(struct list_head *terms)
 {
 	struct parse_events_term *term, *h;
 
-	list_for_each_entry_safe(term, h, terms, list)
+	list_for_each_entry_safe(term, h, terms, list) {
+		free(term->indics);
 		free(term);
+	}
 }
 
 void parse_events_evlist_error(struct parse_events_evlist *data,
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index d4aa88e..5ba1d3e 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -71,6 +71,8 @@  enum {
 
 struct parse_events_term {
 	char *config;
+	size_t nr_indics;
+	u64 *indics;
 	union {
 		char *str;
 		u64  num;