diff mbox series

[RFC,net-next,9/9] genetlink: allow dumping command-specific policy

Message ID 20201001000518.685243-10-kuba@kernel.org
State New
Headers show
Series genetlink: support per-command policy dump | expand

Commit Message

Jakub Kicinski Oct. 1, 2020, 12:05 a.m. UTC
Right now CTRL_CMD_GETPOLICY can only dump the family-wide
policy. Support dumping policy of a specific op.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/genetlink.h |  1 +
 net/netlink/genetlink.c        | 23 +++++++++++++++++++++--
 2 files changed, 22 insertions(+), 2 deletions(-)

Comments

Johannes Berg Oct. 1, 2020, 7:59 a.m. UTC | #1
On Wed, 2020-09-30 at 17:05 -0700, Jakub Kicinski wrote:
> Right now CTRL_CMD_GETPOLICY can only dump the family-wide
> policy. Support dumping policy of a specific op.

So, hmm.

Yeah, I guess this is fine, but you could end up having to do a lot of
dumps, and with e.g. ethtool you'd end up with a lot of duplicate data
too, since it's structured as


common_policy = { ... }

cmd1_policy = {
	[CMD1_COMMON] = NLA_NESTED_POLICY(common_policy),
	...
};

cmd2_policy = {
	[CMD2_COMMON] = NLA_NESTED_POLICY(common_policy),
	...
};

etc.


So you end up dumping per command (and in practice, since they can be
different, you now *have to* unless you know out-of-band that there are
no per-command policies).


Even if I don't have a good idea now on how to avoid the duplication, it
might be nicer to have a (flag) attribute here for "CTRL_ATTR_ALL_OPS"?

johannes

> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
> ---
>  include/uapi/linux/genetlink.h |  1 +
>  net/netlink/genetlink.c        | 23 +++++++++++++++++++++--
>  2 files changed, 22 insertions(+), 2 deletions(-)
> 
> diff --git a/include/uapi/linux/genetlink.h b/include/uapi/linux/genetlink.h
> index 9c0636ec2286..7dbe2d5d7d46 100644
> --- a/include/uapi/linux/genetlink.h
> +++ b/include/uapi/linux/genetlink.h
> @@ -64,6 +64,7 @@ enum {
>  	CTRL_ATTR_OPS,
>  	CTRL_ATTR_MCAST_GROUPS,
>  	CTRL_ATTR_POLICY,
> +	CTRL_ATTR_OP,
>  	__CTRL_ATTR_MAX,
>  };
>  
> diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
> index f2833e9165c7..12e9f323af35 100644
> --- a/net/netlink/genetlink.c
> +++ b/net/netlink/genetlink.c
> @@ -1113,12 +1113,14 @@ static int genl_ctrl_event(int event, const struct genl_family *family,
>  struct ctrl_dump_policy_ctx {
>  	unsigned long state;
>  	unsigned int fam_id;
> +	u8 cmd;
>  };
>  
>  static const struct nla_policy ctrl_policy_policy[] = {
>  	[CTRL_ATTR_FAMILY_ID]	= { .type = NLA_U16 },
>  	[CTRL_ATTR_FAMILY_NAME]	= { .type = NLA_NUL_STRING,
>  				    .len = GENL_NAMSIZ - 1 },
> +	[CTRL_ATTR_OP]		= { .type = NLA_U8 },
>  };
>  
>  static int ctrl_dumppolicy_start(struct netlink_callback *cb)
> @@ -1127,6 +1129,8 @@ static int ctrl_dumppolicy_start(struct netlink_callback *cb)
>  	struct ctrl_dump_policy_ctx *ctx = (void *)cb->args;
>  	struct nlattr **tb = info->attrs;
>  	const struct genl_family *rt;
> +	struct genl_ops op;
> +	int err;
>  
>  	if (!tb[CTRL_ATTR_FAMILY_ID] && !tb[CTRL_ATTR_FAMILY_NAME])
>  		return -EINVAL;
> @@ -1145,10 +1149,23 @@ static int ctrl_dumppolicy_start(struct netlink_callback *cb)
>  	if (!rt)
>  		return -ENOENT;
>  
> -	if (!rt->policy)
> +	if (tb[CTRL_ATTR_OP]) {
> +		ctx->cmd = nla_get_u8(tb[CTRL_ATTR_OP]);
> +
> +		err = genl_get_cmd(ctx->cmd, rt, &op);
> +		if (err) {
> +			NL_SET_BAD_ATTR(cb->extack, tb[CTRL_ATTR_OP]);
> +			return err;
> +		}
> +	} else {
> +		op.policy = rt->policy;
> +		op.maxattr = rt->maxattr;
> +	}
> +
> +	if (!op.policy)
>  		return -ENODATA;
>  
> -	return netlink_policy_dump_start(rt->policy, rt->maxattr, &ctx->state);
> +	return netlink_policy_dump_start(op.policy, op.maxattr, &ctx->state);
>  }
>  
>  static int ctrl_dumppolicy(struct sk_buff *skb, struct netlink_callback *cb)
> @@ -1167,6 +1184,8 @@ static int ctrl_dumppolicy(struct sk_buff *skb, struct netlink_callback *cb)
>  
>  		if (nla_put_u16(skb, CTRL_ATTR_FAMILY_ID, ctx->fam_id))
>  			goto nla_put_failure;
> +		if (ctx->cmd && nla_put_u8(skb, CTRL_ATTR_OP, ctx->cmd))
> +			goto nla_put_failure;
>  
>  		nest = nla_nest_start(skb, CTRL_ATTR_POLICY);
>  		if (!nest)
Jakub Kicinski Oct. 1, 2020, 3:41 p.m. UTC | #2
On Thu, 01 Oct 2020 09:59:47 +0200 Johannes Berg wrote:
> On Wed, 2020-09-30 at 17:05 -0700, Jakub Kicinski wrote:
> > Right now CTRL_CMD_GETPOLICY can only dump the family-wide
> > policy. Support dumping policy of a specific op.  
> 
> So, hmm.
> 
> Yeah, I guess this is fine, but you could end up having to do a lot of
> dumps, and with e.g. ethtool you'd end up with a lot of duplicate data
> too, since it's structured as
> 
> 
> common_policy = { ... }
> 
> cmd1_policy = {
> 	[CMD1_COMMON] = NLA_NESTED_POLICY(common_policy),
> 	...
> };
> 
> cmd2_policy = {
> 	[CMD2_COMMON] = NLA_NESTED_POLICY(common_policy),
> 	...
> };
> 
> etc.
> 
> 
> So you end up dumping per command (and in practice, since they can be
> different, you now *have to* unless you know out-of-band that there are
> no per-command policies).
> 
> 
> Even if I don't have a good idea now on how to avoid the duplication, it
> might be nicer to have a (flag) attribute here for "CTRL_ATTR_ALL_OPS"?

Hm. How would you see the dump structured? We need to annotate the root
policies with the command. Right now I have:

 [ATTR_FAMILY_ID]
 [ATTR_OP]
 [ATTR_POLICY]
   [policy idx]
     [attr idx]
       [bla]
       [bla]
       [bla]

But if we're dumping _all_, the policy to op mapping is actually 1:n,
so we'd need to restructure the dump a lil' bit and have OP only
reported on root of the policy and make it a nested array.

Alternatively we could report OP -> policy mapping in a separate
message?

WDYT?
Johannes Berg Oct. 1, 2020, 4 p.m. UTC | #3
On Thu, 2020-10-01 at 08:41 -0700, Jakub Kicinski wrote:

> > Even if I don't have a good idea now on how to avoid the duplication, it
> > might be nicer to have a (flag) attribute here for "CTRL_ATTR_ALL_OPS"?
> 
> Hm. How would you see the dump structured? 

Yeah, that's the tricky part ... Hence why I said "I don't have a good
idea now" :)

> We need to annotate the root
> policies with the command. Right now I have:
> 
>  [ATTR_FAMILY_ID]
>  [ATTR_OP]
>  [ATTR_POLICY]
>    [policy idx]
>      [attr idx]
>        [bla]
>        [bla]
>        [bla]
> 
> But if we're dumping _all_, the policy to op mapping is actually 1:n,
> so we'd need to restructure the dump a lil' bit and have OP only
> reported on root of the policy and make it a nested array.

So today you see something like

[ATTR_FAMILY_ID]
[ATTR_POLICY]
  [policy idx, 0 = main policy]
    [bla]
    ...
  ...


I guess the most compact representation, that also preserves the most
data about sharing, would be to do something like

[ATTR_FAMILY_ID]
[ATTR_POLICY]
  [policy idx, 0 = main policy]
    [bla]
    ...
  ...
[ATTR_OP_POLICY]
  [op] = [policy idx]
  ...

This preserves all the information because it tells you which policies
actually are identical (shared), each per-op policy can have nested
policies referring to common ones, like in the ethtool case, etc.


OTOH, it's a lot trickier to implement - I haven't really come up with a
good way of doing it "generically" with the net/netlink/policy.c code.
I'm sure it can be solved, but I haven't really given it enough thought.
Perhaps by passing a "policy iterator" to netlink_policy_dump_start(),
instead of just a single policy (i.e. a function & a data ptr or so),
and then it can walk all the policies using that, assign the idxes etc.,
and dump them out in netlink_policy_dump_write()?

But then we'd still have to get the policy idx for a given policy, and
not clean up all the state when netlink_policy_dump_loop() returns
false, because you still need it for ATTR_OP_POLICY to find the idx from
the pointer?

I guess it's doable. Just seems a bit more complex. OTOH, it may be that
such complexity also completely makes sense for non-generic netlink
families anyway, I haven't looked at them much at all.

johannes
Jakub Kicinski Oct. 1, 2020, 4:24 p.m. UTC | #4
On Thu, 01 Oct 2020 18:00:58 +0200 Johannes Berg wrote:
> On Thu, 2020-10-01 at 08:41 -0700, Jakub Kicinski wrote:
> 
> > > Even if I don't have a good idea now on how to avoid the duplication, it
> > > might be nicer to have a (flag) attribute here for "CTRL_ATTR_ALL_OPS"?  
> > 
> > Hm. How would you see the dump structured?   
> 
> Yeah, that's the tricky part ... Hence why I said "I don't have a good
> idea now" :)

You say that, yet your idea below seems pretty good :P

> > We need to annotate the root
> > policies with the command. Right now I have:
> > 
> >  [ATTR_FAMILY_ID]
> >  [ATTR_OP]
> >  [ATTR_POLICY]
> >    [policy idx]
> >      [attr idx]
> >        [bla]
> >        [bla]
> >        [bla]
> > 
> > But if we're dumping _all_, the policy to op mapping is actually 1:n,
> > so we'd need to restructure the dump a lil' bit and have OP only
> > reported on root of the policy and make it a nested array.  
> 
> So today you see something like
> 
> [ATTR_FAMILY_ID]
> [ATTR_POLICY]
>   [policy idx, 0 = main policy]
>     [bla]
>     ...
>   ...
> 
> 
> I guess the most compact representation, that also preserves the most
> data about sharing, would be to do something like
> 
> [ATTR_FAMILY_ID]
> [ATTR_POLICY]
>   [policy idx, 0 = main policy]
>     [bla]
>     ...
>   ...
> [ATTR_OP_POLICY]
>   [op] = [policy idx]
>   ...
> 
> This preserves all the information because it tells you which policies
> actually are identical (shared), each per-op policy can have nested
> policies referring to common ones, like in the ethtool case, etc.

Only comment I have is - can we make sure to put the ATTR_OP_POLICY
first? That way user space can parse the stream and pick out the info
it needs rather than recording all the policies only to find out later
which one is which.

> OTOH, it's a lot trickier to implement - I haven't really come up with a
> good way of doing it "generically" with the net/netlink/policy.c code.
> I'm sure it can be solved, but I haven't really given it enough thought.
> Perhaps by passing a "policy iterator" to netlink_policy_dump_start(),
> instead of just a single policy (i.e. a function & a data ptr or so),
> and then it can walk all the policies using that, assign the idxes etc.,
> and dump them out in netlink_policy_dump_write()?
> 
> But then we'd still have to get the policy idx for a given policy, and
> not clean up all the state when netlink_policy_dump_loop() returns
> false, because you still need it for ATTR_OP_POLICY to find the idx from
> the pointer?
> 
> I guess it's doable. Just seems a bit more complex. OTOH, it may be that
> such complexity also completely makes sense for non-generic netlink
> families anyway, I haven't looked at them much at all.

IDK, doesn't seem crazy hard. We can create some iterator or expand the
API with "begin" "add" "end" calls. Then once dumper state is built we
can ask it which ids it assigned.

OTOH I don't think we have a use for this in ethtool, because user
space usually does just one op per execution. So I'm thinking to use
your structure for the dump, but leave the actual implementation of
"dump all" for "later".

How does that sound?
Johannes Berg Oct. 1, 2020, 4:57 p.m. UTC | #5
On Thu, 2020-10-01 at 09:24 -0700, Jakub Kicinski wrote:

> > I guess the most compact representation, that also preserves the most
> > data about sharing, would be to do something like
> > 
> > [ATTR_FAMILY_ID]
> > [ATTR_POLICY]
> >   [policy idx, 0 = main policy]
> >     [bla]
> >     ...
> >   ...
> > [ATTR_OP_POLICY]
> >   [op] = [policy idx]
> >   ...

> Only comment I have is - can we make sure to put the ATTR_OP_POLICY
> first? That way user space can parse the stream and pick out the info
> it needs rather than recording all the policies only to find out later
> which one is which.

Hmm. Yes, that makes sense. But I don't see why not - you could go do
the netlink_policy_dump_start() which assigns the indexes, then
dump out ATTR_OP_POLICY looking up the indexes in the table that it
created, and then dump out all the policies?

> > I guess it's doable. Just seems a bit more complex. OTOH, it may be that
> > such complexity also completely makes sense for non-generic netlink
> > families anyway, I haven't looked at them much at all.
> 
> IDK, doesn't seem crazy hard. We can create some iterator or expand the
> API with "begin" "add" "end" calls. Then once dumper state is built we
> can ask it which ids it assigned.

Yeah. Seems feasible. Maybe I'll take a stab at it (later, when I can).

> OTOH I don't think we have a use for this in ethtool, because user
> space usually does just one op per execution. So I'm thinking to use
> your structure for the dump, but leave the actual implementation of
> "dump all" for "later".
> 
> How does that sound?

I'm not sure you even need that structure if you have the "filter by
op"? I mean, then just stick to what you had?

When I started down this road I more had in mind "sniffer-like" tools
that want to understand the messages better, etc. without really having
any domain-specific "knowledge" encoded in them. And then you'd probably
really want to build the entire policy representation in the tool side
first.

Or perhaps even tools you could run on the latest kernel to generate
code (e.g. python code was discussed) that would be able to build
messages. You'd want to generate the code once on the latest kernel when
you need a new feature, and then actually use it instead of redoing it
at runtime, but still, could be done.

I suppose you have a completely different use case in mind :-)

johannes
Jakub Kicinski Oct. 1, 2020, 5:09 p.m. UTC | #6
On Thu, 01 Oct 2020 18:57:35 +0200 Johannes Berg wrote:
> On Thu, 2020-10-01 at 09:24 -0700, Jakub Kicinski wrote:
> > > I guess the most compact representation, that also preserves the most
> > > data about sharing, would be to do something like
> > > 
> > > [ATTR_FAMILY_ID]
> > > [ATTR_POLICY]
> > >   [policy idx, 0 = main policy]
> > >     [bla]
> > >     ...
> > >   ...
> > > [ATTR_OP_POLICY]
> > >   [op] = [policy idx]
> > >   ...  
> 
> > Only comment I have is - can we make sure to put the ATTR_OP_POLICY
> > first? That way user space can parse the stream and pick out the info
> > it needs rather than recording all the policies only to find out later
> > which one is which.  
> 
> Hmm. Yes, that makes sense. But I don't see why not - you could go do
> the netlink_policy_dump_start() which assigns the indexes, then
> dump out ATTR_OP_POLICY looking up the indexes in the table that it
> created, and then dump out all the policies?

Ack.

> > > I guess it's doable. Just seems a bit more complex. OTOH, it may be that
> > > such complexity also completely makes sense for non-generic netlink
> > > families anyway, I haven't looked at them much at all.  
> > 
> > IDK, doesn't seem crazy hard. We can create some iterator or expand the
> > API with "begin" "add" "end" calls. Then once dumper state is built we
> > can ask it which ids it assigned.  
> 
> Yeah. Seems feasible. Maybe I'll take a stab at it (later, when I can).
> 
> > OTOH I don't think we have a use for this in ethtool, because user
> > space usually does just one op per execution. So I'm thinking to use
> > your structure for the dump, but leave the actual implementation of
> > "dump all" for "later".
> > 
> > How does that sound?  
> 
> I'm not sure you even need that structure if you have the "filter by
> op"? I mean, then just stick to what you had?

I was adding OP as an attribute to each message. I will just ditch that
given user space should know what it asked for.

> When I started down this road I more had in mind "sniffer-like" tools
> that want to understand the messages better, etc. without really having
> any domain-specific "knowledge" encoded in them. And then you'd probably
> really want to build the entire policy representation in the tool side
> first.
> 
> Or perhaps even tools you could run on the latest kernel to generate
> code (e.g. python code was discussed) that would be able to build
> messages. You'd want to generate the code once on the latest kernel when
> you need a new feature, and then actually use it instead of redoing it
> at runtime, but still, could be done.
> 
> I suppose you have a completely different use case in mind :-)

I see. Yes, I'm trying to avoid having to probe the kernel for features.
We added new flags to ethtool to include extra info in the output, and
older kernels will return EOPNOTSUPP for the entire operation if those
are set (due to strict checking). While user would probably expect the
information to just not be there if kernel can't provide it. New
kernels can't provide it all the time either (it's extra stats from the
driver).

I'm hoping Michal will accept this as a solution :) Retrying on
EOPNOTSUPP gets a little hairy for my taste.

That should have been in the cover letter, I guess.
diff mbox series

Patch

diff --git a/include/uapi/linux/genetlink.h b/include/uapi/linux/genetlink.h
index 9c0636ec2286..7dbe2d5d7d46 100644
--- a/include/uapi/linux/genetlink.h
+++ b/include/uapi/linux/genetlink.h
@@ -64,6 +64,7 @@  enum {
 	CTRL_ATTR_OPS,
 	CTRL_ATTR_MCAST_GROUPS,
 	CTRL_ATTR_POLICY,
+	CTRL_ATTR_OP,
 	__CTRL_ATTR_MAX,
 };
 
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index f2833e9165c7..12e9f323af35 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -1113,12 +1113,14 @@  static int genl_ctrl_event(int event, const struct genl_family *family,
 struct ctrl_dump_policy_ctx {
 	unsigned long state;
 	unsigned int fam_id;
+	u8 cmd;
 };
 
 static const struct nla_policy ctrl_policy_policy[] = {
 	[CTRL_ATTR_FAMILY_ID]	= { .type = NLA_U16 },
 	[CTRL_ATTR_FAMILY_NAME]	= { .type = NLA_NUL_STRING,
 				    .len = GENL_NAMSIZ - 1 },
+	[CTRL_ATTR_OP]		= { .type = NLA_U8 },
 };
 
 static int ctrl_dumppolicy_start(struct netlink_callback *cb)
@@ -1127,6 +1129,8 @@  static int ctrl_dumppolicy_start(struct netlink_callback *cb)
 	struct ctrl_dump_policy_ctx *ctx = (void *)cb->args;
 	struct nlattr **tb = info->attrs;
 	const struct genl_family *rt;
+	struct genl_ops op;
+	int err;
 
 	if (!tb[CTRL_ATTR_FAMILY_ID] && !tb[CTRL_ATTR_FAMILY_NAME])
 		return -EINVAL;
@@ -1145,10 +1149,23 @@  static int ctrl_dumppolicy_start(struct netlink_callback *cb)
 	if (!rt)
 		return -ENOENT;
 
-	if (!rt->policy)
+	if (tb[CTRL_ATTR_OP]) {
+		ctx->cmd = nla_get_u8(tb[CTRL_ATTR_OP]);
+
+		err = genl_get_cmd(ctx->cmd, rt, &op);
+		if (err) {
+			NL_SET_BAD_ATTR(cb->extack, tb[CTRL_ATTR_OP]);
+			return err;
+		}
+	} else {
+		op.policy = rt->policy;
+		op.maxattr = rt->maxattr;
+	}
+
+	if (!op.policy)
 		return -ENODATA;
 
-	return netlink_policy_dump_start(rt->policy, rt->maxattr, &ctx->state);
+	return netlink_policy_dump_start(op.policy, op.maxattr, &ctx->state);
 }
 
 static int ctrl_dumppolicy(struct sk_buff *skb, struct netlink_callback *cb)
@@ -1167,6 +1184,8 @@  static int ctrl_dumppolicy(struct sk_buff *skb, struct netlink_callback *cb)
 
 		if (nla_put_u16(skb, CTRL_ATTR_FAMILY_ID, ctx->fam_id))
 			goto nla_put_failure;
+		if (ctx->cmd && nla_put_u8(skb, CTRL_ATTR_OP, ctx->cmd))
+			goto nla_put_failure;
 
 		nest = nla_nest_start(skb, CTRL_ATTR_POLICY);
 		if (!nest)