diff mbox series

[net-next,04/14] nexthop: Add netlink defines and enumerators for resilient NH groups

Message ID 674ece8e7d2fcdad3538d34e1db641e0b616cfae.1615387786.git.petrm@nvidia.com
State Superseded
Headers show
Series None | expand

Commit Message

Petr Machata March 10, 2021, 3:02 p.m. UTC
From: Ido Schimmel <idosch@nvidia.com>

- RTM_NEWNEXTHOP et.al. that handle resilient groups will have a new nested
  attribute, NHA_RES_GROUP, whose elements are attributes NHA_RES_GROUP_*.

- RTM_NEWNEXTHOPBUCKET et.al. is a suite of new messages that will
  currently serve only for dumping of individual buckets of resilient next
  hop groups. For nexthop group buckets, these messages will carry a nested
  attribute NHA_RES_BUCKET, whose elements are attributes NHA_RES_BUCKET_*.

  There are several reasons why a new suite of messages is created for
  nexthop buckets instead of overloading the information on the existing
  RTM_{NEW,DEL,GET}NEXTHOP messages.

  First, a nexthop group can contain a large number of nexthop buckets (4k
  is not unheard of). This imposes limits on the amount of information that
  can be encoded for each nexthop bucket given a netlink message is limited
  to 64k bytes.

  Second, while RTM_NEWNEXTHOPBUCKET is only used for notifications at
  this point, in the future it can be extended to provide user space with
  control over nexthop buckets configuration.

- The new group type is NEXTHOP_GRP_TYPE_RES. Note that nexthop code is
  adjusted to bounce groups with that type for now.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Petr Machata <petrm@nvidia.com>
---

Notes:
    v1 (changes since RFC):
    - u32 -> u16 for bucket counts / indices

 include/uapi/linux/nexthop.h   | 43 ++++++++++++++++++++++++++++++++++
 include/uapi/linux/rtnetlink.h |  7 ++++++
 net/ipv4/nexthop.c             |  2 ++
 security/selinux/nlmsgtab.c    |  5 +++-
 4 files changed, 56 insertions(+), 1 deletion(-)

Comments

David Ahern March 11, 2021, 3:34 p.m. UTC | #1
On 3/10/21 8:02 AM, Petr Machata wrote:
> diff --git a/include/uapi/linux/nexthop.h b/include/uapi/linux/nexthop.h

> index 2d4a1e784cf0..8efebf3cb9c7 100644

> --- a/include/uapi/linux/nexthop.h

> +++ b/include/uapi/linux/nexthop.h

> @@ -22,6 +22,7 @@ struct nexthop_grp {

>  

>  enum {

>  	NEXTHOP_GRP_TYPE_MPATH,  /* default type if not specified */


Update the above comment that it is for legacy, hash based multipath.

> +	NEXTHOP_GRP_TYPE_RES,    /* resilient nexthop group */

>  	__NEXTHOP_GRP_TYPE_MAX,

>  };

>  


besides the request for an updated comment, this looks fine:
Reviewed-by: David Ahern <dsahern@kernel.org>
Petr Machata March 11, 2021, 3:45 p.m. UTC | #2
David Ahern <dsahern@gmail.com> writes:

> On 3/10/21 8:02 AM, Petr Machata wrote:

>> diff --git a/include/uapi/linux/nexthop.h b/include/uapi/linux/nexthop.h

>> index 2d4a1e784cf0..8efebf3cb9c7 100644

>> --- a/include/uapi/linux/nexthop.h

>> +++ b/include/uapi/linux/nexthop.h

>> @@ -22,6 +22,7 @@ struct nexthop_grp {

>>  

>>  enum {

>>  	NEXTHOP_GRP_TYPE_MPATH,  /* default type if not specified */

>

> Update the above comment that it is for legacy, hash based multipath.


Maybe this would make sense?

	NEXTHOP_GRP_TYPE_MPATH,  /* hash-threshold nexthop group */
David Ahern March 11, 2021, 3:49 p.m. UTC | #3
On 3/11/21 8:45 AM, Petr Machata wrote:
> 

> David Ahern <dsahern@gmail.com> writes:

> 

>> On 3/10/21 8:02 AM, Petr Machata wrote:

>>> diff --git a/include/uapi/linux/nexthop.h b/include/uapi/linux/nexthop.h

>>> index 2d4a1e784cf0..8efebf3cb9c7 100644

>>> --- a/include/uapi/linux/nexthop.h

>>> +++ b/include/uapi/linux/nexthop.h

>>> @@ -22,6 +22,7 @@ struct nexthop_grp {

>>>  

>>>  enum {

>>>  	NEXTHOP_GRP_TYPE_MPATH,  /* default type if not specified */

>>

>> Update the above comment that it is for legacy, hash based multipath.

> 

> Maybe this would make sense?

> 

> 	NEXTHOP_GRP_TYPE_MPATH,  /* hash-threshold nexthop group */

> 


yes, the description is fine. keep the comment about 'default type'.
Petr Machata March 11, 2021, 4 p.m. UTC | #4
David Ahern <dsahern@gmail.com> writes:

> On 3/11/21 8:45 AM, Petr Machata wrote:

>> 

>> David Ahern <dsahern@gmail.com> writes:

>> 

>>> On 3/10/21 8:02 AM, Petr Machata wrote:

>>>> diff --git a/include/uapi/linux/nexthop.h b/include/uapi/linux/nexthop.h

>>>> index 2d4a1e784cf0..8efebf3cb9c7 100644

>>>> --- a/include/uapi/linux/nexthop.h

>>>> +++ b/include/uapi/linux/nexthop.h

>>>> @@ -22,6 +22,7 @@ struct nexthop_grp {

>>>>  

>>>>  enum {

>>>>  	NEXTHOP_GRP_TYPE_MPATH,  /* default type if not specified */

>>>

>>> Update the above comment that it is for legacy, hash based multipath.

>> 

>> Maybe this would make sense?

>> 

>> 	NEXTHOP_GRP_TYPE_MPATH,  /* hash-threshold nexthop group */

>> 

>

> yes, the description is fine. keep the comment about 'default type'.


OK.
diff mbox series

Patch

diff --git a/include/uapi/linux/nexthop.h b/include/uapi/linux/nexthop.h
index 2d4a1e784cf0..8efebf3cb9c7 100644
--- a/include/uapi/linux/nexthop.h
+++ b/include/uapi/linux/nexthop.h
@@ -22,6 +22,7 @@  struct nexthop_grp {
 
 enum {
 	NEXTHOP_GRP_TYPE_MPATH,  /* default type if not specified */
+	NEXTHOP_GRP_TYPE_RES,    /* resilient nexthop group */
 	__NEXTHOP_GRP_TYPE_MAX,
 };
 
@@ -52,8 +53,50 @@  enum {
 	NHA_FDB,	/* flag; nexthop belongs to a bridge fdb */
 	/* if NHA_FDB is added, OIF, BLACKHOLE, ENCAP cannot be set */
 
+	/* nested; resilient nexthop group attributes */
+	NHA_RES_GROUP,
+	/* nested; nexthop bucket attributes */
+	NHA_RES_BUCKET,
+
 	__NHA_MAX,
 };
 
 #define NHA_MAX	(__NHA_MAX - 1)
+
+enum {
+	NHA_RES_GROUP_UNSPEC,
+	/* Pad attribute for 64-bit alignment. */
+	NHA_RES_GROUP_PAD = NHA_RES_GROUP_UNSPEC,
+
+	/* u16; number of nexthop buckets in a resilient nexthop group */
+	NHA_RES_GROUP_BUCKETS,
+	/* clock_t as u32; nexthop bucket idle timer (per-group) */
+	NHA_RES_GROUP_IDLE_TIMER,
+	/* clock_t as u32; nexthop unbalanced timer */
+	NHA_RES_GROUP_UNBALANCED_TIMER,
+	/* clock_t as u64; nexthop unbalanced time */
+	NHA_RES_GROUP_UNBALANCED_TIME,
+
+	__NHA_RES_GROUP_MAX,
+};
+
+#define NHA_RES_GROUP_MAX	(__NHA_RES_GROUP_MAX - 1)
+
+enum {
+	NHA_RES_BUCKET_UNSPEC,
+	/* Pad attribute for 64-bit alignment. */
+	NHA_RES_BUCKET_PAD = NHA_RES_BUCKET_UNSPEC,
+
+	/* u16; nexthop bucket index */
+	NHA_RES_BUCKET_INDEX,
+	/* clock_t as u64; nexthop bucket idle time */
+	NHA_RES_BUCKET_IDLE_TIME,
+	/* u32; nexthop id assigned to the nexthop bucket */
+	NHA_RES_BUCKET_NH_ID,
+
+	__NHA_RES_BUCKET_MAX,
+};
+
+#define NHA_RES_BUCKET_MAX	(__NHA_RES_BUCKET_MAX - 1)
+
 #endif
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 91e4ca064d61..d35953bc7d53 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -178,6 +178,13 @@  enum {
 	RTM_GETVLAN,
 #define RTM_GETVLAN	RTM_GETVLAN
 
+	RTM_NEWNEXTHOPBUCKET = 116,
+#define RTM_NEWNEXTHOPBUCKET	RTM_NEWNEXTHOPBUCKET
+	RTM_DELNEXTHOPBUCKET,
+#define RTM_DELNEXTHOPBUCKET	RTM_DELNEXTHOPBUCKET
+	RTM_GETNEXTHOPBUCKET,
+#define RTM_GETNEXTHOPBUCKET	RTM_GETNEXTHOPBUCKET
+
 	__RTM_MAX,
 #define RTM_MAX		(((__RTM_MAX + 3) & ~3) - 1)
 };
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 56c54d0fbacc..7a94591da856 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -1492,6 +1492,8 @@  static struct nexthop *nexthop_create_group(struct net *net,
 	if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_MPATH) {
 		nhg->mpath = 1;
 		nhg->is_multipath = true;
+	} else if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_RES) {
+		goto out_no_nh;
 	}
 
 	WARN_ON_ONCE(nhg->mpath != 1);
diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c
index b69231918686..d59276f48d4f 100644
--- a/security/selinux/nlmsgtab.c
+++ b/security/selinux/nlmsgtab.c
@@ -88,6 +88,9 @@  static const struct nlmsg_perm nlmsg_route_perms[] =
 	{ RTM_NEWVLAN,		NETLINK_ROUTE_SOCKET__NLMSG_WRITE },
 	{ RTM_DELVLAN,		NETLINK_ROUTE_SOCKET__NLMSG_WRITE },
 	{ RTM_GETVLAN,		NETLINK_ROUTE_SOCKET__NLMSG_READ  },
+	{ RTM_NEWNEXTHOPBUCKET,	NETLINK_ROUTE_SOCKET__NLMSG_WRITE },
+	{ RTM_DELNEXTHOPBUCKET,	NETLINK_ROUTE_SOCKET__NLMSG_WRITE },
+	{ RTM_GETNEXTHOPBUCKET,	NETLINK_ROUTE_SOCKET__NLMSG_READ  },
 };
 
 static const struct nlmsg_perm nlmsg_tcpdiag_perms[] =
@@ -171,7 +174,7 @@  int selinux_nlmsg_lookup(u16 sclass, u16 nlmsg_type, u32 *perm)
 		 * structures at the top of this file with the new mappings
 		 * before updating the BUILD_BUG_ON() macro!
 		 */
-		BUILD_BUG_ON(RTM_MAX != (RTM_NEWVLAN + 3));
+		BUILD_BUG_ON(RTM_MAX != (RTM_NEWNEXTHOPBUCKET + 3));
 		err = nlmsg_perm(nlmsg_type, perm, nlmsg_route_perms,
 				 sizeof(nlmsg_route_perms));
 		break;