diff mbox series

[net-next,v9,3/3] net: add sysfs attribute to control napi threaded mode

Message ID 20210129181812.256216-4-weiwan@google.com
State New
Headers show
Series implement kthread based napi poll | expand

Commit Message

Wei Wang Jan. 29, 2021, 6:18 p.m. UTC
This patch adds a new sysfs attribute to the network device class.
Said attribute provides a per-device control to enable/disable the
threaded mode for all the napi instances of the given network device,
without the need for a device up/down.
User sets it to 1 or 0 to enable or disable threaded mode.

Co-developed-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Co-developed-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Co-developed-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Wei Wang <weiwan@google.com>
---
 Documentation/ABI/testing/sysfs-class-net | 15 ++++++
 include/linux/netdevice.h                 |  2 +
 net/core/dev.c                            | 61 ++++++++++++++++++++++-
 net/core/net-sysfs.c                      | 50 +++++++++++++++++++
 4 files changed, 126 insertions(+), 2 deletions(-)

Comments

Jakub Kicinski Feb. 3, 2021, 12:28 a.m. UTC | #1
On Fri, 29 Jan 2021 10:18:12 -0800 Wei Wang wrote:
> This patch adds a new sysfs attribute to the network device class.

> Said attribute provides a per-device control to enable/disable the

> threaded mode for all the napi instances of the given network device,

> without the need for a device up/down.

> User sets it to 1 or 0 to enable or disable threaded mode.

> 

> Co-developed-by: Paolo Abeni <pabeni@redhat.com>

> Signed-off-by: Paolo Abeni <pabeni@redhat.com>

> Co-developed-by: Hannes Frederic Sowa <hannes@stressinduktion.org>

> Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>

> Co-developed-by: Felix Fietkau <nbd@nbd.name>

> Signed-off-by: Felix Fietkau <nbd@nbd.name>

> Signed-off-by: Wei Wang <weiwan@google.com>


> +static int napi_set_threaded(struct napi_struct *n, bool threaded)

> +{

> +	int err = 0;

> +

> +	if (threaded == !!test_bit(NAPI_STATE_THREADED, &n->state))

> +		return 0;

> +

> +	if (!threaded) {

> +		clear_bit(NAPI_STATE_THREADED, &n->state);


Can we put a note in the commit message saying that stopping the
threads is slightly tricky but we'll do it if someone complains?

Or is there a stronger reason than having to wait for thread to finish
up with the NAPI not to stop them?

> +		return 0;

> +	}

> +

> +	if (!n->thread) {

> +		err = napi_kthread_create(n);

> +		if (err)

> +			return err;

> +	}

> +

> +	/* Make sure kthread is created before THREADED bit

> +	 * is set.

> +	 */

> +	smp_mb__before_atomic();

> +	set_bit(NAPI_STATE_THREADED, &n->state);

> +

> +	return 0;

> +}

> +

> +static void dev_disable_threaded_all(struct net_device *dev)

> +{

> +	struct napi_struct *napi;

> +

> +	list_for_each_entry(napi, &dev->napi_list, dev_list)

> +		napi_set_threaded(napi, false);

> +	dev->threaded = 0;

> +}

> +

> +int dev_set_threaded(struct net_device *dev, bool threaded)

> +{

> +	struct napi_struct *napi;

> +	int ret;

> +

> +	dev->threaded = threaded;

> +	list_for_each_entry(napi, &dev->napi_list, dev_list) {

> +		ret = napi_set_threaded(napi, threaded);

> +		if (ret) {

> +			/* Error occurred on one of the napi,

> +			 * reset threaded mode on all napi.

> +			 */

> +			dev_disable_threaded_all(dev);

> +			break;

> +		}

> +	}

> +

> +	return ret;

> +}

> +

>  void netif_napi_add(struct net_device *dev, struct napi_struct *napi,

>  		    int (*poll)(struct napi_struct *, int), int weight)

>  {

> diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c

> index daf502c13d6d..884f049ee395 100644

> --- a/net/core/net-sysfs.c

> +++ b/net/core/net-sysfs.c

> @@ -538,6 +538,55 @@ static ssize_t phys_switch_id_show(struct device *dev,

>  }

>  static DEVICE_ATTR_RO(phys_switch_id);

>  

> +static ssize_t threaded_show(struct device *dev,

> +			     struct device_attribute *attr, char *buf)

> +{

> +	struct net_device *netdev = to_net_dev(dev);

> +	int ret;

> +

> +	if (!rtnl_trylock())

> +		return restart_syscall();

> +

> +	if (!dev_isalive(netdev)) {

> +		ret = -EINVAL;

> +		goto unlock;

> +	}

> +

> +	if (list_empty(&netdev->napi_list)) {

> +		ret = -EOPNOTSUPP;

> +		goto unlock;

> +	}


Maybe others disagree but I'd take this check out. What's wrong with
letting users see that threaded napi is disabled for devices without
NAPI?

This will also help a little devices which remove NAPIs when they are
down.

I've been caught off guard in the past by the fact that kernel returns
-ENOENT for XPS map when device has a single queue.

> +	ret = sprintf(buf, fmt_dec, netdev->threaded);

> +

> +unlock:

> +	rtnl_unlock();

> +	return ret;

> +}

> +

> +static int modify_napi_threaded(struct net_device *dev, unsigned long val)

> +{

> +	int ret;

> +

> +	if (list_empty(&dev->napi_list))

> +		return -EOPNOTSUPP;

> +

> +	if (val != 0 && val != 1)

> +		return -EOPNOTSUPP;

> +

> +	ret = dev_set_threaded(dev, val);

> +

> +	return ret;


return dev_set_threaded(dev, val);

> +}
Wei Wang Feb. 3, 2021, 1:32 a.m. UTC | #2
On Tue, Feb 2, 2021 at 4:28 PM Jakub Kicinski <kuba@kernel.org> wrote:
>

> On Fri, 29 Jan 2021 10:18:12 -0800 Wei Wang wrote:

> > This patch adds a new sysfs attribute to the network device class.

> > Said attribute provides a per-device control to enable/disable the

> > threaded mode for all the napi instances of the given network device,

> > without the need for a device up/down.

> > User sets it to 1 or 0 to enable or disable threaded mode.

> >

> > Co-developed-by: Paolo Abeni <pabeni@redhat.com>

> > Signed-off-by: Paolo Abeni <pabeni@redhat.com>

> > Co-developed-by: Hannes Frederic Sowa <hannes@stressinduktion.org>

> > Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>

> > Co-developed-by: Felix Fietkau <nbd@nbd.name>

> > Signed-off-by: Felix Fietkau <nbd@nbd.name>

> > Signed-off-by: Wei Wang <weiwan@google.com>

>

> > +static int napi_set_threaded(struct napi_struct *n, bool threaded)

> > +{

> > +     int err = 0;

> > +

> > +     if (threaded == !!test_bit(NAPI_STATE_THREADED, &n->state))

> > +             return 0;

> > +

> > +     if (!threaded) {

> > +             clear_bit(NAPI_STATE_THREADED, &n->state);

>

> Can we put a note in the commit message saying that stopping the

> threads is slightly tricky but we'll do it if someone complains?

>

> Or is there a stronger reason than having to wait for thread to finish

> up with the NAPI not to stop them?

Yes. The main reason is the napi might be polled at the moment when
clearing this bit. We have to wait for the thread to finish this round
of polling.
Will add a comment on this.

>

> > +             return 0;

> > +     }

> > +

> > +     if (!n->thread) {

> > +             err = napi_kthread_create(n);

> > +             if (err)

> > +                     return err;

> > +     }

> > +

> > +     /* Make sure kthread is created before THREADED bit

> > +      * is set.

> > +      */

> > +     smp_mb__before_atomic();

> > +     set_bit(NAPI_STATE_THREADED, &n->state);

> > +

> > +     return 0;

> > +}

> > +

> > +static void dev_disable_threaded_all(struct net_device *dev)

> > +{

> > +     struct napi_struct *napi;

> > +

> > +     list_for_each_entry(napi, &dev->napi_list, dev_list)

> > +             napi_set_threaded(napi, false);

> > +     dev->threaded = 0;

> > +}

> > +

> > +int dev_set_threaded(struct net_device *dev, bool threaded)

> > +{

> > +     struct napi_struct *napi;

> > +     int ret;

> > +

> > +     dev->threaded = threaded;

> > +     list_for_each_entry(napi, &dev->napi_list, dev_list) {

> > +             ret = napi_set_threaded(napi, threaded);

> > +             if (ret) {

> > +                     /* Error occurred on one of the napi,

> > +                      * reset threaded mode on all napi.

> > +                      */

> > +                     dev_disable_threaded_all(dev);

> > +                     break;

> > +             }

> > +     }

> > +

> > +     return ret;

> > +}

> > +

> >  void netif_napi_add(struct net_device *dev, struct napi_struct *napi,

> >                   int (*poll)(struct napi_struct *, int), int weight)

> >  {

> > diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c

> > index daf502c13d6d..884f049ee395 100644

> > --- a/net/core/net-sysfs.c

> > +++ b/net/core/net-sysfs.c

> > @@ -538,6 +538,55 @@ static ssize_t phys_switch_id_show(struct device *dev,

> >  }

> >  static DEVICE_ATTR_RO(phys_switch_id);

> >

> > +static ssize_t threaded_show(struct device *dev,

> > +                          struct device_attribute *attr, char *buf)

> > +{

> > +     struct net_device *netdev = to_net_dev(dev);

> > +     int ret;

> > +

> > +     if (!rtnl_trylock())

> > +             return restart_syscall();

> > +

> > +     if (!dev_isalive(netdev)) {

> > +             ret = -EINVAL;

> > +             goto unlock;

> > +     }

> > +

> > +     if (list_empty(&netdev->napi_list)) {

> > +             ret = -EOPNOTSUPP;

> > +             goto unlock;

> > +     }

>

> Maybe others disagree but I'd take this check out. What's wrong with

> letting users see that threaded napi is disabled for devices without

> NAPI?

>

> This will also help a little devices which remove NAPIs when they are

> down.

>

> I've been caught off guard in the past by the fact that kernel returns

> -ENOENT for XPS map when device has a single queue.

>

Ack.

> > +     ret = sprintf(buf, fmt_dec, netdev->threaded);

> > +

> > +unlock:

> > +     rtnl_unlock();

> > +     return ret;

> > +}

> > +

> > +static int modify_napi_threaded(struct net_device *dev, unsigned long val)

> > +{

> > +     int ret;

> > +

> > +     if (list_empty(&dev->napi_list))

> > +             return -EOPNOTSUPP;

> > +

> > +     if (val != 0 && val != 1)

> > +             return -EOPNOTSUPP;

> > +

> > +     ret = dev_set_threaded(dev, val);

> > +

> > +     return ret;

>

> return dev_set_threaded(dev, val);

>

Ack.
> > +}
Alexander Duyck Feb. 3, 2021, 5:30 p.m. UTC | #3
On Tue, Feb 2, 2021 at 5:01 PM Jakub Kicinski <kuba@kernel.org> wrote:
>

> On Fri, 29 Jan 2021 10:18:12 -0800 Wei Wang wrote:

> > This patch adds a new sysfs attribute to the network device class.

> > Said attribute provides a per-device control to enable/disable the

> > threaded mode for all the napi instances of the given network device,

> > without the need for a device up/down.

> > User sets it to 1 or 0 to enable or disable threaded mode.

> >

> > Co-developed-by: Paolo Abeni <pabeni@redhat.com>

> > Signed-off-by: Paolo Abeni <pabeni@redhat.com>

> > Co-developed-by: Hannes Frederic Sowa <hannes@stressinduktion.org>

> > Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>

> > Co-developed-by: Felix Fietkau <nbd@nbd.name>

> > Signed-off-by: Felix Fietkau <nbd@nbd.name>

> > Signed-off-by: Wei Wang <weiwan@google.com>

>

> > +static int napi_set_threaded(struct napi_struct *n, bool threaded)

> > +{

> > +     int err = 0;

> > +

> > +     if (threaded == !!test_bit(NAPI_STATE_THREADED, &n->state))

> > +             return 0;

> > +

> > +     if (!threaded) {

> > +             clear_bit(NAPI_STATE_THREADED, &n->state);

>

> Can we put a note in the commit message saying that stopping the

> threads is slightly tricky but we'll do it if someone complains?

>

> Or is there a stronger reason than having to wait for thread to finish

> up with the NAPI not to stop them?


Normally if we are wanting to shut down NAPI we would have to go
through a coordinated process with us setting the NAPI_STATE_DISABLE
bit and then having to sit on NAPI_STATE_SCHED. Doing that would
likely cause a traffic hiccup if somebody toggles this while the NIC
is active so probably best to not interfere.

I suspect this should be more than enough to have us switch in and out
of the threaded setup. I don't think leaving the threads allocated
after someone has enabled it once should be much of an issue. As far
as using just the bit to do the disable, I think the most it would
probably take is a second or so for the queues to switch over from
threaded to normal NAPI again.

> > +             return 0;

> > +     }

> > +

> > +     if (!n->thread) {

> > +             err = napi_kthread_create(n);

> > +             if (err)

> > +                     return err;

> > +     }

> > +

> > +     /* Make sure kthread is created before THREADED bit

> > +      * is set.

> > +      */

> > +     smp_mb__before_atomic();

> > +     set_bit(NAPI_STATE_THREADED, &n->state);

> > +

> > +     return 0;

> > +}

> > +

> > +static void dev_disable_threaded_all(struct net_device *dev)

> > +{

> > +     struct napi_struct *napi;

> > +

> > +     list_for_each_entry(napi, &dev->napi_list, dev_list)

> > +             napi_set_threaded(napi, false);

> > +     dev->threaded = 0;

> > +}

> > +

> > +int dev_set_threaded(struct net_device *dev, bool threaded)

> > +{

> > +     struct napi_struct *napi;

> > +     int ret;

> > +

> > +     dev->threaded = threaded;

> > +     list_for_each_entry(napi, &dev->napi_list, dev_list) {

> > +             ret = napi_set_threaded(napi, threaded);

> > +             if (ret) {

> > +                     /* Error occurred on one of the napi,

> > +                      * reset threaded mode on all napi.

> > +                      */

> > +                     dev_disable_threaded_all(dev);

> > +                     break;

> > +             }

> > +     }

> > +

> > +     return ret;

> > +}

> > +

> >  void netif_napi_add(struct net_device *dev, struct napi_struct *napi,

> >                   int (*poll)(struct napi_struct *, int), int weight)

> >  {

> > diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c

> > index daf502c13d6d..884f049ee395 100644

> > --- a/net/core/net-sysfs.c

> > +++ b/net/core/net-sysfs.c

> > @@ -538,6 +538,55 @@ static ssize_t phys_switch_id_show(struct device *dev,

> >  }

> >  static DEVICE_ATTR_RO(phys_switch_id);

> >

> > +static ssize_t threaded_show(struct device *dev,

> > +                          struct device_attribute *attr, char *buf)

> > +{

> > +     struct net_device *netdev = to_net_dev(dev);

> > +     int ret;

> > +

> > +     if (!rtnl_trylock())

> > +             return restart_syscall();

> > +

> > +     if (!dev_isalive(netdev)) {

> > +             ret = -EINVAL;

> > +             goto unlock;

> > +     }

> > +

> > +     if (list_empty(&netdev->napi_list)) {

> > +             ret = -EOPNOTSUPP;

> > +             goto unlock;

> > +     }

>

> Maybe others disagree but I'd take this check out. What's wrong with

> letting users see that threaded napi is disabled for devices without

> NAPI?

>

> This will also help a little devices which remove NAPIs when they are

> down.

>

> I've been caught off guard in the past by the fact that kernel returns

> -ENOENT for XPS map when device has a single queue.


I agree there isn't any point to the check. I think this is a
hold-over from the original code that was querying each napi structure
assigned to the device.

> > +     ret = sprintf(buf, fmt_dec, netdev->threaded);

> > +

> > +unlock:

> > +     rtnl_unlock();

> > +     return ret;

> > +}

> > +

> > +static int modify_napi_threaded(struct net_device *dev, unsigned long val)

> > +{

> > +     int ret;

> > +

> > +     if (list_empty(&dev->napi_list))

> > +             return -EOPNOTSUPP;

> > +

> > +     if (val != 0 && val != 1)

> > +             return -EOPNOTSUPP;

> > +

> > +     ret = dev_set_threaded(dev, val);

> > +

> > +     return ret;

>

> return dev_set_threaded(dev, val);

>

> > +}
diff mbox series

Patch

diff --git a/Documentation/ABI/testing/sysfs-class-net b/Documentation/ABI/testing/sysfs-class-net
index 1f2002df5ba2..1419103d11f9 100644
--- a/Documentation/ABI/testing/sysfs-class-net
+++ b/Documentation/ABI/testing/sysfs-class-net
@@ -337,3 +337,18 @@  Contact:	netdev@vger.kernel.org
 Description:
 		32-bit unsigned integer counting the number of times the link has
 		been down
+
+What:		/sys/class/net/<iface>/threaded
+Date:		Jan 2021
+KernelVersion:	5.12
+Contact:	netdev@vger.kernel.org
+Description:
+		Boolean value to control the threaded mode per device. User could
+		set this value to enable/disable threaded mode for all napi
+		belonging to this device, without the need to do device up/down.
+
+		Possible values:
+		== ==================================
+		0  threaded mode disabled for this dev
+		1  threaded mode enabled for this dev
+		== ==================================
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f1e9fe9017ac..8ac2db361ae3 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -497,6 +497,8 @@  static inline bool napi_complete(struct napi_struct *n)
 	return napi_complete_done(n, 0);
 }
 
+int dev_set_threaded(struct net_device *dev, bool threaded);
+
 /**
  *	napi_disable - prevent NAPI from scheduling
  *	@n: NAPI context
diff --git a/net/core/dev.c b/net/core/dev.c
index 743dd69fba19..1897af6a46eb 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4288,8 +4288,9 @@  static inline void ____napi_schedule(struct softnet_data *sd,
 
 	if (test_bit(NAPI_STATE_THREADED, &napi->state)) {
 		/* Paired with smp_mb__before_atomic() in
-		 * napi_enable(). Use READ_ONCE() to guarantee
-		 * a complete read on napi->thread. Only call
+		 * napi_enable()/napi_set_threaded().
+		 * Use READ_ONCE() to guarantee a complete
+		 * read on napi->thread. Only call
 		 * wake_up_process() when it's not NULL.
 		 */
 		thread = READ_ONCE(napi->thread);
@@ -6740,6 +6741,62 @@  static void init_gro_hash(struct napi_struct *napi)
 	napi->gro_bitmask = 0;
 }
 
+static int napi_set_threaded(struct napi_struct *n, bool threaded)
+{
+	int err = 0;
+
+	if (threaded == !!test_bit(NAPI_STATE_THREADED, &n->state))
+		return 0;
+
+	if (!threaded) {
+		clear_bit(NAPI_STATE_THREADED, &n->state);
+		return 0;
+	}
+
+	if (!n->thread) {
+		err = napi_kthread_create(n);
+		if (err)
+			return err;
+	}
+
+	/* Make sure kthread is created before THREADED bit
+	 * is set.
+	 */
+	smp_mb__before_atomic();
+	set_bit(NAPI_STATE_THREADED, &n->state);
+
+	return 0;
+}
+
+static void dev_disable_threaded_all(struct net_device *dev)
+{
+	struct napi_struct *napi;
+
+	list_for_each_entry(napi, &dev->napi_list, dev_list)
+		napi_set_threaded(napi, false);
+	dev->threaded = 0;
+}
+
+int dev_set_threaded(struct net_device *dev, bool threaded)
+{
+	struct napi_struct *napi;
+	int ret;
+
+	dev->threaded = threaded;
+	list_for_each_entry(napi, &dev->napi_list, dev_list) {
+		ret = napi_set_threaded(napi, threaded);
+		if (ret) {
+			/* Error occurred on one of the napi,
+			 * reset threaded mode on all napi.
+			 */
+			dev_disable_threaded_all(dev);
+			break;
+		}
+	}
+
+	return ret;
+}
+
 void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
 		    int (*poll)(struct napi_struct *, int), int weight)
 {
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index daf502c13d6d..884f049ee395 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -538,6 +538,55 @@  static ssize_t phys_switch_id_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(phys_switch_id);
 
+static ssize_t threaded_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	struct net_device *netdev = to_net_dev(dev);
+	int ret;
+
+	if (!rtnl_trylock())
+		return restart_syscall();
+
+	if (!dev_isalive(netdev)) {
+		ret = -EINVAL;
+		goto unlock;
+	}
+
+	if (list_empty(&netdev->napi_list)) {
+		ret = -EOPNOTSUPP;
+		goto unlock;
+	}
+
+	ret = sprintf(buf, fmt_dec, netdev->threaded);
+
+unlock:
+	rtnl_unlock();
+	return ret;
+}
+
+static int modify_napi_threaded(struct net_device *dev, unsigned long val)
+{
+	int ret;
+
+	if (list_empty(&dev->napi_list))
+		return -EOPNOTSUPP;
+
+	if (val != 0 && val != 1)
+		return -EOPNOTSUPP;
+
+	ret = dev_set_threaded(dev, val);
+
+	return ret;
+}
+
+static ssize_t threaded_store(struct device *dev,
+			      struct device_attribute *attr,
+			      const char *buf, size_t len)
+{
+	return netdev_store(dev, attr, buf, len, modify_napi_threaded);
+}
+static DEVICE_ATTR_RW(threaded);
+
 static struct attribute *net_class_attrs[] __ro_after_init = {
 	&dev_attr_netdev_group.attr,
 	&dev_attr_type.attr,
@@ -570,6 +619,7 @@  static struct attribute *net_class_attrs[] __ro_after_init = {
 	&dev_attr_proto_down.attr,
 	&dev_attr_carrier_up_count.attr,
 	&dev_attr_carrier_down_count.attr,
+	&dev_attr_threaded.attr,
 	NULL,
 };
 ATTRIBUTE_GROUPS(net_class);