diff mbox series

[net-next,1/3] net: sched: update default qdisc visibility after Tx queue cnt changes

Message ID 20210913225332.662291-2-kuba@kernel.org
State New
Headers show
Series net: sched: update default qdisc visibility after Tx queue cnt changes | expand

Commit Message

Jakub Kicinski Sept. 13, 2021, 10:53 p.m. UTC
mq / mqprio make the default child qdiscs visible. They only do
so for the qdiscs which are within real_num_tx_queues when the
device is registered. Depending on order of calls in the driver,
or if user space changes config via ethtool -L the number of
qdiscs visible under tc qdisc show will differ from the number
of queues. This is confusing to users and potentially to system
configuration scripts which try to make sure qdiscs have the
right parameters.

Add a new Qdisc_ops callback and make relevant qdiscs TTRT.

Note that this uncovers the "shortcut" created by
commit 1f27cde313d7 ("net: sched: use pfifo_fast for non real queues")
The default child qdiscs beyond initial real_num_tx are always
pfifo_fast, no matter what the sysfs setting is. Fixing this
gets a little tricky because we'd need to keep a reference
on whatever the default qdisc was at the time of creation.
In practice this is likely an non-issue the qdiscs likely have
to be configured to non-default settings, so whatever user space
is doing such configuration can replace the pfifos... now that
it will see them.

Reported-by: Matthew Massey <matthewmassey@fb.com>
Reviewed-by: Dave Taht <dave.taht@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/sch_generic.h |  4 ++++
 net/core/dev.c            |  2 ++
 net/sched/sch_generic.c   |  9 +++++++++
 net/sched/sch_mq.c        | 24 ++++++++++++++++++++++++
 net/sched/sch_mqprio.c    | 23 +++++++++++++++++++++++
 5 files changed, 62 insertions(+)

Comments

Cong Wang Sept. 15, 2021, 4:31 p.m. UTC | #1
On Mon, Sep 13, 2021 at 3:53 PM Jakub Kicinski <kuba@kernel.org> wrote:
>

> mq / mqprio make the default child qdiscs visible. They only do

> so for the qdiscs which are within real_num_tx_queues when the

> device is registered. Depending on order of calls in the driver,

> or if user space changes config via ethtool -L the number of

> qdiscs visible under tc qdisc show will differ from the number

> of queues. This is confusing to users and potentially to system

> configuration scripts which try to make sure qdiscs have the

> right parameters.

>

> Add a new Qdisc_ops callback and make relevant qdiscs TTRT.

>

> Note that this uncovers the "shortcut" created by

> commit 1f27cde313d7 ("net: sched: use pfifo_fast for non real queues")

> The default child qdiscs beyond initial real_num_tx are always

> pfifo_fast, no matter what the sysfs setting is. Fixing this

> gets a little tricky because we'd need to keep a reference

> on whatever the default qdisc was at the time of creation.

> In practice this is likely an non-issue the qdiscs likely have

> to be configured to non-default settings, so whatever user space

> is doing such configuration can replace the pfifos... now that

> it will see them.

>


Looks reasonable.

> diff --git a/net/core/dev.c b/net/core/dev.c

> index 74fd402d26dd..f930329f0dc2 100644

> --- a/net/core/dev.c

> +++ b/net/core/dev.c

> @@ -2921,6 +2921,8 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)

>                 if (dev->num_tc)

>                         netif_setup_tc(dev, txq);

>

> +               dev_qdisc_change_real_num_tx(dev, txq);

> +


Don't we need to flip the device with dev_deactivate()+dev_activate()?
It looks like the only thing this function resets is qdisc itself, and only
partially.


>                 dev->real_num_tx_queues = txq;

>

>                 if (disabling) {

> diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c

> index a8dd06c74e31..66d2fbe9ef50 100644

> --- a/net/sched/sch_generic.c

> +++ b/net/sched/sch_generic.c

> @@ -1330,6 +1330,15 @@ static int qdisc_change_tx_queue_len(struct net_device *dev,

>         return 0;

>  }

>

> +void dev_qdisc_change_real_num_tx(struct net_device *dev,

> +                                 unsigned int new_real_tx)

> +{

> +       struct Qdisc *qdisc = dev->qdisc;

> +

> +       if (qdisc->ops->change_real_num_tx)

> +               qdisc->ops->change_real_num_tx(qdisc, new_real_tx);

> +}

> +

>  int dev_qdisc_change_tx_queue_len(struct net_device *dev)

>  {

>         bool up = dev->flags & IFF_UP;

> diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c

> index e79f1afe0cfd..db18d8a860f9 100644

> --- a/net/sched/sch_mq.c

> +++ b/net/sched/sch_mq.c

> @@ -125,6 +125,29 @@ static void mq_attach(struct Qdisc *sch)

>         priv->qdiscs = NULL;

>  }

>

> +static void mq_change_real_num_tx(struct Qdisc *sch, unsigned int new_real_tx)


This is nearly identical to mqprio_change_real_num_tx(), can we reuse
it?

Thanks.
Jakub Kicinski Sept. 15, 2021, 7:36 p.m. UTC | #2
On Wed, 15 Sep 2021 09:31:08 -0700 Cong Wang wrote:
> On Mon, Sep 13, 2021 at 3:53 PM Jakub Kicinski <kuba@kernel.org> wrote:

> > diff --git a/net/core/dev.c b/net/core/dev.c

> > index 74fd402d26dd..f930329f0dc2 100644

> > --- a/net/core/dev.c

> > +++ b/net/core/dev.c

> > @@ -2921,6 +2921,8 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)

> >                 if (dev->num_tc)

> >                         netif_setup_tc(dev, txq);

> >

> > +               dev_qdisc_change_real_num_tx(dev, txq);

> > +  

> 

> Don't we need to flip the device with dev_deactivate()+dev_activate()?

> It looks like the only thing this function resets is qdisc itself, and only

> partially.


We're only making the qdiscs visible, there should be 
no datapath-visible change.

> >                 dev->real_num_tx_queues = txq;

> >

> >                 if (disabling) {


> > diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c

> > index e79f1afe0cfd..db18d8a860f9 100644

> > --- a/net/sched/sch_mq.c

> > +++ b/net/sched/sch_mq.c

> > @@ -125,6 +125,29 @@ static void mq_attach(struct Qdisc *sch)

> >         priv->qdiscs = NULL;

> >  }

> >

> > +static void mq_change_real_num_tx(struct Qdisc *sch, unsigned int new_real_tx)  

> 

> This is nearly identical to mqprio_change_real_num_tx(), can we reuse

> it?


Indeed, I was a little unsure where best to place the helper.
Since mq is always built if mqprio is my instinct would be to
export mq_change_real_num_tx and use it in mqprio. But I didn't 
see any existing exports (mq_attach(), mq_queue_get() are also
identical and are not shared) so I just copy&pasted the logic.

LMK if (a) that's fine; (b) I should share the new code; 
(c) I should post a patch to share all the code that's identical;...
Cong Wang Sept. 17, 2021, 5:46 a.m. UTC | #3
On Wed, Sep 15, 2021 at 12:36 PM Jakub Kicinski <kuba@kernel.org> wrote:
>

> On Wed, 15 Sep 2021 09:31:08 -0700 Cong Wang wrote:

> > On Mon, Sep 13, 2021 at 3:53 PM Jakub Kicinski <kuba@kernel.org> wrote:

> > > diff --git a/net/core/dev.c b/net/core/dev.c

> > > index 74fd402d26dd..f930329f0dc2 100644

> > > --- a/net/core/dev.c

> > > +++ b/net/core/dev.c

> > > @@ -2921,6 +2921,8 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)

> > >                 if (dev->num_tc)

> > >                         netif_setup_tc(dev, txq);

> > >

> > > +               dev_qdisc_change_real_num_tx(dev, txq);

> > > +

> >

> > Don't we need to flip the device with dev_deactivate()+dev_activate()?

> > It looks like the only thing this function resets is qdisc itself, and only

> > partially.

>

> We're only making the qdiscs visible, there should be

> no datapath-visible change.


Isn't every qdisc under mq visible to datapath?

Packets can be pending in qdisc's, and qdisc's can be scheduled
in TX softirq, so essentially we need to flip the device like other
places.

>

> > >                 dev->real_num_tx_queues = txq;

> > >

> > >                 if (disabling) {

>

> > > diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c

> > > index e79f1afe0cfd..db18d8a860f9 100644

> > > --- a/net/sched/sch_mq.c

> > > +++ b/net/sched/sch_mq.c

> > > @@ -125,6 +125,29 @@ static void mq_attach(struct Qdisc *sch)

> > >         priv->qdiscs = NULL;

> > >  }

> > >

> > > +static void mq_change_real_num_tx(struct Qdisc *sch, unsigned int new_real_tx)

> >

> > This is nearly identical to mqprio_change_real_num_tx(), can we reuse

> > it?

>

> Indeed, I was a little unsure where best to place the helper.

> Since mq is always built if mqprio is my instinct would be to

> export mq_change_real_num_tx and use it in mqprio. But I didn't

> see any existing exports (mq_attach(), mq_queue_get() are also

> identical and are not shared) so I just copy&pasted the logic.


What about net/sched/sch_generic.c?

>

> LMK if (a) that's fine; (b) I should share the new code;

> (c) I should post a patch to share all the code that's identical;...


I think you can put the code in net/sched/sch_generic.c and export
it for mqprio (mq is built-in so can just call it).

Thanks.
Jakub Kicinski Sept. 17, 2021, 1:08 p.m. UTC | #4
On Thu, 16 Sep 2021 22:46:42 -0700 Cong Wang wrote:
> On Wed, Sep 15, 2021 at 12:36 PM Jakub Kicinski <kuba@kernel.org> wrote:

> > On Wed, 15 Sep 2021 09:31:08 -0700 Cong Wang wrote:  

> > > Don't we need to flip the device with dev_deactivate()+dev_activate()?

> > > It looks like the only thing this function resets is qdisc itself, and only

> > > partially.  

> >

> > We're only making the qdiscs visible, there should be

> > no datapath-visible change.  

> 

> Isn't every qdisc under mq visible to datapath?

> 

> Packets can be pending in qdisc's, and qdisc's can be scheduled

> in TX softirq, so essentially we need to flip the device like other


By visible I mean tc qdisc dump shows them. I'm adding/removing 
the qdiscs to netdev->qdisc_hash. That's only used by control 
paths to dump or find qdiscs by handle.

> > > This is nearly identical to mqprio_change_real_num_tx(), can we reuse

> > > it?  

> >

> > Indeed, I was a little unsure where best to place the helper.

> > Since mq is always built if mqprio is my instinct would be to

> > export mq_change_real_num_tx and use it in mqprio. But I didn't

> > see any existing exports (mq_attach(), mq_queue_get() are also

> > identical and are not shared) so I just copy&pasted the logic.  

> 

> What about net/sched/sch_generic.c?

> 

> > LMK if (a) that's fine; (b) I should share the new code;

> > (c) I should post a patch to share all the code that's identical;...  

> 

> I think you can put the code in net/sched/sch_generic.c and export

> it for mqprio (mq is built-in so can just call it).


Will do, thanks!
Cong Wang Sept. 19, 2021, 11:38 p.m. UTC | #5
On Fri, Sep 17, 2021 at 6:09 AM Jakub Kicinski <kuba@kernel.org> wrote:
>

> On Thu, 16 Sep 2021 22:46:42 -0700 Cong Wang wrote:

> > On Wed, Sep 15, 2021 at 12:36 PM Jakub Kicinski <kuba@kernel.org> wrote:

> > > On Wed, 15 Sep 2021 09:31:08 -0700 Cong Wang wrote:

> > > > Don't we need to flip the device with dev_deactivate()+dev_activate()?

> > > > It looks like the only thing this function resets is qdisc itself, and only

> > > > partially.

> > >

> > > We're only making the qdiscs visible, there should be

> > > no datapath-visible change.

> >

> > Isn't every qdisc under mq visible to datapath?

> >

> > Packets can be pending in qdisc's, and qdisc's can be scheduled

> > in TX softirq, so essentially we need to flip the device like other

>

> By visible I mean tc qdisc dump shows them. I'm adding/removing

> the qdiscs to netdev->qdisc_hash. That's only used by control

> paths to dump or find qdiscs by handle.


Oh, I see, I missed this difference. If datapath can't see this change
immediately, we don't need to flip it.

Thanks.
diff mbox series

Patch

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index c0069ac00e62..8c2d611639fc 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -308,6 +308,8 @@  struct Qdisc_ops {
 					  struct netlink_ext_ack *extack);
 	void			(*attach)(struct Qdisc *sch);
 	int			(*change_tx_queue_len)(struct Qdisc *, unsigned int);
+	void			(*change_real_num_tx)(struct Qdisc *sch,
+						      unsigned int new_real_tx);
 
 	int			(*dump)(struct Qdisc *, struct sk_buff *);
 	int			(*dump_stats)(struct Qdisc *, struct gnet_dump *);
@@ -684,6 +686,8 @@  void qdisc_class_hash_grow(struct Qdisc *, struct Qdisc_class_hash *);
 void qdisc_class_hash_destroy(struct Qdisc_class_hash *);
 
 int dev_qdisc_change_tx_queue_len(struct net_device *dev);
+void dev_qdisc_change_real_num_tx(struct net_device *dev,
+				  unsigned int new_real_tx);
 void dev_init_scheduler(struct net_device *dev);
 void dev_shutdown(struct net_device *dev);
 void dev_activate(struct net_device *dev);
diff --git a/net/core/dev.c b/net/core/dev.c
index 74fd402d26dd..f930329f0dc2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2921,6 +2921,8 @@  int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
 		if (dev->num_tc)
 			netif_setup_tc(dev, txq);
 
+		dev_qdisc_change_real_num_tx(dev, txq);
+
 		dev->real_num_tx_queues = txq;
 
 		if (disabling) {
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index a8dd06c74e31..66d2fbe9ef50 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -1330,6 +1330,15 @@  static int qdisc_change_tx_queue_len(struct net_device *dev,
 	return 0;
 }
 
+void dev_qdisc_change_real_num_tx(struct net_device *dev,
+				  unsigned int new_real_tx)
+{
+	struct Qdisc *qdisc = dev->qdisc;
+
+	if (qdisc->ops->change_real_num_tx)
+		qdisc->ops->change_real_num_tx(qdisc, new_real_tx);
+}
+
 int dev_qdisc_change_tx_queue_len(struct net_device *dev)
 {
 	bool up = dev->flags & IFF_UP;
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index e79f1afe0cfd..db18d8a860f9 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -125,6 +125,29 @@  static void mq_attach(struct Qdisc *sch)
 	priv->qdiscs = NULL;
 }
 
+static void mq_change_real_num_tx(struct Qdisc *sch, unsigned int new_real_tx)
+{
+#ifdef CONFIG_NET_SCHED
+	struct net_device *dev = qdisc_dev(sch);
+	struct Qdisc *qdisc;
+	unsigned int i;
+
+	for (i = new_real_tx; i < dev->real_num_tx_queues; i++) {
+		qdisc = netdev_get_tx_queue(dev, i)->qdisc_sleeping;
+		/* Only update the default qdiscs we created,
+		 * qdiscs with handles are always hashed.
+		 */
+		if (qdisc != &noop_qdisc && !qdisc->handle)
+			qdisc_hash_del(qdisc);
+	}
+	for (i = dev->real_num_tx_queues; i < new_real_tx; i++) {
+		qdisc = netdev_get_tx_queue(dev, i)->qdisc_sleeping;
+		if (qdisc != &noop_qdisc && !qdisc->handle)
+			qdisc_hash_add(qdisc, false);
+	}
+#endif
+}
+
 static int mq_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct net_device *dev = qdisc_dev(sch);
@@ -288,6 +311,7 @@  struct Qdisc_ops mq_qdisc_ops __read_mostly = {
 	.init		= mq_init,
 	.destroy	= mq_destroy,
 	.attach		= mq_attach,
+	.change_real_num_tx = mq_change_real_num_tx,
 	.dump		= mq_dump,
 	.owner		= THIS_MODULE,
 };
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 8766ab5b8788..7f23a92849d5 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -306,6 +306,28 @@  static void mqprio_attach(struct Qdisc *sch)
 	priv->qdiscs = NULL;
 }
 
+static void mqprio_change_real_num_tx(struct Qdisc *sch,
+				      unsigned int new_real_tx)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	struct Qdisc *qdisc;
+	unsigned int i;
+
+	for (i = new_real_tx; i < dev->real_num_tx_queues; i++) {
+		qdisc = netdev_get_tx_queue(dev, i)->qdisc_sleeping;
+		/* Only update the default qdiscs we created,
+		 * qdiscs with handles are always hashed.
+		 */
+		if (qdisc != &noop_qdisc && !qdisc->handle)
+			qdisc_hash_del(qdisc);
+	}
+	for (i = dev->real_num_tx_queues; i < new_real_tx; i++) {
+		qdisc = netdev_get_tx_queue(dev, i)->qdisc_sleeping;
+		if (qdisc != &noop_qdisc && !qdisc->handle)
+			qdisc_hash_add(qdisc, false);
+	}
+}
+
 static struct netdev_queue *mqprio_queue_get(struct Qdisc *sch,
 					     unsigned long cl)
 {
@@ -623,6 +645,7 @@  static struct Qdisc_ops mqprio_qdisc_ops __read_mostly = {
 	.init		= mqprio_init,
 	.destroy	= mqprio_destroy,
 	.attach		= mqprio_attach,
+	.change_real_num_tx = mqprio_change_real_num_tx,
 	.dump		= mqprio_dump,
 	.owner		= THIS_MODULE,
 };