diff mbox series

[net-next,v2,3/7] net: dsa: free skb->cb usage in core driver

Message ID 20210426093802.38652-4-yangbo.lu@nxp.com
State Superseded
Headers show
Series Support Ocelot PTP Sync one-step timestamping | expand

Commit Message

Y.b. Lu April 26, 2021, 9:37 a.m. UTC
Free skb->cb usage in core driver and let device drivers decide to
use or not. The reason having a DSA_SKB_CB(skb)->clone was because
dsa_skb_tx_timestamp() which may set the clone pointer was called
before p->xmit() which would use the clone if any, and the device
driver has no way to initialize the clone pointer.

Although for now putting memset(skb->cb, 0, 48) at beginning of
dsa_slave_xmit() by this patch is not very good, there is still way
to improve this. Otherwise, some other new features, like one-step
timestamp which needs a flag of skb marked in dsa_skb_tx_timestamp(),
and handles as one-step timestamp in p->xmit() will face same
situation.

Signed-off-by: Yangbo Lu <yangbo.lu@nxp.com>
---
Changes for v2:
	- Added this patch.
---
 drivers/net/dsa/ocelot/felix.c         |  1 +
 drivers/net/dsa/sja1105/sja1105_main.c |  2 +-
 drivers/net/dsa/sja1105/sja1105_ptp.c  |  4 +++-
 drivers/net/ethernet/mscc/ocelot.c     |  6 +++---
 drivers/net/ethernet/mscc/ocelot_net.c |  2 +-
 include/linux/dsa/sja1105.h            |  3 ++-
 include/net/dsa.h                      | 14 --------------
 include/soc/mscc/ocelot.h              |  8 ++++++++
 net/dsa/slave.c                        |  3 +--
 net/dsa/tag_ocelot.c                   |  8 ++++----
 net/dsa/tag_ocelot_8021q.c             |  8 ++++----
 11 files changed, 28 insertions(+), 31 deletions(-)

Comments

Vladimir Oltean April 26, 2021, 6:39 p.m. UTC | #1
On Mon, Apr 26, 2021 at 06:38:46AM -0700, Richard Cochran wrote:
> On Mon, Apr 26, 2021 at 05:37:58PM +0800, Yangbo Lu wrote:
> > @@ -624,7 +623,7 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
> >  
> >  	dev_sw_netstats_tx_add(dev, 1, skb->len);
> >  
> > -	DSA_SKB_CB(skb)->clone = NULL;
> > +	memset(skb->cb, 0, 48);
> 
> Replace hard coded 48 with sizeof() please.

You mean just a trivial change like this, right?

	memset(skb->cb, 0, sizeof(skb->cb));

And not what I had suggested in v1, which would have looked something
like this:

-----------------------------[cut here]-----------------------------
diff --git a/include/net/dsa.h b/include/net/dsa.h
index e1a2610a0e06..c75b249e846f 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -92,6 +92,7 @@ struct dsa_device_ops {
 	 */
 	bool (*filter)(const struct sk_buff *skb, struct net_device *dev);
 	unsigned int overhead;
+	unsigned int skb_cb_size;
 	const char *name;
 	enum dsa_tag_protocol proto;
 	/* Some tagging protocols either mangle or shift the destination MAC
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 2033d8bac23d..2230596b48b7 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -610,11 +610,14 @@ static int dsa_realloc_skb(struct sk_buff *skb, struct net_device *dev)
 static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct dsa_slave_priv *p = netdev_priv(dev);
+	const struct dsa_device_ops *tag_ops;
 	struct sk_buff *nskb;
 
 	dev_sw_netstats_tx_add(dev, 1, skb->len);
 
-	memset(skb->cb, 0, 48);
+	tag_ops = p->dp->cpu_dp->tag_ops;
+	if (tag_ops->skb_cb_size)
+		memset(skb->cb, 0, tag_ops->skb_cb_size);
 
 	/* Handle tx timestamp if any */
 	dsa_skb_tx_timestamp(p, skb);
diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c
index 50496013cdb7..1b337fa104dc 100644
--- a/net/dsa/tag_sja1105.c
+++ b/net/dsa/tag_sja1105.c
@@ -365,6 +365,7 @@ static const struct dsa_device_ops sja1105_netdev_ops = {
 	.overhead = VLAN_HLEN,
 	.flow_dissect = sja1105_flow_dissect,
 	.promisc_on_master = true,
+	.skb_cb_size = sizeof(struct sja1105_skb_cb),
 };
 
 MODULE_LICENSE("GPL v2");
-----------------------------[cut here]-----------------------------

I wanted to see how badly impacted would the performance be, so I
created an IPv4 forwarding setup on the NXP LS1021A-TSN board (gianfar +
sja1105):

#!/bin/bash

ETH0=swp3
ETH1=swp2

systemctl stop ptp4l # runs a BPF classifier on every packet
systemctl stop phc2sys

echo 1 > /proc/sys/net/ipv4/ip_forward
ip addr flush $ETH0 && ip addr add 192.168.100.1/24 dev $ETH0 && ip link set $ETH0 up
ip addr flush $ETH1 && ip addr add 192.168.200.1/24 dev $ETH1 && ip link set $ETH1 up

arp -s 192.168.100.2 00:04:9f:06:00:09 dev $ETH0
arp -s 192.168.200.2 00:04:9f:06:00:0a dev $ETH1

ethtool --config-nfc eth2 flow-type ether dst 00:1f:7b:63:01:d4 m ff:ff:ff:ff:ff:ff action 0

and I got the following results on 1 CPU, 64B UDP packets (yes, I know
the baseline results suck, I haven't investigated why that is, but
nonetheless, it should still be relevant as far as comparative results
go):

Unpatched net-next:
proto 17:      65695 pkt/s
proto 17:      65725 pkt/s
proto 17:      65732 pkt/s
proto 17:      65720 pkt/s
proto 17:      65695 pkt/s
proto 17:      65725 pkt/s
proto 17:      65732 pkt/s
proto 17:      65720 pkt/s


After patch 1:
proto 17:      72679 pkt/s
proto 17:      72677 pkt/s
proto 17:      72669 pkt/s
proto 17:      72707 pkt/s
proto 17:      72696 pkt/s
proto 17:      72699 pkt/s

After patch 2:
proto 17:      72292 pkt/s
proto 17:      72425 pkt/s
proto 17:      72485 pkt/s
proto 17:      72478 pkt/s

After patch 4 (as 3 doesn't build):
proto 17:      72437 pkt/s
proto 17:      72510 pkt/s
proto 17:      72479 pkt/s
proto 17:      72499 pkt/s
proto 17:      72497 pkt/s
proto 17:      72427 pkt/s

With the change I pasted above:
proto 17:      71891 pkt/s
proto 17:      71810 pkt/s
proto 17:      71850 pkt/s
proto 17:      71826 pkt/s
proto 17:      71798 pkt/s
proto 17:      71786 pkt/s
proto 17:      71814 pkt/s
proto 17:      71814 pkt/s
proto 17:      72010 pkt/s

So basically, not only are we better off just zero-initializing the
complete skb->cb instead of looking up the tagger's skb_cb_size, but
zero-initializing the skb->cb isn't even all that bad. Yangbo's change
is an overall win anyway, all things considered. So just change the
memset as Richard suggested, make sure all patches compile, and we
should be good to go.
Y.b. Lu April 27, 2021, 4:13 a.m. UTC | #2
> -----Original Message-----

> From: Vladimir Oltean <olteanv@gmail.com>

> Sent: 2021年4月27日 2:17

> To: Y.b. Lu <yangbo.lu@nxp.com>

> Cc: netdev@vger.kernel.org; Richard Cochran <richardcochran@gmail.com>;

> Vladimir Oltean <vladimir.oltean@nxp.com>; David S . Miller

> <davem@davemloft.net>; Jakub Kicinski <kuba@kernel.org>; Jonathan Corbet

> <corbet@lwn.net>; Kurt Kanzenbach <kurt@linutronix.de>; Andrew Lunn

> <andrew@lunn.ch>; Vivien Didelot <vivien.didelot@gmail.com>; Florian

> Fainelli <f.fainelli@gmail.com>; Claudiu Manoil <claudiu.manoil@nxp.com>;

> Alexandre Belloni <alexandre.belloni@bootlin.com>;

> UNGLinuxDriver@microchip.com; linux-doc@vger.kernel.org;

> linux-kernel@vger.kernel.org

> Subject: Re: [net-next, v2, 3/7] net: dsa: free skb->cb usage in core driver

> 

> On Mon, Apr 26, 2021 at 05:37:58PM +0800, Yangbo Lu wrote:

> > Free skb->cb usage in core driver and let device drivers decide to use

> > or not. The reason having a DSA_SKB_CB(skb)->clone was because

> > dsa_skb_tx_timestamp() which may set the clone pointer was called

> > before p->xmit() which would use the clone if any, and the device

> > driver has no way to initialize the clone pointer.

> >

> > Although for now putting memset(skb->cb, 0, 48) at beginning of

> > dsa_slave_xmit() by this patch is not very good, there is still way to

> > improve this. Otherwise, some other new features, like one-step

> > timestamp which needs a flag of skb marked in dsa_skb_tx_timestamp(),

> > and handles as one-step timestamp in p->xmit() will face same

> > situation.

> >

> > Signed-off-by: Yangbo Lu <yangbo.lu@nxp.com>

> > ---

> > Changes for v2:

> > 	- Added this patch.

> > ---

> >  drivers/net/dsa/ocelot/felix.c         |  1 +

> >  drivers/net/dsa/sja1105/sja1105_main.c |  2 +-

> > drivers/net/dsa/sja1105/sja1105_ptp.c  |  4 +++-

> >  drivers/net/ethernet/mscc/ocelot.c     |  6 +++---

> >  drivers/net/ethernet/mscc/ocelot_net.c |  2 +-

> >  include/linux/dsa/sja1105.h            |  3 ++-

> >  include/net/dsa.h                      | 14 --------------

> >  include/soc/mscc/ocelot.h              |  8 ++++++++

> >  net/dsa/slave.c                        |  3 +--

> >  net/dsa/tag_ocelot.c                   |  8 ++++----

> >  net/dsa/tag_ocelot_8021q.c             |  8 ++++----

> >  11 files changed, 28 insertions(+), 31 deletions(-)

> >

> > diff --git a/drivers/net/dsa/ocelot/felix.c

> > b/drivers/net/dsa/ocelot/felix.c index d679f023dc00..8980d56ee793

> > 100644

> > --- a/drivers/net/dsa/ocelot/felix.c

> > +++ b/drivers/net/dsa/ocelot/felix.c

> > @@ -1403,6 +1403,7 @@ static bool felix_txtstamp(struct dsa_switch

> > *ds, int port,

> >

> >  	if (ocelot->ptp && ocelot_port->ptp_cmd ==

> IFH_REW_OP_TWO_STEP_PTP) {

> >  		ocelot_port_add_txtstamp_skb(ocelot, port, clone);

> > +		OCELOT_SKB_CB(skb)->clone = clone;

> >  		return true;

> >  	}

> >

> 

> Uh-oh, this patch fails to build:

> 

> In file included from ./include/soc/mscc/ocelot_vcap.h:9:0,

>                  from drivers/net/dsa/ocelot/felix.c:9:

> drivers/net/dsa/ocelot/felix.c: In function ‘felix_txtstamp’:

> drivers/net/dsa/ocelot/felix.c:1406:17: error: ‘skb’ undeclared (first use in

> this function)

>    OCELOT_SKB_CB(skb)->clone = clone;

>                  ^

> ./include/soc/mscc/ocelot.h:698:29: note: in definition of macro

> ‘OCELOT_SKB_CB’

>   ((struct ocelot_skb_cb *)((skb)->cb))

>                              ^~~

> drivers/net/dsa/ocelot/felix.c:1406:17: note: each undeclared identifier is

> reported only once for each function it appears in

>    OCELOT_SKB_CB(skb)->clone = clone;

>                  ^

> ./include/soc/mscc/ocelot.h:698:29: note: in definition of macro

> ‘OCELOT_SKB_CB’

>   ((struct ocelot_skb_cb *)((skb)->cb))

>                              ^~~

> 

> It depends on changes made in patch 3.


Oh.. That's a sequence problem.
I switched patch #3 and patch #4 with rebasing to fix up in v3 version.
Thanks.
Y.b. Lu April 27, 2021, 4:25 a.m. UTC | #3
> -----Original Message-----

> From: Vladimir Oltean <olteanv@gmail.com>

> Sent: 2021年4月27日 2:40

> To: Richard Cochran <richardcochran@gmail.com>

> Cc: Y.b. Lu <yangbo.lu@nxp.com>; netdev@vger.kernel.org; Vladimir Oltean

> <vladimir.oltean@nxp.com>; David S . Miller <davem@davemloft.net>; Jakub

> Kicinski <kuba@kernel.org>; Jonathan Corbet <corbet@lwn.net>; Kurt

> Kanzenbach <kurt@linutronix.de>; Andrew Lunn <andrew@lunn.ch>; Vivien

> Didelot <vivien.didelot@gmail.com>; Florian Fainelli <f.fainelli@gmail.com>;

> Claudiu Manoil <claudiu.manoil@nxp.com>; Alexandre Belloni

> <alexandre.belloni@bootlin.com>; UNGLinuxDriver@microchip.com;

> linux-doc@vger.kernel.org; linux-kernel@vger.kernel.org

> Subject: Re: [net-next, v2, 3/7] net: dsa: free skb->cb usage in core driver

> 

> On Mon, Apr 26, 2021 at 06:38:46AM -0700, Richard Cochran wrote:

> > On Mon, Apr 26, 2021 at 05:37:58PM +0800, Yangbo Lu wrote:

> > > @@ -624,7 +623,7 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff

> > > *skb, struct net_device *dev)

> > >

> > >  	dev_sw_netstats_tx_add(dev, 1, skb->len);

> > >

> > > -	DSA_SKB_CB(skb)->clone = NULL;

> > > +	memset(skb->cb, 0, 48);

> >

> > Replace hard coded 48 with sizeof() please.

> 

> You mean just a trivial change like this, right?

> 

> 	memset(skb->cb, 0, sizeof(skb->cb));

> 

> And not what I had suggested in v1, which would have looked something like

> this:

> 

> -----------------------------[cut here]-----------------------------

> diff --git a/include/net/dsa.h b/include/net/dsa.h index

> e1a2610a0e06..c75b249e846f 100644

> --- a/include/net/dsa.h

> +++ b/include/net/dsa.h

> @@ -92,6 +92,7 @@ struct dsa_device_ops {

>  	 */

>  	bool (*filter)(const struct sk_buff *skb, struct net_device *dev);

>  	unsigned int overhead;

> +	unsigned int skb_cb_size;

>  	const char *name;

>  	enum dsa_tag_protocol proto;

>  	/* Some tagging protocols either mangle or shift the destination MAC diff

> --git a/net/dsa/slave.c b/net/dsa/slave.c index 2033d8bac23d..2230596b48b7

> 100644

> --- a/net/dsa/slave.c

> +++ b/net/dsa/slave.c

> @@ -610,11 +610,14 @@ static int dsa_realloc_skb(struct sk_buff *skb, struct

> net_device *dev)  static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb,

> struct net_device *dev)  {

>  	struct dsa_slave_priv *p = netdev_priv(dev);

> +	const struct dsa_device_ops *tag_ops;

>  	struct sk_buff *nskb;

> 

>  	dev_sw_netstats_tx_add(dev, 1, skb->len);

> 

> -	memset(skb->cb, 0, 48);

> +	tag_ops = p->dp->cpu_dp->tag_ops;

> +	if (tag_ops->skb_cb_size)

> +		memset(skb->cb, 0, tag_ops->skb_cb_size);

> 

>  	/* Handle tx timestamp if any */

>  	dsa_skb_tx_timestamp(p, skb);

> diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c index

> 50496013cdb7..1b337fa104dc 100644

> --- a/net/dsa/tag_sja1105.c

> +++ b/net/dsa/tag_sja1105.c

> @@ -365,6 +365,7 @@ static const struct dsa_device_ops

> sja1105_netdev_ops = {

>  	.overhead = VLAN_HLEN,

>  	.flow_dissect = sja1105_flow_dissect,

>  	.promisc_on_master = true,

> +	.skb_cb_size = sizeof(struct sja1105_skb_cb),

>  };

> 

>  MODULE_LICENSE("GPL v2");

> -----------------------------[cut here]-----------------------------

> 

> I wanted to see how badly impacted would the performance be, so I created

> an IPv4 forwarding setup on the NXP LS1021A-TSN board (gianfar +

> sja1105):

> 

> #!/bin/bash

> 

> ETH0=swp3

> ETH1=swp2

> 

> systemctl stop ptp4l # runs a BPF classifier on every packet systemctl stop

> phc2sys

> 

> echo 1 > /proc/sys/net/ipv4/ip_forward

> ip addr flush $ETH0 && ip addr add 192.168.100.1/24 dev $ETH0 && ip link

> set $ETH0 up ip addr flush $ETH1 && ip addr add 192.168.200.1/24 dev $ETH1

> && ip link set $ETH1 up

> 

> arp -s 192.168.100.2 00:04:9f:06:00:09 dev $ETH0 arp -s 192.168.200.2

> 00:04:9f:06:00:0a dev $ETH1

> 

> ethtool --config-nfc eth2 flow-type ether dst 00:1f:7b:63:01:d4 m ff:ff:ff:ff:ff:ff

> action 0

> 

> and I got the following results on 1 CPU, 64B UDP packets (yes, I know the

> baseline results suck, I haven't investigated why that is, but nonetheless, it

> should still be relevant as far as comparative results

> go):

> 

> Unpatched net-next:

> proto 17:      65695 pkt/s

> proto 17:      65725 pkt/s

> proto 17:      65732 pkt/s

> proto 17:      65720 pkt/s

> proto 17:      65695 pkt/s

> proto 17:      65725 pkt/s

> proto 17:      65732 pkt/s

> proto 17:      65720 pkt/s

> 

> 

> After patch 1:

> proto 17:      72679 pkt/s

> proto 17:      72677 pkt/s

> proto 17:      72669 pkt/s

> proto 17:      72707 pkt/s

> proto 17:      72696 pkt/s

> proto 17:      72699 pkt/s

> 

> After patch 2:

> proto 17:      72292 pkt/s

> proto 17:      72425 pkt/s

> proto 17:      72485 pkt/s

> proto 17:      72478 pkt/s

> 

> After patch 4 (as 3 doesn't build):

> proto 17:      72437 pkt/s

> proto 17:      72510 pkt/s

> proto 17:      72479 pkt/s

> proto 17:      72499 pkt/s

> proto 17:      72497 pkt/s

> proto 17:      72427 pkt/s

> 

> With the change I pasted above:

> proto 17:      71891 pkt/s

> proto 17:      71810 pkt/s

> proto 17:      71850 pkt/s

> proto 17:      71826 pkt/s

> proto 17:      71798 pkt/s

> proto 17:      71786 pkt/s

> proto 17:      71814 pkt/s

> proto 17:      71814 pkt/s

> proto 17:      72010 pkt/s

> 

> So basically, not only are we better off just zero-initializing the complete

> skb->cb instead of looking up the tagger's skb_cb_size, but zero-initializing the

> skb->cb isn't even all that bad. Yangbo's change is an overall win anyway, all

> things considered. So just change the memset as Richard suggested, make sure

> all patches compile, and we should be good to go.


Ah... I had thought 48bytes memset was acceptable for now by you.
I actually didn't consider the performance affected by this. I just did this because I believed the direction was right:)
That's really good testing. Thank you very much, Vladimir.
With the data, we can feel free to use the changes.
Y.b. Lu April 27, 2021, 4:26 a.m. UTC | #4
> -----Original Message-----

> From: Richard Cochran <richardcochran@gmail.com>

> Sent: 2021年4月26日 21:39

> To: Y.b. Lu <yangbo.lu@nxp.com>

> Cc: netdev@vger.kernel.org; Vladimir Oltean <vladimir.oltean@nxp.com>;

> David S . Miller <davem@davemloft.net>; Jakub Kicinski <kuba@kernel.org>;

> Jonathan Corbet <corbet@lwn.net>; Kurt Kanzenbach <kurt@linutronix.de>;

> Andrew Lunn <andrew@lunn.ch>; Vivien Didelot <vivien.didelot@gmail.com>;

> Florian Fainelli <f.fainelli@gmail.com>; Claudiu Manoil

> <claudiu.manoil@nxp.com>; Alexandre Belloni

> <alexandre.belloni@bootlin.com>; UNGLinuxDriver@microchip.com;

> linux-doc@vger.kernel.org; linux-kernel@vger.kernel.org

> Subject: Re: [net-next, v2, 3/7] net: dsa: free skb->cb usage in core driver

> 

> On Mon, Apr 26, 2021 at 05:37:58PM +0800, Yangbo Lu wrote:

> > @@ -624,7 +623,7 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff

> *skb, struct net_device *dev)

> >

> >  	dev_sw_netstats_tx_add(dev, 1, skb->len);

> >

> > -	DSA_SKB_CB(skb)->clone = NULL;

> > +	memset(skb->cb, 0, 48);

> 

> Replace hard coded 48 with sizeof() please.


Fixed in v3.
Thank you!

> 

> Thanks,

> Richard
Richard Cochran April 27, 2021, 3:56 p.m. UTC | #5
On Mon, Apr 26, 2021 at 09:39:44PM +0300, Vladimir Oltean wrote:
> On Mon, Apr 26, 2021 at 06:38:46AM -0700, Richard Cochran wrote:

> > On Mon, Apr 26, 2021 at 05:37:58PM +0800, Yangbo Lu wrote:

> > > @@ -624,7 +623,7 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)

> > >  

> > >  	dev_sw_netstats_tx_add(dev, 1, skb->len);

> > >  

> > > -	DSA_SKB_CB(skb)->clone = NULL;

> > > +	memset(skb->cb, 0, 48);

> > 

> > Replace hard coded 48 with sizeof() please.

> 

> You mean just a trivial change like this, right?

> 

> 	memset(skb->cb, 0, sizeof(skb->cb));


Yes.

Thanks,
Richard
Tobias Waldekranz April 28, 2021, 8:29 p.m. UTC | #6
On Mon, Apr 26, 2021 at 17:37, Yangbo Lu <yangbo.lu@nxp.com> wrote:
> Free skb->cb usage in core driver and let device drivers decide to

> use or not. The reason having a DSA_SKB_CB(skb)->clone was because

> dsa_skb_tx_timestamp() which may set the clone pointer was called

> before p->xmit() which would use the clone if any, and the device

> driver has no way to initialize the clone pointer.

>

> Although for now putting memset(skb->cb, 0, 48) at beginning of

> dsa_slave_xmit() by this patch is not very good, there is still way

> to improve this. Otherwise, some other new features, like one-step


Could you please expand on this improvement?

This memset makes it impossible to carry control buffer information from
driver callbacks that run before .ndo_start_xmit, for example
.ndo_select_queue, to a tagger's .xmit.

It seems to me that if the drivers are to handle the CB internally from
now on, that should go for both setting and clearing of the required
fields.

> timestamp which needs a flag of skb marked in dsa_skb_tx_timestamp(),

> and handles as one-step timestamp in p->xmit() will face same

> situation.

>

> Signed-off-by: Yangbo Lu <yangbo.lu@nxp.com>

> ---

> Changes for v2:

> 	- Added this patch.

> ---

>  drivers/net/dsa/ocelot/felix.c         |  1 +

>  drivers/net/dsa/sja1105/sja1105_main.c |  2 +-

>  drivers/net/dsa/sja1105/sja1105_ptp.c  |  4 +++-

>  drivers/net/ethernet/mscc/ocelot.c     |  6 +++---

>  drivers/net/ethernet/mscc/ocelot_net.c |  2 +-

>  include/linux/dsa/sja1105.h            |  3 ++-

>  include/net/dsa.h                      | 14 --------------

>  include/soc/mscc/ocelot.h              |  8 ++++++++

>  net/dsa/slave.c                        |  3 +--

>  net/dsa/tag_ocelot.c                   |  8 ++++----

>  net/dsa/tag_ocelot_8021q.c             |  8 ++++----

>  11 files changed, 28 insertions(+), 31 deletions(-)

>

> diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c

> index d679f023dc00..8980d56ee793 100644

> --- a/drivers/net/dsa/ocelot/felix.c

> +++ b/drivers/net/dsa/ocelot/felix.c

> @@ -1403,6 +1403,7 @@ static bool felix_txtstamp(struct dsa_switch *ds, int port,

>  

>  	if (ocelot->ptp && ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) {

>  		ocelot_port_add_txtstamp_skb(ocelot, port, clone);

> +		OCELOT_SKB_CB(skb)->clone = clone;

>  		return true;

>  	}

>  

> diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c

> index d9c198ca0197..405024b637d6 100644

> --- a/drivers/net/dsa/sja1105/sja1105_main.c

> +++ b/drivers/net/dsa/sja1105/sja1105_main.c

> @@ -3137,7 +3137,7 @@ static void sja1105_port_deferred_xmit(struct kthread_work *work)

>  	struct sk_buff *skb;

>  

>  	while ((skb = skb_dequeue(&sp->xmit_queue)) != NULL) {

> -		struct sk_buff *clone = DSA_SKB_CB(skb)->clone;

> +		struct sk_buff *clone = SJA1105_SKB_CB(skb)->clone;

>  

>  		mutex_lock(&priv->mgmt_lock);

>  

> diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.c b/drivers/net/dsa/sja1105/sja1105_ptp.c

> index 72d052de82d8..0832368aaa96 100644

> --- a/drivers/net/dsa/sja1105/sja1105_ptp.c

> +++ b/drivers/net/dsa/sja1105/sja1105_ptp.c

> @@ -432,7 +432,7 @@ bool sja1105_port_rxtstamp(struct dsa_switch *ds, int port,

>  }

>  

>  /* Called from dsa_skb_tx_timestamp. This callback is just to make DSA clone

> - * the skb and have it available in DSA_SKB_CB in the .port_deferred_xmit

> + * the skb and have it available in SJA1105_SKB_CB in the .port_deferred_xmit

>   * callback, where we will timestamp it synchronously.

>   */

>  bool sja1105_port_txtstamp(struct dsa_switch *ds, int port, struct sk_buff *skb)

> @@ -443,6 +443,8 @@ bool sja1105_port_txtstamp(struct dsa_switch *ds, int port, struct sk_buff *skb)

>  	if (!sp->hwts_tx_en)

>  		return false;

>  

> +	SJA1105_SKB_CB(skb)->clone = clone;

> +

>  	return true;

>  }

>  

> diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c

> index 8d06ffaf318a..7da2dd1632b1 100644

> --- a/drivers/net/ethernet/mscc/ocelot.c

> +++ b/drivers/net/ethernet/mscc/ocelot.c

> @@ -538,8 +538,8 @@ void ocelot_port_add_txtstamp_skb(struct ocelot *ocelot, int port,

>  	spin_lock(&ocelot_port->ts_id_lock);

>  

>  	skb_shinfo(clone)->tx_flags |= SKBTX_IN_PROGRESS;

> -	/* Store timestamp ID in cb[0] of sk_buff */

> -	clone->cb[0] = ocelot_port->ts_id;

> +	/* Store timestamp ID in OCELOT_SKB_CB(clone)->ts_id */

> +	OCELOT_SKB_CB(clone)->ts_id = ocelot_port->ts_id;

>  	ocelot_port->ts_id = (ocelot_port->ts_id + 1) % 4;

>  	skb_queue_tail(&ocelot_port->tx_skbs, clone);

>  

> @@ -604,7 +604,7 @@ void ocelot_get_txtstamp(struct ocelot *ocelot)

>  		spin_lock_irqsave(&port->tx_skbs.lock, flags);

>  

>  		skb_queue_walk_safe(&port->tx_skbs, skb, skb_tmp) {

> -			if (skb->cb[0] != id)

> +			if (OCELOT_SKB_CB(skb)->ts_id != id)

>  				continue;

>  			__skb_unlink(skb, &port->tx_skbs);

>  			skb_match = skb;

> diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c

> index 36f32a4d9b0f..789a5fba146c 100644

> --- a/drivers/net/ethernet/mscc/ocelot_net.c

> +++ b/drivers/net/ethernet/mscc/ocelot_net.c

> @@ -520,7 +520,7 @@ static netdev_tx_t ocelot_port_xmit(struct sk_buff *skb, struct net_device *dev)

>  

>  			ocelot_port_add_txtstamp_skb(ocelot, port, clone);

>  

> -			rew_op |= clone->cb[0] << 3;

> +			rew_op |= OCELOT_SKB_CB(clone)->ts_id << 3;

>  		}

>  	}

>  

> diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h

> index dd93735ae228..1eb84562b311 100644

> --- a/include/linux/dsa/sja1105.h

> +++ b/include/linux/dsa/sja1105.h

> @@ -47,11 +47,12 @@ struct sja1105_tagger_data {

>  };

>  

>  struct sja1105_skb_cb {

> +	struct sk_buff *clone;

>  	u32 meta_tstamp;

>  };

>  

>  #define SJA1105_SKB_CB(skb) \

> -	((struct sja1105_skb_cb *)DSA_SKB_CB_PRIV(skb))

> +	((struct sja1105_skb_cb *)((skb)->cb))

>  

>  struct sja1105_port {

>  	u16 subvlan_map[DSA_8021Q_N_SUBVLAN];

> diff --git a/include/net/dsa.h b/include/net/dsa.h

> index 905066055b08..5685e60cb082 100644

> --- a/include/net/dsa.h

> +++ b/include/net/dsa.h

> @@ -117,20 +117,6 @@ struct dsa_netdevice_ops {

>  #define MODULE_ALIAS_DSA_TAG_DRIVER(__proto)				\

>  	MODULE_ALIAS(DSA_TAG_DRIVER_ALIAS __stringify(__proto##_VALUE))

>  

> -struct dsa_skb_cb {

> -	struct sk_buff *clone;

> -};

> -

> -struct __dsa_skb_cb {

> -	struct dsa_skb_cb cb;

> -	u8 priv[48 - sizeof(struct dsa_skb_cb)];

> -};

> -

> -#define DSA_SKB_CB(skb) ((struct dsa_skb_cb *)((skb)->cb))

> -

> -#define DSA_SKB_CB_PRIV(skb)			\

> -	((void *)(skb)->cb + offsetof(struct __dsa_skb_cb, priv))

> -

>  struct dsa_switch_tree {

>  	struct list_head	list;

>  

> diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h

> index 68cdc7ceaf4d..f075aaf70eee 100644

> --- a/include/soc/mscc/ocelot.h

> +++ b/include/soc/mscc/ocelot.h

> @@ -689,6 +689,14 @@ struct ocelot_policer {

>  	u32 burst; /* bytes */

>  };

>  

> +struct ocelot_skb_cb {

> +	struct sk_buff *clone;

> +	u8 ts_id;

> +};

> +

> +#define OCELOT_SKB_CB(skb) \

> +	((struct ocelot_skb_cb *)((skb)->cb))

> +

>  #define ocelot_read_ix(ocelot, reg, gi, ri) __ocelot_read_ix(ocelot, reg, reg##_GSZ * (gi) + reg##_RSZ * (ri))

>  #define ocelot_read_gix(ocelot, reg, gi) __ocelot_read_ix(ocelot, reg, reg##_GSZ * (gi))

>  #define ocelot_read_rix(ocelot, reg, ri) __ocelot_read_ix(ocelot, reg, reg##_RSZ * (ri))

> diff --git a/net/dsa/slave.c b/net/dsa/slave.c

> index acaa52e60d7f..2211894c2381 100644

> --- a/net/dsa/slave.c

> +++ b/net/dsa/slave.c

> @@ -568,7 +568,6 @@ static void dsa_skb_tx_timestamp(struct dsa_slave_priv *p,

>  		return;

>  

>  	if (ds->ops->port_txtstamp(ds, p->dp->index, clone)) {

> -		DSA_SKB_CB(skb)->clone = clone;

>  		return;

>  	}

>  

> @@ -624,7 +623,7 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)

>  

>  	dev_sw_netstats_tx_add(dev, 1, skb->len);

>  

> -	DSA_SKB_CB(skb)->clone = NULL;

> +	memset(skb->cb, 0, 48);

>  

>  	/* Handle tx timestamp if any */

>  	dsa_skb_tx_timestamp(p, skb);

> diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c

> index f9df9cac81c5..1100a16f1032 100644

> --- a/net/dsa/tag_ocelot.c

> +++ b/net/dsa/tag_ocelot.c

> @@ -15,11 +15,11 @@ static void ocelot_xmit_ptp(struct dsa_port *dp, void *injection,

>  	ocelot_port = ocelot->ports[dp->index];

>  	rew_op = ocelot_port->ptp_cmd;

>  

> -	/* Retrieve timestamp ID populated inside skb->cb[0] of the

> -	 * clone by ocelot_port_add_txtstamp_skb

> +	/* Retrieve timestamp ID populated inside OCELOT_SKB_CB(clone)->ts_id

> +	 * by ocelot_port_add_txtstamp_skb

>  	 */

>  	if (ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP)

> -		rew_op |= clone->cb[0] << 3;

> +		rew_op |= OCELOT_SKB_CB(clone)->ts_id << 3;

>  

>  	ocelot_ifh_set_rew_op(injection, rew_op);

>  }

> @@ -28,7 +28,7 @@ static void ocelot_xmit_common(struct sk_buff *skb, struct net_device *netdev,

>  			       __be32 ifh_prefix, void **ifh)

>  {

>  	struct dsa_port *dp = dsa_slave_to_port(netdev);

> -	struct sk_buff *clone = DSA_SKB_CB(skb)->clone;

> +	struct sk_buff *clone = OCELOT_SKB_CB(skb)->clone;

>  	struct dsa_switch *ds = dp->ds;

>  	void *injection;

>  	__be32 *prefix;

> diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c

> index 5f3e8e124a82..a001a7e3f575 100644

> --- a/net/dsa/tag_ocelot_8021q.c

> +++ b/net/dsa/tag_ocelot_8021q.c

> @@ -28,11 +28,11 @@ static struct sk_buff *ocelot_xmit_ptp(struct dsa_port *dp,

>  	ocelot_port = ocelot->ports[port];

>  	rew_op = ocelot_port->ptp_cmd;

>  

> -	/* Retrieve timestamp ID populated inside skb->cb[0] of the

> -	 * clone by ocelot_port_add_txtstamp_skb

> +	/* Retrieve timestamp ID populated inside OCELOT_SKB_CB(clone)->ts_id

> +	 * by ocelot_port_add_txtstamp_skb

>  	 */

>  	if (ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP)

> -		rew_op |= clone->cb[0] << 3;

> +		rew_op |= OCELOT_SKB_CB(clone)->ts_id << 3;

>  

>  	ocelot_port_inject_frame(ocelot, port, 0, rew_op, skb);

>  

> @@ -46,7 +46,7 @@ static struct sk_buff *ocelot_xmit(struct sk_buff *skb,

>  	u16 tx_vid = dsa_8021q_tx_vid(dp->ds, dp->index);

>  	u16 queue_mapping = skb_get_queue_mapping(skb);

>  	u8 pcp = netdev_txq_to_tc(netdev, queue_mapping);

> -	struct sk_buff *clone = DSA_SKB_CB(skb)->clone;

> +	struct sk_buff *clone = OCELOT_SKB_CB(skb)->clone;

>  

>  	/* TX timestamping was requested, so inject through MMIO */

>  	if (clone)

> -- 

> 2.25.1
Y.b. Lu May 7, 2021, 11:26 a.m. UTC | #7
> -----Original Message-----

> From: Tobias Waldekranz <tobias@waldekranz.com>

> Sent: 2021年4月29日 4:30

> To: Y.b. Lu <yangbo.lu@nxp.com>; netdev@vger.kernel.org

> Cc: Y.b. Lu <yangbo.lu@nxp.com>; Richard Cochran

> <richardcochran@gmail.com>; Vladimir Oltean <vladimir.oltean@nxp.com>;

> David S . Miller <davem@davemloft.net>; Jakub Kicinski <kuba@kernel.org>;

> Jonathan Corbet <corbet@lwn.net>; Kurt Kanzenbach <kurt@linutronix.de>;

> Andrew Lunn <andrew@lunn.ch>; Vivien Didelot <vivien.didelot@gmail.com>;

> Florian Fainelli <f.fainelli@gmail.com>; Claudiu Manoil

> <claudiu.manoil@nxp.com>; Alexandre Belloni

> <alexandre.belloni@bootlin.com>; UNGLinuxDriver@microchip.com;

> linux-doc@vger.kernel.org; linux-kernel@vger.kernel.org

> Subject: Re: [net-next, v2, 3/7] net: dsa: free skb->cb usage in core driver

> 

> On Mon, Apr 26, 2021 at 17:37, Yangbo Lu <yangbo.lu@nxp.com> wrote:

> > Free skb->cb usage in core driver and let device drivers decide to use

> > or not. The reason having a DSA_SKB_CB(skb)->clone was because

> > dsa_skb_tx_timestamp() which may set the clone pointer was called

> > before p->xmit() which would use the clone if any, and the device

> > driver has no way to initialize the clone pointer.

> >

> > Although for now putting memset(skb->cb, 0, 48) at beginning of

> > dsa_slave_xmit() by this patch is not very good, there is still way to

> > improve this. Otherwise, some other new features, like one-step

> 

> Could you please expand on this improvement?

> 

> This memset makes it impossible to carry control buffer information from

> driver callbacks that run before .ndo_start_xmit, for

> example .ndo_select_queue, to a tagger's .xmit.

> 

> It seems to me that if the drivers are to handle the CB internally from now on,

> that should go for both setting and clearing of the required fields.

> 


For the timestamping case, dsa_skb_tx_timestamp may not touch .port_txtstamp callback, so we had to put skb->cb initialization at beginning of dsa_slave_xmit.
To avoid breaking future skb->cb usage you mentioned, do you think we can back to Vladimir's idea initializing only field required, or even just add a callback for cb initialization for timestamping?

Thanks.


> > timestamp which needs a flag of skb marked in dsa_skb_tx_timestamp(),

> > and handles as one-step timestamp in p->xmit() will face same

> > situation.

> >

> > Signed-off-by: Yangbo Lu <yangbo.lu@nxp.com>
Vladimir Oltean May 7, 2021, 11:48 a.m. UTC | #8
On Fri, May 07, 2021 at 11:26:32AM +0000, Y.b. Lu wrote:
> > From: Tobias Waldekranz <tobias@waldekranz.com>

> > On Mon, Apr 26, 2021 at 17:37, Yangbo Lu <yangbo.lu@nxp.com> wrote:

> > > Free skb->cb usage in core driver and let device drivers decide to use

> > > or not. The reason having a DSA_SKB_CB(skb)->clone was because

> > > dsa_skb_tx_timestamp() which may set the clone pointer was called

> > > before p->xmit() which would use the clone if any, and the device

> > > driver has no way to initialize the clone pointer.

> > >

> > > Although for now putting memset(skb->cb, 0, 48) at beginning of

> > > dsa_slave_xmit() by this patch is not very good, there is still way to

> > > improve this. Otherwise, some other new features, like one-step

> >

> > Could you please expand on this improvement?

> >

> > This memset makes it impossible to carry control buffer information from

> > driver callbacks that run before .ndo_start_xmit, for

> > example .ndo_select_queue, to a tagger's .xmit.

> >

> > It seems to me that if the drivers are to handle the CB internally from now on,

> > that should go for both setting and clearing of the required fields.

>

> For the timestamping case, dsa_skb_tx_timestamp may not touch

> .port_txtstamp callback, so we had to put skb->cb initialization at

> beginning of dsa_slave_xmit.

> To avoid breaking future skb->cb usage you mentioned, do you think we

> can back to Vladimir's idea initializing only field required, or even

> just add a callback for cb initialization for timestamping?


I would like to avoid overengineering, which a callback for skb->cb
initialization would introduce, given the needs we have now.

FWIW, we may not even be able to touch skb->cb in .ndo_select_queue for
Tobias's use case, that discussion is here:
https://patchwork.kernel.org/project/netdevbpf/patch/20210426170411.1789186-7-tobias@waldekranz.com/
diff mbox series

Patch

diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c
index d679f023dc00..8980d56ee793 100644
--- a/drivers/net/dsa/ocelot/felix.c
+++ b/drivers/net/dsa/ocelot/felix.c
@@ -1403,6 +1403,7 @@  static bool felix_txtstamp(struct dsa_switch *ds, int port,
 
 	if (ocelot->ptp && ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) {
 		ocelot_port_add_txtstamp_skb(ocelot, port, clone);
+		OCELOT_SKB_CB(skb)->clone = clone;
 		return true;
 	}
 
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index d9c198ca0197..405024b637d6 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -3137,7 +3137,7 @@  static void sja1105_port_deferred_xmit(struct kthread_work *work)
 	struct sk_buff *skb;
 
 	while ((skb = skb_dequeue(&sp->xmit_queue)) != NULL) {
-		struct sk_buff *clone = DSA_SKB_CB(skb)->clone;
+		struct sk_buff *clone = SJA1105_SKB_CB(skb)->clone;
 
 		mutex_lock(&priv->mgmt_lock);
 
diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.c b/drivers/net/dsa/sja1105/sja1105_ptp.c
index 72d052de82d8..0832368aaa96 100644
--- a/drivers/net/dsa/sja1105/sja1105_ptp.c
+++ b/drivers/net/dsa/sja1105/sja1105_ptp.c
@@ -432,7 +432,7 @@  bool sja1105_port_rxtstamp(struct dsa_switch *ds, int port,
 }
 
 /* Called from dsa_skb_tx_timestamp. This callback is just to make DSA clone
- * the skb and have it available in DSA_SKB_CB in the .port_deferred_xmit
+ * the skb and have it available in SJA1105_SKB_CB in the .port_deferred_xmit
  * callback, where we will timestamp it synchronously.
  */
 bool sja1105_port_txtstamp(struct dsa_switch *ds, int port, struct sk_buff *skb)
@@ -443,6 +443,8 @@  bool sja1105_port_txtstamp(struct dsa_switch *ds, int port, struct sk_buff *skb)
 	if (!sp->hwts_tx_en)
 		return false;
 
+	SJA1105_SKB_CB(skb)->clone = clone;
+
 	return true;
 }
 
diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index 8d06ffaf318a..7da2dd1632b1 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -538,8 +538,8 @@  void ocelot_port_add_txtstamp_skb(struct ocelot *ocelot, int port,
 	spin_lock(&ocelot_port->ts_id_lock);
 
 	skb_shinfo(clone)->tx_flags |= SKBTX_IN_PROGRESS;
-	/* Store timestamp ID in cb[0] of sk_buff */
-	clone->cb[0] = ocelot_port->ts_id;
+	/* Store timestamp ID in OCELOT_SKB_CB(clone)->ts_id */
+	OCELOT_SKB_CB(clone)->ts_id = ocelot_port->ts_id;
 	ocelot_port->ts_id = (ocelot_port->ts_id + 1) % 4;
 	skb_queue_tail(&ocelot_port->tx_skbs, clone);
 
@@ -604,7 +604,7 @@  void ocelot_get_txtstamp(struct ocelot *ocelot)
 		spin_lock_irqsave(&port->tx_skbs.lock, flags);
 
 		skb_queue_walk_safe(&port->tx_skbs, skb, skb_tmp) {
-			if (skb->cb[0] != id)
+			if (OCELOT_SKB_CB(skb)->ts_id != id)
 				continue;
 			__skb_unlink(skb, &port->tx_skbs);
 			skb_match = skb;
diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c
index 36f32a4d9b0f..789a5fba146c 100644
--- a/drivers/net/ethernet/mscc/ocelot_net.c
+++ b/drivers/net/ethernet/mscc/ocelot_net.c
@@ -520,7 +520,7 @@  static netdev_tx_t ocelot_port_xmit(struct sk_buff *skb, struct net_device *dev)
 
 			ocelot_port_add_txtstamp_skb(ocelot, port, clone);
 
-			rew_op |= clone->cb[0] << 3;
+			rew_op |= OCELOT_SKB_CB(clone)->ts_id << 3;
 		}
 	}
 
diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h
index dd93735ae228..1eb84562b311 100644
--- a/include/linux/dsa/sja1105.h
+++ b/include/linux/dsa/sja1105.h
@@ -47,11 +47,12 @@  struct sja1105_tagger_data {
 };
 
 struct sja1105_skb_cb {
+	struct sk_buff *clone;
 	u32 meta_tstamp;
 };
 
 #define SJA1105_SKB_CB(skb) \
-	((struct sja1105_skb_cb *)DSA_SKB_CB_PRIV(skb))
+	((struct sja1105_skb_cb *)((skb)->cb))
 
 struct sja1105_port {
 	u16 subvlan_map[DSA_8021Q_N_SUBVLAN];
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 905066055b08..5685e60cb082 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -117,20 +117,6 @@  struct dsa_netdevice_ops {
 #define MODULE_ALIAS_DSA_TAG_DRIVER(__proto)				\
 	MODULE_ALIAS(DSA_TAG_DRIVER_ALIAS __stringify(__proto##_VALUE))
 
-struct dsa_skb_cb {
-	struct sk_buff *clone;
-};
-
-struct __dsa_skb_cb {
-	struct dsa_skb_cb cb;
-	u8 priv[48 - sizeof(struct dsa_skb_cb)];
-};
-
-#define DSA_SKB_CB(skb) ((struct dsa_skb_cb *)((skb)->cb))
-
-#define DSA_SKB_CB_PRIV(skb)			\
-	((void *)(skb)->cb + offsetof(struct __dsa_skb_cb, priv))
-
 struct dsa_switch_tree {
 	struct list_head	list;
 
diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index 68cdc7ceaf4d..f075aaf70eee 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -689,6 +689,14 @@  struct ocelot_policer {
 	u32 burst; /* bytes */
 };
 
+struct ocelot_skb_cb {
+	struct sk_buff *clone;
+	u8 ts_id;
+};
+
+#define OCELOT_SKB_CB(skb) \
+	((struct ocelot_skb_cb *)((skb)->cb))
+
 #define ocelot_read_ix(ocelot, reg, gi, ri) __ocelot_read_ix(ocelot, reg, reg##_GSZ * (gi) + reg##_RSZ * (ri))
 #define ocelot_read_gix(ocelot, reg, gi) __ocelot_read_ix(ocelot, reg, reg##_GSZ * (gi))
 #define ocelot_read_rix(ocelot, reg, ri) __ocelot_read_ix(ocelot, reg, reg##_RSZ * (ri))
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index acaa52e60d7f..2211894c2381 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -568,7 +568,6 @@  static void dsa_skb_tx_timestamp(struct dsa_slave_priv *p,
 		return;
 
 	if (ds->ops->port_txtstamp(ds, p->dp->index, clone)) {
-		DSA_SKB_CB(skb)->clone = clone;
 		return;
 	}
 
@@ -624,7 +623,7 @@  static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	dev_sw_netstats_tx_add(dev, 1, skb->len);
 
-	DSA_SKB_CB(skb)->clone = NULL;
+	memset(skb->cb, 0, 48);
 
 	/* Handle tx timestamp if any */
 	dsa_skb_tx_timestamp(p, skb);
diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c
index f9df9cac81c5..1100a16f1032 100644
--- a/net/dsa/tag_ocelot.c
+++ b/net/dsa/tag_ocelot.c
@@ -15,11 +15,11 @@  static void ocelot_xmit_ptp(struct dsa_port *dp, void *injection,
 	ocelot_port = ocelot->ports[dp->index];
 	rew_op = ocelot_port->ptp_cmd;
 
-	/* Retrieve timestamp ID populated inside skb->cb[0] of the
-	 * clone by ocelot_port_add_txtstamp_skb
+	/* Retrieve timestamp ID populated inside OCELOT_SKB_CB(clone)->ts_id
+	 * by ocelot_port_add_txtstamp_skb
 	 */
 	if (ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP)
-		rew_op |= clone->cb[0] << 3;
+		rew_op |= OCELOT_SKB_CB(clone)->ts_id << 3;
 
 	ocelot_ifh_set_rew_op(injection, rew_op);
 }
@@ -28,7 +28,7 @@  static void ocelot_xmit_common(struct sk_buff *skb, struct net_device *netdev,
 			       __be32 ifh_prefix, void **ifh)
 {
 	struct dsa_port *dp = dsa_slave_to_port(netdev);
-	struct sk_buff *clone = DSA_SKB_CB(skb)->clone;
+	struct sk_buff *clone = OCELOT_SKB_CB(skb)->clone;
 	struct dsa_switch *ds = dp->ds;
 	void *injection;
 	__be32 *prefix;
diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c
index 5f3e8e124a82..a001a7e3f575 100644
--- a/net/dsa/tag_ocelot_8021q.c
+++ b/net/dsa/tag_ocelot_8021q.c
@@ -28,11 +28,11 @@  static struct sk_buff *ocelot_xmit_ptp(struct dsa_port *dp,
 	ocelot_port = ocelot->ports[port];
 	rew_op = ocelot_port->ptp_cmd;
 
-	/* Retrieve timestamp ID populated inside skb->cb[0] of the
-	 * clone by ocelot_port_add_txtstamp_skb
+	/* Retrieve timestamp ID populated inside OCELOT_SKB_CB(clone)->ts_id
+	 * by ocelot_port_add_txtstamp_skb
 	 */
 	if (ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP)
-		rew_op |= clone->cb[0] << 3;
+		rew_op |= OCELOT_SKB_CB(clone)->ts_id << 3;
 
 	ocelot_port_inject_frame(ocelot, port, 0, rew_op, skb);
 
@@ -46,7 +46,7 @@  static struct sk_buff *ocelot_xmit(struct sk_buff *skb,
 	u16 tx_vid = dsa_8021q_tx_vid(dp->ds, dp->index);
 	u16 queue_mapping = skb_get_queue_mapping(skb);
 	u8 pcp = netdev_txq_to_tc(netdev, queue_mapping);
-	struct sk_buff *clone = DSA_SKB_CB(skb)->clone;
+	struct sk_buff *clone = OCELOT_SKB_CB(skb)->clone;
 
 	/* TX timestamping was requested, so inject through MMIO */
 	if (clone)