diff mbox series

[iproute2-next] tc/skbmod: Introduce SKBMOD_F_ECN option

Message ID 20210721232053.39077-1-yepeilin.cs@gmail.com
State Superseded
Headers show
Series [iproute2-next] tc/skbmod: Introduce SKBMOD_F_ECN option | expand

Commit Message

Peilin Ye July 21, 2021, 11:20 p.m. UTC
From: Peilin Ye <peilin.ye@bytedance.com>

Recently we added SKBMOD_F_ECN option support to the kernel; support it in
the tc-skbmod(8) front end, and update its man page accordingly.

The 2 least significant bits of the Traffic Class field in IPv4 and IPv6
headers are used to represent different ECN states [1]:

	0b00: "Non ECN-Capable Transport", Non-ECT
	0b10: "ECN Capable Transport", ECT(0)
	0b01: "ECN Capable Transport", ECT(1)
	0b11: "Congestion Encountered", CE

This new option, "ecn", marks ECT(0) and ECT(1) IPv{4,6} packets as CE,
which is useful for ECN-based rate limiting.  For example:

	$ tc filter add dev eth0 parent 1: protocol ip prio 10 \
		u32 match ip protocol 1 0xff flowid 1:2 \
		action skbmod \
		ecn

The updated tc-skbmod SYNOPSIS looks like the following:

	tc ... action skbmod { set SETTABLE | swap SWAPPABLE | ecn } ...

Only one of "set", "swap" or "ecn" shall be used in a single tc-skbmod
command.  Trying to use more than one of them at a time is considered
undefined behavior; pipe multiple tc-skbmod commands together instead.
"set" and "swap" only affect Ethernet packets, while "ecn" only affects
IP packets.

Depends on kernel patch "net/sched: act_skbmod: Add SKBMOD_F_ECN option
support", as well as iproute2 patch "tc/skbmod: Remove misinformation
about the swap action".

[1] https://en.wikipedia.org/wiki/Explicit_Congestion_Notification

Reviewed-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: Peilin Ye <peilin.ye@bytedance.com>
---
Hi all,

The corresponding kernel patch is here, which is currently pending
for review:
https://lore.kernel.org/netdev/f5c5a81d6674a8f4838684ac52ed66da83f92499.1626899889.git.peilin.ye@bytedance.com/T/#u

It also depends on this iproute2 patch, which is also pending:
https://lore.kernel.org/netdev/20210720192145.20166-1-yepeilin.cs@gmail.com/

Thanks,
Peilin Ye

 include/uapi/linux/tc_act/tc_skbmod.h |  1 +
 man/man8/tc-skbmod.8                  | 38 ++++++++++++++++++++-------
 tc/m_skbmod.c                         |  8 +++++-
 3 files changed, 37 insertions(+), 10 deletions(-)

Comments

David Ahern Aug. 2, 2021, 4:28 p.m. UTC | #1
On 7/21/21 5:20 PM, Peilin Ye wrote:
> From: Peilin Ye <peilin.ye@bytedance.com>

> 

> Recently we added SKBMOD_F_ECN option support to the kernel; support it in

> the tc-skbmod(8) front end, and update its man page accordingly.

> 

> The 2 least significant bits of the Traffic Class field in IPv4 and IPv6

> headers are used to represent different ECN states [1]:

> 

> 	0b00: "Non ECN-Capable Transport", Non-ECT

> 	0b10: "ECN Capable Transport", ECT(0)

> 	0b01: "ECN Capable Transport", ECT(1)

> 	0b11: "Congestion Encountered", CE

> 

> This new option, "ecn", marks ECT(0) and ECT(1) IPv{4,6} packets as CE,

> which is useful for ECN-based rate limiting.  For example:

> 

> 	$ tc filter add dev eth0 parent 1: protocol ip prio 10 \

> 		u32 match ip protocol 1 0xff flowid 1:2 \

> 		action skbmod \

> 		ecn

> 

> The updated tc-skbmod SYNOPSIS looks like the following:

> 

> 	tc ... action skbmod { set SETTABLE | swap SWAPPABLE | ecn } ...

> 

> Only one of "set", "swap" or "ecn" shall be used in a single tc-skbmod

> command.  Trying to use more than one of them at a time is considered

> undefined behavior; pipe multiple tc-skbmod commands together instead.

> "set" and "swap" only affect Ethernet packets, while "ecn" only affects

> IP packets.

> 

> Depends on kernel patch "net/sched: act_skbmod: Add SKBMOD_F_ECN option

> support", as well as iproute2 patch "tc/skbmod: Remove misinformation

> about the swap action".

> 

> [1] https://en.wikipedia.org/wiki/Explicit_Congestion_Notification

> 

> Reviewed-by: Cong Wang <cong.wang@bytedance.com>

> Signed-off-by: Peilin Ye <peilin.ye@bytedance.com>

> ---

> Hi all,

> 

> The corresponding kernel patch is here, which is currently pending

> for review:

> https://lore.kernel.org/netdev/f5c5a81d6674a8f4838684ac52ed66da83f92499.1626899889.git.peilin.ye@bytedance.com/T/#u

> 

> It also depends on this iproute2 patch, which is also pending:

> https://lore.kernel.org/netdev/20210720192145.20166-1-yepeilin.cs@gmail.com/

> 

> Thanks,

> Peilin Ye

> 


man page update has conflicts; please rebase.
Thanks,
diff mbox series

Patch

diff --git a/include/uapi/linux/tc_act/tc_skbmod.h b/include/uapi/linux/tc_act/tc_skbmod.h
index c525b3503797..af6ef2cfbf3d 100644
--- a/include/uapi/linux/tc_act/tc_skbmod.h
+++ b/include/uapi/linux/tc_act/tc_skbmod.h
@@ -17,6 +17,7 @@ 
 #define SKBMOD_F_SMAC	0x2
 #define SKBMOD_F_ETYPE	0x4
 #define SKBMOD_F_SWAPMAC 0x8
+#define SKBMOD_F_ECN	0x10
 
 struct tc_skbmod {
 	tc_gen;
diff --git a/man/man8/tc-skbmod.8 b/man/man8/tc-skbmod.8
index 76512311b17d..52eaf989e80b 100644
--- a/man/man8/tc-skbmod.8
+++ b/man/man8/tc-skbmod.8
@@ -8,7 +8,8 @@  skbmod - user-friendly packet editor action
 .BR tc " ... " "action skbmod " "{ " "set "
 .IR SETTABLE " | "
 .BI swap " SWAPPABLE"
-.RI " } [ " CONTROL " ] [ "
+.RB " | " ecn
+.RI "} [ " CONTROL " ] [ "
 .BI index " INDEX "
 ]
 
@@ -37,6 +38,12 @@  action. Instead of having to manually edit 8-, 16-, or 32-bit chunks of an
 ethernet header,
 .B skbmod
 allows complete substitution of supported elements.
+Action must be one of
+.BR set ", " swap " and " ecn "."
+.BR set " and " swap
+only affect Ethernet packets, while
+.B ecn
+only affects IP packets.
 .SH OPTIONS
 .TP
 .BI dmac " DMAC"
@@ -51,6 +58,10 @@  Change the ethertype to the specified value.
 .BI mac
 Used to swap mac addresses.
 .TP
+.B ecn
+Used to mark ECN Capable Transport (ECT) IP packets as Congestion Encountered (CE).
+Does not affect Non ECN-Capable Transport (Non-ECT) packets.
+.TP
 .I CONTROL
 The following keywords allow to control how the tree of qdisc, classes,
 filters and actions is further traversed after this action.
@@ -115,7 +126,7 @@  tc filter add dev eth5 parent 1: protocol ip prio 10 \\
 .EE
 .RE
 
-Finally, swap the destination and source mac addresses in the header:
+To swap the destination and source mac addresses in the Ethernet header:
 
 .RS
 .EX
@@ -126,13 +137,22 @@  tc filter add dev eth3 parent 1: protocol ip prio 10 \\
 .EE
 .RE
 
-However, trying to
-.B set
-and
-.B swap
-in a single
-.B skbmod
-command will cause undefined behavior.
+Finally, to mark the CE codepoint in the IP header for ECN Capable Transport (ECT) packets:
+
+.RS
+.EX
+tc filter add dev eth0 parent 1: protocol ip prio 10 \\
+	u32 match ip protocol 1 0xff flowid 1:2 \\
+	action skbmod \\
+	ecn
+.EE
+.RE
+
+Only one of
+.BR set ", " swap " and " ecn
+shall be used in a single command.
+Trying to use more than one of them in a single command is considered undefined behavior; pipe
+multiple commands together instead.
 
 .SH SEE ALSO
 .BR tc (8),
diff --git a/tc/m_skbmod.c b/tc/m_skbmod.c
index 3fe30651a7d8..8d8bac5bc481 100644
--- a/tc/m_skbmod.c
+++ b/tc/m_skbmod.c
@@ -28,7 +28,7 @@ 
 static void skbmod_explain(void)
 {
 	fprintf(stderr,
-		"Usage:... skbmod { set <SETTABLE> | swap <SWAPPABLE> } [CONTROL] [index INDEX]\n"
+		"Usage:... skbmod { set <SETTABLE> | swap <SWAPPABLE> | ecn } [CONTROL] [index INDEX]\n"
 		"where SETTABLE is: [dmac DMAC] [smac SMAC] [etype ETYPE]\n"
 		"where SWAPPABLE is: \"mac\" to swap mac addresses\n"
 		"\tDMAC := 6 byte Destination MAC address\n"
@@ -111,6 +111,9 @@  static int parse_skbmod(struct action_util *a, int *argc_p, char ***argv_p,
 			p.flags |= SKBMOD_F_SMAC;
 			fprintf(stderr, "src MAC address <%s>\n", saddr);
 			ok += 1;
+		} else if (matches(*argv, "ecn") == 0) {
+			p.flags |= SKBMOD_F_ECN;
+			ok += 1;
 		} else if (matches(*argv, "help") == 0) {
 			skbmod_usage();
 		} else {
@@ -211,6 +214,9 @@  static int print_skbmod(struct action_util *au, FILE *f, struct rtattr *arg)
 	if (p->flags & SKBMOD_F_SWAPMAC)
 		fprintf(f, "swap mac ");
 
+	if (p->flags & SKBMOD_F_ECN)
+		fprintf(f, "ecn ");
+
 	fprintf(f, "\n\t index %u ref %d bind %d", p->index, p->refcnt,
 		p->bindcnt);
 	if (show_stats) {