diff mbox series

net: netfilter: Add RFC-7597 Section 5.1 PSID support

Message ID 20210629004819.4750-1-Cole.Dishington@alliedtelesis.co.nz
State New
Headers show
Series net: netfilter: Add RFC-7597 Section 5.1 PSID support | expand

Commit Message

Cole Dishington June 29, 2021, 12:48 a.m. UTC
This adds support for masquerading into a smaller subset of ports -
defined by the PSID values from RFC-7597 Section 5.1. This is part of
the support for MAP-E and Lightweight 4over6, which allows multiple
devices to share an IPv4 address by splitting the L4 port / id into
ranges.

Co-developed-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
Signed-off-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
Co-developed-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
Signed-off-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
Signed-off-by: Blair Steven <blair.steven@alliedtelesis.co.nz>
Signed-off-by: Cole Dishington <Cole.Dishington@alliedtelesis.co.nz>
---

Notes:
    Thanks for your time reviewing. I have also submitted a patch to netfilter iptables for these changes.
    
    Comments:
    Selecting the ports for psid needs to be in nf_nat_core since the PSID ranges are not a single range. e.g. offset=1024, PSID=0, psid_length=8 generates the ranges 1024-1027, 2048-2051, ..., 63488-63491, ... (example taken from RFC7597 B.2).
    This is why it is enough to set NF_NAT_RANGE_PROTO_SPECIFIED and init upper/lower boundaries.
    
    Changes in v2:
    - Moved cached range2 from struct nf_conn to nf_conn_nat.
    - Moved psid fields out of union nf_conntrack_man_proto. Now using range2 fields src, dst, and base to store psid parameters.
    - Re-added the removed error check for nf_ct_expect_related()
    - Added new version to masquerade iptables extension to use the range2 base field.

 include/net/netfilter/nf_nat.h        |  1 +
 include/uapi/linux/netfilter/nf_nat.h |  3 +-
 net/netfilter/nf_nat_core.c           | 69 +++++++++++++++++++++++----
 net/netfilter/nf_nat_ftp.c            | 29 ++++++-----
 net/netfilter/nf_nat_helper.c         | 16 +++++--
 net/netfilter/nf_nat_masquerade.c     | 13 +++--
 net/netfilter/xt_MASQUERADE.c         | 44 +++++++++++++++--
 7 files changed, 140 insertions(+), 35 deletions(-)

Comments

Florian Westphal June 30, 2021, 2:20 p.m. UTC | #1
Cole Dishington <Cole.Dishington@alliedtelesis.co.nz> wrote:
>     Comments:

>     Selecting the ports for psid needs to be in nf_nat_core since the PSID ranges are not a single range. e.g. offset=1024, PSID=0, psid_length=8 generates the ranges 1024-1027, 2048-2051, ..., 63488-63491, ... (example taken from RFC7597 B.2).

>     This is why it is enough to set NF_NAT_RANGE_PROTO_SPECIFIED and init upper/lower boundaries.


I suspect this is missing a "not".  But the current algorithm has problems; see
below.

> +	if (range->flags & NF_NAT_RANGE_PSID) {

> +		/* PSID defines a group of port ranges, per PSID. PSID

> +		 * is already contained in min and max.

> +		 */

> +		unsigned int min_to_max, base;

> +

> +		min = ntohs(range->min_proto.all);

> +		max = ntohs(range->max_proto.all);

> +		base = ntohs(range->base_proto.all);

> +		min_to_max = max - min;

> +		for (; max <= (1 << 16) - 1; min += base, max = min + min_to_max) {

> +			for (off = 0; off <= min_to_max; off++) {

> +				*keyptr = htons(min + off);

> +				if (!nf_nat_used_tuple(tuple, ct))

> +					return;

> +			}

> +		}

> +	}


I fear this searches way too many ports.
We had soft lockups in the past because of exhaustive searches.

See a504b703bb1da526a01593da0e4be2af9d9f5fa8
("netfilter: nat: limit port clash resolution attempts").

I suggest you try pre-selecting one of the eligible ranges in
nf_nat_masquerade_ipv4 when the 'newrange' is filled in and set
RANGE_PROTO_SPECIFIED.

Maybe even prandom-based preselection is good enough.

>  	/* If no range specified... */

>  	if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) {

>  		/* If it's dst rewrite, can't change port */

> @@ -529,11 +572,19 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,

>  

>  	/* Only bother mapping if it's not already in range and unique */

>  	if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {

> -		if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {

> +		/* PSID mode is present always needs to check

> +		 * to see if the source ports are in range.

> +		 */

> +		if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED ||

> +		    (range->flags & NF_NAT_RANGE_PSID &&


Why the extra check?
Can't you set NF_NAT_RANGE_PROTO_SPECIFIED in case PSID is requested by
userspace?

> diff --git a/net/netfilter/nf_nat_ftp.c b/net/netfilter/nf_nat_ftp.c

> index aace6768a64e..f65163278db0 100644

> --- a/net/netfilter/nf_nat_ftp.c

> +++ b/net/netfilter/nf_nat_ftp.c

> @@ -17,6 +17,10 @@

>  #include <net/netfilter/nf_conntrack_helper.h>

>  #include <net/netfilter/nf_conntrack_expect.h>

>  #include <linux/netfilter/nf_conntrack_ftp.h>

> +void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,

> +				 const struct nf_nat_range2 *range,

> +				 enum nf_nat_manip_type maniptype,

> +				 const struct nf_conn *ct);

>  

>  #define NAT_HELPER_NAME "ftp"

>  

> @@ -72,8 +76,13 @@ static unsigned int nf_nat_ftp(struct sk_buff *skb,

>  	u_int16_t port;

>  	int dir = CTINFO2DIR(ctinfo);

>  	struct nf_conn *ct = exp->master;

> +	struct nf_conn_nat *nat = nfct_nat(ct);

>  	char buffer[sizeof("|1||65535|") + INET6_ADDRSTRLEN];

>  	unsigned int buflen;

> +	int ret;

> +

> +	if (WARN_ON_ONCE(!nat))

> +		return NF_DROP;

>  

>  	pr_debug("type %i, off %u len %u\n", type, matchoff, matchlen);

>  

> @@ -86,18 +95,14 @@ static unsigned int nf_nat_ftp(struct sk_buff *skb,

>  	 * this one. */

>  	exp->expectfn = nf_nat_follow_master;

>  

> -	/* Try to get same port: if not, try to change it. */

> -	for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {

> -		int ret;

> -

> -		exp->tuple.dst.u.tcp.port = htons(port);

> -		ret = nf_ct_expect_related(exp, 0);

> -		if (ret == 0)

> -			break;

> -		else if (ret != -EBUSY) {

> -			port = 0;

> -			break;

> -		}

> +	/* Find a port that matches the MASQ rule. */

> +	nf_nat_l4proto_unique_tuple(&exp->tuple, nat->range,

> +				    dir ? NF_NAT_MANIP_SRC : NF_NAT_MANIP_DST,

> +				    ct);


Hmm, I am ignorant of the details here, but is this correct?

This could be an inbound connection, rather than outbound.

> diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c

> index a263505455fc..2d105e4eb8f8 100644

> --- a/net/netfilter/nf_nat_helper.c

> +++ b/net/netfilter/nf_nat_helper.c

> @@ -179,15 +179,23 @@ EXPORT_SYMBOL(nf_nat_mangle_udp_packet);

>  void nf_nat_follow_master(struct nf_conn *ct,

>  			  struct nf_conntrack_expect *exp)

>  {

> +	struct nf_conn_nat *nat = NULL;

>  	struct nf_nat_range2 range;

>  

>  	/* This must be a fresh one. */

>  	BUG_ON(ct->status & IPS_NAT_DONE_MASK);

>  

> -	/* Change src to where master sends to */

> -	range.flags = NF_NAT_RANGE_MAP_IPS;

> -	range.min_addr = range.max_addr

> -		= ct->master->tuplehash[!exp->dir].tuple.dst.u3;

> +	if (exp->master && !exp->dir) {

> +		nat = nfct_nat(exp->master);

> +		if (nat)

> +			range = *nat->range;


Can't you store the psid-relevant parts of the range struct only?
Non-PSID doesn't need the original range, so why do you?

> diff --git a/net/netfilter/nf_nat_masquerade.c b/net/netfilter/nf_nat_masquerade.c

> index 8e8a65d46345..d83cd3d8ad3f 100644

> --- a/net/netfilter/nf_nat_masquerade.c

> +++ b/net/netfilter/nf_nat_masquerade.c

> @@ -45,10 +45,6 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,

>  		return NF_DROP;

>  	}

>  

> -	nat = nf_ct_nat_ext_add(ct);

> -	if (nat)

> -		nat->masq_index = out->ifindex;

> -

>  	/* Transfer from original range. */

>  	memset(&newrange.min_addr, 0, sizeof(newrange.min_addr));

>  	memset(&newrange.max_addr, 0, sizeof(newrange.max_addr));

> @@ -57,6 +53,15 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,

>  	newrange.max_addr.ip = newsrc;

>  	newrange.min_proto   = range->min_proto;

>  	newrange.max_proto   = range->max_proto;

> +	newrange.base_proto  = range->base_proto;

> +

> +	nat = nf_ct_nat_ext_add(ct);

> +	if (nat) {

> +		nat->masq_index = out->ifindex;

> +		if (!nat->range)

> +			nat->range = kmalloc(sizeof(*nat->range), 0);

> +		memcpy(nat->range, &newrange, sizeof(*nat->range));


Use kmemdup().  This also lacks error handling, and the allocation should use GFP_ATOMIC.
Where is this freed again?

It would be good if you could chop this up in smaller chunks.
A selftest would be nice as well (see tools/testing/selftests/netfilter).
Florian Westphal July 5, 2021, 10:39 a.m. UTC | #2
Cole Dishington <Cole.Dishington@alliedtelesis.co.nz> wrote:
> Adds support for masquerading into a smaller subset of ports -

> defined by the PSID values from RFC-7597 Section 5.1. This is part of

> the support for MAP-E and Lightweight 4over6, which allows multiple

> devices to share an IPv4 address by splitting the L4 port / id into

> ranges.

> 

> Co-developed-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>

> Signed-off-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>

> Co-developed-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>

> Signed-off-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>

> Signed-off-by: Blair Steven <blair.steven@alliedtelesis.co.nz>

> Signed-off-by: Cole Dishington <Cole.Dishington@alliedtelesis.co.nz>

> ---


Just a quick review:
> +	/* In this case we are in PSID mode, avoid checking all ranges by computing bitmasks */

> +	if (is_psid) {

> +		u16 j = ntohs(max->all) - ntohs(min->all) + 1;

> +		u16 a = (1 << 16) / ntohs(base->all);


This crashes with a division by zero when base->all is 0.
If that is impossible, please add a comment; otherwise this needs
a sanity check on the divisor.

> @@ -55,8 +55,21 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,

>  	newrange.flags       = range->flags | NF_NAT_RANGE_MAP_IPS;

>  	newrange.min_addr.ip = newsrc;

>  	newrange.max_addr.ip = newsrc;

> -	newrange.min_proto   = range->min_proto;

> -	newrange.max_proto   = range->max_proto;

> +

> +	if (range->flags & NF_NAT_RANGE_PSID) {

> +		u16 off = prandom_u32();

> +		u16 base = ntohs(range->base_proto.all);

> +		u16 min =  ntohs(range->min_proto.all);

> +		u16 max_off = ((1 << 16) / base) - 1;

> +

> +		newrange.flags           = newrange.flags | NF_NAT_RANGE_PROTO_SPECIFIED;

> +		newrange.min_proto.all   = htons(min + base * (off % max_off));


Same here for base and max_off: both are used as divisors and can be 0.
Cole Dishington July 16, 2021, 12:27 a.m. UTC | #3
Thanks for your time reviewing!

Changes in v4:
- Handle special case of no offset bits (a=0 / A=2^16).

Cole Dishington (3):
  net: netfilter: Add RFC-7597 Section 5.1 PSID support xtables API
  net: netfilter: Add RFC-7597 Section 5.1 PSID support
  selftests: netfilter: Add RFC-7597 Section 5.1 PSID selftests

 include/uapi/linux/netfilter/nf_nat.h         |   3 +-
 net/netfilter/nf_nat_core.c                   |  39 +++-
 net/netfilter/nf_nat_masquerade.c             |  20 +-
 net/netfilter/xt_MASQUERADE.c                 |  44 ++++-
 .../netfilter/nat_masquerade_psid.sh          | 182 ++++++++++++++++++
 5 files changed, 276 insertions(+), 12 deletions(-)
 create mode 100644 tools/testing/selftests/netfilter/nat_masquerade_psid.sh
diff mbox series

Patch

diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
index 987111ae5240..67cc033f76bb 100644
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@@ -32,6 +32,7 @@  struct nf_conn_nat {
 	union nf_conntrack_nat_help help;
 #if IS_ENABLED(CONFIG_NF_NAT_MASQUERADE)
 	int masq_index;
+	struct nf_nat_range2 *range;
 #endif
 };
 
diff --git a/include/uapi/linux/netfilter/nf_nat.h b/include/uapi/linux/netfilter/nf_nat.h
index a64586e77b24..660e53ffdb57 100644
--- a/include/uapi/linux/netfilter/nf_nat.h
+++ b/include/uapi/linux/netfilter/nf_nat.h
@@ -12,6 +12,7 @@ 
 #define NF_NAT_RANGE_PROTO_RANDOM_FULLY		(1 << 4)
 #define NF_NAT_RANGE_PROTO_OFFSET		(1 << 5)
 #define NF_NAT_RANGE_NETMAP			(1 << 6)
+#define NF_NAT_RANGE_PSID			(1 << 7)
 
 #define NF_NAT_RANGE_PROTO_RANDOM_ALL		\
 	(NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PROTO_RANDOM_FULLY)
@@ -20,7 +21,7 @@ 
 	(NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED |	\
 	 NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PERSISTENT |	\
 	 NF_NAT_RANGE_PROTO_RANDOM_FULLY | NF_NAT_RANGE_PROTO_OFFSET | \
-	 NF_NAT_RANGE_NETMAP)
+	 NF_NAT_RANGE_NETMAP | NF_NAT_RANGE_PSID)
 
 struct nf_nat_ipv4_range {
 	unsigned int			flags;
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 7de595ead06a..7307bb28ece2 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -195,13 +195,32 @@  static bool nf_nat_inet_in_range(const struct nf_conntrack_tuple *t,
 static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple,
 			     enum nf_nat_manip_type maniptype,
 			     const union nf_conntrack_man_proto *min,
-			     const union nf_conntrack_man_proto *max)
+			     const union nf_conntrack_man_proto *max,
+			     const union nf_conntrack_man_proto *base,
+			     bool is_psid)
 {
 	__be16 port;
+	u16 offset_mask = 0;
+	u16 psid_mask = 0;
+	u16 psid = 0;
+
+	/* In this case we are in PSID mode, avoid checking all ranges by computing bitmasks */
+	if (is_psid) {
+		u16 j = ntohs(max->all) - ntohs(min->all) + 1;
+		u16 a = (1 << 16) / ntohs(base->all);
+
+		offset_mask = (a - 1) * ntohs(base->all);
+		psid_mask = ((ntohs(base->all) / j) << 1) - 1;
+		psid = ntohs(min->all) & psid_mask;
+	}
 
 	switch (tuple->dst.protonum) {
 	case IPPROTO_ICMP:
 	case IPPROTO_ICMPV6:
+		if (is_psid) {
+			return ((ntohs(tuple->src.u.icmp.id) & offset_mask) != 0) &&
+				((ntohs(tuple->src.u.icmp.id) & psid_mask) == psid);
+		}
 		return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) &&
 		       ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id);
 	case IPPROTO_GRE: /* all fall though */
@@ -215,6 +234,10 @@  static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple,
 		else
 			port = tuple->dst.u.all;
 
+		if (is_psid) {
+			return ((ntohs(port) & offset_mask) != 0) &&
+				((ntohs(port) & psid_mask) == psid);
+		}
 		return ntohs(port) >= ntohs(min->all) &&
 		       ntohs(port) <= ntohs(max->all);
 	default:
@@ -239,7 +262,8 @@  static int in_range(const struct nf_conntrack_tuple *tuple,
 		return 1;
 
 	return l4proto_in_range(tuple, NF_NAT_MANIP_SRC,
-				&range->min_proto, &range->max_proto);
+				&range->min_proto, &range->max_proto, &range->base_proto,
+				range->flags & NF_NAT_RANGE_PSID);
 }
 
 static inline int
@@ -360,10 +384,10 @@  find_best_ips_proto(const struct nf_conntrack_zone *zone,
  *
  * Per-protocol part of tuple is initialized to the incoming packet.
  */
-static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
-					const struct nf_nat_range2 *range,
-					enum nf_nat_manip_type maniptype,
-					const struct nf_conn *ct)
+void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
+				 const struct nf_nat_range2 *range,
+				 enum nf_nat_manip_type maniptype,
+				 const struct nf_conn *ct)
 {
 	unsigned int range_size, min, max, i, attempts;
 	__be16 *keyptr;
@@ -420,6 +444,25 @@  static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
 		return;
 	}
 
+	if (range->flags & NF_NAT_RANGE_PSID) {
+		/* PSID defines a group of port ranges, per PSID. PSID
+		 * is already contained in min and max.
+		 */
+		unsigned int min_to_max, base;
+
+		min = ntohs(range->min_proto.all);
+		max = ntohs(range->max_proto.all);
+		base = ntohs(range->base_proto.all);
+		min_to_max = max - min;
+		for (; max <= (1 << 16) - 1; min += base, max = min + min_to_max) {
+			for (off = 0; off <= min_to_max; off++) {
+				*keyptr = htons(min + off);
+				if (!nf_nat_used_tuple(tuple, ct))
+					return;
+			}
+		}
+	}
+
 	/* If no range specified... */
 	if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) {
 		/* If it's dst rewrite, can't change port */
@@ -529,11 +572,19 @@  get_unique_tuple(struct nf_conntrack_tuple *tuple,
 
 	/* Only bother mapping if it's not already in range and unique */
 	if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {
-		if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
+		/* PSID mode is present always needs to check
+		 * to see if the source ports are in range.
+		 */
+		if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED ||
+		    (range->flags & NF_NAT_RANGE_PSID &&
+		     !in_range(orig_tuple, range))) {
 			if (!(range->flags & NF_NAT_RANGE_PROTO_OFFSET) &&
 			    l4proto_in_range(tuple, maniptype,
-			          &range->min_proto,
-			          &range->max_proto) &&
+				  &range->min_proto,
+				  &range->max_proto,
+				  &range->base_proto,
+				  range->flags &
+				  NF_NAT_RANGE_PSID) &&
 			    (range->min_proto.all == range->max_proto.all ||
 			     !nf_nat_used_tuple(tuple, ct)))
 				return;
diff --git a/net/netfilter/nf_nat_ftp.c b/net/netfilter/nf_nat_ftp.c
index aace6768a64e..f65163278db0 100644
--- a/net/netfilter/nf_nat_ftp.c
+++ b/net/netfilter/nf_nat_ftp.c
@@ -17,6 +17,10 @@ 
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <linux/netfilter/nf_conntrack_ftp.h>
+void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
+				 const struct nf_nat_range2 *range,
+				 enum nf_nat_manip_type maniptype,
+				 const struct nf_conn *ct);
 
 #define NAT_HELPER_NAME "ftp"
 
@@ -72,8 +76,13 @@  static unsigned int nf_nat_ftp(struct sk_buff *skb,
 	u_int16_t port;
 	int dir = CTINFO2DIR(ctinfo);
 	struct nf_conn *ct = exp->master;
+	struct nf_conn_nat *nat = nfct_nat(ct);
 	char buffer[sizeof("|1||65535|") + INET6_ADDRSTRLEN];
 	unsigned int buflen;
+	int ret;
+
+	if (WARN_ON_ONCE(!nat))
+		return NF_DROP;
 
 	pr_debug("type %i, off %u len %u\n", type, matchoff, matchlen);
 
@@ -86,18 +95,14 @@  static unsigned int nf_nat_ftp(struct sk_buff *skb,
 	 * this one. */
 	exp->expectfn = nf_nat_follow_master;
 
-	/* Try to get same port: if not, try to change it. */
-	for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
-		int ret;
-
-		exp->tuple.dst.u.tcp.port = htons(port);
-		ret = nf_ct_expect_related(exp, 0);
-		if (ret == 0)
-			break;
-		else if (ret != -EBUSY) {
-			port = 0;
-			break;
-		}
+	/* Find a port that matches the MASQ rule. */
+	nf_nat_l4proto_unique_tuple(&exp->tuple, nat->range,
+				    dir ? NF_NAT_MANIP_SRC : NF_NAT_MANIP_DST,
+				    ct);
+	ret = nf_ct_expect_related(exp, 0);
+	port = ntohs(exp->tuple.dst.u.tcp.port);
+	if (ret != 0 && ret != -EBUSY) {
+		port = 0;
 	}
 
 	if (port == 0) {
diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c
index a263505455fc..2d105e4eb8f8 100644
--- a/net/netfilter/nf_nat_helper.c
+++ b/net/netfilter/nf_nat_helper.c
@@ -179,15 +179,23 @@  EXPORT_SYMBOL(nf_nat_mangle_udp_packet);
 void nf_nat_follow_master(struct nf_conn *ct,
 			  struct nf_conntrack_expect *exp)
 {
+	struct nf_conn_nat *nat = NULL;
 	struct nf_nat_range2 range;
 
 	/* This must be a fresh one. */
 	BUG_ON(ct->status & IPS_NAT_DONE_MASK);
 
-	/* Change src to where master sends to */
-	range.flags = NF_NAT_RANGE_MAP_IPS;
-	range.min_addr = range.max_addr
-		= ct->master->tuplehash[!exp->dir].tuple.dst.u3;
+	if (exp->master && !exp->dir) {
+		nat = nfct_nat(exp->master);
+		if (nat)
+			range = *nat->range;
+	}
+	if (!nat) {
+		/* Change src to where master sends to */
+		range.flags = NF_NAT_RANGE_MAP_IPS;
+		range.min_addr = ct->master->tuplehash[!exp->dir].tuple.dst.u3;
+		range.max_addr = ct->master->tuplehash[!exp->dir].tuple.dst.u3;
+	}
 	nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
 
 	/* For DST manip, map port here to where it's expected. */
diff --git a/net/netfilter/nf_nat_masquerade.c b/net/netfilter/nf_nat_masquerade.c
index 8e8a65d46345..d83cd3d8ad3f 100644
--- a/net/netfilter/nf_nat_masquerade.c
+++ b/net/netfilter/nf_nat_masquerade.c
@@ -45,10 +45,6 @@  nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
 		return NF_DROP;
 	}
 
-	nat = nf_ct_nat_ext_add(ct);
-	if (nat)
-		nat->masq_index = out->ifindex;
-
 	/* Transfer from original range. */
 	memset(&newrange.min_addr, 0, sizeof(newrange.min_addr));
 	memset(&newrange.max_addr, 0, sizeof(newrange.max_addr));
@@ -57,6 +53,15 @@  nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
 	newrange.max_addr.ip = newsrc;
 	newrange.min_proto   = range->min_proto;
 	newrange.max_proto   = range->max_proto;
+	newrange.base_proto  = range->base_proto;
+
+	nat = nf_ct_nat_ext_add(ct);
+	if (nat) {
+		nat->masq_index = out->ifindex;
+		if (!nat->range)
+			nat->range = kmalloc(sizeof(*nat->range), 0);
+		memcpy(nat->range, &newrange, sizeof(*nat->range));
+	}
 
 	/* Hand modified range to generic setup. */
 	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
diff --git a/net/netfilter/xt_MASQUERADE.c b/net/netfilter/xt_MASQUERADE.c
index eae05c178336..dc6870ca2b71 100644
--- a/net/netfilter/xt_MASQUERADE.c
+++ b/net/netfilter/xt_MASQUERADE.c
@@ -16,7 +16,7 @@  MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("Xtables: automatic-address SNAT");
 
 /* FIXME: Multiple targets. --RR */
-static int masquerade_tg_check(const struct xt_tgchk_param *par)
+static int masquerade_tg_check_v0(const struct xt_tgchk_param *par)
 {
 	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
 
@@ -31,8 +31,19 @@  static int masquerade_tg_check(const struct xt_tgchk_param *par)
 	return nf_ct_netns_get(par->net, par->family);
 }
 
+static int masquerade_tg_check_v1(const struct xt_tgchk_param *par)
+{
+	const struct nf_nat_range2 *range = par->targinfo;
+
+	if (range->flags & NF_NAT_RANGE_MAP_IPS) {
+		pr_debug("bad MAP_IPS.\n");
+		return -EINVAL;
+	}
+	return nf_ct_netns_get(par->net, par->family);
+}
+
 static unsigned int
-masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
+masquerade_tg_v0(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct nf_nat_range2 range;
 	const struct nf_nat_ipv4_multi_range_compat *mr;
@@ -46,6 +57,15 @@  masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
 				      xt_out(par));
 }
 
+static unsigned int
+masquerade_tg_v1(struct sk_buff *skb, const struct xt_action_param *par)
+{
+	const struct nf_nat_range2 *range = par->targinfo;
+
+	return nf_nat_masquerade_ipv4(skb, xt_hooknum(par), range,
+				      xt_out(par));
+}
+
 static void masquerade_tg_destroy(const struct xt_tgdtor_param *par)
 {
 	nf_ct_netns_put(par->net, par->family);
@@ -73,6 +93,7 @@  static struct xt_target masquerade_tg_reg[] __read_mostly = {
 	{
 #if IS_ENABLED(CONFIG_IPV6)
 		.name		= "MASQUERADE",
+		.revision	= 0,
 		.family		= NFPROTO_IPV6,
 		.target		= masquerade_tg6,
 		.targetsize	= sizeof(struct nf_nat_range),
@@ -84,15 +105,28 @@  static struct xt_target masquerade_tg_reg[] __read_mostly = {
 	}, {
 #endif
 		.name		= "MASQUERADE",
+		.revision	= 0,
 		.family		= NFPROTO_IPV4,
-		.target		= masquerade_tg,
+		.target		= masquerade_tg_v0,
 		.targetsize	= sizeof(struct nf_nat_ipv4_multi_range_compat),
 		.table		= "nat",
 		.hooks		= 1 << NF_INET_POST_ROUTING,
-		.checkentry	= masquerade_tg_check,
+		.checkentry	= masquerade_tg_check_v0,
 		.destroy	= masquerade_tg_destroy,
 		.me		= THIS_MODULE,
-	}
+	},
+	{
+		.name		= "MASQUERADE",
+		.revision	= 1,
+		.family		= NFPROTO_IPV4,
+		.target		= masquerade_tg_v1,
+		.targetsize	= sizeof(struct nf_nat_range2),
+		.table		= "nat",
+		.hooks		= 1 << NF_INET_POST_ROUTING,
+		.checkentry	= masquerade_tg_check_v1,
+		.destroy	= masquerade_tg_destroy,
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init masquerade_tg_init(void)