From 3ee7c157f796aac5c119669697e224eb4a0cf560 Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Sun, 5 Nov 2017 17:36:53 -0700 Subject: [PATCH] netfilter: nf_defrag_ipv4: Add sysctl to disable per interface Add a sysctl nf_ipv4_defrag_skip to skip defragmentation per interface. It defaults to 0 to preserve existing behavior (always defrag per interface). This is useful for pure ipv4 forwarding scenarios (without NAT) in conjunction with xfrm. It appears that the network stack defrags the packets and then forwards them to xfrm, which encrypts them and later fragments them on a different boundary compared to the source. CRs-Fixed: 2140310 Change-Id: I11956284a9692579274e8626f61cc6432232254c Signed-off-by: Subash Abhinov Kasiviswanathan --- Documentation/networking/ip-sysctl.txt | 4 ++++ include/linux/inetdevice.h | 2 ++ include/uapi/linux/ip.h | 1 + include/uapi/linux/sysctl.h | 1 + kernel/sysctl_binary.c | 1 + net/ipv4/devinet.c | 2 ++ net/ipv4/netfilter/nf_defrag_ipv4.c | 10 ++++++++-- 7 files changed, 19 insertions(+), 2 deletions(-) diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index f6851d94c1af..40dc329f142b 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1239,6 +1239,10 @@ igmp_link_local_mcast_reports - BOOLEAN 224.0.0.X range. Default TRUE +nf_ipv4_defrag_skip - BOOLEAN + Skip defragmentation per interface if set. + Default : 0 (always defrag) + Alexey Kuznetsov. 
kuznet@ms2.inr.ac.ru diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index ee971f335a8b..7118876e9896 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -128,6 +128,8 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev) #define IN_DEV_ARP_ANNOUNCE(in_dev) IN_DEV_MAXCONF((in_dev), ARP_ANNOUNCE) #define IN_DEV_ARP_IGNORE(in_dev) IN_DEV_MAXCONF((in_dev), ARP_IGNORE) #define IN_DEV_ARP_NOTIFY(in_dev) IN_DEV_MAXCONF((in_dev), ARP_NOTIFY) +#define IN_DEV_NF_IPV4_DEFRAG_SKIP(in_dev) \ + IN_DEV_ORCONF((in_dev), NF_IPV4_DEFRAG_SKIP) struct in_ifaddr { struct hlist_node hash; diff --git a/include/uapi/linux/ip.h b/include/uapi/linux/ip.h index 08f894d2ddbd..7b5e2aac86ac 100644 --- a/include/uapi/linux/ip.h +++ b/include/uapi/linux/ip.h @@ -165,6 +165,7 @@ enum IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL, IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL, IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN, + IPV4_DEVCONF_NF_IPV4_DEFRAG_SKIP, __IPV4_DEVCONF_MAX }; diff --git a/include/uapi/linux/sysctl.h b/include/uapi/linux/sysctl.h index 01eb22ca6b3d..47e0de1df362 100644 --- a/include/uapi/linux/sysctl.h +++ b/include/uapi/linux/sysctl.h @@ -483,6 +483,7 @@ enum NET_IPV4_CONF_PROMOTE_SECONDARIES=20, NET_IPV4_CONF_ARP_ACCEPT=21, NET_IPV4_CONF_ARP_NOTIFY=22, + NET_IPV4_CONF_NF_IPV4_DEFRAG_SKIP = 23, }; /* /proc/sys/net/ipv4/netfilter */ diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index 4a816bab38a2..d7612fcba10a 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c @@ -255,6 +255,7 @@ static const struct bin_table bin_net_ipv4_conf_vars_table[] = { { CTL_INT, NET_IPV4_CONF_NOPOLICY, "disable_policy" }, { CTL_INT, NET_IPV4_CONF_FORCE_IGMP_VERSION, "force_igmp_version" }, { CTL_INT, NET_IPV4_CONF_PROMOTE_SECONDARIES, "promote_secondaries" }, + { CTL_INT, NET_IPV4_CONF_NF_IPV4_DEFRAG_SKIP, "nf_ipv4_defrag_skip" }, {} }; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 
0212591b0077..1110e70e0ec6 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -2196,6 +2196,8 @@ static struct devinet_sysctl_table { "promote_secondaries"), DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET, "route_localnet"), + DEVINET_SYSCTL_RW_ENTRY(NF_IPV4_DEFRAG_SKIP, + "nf_ipv4_defrag_skip"), }, }; diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index a04dee536b8e..39455484bd13 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -80,8 +81,13 @@ static unsigned int ipv4_conntrack_defrag(void *priv, #endif /* Gather fragments. */ if (ip_is_fragment(ip_hdr(skb))) { - enum ip_defrag_users user = - nf_ct_defrag_user(state->hook, skb); + enum ip_defrag_users user; + + if (skb->dev && + IN_DEV_NF_IPV4_DEFRAG_SKIP(__in_dev_get_rcu(skb->dev))) + return NF_ACCEPT; + + user = nf_ct_defrag_user(state->hook, skb); if (nf_ct_ipv4_gather_frags(state->net, skb, user)) return NF_STOLEN;