From e9e5eee8733739f13a204132b502494b3f494f3b Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Mon, 8 Nov 2010 20:05:57 +0900 Subject: [PATCH 1/8] IPVS: Add persistence engine to connection entry The dest of a connection may not exist if it has been created as the result of connection synchronisation. But in order for connection entries for templates with persistence engine data created through connection synchronisation to be valid access to the persistence engine pointer is required. So add the persistence engine to the connection itself. Signed-off-by: Simon Horman --- include/net/ip_vs.h | 16 ++++++++++++++-- net/netfilter/ipvs/ip_vs_conn.c | 19 ++++++++++--------- net/netfilter/ipvs/ip_vs_ctl.c | 4 ++-- net/netfilter/ipvs/ip_vs_pe.c | 14 ++++---------- 4 files changed, 30 insertions(+), 23 deletions(-) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index b7bbd6c28cfa..be2b5690f892 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -422,6 +422,7 @@ struct ip_vs_conn { struct ip_vs_seq in_seq; /* incoming seq. struct */ struct ip_vs_seq out_seq; /* outgoing seq. struct */ + const struct ip_vs_pe *pe; char *pe_data; __u8 pe_data_len; }; @@ -814,8 +815,19 @@ void ip_vs_bind_pe(struct ip_vs_service *svc, struct ip_vs_pe *pe); void ip_vs_unbind_pe(struct ip_vs_service *svc); int register_ip_vs_pe(struct ip_vs_pe *pe); int unregister_ip_vs_pe(struct ip_vs_pe *pe); -extern struct ip_vs_pe *ip_vs_pe_get(const char *name); -extern void ip_vs_pe_put(struct ip_vs_pe *pe); +struct ip_vs_pe *ip_vs_pe_getbyname(const char *name); + +static inline void ip_vs_pe_get(const struct ip_vs_pe *pe) +{ + if (pe && pe->module) + __module_get(pe->module); +} + +static inline void ip_vs_pe_put(const struct ip_vs_pe *pe) +{ + if (pe && pe->module) + module_put(pe->module); +} /* * IPVS protocol functions (from ip_vs_proto.c) diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index e9adecdc8ca4..64a9ca314100 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -176,8 +176,8 @@ static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp) ip_vs_conn_fill_param(cp->af, cp->protocol, &cp->caddr, cp->cport, NULL, 0, &p); - if (cp->dest && cp->dest->svc->pe) { - p.pe = cp->dest->svc->pe; + if (cp->pe) { + p.pe = cp->pe; p.pe_data = cp->pe_data; p.pe_data_len = cp->pe_data_len; } @@ -765,6 +765,7 @@ static void ip_vs_conn_expire(unsigned long data) if (cp->flags & IP_VS_CONN_F_NFCT) ip_vs_conn_drop_conntrack(cp); + ip_vs_pe_put(cp->pe); kfree(cp->pe_data); if (unlikely(cp->app != NULL)) ip_vs_unbind_app(cp); @@ -826,7 +827,9 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, &cp->daddr, daddr); cp->dport = dport; cp->flags = flags; - if (flags & IP_VS_CONN_F_TEMPLATE && p->pe_data) { + if (flags & IP_VS_CONN_F_TEMPLATE && p->pe) { + ip_vs_pe_get(p->pe); + cp->pe = p->pe; cp->pe_data = p->pe_data; cp->pe_data_len = p->pe_data_len; } @@ -958,15 +961,13 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v) char pe_data[IP_VS_PENAME_MAXLEN + IP_VS_PEDATA_MAXLEN + 3]; size_t len = 0; - if (cp->dest && cp->pe_data && - cp->dest->svc->pe->show_pe_data) { + if (cp->pe_data) { pe_data[0] = ' '; - len = strlen(cp->dest->svc->pe->name); - memcpy(pe_data + 1, cp->dest->svc->pe->name, len); + len = strlen(cp->pe->name); + memcpy(pe_data + 1, cp->pe->name, len); pe_data[len + 1] = ' '; len += 2; - len += cp->dest->svc->pe->show_pe_data(cp, - pe_data + len); + len += cp->pe->show_pe_data(cp, pe_data + len); } pe_data[len] = '\0'; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 5f5daa30b0af..3e92558dfcc2 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1139,7 +1139,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u, } if (u->pe_name && *u->pe_name) { - pe = ip_vs_pe_get(u->pe_name); + pe = ip_vs_pe_getbyname(u->pe_name); if (pe == NULL) { pr_info("persistence engine module ip_vs_pe_%s " "not found\n", u->pe_name); @@ -1250,7 +1250,7 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) old_sched = sched; if (u->pe_name && *u->pe_name) { - pe = ip_vs_pe_get(u->pe_name); + pe = ip_vs_pe_getbyname(u->pe_name); if (pe == NULL) { pr_info("persistence engine module ip_vs_pe_%s " "not found\n", u->pe_name); diff --git a/net/netfilter/ipvs/ip_vs_pe.c b/net/netfilter/ipvs/ip_vs_pe.c index 3414af70ee12..e99f920b93d1 100644 --- a/net/netfilter/ipvs/ip_vs_pe.c +++ b/net/netfilter/ipvs/ip_vs_pe.c @@ -30,7 +30,7 @@ void ip_vs_unbind_pe(struct ip_vs_service *svc) /* Get pe in the pe list by name */ static struct ip_vs_pe * -ip_vs_pe_getbyname(const char *pe_name) +__ip_vs_pe_getbyname(const char *pe_name) { struct ip_vs_pe *pe; @@ -60,28 +60,22 @@ ip_vs_pe_getbyname(const char *pe_name) } /* Lookup pe and try to load it if it doesn't exist */ -struct ip_vs_pe *ip_vs_pe_get(const char *name) +struct ip_vs_pe *ip_vs_pe_getbyname(const char *name) { struct ip_vs_pe *pe; /* Search for the pe by name */ - pe = ip_vs_pe_getbyname(name); + pe = __ip_vs_pe_getbyname(name); /* If pe not found, load the module and search again */ if (!pe) { request_module("ip_vs_pe_%s", name); - pe = ip_vs_pe_getbyname(name); + pe = __ip_vs_pe_getbyname(name); } return pe; } -void ip_vs_pe_put(struct ip_vs_pe *pe) -{ - if (pe && pe->module) - module_put(pe->module); -} - /* Register a pe in the pe list */ int register_ip_vs_pe(struct ip_vs_pe *pe) { From ea2c73afc23db3084fd857b027446c38fc7ff2c9 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Mon, 8 Nov 2010 20:06:30 +0900 Subject: [PATCH 2/8] IPVS: Only match pe_data created by the same pe Only match persistence engine data if it was created by the same persistence engine. Reported-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_conn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 64a9ca314100..261db1a17633 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -354,7 +354,7 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p) list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { if (p->pe_data && p->pe->ct_match) { - if (p->pe->ct_match(p, cp)) + if (p->pe == cp->pe && p->pe->ct_match(p, cp)) goto out; continue; } From d494262b8a0f3507b62104a565849124abe29827 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Tue, 9 Nov 2010 09:33:15 +0900 Subject: [PATCH 3/8] IPVS: Make the cp argument to ip_vs_sync_conn() static Acked-by: Hans Schillstrom Signed-off-by: Simon Horman --- include/net/ip_vs.h | 2 +- net/netfilter/ipvs/ip_vs_sync.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index be2b5690f892..d5a32e47f9d9 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -916,7 +916,7 @@ extern char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN]; extern char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN]; extern int start_sync_thread(int state, char *mcast_ifn, __u8 syncid); extern int stop_sync_thread(int state); -extern void ip_vs_sync_conn(struct ip_vs_conn *cp); +extern void ip_vs_sync_conn(const struct ip_vs_conn *cp); /* diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index ab85aedea17e..a4dccbc4f1bb 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -236,7 +236,7 @@ get_curr_sync_buff(unsigned long time) * Add an ip_vs_conn information into the current sync_buff. * Called by ip_vs_in. */ -void ip_vs_sync_conn(struct ip_vs_conn *cp) +void ip_vs_sync_conn(const struct ip_vs_conn *cp) { struct ip_vs_sync_mesg *m; struct ip_vs_sync_conn *s; From 7ae246a15a5c9d26cfb572d36794325db0400b18 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Tue, 9 Nov 2010 09:33:25 +0900 Subject: [PATCH 4/8] IPVS: Remove useless { } block from ip_vs_process_message() Acked-by: Hans Schillstrom Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_sync.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index a4dccbc4f1bb..72b3d88d35f4 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -381,20 +381,18 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) } } - { - if (ip_vs_conn_fill_param_sync(AF_INET, s->protocol, - (union nf_inet_addr *)&s->caddr, - s->cport, - (union nf_inet_addr *)&s->vaddr, - s->vport, ¶m)) { - pr_err("ip_vs_conn_fill_param_sync failed"); - return; - } - if (!(flags & IP_VS_CONN_F_TEMPLATE)) - cp = ip_vs_conn_in_get(¶m); - else - cp = ip_vs_ct_in_get(¶m); + if (ip_vs_conn_fill_param_sync(AF_INET, s->protocol, + (union nf_inet_addr *)&s->caddr, + s->cport, + (union nf_inet_addr *)&s->vaddr, + s->vport, ¶m)) { + pr_err("ip_vs_conn_fill_param_sync failed"); + return; } + if (!(flags & IP_VS_CONN_F_TEMPLATE)) + cp = ip_vs_conn_in_get(¶m); + else + cp = ip_vs_ct_in_get(¶m); if (!cp) { /* * Find the appropriate destination for the connection. From 8aadf93c9c1ff1a53aafd18d038be0d709b5ebc0 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Tue, 9 Nov 2010 09:33:28 +0900 Subject: [PATCH 5/8] IPVS: buffer argument to ip_vs_process_message() should not be const It is assigned to a non-const variable and its contents are modified. Acked-by: Hans Schillstrom Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_sync.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 72b3d88d35f4..3897d6bf3b29 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -303,7 +303,7 @@ ip_vs_conn_fill_param_sync(int af, int protocol, * Process received multicast message and create the corresponding * ip_vs_conn entries. */ -static void ip_vs_process_message(const char *buffer, const size_t buflen) +static void ip_vs_process_message(char *buffer, const size_t buflen) { struct ip_vs_sync_mesg *m = (struct ip_vs_sync_mesg *)buffer; struct ip_vs_sync_conn *s; From 4ecd29447e6b9c12190e21c3e44ed5b12693c467 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Nov 2010 18:38:52 +0100 Subject: [PATCH 6/8] ipvs: add static and read_mostly attributes ip_vs_conn_tab_bits & ip_vs_conn_tab_mask are static to ipvs/ip_vs_conn.c ip_vs_conn_tab_size, ip_vs_conn_tab_mask, ip_vs_conn_tab [the pointer], ip_vs_conn_rnd are mostly read. Signed-off-by: Eric Dumazet Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_conn.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 261db1a17633..7615f9e3d955 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -48,18 +48,18 @@ /* * Connection hash size. Default is what was selected at compile time. */ -int ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS; +static int ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS; module_param_named(conn_tab_bits, ip_vs_conn_tab_bits, int, 0444); MODULE_PARM_DESC(conn_tab_bits, "Set connections' hash size"); /* size and mask values */ -int ip_vs_conn_tab_size; -int ip_vs_conn_tab_mask; +int ip_vs_conn_tab_size __read_mostly; +static int ip_vs_conn_tab_mask __read_mostly; /* * Connection hash table: for input and output packets lookups of IPVS */ -static struct list_head *ip_vs_conn_tab; +static struct list_head *ip_vs_conn_tab __read_mostly; /* SLAB cache for IPVS connections */ static struct kmem_cache *ip_vs_conn_cachep __read_mostly; @@ -71,7 +71,7 @@ static atomic_t ip_vs_conn_count = ATOMIC_INIT(0); static atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0); /* random value for IPVS connection hash */ -static unsigned int ip_vs_conn_rnd; +static unsigned int ip_vs_conn_rnd __read_mostly; /* * Fine locking granularity for big connection hash table From a333e2ec05791bb866086274ac9749315900a0a6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Nov 2010 19:46:33 +0100 Subject: [PATCH 7/8] ipvs: remove shadow rt variable Remove a sparse warning about rt variable. Signed-off-by: Eric Dumazet Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_xmit.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 10bd39c0ae2d..50b131c28b85 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -188,7 +188,6 @@ __ip_vs_reroute_locally(struct sk_buff *skb) }, .mark = skb->mark, }; - struct rtable *rt; if (ip_route_output_key(net, &rt, &fl)) return 0; From 8f1b03a4c18e8f3f0801447b62330faa8ed3bb37 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Tue, 9 Nov 2010 10:08:49 +0900 Subject: [PATCH 8/8] ipvs: allow transmit of GRO aggregated skbs Attempt at allowing LVS to transmit skbs of greater than MTU length that have been aggregated by GRO and can thus be deaggregated by GSO. Cc: Julian Anastasov Cc: Herbert Xu Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_xmit.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 50b131c28b85..fb2a445ddc59 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -407,7 +407,8 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* MTU checking */ mtu = dst_mtu(&rt->dst); - if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) { + if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) && + !skb_is_gso(skb)) { ip_rt_put(rt); icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); IP_VS_DBG_RL("%s(): frag needed\n", __func__); @@ -460,7 +461,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, /* MTU checking */ mtu = dst_mtu(&rt->dst); - if (skb->len > mtu) { + if (skb->len > mtu && !skb_is_gso(skb)) { if (!skb->dev) { struct net *net = dev_net(skb_dst(skb)->dev); @@ -560,7 +561,8 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* MTU checking */ mtu = dst_mtu(&rt->dst); - if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) { + if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) && + !skb_is_gso(skb)) { icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): frag needed for"); @@ -675,7 +677,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, /* MTU checking */ mtu = dst_mtu(&rt->dst); - if (skb->len > mtu) { + if (skb->len > mtu && !skb_is_gso(skb)) { if (!skb->dev) { struct net *net = dev_net(skb_dst(skb)->dev); @@ -790,8 +792,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, df |= (old_iph->frag_off & htons(IP_DF)); - if ((old_iph->frag_off & htons(IP_DF)) - && mtu < ntohs(old_iph->tot_len)) { + if ((old_iph->frag_off & htons(IP_DF) && + mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb))) { icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error_put; @@ -903,7 +905,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, if (skb_dst(skb)) skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); - if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) { + if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr) && + !skb_is_gso(skb)) { if (!skb->dev) { struct net *net = dev_net(skb_dst(skb)->dev); @@ -1008,7 +1011,8 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* MTU checking */ mtu = dst_mtu(&rt->dst); - if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) { + if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu && + !skb_is_gso(skb)) { icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); ip_rt_put(rt); IP_VS_DBG_RL("%s(): frag needed\n", __func__); @@ -1175,7 +1179,8 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* MTU checking */ mtu = dst_mtu(&rt->dst); - if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) { + if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF)) && + !skb_is_gso(skb)) { icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error_put; @@ -1289,7 +1294,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, /* MTU checking */ mtu = dst_mtu(&rt->dst); - if (skb->len > mtu) { + if (skb->len > mtu && !skb_is_gso(skb)) { if (!skb->dev) { struct net *net = dev_net(skb_dst(skb)->dev);