From 61f3cade763dca46127146a52d829e30b8f48921 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Fri, 2 Jan 2015 18:26:02 -0800 Subject: [PATCH 1/4] geneve: Remove workqueue. The work queue is used only to free the UDP socket upon destruction. This is not necessary with Geneve and generally makes the code more difficult to reason about. It also introduces nondeterministic behavior such as when a socket is rapidly deleted and recreated, which could fail as the the deletion happens asynchronously. Signed-off-by: Jesse Gross Signed-off-by: David S. Miller --- include/net/geneve.h | 1 - net/ipv4/geneve.c | 21 ++------------------- 2 files changed, 2 insertions(+), 20 deletions(-) diff --git a/include/net/geneve.h b/include/net/geneve.h index 112132cf8e2e..56c7e1ac216a 100644 --- a/include/net/geneve.h +++ b/include/net/geneve.h @@ -71,7 +71,6 @@ struct geneve_sock { struct hlist_node hlist; geneve_rcv_t *rcv; void *rcv_data; - struct work_struct del_work; struct socket *sock; struct rcu_head rcu; atomic_t refcnt; diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c index 19e256e1dd92..136a829e8746 100644 --- a/net/ipv4/geneve.c +++ b/net/ipv4/geneve.c @@ -61,8 +61,6 @@ struct geneve_net { static int geneve_net_id; -static struct workqueue_struct *geneve_wq; - static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb) { return (struct genevehdr *)(udp_hdr(skb) + 1); @@ -307,15 +305,6 @@ error: return 1; } -static void geneve_del_work(struct work_struct *work) -{ - struct geneve_sock *gs = container_of(work, struct geneve_sock, - del_work); - - udp_tunnel_sock_release(gs->sock); - kfree_rcu(gs, rcu); -} - static struct socket *geneve_create_sock(struct net *net, bool ipv6, __be16 port) { @@ -356,8 +345,6 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, if (!gs) return ERR_PTR(-ENOMEM); - INIT_WORK(&gs->del_work, geneve_del_work); - sock = geneve_create_sock(net, ipv6, port); if (IS_ERR(sock)) { kfree(gs); @@ -430,7 +417,8 @@ void geneve_sock_release(struct geneve_sock *gs) geneve_notify_del_rx_port(gs); spin_unlock(&gn->sock_lock); - queue_work(geneve_wq, &gs->del_work); + udp_tunnel_sock_release(gs->sock); + kfree_rcu(gs, rcu); } EXPORT_SYMBOL_GPL(geneve_sock_release); @@ -458,10 +446,6 @@ static int __init geneve_init_module(void) { int rc; - geneve_wq = alloc_workqueue("geneve", 0, 0); - if (!geneve_wq) - return -ENOMEM; - rc = register_pernet_subsys(&geneve_net_ops); if (rc) return rc; @@ -474,7 +458,6 @@ late_initcall(geneve_init_module); static void __exit geneve_cleanup_module(void) { - destroy_workqueue(geneve_wq); unregister_pernet_subsys(&geneve_net_ops); } module_exit(geneve_cleanup_module); From 829a3ada9cc7d4c30fa61f8033403fb6c8f8092a Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Fri, 2 Jan 2015 18:26:03 -0800 Subject: [PATCH 2/4] geneve: Simplify locking. The existing Geneve locking scheme was pulled over directly from VXLAN. However, VXLAN has a number of built in mechanisms which make the locking more complex and are unlikely to be necessary with Geneve. This simplifies the locking to use a basic scheme of a mutex when doing updates plus RCU on receive. In addition to making the code easier to read, this also avoids the possibility of a race when creating or destroying sockets since UDP sockets and the list of Geneve sockets are protected by different locks. After this change, the entire operation is atomic. Signed-off-by: Jesse Gross Signed-off-by: David S. Miller --- include/net/geneve.h | 2 +- net/ipv4/geneve.c | 59 +++++++++++++++++++------------------------- 2 files changed, 27 insertions(+), 34 deletions(-) diff --git a/include/net/geneve.h b/include/net/geneve.h index 56c7e1ac216a..b40f4affc4cb 100644 --- a/include/net/geneve.h +++ b/include/net/geneve.h @@ -73,7 +73,7 @@ struct geneve_sock { void *rcv_data; struct socket *sock; struct rcu_head rcu; - atomic_t refcnt; + int refcnt; struct udp_offload udp_offloads; }; diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c index 136a829e8746..ad8dbae11d01 100644 --- a/net/ipv4/geneve.c +++ b/net/ipv4/geneve.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -50,13 +51,15 @@ #include #endif +/* Protects sock_list and refcounts. */ +static DEFINE_MUTEX(geneve_mutex); + #define PORT_HASH_BITS 8 #define PORT_HASH_SIZE (1<sock->sk)->inet_sport == port) return gs; } @@ -336,7 +339,6 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, geneve_rcv_t *rcv, void *data, bool ipv6) { - struct geneve_net *gn = net_generic(net, geneve_net_id); struct geneve_sock *gs; struct socket *sock; struct udp_tunnel_sock_cfg tunnel_cfg; @@ -352,7 +354,7 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, } gs->sock = sock; - atomic_set(&gs->refcnt, 1); + gs->refcnt = 1; gs->rcv = rcv; gs->rcv_data = data; @@ -360,11 +362,7 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, gs->udp_offloads.port = port; gs->udp_offloads.callbacks.gro_receive = geneve_gro_receive; gs->udp_offloads.callbacks.gro_complete = geneve_gro_complete; - - spin_lock(&gn->sock_lock); - hlist_add_head_rcu(&gs->hlist, gs_head(net, port)); geneve_notify_add_rx_port(gs); - spin_unlock(&gn->sock_lock); /* Mark socket as an encapsulation socket */ tunnel_cfg.sk_user_data = gs; @@ -373,6 +371,8 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, tunnel_cfg.encap_destroy = NULL; setup_udp_tunnel_sock(net, sock, &tunnel_cfg); + hlist_add_head(&gs->hlist, gs_head(net, port)); + return gs; } @@ -380,25 +380,21 @@ struct geneve_sock *geneve_sock_add(struct net *net, __be16 port, geneve_rcv_t *rcv, void *data, bool no_share, bool ipv6) { - struct geneve_net *gn = net_generic(net, geneve_net_id); struct geneve_sock *gs; - gs = geneve_socket_create(net, port, rcv, data, ipv6); - if (!IS_ERR(gs)) - return gs; + mutex_lock(&geneve_mutex); - if (no_share) /* Return error if sharing is not allowed. */ - return ERR_PTR(-EINVAL); - - spin_lock(&gn->sock_lock); gs = geneve_find_sock(net, port); - if (gs && ((gs->rcv != rcv) || - !atomic_add_unless(&gs->refcnt, 1, 0))) + if (gs) { + if (!no_share && gs->rcv == rcv) + gs->refcnt++; + else gs = ERR_PTR(-EBUSY); - spin_unlock(&gn->sock_lock); + } else { + gs = geneve_socket_create(net, port, rcv, data, ipv6); + } - if (!gs) - gs = ERR_PTR(-EINVAL); + mutex_unlock(&geneve_mutex); return gs; } @@ -406,19 +402,18 @@ EXPORT_SYMBOL_GPL(geneve_sock_add); void geneve_sock_release(struct geneve_sock *gs) { - struct net *net = sock_net(gs->sock->sk); - struct geneve_net *gn = net_generic(net, geneve_net_id); + mutex_lock(&geneve_mutex); - if (!atomic_dec_and_test(&gs->refcnt)) - return; + if (--gs->refcnt) + goto unlock; - spin_lock(&gn->sock_lock); - hlist_del_rcu(&gs->hlist); + hlist_del(&gs->hlist); geneve_notify_del_rx_port(gs); - spin_unlock(&gn->sock_lock); - udp_tunnel_sock_release(gs->sock); kfree_rcu(gs, rcu); + +unlock: + mutex_unlock(&geneve_mutex); } EXPORT_SYMBOL_GPL(geneve_sock_release); @@ -427,8 +422,6 @@ static __net_init int geneve_init_net(struct net *net) struct geneve_net *gn = net_generic(net, geneve_net_id); unsigned int h; - spin_lock_init(&gn->sock_lock); - for (h = 0; h < PORT_HASH_SIZE; ++h) INIT_HLIST_HEAD(&gn->sock_list[h]); @@ -454,7 +447,7 @@ static int __init geneve_init_module(void) return 0; } -late_initcall(geneve_init_module); +module_init(geneve_init_module); static void __exit geneve_cleanup_module(void) { From df5dba8e52be50e615e03ef73b34611d82587f42 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Fri, 2 Jan 2015 18:26:04 -0800 Subject: [PATCH 3/4] geneve: Remove socket hash table. The hash table for open Geneve ports is used only on creation and deletion time. It is not performance critical and is not likely to grow to a large number of items. Therefore, this can be changed to use a simple linked list. Signed-off-by: Jesse Gross Signed-off-by: David S. Miller --- include/net/geneve.h | 2 +- net/ipv4/geneve.c | 26 +++++++------------------- 2 files changed, 8 insertions(+), 20 deletions(-) diff --git a/include/net/geneve.h b/include/net/geneve.h index b40f4affc4cb..03aa2adb5bab 100644 --- a/include/net/geneve.h +++ b/include/net/geneve.h @@ -68,7 +68,7 @@ struct geneve_sock; typedef void (geneve_rcv_t)(struct geneve_sock *gs, struct sk_buff *skb); struct geneve_sock { - struct hlist_node hlist; + struct list_head list; geneve_rcv_t *rcv; void *rcv_data; struct socket *sock; diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c index ad8dbae11d01..4fe5a592821c 100644 --- a/net/ipv4/geneve.c +++ b/net/ipv4/geneve.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -54,12 +53,9 @@ /* Protects sock_list and refcounts. */ static DEFINE_MUTEX(geneve_mutex); -#define PORT_HASH_BITS 8 -#define PORT_HASH_SIZE (1<sock_list[hash_32(ntohs(port), PORT_HASH_BITS)]; -} - /* Find geneve socket based on network namespace and UDP port */ static struct geneve_sock *geneve_find_sock(struct net *net, __be16 port) { + struct geneve_net *gn = net_generic(net, geneve_net_id); struct geneve_sock *gs; - hlist_for_each_entry(gs, gs_head(net, port), hlist) { + list_for_each_entry(gs, &gn->sock_list, list) { if (inet_sk(gs->sock->sk)->inet_sport == port) return gs; } @@ -339,6 +329,7 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, geneve_rcv_t *rcv, void *data, bool ipv6) { + struct geneve_net *gn = net_generic(net, geneve_net_id); struct geneve_sock *gs; struct socket *sock; struct udp_tunnel_sock_cfg tunnel_cfg; @@ -371,7 +362,7 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, tunnel_cfg.encap_destroy = NULL; setup_udp_tunnel_sock(net, sock, &tunnel_cfg); - hlist_add_head(&gs->hlist, gs_head(net, port)); + list_add(&gs->list, &gn->sock_list); return gs; } @@ -407,7 +398,7 @@ void geneve_sock_release(struct geneve_sock *gs) if (--gs->refcnt) goto unlock; - hlist_del(&gs->hlist); + list_del(&gs->list); geneve_notify_del_rx_port(gs); udp_tunnel_sock_release(gs->sock); kfree_rcu(gs, rcu); @@ -420,17 +411,14 @@ EXPORT_SYMBOL_GPL(geneve_sock_release); static __net_init int geneve_init_net(struct net *net) { struct geneve_net *gn = net_generic(net, geneve_net_id); - unsigned int h; - for (h = 0; h < PORT_HASH_SIZE; ++h) - INIT_HLIST_HEAD(&gn->sock_list[h]); + INIT_LIST_HEAD(&gn->sock_list); return 0; } static struct pernet_operations geneve_net_ops = { .init = geneve_init_net, - .exit = NULL, .id = &geneve_net_id, .size = sizeof(struct geneve_net), }; From 46b1e4f9115d48d17bb92f612e4fa45a521a0537 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Fri, 2 Jan 2015 18:26:05 -0800 Subject: [PATCH 4/4] geneve: Check family when reusing sockets. When searching for an existing socket to reuse, the address family is not taken into account - only port number. This means that an IPv4 socket could be used for IPv6 traffic and vice versa, which is sure to cause problems when passing packets. It is not possible to trigger this problem currently because the only user of Geneve creates just IPv4 sockets. However, that is likely to change in the near future. Signed-off-by: Jesse Gross Signed-off-by: David S. Miller --- net/ipv4/geneve.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c index 4fe5a592821c..5b52046ec7a2 100644 --- a/net/ipv4/geneve.c +++ b/net/ipv4/geneve.c @@ -65,14 +65,15 @@ static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb) return (struct genevehdr *)(udp_hdr(skb) + 1); } -/* Find geneve socket based on network namespace and UDP port */ -static struct geneve_sock *geneve_find_sock(struct net *net, __be16 port) +static struct geneve_sock *geneve_find_sock(struct net *net, + sa_family_t family, __be16 port) { struct geneve_net *gn = net_generic(net, geneve_net_id); struct geneve_sock *gs; list_for_each_entry(gs, &gn->sock_list, list) { - if (inet_sk(gs->sock->sk)->inet_sport == port) + if (inet_sk(gs->sock->sk)->inet_sport == port && + inet_sk(gs->sock->sk)->sk.sk_family == family) return gs; } @@ -375,7 +376,7 @@ struct geneve_sock *geneve_sock_add(struct net *net, __be16 port, mutex_lock(&geneve_mutex); - gs = geneve_find_sock(net, port); + gs = geneve_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port); if (gs) { if (!no_share && gs->rcv == rcv) gs->refcnt++;