Merge branch 'ipv6_percpu_rt_deadlock'
Martin KaFai Lau says:

====================
ipv6: Fix a potential deadlock when creating pcpu rt

v1 -> v2:
A minor change in the commit message of patch 2.

This patch series fixes a potential deadlock when creating a pcpu rt.
It happens when dst_alloc() decided to run gc. Something like this:

read_lock(&table->tb6_lock);
ip6_rt_pcpu_alloc()
=> dst_alloc()
=> ip6_dst_gc()
=> write_lock(&table->tb6_lock); /* oops */

Patch 1 and 2 are some prep works.
Patch 3 is the fix.

Original report:
https://bugzilla.kernel.org/show_bug.cgi?id=102291
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
commit
1f979b117b
2 changed files with 57 additions and 24 deletions
|
@ -172,6 +172,8 @@ static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
|
||||||
*ppcpu_rt = NULL;
|
*ppcpu_rt = NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
non_pcpu_rt->rt6i_pcpu = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void rt6_release(struct rt6_info *rt)
|
static void rt6_release(struct rt6_info *rt)
|
||||||
|
|
|
@ -318,8 +318,7 @@ static const struct rt6_info ip6_blk_hole_entry_template = {
|
||||||
/* allocate dst with ip6_dst_ops */
|
/* allocate dst with ip6_dst_ops */
|
||||||
static struct rt6_info *__ip6_dst_alloc(struct net *net,
|
static struct rt6_info *__ip6_dst_alloc(struct net *net,
|
||||||
struct net_device *dev,
|
struct net_device *dev,
|
||||||
int flags,
|
int flags)
|
||||||
struct fib6_table *table)
|
|
||||||
{
|
{
|
||||||
struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
|
struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
|
||||||
0, DST_OBSOLETE_FORCE_CHK, flags);
|
0, DST_OBSOLETE_FORCE_CHK, flags);
|
||||||
|
@ -336,10 +335,9 @@ static struct rt6_info *__ip6_dst_alloc(struct net *net,
|
||||||
|
|
||||||
static struct rt6_info *ip6_dst_alloc(struct net *net,
|
static struct rt6_info *ip6_dst_alloc(struct net *net,
|
||||||
struct net_device *dev,
|
struct net_device *dev,
|
||||||
int flags,
|
int flags)
|
||||||
struct fib6_table *table)
|
|
||||||
{
|
{
|
||||||
struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags, table);
|
struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
|
||||||
|
|
||||||
if (rt) {
|
if (rt) {
|
||||||
rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
|
rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
|
||||||
|
@ -950,8 +948,7 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
|
||||||
if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
|
if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
|
||||||
ort = (struct rt6_info *)ort->dst.from;
|
ort = (struct rt6_info *)ort->dst.from;
|
||||||
|
|
||||||
rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev,
|
rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);
|
||||||
0, ort->rt6i_table);
|
|
||||||
|
|
||||||
if (!rt)
|
if (!rt)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -983,8 +980,7 @@ static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
|
||||||
struct rt6_info *pcpu_rt;
|
struct rt6_info *pcpu_rt;
|
||||||
|
|
||||||
pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
|
pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
|
||||||
rt->dst.dev, rt->dst.flags,
|
rt->dst.dev, rt->dst.flags);
|
||||||
rt->rt6i_table);
|
|
||||||
|
|
||||||
if (!pcpu_rt)
|
if (!pcpu_rt)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -997,32 +993,53 @@ static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
|
||||||
/* It should be called with read_lock_bh(&tb6_lock) acquired */
|
/* It should be called with read_lock_bh(&tb6_lock) acquired */
|
||||||
static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
|
static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
|
||||||
{
|
{
|
||||||
struct rt6_info *pcpu_rt, *prev, **p;
|
struct rt6_info *pcpu_rt, **p;
|
||||||
|
|
||||||
p = this_cpu_ptr(rt->rt6i_pcpu);
|
p = this_cpu_ptr(rt->rt6i_pcpu);
|
||||||
pcpu_rt = *p;
|
pcpu_rt = *p;
|
||||||
|
|
||||||
if (pcpu_rt)
|
if (pcpu_rt) {
|
||||||
goto done;
|
dst_hold(&pcpu_rt->dst);
|
||||||
|
rt6_dst_from_metrics_check(pcpu_rt);
|
||||||
|
}
|
||||||
|
return pcpu_rt;
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
|
||||||
|
{
|
||||||
|
struct fib6_table *table = rt->rt6i_table;
|
||||||
|
struct rt6_info *pcpu_rt, *prev, **p;
|
||||||
|
|
||||||
pcpu_rt = ip6_rt_pcpu_alloc(rt);
|
pcpu_rt = ip6_rt_pcpu_alloc(rt);
|
||||||
if (!pcpu_rt) {
|
if (!pcpu_rt) {
|
||||||
struct net *net = dev_net(rt->dst.dev);
|
struct net *net = dev_net(rt->dst.dev);
|
||||||
|
|
||||||
pcpu_rt = net->ipv6.ip6_null_entry;
|
dst_hold(&net->ipv6.ip6_null_entry->dst);
|
||||||
goto done;
|
return net->ipv6.ip6_null_entry;
|
||||||
}
|
}
|
||||||
|
|
||||||
prev = cmpxchg(p, NULL, pcpu_rt);
|
read_lock_bh(&table->tb6_lock);
|
||||||
if (prev) {
|
if (rt->rt6i_pcpu) {
|
||||||
/* If someone did it before us, return prev instead */
|
p = this_cpu_ptr(rt->rt6i_pcpu);
|
||||||
|
prev = cmpxchg(p, NULL, pcpu_rt);
|
||||||
|
if (prev) {
|
||||||
|
/* If someone did it before us, return prev instead */
|
||||||
|
dst_destroy(&pcpu_rt->dst);
|
||||||
|
pcpu_rt = prev;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
/* rt has been removed from the fib6 tree
|
||||||
|
* before we have a chance to acquire the read_lock.
|
||||||
|
* In this case, don't bother to create a pcpu rt
|
||||||
|
* since rt is going away anyway. The next
|
||||||
|
* dst_check() will trigger a re-lookup.
|
||||||
|
*/
|
||||||
dst_destroy(&pcpu_rt->dst);
|
dst_destroy(&pcpu_rt->dst);
|
||||||
pcpu_rt = prev;
|
pcpu_rt = rt;
|
||||||
}
|
}
|
||||||
|
|
||||||
done:
|
|
||||||
dst_hold(&pcpu_rt->dst);
|
dst_hold(&pcpu_rt->dst);
|
||||||
rt6_dst_from_metrics_check(pcpu_rt);
|
rt6_dst_from_metrics_check(pcpu_rt);
|
||||||
|
read_unlock_bh(&table->tb6_lock);
|
||||||
return pcpu_rt;
|
return pcpu_rt;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1097,9 +1114,22 @@ redo_rt6_select:
|
||||||
rt->dst.lastuse = jiffies;
|
rt->dst.lastuse = jiffies;
|
||||||
rt->dst.__use++;
|
rt->dst.__use++;
|
||||||
pcpu_rt = rt6_get_pcpu_route(rt);
|
pcpu_rt = rt6_get_pcpu_route(rt);
|
||||||
read_unlock_bh(&table->tb6_lock);
|
|
||||||
|
if (pcpu_rt) {
|
||||||
|
read_unlock_bh(&table->tb6_lock);
|
||||||
|
} else {
|
||||||
|
/* We have to do the read_unlock first
|
||||||
|
* because rt6_make_pcpu_route() may trigger
|
||||||
|
* ip6_dst_gc() which will take the write_lock.
|
||||||
|
*/
|
||||||
|
dst_hold(&rt->dst);
|
||||||
|
read_unlock_bh(&table->tb6_lock);
|
||||||
|
pcpu_rt = rt6_make_pcpu_route(rt);
|
||||||
|
dst_release(&rt->dst);
|
||||||
|
}
|
||||||
|
|
||||||
return pcpu_rt;
|
return pcpu_rt;
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1555,7 +1585,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
|
||||||
if (unlikely(!idev))
|
if (unlikely(!idev))
|
||||||
return ERR_PTR(-ENODEV);
|
return ERR_PTR(-ENODEV);
|
||||||
|
|
||||||
rt = ip6_dst_alloc(net, dev, 0, NULL);
|
rt = ip6_dst_alloc(net, dev, 0);
|
||||||
if (unlikely(!rt)) {
|
if (unlikely(!rt)) {
|
||||||
in6_dev_put(idev);
|
in6_dev_put(idev);
|
||||||
dst = ERR_PTR(-ENOMEM);
|
dst = ERR_PTR(-ENOMEM);
|
||||||
|
@ -1742,7 +1772,8 @@ int ip6_route_add(struct fib6_config *cfg)
|
||||||
if (!table)
|
if (!table)
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table);
|
rt = ip6_dst_alloc(net, NULL,
|
||||||
|
(cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
|
||||||
|
|
||||||
if (!rt) {
|
if (!rt) {
|
||||||
err = -ENOMEM;
|
err = -ENOMEM;
|
||||||
|
@ -2399,7 +2430,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
|
||||||
{
|
{
|
||||||
struct net *net = dev_net(idev->dev);
|
struct net *net = dev_net(idev->dev);
|
||||||
struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
|
struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
|
||||||
DST_NOCOUNT, NULL);
|
DST_NOCOUNT);
|
||||||
if (!rt)
|
if (!rt)
|
||||||
return ERR_PTR(-ENOMEM);
|
return ERR_PTR(-ENOMEM);
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue