[NETNS][IPV6] route6 - dynamically allocate ip6_dst_ops
ip6_dst_ops is now allocated dynamically in the init function and freed in
the exit function. That makes multiple instantiations of this structure
possible.
This will be needed for network namespaces, because dst_ops stores data
that must be kept per namespace: entries and gc_thresh.
Signed-off-by: Benjamin Thery <benjamin.thery@bull.net>
Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 7ff66ce..fa014d7 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -97,7 +97,7 @@
struct in6_addr *gwaddr, int ifindex);
#endif
-static struct dst_ops ip6_dst_ops = {
+static struct dst_ops ip6_dst_ops_template = {
.family = AF_INET6,
.protocol = __constant_htons(ETH_P_IPV6),
.gc = ip6_dst_gc,
@@ -113,6 +113,8 @@
.entries = ATOMIC_INIT(0),
};
+static struct dst_ops *ip6_dst_ops;
+
static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
{
}
@@ -137,7 +139,6 @@
.metrics = { [RTAX_HOPLIMIT - 1] = 255, },
.input = ip6_pkt_discard,
.output = ip6_pkt_discard_out,
- .ops = &ip6_dst_ops,
}
},
.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
@@ -160,7 +161,6 @@
.metrics = { [RTAX_HOPLIMIT - 1] = 255, },
.input = ip6_pkt_prohibit,
.output = ip6_pkt_prohibit_out,
- .ops = &ip6_dst_ops,
}
},
.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
@@ -178,7 +178,6 @@
.metrics = { [RTAX_HOPLIMIT - 1] = 255, },
.input = dst_discard,
.output = dst_discard,
- .ops = &ip6_dst_ops,
}
},
.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
@@ -191,7 +190,7 @@
/* allocate dst with ip6_dst_ops */
static __inline__ struct rt6_info *ip6_dst_alloc(void)
{
- return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
+ return (struct rt6_info *)dst_alloc(ip6_dst_ops);
}
static void ip6_dst_destroy(struct dst_entry *dst)
@@ -1000,18 +999,18 @@
unsigned long now = jiffies;
if (time_after(last_gc + init_net.ipv6.sysctl.ip6_rt_gc_min_interval, now) &&
- atomic_read(&ip6_dst_ops.entries) <= init_net.ipv6.sysctl.ip6_rt_max_size)
+ atomic_read(&ip6_dst_ops->entries) <= init_net.ipv6.sysctl.ip6_rt_max_size)
goto out;
expire++;
fib6_run_gc(expire, &init_net);
last_gc = now;
- if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
+ if (atomic_read(&ip6_dst_ops->entries) < ip6_dst_ops->gc_thresh)
expire = init_net.ipv6.sysctl.ip6_rt_gc_timeout>>1;
out:
expire -= expire>>init_net.ipv6.sysctl.ip6_rt_gc_elasticity;
- return (atomic_read(&ip6_dst_ops.entries) > init_net.ipv6.sysctl.ip6_rt_max_size);
+ return (atomic_read(&ip6_dst_ops->entries) > init_net.ipv6.sysctl.ip6_rt_max_size);
}
/* Clean host part of a prefix. Not necessary in radix tree,
@@ -2408,7 +2407,7 @@
net->ipv6.rt6_stats->fib_rt_alloc,
net->ipv6.rt6_stats->fib_rt_entries,
net->ipv6.rt6_stats->fib_rt_cache,
- atomic_read(&ip6_dst_ops.entries),
+ atomic_read(&ip6_dst_ops->entries),
net->ipv6.rt6_stats->fib_discarded_routes);
return 0;
@@ -2464,7 +2463,7 @@
{
.ctl_name = NET_IPV6_ROUTE_GC_THRESH,
.procname = "gc_thresh",
- .data = &ip6_dst_ops.gc_thresh,
+ .data = &ip6_dst_ops_template.gc_thresh,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec,
@@ -2553,8 +2552,7 @@
if (table) {
table[0].data = &net->ipv6.sysctl.flush_delay;
- /* table[1].data will be handled when we have
- routes per namespace */
+ table[1].data = &ip6_dst_ops_template.gc_thresh;
table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
@@ -2580,6 +2578,7 @@
goto out;
net->ipv6.ip6_null_entry->u.dst.path =
(struct dst_entry *)net->ipv6.ip6_null_entry;
+ net->ipv6.ip6_null_entry->u.dst.ops = ip6_dst_ops;
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
@@ -2591,6 +2590,7 @@
}
net->ipv6.ip6_prohibit_entry->u.dst.path =
(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
+ net->ipv6.ip6_prohibit_entry->u.dst.ops = ip6_dst_ops;
net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
sizeof(*net->ipv6.ip6_blk_hole_entry),
@@ -2602,6 +2602,7 @@
}
net->ipv6.ip6_blk_hole_entry->u.dst.path =
(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
+ net->ipv6.ip6_blk_hole_entry->u.dst.ops = ip6_dst_ops;
#endif
#ifdef CONFIG_PROC_FS
@@ -2640,13 +2641,20 @@
{
int ret;
- ip6_dst_ops.kmem_cachep =
- kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
- SLAB_HWCACHE_ALIGN, NULL);
- if (!ip6_dst_ops.kmem_cachep)
+ ip6_dst_ops = kmemdup(&ip6_dst_ops_template,
+ sizeof(*ip6_dst_ops), GFP_KERNEL);
+ if (!ip6_dst_ops)
return -ENOMEM;
- ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep;
+ ret = -ENOMEM;
+ ip6_dst_ops_template.kmem_cachep =
+ kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (!ip6_dst_ops_template.kmem_cachep)
+ goto out_ip6_dst_ops;
+
+ ip6_dst_ops->kmem_cachep = ip6_dst_ops_template.kmem_cachep;
+ ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
ret = register_pernet_subsys(&ip6_route_net_ops);
if (ret)
@@ -2697,7 +2705,9 @@
out_register_subsys:
unregister_pernet_subsys(&ip6_route_net_ops);
out_kmem_cache:
- kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
+ kmem_cache_destroy(ip6_dst_ops->kmem_cachep);
+out_ip6_dst_ops:
+ kfree(ip6_dst_ops);
goto out;
}
@@ -2708,5 +2718,6 @@
xfrm6_fini();
fib6_gc_cleanup();
unregister_pernet_subsys(&ip6_route_net_ops);
- kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
+ kmem_cache_destroy(ip6_dst_ops->kmem_cachep);
+ kfree(ip6_dst_ops);
}