blob: 57b82dc1ae91c8426b0894b5e4e6030453c66352 [file] [log] [blame]
/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
13
/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		  routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		  reachable.  Otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */
26
Randy Dunlap4fc268d2006-01-11 12:17:47 -080027#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090037#include <linux/mroute6.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070038#include <linux/init.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039#include <linux/if_arp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070040#include <linux/proc_fs.h>
41#include <linux/seq_file.h>
Daniel Lezcano5b7c9312008-03-03 23:28:58 -080042#include <linux/nsproxy.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090043#include <linux/slab.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020044#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070045#include <net/snmp.h>
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#include <net/ndisc.h>
50#include <net/addrconf.h>
51#include <net/tcp.h>
52#include <linux/rtnetlink.h>
53#include <net/dst.h>
54#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -070055#include <net/netevent.h>
Thomas Graf21713eb2006-08-15 00:35:24 -070056#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070057
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
/* Debug level for this file; set to 3 (or higher) to compile in tracing. */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
/* Tracing disabled: RDBG() vanishes and RT6_TRACE() is an empty statement. */
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
/* Tracing enabled: both macros forward to printk(). */
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif
74
Eric Dumazet21efcfa2011-07-19 20:18:36 +000075static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
76 const struct in6_addr *dest);
Linus Torvalds1da177e2005-04-16 15:20:36 -070077static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
David S. Miller0dbaee32010-12-13 12:52:14 -080078static unsigned int ip6_default_advmss(const struct dst_entry *dst);
David S. Millerd33e4552010-12-14 13:01:14 -080079static unsigned int ip6_default_mtu(const struct dst_entry *dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -070080static struct dst_entry *ip6_negative_advice(struct dst_entry *);
81static void ip6_dst_destroy(struct dst_entry *);
82static void ip6_dst_ifdown(struct dst_entry *,
83 struct net_device *dev, int how);
Daniel Lezcano569d3642008-01-18 03:56:57 -080084static int ip6_dst_gc(struct dst_ops *ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -070085
86static int ip6_pkt_discard(struct sk_buff *skb);
87static int ip6_pkt_discard_out(struct sk_buff *skb);
88static void ip6_link_failure(struct sk_buff *skb);
89static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
90
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080091#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -080092static struct rt6_info *rt6_add_route_info(struct net *net,
Eric Dumazetb71d1d42011-04-22 04:53:02 +000093 const struct in6_addr *prefix, int prefixlen,
94 const struct in6_addr *gwaddr, int ifindex,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080095 unsigned pref);
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -080096static struct rt6_info *rt6_get_route_info(struct net *net,
Eric Dumazetb71d1d42011-04-22 04:53:02 +000097 const struct in6_addr *prefix, int prefixlen,
98 const struct in6_addr *gwaddr, int ifindex);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080099#endif
100
David S. Miller06582542011-01-27 14:58:42 -0800101static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
102{
103 struct rt6_info *rt = (struct rt6_info *) dst;
104 struct inet_peer *peer;
105 u32 *p = NULL;
106
Yan, Zheng8e2ec632011-09-05 21:34:30 +0000107 if (!(rt->dst.flags & DST_HOST))
108 return NULL;
109
David S. Miller06582542011-01-27 14:58:42 -0800110 if (!rt->rt6i_peer)
111 rt6_bind_peer(rt, 1);
112
113 peer = rt->rt6i_peer;
114 if (peer) {
115 u32 *old_p = __DST_METRICS_PTR(old);
116 unsigned long prev, new;
117
118 p = peer->metrics;
119 if (inet_metrics_new(peer))
120 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
121
122 new = (unsigned long) p;
123 prev = cmpxchg(&dst->_metrics, old, new);
124
125 if (prev != old) {
126 p = __DST_METRICS_PTR(prev);
127 if (prev & DST_METRICS_READ_ONLY)
128 p = NULL;
129 }
130 }
131 return p;
132}
133
David S. Millerd3aaeb32011-07-18 00:40:17 -0700134static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
135{
136 return __neigh_lookup_errno(&nd_tbl, daddr, dst->dev);
137}
138
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -0800139static struct dst_ops ip6_dst_ops_template = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700140 .family = AF_INET6,
Harvey Harrison09640e632009-02-01 00:45:17 -0800141 .protocol = cpu_to_be16(ETH_P_IPV6),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700142 .gc = ip6_dst_gc,
143 .gc_thresh = 1024,
144 .check = ip6_dst_check,
David S. Miller0dbaee32010-12-13 12:52:14 -0800145 .default_advmss = ip6_default_advmss,
David S. Millerd33e4552010-12-14 13:01:14 -0800146 .default_mtu = ip6_default_mtu,
David S. Miller06582542011-01-27 14:58:42 -0800147 .cow_metrics = ipv6_cow_metrics,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700148 .destroy = ip6_dst_destroy,
149 .ifdown = ip6_dst_ifdown,
150 .negative_advice = ip6_negative_advice,
151 .link_failure = ip6_link_failure,
152 .update_pmtu = ip6_rt_update_pmtu,
Herbert Xu1ac06e02008-05-20 14:32:14 -0700153 .local_out = __ip6_local_out,
David S. Millerd3aaeb32011-07-18 00:40:17 -0700154 .neigh_lookup = ip6_neigh_lookup,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700155};
156
/* default_mtu callback for the blackhole ops: always reports 0. */
static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
{
	return 0;
}
161
David S. Miller14e50e52007-05-24 18:17:54 -0700162static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
163{
164}
165
Held Bernhard0972ddb2011-04-24 22:07:32 +0000166static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
167 unsigned long old)
168{
169 return NULL;
170}
171
David S. Miller14e50e52007-05-24 18:17:54 -0700172static struct dst_ops ip6_dst_blackhole_ops = {
173 .family = AF_INET6,
Harvey Harrison09640e632009-02-01 00:45:17 -0800174 .protocol = cpu_to_be16(ETH_P_IPV6),
David S. Miller14e50e52007-05-24 18:17:54 -0700175 .destroy = ip6_dst_destroy,
176 .check = ip6_dst_check,
Roland Dreierec831ea2011-01-31 13:16:00 -0800177 .default_mtu = ip6_blackhole_default_mtu,
Eric Dumazet214f45c2011-02-18 11:39:01 -0800178 .default_advmss = ip6_default_advmss,
David S. Miller14e50e52007-05-24 18:17:54 -0700179 .update_pmtu = ip6_rt_blackhole_update_pmtu,
Held Bernhard0972ddb2011-04-24 22:07:32 +0000180 .cow_metrics = ip6_rt_blackhole_cow_metrics,
David S. Millerd3aaeb32011-07-18 00:40:17 -0700181 .neigh_lookup = ip6_neigh_lookup,
David S. Miller14e50e52007-05-24 18:17:54 -0700182};
183
David S. Miller62fa8a82011-01-26 20:51:05 -0800184static const u32 ip6_template_metrics[RTAX_MAX] = {
185 [RTAX_HOPLIMIT - 1] = 255,
186};
187
Daniel Lezcanobdb32892008-03-04 13:48:10 -0800188static struct rt6_info ip6_null_entry_template = {
Changli Gaod8d1f302010-06-10 23:31:35 -0700189 .dst = {
190 .__refcnt = ATOMIC_INIT(1),
191 .__use = 1,
192 .obsolete = -1,
193 .error = -ENETUNREACH,
Changli Gaod8d1f302010-06-10 23:31:35 -0700194 .input = ip6_pkt_discard,
195 .output = ip6_pkt_discard_out,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700196 },
197 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
Jean-Mickael Guerin4f724272009-05-20 17:38:59 -0700198 .rt6i_protocol = RTPROT_KERNEL,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700199 .rt6i_metric = ~(u32) 0,
200 .rt6i_ref = ATOMIC_INIT(1),
201};
202
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/*
 * Template for the prohibit route entry: a reject route with error
 * -EACCES handled by ip6_pkt_prohibit() / ip6_pkt_prohibit_out().
 */
static struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
		.error		= -EACCES,
		.obsolete	= -1,
		.__use		= 1,
		.__refcnt	= ATOMIC_INIT(1),
	},
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/*
 * Template for the blackhole route entry: a reject route with error
 * -EINVAL whose input and output are both dst_discard().
 */
static struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.input		= dst_discard,
		.output		= dst_discard,
		.error		= -EINVAL,
		.obsolete	= -1,
		.__use		= 1,
		.__refcnt	= ATOMIC_INIT(1),
	},
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
239
Linus Torvalds1da177e2005-04-16 15:20:36 -0700240/* allocate dst with ip6_dst_ops */
David S. Miller5c1e6aa2011-04-28 14:13:38 -0700241static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
David S. Miller957c6652011-06-24 15:25:00 -0700242 struct net_device *dev,
243 int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700244{
David S. Miller957c6652011-06-24 15:25:00 -0700245 struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
David S. Millercf911662011-04-28 14:31:47 -0700246
Madalin Bucurfbe58182011-09-26 07:04:56 +0000247 if (rt != NULL)
248 memset(&rt->rt6i_table, 0,
249 sizeof(*rt) - sizeof(struct dst_entry));
David S. Millercf911662011-04-28 14:31:47 -0700250
251 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700252}
253
254static void ip6_dst_destroy(struct dst_entry *dst)
255{
256 struct rt6_info *rt = (struct rt6_info *)dst;
257 struct inet6_dev *idev = rt->rt6i_idev;
David S. Millerb3419362010-11-30 12:27:11 -0800258 struct inet_peer *peer = rt->rt6i_peer;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700259
Yan, Zheng8e2ec632011-09-05 21:34:30 +0000260 if (!(rt->dst.flags & DST_HOST))
261 dst_destroy_metrics_generic(dst);
262
Linus Torvalds1da177e2005-04-16 15:20:36 -0700263 if (idev != NULL) {
264 rt->rt6i_idev = NULL;
265 in6_dev_put(idev);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900266 }
David S. Millerb3419362010-11-30 12:27:11 -0800267 if (peer) {
David S. Millerb3419362010-11-30 12:27:11 -0800268 rt->rt6i_peer = NULL;
269 inet_putpeer(peer);
270 }
271}
272
David S. Miller6431cbc2011-02-07 20:38:06 -0800273static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
274
275static u32 rt6_peer_genid(void)
276{
277 return atomic_read(&__rt6_peer_genid);
278}
279
David S. Millerb3419362010-11-30 12:27:11 -0800280void rt6_bind_peer(struct rt6_info *rt, int create)
281{
282 struct inet_peer *peer;
283
David S. Millerb3419362010-11-30 12:27:11 -0800284 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
285 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
286 inet_putpeer(peer);
David S. Miller6431cbc2011-02-07 20:38:06 -0800287 else
288 rt->rt6i_peer_genid = rt6_peer_genid();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700289}
290
291static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
292 int how)
293{
294 struct rt6_info *rt = (struct rt6_info *)dst;
295 struct inet6_dev *idev = rt->rt6i_idev;
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800296 struct net_device *loopback_dev =
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900297 dev_net(dev)->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700298
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800299 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
300 struct inet6_dev *loopback_idev =
301 in6_dev_get(loopback_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700302 if (loopback_idev != NULL) {
303 rt->rt6i_idev = loopback_idev;
304 in6_dev_put(idev);
305 }
306 }
307}
308
309static __inline__ int rt6_check_expired(const struct rt6_info *rt)
310{
Eric Dumazeta02cec22010-09-22 20:43:57 +0000311 return (rt->rt6i_flags & RTF_EXPIRES) &&
312 time_after(jiffies, rt->rt6i_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700313}
314
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000315static inline int rt6_need_strict(const struct in6_addr *daddr)
Thomas Grafc71099a2006-08-04 23:20:06 -0700316{
Eric Dumazeta02cec22010-09-22 20:43:57 +0000317 return ipv6_addr_type(daddr) &
318 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
Thomas Grafc71099a2006-08-04 23:20:06 -0700319}
320
Linus Torvalds1da177e2005-04-16 15:20:36 -0700321/*
Thomas Grafc71099a2006-08-04 23:20:06 -0700322 * Route lookup. Any table->tb6_lock is implied.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700323 */
324
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800325static inline struct rt6_info *rt6_device_match(struct net *net,
326 struct rt6_info *rt,
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000327 const struct in6_addr *saddr,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700328 int oif,
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700329 int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700330{
331 struct rt6_info *local = NULL;
332 struct rt6_info *sprt;
333
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900334 if (!oif && ipv6_addr_any(saddr))
335 goto out;
336
Changli Gaod8d1f302010-06-10 23:31:35 -0700337 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900338 struct net_device *dev = sprt->rt6i_dev;
339
340 if (oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700341 if (dev->ifindex == oif)
342 return sprt;
343 if (dev->flags & IFF_LOOPBACK) {
344 if (sprt->rt6i_idev == NULL ||
345 sprt->rt6i_idev->dev->ifindex != oif) {
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700346 if (flags & RT6_LOOKUP_F_IFACE && oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700347 continue;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900348 if (local && (!oif ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700349 local->rt6i_idev->dev->ifindex == oif))
350 continue;
351 }
352 local = sprt;
353 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900354 } else {
355 if (ipv6_chk_addr(net, saddr, dev,
356 flags & RT6_LOOKUP_F_IFACE))
357 return sprt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700358 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900359 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700360
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900361 if (oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700362 if (local)
363 return local;
364
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700365 if (flags & RT6_LOOKUP_F_IFACE)
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800366 return net->ipv6.ip6_null_entry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700367 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900368out:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700369 return rt;
370}
371
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: if the neighbour attached to @rt is not
 * in a NUD_VALID state and the last update is older than the device's
 * rtr_probe_interval, send a neighbour solicitation to the solicited-node
 * multicast address of the neighbour.
 *
 * @rt may be NULL, in which case nothing happens.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	rcu_read_lock();
	neigh = rt ? dst_get_neighbour(&rt->dst) : NULL;
	if (!neigh || (neigh->nud_state & NUD_VALID))
		goto out;
	/*
	 * neigh->updated is modified below, so take the lock for writing.
	 * With only the read lock, two CPUs could both pass the interval
	 * test and both send a probe, defeating the rate limit.
	 */
	write_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		neigh->updated = jiffies;
		write_unlock_bh(&neigh->lock);

		/* Send the solicitation with the neighbour lock released. */
		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
	} else {
		write_unlock_bh(&neigh->lock);
	}
out:
	rcu_read_unlock();
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
411
Linus Torvalds1da177e2005-04-16 15:20:36 -0700412/*
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800413 * Default Router Selection (RFC 2461 6.3.6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700414 */
Dave Jonesb6f99a22007-03-22 12:27:49 -0700415static inline int rt6_check_dev(struct rt6_info *rt, int oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700416{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800417 struct net_device *dev = rt->rt6i_dev;
David S. Miller161980f2007-04-06 11:42:27 -0700418 if (!oif || dev->ifindex == oif)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800419 return 2;
David S. Miller161980f2007-04-06 11:42:27 -0700420 if ((dev->flags & IFF_LOOPBACK) &&
421 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
422 return 1;
423 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700424}
425
Dave Jonesb6f99a22007-03-22 12:27:49 -0700426static inline int rt6_check_neigh(struct rt6_info *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700427{
Eric Dumazetf2c31e32011-07-29 19:00:53 +0000428 struct neighbour *neigh;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800429 int m;
Eric Dumazetf2c31e32011-07-29 19:00:53 +0000430
431 rcu_read_lock();
432 neigh = dst_get_neighbour(&rt->dst);
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700433 if (rt->rt6i_flags & RTF_NONEXTHOP ||
434 !(rt->rt6i_flags & RTF_GATEWAY))
435 m = 1;
436 else if (neigh) {
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800437 read_lock_bh(&neigh->lock);
438 if (neigh->nud_state & NUD_VALID)
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700439 m = 2;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800440#ifdef CONFIG_IPV6_ROUTER_PREF
441 else if (neigh->nud_state & NUD_FAILED)
442 m = 0;
443#endif
444 else
YOSHIFUJI Hideakiea73ee22006-11-06 09:45:44 -0800445 m = 1;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800446 read_unlock_bh(&neigh->lock);
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800447 } else
448 m = 0;
Eric Dumazetf2c31e32011-07-29 19:00:53 +0000449 rcu_read_unlock();
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800450 return m;
451}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700452
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800453static int rt6_score_route(struct rt6_info *rt, int oif,
454 int strict)
455{
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700456 int m, n;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900457
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700458 m = rt6_check_dev(rt, oif);
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700459 if (!m && (strict & RT6_LOOKUP_F_IFACE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800460 return -1;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800461#ifdef CONFIG_IPV6_ROUTER_PREF
462 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
463#endif
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700464 n = rt6_check_neigh(rt);
YOSHIFUJI Hideaki557e92e2006-11-06 09:45:45 -0800465 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800466 return -1;
467 return m;
468}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700469
David S. Millerf11e6652007-03-24 20:36:25 -0700470static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
471 int *mpri, struct rt6_info *match)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800472{
David S. Millerf11e6652007-03-24 20:36:25 -0700473 int m;
474
475 if (rt6_check_expired(rt))
476 goto out;
477
478 m = rt6_score_route(rt, oif, strict);
479 if (m < 0)
480 goto out;
481
482 if (m > *mpri) {
483 if (strict & RT6_LOOKUP_F_REACHABLE)
484 rt6_probe(match);
485 *mpri = m;
486 match = rt;
487 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
488 rt6_probe(rt);
489 }
490
491out:
492 return match;
493}
494
495static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
496 struct rt6_info *rr_head,
497 u32 metric, int oif, int strict)
498{
499 struct rt6_info *rt, *match;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800500 int mpri = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700501
David S. Millerf11e6652007-03-24 20:36:25 -0700502 match = NULL;
503 for (rt = rr_head; rt && rt->rt6i_metric == metric;
Changli Gaod8d1f302010-06-10 23:31:35 -0700504 rt = rt->dst.rt6_next)
David S. Millerf11e6652007-03-24 20:36:25 -0700505 match = find_match(rt, oif, strict, &mpri, match);
506 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
Changli Gaod8d1f302010-06-10 23:31:35 -0700507 rt = rt->dst.rt6_next)
David S. Millerf11e6652007-03-24 20:36:25 -0700508 match = find_match(rt, oif, strict, &mpri, match);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800509
David S. Millerf11e6652007-03-24 20:36:25 -0700510 return match;
511}
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800512
David S. Millerf11e6652007-03-24 20:36:25 -0700513static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
514{
515 struct rt6_info *match, *rt0;
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800516 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700517
David S. Millerf11e6652007-03-24 20:36:25 -0700518 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
Harvey Harrison0dc47872008-03-05 20:47:47 -0800519 __func__, fn->leaf, oif);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700520
David S. Millerf11e6652007-03-24 20:36:25 -0700521 rt0 = fn->rr_ptr;
522 if (!rt0)
523 fn->rr_ptr = rt0 = fn->leaf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700524
David S. Millerf11e6652007-03-24 20:36:25 -0700525 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700526
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800527 if (!match &&
David S. Millerf11e6652007-03-24 20:36:25 -0700528 (strict & RT6_LOOKUP_F_REACHABLE)) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700529 struct rt6_info *next = rt0->dst.rt6_next;
David S. Millerf11e6652007-03-24 20:36:25 -0700530
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800531 /* no entries matched; do round-robin */
David S. Millerf11e6652007-03-24 20:36:25 -0700532 if (!next || next->rt6i_metric != rt0->rt6i_metric)
533 next = fn->leaf;
534
535 if (next != rt0)
536 fn->rr_ptr = next;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700537 }
538
David S. Millerf11e6652007-03-24 20:36:25 -0700539 RT6_TRACE("%s() => %p\n",
Harvey Harrison0dc47872008-03-05 20:47:47 -0800540 __func__, match);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700541
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900542 net = dev_net(rt0->rt6i_dev);
Eric Dumazeta02cec22010-09-22 20:43:57 +0000543 return match ? match : net->ipv6.ip6_null_entry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700544}
545
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a received Route Information option.
 *
 * @dev:    device the option arrived on
 * @opt:    start of the option (interpreted as struct route_info)
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router, used as gateway
 *
 * Depending on the advertised lifetime the matching route is deleted,
 * added, or has its preference and expiry refreshed.
 *
 * Returns 0 on success or -EINVAL for a malformed option or an
 * invalid route preference.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	/* Signed compare, so a negative length is rejected as well. */
	if (len < (int) sizeof(struct route_info))
		return -EINVAL;

	/*
	 * Sanity check for prefix_len and length (RFC 4191, 2.3).
	 * Length is in units of 8 octets and includes the 8-octet fixed
	 * part, so a prefix longer than 64 bits needs Length 3 and any
	 * non-empty prefix needs Length 2 or 3. The option must also fit
	 * in the bytes we were handed; otherwise reading the prefix
	 * would run past the end of the option.
	 */
	if (rinfo->length < 1 || rinfo->length > 3 ||
	    (rinfo->length << 3) > len)
		return -EINVAL;
	if (rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 3)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 2)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/*
		 * Shorter option: build a full address from the prefix_len
		 * bits present (covered by the length checks above).
		 */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* A zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime)) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		dst_release(&rt->dst);
	}
	return 0;
}
#endif
619
/*
 * BACKTRACK(net, saddr): when the lookup ended on the null entry, climb
 * towards the tree root looking for another node carrying route info.
 *
 * Relies on the expanding function providing the locals 'rt' and 'fn'
 * and the labels 'restart' (retry with the new fn) and 'out' (give up,
 * top-level root reached). When the parent has a subtree other than the
 * current node, the search continues in that subtree using @saddr.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == (__net)->ipv6.ip6_null_entry) {			\
		struct fib6_node *pn;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn),	\
						 NULL, saddr);		\
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
Thomas Grafc71099a2006-08-04 23:20:06 -0700637
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800638static struct rt6_info *ip6_pol_route_lookup(struct net *net,
639 struct fib6_table *table,
David S. Miller4c9483b2011-03-12 16:22:43 -0500640 struct flowi6 *fl6, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700641{
642 struct fib6_node *fn;
643 struct rt6_info *rt;
644
Thomas Grafc71099a2006-08-04 23:20:06 -0700645 read_lock_bh(&table->tb6_lock);
David S. Miller4c9483b2011-03-12 16:22:43 -0500646 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
Thomas Grafc71099a2006-08-04 23:20:06 -0700647restart:
648 rt = fn->leaf;
David S. Miller4c9483b2011-03-12 16:22:43 -0500649 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
650 BACKTRACK(net, &fl6->saddr);
Thomas Grafc71099a2006-08-04 23:20:06 -0700651out:
Changli Gaod8d1f302010-06-10 23:31:35 -0700652 dst_use(&rt->dst, jiffies);
Thomas Grafc71099a2006-08-04 23:20:06 -0700653 read_unlock_bh(&table->tb6_lock);
Thomas Grafc71099a2006-08-04 23:20:06 -0700654 return rt;
655
656}
657
YOSHIFUJI Hideaki9acd9f32008-04-10 15:42:10 +0900658struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
659 const struct in6_addr *saddr, int oif, int strict)
Thomas Grafc71099a2006-08-04 23:20:06 -0700660{
David S. Miller4c9483b2011-03-12 16:22:43 -0500661 struct flowi6 fl6 = {
662 .flowi6_oif = oif,
663 .daddr = *daddr,
Thomas Grafc71099a2006-08-04 23:20:06 -0700664 };
665 struct dst_entry *dst;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700666 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
Thomas Grafc71099a2006-08-04 23:20:06 -0700667
Thomas Grafadaa70b2006-10-13 15:01:03 -0700668 if (saddr) {
David S. Miller4c9483b2011-03-12 16:22:43 -0500669 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
Thomas Grafadaa70b2006-10-13 15:01:03 -0700670 flags |= RT6_LOOKUP_F_HAS_SADDR;
671 }
672
David S. Miller4c9483b2011-03-12 16:22:43 -0500673 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
Thomas Grafc71099a2006-08-04 23:20:06 -0700674 if (dst->error == 0)
675 return (struct rt6_info *) dst;
676
677 dst_release(dst);
678
Linus Torvalds1da177e2005-04-16 15:20:36 -0700679 return NULL;
680}
681
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900682EXPORT_SYMBOL(rt6_lookup);
683
Thomas Grafc71099a2006-08-04 23:20:06 -0700684/* ip6_ins_rt is called with FREE table->tb6_lock.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700685 It takes new route entry, the addition fails by any reason the
686 route is freed. In any case, if caller does not hold it, it may
687 be destroyed.
688 */
689
Thomas Graf86872cb2006-08-22 00:01:08 -0700690static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700691{
692 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -0700693 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700694
Thomas Grafc71099a2006-08-04 23:20:06 -0700695 table = rt->rt6i_table;
696 write_lock_bh(&table->tb6_lock);
Thomas Graf86872cb2006-08-22 00:01:08 -0700697 err = fib6_add(&table->tb6_root, rt, info);
Thomas Grafc71099a2006-08-04 23:20:06 -0700698 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700699
700 return err;
701}
702
Thomas Graf40e22e82006-08-22 00:00:45 -0700703int ip6_ins_rt(struct rt6_info *rt)
704{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800705 struct nl_info info = {
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900706 .nl_net = dev_net(rt->rt6i_dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800707 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -0800708 return __ip6_ins_rt(rt, &info);
Thomas Graf40e22e82006-08-22 00:00:45 -0700709}
710
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000711static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
712 const struct in6_addr *daddr,
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000713 const struct in6_addr *saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700714{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700715 struct rt6_info *rt;
716
717 /*
718 * Clone the route.
719 */
720
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000721 rt = ip6_rt_copy(ort, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700722
723 if (rt) {
David S. Miller14deae42009-01-04 16:04:39 -0800724 struct neighbour *neigh;
725 int attempts = !in_softirq();
726
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900727 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
728 if (rt->rt6i_dst.plen != 128 &&
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000729 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900730 rt->rt6i_flags |= RTF_ANYCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700731 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900732 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700733
Linus Torvalds1da177e2005-04-16 15:20:36 -0700734 rt->rt6i_flags |= RTF_CACHE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700735
736#ifdef CONFIG_IPV6_SUBTREES
737 if (rt->rt6i_src.plen && saddr) {
738 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
739 rt->rt6i_src.plen = 128;
740 }
741#endif
742
David S. Miller14deae42009-01-04 16:04:39 -0800743 retry:
744 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
745 if (IS_ERR(neigh)) {
746 struct net *net = dev_net(rt->rt6i_dev);
747 int saved_rt_min_interval =
748 net->ipv6.sysctl.ip6_rt_gc_min_interval;
749 int saved_rt_elasticity =
750 net->ipv6.sysctl.ip6_rt_gc_elasticity;
751
752 if (attempts-- > 0) {
753 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
754 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
755
Alexey Dobriyan86393e52009-08-29 01:34:49 +0000756 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
David S. Miller14deae42009-01-04 16:04:39 -0800757
758 net->ipv6.sysctl.ip6_rt_gc_elasticity =
759 saved_rt_elasticity;
760 net->ipv6.sysctl.ip6_rt_gc_min_interval =
761 saved_rt_min_interval;
762 goto retry;
763 }
764
765 if (net_ratelimit())
766 printk(KERN_WARNING
Ulrich Weber7e1b33e2010-09-27 15:02:18 -0700767 "ipv6: Neighbour table overflow.\n");
Changli Gaod8d1f302010-06-10 23:31:35 -0700768 dst_free(&rt->dst);
David S. Miller14deae42009-01-04 16:04:39 -0800769 return NULL;
770 }
David S. Miller69cce1d2011-07-17 23:09:49 -0700771 dst_set_neighbour(&rt->dst, neigh);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700772
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800773 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700774
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800775 return rt;
776}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700777
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000778static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
779 const struct in6_addr *daddr)
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800780{
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000781 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
782
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800783 if (rt) {
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800784 rt->rt6i_flags |= RTF_CACHE;
Eric Dumazetf2c31e32011-07-29 19:00:53 +0000785 dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_raw(&ort->dst)));
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800786 }
787 return rt;
788}
789
/*
 * Look up a route for @fl6 in @table, restricted to interface @oif when
 * RT6_LOOKUP_F_IFACE is set in @flags.
 *
 * If the selected route is the namespace's ip6_null_entry or already
 * has RTF_CACHE set, it is returned directly.  Otherwise a cache copy
 * is created (rt6_alloc_cow() or rt6_alloc_clone()) and inserted with
 * ip6_ins_rt(); routes that have DST_HOST set and do not take the cow
 * path are returned as they are.
 *
 * Every return path takes a reference on the returned route with
 * dst_hold() and updates its lastuse/__use fields.  If a copy cannot be
 * allocated, ip6_null_entry is returned once the attempts run out.
 */
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800790static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
David S. Miller4c9483b2011-03-12 16:22:43 -0500791 struct flowi6 *fl6, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700792{
793 struct fib6_node *fn;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800794 struct rt6_info *rt, *nrt;
Thomas Grafc71099a2006-08-04 23:20:06 -0700795 int strict = 0;
/* Upper bound on copy/insert rounds before giving up (see --attempts below). */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700796 int attempts = 3;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800797 int err;
/* Ask rt6_select() for reachable routers first, unless forwarding is enabled in devconf_all. */
YOSHIFUJI Hideaki53b79972008-07-19 22:35:03 -0700798 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700799
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700800 strict |= flags & RT6_LOOKUP_F_IFACE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700801
802relookup:
Thomas Grafc71099a2006-08-04 23:20:06 -0700803 read_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700804
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800805restart_2:
David S. Miller4c9483b2011-03-12 16:22:43 -0500806 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700807
/*
 * NOTE(review): no goto in this function targets "restart"; presumably
 * the BACKTRACK() macro (defined elsewhere) jumps to it and to "out",
 * and uses the locals fn and rt -- confirm before renaming any of them.
 */
808restart:
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700809 rt = rt6_select(fn, oif, strict | reachable);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800810
David S. Miller4c9483b2011-03-12 16:22:43 -0500811 BACKTRACK(net, &fl6->saddr);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800812 if (rt == net->ipv6.ip6_null_entry ||
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800813 rt->rt6i_flags & RTF_CACHE)
YOSHIFUJI Hideaki1ddef0442006-03-20 17:01:24 -0800814 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700815
/* Take a reference on rt before dropping the table lock; the copy is made without the lock held. */
Changli Gaod8d1f302010-06-10 23:31:35 -0700816 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700817 read_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800818
Eric Dumazetf2c31e32011-07-29 19:00:53 +0000819 if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
David S. Miller4c9483b2011-03-12 16:22:43 -0500820 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
David S. Miller7343ff32011-03-09 19:55:25 -0800821 else if (!(rt->dst.flags & DST_HOST))
David S. Miller4c9483b2011-03-12 16:22:43 -0500822 nrt = rt6_alloc_clone(rt, &fl6->daddr);
David S. Miller7343ff32011-03-09 19:55:25 -0800823 else
824 goto out2;
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800825
/* Drop the reference on the original route; continue with the copy, or with ip6_null_entry if allocation failed. */
Changli Gaod8d1f302010-06-10 23:31:35 -0700826 dst_release(&rt->dst);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800827 rt = nrt ? : net->ipv6.ip6_null_entry;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800828
Changli Gaod8d1f302010-06-10 23:31:35 -0700829 dst_hold(&rt->dst);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800830 if (nrt) {
Thomas Graf40e22e82006-08-22 00:00:45 -0700831 err = ip6_ins_rt(nrt);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800832 if (!err)
833 goto out2;
834 }
835
836 if (--attempts <= 0)
837 goto out2;
838
839 /*
Thomas Grafc71099a2006-08-04 23:20:06 -0700840 * Race condition! In the gap, when table->tb6_lock was
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800841 * released someone could insert this route. Relookup.
842 */
Changli Gaod8d1f302010-06-10 23:31:35 -0700843 dst_release(&rt->dst);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800844 goto relookup;
845
846out:
/* First pass demanded reachability: redo the lookup once without that flag (table lock still held). */
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800847 if (reachable) {
848 reachable = 0;
849 goto restart_2;
850 }
Changli Gaod8d1f302010-06-10 23:31:35 -0700851 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700852 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700853out2:
Changli Gaod8d1f302010-06-10 23:31:35 -0700854 rt->dst.lastuse = jiffies;
855 rt->dst.__use++;
Thomas Grafc71099a2006-08-04 23:20:06 -0700856
857 return rt;
858}
859
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800860static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
David S. Miller4c9483b2011-03-12 16:22:43 -0500861 struct flowi6 *fl6, int flags)
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700862{
David S. Miller4c9483b2011-03-12 16:22:43 -0500863 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700864}
865
Thomas Grafc71099a2006-08-04 23:20:06 -0700866void ip6_route_input(struct sk_buff *skb)
867{
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000868 const struct ipv6hdr *iph = ipv6_hdr(skb);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900869 struct net *net = dev_net(skb->dev);
Thomas Grafadaa70b2006-10-13 15:01:03 -0700870 int flags = RT6_LOOKUP_F_HAS_SADDR;
David S. Miller4c9483b2011-03-12 16:22:43 -0500871 struct flowi6 fl6 = {
872 .flowi6_iif = skb->dev->ifindex,
873 .daddr = iph->daddr,
874 .saddr = iph->saddr,
875 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
876 .flowi6_mark = skb->mark,
877 .flowi6_proto = iph->nexthdr,
Thomas Grafc71099a2006-08-04 23:20:06 -0700878 };
Thomas Grafadaa70b2006-10-13 15:01:03 -0700879
Thomas Goff1d6e55f2009-01-27 22:39:59 -0800880 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
Thomas Grafadaa70b2006-10-13 15:01:03 -0700881 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700882
David S. Miller4c9483b2011-03-12 16:22:43 -0500883 skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
Thomas Grafc71099a2006-08-04 23:20:06 -0700884}
885
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800886static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
David S. Miller4c9483b2011-03-12 16:22:43 -0500887 struct flowi6 *fl6, int flags)
Thomas Grafc71099a2006-08-04 23:20:06 -0700888{
David S. Miller4c9483b2011-03-12 16:22:43 -0500889 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
Thomas Grafc71099a2006-08-04 23:20:06 -0700890}
891
Florian Westphal9c7a4f9c2011-03-22 19:17:36 -0700892struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
David S. Miller4c9483b2011-03-12 16:22:43 -0500893 struct flowi6 *fl6)
Thomas Grafc71099a2006-08-04 23:20:06 -0700894{
895 int flags = 0;
896
David S. Miller4c9483b2011-03-12 16:22:43 -0500897 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700898 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700899
David S. Miller4c9483b2011-03-12 16:22:43 -0500900 if (!ipv6_addr_any(&fl6->saddr))
Thomas Grafadaa70b2006-10-13 15:01:03 -0700901 flags |= RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideaki / 吉藤英明0c9a2ac2010-03-07 00:14:44 +0000902 else if (sk)
903 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
Thomas Grafadaa70b2006-10-13 15:01:03 -0700904
David S. Miller4c9483b2011-03-12 16:22:43 -0500905 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700906}
907
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900908EXPORT_SYMBOL(ip6_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700909
David S. Miller2774c132011-03-01 14:59:04 -0800910struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
David S. Miller14e50e52007-05-24 18:17:54 -0700911{
David S. Miller5c1e6aa2011-04-28 14:13:38 -0700912 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
David S. Miller14e50e52007-05-24 18:17:54 -0700913 struct dst_entry *new = NULL;
914
David S. Miller5c1e6aa2011-04-28 14:13:38 -0700915 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
David S. Miller14e50e52007-05-24 18:17:54 -0700916 if (rt) {
David S. Millercf911662011-04-28 14:31:47 -0700917 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
918
Changli Gaod8d1f302010-06-10 23:31:35 -0700919 new = &rt->dst;
David S. Miller14e50e52007-05-24 18:17:54 -0700920
David S. Miller14e50e52007-05-24 18:17:54 -0700921 new->__use = 1;
Herbert Xu352e5122007-11-13 21:34:06 -0800922 new->input = dst_discard;
923 new->output = dst_discard;
David S. Miller14e50e52007-05-24 18:17:54 -0700924
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000925 if (dst_metrics_read_only(&ort->dst))
926 new->_metrics = ort->dst._metrics;
927 else
928 dst_copy_metrics(new, &ort->dst);
David S. Miller14e50e52007-05-24 18:17:54 -0700929 rt->rt6i_idev = ort->rt6i_idev;
930 if (rt->rt6i_idev)
931 in6_dev_hold(rt->rt6i_idev);
932 rt->rt6i_expires = 0;
933
934 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
935 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
936 rt->rt6i_metric = 0;
937
938 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
939#ifdef CONFIG_IPV6_SUBTREES
940 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
941#endif
942
943 dst_free(new);
944 }
945
David S. Miller69ead7a2011-03-01 14:45:33 -0800946 dst_release(dst_orig);
947 return new ? new : ERR_PTR(-ENOMEM);
David S. Miller14e50e52007-05-24 18:17:54 -0700948}
David S. Miller14e50e52007-05-24 18:17:54 -0700949
Linus Torvalds1da177e2005-04-16 15:20:36 -0700950/*
951 * Destination cache support functions
952 */
953
954static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
955{
956 struct rt6_info *rt;
957
958 rt = (struct rt6_info *) dst;
959
David S. Miller6431cbc2011-02-07 20:38:06 -0800960 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
961 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
962 if (!rt->rt6i_peer)
963 rt6_bind_peer(rt, 0);
964 rt->rt6i_peer_genid = rt6_peer_genid();
965 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700966 return dst;
David S. Miller6431cbc2011-02-07 20:38:06 -0800967 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700968 return NULL;
969}
970
971static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
972{
973 struct rt6_info *rt = (struct rt6_info *) dst;
974
975 if (rt) {
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +0000976 if (rt->rt6i_flags & RTF_CACHE) {
977 if (rt6_check_expired(rt)) {
978 ip6_del_rt(rt);
979 dst = NULL;
980 }
981 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700982 dst_release(dst);
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +0000983 dst = NULL;
984 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700985 }
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +0000986 return dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700987}
988
989static void ip6_link_failure(struct sk_buff *skb)
990{
991 struct rt6_info *rt;
992
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +0000993 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700994
Eric Dumazetadf30902009-06-02 05:19:30 +0000995 rt = (struct rt6_info *) skb_dst(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700996 if (rt) {
997 if (rt->rt6i_flags&RTF_CACHE) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700998 dst_set_expires(&rt->dst, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700999 rt->rt6i_flags |= RTF_EXPIRES;
1000 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1001 rt->rt6i_node->fn_sernum = -1;
1002 }
1003}
1004
1005static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1006{
1007 struct rt6_info *rt6 = (struct rt6_info*)dst;
1008
1009 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1010 rt6->rt6i_flags |= RTF_MODIFIED;
1011 if (mtu < IPV6_MIN_MTU) {
David S. Millerdefb3512010-12-08 21:16:57 -08001012 u32 features = dst_metric(dst, RTAX_FEATURES);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001013 mtu = IPV6_MIN_MTU;
David S. Millerdefb3512010-12-08 21:16:57 -08001014 features |= RTAX_FEATURE_ALLFRAG;
1015 dst_metric_set(dst, RTAX_FEATURES, features);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001016 }
David S. Millerdefb3512010-12-08 21:16:57 -08001017 dst_metric_set(dst, RTAX_MTU, mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001018 }
1019}
1020
David S. Miller0dbaee32010-12-13 12:52:14 -08001021static unsigned int ip6_default_advmss(const struct dst_entry *dst)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001022{
David S. Miller0dbaee32010-12-13 12:52:14 -08001023 struct net_device *dev = dst->dev;
1024 unsigned int mtu = dst_mtu(dst);
1025 struct net *net = dev_net(dev);
1026
Linus Torvalds1da177e2005-04-16 15:20:36 -07001027 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1028
Daniel Lezcano55786892008-03-04 13:47:47 -08001029 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1030 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001031
1032 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001033 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1034 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1035 * IPV6_MAXPLEN is also valid and means: "any MSS,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001036 * rely only on pmtu discovery"
1037 */
1038 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1039 mtu = IPV6_MAXPLEN;
1040 return mtu;
1041}
1042
David S. Millerd33e4552010-12-14 13:01:14 -08001043static unsigned int ip6_default_mtu(const struct dst_entry *dst)
1044{
1045 unsigned int mtu = IPV6_MIN_MTU;
1046 struct inet6_dev *idev;
1047
1048 rcu_read_lock();
1049 idev = __in6_dev_get(dst->dev);
1050 if (idev)
1051 mtu = idev->cnf.mtu6;
1052 rcu_read_unlock();
1053
1054 return mtu;
1055}
1056
/*
 * Head of the singly linked list (chained through dst.next) of entries
 * created by icmp6_dst_alloc().  icmp6_dst_gc() and icmp6_clean_all()
 * walk it and unlink the entries they free.
 */
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001057static struct dst_entry *icmp6_dst_gc_list;
/* Taken with spin_lock_bh() around every access to icmp6_dst_gc_list in this file section. */
1058static DEFINE_SPINLOCK(icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001059
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001060struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001061 struct neighbour *neigh,
YOSHIFUJI Hideaki9acd9f32008-04-10 15:42:10 +09001062 const struct in6_addr *addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001063{
1064 struct rt6_info *rt;
1065 struct inet6_dev *idev = in6_dev_get(dev);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001066 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001067
1068 if (unlikely(idev == NULL))
1069 return NULL;
1070
David S. Miller957c6652011-06-24 15:25:00 -07001071 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001072 if (unlikely(rt == NULL)) {
1073 in6_dev_put(idev);
1074 goto out;
1075 }
1076
Linus Torvalds1da177e2005-04-16 15:20:36 -07001077 if (neigh)
1078 neigh_hold(neigh);
David S. Miller14deae42009-01-04 16:04:39 -08001079 else {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001080 neigh = ndisc_get_neigh(dev, addr);
David S. Miller14deae42009-01-04 16:04:39 -08001081 if (IS_ERR(neigh))
1082 neigh = NULL;
1083 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001084
Yan, Zheng8e2ec632011-09-05 21:34:30 +00001085 rt->dst.flags |= DST_HOST;
1086 rt->dst.output = ip6_output;
David S. Miller69cce1d2011-07-17 23:09:49 -07001087 dst_set_neighbour(&rt->dst, neigh);
Changli Gaod8d1f302010-06-10 23:31:35 -07001088 atomic_set(&rt->dst.__refcnt, 1);
Yan, Zheng8e2ec632011-09-05 21:34:30 +00001089 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1090 rt->rt6i_dst.plen = 128;
1091 rt->rt6i_idev = idev;
Gao feng70116872011-10-28 02:46:57 +00001092 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001093
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001094 spin_lock_bh(&icmp6_dst_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07001095 rt->dst.next = icmp6_dst_gc_list;
1096 icmp6_dst_gc_list = &rt->dst;
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001097 spin_unlock_bh(&icmp6_dst_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001098
Daniel Lezcano55786892008-03-04 13:47:47 -08001099 fib6_force_start_gc(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001100
1101out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001102 return &rt->dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001103}
1104
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001105int icmp6_dst_gc(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001106{
Hagen Paul Pfeifere9476e952011-02-25 05:45:19 +00001107 struct dst_entry *dst, **pprev;
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001108 int more = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001109
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001110 spin_lock_bh(&icmp6_dst_lock);
1111 pprev = &icmp6_dst_gc_list;
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001112
Linus Torvalds1da177e2005-04-16 15:20:36 -07001113 while ((dst = *pprev) != NULL) {
1114 if (!atomic_read(&dst->__refcnt)) {
1115 *pprev = dst->next;
1116 dst_free(dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001117 } else {
1118 pprev = &dst->next;
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001119 ++more;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001120 }
1121 }
1122
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001123 spin_unlock_bh(&icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001124
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001125 return more;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001126}
1127
David S. Miller1e493d12008-09-10 17:27:15 -07001128static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1129 void *arg)
1130{
1131 struct dst_entry *dst, **pprev;
1132
1133 spin_lock_bh(&icmp6_dst_lock);
1134 pprev = &icmp6_dst_gc_list;
1135 while ((dst = *pprev) != NULL) {
1136 struct rt6_info *rt = (struct rt6_info *) dst;
1137 if (func(rt, arg)) {
1138 *pprev = dst->next;
1139 dst_free(dst);
1140 } else {
1141 pprev = &dst->next;
1142 }
1143 }
1144 spin_unlock_bh(&icmp6_dst_lock);
1145}
1146
Daniel Lezcano569d3642008-01-18 03:56:57 -08001147static int ip6_dst_gc(struct dst_ops *ops)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001148{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001149 unsigned long now = jiffies;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00001150 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
Daniel Lezcano7019b782008-03-04 13:50:14 -08001151 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1152 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1153 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1154 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1155 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001156 int entries;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001157
Eric Dumazetfc66f952010-10-08 06:37:34 +00001158 entries = dst_entries_get_fast(ops);
Daniel Lezcano7019b782008-03-04 13:50:14 -08001159 if (time_after(rt_last_gc + rt_min_interval, now) &&
Eric Dumazetfc66f952010-10-08 06:37:34 +00001160 entries <= rt_max_size)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001161 goto out;
1162
Benjamin Thery6891a342008-03-04 13:49:47 -08001163 net->ipv6.ip6_rt_gc_expire++;
1164 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1165 net->ipv6.ip6_rt_last_gc = now;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001166 entries = dst_entries_get_slow(ops);
1167 if (entries < ops->gc_thresh)
Daniel Lezcano7019b782008-03-04 13:50:14 -08001168 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001169out:
Daniel Lezcano7019b782008-03-04 13:50:14 -08001170 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001171 return entries > rt_max_size;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001172}
1173
1174/* Clean host part of a prefix. Not necessary in radix tree,
1175 but results in cleaner routing tables.
1176
1177 Remove it only when all the things will work!
1178 */
1179
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001180int ip6_dst_hoplimit(struct dst_entry *dst)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001181{
David S. Miller5170ae82010-12-12 21:35:57 -08001182 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
David S. Millera02e4b72010-12-12 21:39:02 -08001183 if (hoplimit == 0) {
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001184 struct net_device *dev = dst->dev;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001185 struct inet6_dev *idev;
1186
1187 rcu_read_lock();
1188 idev = __in6_dev_get(dev);
1189 if (idev)
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001190 hoplimit = idev->cnf.hop_limit;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001191 else
YOSHIFUJI Hideaki53b79972008-07-19 22:35:03 -07001192 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001193 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001194 }
1195 return hoplimit;
1196}
David S. Millerabbf46a2010-12-12 21:14:46 -08001197EXPORT_SYMBOL(ip6_dst_hoplimit);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001198
1199/*
1200 *
1201 */
1202
Thomas Graf86872cb2006-08-22 00:01:08 -07001203int ip6_route_add(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001204{
1205 int err;
Daniel Lezcano55786892008-03-04 13:47:47 -08001206 struct net *net = cfg->fc_nlinfo.nl_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001207 struct rt6_info *rt = NULL;
1208 struct net_device *dev = NULL;
1209 struct inet6_dev *idev = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001210 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001211 int addr_type;
1212
Thomas Graf86872cb2006-08-22 00:01:08 -07001213 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001214 return -EINVAL;
1215#ifndef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001216 if (cfg->fc_src_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001217 return -EINVAL;
1218#endif
Thomas Graf86872cb2006-08-22 00:01:08 -07001219 if (cfg->fc_ifindex) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001220 err = -ENODEV;
Daniel Lezcano55786892008-03-04 13:47:47 -08001221 dev = dev_get_by_index(net, cfg->fc_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001222 if (!dev)
1223 goto out;
1224 idev = in6_dev_get(dev);
1225 if (!idev)
1226 goto out;
1227 }
1228
Thomas Graf86872cb2006-08-22 00:01:08 -07001229 if (cfg->fc_metric == 0)
1230 cfg->fc_metric = IP6_RT_PRIO_USER;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001231
Daniel Lezcano55786892008-03-04 13:47:47 -08001232 table = fib6_new_table(net, cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001233 if (table == NULL) {
1234 err = -ENOBUFS;
1235 goto out;
1236 }
1237
David S. Miller957c6652011-06-24 15:25:00 -07001238 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001239
1240 if (rt == NULL) {
1241 err = -ENOMEM;
1242 goto out;
1243 }
1244
Changli Gaod8d1f302010-06-10 23:31:35 -07001245 rt->dst.obsolete = -1;
YOSHIFUJI Hideaki6f704992008-05-19 16:56:11 -07001246 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1247 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1248 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001249
Thomas Graf86872cb2006-08-22 00:01:08 -07001250 if (cfg->fc_protocol == RTPROT_UNSPEC)
1251 cfg->fc_protocol = RTPROT_BOOT;
1252 rt->rt6i_protocol = cfg->fc_protocol;
1253
1254 addr_type = ipv6_addr_type(&cfg->fc_dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001255
1256 if (addr_type & IPV6_ADDR_MULTICAST)
Changli Gaod8d1f302010-06-10 23:31:35 -07001257 rt->dst.input = ip6_mc_input;
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00001258 else if (cfg->fc_flags & RTF_LOCAL)
1259 rt->dst.input = ip6_input;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001260 else
Changli Gaod8d1f302010-06-10 23:31:35 -07001261 rt->dst.input = ip6_forward;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001262
Changli Gaod8d1f302010-06-10 23:31:35 -07001263 rt->dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001264
Thomas Graf86872cb2006-08-22 00:01:08 -07001265 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1266 rt->rt6i_dst.plen = cfg->fc_dst_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001267 if (rt->rt6i_dst.plen == 128)
David S. Miller11d53b42011-06-24 15:23:34 -07001268 rt->dst.flags |= DST_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001269
Yan, Zheng8e2ec632011-09-05 21:34:30 +00001270 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1271 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1272 if (!metrics) {
1273 err = -ENOMEM;
1274 goto out;
1275 }
1276 dst_init_metrics(&rt->dst, metrics, 0);
1277 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001278#ifdef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001279 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1280 rt->rt6i_src.plen = cfg->fc_src_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001281#endif
1282
Thomas Graf86872cb2006-08-22 00:01:08 -07001283 rt->rt6i_metric = cfg->fc_metric;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001284
1285 /* We cannot add true routes via loopback here,
1286 they would result in kernel looping; promote them to reject routes
1287 */
Thomas Graf86872cb2006-08-22 00:01:08 -07001288 if ((cfg->fc_flags & RTF_REJECT) ||
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00001289 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1290 && !(cfg->fc_flags&RTF_LOCAL))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001291 /* hold loopback dev/idev if we haven't done so. */
Daniel Lezcano55786892008-03-04 13:47:47 -08001292 if (dev != net->loopback_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001293 if (dev) {
1294 dev_put(dev);
1295 in6_dev_put(idev);
1296 }
Daniel Lezcano55786892008-03-04 13:47:47 -08001297 dev = net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001298 dev_hold(dev);
1299 idev = in6_dev_get(dev);
1300 if (!idev) {
1301 err = -ENODEV;
1302 goto out;
1303 }
1304 }
Changli Gaod8d1f302010-06-10 23:31:35 -07001305 rt->dst.output = ip6_pkt_discard_out;
1306 rt->dst.input = ip6_pkt_discard;
1307 rt->dst.error = -ENETUNREACH;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001308 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1309 goto install_route;
1310 }
1311
Thomas Graf86872cb2006-08-22 00:01:08 -07001312 if (cfg->fc_flags & RTF_GATEWAY) {
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001313 const struct in6_addr *gw_addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001314 int gwa_type;
1315
Thomas Graf86872cb2006-08-22 00:01:08 -07001316 gw_addr = &cfg->fc_gateway;
1317 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001318 gwa_type = ipv6_addr_type(gw_addr);
1319
1320 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1321 struct rt6_info *grt;
1322
1323 /* IPv6 strictly inhibits using not link-local
1324 addresses as nexthop address.
1325 Otherwise, router will not able to send redirects.
1326 It is very good, but in some (rare!) circumstances
1327 (SIT, PtP, NBMA NOARP links) it is handy to allow
1328 some exceptions. --ANK
1329 */
1330 err = -EINVAL;
1331 if (!(gwa_type&IPV6_ADDR_UNICAST))
1332 goto out;
1333
Daniel Lezcano55786892008-03-04 13:47:47 -08001334 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001335
1336 err = -EHOSTUNREACH;
1337 if (grt == NULL)
1338 goto out;
1339 if (dev) {
1340 if (dev != grt->rt6i_dev) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001341 dst_release(&grt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001342 goto out;
1343 }
1344 } else {
1345 dev = grt->rt6i_dev;
1346 idev = grt->rt6i_idev;
1347 dev_hold(dev);
1348 in6_dev_hold(grt->rt6i_idev);
1349 }
1350 if (!(grt->rt6i_flags&RTF_GATEWAY))
1351 err = 0;
Changli Gaod8d1f302010-06-10 23:31:35 -07001352 dst_release(&grt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001353
1354 if (err)
1355 goto out;
1356 }
1357 err = -EINVAL;
1358 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1359 goto out;
1360 }
1361
1362 err = -ENODEV;
1363 if (dev == NULL)
1364 goto out;
1365
Daniel Walterc3968a82011-04-13 21:10:57 +00001366 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1367 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1368 err = -EINVAL;
1369 goto out;
1370 }
1371 ipv6_addr_copy(&rt->rt6i_prefsrc.addr, &cfg->fc_prefsrc);
1372 rt->rt6i_prefsrc.plen = 128;
1373 } else
1374 rt->rt6i_prefsrc.plen = 0;
1375
Thomas Graf86872cb2006-08-22 00:01:08 -07001376 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
David S. Miller69cce1d2011-07-17 23:09:49 -07001377 struct neighbour *n = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1378 if (IS_ERR(n)) {
1379 err = PTR_ERR(n);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001380 goto out;
1381 }
David S. Miller69cce1d2011-07-17 23:09:49 -07001382 dst_set_neighbour(&rt->dst, n);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001383 }
1384
Thomas Graf86872cb2006-08-22 00:01:08 -07001385 rt->rt6i_flags = cfg->fc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001386
1387install_route:
Thomas Graf86872cb2006-08-22 00:01:08 -07001388 if (cfg->fc_mx) {
1389 struct nlattr *nla;
1390 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001391
Thomas Graf86872cb2006-08-22 00:01:08 -07001392 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
Thomas Graf8f4c1f92007-09-12 14:44:36 +02001393 int type = nla_type(nla);
Thomas Graf86872cb2006-08-22 00:01:08 -07001394
1395 if (type) {
1396 if (type > RTAX_MAX) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001397 err = -EINVAL;
1398 goto out;
1399 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001400
David S. Millerdefb3512010-12-08 21:16:57 -08001401 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001402 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001403 }
1404 }
1405
Changli Gaod8d1f302010-06-10 23:31:35 -07001406 rt->dst.dev = dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001407 rt->rt6i_idev = idev;
Thomas Grafc71099a2006-08-04 23:20:06 -07001408 rt->rt6i_table = table;
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001409
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001410 cfg->fc_nlinfo.nl_net = dev_net(dev);
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001411
Thomas Graf86872cb2006-08-22 00:01:08 -07001412 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001413
1414out:
1415 if (dev)
1416 dev_put(dev);
1417 if (idev)
1418 in6_dev_put(idev);
1419 if (rt)
Changli Gaod8d1f302010-06-10 23:31:35 -07001420 dst_free(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001421 return err;
1422}
1423
Thomas Graf86872cb2006-08-22 00:01:08 -07001424static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001425{
1426 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -07001427 struct fib6_table *table;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001428 struct net *net = dev_net(rt->rt6i_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001429
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001430 if (rt == net->ipv6.ip6_null_entry)
Patrick McHardy6c813a72006-08-06 22:22:47 -07001431 return -ENOENT;
1432
Thomas Grafc71099a2006-08-04 23:20:06 -07001433 table = rt->rt6i_table;
1434 write_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001435
Thomas Graf86872cb2006-08-22 00:01:08 -07001436 err = fib6_del(rt, info);
Changli Gaod8d1f302010-06-10 23:31:35 -07001437 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001438
Thomas Grafc71099a2006-08-04 23:20:06 -07001439 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001440
1441 return err;
1442}
1443
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001444int ip6_del_rt(struct rt6_info *rt)
1445{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001446 struct nl_info info = {
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001447 .nl_net = dev_net(rt->rt6i_dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001448 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08001449 return __ip6_del_rt(rt, &info);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001450}
1451
Thomas Graf86872cb2006-08-22 00:01:08 -07001452static int ip6_route_del(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001453{
Thomas Grafc71099a2006-08-04 23:20:06 -07001454 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001455 struct fib6_node *fn;
1456 struct rt6_info *rt;
1457 int err = -ESRCH;
1458
Daniel Lezcano55786892008-03-04 13:47:47 -08001459 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001460 if (table == NULL)
1461 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001462
Thomas Grafc71099a2006-08-04 23:20:06 -07001463 read_lock_bh(&table->tb6_lock);
1464
1465 fn = fib6_locate(&table->tb6_root,
Thomas Graf86872cb2006-08-22 00:01:08 -07001466 &cfg->fc_dst, cfg->fc_dst_len,
1467 &cfg->fc_src, cfg->fc_src_len);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001468
Linus Torvalds1da177e2005-04-16 15:20:36 -07001469 if (fn) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001470 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
Thomas Graf86872cb2006-08-22 00:01:08 -07001471 if (cfg->fc_ifindex &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001472 (rt->rt6i_dev == NULL ||
Thomas Graf86872cb2006-08-22 00:01:08 -07001473 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001474 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001475 if (cfg->fc_flags & RTF_GATEWAY &&
1476 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001477 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001478 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001479 continue;
Changli Gaod8d1f302010-06-10 23:31:35 -07001480 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001481 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001482
Thomas Graf86872cb2006-08-22 00:01:08 -07001483 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001484 }
1485 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001486 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001487
1488 return err;
1489}
1490
1491/*
1492 * Handle redirects
1493 */
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001494struct ip6rd_flowi {
David S. Miller4c9483b2011-03-12 16:22:43 -05001495 struct flowi6 fl6;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001496 struct in6_addr gateway;
1497};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001498
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001499static struct rt6_info *__ip6_route_redirect(struct net *net,
1500 struct fib6_table *table,
David S. Miller4c9483b2011-03-12 16:22:43 -05001501 struct flowi6 *fl6,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001502 int flags)
1503{
David S. Miller4c9483b2011-03-12 16:22:43 -05001504 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001505 struct rt6_info *rt;
1506 struct fib6_node *fn;
Thomas Grafc71099a2006-08-04 23:20:06 -07001507
Linus Torvalds1da177e2005-04-16 15:20:36 -07001508 /*
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001509 * Get the "current" route for this destination and
1510 * check if the redirect has come from approriate router.
1511 *
1512 * RFC 2461 specifies that redirects should only be
1513 * accepted if they come from the nexthop to the target.
1514 * Due to the way the routes are chosen, this notion
1515 * is a bit fuzzy and one might need to check all possible
1516 * routes.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001517 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001518
Thomas Grafc71099a2006-08-04 23:20:06 -07001519 read_lock_bh(&table->tb6_lock);
David S. Miller4c9483b2011-03-12 16:22:43 -05001520 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001521restart:
Changli Gaod8d1f302010-06-10 23:31:35 -07001522 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001523 /*
1524 * Current route is on-link; redirect is always invalid.
1525 *
1526 * Seems, previous statement is not true. It could
1527 * be node, which looks for us as on-link (f.e. proxy ndisc)
1528 * But then router serving it might decide, that we should
1529 * know truth 8)8) --ANK (980726).
1530 */
1531 if (rt6_check_expired(rt))
1532 continue;
1533 if (!(rt->rt6i_flags & RTF_GATEWAY))
1534 continue;
David S. Miller4c9483b2011-03-12 16:22:43 -05001535 if (fl6->flowi6_oif != rt->rt6i_dev->ifindex)
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001536 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001537 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001538 continue;
1539 break;
1540 }
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001541
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001542 if (!rt)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001543 rt = net->ipv6.ip6_null_entry;
David S. Miller4c9483b2011-03-12 16:22:43 -05001544 BACKTRACK(net, &fl6->saddr);
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001545out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001546 dst_hold(&rt->dst);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001547
1548 read_unlock_bh(&table->tb6_lock);
1549
1550 return rt;
1551};
1552
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001553static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1554 const struct in6_addr *src,
1555 const struct in6_addr *gateway,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001556 struct net_device *dev)
1557{
Thomas Grafadaa70b2006-10-13 15:01:03 -07001558 int flags = RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001559 struct net *net = dev_net(dev);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001560 struct ip6rd_flowi rdfl = {
David S. Miller4c9483b2011-03-12 16:22:43 -05001561 .fl6 = {
1562 .flowi6_oif = dev->ifindex,
1563 .daddr = *dest,
1564 .saddr = *src,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001565 },
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001566 };
Thomas Grafadaa70b2006-10-13 15:01:03 -07001567
Brian Haley86c36ce2009-10-07 13:58:01 -07001568 ipv6_addr_copy(&rdfl.gateway, gateway);
1569
Thomas Grafadaa70b2006-10-13 15:01:03 -07001570 if (rt6_need_strict(dest))
1571 flags |= RT6_LOOKUP_F_IFACE;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001572
David S. Miller4c9483b2011-03-12 16:22:43 -05001573 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
Daniel Lezcano58f09b72008-03-03 23:25:27 -08001574 flags, __ip6_route_redirect);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001575}
1576
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001577void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1578 const struct in6_addr *saddr,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001579 struct neighbour *neigh, u8 *lladdr, int on_link)
1580{
1581 struct rt6_info *rt, *nrt = NULL;
1582 struct netevent_redirect netevent;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001583 struct net *net = dev_net(neigh->dev);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001584
1585 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1586
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001587 if (rt == net->ipv6.ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001588 if (net_ratelimit())
1589 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1590 "for redirect target\n");
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001591 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001592 }
1593
Linus Torvalds1da177e2005-04-16 15:20:36 -07001594 /*
1595 * We have finally decided to accept it.
1596 */
1597
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001598 neigh_update(neigh, lladdr, NUD_STALE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001599 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1600 NEIGH_UPDATE_F_OVERRIDE|
1601 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1602 NEIGH_UPDATE_F_ISROUTER))
1603 );
1604
1605 /*
1606 * Redirect received -> path was valid.
1607 * Look, redirects are sent only in response to data packets,
1608 * so that this nexthop apparently is reachable. --ANK
1609 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001610 dst_confirm(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001611
1612 /* Duplicate redirect: silently ignore. */
Eric Dumazetf2c31e32011-07-29 19:00:53 +00001613 if (neigh == dst_get_neighbour_raw(&rt->dst))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001614 goto out;
1615
Eric Dumazet21efcfa2011-07-19 20:18:36 +00001616 nrt = ip6_rt_copy(rt, dest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001617 if (nrt == NULL)
1618 goto out;
1619
1620 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1621 if (on_link)
1622 nrt->rt6i_flags &= ~RTF_GATEWAY;
1623
Linus Torvalds1da177e2005-04-16 15:20:36 -07001624 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
David S. Miller69cce1d2011-07-17 23:09:49 -07001625 dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001626
Thomas Graf40e22e82006-08-22 00:00:45 -07001627 if (ip6_ins_rt(nrt))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001628 goto out;
1629
Changli Gaod8d1f302010-06-10 23:31:35 -07001630 netevent.old = &rt->dst;
1631 netevent.new = &nrt->dst;
Tom Tucker8d717402006-07-30 20:43:36 -07001632 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1633
Linus Torvalds1da177e2005-04-16 15:20:36 -07001634 if (rt->rt6i_flags&RTF_CACHE) {
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001635 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001636 return;
1637 }
1638
1639out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001640 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001641}
1642
1643/*
1644 * Handle ICMP "packet too big" messages
1645 * i.e. Path MTU discovery
1646 */
1647
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001648static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001649 struct net *net, u32 pmtu, int ifindex)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001650{
1651 struct rt6_info *rt, *nrt;
1652 int allfrag = 0;
Andrey Vagind3052b52010-12-11 15:20:11 +00001653again:
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001654 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001655 if (rt == NULL)
1656 return;
1657
Andrey Vagind3052b52010-12-11 15:20:11 +00001658 if (rt6_check_expired(rt)) {
1659 ip6_del_rt(rt);
1660 goto again;
1661 }
1662
Changli Gaod8d1f302010-06-10 23:31:35 -07001663 if (pmtu >= dst_mtu(&rt->dst))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001664 goto out;
1665
1666 if (pmtu < IPV6_MIN_MTU) {
1667 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001668 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
Linus Torvalds1da177e2005-04-16 15:20:36 -07001669 * MTU (1280) and a fragment header should always be included
1670 * after a node receiving Too Big message reporting PMTU is
1671 * less than the IPv6 Minimum Link MTU.
1672 */
1673 pmtu = IPV6_MIN_MTU;
1674 allfrag = 1;
1675 }
1676
1677 /* New mtu received -> path was valid.
1678 They are sent only in response to data packets,
1679 so that this nexthop apparently is reachable. --ANK
1680 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001681 dst_confirm(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001682
1683 /* Host route. If it is static, it would be better
1684 not to override it, but add new one, so that
1685 when cache entry will expire old pmtu
1686 would return automatically.
1687 */
1688 if (rt->rt6i_flags & RTF_CACHE) {
David S. Millerdefb3512010-12-08 21:16:57 -08001689 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1690 if (allfrag) {
1691 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1692 features |= RTAX_FEATURE_ALLFRAG;
1693 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1694 }
Changli Gaod8d1f302010-06-10 23:31:35 -07001695 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001696 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1697 goto out;
1698 }
1699
1700 /* Network route.
1701 Two cases are possible:
1702 1. It is connected route. Action: COW
1703 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1704 */
Eric Dumazetf2c31e32011-07-29 19:00:53 +00001705 if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001706 nrt = rt6_alloc_cow(rt, daddr, saddr);
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001707 else
1708 nrt = rt6_alloc_clone(rt, daddr);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001709
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001710 if (nrt) {
David S. Millerdefb3512010-12-08 21:16:57 -08001711 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1712 if (allfrag) {
1713 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1714 features |= RTAX_FEATURE_ALLFRAG;
1715 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1716 }
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001717
1718 /* According to RFC 1981, detecting PMTU increase shouldn't be
1719 * happened within 5 mins, the recommended timer is 10 mins.
1720 * Here this route expiration time is set to ip6_rt_mtu_expires
1721 * which is 10 mins. After 10 mins the decreased pmtu is expired
1722 * and detecting PMTU increase will be automatically happened.
1723 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001724 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001725 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1726
Thomas Graf40e22e82006-08-22 00:00:45 -07001727 ip6_ins_rt(nrt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001728 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001729out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001730 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001731}
1732
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001733void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001734 struct net_device *dev, u32 pmtu)
1735{
1736 struct net *net = dev_net(dev);
1737
1738 /*
1739 * RFC 1981 states that a node "MUST reduce the size of the packets it
1740 * is sending along the path" that caused the Packet Too Big message.
1741 * Since it's not possible in the general case to determine which
1742 * interface was used to send the original packet, we update the MTU
1743 * on the interface that will be used to send future packets. We also
1744 * update the MTU on the interface that received the Packet Too Big in
1745 * case the original packet was forced out that interface with
1746 * SO_BINDTODEVICE or similar. This is the next best thing to the
1747 * correct behaviour, which would be to update the MTU on all
1748 * interfaces.
1749 */
1750 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1751 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1752}
1753
Linus Torvalds1da177e2005-04-16 15:20:36 -07001754/*
1755 * Misc support functions
1756 */
1757
Eric Dumazet21efcfa2011-07-19 20:18:36 +00001758static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
1759 const struct in6_addr *dest)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001760{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001761 struct net *net = dev_net(ort->rt6i_dev);
David S. Miller5c1e6aa2011-04-28 14:13:38 -07001762 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
David S. Miller957c6652011-06-24 15:25:00 -07001763 ort->dst.dev, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001764
1765 if (rt) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001766 rt->dst.input = ort->dst.input;
1767 rt->dst.output = ort->dst.output;
Yan, Zheng8e2ec632011-09-05 21:34:30 +00001768 rt->dst.flags |= DST_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001769
Eric Dumazet21efcfa2011-07-19 20:18:36 +00001770 ipv6_addr_copy(&rt->rt6i_dst.addr, dest);
Yan, Zheng8e2ec632011-09-05 21:34:30 +00001771 rt->rt6i_dst.plen = 128;
David S. Millerdefb3512010-12-08 21:16:57 -08001772 dst_copy_metrics(&rt->dst, &ort->dst);
Changli Gaod8d1f302010-06-10 23:31:35 -07001773 rt->dst.error = ort->dst.error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001774 rt->rt6i_idev = ort->rt6i_idev;
1775 if (rt->rt6i_idev)
1776 in6_dev_hold(rt->rt6i_idev);
Changli Gaod8d1f302010-06-10 23:31:35 -07001777 rt->dst.lastuse = jiffies;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001778 rt->rt6i_expires = 0;
1779
1780 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1781 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1782 rt->rt6i_metric = 0;
1783
Linus Torvalds1da177e2005-04-16 15:20:36 -07001784#ifdef CONFIG_IPV6_SUBTREES
1785 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1786#endif
Florian Westphal0f6c6392011-05-20 11:27:24 +00001787 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
Thomas Grafc71099a2006-08-04 23:20:06 -07001788 rt->rt6i_table = ort->rt6i_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001789 }
1790 return rt;
1791}
1792
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001793#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001794static struct rt6_info *rt6_get_route_info(struct net *net,
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001795 const struct in6_addr *prefix, int prefixlen,
1796 const struct in6_addr *gwaddr, int ifindex)
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001797{
1798 struct fib6_node *fn;
1799 struct rt6_info *rt = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001800 struct fib6_table *table;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001801
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001802 table = fib6_get_table(net, RT6_TABLE_INFO);
Thomas Grafc71099a2006-08-04 23:20:06 -07001803 if (table == NULL)
1804 return NULL;
1805
1806 write_lock_bh(&table->tb6_lock);
1807 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001808 if (!fn)
1809 goto out;
1810
Changli Gaod8d1f302010-06-10 23:31:35 -07001811 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001812 if (rt->rt6i_dev->ifindex != ifindex)
1813 continue;
1814 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1815 continue;
1816 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1817 continue;
Changli Gaod8d1f302010-06-10 23:31:35 -07001818 dst_hold(&rt->dst);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001819 break;
1820 }
1821out:
Thomas Grafc71099a2006-08-04 23:20:06 -07001822 write_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001823 return rt;
1824}
1825
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001826static struct rt6_info *rt6_add_route_info(struct net *net,
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001827 const struct in6_addr *prefix, int prefixlen,
1828 const struct in6_addr *gwaddr, int ifindex,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001829 unsigned pref)
1830{
Thomas Graf86872cb2006-08-22 00:01:08 -07001831 struct fib6_config cfg = {
1832 .fc_table = RT6_TABLE_INFO,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001833 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001834 .fc_ifindex = ifindex,
1835 .fc_dst_len = prefixlen,
1836 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1837 RTF_UP | RTF_PREF(pref),
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001838 .fc_nlinfo.pid = 0,
1839 .fc_nlinfo.nlh = NULL,
1840 .fc_nlinfo.nl_net = net,
Thomas Graf86872cb2006-08-22 00:01:08 -07001841 };
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001842
Thomas Graf86872cb2006-08-22 00:01:08 -07001843 ipv6_addr_copy(&cfg.fc_dst, prefix);
1844 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1845
YOSHIFUJI Hideakie317da92006-03-20 17:06:42 -08001846 /* We should treat it as a default route if prefix length is 0. */
1847 if (!prefixlen)
Thomas Graf86872cb2006-08-22 00:01:08 -07001848 cfg.fc_flags |= RTF_DEFAULT;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001849
Thomas Graf86872cb2006-08-22 00:01:08 -07001850 ip6_route_add(&cfg);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001851
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001852 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001853}
1854#endif
1855
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001856struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001857{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001858 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001859 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001860
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001861 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001862 if (table == NULL)
1863 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001864
Thomas Grafc71099a2006-08-04 23:20:06 -07001865 write_lock_bh(&table->tb6_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07001866 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001867 if (dev == rt->rt6i_dev &&
YOSHIFUJI Hideaki045927f2006-03-20 17:00:48 -08001868 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001869 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1870 break;
1871 }
1872 if (rt)
Changli Gaod8d1f302010-06-10 23:31:35 -07001873 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001874 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001875 return rt;
1876}
1877
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001878struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001879 struct net_device *dev,
1880 unsigned int pref)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001881{
Thomas Graf86872cb2006-08-22 00:01:08 -07001882 struct fib6_config cfg = {
1883 .fc_table = RT6_TABLE_DFLT,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001884 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001885 .fc_ifindex = dev->ifindex,
1886 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1887 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
Daniel Lezcano55786892008-03-04 13:47:47 -08001888 .fc_nlinfo.pid = 0,
1889 .fc_nlinfo.nlh = NULL,
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001890 .fc_nlinfo.nl_net = dev_net(dev),
Thomas Graf86872cb2006-08-22 00:01:08 -07001891 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001892
Thomas Graf86872cb2006-08-22 00:01:08 -07001893 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001894
Thomas Graf86872cb2006-08-22 00:01:08 -07001895 ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001896
Linus Torvalds1da177e2005-04-16 15:20:36 -07001897 return rt6_get_dflt_router(gwaddr, dev);
1898}
1899
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001900void rt6_purge_dflt_routers(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001901{
1902 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001903 struct fib6_table *table;
1904
1905 /* NOTE: Keep consistent with rt6_get_dflt_router */
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001906 table = fib6_get_table(net, RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001907 if (table == NULL)
1908 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001909
1910restart:
Thomas Grafc71099a2006-08-04 23:20:06 -07001911 read_lock_bh(&table->tb6_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07001912 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001913 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001914 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001915 read_unlock_bh(&table->tb6_lock);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001916 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001917 goto restart;
1918 }
1919 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001920 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001921}
1922
Daniel Lezcano55786892008-03-04 13:47:47 -08001923static void rtmsg_to_fib6_config(struct net *net,
1924 struct in6_rtmsg *rtmsg,
Thomas Graf86872cb2006-08-22 00:01:08 -07001925 struct fib6_config *cfg)
1926{
1927 memset(cfg, 0, sizeof(*cfg));
1928
1929 cfg->fc_table = RT6_TABLE_MAIN;
1930 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1931 cfg->fc_metric = rtmsg->rtmsg_metric;
1932 cfg->fc_expires = rtmsg->rtmsg_info;
1933 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1934 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1935 cfg->fc_flags = rtmsg->rtmsg_flags;
1936
Daniel Lezcano55786892008-03-04 13:47:47 -08001937 cfg->fc_nlinfo.nl_net = net;
Benjamin Theryf1243c22008-02-26 18:10:03 -08001938
Thomas Graf86872cb2006-08-22 00:01:08 -07001939 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1940 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1941 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1942}
1943
Daniel Lezcano55786892008-03-04 13:47:47 -08001944int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001945{
Thomas Graf86872cb2006-08-22 00:01:08 -07001946 struct fib6_config cfg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001947 struct in6_rtmsg rtmsg;
1948 int err;
1949
1950 switch(cmd) {
1951 case SIOCADDRT: /* Add a route */
1952 case SIOCDELRT: /* Delete a route */
1953 if (!capable(CAP_NET_ADMIN))
1954 return -EPERM;
1955 err = copy_from_user(&rtmsg, arg,
1956 sizeof(struct in6_rtmsg));
1957 if (err)
1958 return -EFAULT;
Thomas Graf86872cb2006-08-22 00:01:08 -07001959
Daniel Lezcano55786892008-03-04 13:47:47 -08001960 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
Thomas Graf86872cb2006-08-22 00:01:08 -07001961
Linus Torvalds1da177e2005-04-16 15:20:36 -07001962 rtnl_lock();
1963 switch (cmd) {
1964 case SIOCADDRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001965 err = ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001966 break;
1967 case SIOCDELRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001968 err = ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001969 break;
1970 default:
1971 err = -EINVAL;
1972 }
1973 rtnl_unlock();
1974
1975 return err;
Stephen Hemminger3ff50b72007-04-20 17:09:22 -07001976 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001977
1978 return -EINVAL;
1979}
1980
1981/*
1982 * Drop the packet on the floor
1983 */
1984
Brian Haleyd5fdd6b2009-06-23 04:31:07 -07001985static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001986{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001987 int type;
Eric Dumazetadf30902009-06-02 05:19:30 +00001988 struct dst_entry *dst = skb_dst(skb);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001989 switch (ipstats_mib_noroutes) {
1990 case IPSTATS_MIB_INNOROUTES:
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -07001991 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
Ulrich Weber45bb0062010-02-25 23:28:58 +00001992 if (type == IPV6_ADDR_ANY) {
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07001993 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1994 IPSTATS_MIB_INADDRERRORS);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001995 break;
1996 }
1997 /* FALLTHROUGH */
1998 case IPSTATS_MIB_OUTNOROUTES:
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07001999 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2000 ipstats_mib_noroutes);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002001 break;
2002 }
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +00002003 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002004 kfree_skb(skb);
2005 return 0;
2006}
2007
Thomas Graf9ce8ade2006-10-18 20:46:54 -07002008static int ip6_pkt_discard(struct sk_buff *skb)
2009{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002010 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07002011}
2012
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03002013static int ip6_pkt_discard_out(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002014{
Eric Dumazetadf30902009-06-02 05:19:30 +00002015 skb->dev = skb_dst(skb)->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002016 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002017}
2018
David S. Miller6723ab52006-10-18 21:20:57 -07002019#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2020
Thomas Graf9ce8ade2006-10-18 20:46:54 -07002021static int ip6_pkt_prohibit(struct sk_buff *skb)
2022{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002023 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07002024}
2025
2026static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2027{
Eric Dumazetadf30902009-06-02 05:19:30 +00002028 skb->dev = skb_dst(skb)->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002029 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07002030}
2031
David S. Miller6723ab52006-10-18 21:20:57 -07002032#endif
2033
Linus Torvalds1da177e2005-04-16 15:20:36 -07002034/*
2035 * Allocate a dst for local (unicast / anycast) address.
2036 */
2037
2038struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2039 const struct in6_addr *addr,
2040 int anycast)
2041{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002042 struct net *net = dev_net(idev->dev);
David S. Miller5c1e6aa2011-04-28 14:13:38 -07002043 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
David S. Miller957c6652011-06-24 15:25:00 -07002044 net->loopback_dev, 0);
David S. Miller14deae42009-01-04 16:04:39 -08002045 struct neighbour *neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002046
Ben Greear40385652010-11-08 12:33:48 +00002047 if (rt == NULL) {
2048 if (net_ratelimit())
2049 pr_warning("IPv6: Maximum number of routes reached,"
2050 " consider increasing route/max_size.\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002051 return ERR_PTR(-ENOMEM);
Ben Greear40385652010-11-08 12:33:48 +00002052 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002053
Linus Torvalds1da177e2005-04-16 15:20:36 -07002054 in6_dev_hold(idev);
2055
David S. Miller11d53b42011-06-24 15:23:34 -07002056 rt->dst.flags |= DST_HOST;
Changli Gaod8d1f302010-06-10 23:31:35 -07002057 rt->dst.input = ip6_input;
2058 rt->dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002059 rt->rt6i_idev = idev;
Changli Gaod8d1f302010-06-10 23:31:35 -07002060 rt->dst.obsolete = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002061
2062 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09002063 if (anycast)
2064 rt->rt6i_flags |= RTF_ANYCAST;
2065 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07002066 rt->rt6i_flags |= RTF_LOCAL;
David S. Miller14deae42009-01-04 16:04:39 -08002067 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
2068 if (IS_ERR(neigh)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002069 dst_free(&rt->dst);
David S. Miller14deae42009-01-04 16:04:39 -08002070
David S. Miller29546a62011-03-03 12:10:37 -08002071 return ERR_CAST(neigh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002072 }
David S. Miller69cce1d2011-07-17 23:09:49 -07002073 dst_set_neighbour(&rt->dst, neigh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002074
2075 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
2076 rt->rt6i_dst.plen = 128;
Daniel Lezcano55786892008-03-04 13:47:47 -08002077 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002078
Changli Gaod8d1f302010-06-10 23:31:35 -07002079 atomic_set(&rt->dst.__refcnt, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002080
2081 return rt;
2082}
2083
Daniel Walterc3968a82011-04-13 21:10:57 +00002084int ip6_route_get_saddr(struct net *net,
2085 struct rt6_info *rt,
Eric Dumazetb71d1d42011-04-22 04:53:02 +00002086 const struct in6_addr *daddr,
Daniel Walterc3968a82011-04-13 21:10:57 +00002087 unsigned int prefs,
2088 struct in6_addr *saddr)
2089{
2090 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2091 int err = 0;
2092 if (rt->rt6i_prefsrc.plen)
2093 ipv6_addr_copy(saddr, &rt->rt6i_prefsrc.addr);
2094 else
2095 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2096 daddr, prefs, saddr);
2097 return err;
2098}
2099
/*
 * Remove a deleted IP address from the prefsrc entries of routes.
 *
 * Argument bundle handed to fib6_remove_prefsrc() through fib6_clean_all().
 */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device filter; NULL matches any device */
	struct net *net;		/* namespace whose null entry is skipped */
	struct in6_addr *addr;		/* address being removed */
};
2106
2107static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2108{
2109 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2110 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2111 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2112
2113 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
2114 rt != net->ipv6.ip6_null_entry &&
2115 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2116 /* remove prefsrc entry */
2117 rt->rt6i_prefsrc.plen = 0;
2118 }
2119 return 0;
2120}
2121
2122void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2123{
2124 struct net *net = dev_net(ifp->idev->dev);
2125 struct arg_dev_net_ip adni = {
2126 .dev = ifp->idev->dev,
2127 .net = net,
2128 .addr = &ifp->addr,
2129 };
2130 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2131}
2132
/* Argument bundle handed to fib6_ifdown() by rt6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;		/* device filter; NULL matches any device */
	struct net *net;		/* namespace whose null entry is skipped */
};
2137
Linus Torvalds1da177e2005-04-16 15:20:36 -07002138static int fib6_ifdown(struct rt6_info *rt, void *arg)
2139{
stephen hemmingerbc3ef662010-12-16 17:42:40 +00002140 const struct arg_dev_net *adn = arg;
2141 const struct net_device *dev = adn->dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002142
stephen hemmingerbc3ef662010-12-16 17:42:40 +00002143 if ((rt->rt6i_dev == dev || dev == NULL) &&
2144 rt != adn->net->ipv6.ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002145 RT6_TRACE("deleted by ifdown %p\n", rt);
2146 return -1;
2147 }
2148 return 0;
2149}
2150
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002151void rt6_ifdown(struct net *net, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002152{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002153 struct arg_dev_net adn = {
2154 .dev = dev,
2155 .net = net,
2156 };
2157
2158 fib6_clean_all(net, fib6_ifdown, 0, &adn);
David S. Miller1e493d12008-09-10 17:27:15 -07002159 icmp6_clean_all(fib6_ifdown, &adn);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002160}
2161
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
2167
2168static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2169{
2170 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2171 struct inet6_dev *idev;
2172
2173 /* In IPv6 pmtu discovery is not optional,
2174 so that RTAX_MTU lock cannot disable it.
2175 We still use this lock to block changes
2176 caused by addrconf/ndisc.
2177 */
2178
2179 idev = __in6_dev_get(arg->dev);
2180 if (idev == NULL)
2181 return 0;
2182
2183 /* For administrative MTU increase, there is no way to discover
2184 IPv6 PMTU increase, so PMTU increase should be updated here.
2185 Since RFC 1981 doesn't include administrative MTU increase
2186 update PMTU increase is a MUST. (i.e. jumbo frame)
2187 */
2188 /*
2189 If new MTU is less than route PMTU, this new MTU will be the
2190 lowest MTU in the path, update the route PMTU to reflect PMTU
2191 decreases; if new MTU is greater than route PMTU, and the
2192 old MTU is the lowest MTU in the path, update the route PMTU
2193 to reflect the increase. In this case if the other nodes' MTU
2194 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2195 PMTU discouvery.
2196 */
2197 if (rt->rt6i_dev == arg->dev &&
Changli Gaod8d1f302010-06-10 23:31:35 -07002198 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2199 (dst_mtu(&rt->dst) >= arg->mtu ||
2200 (dst_mtu(&rt->dst) < arg->mtu &&
2201 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
David S. Millerdefb3512010-12-08 21:16:57 -08002202 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
Simon Arlott566cfd82007-07-26 00:09:55 -07002203 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002204 return 0;
2205}
2206
2207void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2208{
Thomas Grafc71099a2006-08-04 23:20:06 -07002209 struct rt6_mtu_change_arg arg = {
2210 .dev = dev,
2211 .mtu = mtu,
2212 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07002213
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002214 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002215}
2216
Patrick McHardyef7c79e2007-06-05 12:38:30 -07002217static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
Thomas Graf5176f912006-08-26 20:13:18 -07002218 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
Thomas Graf86872cb2006-08-22 00:01:08 -07002219 [RTA_OIF] = { .type = NLA_U32 },
Thomas Grafab364a62006-08-22 00:01:47 -07002220 [RTA_IIF] = { .type = NLA_U32 },
Thomas Graf86872cb2006-08-22 00:01:08 -07002221 [RTA_PRIORITY] = { .type = NLA_U32 },
2222 [RTA_METRICS] = { .type = NLA_NESTED },
2223};
2224
2225static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2226 struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002227{
Thomas Graf86872cb2006-08-22 00:01:08 -07002228 struct rtmsg *rtm;
2229 struct nlattr *tb[RTA_MAX+1];
2230 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002231
Thomas Graf86872cb2006-08-22 00:01:08 -07002232 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2233 if (err < 0)
2234 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002235
Thomas Graf86872cb2006-08-22 00:01:08 -07002236 err = -EINVAL;
2237 rtm = nlmsg_data(nlh);
2238 memset(cfg, 0, sizeof(*cfg));
2239
2240 cfg->fc_table = rtm->rtm_table;
2241 cfg->fc_dst_len = rtm->rtm_dst_len;
2242 cfg->fc_src_len = rtm->rtm_src_len;
2243 cfg->fc_flags = RTF_UP;
2244 cfg->fc_protocol = rtm->rtm_protocol;
2245
2246 if (rtm->rtm_type == RTN_UNREACHABLE)
2247 cfg->fc_flags |= RTF_REJECT;
2248
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00002249 if (rtm->rtm_type == RTN_LOCAL)
2250 cfg->fc_flags |= RTF_LOCAL;
2251
Thomas Graf86872cb2006-08-22 00:01:08 -07002252 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2253 cfg->fc_nlinfo.nlh = nlh;
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002254 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
Thomas Graf86872cb2006-08-22 00:01:08 -07002255
2256 if (tb[RTA_GATEWAY]) {
2257 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2258 cfg->fc_flags |= RTF_GATEWAY;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002259 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002260
2261 if (tb[RTA_DST]) {
2262 int plen = (rtm->rtm_dst_len + 7) >> 3;
2263
2264 if (nla_len(tb[RTA_DST]) < plen)
2265 goto errout;
2266
2267 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002268 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002269
2270 if (tb[RTA_SRC]) {
2271 int plen = (rtm->rtm_src_len + 7) >> 3;
2272
2273 if (nla_len(tb[RTA_SRC]) < plen)
2274 goto errout;
2275
2276 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002277 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002278
Daniel Walterc3968a82011-04-13 21:10:57 +00002279 if (tb[RTA_PREFSRC])
2280 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2281
Thomas Graf86872cb2006-08-22 00:01:08 -07002282 if (tb[RTA_OIF])
2283 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2284
2285 if (tb[RTA_PRIORITY])
2286 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2287
2288 if (tb[RTA_METRICS]) {
2289 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2290 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002291 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002292
2293 if (tb[RTA_TABLE])
2294 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2295
2296 err = 0;
2297errout:
2298 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002299}
2300
Thomas Grafc127ea22007-03-22 11:58:32 -07002301static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002302{
Thomas Graf86872cb2006-08-22 00:01:08 -07002303 struct fib6_config cfg;
2304 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002305
Thomas Graf86872cb2006-08-22 00:01:08 -07002306 err = rtm_to_fib6_config(skb, nlh, &cfg);
2307 if (err < 0)
2308 return err;
2309
2310 return ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002311}
2312
Thomas Grafc127ea22007-03-22 11:58:32 -07002313static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002314{
Thomas Graf86872cb2006-08-22 00:01:08 -07002315 struct fib6_config cfg;
2316 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002317
Thomas Graf86872cb2006-08-22 00:01:08 -07002318 err = rtm_to_fib6_config(skb, nlh, &cfg);
2319 if (err < 0)
2320 return err;
2321
2322 return ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002323}
2324
Thomas Graf339bf982006-11-10 14:10:15 -08002325static inline size_t rt6_nlmsg_size(void)
2326{
2327 return NLMSG_ALIGN(sizeof(struct rtmsg))
2328 + nla_total_size(16) /* RTA_SRC */
2329 + nla_total_size(16) /* RTA_DST */
2330 + nla_total_size(16) /* RTA_GATEWAY */
2331 + nla_total_size(16) /* RTA_PREFSRC */
2332 + nla_total_size(4) /* RTA_TABLE */
2333 + nla_total_size(4) /* RTA_IIF */
2334 + nla_total_size(4) /* RTA_OIF */
2335 + nla_total_size(4) /* RTA_PRIORITY */
Noriaki TAKAMIYA6a2b9ce2007-01-23 22:09:41 -08002336 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
Thomas Graf339bf982006-11-10 14:10:15 -08002337 + nla_total_size(sizeof(struct rta_cacheinfo));
2338}
2339
Brian Haley191cd582008-08-14 15:33:21 -07002340static int rt6_fill_node(struct net *net,
2341 struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002342 struct in6_addr *dst, struct in6_addr *src,
2343 int iif, int type, u32 pid, u32 seq,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002344 int prefix, int nowait, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002345{
2346 struct rtmsg *rtm;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002347 struct nlmsghdr *nlh;
Thomas Grafe3703b32006-11-27 09:27:07 -08002348 long expires;
Patrick McHardy9e762a42006-08-10 23:09:48 -07002349 u32 table;
Eric Dumazetf2c31e32011-07-29 19:00:53 +00002350 struct neighbour *n;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002351
2352 if (prefix) { /* user wants prefix routes only */
2353 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2354 /* success since this is not a prefix route */
2355 return 1;
2356 }
2357 }
2358
Thomas Graf2d7202b2006-08-22 00:01:27 -07002359 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2360 if (nlh == NULL)
Patrick McHardy26932562007-01-31 23:16:40 -08002361 return -EMSGSIZE;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002362
2363 rtm = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002364 rtm->rtm_family = AF_INET6;
2365 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2366 rtm->rtm_src_len = rt->rt6i_src.plen;
2367 rtm->rtm_tos = 0;
Thomas Grafc71099a2006-08-04 23:20:06 -07002368 if (rt->rt6i_table)
Patrick McHardy9e762a42006-08-10 23:09:48 -07002369 table = rt->rt6i_table->tb6_id;
Thomas Grafc71099a2006-08-04 23:20:06 -07002370 else
Patrick McHardy9e762a42006-08-10 23:09:48 -07002371 table = RT6_TABLE_UNSPEC;
2372 rtm->rtm_table = table;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002373 NLA_PUT_U32(skb, RTA_TABLE, table);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002374 if (rt->rt6i_flags&RTF_REJECT)
2375 rtm->rtm_type = RTN_UNREACHABLE;
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00002376 else if (rt->rt6i_flags&RTF_LOCAL)
2377 rtm->rtm_type = RTN_LOCAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002378 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2379 rtm->rtm_type = RTN_LOCAL;
2380 else
2381 rtm->rtm_type = RTN_UNICAST;
2382 rtm->rtm_flags = 0;
2383 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2384 rtm->rtm_protocol = rt->rt6i_protocol;
2385 if (rt->rt6i_flags&RTF_DYNAMIC)
2386 rtm->rtm_protocol = RTPROT_REDIRECT;
2387 else if (rt->rt6i_flags & RTF_ADDRCONF)
2388 rtm->rtm_protocol = RTPROT_KERNEL;
2389 else if (rt->rt6i_flags&RTF_DEFAULT)
2390 rtm->rtm_protocol = RTPROT_RA;
2391
2392 if (rt->rt6i_flags&RTF_CACHE)
2393 rtm->rtm_flags |= RTM_F_CLONED;
2394
2395 if (dst) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002396 NLA_PUT(skb, RTA_DST, 16, dst);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002397 rtm->rtm_dst_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002398 } else if (rtm->rtm_dst_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002399 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002400#ifdef CONFIG_IPV6_SUBTREES
2401 if (src) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002402 NLA_PUT(skb, RTA_SRC, 16, src);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002403 rtm->rtm_src_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002404 } else if (rtm->rtm_src_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002405 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002406#endif
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002407 if (iif) {
2408#ifdef CONFIG_IPV6_MROUTE
2409 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
Benjamin Thery8229efd2008-12-10 16:30:15 -08002410 int err = ip6mr_get_route(net, skb, rtm, nowait);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002411 if (err <= 0) {
2412 if (!nowait) {
2413 if (err == 0)
2414 return 0;
2415 goto nla_put_failure;
2416 } else {
2417 if (err == -EMSGSIZE)
2418 goto nla_put_failure;
2419 }
2420 }
2421 } else
2422#endif
2423 NLA_PUT_U32(skb, RTA_IIF, iif);
2424 } else if (dst) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002425 struct in6_addr saddr_buf;
Daniel Walterc3968a82011-04-13 21:10:57 +00002426 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002427 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002428 }
Thomas Graf2d7202b2006-08-22 00:01:27 -07002429
Daniel Walterc3968a82011-04-13 21:10:57 +00002430 if (rt->rt6i_prefsrc.plen) {
2431 struct in6_addr saddr_buf;
2432 ipv6_addr_copy(&saddr_buf, &rt->rt6i_prefsrc.addr);
2433 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2434 }
2435
David S. Millerdefb3512010-12-08 21:16:57 -08002436 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002437 goto nla_put_failure;
2438
Eric Dumazetf2c31e32011-07-29 19:00:53 +00002439 rcu_read_lock();
2440 n = dst_get_neighbour(&rt->dst);
2441 if (n)
2442 NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key);
2443 rcu_read_unlock();
Thomas Graf2d7202b2006-08-22 00:01:27 -07002444
Changli Gaod8d1f302010-06-10 23:31:35 -07002445 if (rt->dst.dev)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002446 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2447
2448 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
Thomas Grafe3703b32006-11-27 09:27:07 -08002449
YOSHIFUJI Hideaki36e3dea2008-05-13 02:52:55 +09002450 if (!(rt->rt6i_flags & RTF_EXPIRES))
2451 expires = 0;
2452 else if (rt->rt6i_expires - jiffies < INT_MAX)
2453 expires = rt->rt6i_expires - jiffies;
2454 else
2455 expires = INT_MAX;
YOSHIFUJI Hideaki69cdf8f2008-05-19 16:55:13 -07002456
Changli Gaod8d1f302010-06-10 23:31:35 -07002457 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2458 expires, rt->dst.error) < 0)
Thomas Grafe3703b32006-11-27 09:27:07 -08002459 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002460
Thomas Graf2d7202b2006-08-22 00:01:27 -07002461 return nlmsg_end(skb, nlh);
2462
2463nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08002464 nlmsg_cancel(skb, nlh);
2465 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002466}
2467
Patrick McHardy1b43af52006-08-10 23:11:17 -07002468int rt6_dump_route(struct rt6_info *rt, void *p_arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002469{
2470 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2471 int prefix;
2472
Thomas Graf2d7202b2006-08-22 00:01:27 -07002473 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2474 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002475 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2476 } else
2477 prefix = 0;
2478
Brian Haley191cd582008-08-14 15:33:21 -07002479 return rt6_fill_node(arg->net,
2480 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002481 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002482 prefix, 0, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002483}
2484
Thomas Grafc127ea22007-03-22 11:58:32 -07002485static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002486{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002487 struct net *net = sock_net(in_skb->sk);
Thomas Grafab364a62006-08-22 00:01:47 -07002488 struct nlattr *tb[RTA_MAX+1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002489 struct rt6_info *rt;
Thomas Grafab364a62006-08-22 00:01:47 -07002490 struct sk_buff *skb;
2491 struct rtmsg *rtm;
David S. Miller4c9483b2011-03-12 16:22:43 -05002492 struct flowi6 fl6;
Thomas Grafab364a62006-08-22 00:01:47 -07002493 int err, iif = 0;
2494
2495 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2496 if (err < 0)
2497 goto errout;
2498
2499 err = -EINVAL;
David S. Miller4c9483b2011-03-12 16:22:43 -05002500 memset(&fl6, 0, sizeof(fl6));
Thomas Grafab364a62006-08-22 00:01:47 -07002501
2502 if (tb[RTA_SRC]) {
2503 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2504 goto errout;
2505
David S. Miller4c9483b2011-03-12 16:22:43 -05002506 ipv6_addr_copy(&fl6.saddr, nla_data(tb[RTA_SRC]));
Thomas Grafab364a62006-08-22 00:01:47 -07002507 }
2508
2509 if (tb[RTA_DST]) {
2510 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2511 goto errout;
2512
David S. Miller4c9483b2011-03-12 16:22:43 -05002513 ipv6_addr_copy(&fl6.daddr, nla_data(tb[RTA_DST]));
Thomas Grafab364a62006-08-22 00:01:47 -07002514 }
2515
2516 if (tb[RTA_IIF])
2517 iif = nla_get_u32(tb[RTA_IIF]);
2518
2519 if (tb[RTA_OIF])
David S. Miller4c9483b2011-03-12 16:22:43 -05002520 fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);
Thomas Grafab364a62006-08-22 00:01:47 -07002521
2522 if (iif) {
2523 struct net_device *dev;
Daniel Lezcano55786892008-03-04 13:47:47 -08002524 dev = __dev_get_by_index(net, iif);
Thomas Grafab364a62006-08-22 00:01:47 -07002525 if (!dev) {
2526 err = -ENODEV;
2527 goto errout;
2528 }
2529 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002530
2531 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
Thomas Grafab364a62006-08-22 00:01:47 -07002532 if (skb == NULL) {
2533 err = -ENOBUFS;
2534 goto errout;
2535 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002536
2537 /* Reserve room for dummy headers, this skb can pass
2538 through good chunk of routing engine.
2539 */
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07002540 skb_reset_mac_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002541 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2542
David S. Miller4c9483b2011-03-12 16:22:43 -05002543 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
Changli Gaod8d1f302010-06-10 23:31:35 -07002544 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002545
David S. Miller4c9483b2011-03-12 16:22:43 -05002546 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002547 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002548 nlh->nlmsg_seq, 0, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002549 if (err < 0) {
Thomas Grafab364a62006-08-22 00:01:47 -07002550 kfree_skb(skb);
2551 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002552 }
2553
Daniel Lezcano55786892008-03-04 13:47:47 -08002554 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
Thomas Grafab364a62006-08-22 00:01:47 -07002555errout:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002556 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002557}
2558
Thomas Graf86872cb2006-08-22 00:01:08 -07002559void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002560{
2561 struct sk_buff *skb;
Daniel Lezcano55786892008-03-04 13:47:47 -08002562 struct net *net = info->nl_net;
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002563 u32 seq;
2564 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002565
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002566 err = -ENOBUFS;
2567 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
Thomas Graf86872cb2006-08-22 00:01:08 -07002568
Thomas Graf339bf982006-11-10 14:10:15 -08002569 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
Thomas Graf21713eb2006-08-15 00:35:24 -07002570 if (skb == NULL)
2571 goto errout;
2572
Brian Haley191cd582008-08-14 15:33:21 -07002573 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002574 event, info->pid, seq, 0, 0, 0);
Patrick McHardy26932562007-01-31 23:16:40 -08002575 if (err < 0) {
2576 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2577 WARN_ON(err == -EMSGSIZE);
2578 kfree_skb(skb);
2579 goto errout;
2580 }
Pablo Neira Ayuso1ce85fe2009-02-24 23:18:28 -08002581 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2582 info->nlh, gfp_any());
2583 return;
Thomas Graf21713eb2006-08-15 00:35:24 -07002584errout:
2585 if (err < 0)
Daniel Lezcano55786892008-03-04 13:47:47 -08002586 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002587}
2588
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002589static int ip6_route_dev_notify(struct notifier_block *this,
2590 unsigned long event, void *data)
2591{
2592 struct net_device *dev = (struct net_device *)data;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002593 struct net *net = dev_net(dev);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002594
2595 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002596 net->ipv6.ip6_null_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002597 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2598#ifdef CONFIG_IPV6_MULTIPLE_TABLES
Changli Gaod8d1f302010-06-10 23:31:35 -07002599 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002600 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
Changli Gaod8d1f302010-06-10 23:31:35 -07002601 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002602 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2603#endif
2604 }
2605
2606 return NOTIFY_OK;
2607}
2608
Linus Torvalds1da177e2005-04-16 15:20:36 -07002609/*
2610 * /proc
2611 */
2612
2613#ifdef CONFIG_PROC_FS
2614
/*
 * Buffer bookkeeping for /proc output.
 * NOTE(review): not referenced by the seq_file based code visible in this
 * part of the file; confirm whether it still has users.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2623
2624static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2625{
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002626 struct seq_file *m = p_arg;
David S. Miller69cce1d2011-07-17 23:09:49 -07002627 struct neighbour *n;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002628
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002629 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002630
2631#ifdef CONFIG_IPV6_SUBTREES
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002632 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002633#else
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002634 seq_puts(m, "00000000000000000000000000000000 00 ");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002635#endif
Eric Dumazetf2c31e32011-07-29 19:00:53 +00002636 rcu_read_lock();
David S. Miller69cce1d2011-07-17 23:09:49 -07002637 n = dst_get_neighbour(&rt->dst);
2638 if (n) {
2639 seq_printf(m, "%pi6", n->primary_key);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002640 } else {
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002641 seq_puts(m, "00000000000000000000000000000000");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002642 }
Eric Dumazetf2c31e32011-07-29 19:00:53 +00002643 rcu_read_unlock();
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002644 seq_printf(m, " %08x %08x %08x %08x %8s\n",
Changli Gaod8d1f302010-06-10 23:31:35 -07002645 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2646 rt->dst.__use, rt->rt6i_flags,
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002647 rt->rt6i_dev ? rt->rt6i_dev->name : "");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002648 return 0;
2649}
2650
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002651static int ipv6_route_show(struct seq_file *m, void *v)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002652{
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002653 struct net *net = (struct net *)m->private;
2654 fib6_clean_all(net, rt6_info_route, 0, m);
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002655 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002656}
2657
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002658static int ipv6_route_open(struct inode *inode, struct file *file)
2659{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002660 return single_open_net(inode, file, ipv6_route_show);
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002661}
2662
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002663static const struct file_operations ipv6_route_proc_fops = {
2664 .owner = THIS_MODULE,
2665 .open = ipv6_route_open,
2666 .read = seq_read,
2667 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002668 .release = single_release_net,
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002669};
2670
Linus Torvalds1da177e2005-04-16 15:20:36 -07002671static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2672{
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002673 struct net *net = (struct net *)seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002674 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002675 net->ipv6.rt6_stats->fib_nodes,
2676 net->ipv6.rt6_stats->fib_route_nodes,
2677 net->ipv6.rt6_stats->fib_rt_alloc,
2678 net->ipv6.rt6_stats->fib_rt_entries,
2679 net->ipv6.rt6_stats->fib_rt_cache,
Eric Dumazetfc66f952010-10-08 06:37:34 +00002680 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002681 net->ipv6.rt6_stats->fib_discarded_routes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002682
2683 return 0;
2684}
2685
2686static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2687{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002688 return single_open_net(inode, file, rt6_stats_seq_show);
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002689}
2690
Arjan van de Ven9a321442007-02-12 00:55:35 -08002691static const struct file_operations rt6_stats_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002692 .owner = THIS_MODULE,
2693 .open = rt6_stats_seq_open,
2694 .read = seq_read,
2695 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002696 .release = single_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002697};
2698#endif /* CONFIG_PROC_FS */
2699
2700#ifdef CONFIG_SYSCTL
2701
Linus Torvalds1da177e2005-04-16 15:20:36 -07002702static
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07002703int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002704 void __user *buffer, size_t *lenp, loff_t *ppos)
2705{
Lucian Adrian Grijincuc486da32011-02-24 19:48:03 +00002706 struct net *net;
2707 int delay;
2708 if (!write)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002709 return -EINVAL;
Lucian Adrian Grijincuc486da32011-02-24 19:48:03 +00002710
2711 net = (struct net *)ctl->extra1;
2712 delay = net->ipv6.sysctl.flush_delay;
2713 proc_dointvec(ctl, write, buffer, lenp, ppos);
2714 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2715 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002716}
2717
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002718ctl_table ipv6_route_table_template[] = {
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002719 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002720 .procname = "flush",
Daniel Lezcano49905092008-01-10 03:01:01 -08002721 .data = &init_net.ipv6.sysctl.flush_delay,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002722 .maxlen = sizeof(int),
Dave Jones89c8b3a12005-04-28 12:11:49 -07002723 .mode = 0200,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002724 .proc_handler = ipv6_sysctl_rtcache_flush
Linus Torvalds1da177e2005-04-16 15:20:36 -07002725 },
2726 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002727 .procname = "gc_thresh",
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002728 .data = &ip6_dst_ops_template.gc_thresh,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002729 .maxlen = sizeof(int),
2730 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002731 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002732 },
2733 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002734 .procname = "max_size",
Daniel Lezcano49905092008-01-10 03:01:01 -08002735 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002736 .maxlen = sizeof(int),
2737 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002738 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002739 },
2740 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002741 .procname = "gc_min_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002742 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002743 .maxlen = sizeof(int),
2744 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002745 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002746 },
2747 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002748 .procname = "gc_timeout",
Daniel Lezcano49905092008-01-10 03:01:01 -08002749 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002750 .maxlen = sizeof(int),
2751 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002752 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002753 },
2754 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002755 .procname = "gc_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002756 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002757 .maxlen = sizeof(int),
2758 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002759 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002760 },
2761 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002762 .procname = "gc_elasticity",
Daniel Lezcano49905092008-01-10 03:01:01 -08002763 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002764 .maxlen = sizeof(int),
2765 .mode = 0644,
Min Zhangf3d3f612010-08-14 22:42:51 -07002766 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002767 },
2768 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002769 .procname = "mtu_expires",
Daniel Lezcano49905092008-01-10 03:01:01 -08002770 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002771 .maxlen = sizeof(int),
2772 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002773 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002774 },
2775 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002776 .procname = "min_adv_mss",
Daniel Lezcano49905092008-01-10 03:01:01 -08002777 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002778 .maxlen = sizeof(int),
2779 .mode = 0644,
Min Zhangf3d3f612010-08-14 22:42:51 -07002780 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002781 },
2782 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002783 .procname = "gc_min_interval_ms",
Daniel Lezcano49905092008-01-10 03:01:01 -08002784 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002785 .maxlen = sizeof(int),
2786 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002787 .proc_handler = proc_dointvec_ms_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002788 },
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08002789 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002790};
2791
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002792struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002793{
2794 struct ctl_table *table;
2795
2796 table = kmemdup(ipv6_route_table_template,
2797 sizeof(ipv6_route_table_template),
2798 GFP_KERNEL);
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002799
2800 if (table) {
2801 table[0].data = &net->ipv6.sysctl.flush_delay;
Lucian Adrian Grijincuc486da32011-02-24 19:48:03 +00002802 table[0].extra1 = net;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002803 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002804 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2805 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2806 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2807 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2808 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2809 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2810 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
Alexey Dobriyan9c69fab2009-12-18 20:11:03 -08002811 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002812 }
2813
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002814 return table;
2815}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002816#endif
2817
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002818static int __net_init ip6_route_net_init(struct net *net)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002819{
Pavel Emelyanov633d424b2008-04-21 14:25:23 -07002820 int ret = -ENOMEM;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002821
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002822 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2823 sizeof(net->ipv6.ip6_dst_ops));
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002824
Eric Dumazetfc66f952010-10-08 06:37:34 +00002825 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2826 goto out_ip6_dst_ops;
2827
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002828 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2829 sizeof(*net->ipv6.ip6_null_entry),
2830 GFP_KERNEL);
2831 if (!net->ipv6.ip6_null_entry)
Eric Dumazetfc66f952010-10-08 06:37:34 +00002832 goto out_ip6_dst_entries;
Changli Gaod8d1f302010-06-10 23:31:35 -07002833 net->ipv6.ip6_null_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002834 (struct dst_entry *)net->ipv6.ip6_null_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002835 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Miller62fa8a82011-01-26 20:51:05 -08002836 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2837 ip6_template_metrics, true);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002838
2839#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2840 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2841 sizeof(*net->ipv6.ip6_prohibit_entry),
2842 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002843 if (!net->ipv6.ip6_prohibit_entry)
2844 goto out_ip6_null_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002845 net->ipv6.ip6_prohibit_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002846 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002847 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Miller62fa8a82011-01-26 20:51:05 -08002848 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2849 ip6_template_metrics, true);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002850
2851 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2852 sizeof(*net->ipv6.ip6_blk_hole_entry),
2853 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002854 if (!net->ipv6.ip6_blk_hole_entry)
2855 goto out_ip6_prohibit_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002856 net->ipv6.ip6_blk_hole_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002857 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002858 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Miller62fa8a82011-01-26 20:51:05 -08002859 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2860 ip6_template_metrics, true);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002861#endif
2862
Peter Zijlstrab339a47c2008-10-07 14:15:00 -07002863 net->ipv6.sysctl.flush_delay = 0;
2864 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2865 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2866 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2867 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2868 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2869 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2870 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2871
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002872#ifdef CONFIG_PROC_FS
2873 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2874 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2875#endif
Benjamin Thery6891a342008-03-04 13:49:47 -08002876 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2877
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002878 ret = 0;
2879out:
2880 return ret;
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002881
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002882#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2883out_ip6_prohibit_entry:
2884 kfree(net->ipv6.ip6_prohibit_entry);
2885out_ip6_null_entry:
2886 kfree(net->ipv6.ip6_null_entry);
2887#endif
Eric Dumazetfc66f952010-10-08 06:37:34 +00002888out_ip6_dst_entries:
2889 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002890out_ip6_dst_ops:
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002891 goto out;
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002892}
2893
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002894static void __net_exit ip6_route_net_exit(struct net *net)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002895{
2896#ifdef CONFIG_PROC_FS
2897 proc_net_remove(net, "ipv6_route");
2898 proc_net_remove(net, "rt6_stats");
2899#endif
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002900 kfree(net->ipv6.ip6_null_entry);
2901#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2902 kfree(net->ipv6.ip6_prohibit_entry);
2903 kfree(net->ipv6.ip6_blk_hole_entry);
2904#endif
Xiaotian Feng41bb78b2010-11-02 16:11:05 +00002905 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002906}
2907
2908static struct pernet_operations ip6_route_net_ops = {
2909 .init = ip6_route_net_init,
2910 .exit = ip6_route_net_exit,
2911};
2912
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002913static struct notifier_block ip6_route_dev_notifier = {
2914 .notifier_call = ip6_route_dev_notify,
2915 .priority = 0,
2916};
2917
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002918int __init ip6_route_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002919{
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002920 int ret;
2921
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002922 ret = -ENOMEM;
2923 ip6_dst_ops_template.kmem_cachep =
2924 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2925 SLAB_HWCACHE_ALIGN, NULL);
2926 if (!ip6_dst_ops_template.kmem_cachep)
Fernando Carrijoc19a28e2009-01-07 18:09:08 -08002927 goto out;
David S. Miller14e50e52007-05-24 18:17:54 -07002928
Eric Dumazetfc66f952010-10-08 06:37:34 +00002929 ret = dst_entries_init(&ip6_dst_blackhole_ops);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002930 if (ret)
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002931 goto out_kmem_cache;
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002932
Eric Dumazetfc66f952010-10-08 06:37:34 +00002933 ret = register_pernet_subsys(&ip6_route_net_ops);
2934 if (ret)
2935 goto out_dst_entries;
2936
Arnaud Ebalard5dc121e2008-10-01 02:37:56 -07002937 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2938
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002939 /* Registering of the loopback is done before this portion of code,
2940 * the loopback reference in rt6_info will not be taken, do it
2941 * manually for init_net */
Changli Gaod8d1f302010-06-10 23:31:35 -07002942 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002943 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2944 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
Changli Gaod8d1f302010-06-10 23:31:35 -07002945 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002946 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
Changli Gaod8d1f302010-06-10 23:31:35 -07002947 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002948 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2949 #endif
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002950 ret = fib6_init();
2951 if (ret)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002952 goto out_register_subsys;
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002953
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002954 ret = xfrm6_init();
2955 if (ret)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002956 goto out_fib6_init;
Daniel Lezcanoc35b7e72007-12-08 00:14:11 -08002957
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002958 ret = fib6_rules_init();
2959 if (ret)
2960 goto xfrm6_init;
Daniel Lezcano7e5449c2007-12-08 00:14:54 -08002961
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002962 ret = -ENOBUFS;
Greg Rosec7ac8672011-06-10 01:27:09 +00002963 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
2964 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
2965 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002966 goto fib6_rules_init;
2967
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002968 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002969 if (ret)
2970 goto fib6_rules_init;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002971
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002972out:
2973 return ret;
2974
2975fib6_rules_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002976 fib6_rules_cleanup();
2977xfrm6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002978 xfrm6_fini();
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002979out_fib6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002980 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002981out_register_subsys:
2982 unregister_pernet_subsys(&ip6_route_net_ops);
Eric Dumazetfc66f952010-10-08 06:37:34 +00002983out_dst_entries:
2984 dst_entries_destroy(&ip6_dst_blackhole_ops);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002985out_kmem_cache:
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002986 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002987 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002988}
2989
2990void ip6_route_cleanup(void)
2991{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002992 unregister_netdevice_notifier(&ip6_route_dev_notifier);
Thomas Graf101367c2006-08-04 03:39:02 -07002993 fib6_rules_cleanup();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002994 xfrm6_fini();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002995 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002996 unregister_pernet_subsys(&ip6_route_net_ops);
Xiaotian Feng41bb78b2010-11-02 16:11:05 +00002997 dst_entries_destroy(&ip6_dst_blackhole_ops);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002998 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002999}