blob: 9e69eb0ec6dd48a96d92bcf59c2cd89ad5e8412a [file] [log] [blame]
/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
13
/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		reachable.  otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */
26
Randy Dunlap4fc268d2006-01-11 12:17:47 -080027#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090037#include <linux/mroute6.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070038#include <linux/init.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039#include <linux/if_arp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070040#include <linux/proc_fs.h>
41#include <linux/seq_file.h>
Daniel Lezcano5b7c9312008-03-03 23:28:58 -080042#include <linux/nsproxy.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090043#include <linux/slab.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020044#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070045#include <net/snmp.h>
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#include <net/ndisc.h>
50#include <net/addrconf.h>
51#include <net/tcp.h>
52#include <linux/rtnetlink.h>
53#include <net/dst.h>
54#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -070055#include <net/netevent.h>
Thomas Graf21713eb2006-08-15 00:35:24 -070056#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070057
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
/*
 * Debug verbosity for this file.  Set RT6_DEBUG to 3 (or higher) to
 * compile in the RDBG()/RT6_TRACE() tracing output.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
/* Tracing disabled: both helpers expand to nothing. */
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
/* RDBG() takes a parenthesised printk argument list: RDBG(("fmt", args)). */
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif
74
Eric Dumazet21efcfa2011-07-19 20:18:36 +000075static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
76 const struct in6_addr *dest);
Linus Torvalds1da177e2005-04-16 15:20:36 -070077static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
David S. Miller0dbaee32010-12-13 12:52:14 -080078static unsigned int ip6_default_advmss(const struct dst_entry *dst);
David S. Millerd33e4552010-12-14 13:01:14 -080079static unsigned int ip6_default_mtu(const struct dst_entry *dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -070080static struct dst_entry *ip6_negative_advice(struct dst_entry *);
81static void ip6_dst_destroy(struct dst_entry *);
82static void ip6_dst_ifdown(struct dst_entry *,
83 struct net_device *dev, int how);
Daniel Lezcano569d3642008-01-18 03:56:57 -080084static int ip6_dst_gc(struct dst_ops *ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -070085
86static int ip6_pkt_discard(struct sk_buff *skb);
87static int ip6_pkt_discard_out(struct sk_buff *skb);
88static void ip6_link_failure(struct sk_buff *skb);
89static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
90
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080091#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -080092static struct rt6_info *rt6_add_route_info(struct net *net,
Eric Dumazetb71d1d42011-04-22 04:53:02 +000093 const struct in6_addr *prefix, int prefixlen,
94 const struct in6_addr *gwaddr, int ifindex,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080095 unsigned pref);
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -080096static struct rt6_info *rt6_get_route_info(struct net *net,
Eric Dumazetb71d1d42011-04-22 04:53:02 +000097 const struct in6_addr *prefix, int prefixlen,
98 const struct in6_addr *gwaddr, int ifindex);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080099#endif
100
David S. Miller06582542011-01-27 14:58:42 -0800101static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
102{
103 struct rt6_info *rt = (struct rt6_info *) dst;
104 struct inet_peer *peer;
105 u32 *p = NULL;
106
107 if (!rt->rt6i_peer)
108 rt6_bind_peer(rt, 1);
109
110 peer = rt->rt6i_peer;
111 if (peer) {
112 u32 *old_p = __DST_METRICS_PTR(old);
113 unsigned long prev, new;
114
115 p = peer->metrics;
116 if (inet_metrics_new(peer))
117 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
118
119 new = (unsigned long) p;
120 prev = cmpxchg(&dst->_metrics, old, new);
121
122 if (prev != old) {
123 p = __DST_METRICS_PTR(prev);
124 if (prev & DST_METRICS_READ_ONLY)
125 p = NULL;
126 }
127 }
128 return p;
129}
130
David S. Millerd3aaeb32011-07-18 00:40:17 -0700131static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
132{
133 return __neigh_lookup_errno(&nd_tbl, daddr, dst->dev);
134}
135
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -0800136static struct dst_ops ip6_dst_ops_template = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700137 .family = AF_INET6,
Harvey Harrison09640e632009-02-01 00:45:17 -0800138 .protocol = cpu_to_be16(ETH_P_IPV6),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700139 .gc = ip6_dst_gc,
140 .gc_thresh = 1024,
141 .check = ip6_dst_check,
David S. Miller0dbaee32010-12-13 12:52:14 -0800142 .default_advmss = ip6_default_advmss,
David S. Millerd33e4552010-12-14 13:01:14 -0800143 .default_mtu = ip6_default_mtu,
David S. Miller06582542011-01-27 14:58:42 -0800144 .cow_metrics = ipv6_cow_metrics,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700145 .destroy = ip6_dst_destroy,
146 .ifdown = ip6_dst_ifdown,
147 .negative_advice = ip6_negative_advice,
148 .link_failure = ip6_link_failure,
149 .update_pmtu = ip6_rt_update_pmtu,
Herbert Xu1ac06e02008-05-20 14:32:14 -0700150 .local_out = __ip6_local_out,
David S. Millerd3aaeb32011-07-18 00:40:17 -0700151 .neigh_lookup = ip6_neigh_lookup,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700152};
153
/* default_mtu hook for blackhole routes: always reports an MTU of 0. */
static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
{
	const unsigned int blackhole_mtu = 0;

	return blackhole_mtu;
}
158
David S. Miller14e50e52007-05-24 18:17:54 -0700159static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
160{
161}
162
Held Bernhard0972ddb2011-04-24 22:07:32 +0000163static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
164 unsigned long old)
165{
166 return NULL;
167}
168
David S. Miller14e50e52007-05-24 18:17:54 -0700169static struct dst_ops ip6_dst_blackhole_ops = {
170 .family = AF_INET6,
Harvey Harrison09640e632009-02-01 00:45:17 -0800171 .protocol = cpu_to_be16(ETH_P_IPV6),
David S. Miller14e50e52007-05-24 18:17:54 -0700172 .destroy = ip6_dst_destroy,
173 .check = ip6_dst_check,
Roland Dreierec831ea2011-01-31 13:16:00 -0800174 .default_mtu = ip6_blackhole_default_mtu,
Eric Dumazet214f45c2011-02-18 11:39:01 -0800175 .default_advmss = ip6_default_advmss,
David S. Miller14e50e52007-05-24 18:17:54 -0700176 .update_pmtu = ip6_rt_blackhole_update_pmtu,
Held Bernhard0972ddb2011-04-24 22:07:32 +0000177 .cow_metrics = ip6_rt_blackhole_cow_metrics,
David S. Millerd3aaeb32011-07-18 00:40:17 -0700178 .neigh_lookup = ip6_neigh_lookup,
David S. Miller14e50e52007-05-24 18:17:54 -0700179};
180
David S. Miller62fa8a82011-01-26 20:51:05 -0800181static const u32 ip6_template_metrics[RTAX_MAX] = {
182 [RTAX_HOPLIMIT - 1] = 255,
183};
184
Daniel Lezcanobdb32892008-03-04 13:48:10 -0800185static struct rt6_info ip6_null_entry_template = {
Changli Gaod8d1f302010-06-10 23:31:35 -0700186 .dst = {
187 .__refcnt = ATOMIC_INIT(1),
188 .__use = 1,
189 .obsolete = -1,
190 .error = -ENETUNREACH,
Changli Gaod8d1f302010-06-10 23:31:35 -0700191 .input = ip6_pkt_discard,
192 .output = ip6_pkt_discard_out,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700193 },
194 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
Jean-Mickael Guerin4f724272009-05-20 17:38:59 -0700195 .rt6i_protocol = RTPROT_KERNEL,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700196 .rt6i_metric = ~(u32) 0,
197 .rt6i_ref = ATOMIC_INIT(1),
198};
199
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

/* Packet handlers used by the prohibit template below. */
static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/*
 * Template for the "prohibit" route: a reject entry that fails with
 * -EACCES and hands packets to the ip6_pkt_prohibit handlers.
 */
static struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_ref	= ATOMIC_INIT(1),
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
};

/*
 * Template for the "blackhole" route: a reject entry that fails with
 * -EINVAL and passes packets to dst_discard in both directions.
 */
static struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard,
	},
	.rt6i_ref	= ATOMIC_INIT(1),
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
};

#endif
236
Linus Torvalds1da177e2005-04-16 15:20:36 -0700237/* allocate dst with ip6_dst_ops */
David S. Miller5c1e6aa2011-04-28 14:13:38 -0700238static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
David S. Miller957c6652011-06-24 15:25:00 -0700239 struct net_device *dev,
240 int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700241{
David S. Miller957c6652011-06-24 15:25:00 -0700242 struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
David S. Millercf911662011-04-28 14:31:47 -0700243
244 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
245
246 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700247}
248
249static void ip6_dst_destroy(struct dst_entry *dst)
250{
251 struct rt6_info *rt = (struct rt6_info *)dst;
252 struct inet6_dev *idev = rt->rt6i_idev;
David S. Millerb3419362010-11-30 12:27:11 -0800253 struct inet_peer *peer = rt->rt6i_peer;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700254
255 if (idev != NULL) {
256 rt->rt6i_idev = NULL;
257 in6_dev_put(idev);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900258 }
David S. Millerb3419362010-11-30 12:27:11 -0800259 if (peer) {
David S. Millerb3419362010-11-30 12:27:11 -0800260 rt->rt6i_peer = NULL;
261 inet_putpeer(peer);
262 }
263}
264
David S. Miller6431cbc2011-02-07 20:38:06 -0800265static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
266
267static u32 rt6_peer_genid(void)
268{
269 return atomic_read(&__rt6_peer_genid);
270}
271
David S. Millerb3419362010-11-30 12:27:11 -0800272void rt6_bind_peer(struct rt6_info *rt, int create)
273{
274 struct inet_peer *peer;
275
David S. Millerb3419362010-11-30 12:27:11 -0800276 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
277 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
278 inet_putpeer(peer);
David S. Miller6431cbc2011-02-07 20:38:06 -0800279 else
280 rt->rt6i_peer_genid = rt6_peer_genid();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700281}
282
283static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
284 int how)
285{
286 struct rt6_info *rt = (struct rt6_info *)dst;
287 struct inet6_dev *idev = rt->rt6i_idev;
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800288 struct net_device *loopback_dev =
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900289 dev_net(dev)->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700290
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800291 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
292 struct inet6_dev *loopback_idev =
293 in6_dev_get(loopback_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700294 if (loopback_idev != NULL) {
295 rt->rt6i_idev = loopback_idev;
296 in6_dev_put(idev);
297 }
298 }
299}
300
301static __inline__ int rt6_check_expired(const struct rt6_info *rt)
302{
Eric Dumazeta02cec22010-09-22 20:43:57 +0000303 return (rt->rt6i_flags & RTF_EXPIRES) &&
304 time_after(jiffies, rt->rt6i_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700305}
306
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000307static inline int rt6_need_strict(const struct in6_addr *daddr)
Thomas Grafc71099a2006-08-04 23:20:06 -0700308{
Eric Dumazeta02cec22010-09-22 20:43:57 +0000309 return ipv6_addr_type(daddr) &
310 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
Thomas Grafc71099a2006-08-04 23:20:06 -0700311}
312
Linus Torvalds1da177e2005-04-16 15:20:36 -0700313/*
Thomas Grafc71099a2006-08-04 23:20:06 -0700314 * Route lookup. Any table->tb6_lock is implied.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700315 */
316
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800317static inline struct rt6_info *rt6_device_match(struct net *net,
318 struct rt6_info *rt,
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000319 const struct in6_addr *saddr,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700320 int oif,
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700321 int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700322{
323 struct rt6_info *local = NULL;
324 struct rt6_info *sprt;
325
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900326 if (!oif && ipv6_addr_any(saddr))
327 goto out;
328
Changli Gaod8d1f302010-06-10 23:31:35 -0700329 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900330 struct net_device *dev = sprt->rt6i_dev;
331
332 if (oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700333 if (dev->ifindex == oif)
334 return sprt;
335 if (dev->flags & IFF_LOOPBACK) {
336 if (sprt->rt6i_idev == NULL ||
337 sprt->rt6i_idev->dev->ifindex != oif) {
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700338 if (flags & RT6_LOOKUP_F_IFACE && oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700339 continue;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900340 if (local && (!oif ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700341 local->rt6i_idev->dev->ifindex == oif))
342 continue;
343 }
344 local = sprt;
345 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900346 } else {
347 if (ipv6_chk_addr(net, saddr, dev,
348 flags & RT6_LOOKUP_F_IFACE))
349 return sprt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700350 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900351 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700352
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900353 if (oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700354 if (local)
355 return local;
356
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700357 if (flags & RT6_LOOKUP_F_IFACE)
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800358 return net->ipv6.ip6_null_entry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700359 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900360out:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700361 return rt;
362}
363
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: if the neighbour behind @rt is not in a
 * NUD_VALID state and was last updated more than rtr_probe_interval ago,
 * send a neighbour solicitation to its solicited-node multicast address.
 *
 * @rt may be NULL, in which case nothing happens.
 *
 * Okay, this does not seem to be appropriate
 * for now, however, we need to check if it
 * is really so; aka Router Reachability Probing.
 *
 * Router Reachability Probe MUST be rate-limited
 * to no more than one per minute.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	int probe_due;

	rcu_read_lock();
	neigh = rt ? dst_get_neighbour(&rt->dst) : NULL;
	if (!neigh || (neigh->nud_state & NUD_VALID))
		goto out;

	/* Re-check the state under the neighbour lock. */
	read_lock_bh(&neigh->lock);
	probe_due = !(neigh->nud_state & NUD_VALID) &&
		    time_after(jiffies, neigh->updated +
			       rt->rt6i_idev->cnf.rtr_probe_interval);
	/* NOTE(review): neigh->updated is written with only the read lock held. */
	if (probe_due)
		neigh->updated = jiffies;
	read_unlock_bh(&neigh->lock);

	if (probe_due) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
	}
out:
	rcu_read_unlock();
}
#else
/* Router preference support disabled: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
403
Linus Torvalds1da177e2005-04-16 15:20:36 -0700404/*
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800405 * Default Router Selection (RFC 2461 6.3.6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700406 */
Dave Jonesb6f99a22007-03-22 12:27:49 -0700407static inline int rt6_check_dev(struct rt6_info *rt, int oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700408{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800409 struct net_device *dev = rt->rt6i_dev;
David S. Miller161980f2007-04-06 11:42:27 -0700410 if (!oif || dev->ifindex == oif)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800411 return 2;
David S. Miller161980f2007-04-06 11:42:27 -0700412 if ((dev->flags & IFF_LOOPBACK) &&
413 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
414 return 1;
415 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700416}
417
Dave Jonesb6f99a22007-03-22 12:27:49 -0700418static inline int rt6_check_neigh(struct rt6_info *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700419{
Eric Dumazetf2c31e32011-07-29 19:00:53 +0000420 struct neighbour *neigh;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800421 int m;
Eric Dumazetf2c31e32011-07-29 19:00:53 +0000422
423 rcu_read_lock();
424 neigh = dst_get_neighbour(&rt->dst);
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700425 if (rt->rt6i_flags & RTF_NONEXTHOP ||
426 !(rt->rt6i_flags & RTF_GATEWAY))
427 m = 1;
428 else if (neigh) {
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800429 read_lock_bh(&neigh->lock);
430 if (neigh->nud_state & NUD_VALID)
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700431 m = 2;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800432#ifdef CONFIG_IPV6_ROUTER_PREF
433 else if (neigh->nud_state & NUD_FAILED)
434 m = 0;
435#endif
436 else
YOSHIFUJI Hideakiea73ee22006-11-06 09:45:44 -0800437 m = 1;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800438 read_unlock_bh(&neigh->lock);
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800439 } else
440 m = 0;
Eric Dumazetf2c31e32011-07-29 19:00:53 +0000441 rcu_read_unlock();
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800442 return m;
443}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700444
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800445static int rt6_score_route(struct rt6_info *rt, int oif,
446 int strict)
447{
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700448 int m, n;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900449
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700450 m = rt6_check_dev(rt, oif);
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700451 if (!m && (strict & RT6_LOOKUP_F_IFACE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800452 return -1;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800453#ifdef CONFIG_IPV6_ROUTER_PREF
454 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
455#endif
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700456 n = rt6_check_neigh(rt);
YOSHIFUJI Hideaki557e92e2006-11-06 09:45:45 -0800457 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800458 return -1;
459 return m;
460}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700461
David S. Millerf11e6652007-03-24 20:36:25 -0700462static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
463 int *mpri, struct rt6_info *match)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800464{
David S. Millerf11e6652007-03-24 20:36:25 -0700465 int m;
466
467 if (rt6_check_expired(rt))
468 goto out;
469
470 m = rt6_score_route(rt, oif, strict);
471 if (m < 0)
472 goto out;
473
474 if (m > *mpri) {
475 if (strict & RT6_LOOKUP_F_REACHABLE)
476 rt6_probe(match);
477 *mpri = m;
478 match = rt;
479 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
480 rt6_probe(rt);
481 }
482
483out:
484 return match;
485}
486
487static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
488 struct rt6_info *rr_head,
489 u32 metric, int oif, int strict)
490{
491 struct rt6_info *rt, *match;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800492 int mpri = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700493
David S. Millerf11e6652007-03-24 20:36:25 -0700494 match = NULL;
495 for (rt = rr_head; rt && rt->rt6i_metric == metric;
Changli Gaod8d1f302010-06-10 23:31:35 -0700496 rt = rt->dst.rt6_next)
David S. Millerf11e6652007-03-24 20:36:25 -0700497 match = find_match(rt, oif, strict, &mpri, match);
498 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
Changli Gaod8d1f302010-06-10 23:31:35 -0700499 rt = rt->dst.rt6_next)
David S. Millerf11e6652007-03-24 20:36:25 -0700500 match = find_match(rt, oif, strict, &mpri, match);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800501
David S. Millerf11e6652007-03-24 20:36:25 -0700502 return match;
503}
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800504
David S. Millerf11e6652007-03-24 20:36:25 -0700505static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
506{
507 struct rt6_info *match, *rt0;
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800508 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700509
David S. Millerf11e6652007-03-24 20:36:25 -0700510 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
Harvey Harrison0dc47872008-03-05 20:47:47 -0800511 __func__, fn->leaf, oif);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700512
David S. Millerf11e6652007-03-24 20:36:25 -0700513 rt0 = fn->rr_ptr;
514 if (!rt0)
515 fn->rr_ptr = rt0 = fn->leaf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700516
David S. Millerf11e6652007-03-24 20:36:25 -0700517 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700518
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800519 if (!match &&
David S. Millerf11e6652007-03-24 20:36:25 -0700520 (strict & RT6_LOOKUP_F_REACHABLE)) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700521 struct rt6_info *next = rt0->dst.rt6_next;
David S. Millerf11e6652007-03-24 20:36:25 -0700522
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800523 /* no entries matched; do round-robin */
David S. Millerf11e6652007-03-24 20:36:25 -0700524 if (!next || next->rt6i_metric != rt0->rt6i_metric)
525 next = fn->leaf;
526
527 if (next != rt0)
528 fn->rr_ptr = next;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700529 }
530
David S. Millerf11e6652007-03-24 20:36:25 -0700531 RT6_TRACE("%s() => %p\n",
Harvey Harrison0dc47872008-03-05 20:47:47 -0800532 __func__, match);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700533
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900534 net = dev_net(rt0->rt6i_dev);
Eric Dumazeta02cec22010-09-22 20:43:57 +0000535 return match ? match : net->ipv6.ip6_null_entry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700536}
537
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a received Route Information option.
 *
 * @dev:    device the option arrived on
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    length of the option in bytes
 * @gwaddr: address of the advertising router
 *
 * The option is validated, then the matching route-info route is looked
 * up.  A zero lifetime deletes an existing route; a non-zero lifetime
 * creates a missing route or refreshes the preference of an existing one.
 * Finally the expiry is (re)armed, or cleared for a non-finite lifetime,
 * and the reference held on the route is released.
 *
 * Returns 0 on success, -EINVAL if the option is malformed or carries an
 * invalid preference.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct net *net = dev_net(dev);
	struct in6_addr prefix_buf, *prefix;
	unsigned long lifetime;
	unsigned int pref;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length != 3) {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	} else {
		/* Full 128-bit prefix present: use it in place. */
		prefix = (struct in6_addr *)rinfo->prefix;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* A zero lifetime withdraws the route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
	else if (lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime)) {
		rt->rt6i_expires = jiffies + HZ * lifetime;
		rt->rt6i_flags |= RTF_EXPIRES;
	} else {
		rt->rt6i_flags &= ~RTF_EXPIRES;
	}
	dst_release(&rt->dst);

	return 0;
}
#endif
611
/*
 * BACKTRACK(__net, saddr): when the current result `rt` is the namespace's
 * null entry, climb from `fn` towards the tree root looking for another
 * node that carries route info.  When the parent has a source subtree
 * that is not the node we came from, that subtree is searched with
 * @saddr.
 *
 * The macro relies on the caller's scope: it reads and updates the local
 * variables `rt` and `fn`, jumps to the label `restart` when a node with
 * RTN_RTINFO is found, and to the label `out` when the top-level root is
 * reached.
 */
#define BACKTRACK(__net, saddr) \
do { \
	if (rt == __net->ipv6.ip6_null_entry) { \
		struct fib6_node *pn; \
		for (;;) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
Thomas Grafc71099a2006-08-04 23:20:06 -0700629
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800630static struct rt6_info *ip6_pol_route_lookup(struct net *net,
631 struct fib6_table *table,
David S. Miller4c9483b2011-03-12 16:22:43 -0500632 struct flowi6 *fl6, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700633{
634 struct fib6_node *fn;
635 struct rt6_info *rt;
636
Thomas Grafc71099a2006-08-04 23:20:06 -0700637 read_lock_bh(&table->tb6_lock);
David S. Miller4c9483b2011-03-12 16:22:43 -0500638 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
Thomas Grafc71099a2006-08-04 23:20:06 -0700639restart:
640 rt = fn->leaf;
David S. Miller4c9483b2011-03-12 16:22:43 -0500641 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
642 BACKTRACK(net, &fl6->saddr);
Thomas Grafc71099a2006-08-04 23:20:06 -0700643out:
Changli Gaod8d1f302010-06-10 23:31:35 -0700644 dst_use(&rt->dst, jiffies);
Thomas Grafc71099a2006-08-04 23:20:06 -0700645 read_unlock_bh(&table->tb6_lock);
Thomas Grafc71099a2006-08-04 23:20:06 -0700646 return rt;
647
648}
649
YOSHIFUJI Hideaki9acd9f32008-04-10 15:42:10 +0900650struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
651 const struct in6_addr *saddr, int oif, int strict)
Thomas Grafc71099a2006-08-04 23:20:06 -0700652{
David S. Miller4c9483b2011-03-12 16:22:43 -0500653 struct flowi6 fl6 = {
654 .flowi6_oif = oif,
655 .daddr = *daddr,
Thomas Grafc71099a2006-08-04 23:20:06 -0700656 };
657 struct dst_entry *dst;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700658 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
Thomas Grafc71099a2006-08-04 23:20:06 -0700659
Thomas Grafadaa70b2006-10-13 15:01:03 -0700660 if (saddr) {
David S. Miller4c9483b2011-03-12 16:22:43 -0500661 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
Thomas Grafadaa70b2006-10-13 15:01:03 -0700662 flags |= RT6_LOOKUP_F_HAS_SADDR;
663 }
664
David S. Miller4c9483b2011-03-12 16:22:43 -0500665 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
Thomas Grafc71099a2006-08-04 23:20:06 -0700666 if (dst->error == 0)
667 return (struct rt6_info *) dst;
668
669 dst_release(dst);
670
Linus Torvalds1da177e2005-04-16 15:20:36 -0700671 return NULL;
672}
673
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900674EXPORT_SYMBOL(rt6_lookup);
675
/* ip6_ins_rt is called with table->tb6_lock not held.
   It takes a new route entry; if the addition fails for any reason, the
   route is freed. In any case, if the caller does not hold it, it may
   be destroyed.
 */
681
Thomas Graf86872cb2006-08-22 00:01:08 -0700682static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700683{
684 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -0700685 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700686
Thomas Grafc71099a2006-08-04 23:20:06 -0700687 table = rt->rt6i_table;
688 write_lock_bh(&table->tb6_lock);
Thomas Graf86872cb2006-08-22 00:01:08 -0700689 err = fib6_add(&table->tb6_root, rt, info);
Thomas Grafc71099a2006-08-04 23:20:06 -0700690 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700691
692 return err;
693}
694
Thomas Graf40e22e82006-08-22 00:00:45 -0700695int ip6_ins_rt(struct rt6_info *rt)
696{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800697 struct nl_info info = {
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900698 .nl_net = dev_net(rt->rt6i_dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800699 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -0800700 return __ip6_ins_rt(rt, &info);
Thomas Graf40e22e82006-08-22 00:00:45 -0700701}
702
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000703static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
704 const struct in6_addr *daddr,
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000705 const struct in6_addr *saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700706{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700707 struct rt6_info *rt;
708
709 /*
710 * Clone the route.
711 */
712
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000713 rt = ip6_rt_copy(ort, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700714
715 if (rt) {
David S. Miller14deae42009-01-04 16:04:39 -0800716 struct neighbour *neigh;
717 int attempts = !in_softirq();
718
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900719 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
720 if (rt->rt6i_dst.plen != 128 &&
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000721 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900722 rt->rt6i_flags |= RTF_ANYCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700723 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900724 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700725
726 rt->rt6i_dst.plen = 128;
727 rt->rt6i_flags |= RTF_CACHE;
Changli Gaod8d1f302010-06-10 23:31:35 -0700728 rt->dst.flags |= DST_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700729
730#ifdef CONFIG_IPV6_SUBTREES
731 if (rt->rt6i_src.plen && saddr) {
732 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
733 rt->rt6i_src.plen = 128;
734 }
735#endif
736
David S. Miller14deae42009-01-04 16:04:39 -0800737 retry:
738 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
739 if (IS_ERR(neigh)) {
740 struct net *net = dev_net(rt->rt6i_dev);
741 int saved_rt_min_interval =
742 net->ipv6.sysctl.ip6_rt_gc_min_interval;
743 int saved_rt_elasticity =
744 net->ipv6.sysctl.ip6_rt_gc_elasticity;
745
746 if (attempts-- > 0) {
747 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
748 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
749
Alexey Dobriyan86393e52009-08-29 01:34:49 +0000750 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
David S. Miller14deae42009-01-04 16:04:39 -0800751
752 net->ipv6.sysctl.ip6_rt_gc_elasticity =
753 saved_rt_elasticity;
754 net->ipv6.sysctl.ip6_rt_gc_min_interval =
755 saved_rt_min_interval;
756 goto retry;
757 }
758
759 if (net_ratelimit())
760 printk(KERN_WARNING
Ulrich Weber7e1b33e2010-09-27 15:02:18 -0700761 "ipv6: Neighbour table overflow.\n");
Changli Gaod8d1f302010-06-10 23:31:35 -0700762 dst_free(&rt->dst);
David S. Miller14deae42009-01-04 16:04:39 -0800763 return NULL;
764 }
David S. Miller69cce1d2011-07-17 23:09:49 -0700765 dst_set_neighbour(&rt->dst, neigh);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700766
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800767 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700768
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800769 return rt;
770}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700771
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000772static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
773 const struct in6_addr *daddr)
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800774{
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000775 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
776
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800777 if (rt) {
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800778 rt->rt6i_dst.plen = 128;
779 rt->rt6i_flags |= RTF_CACHE;
Changli Gaod8d1f302010-06-10 23:31:35 -0700780 rt->dst.flags |= DST_HOST;
Eric Dumazetf2c31e32011-07-29 19:00:53 +0000781 dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_raw(&ort->dst)));
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800782 }
783 return rt;
784}
785
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800786static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
David S. Miller4c9483b2011-03-12 16:22:43 -0500787 struct flowi6 *fl6, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700788{
789 struct fib6_node *fn;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800790 struct rt6_info *rt, *nrt;
Thomas Grafc71099a2006-08-04 23:20:06 -0700791 int strict = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700792 int attempts = 3;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800793 int err;
YOSHIFUJI Hideaki53b79972008-07-19 22:35:03 -0700794 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700795
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700796 strict |= flags & RT6_LOOKUP_F_IFACE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700797
798relookup:
Thomas Grafc71099a2006-08-04 23:20:06 -0700799 read_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700800
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800801restart_2:
David S. Miller4c9483b2011-03-12 16:22:43 -0500802 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700803
804restart:
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700805 rt = rt6_select(fn, oif, strict | reachable);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800806
David S. Miller4c9483b2011-03-12 16:22:43 -0500807 BACKTRACK(net, &fl6->saddr);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800808 if (rt == net->ipv6.ip6_null_entry ||
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800809 rt->rt6i_flags & RTF_CACHE)
YOSHIFUJI Hideaki1ddef0442006-03-20 17:01:24 -0800810 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700811
Changli Gaod8d1f302010-06-10 23:31:35 -0700812 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700813 read_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800814
Eric Dumazetf2c31e32011-07-29 19:00:53 +0000815 if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
David S. Miller4c9483b2011-03-12 16:22:43 -0500816 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
David S. Miller7343ff32011-03-09 19:55:25 -0800817 else if (!(rt->dst.flags & DST_HOST))
David S. Miller4c9483b2011-03-12 16:22:43 -0500818 nrt = rt6_alloc_clone(rt, &fl6->daddr);
David S. Miller7343ff32011-03-09 19:55:25 -0800819 else
820 goto out2;
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800821
Changli Gaod8d1f302010-06-10 23:31:35 -0700822 dst_release(&rt->dst);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800823 rt = nrt ? : net->ipv6.ip6_null_entry;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800824
Changli Gaod8d1f302010-06-10 23:31:35 -0700825 dst_hold(&rt->dst);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800826 if (nrt) {
Thomas Graf40e22e82006-08-22 00:00:45 -0700827 err = ip6_ins_rt(nrt);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800828 if (!err)
829 goto out2;
830 }
831
832 if (--attempts <= 0)
833 goto out2;
834
835 /*
Thomas Grafc71099a2006-08-04 23:20:06 -0700836 * Race condition! In the gap, when table->tb6_lock was
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800837 * released someone could insert this route. Relookup.
838 */
Changli Gaod8d1f302010-06-10 23:31:35 -0700839 dst_release(&rt->dst);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800840 goto relookup;
841
842out:
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800843 if (reachable) {
844 reachable = 0;
845 goto restart_2;
846 }
Changli Gaod8d1f302010-06-10 23:31:35 -0700847 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700848 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700849out2:
Changli Gaod8d1f302010-06-10 23:31:35 -0700850 rt->dst.lastuse = jiffies;
851 rt->dst.__use++;
Thomas Grafc71099a2006-08-04 23:20:06 -0700852
853 return rt;
854}
855
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800856static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
David S. Miller4c9483b2011-03-12 16:22:43 -0500857 struct flowi6 *fl6, int flags)
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700858{
David S. Miller4c9483b2011-03-12 16:22:43 -0500859 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700860}
861
Thomas Grafc71099a2006-08-04 23:20:06 -0700862void ip6_route_input(struct sk_buff *skb)
863{
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000864 const struct ipv6hdr *iph = ipv6_hdr(skb);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900865 struct net *net = dev_net(skb->dev);
Thomas Grafadaa70b2006-10-13 15:01:03 -0700866 int flags = RT6_LOOKUP_F_HAS_SADDR;
David S. Miller4c9483b2011-03-12 16:22:43 -0500867 struct flowi6 fl6 = {
868 .flowi6_iif = skb->dev->ifindex,
869 .daddr = iph->daddr,
870 .saddr = iph->saddr,
871 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
872 .flowi6_mark = skb->mark,
873 .flowi6_proto = iph->nexthdr,
Thomas Grafc71099a2006-08-04 23:20:06 -0700874 };
Thomas Grafadaa70b2006-10-13 15:01:03 -0700875
Thomas Goff1d6e55f2009-01-27 22:39:59 -0800876 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
Thomas Grafadaa70b2006-10-13 15:01:03 -0700877 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700878
David S. Miller4c9483b2011-03-12 16:22:43 -0500879 skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
Thomas Grafc71099a2006-08-04 23:20:06 -0700880}
881
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800882static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
David S. Miller4c9483b2011-03-12 16:22:43 -0500883 struct flowi6 *fl6, int flags)
Thomas Grafc71099a2006-08-04 23:20:06 -0700884{
David S. Miller4c9483b2011-03-12 16:22:43 -0500885 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
Thomas Grafc71099a2006-08-04 23:20:06 -0700886}
887
Florian Westphal9c7a4f9c2011-03-22 19:17:36 -0700888struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
David S. Miller4c9483b2011-03-12 16:22:43 -0500889 struct flowi6 *fl6)
Thomas Grafc71099a2006-08-04 23:20:06 -0700890{
891 int flags = 0;
892
David S. Miller4c9483b2011-03-12 16:22:43 -0500893 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700894 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700895
David S. Miller4c9483b2011-03-12 16:22:43 -0500896 if (!ipv6_addr_any(&fl6->saddr))
Thomas Grafadaa70b2006-10-13 15:01:03 -0700897 flags |= RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideaki / 吉藤英明0c9a2ac2010-03-07 00:14:44 +0000898 else if (sk)
899 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
Thomas Grafadaa70b2006-10-13 15:01:03 -0700900
David S. Miller4c9483b2011-03-12 16:22:43 -0500901 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700902}
903
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900904EXPORT_SYMBOL(ip6_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700905
David S. Miller2774c132011-03-01 14:59:04 -0800906struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
David S. Miller14e50e52007-05-24 18:17:54 -0700907{
David S. Miller5c1e6aa2011-04-28 14:13:38 -0700908 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
David S. Miller14e50e52007-05-24 18:17:54 -0700909 struct dst_entry *new = NULL;
910
David S. Miller5c1e6aa2011-04-28 14:13:38 -0700911 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
David S. Miller14e50e52007-05-24 18:17:54 -0700912 if (rt) {
David S. Millercf911662011-04-28 14:31:47 -0700913 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
914
Changli Gaod8d1f302010-06-10 23:31:35 -0700915 new = &rt->dst;
David S. Miller14e50e52007-05-24 18:17:54 -0700916
David S. Miller14e50e52007-05-24 18:17:54 -0700917 new->__use = 1;
Herbert Xu352e5122007-11-13 21:34:06 -0800918 new->input = dst_discard;
919 new->output = dst_discard;
David S. Miller14e50e52007-05-24 18:17:54 -0700920
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000921 if (dst_metrics_read_only(&ort->dst))
922 new->_metrics = ort->dst._metrics;
923 else
924 dst_copy_metrics(new, &ort->dst);
David S. Miller14e50e52007-05-24 18:17:54 -0700925 rt->rt6i_idev = ort->rt6i_idev;
926 if (rt->rt6i_idev)
927 in6_dev_hold(rt->rt6i_idev);
928 rt->rt6i_expires = 0;
929
930 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
931 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
932 rt->rt6i_metric = 0;
933
934 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
935#ifdef CONFIG_IPV6_SUBTREES
936 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
937#endif
938
939 dst_free(new);
940 }
941
David S. Miller69ead7a2011-03-01 14:45:33 -0800942 dst_release(dst_orig);
943 return new ? new : ERR_PTR(-ENOMEM);
David S. Miller14e50e52007-05-24 18:17:54 -0700944}
David S. Miller14e50e52007-05-24 18:17:54 -0700945
Linus Torvalds1da177e2005-04-16 15:20:36 -0700946/*
947 * Destination cache support functions
948 */
949
950static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
951{
952 struct rt6_info *rt;
953
954 rt = (struct rt6_info *) dst;
955
David S. Miller6431cbc2011-02-07 20:38:06 -0800956 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
957 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
958 if (!rt->rt6i_peer)
959 rt6_bind_peer(rt, 0);
960 rt->rt6i_peer_genid = rt6_peer_genid();
961 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700962 return dst;
David S. Miller6431cbc2011-02-07 20:38:06 -0800963 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700964 return NULL;
965}
966
967static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
968{
969 struct rt6_info *rt = (struct rt6_info *) dst;
970
971 if (rt) {
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +0000972 if (rt->rt6i_flags & RTF_CACHE) {
973 if (rt6_check_expired(rt)) {
974 ip6_del_rt(rt);
975 dst = NULL;
976 }
977 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700978 dst_release(dst);
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +0000979 dst = NULL;
980 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700981 }
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +0000982 return dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700983}
984
985static void ip6_link_failure(struct sk_buff *skb)
986{
987 struct rt6_info *rt;
988
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +0000989 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700990
Eric Dumazetadf30902009-06-02 05:19:30 +0000991 rt = (struct rt6_info *) skb_dst(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700992 if (rt) {
993 if (rt->rt6i_flags&RTF_CACHE) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700994 dst_set_expires(&rt->dst, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700995 rt->rt6i_flags |= RTF_EXPIRES;
996 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
997 rt->rt6i_node->fn_sernum = -1;
998 }
999}
1000
1001static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1002{
1003 struct rt6_info *rt6 = (struct rt6_info*)dst;
1004
1005 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1006 rt6->rt6i_flags |= RTF_MODIFIED;
1007 if (mtu < IPV6_MIN_MTU) {
David S. Millerdefb3512010-12-08 21:16:57 -08001008 u32 features = dst_metric(dst, RTAX_FEATURES);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001009 mtu = IPV6_MIN_MTU;
David S. Millerdefb3512010-12-08 21:16:57 -08001010 features |= RTAX_FEATURE_ALLFRAG;
1011 dst_metric_set(dst, RTAX_FEATURES, features);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001012 }
David S. Millerdefb3512010-12-08 21:16:57 -08001013 dst_metric_set(dst, RTAX_MTU, mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001014 }
1015}
1016
David S. Miller0dbaee32010-12-13 12:52:14 -08001017static unsigned int ip6_default_advmss(const struct dst_entry *dst)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001018{
David S. Miller0dbaee32010-12-13 12:52:14 -08001019 struct net_device *dev = dst->dev;
1020 unsigned int mtu = dst_mtu(dst);
1021 struct net *net = dev_net(dev);
1022
Linus Torvalds1da177e2005-04-16 15:20:36 -07001023 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1024
Daniel Lezcano55786892008-03-04 13:47:47 -08001025 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1026 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001027
1028 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001029 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1030 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1031 * IPV6_MAXPLEN is also valid and means: "any MSS,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001032 * rely only on pmtu discovery"
1033 */
1034 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1035 mtu = IPV6_MAXPLEN;
1036 return mtu;
1037}
1038
David S. Millerd33e4552010-12-14 13:01:14 -08001039static unsigned int ip6_default_mtu(const struct dst_entry *dst)
1040{
1041 unsigned int mtu = IPV6_MIN_MTU;
1042 struct inet6_dev *idev;
1043
1044 rcu_read_lock();
1045 idev = __in6_dev_get(dst->dev);
1046 if (idev)
1047 mtu = idev->cnf.mtu6;
1048 rcu_read_unlock();
1049
1050 return mtu;
1051}
1052
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001053static struct dst_entry *icmp6_dst_gc_list;
1054static DEFINE_SPINLOCK(icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001055
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001056struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001057 struct neighbour *neigh,
YOSHIFUJI Hideaki9acd9f32008-04-10 15:42:10 +09001058 const struct in6_addr *addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001059{
1060 struct rt6_info *rt;
1061 struct inet6_dev *idev = in6_dev_get(dev);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001062 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001063
1064 if (unlikely(idev == NULL))
1065 return NULL;
1066
David S. Miller957c6652011-06-24 15:25:00 -07001067 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001068 if (unlikely(rt == NULL)) {
1069 in6_dev_put(idev);
1070 goto out;
1071 }
1072
Linus Torvalds1da177e2005-04-16 15:20:36 -07001073 if (neigh)
1074 neigh_hold(neigh);
David S. Miller14deae42009-01-04 16:04:39 -08001075 else {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001076 neigh = ndisc_get_neigh(dev, addr);
David S. Miller14deae42009-01-04 16:04:39 -08001077 if (IS_ERR(neigh))
1078 neigh = NULL;
1079 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001080
Linus Torvalds1da177e2005-04-16 15:20:36 -07001081 rt->rt6i_idev = idev;
David S. Miller69cce1d2011-07-17 23:09:49 -07001082 dst_set_neighbour(&rt->dst, neigh);
Changli Gaod8d1f302010-06-10 23:31:35 -07001083 atomic_set(&rt->dst.__refcnt, 1);
Eric Dumazet21efcfa2011-07-19 20:18:36 +00001084 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
David S. Millerdefb3512010-12-08 21:16:57 -08001085 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
Changli Gaod8d1f302010-06-10 23:31:35 -07001086 rt->dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001087
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001088 spin_lock_bh(&icmp6_dst_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07001089 rt->dst.next = icmp6_dst_gc_list;
1090 icmp6_dst_gc_list = &rt->dst;
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001091 spin_unlock_bh(&icmp6_dst_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001092
Daniel Lezcano55786892008-03-04 13:47:47 -08001093 fib6_force_start_gc(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001094
1095out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001096 return &rt->dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001097}
1098
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001099int icmp6_dst_gc(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001100{
Hagen Paul Pfeifere9476e952011-02-25 05:45:19 +00001101 struct dst_entry *dst, **pprev;
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001102 int more = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001103
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001104 spin_lock_bh(&icmp6_dst_lock);
1105 pprev = &icmp6_dst_gc_list;
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001106
Linus Torvalds1da177e2005-04-16 15:20:36 -07001107 while ((dst = *pprev) != NULL) {
1108 if (!atomic_read(&dst->__refcnt)) {
1109 *pprev = dst->next;
1110 dst_free(dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001111 } else {
1112 pprev = &dst->next;
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001113 ++more;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001114 }
1115 }
1116
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001117 spin_unlock_bh(&icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001118
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001119 return more;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001120}
1121
David S. Miller1e493d12008-09-10 17:27:15 -07001122static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1123 void *arg)
1124{
1125 struct dst_entry *dst, **pprev;
1126
1127 spin_lock_bh(&icmp6_dst_lock);
1128 pprev = &icmp6_dst_gc_list;
1129 while ((dst = *pprev) != NULL) {
1130 struct rt6_info *rt = (struct rt6_info *) dst;
1131 if (func(rt, arg)) {
1132 *pprev = dst->next;
1133 dst_free(dst);
1134 } else {
1135 pprev = &dst->next;
1136 }
1137 }
1138 spin_unlock_bh(&icmp6_dst_lock);
1139}
1140
Daniel Lezcano569d3642008-01-18 03:56:57 -08001141static int ip6_dst_gc(struct dst_ops *ops)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001142{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001143 unsigned long now = jiffies;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00001144 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
Daniel Lezcano7019b782008-03-04 13:50:14 -08001145 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1146 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1147 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1148 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1149 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001150 int entries;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001151
Eric Dumazetfc66f952010-10-08 06:37:34 +00001152 entries = dst_entries_get_fast(ops);
Daniel Lezcano7019b782008-03-04 13:50:14 -08001153 if (time_after(rt_last_gc + rt_min_interval, now) &&
Eric Dumazetfc66f952010-10-08 06:37:34 +00001154 entries <= rt_max_size)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001155 goto out;
1156
Benjamin Thery6891a342008-03-04 13:49:47 -08001157 net->ipv6.ip6_rt_gc_expire++;
1158 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1159 net->ipv6.ip6_rt_last_gc = now;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001160 entries = dst_entries_get_slow(ops);
1161 if (entries < ops->gc_thresh)
Daniel Lezcano7019b782008-03-04 13:50:14 -08001162 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001163out:
Daniel Lezcano7019b782008-03-04 13:50:14 -08001164 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001165 return entries > rt_max_size;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001166}
1167
/* Clean host part of a prefix. Not necessary in radix tree,
   but results in cleaner routing tables.

   Remove it only when all the things will work!
 */
1173
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001174int ip6_dst_hoplimit(struct dst_entry *dst)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001175{
David S. Miller5170ae82010-12-12 21:35:57 -08001176 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
David S. Millera02e4b72010-12-12 21:39:02 -08001177 if (hoplimit == 0) {
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001178 struct net_device *dev = dst->dev;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001179 struct inet6_dev *idev;
1180
1181 rcu_read_lock();
1182 idev = __in6_dev_get(dev);
1183 if (idev)
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001184 hoplimit = idev->cnf.hop_limit;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001185 else
YOSHIFUJI Hideaki53b79972008-07-19 22:35:03 -07001186 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001187 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001188 }
1189 return hoplimit;
1190}
David S. Millerabbf46a2010-12-12 21:14:46 -08001191EXPORT_SYMBOL(ip6_dst_hoplimit);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001192
1193/*
1194 *
1195 */
1196
Thomas Graf86872cb2006-08-22 00:01:08 -07001197int ip6_route_add(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001198{
1199 int err;
Daniel Lezcano55786892008-03-04 13:47:47 -08001200 struct net *net = cfg->fc_nlinfo.nl_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001201 struct rt6_info *rt = NULL;
1202 struct net_device *dev = NULL;
1203 struct inet6_dev *idev = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001204 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001205 int addr_type;
1206
Thomas Graf86872cb2006-08-22 00:01:08 -07001207 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001208 return -EINVAL;
1209#ifndef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001210 if (cfg->fc_src_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001211 return -EINVAL;
1212#endif
Thomas Graf86872cb2006-08-22 00:01:08 -07001213 if (cfg->fc_ifindex) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001214 err = -ENODEV;
Daniel Lezcano55786892008-03-04 13:47:47 -08001215 dev = dev_get_by_index(net, cfg->fc_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001216 if (!dev)
1217 goto out;
1218 idev = in6_dev_get(dev);
1219 if (!idev)
1220 goto out;
1221 }
1222
Thomas Graf86872cb2006-08-22 00:01:08 -07001223 if (cfg->fc_metric == 0)
1224 cfg->fc_metric = IP6_RT_PRIO_USER;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001225
Daniel Lezcano55786892008-03-04 13:47:47 -08001226 table = fib6_new_table(net, cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001227 if (table == NULL) {
1228 err = -ENOBUFS;
1229 goto out;
1230 }
1231
David S. Miller957c6652011-06-24 15:25:00 -07001232 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001233
1234 if (rt == NULL) {
1235 err = -ENOMEM;
1236 goto out;
1237 }
1238
Changli Gaod8d1f302010-06-10 23:31:35 -07001239 rt->dst.obsolete = -1;
YOSHIFUJI Hideaki6f704992008-05-19 16:56:11 -07001240 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1241 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1242 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001243
Thomas Graf86872cb2006-08-22 00:01:08 -07001244 if (cfg->fc_protocol == RTPROT_UNSPEC)
1245 cfg->fc_protocol = RTPROT_BOOT;
1246 rt->rt6i_protocol = cfg->fc_protocol;
1247
1248 addr_type = ipv6_addr_type(&cfg->fc_dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001249
1250 if (addr_type & IPV6_ADDR_MULTICAST)
Changli Gaod8d1f302010-06-10 23:31:35 -07001251 rt->dst.input = ip6_mc_input;
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00001252 else if (cfg->fc_flags & RTF_LOCAL)
1253 rt->dst.input = ip6_input;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001254 else
Changli Gaod8d1f302010-06-10 23:31:35 -07001255 rt->dst.input = ip6_forward;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001256
Changli Gaod8d1f302010-06-10 23:31:35 -07001257 rt->dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001258
Thomas Graf86872cb2006-08-22 00:01:08 -07001259 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1260 rt->rt6i_dst.plen = cfg->fc_dst_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001261 if (rt->rt6i_dst.plen == 128)
David S. Miller11d53b42011-06-24 15:23:34 -07001262 rt->dst.flags |= DST_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001263
1264#ifdef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001265 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1266 rt->rt6i_src.plen = cfg->fc_src_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001267#endif
1268
Thomas Graf86872cb2006-08-22 00:01:08 -07001269 rt->rt6i_metric = cfg->fc_metric;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001270
1271 /* We cannot add true routes via loopback here,
1272 they would result in kernel looping; promote them to reject routes
1273 */
Thomas Graf86872cb2006-08-22 00:01:08 -07001274 if ((cfg->fc_flags & RTF_REJECT) ||
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00001275 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1276 && !(cfg->fc_flags&RTF_LOCAL))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001277 /* hold loopback dev/idev if we haven't done so. */
Daniel Lezcano55786892008-03-04 13:47:47 -08001278 if (dev != net->loopback_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001279 if (dev) {
1280 dev_put(dev);
1281 in6_dev_put(idev);
1282 }
Daniel Lezcano55786892008-03-04 13:47:47 -08001283 dev = net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001284 dev_hold(dev);
1285 idev = in6_dev_get(dev);
1286 if (!idev) {
1287 err = -ENODEV;
1288 goto out;
1289 }
1290 }
Changli Gaod8d1f302010-06-10 23:31:35 -07001291 rt->dst.output = ip6_pkt_discard_out;
1292 rt->dst.input = ip6_pkt_discard;
1293 rt->dst.error = -ENETUNREACH;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001294 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1295 goto install_route;
1296 }
1297
Thomas Graf86872cb2006-08-22 00:01:08 -07001298 if (cfg->fc_flags & RTF_GATEWAY) {
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001299 const struct in6_addr *gw_addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001300 int gwa_type;
1301
Thomas Graf86872cb2006-08-22 00:01:08 -07001302 gw_addr = &cfg->fc_gateway;
1303 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001304 gwa_type = ipv6_addr_type(gw_addr);
1305
1306 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1307 struct rt6_info *grt;
1308
1309 /* IPv6 strictly inhibits using not link-local
1310 addresses as nexthop address.
1311 Otherwise, router will not able to send redirects.
1312 It is very good, but in some (rare!) circumstances
1313 (SIT, PtP, NBMA NOARP links) it is handy to allow
1314 some exceptions. --ANK
1315 */
1316 err = -EINVAL;
1317 if (!(gwa_type&IPV6_ADDR_UNICAST))
1318 goto out;
1319
Daniel Lezcano55786892008-03-04 13:47:47 -08001320 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001321
1322 err = -EHOSTUNREACH;
1323 if (grt == NULL)
1324 goto out;
1325 if (dev) {
1326 if (dev != grt->rt6i_dev) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001327 dst_release(&grt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001328 goto out;
1329 }
1330 } else {
1331 dev = grt->rt6i_dev;
1332 idev = grt->rt6i_idev;
1333 dev_hold(dev);
1334 in6_dev_hold(grt->rt6i_idev);
1335 }
1336 if (!(grt->rt6i_flags&RTF_GATEWAY))
1337 err = 0;
Changli Gaod8d1f302010-06-10 23:31:35 -07001338 dst_release(&grt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001339
1340 if (err)
1341 goto out;
1342 }
1343 err = -EINVAL;
1344 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1345 goto out;
1346 }
1347
1348 err = -ENODEV;
1349 if (dev == NULL)
1350 goto out;
1351
Daniel Walterc3968a82011-04-13 21:10:57 +00001352 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1353 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1354 err = -EINVAL;
1355 goto out;
1356 }
1357 ipv6_addr_copy(&rt->rt6i_prefsrc.addr, &cfg->fc_prefsrc);
1358 rt->rt6i_prefsrc.plen = 128;
1359 } else
1360 rt->rt6i_prefsrc.plen = 0;
1361
Thomas Graf86872cb2006-08-22 00:01:08 -07001362 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
David S. Miller69cce1d2011-07-17 23:09:49 -07001363 struct neighbour *n = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1364 if (IS_ERR(n)) {
1365 err = PTR_ERR(n);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001366 goto out;
1367 }
David S. Miller69cce1d2011-07-17 23:09:49 -07001368 dst_set_neighbour(&rt->dst, n);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001369 }
1370
Thomas Graf86872cb2006-08-22 00:01:08 -07001371 rt->rt6i_flags = cfg->fc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001372
1373install_route:
Thomas Graf86872cb2006-08-22 00:01:08 -07001374 if (cfg->fc_mx) {
1375 struct nlattr *nla;
1376 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001377
Thomas Graf86872cb2006-08-22 00:01:08 -07001378 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
Thomas Graf8f4c1f92007-09-12 14:44:36 +02001379 int type = nla_type(nla);
Thomas Graf86872cb2006-08-22 00:01:08 -07001380
1381 if (type) {
1382 if (type > RTAX_MAX) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001383 err = -EINVAL;
1384 goto out;
1385 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001386
David S. Millerdefb3512010-12-08 21:16:57 -08001387 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001388 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001389 }
1390 }
1391
Changli Gaod8d1f302010-06-10 23:31:35 -07001392 rt->dst.dev = dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001393 rt->rt6i_idev = idev;
Thomas Grafc71099a2006-08-04 23:20:06 -07001394 rt->rt6i_table = table;
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001395
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001396 cfg->fc_nlinfo.nl_net = dev_net(dev);
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001397
Thomas Graf86872cb2006-08-22 00:01:08 -07001398 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001399
1400out:
1401 if (dev)
1402 dev_put(dev);
1403 if (idev)
1404 in6_dev_put(idev);
1405 if (rt)
Changli Gaod8d1f302010-06-10 23:31:35 -07001406 dst_free(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001407 return err;
1408}
1409
Thomas Graf86872cb2006-08-22 00:01:08 -07001410static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001411{
1412 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -07001413 struct fib6_table *table;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001414 struct net *net = dev_net(rt->rt6i_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001415
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001416 if (rt == net->ipv6.ip6_null_entry)
Patrick McHardy6c813a72006-08-06 22:22:47 -07001417 return -ENOENT;
1418
Thomas Grafc71099a2006-08-04 23:20:06 -07001419 table = rt->rt6i_table;
1420 write_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001421
Thomas Graf86872cb2006-08-22 00:01:08 -07001422 err = fib6_del(rt, info);
Changli Gaod8d1f302010-06-10 23:31:35 -07001423 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001424
Thomas Grafc71099a2006-08-04 23:20:06 -07001425 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001426
1427 return err;
1428}
1429
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001430int ip6_del_rt(struct rt6_info *rt)
1431{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001432 struct nl_info info = {
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001433 .nl_net = dev_net(rt->rt6i_dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001434 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08001435 return __ip6_del_rt(rt, &info);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001436}
1437
Thomas Graf86872cb2006-08-22 00:01:08 -07001438static int ip6_route_del(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001439{
Thomas Grafc71099a2006-08-04 23:20:06 -07001440 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001441 struct fib6_node *fn;
1442 struct rt6_info *rt;
1443 int err = -ESRCH;
1444
Daniel Lezcano55786892008-03-04 13:47:47 -08001445 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001446 if (table == NULL)
1447 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001448
Thomas Grafc71099a2006-08-04 23:20:06 -07001449 read_lock_bh(&table->tb6_lock);
1450
1451 fn = fib6_locate(&table->tb6_root,
Thomas Graf86872cb2006-08-22 00:01:08 -07001452 &cfg->fc_dst, cfg->fc_dst_len,
1453 &cfg->fc_src, cfg->fc_src_len);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001454
Linus Torvalds1da177e2005-04-16 15:20:36 -07001455 if (fn) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001456 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
Thomas Graf86872cb2006-08-22 00:01:08 -07001457 if (cfg->fc_ifindex &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001458 (rt->rt6i_dev == NULL ||
Thomas Graf86872cb2006-08-22 00:01:08 -07001459 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001460 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001461 if (cfg->fc_flags & RTF_GATEWAY &&
1462 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001463 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001464 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001465 continue;
Changli Gaod8d1f302010-06-10 23:31:35 -07001466 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001467 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001468
Thomas Graf86872cb2006-08-22 00:01:08 -07001469 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001470 }
1471 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001472 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001473
1474 return err;
1475}
1476
1477/*
1478 * Handle redirects
1479 */
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001480struct ip6rd_flowi {
David S. Miller4c9483b2011-03-12 16:22:43 -05001481 struct flowi6 fl6;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001482 struct in6_addr gateway;
1483};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001484
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001485static struct rt6_info *__ip6_route_redirect(struct net *net,
1486 struct fib6_table *table,
David S. Miller4c9483b2011-03-12 16:22:43 -05001487 struct flowi6 *fl6,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001488 int flags)
1489{
David S. Miller4c9483b2011-03-12 16:22:43 -05001490 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001491 struct rt6_info *rt;
1492 struct fib6_node *fn;
Thomas Grafc71099a2006-08-04 23:20:06 -07001493
Linus Torvalds1da177e2005-04-16 15:20:36 -07001494 /*
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001495 * Get the "current" route for this destination and
1496 * check if the redirect has come from approriate router.
1497 *
1498 * RFC 2461 specifies that redirects should only be
1499 * accepted if they come from the nexthop to the target.
1500 * Due to the way the routes are chosen, this notion
1501 * is a bit fuzzy and one might need to check all possible
1502 * routes.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001503 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001504
Thomas Grafc71099a2006-08-04 23:20:06 -07001505 read_lock_bh(&table->tb6_lock);
David S. Miller4c9483b2011-03-12 16:22:43 -05001506 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001507restart:
Changli Gaod8d1f302010-06-10 23:31:35 -07001508 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001509 /*
1510 * Current route is on-link; redirect is always invalid.
1511 *
1512 * Seems, previous statement is not true. It could
1513 * be node, which looks for us as on-link (f.e. proxy ndisc)
1514 * But then router serving it might decide, that we should
1515 * know truth 8)8) --ANK (980726).
1516 */
1517 if (rt6_check_expired(rt))
1518 continue;
1519 if (!(rt->rt6i_flags & RTF_GATEWAY))
1520 continue;
David S. Miller4c9483b2011-03-12 16:22:43 -05001521 if (fl6->flowi6_oif != rt->rt6i_dev->ifindex)
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001522 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001523 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001524 continue;
1525 break;
1526 }
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001527
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001528 if (!rt)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001529 rt = net->ipv6.ip6_null_entry;
David S. Miller4c9483b2011-03-12 16:22:43 -05001530 BACKTRACK(net, &fl6->saddr);
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001531out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001532 dst_hold(&rt->dst);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001533
1534 read_unlock_bh(&table->tb6_lock);
1535
1536 return rt;
1537};
1538
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001539static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1540 const struct in6_addr *src,
1541 const struct in6_addr *gateway,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001542 struct net_device *dev)
1543{
Thomas Grafadaa70b2006-10-13 15:01:03 -07001544 int flags = RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001545 struct net *net = dev_net(dev);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001546 struct ip6rd_flowi rdfl = {
David S. Miller4c9483b2011-03-12 16:22:43 -05001547 .fl6 = {
1548 .flowi6_oif = dev->ifindex,
1549 .daddr = *dest,
1550 .saddr = *src,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001551 },
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001552 };
Thomas Grafadaa70b2006-10-13 15:01:03 -07001553
Brian Haley86c36ce2009-10-07 13:58:01 -07001554 ipv6_addr_copy(&rdfl.gateway, gateway);
1555
Thomas Grafadaa70b2006-10-13 15:01:03 -07001556 if (rt6_need_strict(dest))
1557 flags |= RT6_LOOKUP_F_IFACE;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001558
David S. Miller4c9483b2011-03-12 16:22:43 -05001559 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
Daniel Lezcano58f09b72008-03-03 23:25:27 -08001560 flags, __ip6_route_redirect);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001561}
1562
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001563void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1564 const struct in6_addr *saddr,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001565 struct neighbour *neigh, u8 *lladdr, int on_link)
1566{
1567 struct rt6_info *rt, *nrt = NULL;
1568 struct netevent_redirect netevent;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001569 struct net *net = dev_net(neigh->dev);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001570
1571 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1572
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001573 if (rt == net->ipv6.ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001574 if (net_ratelimit())
1575 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1576 "for redirect target\n");
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001577 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001578 }
1579
Linus Torvalds1da177e2005-04-16 15:20:36 -07001580 /*
1581 * We have finally decided to accept it.
1582 */
1583
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001584 neigh_update(neigh, lladdr, NUD_STALE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001585 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1586 NEIGH_UPDATE_F_OVERRIDE|
1587 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1588 NEIGH_UPDATE_F_ISROUTER))
1589 );
1590
1591 /*
1592 * Redirect received -> path was valid.
1593 * Look, redirects are sent only in response to data packets,
1594 * so that this nexthop apparently is reachable. --ANK
1595 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001596 dst_confirm(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001597
1598 /* Duplicate redirect: silently ignore. */
Eric Dumazetf2c31e32011-07-29 19:00:53 +00001599 if (neigh == dst_get_neighbour_raw(&rt->dst))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001600 goto out;
1601
Eric Dumazet21efcfa2011-07-19 20:18:36 +00001602 nrt = ip6_rt_copy(rt, dest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001603 if (nrt == NULL)
1604 goto out;
1605
1606 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1607 if (on_link)
1608 nrt->rt6i_flags &= ~RTF_GATEWAY;
1609
Linus Torvalds1da177e2005-04-16 15:20:36 -07001610 nrt->rt6i_dst.plen = 128;
Changli Gaod8d1f302010-06-10 23:31:35 -07001611 nrt->dst.flags |= DST_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001612
1613 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
David S. Miller69cce1d2011-07-17 23:09:49 -07001614 dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001615
Thomas Graf40e22e82006-08-22 00:00:45 -07001616 if (ip6_ins_rt(nrt))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001617 goto out;
1618
Changli Gaod8d1f302010-06-10 23:31:35 -07001619 netevent.old = &rt->dst;
1620 netevent.new = &nrt->dst;
Tom Tucker8d717402006-07-30 20:43:36 -07001621 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1622
Linus Torvalds1da177e2005-04-16 15:20:36 -07001623 if (rt->rt6i_flags&RTF_CACHE) {
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001624 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001625 return;
1626 }
1627
1628out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001629 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001630}
1631
1632/*
1633 * Handle ICMP "packet too big" messages
1634 * i.e. Path MTU discovery
1635 */
1636
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001637static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001638 struct net *net, u32 pmtu, int ifindex)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001639{
1640 struct rt6_info *rt, *nrt;
1641 int allfrag = 0;
Andrey Vagind3052b52010-12-11 15:20:11 +00001642again:
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001643 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001644 if (rt == NULL)
1645 return;
1646
Andrey Vagind3052b52010-12-11 15:20:11 +00001647 if (rt6_check_expired(rt)) {
1648 ip6_del_rt(rt);
1649 goto again;
1650 }
1651
Changli Gaod8d1f302010-06-10 23:31:35 -07001652 if (pmtu >= dst_mtu(&rt->dst))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001653 goto out;
1654
1655 if (pmtu < IPV6_MIN_MTU) {
1656 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001657 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
Linus Torvalds1da177e2005-04-16 15:20:36 -07001658 * MTU (1280) and a fragment header should always be included
1659 * after a node receiving Too Big message reporting PMTU is
1660 * less than the IPv6 Minimum Link MTU.
1661 */
1662 pmtu = IPV6_MIN_MTU;
1663 allfrag = 1;
1664 }
1665
1666 /* New mtu received -> path was valid.
1667 They are sent only in response to data packets,
1668 so that this nexthop apparently is reachable. --ANK
1669 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001670 dst_confirm(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001671
1672 /* Host route. If it is static, it would be better
1673 not to override it, but add new one, so that
1674 when cache entry will expire old pmtu
1675 would return automatically.
1676 */
1677 if (rt->rt6i_flags & RTF_CACHE) {
David S. Millerdefb3512010-12-08 21:16:57 -08001678 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1679 if (allfrag) {
1680 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1681 features |= RTAX_FEATURE_ALLFRAG;
1682 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1683 }
Changli Gaod8d1f302010-06-10 23:31:35 -07001684 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001685 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1686 goto out;
1687 }
1688
1689 /* Network route.
1690 Two cases are possible:
1691 1. It is connected route. Action: COW
1692 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1693 */
Eric Dumazetf2c31e32011-07-29 19:00:53 +00001694 if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001695 nrt = rt6_alloc_cow(rt, daddr, saddr);
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001696 else
1697 nrt = rt6_alloc_clone(rt, daddr);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001698
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001699 if (nrt) {
David S. Millerdefb3512010-12-08 21:16:57 -08001700 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1701 if (allfrag) {
1702 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1703 features |= RTAX_FEATURE_ALLFRAG;
1704 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1705 }
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001706
1707 /* According to RFC 1981, detecting PMTU increase shouldn't be
1708 * happened within 5 mins, the recommended timer is 10 mins.
1709 * Here this route expiration time is set to ip6_rt_mtu_expires
1710 * which is 10 mins. After 10 mins the decreased pmtu is expired
1711 * and detecting PMTU increase will be automatically happened.
1712 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001713 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001714 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1715
Thomas Graf40e22e82006-08-22 00:00:45 -07001716 ip6_ins_rt(nrt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001717 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001718out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001719 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001720}
1721
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001722void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001723 struct net_device *dev, u32 pmtu)
1724{
1725 struct net *net = dev_net(dev);
1726
1727 /*
1728 * RFC 1981 states that a node "MUST reduce the size of the packets it
1729 * is sending along the path" that caused the Packet Too Big message.
1730 * Since it's not possible in the general case to determine which
1731 * interface was used to send the original packet, we update the MTU
1732 * on the interface that will be used to send future packets. We also
1733 * update the MTU on the interface that received the Packet Too Big in
1734 * case the original packet was forced out that interface with
1735 * SO_BINDTODEVICE or similar. This is the next best thing to the
1736 * correct behaviour, which would be to update the MTU on all
1737 * interfaces.
1738 */
1739 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1740 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1741}
1742
Linus Torvalds1da177e2005-04-16 15:20:36 -07001743/*
1744 * Misc support functions
1745 */
1746
Eric Dumazet21efcfa2011-07-19 20:18:36 +00001747static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
1748 const struct in6_addr *dest)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001749{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001750 struct net *net = dev_net(ort->rt6i_dev);
David S. Miller5c1e6aa2011-04-28 14:13:38 -07001751 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
David S. Miller957c6652011-06-24 15:25:00 -07001752 ort->dst.dev, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001753
1754 if (rt) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001755 rt->dst.input = ort->dst.input;
1756 rt->dst.output = ort->dst.output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001757
Eric Dumazet21efcfa2011-07-19 20:18:36 +00001758 ipv6_addr_copy(&rt->rt6i_dst.addr, dest);
1759 rt->rt6i_dst.plen = ort->rt6i_dst.plen;
David S. Millerdefb3512010-12-08 21:16:57 -08001760 dst_copy_metrics(&rt->dst, &ort->dst);
Changli Gaod8d1f302010-06-10 23:31:35 -07001761 rt->dst.error = ort->dst.error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001762 rt->rt6i_idev = ort->rt6i_idev;
1763 if (rt->rt6i_idev)
1764 in6_dev_hold(rt->rt6i_idev);
Changli Gaod8d1f302010-06-10 23:31:35 -07001765 rt->dst.lastuse = jiffies;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001766 rt->rt6i_expires = 0;
1767
1768 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1769 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1770 rt->rt6i_metric = 0;
1771
Linus Torvalds1da177e2005-04-16 15:20:36 -07001772#ifdef CONFIG_IPV6_SUBTREES
1773 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1774#endif
Florian Westphal0f6c6392011-05-20 11:27:24 +00001775 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
Thomas Grafc71099a2006-08-04 23:20:06 -07001776 rt->rt6i_table = ort->rt6i_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001777 }
1778 return rt;
1779}
1780
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001781#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001782static struct rt6_info *rt6_get_route_info(struct net *net,
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001783 const struct in6_addr *prefix, int prefixlen,
1784 const struct in6_addr *gwaddr, int ifindex)
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001785{
1786 struct fib6_node *fn;
1787 struct rt6_info *rt = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001788 struct fib6_table *table;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001789
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001790 table = fib6_get_table(net, RT6_TABLE_INFO);
Thomas Grafc71099a2006-08-04 23:20:06 -07001791 if (table == NULL)
1792 return NULL;
1793
1794 write_lock_bh(&table->tb6_lock);
1795 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001796 if (!fn)
1797 goto out;
1798
Changli Gaod8d1f302010-06-10 23:31:35 -07001799 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001800 if (rt->rt6i_dev->ifindex != ifindex)
1801 continue;
1802 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1803 continue;
1804 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1805 continue;
Changli Gaod8d1f302010-06-10 23:31:35 -07001806 dst_hold(&rt->dst);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001807 break;
1808 }
1809out:
Thomas Grafc71099a2006-08-04 23:20:06 -07001810 write_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001811 return rt;
1812}
1813
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001814static struct rt6_info *rt6_add_route_info(struct net *net,
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001815 const struct in6_addr *prefix, int prefixlen,
1816 const struct in6_addr *gwaddr, int ifindex,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001817 unsigned pref)
1818{
Thomas Graf86872cb2006-08-22 00:01:08 -07001819 struct fib6_config cfg = {
1820 .fc_table = RT6_TABLE_INFO,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001821 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001822 .fc_ifindex = ifindex,
1823 .fc_dst_len = prefixlen,
1824 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1825 RTF_UP | RTF_PREF(pref),
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001826 .fc_nlinfo.pid = 0,
1827 .fc_nlinfo.nlh = NULL,
1828 .fc_nlinfo.nl_net = net,
Thomas Graf86872cb2006-08-22 00:01:08 -07001829 };
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001830
Thomas Graf86872cb2006-08-22 00:01:08 -07001831 ipv6_addr_copy(&cfg.fc_dst, prefix);
1832 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1833
YOSHIFUJI Hideakie317da92006-03-20 17:06:42 -08001834 /* We should treat it as a default route if prefix length is 0. */
1835 if (!prefixlen)
Thomas Graf86872cb2006-08-22 00:01:08 -07001836 cfg.fc_flags |= RTF_DEFAULT;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001837
Thomas Graf86872cb2006-08-22 00:01:08 -07001838 ip6_route_add(&cfg);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001839
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001840 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001841}
1842#endif
1843
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001844struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001845{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001846 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001847 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001848
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001849 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001850 if (table == NULL)
1851 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001852
Thomas Grafc71099a2006-08-04 23:20:06 -07001853 write_lock_bh(&table->tb6_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07001854 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001855 if (dev == rt->rt6i_dev &&
YOSHIFUJI Hideaki045927f2006-03-20 17:00:48 -08001856 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001857 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1858 break;
1859 }
1860 if (rt)
Changli Gaod8d1f302010-06-10 23:31:35 -07001861 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001862 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001863 return rt;
1864}
1865
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001866struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001867 struct net_device *dev,
1868 unsigned int pref)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001869{
Thomas Graf86872cb2006-08-22 00:01:08 -07001870 struct fib6_config cfg = {
1871 .fc_table = RT6_TABLE_DFLT,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001872 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001873 .fc_ifindex = dev->ifindex,
1874 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1875 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
Daniel Lezcano55786892008-03-04 13:47:47 -08001876 .fc_nlinfo.pid = 0,
1877 .fc_nlinfo.nlh = NULL,
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001878 .fc_nlinfo.nl_net = dev_net(dev),
Thomas Graf86872cb2006-08-22 00:01:08 -07001879 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001880
Thomas Graf86872cb2006-08-22 00:01:08 -07001881 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001882
Thomas Graf86872cb2006-08-22 00:01:08 -07001883 ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001884
Linus Torvalds1da177e2005-04-16 15:20:36 -07001885 return rt6_get_dflt_router(gwaddr, dev);
1886}
1887
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001888void rt6_purge_dflt_routers(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001889{
1890 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001891 struct fib6_table *table;
1892
1893 /* NOTE: Keep consistent with rt6_get_dflt_router */
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001894 table = fib6_get_table(net, RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001895 if (table == NULL)
1896 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001897
1898restart:
Thomas Grafc71099a2006-08-04 23:20:06 -07001899 read_lock_bh(&table->tb6_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07001900 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001901 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001902 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001903 read_unlock_bh(&table->tb6_lock);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001904 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001905 goto restart;
1906 }
1907 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001908 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001909}
1910
Daniel Lezcano55786892008-03-04 13:47:47 -08001911static void rtmsg_to_fib6_config(struct net *net,
1912 struct in6_rtmsg *rtmsg,
Thomas Graf86872cb2006-08-22 00:01:08 -07001913 struct fib6_config *cfg)
1914{
1915 memset(cfg, 0, sizeof(*cfg));
1916
1917 cfg->fc_table = RT6_TABLE_MAIN;
1918 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1919 cfg->fc_metric = rtmsg->rtmsg_metric;
1920 cfg->fc_expires = rtmsg->rtmsg_info;
1921 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1922 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1923 cfg->fc_flags = rtmsg->rtmsg_flags;
1924
Daniel Lezcano55786892008-03-04 13:47:47 -08001925 cfg->fc_nlinfo.nl_net = net;
Benjamin Theryf1243c22008-02-26 18:10:03 -08001926
Thomas Graf86872cb2006-08-22 00:01:08 -07001927 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1928 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1929 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1930}
1931
Daniel Lezcano55786892008-03-04 13:47:47 -08001932int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001933{
Thomas Graf86872cb2006-08-22 00:01:08 -07001934 struct fib6_config cfg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001935 struct in6_rtmsg rtmsg;
1936 int err;
1937
1938 switch(cmd) {
1939 case SIOCADDRT: /* Add a route */
1940 case SIOCDELRT: /* Delete a route */
1941 if (!capable(CAP_NET_ADMIN))
1942 return -EPERM;
1943 err = copy_from_user(&rtmsg, arg,
1944 sizeof(struct in6_rtmsg));
1945 if (err)
1946 return -EFAULT;
Thomas Graf86872cb2006-08-22 00:01:08 -07001947
Daniel Lezcano55786892008-03-04 13:47:47 -08001948 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
Thomas Graf86872cb2006-08-22 00:01:08 -07001949
Linus Torvalds1da177e2005-04-16 15:20:36 -07001950 rtnl_lock();
1951 switch (cmd) {
1952 case SIOCADDRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001953 err = ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001954 break;
1955 case SIOCDELRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001956 err = ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001957 break;
1958 default:
1959 err = -EINVAL;
1960 }
1961 rtnl_unlock();
1962
1963 return err;
Stephen Hemminger3ff50b72007-04-20 17:09:22 -07001964 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001965
1966 return -EINVAL;
1967}
1968
1969/*
1970 * Drop the packet on the floor
1971 */
1972
Brian Haleyd5fdd6b2009-06-23 04:31:07 -07001973static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001974{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001975 int type;
Eric Dumazetadf30902009-06-02 05:19:30 +00001976 struct dst_entry *dst = skb_dst(skb);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001977 switch (ipstats_mib_noroutes) {
1978 case IPSTATS_MIB_INNOROUTES:
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -07001979 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
Ulrich Weber45bb0062010-02-25 23:28:58 +00001980 if (type == IPV6_ADDR_ANY) {
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07001981 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1982 IPSTATS_MIB_INADDRERRORS);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001983 break;
1984 }
1985 /* FALLTHROUGH */
1986 case IPSTATS_MIB_OUTNOROUTES:
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07001987 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1988 ipstats_mib_noroutes);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001989 break;
1990 }
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +00001991 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001992 kfree_skb(skb);
1993 return 0;
1994}
1995
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001996static int ip6_pkt_discard(struct sk_buff *skb)
1997{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001998 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001999}
2000
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03002001static int ip6_pkt_discard_out(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002002{
Eric Dumazetadf30902009-06-02 05:19:30 +00002003 skb->dev = skb_dst(skb)->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002004 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002005}
2006
David S. Miller6723ab52006-10-18 21:20:57 -07002007#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2008
Thomas Graf9ce8ade2006-10-18 20:46:54 -07002009static int ip6_pkt_prohibit(struct sk_buff *skb)
2010{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002011 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07002012}
2013
2014static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2015{
Eric Dumazetadf30902009-06-02 05:19:30 +00002016 skb->dev = skb_dst(skb)->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002017 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07002018}
2019
David S. Miller6723ab52006-10-18 21:20:57 -07002020#endif
2021
Linus Torvalds1da177e2005-04-16 15:20:36 -07002022/*
2023 * Allocate a dst for local (unicast / anycast) address.
2024 */
2025
2026struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2027 const struct in6_addr *addr,
2028 int anycast)
2029{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002030 struct net *net = dev_net(idev->dev);
David S. Miller5c1e6aa2011-04-28 14:13:38 -07002031 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
David S. Miller957c6652011-06-24 15:25:00 -07002032 net->loopback_dev, 0);
David S. Miller14deae42009-01-04 16:04:39 -08002033 struct neighbour *neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002034
Ben Greear40385652010-11-08 12:33:48 +00002035 if (rt == NULL) {
2036 if (net_ratelimit())
2037 pr_warning("IPv6: Maximum number of routes reached,"
2038 " consider increasing route/max_size.\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002039 return ERR_PTR(-ENOMEM);
Ben Greear40385652010-11-08 12:33:48 +00002040 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002041
Linus Torvalds1da177e2005-04-16 15:20:36 -07002042 in6_dev_hold(idev);
2043
David S. Miller11d53b42011-06-24 15:23:34 -07002044 rt->dst.flags |= DST_HOST;
Changli Gaod8d1f302010-06-10 23:31:35 -07002045 rt->dst.input = ip6_input;
2046 rt->dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002047 rt->rt6i_idev = idev;
Changli Gaod8d1f302010-06-10 23:31:35 -07002048 rt->dst.obsolete = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002049
2050 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09002051 if (anycast)
2052 rt->rt6i_flags |= RTF_ANYCAST;
2053 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07002054 rt->rt6i_flags |= RTF_LOCAL;
David S. Miller14deae42009-01-04 16:04:39 -08002055 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
2056 if (IS_ERR(neigh)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002057 dst_free(&rt->dst);
David S. Miller14deae42009-01-04 16:04:39 -08002058
David S. Miller29546a62011-03-03 12:10:37 -08002059 return ERR_CAST(neigh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002060 }
David S. Miller69cce1d2011-07-17 23:09:49 -07002061 dst_set_neighbour(&rt->dst, neigh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002062
2063 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
2064 rt->rt6i_dst.plen = 128;
Daniel Lezcano55786892008-03-04 13:47:47 -08002065 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002066
Changli Gaod8d1f302010-06-10 23:31:35 -07002067 atomic_set(&rt->dst.__refcnt, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002068
2069 return rt;
2070}
2071
Daniel Walterc3968a82011-04-13 21:10:57 +00002072int ip6_route_get_saddr(struct net *net,
2073 struct rt6_info *rt,
Eric Dumazetb71d1d42011-04-22 04:53:02 +00002074 const struct in6_addr *daddr,
Daniel Walterc3968a82011-04-13 21:10:57 +00002075 unsigned int prefs,
2076 struct in6_addr *saddr)
2077{
2078 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2079 int err = 0;
2080 if (rt->rt6i_prefsrc.plen)
2081 ipv6_addr_copy(saddr, &rt->rt6i_prefsrc.addr);
2082 else
2083 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2084 daddr, prefs, saddr);
2085 return err;
2086}
2087
2088/* remove deleted ip from prefsrc entries */
2089struct arg_dev_net_ip {
2090 struct net_device *dev;
2091 struct net *net;
2092 struct in6_addr *addr;
2093};
2094
2095static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2096{
2097 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2098 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2099 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2100
2101 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
2102 rt != net->ipv6.ip6_null_entry &&
2103 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2104 /* remove prefsrc entry */
2105 rt->rt6i_prefsrc.plen = 0;
2106 }
2107 return 0;
2108}
2109
2110void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2111{
2112 struct net *net = dev_net(ifp->idev->dev);
2113 struct arg_dev_net_ip adni = {
2114 .dev = ifp->idev->dev,
2115 .net = net,
2116 .addr = &ifp->addr,
2117 };
2118 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2119}
2120
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002121struct arg_dev_net {
2122 struct net_device *dev;
2123 struct net *net;
2124};
2125
Linus Torvalds1da177e2005-04-16 15:20:36 -07002126static int fib6_ifdown(struct rt6_info *rt, void *arg)
2127{
stephen hemmingerbc3ef662010-12-16 17:42:40 +00002128 const struct arg_dev_net *adn = arg;
2129 const struct net_device *dev = adn->dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002130
stephen hemmingerbc3ef662010-12-16 17:42:40 +00002131 if ((rt->rt6i_dev == dev || dev == NULL) &&
2132 rt != adn->net->ipv6.ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002133 RT6_TRACE("deleted by ifdown %p\n", rt);
2134 return -1;
2135 }
2136 return 0;
2137}
2138
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002139void rt6_ifdown(struct net *net, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002140{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002141 struct arg_dev_net adn = {
2142 .dev = dev,
2143 .net = net,
2144 };
2145
2146 fib6_clean_all(net, fib6_ifdown, 0, &adn);
David S. Miller1e493d12008-09-10 17:27:15 -07002147 icmp6_clean_all(fib6_ifdown, &adn);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002148}
2149
2150struct rt6_mtu_change_arg
2151{
2152 struct net_device *dev;
2153 unsigned mtu;
2154};
2155
2156static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2157{
2158 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2159 struct inet6_dev *idev;
2160
2161 /* In IPv6 pmtu discovery is not optional,
2162 so that RTAX_MTU lock cannot disable it.
2163 We still use this lock to block changes
2164 caused by addrconf/ndisc.
2165 */
2166
2167 idev = __in6_dev_get(arg->dev);
2168 if (idev == NULL)
2169 return 0;
2170
2171 /* For administrative MTU increase, there is no way to discover
2172 IPv6 PMTU increase, so PMTU increase should be updated here.
2173 Since RFC 1981 doesn't include administrative MTU increase
2174 update PMTU increase is a MUST. (i.e. jumbo frame)
2175 */
2176 /*
2177 If new MTU is less than route PMTU, this new MTU will be the
2178 lowest MTU in the path, update the route PMTU to reflect PMTU
2179 decreases; if new MTU is greater than route PMTU, and the
2180 old MTU is the lowest MTU in the path, update the route PMTU
2181 to reflect the increase. In this case if the other nodes' MTU
2182 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2183 PMTU discouvery.
2184 */
2185 if (rt->rt6i_dev == arg->dev &&
Changli Gaod8d1f302010-06-10 23:31:35 -07002186 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2187 (dst_mtu(&rt->dst) >= arg->mtu ||
2188 (dst_mtu(&rt->dst) < arg->mtu &&
2189 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
David S. Millerdefb3512010-12-08 21:16:57 -08002190 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
Simon Arlott566cfd82007-07-26 00:09:55 -07002191 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002192 return 0;
2193}
2194
2195void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2196{
Thomas Grafc71099a2006-08-04 23:20:06 -07002197 struct rt6_mtu_change_arg arg = {
2198 .dev = dev,
2199 .mtu = mtu,
2200 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07002201
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002202 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002203}
2204
Patrick McHardyef7c79e2007-06-05 12:38:30 -07002205static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
Thomas Graf5176f912006-08-26 20:13:18 -07002206 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
Thomas Graf86872cb2006-08-22 00:01:08 -07002207 [RTA_OIF] = { .type = NLA_U32 },
Thomas Grafab364a62006-08-22 00:01:47 -07002208 [RTA_IIF] = { .type = NLA_U32 },
Thomas Graf86872cb2006-08-22 00:01:08 -07002209 [RTA_PRIORITY] = { .type = NLA_U32 },
2210 [RTA_METRICS] = { .type = NLA_NESTED },
2211};
2212
2213static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2214 struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002215{
Thomas Graf86872cb2006-08-22 00:01:08 -07002216 struct rtmsg *rtm;
2217 struct nlattr *tb[RTA_MAX+1];
2218 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002219
Thomas Graf86872cb2006-08-22 00:01:08 -07002220 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2221 if (err < 0)
2222 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002223
Thomas Graf86872cb2006-08-22 00:01:08 -07002224 err = -EINVAL;
2225 rtm = nlmsg_data(nlh);
2226 memset(cfg, 0, sizeof(*cfg));
2227
2228 cfg->fc_table = rtm->rtm_table;
2229 cfg->fc_dst_len = rtm->rtm_dst_len;
2230 cfg->fc_src_len = rtm->rtm_src_len;
2231 cfg->fc_flags = RTF_UP;
2232 cfg->fc_protocol = rtm->rtm_protocol;
2233
2234 if (rtm->rtm_type == RTN_UNREACHABLE)
2235 cfg->fc_flags |= RTF_REJECT;
2236
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00002237 if (rtm->rtm_type == RTN_LOCAL)
2238 cfg->fc_flags |= RTF_LOCAL;
2239
Thomas Graf86872cb2006-08-22 00:01:08 -07002240 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2241 cfg->fc_nlinfo.nlh = nlh;
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002242 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
Thomas Graf86872cb2006-08-22 00:01:08 -07002243
2244 if (tb[RTA_GATEWAY]) {
2245 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2246 cfg->fc_flags |= RTF_GATEWAY;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002247 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002248
2249 if (tb[RTA_DST]) {
2250 int plen = (rtm->rtm_dst_len + 7) >> 3;
2251
2252 if (nla_len(tb[RTA_DST]) < plen)
2253 goto errout;
2254
2255 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002256 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002257
2258 if (tb[RTA_SRC]) {
2259 int plen = (rtm->rtm_src_len + 7) >> 3;
2260
2261 if (nla_len(tb[RTA_SRC]) < plen)
2262 goto errout;
2263
2264 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002265 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002266
Daniel Walterc3968a82011-04-13 21:10:57 +00002267 if (tb[RTA_PREFSRC])
2268 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2269
Thomas Graf86872cb2006-08-22 00:01:08 -07002270 if (tb[RTA_OIF])
2271 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2272
2273 if (tb[RTA_PRIORITY])
2274 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2275
2276 if (tb[RTA_METRICS]) {
2277 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2278 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002279 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002280
2281 if (tb[RTA_TABLE])
2282 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2283
2284 err = 0;
2285errout:
2286 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002287}
2288
Thomas Grafc127ea22007-03-22 11:58:32 -07002289static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002290{
Thomas Graf86872cb2006-08-22 00:01:08 -07002291 struct fib6_config cfg;
2292 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002293
Thomas Graf86872cb2006-08-22 00:01:08 -07002294 err = rtm_to_fib6_config(skb, nlh, &cfg);
2295 if (err < 0)
2296 return err;
2297
2298 return ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002299}
2300
Thomas Grafc127ea22007-03-22 11:58:32 -07002301static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002302{
Thomas Graf86872cb2006-08-22 00:01:08 -07002303 struct fib6_config cfg;
2304 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002305
Thomas Graf86872cb2006-08-22 00:01:08 -07002306 err = rtm_to_fib6_config(skb, nlh, &cfg);
2307 if (err < 0)
2308 return err;
2309
2310 return ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002311}
2312
Thomas Graf339bf982006-11-10 14:10:15 -08002313static inline size_t rt6_nlmsg_size(void)
2314{
2315 return NLMSG_ALIGN(sizeof(struct rtmsg))
2316 + nla_total_size(16) /* RTA_SRC */
2317 + nla_total_size(16) /* RTA_DST */
2318 + nla_total_size(16) /* RTA_GATEWAY */
2319 + nla_total_size(16) /* RTA_PREFSRC */
2320 + nla_total_size(4) /* RTA_TABLE */
2321 + nla_total_size(4) /* RTA_IIF */
2322 + nla_total_size(4) /* RTA_OIF */
2323 + nla_total_size(4) /* RTA_PRIORITY */
Noriaki TAKAMIYA6a2b9ce2007-01-23 22:09:41 -08002324 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
Thomas Graf339bf982006-11-10 14:10:15 -08002325 + nla_total_size(sizeof(struct rta_cacheinfo));
2326}
2327
Brian Haley191cd582008-08-14 15:33:21 -07002328static int rt6_fill_node(struct net *net,
2329 struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002330 struct in6_addr *dst, struct in6_addr *src,
2331 int iif, int type, u32 pid, u32 seq,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002332 int prefix, int nowait, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002333{
2334 struct rtmsg *rtm;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002335 struct nlmsghdr *nlh;
Thomas Grafe3703b32006-11-27 09:27:07 -08002336 long expires;
Patrick McHardy9e762a42006-08-10 23:09:48 -07002337 u32 table;
Eric Dumazetf2c31e32011-07-29 19:00:53 +00002338 struct neighbour *n;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002339
2340 if (prefix) { /* user wants prefix routes only */
2341 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2342 /* success since this is not a prefix route */
2343 return 1;
2344 }
2345 }
2346
Thomas Graf2d7202b2006-08-22 00:01:27 -07002347 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2348 if (nlh == NULL)
Patrick McHardy26932562007-01-31 23:16:40 -08002349 return -EMSGSIZE;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002350
2351 rtm = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002352 rtm->rtm_family = AF_INET6;
2353 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2354 rtm->rtm_src_len = rt->rt6i_src.plen;
2355 rtm->rtm_tos = 0;
Thomas Grafc71099a2006-08-04 23:20:06 -07002356 if (rt->rt6i_table)
Patrick McHardy9e762a42006-08-10 23:09:48 -07002357 table = rt->rt6i_table->tb6_id;
Thomas Grafc71099a2006-08-04 23:20:06 -07002358 else
Patrick McHardy9e762a42006-08-10 23:09:48 -07002359 table = RT6_TABLE_UNSPEC;
2360 rtm->rtm_table = table;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002361 NLA_PUT_U32(skb, RTA_TABLE, table);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002362 if (rt->rt6i_flags&RTF_REJECT)
2363 rtm->rtm_type = RTN_UNREACHABLE;
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00002364 else if (rt->rt6i_flags&RTF_LOCAL)
2365 rtm->rtm_type = RTN_LOCAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002366 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2367 rtm->rtm_type = RTN_LOCAL;
2368 else
2369 rtm->rtm_type = RTN_UNICAST;
2370 rtm->rtm_flags = 0;
2371 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2372 rtm->rtm_protocol = rt->rt6i_protocol;
2373 if (rt->rt6i_flags&RTF_DYNAMIC)
2374 rtm->rtm_protocol = RTPROT_REDIRECT;
2375 else if (rt->rt6i_flags & RTF_ADDRCONF)
2376 rtm->rtm_protocol = RTPROT_KERNEL;
2377 else if (rt->rt6i_flags&RTF_DEFAULT)
2378 rtm->rtm_protocol = RTPROT_RA;
2379
2380 if (rt->rt6i_flags&RTF_CACHE)
2381 rtm->rtm_flags |= RTM_F_CLONED;
2382
2383 if (dst) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002384 NLA_PUT(skb, RTA_DST, 16, dst);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002385 rtm->rtm_dst_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002386 } else if (rtm->rtm_dst_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002387 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002388#ifdef CONFIG_IPV6_SUBTREES
2389 if (src) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002390 NLA_PUT(skb, RTA_SRC, 16, src);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002391 rtm->rtm_src_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002392 } else if (rtm->rtm_src_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002393 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002394#endif
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002395 if (iif) {
2396#ifdef CONFIG_IPV6_MROUTE
2397 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
Benjamin Thery8229efd2008-12-10 16:30:15 -08002398 int err = ip6mr_get_route(net, skb, rtm, nowait);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002399 if (err <= 0) {
2400 if (!nowait) {
2401 if (err == 0)
2402 return 0;
2403 goto nla_put_failure;
2404 } else {
2405 if (err == -EMSGSIZE)
2406 goto nla_put_failure;
2407 }
2408 }
2409 } else
2410#endif
2411 NLA_PUT_U32(skb, RTA_IIF, iif);
2412 } else if (dst) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002413 struct in6_addr saddr_buf;
Daniel Walterc3968a82011-04-13 21:10:57 +00002414 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002415 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002416 }
Thomas Graf2d7202b2006-08-22 00:01:27 -07002417
Daniel Walterc3968a82011-04-13 21:10:57 +00002418 if (rt->rt6i_prefsrc.plen) {
2419 struct in6_addr saddr_buf;
2420 ipv6_addr_copy(&saddr_buf, &rt->rt6i_prefsrc.addr);
2421 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2422 }
2423
David S. Millerdefb3512010-12-08 21:16:57 -08002424 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002425 goto nla_put_failure;
2426
Eric Dumazetf2c31e32011-07-29 19:00:53 +00002427 rcu_read_lock();
2428 n = dst_get_neighbour(&rt->dst);
2429 if (n)
2430 NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key);
2431 rcu_read_unlock();
Thomas Graf2d7202b2006-08-22 00:01:27 -07002432
Changli Gaod8d1f302010-06-10 23:31:35 -07002433 if (rt->dst.dev)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002434 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2435
2436 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
Thomas Grafe3703b32006-11-27 09:27:07 -08002437
YOSHIFUJI Hideaki36e3dea2008-05-13 02:52:55 +09002438 if (!(rt->rt6i_flags & RTF_EXPIRES))
2439 expires = 0;
2440 else if (rt->rt6i_expires - jiffies < INT_MAX)
2441 expires = rt->rt6i_expires - jiffies;
2442 else
2443 expires = INT_MAX;
YOSHIFUJI Hideaki69cdf8f2008-05-19 16:55:13 -07002444
Changli Gaod8d1f302010-06-10 23:31:35 -07002445 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2446 expires, rt->dst.error) < 0)
Thomas Grafe3703b32006-11-27 09:27:07 -08002447 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002448
Thomas Graf2d7202b2006-08-22 00:01:27 -07002449 return nlmsg_end(skb, nlh);
2450
2451nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08002452 nlmsg_cancel(skb, nlh);
2453 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002454}
2455
Patrick McHardy1b43af52006-08-10 23:11:17 -07002456int rt6_dump_route(struct rt6_info *rt, void *p_arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002457{
2458 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2459 int prefix;
2460
Thomas Graf2d7202b2006-08-22 00:01:27 -07002461 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2462 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002463 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2464 } else
2465 prefix = 0;
2466
Brian Haley191cd582008-08-14 15:33:21 -07002467 return rt6_fill_node(arg->net,
2468 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002469 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002470 prefix, 0, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002471}
2472
Thomas Grafc127ea22007-03-22 11:58:32 -07002473static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002474{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002475 struct net *net = sock_net(in_skb->sk);
Thomas Grafab364a62006-08-22 00:01:47 -07002476 struct nlattr *tb[RTA_MAX+1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002477 struct rt6_info *rt;
Thomas Grafab364a62006-08-22 00:01:47 -07002478 struct sk_buff *skb;
2479 struct rtmsg *rtm;
David S. Miller4c9483b2011-03-12 16:22:43 -05002480 struct flowi6 fl6;
Thomas Grafab364a62006-08-22 00:01:47 -07002481 int err, iif = 0;
2482
2483 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2484 if (err < 0)
2485 goto errout;
2486
2487 err = -EINVAL;
David S. Miller4c9483b2011-03-12 16:22:43 -05002488 memset(&fl6, 0, sizeof(fl6));
Thomas Grafab364a62006-08-22 00:01:47 -07002489
2490 if (tb[RTA_SRC]) {
2491 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2492 goto errout;
2493
David S. Miller4c9483b2011-03-12 16:22:43 -05002494 ipv6_addr_copy(&fl6.saddr, nla_data(tb[RTA_SRC]));
Thomas Grafab364a62006-08-22 00:01:47 -07002495 }
2496
2497 if (tb[RTA_DST]) {
2498 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2499 goto errout;
2500
David S. Miller4c9483b2011-03-12 16:22:43 -05002501 ipv6_addr_copy(&fl6.daddr, nla_data(tb[RTA_DST]));
Thomas Grafab364a62006-08-22 00:01:47 -07002502 }
2503
2504 if (tb[RTA_IIF])
2505 iif = nla_get_u32(tb[RTA_IIF]);
2506
2507 if (tb[RTA_OIF])
David S. Miller4c9483b2011-03-12 16:22:43 -05002508 fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);
Thomas Grafab364a62006-08-22 00:01:47 -07002509
2510 if (iif) {
2511 struct net_device *dev;
Daniel Lezcano55786892008-03-04 13:47:47 -08002512 dev = __dev_get_by_index(net, iif);
Thomas Grafab364a62006-08-22 00:01:47 -07002513 if (!dev) {
2514 err = -ENODEV;
2515 goto errout;
2516 }
2517 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002518
2519 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
Thomas Grafab364a62006-08-22 00:01:47 -07002520 if (skb == NULL) {
2521 err = -ENOBUFS;
2522 goto errout;
2523 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002524
2525 /* Reserve room for dummy headers, this skb can pass
2526 through good chunk of routing engine.
2527 */
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07002528 skb_reset_mac_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002529 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2530
David S. Miller4c9483b2011-03-12 16:22:43 -05002531 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
Changli Gaod8d1f302010-06-10 23:31:35 -07002532 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002533
David S. Miller4c9483b2011-03-12 16:22:43 -05002534 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002535 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002536 nlh->nlmsg_seq, 0, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002537 if (err < 0) {
Thomas Grafab364a62006-08-22 00:01:47 -07002538 kfree_skb(skb);
2539 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002540 }
2541
Daniel Lezcano55786892008-03-04 13:47:47 -08002542 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
Thomas Grafab364a62006-08-22 00:01:47 -07002543errout:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002544 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002545}
2546
Thomas Graf86872cb2006-08-22 00:01:08 -07002547void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002548{
2549 struct sk_buff *skb;
Daniel Lezcano55786892008-03-04 13:47:47 -08002550 struct net *net = info->nl_net;
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002551 u32 seq;
2552 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002553
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002554 err = -ENOBUFS;
2555 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
Thomas Graf86872cb2006-08-22 00:01:08 -07002556
Thomas Graf339bf982006-11-10 14:10:15 -08002557 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
Thomas Graf21713eb2006-08-15 00:35:24 -07002558 if (skb == NULL)
2559 goto errout;
2560
Brian Haley191cd582008-08-14 15:33:21 -07002561 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002562 event, info->pid, seq, 0, 0, 0);
Patrick McHardy26932562007-01-31 23:16:40 -08002563 if (err < 0) {
2564 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2565 WARN_ON(err == -EMSGSIZE);
2566 kfree_skb(skb);
2567 goto errout;
2568 }
Pablo Neira Ayuso1ce85fe2009-02-24 23:18:28 -08002569 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2570 info->nlh, gfp_any());
2571 return;
Thomas Graf21713eb2006-08-15 00:35:24 -07002572errout:
2573 if (err < 0)
Daniel Lezcano55786892008-03-04 13:47:47 -08002574 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002575}
2576
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002577static int ip6_route_dev_notify(struct notifier_block *this,
2578 unsigned long event, void *data)
2579{
2580 struct net_device *dev = (struct net_device *)data;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002581 struct net *net = dev_net(dev);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002582
2583 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002584 net->ipv6.ip6_null_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002585 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2586#ifdef CONFIG_IPV6_MULTIPLE_TABLES
Changli Gaod8d1f302010-06-10 23:31:35 -07002587 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002588 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
Changli Gaod8d1f302010-06-10 23:31:35 -07002589 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002590 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2591#endif
2592 }
2593
2594 return NOTIFY_OK;
2595}
2596
Linus Torvalds1da177e2005-04-16 15:20:36 -07002597/*
2598 * /proc
2599 */
2600
2601#ifdef CONFIG_PROC_FS
2602
/*
 * Buffer-style /proc cursor.
 * NOTE(review): the /proc handlers visible in this part of the file use
 * seq_file and do not reference this struct - confirm whether it still has
 * any users.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2611
2612static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2613{
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002614 struct seq_file *m = p_arg;
David S. Miller69cce1d2011-07-17 23:09:49 -07002615 struct neighbour *n;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002616
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002617 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002618
2619#ifdef CONFIG_IPV6_SUBTREES
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002620 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002621#else
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002622 seq_puts(m, "00000000000000000000000000000000 00 ");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002623#endif
Eric Dumazetf2c31e32011-07-29 19:00:53 +00002624 rcu_read_lock();
David S. Miller69cce1d2011-07-17 23:09:49 -07002625 n = dst_get_neighbour(&rt->dst);
2626 if (n) {
2627 seq_printf(m, "%pi6", n->primary_key);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002628 } else {
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002629 seq_puts(m, "00000000000000000000000000000000");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002630 }
Eric Dumazetf2c31e32011-07-29 19:00:53 +00002631 rcu_read_unlock();
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002632 seq_printf(m, " %08x %08x %08x %08x %8s\n",
Changli Gaod8d1f302010-06-10 23:31:35 -07002633 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2634 rt->dst.__use, rt->rt6i_flags,
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002635 rt->rt6i_dev ? rt->rt6i_dev->name : "");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002636 return 0;
2637}
2638
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002639static int ipv6_route_show(struct seq_file *m, void *v)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002640{
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002641 struct net *net = (struct net *)m->private;
2642 fib6_clean_all(net, rt6_info_route, 0, m);
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002643 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002644}
2645
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002646static int ipv6_route_open(struct inode *inode, struct file *file)
2647{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002648 return single_open_net(inode, file, ipv6_route_show);
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002649}
2650
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002651static const struct file_operations ipv6_route_proc_fops = {
2652 .owner = THIS_MODULE,
2653 .open = ipv6_route_open,
2654 .read = seq_read,
2655 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002656 .release = single_release_net,
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002657};
2658
Linus Torvalds1da177e2005-04-16 15:20:36 -07002659static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2660{
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002661 struct net *net = (struct net *)seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002662 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002663 net->ipv6.rt6_stats->fib_nodes,
2664 net->ipv6.rt6_stats->fib_route_nodes,
2665 net->ipv6.rt6_stats->fib_rt_alloc,
2666 net->ipv6.rt6_stats->fib_rt_entries,
2667 net->ipv6.rt6_stats->fib_rt_cache,
Eric Dumazetfc66f952010-10-08 06:37:34 +00002668 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002669 net->ipv6.rt6_stats->fib_discarded_routes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002670
2671 return 0;
2672}
2673
2674static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2675{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002676 return single_open_net(inode, file, rt6_stats_seq_show);
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002677}
2678
Arjan van de Ven9a321442007-02-12 00:55:35 -08002679static const struct file_operations rt6_stats_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002680 .owner = THIS_MODULE,
2681 .open = rt6_stats_seq_open,
2682 .read = seq_read,
2683 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002684 .release = single_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002685};
2686#endif /* CONFIG_PROC_FS */
2687
2688#ifdef CONFIG_SYSCTL
2689
Linus Torvalds1da177e2005-04-16 15:20:36 -07002690static
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07002691int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002692 void __user *buffer, size_t *lenp, loff_t *ppos)
2693{
Lucian Adrian Grijincuc486da32011-02-24 19:48:03 +00002694 struct net *net;
2695 int delay;
2696 if (!write)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002697 return -EINVAL;
Lucian Adrian Grijincuc486da32011-02-24 19:48:03 +00002698
2699 net = (struct net *)ctl->extra1;
2700 delay = net->ipv6.sysctl.flush_delay;
2701 proc_dointvec(ctl, write, buffer, lenp, ppos);
2702 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2703 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002704}
2705
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002706ctl_table ipv6_route_table_template[] = {
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002707 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002708 .procname = "flush",
Daniel Lezcano49905092008-01-10 03:01:01 -08002709 .data = &init_net.ipv6.sysctl.flush_delay,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002710 .maxlen = sizeof(int),
Dave Jones89c8b3a12005-04-28 12:11:49 -07002711 .mode = 0200,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002712 .proc_handler = ipv6_sysctl_rtcache_flush
Linus Torvalds1da177e2005-04-16 15:20:36 -07002713 },
2714 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002715 .procname = "gc_thresh",
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002716 .data = &ip6_dst_ops_template.gc_thresh,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002717 .maxlen = sizeof(int),
2718 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002719 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002720 },
2721 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002722 .procname = "max_size",
Daniel Lezcano49905092008-01-10 03:01:01 -08002723 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002724 .maxlen = sizeof(int),
2725 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002726 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002727 },
2728 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002729 .procname = "gc_min_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002730 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002731 .maxlen = sizeof(int),
2732 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002733 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002734 },
2735 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002736 .procname = "gc_timeout",
Daniel Lezcano49905092008-01-10 03:01:01 -08002737 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002738 .maxlen = sizeof(int),
2739 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002740 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002741 },
2742 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002743 .procname = "gc_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002744 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002745 .maxlen = sizeof(int),
2746 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002747 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002748 },
2749 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002750 .procname = "gc_elasticity",
Daniel Lezcano49905092008-01-10 03:01:01 -08002751 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002752 .maxlen = sizeof(int),
2753 .mode = 0644,
Min Zhangf3d3f612010-08-14 22:42:51 -07002754 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002755 },
2756 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002757 .procname = "mtu_expires",
Daniel Lezcano49905092008-01-10 03:01:01 -08002758 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002759 .maxlen = sizeof(int),
2760 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002761 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002762 },
2763 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002764 .procname = "min_adv_mss",
Daniel Lezcano49905092008-01-10 03:01:01 -08002765 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002766 .maxlen = sizeof(int),
2767 .mode = 0644,
Min Zhangf3d3f612010-08-14 22:42:51 -07002768 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002769 },
2770 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002771 .procname = "gc_min_interval_ms",
Daniel Lezcano49905092008-01-10 03:01:01 -08002772 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002773 .maxlen = sizeof(int),
2774 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002775 .proc_handler = proc_dointvec_ms_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002776 },
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08002777 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002778};
2779
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002780struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002781{
2782 struct ctl_table *table;
2783
2784 table = kmemdup(ipv6_route_table_template,
2785 sizeof(ipv6_route_table_template),
2786 GFP_KERNEL);
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002787
2788 if (table) {
2789 table[0].data = &net->ipv6.sysctl.flush_delay;
Lucian Adrian Grijincuc486da32011-02-24 19:48:03 +00002790 table[0].extra1 = net;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002791 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002792 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2793 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2794 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2795 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2796 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2797 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2798 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
Alexey Dobriyan9c69fab2009-12-18 20:11:03 -08002799 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002800 }
2801
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002802 return table;
2803}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002804#endif
2805
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002806static int __net_init ip6_route_net_init(struct net *net)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002807{
Pavel Emelyanov633d424b2008-04-21 14:25:23 -07002808 int ret = -ENOMEM;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002809
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002810 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2811 sizeof(net->ipv6.ip6_dst_ops));
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002812
Eric Dumazetfc66f952010-10-08 06:37:34 +00002813 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2814 goto out_ip6_dst_ops;
2815
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002816 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2817 sizeof(*net->ipv6.ip6_null_entry),
2818 GFP_KERNEL);
2819 if (!net->ipv6.ip6_null_entry)
Eric Dumazetfc66f952010-10-08 06:37:34 +00002820 goto out_ip6_dst_entries;
Changli Gaod8d1f302010-06-10 23:31:35 -07002821 net->ipv6.ip6_null_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002822 (struct dst_entry *)net->ipv6.ip6_null_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002823 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Miller62fa8a82011-01-26 20:51:05 -08002824 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2825 ip6_template_metrics, true);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002826
2827#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2828 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2829 sizeof(*net->ipv6.ip6_prohibit_entry),
2830 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002831 if (!net->ipv6.ip6_prohibit_entry)
2832 goto out_ip6_null_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002833 net->ipv6.ip6_prohibit_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002834 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002835 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Miller62fa8a82011-01-26 20:51:05 -08002836 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2837 ip6_template_metrics, true);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002838
2839 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2840 sizeof(*net->ipv6.ip6_blk_hole_entry),
2841 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002842 if (!net->ipv6.ip6_blk_hole_entry)
2843 goto out_ip6_prohibit_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002844 net->ipv6.ip6_blk_hole_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002845 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002846 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Miller62fa8a82011-01-26 20:51:05 -08002847 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2848 ip6_template_metrics, true);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002849#endif
2850
Peter Zijlstrab339a47c2008-10-07 14:15:00 -07002851 net->ipv6.sysctl.flush_delay = 0;
2852 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2853 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2854 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2855 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2856 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2857 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2858 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2859
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002860#ifdef CONFIG_PROC_FS
2861 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2862 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2863#endif
Benjamin Thery6891a342008-03-04 13:49:47 -08002864 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2865
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002866 ret = 0;
2867out:
2868 return ret;
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002869
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002870#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2871out_ip6_prohibit_entry:
2872 kfree(net->ipv6.ip6_prohibit_entry);
2873out_ip6_null_entry:
2874 kfree(net->ipv6.ip6_null_entry);
2875#endif
Eric Dumazetfc66f952010-10-08 06:37:34 +00002876out_ip6_dst_entries:
2877 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002878out_ip6_dst_ops:
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002879 goto out;
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002880}
2881
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002882static void __net_exit ip6_route_net_exit(struct net *net)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002883{
2884#ifdef CONFIG_PROC_FS
2885 proc_net_remove(net, "ipv6_route");
2886 proc_net_remove(net, "rt6_stats");
2887#endif
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002888 kfree(net->ipv6.ip6_null_entry);
2889#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2890 kfree(net->ipv6.ip6_prohibit_entry);
2891 kfree(net->ipv6.ip6_blk_hole_entry);
2892#endif
Xiaotian Feng41bb78b2010-11-02 16:11:05 +00002893 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002894}
2895
2896static struct pernet_operations ip6_route_net_ops = {
2897 .init = ip6_route_net_init,
2898 .exit = ip6_route_net_exit,
2899};
2900
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002901static struct notifier_block ip6_route_dev_notifier = {
2902 .notifier_call = ip6_route_dev_notify,
2903 .priority = 0,
2904};
2905
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002906int __init ip6_route_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002907{
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002908 int ret;
2909
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002910 ret = -ENOMEM;
2911 ip6_dst_ops_template.kmem_cachep =
2912 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2913 SLAB_HWCACHE_ALIGN, NULL);
2914 if (!ip6_dst_ops_template.kmem_cachep)
Fernando Carrijoc19a28e2009-01-07 18:09:08 -08002915 goto out;
David S. Miller14e50e52007-05-24 18:17:54 -07002916
Eric Dumazetfc66f952010-10-08 06:37:34 +00002917 ret = dst_entries_init(&ip6_dst_blackhole_ops);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002918 if (ret)
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002919 goto out_kmem_cache;
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002920
Eric Dumazetfc66f952010-10-08 06:37:34 +00002921 ret = register_pernet_subsys(&ip6_route_net_ops);
2922 if (ret)
2923 goto out_dst_entries;
2924
Arnaud Ebalard5dc121e2008-10-01 02:37:56 -07002925 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2926
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002927 /* Registering of the loopback is done before this portion of code,
2928 * the loopback reference in rt6_info will not be taken, do it
2929 * manually for init_net */
Changli Gaod8d1f302010-06-10 23:31:35 -07002930 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002931 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2932 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
Changli Gaod8d1f302010-06-10 23:31:35 -07002933 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002934 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
Changli Gaod8d1f302010-06-10 23:31:35 -07002935 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002936 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2937 #endif
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002938 ret = fib6_init();
2939 if (ret)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002940 goto out_register_subsys;
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002941
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002942 ret = xfrm6_init();
2943 if (ret)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002944 goto out_fib6_init;
Daniel Lezcanoc35b7e72007-12-08 00:14:11 -08002945
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002946 ret = fib6_rules_init();
2947 if (ret)
2948 goto xfrm6_init;
Daniel Lezcano7e5449c2007-12-08 00:14:54 -08002949
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002950 ret = -ENOBUFS;
Greg Rosec7ac8672011-06-10 01:27:09 +00002951 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
2952 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
2953 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002954 goto fib6_rules_init;
2955
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002956 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002957 if (ret)
2958 goto fib6_rules_init;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002959
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002960out:
2961 return ret;
2962
2963fib6_rules_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002964 fib6_rules_cleanup();
2965xfrm6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002966 xfrm6_fini();
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002967out_fib6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002968 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002969out_register_subsys:
2970 unregister_pernet_subsys(&ip6_route_net_ops);
Eric Dumazetfc66f952010-10-08 06:37:34 +00002971out_dst_entries:
2972 dst_entries_destroy(&ip6_dst_blackhole_ops);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002973out_kmem_cache:
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002974 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002975 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002976}
2977
2978void ip6_route_cleanup(void)
2979{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002980 unregister_netdevice_notifier(&ip6_route_dev_notifier);
Thomas Graf101367c2006-08-04 03:39:02 -07002981 fib6_rules_cleanup();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002982 xfrm6_fini();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002983 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002984 unregister_pernet_subsys(&ip6_route_net_ops);
Xiaotian Feng41bb78b2010-11-02 16:11:05 +00002985 dst_entries_destroy(&ip6_dst_blackhole_ops);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002986 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002987}