blob: 904312e25a3c072cd4ae00436f4cde2dbff04732 [file] [log] [blame]
/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		  routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		  reachable.  Otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */
26
Randy Dunlap4fc268d2006-01-11 12:17:47 -080027#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090037#include <linux/mroute6.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070038#include <linux/init.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039#include <linux/if_arp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070040#include <linux/proc_fs.h>
41#include <linux/seq_file.h>
Daniel Lezcano5b7c9312008-03-03 23:28:58 -080042#include <linux/nsproxy.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090043#include <linux/slab.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020044#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070045#include <net/snmp.h>
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#include <net/ndisc.h>
50#include <net/addrconf.h>
51#include <net/tcp.h>
52#include <linux/rtnetlink.h>
53#include <net/dst.h>
54#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -070055#include <net/netevent.h>
Thomas Graf21713eb2006-08-15 00:35:24 -070056#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070057
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
/* Debug level for this file.  Set to 3 to get tracing. */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
/* Tracing disabled: both helpers expand to nothing useful. */
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
/* RDBG() takes a fully parenthesised printk() argument list. */
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif
74
Linus Torvalds1da177e2005-04-16 15:20:36 -070075static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
76static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
David S. Miller0dbaee32010-12-13 12:52:14 -080077static unsigned int ip6_default_advmss(const struct dst_entry *dst);
David S. Millerd33e4552010-12-14 13:01:14 -080078static unsigned int ip6_default_mtu(const struct dst_entry *dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -070079static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80static void ip6_dst_destroy(struct dst_entry *);
81static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
Daniel Lezcano569d3642008-01-18 03:56:57 -080083static int ip6_dst_gc(struct dst_ops *ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -070084
85static int ip6_pkt_discard(struct sk_buff *skb);
86static int ip6_pkt_discard_out(struct sk_buff *skb);
87static void ip6_link_failure(struct sk_buff *skb);
88static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
89
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080090#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -080091static struct rt6_info *rt6_add_route_info(struct net *net,
92 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080093 struct in6_addr *gwaddr, int ifindex,
94 unsigned pref);
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -080095static struct rt6_info *rt6_get_route_info(struct net *net,
96 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080097 struct in6_addr *gwaddr, int ifindex);
98#endif
99
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -0800100static struct dst_ops ip6_dst_ops_template = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700101 .family = AF_INET6,
Harvey Harrison09640e632009-02-01 00:45:17 -0800102 .protocol = cpu_to_be16(ETH_P_IPV6),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700103 .gc = ip6_dst_gc,
104 .gc_thresh = 1024,
105 .check = ip6_dst_check,
David S. Miller0dbaee32010-12-13 12:52:14 -0800106 .default_advmss = ip6_default_advmss,
David S. Millerd33e4552010-12-14 13:01:14 -0800107 .default_mtu = ip6_default_mtu,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700108 .destroy = ip6_dst_destroy,
109 .ifdown = ip6_dst_ifdown,
110 .negative_advice = ip6_negative_advice,
111 .link_failure = ip6_link_failure,
112 .update_pmtu = ip6_rt_update_pmtu,
Herbert Xu1ac06e02008-05-20 14:32:14 -0700113 .local_out = __ip6_local_out,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700114};
115
/* default_mtu callback for the blackhole dst_ops: always reports 0. */
static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
{
	return 0;
}
120
David S. Miller14e50e52007-05-24 18:17:54 -0700121static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
122{
123}
124
125static struct dst_ops ip6_dst_blackhole_ops = {
126 .family = AF_INET6,
Harvey Harrison09640e632009-02-01 00:45:17 -0800127 .protocol = cpu_to_be16(ETH_P_IPV6),
David S. Miller14e50e52007-05-24 18:17:54 -0700128 .destroy = ip6_dst_destroy,
129 .check = ip6_dst_check,
Roland Dreierec831ea2011-01-31 13:16:00 -0800130 .default_mtu = ip6_blackhole_default_mtu,
Eric Dumazet214f45c2011-02-18 11:39:01 -0800131 .default_advmss = ip6_default_advmss,
David S. Miller14e50e52007-05-24 18:17:54 -0700132 .update_pmtu = ip6_rt_blackhole_update_pmtu,
David S. Miller14e50e52007-05-24 18:17:54 -0700133};
134
Daniel Lezcanobdb32892008-03-04 13:48:10 -0800135static struct rt6_info ip6_null_entry_template = {
Changli Gaod8d1f302010-06-10 23:31:35 -0700136 .dst = {
137 .__refcnt = ATOMIC_INIT(1),
138 .__use = 1,
139 .obsolete = -1,
140 .error = -ENETUNREACH,
Changli Gaod8d1f302010-06-10 23:31:35 -0700141 .input = ip6_pkt_discard,
142 .output = ip6_pkt_discard_out,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700143 },
144 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
Jean-Mickael Guerin4f724272009-05-20 17:38:59 -0700145 .rt6i_protocol = RTPROT_KERNEL,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700146 .rt6i_metric = ~(u32) 0,
147 .rt6i_ref = ATOMIC_INIT(1),
148};
149
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/*
 * Template for the "prohibit" route: a reject entry whose dst reports
 * -EACCES and routes packets to the ip6_pkt_prohibit handlers.
 */
static struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/*
 * Template for the "blackhole" route: a reject entry whose dst reports
 * -EINVAL and hands packets to dst_discard in both directions.
 */
static struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
186
/* Allocate a dst from @ops and return it viewed as an rt6_info. */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
	struct rt6_info *rt = (struct rt6_info *)dst_alloc(ops);

	return rt;
}
192
193static void ip6_dst_destroy(struct dst_entry *dst)
194{
195 struct rt6_info *rt = (struct rt6_info *)dst;
196 struct inet6_dev *idev = rt->rt6i_idev;
David S. Millerb3419362010-11-30 12:27:11 -0800197 struct inet_peer *peer = rt->rt6i_peer;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700198
199 if (idev != NULL) {
200 rt->rt6i_idev = NULL;
201 in6_dev_put(idev);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900202 }
David S. Millerb3419362010-11-30 12:27:11 -0800203 if (peer) {
David S. Millerb3419362010-11-30 12:27:11 -0800204 rt->rt6i_peer = NULL;
205 inet_putpeer(peer);
206 }
207}
208
209void rt6_bind_peer(struct rt6_info *rt, int create)
210{
211 struct inet_peer *peer;
212
David S. Millerb3419362010-11-30 12:27:11 -0800213 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
214 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
215 inet_putpeer(peer);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700216}
217
218static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
219 int how)
220{
221 struct rt6_info *rt = (struct rt6_info *)dst;
222 struct inet6_dev *idev = rt->rt6i_idev;
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800223 struct net_device *loopback_dev =
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900224 dev_net(dev)->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700225
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800226 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
227 struct inet6_dev *loopback_idev =
228 in6_dev_get(loopback_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700229 if (loopback_idev != NULL) {
230 rt->rt6i_idev = loopback_idev;
231 in6_dev_put(idev);
232 }
233 }
234}
235
236static __inline__ int rt6_check_expired(const struct rt6_info *rt)
237{
Eric Dumazeta02cec22010-09-22 20:43:57 +0000238 return (rt->rt6i_flags & RTF_EXPIRES) &&
239 time_after(jiffies, rt->rt6i_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700240}
241
Thomas Grafc71099a2006-08-04 23:20:06 -0700242static inline int rt6_need_strict(struct in6_addr *daddr)
243{
Eric Dumazeta02cec22010-09-22 20:43:57 +0000244 return ipv6_addr_type(daddr) &
245 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
Thomas Grafc71099a2006-08-04 23:20:06 -0700246}
247
Linus Torvalds1da177e2005-04-16 15:20:36 -0700248/*
Thomas Grafc71099a2006-08-04 23:20:06 -0700249 * Route lookup. Any table->tb6_lock is implied.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700250 */
251
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800252static inline struct rt6_info *rt6_device_match(struct net *net,
253 struct rt6_info *rt,
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900254 struct in6_addr *saddr,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700255 int oif,
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700256 int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700257{
258 struct rt6_info *local = NULL;
259 struct rt6_info *sprt;
260
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900261 if (!oif && ipv6_addr_any(saddr))
262 goto out;
263
Changli Gaod8d1f302010-06-10 23:31:35 -0700264 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900265 struct net_device *dev = sprt->rt6i_dev;
266
267 if (oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700268 if (dev->ifindex == oif)
269 return sprt;
270 if (dev->flags & IFF_LOOPBACK) {
271 if (sprt->rt6i_idev == NULL ||
272 sprt->rt6i_idev->dev->ifindex != oif) {
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700273 if (flags & RT6_LOOKUP_F_IFACE && oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700274 continue;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900275 if (local && (!oif ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700276 local->rt6i_idev->dev->ifindex == oif))
277 continue;
278 }
279 local = sprt;
280 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900281 } else {
282 if (ipv6_chk_addr(net, saddr, dev,
283 flags & RT6_LOOKUP_F_IFACE))
284 return sprt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700285 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900286 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700287
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900288 if (oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700289 if (local)
290 return local;
291
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700292 if (flags & RT6_LOOKUP_F_IFACE)
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800293 return net->ipv6.ip6_null_entry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700294 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900295out:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700296 return rt;
297}
298
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: send a neighbour solicitation towards the
 * next hop of @rt when its neighbour entry is not in a valid state.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one per
 * minute; here the limit is the device's cnf.rtr_probe_interval, measured
 * from neigh->updated.  A NULL @rt is accepted and ignored.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;
	int probe_due;

	/* Unlocked fast path: nothing to probe, or already valid. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	read_lock_bh(&neigh->lock);
	probe_due = !(neigh->nud_state & NUD_VALID) &&
		    time_after(jiffies,
			       neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
	if (probe_due) {
		/*
		 * NOTE(review): this stores to neigh->updated while holding
		 * only the read side of neigh->lock - confirm this is intended.
		 */
		neigh->updated = jiffies;
	}
	read_unlock_bh(&neigh->lock);

	if (!probe_due)
		return;

	/* Solicit the neighbour via its solicited-node multicast address. */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
333
Linus Torvalds1da177e2005-04-16 15:20:36 -0700334/*
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800335 * Default Router Selection (RFC 2461 6.3.6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700336 */
Dave Jonesb6f99a22007-03-22 12:27:49 -0700337static inline int rt6_check_dev(struct rt6_info *rt, int oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700338{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800339 struct net_device *dev = rt->rt6i_dev;
David S. Miller161980f2007-04-06 11:42:27 -0700340 if (!oif || dev->ifindex == oif)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800341 return 2;
David S. Miller161980f2007-04-06 11:42:27 -0700342 if ((dev->flags & IFF_LOOPBACK) &&
343 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
344 return 1;
345 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700346}
347
Dave Jonesb6f99a22007-03-22 12:27:49 -0700348static inline int rt6_check_neigh(struct rt6_info *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700349{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800350 struct neighbour *neigh = rt->rt6i_nexthop;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800351 int m;
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700352 if (rt->rt6i_flags & RTF_NONEXTHOP ||
353 !(rt->rt6i_flags & RTF_GATEWAY))
354 m = 1;
355 else if (neigh) {
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800356 read_lock_bh(&neigh->lock);
357 if (neigh->nud_state & NUD_VALID)
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700358 m = 2;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800359#ifdef CONFIG_IPV6_ROUTER_PREF
360 else if (neigh->nud_state & NUD_FAILED)
361 m = 0;
362#endif
363 else
YOSHIFUJI Hideakiea73ee22006-11-06 09:45:44 -0800364 m = 1;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800365 read_unlock_bh(&neigh->lock);
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800366 } else
367 m = 0;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800368 return m;
369}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700370
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800371static int rt6_score_route(struct rt6_info *rt, int oif,
372 int strict)
373{
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700374 int m, n;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900375
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700376 m = rt6_check_dev(rt, oif);
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700377 if (!m && (strict & RT6_LOOKUP_F_IFACE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800378 return -1;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800379#ifdef CONFIG_IPV6_ROUTER_PREF
380 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
381#endif
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700382 n = rt6_check_neigh(rt);
YOSHIFUJI Hideaki557e92e2006-11-06 09:45:45 -0800383 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800384 return -1;
385 return m;
386}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700387
David S. Millerf11e6652007-03-24 20:36:25 -0700388static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
389 int *mpri, struct rt6_info *match)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800390{
David S. Millerf11e6652007-03-24 20:36:25 -0700391 int m;
392
393 if (rt6_check_expired(rt))
394 goto out;
395
396 m = rt6_score_route(rt, oif, strict);
397 if (m < 0)
398 goto out;
399
400 if (m > *mpri) {
401 if (strict & RT6_LOOKUP_F_REACHABLE)
402 rt6_probe(match);
403 *mpri = m;
404 match = rt;
405 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
406 rt6_probe(rt);
407 }
408
409out:
410 return match;
411}
412
413static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
414 struct rt6_info *rr_head,
415 u32 metric, int oif, int strict)
416{
417 struct rt6_info *rt, *match;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800418 int mpri = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700419
David S. Millerf11e6652007-03-24 20:36:25 -0700420 match = NULL;
421 for (rt = rr_head; rt && rt->rt6i_metric == metric;
Changli Gaod8d1f302010-06-10 23:31:35 -0700422 rt = rt->dst.rt6_next)
David S. Millerf11e6652007-03-24 20:36:25 -0700423 match = find_match(rt, oif, strict, &mpri, match);
424 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
Changli Gaod8d1f302010-06-10 23:31:35 -0700425 rt = rt->dst.rt6_next)
David S. Millerf11e6652007-03-24 20:36:25 -0700426 match = find_match(rt, oif, strict, &mpri, match);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800427
David S. Millerf11e6652007-03-24 20:36:25 -0700428 return match;
429}
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800430
David S. Millerf11e6652007-03-24 20:36:25 -0700431static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
432{
433 struct rt6_info *match, *rt0;
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800434 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700435
David S. Millerf11e6652007-03-24 20:36:25 -0700436 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
Harvey Harrison0dc47872008-03-05 20:47:47 -0800437 __func__, fn->leaf, oif);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700438
David S. Millerf11e6652007-03-24 20:36:25 -0700439 rt0 = fn->rr_ptr;
440 if (!rt0)
441 fn->rr_ptr = rt0 = fn->leaf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700442
David S. Millerf11e6652007-03-24 20:36:25 -0700443 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700444
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800445 if (!match &&
David S. Millerf11e6652007-03-24 20:36:25 -0700446 (strict & RT6_LOOKUP_F_REACHABLE)) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700447 struct rt6_info *next = rt0->dst.rt6_next;
David S. Millerf11e6652007-03-24 20:36:25 -0700448
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800449 /* no entries matched; do round-robin */
David S. Millerf11e6652007-03-24 20:36:25 -0700450 if (!next || next->rt6i_metric != rt0->rt6i_metric)
451 next = fn->leaf;
452
453 if (next != rt0)
454 fn->rr_ptr = next;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700455 }
456
David S. Millerf11e6652007-03-24 20:36:25 -0700457 RT6_TRACE("%s() => %p\n",
Harvey Harrison0dc47872008-03-05 20:47:47 -0800458 __func__, match);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700459
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900460 net = dev_net(rt0->rt6i_dev);
Eric Dumazeta02cec22010-09-22 20:43:57 +0000461 return match ? match : net->ipv6.ip6_null_entry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700462}
463
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option received on @dev.
 *
 * @dev:    receiving device
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: gateway address to associate with the route
 *
 * Returns -EINVAL if the option is shorter than struct route_info, if its
 * length / prefix_len fields are inconsistent, or if the preference is
 * ICMPV6_ROUTER_PREF_INVALID; returns 0 otherwise.  A zero lifetime
 * deletes an existing route; a non-zero lifetime adds a missing route or
 * refreshes the preference and expiry of an existing one.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		/* Option carries a full address: use it in place. */
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* Zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (rt) {
		/* Existing route: refresh its preference bits. */
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
	} else if (lifetime) {
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev->ifindex, pref);
	}

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime)) {
		rt->rt6i_expires = jiffies + HZ * lifetime;
		rt->rt6i_flags |= RTF_EXPIRES;
	} else {
		rt->rt6i_flags &= ~RTF_EXPIRES;
	}
	dst_release(&rt->dst);

	return 0;
}
#endif
537
/*
 * BACKTRACK - when the current node only produced the null entry, climb
 * towards the root looking for another node that carries route info.
 *
 * Relies on the expanding function providing locals "rt" and "fn" and
 * labels "out" and "restart".  At each step the parent's subtree (if it
 * has one that is not the current node) is searched by @saddr; otherwise
 * the parent itself becomes the current node.  Jumps to "restart" at the
 * first node flagged RTN_RTINFO, or to "out" on reaching a node flagged
 * RTN_TL_ROOT.
 */
#define BACKTRACK(__net, saddr) \
do { \
	if (rt == (__net)->ipv6.ip6_null_entry) { \
		struct fib6_node *pn; \
		for (;;) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
Thomas Grafc71099a2006-08-04 23:20:06 -0700555
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800556static struct rt6_info *ip6_pol_route_lookup(struct net *net,
557 struct fib6_table *table,
Thomas Grafc71099a2006-08-04 23:20:06 -0700558 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700559{
560 struct fib6_node *fn;
561 struct rt6_info *rt;
562
Thomas Grafc71099a2006-08-04 23:20:06 -0700563 read_lock_bh(&table->tb6_lock);
564 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
565restart:
566 rt = fn->leaf;
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900567 rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800568 BACKTRACK(net, &fl->fl6_src);
Thomas Grafc71099a2006-08-04 23:20:06 -0700569out:
Changli Gaod8d1f302010-06-10 23:31:35 -0700570 dst_use(&rt->dst, jiffies);
Thomas Grafc71099a2006-08-04 23:20:06 -0700571 read_unlock_bh(&table->tb6_lock);
Thomas Grafc71099a2006-08-04 23:20:06 -0700572 return rt;
573
574}
575
YOSHIFUJI Hideaki9acd9f32008-04-10 15:42:10 +0900576struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
577 const struct in6_addr *saddr, int oif, int strict)
Thomas Grafc71099a2006-08-04 23:20:06 -0700578{
579 struct flowi fl = {
580 .oif = oif,
Changli Gao58116622010-11-12 18:43:55 +0000581 .fl6_dst = *daddr,
Thomas Grafc71099a2006-08-04 23:20:06 -0700582 };
583 struct dst_entry *dst;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700584 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
Thomas Grafc71099a2006-08-04 23:20:06 -0700585
Thomas Grafadaa70b2006-10-13 15:01:03 -0700586 if (saddr) {
587 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
588 flags |= RT6_LOOKUP_F_HAS_SADDR;
589 }
590
Daniel Lezcano606a2b42008-03-04 13:45:59 -0800591 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
Thomas Grafc71099a2006-08-04 23:20:06 -0700592 if (dst->error == 0)
593 return (struct rt6_info *) dst;
594
595 dst_release(dst);
596
Linus Torvalds1da177e2005-04-16 15:20:36 -0700597 return NULL;
598}
599
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900600EXPORT_SYMBOL(rt6_lookup);
601
Thomas Grafc71099a2006-08-04 23:20:06 -0700602/* ip6_ins_rt is called with FREE table->tb6_lock.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700603 It takes new route entry, the addition fails by any reason the
604 route is freed. In any case, if caller does not hold it, it may
605 be destroyed.
606 */
607
Thomas Graf86872cb2006-08-22 00:01:08 -0700608static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700609{
610 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -0700611 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700612
Thomas Grafc71099a2006-08-04 23:20:06 -0700613 table = rt->rt6i_table;
614 write_lock_bh(&table->tb6_lock);
Thomas Graf86872cb2006-08-22 00:01:08 -0700615 err = fib6_add(&table->tb6_root, rt, info);
Thomas Grafc71099a2006-08-04 23:20:06 -0700616 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700617
618 return err;
619}
620
Thomas Graf40e22e82006-08-22 00:00:45 -0700621int ip6_ins_rt(struct rt6_info *rt)
622{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800623 struct nl_info info = {
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900624 .nl_net = dev_net(rt->rt6i_dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800625 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -0800626 return __ip6_ins_rt(rt, &info);
Thomas Graf40e22e82006-08-22 00:00:45 -0700627}
628
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800629static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
630 struct in6_addr *saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700631{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700632 struct rt6_info *rt;
633
634 /*
635 * Clone the route.
636 */
637
638 rt = ip6_rt_copy(ort);
639
640 if (rt) {
David S. Miller14deae42009-01-04 16:04:39 -0800641 struct neighbour *neigh;
642 int attempts = !in_softirq();
643
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900644 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
645 if (rt->rt6i_dst.plen != 128 &&
646 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
647 rt->rt6i_flags |= RTF_ANYCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700648 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900649 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700650
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900651 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700652 rt->rt6i_dst.plen = 128;
653 rt->rt6i_flags |= RTF_CACHE;
Changli Gaod8d1f302010-06-10 23:31:35 -0700654 rt->dst.flags |= DST_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700655
656#ifdef CONFIG_IPV6_SUBTREES
657 if (rt->rt6i_src.plen && saddr) {
658 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
659 rt->rt6i_src.plen = 128;
660 }
661#endif
662
David S. Miller14deae42009-01-04 16:04:39 -0800663 retry:
664 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
665 if (IS_ERR(neigh)) {
666 struct net *net = dev_net(rt->rt6i_dev);
667 int saved_rt_min_interval =
668 net->ipv6.sysctl.ip6_rt_gc_min_interval;
669 int saved_rt_elasticity =
670 net->ipv6.sysctl.ip6_rt_gc_elasticity;
671
672 if (attempts-- > 0) {
673 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
674 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
675
Alexey Dobriyan86393e52009-08-29 01:34:49 +0000676 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
David S. Miller14deae42009-01-04 16:04:39 -0800677
678 net->ipv6.sysctl.ip6_rt_gc_elasticity =
679 saved_rt_elasticity;
680 net->ipv6.sysctl.ip6_rt_gc_min_interval =
681 saved_rt_min_interval;
682 goto retry;
683 }
684
685 if (net_ratelimit())
686 printk(KERN_WARNING
Ulrich Weber7e1b33e2010-09-27 15:02:18 -0700687 "ipv6: Neighbour table overflow.\n");
Changli Gaod8d1f302010-06-10 23:31:35 -0700688 dst_free(&rt->dst);
David S. Miller14deae42009-01-04 16:04:39 -0800689 return NULL;
690 }
691 rt->rt6i_nexthop = neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700692
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800693 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700694
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800695 return rt;
696}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700697
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800698static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
699{
700 struct rt6_info *rt = ip6_rt_copy(ort);
701 if (rt) {
702 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
703 rt->rt6i_dst.plen = 128;
704 rt->rt6i_flags |= RTF_CACHE;
Changli Gaod8d1f302010-06-10 23:31:35 -0700705 rt->dst.flags |= DST_HOST;
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800706 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
707 }
708 return rt;
709}
710
/*
 * Route lookup worker shared by ip6_pol_route_input() and
 * ip6_pol_route_output(), which pass fl->iif resp. fl->oif as @oif.
 *
 * The lookup runs under table->tb6_lock (read side).  When the selected
 * route is the namespace's ip6_null_entry or already carries RTF_CACHE the
 * code goes to "out"; otherwise a per-destination copy is created with
 * rt6_alloc_cow() or rt6_alloc_clone() and inserted with ip6_ins_rt().
 * A failed insertion triggers a fresh lookup, bounded by "attempts" (3).
 *
 * Every return path has applied dst_hold() to the returned route and
 * refreshes dst.lastuse / dst.__use just before returning.
 *
 * NOTE(review): BACKTRACK() is a macro defined outside this block; the
 * otherwise unused "restart" label suggests it jumps there (and possibly to
 * "out") and relies on the local names fn/rt - confirm before renaming
 * anything here.
 */
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800711static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
712 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700713{
714 struct fib6_node *fn;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800715 struct rt6_info *rt, *nrt;
Thomas Grafc71099a2006-08-04 23:20:06 -0700716 int strict = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700717 int attempts = 3;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800718 int err;
/* The REACHABLE restriction is only requested when forwarding is off. */
YOSHIFUJI Hideaki53b79972008-07-19 22:35:03 -0700719 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700720
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700721 strict |= flags & RT6_LOOKUP_F_IFACE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700722
723relookup:
Thomas Grafc71099a2006-08-04 23:20:06 -0700724 read_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700725
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800726restart_2:
Thomas Grafc71099a2006-08-04 23:20:06 -0700727 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700728
729restart:
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700730 rt = rt6_select(fn, oif, strict | reachable);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800731
732 BACKTRACK(net, &fl->fl6_src);
733 if (rt == net->ipv6.ip6_null_entry ||
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800734 rt->rt6i_flags & RTF_CACHE)
YOSHIFUJI Hideaki1ddef0442006-03-20 17:01:24 -0800735 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700736
/* Pin the selected route so it stays valid once the table lock is dropped. */
Changli Gaod8d1f302010-06-10 23:31:35 -0700737 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700738 read_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800739
/* No nexthop yet and one is needed: COW copy; otherwise a plain clone. */
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800740 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800741 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
David S. Millerd80bc0f2011-01-24 16:01:58 -0800742 else
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800743 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800744
/* Drop the pin on the parent; fall back to the null entry if no copy was made. */
Changli Gaod8d1f302010-06-10 23:31:35 -0700745 dst_release(&rt->dst);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800746 rt = nrt ? : net->ipv6.ip6_null_entry;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800747
/* This hold is the reference handed to the caller if we leave via out2. */
Changli Gaod8d1f302010-06-10 23:31:35 -0700748 dst_hold(&rt->dst);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800749 if (nrt) {
Thomas Graf40e22e82006-08-22 00:00:45 -0700750 err = ip6_ins_rt(nrt);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800751 if (!err)
752 goto out2;
753 }
754
/* Out of retries: return whatever rt currently is, still held. */
755 if (--attempts <= 0)
756 goto out2;
757
758 /*
Thomas Grafc71099a2006-08-04 23:20:06 -0700759 * Race condition! In the gap, when table->tb6_lock was
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800760 * released someone could insert this route. Relookup.
761 */
Changli Gaod8d1f302010-06-10 23:31:35 -0700762 dst_release(&rt->dst);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800763 goto relookup;
764
765out:
/*
 * Reached with the table lock still held.  If this pass was restricted to
 * reachable routers, repeat the lookup once with the restriction dropped.
 */
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800766 if (reachable) {
767 reachable = 0;
768 goto restart_2;
769 }
Changli Gaod8d1f302010-06-10 23:31:35 -0700770 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700771 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700772out2:
Changli Gaod8d1f302010-06-10 23:31:35 -0700773 rt->dst.lastuse = jiffies;
774 rt->dst.__use++;
Thomas Grafc71099a2006-08-04 23:20:06 -0700775
776 return rt;
777}
778
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800779static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700780 struct flowi *fl, int flags)
781{
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800782 return ip6_pol_route(net, table, fl->iif, fl, flags);
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700783}
784
Thomas Grafc71099a2006-08-04 23:20:06 -0700785void ip6_route_input(struct sk_buff *skb)
786{
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700787 struct ipv6hdr *iph = ipv6_hdr(skb);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900788 struct net *net = dev_net(skb->dev);
Thomas Grafadaa70b2006-10-13 15:01:03 -0700789 int flags = RT6_LOOKUP_F_HAS_SADDR;
Thomas Grafc71099a2006-08-04 23:20:06 -0700790 struct flowi fl = {
791 .iif = skb->dev->ifindex,
Changli Gao58116622010-11-12 18:43:55 +0000792 .fl6_dst = iph->daddr,
793 .fl6_src = iph->saddr,
794 .fl6_flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900795 .mark = skb->mark,
Thomas Grafc71099a2006-08-04 23:20:06 -0700796 .proto = iph->nexthdr,
797 };
Thomas Grafadaa70b2006-10-13 15:01:03 -0700798
Thomas Goff1d6e55f2009-01-27 22:39:59 -0800799 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
Thomas Grafadaa70b2006-10-13 15:01:03 -0700800 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700801
Eric Dumazetadf30902009-06-02 05:19:30 +0000802 skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input));
Thomas Grafc71099a2006-08-04 23:20:06 -0700803}
804
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800805static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
Thomas Grafc71099a2006-08-04 23:20:06 -0700806 struct flowi *fl, int flags)
807{
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800808 return ip6_pol_route(net, table, fl->oif, fl, flags);
Thomas Grafc71099a2006-08-04 23:20:06 -0700809}
810
Daniel Lezcano4591db42008-03-05 10:48:10 -0800811struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
812 struct flowi *fl)
Thomas Grafc71099a2006-08-04 23:20:06 -0700813{
814 int flags = 0;
815
Brian Haley6057fd72010-05-28 23:02:35 -0700816 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl->fl6_dst))
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700817 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700818
Thomas Grafadaa70b2006-10-13 15:01:03 -0700819 if (!ipv6_addr_any(&fl->fl6_src))
820 flags |= RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideaki / 吉藤英明0c9a2ac2010-03-07 00:14:44 +0000821 else if (sk)
822 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
Thomas Grafadaa70b2006-10-13 15:01:03 -0700823
Daniel Lezcano4591db42008-03-05 10:48:10 -0800824 return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700825}
826
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900827EXPORT_SYMBOL(ip6_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700828
David S. Miller14e50e52007-05-24 18:17:54 -0700829int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
830{
831 struct rt6_info *ort = (struct rt6_info *) *dstp;
832 struct rt6_info *rt = (struct rt6_info *)
833 dst_alloc(&ip6_dst_blackhole_ops);
834 struct dst_entry *new = NULL;
835
836 if (rt) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700837 new = &rt->dst;
David S. Miller14e50e52007-05-24 18:17:54 -0700838
839 atomic_set(&new->__refcnt, 1);
840 new->__use = 1;
Herbert Xu352e5122007-11-13 21:34:06 -0800841 new->input = dst_discard;
842 new->output = dst_discard;
David S. Miller14e50e52007-05-24 18:17:54 -0700843
David S. Millerdefb3512010-12-08 21:16:57 -0800844 dst_copy_metrics(new, &ort->dst);
Changli Gaod8d1f302010-06-10 23:31:35 -0700845 new->dev = ort->dst.dev;
David S. Miller14e50e52007-05-24 18:17:54 -0700846 if (new->dev)
847 dev_hold(new->dev);
848 rt->rt6i_idev = ort->rt6i_idev;
849 if (rt->rt6i_idev)
850 in6_dev_hold(rt->rt6i_idev);
851 rt->rt6i_expires = 0;
852
853 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
854 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
855 rt->rt6i_metric = 0;
856
857 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
858#ifdef CONFIG_IPV6_SUBTREES
859 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
860#endif
861
862 dst_free(new);
863 }
864
865 dst_release(*dstp);
866 *dstp = new;
Eric Dumazeta02cec22010-09-22 20:43:57 +0000867 return new ? 0 : -ENOMEM;
David S. Miller14e50e52007-05-24 18:17:54 -0700868}
869EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
870
Linus Torvalds1da177e2005-04-16 15:20:36 -0700871/*
872 * Destination cache support functions
873 */
874
875static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
876{
877 struct rt6_info *rt;
878
879 rt = (struct rt6_info *) dst;
880
Herbert Xu10414442010-03-18 23:00:22 +0000881 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700882 return dst;
883
884 return NULL;
885}
886
887static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
888{
889 struct rt6_info *rt = (struct rt6_info *) dst;
890
891 if (rt) {
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +0000892 if (rt->rt6i_flags & RTF_CACHE) {
893 if (rt6_check_expired(rt)) {
894 ip6_del_rt(rt);
895 dst = NULL;
896 }
897 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700898 dst_release(dst);
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +0000899 dst = NULL;
900 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700901 }
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +0000902 return dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700903}
904
905static void ip6_link_failure(struct sk_buff *skb)
906{
907 struct rt6_info *rt;
908
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +0000909 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700910
Eric Dumazetadf30902009-06-02 05:19:30 +0000911 rt = (struct rt6_info *) skb_dst(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700912 if (rt) {
913 if (rt->rt6i_flags&RTF_CACHE) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700914 dst_set_expires(&rt->dst, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700915 rt->rt6i_flags |= RTF_EXPIRES;
916 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
917 rt->rt6i_node->fn_sernum = -1;
918 }
919}
920
921static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
922{
923 struct rt6_info *rt6 = (struct rt6_info*)dst;
924
925 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
926 rt6->rt6i_flags |= RTF_MODIFIED;
927 if (mtu < IPV6_MIN_MTU) {
David S. Millerdefb3512010-12-08 21:16:57 -0800928 u32 features = dst_metric(dst, RTAX_FEATURES);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700929 mtu = IPV6_MIN_MTU;
David S. Millerdefb3512010-12-08 21:16:57 -0800930 features |= RTAX_FEATURE_ALLFRAG;
931 dst_metric_set(dst, RTAX_FEATURES, features);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700932 }
David S. Millerdefb3512010-12-08 21:16:57 -0800933 dst_metric_set(dst, RTAX_MTU, mtu);
Tom Tucker8d717402006-07-30 20:43:36 -0700934 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700935 }
936}
937
David S. Miller0dbaee32010-12-13 12:52:14 -0800938static unsigned int ip6_default_advmss(const struct dst_entry *dst)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700939{
David S. Miller0dbaee32010-12-13 12:52:14 -0800940 struct net_device *dev = dst->dev;
941 unsigned int mtu = dst_mtu(dst);
942 struct net *net = dev_net(dev);
943
Linus Torvalds1da177e2005-04-16 15:20:36 -0700944 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
945
Daniel Lezcano55786892008-03-04 13:47:47 -0800946 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
947 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700948
949 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900950 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
951 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
952 * IPV6_MAXPLEN is also valid and means: "any MSS,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700953 * rely only on pmtu discovery"
954 */
955 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
956 mtu = IPV6_MAXPLEN;
957 return mtu;
958}
959
David S. Millerd33e4552010-12-14 13:01:14 -0800960static unsigned int ip6_default_mtu(const struct dst_entry *dst)
961{
962 unsigned int mtu = IPV6_MIN_MTU;
963 struct inet6_dev *idev;
964
965 rcu_read_lock();
966 idev = __in6_dev_get(dst->dev);
967 if (idev)
968 mtu = idev->cnf.mtu6;
969 rcu_read_unlock();
970
971 return mtu;
972}
973
/*
 * Head of the singly linked list (chained through dst->next) of the entries
 * created by icmp6_dst_alloc().  icmp6_dst_lock is taken around every
 * insertion, traversal and unlink performed by icmp6_dst_alloc(),
 * icmp6_dst_gc() and icmp6_clean_all().
 */
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800974static struct dst_entry *icmp6_dst_gc_list;
975static DEFINE_SPINLOCK(icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700976
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800977struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700978 struct neighbour *neigh,
YOSHIFUJI Hideaki9acd9f32008-04-10 15:42:10 +0900979 const struct in6_addr *addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700980{
981 struct rt6_info *rt;
982 struct inet6_dev *idev = in6_dev_get(dev);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900983 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700984
985 if (unlikely(idev == NULL))
986 return NULL;
987
Alexey Dobriyan86393e52009-08-29 01:34:49 +0000988 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700989 if (unlikely(rt == NULL)) {
990 in6_dev_put(idev);
991 goto out;
992 }
993
994 dev_hold(dev);
995 if (neigh)
996 neigh_hold(neigh);
David S. Miller14deae42009-01-04 16:04:39 -0800997 else {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700998 neigh = ndisc_get_neigh(dev, addr);
David S. Miller14deae42009-01-04 16:04:39 -0800999 if (IS_ERR(neigh))
1000 neigh = NULL;
1001 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001002
1003 rt->rt6i_dev = dev;
1004 rt->rt6i_idev = idev;
1005 rt->rt6i_nexthop = neigh;
Changli Gaod8d1f302010-06-10 23:31:35 -07001006 atomic_set(&rt->dst.__refcnt, 1);
David S. Millerdefb3512010-12-08 21:16:57 -08001007 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
Changli Gaod8d1f302010-06-10 23:31:35 -07001008 rt->dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001009
1010#if 0 /* there's no chance to use these for ndisc */
Changli Gaod8d1f302010-06-10 23:31:35 -07001011 rt->dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001012 ? DST_HOST
Linus Torvalds1da177e2005-04-16 15:20:36 -07001013 : 0;
1014 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1015 rt->rt6i_dst.plen = 128;
1016#endif
1017
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001018 spin_lock_bh(&icmp6_dst_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07001019 rt->dst.next = icmp6_dst_gc_list;
1020 icmp6_dst_gc_list = &rt->dst;
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001021 spin_unlock_bh(&icmp6_dst_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001022
Daniel Lezcano55786892008-03-04 13:47:47 -08001023 fib6_force_start_gc(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001024
1025out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001026 return &rt->dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001027}
1028
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001029int icmp6_dst_gc(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001030{
1031 struct dst_entry *dst, *next, **pprev;
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001032 int more = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001033
1034 next = NULL;
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001035
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001036 spin_lock_bh(&icmp6_dst_lock);
1037 pprev = &icmp6_dst_gc_list;
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001038
Linus Torvalds1da177e2005-04-16 15:20:36 -07001039 while ((dst = *pprev) != NULL) {
1040 if (!atomic_read(&dst->__refcnt)) {
1041 *pprev = dst->next;
1042 dst_free(dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001043 } else {
1044 pprev = &dst->next;
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001045 ++more;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001046 }
1047 }
1048
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001049 spin_unlock_bh(&icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001050
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001051 return more;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001052}
1053
David S. Miller1e493d12008-09-10 17:27:15 -07001054static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1055 void *arg)
1056{
1057 struct dst_entry *dst, **pprev;
1058
1059 spin_lock_bh(&icmp6_dst_lock);
1060 pprev = &icmp6_dst_gc_list;
1061 while ((dst = *pprev) != NULL) {
1062 struct rt6_info *rt = (struct rt6_info *) dst;
1063 if (func(rt, arg)) {
1064 *pprev = dst->next;
1065 dst_free(dst);
1066 } else {
1067 pprev = &dst->next;
1068 }
1069 }
1070 spin_unlock_bh(&icmp6_dst_lock);
1071}
1072
Daniel Lezcano569d3642008-01-18 03:56:57 -08001073static int ip6_dst_gc(struct dst_ops *ops)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001074{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001075 unsigned long now = jiffies;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00001076 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
Daniel Lezcano7019b782008-03-04 13:50:14 -08001077 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1078 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1079 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1080 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1081 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001082 int entries;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001083
Eric Dumazetfc66f952010-10-08 06:37:34 +00001084 entries = dst_entries_get_fast(ops);
Daniel Lezcano7019b782008-03-04 13:50:14 -08001085 if (time_after(rt_last_gc + rt_min_interval, now) &&
Eric Dumazetfc66f952010-10-08 06:37:34 +00001086 entries <= rt_max_size)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001087 goto out;
1088
Benjamin Thery6891a342008-03-04 13:49:47 -08001089 net->ipv6.ip6_rt_gc_expire++;
1090 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1091 net->ipv6.ip6_rt_last_gc = now;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001092 entries = dst_entries_get_slow(ops);
1093 if (entries < ops->gc_thresh)
Daniel Lezcano7019b782008-03-04 13:50:14 -08001094 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001095out:
Daniel Lezcano7019b782008-03-04 13:50:14 -08001096 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001097 return entries > rt_max_size;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001098}
1099
1100/* Clean host part of a prefix. Not necessary in radix tree,
1101 but results in cleaner routing tables.
1102
1103 Remove it only when all the things will work!
1104 */
1105
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001106int ip6_dst_hoplimit(struct dst_entry *dst)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001107{
David S. Miller5170ae82010-12-12 21:35:57 -08001108 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
David S. Millera02e4b72010-12-12 21:39:02 -08001109 if (hoplimit == 0) {
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001110 struct net_device *dev = dst->dev;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001111 struct inet6_dev *idev;
1112
1113 rcu_read_lock();
1114 idev = __in6_dev_get(dev);
1115 if (idev)
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001116 hoplimit = idev->cnf.hop_limit;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001117 else
YOSHIFUJI Hideaki53b79972008-07-19 22:35:03 -07001118 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001119 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001120 }
1121 return hoplimit;
1122}
David S. Millerabbf46a2010-12-12 21:14:46 -08001123EXPORT_SYMBOL(ip6_dst_hoplimit);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001124
1125/*
1126 *
1127 */
1128
/*
 * Create a route from @cfg and insert it into the FIB.
 *
 * Steps, in order: validate the prefix lengths; resolve the optional device
 * (fc_ifindex); default the metric and protocol; allocate the route and pick
 * its input handler from the destination type and flags; turn reject routes
 * and non-local routes via loopback into reject routes on the loopback
 * device; validate the gateway; look up the nexthop neighbour; apply the
 * netlink metrics; finally hand the route to __ip6_ins_rt().
 *
 * Side effects on @cfg: fc_metric and fc_protocol are overwritten when they
 * were left at 0 / RTPROT_UNSPEC, and fc_nlinfo.nl_net is set to the
 * device's namespace before insertion.
 *
 * Returns the result of __ip6_ins_rt(), or a negative errno set on one of
 * the failure paths below (-EINVAL, -ENODEV, -ENOBUFS, -ENOMEM,
 * -EHOSTUNREACH, or the error from the neighbour lookup).  Every failure
 * path goes through "out", which drops the dev/idev references taken here
 * and frees the partially built route.
 */
Thomas Graf86872cb2006-08-22 00:01:08 -07001129int ip6_route_add(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001130{
1131 int err;
Daniel Lezcano55786892008-03-04 13:47:47 -08001132 struct net *net = cfg->fc_nlinfo.nl_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001133 struct rt6_info *rt = NULL;
1134 struct net_device *dev = NULL;
1135 struct inet6_dev *idev = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001136 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001137 int addr_type;
1138
Thomas Graf86872cb2006-08-22 00:01:08 -07001139 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001140 return -EINVAL;
/* Source-prefix routes are only accepted when subtrees are compiled in. */
1141#ifndef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001142 if (cfg->fc_src_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001143 return -EINVAL;
1144#endif
/* Resolve the optional interface; dev and idev are released again at out:. */
Thomas Graf86872cb2006-08-22 00:01:08 -07001145 if (cfg->fc_ifindex) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001146 err = -ENODEV;
Daniel Lezcano55786892008-03-04 13:47:47 -08001147 dev = dev_get_by_index(net, cfg->fc_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001148 if (!dev)
1149 goto out;
1150 idev = in6_dev_get(dev);
1151 if (!idev)
1152 goto out;
1153 }
1154
/* A zero metric selects the default user priority. */
Thomas Graf86872cb2006-08-22 00:01:08 -07001155 if (cfg->fc_metric == 0)
1156 cfg->fc_metric = IP6_RT_PRIO_USER;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001157
Daniel Lezcano55786892008-03-04 13:47:47 -08001158 table = fib6_new_table(net, cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001159 if (table == NULL) {
1160 err = -ENOBUFS;
1161 goto out;
1162 }
1163
Alexey Dobriyan86393e52009-08-29 01:34:49 +00001164 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001165
1166 if (rt == NULL) {
1167 err = -ENOMEM;
1168 goto out;
1169 }
1170
Changli Gaod8d1f302010-06-10 23:31:35 -07001171 rt->dst.obsolete = -1;
/* fc_expires is in clock_t units and only honoured with RTF_EXPIRES. */
YOSHIFUJI Hideaki6f704992008-05-19 16:56:11 -07001172 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1173 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1174 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001175
Thomas Graf86872cb2006-08-22 00:01:08 -07001176 if (cfg->fc_protocol == RTPROT_UNSPEC)
1177 cfg->fc_protocol = RTPROT_BOOT;
1178 rt->rt6i_protocol = cfg->fc_protocol;
1179
1180 addr_type = ipv6_addr_type(&cfg->fc_dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001181
/* Input handler: multicast, local delivery, or forwarding. */
1182 if (addr_type & IPV6_ADDR_MULTICAST)
Changli Gaod8d1f302010-06-10 23:31:35 -07001183 rt->dst.input = ip6_mc_input;
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00001184 else if (cfg->fc_flags & RTF_LOCAL)
1185 rt->dst.input = ip6_input;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001186 else
Changli Gaod8d1f302010-06-10 23:31:35 -07001187 rt->dst.input = ip6_forward;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001188
Changli Gaod8d1f302010-06-10 23:31:35 -07001189 rt->dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001190
Thomas Graf86872cb2006-08-22 00:01:08 -07001191 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1192 rt->rt6i_dst.plen = cfg->fc_dst_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001193 if (rt->rt6i_dst.plen == 128)
Changli Gaod8d1f302010-06-10 23:31:35 -07001194 rt->dst.flags = DST_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001195
1196#ifdef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001197 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1198 rt->rt6i_src.plen = cfg->fc_src_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001199#endif
1200
Thomas Graf86872cb2006-08-22 00:01:08 -07001201 rt->rt6i_metric = cfg->fc_metric;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001202
1203 /* We cannot add true routes via loopback here,
1204 they would result in kernel looping; promote them to reject routes
1205 */
Thomas Graf86872cb2006-08-22 00:01:08 -07001206 if ((cfg->fc_flags & RTF_REJECT) ||
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00001207 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1208 && !(cfg->fc_flags&RTF_LOCAL))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001209 /* hold loopback dev/idev if we haven't done so. */
Daniel Lezcano55786892008-03-04 13:47:47 -08001210 if (dev != net->loopback_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001211 if (dev) {
1212 dev_put(dev);
1213 in6_dev_put(idev);
1214 }
Daniel Lezcano55786892008-03-04 13:47:47 -08001215 dev = net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001216 dev_hold(dev);
1217 idev = in6_dev_get(dev);
1218 if (!idev) {
1219 err = -ENODEV;
1220 goto out;
1221 }
1222 }
/* Reject route: both handlers discard and the dst reports -ENETUNREACH. */
Changli Gaod8d1f302010-06-10 23:31:35 -07001223 rt->dst.output = ip6_pkt_discard_out;
1224 rt->dst.input = ip6_pkt_discard;
1225 rt->dst.error = -ENETUNREACH;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001226 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1227 goto install_route;
1228 }
1229
Thomas Graf86872cb2006-08-22 00:01:08 -07001230 if (cfg->fc_flags & RTF_GATEWAY) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001231 struct in6_addr *gw_addr;
1232 int gwa_type;
1233
Thomas Graf86872cb2006-08-22 00:01:08 -07001234 gw_addr = &cfg->fc_gateway;
1235 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001236 gwa_type = ipv6_addr_type(gw_addr);
1237
1238 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1239 struct rt6_info *grt;
1240
1241 /* IPv6 strictly inhibits using not link-local
1242 addresses as nexthop address.
1243 Otherwise, router will not able to send redirects.
1244 It is very good, but in some (rare!) circumstances
1245 (SIT, PtP, NBMA NOARP links) it is handy to allow
1246 some exceptions. --ANK
1247 */
1248 err = -EINVAL;
1249 if (!(gwa_type&IPV6_ADDR_UNICAST))
1250 goto out;
1251
/* The gateway itself must be routable; grt is released again below. */
Daniel Lezcano55786892008-03-04 13:47:47 -08001252 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001253
1254 err = -EHOSTUNREACH;
1255 if (grt == NULL)
1256 goto out;
1257 if (dev) {
1258 if (dev != grt->rt6i_dev) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001259 dst_release(&grt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001260 goto out;
1261 }
1262 } else {
/* No device was given: adopt the one of the route to the gateway. */
1263 dev = grt->rt6i_dev;
1264 idev = grt->rt6i_idev;
1265 dev_hold(dev);
1266 in6_dev_hold(grt->rt6i_idev);
1267 }
/* err is still -EHOSTUNREACH; it is cleared only if the route to the
   gateway is not itself a gateway route. */
1268 if (!(grt->rt6i_flags&RTF_GATEWAY))
1269 err = 0;
Changli Gaod8d1f302010-06-10 23:31:35 -07001270 dst_release(&grt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001271
1272 if (err)
1273 goto out;
1274 }
1275 err = -EINVAL;
1276 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1277 goto out;
1278 }
1279
1280 err = -ENODEV;
1281 if (dev == NULL)
1282 goto out;
1283
Thomas Graf86872cb2006-08-22 00:01:08 -07001284 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001285 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1286 if (IS_ERR(rt->rt6i_nexthop)) {
1287 err = PTR_ERR(rt->rt6i_nexthop);
1288 rt->rt6i_nexthop = NULL;
1289 goto out;
1290 }
1291 }
1292
Thomas Graf86872cb2006-08-22 00:01:08 -07001293 rt->rt6i_flags = cfg->fc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001294
1295install_route:
/* Copy netlink metrics: attributes of type 0 are skipped, a type above
   RTAX_MAX is rejected, everything else is stored as a u32 metric. */
Thomas Graf86872cb2006-08-22 00:01:08 -07001296 if (cfg->fc_mx) {
1297 struct nlattr *nla;
1298 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001299
Thomas Graf86872cb2006-08-22 00:01:08 -07001300 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
Thomas Graf8f4c1f92007-09-12 14:44:36 +02001301 int type = nla_type(nla);
Thomas Graf86872cb2006-08-22 00:01:08 -07001302
1303 if (type) {
1304 if (type > RTAX_MAX) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001305 err = -EINVAL;
1306 goto out;
1307 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001308
David S. Millerdefb3512010-12-08 21:16:57 -08001309 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001310 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001311 }
1312 }
1313
/* From here on the dev/idev pointers (and their references) live in rt. */
Changli Gaod8d1f302010-06-10 23:31:35 -07001314 rt->dst.dev = dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001315 rt->rt6i_idev = idev;
Thomas Grafc71099a2006-08-04 23:20:06 -07001316 rt->rt6i_table = table;
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001317
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001318 cfg->fc_nlinfo.nl_net = dev_net(dev);
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001319
Thomas Graf86872cb2006-08-22 00:01:08 -07001320 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001321
/* Error exit: undo the references taken above and free the unfinished route. */
1322out:
1323 if (dev)
1324 dev_put(dev);
1325 if (idev)
1326 in6_dev_put(idev);
1327 if (rt)
Changli Gaod8d1f302010-06-10 23:31:35 -07001328 dst_free(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001329 return err;
1330}
1331
Thomas Graf86872cb2006-08-22 00:01:08 -07001332static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001333{
1334 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -07001335 struct fib6_table *table;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001336 struct net *net = dev_net(rt->rt6i_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001337
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001338 if (rt == net->ipv6.ip6_null_entry)
Patrick McHardy6c813a72006-08-06 22:22:47 -07001339 return -ENOENT;
1340
Thomas Grafc71099a2006-08-04 23:20:06 -07001341 table = rt->rt6i_table;
1342 write_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001343
Thomas Graf86872cb2006-08-22 00:01:08 -07001344 err = fib6_del(rt, info);
Changli Gaod8d1f302010-06-10 23:31:35 -07001345 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001346
Thomas Grafc71099a2006-08-04 23:20:06 -07001347 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001348
1349 return err;
1350}
1351
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001352int ip6_del_rt(struct rt6_info *rt)
1353{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001354 struct nl_info info = {
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001355 .nl_net = dev_net(rt->rt6i_dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001356 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08001357 return __ip6_del_rt(rt, &info);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001358}
1359
Thomas Graf86872cb2006-08-22 00:01:08 -07001360static int ip6_route_del(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001361{
Thomas Grafc71099a2006-08-04 23:20:06 -07001362 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001363 struct fib6_node *fn;
1364 struct rt6_info *rt;
1365 int err = -ESRCH;
1366
Daniel Lezcano55786892008-03-04 13:47:47 -08001367 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001368 if (table == NULL)
1369 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001370
Thomas Grafc71099a2006-08-04 23:20:06 -07001371 read_lock_bh(&table->tb6_lock);
1372
1373 fn = fib6_locate(&table->tb6_root,
Thomas Graf86872cb2006-08-22 00:01:08 -07001374 &cfg->fc_dst, cfg->fc_dst_len,
1375 &cfg->fc_src, cfg->fc_src_len);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001376
Linus Torvalds1da177e2005-04-16 15:20:36 -07001377 if (fn) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001378 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
Thomas Graf86872cb2006-08-22 00:01:08 -07001379 if (cfg->fc_ifindex &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001380 (rt->rt6i_dev == NULL ||
Thomas Graf86872cb2006-08-22 00:01:08 -07001381 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001382 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001383 if (cfg->fc_flags & RTF_GATEWAY &&
1384 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001385 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001386 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001387 continue;
Changli Gaod8d1f302010-06-10 23:31:35 -07001388 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001389 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001390
Thomas Graf86872cb2006-08-22 00:01:08 -07001391 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001392 }
1393 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001394 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001395
1396 return err;
1397}
1398
1399/*
1400 * Handle redirects
1401 */
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001402struct ip6rd_flowi {
1403 struct flowi fl;
1404 struct in6_addr gateway;
1405};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001406
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001407static struct rt6_info *__ip6_route_redirect(struct net *net,
1408 struct fib6_table *table,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001409 struct flowi *fl,
1410 int flags)
1411{
1412 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1413 struct rt6_info *rt;
1414 struct fib6_node *fn;
Thomas Grafc71099a2006-08-04 23:20:06 -07001415
Linus Torvalds1da177e2005-04-16 15:20:36 -07001416 /*
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001417 * Get the "current" route for this destination and
1418 * check if the redirect has come from approriate router.
1419 *
1420 * RFC 2461 specifies that redirects should only be
1421 * accepted if they come from the nexthop to the target.
1422 * Due to the way the routes are chosen, this notion
1423 * is a bit fuzzy and one might need to check all possible
1424 * routes.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001425 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001426
Thomas Grafc71099a2006-08-04 23:20:06 -07001427 read_lock_bh(&table->tb6_lock);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001428 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001429restart:
Changli Gaod8d1f302010-06-10 23:31:35 -07001430 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001431 /*
1432 * Current route is on-link; redirect is always invalid.
1433 *
1434 * Seems, previous statement is not true. It could
1435 * be node, which looks for us as on-link (f.e. proxy ndisc)
1436 * But then router serving it might decide, that we should
1437 * know truth 8)8) --ANK (980726).
1438 */
1439 if (rt6_check_expired(rt))
1440 continue;
1441 if (!(rt->rt6i_flags & RTF_GATEWAY))
1442 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001443 if (fl->oif != rt->rt6i_dev->ifindex)
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001444 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001445 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001446 continue;
1447 break;
1448 }
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001449
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001450 if (!rt)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001451 rt = net->ipv6.ip6_null_entry;
1452 BACKTRACK(net, &fl->fl6_src);
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001453out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001454 dst_hold(&rt->dst);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001455
1456 read_unlock_bh(&table->tb6_lock);
1457
1458 return rt;
1459};
1460
1461static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1462 struct in6_addr *src,
1463 struct in6_addr *gateway,
1464 struct net_device *dev)
1465{
Thomas Grafadaa70b2006-10-13 15:01:03 -07001466 int flags = RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001467 struct net *net = dev_net(dev);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001468 struct ip6rd_flowi rdfl = {
1469 .fl = {
1470 .oif = dev->ifindex,
Changli Gao58116622010-11-12 18:43:55 +00001471 .fl6_dst = *dest,
1472 .fl6_src = *src,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001473 },
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001474 };
Thomas Grafadaa70b2006-10-13 15:01:03 -07001475
Brian Haley86c36ce2009-10-07 13:58:01 -07001476 ipv6_addr_copy(&rdfl.gateway, gateway);
1477
Thomas Grafadaa70b2006-10-13 15:01:03 -07001478 if (rt6_need_strict(dest))
1479 flags |= RT6_LOOKUP_F_IFACE;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001480
Daniel Lezcano55786892008-03-04 13:47:47 -08001481 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
Daniel Lezcano58f09b72008-03-03 23:25:27 -08001482 flags, __ip6_route_redirect);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001483}
1484
1485void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1486 struct in6_addr *saddr,
1487 struct neighbour *neigh, u8 *lladdr, int on_link)
1488{
1489 struct rt6_info *rt, *nrt = NULL;
1490 struct netevent_redirect netevent;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001491 struct net *net = dev_net(neigh->dev);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001492
1493 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1494
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001495 if (rt == net->ipv6.ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001496 if (net_ratelimit())
1497 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1498 "for redirect target\n");
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001499 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001500 }
1501
Linus Torvalds1da177e2005-04-16 15:20:36 -07001502 /*
1503 * We have finally decided to accept it.
1504 */
1505
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001506 neigh_update(neigh, lladdr, NUD_STALE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001507 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1508 NEIGH_UPDATE_F_OVERRIDE|
1509 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1510 NEIGH_UPDATE_F_ISROUTER))
1511 );
1512
1513 /*
1514 * Redirect received -> path was valid.
1515 * Look, redirects are sent only in response to data packets,
1516 * so that this nexthop apparently is reachable. --ANK
1517 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001518 dst_confirm(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001519
1520 /* Duplicate redirect: silently ignore. */
Changli Gaod8d1f302010-06-10 23:31:35 -07001521 if (neigh == rt->dst.neighbour)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001522 goto out;
1523
1524 nrt = ip6_rt_copy(rt);
1525 if (nrt == NULL)
1526 goto out;
1527
1528 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1529 if (on_link)
1530 nrt->rt6i_flags &= ~RTF_GATEWAY;
1531
1532 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1533 nrt->rt6i_dst.plen = 128;
Changli Gaod8d1f302010-06-10 23:31:35 -07001534 nrt->dst.flags |= DST_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001535
1536 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1537 nrt->rt6i_nexthop = neigh_clone(neigh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001538
Thomas Graf40e22e82006-08-22 00:00:45 -07001539 if (ip6_ins_rt(nrt))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001540 goto out;
1541
Changli Gaod8d1f302010-06-10 23:31:35 -07001542 netevent.old = &rt->dst;
1543 netevent.new = &nrt->dst;
Tom Tucker8d717402006-07-30 20:43:36 -07001544 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1545
Linus Torvalds1da177e2005-04-16 15:20:36 -07001546 if (rt->rt6i_flags&RTF_CACHE) {
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001547 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001548 return;
1549 }
1550
1551out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001552 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001553}
1554
1555/*
1556 * Handle ICMP "packet too big" messages
1557 * i.e. Path MTU discovery
1558 */
1559
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001560static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr,
1561 struct net *net, u32 pmtu, int ifindex)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001562{
1563 struct rt6_info *rt, *nrt;
1564 int allfrag = 0;
Andrey Vagind3052b52010-12-11 15:20:11 +00001565again:
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001566 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001567 if (rt == NULL)
1568 return;
1569
Andrey Vagind3052b52010-12-11 15:20:11 +00001570 if (rt6_check_expired(rt)) {
1571 ip6_del_rt(rt);
1572 goto again;
1573 }
1574
Changli Gaod8d1f302010-06-10 23:31:35 -07001575 if (pmtu >= dst_mtu(&rt->dst))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001576 goto out;
1577
1578 if (pmtu < IPV6_MIN_MTU) {
1579 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001580 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
Linus Torvalds1da177e2005-04-16 15:20:36 -07001581 * MTU (1280) and a fragment header should always be included
1582 * after a node receiving Too Big message reporting PMTU is
1583 * less than the IPv6 Minimum Link MTU.
1584 */
1585 pmtu = IPV6_MIN_MTU;
1586 allfrag = 1;
1587 }
1588
1589 /* New mtu received -> path was valid.
1590 They are sent only in response to data packets,
1591 so that this nexthop apparently is reachable. --ANK
1592 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001593 dst_confirm(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001594
1595 /* Host route. If it is static, it would be better
1596 not to override it, but add new one, so that
1597 when cache entry will expire old pmtu
1598 would return automatically.
1599 */
1600 if (rt->rt6i_flags & RTF_CACHE) {
David S. Millerdefb3512010-12-08 21:16:57 -08001601 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1602 if (allfrag) {
1603 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1604 features |= RTAX_FEATURE_ALLFRAG;
1605 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1606 }
Changli Gaod8d1f302010-06-10 23:31:35 -07001607 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001608 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1609 goto out;
1610 }
1611
1612 /* Network route.
1613 Two cases are possible:
1614 1. It is connected route. Action: COW
1615 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1616 */
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001617 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001618 nrt = rt6_alloc_cow(rt, daddr, saddr);
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001619 else
1620 nrt = rt6_alloc_clone(rt, daddr);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001621
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001622 if (nrt) {
David S. Millerdefb3512010-12-08 21:16:57 -08001623 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1624 if (allfrag) {
1625 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1626 features |= RTAX_FEATURE_ALLFRAG;
1627 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1628 }
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001629
1630 /* According to RFC 1981, detecting PMTU increase shouldn't be
1631 * happened within 5 mins, the recommended timer is 10 mins.
1632 * Here this route expiration time is set to ip6_rt_mtu_expires
1633 * which is 10 mins. After 10 mins the decreased pmtu is expired
1634 * and detecting PMTU increase will be automatically happened.
1635 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001636 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001637 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1638
Thomas Graf40e22e82006-08-22 00:00:45 -07001639 ip6_ins_rt(nrt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001640 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001641out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001642 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001643}
1644
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001645void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1646 struct net_device *dev, u32 pmtu)
1647{
1648 struct net *net = dev_net(dev);
1649
1650 /*
1651 * RFC 1981 states that a node "MUST reduce the size of the packets it
1652 * is sending along the path" that caused the Packet Too Big message.
1653 * Since it's not possible in the general case to determine which
1654 * interface was used to send the original packet, we update the MTU
1655 * on the interface that will be used to send future packets. We also
1656 * update the MTU on the interface that received the Packet Too Big in
1657 * case the original packet was forced out that interface with
1658 * SO_BINDTODEVICE or similar. This is the next best thing to the
1659 * correct behaviour, which would be to update the MTU on all
1660 * interfaces.
1661 */
1662 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1663 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1664}
1665
Linus Torvalds1da177e2005-04-16 15:20:36 -07001666/*
1667 * Misc support functions
1668 */
1669
1670static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1671{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001672 struct net *net = dev_net(ort->rt6i_dev);
Alexey Dobriyan86393e52009-08-29 01:34:49 +00001673 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001674
1675 if (rt) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001676 rt->dst.input = ort->dst.input;
1677 rt->dst.output = ort->dst.output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001678
David S. Millerdefb3512010-12-08 21:16:57 -08001679 dst_copy_metrics(&rt->dst, &ort->dst);
Changli Gaod8d1f302010-06-10 23:31:35 -07001680 rt->dst.error = ort->dst.error;
1681 rt->dst.dev = ort->dst.dev;
1682 if (rt->dst.dev)
1683 dev_hold(rt->dst.dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001684 rt->rt6i_idev = ort->rt6i_idev;
1685 if (rt->rt6i_idev)
1686 in6_dev_hold(rt->rt6i_idev);
Changli Gaod8d1f302010-06-10 23:31:35 -07001687 rt->dst.lastuse = jiffies;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001688 rt->rt6i_expires = 0;
1689
1690 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1691 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1692 rt->rt6i_metric = 0;
1693
1694 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1695#ifdef CONFIG_IPV6_SUBTREES
1696 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1697#endif
Thomas Grafc71099a2006-08-04 23:20:06 -07001698 rt->rt6i_table = ort->rt6i_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001699 }
1700 return rt;
1701}
1702
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001703#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001704static struct rt6_info *rt6_get_route_info(struct net *net,
1705 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001706 struct in6_addr *gwaddr, int ifindex)
1707{
1708 struct fib6_node *fn;
1709 struct rt6_info *rt = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001710 struct fib6_table *table;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001711
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001712 table = fib6_get_table(net, RT6_TABLE_INFO);
Thomas Grafc71099a2006-08-04 23:20:06 -07001713 if (table == NULL)
1714 return NULL;
1715
1716 write_lock_bh(&table->tb6_lock);
1717 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001718 if (!fn)
1719 goto out;
1720
Changli Gaod8d1f302010-06-10 23:31:35 -07001721 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001722 if (rt->rt6i_dev->ifindex != ifindex)
1723 continue;
1724 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1725 continue;
1726 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1727 continue;
Changli Gaod8d1f302010-06-10 23:31:35 -07001728 dst_hold(&rt->dst);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001729 break;
1730 }
1731out:
Thomas Grafc71099a2006-08-04 23:20:06 -07001732 write_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001733 return rt;
1734}
1735
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001736static struct rt6_info *rt6_add_route_info(struct net *net,
1737 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001738 struct in6_addr *gwaddr, int ifindex,
1739 unsigned pref)
1740{
Thomas Graf86872cb2006-08-22 00:01:08 -07001741 struct fib6_config cfg = {
1742 .fc_table = RT6_TABLE_INFO,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001743 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001744 .fc_ifindex = ifindex,
1745 .fc_dst_len = prefixlen,
1746 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1747 RTF_UP | RTF_PREF(pref),
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001748 .fc_nlinfo.pid = 0,
1749 .fc_nlinfo.nlh = NULL,
1750 .fc_nlinfo.nl_net = net,
Thomas Graf86872cb2006-08-22 00:01:08 -07001751 };
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001752
Thomas Graf86872cb2006-08-22 00:01:08 -07001753 ipv6_addr_copy(&cfg.fc_dst, prefix);
1754 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1755
YOSHIFUJI Hideakie317da92006-03-20 17:06:42 -08001756 /* We should treat it as a default route if prefix length is 0. */
1757 if (!prefixlen)
Thomas Graf86872cb2006-08-22 00:01:08 -07001758 cfg.fc_flags |= RTF_DEFAULT;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001759
Thomas Graf86872cb2006-08-22 00:01:08 -07001760 ip6_route_add(&cfg);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001761
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001762 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001763}
1764#endif
1765
Linus Torvalds1da177e2005-04-16 15:20:36 -07001766struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001767{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001768 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001769 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001770
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001771 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001772 if (table == NULL)
1773 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001774
Thomas Grafc71099a2006-08-04 23:20:06 -07001775 write_lock_bh(&table->tb6_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07001776 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001777 if (dev == rt->rt6i_dev &&
YOSHIFUJI Hideaki045927f2006-03-20 17:00:48 -08001778 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001779 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1780 break;
1781 }
1782 if (rt)
Changli Gaod8d1f302010-06-10 23:31:35 -07001783 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001784 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001785 return rt;
1786}
1787
1788struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001789 struct net_device *dev,
1790 unsigned int pref)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001791{
Thomas Graf86872cb2006-08-22 00:01:08 -07001792 struct fib6_config cfg = {
1793 .fc_table = RT6_TABLE_DFLT,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001794 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001795 .fc_ifindex = dev->ifindex,
1796 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1797 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
Daniel Lezcano55786892008-03-04 13:47:47 -08001798 .fc_nlinfo.pid = 0,
1799 .fc_nlinfo.nlh = NULL,
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001800 .fc_nlinfo.nl_net = dev_net(dev),
Thomas Graf86872cb2006-08-22 00:01:08 -07001801 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001802
Thomas Graf86872cb2006-08-22 00:01:08 -07001803 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001804
Thomas Graf86872cb2006-08-22 00:01:08 -07001805 ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001806
Linus Torvalds1da177e2005-04-16 15:20:36 -07001807 return rt6_get_dflt_router(gwaddr, dev);
1808}
1809
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001810void rt6_purge_dflt_routers(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001811{
1812 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001813 struct fib6_table *table;
1814
1815 /* NOTE: Keep consistent with rt6_get_dflt_router */
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001816 table = fib6_get_table(net, RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001817 if (table == NULL)
1818 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001819
1820restart:
Thomas Grafc71099a2006-08-04 23:20:06 -07001821 read_lock_bh(&table->tb6_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07001822 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001823 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001824 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001825 read_unlock_bh(&table->tb6_lock);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001826 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001827 goto restart;
1828 }
1829 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001830 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001831}
1832
Daniel Lezcano55786892008-03-04 13:47:47 -08001833static void rtmsg_to_fib6_config(struct net *net,
1834 struct in6_rtmsg *rtmsg,
Thomas Graf86872cb2006-08-22 00:01:08 -07001835 struct fib6_config *cfg)
1836{
1837 memset(cfg, 0, sizeof(*cfg));
1838
1839 cfg->fc_table = RT6_TABLE_MAIN;
1840 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1841 cfg->fc_metric = rtmsg->rtmsg_metric;
1842 cfg->fc_expires = rtmsg->rtmsg_info;
1843 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1844 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1845 cfg->fc_flags = rtmsg->rtmsg_flags;
1846
Daniel Lezcano55786892008-03-04 13:47:47 -08001847 cfg->fc_nlinfo.nl_net = net;
Benjamin Theryf1243c22008-02-26 18:10:03 -08001848
Thomas Graf86872cb2006-08-22 00:01:08 -07001849 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1850 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1851 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1852}
1853
Daniel Lezcano55786892008-03-04 13:47:47 -08001854int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001855{
Thomas Graf86872cb2006-08-22 00:01:08 -07001856 struct fib6_config cfg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001857 struct in6_rtmsg rtmsg;
1858 int err;
1859
1860 switch(cmd) {
1861 case SIOCADDRT: /* Add a route */
1862 case SIOCDELRT: /* Delete a route */
1863 if (!capable(CAP_NET_ADMIN))
1864 return -EPERM;
1865 err = copy_from_user(&rtmsg, arg,
1866 sizeof(struct in6_rtmsg));
1867 if (err)
1868 return -EFAULT;
Thomas Graf86872cb2006-08-22 00:01:08 -07001869
Daniel Lezcano55786892008-03-04 13:47:47 -08001870 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
Thomas Graf86872cb2006-08-22 00:01:08 -07001871
Linus Torvalds1da177e2005-04-16 15:20:36 -07001872 rtnl_lock();
1873 switch (cmd) {
1874 case SIOCADDRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001875 err = ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001876 break;
1877 case SIOCDELRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001878 err = ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001879 break;
1880 default:
1881 err = -EINVAL;
1882 }
1883 rtnl_unlock();
1884
1885 return err;
Stephen Hemminger3ff50b72007-04-20 17:09:22 -07001886 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001887
1888 return -EINVAL;
1889}
1890
1891/*
1892 * Drop the packet on the floor
1893 */
1894
Brian Haleyd5fdd6b2009-06-23 04:31:07 -07001895static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001896{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001897 int type;
Eric Dumazetadf30902009-06-02 05:19:30 +00001898 struct dst_entry *dst = skb_dst(skb);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001899 switch (ipstats_mib_noroutes) {
1900 case IPSTATS_MIB_INNOROUTES:
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -07001901 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
Ulrich Weber45bb0062010-02-25 23:28:58 +00001902 if (type == IPV6_ADDR_ANY) {
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07001903 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1904 IPSTATS_MIB_INADDRERRORS);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001905 break;
1906 }
1907 /* FALLTHROUGH */
1908 case IPSTATS_MIB_OUTNOROUTES:
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07001909 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1910 ipstats_mib_noroutes);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001911 break;
1912 }
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +00001913 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001914 kfree_skb(skb);
1915 return 0;
1916}
1917
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001918static int ip6_pkt_discard(struct sk_buff *skb)
1919{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001920 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001921}
1922
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001923static int ip6_pkt_discard_out(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001924{
Eric Dumazetadf30902009-06-02 05:19:30 +00001925 skb->dev = skb_dst(skb)->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001926 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001927}
1928
David S. Miller6723ab52006-10-18 21:20:57 -07001929#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1930
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001931static int ip6_pkt_prohibit(struct sk_buff *skb)
1932{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001933 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001934}
1935
1936static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1937{
Eric Dumazetadf30902009-06-02 05:19:30 +00001938 skb->dev = skb_dst(skb)->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001939 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001940}
1941
David S. Miller6723ab52006-10-18 21:20:57 -07001942#endif
1943
Linus Torvalds1da177e2005-04-16 15:20:36 -07001944/*
1945 * Allocate a dst for local (unicast / anycast) address.
1946 */
1947
1948struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1949 const struct in6_addr *addr,
1950 int anycast)
1951{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001952 struct net *net = dev_net(idev->dev);
Alexey Dobriyan86393e52009-08-29 01:34:49 +00001953 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
David S. Miller14deae42009-01-04 16:04:39 -08001954 struct neighbour *neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001955
Ben Greear40385652010-11-08 12:33:48 +00001956 if (rt == NULL) {
1957 if (net_ratelimit())
1958 pr_warning("IPv6: Maximum number of routes reached,"
1959 " consider increasing route/max_size.\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001960 return ERR_PTR(-ENOMEM);
Ben Greear40385652010-11-08 12:33:48 +00001961 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001962
Daniel Lezcano55786892008-03-04 13:47:47 -08001963 dev_hold(net->loopback_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001964 in6_dev_hold(idev);
1965
Changli Gaod8d1f302010-06-10 23:31:35 -07001966 rt->dst.flags = DST_HOST;
1967 rt->dst.input = ip6_input;
1968 rt->dst.output = ip6_output;
Daniel Lezcano55786892008-03-04 13:47:47 -08001969 rt->rt6i_dev = net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001970 rt->rt6i_idev = idev;
David S. Millerdefb3512010-12-08 21:16:57 -08001971 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1);
Changli Gaod8d1f302010-06-10 23:31:35 -07001972 rt->dst.obsolete = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001973
1974 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09001975 if (anycast)
1976 rt->rt6i_flags |= RTF_ANYCAST;
1977 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07001978 rt->rt6i_flags |= RTF_LOCAL;
David S. Miller14deae42009-01-04 16:04:39 -08001979 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1980 if (IS_ERR(neigh)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001981 dst_free(&rt->dst);
David S. Miller14deae42009-01-04 16:04:39 -08001982
1983 /* We are casting this because that is the return
1984 * value type. But an errno encoded pointer is the
1985 * same regardless of the underlying pointer type,
1986 * and that's what we are returning. So this is OK.
1987 */
1988 return (struct rt6_info *) neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001989 }
David S. Miller14deae42009-01-04 16:04:39 -08001990 rt->rt6i_nexthop = neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001991
1992 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1993 rt->rt6i_dst.plen = 128;
Daniel Lezcano55786892008-03-04 13:47:47 -08001994 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001995
Changli Gaod8d1f302010-06-10 23:31:35 -07001996 atomic_set(&rt->dst.__refcnt, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001997
1998 return rt;
1999}
2000
/* Argument bundle handed to fib6_ifdown() through the tree walkers. */
struct arg_dev_net {
	struct net_device	*dev;	/* device going down; NULL matches any */
	struct net		*net;	/* namespace owning the null entry */
};
2005
Linus Torvalds1da177e2005-04-16 15:20:36 -07002006static int fib6_ifdown(struct rt6_info *rt, void *arg)
2007{
stephen hemmingerbc3ef662010-12-16 17:42:40 +00002008 const struct arg_dev_net *adn = arg;
2009 const struct net_device *dev = adn->dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002010
stephen hemmingerbc3ef662010-12-16 17:42:40 +00002011 if ((rt->rt6i_dev == dev || dev == NULL) &&
2012 rt != adn->net->ipv6.ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002013 RT6_TRACE("deleted by ifdown %p\n", rt);
2014 return -1;
2015 }
2016 return 0;
2017}
2018
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002019void rt6_ifdown(struct net *net, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002020{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002021 struct arg_dev_net adn = {
2022 .dev = dev,
2023 .net = net,
2024 };
2025
2026 fib6_clean_all(net, fib6_ifdown, 0, &adn);
David S. Miller1e493d12008-09-10 17:27:15 -07002027 icmp6_clean_all(fib6_ifdown, &adn);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002028}
2029
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device	*dev;	/* device whose MTU changed */
	unsigned		mtu;	/* the new MTU */
};
2035
2036static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2037{
2038 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2039 struct inet6_dev *idev;
2040
2041 /* In IPv6 pmtu discovery is not optional,
2042 so that RTAX_MTU lock cannot disable it.
2043 We still use this lock to block changes
2044 caused by addrconf/ndisc.
2045 */
2046
2047 idev = __in6_dev_get(arg->dev);
2048 if (idev == NULL)
2049 return 0;
2050
2051 /* For administrative MTU increase, there is no way to discover
2052 IPv6 PMTU increase, so PMTU increase should be updated here.
2053 Since RFC 1981 doesn't include administrative MTU increase
2054 update PMTU increase is a MUST. (i.e. jumbo frame)
2055 */
2056 /*
2057 If new MTU is less than route PMTU, this new MTU will be the
2058 lowest MTU in the path, update the route PMTU to reflect PMTU
2059 decreases; if new MTU is greater than route PMTU, and the
2060 old MTU is the lowest MTU in the path, update the route PMTU
2061 to reflect the increase. In this case if the other nodes' MTU
2062 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2063 PMTU discouvery.
2064 */
2065 if (rt->rt6i_dev == arg->dev &&
Changli Gaod8d1f302010-06-10 23:31:35 -07002066 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2067 (dst_mtu(&rt->dst) >= arg->mtu ||
2068 (dst_mtu(&rt->dst) < arg->mtu &&
2069 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
David S. Millerdefb3512010-12-08 21:16:57 -08002070 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
Simon Arlott566cfd82007-07-26 00:09:55 -07002071 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002072 return 0;
2073}
2074
2075void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2076{
Thomas Grafc71099a2006-08-04 23:20:06 -07002077 struct rt6_mtu_change_arg arg = {
2078 .dev = dev,
2079 .mtu = mtu,
2080 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07002081
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002082 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002083}
2084
Patrick McHardyef7c79e2007-06-05 12:38:30 -07002085static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
Thomas Graf5176f912006-08-26 20:13:18 -07002086 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
Thomas Graf86872cb2006-08-22 00:01:08 -07002087 [RTA_OIF] = { .type = NLA_U32 },
Thomas Grafab364a62006-08-22 00:01:47 -07002088 [RTA_IIF] = { .type = NLA_U32 },
Thomas Graf86872cb2006-08-22 00:01:08 -07002089 [RTA_PRIORITY] = { .type = NLA_U32 },
2090 [RTA_METRICS] = { .type = NLA_NESTED },
2091};
2092
2093static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2094 struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002095{
Thomas Graf86872cb2006-08-22 00:01:08 -07002096 struct rtmsg *rtm;
2097 struct nlattr *tb[RTA_MAX+1];
2098 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002099
Thomas Graf86872cb2006-08-22 00:01:08 -07002100 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2101 if (err < 0)
2102 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002103
Thomas Graf86872cb2006-08-22 00:01:08 -07002104 err = -EINVAL;
2105 rtm = nlmsg_data(nlh);
2106 memset(cfg, 0, sizeof(*cfg));
2107
2108 cfg->fc_table = rtm->rtm_table;
2109 cfg->fc_dst_len = rtm->rtm_dst_len;
2110 cfg->fc_src_len = rtm->rtm_src_len;
2111 cfg->fc_flags = RTF_UP;
2112 cfg->fc_protocol = rtm->rtm_protocol;
2113
2114 if (rtm->rtm_type == RTN_UNREACHABLE)
2115 cfg->fc_flags |= RTF_REJECT;
2116
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00002117 if (rtm->rtm_type == RTN_LOCAL)
2118 cfg->fc_flags |= RTF_LOCAL;
2119
Thomas Graf86872cb2006-08-22 00:01:08 -07002120 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2121 cfg->fc_nlinfo.nlh = nlh;
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002122 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
Thomas Graf86872cb2006-08-22 00:01:08 -07002123
2124 if (tb[RTA_GATEWAY]) {
2125 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2126 cfg->fc_flags |= RTF_GATEWAY;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002127 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002128
2129 if (tb[RTA_DST]) {
2130 int plen = (rtm->rtm_dst_len + 7) >> 3;
2131
2132 if (nla_len(tb[RTA_DST]) < plen)
2133 goto errout;
2134
2135 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002136 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002137
2138 if (tb[RTA_SRC]) {
2139 int plen = (rtm->rtm_src_len + 7) >> 3;
2140
2141 if (nla_len(tb[RTA_SRC]) < plen)
2142 goto errout;
2143
2144 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002145 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002146
2147 if (tb[RTA_OIF])
2148 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2149
2150 if (tb[RTA_PRIORITY])
2151 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2152
2153 if (tb[RTA_METRICS]) {
2154 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2155 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002156 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002157
2158 if (tb[RTA_TABLE])
2159 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2160
2161 err = 0;
2162errout:
2163 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002164}
2165
Thomas Grafc127ea22007-03-22 11:58:32 -07002166static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002167{
Thomas Graf86872cb2006-08-22 00:01:08 -07002168 struct fib6_config cfg;
2169 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002170
Thomas Graf86872cb2006-08-22 00:01:08 -07002171 err = rtm_to_fib6_config(skb, nlh, &cfg);
2172 if (err < 0)
2173 return err;
2174
2175 return ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002176}
2177
Thomas Grafc127ea22007-03-22 11:58:32 -07002178static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002179{
Thomas Graf86872cb2006-08-22 00:01:08 -07002180 struct fib6_config cfg;
2181 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002182
Thomas Graf86872cb2006-08-22 00:01:08 -07002183 err = rtm_to_fib6_config(skb, nlh, &cfg);
2184 if (err < 0)
2185 return err;
2186
2187 return ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002188}
2189
Thomas Graf339bf982006-11-10 14:10:15 -08002190static inline size_t rt6_nlmsg_size(void)
2191{
2192 return NLMSG_ALIGN(sizeof(struct rtmsg))
2193 + nla_total_size(16) /* RTA_SRC */
2194 + nla_total_size(16) /* RTA_DST */
2195 + nla_total_size(16) /* RTA_GATEWAY */
2196 + nla_total_size(16) /* RTA_PREFSRC */
2197 + nla_total_size(4) /* RTA_TABLE */
2198 + nla_total_size(4) /* RTA_IIF */
2199 + nla_total_size(4) /* RTA_OIF */
2200 + nla_total_size(4) /* RTA_PRIORITY */
Noriaki TAKAMIYA6a2b9ce2007-01-23 22:09:41 -08002201 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
Thomas Graf339bf982006-11-10 14:10:15 -08002202 + nla_total_size(sizeof(struct rta_cacheinfo));
2203}
2204
Brian Haley191cd582008-08-14 15:33:21 -07002205static int rt6_fill_node(struct net *net,
2206 struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002207 struct in6_addr *dst, struct in6_addr *src,
2208 int iif, int type, u32 pid, u32 seq,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002209 int prefix, int nowait, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002210{
2211 struct rtmsg *rtm;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002212 struct nlmsghdr *nlh;
Thomas Grafe3703b32006-11-27 09:27:07 -08002213 long expires;
Patrick McHardy9e762a42006-08-10 23:09:48 -07002214 u32 table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002215
2216 if (prefix) { /* user wants prefix routes only */
2217 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2218 /* success since this is not a prefix route */
2219 return 1;
2220 }
2221 }
2222
Thomas Graf2d7202b2006-08-22 00:01:27 -07002223 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2224 if (nlh == NULL)
Patrick McHardy26932562007-01-31 23:16:40 -08002225 return -EMSGSIZE;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002226
2227 rtm = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002228 rtm->rtm_family = AF_INET6;
2229 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2230 rtm->rtm_src_len = rt->rt6i_src.plen;
2231 rtm->rtm_tos = 0;
Thomas Grafc71099a2006-08-04 23:20:06 -07002232 if (rt->rt6i_table)
Patrick McHardy9e762a42006-08-10 23:09:48 -07002233 table = rt->rt6i_table->tb6_id;
Thomas Grafc71099a2006-08-04 23:20:06 -07002234 else
Patrick McHardy9e762a42006-08-10 23:09:48 -07002235 table = RT6_TABLE_UNSPEC;
2236 rtm->rtm_table = table;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002237 NLA_PUT_U32(skb, RTA_TABLE, table);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002238 if (rt->rt6i_flags&RTF_REJECT)
2239 rtm->rtm_type = RTN_UNREACHABLE;
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00002240 else if (rt->rt6i_flags&RTF_LOCAL)
2241 rtm->rtm_type = RTN_LOCAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002242 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2243 rtm->rtm_type = RTN_LOCAL;
2244 else
2245 rtm->rtm_type = RTN_UNICAST;
2246 rtm->rtm_flags = 0;
2247 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2248 rtm->rtm_protocol = rt->rt6i_protocol;
2249 if (rt->rt6i_flags&RTF_DYNAMIC)
2250 rtm->rtm_protocol = RTPROT_REDIRECT;
2251 else if (rt->rt6i_flags & RTF_ADDRCONF)
2252 rtm->rtm_protocol = RTPROT_KERNEL;
2253 else if (rt->rt6i_flags&RTF_DEFAULT)
2254 rtm->rtm_protocol = RTPROT_RA;
2255
2256 if (rt->rt6i_flags&RTF_CACHE)
2257 rtm->rtm_flags |= RTM_F_CLONED;
2258
2259 if (dst) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002260 NLA_PUT(skb, RTA_DST, 16, dst);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002261 rtm->rtm_dst_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002262 } else if (rtm->rtm_dst_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002263 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002264#ifdef CONFIG_IPV6_SUBTREES
2265 if (src) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002266 NLA_PUT(skb, RTA_SRC, 16, src);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002267 rtm->rtm_src_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002268 } else if (rtm->rtm_src_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002269 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002270#endif
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002271 if (iif) {
2272#ifdef CONFIG_IPV6_MROUTE
2273 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
Benjamin Thery8229efd2008-12-10 16:30:15 -08002274 int err = ip6mr_get_route(net, skb, rtm, nowait);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002275 if (err <= 0) {
2276 if (!nowait) {
2277 if (err == 0)
2278 return 0;
2279 goto nla_put_failure;
2280 } else {
2281 if (err == -EMSGSIZE)
2282 goto nla_put_failure;
2283 }
2284 }
2285 } else
2286#endif
2287 NLA_PUT_U32(skb, RTA_IIF, iif);
2288 } else if (dst) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002289 struct inet6_dev *idev = ip6_dst_idev(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002290 struct in6_addr saddr_buf;
Brian Haley191cd582008-08-14 15:33:21 -07002291 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
YOSHIFUJI Hideaki7cbca672008-03-25 09:37:42 +09002292 dst, 0, &saddr_buf) == 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002293 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002294 }
Thomas Graf2d7202b2006-08-22 00:01:27 -07002295
David S. Millerdefb3512010-12-08 21:16:57 -08002296 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002297 goto nla_put_failure;
2298
Changli Gaod8d1f302010-06-10 23:31:35 -07002299 if (rt->dst.neighbour)
2300 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key);
Thomas Graf2d7202b2006-08-22 00:01:27 -07002301
Changli Gaod8d1f302010-06-10 23:31:35 -07002302 if (rt->dst.dev)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002303 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2304
2305 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
Thomas Grafe3703b32006-11-27 09:27:07 -08002306
YOSHIFUJI Hideaki36e3dea2008-05-13 02:52:55 +09002307 if (!(rt->rt6i_flags & RTF_EXPIRES))
2308 expires = 0;
2309 else if (rt->rt6i_expires - jiffies < INT_MAX)
2310 expires = rt->rt6i_expires - jiffies;
2311 else
2312 expires = INT_MAX;
YOSHIFUJI Hideaki69cdf8f2008-05-19 16:55:13 -07002313
Changli Gaod8d1f302010-06-10 23:31:35 -07002314 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2315 expires, rt->dst.error) < 0)
Thomas Grafe3703b32006-11-27 09:27:07 -08002316 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002317
Thomas Graf2d7202b2006-08-22 00:01:27 -07002318 return nlmsg_end(skb, nlh);
2319
2320nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08002321 nlmsg_cancel(skb, nlh);
2322 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002323}
2324
Patrick McHardy1b43af52006-08-10 23:11:17 -07002325int rt6_dump_route(struct rt6_info *rt, void *p_arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002326{
2327 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2328 int prefix;
2329
Thomas Graf2d7202b2006-08-22 00:01:27 -07002330 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2331 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002332 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2333 } else
2334 prefix = 0;
2335
Brian Haley191cd582008-08-14 15:33:21 -07002336 return rt6_fill_node(arg->net,
2337 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002338 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002339 prefix, 0, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002340}
2341
Thomas Grafc127ea22007-03-22 11:58:32 -07002342static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002343{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002344 struct net *net = sock_net(in_skb->sk);
Thomas Grafab364a62006-08-22 00:01:47 -07002345 struct nlattr *tb[RTA_MAX+1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002346 struct rt6_info *rt;
Thomas Grafab364a62006-08-22 00:01:47 -07002347 struct sk_buff *skb;
2348 struct rtmsg *rtm;
2349 struct flowi fl;
2350 int err, iif = 0;
2351
2352 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2353 if (err < 0)
2354 goto errout;
2355
2356 err = -EINVAL;
2357 memset(&fl, 0, sizeof(fl));
2358
2359 if (tb[RTA_SRC]) {
2360 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2361 goto errout;
2362
2363 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2364 }
2365
2366 if (tb[RTA_DST]) {
2367 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2368 goto errout;
2369
2370 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2371 }
2372
2373 if (tb[RTA_IIF])
2374 iif = nla_get_u32(tb[RTA_IIF]);
2375
2376 if (tb[RTA_OIF])
2377 fl.oif = nla_get_u32(tb[RTA_OIF]);
2378
2379 if (iif) {
2380 struct net_device *dev;
Daniel Lezcano55786892008-03-04 13:47:47 -08002381 dev = __dev_get_by_index(net, iif);
Thomas Grafab364a62006-08-22 00:01:47 -07002382 if (!dev) {
2383 err = -ENODEV;
2384 goto errout;
2385 }
2386 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002387
2388 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
Thomas Grafab364a62006-08-22 00:01:47 -07002389 if (skb == NULL) {
2390 err = -ENOBUFS;
2391 goto errout;
2392 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002393
2394 /* Reserve room for dummy headers, this skb can pass
2395 through good chunk of routing engine.
2396 */
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07002397 skb_reset_mac_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002398 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2399
Daniel Lezcano8a3edd82008-03-07 11:14:16 -08002400 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
Changli Gaod8d1f302010-06-10 23:31:35 -07002401 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002402
Brian Haley191cd582008-08-14 15:33:21 -07002403 err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002404 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002405 nlh->nlmsg_seq, 0, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002406 if (err < 0) {
Thomas Grafab364a62006-08-22 00:01:47 -07002407 kfree_skb(skb);
2408 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002409 }
2410
Daniel Lezcano55786892008-03-04 13:47:47 -08002411 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
Thomas Grafab364a62006-08-22 00:01:47 -07002412errout:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002413 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002414}
2415
Thomas Graf86872cb2006-08-22 00:01:08 -07002416void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002417{
2418 struct sk_buff *skb;
Daniel Lezcano55786892008-03-04 13:47:47 -08002419 struct net *net = info->nl_net;
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002420 u32 seq;
2421 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002422
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002423 err = -ENOBUFS;
2424 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
Thomas Graf86872cb2006-08-22 00:01:08 -07002425
Thomas Graf339bf982006-11-10 14:10:15 -08002426 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
Thomas Graf21713eb2006-08-15 00:35:24 -07002427 if (skb == NULL)
2428 goto errout;
2429
Brian Haley191cd582008-08-14 15:33:21 -07002430 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002431 event, info->pid, seq, 0, 0, 0);
Patrick McHardy26932562007-01-31 23:16:40 -08002432 if (err < 0) {
2433 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2434 WARN_ON(err == -EMSGSIZE);
2435 kfree_skb(skb);
2436 goto errout;
2437 }
Pablo Neira Ayuso1ce85fe2009-02-24 23:18:28 -08002438 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2439 info->nlh, gfp_any());
2440 return;
Thomas Graf21713eb2006-08-15 00:35:24 -07002441errout:
2442 if (err < 0)
Daniel Lezcano55786892008-03-04 13:47:47 -08002443 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002444}
2445
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002446static int ip6_route_dev_notify(struct notifier_block *this,
2447 unsigned long event, void *data)
2448{
2449 struct net_device *dev = (struct net_device *)data;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002450 struct net *net = dev_net(dev);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002451
2452 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002453 net->ipv6.ip6_null_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002454 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2455#ifdef CONFIG_IPV6_MULTIPLE_TABLES
Changli Gaod8d1f302010-06-10 23:31:35 -07002456 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002457 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
Changli Gaod8d1f302010-06-10 23:31:35 -07002458 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002459 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2460#endif
2461 }
2462
2463 return NOTIFY_OK;
2464}
2465
Linus Torvalds1da177e2005-04-16 15:20:36 -07002466/*
2467 * /proc
2468 */
2469
2470#ifdef CONFIG_PROC_FS
2471
/*
 * Buffer-style /proc output state.
 * NOTE(review): no user of this structure is visible in this part of
 * the file; the seq_file based handlers below do not reference it.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2480
2481static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2482{
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002483 struct seq_file *m = p_arg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002484
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002485 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002486
2487#ifdef CONFIG_IPV6_SUBTREES
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002488 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002489#else
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002490 seq_puts(m, "00000000000000000000000000000000 00 ");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002491#endif
2492
2493 if (rt->rt6i_nexthop) {
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002494 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002495 } else {
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002496 seq_puts(m, "00000000000000000000000000000000");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002497 }
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002498 seq_printf(m, " %08x %08x %08x %08x %8s\n",
Changli Gaod8d1f302010-06-10 23:31:35 -07002499 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2500 rt->dst.__use, rt->rt6i_flags,
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002501 rt->rt6i_dev ? rt->rt6i_dev->name : "");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002502 return 0;
2503}
2504
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002505static int ipv6_route_show(struct seq_file *m, void *v)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002506{
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002507 struct net *net = (struct net *)m->private;
2508 fib6_clean_all(net, rt6_info_route, 0, m);
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002509 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002510}
2511
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002512static int ipv6_route_open(struct inode *inode, struct file *file)
2513{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002514 return single_open_net(inode, file, ipv6_route_show);
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002515}
2516
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002517static const struct file_operations ipv6_route_proc_fops = {
2518 .owner = THIS_MODULE,
2519 .open = ipv6_route_open,
2520 .read = seq_read,
2521 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002522 .release = single_release_net,
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002523};
2524
Linus Torvalds1da177e2005-04-16 15:20:36 -07002525static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2526{
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002527 struct net *net = (struct net *)seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002528 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002529 net->ipv6.rt6_stats->fib_nodes,
2530 net->ipv6.rt6_stats->fib_route_nodes,
2531 net->ipv6.rt6_stats->fib_rt_alloc,
2532 net->ipv6.rt6_stats->fib_rt_entries,
2533 net->ipv6.rt6_stats->fib_rt_cache,
Eric Dumazetfc66f952010-10-08 06:37:34 +00002534 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002535 net->ipv6.rt6_stats->fib_discarded_routes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002536
2537 return 0;
2538}
2539
2540static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2541{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002542 return single_open_net(inode, file, rt6_stats_seq_show);
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002543}
2544
Arjan van de Ven9a321442007-02-12 00:55:35 -08002545static const struct file_operations rt6_stats_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002546 .owner = THIS_MODULE,
2547 .open = rt6_stats_seq_open,
2548 .read = seq_read,
2549 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002550 .release = single_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002551};
2552#endif /* CONFIG_PROC_FS */
2553
2554#ifdef CONFIG_SYSCTL
2555
Linus Torvalds1da177e2005-04-16 15:20:36 -07002556static
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07002557int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002558 void __user *buffer, size_t *lenp, loff_t *ppos)
2559{
Lucian Adrian Grijincuc486da32011-02-24 19:48:03 +00002560 struct net *net;
2561 int delay;
2562 if (!write)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002563 return -EINVAL;
Lucian Adrian Grijincuc486da32011-02-24 19:48:03 +00002564
2565 net = (struct net *)ctl->extra1;
2566 delay = net->ipv6.sysctl.flush_delay;
2567 proc_dointvec(ctl, write, buffer, lenp, ppos);
2568 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2569 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002570}
2571
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002572ctl_table ipv6_route_table_template[] = {
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002573 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002574 .procname = "flush",
Daniel Lezcano49905092008-01-10 03:01:01 -08002575 .data = &init_net.ipv6.sysctl.flush_delay,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002576 .maxlen = sizeof(int),
Dave Jones89c8b3a12005-04-28 12:11:49 -07002577 .mode = 0200,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002578 .proc_handler = ipv6_sysctl_rtcache_flush
Linus Torvalds1da177e2005-04-16 15:20:36 -07002579 },
2580 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002581 .procname = "gc_thresh",
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002582 .data = &ip6_dst_ops_template.gc_thresh,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002583 .maxlen = sizeof(int),
2584 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002585 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002586 },
2587 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002588 .procname = "max_size",
Daniel Lezcano49905092008-01-10 03:01:01 -08002589 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002590 .maxlen = sizeof(int),
2591 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002592 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002593 },
2594 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002595 .procname = "gc_min_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002596 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002597 .maxlen = sizeof(int),
2598 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002599 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002600 },
2601 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002602 .procname = "gc_timeout",
Daniel Lezcano49905092008-01-10 03:01:01 -08002603 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002604 .maxlen = sizeof(int),
2605 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002606 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002607 },
2608 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002609 .procname = "gc_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002610 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002611 .maxlen = sizeof(int),
2612 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002613 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002614 },
2615 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002616 .procname = "gc_elasticity",
Daniel Lezcano49905092008-01-10 03:01:01 -08002617 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002618 .maxlen = sizeof(int),
2619 .mode = 0644,
Min Zhangf3d3f612010-08-14 22:42:51 -07002620 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002621 },
2622 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002623 .procname = "mtu_expires",
Daniel Lezcano49905092008-01-10 03:01:01 -08002624 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002625 .maxlen = sizeof(int),
2626 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002627 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002628 },
2629 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002630 .procname = "min_adv_mss",
Daniel Lezcano49905092008-01-10 03:01:01 -08002631 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002632 .maxlen = sizeof(int),
2633 .mode = 0644,
Min Zhangf3d3f612010-08-14 22:42:51 -07002634 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002635 },
2636 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002637 .procname = "gc_min_interval_ms",
Daniel Lezcano49905092008-01-10 03:01:01 -08002638 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002639 .maxlen = sizeof(int),
2640 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002641 .proc_handler = proc_dointvec_ms_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002642 },
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08002643 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002644};
2645
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002646struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002647{
2648 struct ctl_table *table;
2649
2650 table = kmemdup(ipv6_route_table_template,
2651 sizeof(ipv6_route_table_template),
2652 GFP_KERNEL);
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002653
2654 if (table) {
2655 table[0].data = &net->ipv6.sysctl.flush_delay;
Lucian Adrian Grijincuc486da32011-02-24 19:48:03 +00002656 table[0].extra1 = net;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002657 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002658 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2659 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2660 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2661 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2662 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2663 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2664 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
Alexey Dobriyan9c69fab2009-12-18 20:11:03 -08002665 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002666 }
2667
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002668 return table;
2669}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002670#endif
2671
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002672static int __net_init ip6_route_net_init(struct net *net)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002673{
Pavel Emelyanov633d424b2008-04-21 14:25:23 -07002674 int ret = -ENOMEM;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002675
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002676 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2677 sizeof(net->ipv6.ip6_dst_ops));
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002678
Eric Dumazetfc66f952010-10-08 06:37:34 +00002679 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2680 goto out_ip6_dst_ops;
2681
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002682 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2683 sizeof(*net->ipv6.ip6_null_entry),
2684 GFP_KERNEL);
2685 if (!net->ipv6.ip6_null_entry)
Eric Dumazetfc66f952010-10-08 06:37:34 +00002686 goto out_ip6_dst_entries;
Changli Gaod8d1f302010-06-10 23:31:35 -07002687 net->ipv6.ip6_null_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002688 (struct dst_entry *)net->ipv6.ip6_null_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002689 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Millerdefb3512010-12-08 21:16:57 -08002690 dst_metric_set(&net->ipv6.ip6_null_entry->dst, RTAX_HOPLIMIT, 255);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002691
2692#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2693 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2694 sizeof(*net->ipv6.ip6_prohibit_entry),
2695 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002696 if (!net->ipv6.ip6_prohibit_entry)
2697 goto out_ip6_null_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002698 net->ipv6.ip6_prohibit_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002699 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002700 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Millerdefb3512010-12-08 21:16:57 -08002701 dst_metric_set(&net->ipv6.ip6_prohibit_entry->dst, RTAX_HOPLIMIT, 255);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002702
2703 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2704 sizeof(*net->ipv6.ip6_blk_hole_entry),
2705 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002706 if (!net->ipv6.ip6_blk_hole_entry)
2707 goto out_ip6_prohibit_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002708 net->ipv6.ip6_blk_hole_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002709 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002710 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Millerdefb3512010-12-08 21:16:57 -08002711 dst_metric_set(&net->ipv6.ip6_blk_hole_entry->dst, RTAX_HOPLIMIT, 255);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002712#endif
2713
Peter Zijlstrab339a47c2008-10-07 14:15:00 -07002714 net->ipv6.sysctl.flush_delay = 0;
2715 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2716 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2717 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2718 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2719 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2720 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2721 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2722
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002723#ifdef CONFIG_PROC_FS
2724 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2725 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2726#endif
Benjamin Thery6891a342008-03-04 13:49:47 -08002727 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2728
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002729 ret = 0;
2730out:
2731 return ret;
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002732
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002733#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2734out_ip6_prohibit_entry:
2735 kfree(net->ipv6.ip6_prohibit_entry);
2736out_ip6_null_entry:
2737 kfree(net->ipv6.ip6_null_entry);
2738#endif
Eric Dumazetfc66f952010-10-08 06:37:34 +00002739out_ip6_dst_entries:
2740 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002741out_ip6_dst_ops:
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002742 goto out;
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002743}
2744
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002745static void __net_exit ip6_route_net_exit(struct net *net)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002746{
2747#ifdef CONFIG_PROC_FS
2748 proc_net_remove(net, "ipv6_route");
2749 proc_net_remove(net, "rt6_stats");
2750#endif
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002751 kfree(net->ipv6.ip6_null_entry);
2752#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2753 kfree(net->ipv6.ip6_prohibit_entry);
2754 kfree(net->ipv6.ip6_blk_hole_entry);
2755#endif
Xiaotian Feng41bb78b2010-11-02 16:11:05 +00002756 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002757}
2758
2759static struct pernet_operations ip6_route_net_ops = {
2760 .init = ip6_route_net_init,
2761 .exit = ip6_route_net_exit,
2762};
2763
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002764static struct notifier_block ip6_route_dev_notifier = {
2765 .notifier_call = ip6_route_dev_notify,
2766 .priority = 0,
2767};
2768
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002769int __init ip6_route_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002770{
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002771 int ret;
2772
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002773 ret = -ENOMEM;
2774 ip6_dst_ops_template.kmem_cachep =
2775 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2776 SLAB_HWCACHE_ALIGN, NULL);
2777 if (!ip6_dst_ops_template.kmem_cachep)
Fernando Carrijoc19a28e2009-01-07 18:09:08 -08002778 goto out;
David S. Miller14e50e52007-05-24 18:17:54 -07002779
Eric Dumazetfc66f952010-10-08 06:37:34 +00002780 ret = dst_entries_init(&ip6_dst_blackhole_ops);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002781 if (ret)
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002782 goto out_kmem_cache;
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002783
Eric Dumazetfc66f952010-10-08 06:37:34 +00002784 ret = register_pernet_subsys(&ip6_route_net_ops);
2785 if (ret)
2786 goto out_dst_entries;
2787
Arnaud Ebalard5dc121e2008-10-01 02:37:56 -07002788 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2789
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002790 /* Registering of the loopback is done before this portion of code,
2791 * the loopback reference in rt6_info will not be taken, do it
2792 * manually for init_net */
Changli Gaod8d1f302010-06-10 23:31:35 -07002793 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002794 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2795 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
Changli Gaod8d1f302010-06-10 23:31:35 -07002796 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002797 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
Changli Gaod8d1f302010-06-10 23:31:35 -07002798 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002799 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2800 #endif
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002801 ret = fib6_init();
2802 if (ret)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002803 goto out_register_subsys;
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002804
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002805 ret = xfrm6_init();
2806 if (ret)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002807 goto out_fib6_init;
Daniel Lezcanoc35b7e72007-12-08 00:14:11 -08002808
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002809 ret = fib6_rules_init();
2810 if (ret)
2811 goto xfrm6_init;
Daniel Lezcano7e5449c2007-12-08 00:14:54 -08002812
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002813 ret = -ENOBUFS;
2814 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2815 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2816 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2817 goto fib6_rules_init;
2818
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002819 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002820 if (ret)
2821 goto fib6_rules_init;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002822
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002823out:
2824 return ret;
2825
2826fib6_rules_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002827 fib6_rules_cleanup();
2828xfrm6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002829 xfrm6_fini();
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002830out_fib6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002831 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002832out_register_subsys:
2833 unregister_pernet_subsys(&ip6_route_net_ops);
Eric Dumazetfc66f952010-10-08 06:37:34 +00002834out_dst_entries:
2835 dst_entries_destroy(&ip6_dst_blackhole_ops);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002836out_kmem_cache:
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002837 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002838 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002839}
2840
2841void ip6_route_cleanup(void)
2842{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002843 unregister_netdevice_notifier(&ip6_route_dev_notifier);
Thomas Graf101367c2006-08-04 03:39:02 -07002844 fib6_rules_cleanup();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002845 xfrm6_fini();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002846 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002847 unregister_pernet_subsys(&ip6_route_net_ops);
Xiaotian Feng41bb78b2010-11-02 16:11:05 +00002848 dst_entries_destroy(&ip6_dst_blackhole_ops);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002849 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002850}