blob: 1534508f6c68a3c4f010657e94051e06a7d727c4 [file] [log] [blame]
/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		  routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		  reachable.  Otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */
26
Randy Dunlap4fc268d2006-01-11 12:17:47 -080027#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090037#include <linux/mroute6.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070038#include <linux/init.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039#include <linux/if_arp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070040#include <linux/proc_fs.h>
41#include <linux/seq_file.h>
Daniel Lezcano5b7c9312008-03-03 23:28:58 -080042#include <linux/nsproxy.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090043#include <linux/slab.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020044#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070045#include <net/snmp.h>
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#include <net/ndisc.h>
50#include <net/addrconf.h>
51#include <net/tcp.h>
52#include <linux/rtnetlink.h>
53#include <net/dst.h>
54#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -070055#include <net/netevent.h>
Thomas Graf21713eb2006-08-15 00:35:24 -070056#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070057
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
/* Debug verbosity for this file; raise to 3 to enable RDBG()/RT6_TRACE(). */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
/* Tracing disabled: RDBG() vanishes, RT6_TRACE() is an empty statement. */
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
/*
 * Tracing enabled: RDBG() expects a parenthesised printk argument list,
 * RT6_TRACE() prefixes its format string with KERN_DEBUG.
 */
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif
74
Linus Torvalds1da177e2005-04-16 15:20:36 -070075static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
76static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
David S. Miller0dbaee32010-12-13 12:52:14 -080077static unsigned int ip6_default_advmss(const struct dst_entry *dst);
David S. Millerd33e4552010-12-14 13:01:14 -080078static unsigned int ip6_default_mtu(const struct dst_entry *dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -070079static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80static void ip6_dst_destroy(struct dst_entry *);
81static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
Daniel Lezcano569d3642008-01-18 03:56:57 -080083static int ip6_dst_gc(struct dst_ops *ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -070084
85static int ip6_pkt_discard(struct sk_buff *skb);
86static int ip6_pkt_discard_out(struct sk_buff *skb);
87static void ip6_link_failure(struct sk_buff *skb);
88static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
89
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080090#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -080091static struct rt6_info *rt6_add_route_info(struct net *net,
92 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080093 struct in6_addr *gwaddr, int ifindex,
94 unsigned pref);
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -080095static struct rt6_info *rt6_get_route_info(struct net *net,
96 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080097 struct in6_addr *gwaddr, int ifindex);
98#endif
99
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -0800100static struct dst_ops ip6_dst_ops_template = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700101 .family = AF_INET6,
Harvey Harrison09640e632009-02-01 00:45:17 -0800102 .protocol = cpu_to_be16(ETH_P_IPV6),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700103 .gc = ip6_dst_gc,
104 .gc_thresh = 1024,
105 .check = ip6_dst_check,
David S. Miller0dbaee32010-12-13 12:52:14 -0800106 .default_advmss = ip6_default_advmss,
David S. Millerd33e4552010-12-14 13:01:14 -0800107 .default_mtu = ip6_default_mtu,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700108 .destroy = ip6_dst_destroy,
109 .ifdown = ip6_dst_ifdown,
110 .negative_advice = ip6_negative_advice,
111 .link_failure = ip6_link_failure,
112 .update_pmtu = ip6_rt_update_pmtu,
Herbert Xu1ac06e02008-05-20 14:32:14 -0700113 .local_out = __ip6_local_out,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700114};
115
David S. Miller14e50e52007-05-24 18:17:54 -0700116static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
117{
118}
119
120static struct dst_ops ip6_dst_blackhole_ops = {
121 .family = AF_INET6,
Harvey Harrison09640e632009-02-01 00:45:17 -0800122 .protocol = cpu_to_be16(ETH_P_IPV6),
David S. Miller14e50e52007-05-24 18:17:54 -0700123 .destroy = ip6_dst_destroy,
124 .check = ip6_dst_check,
125 .update_pmtu = ip6_rt_blackhole_update_pmtu,
David S. Miller14e50e52007-05-24 18:17:54 -0700126};
127
Daniel Lezcanobdb32892008-03-04 13:48:10 -0800128static struct rt6_info ip6_null_entry_template = {
Changli Gaod8d1f302010-06-10 23:31:35 -0700129 .dst = {
130 .__refcnt = ATOMIC_INIT(1),
131 .__use = 1,
132 .obsolete = -1,
133 .error = -ENETUNREACH,
Changli Gaod8d1f302010-06-10 23:31:35 -0700134 .input = ip6_pkt_discard,
135 .output = ip6_pkt_discard_out,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700136 },
137 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
Jean-Mickael Guerin4f724272009-05-20 17:38:59 -0700138 .rt6i_protocol = RTPROT_KERNEL,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700139 .rt6i_metric = ~(u32) 0,
140 .rt6i_ref = ATOMIC_INIT(1),
141};
142
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/*
 * Template for the "prohibit" route: a reject entry whose dst error is
 * -EACCES and whose handlers are the ip6_pkt_prohibit*() functions.
 */
static struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/*
 * Template for the "blackhole" route: a reject entry whose dst error is
 * -EINVAL and whose input and output handlers are both dst_discard.
 */
static struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard,
	},
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
179
/*
 * Allocate a dst entry from @ops and return it viewed as an rt6_info.
 * The result of dst_alloc() is passed through unchanged (no NULL check here).
 */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
	struct rt6_info *rt;

	rt = (struct rt6_info *)dst_alloc(ops);
	return rt;
}
185
186static void ip6_dst_destroy(struct dst_entry *dst)
187{
188 struct rt6_info *rt = (struct rt6_info *)dst;
189 struct inet6_dev *idev = rt->rt6i_idev;
David S. Millerb3419362010-11-30 12:27:11 -0800190 struct inet_peer *peer = rt->rt6i_peer;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700191
192 if (idev != NULL) {
193 rt->rt6i_idev = NULL;
194 in6_dev_put(idev);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900195 }
David S. Millerb3419362010-11-30 12:27:11 -0800196 if (peer) {
197 BUG_ON(!(rt->rt6i_flags & RTF_CACHE));
198 rt->rt6i_peer = NULL;
199 inet_putpeer(peer);
200 }
201}
202
203void rt6_bind_peer(struct rt6_info *rt, int create)
204{
205 struct inet_peer *peer;
206
207 if (WARN_ON(!(rt->rt6i_flags & RTF_CACHE)))
208 return;
209
210 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
211 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
212 inet_putpeer(peer);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700213}
214
215static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
216 int how)
217{
218 struct rt6_info *rt = (struct rt6_info *)dst;
219 struct inet6_dev *idev = rt->rt6i_idev;
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800220 struct net_device *loopback_dev =
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900221 dev_net(dev)->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700222
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800223 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
224 struct inet6_dev *loopback_idev =
225 in6_dev_get(loopback_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700226 if (loopback_idev != NULL) {
227 rt->rt6i_idev = loopback_idev;
228 in6_dev_put(idev);
229 }
230 }
231}
232
233static __inline__ int rt6_check_expired(const struct rt6_info *rt)
234{
Eric Dumazeta02cec22010-09-22 20:43:57 +0000235 return (rt->rt6i_flags & RTF_EXPIRES) &&
236 time_after(jiffies, rt->rt6i_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700237}
238
Thomas Grafc71099a2006-08-04 23:20:06 -0700239static inline int rt6_need_strict(struct in6_addr *daddr)
240{
Eric Dumazeta02cec22010-09-22 20:43:57 +0000241 return ipv6_addr_type(daddr) &
242 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
Thomas Grafc71099a2006-08-04 23:20:06 -0700243}
244
Linus Torvalds1da177e2005-04-16 15:20:36 -0700245/*
Thomas Grafc71099a2006-08-04 23:20:06 -0700246 * Route lookup. Any table->tb6_lock is implied.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700247 */
248
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800249static inline struct rt6_info *rt6_device_match(struct net *net,
250 struct rt6_info *rt,
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900251 struct in6_addr *saddr,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700252 int oif,
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700253 int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700254{
255 struct rt6_info *local = NULL;
256 struct rt6_info *sprt;
257
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900258 if (!oif && ipv6_addr_any(saddr))
259 goto out;
260
Changli Gaod8d1f302010-06-10 23:31:35 -0700261 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900262 struct net_device *dev = sprt->rt6i_dev;
263
264 if (oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700265 if (dev->ifindex == oif)
266 return sprt;
267 if (dev->flags & IFF_LOOPBACK) {
268 if (sprt->rt6i_idev == NULL ||
269 sprt->rt6i_idev->dev->ifindex != oif) {
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700270 if (flags & RT6_LOOKUP_F_IFACE && oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700271 continue;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900272 if (local && (!oif ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700273 local->rt6i_idev->dev->ifindex == oif))
274 continue;
275 }
276 local = sprt;
277 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900278 } else {
279 if (ipv6_chk_addr(net, saddr, dev,
280 flags & RT6_LOOKUP_F_IFACE))
281 return sprt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700282 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900283 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700284
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900285 if (oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700286 if (local)
287 return local;
288
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700289 if (flags & RT6_LOOKUP_F_IFACE)
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800290 return net->ipv6.ip6_null_entry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700291 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900292out:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700293 return rt;
294}
295
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: if the next hop of @rt is not in a valid
 * neighbour state and the last update is older than the interface's
 * rtr_probe_interval, send a Neighbour Solicitation to the next hop's
 * solicited-node multicast address.
 *
 * @rt may be NULL, in which case nothing is done.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;

	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	/*
	 * neigh->updated is modified below, so the lock must be held for
	 * writing.  With only the read side held, two CPUs could both pass
	 * the time_after() test and both emit a probe, defeating the rate
	 * limit.
	 */
	write_lock_bh(&neigh->lock);
	if ((neigh->nud_state & NUD_VALID) ||
	    !time_after(jiffies,
			neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		write_unlock_bh(&neigh->lock);
		return;
	}
	neigh->updated = jiffies;
	write_unlock_bh(&neigh->lock);

	/* Send the solicitation outside the neighbour lock. */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support disabled: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
330
Linus Torvalds1da177e2005-04-16 15:20:36 -0700331/*
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800332 * Default Router Selection (RFC 2461 6.3.6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700333 */
Dave Jonesb6f99a22007-03-22 12:27:49 -0700334static inline int rt6_check_dev(struct rt6_info *rt, int oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700335{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800336 struct net_device *dev = rt->rt6i_dev;
David S. Miller161980f2007-04-06 11:42:27 -0700337 if (!oif || dev->ifindex == oif)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800338 return 2;
David S. Miller161980f2007-04-06 11:42:27 -0700339 if ((dev->flags & IFF_LOOPBACK) &&
340 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
341 return 1;
342 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700343}
344
Dave Jonesb6f99a22007-03-22 12:27:49 -0700345static inline int rt6_check_neigh(struct rt6_info *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700346{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800347 struct neighbour *neigh = rt->rt6i_nexthop;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800348 int m;
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700349 if (rt->rt6i_flags & RTF_NONEXTHOP ||
350 !(rt->rt6i_flags & RTF_GATEWAY))
351 m = 1;
352 else if (neigh) {
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800353 read_lock_bh(&neigh->lock);
354 if (neigh->nud_state & NUD_VALID)
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700355 m = 2;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800356#ifdef CONFIG_IPV6_ROUTER_PREF
357 else if (neigh->nud_state & NUD_FAILED)
358 m = 0;
359#endif
360 else
YOSHIFUJI Hideakiea73ee22006-11-06 09:45:44 -0800361 m = 1;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800362 read_unlock_bh(&neigh->lock);
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800363 } else
364 m = 0;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800365 return m;
366}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700367
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800368static int rt6_score_route(struct rt6_info *rt, int oif,
369 int strict)
370{
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700371 int m, n;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900372
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700373 m = rt6_check_dev(rt, oif);
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700374 if (!m && (strict & RT6_LOOKUP_F_IFACE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800375 return -1;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800376#ifdef CONFIG_IPV6_ROUTER_PREF
377 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
378#endif
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700379 n = rt6_check_neigh(rt);
YOSHIFUJI Hideaki557e92e2006-11-06 09:45:45 -0800380 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800381 return -1;
382 return m;
383}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700384
David S. Millerf11e6652007-03-24 20:36:25 -0700385static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
386 int *mpri, struct rt6_info *match)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800387{
David S. Millerf11e6652007-03-24 20:36:25 -0700388 int m;
389
390 if (rt6_check_expired(rt))
391 goto out;
392
393 m = rt6_score_route(rt, oif, strict);
394 if (m < 0)
395 goto out;
396
397 if (m > *mpri) {
398 if (strict & RT6_LOOKUP_F_REACHABLE)
399 rt6_probe(match);
400 *mpri = m;
401 match = rt;
402 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
403 rt6_probe(rt);
404 }
405
406out:
407 return match;
408}
409
410static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
411 struct rt6_info *rr_head,
412 u32 metric, int oif, int strict)
413{
414 struct rt6_info *rt, *match;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800415 int mpri = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700416
David S. Millerf11e6652007-03-24 20:36:25 -0700417 match = NULL;
418 for (rt = rr_head; rt && rt->rt6i_metric == metric;
Changli Gaod8d1f302010-06-10 23:31:35 -0700419 rt = rt->dst.rt6_next)
David S. Millerf11e6652007-03-24 20:36:25 -0700420 match = find_match(rt, oif, strict, &mpri, match);
421 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
Changli Gaod8d1f302010-06-10 23:31:35 -0700422 rt = rt->dst.rt6_next)
David S. Millerf11e6652007-03-24 20:36:25 -0700423 match = find_match(rt, oif, strict, &mpri, match);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800424
David S. Millerf11e6652007-03-24 20:36:25 -0700425 return match;
426}
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800427
David S. Millerf11e6652007-03-24 20:36:25 -0700428static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
429{
430 struct rt6_info *match, *rt0;
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800431 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700432
David S. Millerf11e6652007-03-24 20:36:25 -0700433 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
Harvey Harrison0dc47872008-03-05 20:47:47 -0800434 __func__, fn->leaf, oif);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700435
David S. Millerf11e6652007-03-24 20:36:25 -0700436 rt0 = fn->rr_ptr;
437 if (!rt0)
438 fn->rr_ptr = rt0 = fn->leaf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700439
David S. Millerf11e6652007-03-24 20:36:25 -0700440 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700441
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800442 if (!match &&
David S. Millerf11e6652007-03-24 20:36:25 -0700443 (strict & RT6_LOOKUP_F_REACHABLE)) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700444 struct rt6_info *next = rt0->dst.rt6_next;
David S. Millerf11e6652007-03-24 20:36:25 -0700445
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800446 /* no entries matched; do round-robin */
David S. Millerf11e6652007-03-24 20:36:25 -0700447 if (!next || next->rt6i_metric != rt0->rt6i_metric)
448 next = fn->leaf;
449
450 if (next != rt0)
451 fn->rr_ptr = next;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700452 }
453
David S. Millerf11e6652007-03-24 20:36:25 -0700454 RT6_TRACE("%s() => %p\n",
Harvey Harrison0dc47872008-03-05 20:47:47 -0800455 __func__, match);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700456
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900457 net = dev_net(rt0->rt6i_dev);
Eric Dumazeta02cec22010-09-22 20:43:57 +0000458 return match ? match : net->ipv6.ip6_null_entry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700459}
460
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option received on @dev.
 *
 * @dev:    device the option arrived on
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router
 *
 * Depending on the advertised lifetime the matching route is deleted,
 * created, or has its preference and expiry refreshed.
 *
 * Returns 0 on success, -EINVAL if the option is malformed.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/*
	 * Sanity check for prefix_len and length (RFC 4191, section 2.3).
	 * The option length is in units of 8 octets and includes the
	 * 8-octet fixed header, so:
	 *   prefix_len >  64  requires length == 3 (16 prefix octets)
	 *   prefix_len >   0  requires length >= 2 (at least 8 prefix octets)
	 *   prefix_len ==  0  allows   length 1, 2 or 3
	 * Anything laxer would let ipv6_addr_prefix() below read prefix
	 * octets that are not part of the option.
	 */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 3)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 2)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		/* Full 128-bit prefix present: use it in place. */
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* Shorter option: copy prefix_len bits into a local buffer. */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* Zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev->ifindex, pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime)) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		/* NOTE(review): presumably drops the reference taken by the
		 * get/add helper above - confirm against their definitions.
		 */
		dst_release(&rt->dst);
	}
	return 0;
}
#endif
534
/*
 * BACKTRACK(__net, saddr) - climb the FIB tree after a failed node lookup.
 *
 * Expects the enclosing function to provide the variables `rt` (current
 * result) and `fn` (current node) and the labels `restart` and `out`.
 * When `rt` is the namespace's null entry, the macro walks from `fn` towards
 * the root: at each step it either descends into the parent's source
 * subtree (looked up with @saddr) or moves to the parent itself, and jumps
 * to `restart` as soon as it reaches a node carrying route info.  Reaching
 * the top-level root jumps to `out` instead.
 *
 * Macro parameters are parenthesised so that any expression may be passed.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == (__net)->ipv6.ip6_null_entry) {			\
		struct fib6_node *pn;					\
		while (1) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn),	\
						 NULL, (saddr));	\
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
Thomas Grafc71099a2006-08-04 23:20:06 -0700552
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800553static struct rt6_info *ip6_pol_route_lookup(struct net *net,
554 struct fib6_table *table,
Thomas Grafc71099a2006-08-04 23:20:06 -0700555 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700556{
557 struct fib6_node *fn;
558 struct rt6_info *rt;
559
Thomas Grafc71099a2006-08-04 23:20:06 -0700560 read_lock_bh(&table->tb6_lock);
561 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
562restart:
563 rt = fn->leaf;
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900564 rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800565 BACKTRACK(net, &fl->fl6_src);
Thomas Grafc71099a2006-08-04 23:20:06 -0700566out:
Changli Gaod8d1f302010-06-10 23:31:35 -0700567 dst_use(&rt->dst, jiffies);
Thomas Grafc71099a2006-08-04 23:20:06 -0700568 read_unlock_bh(&table->tb6_lock);
Thomas Grafc71099a2006-08-04 23:20:06 -0700569 return rt;
570
571}
572
YOSHIFUJI Hideaki9acd9f32008-04-10 15:42:10 +0900573struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
574 const struct in6_addr *saddr, int oif, int strict)
Thomas Grafc71099a2006-08-04 23:20:06 -0700575{
576 struct flowi fl = {
577 .oif = oif,
Changli Gao58116622010-11-12 18:43:55 +0000578 .fl6_dst = *daddr,
Thomas Grafc71099a2006-08-04 23:20:06 -0700579 };
580 struct dst_entry *dst;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700581 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
Thomas Grafc71099a2006-08-04 23:20:06 -0700582
Thomas Grafadaa70b2006-10-13 15:01:03 -0700583 if (saddr) {
584 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
585 flags |= RT6_LOOKUP_F_HAS_SADDR;
586 }
587
Daniel Lezcano606a2b42008-03-04 13:45:59 -0800588 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
Thomas Grafc71099a2006-08-04 23:20:06 -0700589 if (dst->error == 0)
590 return (struct rt6_info *) dst;
591
592 dst_release(dst);
593
Linus Torvalds1da177e2005-04-16 15:20:36 -0700594 return NULL;
595}
596
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900597EXPORT_SYMBOL(rt6_lookup);
598
Thomas Grafc71099a2006-08-04 23:20:06 -0700599/* ip6_ins_rt is called with FREE table->tb6_lock.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700600 It takes new route entry, the addition fails by any reason the
601 route is freed. In any case, if caller does not hold it, it may
602 be destroyed.
603 */
604
Thomas Graf86872cb2006-08-22 00:01:08 -0700605static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700606{
607 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -0700608 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700609
Thomas Grafc71099a2006-08-04 23:20:06 -0700610 table = rt->rt6i_table;
611 write_lock_bh(&table->tb6_lock);
Thomas Graf86872cb2006-08-22 00:01:08 -0700612 err = fib6_add(&table->tb6_root, rt, info);
Thomas Grafc71099a2006-08-04 23:20:06 -0700613 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700614
615 return err;
616}
617
Thomas Graf40e22e82006-08-22 00:00:45 -0700618int ip6_ins_rt(struct rt6_info *rt)
619{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800620 struct nl_info info = {
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900621 .nl_net = dev_net(rt->rt6i_dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800622 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -0800623 return __ip6_ins_rt(rt, &info);
Thomas Graf40e22e82006-08-22 00:00:45 -0700624}
625
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800626static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
627 struct in6_addr *saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700628{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700629 struct rt6_info *rt;
630
631 /*
632 * Clone the route.
633 */
634
635 rt = ip6_rt_copy(ort);
636
637 if (rt) {
David S. Miller14deae42009-01-04 16:04:39 -0800638 struct neighbour *neigh;
639 int attempts = !in_softirq();
640
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900641 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
642 if (rt->rt6i_dst.plen != 128 &&
643 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
644 rt->rt6i_flags |= RTF_ANYCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700645 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900646 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700647
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900648 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700649 rt->rt6i_dst.plen = 128;
650 rt->rt6i_flags |= RTF_CACHE;
Changli Gaod8d1f302010-06-10 23:31:35 -0700651 rt->dst.flags |= DST_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700652
653#ifdef CONFIG_IPV6_SUBTREES
654 if (rt->rt6i_src.plen && saddr) {
655 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
656 rt->rt6i_src.plen = 128;
657 }
658#endif
659
David S. Miller14deae42009-01-04 16:04:39 -0800660 retry:
661 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
662 if (IS_ERR(neigh)) {
663 struct net *net = dev_net(rt->rt6i_dev);
664 int saved_rt_min_interval =
665 net->ipv6.sysctl.ip6_rt_gc_min_interval;
666 int saved_rt_elasticity =
667 net->ipv6.sysctl.ip6_rt_gc_elasticity;
668
669 if (attempts-- > 0) {
670 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
671 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
672
Alexey Dobriyan86393e52009-08-29 01:34:49 +0000673 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
David S. Miller14deae42009-01-04 16:04:39 -0800674
675 net->ipv6.sysctl.ip6_rt_gc_elasticity =
676 saved_rt_elasticity;
677 net->ipv6.sysctl.ip6_rt_gc_min_interval =
678 saved_rt_min_interval;
679 goto retry;
680 }
681
682 if (net_ratelimit())
683 printk(KERN_WARNING
Ulrich Weber7e1b33e2010-09-27 15:02:18 -0700684 "ipv6: Neighbour table overflow.\n");
Changli Gaod8d1f302010-06-10 23:31:35 -0700685 dst_free(&rt->dst);
David S. Miller14deae42009-01-04 16:04:39 -0800686 return NULL;
687 }
688 rt->rt6i_nexthop = neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700689
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800690 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700691
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800692 return rt;
693}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700694
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800695static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
696{
697 struct rt6_info *rt = ip6_rt_copy(ort);
698 if (rt) {
699 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
700 rt->rt6i_dst.plen = 128;
701 rt->rt6i_flags |= RTF_CACHE;
Changli Gaod8d1f302010-06-10 23:31:35 -0700702 rt->dst.flags |= DST_HOST;
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800703 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
704 }
705 return rt;
706}
707
/*
 * Look up the route for flow @fl in @table and return it with a
 * reference held (dst_hold() is called on every return path).
 *
 * @net:   namespace; supplies devconf_all->forwarding and ip6_null_entry
 * @table: FIB table to search; its tb6_lock is taken for reading
 * @oif:   interface index handed to rt6_select()
 * @fl:    flow; fl6_dst/fl6_src drive the lookup
 * @flags: only RT6_LOOKUP_F_IFACE is consumed here (as "strict")
 *
 * If the selected route is neither ip6_null_entry nor already flagged
 * RTF_CACHE, a per-destination copy is created (rt6_alloc_cow() or
 * rt6_alloc_clone()) and inserted with ip6_ins_rt().  Because the table
 * lock is dropped while doing so, a failed insertion triggers a fresh
 * lookup, at most three attempts in total.
 *
 * Never returns NULL: ip6_null_entry is substituted when no copy could
 * be allocated.  The returned route has dst.lastuse/__use updated.
 */
static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
				      struct flowi *fl, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt, *nrt;
	int strict = 0;
	int attempts = 3;
	int err;
	/* Ask rt6_select() for a reachable router unless forwarding is on. */
	int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;

	strict |= flags & RT6_LOOKUP_F_IFACE;

relookup:
	read_lock_bh(&table->tb6_lock);

restart_2:
	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);

restart:
	rt = rt6_select(fn, oif, strict | reachable);

	/*
	 * BACKTRACK is a macro defined elsewhere in this file; presumably
	 * it moves fn and jumps to "restart"/"out" - confirm against its
	 * definition.
	 */
	BACKTRACK(net, &fl->fl6_src);
	if (rt == net->ipv6.ip6_null_entry ||
	    rt->rt6i_flags & RTF_CACHE)
		goto out;

	/* Pin rt so it stays valid once the table lock is released. */
	dst_hold(&rt->dst);
	read_unlock_bh(&table->tb6_lock);

	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
		nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
	else
		nrt = rt6_alloc_clone(rt, &fl->fl6_dst);

	dst_release(&rt->dst);
	/* Fall back to the null entry when the copy could not be allocated. */
	rt = nrt ? : net->ipv6.ip6_null_entry;

	/* Reference for the caller; dropped again below if we retry. */
	dst_hold(&rt->dst);
	if (nrt) {
		err = ip6_ins_rt(nrt);
		if (!err)
			goto out2;
	}

	/* Out of retries: hand back whatever rt currently is. */
	if (--attempts <= 0)
		goto out2;

	/*
	 * Race condition! In the gap, when table->tb6_lock was
	 * released someone could insert this route.  Relookup.
	 */
	dst_release(&rt->dst);
	goto relookup;

out:
	/*
	 * Reached with the table lock still held.  On the first pass with
	 * the reachability requirement set, drop it and look up again.
	 */
	if (reachable) {
		reachable = 0;
		goto restart_2;
	}
	dst_hold(&rt->dst);
	read_unlock_bh(&table->tb6_lock);
out2:
	rt->dst.lastuse = jiffies;
	rt->dst.__use++;

	return rt;
}
775
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800776static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700777 struct flowi *fl, int flags)
778{
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800779 return ip6_pol_route(net, table, fl->iif, fl, flags);
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700780}
781
Thomas Grafc71099a2006-08-04 23:20:06 -0700782void ip6_route_input(struct sk_buff *skb)
783{
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700784 struct ipv6hdr *iph = ipv6_hdr(skb);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900785 struct net *net = dev_net(skb->dev);
Thomas Grafadaa70b2006-10-13 15:01:03 -0700786 int flags = RT6_LOOKUP_F_HAS_SADDR;
Thomas Grafc71099a2006-08-04 23:20:06 -0700787 struct flowi fl = {
788 .iif = skb->dev->ifindex,
Changli Gao58116622010-11-12 18:43:55 +0000789 .fl6_dst = iph->daddr,
790 .fl6_src = iph->saddr,
791 .fl6_flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900792 .mark = skb->mark,
Thomas Grafc71099a2006-08-04 23:20:06 -0700793 .proto = iph->nexthdr,
794 };
Thomas Grafadaa70b2006-10-13 15:01:03 -0700795
Thomas Goff1d6e55f2009-01-27 22:39:59 -0800796 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
Thomas Grafadaa70b2006-10-13 15:01:03 -0700797 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700798
Eric Dumazetadf30902009-06-02 05:19:30 +0000799 skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input));
Thomas Grafc71099a2006-08-04 23:20:06 -0700800}
801
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800802static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
Thomas Grafc71099a2006-08-04 23:20:06 -0700803 struct flowi *fl, int flags)
804{
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800805 return ip6_pol_route(net, table, fl->oif, fl, flags);
Thomas Grafc71099a2006-08-04 23:20:06 -0700806}
807
Daniel Lezcano4591db42008-03-05 10:48:10 -0800808struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
809 struct flowi *fl)
Thomas Grafc71099a2006-08-04 23:20:06 -0700810{
811 int flags = 0;
812
Brian Haley6057fd72010-05-28 23:02:35 -0700813 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl->fl6_dst))
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700814 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700815
Thomas Grafadaa70b2006-10-13 15:01:03 -0700816 if (!ipv6_addr_any(&fl->fl6_src))
817 flags |= RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideaki / 吉藤英明0c9a2ac2010-03-07 00:14:44 +0000818 else if (sk)
819 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
Thomas Grafadaa70b2006-10-13 15:01:03 -0700820
Daniel Lezcano4591db42008-03-05 10:48:10 -0800821 return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700822}
823
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900824EXPORT_SYMBOL(ip6_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700825
David S. Miller14e50e52007-05-24 18:17:54 -0700826int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
827{
828 struct rt6_info *ort = (struct rt6_info *) *dstp;
829 struct rt6_info *rt = (struct rt6_info *)
830 dst_alloc(&ip6_dst_blackhole_ops);
831 struct dst_entry *new = NULL;
832
833 if (rt) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700834 new = &rt->dst;
David S. Miller14e50e52007-05-24 18:17:54 -0700835
836 atomic_set(&new->__refcnt, 1);
837 new->__use = 1;
Herbert Xu352e5122007-11-13 21:34:06 -0800838 new->input = dst_discard;
839 new->output = dst_discard;
David S. Miller14e50e52007-05-24 18:17:54 -0700840
David S. Millerdefb3512010-12-08 21:16:57 -0800841 dst_copy_metrics(new, &ort->dst);
Changli Gaod8d1f302010-06-10 23:31:35 -0700842 new->dev = ort->dst.dev;
David S. Miller14e50e52007-05-24 18:17:54 -0700843 if (new->dev)
844 dev_hold(new->dev);
845 rt->rt6i_idev = ort->rt6i_idev;
846 if (rt->rt6i_idev)
847 in6_dev_hold(rt->rt6i_idev);
848 rt->rt6i_expires = 0;
849
850 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
851 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
852 rt->rt6i_metric = 0;
853
854 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
855#ifdef CONFIG_IPV6_SUBTREES
856 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
857#endif
858
859 dst_free(new);
860 }
861
862 dst_release(*dstp);
863 *dstp = new;
Eric Dumazeta02cec22010-09-22 20:43:57 +0000864 return new ? 0 : -ENOMEM;
David S. Miller14e50e52007-05-24 18:17:54 -0700865}
866EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
867
Linus Torvalds1da177e2005-04-16 15:20:36 -0700868/*
869 * Destination cache support functions
870 */
871
872static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
873{
874 struct rt6_info *rt;
875
876 rt = (struct rt6_info *) dst;
877
Herbert Xu10414442010-03-18 23:00:22 +0000878 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700879 return dst;
880
881 return NULL;
882}
883
884static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
885{
886 struct rt6_info *rt = (struct rt6_info *) dst;
887
888 if (rt) {
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +0000889 if (rt->rt6i_flags & RTF_CACHE) {
890 if (rt6_check_expired(rt)) {
891 ip6_del_rt(rt);
892 dst = NULL;
893 }
894 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700895 dst_release(dst);
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +0000896 dst = NULL;
897 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700898 }
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +0000899 return dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700900}
901
902static void ip6_link_failure(struct sk_buff *skb)
903{
904 struct rt6_info *rt;
905
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +0000906 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700907
Eric Dumazetadf30902009-06-02 05:19:30 +0000908 rt = (struct rt6_info *) skb_dst(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700909 if (rt) {
910 if (rt->rt6i_flags&RTF_CACHE) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700911 dst_set_expires(&rt->dst, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700912 rt->rt6i_flags |= RTF_EXPIRES;
913 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
914 rt->rt6i_node->fn_sernum = -1;
915 }
916}
917
918static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
919{
920 struct rt6_info *rt6 = (struct rt6_info*)dst;
921
922 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
923 rt6->rt6i_flags |= RTF_MODIFIED;
924 if (mtu < IPV6_MIN_MTU) {
David S. Millerdefb3512010-12-08 21:16:57 -0800925 u32 features = dst_metric(dst, RTAX_FEATURES);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700926 mtu = IPV6_MIN_MTU;
David S. Millerdefb3512010-12-08 21:16:57 -0800927 features |= RTAX_FEATURE_ALLFRAG;
928 dst_metric_set(dst, RTAX_FEATURES, features);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700929 }
David S. Millerdefb3512010-12-08 21:16:57 -0800930 dst_metric_set(dst, RTAX_MTU, mtu);
Tom Tucker8d717402006-07-30 20:43:36 -0700931 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700932 }
933}
934
David S. Miller0dbaee32010-12-13 12:52:14 -0800935static unsigned int ip6_default_advmss(const struct dst_entry *dst)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700936{
David S. Miller0dbaee32010-12-13 12:52:14 -0800937 struct net_device *dev = dst->dev;
938 unsigned int mtu = dst_mtu(dst);
939 struct net *net = dev_net(dev);
940
Linus Torvalds1da177e2005-04-16 15:20:36 -0700941 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
942
Daniel Lezcano55786892008-03-04 13:47:47 -0800943 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
944 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700945
946 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900947 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
948 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
949 * IPV6_MAXPLEN is also valid and means: "any MSS,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700950 * rely only on pmtu discovery"
951 */
952 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
953 mtu = IPV6_MAXPLEN;
954 return mtu;
955}
956
David S. Millerd33e4552010-12-14 13:01:14 -0800957static unsigned int ip6_default_mtu(const struct dst_entry *dst)
958{
959 unsigned int mtu = IPV6_MIN_MTU;
960 struct inet6_dev *idev;
961
962 rcu_read_lock();
963 idev = __in6_dev_get(dst->dev);
964 if (idev)
965 mtu = idev->cnf.mtu6;
966 rcu_read_unlock();
967
968 return mtu;
969}
970
/*
 * Head of a singly linked list (chained through dst->next) of the dst
 * entries created by icmp6_dst_alloc().  Entries are unlinked and freed
 * by icmp6_dst_gc() and icmp6_clean_all().
 */
static struct dst_entry *icmp6_dst_gc_list;
/* Taken (BH-disabling) around every access to icmp6_dst_gc_list. */
static DEFINE_SPINLOCK(icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700973
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800974struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700975 struct neighbour *neigh,
YOSHIFUJI Hideaki9acd9f32008-04-10 15:42:10 +0900976 const struct in6_addr *addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700977{
978 struct rt6_info *rt;
979 struct inet6_dev *idev = in6_dev_get(dev);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900980 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700981
982 if (unlikely(idev == NULL))
983 return NULL;
984
Alexey Dobriyan86393e52009-08-29 01:34:49 +0000985 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700986 if (unlikely(rt == NULL)) {
987 in6_dev_put(idev);
988 goto out;
989 }
990
991 dev_hold(dev);
992 if (neigh)
993 neigh_hold(neigh);
David S. Miller14deae42009-01-04 16:04:39 -0800994 else {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700995 neigh = ndisc_get_neigh(dev, addr);
David S. Miller14deae42009-01-04 16:04:39 -0800996 if (IS_ERR(neigh))
997 neigh = NULL;
998 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700999
1000 rt->rt6i_dev = dev;
1001 rt->rt6i_idev = idev;
1002 rt->rt6i_nexthop = neigh;
Changli Gaod8d1f302010-06-10 23:31:35 -07001003 atomic_set(&rt->dst.__refcnt, 1);
David S. Millerdefb3512010-12-08 21:16:57 -08001004 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
Changli Gaod8d1f302010-06-10 23:31:35 -07001005 rt->dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001006
1007#if 0 /* there's no chance to use these for ndisc */
Changli Gaod8d1f302010-06-10 23:31:35 -07001008 rt->dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001009 ? DST_HOST
Linus Torvalds1da177e2005-04-16 15:20:36 -07001010 : 0;
1011 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1012 rt->rt6i_dst.plen = 128;
1013#endif
1014
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001015 spin_lock_bh(&icmp6_dst_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07001016 rt->dst.next = icmp6_dst_gc_list;
1017 icmp6_dst_gc_list = &rt->dst;
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001018 spin_unlock_bh(&icmp6_dst_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001019
Daniel Lezcano55786892008-03-04 13:47:47 -08001020 fib6_force_start_gc(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001021
1022out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001023 return &rt->dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001024}
1025
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001026int icmp6_dst_gc(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001027{
1028 struct dst_entry *dst, *next, **pprev;
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001029 int more = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001030
1031 next = NULL;
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001032
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001033 spin_lock_bh(&icmp6_dst_lock);
1034 pprev = &icmp6_dst_gc_list;
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001035
Linus Torvalds1da177e2005-04-16 15:20:36 -07001036 while ((dst = *pprev) != NULL) {
1037 if (!atomic_read(&dst->__refcnt)) {
1038 *pprev = dst->next;
1039 dst_free(dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001040 } else {
1041 pprev = &dst->next;
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001042 ++more;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001043 }
1044 }
1045
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001046 spin_unlock_bh(&icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001047
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001048 return more;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001049}
1050
David S. Miller1e493d12008-09-10 17:27:15 -07001051static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1052 void *arg)
1053{
1054 struct dst_entry *dst, **pprev;
1055
1056 spin_lock_bh(&icmp6_dst_lock);
1057 pprev = &icmp6_dst_gc_list;
1058 while ((dst = *pprev) != NULL) {
1059 struct rt6_info *rt = (struct rt6_info *) dst;
1060 if (func(rt, arg)) {
1061 *pprev = dst->next;
1062 dst_free(dst);
1063 } else {
1064 pprev = &dst->next;
1065 }
1066 }
1067 spin_unlock_bh(&icmp6_dst_lock);
1068}
1069
Daniel Lezcano569d3642008-01-18 03:56:57 -08001070static int ip6_dst_gc(struct dst_ops *ops)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001071{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001072 unsigned long now = jiffies;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00001073 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
Daniel Lezcano7019b782008-03-04 13:50:14 -08001074 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1075 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1076 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1077 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1078 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001079 int entries;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001080
Eric Dumazetfc66f952010-10-08 06:37:34 +00001081 entries = dst_entries_get_fast(ops);
Daniel Lezcano7019b782008-03-04 13:50:14 -08001082 if (time_after(rt_last_gc + rt_min_interval, now) &&
Eric Dumazetfc66f952010-10-08 06:37:34 +00001083 entries <= rt_max_size)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001084 goto out;
1085
Benjamin Thery6891a342008-03-04 13:49:47 -08001086 net->ipv6.ip6_rt_gc_expire++;
1087 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1088 net->ipv6.ip6_rt_last_gc = now;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001089 entries = dst_entries_get_slow(ops);
1090 if (entries < ops->gc_thresh)
Daniel Lezcano7019b782008-03-04 13:50:14 -08001091 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001092out:
Daniel Lezcano7019b782008-03-04 13:50:14 -08001093 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001094 return entries > rt_max_size;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001095}
1096
1097/* Clean host part of a prefix. Not necessary in radix tree,
1098 but results in cleaner routing tables.
1099
1100 Remove it only when all the things will work!
1101 */
1102
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001103int ip6_dst_hoplimit(struct dst_entry *dst)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001104{
David S. Miller5170ae82010-12-12 21:35:57 -08001105 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
David S. Millera02e4b72010-12-12 21:39:02 -08001106 if (hoplimit == 0) {
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001107 struct net_device *dev = dst->dev;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001108 struct inet6_dev *idev;
1109
1110 rcu_read_lock();
1111 idev = __in6_dev_get(dev);
1112 if (idev)
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001113 hoplimit = idev->cnf.hop_limit;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001114 else
YOSHIFUJI Hideaki53b79972008-07-19 22:35:03 -07001115 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001116 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001117 }
1118 return hoplimit;
1119}
David S. Millerabbf46a2010-12-12 21:14:46 -08001120EXPORT_SYMBOL(ip6_dst_hoplimit);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001121
1122/*
1123 *
1124 */
1125
/*
 * Build a route from the configuration in @cfg and insert it into the
 * FIB table selected by cfg->fc_table.
 *
 * Side effects on @cfg: fc_metric is defaulted to IP6_RT_PRIO_USER when
 * zero, fc_protocol to RTPROT_BOOT when RTPROT_UNSPEC, and
 * fc_nlinfo.nl_net is overwritten with the namespace of the device the
 * route ends up on.
 *
 * Returns the result of __ip6_ins_rt() once the route has been built,
 * or a negative errno from the checks below (-EINVAL, -ENODEV,
 * -ENOBUFS, -ENOMEM, -EHOSTUNREACH, or the error of the neighbour
 * lookup).  On those error paths the device/inet6_dev references taken
 * here are dropped and the half-built route is freed.
 */
int ip6_route_add(struct fib6_config *cfg)
{
	int err;
	struct net *net = cfg->fc_nlinfo.nl_net;
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	struct fib6_table *table;
	int addr_type;

	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
		return -EINVAL;
#ifndef CONFIG_IPV6_SUBTREES
	/* Source-specific routes need subtree support. */
	if (cfg->fc_src_len)
		return -EINVAL;
#endif
	/* An explicit interface must exist and have IPv6 state. */
	if (cfg->fc_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(net, cfg->fc_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (cfg->fc_metric == 0)
		cfg->fc_metric = IP6_RT_PRIO_USER;

	table = fib6_new_table(net, cfg->fc_table);
	if (table == NULL) {
		err = -ENOBUFS;
		goto out;
	}

	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);

	if (rt == NULL) {
		err = -ENOMEM;
		goto out;
	}

	rt->dst.obsolete = -1;
	/* fc_expires is given in clock_t units; 0 is stored when RTF_EXPIRES is unset. */
	rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
				jiffies + clock_t_to_jiffies(cfg->fc_expires) :
				0;

	if (cfg->fc_protocol == RTPROT_UNSPEC)
		cfg->fc_protocol = RTPROT_BOOT;
	rt->rt6i_protocol = cfg->fc_protocol;

	addr_type = ipv6_addr_type(&cfg->fc_dst);

	/* Pick the input handler from the destination type / RTF_LOCAL. */
	if (addr_type & IPV6_ADDR_MULTICAST)
		rt->dst.input = ip6_mc_input;
	else if (cfg->fc_flags & RTF_LOCAL)
		rt->dst.input = ip6_input;
	else
		rt->dst.input = ip6_forward;

	rt->dst.output = ip6_output;

	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
	rt->rt6i_dst.plen = cfg->fc_dst_len;
	/*
	 * NOTE(review): plain assignment, whereas rt6_alloc_clone() ORs
	 * DST_HOST in; presumably harmless on a freshly allocated dst -
	 * confirm.
	 */
	if (rt->rt6i_dst.plen == 128)
	       rt->dst.flags = DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
	rt->rt6i_src.plen = cfg->fc_src_len;
#endif

	rt->rt6i_metric = cfg->fc_metric;

	/* We cannot add true routes via loopback here,
	   they would result in kernel looping; promote them to reject routes
	 */
	if ((cfg->fc_flags & RTF_REJECT) ||
	    (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
					      && !(cfg->fc_flags&RTF_LOCAL))) {
		/* hold loopback dev/idev if we haven't done so. */
		if (dev != net->loopback_dev) {
			if (dev) {
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = net->loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		/* Reject route: discard in both directions, report -ENETUNREACH. */
		rt->dst.output = ip6_pkt_discard_out;
		rt->dst.input = ip6_pkt_discard;
		rt->dst.error = -ENETUNREACH;
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		goto install_route;
	}

	if (cfg->fc_flags & RTF_GATEWAY) {
		struct in6_addr *gw_addr;
		int gwa_type;

		gw_addr = &cfg->fc_gateway;
		ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
		gwa_type = ipv6_addr_type(gw_addr);

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt;

			/* IPv6 strictly inhibits using not link-local
			   addresses as nexthop address.
			   Otherwise, router will not able to send redirects.
			   It is very good, but in some (rare!) circumstances
			   (SIT, PtP, NBMA NOARP links) it is handy to allow
			   some exceptions. --ANK
			 */
			err = -EINVAL;
			if (!(gwa_type&IPV6_ADDR_UNICAST))
				goto out;

			/* The gateway itself must be reachable through an existing route. */
			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);

			err = -EHOSTUNREACH;
			if (grt == NULL)
				goto out;
			if (dev) {
				/* ... and through the interface the caller asked for. */
				if (dev != grt->rt6i_dev) {
					dst_release(&grt->dst);
					goto out;
				}
			} else {
				/* No interface given: adopt the gateway route's device. */
				dev = grt->rt6i_dev;
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
			/* Only accept a gateway that is not itself behind a gateway. */
			if (!(grt->rt6i_flags&RTF_GATEWAY))
				err = 0;
			dst_release(&grt->dst);

			if (err)
				goto out;
		}
		err = -EINVAL;
		if (dev == NULL || (dev->flags&IFF_LOOPBACK))
			goto out;
	}

	err = -ENODEV;
	if (dev == NULL)
		goto out;

	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
		rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
		if (IS_ERR(rt->rt6i_nexthop)) {
			err = PTR_ERR(rt->rt6i_nexthop);
			/* Do not leave an ERR_PTR in the route that is about to be freed. */
			rt->rt6i_nexthop = NULL;
			goto out;
		}
	}

	rt->rt6i_flags = cfg->fc_flags;

install_route:
	/* Apply user-supplied metrics (netlink attributes in fc_mx). */
	if (cfg->fc_mx) {
		struct nlattr *nla;
		int remaining;

		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
			int type = nla_type(nla);

			/* Attributes of type 0 are skipped. */
			if (type) {
				if (type > RTAX_MAX) {
					err = -EINVAL;
					goto out;
				}

				/*
				 * NOTE(review): the payload length is not checked
				 * here; presumably validated by the caller - confirm.
				 */
				dst_metric_set(&rt->dst, type, nla_get_u32(nla));
			}
		}
	}

	/*
	 * The dev/idev references taken above are not dropped on this
	 * path; presumably they now belong to the route - confirm against
	 * the dst destroy path.
	 */
	rt->dst.dev = dev;
	rt->rt6i_idev = idev;
	rt->rt6i_table = table;

	cfg->fc_nlinfo.nl_net = dev_net(dev);

	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);

out:
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_free(&rt->dst);
	return err;
}
1328
Thomas Graf86872cb2006-08-22 00:01:08 -07001329static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001330{
1331 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -07001332 struct fib6_table *table;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001333 struct net *net = dev_net(rt->rt6i_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001334
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001335 if (rt == net->ipv6.ip6_null_entry)
Patrick McHardy6c813a72006-08-06 22:22:47 -07001336 return -ENOENT;
1337
Thomas Grafc71099a2006-08-04 23:20:06 -07001338 table = rt->rt6i_table;
1339 write_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001340
Thomas Graf86872cb2006-08-22 00:01:08 -07001341 err = fib6_del(rt, info);
Changli Gaod8d1f302010-06-10 23:31:35 -07001342 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001343
Thomas Grafc71099a2006-08-04 23:20:06 -07001344 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001345
1346 return err;
1347}
1348
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001349int ip6_del_rt(struct rt6_info *rt)
1350{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001351 struct nl_info info = {
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001352 .nl_net = dev_net(rt->rt6i_dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001353 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08001354 return __ip6_del_rt(rt, &info);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001355}
1356
Thomas Graf86872cb2006-08-22 00:01:08 -07001357static int ip6_route_del(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001358{
Thomas Grafc71099a2006-08-04 23:20:06 -07001359 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001360 struct fib6_node *fn;
1361 struct rt6_info *rt;
1362 int err = -ESRCH;
1363
Daniel Lezcano55786892008-03-04 13:47:47 -08001364 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001365 if (table == NULL)
1366 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001367
Thomas Grafc71099a2006-08-04 23:20:06 -07001368 read_lock_bh(&table->tb6_lock);
1369
1370 fn = fib6_locate(&table->tb6_root,
Thomas Graf86872cb2006-08-22 00:01:08 -07001371 &cfg->fc_dst, cfg->fc_dst_len,
1372 &cfg->fc_src, cfg->fc_src_len);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001373
Linus Torvalds1da177e2005-04-16 15:20:36 -07001374 if (fn) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001375 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
Thomas Graf86872cb2006-08-22 00:01:08 -07001376 if (cfg->fc_ifindex &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001377 (rt->rt6i_dev == NULL ||
Thomas Graf86872cb2006-08-22 00:01:08 -07001378 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001379 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001380 if (cfg->fc_flags & RTF_GATEWAY &&
1381 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001382 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001383 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001384 continue;
Changli Gaod8d1f302010-06-10 23:31:35 -07001385 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001386 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001387
Thomas Graf86872cb2006-08-22 00:01:08 -07001388 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001389 }
1390 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001391 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001392
1393 return err;
1394}
1395
1396/*
1397 * Handle redirects
1398 */
/*
 * Flow key extended with a gateway address for redirect handling.
 * __ip6_route_redirect() receives a struct flowi pointer and casts it
 * back to this type, so fl has to stay the first member.
 */
struct ip6rd_flowi {
	struct flowi fl;		/* plain flow key used for the lookup */
	struct in6_addr gateway;	/* compared against each route's rt6i_gateway */
};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001403
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001404static struct rt6_info *__ip6_route_redirect(struct net *net,
1405 struct fib6_table *table,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001406 struct flowi *fl,
1407 int flags)
1408{
1409 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1410 struct rt6_info *rt;
1411 struct fib6_node *fn;
Thomas Grafc71099a2006-08-04 23:20:06 -07001412
Linus Torvalds1da177e2005-04-16 15:20:36 -07001413 /*
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001414 * Get the "current" route for this destination and
1415 * check if the redirect has come from approriate router.
1416 *
1417 * RFC 2461 specifies that redirects should only be
1418 * accepted if they come from the nexthop to the target.
1419 * Due to the way the routes are chosen, this notion
1420 * is a bit fuzzy and one might need to check all possible
1421 * routes.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001422 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001423
Thomas Grafc71099a2006-08-04 23:20:06 -07001424 read_lock_bh(&table->tb6_lock);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001425 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001426restart:
Changli Gaod8d1f302010-06-10 23:31:35 -07001427 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001428 /*
1429 * Current route is on-link; redirect is always invalid.
1430 *
1431 * Seems, previous statement is not true. It could
1432 * be node, which looks for us as on-link (f.e. proxy ndisc)
1433 * But then router serving it might decide, that we should
1434 * know truth 8)8) --ANK (980726).
1435 */
1436 if (rt6_check_expired(rt))
1437 continue;
1438 if (!(rt->rt6i_flags & RTF_GATEWAY))
1439 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001440 if (fl->oif != rt->rt6i_dev->ifindex)
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001441 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001442 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001443 continue;
1444 break;
1445 }
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001446
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001447 if (!rt)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001448 rt = net->ipv6.ip6_null_entry;
1449 BACKTRACK(net, &fl->fl6_src);
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001450out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001451 dst_hold(&rt->dst);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001452
1453 read_unlock_bh(&table->tb6_lock);
1454
1455 return rt;
1456};
1457
1458static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1459 struct in6_addr *src,
1460 struct in6_addr *gateway,
1461 struct net_device *dev)
1462{
Thomas Grafadaa70b2006-10-13 15:01:03 -07001463 int flags = RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001464 struct net *net = dev_net(dev);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001465 struct ip6rd_flowi rdfl = {
1466 .fl = {
1467 .oif = dev->ifindex,
Changli Gao58116622010-11-12 18:43:55 +00001468 .fl6_dst = *dest,
1469 .fl6_src = *src,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001470 },
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001471 };
Thomas Grafadaa70b2006-10-13 15:01:03 -07001472
Brian Haley86c36ce2009-10-07 13:58:01 -07001473 ipv6_addr_copy(&rdfl.gateway, gateway);
1474
Thomas Grafadaa70b2006-10-13 15:01:03 -07001475 if (rt6_need_strict(dest))
1476 flags |= RT6_LOOKUP_F_IFACE;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001477
Daniel Lezcano55786892008-03-04 13:47:47 -08001478 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
Daniel Lezcano58f09b72008-03-03 23:25:27 -08001479 flags, __ip6_route_redirect);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001480}
1481
1482void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1483 struct in6_addr *saddr,
1484 struct neighbour *neigh, u8 *lladdr, int on_link)
1485{
1486 struct rt6_info *rt, *nrt = NULL;
1487 struct netevent_redirect netevent;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001488 struct net *net = dev_net(neigh->dev);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001489
1490 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1491
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001492 if (rt == net->ipv6.ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001493 if (net_ratelimit())
1494 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1495 "for redirect target\n");
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001496 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001497 }
1498
Linus Torvalds1da177e2005-04-16 15:20:36 -07001499 /*
1500 * We have finally decided to accept it.
1501 */
1502
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001503 neigh_update(neigh, lladdr, NUD_STALE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001504 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1505 NEIGH_UPDATE_F_OVERRIDE|
1506 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1507 NEIGH_UPDATE_F_ISROUTER))
1508 );
1509
1510 /*
1511 * Redirect received -> path was valid.
1512 * Look, redirects are sent only in response to data packets,
1513 * so that this nexthop apparently is reachable. --ANK
1514 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001515 dst_confirm(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001516
1517 /* Duplicate redirect: silently ignore. */
Changli Gaod8d1f302010-06-10 23:31:35 -07001518 if (neigh == rt->dst.neighbour)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001519 goto out;
1520
1521 nrt = ip6_rt_copy(rt);
1522 if (nrt == NULL)
1523 goto out;
1524
1525 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1526 if (on_link)
1527 nrt->rt6i_flags &= ~RTF_GATEWAY;
1528
1529 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1530 nrt->rt6i_dst.plen = 128;
Changli Gaod8d1f302010-06-10 23:31:35 -07001531 nrt->dst.flags |= DST_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001532
1533 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1534 nrt->rt6i_nexthop = neigh_clone(neigh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001535
Thomas Graf40e22e82006-08-22 00:00:45 -07001536 if (ip6_ins_rt(nrt))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001537 goto out;
1538
Changli Gaod8d1f302010-06-10 23:31:35 -07001539 netevent.old = &rt->dst;
1540 netevent.new = &nrt->dst;
Tom Tucker8d717402006-07-30 20:43:36 -07001541 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1542
Linus Torvalds1da177e2005-04-16 15:20:36 -07001543 if (rt->rt6i_flags&RTF_CACHE) {
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001544 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001545 return;
1546 }
1547
1548out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001549 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001550}
1551
1552/*
1553 * Handle ICMP "packet too big" messages
1554 * i.e. Path MTU discovery
1555 */
1556
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001557static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr,
1558 struct net *net, u32 pmtu, int ifindex)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001559{
1560 struct rt6_info *rt, *nrt;
1561 int allfrag = 0;
Andrey Vagind3052b52010-12-11 15:20:11 +00001562again:
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001563 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001564 if (rt == NULL)
1565 return;
1566
Andrey Vagind3052b52010-12-11 15:20:11 +00001567 if (rt6_check_expired(rt)) {
1568 ip6_del_rt(rt);
1569 goto again;
1570 }
1571
Changli Gaod8d1f302010-06-10 23:31:35 -07001572 if (pmtu >= dst_mtu(&rt->dst))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001573 goto out;
1574
1575 if (pmtu < IPV6_MIN_MTU) {
1576 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001577 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
Linus Torvalds1da177e2005-04-16 15:20:36 -07001578 * MTU (1280) and a fragment header should always be included
1579 * after a node receiving Too Big message reporting PMTU is
1580 * less than the IPv6 Minimum Link MTU.
1581 */
1582 pmtu = IPV6_MIN_MTU;
1583 allfrag = 1;
1584 }
1585
1586 /* New mtu received -> path was valid.
1587 They are sent only in response to data packets,
1588 so that this nexthop apparently is reachable. --ANK
1589 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001590 dst_confirm(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001591
1592 /* Host route. If it is static, it would be better
1593 not to override it, but add new one, so that
1594 when cache entry will expire old pmtu
1595 would return automatically.
1596 */
1597 if (rt->rt6i_flags & RTF_CACHE) {
David S. Millerdefb3512010-12-08 21:16:57 -08001598 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1599 if (allfrag) {
1600 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1601 features |= RTAX_FEATURE_ALLFRAG;
1602 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1603 }
Changli Gaod8d1f302010-06-10 23:31:35 -07001604 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001605 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1606 goto out;
1607 }
1608
1609 /* Network route.
1610 Two cases are possible:
1611 1. It is connected route. Action: COW
1612 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1613 */
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001614 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001615 nrt = rt6_alloc_cow(rt, daddr, saddr);
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001616 else
1617 nrt = rt6_alloc_clone(rt, daddr);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001618
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001619 if (nrt) {
David S. Millerdefb3512010-12-08 21:16:57 -08001620 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1621 if (allfrag) {
1622 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1623 features |= RTAX_FEATURE_ALLFRAG;
1624 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1625 }
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001626
1627 /* According to RFC 1981, detecting PMTU increase shouldn't be
1628 * happened within 5 mins, the recommended timer is 10 mins.
1629 * Here this route expiration time is set to ip6_rt_mtu_expires
1630 * which is 10 mins. After 10 mins the decreased pmtu is expired
1631 * and detecting PMTU increase will be automatically happened.
1632 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001633 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001634 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1635
Thomas Graf40e22e82006-08-22 00:00:45 -07001636 ip6_ins_rt(nrt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001637 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001638out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001639 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001640}
1641
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001642void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1643 struct net_device *dev, u32 pmtu)
1644{
1645 struct net *net = dev_net(dev);
1646
1647 /*
1648 * RFC 1981 states that a node "MUST reduce the size of the packets it
1649 * is sending along the path" that caused the Packet Too Big message.
1650 * Since it's not possible in the general case to determine which
1651 * interface was used to send the original packet, we update the MTU
1652 * on the interface that will be used to send future packets. We also
1653 * update the MTU on the interface that received the Packet Too Big in
1654 * case the original packet was forced out that interface with
1655 * SO_BINDTODEVICE or similar. This is the next best thing to the
1656 * correct behaviour, which would be to update the MTU on all
1657 * interfaces.
1658 */
1659 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1660 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1661}
1662
Linus Torvalds1da177e2005-04-16 15:20:36 -07001663/*
1664 * Misc support functions
1665 */
1666
1667static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1668{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001669 struct net *net = dev_net(ort->rt6i_dev);
Alexey Dobriyan86393e52009-08-29 01:34:49 +00001670 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001671
1672 if (rt) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001673 rt->dst.input = ort->dst.input;
1674 rt->dst.output = ort->dst.output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001675
David S. Millerdefb3512010-12-08 21:16:57 -08001676 dst_copy_metrics(&rt->dst, &ort->dst);
Changli Gaod8d1f302010-06-10 23:31:35 -07001677 rt->dst.error = ort->dst.error;
1678 rt->dst.dev = ort->dst.dev;
1679 if (rt->dst.dev)
1680 dev_hold(rt->dst.dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001681 rt->rt6i_idev = ort->rt6i_idev;
1682 if (rt->rt6i_idev)
1683 in6_dev_hold(rt->rt6i_idev);
Changli Gaod8d1f302010-06-10 23:31:35 -07001684 rt->dst.lastuse = jiffies;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001685 rt->rt6i_expires = 0;
1686
1687 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1688 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1689 rt->rt6i_metric = 0;
1690
1691 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1692#ifdef CONFIG_IPV6_SUBTREES
1693 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1694#endif
Thomas Grafc71099a2006-08-04 23:20:06 -07001695 rt->rt6i_table = ort->rt6i_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001696 }
1697 return rt;
1698}
1699
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001700#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001701static struct rt6_info *rt6_get_route_info(struct net *net,
1702 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001703 struct in6_addr *gwaddr, int ifindex)
1704{
1705 struct fib6_node *fn;
1706 struct rt6_info *rt = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001707 struct fib6_table *table;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001708
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001709 table = fib6_get_table(net, RT6_TABLE_INFO);
Thomas Grafc71099a2006-08-04 23:20:06 -07001710 if (table == NULL)
1711 return NULL;
1712
1713 write_lock_bh(&table->tb6_lock);
1714 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001715 if (!fn)
1716 goto out;
1717
Changli Gaod8d1f302010-06-10 23:31:35 -07001718 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001719 if (rt->rt6i_dev->ifindex != ifindex)
1720 continue;
1721 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1722 continue;
1723 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1724 continue;
Changli Gaod8d1f302010-06-10 23:31:35 -07001725 dst_hold(&rt->dst);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001726 break;
1727 }
1728out:
Thomas Grafc71099a2006-08-04 23:20:06 -07001729 write_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001730 return rt;
1731}
1732
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001733static struct rt6_info *rt6_add_route_info(struct net *net,
1734 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001735 struct in6_addr *gwaddr, int ifindex,
1736 unsigned pref)
1737{
Thomas Graf86872cb2006-08-22 00:01:08 -07001738 struct fib6_config cfg = {
1739 .fc_table = RT6_TABLE_INFO,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001740 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001741 .fc_ifindex = ifindex,
1742 .fc_dst_len = prefixlen,
1743 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1744 RTF_UP | RTF_PREF(pref),
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001745 .fc_nlinfo.pid = 0,
1746 .fc_nlinfo.nlh = NULL,
1747 .fc_nlinfo.nl_net = net,
Thomas Graf86872cb2006-08-22 00:01:08 -07001748 };
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001749
Thomas Graf86872cb2006-08-22 00:01:08 -07001750 ipv6_addr_copy(&cfg.fc_dst, prefix);
1751 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1752
YOSHIFUJI Hideakie317da92006-03-20 17:06:42 -08001753 /* We should treat it as a default route if prefix length is 0. */
1754 if (!prefixlen)
Thomas Graf86872cb2006-08-22 00:01:08 -07001755 cfg.fc_flags |= RTF_DEFAULT;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001756
Thomas Graf86872cb2006-08-22 00:01:08 -07001757 ip6_route_add(&cfg);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001758
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001759 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001760}
1761#endif
1762
Linus Torvalds1da177e2005-04-16 15:20:36 -07001763struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001764{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001765 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001766 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001767
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001768 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001769 if (table == NULL)
1770 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001771
Thomas Grafc71099a2006-08-04 23:20:06 -07001772 write_lock_bh(&table->tb6_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07001773 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001774 if (dev == rt->rt6i_dev &&
YOSHIFUJI Hideaki045927f2006-03-20 17:00:48 -08001775 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001776 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1777 break;
1778 }
1779 if (rt)
Changli Gaod8d1f302010-06-10 23:31:35 -07001780 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001781 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001782 return rt;
1783}
1784
1785struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001786 struct net_device *dev,
1787 unsigned int pref)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001788{
Thomas Graf86872cb2006-08-22 00:01:08 -07001789 struct fib6_config cfg = {
1790 .fc_table = RT6_TABLE_DFLT,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001791 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001792 .fc_ifindex = dev->ifindex,
1793 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1794 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
Daniel Lezcano55786892008-03-04 13:47:47 -08001795 .fc_nlinfo.pid = 0,
1796 .fc_nlinfo.nlh = NULL,
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001797 .fc_nlinfo.nl_net = dev_net(dev),
Thomas Graf86872cb2006-08-22 00:01:08 -07001798 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001799
Thomas Graf86872cb2006-08-22 00:01:08 -07001800 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001801
Thomas Graf86872cb2006-08-22 00:01:08 -07001802 ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001803
Linus Torvalds1da177e2005-04-16 15:20:36 -07001804 return rt6_get_dflt_router(gwaddr, dev);
1805}
1806
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001807void rt6_purge_dflt_routers(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001808{
1809 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001810 struct fib6_table *table;
1811
1812 /* NOTE: Keep consistent with rt6_get_dflt_router */
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001813 table = fib6_get_table(net, RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001814 if (table == NULL)
1815 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001816
1817restart:
Thomas Grafc71099a2006-08-04 23:20:06 -07001818 read_lock_bh(&table->tb6_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07001819 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001820 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001821 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001822 read_unlock_bh(&table->tb6_lock);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001823 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001824 goto restart;
1825 }
1826 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001827 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001828}
1829
Daniel Lezcano55786892008-03-04 13:47:47 -08001830static void rtmsg_to_fib6_config(struct net *net,
1831 struct in6_rtmsg *rtmsg,
Thomas Graf86872cb2006-08-22 00:01:08 -07001832 struct fib6_config *cfg)
1833{
1834 memset(cfg, 0, sizeof(*cfg));
1835
1836 cfg->fc_table = RT6_TABLE_MAIN;
1837 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1838 cfg->fc_metric = rtmsg->rtmsg_metric;
1839 cfg->fc_expires = rtmsg->rtmsg_info;
1840 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1841 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1842 cfg->fc_flags = rtmsg->rtmsg_flags;
1843
Daniel Lezcano55786892008-03-04 13:47:47 -08001844 cfg->fc_nlinfo.nl_net = net;
Benjamin Theryf1243c22008-02-26 18:10:03 -08001845
Thomas Graf86872cb2006-08-22 00:01:08 -07001846 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1847 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1848 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1849}
1850
Daniel Lezcano55786892008-03-04 13:47:47 -08001851int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001852{
Thomas Graf86872cb2006-08-22 00:01:08 -07001853 struct fib6_config cfg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001854 struct in6_rtmsg rtmsg;
1855 int err;
1856
1857 switch(cmd) {
1858 case SIOCADDRT: /* Add a route */
1859 case SIOCDELRT: /* Delete a route */
1860 if (!capable(CAP_NET_ADMIN))
1861 return -EPERM;
1862 err = copy_from_user(&rtmsg, arg,
1863 sizeof(struct in6_rtmsg));
1864 if (err)
1865 return -EFAULT;
Thomas Graf86872cb2006-08-22 00:01:08 -07001866
Daniel Lezcano55786892008-03-04 13:47:47 -08001867 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
Thomas Graf86872cb2006-08-22 00:01:08 -07001868
Linus Torvalds1da177e2005-04-16 15:20:36 -07001869 rtnl_lock();
1870 switch (cmd) {
1871 case SIOCADDRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001872 err = ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001873 break;
1874 case SIOCDELRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001875 err = ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001876 break;
1877 default:
1878 err = -EINVAL;
1879 }
1880 rtnl_unlock();
1881
1882 return err;
Stephen Hemminger3ff50b72007-04-20 17:09:22 -07001883 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001884
1885 return -EINVAL;
1886}
1887
1888/*
1889 * Drop the packet on the floor
1890 */
1891
Brian Haleyd5fdd6b2009-06-23 04:31:07 -07001892static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001893{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001894 int type;
Eric Dumazetadf30902009-06-02 05:19:30 +00001895 struct dst_entry *dst = skb_dst(skb);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001896 switch (ipstats_mib_noroutes) {
1897 case IPSTATS_MIB_INNOROUTES:
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -07001898 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
Ulrich Weber45bb0062010-02-25 23:28:58 +00001899 if (type == IPV6_ADDR_ANY) {
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07001900 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1901 IPSTATS_MIB_INADDRERRORS);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001902 break;
1903 }
1904 /* FALLTHROUGH */
1905 case IPSTATS_MIB_OUTNOROUTES:
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07001906 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1907 ipstats_mib_noroutes);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001908 break;
1909 }
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +00001910 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001911 kfree_skb(skb);
1912 return 0;
1913}
1914
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001915static int ip6_pkt_discard(struct sk_buff *skb)
1916{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001917 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001918}
1919
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001920static int ip6_pkt_discard_out(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001921{
Eric Dumazetadf30902009-06-02 05:19:30 +00001922 skb->dev = skb_dst(skb)->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001923 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001924}
1925
David S. Miller6723ab52006-10-18 21:20:57 -07001926#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1927
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001928static int ip6_pkt_prohibit(struct sk_buff *skb)
1929{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001930 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001931}
1932
1933static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1934{
Eric Dumazetadf30902009-06-02 05:19:30 +00001935 skb->dev = skb_dst(skb)->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001936 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001937}
1938
David S. Miller6723ab52006-10-18 21:20:57 -07001939#endif
1940
Linus Torvalds1da177e2005-04-16 15:20:36 -07001941/*
1942 * Allocate a dst for local (unicast / anycast) address.
1943 */
1944
1945struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1946 const struct in6_addr *addr,
1947 int anycast)
1948{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001949 struct net *net = dev_net(idev->dev);
Alexey Dobriyan86393e52009-08-29 01:34:49 +00001950 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
David S. Miller14deae42009-01-04 16:04:39 -08001951 struct neighbour *neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001952
Ben Greear40385652010-11-08 12:33:48 +00001953 if (rt == NULL) {
1954 if (net_ratelimit())
1955 pr_warning("IPv6: Maximum number of routes reached,"
1956 " consider increasing route/max_size.\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001957 return ERR_PTR(-ENOMEM);
Ben Greear40385652010-11-08 12:33:48 +00001958 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001959
Daniel Lezcano55786892008-03-04 13:47:47 -08001960 dev_hold(net->loopback_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001961 in6_dev_hold(idev);
1962
Changli Gaod8d1f302010-06-10 23:31:35 -07001963 rt->dst.flags = DST_HOST;
1964 rt->dst.input = ip6_input;
1965 rt->dst.output = ip6_output;
Daniel Lezcano55786892008-03-04 13:47:47 -08001966 rt->rt6i_dev = net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001967 rt->rt6i_idev = idev;
David S. Millerdefb3512010-12-08 21:16:57 -08001968 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1);
Changli Gaod8d1f302010-06-10 23:31:35 -07001969 rt->dst.obsolete = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001970
1971 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09001972 if (anycast)
1973 rt->rt6i_flags |= RTF_ANYCAST;
1974 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07001975 rt->rt6i_flags |= RTF_LOCAL;
David S. Miller14deae42009-01-04 16:04:39 -08001976 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1977 if (IS_ERR(neigh)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001978 dst_free(&rt->dst);
David S. Miller14deae42009-01-04 16:04:39 -08001979
1980 /* We are casting this because that is the return
1981 * value type. But an errno encoded pointer is the
1982 * same regardless of the underlying pointer type,
1983 * and that's what we are returning. So this is OK.
1984 */
1985 return (struct rt6_info *) neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001986 }
David S. Miller14deae42009-01-04 16:04:39 -08001987 rt->rt6i_nexthop = neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001988
1989 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1990 rt->rt6i_dst.plen = 128;
Daniel Lezcano55786892008-03-04 13:47:47 -08001991 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001992
Changli Gaod8d1f302010-06-10 23:31:35 -07001993 atomic_set(&rt->dst.__refcnt, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001994
1995 return rt;
1996}
1997
/*
 * Argument bundle that rt6_ifdown() passes to the fib6_ifdown() callback.
 */
struct arg_dev_net {
	struct net_device	*dev;	/* device to match; NULL matches every route */
	struct net		*net;	/* namespace whose ip6_null_entry is spared */
};
2002
Linus Torvalds1da177e2005-04-16 15:20:36 -07002003static int fib6_ifdown(struct rt6_info *rt, void *arg)
2004{
stephen hemmingerbc3ef662010-12-16 17:42:40 +00002005 const struct arg_dev_net *adn = arg;
2006 const struct net_device *dev = adn->dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002007
stephen hemmingerbc3ef662010-12-16 17:42:40 +00002008 if ((rt->rt6i_dev == dev || dev == NULL) &&
2009 rt != adn->net->ipv6.ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002010 RT6_TRACE("deleted by ifdown %p\n", rt);
2011 return -1;
2012 }
2013 return 0;
2014}
2015
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002016void rt6_ifdown(struct net *net, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002017{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002018 struct arg_dev_net adn = {
2019 .dev = dev,
2020 .net = net,
2021 };
2022
2023 fib6_clean_all(net, fib6_ifdown, 0, &adn);
David S. Miller1e493d12008-09-10 17:27:15 -07002024 icmp6_clean_all(fib6_ifdown, &adn);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002025}
2026
/*
 * Argument bundle that rt6_mtu_change() passes to the
 * rt6_mtu_change_route() callback.
 */
struct rt6_mtu_change_arg {
	struct net_device	*dev;	/* device whose MTU changed */
	unsigned		mtu;	/* the new MTU */
};
2032
2033static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2034{
2035 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2036 struct inet6_dev *idev;
2037
2038 /* In IPv6 pmtu discovery is not optional,
2039 so that RTAX_MTU lock cannot disable it.
2040 We still use this lock to block changes
2041 caused by addrconf/ndisc.
2042 */
2043
2044 idev = __in6_dev_get(arg->dev);
2045 if (idev == NULL)
2046 return 0;
2047
2048 /* For administrative MTU increase, there is no way to discover
2049 IPv6 PMTU increase, so PMTU increase should be updated here.
2050 Since RFC 1981 doesn't include administrative MTU increase
2051 update PMTU increase is a MUST. (i.e. jumbo frame)
2052 */
2053 /*
2054 If new MTU is less than route PMTU, this new MTU will be the
2055 lowest MTU in the path, update the route PMTU to reflect PMTU
2056 decreases; if new MTU is greater than route PMTU, and the
2057 old MTU is the lowest MTU in the path, update the route PMTU
2058 to reflect the increase. In this case if the other nodes' MTU
2059 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2060 PMTU discouvery.
2061 */
2062 if (rt->rt6i_dev == arg->dev &&
Changli Gaod8d1f302010-06-10 23:31:35 -07002063 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2064 (dst_mtu(&rt->dst) >= arg->mtu ||
2065 (dst_mtu(&rt->dst) < arg->mtu &&
2066 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
David S. Millerdefb3512010-12-08 21:16:57 -08002067 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
Simon Arlott566cfd82007-07-26 00:09:55 -07002068 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002069 return 0;
2070}
2071
2072void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2073{
Thomas Grafc71099a2006-08-04 23:20:06 -07002074 struct rt6_mtu_change_arg arg = {
2075 .dev = dev,
2076 .mtu = mtu,
2077 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07002078
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002079 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002080}
2081
Patrick McHardyef7c79e2007-06-05 12:38:30 -07002082static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
Thomas Graf5176f912006-08-26 20:13:18 -07002083 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
Thomas Graf86872cb2006-08-22 00:01:08 -07002084 [RTA_OIF] = { .type = NLA_U32 },
Thomas Grafab364a62006-08-22 00:01:47 -07002085 [RTA_IIF] = { .type = NLA_U32 },
Thomas Graf86872cb2006-08-22 00:01:08 -07002086 [RTA_PRIORITY] = { .type = NLA_U32 },
2087 [RTA_METRICS] = { .type = NLA_NESTED },
2088};
2089
2090static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2091 struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002092{
Thomas Graf86872cb2006-08-22 00:01:08 -07002093 struct rtmsg *rtm;
2094 struct nlattr *tb[RTA_MAX+1];
2095 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002096
Thomas Graf86872cb2006-08-22 00:01:08 -07002097 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2098 if (err < 0)
2099 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002100
Thomas Graf86872cb2006-08-22 00:01:08 -07002101 err = -EINVAL;
2102 rtm = nlmsg_data(nlh);
2103 memset(cfg, 0, sizeof(*cfg));
2104
2105 cfg->fc_table = rtm->rtm_table;
2106 cfg->fc_dst_len = rtm->rtm_dst_len;
2107 cfg->fc_src_len = rtm->rtm_src_len;
2108 cfg->fc_flags = RTF_UP;
2109 cfg->fc_protocol = rtm->rtm_protocol;
2110
2111 if (rtm->rtm_type == RTN_UNREACHABLE)
2112 cfg->fc_flags |= RTF_REJECT;
2113
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00002114 if (rtm->rtm_type == RTN_LOCAL)
2115 cfg->fc_flags |= RTF_LOCAL;
2116
Thomas Graf86872cb2006-08-22 00:01:08 -07002117 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2118 cfg->fc_nlinfo.nlh = nlh;
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002119 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
Thomas Graf86872cb2006-08-22 00:01:08 -07002120
2121 if (tb[RTA_GATEWAY]) {
2122 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2123 cfg->fc_flags |= RTF_GATEWAY;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002124 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002125
2126 if (tb[RTA_DST]) {
2127 int plen = (rtm->rtm_dst_len + 7) >> 3;
2128
2129 if (nla_len(tb[RTA_DST]) < plen)
2130 goto errout;
2131
2132 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002133 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002134
2135 if (tb[RTA_SRC]) {
2136 int plen = (rtm->rtm_src_len + 7) >> 3;
2137
2138 if (nla_len(tb[RTA_SRC]) < plen)
2139 goto errout;
2140
2141 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002142 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002143
2144 if (tb[RTA_OIF])
2145 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2146
2147 if (tb[RTA_PRIORITY])
2148 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2149
2150 if (tb[RTA_METRICS]) {
2151 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2152 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002153 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002154
2155 if (tb[RTA_TABLE])
2156 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2157
2158 err = 0;
2159errout:
2160 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002161}
2162
Thomas Grafc127ea22007-03-22 11:58:32 -07002163static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002164{
Thomas Graf86872cb2006-08-22 00:01:08 -07002165 struct fib6_config cfg;
2166 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002167
Thomas Graf86872cb2006-08-22 00:01:08 -07002168 err = rtm_to_fib6_config(skb, nlh, &cfg);
2169 if (err < 0)
2170 return err;
2171
2172 return ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002173}
2174
Thomas Grafc127ea22007-03-22 11:58:32 -07002175static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002176{
Thomas Graf86872cb2006-08-22 00:01:08 -07002177 struct fib6_config cfg;
2178 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002179
Thomas Graf86872cb2006-08-22 00:01:08 -07002180 err = rtm_to_fib6_config(skb, nlh, &cfg);
2181 if (err < 0)
2182 return err;
2183
2184 return ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002185}
2186
Thomas Graf339bf982006-11-10 14:10:15 -08002187static inline size_t rt6_nlmsg_size(void)
2188{
2189 return NLMSG_ALIGN(sizeof(struct rtmsg))
2190 + nla_total_size(16) /* RTA_SRC */
2191 + nla_total_size(16) /* RTA_DST */
2192 + nla_total_size(16) /* RTA_GATEWAY */
2193 + nla_total_size(16) /* RTA_PREFSRC */
2194 + nla_total_size(4) /* RTA_TABLE */
2195 + nla_total_size(4) /* RTA_IIF */
2196 + nla_total_size(4) /* RTA_OIF */
2197 + nla_total_size(4) /* RTA_PRIORITY */
Noriaki TAKAMIYA6a2b9ce2007-01-23 22:09:41 -08002198 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
Thomas Graf339bf982006-11-10 14:10:15 -08002199 + nla_total_size(sizeof(struct rta_cacheinfo));
2200}
2201
Brian Haley191cd582008-08-14 15:33:21 -07002202static int rt6_fill_node(struct net *net,
2203 struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002204 struct in6_addr *dst, struct in6_addr *src,
2205 int iif, int type, u32 pid, u32 seq,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002206 int prefix, int nowait, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002207{
2208 struct rtmsg *rtm;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002209 struct nlmsghdr *nlh;
Thomas Grafe3703b32006-11-27 09:27:07 -08002210 long expires;
Patrick McHardy9e762a42006-08-10 23:09:48 -07002211 u32 table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002212
2213 if (prefix) { /* user wants prefix routes only */
2214 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2215 /* success since this is not a prefix route */
2216 return 1;
2217 }
2218 }
2219
Thomas Graf2d7202b2006-08-22 00:01:27 -07002220 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2221 if (nlh == NULL)
Patrick McHardy26932562007-01-31 23:16:40 -08002222 return -EMSGSIZE;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002223
2224 rtm = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002225 rtm->rtm_family = AF_INET6;
2226 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2227 rtm->rtm_src_len = rt->rt6i_src.plen;
2228 rtm->rtm_tos = 0;
Thomas Grafc71099a2006-08-04 23:20:06 -07002229 if (rt->rt6i_table)
Patrick McHardy9e762a42006-08-10 23:09:48 -07002230 table = rt->rt6i_table->tb6_id;
Thomas Grafc71099a2006-08-04 23:20:06 -07002231 else
Patrick McHardy9e762a42006-08-10 23:09:48 -07002232 table = RT6_TABLE_UNSPEC;
2233 rtm->rtm_table = table;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002234 NLA_PUT_U32(skb, RTA_TABLE, table);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002235 if (rt->rt6i_flags&RTF_REJECT)
2236 rtm->rtm_type = RTN_UNREACHABLE;
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00002237 else if (rt->rt6i_flags&RTF_LOCAL)
2238 rtm->rtm_type = RTN_LOCAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002239 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2240 rtm->rtm_type = RTN_LOCAL;
2241 else
2242 rtm->rtm_type = RTN_UNICAST;
2243 rtm->rtm_flags = 0;
2244 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2245 rtm->rtm_protocol = rt->rt6i_protocol;
2246 if (rt->rt6i_flags&RTF_DYNAMIC)
2247 rtm->rtm_protocol = RTPROT_REDIRECT;
2248 else if (rt->rt6i_flags & RTF_ADDRCONF)
2249 rtm->rtm_protocol = RTPROT_KERNEL;
2250 else if (rt->rt6i_flags&RTF_DEFAULT)
2251 rtm->rtm_protocol = RTPROT_RA;
2252
2253 if (rt->rt6i_flags&RTF_CACHE)
2254 rtm->rtm_flags |= RTM_F_CLONED;
2255
2256 if (dst) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002257 NLA_PUT(skb, RTA_DST, 16, dst);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002258 rtm->rtm_dst_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002259 } else if (rtm->rtm_dst_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002260 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002261#ifdef CONFIG_IPV6_SUBTREES
2262 if (src) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002263 NLA_PUT(skb, RTA_SRC, 16, src);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002264 rtm->rtm_src_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002265 } else if (rtm->rtm_src_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002266 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002267#endif
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002268 if (iif) {
2269#ifdef CONFIG_IPV6_MROUTE
2270 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
Benjamin Thery8229efd2008-12-10 16:30:15 -08002271 int err = ip6mr_get_route(net, skb, rtm, nowait);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002272 if (err <= 0) {
2273 if (!nowait) {
2274 if (err == 0)
2275 return 0;
2276 goto nla_put_failure;
2277 } else {
2278 if (err == -EMSGSIZE)
2279 goto nla_put_failure;
2280 }
2281 }
2282 } else
2283#endif
2284 NLA_PUT_U32(skb, RTA_IIF, iif);
2285 } else if (dst) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002286 struct inet6_dev *idev = ip6_dst_idev(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002287 struct in6_addr saddr_buf;
Brian Haley191cd582008-08-14 15:33:21 -07002288 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
YOSHIFUJI Hideaki7cbca672008-03-25 09:37:42 +09002289 dst, 0, &saddr_buf) == 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002290 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002291 }
Thomas Graf2d7202b2006-08-22 00:01:27 -07002292
David S. Millerdefb3512010-12-08 21:16:57 -08002293 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002294 goto nla_put_failure;
2295
Changli Gaod8d1f302010-06-10 23:31:35 -07002296 if (rt->dst.neighbour)
2297 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key);
Thomas Graf2d7202b2006-08-22 00:01:27 -07002298
Changli Gaod8d1f302010-06-10 23:31:35 -07002299 if (rt->dst.dev)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002300 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2301
2302 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
Thomas Grafe3703b32006-11-27 09:27:07 -08002303
YOSHIFUJI Hideaki36e3dea2008-05-13 02:52:55 +09002304 if (!(rt->rt6i_flags & RTF_EXPIRES))
2305 expires = 0;
2306 else if (rt->rt6i_expires - jiffies < INT_MAX)
2307 expires = rt->rt6i_expires - jiffies;
2308 else
2309 expires = INT_MAX;
YOSHIFUJI Hideaki69cdf8f2008-05-19 16:55:13 -07002310
Changli Gaod8d1f302010-06-10 23:31:35 -07002311 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2312 expires, rt->dst.error) < 0)
Thomas Grafe3703b32006-11-27 09:27:07 -08002313 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002314
Thomas Graf2d7202b2006-08-22 00:01:27 -07002315 return nlmsg_end(skb, nlh);
2316
2317nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08002318 nlmsg_cancel(skb, nlh);
2319 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002320}
2321
Patrick McHardy1b43af52006-08-10 23:11:17 -07002322int rt6_dump_route(struct rt6_info *rt, void *p_arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002323{
2324 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2325 int prefix;
2326
Thomas Graf2d7202b2006-08-22 00:01:27 -07002327 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2328 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002329 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2330 } else
2331 prefix = 0;
2332
Brian Haley191cd582008-08-14 15:33:21 -07002333 return rt6_fill_node(arg->net,
2334 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002335 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002336 prefix, 0, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002337}
2338
Thomas Grafc127ea22007-03-22 11:58:32 -07002339static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002340{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002341 struct net *net = sock_net(in_skb->sk);
Thomas Grafab364a62006-08-22 00:01:47 -07002342 struct nlattr *tb[RTA_MAX+1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002343 struct rt6_info *rt;
Thomas Grafab364a62006-08-22 00:01:47 -07002344 struct sk_buff *skb;
2345 struct rtmsg *rtm;
2346 struct flowi fl;
2347 int err, iif = 0;
2348
2349 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2350 if (err < 0)
2351 goto errout;
2352
2353 err = -EINVAL;
2354 memset(&fl, 0, sizeof(fl));
2355
2356 if (tb[RTA_SRC]) {
2357 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2358 goto errout;
2359
2360 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2361 }
2362
2363 if (tb[RTA_DST]) {
2364 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2365 goto errout;
2366
2367 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2368 }
2369
2370 if (tb[RTA_IIF])
2371 iif = nla_get_u32(tb[RTA_IIF]);
2372
2373 if (tb[RTA_OIF])
2374 fl.oif = nla_get_u32(tb[RTA_OIF]);
2375
2376 if (iif) {
2377 struct net_device *dev;
Daniel Lezcano55786892008-03-04 13:47:47 -08002378 dev = __dev_get_by_index(net, iif);
Thomas Grafab364a62006-08-22 00:01:47 -07002379 if (!dev) {
2380 err = -ENODEV;
2381 goto errout;
2382 }
2383 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002384
2385 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
Thomas Grafab364a62006-08-22 00:01:47 -07002386 if (skb == NULL) {
2387 err = -ENOBUFS;
2388 goto errout;
2389 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002390
2391 /* Reserve room for dummy headers, this skb can pass
2392 through good chunk of routing engine.
2393 */
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07002394 skb_reset_mac_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002395 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2396
Daniel Lezcano8a3edd82008-03-07 11:14:16 -08002397 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
Changli Gaod8d1f302010-06-10 23:31:35 -07002398 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002399
Brian Haley191cd582008-08-14 15:33:21 -07002400 err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002401 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002402 nlh->nlmsg_seq, 0, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002403 if (err < 0) {
Thomas Grafab364a62006-08-22 00:01:47 -07002404 kfree_skb(skb);
2405 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002406 }
2407
Daniel Lezcano55786892008-03-04 13:47:47 -08002408 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
Thomas Grafab364a62006-08-22 00:01:47 -07002409errout:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002410 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002411}
2412
Thomas Graf86872cb2006-08-22 00:01:08 -07002413void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002414{
2415 struct sk_buff *skb;
Daniel Lezcano55786892008-03-04 13:47:47 -08002416 struct net *net = info->nl_net;
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002417 u32 seq;
2418 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002419
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002420 err = -ENOBUFS;
2421 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
Thomas Graf86872cb2006-08-22 00:01:08 -07002422
Thomas Graf339bf982006-11-10 14:10:15 -08002423 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
Thomas Graf21713eb2006-08-15 00:35:24 -07002424 if (skb == NULL)
2425 goto errout;
2426
Brian Haley191cd582008-08-14 15:33:21 -07002427 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002428 event, info->pid, seq, 0, 0, 0);
Patrick McHardy26932562007-01-31 23:16:40 -08002429 if (err < 0) {
2430 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2431 WARN_ON(err == -EMSGSIZE);
2432 kfree_skb(skb);
2433 goto errout;
2434 }
Pablo Neira Ayuso1ce85fe2009-02-24 23:18:28 -08002435 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2436 info->nlh, gfp_any());
2437 return;
Thomas Graf21713eb2006-08-15 00:35:24 -07002438errout:
2439 if (err < 0)
Daniel Lezcano55786892008-03-04 13:47:47 -08002440 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002441}
2442
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002443static int ip6_route_dev_notify(struct notifier_block *this,
2444 unsigned long event, void *data)
2445{
2446 struct net_device *dev = (struct net_device *)data;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002447 struct net *net = dev_net(dev);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002448
2449 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002450 net->ipv6.ip6_null_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002451 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2452#ifdef CONFIG_IPV6_MULTIPLE_TABLES
Changli Gaod8d1f302010-06-10 23:31:35 -07002453 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002454 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
Changli Gaod8d1f302010-06-10 23:31:35 -07002455 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002456 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2457#endif
2458 }
2459
2460 return NOTIFY_OK;
2461}
2462
Linus Torvalds1da177e2005-04-16 15:20:36 -07002463/*
2464 * /proc
2465 */
2466
2467#ifdef CONFIG_PROC_FS
2468
/*
 * NOTE(review): nothing in this part of the file uses this structure;
 * the /proc handlers below work on a seq_file.  It may be a leftover;
 * check the rest of the tree before removing it.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2477
2478static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2479{
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002480 struct seq_file *m = p_arg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002481
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002482 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002483
2484#ifdef CONFIG_IPV6_SUBTREES
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002485 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002486#else
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002487 seq_puts(m, "00000000000000000000000000000000 00 ");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002488#endif
2489
2490 if (rt->rt6i_nexthop) {
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002491 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002492 } else {
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002493 seq_puts(m, "00000000000000000000000000000000");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002494 }
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002495 seq_printf(m, " %08x %08x %08x %08x %8s\n",
Changli Gaod8d1f302010-06-10 23:31:35 -07002496 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2497 rt->dst.__use, rt->rt6i_flags,
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002498 rt->rt6i_dev ? rt->rt6i_dev->name : "");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002499 return 0;
2500}
2501
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002502static int ipv6_route_show(struct seq_file *m, void *v)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002503{
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002504 struct net *net = (struct net *)m->private;
2505 fib6_clean_all(net, rt6_info_route, 0, m);
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002506 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002507}
2508
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002509static int ipv6_route_open(struct inode *inode, struct file *file)
2510{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002511 return single_open_net(inode, file, ipv6_route_show);
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002512}
2513
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002514static const struct file_operations ipv6_route_proc_fops = {
2515 .owner = THIS_MODULE,
2516 .open = ipv6_route_open,
2517 .read = seq_read,
2518 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002519 .release = single_release_net,
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002520};
2521
Linus Torvalds1da177e2005-04-16 15:20:36 -07002522static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2523{
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002524 struct net *net = (struct net *)seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002525 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002526 net->ipv6.rt6_stats->fib_nodes,
2527 net->ipv6.rt6_stats->fib_route_nodes,
2528 net->ipv6.rt6_stats->fib_rt_alloc,
2529 net->ipv6.rt6_stats->fib_rt_entries,
2530 net->ipv6.rt6_stats->fib_rt_cache,
Eric Dumazetfc66f952010-10-08 06:37:34 +00002531 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002532 net->ipv6.rt6_stats->fib_discarded_routes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002533
2534 return 0;
2535}
2536
2537static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2538{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002539 return single_open_net(inode, file, rt6_stats_seq_show);
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002540}
2541
Arjan van de Ven9a321442007-02-12 00:55:35 -08002542static const struct file_operations rt6_stats_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002543 .owner = THIS_MODULE,
2544 .open = rt6_stats_seq_open,
2545 .read = seq_read,
2546 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002547 .release = single_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002548};
2549#endif /* CONFIG_PROC_FS */
2550
2551#ifdef CONFIG_SYSCTL
2552
Linus Torvalds1da177e2005-04-16 15:20:36 -07002553static
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07002554int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002555 void __user *buffer, size_t *lenp, loff_t *ppos)
2556{
Daniel Lezcano5b7c9312008-03-03 23:28:58 -08002557 struct net *net = current->nsproxy->net_ns;
2558 int delay = net->ipv6.sysctl.flush_delay;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002559 if (write) {
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07002560 proc_dointvec(ctl, write, buffer, lenp, ppos);
Daniel Lezcano5b7c9312008-03-03 23:28:58 -08002561 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002562 return 0;
2563 } else
2564 return -EINVAL;
2565}
2566
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002567ctl_table ipv6_route_table_template[] = {
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002568 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002569 .procname = "flush",
Daniel Lezcano49905092008-01-10 03:01:01 -08002570 .data = &init_net.ipv6.sysctl.flush_delay,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002571 .maxlen = sizeof(int),
Dave Jones89c8b3a12005-04-28 12:11:49 -07002572 .mode = 0200,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002573 .proc_handler = ipv6_sysctl_rtcache_flush
Linus Torvalds1da177e2005-04-16 15:20:36 -07002574 },
2575 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002576 .procname = "gc_thresh",
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002577 .data = &ip6_dst_ops_template.gc_thresh,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002578 .maxlen = sizeof(int),
2579 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002580 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002581 },
2582 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002583 .procname = "max_size",
Daniel Lezcano49905092008-01-10 03:01:01 -08002584 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002585 .maxlen = sizeof(int),
2586 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002587 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002588 },
2589 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002590 .procname = "gc_min_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002591 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002592 .maxlen = sizeof(int),
2593 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002594 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002595 },
2596 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002597 .procname = "gc_timeout",
Daniel Lezcano49905092008-01-10 03:01:01 -08002598 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002599 .maxlen = sizeof(int),
2600 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002601 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002602 },
2603 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002604 .procname = "gc_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002605 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002606 .maxlen = sizeof(int),
2607 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002608 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002609 },
2610 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002611 .procname = "gc_elasticity",
Daniel Lezcano49905092008-01-10 03:01:01 -08002612 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002613 .maxlen = sizeof(int),
2614 .mode = 0644,
Min Zhangf3d3f612010-08-14 22:42:51 -07002615 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002616 },
2617 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002618 .procname = "mtu_expires",
Daniel Lezcano49905092008-01-10 03:01:01 -08002619 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002620 .maxlen = sizeof(int),
2621 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002622 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002623 },
2624 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002625 .procname = "min_adv_mss",
Daniel Lezcano49905092008-01-10 03:01:01 -08002626 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002627 .maxlen = sizeof(int),
2628 .mode = 0644,
Min Zhangf3d3f612010-08-14 22:42:51 -07002629 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002630 },
2631 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002632 .procname = "gc_min_interval_ms",
Daniel Lezcano49905092008-01-10 03:01:01 -08002633 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002634 .maxlen = sizeof(int),
2635 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002636 .proc_handler = proc_dointvec_ms_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002637 },
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08002638 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002639};
2640
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002641struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002642{
2643 struct ctl_table *table;
2644
2645 table = kmemdup(ipv6_route_table_template,
2646 sizeof(ipv6_route_table_template),
2647 GFP_KERNEL);
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002648
2649 if (table) {
2650 table[0].data = &net->ipv6.sysctl.flush_delay;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002651 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002652 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2653 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2654 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2655 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2656 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2657 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2658 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
Alexey Dobriyan9c69fab2009-12-18 20:11:03 -08002659 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002660 }
2661
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002662 return table;
2663}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002664#endif
2665
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002666static int __net_init ip6_route_net_init(struct net *net)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002667{
Pavel Emelyanov633d424b2008-04-21 14:25:23 -07002668 int ret = -ENOMEM;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002669
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002670 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2671 sizeof(net->ipv6.ip6_dst_ops));
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002672
Eric Dumazetfc66f952010-10-08 06:37:34 +00002673 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2674 goto out_ip6_dst_ops;
2675
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002676 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2677 sizeof(*net->ipv6.ip6_null_entry),
2678 GFP_KERNEL);
2679 if (!net->ipv6.ip6_null_entry)
Eric Dumazetfc66f952010-10-08 06:37:34 +00002680 goto out_ip6_dst_entries;
Changli Gaod8d1f302010-06-10 23:31:35 -07002681 net->ipv6.ip6_null_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002682 (struct dst_entry *)net->ipv6.ip6_null_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002683 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Millerdefb3512010-12-08 21:16:57 -08002684 dst_metric_set(&net->ipv6.ip6_null_entry->dst, RTAX_HOPLIMIT, 255);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002685
2686#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2687 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2688 sizeof(*net->ipv6.ip6_prohibit_entry),
2689 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002690 if (!net->ipv6.ip6_prohibit_entry)
2691 goto out_ip6_null_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002692 net->ipv6.ip6_prohibit_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002693 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002694 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Millerdefb3512010-12-08 21:16:57 -08002695 dst_metric_set(&net->ipv6.ip6_prohibit_entry->dst, RTAX_HOPLIMIT, 255);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002696
2697 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2698 sizeof(*net->ipv6.ip6_blk_hole_entry),
2699 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002700 if (!net->ipv6.ip6_blk_hole_entry)
2701 goto out_ip6_prohibit_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002702 net->ipv6.ip6_blk_hole_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002703 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002704 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Millerdefb3512010-12-08 21:16:57 -08002705 dst_metric_set(&net->ipv6.ip6_blk_hole_entry->dst, RTAX_HOPLIMIT, 255);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002706#endif
2707
Peter Zijlstrab339a47c2008-10-07 14:15:00 -07002708 net->ipv6.sysctl.flush_delay = 0;
2709 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2710 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2711 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2712 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2713 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2714 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2715 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2716
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002717#ifdef CONFIG_PROC_FS
2718 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2719 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2720#endif
Benjamin Thery6891a342008-03-04 13:49:47 -08002721 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2722
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002723 ret = 0;
2724out:
2725 return ret;
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002726
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002727#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2728out_ip6_prohibit_entry:
2729 kfree(net->ipv6.ip6_prohibit_entry);
2730out_ip6_null_entry:
2731 kfree(net->ipv6.ip6_null_entry);
2732#endif
Eric Dumazetfc66f952010-10-08 06:37:34 +00002733out_ip6_dst_entries:
2734 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002735out_ip6_dst_ops:
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002736 goto out;
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002737}
2738
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002739static void __net_exit ip6_route_net_exit(struct net *net)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002740{
2741#ifdef CONFIG_PROC_FS
2742 proc_net_remove(net, "ipv6_route");
2743 proc_net_remove(net, "rt6_stats");
2744#endif
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002745 kfree(net->ipv6.ip6_null_entry);
2746#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2747 kfree(net->ipv6.ip6_prohibit_entry);
2748 kfree(net->ipv6.ip6_blk_hole_entry);
2749#endif
Xiaotian Feng41bb78b2010-11-02 16:11:05 +00002750 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002751}
2752
2753static struct pernet_operations ip6_route_net_ops = {
2754 .init = ip6_route_net_init,
2755 .exit = ip6_route_net_exit,
2756};
2757
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002758static struct notifier_block ip6_route_dev_notifier = {
2759 .notifier_call = ip6_route_dev_notify,
2760 .priority = 0,
2761};
2762
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002763int __init ip6_route_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002764{
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002765 int ret;
2766
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002767 ret = -ENOMEM;
2768 ip6_dst_ops_template.kmem_cachep =
2769 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2770 SLAB_HWCACHE_ALIGN, NULL);
2771 if (!ip6_dst_ops_template.kmem_cachep)
Fernando Carrijoc19a28e2009-01-07 18:09:08 -08002772 goto out;
David S. Miller14e50e52007-05-24 18:17:54 -07002773
Eric Dumazetfc66f952010-10-08 06:37:34 +00002774 ret = dst_entries_init(&ip6_dst_blackhole_ops);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002775 if (ret)
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002776 goto out_kmem_cache;
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002777
Eric Dumazetfc66f952010-10-08 06:37:34 +00002778 ret = register_pernet_subsys(&ip6_route_net_ops);
2779 if (ret)
2780 goto out_dst_entries;
2781
Arnaud Ebalard5dc121e2008-10-01 02:37:56 -07002782 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2783
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002784 /* Registering of the loopback is done before this portion of code,
2785 * the loopback reference in rt6_info will not be taken, do it
2786 * manually for init_net */
Changli Gaod8d1f302010-06-10 23:31:35 -07002787 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002788 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2789 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
Changli Gaod8d1f302010-06-10 23:31:35 -07002790 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002791 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
Changli Gaod8d1f302010-06-10 23:31:35 -07002792 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002793 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2794 #endif
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002795 ret = fib6_init();
2796 if (ret)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002797 goto out_register_subsys;
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002798
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002799 ret = xfrm6_init();
2800 if (ret)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002801 goto out_fib6_init;
Daniel Lezcanoc35b7e72007-12-08 00:14:11 -08002802
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002803 ret = fib6_rules_init();
2804 if (ret)
2805 goto xfrm6_init;
Daniel Lezcano7e5449c2007-12-08 00:14:54 -08002806
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002807 ret = -ENOBUFS;
2808 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2809 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2810 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2811 goto fib6_rules_init;
2812
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002813 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002814 if (ret)
2815 goto fib6_rules_init;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002816
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002817out:
2818 return ret;
2819
2820fib6_rules_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002821 fib6_rules_cleanup();
2822xfrm6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002823 xfrm6_fini();
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002824out_fib6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002825 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002826out_register_subsys:
2827 unregister_pernet_subsys(&ip6_route_net_ops);
Eric Dumazetfc66f952010-10-08 06:37:34 +00002828out_dst_entries:
2829 dst_entries_destroy(&ip6_dst_blackhole_ops);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002830out_kmem_cache:
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002831 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002832 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002833}
2834
2835void ip6_route_cleanup(void)
2836{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002837 unregister_netdevice_notifier(&ip6_route_dev_notifier);
Thomas Graf101367c2006-08-04 03:39:02 -07002838 fib6_rules_cleanup();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002839 xfrm6_fini();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002840 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002841 unregister_pernet_subsys(&ip6_route_net_ops);
Xiaotian Feng41bb78b2010-11-02 16:11:05 +00002842 dst_entries_destroy(&ip6_dst_blackhole_ops);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002843 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002844}