blob: 96455ffb76fb8b92aa90c3a711aa6635d45b91fa [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09006 * Pedro Roque <roque@di.fc.ul.pt>
Linus Torvalds1da177e2005-04-16 15:20:36 -07007 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07008 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
YOSHIFUJI Hideakic0bece92006-08-23 17:23:25 -070023 * Ville Nuorvala
24 * Fixed routing subtrees.
Linus Torvalds1da177e2005-04-16 15:20:36 -070025 */
26
Randy Dunlap4fc268d2006-01-11 12:17:47 -080027#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090037#include <linux/mroute6.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070038#include <linux/init.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039#include <linux/if_arp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070040#include <linux/proc_fs.h>
41#include <linux/seq_file.h>
Daniel Lezcano5b7c9312008-03-03 23:28:58 -080042#include <linux/nsproxy.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090043#include <linux/slab.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020044#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070045#include <net/snmp.h>
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#include <net/ndisc.h>
50#include <net/addrconf.h>
51#include <net/tcp.h>
52#include <linux/rtnetlink.h>
53#include <net/dst.h>
54#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -070055#include <net/netevent.h>
Thomas Graf21713eb2006-08-15 00:35:24 -070056#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070057
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
/* Debug level for this file; a value of 3 or more compiles in the tracing. */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
/* Tracing disabled: both helpers expand to nothing. */
#define RDBG(x)
#define RT6_TRACE(x...) do { } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif

/* Non-zero selects the rt6_alloc_clone() branch guarded by #if in ip6_pol_route(). */
#define CLONE_OFFLINK_ROUTE 0
Linus Torvalds1da177e2005-04-16 15:20:36 -070076
Linus Torvalds1da177e2005-04-16 15:20:36 -070077static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
78static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
79static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80static void ip6_dst_destroy(struct dst_entry *);
81static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
Daniel Lezcano569d3642008-01-18 03:56:57 -080083static int ip6_dst_gc(struct dst_ops *ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -070084
85static int ip6_pkt_discard(struct sk_buff *skb);
86static int ip6_pkt_discard_out(struct sk_buff *skb);
87static void ip6_link_failure(struct sk_buff *skb);
88static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
89
#ifdef CONFIG_IPV6_ROUTE_INFO
/* Helpers used by rt6_route_rcv() to create and look up route-information routes. */
static struct rt6_info *rt6_add_route_info(struct net *net,
					   struct in6_addr *prefix,
					   int prefixlen,
					   struct in6_addr *gwaddr,
					   int ifindex,
					   unsigned pref);
static struct rt6_info *rt6_get_route_info(struct net *net,
					   struct in6_addr *prefix,
					   int prefixlen,
					   struct in6_addr *gwaddr,
					   int ifindex);
#endif
99
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -0800100static struct dst_ops ip6_dst_ops_template = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700101 .family = AF_INET6,
Harvey Harrison09640e62009-02-01 00:45:17 -0800102 .protocol = cpu_to_be16(ETH_P_IPV6),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700103 .gc = ip6_dst_gc,
104 .gc_thresh = 1024,
105 .check = ip6_dst_check,
106 .destroy = ip6_dst_destroy,
107 .ifdown = ip6_dst_ifdown,
108 .negative_advice = ip6_negative_advice,
109 .link_failure = ip6_link_failure,
110 .update_pmtu = ip6_rt_update_pmtu,
Herbert Xu1ac06e02008-05-20 14:32:14 -0700111 .local_out = __ip6_local_out,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700112};
113
David S. Miller14e50e52007-05-24 18:17:54 -0700114static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
115{
116}
117
118static struct dst_ops ip6_dst_blackhole_ops = {
119 .family = AF_INET6,
Harvey Harrison09640e62009-02-01 00:45:17 -0800120 .protocol = cpu_to_be16(ETH_P_IPV6),
David S. Miller14e50e52007-05-24 18:17:54 -0700121 .destroy = ip6_dst_destroy,
122 .check = ip6_dst_check,
123 .update_pmtu = ip6_rt_blackhole_update_pmtu,
David S. Miller14e50e52007-05-24 18:17:54 -0700124};
125
Daniel Lezcanobdb32892008-03-04 13:48:10 -0800126static struct rt6_info ip6_null_entry_template = {
Changli Gaod8d1f302010-06-10 23:31:35 -0700127 .dst = {
128 .__refcnt = ATOMIC_INIT(1),
129 .__use = 1,
130 .obsolete = -1,
131 .error = -ENETUNREACH,
132 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
133 .input = ip6_pkt_discard,
134 .output = ip6_pkt_discard_out,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700135 },
136 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
Jean-Mickael Guerin4f724272009-05-20 17:38:59 -0700137 .rt6i_protocol = RTPROT_KERNEL,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700138 .rt6i_metric = ~(u32) 0,
139 .rt6i_ref = ATOMIC_INIT(1),
140};
141
Thomas Graf101367c2006-08-04 03:39:02 -0700142#ifdef CONFIG_IPV6_MULTIPLE_TABLES
143
David S. Miller6723ab52006-10-18 21:20:57 -0700144static int ip6_pkt_prohibit(struct sk_buff *skb);
145static int ip6_pkt_prohibit_out(struct sk_buff *skb);
David S. Miller6723ab52006-10-18 21:20:57 -0700146
Adrian Bunk280a34c2008-04-21 02:29:32 -0700147static struct rt6_info ip6_prohibit_entry_template = {
Changli Gaod8d1f302010-06-10 23:31:35 -0700148 .dst = {
149 .__refcnt = ATOMIC_INIT(1),
150 .__use = 1,
151 .obsolete = -1,
152 .error = -EACCES,
153 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
154 .input = ip6_pkt_prohibit,
155 .output = ip6_pkt_prohibit_out,
Thomas Graf101367c2006-08-04 03:39:02 -0700156 },
157 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
Jean-Mickael Guerin4f724272009-05-20 17:38:59 -0700158 .rt6i_protocol = RTPROT_KERNEL,
Thomas Graf101367c2006-08-04 03:39:02 -0700159 .rt6i_metric = ~(u32) 0,
160 .rt6i_ref = ATOMIC_INIT(1),
161};
162
Daniel Lezcanobdb32892008-03-04 13:48:10 -0800163static struct rt6_info ip6_blk_hole_entry_template = {
Changli Gaod8d1f302010-06-10 23:31:35 -0700164 .dst = {
165 .__refcnt = ATOMIC_INIT(1),
166 .__use = 1,
167 .obsolete = -1,
168 .error = -EINVAL,
169 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
170 .input = dst_discard,
171 .output = dst_discard,
Thomas Graf101367c2006-08-04 03:39:02 -0700172 },
173 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
Jean-Mickael Guerin4f724272009-05-20 17:38:59 -0700174 .rt6i_protocol = RTPROT_KERNEL,
Thomas Graf101367c2006-08-04 03:39:02 -0700175 .rt6i_metric = ~(u32) 0,
176 .rt6i_ref = ATOMIC_INIT(1),
177};
178
179#endif
180
/* Allocate a dst from @ops via dst_alloc() and return it typed as an rt6_info. */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
	struct rt6_info *rt = (struct rt6_info *)dst_alloc(ops);

	return rt;
}
186
187static void ip6_dst_destroy(struct dst_entry *dst)
188{
189 struct rt6_info *rt = (struct rt6_info *)dst;
190 struct inet6_dev *idev = rt->rt6i_idev;
191
192 if (idev != NULL) {
193 rt->rt6i_idev = NULL;
194 in6_dev_put(idev);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900195 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700196}
197
198static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
199 int how)
200{
201 struct rt6_info *rt = (struct rt6_info *)dst;
202 struct inet6_dev *idev = rt->rt6i_idev;
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800203 struct net_device *loopback_dev =
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900204 dev_net(dev)->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700205
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800206 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
207 struct inet6_dev *loopback_idev =
208 in6_dev_get(loopback_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700209 if (loopback_idev != NULL) {
210 rt->rt6i_idev = loopback_idev;
211 in6_dev_put(idev);
212 }
213 }
214}
215
216static __inline__ int rt6_check_expired(const struct rt6_info *rt)
217{
Eric Dumazeta02cec22010-09-22 20:43:57 +0000218 return (rt->rt6i_flags & RTF_EXPIRES) &&
219 time_after(jiffies, rt->rt6i_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700220}
221
Thomas Grafc71099a2006-08-04 23:20:06 -0700222static inline int rt6_need_strict(struct in6_addr *daddr)
223{
Eric Dumazeta02cec22010-09-22 20:43:57 +0000224 return ipv6_addr_type(daddr) &
225 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
Thomas Grafc71099a2006-08-04 23:20:06 -0700226}
227
Linus Torvalds1da177e2005-04-16 15:20:36 -0700228/*
Thomas Grafc71099a2006-08-04 23:20:06 -0700229 * Route lookup. Any table->tb6_lock is implied.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700230 */
231
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800232static inline struct rt6_info *rt6_device_match(struct net *net,
233 struct rt6_info *rt,
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900234 struct in6_addr *saddr,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700235 int oif,
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700236 int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700237{
238 struct rt6_info *local = NULL;
239 struct rt6_info *sprt;
240
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900241 if (!oif && ipv6_addr_any(saddr))
242 goto out;
243
Changli Gaod8d1f302010-06-10 23:31:35 -0700244 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900245 struct net_device *dev = sprt->rt6i_dev;
246
247 if (oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700248 if (dev->ifindex == oif)
249 return sprt;
250 if (dev->flags & IFF_LOOPBACK) {
251 if (sprt->rt6i_idev == NULL ||
252 sprt->rt6i_idev->dev->ifindex != oif) {
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700253 if (flags & RT6_LOOKUP_F_IFACE && oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700254 continue;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900255 if (local && (!oif ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700256 local->rt6i_idev->dev->ifindex == oif))
257 continue;
258 }
259 local = sprt;
260 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900261 } else {
262 if (ipv6_chk_addr(net, saddr, dev,
263 flags & RT6_LOOKUP_F_IFACE))
264 return sprt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700265 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900266 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700267
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900268 if (oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700269 if (local)
270 return local;
271
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700272 if (flags & RT6_LOOKUP_F_IFACE)
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800273 return net->ipv6.ip6_null_entry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700274 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900275out:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700276 return rt;
277}
278
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: if the next hop of @rt is not in a
 * NUD_VALID state and the last probe is older than the interface's
 * rtr_probe_interval, send a Neighbour Solicitation to its
 * solicited-node multicast address.
 *
 * @rt may be NULL (find_match() passes its current best match, which can
 * still be unset); a route without a next-hop neighbour is ignored too.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one per
 * minute.  The interval check and the update of neigh->updated are done
 * under the neighbour's write lock: doing the store under the read lock
 * would let two CPUs pass the check together and both send a probe.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;

	/* Cheap unlocked pre-check; repeated below under the lock. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	write_lock_bh(&neigh->lock);
	if ((neigh->nud_state & NUD_VALID) ||
	    !time_after(jiffies, neigh->updated +
				 rt->rt6i_idev->cnf.rtr_probe_interval)) {
		write_unlock_bh(&neigh->lock);
		return;
	}
	neigh->updated = jiffies;
	write_unlock_bh(&neigh->lock);

	/* Send the probe outside the lock. */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
313
Linus Torvalds1da177e2005-04-16 15:20:36 -0700314/*
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800315 * Default Router Selection (RFC 2461 6.3.6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700316 */
Dave Jonesb6f99a22007-03-22 12:27:49 -0700317static inline int rt6_check_dev(struct rt6_info *rt, int oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700318{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800319 struct net_device *dev = rt->rt6i_dev;
David S. Miller161980f2007-04-06 11:42:27 -0700320 if (!oif || dev->ifindex == oif)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800321 return 2;
David S. Miller161980f2007-04-06 11:42:27 -0700322 if ((dev->flags & IFF_LOOPBACK) &&
323 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
324 return 1;
325 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700326}
327
Dave Jonesb6f99a22007-03-22 12:27:49 -0700328static inline int rt6_check_neigh(struct rt6_info *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700329{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800330 struct neighbour *neigh = rt->rt6i_nexthop;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800331 int m;
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700332 if (rt->rt6i_flags & RTF_NONEXTHOP ||
333 !(rt->rt6i_flags & RTF_GATEWAY))
334 m = 1;
335 else if (neigh) {
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800336 read_lock_bh(&neigh->lock);
337 if (neigh->nud_state & NUD_VALID)
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700338 m = 2;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800339#ifdef CONFIG_IPV6_ROUTER_PREF
340 else if (neigh->nud_state & NUD_FAILED)
341 m = 0;
342#endif
343 else
YOSHIFUJI Hideakiea73ee22006-11-06 09:45:44 -0800344 m = 1;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800345 read_unlock_bh(&neigh->lock);
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800346 } else
347 m = 0;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800348 return m;
349}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700350
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800351static int rt6_score_route(struct rt6_info *rt, int oif,
352 int strict)
353{
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700354 int m, n;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900355
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700356 m = rt6_check_dev(rt, oif);
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700357 if (!m && (strict & RT6_LOOKUP_F_IFACE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800358 return -1;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800359#ifdef CONFIG_IPV6_ROUTER_PREF
360 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
361#endif
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700362 n = rt6_check_neigh(rt);
YOSHIFUJI Hideaki557e92e2006-11-06 09:45:45 -0800363 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800364 return -1;
365 return m;
366}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700367
David S. Millerf11e6652007-03-24 20:36:25 -0700368static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
369 int *mpri, struct rt6_info *match)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800370{
David S. Millerf11e6652007-03-24 20:36:25 -0700371 int m;
372
373 if (rt6_check_expired(rt))
374 goto out;
375
376 m = rt6_score_route(rt, oif, strict);
377 if (m < 0)
378 goto out;
379
380 if (m > *mpri) {
381 if (strict & RT6_LOOKUP_F_REACHABLE)
382 rt6_probe(match);
383 *mpri = m;
384 match = rt;
385 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
386 rt6_probe(rt);
387 }
388
389out:
390 return match;
391}
392
393static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
394 struct rt6_info *rr_head,
395 u32 metric, int oif, int strict)
396{
397 struct rt6_info *rt, *match;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800398 int mpri = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700399
David S. Millerf11e6652007-03-24 20:36:25 -0700400 match = NULL;
401 for (rt = rr_head; rt && rt->rt6i_metric == metric;
Changli Gaod8d1f302010-06-10 23:31:35 -0700402 rt = rt->dst.rt6_next)
David S. Millerf11e6652007-03-24 20:36:25 -0700403 match = find_match(rt, oif, strict, &mpri, match);
404 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
Changli Gaod8d1f302010-06-10 23:31:35 -0700405 rt = rt->dst.rt6_next)
David S. Millerf11e6652007-03-24 20:36:25 -0700406 match = find_match(rt, oif, strict, &mpri, match);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800407
David S. Millerf11e6652007-03-24 20:36:25 -0700408 return match;
409}
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800410
David S. Millerf11e6652007-03-24 20:36:25 -0700411static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
412{
413 struct rt6_info *match, *rt0;
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800414 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700415
David S. Millerf11e6652007-03-24 20:36:25 -0700416 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
Harvey Harrison0dc47872008-03-05 20:47:47 -0800417 __func__, fn->leaf, oif);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700418
David S. Millerf11e6652007-03-24 20:36:25 -0700419 rt0 = fn->rr_ptr;
420 if (!rt0)
421 fn->rr_ptr = rt0 = fn->leaf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700422
David S. Millerf11e6652007-03-24 20:36:25 -0700423 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700424
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800425 if (!match &&
David S. Millerf11e6652007-03-24 20:36:25 -0700426 (strict & RT6_LOOKUP_F_REACHABLE)) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700427 struct rt6_info *next = rt0->dst.rt6_next;
David S. Millerf11e6652007-03-24 20:36:25 -0700428
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800429 /* no entries matched; do round-robin */
David S. Millerf11e6652007-03-24 20:36:25 -0700430 if (!next || next->rt6i_metric != rt0->rt6i_metric)
431 next = fn->leaf;
432
433 if (next != rt0)
434 fn->rr_ptr = next;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700435 }
436
David S. Millerf11e6652007-03-24 20:36:25 -0700437 RT6_TRACE("%s() => %p\n",
Harvey Harrison0dc47872008-03-05 20:47:47 -0800438 __func__, match);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700439
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900440 net = dev_net(rt0->rt6i_dev);
Eric Dumazeta02cec22010-09-22 20:43:57 +0000441 return match ? match : net->ipv6.ip6_null_entry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700442}
443
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * rt6_route_rcv - handle a received route information option
 * @dev:    device the option arrived on
 * @opt:    start of the option, interpreted as a struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router
 *
 * Validates the option, then adds, refreshes or (for a zero lifetime)
 * deletes the matching route for @gwaddr on @dev.
 *
 * Returns 0 once the option passed validation, even when no route could be
 * created, and -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/*
	 * Sanity check for prefix_len and length (RFC 4191, 2.3).  The
	 * length is in units of 8 octets and includes the 8-octet header,
	 * so the prefix field holds (length - 1) * 8 octets:
	 *   prefix_len  > 64  requires length == 3,
	 *   prefix_len  >  0  requires length >= 2.
	 * Accepting anything shorter would let ipv6_addr_prefix() below
	 * read prefix bits that lie beyond the end of the option.
	 */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 3)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 2)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/*
		 * The option carries fewer than 16 prefix octets: copy only
		 * prefix_len bits into a zero-padded local address.
		 */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* A zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime)) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		/* NOTE(review): presumably drops the reference taken by the get/add helper - confirm. */
		dst_release(&rt->dst);
	}
	return 0;
}
#endif
517
/*
 * BACKTRACK - move to another fib6 node when the lookup produced the
 * null entry.
 *
 * Expands in a function that provides the locals "rt" and "fn" and the
 * labels "out" and "restart".  If rt is __net's null entry, the loop
 * repeatedly steps to the parent node, or into the parent's subtree
 * (looked up by saddr) when there is one and fn is not already it.  It
 * jumps to "out" as soon as the current node is a top-level root, and to
 * "restart" once a node carrying route info (RTN_RTINFO) is reached.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == __net->ipv6.ip6_null_entry) {				\
		struct fib6_node *pn;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn),	\
						 NULL, saddr);		\
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
Thomas Grafc71099a2006-08-04 23:20:06 -0700535
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800536static struct rt6_info *ip6_pol_route_lookup(struct net *net,
537 struct fib6_table *table,
Thomas Grafc71099a2006-08-04 23:20:06 -0700538 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700539{
540 struct fib6_node *fn;
541 struct rt6_info *rt;
542
Thomas Grafc71099a2006-08-04 23:20:06 -0700543 read_lock_bh(&table->tb6_lock);
544 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
545restart:
546 rt = fn->leaf;
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900547 rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800548 BACKTRACK(net, &fl->fl6_src);
Thomas Grafc71099a2006-08-04 23:20:06 -0700549out:
Changli Gaod8d1f302010-06-10 23:31:35 -0700550 dst_use(&rt->dst, jiffies);
Thomas Grafc71099a2006-08-04 23:20:06 -0700551 read_unlock_bh(&table->tb6_lock);
Thomas Grafc71099a2006-08-04 23:20:06 -0700552 return rt;
553
554}
555
YOSHIFUJI Hideaki9acd9f32008-04-10 15:42:10 +0900556struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
557 const struct in6_addr *saddr, int oif, int strict)
Thomas Grafc71099a2006-08-04 23:20:06 -0700558{
559 struct flowi fl = {
560 .oif = oif,
561 .nl_u = {
562 .ip6_u = {
563 .daddr = *daddr,
Thomas Grafc71099a2006-08-04 23:20:06 -0700564 },
565 },
566 };
567 struct dst_entry *dst;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700568 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
Thomas Grafc71099a2006-08-04 23:20:06 -0700569
Thomas Grafadaa70b2006-10-13 15:01:03 -0700570 if (saddr) {
571 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
572 flags |= RT6_LOOKUP_F_HAS_SADDR;
573 }
574
Daniel Lezcano606a2b42008-03-04 13:45:59 -0800575 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
Thomas Grafc71099a2006-08-04 23:20:06 -0700576 if (dst->error == 0)
577 return (struct rt6_info *) dst;
578
579 dst_release(dst);
580
Linus Torvalds1da177e2005-04-16 15:20:36 -0700581 return NULL;
582}
583
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900584EXPORT_SYMBOL(rt6_lookup);
585
Thomas Grafc71099a2006-08-04 23:20:06 -0700586/* ip6_ins_rt is called with FREE table->tb6_lock.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700587 It takes new route entry, the addition fails by any reason the
588 route is freed. In any case, if caller does not hold it, it may
589 be destroyed.
590 */
591
Thomas Graf86872cb2006-08-22 00:01:08 -0700592static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700593{
594 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -0700595 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700596
Thomas Grafc71099a2006-08-04 23:20:06 -0700597 table = rt->rt6i_table;
598 write_lock_bh(&table->tb6_lock);
Thomas Graf86872cb2006-08-22 00:01:08 -0700599 err = fib6_add(&table->tb6_root, rt, info);
Thomas Grafc71099a2006-08-04 23:20:06 -0700600 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700601
602 return err;
603}
604
Thomas Graf40e22e82006-08-22 00:00:45 -0700605int ip6_ins_rt(struct rt6_info *rt)
606{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800607 struct nl_info info = {
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900608 .nl_net = dev_net(rt->rt6i_dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800609 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -0800610 return __ip6_ins_rt(rt, &info);
Thomas Graf40e22e82006-08-22 00:00:45 -0700611}
612
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800613static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
614 struct in6_addr *saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700615{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700616 struct rt6_info *rt;
617
618 /*
619 * Clone the route.
620 */
621
622 rt = ip6_rt_copy(ort);
623
624 if (rt) {
David S. Miller14deae42009-01-04 16:04:39 -0800625 struct neighbour *neigh;
626 int attempts = !in_softirq();
627
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900628 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
629 if (rt->rt6i_dst.plen != 128 &&
630 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
631 rt->rt6i_flags |= RTF_ANYCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700632 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900633 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700634
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900635 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700636 rt->rt6i_dst.plen = 128;
637 rt->rt6i_flags |= RTF_CACHE;
Changli Gaod8d1f302010-06-10 23:31:35 -0700638 rt->dst.flags |= DST_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700639
640#ifdef CONFIG_IPV6_SUBTREES
641 if (rt->rt6i_src.plen && saddr) {
642 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
643 rt->rt6i_src.plen = 128;
644 }
645#endif
646
David S. Miller14deae42009-01-04 16:04:39 -0800647 retry:
648 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
649 if (IS_ERR(neigh)) {
650 struct net *net = dev_net(rt->rt6i_dev);
651 int saved_rt_min_interval =
652 net->ipv6.sysctl.ip6_rt_gc_min_interval;
653 int saved_rt_elasticity =
654 net->ipv6.sysctl.ip6_rt_gc_elasticity;
655
656 if (attempts-- > 0) {
657 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
658 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
659
Alexey Dobriyan86393e52009-08-29 01:34:49 +0000660 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
David S. Miller14deae42009-01-04 16:04:39 -0800661
662 net->ipv6.sysctl.ip6_rt_gc_elasticity =
663 saved_rt_elasticity;
664 net->ipv6.sysctl.ip6_rt_gc_min_interval =
665 saved_rt_min_interval;
666 goto retry;
667 }
668
669 if (net_ratelimit())
670 printk(KERN_WARNING
Ulrich Weber7e1b33e2010-09-27 15:02:18 -0700671 "ipv6: Neighbour table overflow.\n");
Changli Gaod8d1f302010-06-10 23:31:35 -0700672 dst_free(&rt->dst);
David S. Miller14deae42009-01-04 16:04:39 -0800673 return NULL;
674 }
675 rt->rt6i_nexthop = neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700676
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800677 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700678
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800679 return rt;
680}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700681
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800682static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
683{
684 struct rt6_info *rt = ip6_rt_copy(ort);
685 if (rt) {
686 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
687 rt->rt6i_dst.plen = 128;
688 rt->rt6i_flags |= RTF_CACHE;
Changli Gaod8d1f302010-06-10 23:31:35 -0700689 rt->dst.flags |= DST_HOST;
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800690 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
691 }
692 return rt;
693}
694
/*
 * Core policy-routing lookup for one FIB table.
 *
 * Looks up @fl in @table with fib6_lookup()/rt6_select().  When the selected
 * route is the null entry or already carries RTF_CACHE the lookup ends at
 * the "out" label (after one retry without RT6_LOOKUP_F_REACHABLE if that
 * flag was in use).  Otherwise a per-destination copy is created with
 * rt6_alloc_cow(), or with rt6_alloc_clone() when CLONE_OFFLINK_ROUTE is
 * enabled, and inserted with ip6_ins_rt().  A failed allocation or insertion
 * triggers a fresh lookup, bounded by the "attempts" counter.
 *
 * @oif:   interface index handed to rt6_select().
 * @flags: only RT6_LOOKUP_F_IFACE is consumed here (it becomes "strict").
 *
 * Every return path takes a reference on the returned route with dst_hold()
 * and updates its lastuse/__use accounting.
 *
 * NOTE(review): BACKTRACK() is defined outside this view; it presumably
 * jumps to the "restart" and "out" labels below -- confirm against its
 * definition before renaming fn, rt or either label.
 */
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800695static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
696 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700697{
698 struct fib6_node *fn;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800699 struct rt6_info *rt, *nrt;
Thomas Grafc71099a2006-08-04 23:20:06 -0700700 int strict = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700701 int attempts = 3;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800702 int err;
/* Ask rt6_select() for reachable routers only when forwarding is off. */
YOSHIFUJI Hideaki53b79972008-07-19 22:35:03 -0700703 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700704
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700705 strict |= flags & RT6_LOOKUP_F_IFACE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700706
707relookup:
Thomas Grafc71099a2006-08-04 23:20:06 -0700708 read_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700709
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800710restart_2:
Thomas Grafc71099a2006-08-04 23:20:06 -0700711 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700712
713restart:
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700714 rt = rt6_select(fn, oif, strict | reachable);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800715
716 BACKTRACK(net, &fl->fl6_src);
717 if (rt == net->ipv6.ip6_null_entry ||
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800718 rt->rt6i_flags & RTF_CACHE)
YOSHIFUJI Hideaki1ddef0442006-03-20 17:01:24 -0800719 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700720
/* Pin rt before dropping the table lock; the copy below is made unlocked. */
Changli Gaod8d1f302010-06-10 23:31:35 -0700721 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700722 read_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800723
/* No nexthop yet and one is expected: COW; otherwise clone (if enabled). */
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800724 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800725 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800726 else {
727#if CLONE_OFFLINK_ROUTE
728 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
729#else
730 goto out2;
731#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700732 }
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800733
/* Swap the reference: drop the original, fall back to the null entry if the copy failed. */
Changli Gaod8d1f302010-06-10 23:31:35 -0700734 dst_release(&rt->dst);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800735 rt = nrt ? : net->ipv6.ip6_null_entry;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800736
Changli Gaod8d1f302010-06-10 23:31:35 -0700737 dst_hold(&rt->dst);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800738 if (nrt) {
Thomas Graf40e22e82006-08-22 00:00:45 -0700739 err = ip6_ins_rt(nrt);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800740 if (!err)
741 goto out2;
742 }
743
/* Out of retries: return whatever rt currently is (reference already held). */
744 if (--attempts <= 0)
745 goto out2;
746
747 /*
Thomas Grafc71099a2006-08-04 23:20:06 -0700748 * Race condition! In the gap, when table->tb6_lock was
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800749 * released someone could insert this route. Relookup.
750 */
Changli Gaod8d1f302010-06-10 23:31:35 -0700751 dst_release(&rt->dst);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800752 goto relookup;
753
754out:
/* The lookup used RT6_LOOKUP_F_REACHABLE: repeat it once without the flag (lock still held). */
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800755 if (reachable) {
756 reachable = 0;
757 goto restart_2;
758 }
Changli Gaod8d1f302010-06-10 23:31:35 -0700759 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700760 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700761out2:
Changli Gaod8d1f302010-06-10 23:31:35 -0700762 rt->dst.lastuse = jiffies;
763 rt->dst.__use++;
Thomas Grafc71099a2006-08-04 23:20:06 -0700764
765 return rt;
766}
767
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800768static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700769 struct flowi *fl, int flags)
770{
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800771 return ip6_pol_route(net, table, fl->iif, fl, flags);
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700772}
773
Thomas Grafc71099a2006-08-04 23:20:06 -0700774void ip6_route_input(struct sk_buff *skb)
775{
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700776 struct ipv6hdr *iph = ipv6_hdr(skb);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900777 struct net *net = dev_net(skb->dev);
Thomas Grafadaa70b2006-10-13 15:01:03 -0700778 int flags = RT6_LOOKUP_F_HAS_SADDR;
Thomas Grafc71099a2006-08-04 23:20:06 -0700779 struct flowi fl = {
780 .iif = skb->dev->ifindex,
781 .nl_u = {
782 .ip6_u = {
783 .daddr = iph->daddr,
784 .saddr = iph->saddr,
Al Viro90bcaf72006-11-08 00:25:17 -0800785 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
Thomas Grafc71099a2006-08-04 23:20:06 -0700786 },
787 },
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900788 .mark = skb->mark,
Thomas Grafc71099a2006-08-04 23:20:06 -0700789 .proto = iph->nexthdr,
790 };
Thomas Grafadaa70b2006-10-13 15:01:03 -0700791
Thomas Goff1d6e55f2009-01-27 22:39:59 -0800792 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
Thomas Grafadaa70b2006-10-13 15:01:03 -0700793 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700794
Eric Dumazetadf30902009-06-02 05:19:30 +0000795 skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input));
Thomas Grafc71099a2006-08-04 23:20:06 -0700796}
797
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800798static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
Thomas Grafc71099a2006-08-04 23:20:06 -0700799 struct flowi *fl, int flags)
800{
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800801 return ip6_pol_route(net, table, fl->oif, fl, flags);
Thomas Grafc71099a2006-08-04 23:20:06 -0700802}
803
Daniel Lezcano4591db42008-03-05 10:48:10 -0800804struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
805 struct flowi *fl)
Thomas Grafc71099a2006-08-04 23:20:06 -0700806{
807 int flags = 0;
808
Brian Haley6057fd72010-05-28 23:02:35 -0700809 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl->fl6_dst))
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700810 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700811
Thomas Grafadaa70b2006-10-13 15:01:03 -0700812 if (!ipv6_addr_any(&fl->fl6_src))
813 flags |= RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideaki / 吉藤英明0c9a2ac2010-03-07 00:14:44 +0000814 else if (sk)
815 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
Thomas Grafadaa70b2006-10-13 15:01:03 -0700816
Daniel Lezcano4591db42008-03-05 10:48:10 -0800817 return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700818}
819
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900820EXPORT_SYMBOL(ip6_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700821
David S. Miller14e50e52007-05-24 18:17:54 -0700822int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
823{
824 struct rt6_info *ort = (struct rt6_info *) *dstp;
825 struct rt6_info *rt = (struct rt6_info *)
826 dst_alloc(&ip6_dst_blackhole_ops);
827 struct dst_entry *new = NULL;
828
829 if (rt) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700830 new = &rt->dst;
David S. Miller14e50e52007-05-24 18:17:54 -0700831
832 atomic_set(&new->__refcnt, 1);
833 new->__use = 1;
Herbert Xu352e5122007-11-13 21:34:06 -0800834 new->input = dst_discard;
835 new->output = dst_discard;
David S. Miller14e50e52007-05-24 18:17:54 -0700836
Changli Gaod8d1f302010-06-10 23:31:35 -0700837 memcpy(new->metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32));
838 new->dev = ort->dst.dev;
David S. Miller14e50e52007-05-24 18:17:54 -0700839 if (new->dev)
840 dev_hold(new->dev);
841 rt->rt6i_idev = ort->rt6i_idev;
842 if (rt->rt6i_idev)
843 in6_dev_hold(rt->rt6i_idev);
844 rt->rt6i_expires = 0;
845
846 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
847 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
848 rt->rt6i_metric = 0;
849
850 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
851#ifdef CONFIG_IPV6_SUBTREES
852 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
853#endif
854
855 dst_free(new);
856 }
857
858 dst_release(*dstp);
859 *dstp = new;
Eric Dumazeta02cec22010-09-22 20:43:57 +0000860 return new ? 0 : -ENOMEM;
David S. Miller14e50e52007-05-24 18:17:54 -0700861}
862EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
863
Linus Torvalds1da177e2005-04-16 15:20:36 -0700864/*
865 * Destination cache support functions
866 */
867
868static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
869{
870 struct rt6_info *rt;
871
872 rt = (struct rt6_info *) dst;
873
Herbert Xu10414442010-03-18 23:00:22 +0000874 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700875 return dst;
876
877 return NULL;
878}
879
880static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
881{
882 struct rt6_info *rt = (struct rt6_info *) dst;
883
884 if (rt) {
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +0000885 if (rt->rt6i_flags & RTF_CACHE) {
886 if (rt6_check_expired(rt)) {
887 ip6_del_rt(rt);
888 dst = NULL;
889 }
890 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700891 dst_release(dst);
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +0000892 dst = NULL;
893 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700894 }
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +0000895 return dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700896}
897
898static void ip6_link_failure(struct sk_buff *skb)
899{
900 struct rt6_info *rt;
901
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +0000902 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700903
Eric Dumazetadf30902009-06-02 05:19:30 +0000904 rt = (struct rt6_info *) skb_dst(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700905 if (rt) {
906 if (rt->rt6i_flags&RTF_CACHE) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700907 dst_set_expires(&rt->dst, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700908 rt->rt6i_flags |= RTF_EXPIRES;
909 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
910 rt->rt6i_node->fn_sernum = -1;
911 }
912}
913
914static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
915{
916 struct rt6_info *rt6 = (struct rt6_info*)dst;
917
918 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
919 rt6->rt6i_flags |= RTF_MODIFIED;
920 if (mtu < IPV6_MIN_MTU) {
921 mtu = IPV6_MIN_MTU;
922 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
923 }
924 dst->metrics[RTAX_MTU-1] = mtu;
Tom Tucker8d717402006-07-30 20:43:36 -0700925 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700926 }
927}
928
Linus Torvalds1da177e2005-04-16 15:20:36 -0700929static int ipv6_get_mtu(struct net_device *dev);
930
Daniel Lezcano55786892008-03-04 13:47:47 -0800931static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700932{
933 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
934
Daniel Lezcano55786892008-03-04 13:47:47 -0800935 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
936 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700937
938 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900939 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
940 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
941 * IPV6_MAXPLEN is also valid and means: "any MSS,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700942 * rely only on pmtu discovery"
943 */
944 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
945 mtu = IPV6_MAXPLEN;
946 return mtu;
947}
948
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800949static struct dst_entry *icmp6_dst_gc_list;
950static DEFINE_SPINLOCK(icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700951
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800952struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700953 struct neighbour *neigh,
YOSHIFUJI Hideaki9acd9f32008-04-10 15:42:10 +0900954 const struct in6_addr *addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700955{
956 struct rt6_info *rt;
957 struct inet6_dev *idev = in6_dev_get(dev);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900958 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700959
960 if (unlikely(idev == NULL))
961 return NULL;
962
Alexey Dobriyan86393e52009-08-29 01:34:49 +0000963 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700964 if (unlikely(rt == NULL)) {
965 in6_dev_put(idev);
966 goto out;
967 }
968
969 dev_hold(dev);
970 if (neigh)
971 neigh_hold(neigh);
David S. Miller14deae42009-01-04 16:04:39 -0800972 else {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700973 neigh = ndisc_get_neigh(dev, addr);
David S. Miller14deae42009-01-04 16:04:39 -0800974 if (IS_ERR(neigh))
975 neigh = NULL;
976 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700977
978 rt->rt6i_dev = dev;
979 rt->rt6i_idev = idev;
980 rt->rt6i_nexthop = neigh;
Changli Gaod8d1f302010-06-10 23:31:35 -0700981 atomic_set(&rt->dst.__refcnt, 1);
982 rt->dst.metrics[RTAX_HOPLIMIT-1] = 255;
983 rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
984 rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
985 rt->dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700986
987#if 0 /* there's no chance to use these for ndisc */
Changli Gaod8d1f302010-06-10 23:31:35 -0700988 rt->dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900989 ? DST_HOST
Linus Torvalds1da177e2005-04-16 15:20:36 -0700990 : 0;
991 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
992 rt->rt6i_dst.plen = 128;
993#endif
994
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800995 spin_lock_bh(&icmp6_dst_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -0700996 rt->dst.next = icmp6_dst_gc_list;
997 icmp6_dst_gc_list = &rt->dst;
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800998 spin_unlock_bh(&icmp6_dst_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700999
Daniel Lezcano55786892008-03-04 13:47:47 -08001000 fib6_force_start_gc(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001001
1002out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001003 return &rt->dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001004}
1005
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001006int icmp6_dst_gc(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001007{
1008 struct dst_entry *dst, *next, **pprev;
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001009 int more = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001010
1011 next = NULL;
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001012
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001013 spin_lock_bh(&icmp6_dst_lock);
1014 pprev = &icmp6_dst_gc_list;
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001015
Linus Torvalds1da177e2005-04-16 15:20:36 -07001016 while ((dst = *pprev) != NULL) {
1017 if (!atomic_read(&dst->__refcnt)) {
1018 *pprev = dst->next;
1019 dst_free(dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001020 } else {
1021 pprev = &dst->next;
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001022 ++more;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001023 }
1024 }
1025
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001026 spin_unlock_bh(&icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001027
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001028 return more;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001029}
1030
David S. Miller1e493d12008-09-10 17:27:15 -07001031static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1032 void *arg)
1033{
1034 struct dst_entry *dst, **pprev;
1035
1036 spin_lock_bh(&icmp6_dst_lock);
1037 pprev = &icmp6_dst_gc_list;
1038 while ((dst = *pprev) != NULL) {
1039 struct rt6_info *rt = (struct rt6_info *) dst;
1040 if (func(rt, arg)) {
1041 *pprev = dst->next;
1042 dst_free(dst);
1043 } else {
1044 pprev = &dst->next;
1045 }
1046 }
1047 spin_unlock_bh(&icmp6_dst_lock);
1048}
1049
Daniel Lezcano569d3642008-01-18 03:56:57 -08001050static int ip6_dst_gc(struct dst_ops *ops)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001051{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001052 unsigned long now = jiffies;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00001053 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
Daniel Lezcano7019b782008-03-04 13:50:14 -08001054 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1055 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1056 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1057 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1058 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001059 int entries;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001060
Eric Dumazetfc66f952010-10-08 06:37:34 +00001061 entries = dst_entries_get_fast(ops);
Daniel Lezcano7019b782008-03-04 13:50:14 -08001062 if (time_after(rt_last_gc + rt_min_interval, now) &&
Eric Dumazetfc66f952010-10-08 06:37:34 +00001063 entries <= rt_max_size)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001064 goto out;
1065
Benjamin Thery6891a342008-03-04 13:49:47 -08001066 net->ipv6.ip6_rt_gc_expire++;
1067 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1068 net->ipv6.ip6_rt_last_gc = now;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001069 entries = dst_entries_get_slow(ops);
1070 if (entries < ops->gc_thresh)
Daniel Lezcano7019b782008-03-04 13:50:14 -08001071 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001072out:
Daniel Lezcano7019b782008-03-04 13:50:14 -08001073 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001074 return entries > rt_max_size;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001075}
1076
1077/* Clean host part of a prefix. Not necessary in radix tree,
1078 but results in cleaner routing tables.
1079
1080 Remove it only when all the things will work!
1081 */
1082
1083static int ipv6_get_mtu(struct net_device *dev)
1084{
1085 int mtu = IPV6_MIN_MTU;
1086 struct inet6_dev *idev;
1087
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001088 rcu_read_lock();
1089 idev = __in6_dev_get(dev);
1090 if (idev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001091 mtu = idev->cnf.mtu6;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001092 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001093 return mtu;
1094}
1095
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001096int ip6_dst_hoplimit(struct dst_entry *dst)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001097{
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001098 int hoplimit = dst_metric(dst, RTAX_HOPLIMIT);
1099 if (hoplimit < 0) {
1100 struct net_device *dev = dst->dev;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001101 struct inet6_dev *idev;
1102
1103 rcu_read_lock();
1104 idev = __in6_dev_get(dev);
1105 if (idev)
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001106 hoplimit = idev->cnf.hop_limit;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001107 else
YOSHIFUJI Hideaki53b79972008-07-19 22:35:03 -07001108 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001109 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001110 }
1111 return hoplimit;
1112}
1113
1114/*
1115 *
1116 */
1117
/*
 * Create a route from the configuration in @cfg and insert it with
 * __ip6_ins_rt().
 *
 * Returns the result of __ip6_ins_rt() on the insertion path, otherwise a
 * negative errno: -EINVAL (prefix length above 128, source prefix without
 * CONFIG_IPV6_SUBTREES, unusable gateway, metric type above RTAX_MAX),
 * -ENODEV, -ENOBUFS, -ENOMEM, -EHOSTUNREACH, or the error reported by
 * __neigh_lookup_errno().
 *
 * @cfg is modified: defaults are filled in for fc_metric and fc_protocol,
 * and fc_nlinfo.nl_net is overwritten with dev_net(dev) before insertion.
 *
 * On failure, the device, inet6_dev and route obtained so far are released
 * at the "out" label.  On the insertion path the dev/idev pointers are
 * stored in the route and no dev_put()/in6_dev_put() is done here.
 */
Thomas Graf86872cb2006-08-22 00:01:08 -07001118int ip6_route_add(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001119{
1120 int err;
Daniel Lezcano55786892008-03-04 13:47:47 -08001121 struct net *net = cfg->fc_nlinfo.nl_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001122 struct rt6_info *rt = NULL;
1123 struct net_device *dev = NULL;
1124 struct inet6_dev *idev = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001125 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001126 int addr_type;
1127
Thomas Graf86872cb2006-08-22 00:01:08 -07001128 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001129 return -EINVAL;
1130#ifndef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001131 if (cfg->fc_src_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001132 return -EINVAL;
1133#endif
/* An explicit interface was given: take references on it and on its inet6_dev. */
Thomas Graf86872cb2006-08-22 00:01:08 -07001134 if (cfg->fc_ifindex) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001135 err = -ENODEV;
Daniel Lezcano55786892008-03-04 13:47:47 -08001136 dev = dev_get_by_index(net, cfg->fc_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001137 if (!dev)
1138 goto out;
1139 idev = in6_dev_get(dev);
1140 if (!idev)
1141 goto out;
1142 }
1143
Thomas Graf86872cb2006-08-22 00:01:08 -07001144 if (cfg->fc_metric == 0)
1145 cfg->fc_metric = IP6_RT_PRIO_USER;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001146
Daniel Lezcano55786892008-03-04 13:47:47 -08001147 table = fib6_new_table(net, cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001148 if (table == NULL) {
1149 err = -ENOBUFS;
1150 goto out;
1151 }
1152
Alexey Dobriyan86393e52009-08-29 01:34:49 +00001153 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001154
1155 if (rt == NULL) {
1156 err = -ENOMEM;
1157 goto out;
1158 }
1159
Changli Gaod8d1f302010-06-10 23:31:35 -07001160 rt->dst.obsolete = -1;
/* fc_expires is converted from clock_t to jiffies and made absolute; 0 when RTF_EXPIRES is not set. */
YOSHIFUJI Hideaki6f704992008-05-19 16:56:11 -07001161 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1162 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1163 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001164
Thomas Graf86872cb2006-08-22 00:01:08 -07001165 if (cfg->fc_protocol == RTPROT_UNSPEC)
1166 cfg->fc_protocol = RTPROT_BOOT;
1167 rt->rt6i_protocol = cfg->fc_protocol;
1168
1169 addr_type = ipv6_addr_type(&cfg->fc_dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001170
/* Input handler by destination kind: multicast, local delivery (RTF_LOCAL), else forwarding. */
1171 if (addr_type & IPV6_ADDR_MULTICAST)
Changli Gaod8d1f302010-06-10 23:31:35 -07001172 rt->dst.input = ip6_mc_input;
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00001173 else if (cfg->fc_flags & RTF_LOCAL)
1174 rt->dst.input = ip6_input;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001175 else
Changli Gaod8d1f302010-06-10 23:31:35 -07001176 rt->dst.input = ip6_forward;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001177
Changli Gaod8d1f302010-06-10 23:31:35 -07001178 rt->dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001179
Thomas Graf86872cb2006-08-22 00:01:08 -07001180 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1181 rt->rt6i_dst.plen = cfg->fc_dst_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001182 if (rt->rt6i_dst.plen == 128)
Changli Gaod8d1f302010-06-10 23:31:35 -07001183 rt->dst.flags = DST_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001184
1185#ifdef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001186 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1187 rt->rt6i_src.plen = cfg->fc_src_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001188#endif
1189
Thomas Graf86872cb2006-08-22 00:01:08 -07001190 rt->rt6i_metric = cfg->fc_metric;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001191
1192 /* We cannot add true routes via loopback here,
1193 they would result in kernel looping; promote them to reject routes
1194 */
Thomas Graf86872cb2006-08-22 00:01:08 -07001195 if ((cfg->fc_flags & RTF_REJECT) ||
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00001196 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1197 && !(cfg->fc_flags&RTF_LOCAL))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001198 /* hold loopback dev/idev if we haven't done so. */
Daniel Lezcano55786892008-03-04 13:47:47 -08001199 if (dev != net->loopback_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001200 if (dev) {
1201 dev_put(dev);
1202 in6_dev_put(idev);
1203 }
Daniel Lezcano55786892008-03-04 13:47:47 -08001204 dev = net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001205 dev_hold(dev);
1206 idev = in6_dev_get(dev);
1207 if (!idev) {
1208 err = -ENODEV;
1209 goto out;
1210 }
1211 }
/* Reject route: both handlers discard and dst.error is -ENETUNREACH; the user's fc_flags are not copied. */
Changli Gaod8d1f302010-06-10 23:31:35 -07001212 rt->dst.output = ip6_pkt_discard_out;
1213 rt->dst.input = ip6_pkt_discard;
1214 rt->dst.error = -ENETUNREACH;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001215 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1216 goto install_route;
1217 }
1218
Thomas Graf86872cb2006-08-22 00:01:08 -07001219 if (cfg->fc_flags & RTF_GATEWAY) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001220 struct in6_addr *gw_addr;
1221 int gwa_type;
1222
Thomas Graf86872cb2006-08-22 00:01:08 -07001223 gw_addr = &cfg->fc_gateway;
1224 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001225 gwa_type = ipv6_addr_type(gw_addr);
1226
1227 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1228 struct rt6_info *grt;
1229
1230 /* IPv6 strictly inhibits using not link-local
1231 addresses as nexthop address.
1232 Otherwise, router will not able to send redirects.
1233 It is very good, but in some (rare!) circumstances
1234 (SIT, PtP, NBMA NOARP links) it is handy to allow
1235 some exceptions. --ANK
1236 */
1237 err = -EINVAL;
1238 if (!(gwa_type&IPV6_ADDR_UNICAST))
1239 goto out;
1240
/* The non-link-local gateway must itself be covered by an existing route. */
Daniel Lezcano55786892008-03-04 13:47:47 -08001241 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001242
1243 err = -EHOSTUNREACH;
1244 if (grt == NULL)
1245 goto out;
1246 if (dev) {
1247 if (dev != grt->rt6i_dev) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001248 dst_release(&grt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001249 goto out;
1250 }
1251 } else {
/* No interface was specified: adopt the device of the route to the gateway. */
1252 dev = grt->rt6i_dev;
1253 idev = grt->rt6i_idev;
1254 dev_hold(dev);
1255 in6_dev_hold(grt->rt6i_idev);
1256 }
/* Accept only if the route to the gateway is not itself a gateway route; otherwise err stays -EHOSTUNREACH. */
1257 if (!(grt->rt6i_flags&RTF_GATEWAY))
1258 err = 0;
Changli Gaod8d1f302010-06-10 23:31:35 -07001259 dst_release(&grt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001260
1261 if (err)
1262 goto out;
1263 }
1264 err = -EINVAL;
1265 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1266 goto out;
1267 }
1268
1269 err = -ENODEV;
1270 if (dev == NULL)
1271 goto out;
1272
Thomas Graf86872cb2006-08-22 00:01:08 -07001273 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001274 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1275 if (IS_ERR(rt->rt6i_nexthop)) {
1276 err = PTR_ERR(rt->rt6i_nexthop);
/* Clear the ERR_PTR value so the cleanup at "out" never sees it as a neighbour pointer. */
1277 rt->rt6i_nexthop = NULL;
1278 goto out;
1279 }
1280 }
1281
Thomas Graf86872cb2006-08-22 00:01:08 -07001282 rt->rt6i_flags = cfg->fc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001283
1284install_route:
/* Copy user-supplied metrics; attribute type N is stored at metrics[N - 1], type 0 is ignored. */
Thomas Graf86872cb2006-08-22 00:01:08 -07001285 if (cfg->fc_mx) {
1286 struct nlattr *nla;
1287 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001288
Thomas Graf86872cb2006-08-22 00:01:08 -07001289 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
Thomas Graf8f4c1f92007-09-12 14:44:36 +02001290 int type = nla_type(nla);
Thomas Graf86872cb2006-08-22 00:01:08 -07001291
1292 if (type) {
1293 if (type > RTAX_MAX) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001294 err = -EINVAL;
1295 goto out;
1296 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001297
Changli Gaod8d1f302010-06-10 23:31:35 -07001298 rt->dst.metrics[type - 1] = nla_get_u32(nla);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001299 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001300 }
1301 }
1302
/* Defaults for metrics left at zero: hop limit becomes -1, MTU comes from the device, ADVMSS is derived from the MTU. */
Changli Gaod8d1f302010-06-10 23:31:35 -07001303 if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0)
1304 rt->dst.metrics[RTAX_HOPLIMIT-1] = -1;
1305 if (!dst_mtu(&rt->dst))
1306 rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1307 if (!dst_metric(&rt->dst, RTAX_ADVMSS))
1308 rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
1309 rt->dst.dev = dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001310 rt->rt6i_idev = idev;
Thomas Grafc71099a2006-08-04 23:20:06 -07001311 rt->rt6i_table = table;
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001312
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001313 cfg->fc_nlinfo.nl_net = dev_net(dev);
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001314
Thomas Graf86872cb2006-08-22 00:01:08 -07001315 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001316
1317out:
/* Error unwind: release whatever was acquired before the failure. */
1318 if (dev)
1319 dev_put(dev);
1320 if (idev)
1321 in6_dev_put(idev);
1322 if (rt)
Changli Gaod8d1f302010-06-10 23:31:35 -07001323 dst_free(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001324 return err;
1325}
1326
Thomas Graf86872cb2006-08-22 00:01:08 -07001327static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001328{
1329 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -07001330 struct fib6_table *table;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001331 struct net *net = dev_net(rt->rt6i_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001332
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001333 if (rt == net->ipv6.ip6_null_entry)
Patrick McHardy6c813a72006-08-06 22:22:47 -07001334 return -ENOENT;
1335
Thomas Grafc71099a2006-08-04 23:20:06 -07001336 table = rt->rt6i_table;
1337 write_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001338
Thomas Graf86872cb2006-08-22 00:01:08 -07001339 err = fib6_del(rt, info);
Changli Gaod8d1f302010-06-10 23:31:35 -07001340 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001341
Thomas Grafc71099a2006-08-04 23:20:06 -07001342 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001343
1344 return err;
1345}
1346
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001347int ip6_del_rt(struct rt6_info *rt)
1348{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001349 struct nl_info info = {
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001350 .nl_net = dev_net(rt->rt6i_dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001351 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08001352 return __ip6_del_rt(rt, &info);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001353}
1354
Thomas Graf86872cb2006-08-22 00:01:08 -07001355static int ip6_route_del(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001356{
Thomas Grafc71099a2006-08-04 23:20:06 -07001357 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001358 struct fib6_node *fn;
1359 struct rt6_info *rt;
1360 int err = -ESRCH;
1361
Daniel Lezcano55786892008-03-04 13:47:47 -08001362 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001363 if (table == NULL)
1364 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001365
Thomas Grafc71099a2006-08-04 23:20:06 -07001366 read_lock_bh(&table->tb6_lock);
1367
1368 fn = fib6_locate(&table->tb6_root,
Thomas Graf86872cb2006-08-22 00:01:08 -07001369 &cfg->fc_dst, cfg->fc_dst_len,
1370 &cfg->fc_src, cfg->fc_src_len);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001371
Linus Torvalds1da177e2005-04-16 15:20:36 -07001372 if (fn) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001373 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
Thomas Graf86872cb2006-08-22 00:01:08 -07001374 if (cfg->fc_ifindex &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001375 (rt->rt6i_dev == NULL ||
Thomas Graf86872cb2006-08-22 00:01:08 -07001376 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001377 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001378 if (cfg->fc_flags & RTF_GATEWAY &&
1379 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001380 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001381 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001382 continue;
Changli Gaod8d1f302010-06-10 23:31:35 -07001383 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001384 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001385
Thomas Graf86872cb2006-08-22 00:01:08 -07001386 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001387 }
1388 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001389 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001390
1391 return err;
1392}
1393
1394/*
1395 * Handle redirects
1396 */
/*
 * Flow key extended with the gateway address of a redirect.
 *
 * __ip6_route_redirect() receives a "struct flowi *" and casts it back to
 * "struct ip6rd_flowi *", so fl has to stay the first member.
 */
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001397struct ip6rd_flowi {
/* plain flow key; must remain first (see cast in __ip6_route_redirect()) */
1398 struct flowi fl;
/* compared against rt6i_gateway of each candidate route */
1399 struct in6_addr gateway;
1400};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001401
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001402static struct rt6_info *__ip6_route_redirect(struct net *net,
1403 struct fib6_table *table,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001404 struct flowi *fl,
1405 int flags)
1406{
1407 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1408 struct rt6_info *rt;
1409 struct fib6_node *fn;
Thomas Grafc71099a2006-08-04 23:20:06 -07001410
Linus Torvalds1da177e2005-04-16 15:20:36 -07001411 /*
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001412 * Get the "current" route for this destination and
1413 * check if the redirect has come from approriate router.
1414 *
1415 * RFC 2461 specifies that redirects should only be
1416 * accepted if they come from the nexthop to the target.
1417 * Due to the way the routes are chosen, this notion
1418 * is a bit fuzzy and one might need to check all possible
1419 * routes.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001420 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001421
Thomas Grafc71099a2006-08-04 23:20:06 -07001422 read_lock_bh(&table->tb6_lock);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001423 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001424restart:
Changli Gaod8d1f302010-06-10 23:31:35 -07001425 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001426 /*
1427 * Current route is on-link; redirect is always invalid.
1428 *
1429 * Seems, previous statement is not true. It could
1430 * be node, which looks for us as on-link (f.e. proxy ndisc)
1431 * But then router serving it might decide, that we should
1432 * know truth 8)8) --ANK (980726).
1433 */
1434 if (rt6_check_expired(rt))
1435 continue;
1436 if (!(rt->rt6i_flags & RTF_GATEWAY))
1437 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001438 if (fl->oif != rt->rt6i_dev->ifindex)
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001439 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001440 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001441 continue;
1442 break;
1443 }
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001444
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001445 if (!rt)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001446 rt = net->ipv6.ip6_null_entry;
1447 BACKTRACK(net, &fl->fl6_src);
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001448out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001449 dst_hold(&rt->dst);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001450
1451 read_unlock_bh(&table->tb6_lock);
1452
1453 return rt;
1454};
1455
1456static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1457 struct in6_addr *src,
1458 struct in6_addr *gateway,
1459 struct net_device *dev)
1460{
Thomas Grafadaa70b2006-10-13 15:01:03 -07001461 int flags = RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001462 struct net *net = dev_net(dev);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001463 struct ip6rd_flowi rdfl = {
1464 .fl = {
1465 .oif = dev->ifindex,
1466 .nl_u = {
1467 .ip6_u = {
1468 .daddr = *dest,
1469 .saddr = *src,
1470 },
1471 },
1472 },
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001473 };
Thomas Grafadaa70b2006-10-13 15:01:03 -07001474
Brian Haley86c36ce2009-10-07 13:58:01 -07001475 ipv6_addr_copy(&rdfl.gateway, gateway);
1476
Thomas Grafadaa70b2006-10-13 15:01:03 -07001477 if (rt6_need_strict(dest))
1478 flags |= RT6_LOOKUP_F_IFACE;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001479
Daniel Lezcano55786892008-03-04 13:47:47 -08001480 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
Daniel Lezcano58f09b72008-03-03 23:25:27 -08001481 flags, __ip6_route_redirect);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001482}
1483
1484void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1485 struct in6_addr *saddr,
1486 struct neighbour *neigh, u8 *lladdr, int on_link)
1487{
1488 struct rt6_info *rt, *nrt = NULL;
1489 struct netevent_redirect netevent;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001490 struct net *net = dev_net(neigh->dev);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001491
1492 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1493
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001494 if (rt == net->ipv6.ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001495 if (net_ratelimit())
1496 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1497 "for redirect target\n");
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001498 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001499 }
1500
Linus Torvalds1da177e2005-04-16 15:20:36 -07001501 /*
1502 * We have finally decided to accept it.
1503 */
1504
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001505 neigh_update(neigh, lladdr, NUD_STALE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001506 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1507 NEIGH_UPDATE_F_OVERRIDE|
1508 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1509 NEIGH_UPDATE_F_ISROUTER))
1510 );
1511
1512 /*
1513 * Redirect received -> path was valid.
1514 * Look, redirects are sent only in response to data packets,
1515 * so that this nexthop apparently is reachable. --ANK
1516 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001517 dst_confirm(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001518
1519 /* Duplicate redirect: silently ignore. */
Changli Gaod8d1f302010-06-10 23:31:35 -07001520 if (neigh == rt->dst.neighbour)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001521 goto out;
1522
1523 nrt = ip6_rt_copy(rt);
1524 if (nrt == NULL)
1525 goto out;
1526
1527 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1528 if (on_link)
1529 nrt->rt6i_flags &= ~RTF_GATEWAY;
1530
1531 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1532 nrt->rt6i_dst.plen = 128;
Changli Gaod8d1f302010-06-10 23:31:35 -07001533 nrt->dst.flags |= DST_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001534
1535 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1536 nrt->rt6i_nexthop = neigh_clone(neigh);
1537 /* Reset pmtu, it may be better */
Changli Gaod8d1f302010-06-10 23:31:35 -07001538 nrt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1539 nrt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev),
1540 dst_mtu(&nrt->dst));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001541
Thomas Graf40e22e82006-08-22 00:00:45 -07001542 if (ip6_ins_rt(nrt))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001543 goto out;
1544
Changli Gaod8d1f302010-06-10 23:31:35 -07001545 netevent.old = &rt->dst;
1546 netevent.new = &nrt->dst;
Tom Tucker8d717402006-07-30 20:43:36 -07001547 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1548
Linus Torvalds1da177e2005-04-16 15:20:36 -07001549 if (rt->rt6i_flags&RTF_CACHE) {
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001550 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001551 return;
1552 }
1553
1554out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001555 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001556}
1557
1558/*
1559 * Handle ICMP "packet too big" messages
1560 * i.e. Path MTU discovery
1561 */
1562
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001563static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr,
1564 struct net *net, u32 pmtu, int ifindex)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001565{
1566 struct rt6_info *rt, *nrt;
1567 int allfrag = 0;
1568
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001569 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001570 if (rt == NULL)
1571 return;
1572
Changli Gaod8d1f302010-06-10 23:31:35 -07001573 if (pmtu >= dst_mtu(&rt->dst))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001574 goto out;
1575
1576 if (pmtu < IPV6_MIN_MTU) {
1577 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001578 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
Linus Torvalds1da177e2005-04-16 15:20:36 -07001579 * MTU (1280) and a fragment header should always be included
1580 * after a node receiving Too Big message reporting PMTU is
1581 * less than the IPv6 Minimum Link MTU.
1582 */
1583 pmtu = IPV6_MIN_MTU;
1584 allfrag = 1;
1585 }
1586
1587 /* New mtu received -> path was valid.
1588 They are sent only in response to data packets,
1589 so that this nexthop apparently is reachable. --ANK
1590 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001591 dst_confirm(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001592
1593 /* Host route. If it is static, it would be better
1594 not to override it, but add new one, so that
1595 when cache entry will expire old pmtu
1596 would return automatically.
1597 */
1598 if (rt->rt6i_flags & RTF_CACHE) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001599 rt->dst.metrics[RTAX_MTU-1] = pmtu;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001600 if (allfrag)
Changli Gaod8d1f302010-06-10 23:31:35 -07001601 rt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1602 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001603 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1604 goto out;
1605 }
1606
1607 /* Network route.
1608 Two cases are possible:
1609 1. It is connected route. Action: COW
1610 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1611 */
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001612 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001613 nrt = rt6_alloc_cow(rt, daddr, saddr);
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001614 else
1615 nrt = rt6_alloc_clone(rt, daddr);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001616
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001617 if (nrt) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001618 nrt->dst.metrics[RTAX_MTU-1] = pmtu;
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001619 if (allfrag)
Changli Gaod8d1f302010-06-10 23:31:35 -07001620 nrt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001621
1622 /* According to RFC 1981, detecting PMTU increase shouldn't be
1623 * happened within 5 mins, the recommended timer is 10 mins.
1624 * Here this route expiration time is set to ip6_rt_mtu_expires
1625 * which is 10 mins. After 10 mins the decreased pmtu is expired
1626 * and detecting PMTU increase will be automatically happened.
1627 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001628 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001629 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1630
Thomas Graf40e22e82006-08-22 00:00:45 -07001631 ip6_ins_rt(nrt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001632 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001633out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001634 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001635}
1636
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001637void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1638 struct net_device *dev, u32 pmtu)
1639{
1640 struct net *net = dev_net(dev);
1641
1642 /*
1643 * RFC 1981 states that a node "MUST reduce the size of the packets it
1644 * is sending along the path" that caused the Packet Too Big message.
1645 * Since it's not possible in the general case to determine which
1646 * interface was used to send the original packet, we update the MTU
1647 * on the interface that will be used to send future packets. We also
1648 * update the MTU on the interface that received the Packet Too Big in
1649 * case the original packet was forced out that interface with
1650 * SO_BINDTODEVICE or similar. This is the next best thing to the
1651 * correct behaviour, which would be to update the MTU on all
1652 * interfaces.
1653 */
1654 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1655 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1656}
1657
Linus Torvalds1da177e2005-04-16 15:20:36 -07001658/*
1659 * Misc support functions
1660 */
1661
1662static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1663{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001664 struct net *net = dev_net(ort->rt6i_dev);
Alexey Dobriyan86393e52009-08-29 01:34:49 +00001665 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001666
1667 if (rt) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001668 rt->dst.input = ort->dst.input;
1669 rt->dst.output = ort->dst.output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001670
Changli Gaod8d1f302010-06-10 23:31:35 -07001671 memcpy(rt->dst.metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32));
1672 rt->dst.error = ort->dst.error;
1673 rt->dst.dev = ort->dst.dev;
1674 if (rt->dst.dev)
1675 dev_hold(rt->dst.dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001676 rt->rt6i_idev = ort->rt6i_idev;
1677 if (rt->rt6i_idev)
1678 in6_dev_hold(rt->rt6i_idev);
Changli Gaod8d1f302010-06-10 23:31:35 -07001679 rt->dst.lastuse = jiffies;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001680 rt->rt6i_expires = 0;
1681
1682 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1683 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1684 rt->rt6i_metric = 0;
1685
1686 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1687#ifdef CONFIG_IPV6_SUBTREES
1688 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1689#endif
Thomas Grafc71099a2006-08-04 23:20:06 -07001690 rt->rt6i_table = ort->rt6i_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001691 }
1692 return rt;
1693}
1694
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001695#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001696static struct rt6_info *rt6_get_route_info(struct net *net,
1697 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001698 struct in6_addr *gwaddr, int ifindex)
1699{
1700 struct fib6_node *fn;
1701 struct rt6_info *rt = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001702 struct fib6_table *table;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001703
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001704 table = fib6_get_table(net, RT6_TABLE_INFO);
Thomas Grafc71099a2006-08-04 23:20:06 -07001705 if (table == NULL)
1706 return NULL;
1707
1708 write_lock_bh(&table->tb6_lock);
1709 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001710 if (!fn)
1711 goto out;
1712
Changli Gaod8d1f302010-06-10 23:31:35 -07001713 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001714 if (rt->rt6i_dev->ifindex != ifindex)
1715 continue;
1716 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1717 continue;
1718 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1719 continue;
Changli Gaod8d1f302010-06-10 23:31:35 -07001720 dst_hold(&rt->dst);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001721 break;
1722 }
1723out:
Thomas Grafc71099a2006-08-04 23:20:06 -07001724 write_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001725 return rt;
1726}
1727
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001728static struct rt6_info *rt6_add_route_info(struct net *net,
1729 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001730 struct in6_addr *gwaddr, int ifindex,
1731 unsigned pref)
1732{
Thomas Graf86872cb2006-08-22 00:01:08 -07001733 struct fib6_config cfg = {
1734 .fc_table = RT6_TABLE_INFO,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001735 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001736 .fc_ifindex = ifindex,
1737 .fc_dst_len = prefixlen,
1738 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1739 RTF_UP | RTF_PREF(pref),
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001740 .fc_nlinfo.pid = 0,
1741 .fc_nlinfo.nlh = NULL,
1742 .fc_nlinfo.nl_net = net,
Thomas Graf86872cb2006-08-22 00:01:08 -07001743 };
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001744
Thomas Graf86872cb2006-08-22 00:01:08 -07001745 ipv6_addr_copy(&cfg.fc_dst, prefix);
1746 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1747
YOSHIFUJI Hideakie317da92006-03-20 17:06:42 -08001748 /* We should treat it as a default route if prefix length is 0. */
1749 if (!prefixlen)
Thomas Graf86872cb2006-08-22 00:01:08 -07001750 cfg.fc_flags |= RTF_DEFAULT;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001751
Thomas Graf86872cb2006-08-22 00:01:08 -07001752 ip6_route_add(&cfg);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001753
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001754 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001755}
1756#endif
1757
Linus Torvalds1da177e2005-04-16 15:20:36 -07001758struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001759{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001760 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001761 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001762
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001763 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001764 if (table == NULL)
1765 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001766
Thomas Grafc71099a2006-08-04 23:20:06 -07001767 write_lock_bh(&table->tb6_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07001768 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001769 if (dev == rt->rt6i_dev &&
YOSHIFUJI Hideaki045927f2006-03-20 17:00:48 -08001770 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001771 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1772 break;
1773 }
1774 if (rt)
Changli Gaod8d1f302010-06-10 23:31:35 -07001775 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001776 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001777 return rt;
1778}
1779
1780struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001781 struct net_device *dev,
1782 unsigned int pref)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001783{
Thomas Graf86872cb2006-08-22 00:01:08 -07001784 struct fib6_config cfg = {
1785 .fc_table = RT6_TABLE_DFLT,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001786 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001787 .fc_ifindex = dev->ifindex,
1788 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1789 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
Daniel Lezcano55786892008-03-04 13:47:47 -08001790 .fc_nlinfo.pid = 0,
1791 .fc_nlinfo.nlh = NULL,
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001792 .fc_nlinfo.nl_net = dev_net(dev),
Thomas Graf86872cb2006-08-22 00:01:08 -07001793 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001794
Thomas Graf86872cb2006-08-22 00:01:08 -07001795 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001796
Thomas Graf86872cb2006-08-22 00:01:08 -07001797 ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001798
Linus Torvalds1da177e2005-04-16 15:20:36 -07001799 return rt6_get_dflt_router(gwaddr, dev);
1800}
1801
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001802void rt6_purge_dflt_routers(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001803{
1804 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001805 struct fib6_table *table;
1806
1807 /* NOTE: Keep consistent with rt6_get_dflt_router */
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001808 table = fib6_get_table(net, RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001809 if (table == NULL)
1810 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001811
1812restart:
Thomas Grafc71099a2006-08-04 23:20:06 -07001813 read_lock_bh(&table->tb6_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07001814 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001815 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001816 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001817 read_unlock_bh(&table->tb6_lock);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001818 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001819 goto restart;
1820 }
1821 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001822 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001823}
1824
Daniel Lezcano55786892008-03-04 13:47:47 -08001825static void rtmsg_to_fib6_config(struct net *net,
1826 struct in6_rtmsg *rtmsg,
Thomas Graf86872cb2006-08-22 00:01:08 -07001827 struct fib6_config *cfg)
1828{
1829 memset(cfg, 0, sizeof(*cfg));
1830
1831 cfg->fc_table = RT6_TABLE_MAIN;
1832 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1833 cfg->fc_metric = rtmsg->rtmsg_metric;
1834 cfg->fc_expires = rtmsg->rtmsg_info;
1835 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1836 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1837 cfg->fc_flags = rtmsg->rtmsg_flags;
1838
Daniel Lezcano55786892008-03-04 13:47:47 -08001839 cfg->fc_nlinfo.nl_net = net;
Benjamin Theryf1243c22008-02-26 18:10:03 -08001840
Thomas Graf86872cb2006-08-22 00:01:08 -07001841 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1842 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1843 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1844}
1845
Daniel Lezcano55786892008-03-04 13:47:47 -08001846int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001847{
Thomas Graf86872cb2006-08-22 00:01:08 -07001848 struct fib6_config cfg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001849 struct in6_rtmsg rtmsg;
1850 int err;
1851
1852 switch(cmd) {
1853 case SIOCADDRT: /* Add a route */
1854 case SIOCDELRT: /* Delete a route */
1855 if (!capable(CAP_NET_ADMIN))
1856 return -EPERM;
1857 err = copy_from_user(&rtmsg, arg,
1858 sizeof(struct in6_rtmsg));
1859 if (err)
1860 return -EFAULT;
Thomas Graf86872cb2006-08-22 00:01:08 -07001861
Daniel Lezcano55786892008-03-04 13:47:47 -08001862 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
Thomas Graf86872cb2006-08-22 00:01:08 -07001863
Linus Torvalds1da177e2005-04-16 15:20:36 -07001864 rtnl_lock();
1865 switch (cmd) {
1866 case SIOCADDRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001867 err = ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001868 break;
1869 case SIOCDELRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001870 err = ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001871 break;
1872 default:
1873 err = -EINVAL;
1874 }
1875 rtnl_unlock();
1876
1877 return err;
Stephen Hemminger3ff50b72007-04-20 17:09:22 -07001878 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001879
1880 return -EINVAL;
1881}
1882
1883/*
1884 * Drop the packet on the floor
1885 */
1886
Brian Haleyd5fdd6b2009-06-23 04:31:07 -07001887static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001888{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001889 int type;
Eric Dumazetadf30902009-06-02 05:19:30 +00001890 struct dst_entry *dst = skb_dst(skb);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001891 switch (ipstats_mib_noroutes) {
1892 case IPSTATS_MIB_INNOROUTES:
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -07001893 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
Ulrich Weber45bb0062010-02-25 23:28:58 +00001894 if (type == IPV6_ADDR_ANY) {
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07001895 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1896 IPSTATS_MIB_INADDRERRORS);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001897 break;
1898 }
1899 /* FALLTHROUGH */
1900 case IPSTATS_MIB_OUTNOROUTES:
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07001901 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1902 ipstats_mib_noroutes);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001903 break;
1904 }
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +00001905 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001906 kfree_skb(skb);
1907 return 0;
1908}
1909
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001910static int ip6_pkt_discard(struct sk_buff *skb)
1911{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001912 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001913}
1914
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001915static int ip6_pkt_discard_out(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001916{
Eric Dumazetadf30902009-06-02 05:19:30 +00001917 skb->dev = skb_dst(skb)->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001918 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001919}
1920
David S. Miller6723ab52006-10-18 21:20:57 -07001921#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1922
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001923static int ip6_pkt_prohibit(struct sk_buff *skb)
1924{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001925 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001926}
1927
1928static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1929{
Eric Dumazetadf30902009-06-02 05:19:30 +00001930 skb->dev = skb_dst(skb)->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001931 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001932}
1933
David S. Miller6723ab52006-10-18 21:20:57 -07001934#endif
1935
Linus Torvalds1da177e2005-04-16 15:20:36 -07001936/*
1937 * Allocate a dst for local (unicast / anycast) address.
1938 */
1939
1940struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1941 const struct in6_addr *addr,
1942 int anycast)
1943{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001944 struct net *net = dev_net(idev->dev);
Alexey Dobriyan86393e52009-08-29 01:34:49 +00001945 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
David S. Miller14deae42009-01-04 16:04:39 -08001946 struct neighbour *neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001947
Ben Greear40385652010-11-08 12:33:48 +00001948 if (rt == NULL) {
1949 if (net_ratelimit())
1950 pr_warning("IPv6: Maximum number of routes reached,"
1951 " consider increasing route/max_size.\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001952 return ERR_PTR(-ENOMEM);
Ben Greear40385652010-11-08 12:33:48 +00001953 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001954
Daniel Lezcano55786892008-03-04 13:47:47 -08001955 dev_hold(net->loopback_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001956 in6_dev_hold(idev);
1957
Changli Gaod8d1f302010-06-10 23:31:35 -07001958 rt->dst.flags = DST_HOST;
1959 rt->dst.input = ip6_input;
1960 rt->dst.output = ip6_output;
Daniel Lezcano55786892008-03-04 13:47:47 -08001961 rt->rt6i_dev = net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001962 rt->rt6i_idev = idev;
Changli Gaod8d1f302010-06-10 23:31:35 -07001963 rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1964 rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
1965 rt->dst.metrics[RTAX_HOPLIMIT-1] = -1;
1966 rt->dst.obsolete = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001967
1968 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09001969 if (anycast)
1970 rt->rt6i_flags |= RTF_ANYCAST;
1971 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07001972 rt->rt6i_flags |= RTF_LOCAL;
David S. Miller14deae42009-01-04 16:04:39 -08001973 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1974 if (IS_ERR(neigh)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001975 dst_free(&rt->dst);
David S. Miller14deae42009-01-04 16:04:39 -08001976
1977 /* We are casting this because that is the return
1978 * value type. But an errno encoded pointer is the
1979 * same regardless of the underlying pointer type,
1980 * and that's what we are returning. So this is OK.
1981 */
1982 return (struct rt6_info *) neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001983 }
David S. Miller14deae42009-01-04 16:04:39 -08001984 rt->rt6i_nexthop = neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001985
1986 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1987 rt->rt6i_dst.plen = 128;
Daniel Lezcano55786892008-03-04 13:47:47 -08001988 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001989
Changli Gaod8d1f302010-06-10 23:31:35 -07001990 atomic_set(&rt->dst.__refcnt, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001991
1992 return rt;
1993}
1994
/* Argument bundle handed to fib6_ifdown() through its void pointer. */
struct arg_dev_net {
	struct net_device	*dev;	/* device going down; NULL matches any */
	struct net		*net;	/* namespace whose null entry is spared */
};
1999
Linus Torvalds1da177e2005-04-16 15:20:36 -07002000static int fib6_ifdown(struct rt6_info *rt, void *arg)
2001{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002002 struct net_device *dev = ((struct arg_dev_net *)arg)->dev;
2003 struct net *net = ((struct arg_dev_net *)arg)->net;
2004
2005 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
2006 rt != net->ipv6.ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002007 RT6_TRACE("deleted by ifdown %p\n", rt);
2008 return -1;
2009 }
2010 return 0;
2011}
2012
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002013void rt6_ifdown(struct net *net, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002014{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002015 struct arg_dev_net adn = {
2016 .dev = dev,
2017 .net = net,
2018 };
2019
2020 fib6_clean_all(net, fib6_ifdown, 0, &adn);
David S. Miller1e493d12008-09-10 17:27:15 -07002021 icmp6_clean_all(fib6_ifdown, &adn);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002022}
2023
/* Argument bundle handed to rt6_mtu_change_route() through its void pointer. */
struct rt6_mtu_change_arg {
	struct net_device	*dev;	/* device whose MTU changed */
	unsigned		mtu;	/* its new MTU */
};
2029
2030static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2031{
2032 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2033 struct inet6_dev *idev;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002034 struct net *net = dev_net(arg->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002035
2036 /* In IPv6 pmtu discovery is not optional,
2037 so that RTAX_MTU lock cannot disable it.
2038 We still use this lock to block changes
2039 caused by addrconf/ndisc.
2040 */
2041
2042 idev = __in6_dev_get(arg->dev);
2043 if (idev == NULL)
2044 return 0;
2045
2046 /* For administrative MTU increase, there is no way to discover
2047 IPv6 PMTU increase, so PMTU increase should be updated here.
2048 Since RFC 1981 doesn't include administrative MTU increase
2049 update PMTU increase is a MUST. (i.e. jumbo frame)
2050 */
2051 /*
2052 If new MTU is less than route PMTU, this new MTU will be the
2053 lowest MTU in the path, update the route PMTU to reflect PMTU
2054 decreases; if new MTU is greater than route PMTU, and the
2055 old MTU is the lowest MTU in the path, update the route PMTU
2056 to reflect the increase. In this case if the other nodes' MTU
2057 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2058 PMTU discouvery.
2059 */
2060 if (rt->rt6i_dev == arg->dev &&
Changli Gaod8d1f302010-06-10 23:31:35 -07002061 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2062 (dst_mtu(&rt->dst) >= arg->mtu ||
2063 (dst_mtu(&rt->dst) < arg->mtu &&
2064 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2065 rt->dst.metrics[RTAX_MTU-1] = arg->mtu;
2066 rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
Simon Arlott566cfd82007-07-26 00:09:55 -07002067 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002068 return 0;
2069}
2070
2071void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2072{
Thomas Grafc71099a2006-08-04 23:20:06 -07002073 struct rt6_mtu_change_arg arg = {
2074 .dev = dev,
2075 .mtu = mtu,
2076 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07002077
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002078 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002079}
2080
Patrick McHardyef7c79e2007-06-05 12:38:30 -07002081static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
Thomas Graf5176f912006-08-26 20:13:18 -07002082 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
Thomas Graf86872cb2006-08-22 00:01:08 -07002083 [RTA_OIF] = { .type = NLA_U32 },
Thomas Grafab364a62006-08-22 00:01:47 -07002084 [RTA_IIF] = { .type = NLA_U32 },
Thomas Graf86872cb2006-08-22 00:01:08 -07002085 [RTA_PRIORITY] = { .type = NLA_U32 },
2086 [RTA_METRICS] = { .type = NLA_NESTED },
2087};
2088
2089static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2090 struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002091{
Thomas Graf86872cb2006-08-22 00:01:08 -07002092 struct rtmsg *rtm;
2093 struct nlattr *tb[RTA_MAX+1];
2094 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002095
Thomas Graf86872cb2006-08-22 00:01:08 -07002096 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2097 if (err < 0)
2098 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002099
Thomas Graf86872cb2006-08-22 00:01:08 -07002100 err = -EINVAL;
2101 rtm = nlmsg_data(nlh);
2102 memset(cfg, 0, sizeof(*cfg));
2103
2104 cfg->fc_table = rtm->rtm_table;
2105 cfg->fc_dst_len = rtm->rtm_dst_len;
2106 cfg->fc_src_len = rtm->rtm_src_len;
2107 cfg->fc_flags = RTF_UP;
2108 cfg->fc_protocol = rtm->rtm_protocol;
2109
2110 if (rtm->rtm_type == RTN_UNREACHABLE)
2111 cfg->fc_flags |= RTF_REJECT;
2112
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00002113 if (rtm->rtm_type == RTN_LOCAL)
2114 cfg->fc_flags |= RTF_LOCAL;
2115
Thomas Graf86872cb2006-08-22 00:01:08 -07002116 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2117 cfg->fc_nlinfo.nlh = nlh;
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002118 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
Thomas Graf86872cb2006-08-22 00:01:08 -07002119
2120 if (tb[RTA_GATEWAY]) {
2121 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2122 cfg->fc_flags |= RTF_GATEWAY;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002123 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002124
2125 if (tb[RTA_DST]) {
2126 int plen = (rtm->rtm_dst_len + 7) >> 3;
2127
2128 if (nla_len(tb[RTA_DST]) < plen)
2129 goto errout;
2130
2131 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002132 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002133
2134 if (tb[RTA_SRC]) {
2135 int plen = (rtm->rtm_src_len + 7) >> 3;
2136
2137 if (nla_len(tb[RTA_SRC]) < plen)
2138 goto errout;
2139
2140 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002141 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002142
2143 if (tb[RTA_OIF])
2144 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2145
2146 if (tb[RTA_PRIORITY])
2147 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2148
2149 if (tb[RTA_METRICS]) {
2150 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2151 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002152 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002153
2154 if (tb[RTA_TABLE])
2155 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2156
2157 err = 0;
2158errout:
2159 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002160}
2161
Thomas Grafc127ea22007-03-22 11:58:32 -07002162static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002163{
Thomas Graf86872cb2006-08-22 00:01:08 -07002164 struct fib6_config cfg;
2165 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002166
Thomas Graf86872cb2006-08-22 00:01:08 -07002167 err = rtm_to_fib6_config(skb, nlh, &cfg);
2168 if (err < 0)
2169 return err;
2170
2171 return ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002172}
2173
Thomas Grafc127ea22007-03-22 11:58:32 -07002174static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002175{
Thomas Graf86872cb2006-08-22 00:01:08 -07002176 struct fib6_config cfg;
2177 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002178
Thomas Graf86872cb2006-08-22 00:01:08 -07002179 err = rtm_to_fib6_config(skb, nlh, &cfg);
2180 if (err < 0)
2181 return err;
2182
2183 return ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002184}
2185
Thomas Graf339bf982006-11-10 14:10:15 -08002186static inline size_t rt6_nlmsg_size(void)
2187{
2188 return NLMSG_ALIGN(sizeof(struct rtmsg))
2189 + nla_total_size(16) /* RTA_SRC */
2190 + nla_total_size(16) /* RTA_DST */
2191 + nla_total_size(16) /* RTA_GATEWAY */
2192 + nla_total_size(16) /* RTA_PREFSRC */
2193 + nla_total_size(4) /* RTA_TABLE */
2194 + nla_total_size(4) /* RTA_IIF */
2195 + nla_total_size(4) /* RTA_OIF */
2196 + nla_total_size(4) /* RTA_PRIORITY */
Noriaki TAKAMIYA6a2b9ce2007-01-23 22:09:41 -08002197 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
Thomas Graf339bf982006-11-10 14:10:15 -08002198 + nla_total_size(sizeof(struct rta_cacheinfo));
2199}
2200
Brian Haley191cd582008-08-14 15:33:21 -07002201static int rt6_fill_node(struct net *net,
2202 struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002203 struct in6_addr *dst, struct in6_addr *src,
2204 int iif, int type, u32 pid, u32 seq,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002205 int prefix, int nowait, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002206{
2207 struct rtmsg *rtm;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002208 struct nlmsghdr *nlh;
Thomas Grafe3703b32006-11-27 09:27:07 -08002209 long expires;
Patrick McHardy9e762a42006-08-10 23:09:48 -07002210 u32 table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002211
2212 if (prefix) { /* user wants prefix routes only */
2213 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2214 /* success since this is not a prefix route */
2215 return 1;
2216 }
2217 }
2218
Thomas Graf2d7202b2006-08-22 00:01:27 -07002219 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2220 if (nlh == NULL)
Patrick McHardy26932562007-01-31 23:16:40 -08002221 return -EMSGSIZE;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002222
2223 rtm = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002224 rtm->rtm_family = AF_INET6;
2225 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2226 rtm->rtm_src_len = rt->rt6i_src.plen;
2227 rtm->rtm_tos = 0;
Thomas Grafc71099a2006-08-04 23:20:06 -07002228 if (rt->rt6i_table)
Patrick McHardy9e762a42006-08-10 23:09:48 -07002229 table = rt->rt6i_table->tb6_id;
Thomas Grafc71099a2006-08-04 23:20:06 -07002230 else
Patrick McHardy9e762a42006-08-10 23:09:48 -07002231 table = RT6_TABLE_UNSPEC;
2232 rtm->rtm_table = table;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002233 NLA_PUT_U32(skb, RTA_TABLE, table);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002234 if (rt->rt6i_flags&RTF_REJECT)
2235 rtm->rtm_type = RTN_UNREACHABLE;
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00002236 else if (rt->rt6i_flags&RTF_LOCAL)
2237 rtm->rtm_type = RTN_LOCAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002238 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2239 rtm->rtm_type = RTN_LOCAL;
2240 else
2241 rtm->rtm_type = RTN_UNICAST;
2242 rtm->rtm_flags = 0;
2243 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2244 rtm->rtm_protocol = rt->rt6i_protocol;
2245 if (rt->rt6i_flags&RTF_DYNAMIC)
2246 rtm->rtm_protocol = RTPROT_REDIRECT;
2247 else if (rt->rt6i_flags & RTF_ADDRCONF)
2248 rtm->rtm_protocol = RTPROT_KERNEL;
2249 else if (rt->rt6i_flags&RTF_DEFAULT)
2250 rtm->rtm_protocol = RTPROT_RA;
2251
2252 if (rt->rt6i_flags&RTF_CACHE)
2253 rtm->rtm_flags |= RTM_F_CLONED;
2254
2255 if (dst) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002256 NLA_PUT(skb, RTA_DST, 16, dst);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002257 rtm->rtm_dst_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002258 } else if (rtm->rtm_dst_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002259 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002260#ifdef CONFIG_IPV6_SUBTREES
2261 if (src) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002262 NLA_PUT(skb, RTA_SRC, 16, src);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002263 rtm->rtm_src_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002264 } else if (rtm->rtm_src_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002265 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002266#endif
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002267 if (iif) {
2268#ifdef CONFIG_IPV6_MROUTE
2269 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
Benjamin Thery8229efd2008-12-10 16:30:15 -08002270 int err = ip6mr_get_route(net, skb, rtm, nowait);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002271 if (err <= 0) {
2272 if (!nowait) {
2273 if (err == 0)
2274 return 0;
2275 goto nla_put_failure;
2276 } else {
2277 if (err == -EMSGSIZE)
2278 goto nla_put_failure;
2279 }
2280 }
2281 } else
2282#endif
2283 NLA_PUT_U32(skb, RTA_IIF, iif);
2284 } else if (dst) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002285 struct inet6_dev *idev = ip6_dst_idev(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002286 struct in6_addr saddr_buf;
Brian Haley191cd582008-08-14 15:33:21 -07002287 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
YOSHIFUJI Hideaki7cbca672008-03-25 09:37:42 +09002288 dst, 0, &saddr_buf) == 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002289 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002290 }
Thomas Graf2d7202b2006-08-22 00:01:27 -07002291
Changli Gaod8d1f302010-06-10 23:31:35 -07002292 if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002293 goto nla_put_failure;
2294
Changli Gaod8d1f302010-06-10 23:31:35 -07002295 if (rt->dst.neighbour)
2296 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key);
Thomas Graf2d7202b2006-08-22 00:01:27 -07002297
Changli Gaod8d1f302010-06-10 23:31:35 -07002298 if (rt->dst.dev)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002299 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2300
2301 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
Thomas Grafe3703b32006-11-27 09:27:07 -08002302
YOSHIFUJI Hideaki36e3dea2008-05-13 02:52:55 +09002303 if (!(rt->rt6i_flags & RTF_EXPIRES))
2304 expires = 0;
2305 else if (rt->rt6i_expires - jiffies < INT_MAX)
2306 expires = rt->rt6i_expires - jiffies;
2307 else
2308 expires = INT_MAX;
YOSHIFUJI Hideaki69cdf8f2008-05-19 16:55:13 -07002309
Changli Gaod8d1f302010-06-10 23:31:35 -07002310 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2311 expires, rt->dst.error) < 0)
Thomas Grafe3703b32006-11-27 09:27:07 -08002312 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002313
Thomas Graf2d7202b2006-08-22 00:01:27 -07002314 return nlmsg_end(skb, nlh);
2315
2316nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08002317 nlmsg_cancel(skb, nlh);
2318 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002319}
2320
Patrick McHardy1b43af52006-08-10 23:11:17 -07002321int rt6_dump_route(struct rt6_info *rt, void *p_arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002322{
2323 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2324 int prefix;
2325
Thomas Graf2d7202b2006-08-22 00:01:27 -07002326 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2327 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002328 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2329 } else
2330 prefix = 0;
2331
Brian Haley191cd582008-08-14 15:33:21 -07002332 return rt6_fill_node(arg->net,
2333 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002334 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002335 prefix, 0, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002336}
2337
Thomas Grafc127ea22007-03-22 11:58:32 -07002338static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002339{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002340 struct net *net = sock_net(in_skb->sk);
Thomas Grafab364a62006-08-22 00:01:47 -07002341 struct nlattr *tb[RTA_MAX+1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002342 struct rt6_info *rt;
Thomas Grafab364a62006-08-22 00:01:47 -07002343 struct sk_buff *skb;
2344 struct rtmsg *rtm;
2345 struct flowi fl;
2346 int err, iif = 0;
2347
2348 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2349 if (err < 0)
2350 goto errout;
2351
2352 err = -EINVAL;
2353 memset(&fl, 0, sizeof(fl));
2354
2355 if (tb[RTA_SRC]) {
2356 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2357 goto errout;
2358
2359 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2360 }
2361
2362 if (tb[RTA_DST]) {
2363 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2364 goto errout;
2365
2366 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2367 }
2368
2369 if (tb[RTA_IIF])
2370 iif = nla_get_u32(tb[RTA_IIF]);
2371
2372 if (tb[RTA_OIF])
2373 fl.oif = nla_get_u32(tb[RTA_OIF]);
2374
2375 if (iif) {
2376 struct net_device *dev;
Daniel Lezcano55786892008-03-04 13:47:47 -08002377 dev = __dev_get_by_index(net, iif);
Thomas Grafab364a62006-08-22 00:01:47 -07002378 if (!dev) {
2379 err = -ENODEV;
2380 goto errout;
2381 }
2382 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002383
2384 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
Thomas Grafab364a62006-08-22 00:01:47 -07002385 if (skb == NULL) {
2386 err = -ENOBUFS;
2387 goto errout;
2388 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002389
2390 /* Reserve room for dummy headers, this skb can pass
2391 through good chunk of routing engine.
2392 */
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07002393 skb_reset_mac_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002394 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2395
Daniel Lezcano8a3edd82008-03-07 11:14:16 -08002396 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
Changli Gaod8d1f302010-06-10 23:31:35 -07002397 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002398
Brian Haley191cd582008-08-14 15:33:21 -07002399 err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002400 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002401 nlh->nlmsg_seq, 0, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002402 if (err < 0) {
Thomas Grafab364a62006-08-22 00:01:47 -07002403 kfree_skb(skb);
2404 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002405 }
2406
Daniel Lezcano55786892008-03-04 13:47:47 -08002407 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
Thomas Grafab364a62006-08-22 00:01:47 -07002408errout:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002409 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002410}
2411
Thomas Graf86872cb2006-08-22 00:01:08 -07002412void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002413{
2414 struct sk_buff *skb;
Daniel Lezcano55786892008-03-04 13:47:47 -08002415 struct net *net = info->nl_net;
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002416 u32 seq;
2417 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002418
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002419 err = -ENOBUFS;
2420 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
Thomas Graf86872cb2006-08-22 00:01:08 -07002421
Thomas Graf339bf982006-11-10 14:10:15 -08002422 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
Thomas Graf21713eb2006-08-15 00:35:24 -07002423 if (skb == NULL)
2424 goto errout;
2425
Brian Haley191cd582008-08-14 15:33:21 -07002426 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002427 event, info->pid, seq, 0, 0, 0);
Patrick McHardy26932562007-01-31 23:16:40 -08002428 if (err < 0) {
2429 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2430 WARN_ON(err == -EMSGSIZE);
2431 kfree_skb(skb);
2432 goto errout;
2433 }
Pablo Neira Ayuso1ce85fe2009-02-24 23:18:28 -08002434 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2435 info->nlh, gfp_any());
2436 return;
Thomas Graf21713eb2006-08-15 00:35:24 -07002437errout:
2438 if (err < 0)
Daniel Lezcano55786892008-03-04 13:47:47 -08002439 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002440}
2441
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002442static int ip6_route_dev_notify(struct notifier_block *this,
2443 unsigned long event, void *data)
2444{
2445 struct net_device *dev = (struct net_device *)data;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002446 struct net *net = dev_net(dev);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002447
2448 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002449 net->ipv6.ip6_null_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002450 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2451#ifdef CONFIG_IPV6_MULTIPLE_TABLES
Changli Gaod8d1f302010-06-10 23:31:35 -07002452 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002453 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
Changli Gaod8d1f302010-06-10 23:31:35 -07002454 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002455 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2456#endif
2457 }
2458
2459 return NOTIFY_OK;
2460}
2461
Linus Torvalds1da177e2005-04-16 15:20:36 -07002462/*
2463 * /proc
2464 */
2465
2466#ifdef CONFIG_PROC_FS
2467
2468#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
2469
/*
 * Buffer bookkeeping for a /proc read.
 * NOTE(review): no user of this struct is visible in this part of the
 * file; the field meanings below are inferred from their names only.
 */
struct rt6_proc_arg {
	char *buffer;	/* presumably the output buffer */
	int offset;	/* presumably the requested read offset */
	int length;	/* presumably the requested read length */
	int skip;
	int len;
};
2478
2479static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2480{
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002481 struct seq_file *m = p_arg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002482
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002483 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002484
2485#ifdef CONFIG_IPV6_SUBTREES
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002486 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002487#else
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002488 seq_puts(m, "00000000000000000000000000000000 00 ");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002489#endif
2490
2491 if (rt->rt6i_nexthop) {
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002492 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002493 } else {
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002494 seq_puts(m, "00000000000000000000000000000000");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002495 }
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002496 seq_printf(m, " %08x %08x %08x %08x %8s\n",
Changli Gaod8d1f302010-06-10 23:31:35 -07002497 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2498 rt->dst.__use, rt->rt6i_flags,
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002499 rt->rt6i_dev ? rt->rt6i_dev->name : "");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002500 return 0;
2501}
2502
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002503static int ipv6_route_show(struct seq_file *m, void *v)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002504{
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002505 struct net *net = (struct net *)m->private;
2506 fib6_clean_all(net, rt6_info_route, 0, m);
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002507 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002508}
2509
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002510static int ipv6_route_open(struct inode *inode, struct file *file)
2511{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002512 return single_open_net(inode, file, ipv6_route_show);
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002513}
2514
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002515static const struct file_operations ipv6_route_proc_fops = {
2516 .owner = THIS_MODULE,
2517 .open = ipv6_route_open,
2518 .read = seq_read,
2519 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002520 .release = single_release_net,
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002521};
2522
Linus Torvalds1da177e2005-04-16 15:20:36 -07002523static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2524{
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002525 struct net *net = (struct net *)seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002526 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002527 net->ipv6.rt6_stats->fib_nodes,
2528 net->ipv6.rt6_stats->fib_route_nodes,
2529 net->ipv6.rt6_stats->fib_rt_alloc,
2530 net->ipv6.rt6_stats->fib_rt_entries,
2531 net->ipv6.rt6_stats->fib_rt_cache,
Eric Dumazetfc66f952010-10-08 06:37:34 +00002532 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002533 net->ipv6.rt6_stats->fib_discarded_routes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002534
2535 return 0;
2536}
2537
2538static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2539{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002540 return single_open_net(inode, file, rt6_stats_seq_show);
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002541}
2542
Arjan van de Ven9a321442007-02-12 00:55:35 -08002543static const struct file_operations rt6_stats_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002544 .owner = THIS_MODULE,
2545 .open = rt6_stats_seq_open,
2546 .read = seq_read,
2547 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002548 .release = single_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002549};
2550#endif /* CONFIG_PROC_FS */
2551
2552#ifdef CONFIG_SYSCTL
2553
Linus Torvalds1da177e2005-04-16 15:20:36 -07002554static
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07002555int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002556 void __user *buffer, size_t *lenp, loff_t *ppos)
2557{
Daniel Lezcano5b7c9312008-03-03 23:28:58 -08002558 struct net *net = current->nsproxy->net_ns;
2559 int delay = net->ipv6.sysctl.flush_delay;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002560 if (write) {
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07002561 proc_dointvec(ctl, write, buffer, lenp, ppos);
Daniel Lezcano5b7c9312008-03-03 23:28:58 -08002562 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002563 return 0;
2564 } else
2565 return -EINVAL;
2566}
2567
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002568ctl_table ipv6_route_table_template[] = {
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002569 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002570 .procname = "flush",
Daniel Lezcano49905092008-01-10 03:01:01 -08002571 .data = &init_net.ipv6.sysctl.flush_delay,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002572 .maxlen = sizeof(int),
Dave Jones89c8b3a12005-04-28 12:11:49 -07002573 .mode = 0200,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002574 .proc_handler = ipv6_sysctl_rtcache_flush
Linus Torvalds1da177e2005-04-16 15:20:36 -07002575 },
2576 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002577 .procname = "gc_thresh",
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002578 .data = &ip6_dst_ops_template.gc_thresh,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002579 .maxlen = sizeof(int),
2580 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002581 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002582 },
2583 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002584 .procname = "max_size",
Daniel Lezcano49905092008-01-10 03:01:01 -08002585 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002586 .maxlen = sizeof(int),
2587 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002588 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002589 },
2590 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002591 .procname = "gc_min_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002592 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002593 .maxlen = sizeof(int),
2594 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002595 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002596 },
2597 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002598 .procname = "gc_timeout",
Daniel Lezcano49905092008-01-10 03:01:01 -08002599 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002600 .maxlen = sizeof(int),
2601 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002602 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002603 },
2604 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002605 .procname = "gc_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002606 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002607 .maxlen = sizeof(int),
2608 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002609 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002610 },
2611 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002612 .procname = "gc_elasticity",
Daniel Lezcano49905092008-01-10 03:01:01 -08002613 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002614 .maxlen = sizeof(int),
2615 .mode = 0644,
Min Zhangf3d3f612010-08-14 22:42:51 -07002616 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002617 },
2618 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002619 .procname = "mtu_expires",
Daniel Lezcano49905092008-01-10 03:01:01 -08002620 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002621 .maxlen = sizeof(int),
2622 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002623 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002624 },
2625 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002626 .procname = "min_adv_mss",
Daniel Lezcano49905092008-01-10 03:01:01 -08002627 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002628 .maxlen = sizeof(int),
2629 .mode = 0644,
Min Zhangf3d3f612010-08-14 22:42:51 -07002630 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002631 },
2632 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002633 .procname = "gc_min_interval_ms",
Daniel Lezcano49905092008-01-10 03:01:01 -08002634 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002635 .maxlen = sizeof(int),
2636 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002637 .proc_handler = proc_dointvec_ms_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002638 },
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08002639 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002640};
2641
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002642struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002643{
2644 struct ctl_table *table;
2645
2646 table = kmemdup(ipv6_route_table_template,
2647 sizeof(ipv6_route_table_template),
2648 GFP_KERNEL);
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002649
2650 if (table) {
2651 table[0].data = &net->ipv6.sysctl.flush_delay;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002652 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002653 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2654 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2655 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2656 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2657 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2658 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2659 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
Alexey Dobriyan9c69fab2009-12-18 20:11:03 -08002660 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002661 }
2662
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002663 return table;
2664}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002665#endif
2666
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002667static int __net_init ip6_route_net_init(struct net *net)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002668{
Pavel Emelyanov633d4242008-04-21 14:25:23 -07002669 int ret = -ENOMEM;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002670
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002671 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2672 sizeof(net->ipv6.ip6_dst_ops));
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002673
Eric Dumazetfc66f952010-10-08 06:37:34 +00002674 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2675 goto out_ip6_dst_ops;
2676
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002677 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2678 sizeof(*net->ipv6.ip6_null_entry),
2679 GFP_KERNEL);
2680 if (!net->ipv6.ip6_null_entry)
Eric Dumazetfc66f952010-10-08 06:37:34 +00002681 goto out_ip6_dst_entries;
Changli Gaod8d1f302010-06-10 23:31:35 -07002682 net->ipv6.ip6_null_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002683 (struct dst_entry *)net->ipv6.ip6_null_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002684 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002685
2686#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2687 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2688 sizeof(*net->ipv6.ip6_prohibit_entry),
2689 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002690 if (!net->ipv6.ip6_prohibit_entry)
2691 goto out_ip6_null_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002692 net->ipv6.ip6_prohibit_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002693 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002694 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002695
2696 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2697 sizeof(*net->ipv6.ip6_blk_hole_entry),
2698 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002699 if (!net->ipv6.ip6_blk_hole_entry)
2700 goto out_ip6_prohibit_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002701 net->ipv6.ip6_blk_hole_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002702 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002703 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002704#endif
2705
Peter Zijlstrab339a47c2008-10-07 14:15:00 -07002706 net->ipv6.sysctl.flush_delay = 0;
2707 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2708 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2709 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2710 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2711 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2712 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2713 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2714
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002715#ifdef CONFIG_PROC_FS
2716 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2717 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2718#endif
Benjamin Thery6891a342008-03-04 13:49:47 -08002719 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2720
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002721 ret = 0;
2722out:
2723 return ret;
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002724
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002725#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2726out_ip6_prohibit_entry:
2727 kfree(net->ipv6.ip6_prohibit_entry);
2728out_ip6_null_entry:
2729 kfree(net->ipv6.ip6_null_entry);
2730#endif
Eric Dumazetfc66f952010-10-08 06:37:34 +00002731out_ip6_dst_entries:
2732 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002733out_ip6_dst_ops:
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002734 goto out;
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002735}
2736
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002737static void __net_exit ip6_route_net_exit(struct net *net)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002738{
2739#ifdef CONFIG_PROC_FS
2740 proc_net_remove(net, "ipv6_route");
2741 proc_net_remove(net, "rt6_stats");
2742#endif
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002743 kfree(net->ipv6.ip6_null_entry);
2744#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2745 kfree(net->ipv6.ip6_prohibit_entry);
2746 kfree(net->ipv6.ip6_blk_hole_entry);
2747#endif
Xiaotian Feng41bb78b2010-11-02 16:11:05 +00002748 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002749}
2750
2751static struct pernet_operations ip6_route_net_ops = {
2752 .init = ip6_route_net_init,
2753 .exit = ip6_route_net_exit,
2754};
2755
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002756static struct notifier_block ip6_route_dev_notifier = {
2757 .notifier_call = ip6_route_dev_notify,
2758 .priority = 0,
2759};
2760
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002761int __init ip6_route_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002762{
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002763 int ret;
2764
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002765 ret = -ENOMEM;
2766 ip6_dst_ops_template.kmem_cachep =
2767 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2768 SLAB_HWCACHE_ALIGN, NULL);
2769 if (!ip6_dst_ops_template.kmem_cachep)
Fernando Carrijoc19a28e2009-01-07 18:09:08 -08002770 goto out;
David S. Miller14e50e52007-05-24 18:17:54 -07002771
Eric Dumazetfc66f952010-10-08 06:37:34 +00002772 ret = dst_entries_init(&ip6_dst_blackhole_ops);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002773 if (ret)
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002774 goto out_kmem_cache;
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002775
Eric Dumazetfc66f952010-10-08 06:37:34 +00002776 ret = register_pernet_subsys(&ip6_route_net_ops);
2777 if (ret)
2778 goto out_dst_entries;
2779
Arnaud Ebalard5dc121e2008-10-01 02:37:56 -07002780 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2781
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002782 /* Registering of the loopback is done before this portion of code,
2783 * the loopback reference in rt6_info will not be taken, do it
2784 * manually for init_net */
Changli Gaod8d1f302010-06-10 23:31:35 -07002785 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002786 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2787 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
Changli Gaod8d1f302010-06-10 23:31:35 -07002788 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002789 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
Changli Gaod8d1f302010-06-10 23:31:35 -07002790 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002791 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2792 #endif
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002793 ret = fib6_init();
2794 if (ret)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002795 goto out_register_subsys;
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002796
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002797 ret = xfrm6_init();
2798 if (ret)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002799 goto out_fib6_init;
Daniel Lezcanoc35b7e72007-12-08 00:14:11 -08002800
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002801 ret = fib6_rules_init();
2802 if (ret)
2803 goto xfrm6_init;
Daniel Lezcano7e5449c2007-12-08 00:14:54 -08002804
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002805 ret = -ENOBUFS;
2806 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2807 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2808 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2809 goto fib6_rules_init;
2810
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002811 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002812 if (ret)
2813 goto fib6_rules_init;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002814
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002815out:
2816 return ret;
2817
2818fib6_rules_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002819 fib6_rules_cleanup();
2820xfrm6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002821 xfrm6_fini();
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002822out_fib6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002823 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002824out_register_subsys:
2825 unregister_pernet_subsys(&ip6_route_net_ops);
Eric Dumazetfc66f952010-10-08 06:37:34 +00002826out_dst_entries:
2827 dst_entries_destroy(&ip6_dst_blackhole_ops);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002828out_kmem_cache:
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002829 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002830 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002831}
2832
2833void ip6_route_cleanup(void)
2834{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002835 unregister_netdevice_notifier(&ip6_route_dev_notifier);
Thomas Graf101367c2006-08-04 03:39:02 -07002836 fib6_rules_cleanup();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002837 xfrm6_fini();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002838 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002839 unregister_pernet_subsys(&ip6_route_net_ops);
Xiaotian Feng41bb78b2010-11-02 16:11:05 +00002840 dst_entries_destroy(&ip6_dst_blackhole_ops);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002841 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002842}