blob: 76f06b94ab9f61a6e7ed6c844feeba21cae47e9a [file] [log] [blame]
/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
13
/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		  routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		  reachable; otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */
26
Randy Dunlap4fc268d2006-01-11 12:17:47 -080027#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090037#include <linux/mroute6.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070038#include <linux/init.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039#include <linux/if_arp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070040#include <linux/proc_fs.h>
41#include <linux/seq_file.h>
Daniel Lezcano5b7c9312008-03-03 23:28:58 -080042#include <linux/nsproxy.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020043#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070044#include <net/snmp.h>
45#include <net/ipv6.h>
46#include <net/ip6_fib.h>
47#include <net/ip6_route.h>
48#include <net/ndisc.h>
49#include <net/addrconf.h>
50#include <net/tcp.h>
51#include <linux/rtnetlink.h>
52#include <net/dst.h>
53#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -070054#include <net/netevent.h>
Thomas Graf21713eb2006-08-15 00:35:24 -070055#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070056
57#include <asm/uaccess.h>
58
59#ifdef CONFIG_SYSCTL
60#include <linux/sysctl.h>
61#endif
62
/*
 * Debug verbosity for this file.  With RT6_DEBUG at 3 or above, RDBG() and
 * RT6_TRACE() print through printk(); below that both expand to nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif

/*
 * Compile-time switch read by ip6_pol_route(): when non-zero, routes that
 * already have a nexthop (or are RTF_NONEXTHOP) are cloned with
 * rt6_alloc_clone(); when 0 they are returned without cloning.
 */
#define CLONE_OFFLINK_ROUTE 0
Linus Torvalds1da177e2005-04-16 15:20:36 -070075
Linus Torvalds1da177e2005-04-16 15:20:36 -070076static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
77static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
78static struct dst_entry *ip6_negative_advice(struct dst_entry *);
79static void ip6_dst_destroy(struct dst_entry *);
80static void ip6_dst_ifdown(struct dst_entry *,
81 struct net_device *dev, int how);
Daniel Lezcano569d3642008-01-18 03:56:57 -080082static int ip6_dst_gc(struct dst_ops *ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -070083
84static int ip6_pkt_discard(struct sk_buff *skb);
85static int ip6_pkt_discard_out(struct sk_buff *skb);
86static void ip6_link_failure(struct sk_buff *skb);
87static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
88
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080089#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -080090static struct rt6_info *rt6_add_route_info(struct net *net,
91 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080092 struct in6_addr *gwaddr, int ifindex,
93 unsigned pref);
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -080094static struct rt6_info *rt6_get_route_info(struct net *net,
95 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080096 struct in6_addr *gwaddr, int ifindex);
97#endif
98
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -080099static struct dst_ops ip6_dst_ops_template = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700100 .family = AF_INET6,
101 .protocol = __constant_htons(ETH_P_IPV6),
102 .gc = ip6_dst_gc,
103 .gc_thresh = 1024,
104 .check = ip6_dst_check,
105 .destroy = ip6_dst_destroy,
106 .ifdown = ip6_dst_ifdown,
107 .negative_advice = ip6_negative_advice,
108 .link_failure = ip6_link_failure,
109 .update_pmtu = ip6_rt_update_pmtu,
Herbert Xu1ac06e02008-05-20 14:32:14 -0700110 .local_out = __ip6_local_out,
Eric Dumazete2422972008-01-30 20:07:45 -0800111 .entries = ATOMIC_INIT(0),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700112};
113
David S. Miller14e50e52007-05-24 18:17:54 -0700114static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
115{
116}
117
118static struct dst_ops ip6_dst_blackhole_ops = {
119 .family = AF_INET6,
120 .protocol = __constant_htons(ETH_P_IPV6),
121 .destroy = ip6_dst_destroy,
122 .check = ip6_dst_check,
123 .update_pmtu = ip6_rt_blackhole_update_pmtu,
Eric Dumazete2422972008-01-30 20:07:45 -0800124 .entries = ATOMIC_INIT(0),
David S. Miller14e50e52007-05-24 18:17:54 -0700125};
126
Daniel Lezcanobdb32892008-03-04 13:48:10 -0800127static struct rt6_info ip6_null_entry_template = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700128 .u = {
129 .dst = {
130 .__refcnt = ATOMIC_INIT(1),
131 .__use = 1,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700132 .obsolete = -1,
133 .error = -ENETUNREACH,
134 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
135 .input = ip6_pkt_discard,
136 .output = ip6_pkt_discard_out,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700137 }
138 },
139 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
140 .rt6i_metric = ~(u32) 0,
141 .rt6i_ref = ATOMIC_INIT(1),
142};
143
Thomas Graf101367c2006-08-04 03:39:02 -0700144#ifdef CONFIG_IPV6_MULTIPLE_TABLES
145
David S. Miller6723ab52006-10-18 21:20:57 -0700146static int ip6_pkt_prohibit(struct sk_buff *skb);
147static int ip6_pkt_prohibit_out(struct sk_buff *skb);
David S. Miller6723ab52006-10-18 21:20:57 -0700148
Adrian Bunk280a34c2008-04-21 02:29:32 -0700149static struct rt6_info ip6_prohibit_entry_template = {
Thomas Graf101367c2006-08-04 03:39:02 -0700150 .u = {
151 .dst = {
152 .__refcnt = ATOMIC_INIT(1),
153 .__use = 1,
Thomas Graf101367c2006-08-04 03:39:02 -0700154 .obsolete = -1,
155 .error = -EACCES,
156 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
Thomas Graf9ce8ade2006-10-18 20:46:54 -0700157 .input = ip6_pkt_prohibit,
158 .output = ip6_pkt_prohibit_out,
Thomas Graf101367c2006-08-04 03:39:02 -0700159 }
160 },
161 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
162 .rt6i_metric = ~(u32) 0,
163 .rt6i_ref = ATOMIC_INIT(1),
164};
165
Daniel Lezcanobdb32892008-03-04 13:48:10 -0800166static struct rt6_info ip6_blk_hole_entry_template = {
Thomas Graf101367c2006-08-04 03:39:02 -0700167 .u = {
168 .dst = {
169 .__refcnt = ATOMIC_INIT(1),
170 .__use = 1,
Thomas Graf101367c2006-08-04 03:39:02 -0700171 .obsolete = -1,
172 .error = -EINVAL,
173 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
Herbert Xu352e5122007-11-13 21:34:06 -0800174 .input = dst_discard,
175 .output = dst_discard,
Thomas Graf101367c2006-08-04 03:39:02 -0700176 }
177 },
178 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
179 .rt6i_metric = ~(u32) 0,
180 .rt6i_ref = ATOMIC_INIT(1),
181};
182
183#endif
184
/*
 * Allocate a dst entry from @ops via dst_alloc() and return it typed as a
 * struct rt6_info.  The result of dst_alloc() is passed through unchecked.
 */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
	void *entry = dst_alloc(ops);

	return (struct rt6_info *)entry;
}
190
191static void ip6_dst_destroy(struct dst_entry *dst)
192{
193 struct rt6_info *rt = (struct rt6_info *)dst;
194 struct inet6_dev *idev = rt->rt6i_idev;
195
196 if (idev != NULL) {
197 rt->rt6i_idev = NULL;
198 in6_dev_put(idev);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900199 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700200}
201
202static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
203 int how)
204{
205 struct rt6_info *rt = (struct rt6_info *)dst;
206 struct inet6_dev *idev = rt->rt6i_idev;
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800207 struct net_device *loopback_dev =
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900208 dev_net(dev)->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700209
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800210 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
211 struct inet6_dev *loopback_idev =
212 in6_dev_get(loopback_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700213 if (loopback_idev != NULL) {
214 rt->rt6i_idev = loopback_idev;
215 in6_dev_put(idev);
216 }
217 }
218}
219
220static __inline__ int rt6_check_expired(const struct rt6_info *rt)
221{
222 return (rt->rt6i_flags & RTF_EXPIRES &&
223 time_after(jiffies, rt->rt6i_expires));
224}
225
Thomas Grafc71099a2006-08-04 23:20:06 -0700226static inline int rt6_need_strict(struct in6_addr *daddr)
227{
228 return (ipv6_addr_type(daddr) &
YOSHIFUJI Hideaki5ce83af2008-06-25 16:58:17 +0900229 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK));
Thomas Grafc71099a2006-08-04 23:20:06 -0700230}
231
Linus Torvalds1da177e2005-04-16 15:20:36 -0700232/*
Thomas Grafc71099a2006-08-04 23:20:06 -0700233 * Route lookup. Any table->tb6_lock is implied.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700234 */
235
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800236static inline struct rt6_info *rt6_device_match(struct net *net,
237 struct rt6_info *rt,
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900238 struct in6_addr *saddr,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700239 int oif,
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700240 int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700241{
242 struct rt6_info *local = NULL;
243 struct rt6_info *sprt;
244
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900245 if (!oif && ipv6_addr_any(saddr))
246 goto out;
247
248 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
249 struct net_device *dev = sprt->rt6i_dev;
250
251 if (oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700252 if (dev->ifindex == oif)
253 return sprt;
254 if (dev->flags & IFF_LOOPBACK) {
255 if (sprt->rt6i_idev == NULL ||
256 sprt->rt6i_idev->dev->ifindex != oif) {
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700257 if (flags & RT6_LOOKUP_F_IFACE && oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700258 continue;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900259 if (local && (!oif ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700260 local->rt6i_idev->dev->ifindex == oif))
261 continue;
262 }
263 local = sprt;
264 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900265 } else {
266 if (ipv6_chk_addr(net, saddr, dev,
267 flags & RT6_LOOKUP_F_IFACE))
268 return sprt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700269 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900270 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700271
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900272 if (oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700273 if (local)
274 return local;
275
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700276 if (flags & RT6_LOOKUP_F_IFACE)
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800277 return net->ipv6.ip6_null_entry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700278 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900279out:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700280 return rt;
281}
282
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * If @rt has a nexthop neighbour that is not in a NUD_VALID state and the
 * neighbour was last updated more than the interface's rtr_probe_interval
 * ago, stamp it with the current jiffies and send a Neighbour Solicitation
 * for its address to the solicited-node multicast address.
 *
 * Original author's note: this does not seem to be appropriate for now,
 * however we need to check whether it is really so.  Router Reachability
 * Probes MUST be rate-limited to no more than one per minute.
 *
 * NOTE(review): neigh->updated is written while only the read side of
 * neigh->lock is held - confirm whether that is intended.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	struct in6_addr mcaddr;
	struct in6_addr *target;
	int due;

	if (rt == NULL)
		return;

	neigh = rt->rt6i_nexthop;
	if (neigh == NULL || (neigh->nud_state & NUD_VALID))
		return;

	read_lock_bh(&neigh->lock);
	/* re-check the state now that the lock is held */
	due = !(neigh->nud_state & NUD_VALID) &&
	      time_after(jiffies,
			 neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
	if (due)
		neigh->updated = jiffies;
	read_unlock_bh(&neigh->lock);

	if (!due)
		return;

	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support is compiled out: probing does nothing. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
318
Linus Torvalds1da177e2005-04-16 15:20:36 -0700319/*
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800320 * Default Router Selection (RFC 2461 6.3.6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700321 */
Dave Jonesb6f99a22007-03-22 12:27:49 -0700322static inline int rt6_check_dev(struct rt6_info *rt, int oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700323{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800324 struct net_device *dev = rt->rt6i_dev;
David S. Miller161980f2007-04-06 11:42:27 -0700325 if (!oif || dev->ifindex == oif)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800326 return 2;
David S. Miller161980f2007-04-06 11:42:27 -0700327 if ((dev->flags & IFF_LOOPBACK) &&
328 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
329 return 1;
330 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700331}
332
Dave Jonesb6f99a22007-03-22 12:27:49 -0700333static inline int rt6_check_neigh(struct rt6_info *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700334{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800335 struct neighbour *neigh = rt->rt6i_nexthop;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800336 int m;
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700337 if (rt->rt6i_flags & RTF_NONEXTHOP ||
338 !(rt->rt6i_flags & RTF_GATEWAY))
339 m = 1;
340 else if (neigh) {
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800341 read_lock_bh(&neigh->lock);
342 if (neigh->nud_state & NUD_VALID)
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700343 m = 2;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800344#ifdef CONFIG_IPV6_ROUTER_PREF
345 else if (neigh->nud_state & NUD_FAILED)
346 m = 0;
347#endif
348 else
YOSHIFUJI Hideakiea73ee22006-11-06 09:45:44 -0800349 m = 1;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800350 read_unlock_bh(&neigh->lock);
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800351 } else
352 m = 0;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800353 return m;
354}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700355
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800356static int rt6_score_route(struct rt6_info *rt, int oif,
357 int strict)
358{
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700359 int m, n;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900360
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700361 m = rt6_check_dev(rt, oif);
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700362 if (!m && (strict & RT6_LOOKUP_F_IFACE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800363 return -1;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800364#ifdef CONFIG_IPV6_ROUTER_PREF
365 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
366#endif
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700367 n = rt6_check_neigh(rt);
YOSHIFUJI Hideaki557e92e2006-11-06 09:45:45 -0800368 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800369 return -1;
370 return m;
371}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700372
David S. Millerf11e6652007-03-24 20:36:25 -0700373static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
374 int *mpri, struct rt6_info *match)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800375{
David S. Millerf11e6652007-03-24 20:36:25 -0700376 int m;
377
378 if (rt6_check_expired(rt))
379 goto out;
380
381 m = rt6_score_route(rt, oif, strict);
382 if (m < 0)
383 goto out;
384
385 if (m > *mpri) {
386 if (strict & RT6_LOOKUP_F_REACHABLE)
387 rt6_probe(match);
388 *mpri = m;
389 match = rt;
390 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
391 rt6_probe(rt);
392 }
393
394out:
395 return match;
396}
397
398static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
399 struct rt6_info *rr_head,
400 u32 metric, int oif, int strict)
401{
402 struct rt6_info *rt, *match;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800403 int mpri = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700404
David S. Millerf11e6652007-03-24 20:36:25 -0700405 match = NULL;
406 for (rt = rr_head; rt && rt->rt6i_metric == metric;
407 rt = rt->u.dst.rt6_next)
408 match = find_match(rt, oif, strict, &mpri, match);
409 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
410 rt = rt->u.dst.rt6_next)
411 match = find_match(rt, oif, strict, &mpri, match);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800412
David S. Millerf11e6652007-03-24 20:36:25 -0700413 return match;
414}
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800415
David S. Millerf11e6652007-03-24 20:36:25 -0700416static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
417{
418 struct rt6_info *match, *rt0;
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800419 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700420
David S. Millerf11e6652007-03-24 20:36:25 -0700421 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
Harvey Harrison0dc47872008-03-05 20:47:47 -0800422 __func__, fn->leaf, oif);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700423
David S. Millerf11e6652007-03-24 20:36:25 -0700424 rt0 = fn->rr_ptr;
425 if (!rt0)
426 fn->rr_ptr = rt0 = fn->leaf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700427
David S. Millerf11e6652007-03-24 20:36:25 -0700428 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700429
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800430 if (!match &&
David S. Millerf11e6652007-03-24 20:36:25 -0700431 (strict & RT6_LOOKUP_F_REACHABLE)) {
432 struct rt6_info *next = rt0->u.dst.rt6_next;
433
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800434 /* no entries matched; do round-robin */
David S. Millerf11e6652007-03-24 20:36:25 -0700435 if (!next || next->rt6i_metric != rt0->rt6i_metric)
436 next = fn->leaf;
437
438 if (next != rt0)
439 fn->rr_ptr = next;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700440 }
441
David S. Millerf11e6652007-03-24 20:36:25 -0700442 RT6_TRACE("%s() => %p\n",
Harvey Harrison0dc47872008-03-05 20:47:47 -0800443 __func__, match);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700444
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900445 net = dev_net(rt0->rt6i_dev);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800446 return (match ? match : net->ipv6.ip6_null_entry);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700447}
448
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * rt6_route_rcv - handle a received Route Information option (RFC 4191).
 *
 * @dev:    device the option arrived on
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router, used as the gateway
 *
 * Looks up the route described by the option.  A lifetime of zero deletes
 * an existing route; otherwise the route is created if missing, or its
 * preference bits are refreshed, and its expiry is set from the lifetime
 * (or cleared for a non-finite lifetime).
 *
 * Returns 0 on success and -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	/*
	 * Reject negative lengths explicitly: a bare comparison against
	 * sizeof() would convert them to a huge unsigned value and let
	 * them through.
	 */
	if (len < 0 || (size_t)len < sizeof(struct route_info))
		return -EINVAL;

	/*
	 * Sanity check for prefix_len and length (RFC 4191, section 2.3):
	 * the option length is 1, 2 or 3 units of 8 octets; a prefix longer
	 * than 64 bits requires length 3, and any non-empty prefix requires
	 * length 2 or 3.  Enforcing this guarantees that every prefix byte
	 * read below lies inside the option.
	 */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 3)
			return -EINVAL;
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 2)
			return -EINVAL;
	}

	/* an invalid preference value is treated as medium */
	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		pref = ICMPV6_ROUTER_PREF_MEDIUM;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/*
		 * Shorter option: copy only the leading prefix_len bits
		 * into a zero-filled buffer.  Safe because prefix_len is at
		 * most 64 here and the checks above ensure those bytes are
		 * present.
		 */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* zero lifetime withdraws an existing route */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime)) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		/* drop the reference obtained from the lookup/add above */
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif
522
/*
 * BACKTRACK(net, saddr) - climb the fib6 tree after a failed match.
 *
 * Must be expanded inside a function that provides:
 *   - a variable "rt" (struct rt6_info *) holding the lookup result,
 *   - a variable "fn" (struct fib6_node *) holding the current node,
 *   - a label "restart" that retries the match at the updated "fn",
 *   - a label "out" that finishes the lookup with the current "rt".
 *
 * Does nothing unless rt is the namespace's ip6_null_entry.  Otherwise it
 * moves fn towards the root: when the parent has a subtree other than fn,
 * the subtree is searched with @saddr, else fn becomes the parent.  It
 * jumps to "restart" at the first node flagged RTN_RTINFO, or to "out"
 * when a node flagged RTN_TL_ROOT is reached.
 *
 * Both macro arguments are parenthesized so that arbitrary expressions can
 * be passed safely.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == (__net)->ipv6.ip6_null_entry) {			\
		struct fib6_node *pn;					\
		while (1) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, (saddr)); \
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while(0)
Thomas Grafc71099a2006-08-04 23:20:06 -0700540
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800541static struct rt6_info *ip6_pol_route_lookup(struct net *net,
542 struct fib6_table *table,
Thomas Grafc71099a2006-08-04 23:20:06 -0700543 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700544{
545 struct fib6_node *fn;
546 struct rt6_info *rt;
547
Thomas Grafc71099a2006-08-04 23:20:06 -0700548 read_lock_bh(&table->tb6_lock);
549 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
550restart:
551 rt = fn->leaf;
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900552 rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800553 BACKTRACK(net, &fl->fl6_src);
Thomas Grafc71099a2006-08-04 23:20:06 -0700554out:
Pavel Emelyanov03f49f32007-11-10 21:28:34 -0800555 dst_use(&rt->u.dst, jiffies);
Thomas Grafc71099a2006-08-04 23:20:06 -0700556 read_unlock_bh(&table->tb6_lock);
Thomas Grafc71099a2006-08-04 23:20:06 -0700557 return rt;
558
559}
560
YOSHIFUJI Hideaki9acd9f32008-04-10 15:42:10 +0900561struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
562 const struct in6_addr *saddr, int oif, int strict)
Thomas Grafc71099a2006-08-04 23:20:06 -0700563{
564 struct flowi fl = {
565 .oif = oif,
566 .nl_u = {
567 .ip6_u = {
568 .daddr = *daddr,
Thomas Grafc71099a2006-08-04 23:20:06 -0700569 },
570 },
571 };
572 struct dst_entry *dst;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700573 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
Thomas Grafc71099a2006-08-04 23:20:06 -0700574
Thomas Grafadaa70b2006-10-13 15:01:03 -0700575 if (saddr) {
576 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
577 flags |= RT6_LOOKUP_F_HAS_SADDR;
578 }
579
Daniel Lezcano606a2b42008-03-04 13:45:59 -0800580 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
Thomas Grafc71099a2006-08-04 23:20:06 -0700581 if (dst->error == 0)
582 return (struct rt6_info *) dst;
583
584 dst_release(dst);
585
Linus Torvalds1da177e2005-04-16 15:20:36 -0700586 return NULL;
587}
588
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900589EXPORT_SYMBOL(rt6_lookup);
590
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason, the
   route is freed. In any case, if the caller does not hold a reference
   to the route, it may be destroyed.
 */
596
Thomas Graf86872cb2006-08-22 00:01:08 -0700597static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700598{
599 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -0700600 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700601
Thomas Grafc71099a2006-08-04 23:20:06 -0700602 table = rt->rt6i_table;
603 write_lock_bh(&table->tb6_lock);
Thomas Graf86872cb2006-08-22 00:01:08 -0700604 err = fib6_add(&table->tb6_root, rt, info);
Thomas Grafc71099a2006-08-04 23:20:06 -0700605 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700606
607 return err;
608}
609
Thomas Graf40e22e82006-08-22 00:00:45 -0700610int ip6_ins_rt(struct rt6_info *rt)
611{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800612 struct nl_info info = {
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900613 .nl_net = dev_net(rt->rt6i_dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800614 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -0800615 return __ip6_ins_rt(rt, &info);
Thomas Graf40e22e82006-08-22 00:00:45 -0700616}
617
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800618static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
619 struct in6_addr *saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700620{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700621 struct rt6_info *rt;
622
623 /*
624 * Clone the route.
625 */
626
627 rt = ip6_rt_copy(ort);
628
629 if (rt) {
David S. Miller14deae42009-01-04 16:04:39 -0800630 struct neighbour *neigh;
631 int attempts = !in_softirq();
632
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900633 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
634 if (rt->rt6i_dst.plen != 128 &&
635 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
636 rt->rt6i_flags |= RTF_ANYCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700637 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900638 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700639
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900640 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700641 rt->rt6i_dst.plen = 128;
642 rt->rt6i_flags |= RTF_CACHE;
643 rt->u.dst.flags |= DST_HOST;
644
645#ifdef CONFIG_IPV6_SUBTREES
646 if (rt->rt6i_src.plen && saddr) {
647 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
648 rt->rt6i_src.plen = 128;
649 }
650#endif
651
David S. Miller14deae42009-01-04 16:04:39 -0800652 retry:
653 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
654 if (IS_ERR(neigh)) {
655 struct net *net = dev_net(rt->rt6i_dev);
656 int saved_rt_min_interval =
657 net->ipv6.sysctl.ip6_rt_gc_min_interval;
658 int saved_rt_elasticity =
659 net->ipv6.sysctl.ip6_rt_gc_elasticity;
660
661 if (attempts-- > 0) {
662 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
663 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
664
665 ip6_dst_gc(net->ipv6.ip6_dst_ops);
666
667 net->ipv6.sysctl.ip6_rt_gc_elasticity =
668 saved_rt_elasticity;
669 net->ipv6.sysctl.ip6_rt_gc_min_interval =
670 saved_rt_min_interval;
671 goto retry;
672 }
673
674 if (net_ratelimit())
675 printk(KERN_WARNING
676 "Neighbour table overflow.\n");
677 dst_free(&rt->u.dst);
678 return NULL;
679 }
680 rt->rt6i_nexthop = neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700681
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800682 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700683
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800684 return rt;
685}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700686
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800687static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
688{
689 struct rt6_info *rt = ip6_rt_copy(ort);
690 if (rt) {
691 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
692 rt->rt6i_dst.plen = 128;
693 rt->rt6i_flags |= RTF_CACHE;
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800694 rt->u.dst.flags |= DST_HOST;
695 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
696 }
697 return rt;
698}
699
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800700static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
701 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700702{
703 struct fib6_node *fn;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800704 struct rt6_info *rt, *nrt;
Thomas Grafc71099a2006-08-04 23:20:06 -0700705 int strict = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700706 int attempts = 3;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800707 int err;
YOSHIFUJI Hideaki53b79972008-07-19 22:35:03 -0700708 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700709
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700710 strict |= flags & RT6_LOOKUP_F_IFACE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700711
712relookup:
Thomas Grafc71099a2006-08-04 23:20:06 -0700713 read_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700714
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800715restart_2:
Thomas Grafc71099a2006-08-04 23:20:06 -0700716 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700717
718restart:
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700719 rt = rt6_select(fn, oif, strict | reachable);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800720
721 BACKTRACK(net, &fl->fl6_src);
722 if (rt == net->ipv6.ip6_null_entry ||
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800723 rt->rt6i_flags & RTF_CACHE)
YOSHIFUJI Hideaki1ddef0442006-03-20 17:01:24 -0800724 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700725
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800726 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700727 read_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800728
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800729 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800730 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800731 else {
732#if CLONE_OFFLINK_ROUTE
733 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
734#else
735 goto out2;
736#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700737 }
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800738
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800739 dst_release(&rt->u.dst);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800740 rt = nrt ? : net->ipv6.ip6_null_entry;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800741
742 dst_hold(&rt->u.dst);
743 if (nrt) {
Thomas Graf40e22e82006-08-22 00:00:45 -0700744 err = ip6_ins_rt(nrt);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800745 if (!err)
746 goto out2;
747 }
748
749 if (--attempts <= 0)
750 goto out2;
751
752 /*
Thomas Grafc71099a2006-08-04 23:20:06 -0700753 * Race condition! In the gap, when table->tb6_lock was
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800754 * released someone could insert this route. Relookup.
755 */
756 dst_release(&rt->u.dst);
757 goto relookup;
758
759out:
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800760 if (reachable) {
761 reachable = 0;
762 goto restart_2;
763 }
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800764 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700765 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700766out2:
767 rt->u.dst.lastuse = jiffies;
768 rt->u.dst.__use++;
Thomas Grafc71099a2006-08-04 23:20:06 -0700769
770 return rt;
771}
772
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800773static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700774 struct flowi *fl, int flags)
775{
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800776 return ip6_pol_route(net, table, fl->iif, fl, flags);
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700777}
778
Thomas Grafc71099a2006-08-04 23:20:06 -0700779void ip6_route_input(struct sk_buff *skb)
780{
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700781 struct ipv6hdr *iph = ipv6_hdr(skb);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900782 struct net *net = dev_net(skb->dev);
Thomas Grafadaa70b2006-10-13 15:01:03 -0700783 int flags = RT6_LOOKUP_F_HAS_SADDR;
Thomas Grafc71099a2006-08-04 23:20:06 -0700784 struct flowi fl = {
785 .iif = skb->dev->ifindex,
786 .nl_u = {
787 .ip6_u = {
788 .daddr = iph->daddr,
789 .saddr = iph->saddr,
Al Viro90bcaf72006-11-08 00:25:17 -0800790 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
Thomas Grafc71099a2006-08-04 23:20:06 -0700791 },
792 },
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900793 .mark = skb->mark,
Thomas Grafc71099a2006-08-04 23:20:06 -0700794 .proto = iph->nexthdr,
795 };
Thomas Grafadaa70b2006-10-13 15:01:03 -0700796
797 if (rt6_need_strict(&iph->daddr))
798 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700799
Daniel Lezcano55786892008-03-04 13:47:47 -0800800 skb->dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input);
Thomas Grafc71099a2006-08-04 23:20:06 -0700801}
802
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800803static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
Thomas Grafc71099a2006-08-04 23:20:06 -0700804 struct flowi *fl, int flags)
805{
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800806 return ip6_pol_route(net, table, fl->oif, fl, flags);
Thomas Grafc71099a2006-08-04 23:20:06 -0700807}
808
Daniel Lezcano4591db42008-03-05 10:48:10 -0800809struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
810 struct flowi *fl)
Thomas Grafc71099a2006-08-04 23:20:06 -0700811{
812 int flags = 0;
813
814 if (rt6_need_strict(&fl->fl6_dst))
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700815 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700816
Thomas Grafadaa70b2006-10-13 15:01:03 -0700817 if (!ipv6_addr_any(&fl->fl6_src))
818 flags |= RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideaki7cbca672008-03-25 09:37:42 +0900819 else if (sk) {
820 unsigned int prefs = inet6_sk(sk)->srcprefs;
821 if (prefs & IPV6_PREFER_SRC_TMP)
822 flags |= RT6_LOOKUP_F_SRCPREF_TMP;
823 if (prefs & IPV6_PREFER_SRC_PUBLIC)
824 flags |= RT6_LOOKUP_F_SRCPREF_PUBLIC;
825 if (prefs & IPV6_PREFER_SRC_COA)
826 flags |= RT6_LOOKUP_F_SRCPREF_COA;
827 }
Thomas Grafadaa70b2006-10-13 15:01:03 -0700828
Daniel Lezcano4591db42008-03-05 10:48:10 -0800829 return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700830}
831
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900832EXPORT_SYMBOL(ip6_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700833
David S. Miller14e50e52007-05-24 18:17:54 -0700834int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
835{
836 struct rt6_info *ort = (struct rt6_info *) *dstp;
837 struct rt6_info *rt = (struct rt6_info *)
838 dst_alloc(&ip6_dst_blackhole_ops);
839 struct dst_entry *new = NULL;
840
841 if (rt) {
842 new = &rt->u.dst;
843
844 atomic_set(&new->__refcnt, 1);
845 new->__use = 1;
Herbert Xu352e5122007-11-13 21:34:06 -0800846 new->input = dst_discard;
847 new->output = dst_discard;
David S. Miller14e50e52007-05-24 18:17:54 -0700848
849 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
850 new->dev = ort->u.dst.dev;
851 if (new->dev)
852 dev_hold(new->dev);
853 rt->rt6i_idev = ort->rt6i_idev;
854 if (rt->rt6i_idev)
855 in6_dev_hold(rt->rt6i_idev);
856 rt->rt6i_expires = 0;
857
858 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
859 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
860 rt->rt6i_metric = 0;
861
862 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
863#ifdef CONFIG_IPV6_SUBTREES
864 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
865#endif
866
867 dst_free(new);
868 }
869
870 dst_release(*dstp);
871 *dstp = new;
872 return (new ? 0 : -ENOMEM);
873}
874EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
875
Linus Torvalds1da177e2005-04-16 15:20:36 -0700876/*
877 * Destination cache support functions
878 */
879
880static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
881{
882 struct rt6_info *rt;
883
884 rt = (struct rt6_info *) dst;
885
886 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
887 return dst;
888
889 return NULL;
890}
891
892static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
893{
894 struct rt6_info *rt = (struct rt6_info *) dst;
895
896 if (rt) {
897 if (rt->rt6i_flags & RTF_CACHE)
Thomas Grafe0a1ad732006-08-22 00:00:21 -0700898 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700899 else
900 dst_release(dst);
901 }
902 return NULL;
903}
904
905static void ip6_link_failure(struct sk_buff *skb)
906{
907 struct rt6_info *rt;
908
909 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
910
911 rt = (struct rt6_info *) skb->dst;
912 if (rt) {
913 if (rt->rt6i_flags&RTF_CACHE) {
914 dst_set_expires(&rt->u.dst, 0);
915 rt->rt6i_flags |= RTF_EXPIRES;
916 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
917 rt->rt6i_node->fn_sernum = -1;
918 }
919}
920
921static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
922{
923 struct rt6_info *rt6 = (struct rt6_info*)dst;
924
925 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
926 rt6->rt6i_flags |= RTF_MODIFIED;
927 if (mtu < IPV6_MIN_MTU) {
928 mtu = IPV6_MIN_MTU;
929 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
930 }
931 dst->metrics[RTAX_MTU-1] = mtu;
Tom Tucker8d717402006-07-30 20:43:36 -0700932 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700933 }
934}
935
Linus Torvalds1da177e2005-04-16 15:20:36 -0700936static int ipv6_get_mtu(struct net_device *dev);
937
Daniel Lezcano55786892008-03-04 13:47:47 -0800938static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700939{
940 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
941
Daniel Lezcano55786892008-03-04 13:47:47 -0800942 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
943 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700944
945 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900946 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
947 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
948 * IPV6_MAXPLEN is also valid and means: "any MSS,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700949 * rely only on pmtu discovery"
950 */
951 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
952 mtu = IPV6_MAXPLEN;
953 return mtu;
954}
955
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800956static struct dst_entry *icmp6_dst_gc_list;
957static DEFINE_SPINLOCK(icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700958
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800959struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700960 struct neighbour *neigh,
YOSHIFUJI Hideaki9acd9f32008-04-10 15:42:10 +0900961 const struct in6_addr *addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700962{
963 struct rt6_info *rt;
964 struct inet6_dev *idev = in6_dev_get(dev);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900965 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700966
967 if (unlikely(idev == NULL))
968 return NULL;
969
Benjamin Theryf2fc6a52008-03-04 13:49:23 -0800970 rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700971 if (unlikely(rt == NULL)) {
972 in6_dev_put(idev);
973 goto out;
974 }
975
976 dev_hold(dev);
977 if (neigh)
978 neigh_hold(neigh);
David S. Miller14deae42009-01-04 16:04:39 -0800979 else {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700980 neigh = ndisc_get_neigh(dev, addr);
David S. Miller14deae42009-01-04 16:04:39 -0800981 if (IS_ERR(neigh))
982 neigh = NULL;
983 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700984
985 rt->rt6i_dev = dev;
986 rt->rt6i_idev = idev;
987 rt->rt6i_nexthop = neigh;
988 atomic_set(&rt->u.dst.__refcnt, 1);
989 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
990 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
Daniel Lezcano55786892008-03-04 13:47:47 -0800991 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800992 rt->u.dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700993
994#if 0 /* there's no chance to use these for ndisc */
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900995 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
996 ? DST_HOST
Linus Torvalds1da177e2005-04-16 15:20:36 -0700997 : 0;
998 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
999 rt->rt6i_dst.plen = 128;
1000#endif
1001
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001002 spin_lock_bh(&icmp6_dst_lock);
1003 rt->u.dst.next = icmp6_dst_gc_list;
1004 icmp6_dst_gc_list = &rt->u.dst;
1005 spin_unlock_bh(&icmp6_dst_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001006
Daniel Lezcano55786892008-03-04 13:47:47 -08001007 fib6_force_start_gc(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001008
1009out:
YOSHIFUJI Hideaki40aa7b92006-10-19 13:50:09 +09001010 return &rt->u.dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001011}
1012
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001013int icmp6_dst_gc(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001014{
1015 struct dst_entry *dst, *next, **pprev;
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001016 int more = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001017
1018 next = NULL;
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001019
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001020 spin_lock_bh(&icmp6_dst_lock);
1021 pprev = &icmp6_dst_gc_list;
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001022
Linus Torvalds1da177e2005-04-16 15:20:36 -07001023 while ((dst = *pprev) != NULL) {
1024 if (!atomic_read(&dst->__refcnt)) {
1025 *pprev = dst->next;
1026 dst_free(dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001027 } else {
1028 pprev = &dst->next;
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001029 ++more;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001030 }
1031 }
1032
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001033 spin_unlock_bh(&icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001034
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001035 return more;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001036}
1037
David S. Miller1e493d12008-09-10 17:27:15 -07001038static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1039 void *arg)
1040{
1041 struct dst_entry *dst, **pprev;
1042
1043 spin_lock_bh(&icmp6_dst_lock);
1044 pprev = &icmp6_dst_gc_list;
1045 while ((dst = *pprev) != NULL) {
1046 struct rt6_info *rt = (struct rt6_info *) dst;
1047 if (func(rt, arg)) {
1048 *pprev = dst->next;
1049 dst_free(dst);
1050 } else {
1051 pprev = &dst->next;
1052 }
1053 }
1054 spin_unlock_bh(&icmp6_dst_lock);
1055}
1056
Daniel Lezcano569d3642008-01-18 03:56:57 -08001057static int ip6_dst_gc(struct dst_ops *ops)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001058{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001059 unsigned long now = jiffies;
Daniel Lezcano7019b782008-03-04 13:50:14 -08001060 struct net *net = ops->dst_net;
1061 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1062 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1063 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1064 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1065 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001066
Daniel Lezcano7019b782008-03-04 13:50:14 -08001067 if (time_after(rt_last_gc + rt_min_interval, now) &&
1068 atomic_read(&ops->entries) <= rt_max_size)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001069 goto out;
1070
Benjamin Thery6891a342008-03-04 13:49:47 -08001071 net->ipv6.ip6_rt_gc_expire++;
1072 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1073 net->ipv6.ip6_rt_last_gc = now;
Daniel Lezcano7019b782008-03-04 13:50:14 -08001074 if (atomic_read(&ops->entries) < ops->gc_thresh)
1075 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001076out:
Daniel Lezcano7019b782008-03-04 13:50:14 -08001077 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1078 return (atomic_read(&ops->entries) > rt_max_size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001079}
1080
1081/* Clean host part of a prefix. Not necessary in radix tree,
1082 but results in cleaner routing tables.
1083
1084 Remove it only when all the things will work!
1085 */
1086
1087static int ipv6_get_mtu(struct net_device *dev)
1088{
1089 int mtu = IPV6_MIN_MTU;
1090 struct inet6_dev *idev;
1091
1092 idev = in6_dev_get(dev);
1093 if (idev) {
1094 mtu = idev->cnf.mtu6;
1095 in6_dev_put(idev);
1096 }
1097 return mtu;
1098}
1099
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001100int ip6_dst_hoplimit(struct dst_entry *dst)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001101{
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001102 int hoplimit = dst_metric(dst, RTAX_HOPLIMIT);
1103 if (hoplimit < 0) {
1104 struct net_device *dev = dst->dev;
1105 struct inet6_dev *idev = in6_dev_get(dev);
1106 if (idev) {
1107 hoplimit = idev->cnf.hop_limit;
1108 in6_dev_put(idev);
1109 } else
YOSHIFUJI Hideaki53b79972008-07-19 22:35:03 -07001110 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001111 }
1112 return hoplimit;
1113}
1114
1115/*
1116 *
1117 */
1118
Thomas Graf86872cb2006-08-22 00:01:08 -07001119int ip6_route_add(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001120{
1121 int err;
Daniel Lezcano55786892008-03-04 13:47:47 -08001122 struct net *net = cfg->fc_nlinfo.nl_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001123 struct rt6_info *rt = NULL;
1124 struct net_device *dev = NULL;
1125 struct inet6_dev *idev = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001126 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001127 int addr_type;
1128
Thomas Graf86872cb2006-08-22 00:01:08 -07001129 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001130 return -EINVAL;
1131#ifndef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001132 if (cfg->fc_src_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001133 return -EINVAL;
1134#endif
Thomas Graf86872cb2006-08-22 00:01:08 -07001135 if (cfg->fc_ifindex) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001136 err = -ENODEV;
Daniel Lezcano55786892008-03-04 13:47:47 -08001137 dev = dev_get_by_index(net, cfg->fc_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001138 if (!dev)
1139 goto out;
1140 idev = in6_dev_get(dev);
1141 if (!idev)
1142 goto out;
1143 }
1144
Thomas Graf86872cb2006-08-22 00:01:08 -07001145 if (cfg->fc_metric == 0)
1146 cfg->fc_metric = IP6_RT_PRIO_USER;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001147
Daniel Lezcano55786892008-03-04 13:47:47 -08001148 table = fib6_new_table(net, cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001149 if (table == NULL) {
1150 err = -ENOBUFS;
1151 goto out;
1152 }
1153
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08001154 rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001155
1156 if (rt == NULL) {
1157 err = -ENOMEM;
1158 goto out;
1159 }
1160
1161 rt->u.dst.obsolete = -1;
YOSHIFUJI Hideaki6f704992008-05-19 16:56:11 -07001162 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1163 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1164 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001165
Thomas Graf86872cb2006-08-22 00:01:08 -07001166 if (cfg->fc_protocol == RTPROT_UNSPEC)
1167 cfg->fc_protocol = RTPROT_BOOT;
1168 rt->rt6i_protocol = cfg->fc_protocol;
1169
1170 addr_type = ipv6_addr_type(&cfg->fc_dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001171
1172 if (addr_type & IPV6_ADDR_MULTICAST)
1173 rt->u.dst.input = ip6_mc_input;
1174 else
1175 rt->u.dst.input = ip6_forward;
1176
1177 rt->u.dst.output = ip6_output;
1178
Thomas Graf86872cb2006-08-22 00:01:08 -07001179 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1180 rt->rt6i_dst.plen = cfg->fc_dst_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001181 if (rt->rt6i_dst.plen == 128)
1182 rt->u.dst.flags = DST_HOST;
1183
1184#ifdef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001185 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1186 rt->rt6i_src.plen = cfg->fc_src_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001187#endif
1188
Thomas Graf86872cb2006-08-22 00:01:08 -07001189 rt->rt6i_metric = cfg->fc_metric;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001190
1191 /* We cannot add true routes via loopback here,
1192 they would result in kernel looping; promote them to reject routes
1193 */
Thomas Graf86872cb2006-08-22 00:01:08 -07001194 if ((cfg->fc_flags & RTF_REJECT) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001195 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1196 /* hold loopback dev/idev if we haven't done so. */
Daniel Lezcano55786892008-03-04 13:47:47 -08001197 if (dev != net->loopback_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001198 if (dev) {
1199 dev_put(dev);
1200 in6_dev_put(idev);
1201 }
Daniel Lezcano55786892008-03-04 13:47:47 -08001202 dev = net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001203 dev_hold(dev);
1204 idev = in6_dev_get(dev);
1205 if (!idev) {
1206 err = -ENODEV;
1207 goto out;
1208 }
1209 }
1210 rt->u.dst.output = ip6_pkt_discard_out;
1211 rt->u.dst.input = ip6_pkt_discard;
1212 rt->u.dst.error = -ENETUNREACH;
1213 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1214 goto install_route;
1215 }
1216
Thomas Graf86872cb2006-08-22 00:01:08 -07001217 if (cfg->fc_flags & RTF_GATEWAY) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001218 struct in6_addr *gw_addr;
1219 int gwa_type;
1220
Thomas Graf86872cb2006-08-22 00:01:08 -07001221 gw_addr = &cfg->fc_gateway;
1222 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001223 gwa_type = ipv6_addr_type(gw_addr);
1224
1225 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1226 struct rt6_info *grt;
1227
1228 /* IPv6 strictly inhibits using not link-local
1229 addresses as nexthop address.
1230 Otherwise, router will not able to send redirects.
1231 It is very good, but in some (rare!) circumstances
1232 (SIT, PtP, NBMA NOARP links) it is handy to allow
1233 some exceptions. --ANK
1234 */
1235 err = -EINVAL;
1236 if (!(gwa_type&IPV6_ADDR_UNICAST))
1237 goto out;
1238
Daniel Lezcano55786892008-03-04 13:47:47 -08001239 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001240
1241 err = -EHOSTUNREACH;
1242 if (grt == NULL)
1243 goto out;
1244 if (dev) {
1245 if (dev != grt->rt6i_dev) {
1246 dst_release(&grt->u.dst);
1247 goto out;
1248 }
1249 } else {
1250 dev = grt->rt6i_dev;
1251 idev = grt->rt6i_idev;
1252 dev_hold(dev);
1253 in6_dev_hold(grt->rt6i_idev);
1254 }
1255 if (!(grt->rt6i_flags&RTF_GATEWAY))
1256 err = 0;
1257 dst_release(&grt->u.dst);
1258
1259 if (err)
1260 goto out;
1261 }
1262 err = -EINVAL;
1263 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1264 goto out;
1265 }
1266
1267 err = -ENODEV;
1268 if (dev == NULL)
1269 goto out;
1270
Thomas Graf86872cb2006-08-22 00:01:08 -07001271 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001272 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1273 if (IS_ERR(rt->rt6i_nexthop)) {
1274 err = PTR_ERR(rt->rt6i_nexthop);
1275 rt->rt6i_nexthop = NULL;
1276 goto out;
1277 }
1278 }
1279
Thomas Graf86872cb2006-08-22 00:01:08 -07001280 rt->rt6i_flags = cfg->fc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001281
1282install_route:
Thomas Graf86872cb2006-08-22 00:01:08 -07001283 if (cfg->fc_mx) {
1284 struct nlattr *nla;
1285 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001286
Thomas Graf86872cb2006-08-22 00:01:08 -07001287 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
Thomas Graf8f4c1f92007-09-12 14:44:36 +02001288 int type = nla_type(nla);
Thomas Graf86872cb2006-08-22 00:01:08 -07001289
1290 if (type) {
1291 if (type > RTAX_MAX) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001292 err = -EINVAL;
1293 goto out;
1294 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001295
1296 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001297 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001298 }
1299 }
1300
Satoru SATOH5ffc02a2008-05-04 22:14:42 -07001301 if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001302 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
Rami Rosen1ca615f2008-08-06 02:34:21 -07001303 if (!dst_mtu(&rt->u.dst))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001304 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
Satoru SATOH5ffc02a2008-05-04 22:14:42 -07001305 if (!dst_metric(&rt->u.dst, RTAX_ADVMSS))
Daniel Lezcano55786892008-03-04 13:47:47 -08001306 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001307 rt->u.dst.dev = dev;
1308 rt->rt6i_idev = idev;
Thomas Grafc71099a2006-08-04 23:20:06 -07001309 rt->rt6i_table = table;
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001310
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001311 cfg->fc_nlinfo.nl_net = dev_net(dev);
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001312
Thomas Graf86872cb2006-08-22 00:01:08 -07001313 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001314
1315out:
1316 if (dev)
1317 dev_put(dev);
1318 if (idev)
1319 in6_dev_put(idev);
1320 if (rt)
YOSHIFUJI Hideaki40aa7b92006-10-19 13:50:09 +09001321 dst_free(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001322 return err;
1323}
1324
Thomas Graf86872cb2006-08-22 00:01:08 -07001325static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001326{
1327 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -07001328 struct fib6_table *table;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001329 struct net *net = dev_net(rt->rt6i_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001330
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001331 if (rt == net->ipv6.ip6_null_entry)
Patrick McHardy6c813a72006-08-06 22:22:47 -07001332 return -ENOENT;
1333
Thomas Grafc71099a2006-08-04 23:20:06 -07001334 table = rt->rt6i_table;
1335 write_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001336
Thomas Graf86872cb2006-08-22 00:01:08 -07001337 err = fib6_del(rt, info);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001338 dst_release(&rt->u.dst);
1339
Thomas Grafc71099a2006-08-04 23:20:06 -07001340 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001341
1342 return err;
1343}
1344
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001345int ip6_del_rt(struct rt6_info *rt)
1346{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001347 struct nl_info info = {
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001348 .nl_net = dev_net(rt->rt6i_dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001349 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08001350 return __ip6_del_rt(rt, &info);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001351}
1352
Thomas Graf86872cb2006-08-22 00:01:08 -07001353static int ip6_route_del(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001354{
Thomas Grafc71099a2006-08-04 23:20:06 -07001355 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001356 struct fib6_node *fn;
1357 struct rt6_info *rt;
1358 int err = -ESRCH;
1359
Daniel Lezcano55786892008-03-04 13:47:47 -08001360 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001361 if (table == NULL)
1362 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001363
Thomas Grafc71099a2006-08-04 23:20:06 -07001364 read_lock_bh(&table->tb6_lock);
1365
1366 fn = fib6_locate(&table->tb6_root,
Thomas Graf86872cb2006-08-22 00:01:08 -07001367 &cfg->fc_dst, cfg->fc_dst_len,
1368 &cfg->fc_src, cfg->fc_src_len);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001369
Linus Torvalds1da177e2005-04-16 15:20:36 -07001370 if (fn) {
Eric Dumazet7cc48262007-02-09 16:22:57 -08001371 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
Thomas Graf86872cb2006-08-22 00:01:08 -07001372 if (cfg->fc_ifindex &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001373 (rt->rt6i_dev == NULL ||
Thomas Graf86872cb2006-08-22 00:01:08 -07001374 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001375 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001376 if (cfg->fc_flags & RTF_GATEWAY &&
1377 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001378 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001379 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001380 continue;
1381 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001382 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001383
Thomas Graf86872cb2006-08-22 00:01:08 -07001384 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001385 }
1386 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001387 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001388
1389 return err;
1390}
1391
1392/*
1393 * Handle redirects
1394 */
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001395struct ip6rd_flowi {
1396 struct flowi fl;
1397 struct in6_addr gateway;
1398};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001399
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001400static struct rt6_info *__ip6_route_redirect(struct net *net,
1401 struct fib6_table *table,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001402 struct flowi *fl,
1403 int flags)
1404{
1405 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1406 struct rt6_info *rt;
1407 struct fib6_node *fn;
Thomas Grafc71099a2006-08-04 23:20:06 -07001408
Linus Torvalds1da177e2005-04-16 15:20:36 -07001409 /*
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001410 * Get the "current" route for this destination and
1411 * check if the redirect has come from approriate router.
1412 *
1413 * RFC 2461 specifies that redirects should only be
1414 * accepted if they come from the nexthop to the target.
1415 * Due to the way the routes are chosen, this notion
1416 * is a bit fuzzy and one might need to check all possible
1417 * routes.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001418 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001419
Thomas Grafc71099a2006-08-04 23:20:06 -07001420 read_lock_bh(&table->tb6_lock);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001421 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001422restart:
Eric Dumazet7cc48262007-02-09 16:22:57 -08001423 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001424 /*
1425 * Current route is on-link; redirect is always invalid.
1426 *
1427 * Seems, previous statement is not true. It could
1428 * be node, which looks for us as on-link (f.e. proxy ndisc)
1429 * But then router serving it might decide, that we should
1430 * know truth 8)8) --ANK (980726).
1431 */
1432 if (rt6_check_expired(rt))
1433 continue;
1434 if (!(rt->rt6i_flags & RTF_GATEWAY))
1435 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001436 if (fl->oif != rt->rt6i_dev->ifindex)
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001437 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001438 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001439 continue;
1440 break;
1441 }
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001442
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001443 if (!rt)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001444 rt = net->ipv6.ip6_null_entry;
1445 BACKTRACK(net, &fl->fl6_src);
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001446out:
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001447 dst_hold(&rt->u.dst);
1448
1449 read_unlock_bh(&table->tb6_lock);
1450
1451 return rt;
1452};
1453
1454static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1455 struct in6_addr *src,
1456 struct in6_addr *gateway,
1457 struct net_device *dev)
1458{
Thomas Grafadaa70b2006-10-13 15:01:03 -07001459 int flags = RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001460 struct net *net = dev_net(dev);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001461 struct ip6rd_flowi rdfl = {
1462 .fl = {
1463 .oif = dev->ifindex,
1464 .nl_u = {
1465 .ip6_u = {
1466 .daddr = *dest,
1467 .saddr = *src,
1468 },
1469 },
1470 },
1471 .gateway = *gateway,
1472 };
Thomas Grafadaa70b2006-10-13 15:01:03 -07001473
1474 if (rt6_need_strict(dest))
1475 flags |= RT6_LOOKUP_F_IFACE;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001476
Daniel Lezcano55786892008-03-04 13:47:47 -08001477 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
Daniel Lezcano58f09b72008-03-03 23:25:27 -08001478 flags, __ip6_route_redirect);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001479}
1480
1481void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1482 struct in6_addr *saddr,
1483 struct neighbour *neigh, u8 *lladdr, int on_link)
1484{
1485 struct rt6_info *rt, *nrt = NULL;
1486 struct netevent_redirect netevent;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001487 struct net *net = dev_net(neigh->dev);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001488
1489 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1490
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001491 if (rt == net->ipv6.ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001492 if (net_ratelimit())
1493 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1494 "for redirect target\n");
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001495 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001496 }
1497
Linus Torvalds1da177e2005-04-16 15:20:36 -07001498 /*
1499 * We have finally decided to accept it.
1500 */
1501
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001502 neigh_update(neigh, lladdr, NUD_STALE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001503 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1504 NEIGH_UPDATE_F_OVERRIDE|
1505 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1506 NEIGH_UPDATE_F_ISROUTER))
1507 );
1508
1509 /*
1510 * Redirect received -> path was valid.
1511 * Look, redirects are sent only in response to data packets,
1512 * so that this nexthop apparently is reachable. --ANK
1513 */
1514 dst_confirm(&rt->u.dst);
1515
1516 /* Duplicate redirect: silently ignore. */
1517 if (neigh == rt->u.dst.neighbour)
1518 goto out;
1519
1520 nrt = ip6_rt_copy(rt);
1521 if (nrt == NULL)
1522 goto out;
1523
1524 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1525 if (on_link)
1526 nrt->rt6i_flags &= ~RTF_GATEWAY;
1527
1528 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1529 nrt->rt6i_dst.plen = 128;
1530 nrt->u.dst.flags |= DST_HOST;
1531
1532 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1533 nrt->rt6i_nexthop = neigh_clone(neigh);
1534 /* Reset pmtu, it may be better */
1535 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001536 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev),
Daniel Lezcano55786892008-03-04 13:47:47 -08001537 dst_mtu(&nrt->u.dst));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001538
Thomas Graf40e22e82006-08-22 00:00:45 -07001539 if (ip6_ins_rt(nrt))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001540 goto out;
1541
Tom Tucker8d717402006-07-30 20:43:36 -07001542 netevent.old = &rt->u.dst;
1543 netevent.new = &nrt->u.dst;
1544 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1545
Linus Torvalds1da177e2005-04-16 15:20:36 -07001546 if (rt->rt6i_flags&RTF_CACHE) {
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001547 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001548 return;
1549 }
1550
1551out:
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001552 dst_release(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001553 return;
1554}
1555
1556/*
1557 * Handle ICMP "packet too big" messages
1558 * i.e. Path MTU discovery
1559 */
1560
1561void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1562 struct net_device *dev, u32 pmtu)
1563{
1564 struct rt6_info *rt, *nrt;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001565 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001566 int allfrag = 0;
1567
Daniel Lezcano55786892008-03-04 13:47:47 -08001568 rt = rt6_lookup(net, daddr, saddr, dev->ifindex, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001569 if (rt == NULL)
1570 return;
1571
1572 if (pmtu >= dst_mtu(&rt->u.dst))
1573 goto out;
1574
1575 if (pmtu < IPV6_MIN_MTU) {
1576 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001577 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
Linus Torvalds1da177e2005-04-16 15:20:36 -07001578 * MTU (1280) and a fragment header should always be included
1579 * after a node receiving Too Big message reporting PMTU is
1580 * less than the IPv6 Minimum Link MTU.
1581 */
1582 pmtu = IPV6_MIN_MTU;
1583 allfrag = 1;
1584 }
1585
1586 /* New mtu received -> path was valid.
1587 They are sent only in response to data packets,
1588 so that this nexthop apparently is reachable. --ANK
1589 */
1590 dst_confirm(&rt->u.dst);
1591
1592 /* Host route. If it is static, it would be better
1593 not to override it, but add new one, so that
1594 when cache entry will expire old pmtu
1595 would return automatically.
1596 */
1597 if (rt->rt6i_flags & RTF_CACHE) {
1598 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1599 if (allfrag)
1600 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
Daniel Lezcano55786892008-03-04 13:47:47 -08001601 dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001602 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1603 goto out;
1604 }
1605
1606 /* Network route.
1607 Two cases are possible:
1608 1. It is connected route. Action: COW
1609 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1610 */
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001611 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001612 nrt = rt6_alloc_cow(rt, daddr, saddr);
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001613 else
1614 nrt = rt6_alloc_clone(rt, daddr);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001615
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001616 if (nrt) {
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001617 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1618 if (allfrag)
1619 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1620
1621 /* According to RFC 1981, detecting PMTU increase shouldn't be
1622 * happened within 5 mins, the recommended timer is 10 mins.
1623 * Here this route expiration time is set to ip6_rt_mtu_expires
1624 * which is 10 mins. After 10 mins the decreased pmtu is expired
1625 * and detecting PMTU increase will be automatically happened.
1626 */
Daniel Lezcano55786892008-03-04 13:47:47 -08001627 dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001628 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1629
Thomas Graf40e22e82006-08-22 00:00:45 -07001630 ip6_ins_rt(nrt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001631 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001632out:
1633 dst_release(&rt->u.dst);
1634}
1635
1636/*
1637 * Misc support functions
1638 */
1639
1640static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1641{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001642 struct net *net = dev_net(ort->rt6i_dev);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08001643 struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001644
1645 if (rt) {
1646 rt->u.dst.input = ort->u.dst.input;
1647 rt->u.dst.output = ort->u.dst.output;
1648
1649 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
Ville Nuorvala22e1e4d2006-10-16 22:14:26 -07001650 rt->u.dst.error = ort->u.dst.error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001651 rt->u.dst.dev = ort->u.dst.dev;
1652 if (rt->u.dst.dev)
1653 dev_hold(rt->u.dst.dev);
1654 rt->rt6i_idev = ort->rt6i_idev;
1655 if (rt->rt6i_idev)
1656 in6_dev_hold(rt->rt6i_idev);
1657 rt->u.dst.lastuse = jiffies;
1658 rt->rt6i_expires = 0;
1659
1660 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1661 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1662 rt->rt6i_metric = 0;
1663
1664 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1665#ifdef CONFIG_IPV6_SUBTREES
1666 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1667#endif
Thomas Grafc71099a2006-08-04 23:20:06 -07001668 rt->rt6i_table = ort->rt6i_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001669 }
1670 return rt;
1671}
1672
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001673#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001674static struct rt6_info *rt6_get_route_info(struct net *net,
1675 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001676 struct in6_addr *gwaddr, int ifindex)
1677{
1678 struct fib6_node *fn;
1679 struct rt6_info *rt = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001680 struct fib6_table *table;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001681
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001682 table = fib6_get_table(net, RT6_TABLE_INFO);
Thomas Grafc71099a2006-08-04 23:20:06 -07001683 if (table == NULL)
1684 return NULL;
1685
1686 write_lock_bh(&table->tb6_lock);
1687 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001688 if (!fn)
1689 goto out;
1690
Eric Dumazet7cc48262007-02-09 16:22:57 -08001691 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001692 if (rt->rt6i_dev->ifindex != ifindex)
1693 continue;
1694 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1695 continue;
1696 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1697 continue;
1698 dst_hold(&rt->u.dst);
1699 break;
1700 }
1701out:
Thomas Grafc71099a2006-08-04 23:20:06 -07001702 write_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001703 return rt;
1704}
1705
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001706static struct rt6_info *rt6_add_route_info(struct net *net,
1707 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001708 struct in6_addr *gwaddr, int ifindex,
1709 unsigned pref)
1710{
Thomas Graf86872cb2006-08-22 00:01:08 -07001711 struct fib6_config cfg = {
1712 .fc_table = RT6_TABLE_INFO,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001713 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001714 .fc_ifindex = ifindex,
1715 .fc_dst_len = prefixlen,
1716 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1717 RTF_UP | RTF_PREF(pref),
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001718 .fc_nlinfo.pid = 0,
1719 .fc_nlinfo.nlh = NULL,
1720 .fc_nlinfo.nl_net = net,
Thomas Graf86872cb2006-08-22 00:01:08 -07001721 };
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001722
Thomas Graf86872cb2006-08-22 00:01:08 -07001723 ipv6_addr_copy(&cfg.fc_dst, prefix);
1724 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1725
YOSHIFUJI Hideakie317da92006-03-20 17:06:42 -08001726 /* We should treat it as a default route if prefix length is 0. */
1727 if (!prefixlen)
Thomas Graf86872cb2006-08-22 00:01:08 -07001728 cfg.fc_flags |= RTF_DEFAULT;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001729
Thomas Graf86872cb2006-08-22 00:01:08 -07001730 ip6_route_add(&cfg);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001731
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001732 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001733}
1734#endif
1735
Linus Torvalds1da177e2005-04-16 15:20:36 -07001736struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001737{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001738 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001739 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001740
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001741 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001742 if (table == NULL)
1743 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001744
Thomas Grafc71099a2006-08-04 23:20:06 -07001745 write_lock_bh(&table->tb6_lock);
Eric Dumazet7cc48262007-02-09 16:22:57 -08001746 for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001747 if (dev == rt->rt6i_dev &&
YOSHIFUJI Hideaki045927f2006-03-20 17:00:48 -08001748 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001749 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1750 break;
1751 }
1752 if (rt)
1753 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001754 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001755 return rt;
1756}
1757
1758struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001759 struct net_device *dev,
1760 unsigned int pref)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001761{
Thomas Graf86872cb2006-08-22 00:01:08 -07001762 struct fib6_config cfg = {
1763 .fc_table = RT6_TABLE_DFLT,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001764 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001765 .fc_ifindex = dev->ifindex,
1766 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1767 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
Daniel Lezcano55786892008-03-04 13:47:47 -08001768 .fc_nlinfo.pid = 0,
1769 .fc_nlinfo.nlh = NULL,
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001770 .fc_nlinfo.nl_net = dev_net(dev),
Thomas Graf86872cb2006-08-22 00:01:08 -07001771 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001772
Thomas Graf86872cb2006-08-22 00:01:08 -07001773 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001774
Thomas Graf86872cb2006-08-22 00:01:08 -07001775 ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001776
Linus Torvalds1da177e2005-04-16 15:20:36 -07001777 return rt6_get_dflt_router(gwaddr, dev);
1778}
1779
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001780void rt6_purge_dflt_routers(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001781{
1782 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001783 struct fib6_table *table;
1784
1785 /* NOTE: Keep consistent with rt6_get_dflt_router */
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001786 table = fib6_get_table(net, RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001787 if (table == NULL)
1788 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001789
1790restart:
Thomas Grafc71099a2006-08-04 23:20:06 -07001791 read_lock_bh(&table->tb6_lock);
Eric Dumazet7cc48262007-02-09 16:22:57 -08001792 for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001793 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1794 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001795 read_unlock_bh(&table->tb6_lock);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001796 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001797 goto restart;
1798 }
1799 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001800 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001801}
1802
Daniel Lezcano55786892008-03-04 13:47:47 -08001803static void rtmsg_to_fib6_config(struct net *net,
1804 struct in6_rtmsg *rtmsg,
Thomas Graf86872cb2006-08-22 00:01:08 -07001805 struct fib6_config *cfg)
1806{
1807 memset(cfg, 0, sizeof(*cfg));
1808
1809 cfg->fc_table = RT6_TABLE_MAIN;
1810 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1811 cfg->fc_metric = rtmsg->rtmsg_metric;
1812 cfg->fc_expires = rtmsg->rtmsg_info;
1813 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1814 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1815 cfg->fc_flags = rtmsg->rtmsg_flags;
1816
Daniel Lezcano55786892008-03-04 13:47:47 -08001817 cfg->fc_nlinfo.nl_net = net;
Benjamin Theryf1243c22008-02-26 18:10:03 -08001818
Thomas Graf86872cb2006-08-22 00:01:08 -07001819 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1820 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1821 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1822}
1823
Daniel Lezcano55786892008-03-04 13:47:47 -08001824int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001825{
Thomas Graf86872cb2006-08-22 00:01:08 -07001826 struct fib6_config cfg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001827 struct in6_rtmsg rtmsg;
1828 int err;
1829
1830 switch(cmd) {
1831 case SIOCADDRT: /* Add a route */
1832 case SIOCDELRT: /* Delete a route */
1833 if (!capable(CAP_NET_ADMIN))
1834 return -EPERM;
1835 err = copy_from_user(&rtmsg, arg,
1836 sizeof(struct in6_rtmsg));
1837 if (err)
1838 return -EFAULT;
Thomas Graf86872cb2006-08-22 00:01:08 -07001839
Daniel Lezcano55786892008-03-04 13:47:47 -08001840 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
Thomas Graf86872cb2006-08-22 00:01:08 -07001841
Linus Torvalds1da177e2005-04-16 15:20:36 -07001842 rtnl_lock();
1843 switch (cmd) {
1844 case SIOCADDRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001845 err = ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001846 break;
1847 case SIOCDELRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001848 err = ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001849 break;
1850 default:
1851 err = -EINVAL;
1852 }
1853 rtnl_unlock();
1854
1855 return err;
Stephen Hemminger3ff50b72007-04-20 17:09:22 -07001856 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001857
1858 return -EINVAL;
1859}
1860
1861/*
1862 * Drop the packet on the floor
1863 */
1864
Ilpo Järvinen50eb4312008-01-12 03:21:00 -08001865static int ip6_pkt_drop(struct sk_buff *skb, int code, int ipstats_mib_noroutes)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001866{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001867 int type;
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07001868 struct dst_entry *dst = skb->dst;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001869 switch (ipstats_mib_noroutes) {
1870 case IPSTATS_MIB_INNOROUTES:
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -07001871 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001872 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07001873 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1874 IPSTATS_MIB_INADDRERRORS);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001875 break;
1876 }
1877 /* FALLTHROUGH */
1878 case IPSTATS_MIB_OUTNOROUTES:
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07001879 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1880 ipstats_mib_noroutes);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001881 break;
1882 }
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001883 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001884 kfree_skb(skb);
1885 return 0;
1886}
1887
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001888static int ip6_pkt_discard(struct sk_buff *skb)
1889{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001890 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001891}
1892
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001893static int ip6_pkt_discard_out(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001894{
1895 skb->dev = skb->dst->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001896 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001897}
1898
David S. Miller6723ab52006-10-18 21:20:57 -07001899#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1900
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001901static int ip6_pkt_prohibit(struct sk_buff *skb)
1902{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001903 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001904}
1905
1906static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1907{
1908 skb->dev = skb->dst->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001909 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001910}
1911
David S. Miller6723ab52006-10-18 21:20:57 -07001912#endif
1913
Linus Torvalds1da177e2005-04-16 15:20:36 -07001914/*
1915 * Allocate a dst for local (unicast / anycast) address.
1916 */
1917
1918struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1919 const struct in6_addr *addr,
1920 int anycast)
1921{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001922 struct net *net = dev_net(idev->dev);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08001923 struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
David S. Miller14deae42009-01-04 16:04:39 -08001924 struct neighbour *neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001925
1926 if (rt == NULL)
1927 return ERR_PTR(-ENOMEM);
1928
Daniel Lezcano55786892008-03-04 13:47:47 -08001929 dev_hold(net->loopback_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001930 in6_dev_hold(idev);
1931
1932 rt->u.dst.flags = DST_HOST;
1933 rt->u.dst.input = ip6_input;
1934 rt->u.dst.output = ip6_output;
Daniel Lezcano55786892008-03-04 13:47:47 -08001935 rt->rt6i_dev = net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001936 rt->rt6i_idev = idev;
1937 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
Daniel Lezcano55786892008-03-04 13:47:47 -08001938 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001939 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1940 rt->u.dst.obsolete = -1;
1941
1942 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09001943 if (anycast)
1944 rt->rt6i_flags |= RTF_ANYCAST;
1945 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07001946 rt->rt6i_flags |= RTF_LOCAL;
David S. Miller14deae42009-01-04 16:04:39 -08001947 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1948 if (IS_ERR(neigh)) {
YOSHIFUJI Hideaki40aa7b92006-10-19 13:50:09 +09001949 dst_free(&rt->u.dst);
David S. Miller14deae42009-01-04 16:04:39 -08001950
1951 /* We are casting this because that is the return
1952 * value type. But an errno encoded pointer is the
1953 * same regardless of the underlying pointer type,
1954 * and that's what we are returning. So this is OK.
1955 */
1956 return (struct rt6_info *) neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001957 }
David S. Miller14deae42009-01-04 16:04:39 -08001958 rt->rt6i_nexthop = neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001959
1960 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1961 rt->rt6i_dst.plen = 128;
Daniel Lezcano55786892008-03-04 13:47:47 -08001962 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001963
1964 atomic_set(&rt->u.dst.__refcnt, 1);
1965
1966 return rt;
1967}
1968
/* Cookie that rt6_ifdown() hands to the fib6_ifdown() callback. */
struct arg_dev_net {
	struct net_device *dev;	/* device to match; NULL matches every route */
	struct net *net;	/* namespace whose null entry is spared */
};
1973
Linus Torvalds1da177e2005-04-16 15:20:36 -07001974static int fib6_ifdown(struct rt6_info *rt, void *arg)
1975{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001976 struct net_device *dev = ((struct arg_dev_net *)arg)->dev;
1977 struct net *net = ((struct arg_dev_net *)arg)->net;
1978
1979 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
1980 rt != net->ipv6.ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001981 RT6_TRACE("deleted by ifdown %p\n", rt);
1982 return -1;
1983 }
1984 return 0;
1985}
1986
Daniel Lezcanof3db4852008-03-03 23:27:06 -08001987void rt6_ifdown(struct net *net, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001988{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001989 struct arg_dev_net adn = {
1990 .dev = dev,
1991 .net = net,
1992 };
1993
1994 fib6_clean_all(net, fib6_ifdown, 0, &adn);
David S. Miller1e493d12008-09-10 17:27:15 -07001995 icmp6_clean_all(fib6_ifdown, &adn);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001996}
1997
/* Cookie that rt6_mtu_change() hands to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned mtu;		/* the new MTU */
};
2003
2004static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2005{
2006 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2007 struct inet6_dev *idev;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002008 struct net *net = dev_net(arg->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002009
2010 /* In IPv6 pmtu discovery is not optional,
2011 so that RTAX_MTU lock cannot disable it.
2012 We still use this lock to block changes
2013 caused by addrconf/ndisc.
2014 */
2015
2016 idev = __in6_dev_get(arg->dev);
2017 if (idev == NULL)
2018 return 0;
2019
2020 /* For administrative MTU increase, there is no way to discover
2021 IPv6 PMTU increase, so PMTU increase should be updated here.
2022 Since RFC 1981 doesn't include administrative MTU increase
2023 update PMTU increase is a MUST. (i.e. jumbo frame)
2024 */
2025 /*
2026 If new MTU is less than route PMTU, this new MTU will be the
2027 lowest MTU in the path, update the route PMTU to reflect PMTU
2028 decreases; if new MTU is greater than route PMTU, and the
2029 old MTU is the lowest MTU in the path, update the route PMTU
2030 to reflect the increase. In this case if the other nodes' MTU
2031 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2032 PMTU discouvery.
2033 */
2034 if (rt->rt6i_dev == arg->dev &&
2035 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
Jim Paris23717792008-01-31 16:36:25 -08002036 (dst_mtu(&rt->u.dst) >= arg->mtu ||
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002037 (dst_mtu(&rt->u.dst) < arg->mtu &&
Simon Arlott566cfd82007-07-26 00:09:55 -07002038 dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002039 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
Daniel Lezcano55786892008-03-04 13:47:47 -08002040 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
Simon Arlott566cfd82007-07-26 00:09:55 -07002041 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002042 return 0;
2043}
2044
2045void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2046{
Thomas Grafc71099a2006-08-04 23:20:06 -07002047 struct rt6_mtu_change_arg arg = {
2048 .dev = dev,
2049 .mtu = mtu,
2050 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07002051
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002052 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002053}
2054
Patrick McHardyef7c79e2007-06-05 12:38:30 -07002055static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
Thomas Graf5176f912006-08-26 20:13:18 -07002056 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
Thomas Graf86872cb2006-08-22 00:01:08 -07002057 [RTA_OIF] = { .type = NLA_U32 },
Thomas Grafab364a62006-08-22 00:01:47 -07002058 [RTA_IIF] = { .type = NLA_U32 },
Thomas Graf86872cb2006-08-22 00:01:08 -07002059 [RTA_PRIORITY] = { .type = NLA_U32 },
2060 [RTA_METRICS] = { .type = NLA_NESTED },
2061};
2062
2063static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2064 struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002065{
Thomas Graf86872cb2006-08-22 00:01:08 -07002066 struct rtmsg *rtm;
2067 struct nlattr *tb[RTA_MAX+1];
2068 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002069
Thomas Graf86872cb2006-08-22 00:01:08 -07002070 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2071 if (err < 0)
2072 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002073
Thomas Graf86872cb2006-08-22 00:01:08 -07002074 err = -EINVAL;
2075 rtm = nlmsg_data(nlh);
2076 memset(cfg, 0, sizeof(*cfg));
2077
2078 cfg->fc_table = rtm->rtm_table;
2079 cfg->fc_dst_len = rtm->rtm_dst_len;
2080 cfg->fc_src_len = rtm->rtm_src_len;
2081 cfg->fc_flags = RTF_UP;
2082 cfg->fc_protocol = rtm->rtm_protocol;
2083
2084 if (rtm->rtm_type == RTN_UNREACHABLE)
2085 cfg->fc_flags |= RTF_REJECT;
2086
2087 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2088 cfg->fc_nlinfo.nlh = nlh;
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002089 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
Thomas Graf86872cb2006-08-22 00:01:08 -07002090
2091 if (tb[RTA_GATEWAY]) {
2092 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2093 cfg->fc_flags |= RTF_GATEWAY;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002094 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002095
2096 if (tb[RTA_DST]) {
2097 int plen = (rtm->rtm_dst_len + 7) >> 3;
2098
2099 if (nla_len(tb[RTA_DST]) < plen)
2100 goto errout;
2101
2102 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002103 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002104
2105 if (tb[RTA_SRC]) {
2106 int plen = (rtm->rtm_src_len + 7) >> 3;
2107
2108 if (nla_len(tb[RTA_SRC]) < plen)
2109 goto errout;
2110
2111 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002112 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002113
2114 if (tb[RTA_OIF])
2115 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2116
2117 if (tb[RTA_PRIORITY])
2118 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2119
2120 if (tb[RTA_METRICS]) {
2121 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2122 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002123 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002124
2125 if (tb[RTA_TABLE])
2126 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2127
2128 err = 0;
2129errout:
2130 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002131}
2132
Thomas Grafc127ea22007-03-22 11:58:32 -07002133static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002134{
Thomas Graf86872cb2006-08-22 00:01:08 -07002135 struct fib6_config cfg;
2136 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002137
Thomas Graf86872cb2006-08-22 00:01:08 -07002138 err = rtm_to_fib6_config(skb, nlh, &cfg);
2139 if (err < 0)
2140 return err;
2141
2142 return ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002143}
2144
Thomas Grafc127ea22007-03-22 11:58:32 -07002145static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002146{
Thomas Graf86872cb2006-08-22 00:01:08 -07002147 struct fib6_config cfg;
2148 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002149
Thomas Graf86872cb2006-08-22 00:01:08 -07002150 err = rtm_to_fib6_config(skb, nlh, &cfg);
2151 if (err < 0)
2152 return err;
2153
2154 return ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002155}
2156
Thomas Graf339bf982006-11-10 14:10:15 -08002157static inline size_t rt6_nlmsg_size(void)
2158{
2159 return NLMSG_ALIGN(sizeof(struct rtmsg))
2160 + nla_total_size(16) /* RTA_SRC */
2161 + nla_total_size(16) /* RTA_DST */
2162 + nla_total_size(16) /* RTA_GATEWAY */
2163 + nla_total_size(16) /* RTA_PREFSRC */
2164 + nla_total_size(4) /* RTA_TABLE */
2165 + nla_total_size(4) /* RTA_IIF */
2166 + nla_total_size(4) /* RTA_OIF */
2167 + nla_total_size(4) /* RTA_PRIORITY */
Noriaki TAKAMIYA6a2b9ce2007-01-23 22:09:41 -08002168 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
Thomas Graf339bf982006-11-10 14:10:15 -08002169 + nla_total_size(sizeof(struct rta_cacheinfo));
2170}
2171
Brian Haley191cd582008-08-14 15:33:21 -07002172static int rt6_fill_node(struct net *net,
2173 struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002174 struct in6_addr *dst, struct in6_addr *src,
2175 int iif, int type, u32 pid, u32 seq,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002176 int prefix, int nowait, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002177{
2178 struct rtmsg *rtm;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002179 struct nlmsghdr *nlh;
Thomas Grafe3703b32006-11-27 09:27:07 -08002180 long expires;
Patrick McHardy9e762a42006-08-10 23:09:48 -07002181 u32 table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002182
2183 if (prefix) { /* user wants prefix routes only */
2184 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2185 /* success since this is not a prefix route */
2186 return 1;
2187 }
2188 }
2189
Thomas Graf2d7202b2006-08-22 00:01:27 -07002190 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2191 if (nlh == NULL)
Patrick McHardy26932562007-01-31 23:16:40 -08002192 return -EMSGSIZE;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002193
2194 rtm = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002195 rtm->rtm_family = AF_INET6;
2196 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2197 rtm->rtm_src_len = rt->rt6i_src.plen;
2198 rtm->rtm_tos = 0;
Thomas Grafc71099a2006-08-04 23:20:06 -07002199 if (rt->rt6i_table)
Patrick McHardy9e762a42006-08-10 23:09:48 -07002200 table = rt->rt6i_table->tb6_id;
Thomas Grafc71099a2006-08-04 23:20:06 -07002201 else
Patrick McHardy9e762a42006-08-10 23:09:48 -07002202 table = RT6_TABLE_UNSPEC;
2203 rtm->rtm_table = table;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002204 NLA_PUT_U32(skb, RTA_TABLE, table);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002205 if (rt->rt6i_flags&RTF_REJECT)
2206 rtm->rtm_type = RTN_UNREACHABLE;
2207 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2208 rtm->rtm_type = RTN_LOCAL;
2209 else
2210 rtm->rtm_type = RTN_UNICAST;
2211 rtm->rtm_flags = 0;
2212 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2213 rtm->rtm_protocol = rt->rt6i_protocol;
2214 if (rt->rt6i_flags&RTF_DYNAMIC)
2215 rtm->rtm_protocol = RTPROT_REDIRECT;
2216 else if (rt->rt6i_flags & RTF_ADDRCONF)
2217 rtm->rtm_protocol = RTPROT_KERNEL;
2218 else if (rt->rt6i_flags&RTF_DEFAULT)
2219 rtm->rtm_protocol = RTPROT_RA;
2220
2221 if (rt->rt6i_flags&RTF_CACHE)
2222 rtm->rtm_flags |= RTM_F_CLONED;
2223
2224 if (dst) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002225 NLA_PUT(skb, RTA_DST, 16, dst);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002226 rtm->rtm_dst_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002227 } else if (rtm->rtm_dst_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002228 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002229#ifdef CONFIG_IPV6_SUBTREES
2230 if (src) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002231 NLA_PUT(skb, RTA_SRC, 16, src);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002232 rtm->rtm_src_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002233 } else if (rtm->rtm_src_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002234 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002235#endif
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002236 if (iif) {
2237#ifdef CONFIG_IPV6_MROUTE
2238 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
Benjamin Thery8229efd2008-12-10 16:30:15 -08002239 int err = ip6mr_get_route(net, skb, rtm, nowait);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002240 if (err <= 0) {
2241 if (!nowait) {
2242 if (err == 0)
2243 return 0;
2244 goto nla_put_failure;
2245 } else {
2246 if (err == -EMSGSIZE)
2247 goto nla_put_failure;
2248 }
2249 }
2250 } else
2251#endif
2252 NLA_PUT_U32(skb, RTA_IIF, iif);
2253 } else if (dst) {
Brian Haley5e0115e2008-08-13 01:58:57 -07002254 struct inet6_dev *idev = ip6_dst_idev(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002255 struct in6_addr saddr_buf;
Brian Haley191cd582008-08-14 15:33:21 -07002256 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
YOSHIFUJI Hideaki7cbca672008-03-25 09:37:42 +09002257 dst, 0, &saddr_buf) == 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002258 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002259 }
Thomas Graf2d7202b2006-08-22 00:01:27 -07002260
Linus Torvalds1da177e2005-04-16 15:20:36 -07002261 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002262 goto nla_put_failure;
2263
Linus Torvalds1da177e2005-04-16 15:20:36 -07002264 if (rt->u.dst.neighbour)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002265 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2266
Linus Torvalds1da177e2005-04-16 15:20:36 -07002267 if (rt->u.dst.dev)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002268 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2269
2270 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
Thomas Grafe3703b32006-11-27 09:27:07 -08002271
YOSHIFUJI Hideaki36e3dea2008-05-13 02:52:55 +09002272 if (!(rt->rt6i_flags & RTF_EXPIRES))
2273 expires = 0;
2274 else if (rt->rt6i_expires - jiffies < INT_MAX)
2275 expires = rt->rt6i_expires - jiffies;
2276 else
2277 expires = INT_MAX;
YOSHIFUJI Hideaki69cdf8f2008-05-19 16:55:13 -07002278
Thomas Grafe3703b32006-11-27 09:27:07 -08002279 if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2280 expires, rt->u.dst.error) < 0)
2281 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002282
Thomas Graf2d7202b2006-08-22 00:01:27 -07002283 return nlmsg_end(skb, nlh);
2284
2285nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08002286 nlmsg_cancel(skb, nlh);
2287 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002288}
2289
Patrick McHardy1b43af52006-08-10 23:11:17 -07002290int rt6_dump_route(struct rt6_info *rt, void *p_arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002291{
2292 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2293 int prefix;
2294
Thomas Graf2d7202b2006-08-22 00:01:27 -07002295 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2296 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002297 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2298 } else
2299 prefix = 0;
2300
Brian Haley191cd582008-08-14 15:33:21 -07002301 return rt6_fill_node(arg->net,
2302 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002303 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002304 prefix, 0, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002305}
2306
Thomas Grafc127ea22007-03-22 11:58:32 -07002307static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002308{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002309 struct net *net = sock_net(in_skb->sk);
Thomas Grafab364a62006-08-22 00:01:47 -07002310 struct nlattr *tb[RTA_MAX+1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002311 struct rt6_info *rt;
Thomas Grafab364a62006-08-22 00:01:47 -07002312 struct sk_buff *skb;
2313 struct rtmsg *rtm;
2314 struct flowi fl;
2315 int err, iif = 0;
2316
2317 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2318 if (err < 0)
2319 goto errout;
2320
2321 err = -EINVAL;
2322 memset(&fl, 0, sizeof(fl));
2323
2324 if (tb[RTA_SRC]) {
2325 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2326 goto errout;
2327
2328 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2329 }
2330
2331 if (tb[RTA_DST]) {
2332 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2333 goto errout;
2334
2335 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2336 }
2337
2338 if (tb[RTA_IIF])
2339 iif = nla_get_u32(tb[RTA_IIF]);
2340
2341 if (tb[RTA_OIF])
2342 fl.oif = nla_get_u32(tb[RTA_OIF]);
2343
2344 if (iif) {
2345 struct net_device *dev;
Daniel Lezcano55786892008-03-04 13:47:47 -08002346 dev = __dev_get_by_index(net, iif);
Thomas Grafab364a62006-08-22 00:01:47 -07002347 if (!dev) {
2348 err = -ENODEV;
2349 goto errout;
2350 }
2351 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002352
2353 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
Thomas Grafab364a62006-08-22 00:01:47 -07002354 if (skb == NULL) {
2355 err = -ENOBUFS;
2356 goto errout;
2357 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002358
2359 /* Reserve room for dummy headers, this skb can pass
2360 through good chunk of routing engine.
2361 */
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07002362 skb_reset_mac_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002363 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2364
Daniel Lezcano8a3edd82008-03-07 11:14:16 -08002365 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002366 skb->dst = &rt->u.dst;
2367
Brian Haley191cd582008-08-14 15:33:21 -07002368 err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002369 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002370 nlh->nlmsg_seq, 0, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002371 if (err < 0) {
Thomas Grafab364a62006-08-22 00:01:47 -07002372 kfree_skb(skb);
2373 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002374 }
2375
Daniel Lezcano55786892008-03-04 13:47:47 -08002376 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
Thomas Grafab364a62006-08-22 00:01:47 -07002377errout:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002378 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002379}
2380
Thomas Graf86872cb2006-08-22 00:01:08 -07002381void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002382{
2383 struct sk_buff *skb;
Daniel Lezcano55786892008-03-04 13:47:47 -08002384 struct net *net = info->nl_net;
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002385 u32 seq;
2386 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002387
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002388 err = -ENOBUFS;
2389 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
Thomas Graf86872cb2006-08-22 00:01:08 -07002390
Thomas Graf339bf982006-11-10 14:10:15 -08002391 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
Thomas Graf21713eb2006-08-15 00:35:24 -07002392 if (skb == NULL)
2393 goto errout;
2394
Brian Haley191cd582008-08-14 15:33:21 -07002395 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002396 event, info->pid, seq, 0, 0, 0);
Patrick McHardy26932562007-01-31 23:16:40 -08002397 if (err < 0) {
2398 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2399 WARN_ON(err == -EMSGSIZE);
2400 kfree_skb(skb);
2401 goto errout;
2402 }
Daniel Lezcano55786892008-03-04 13:47:47 -08002403 err = rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2404 info->nlh, gfp_any());
Thomas Graf21713eb2006-08-15 00:35:24 -07002405errout:
2406 if (err < 0)
Daniel Lezcano55786892008-03-04 13:47:47 -08002407 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002408}
2409
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002410static int ip6_route_dev_notify(struct notifier_block *this,
2411 unsigned long event, void *data)
2412{
2413 struct net_device *dev = (struct net_device *)data;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002414 struct net *net = dev_net(dev);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002415
2416 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2417 net->ipv6.ip6_null_entry->u.dst.dev = dev;
2418 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2419#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2420 net->ipv6.ip6_prohibit_entry->u.dst.dev = dev;
2421 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2422 net->ipv6.ip6_blk_hole_entry->u.dst.dev = dev;
2423 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2424#endif
2425 }
2426
2427 return NOTIFY_OK;
2428}
2429
Linus Torvalds1da177e2005-04-16 15:20:36 -07002430/*
2431 * /proc
2432 */
2433
2434#ifdef CONFIG_PROC_FS
2435
/*
 * NOTE(review): neither RT6_INFO_LEN nor struct rt6_proc_arg is referenced
 * by the seq_file based /proc code below; they may be unused here - confirm
 * against the rest of the file before removing.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

struct rt6_proc_arg {
	char *buffer;
	int offset, length, skip, len;
};
2446
2447static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2448{
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002449 struct seq_file *m = p_arg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002450
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002451 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002452
2453#ifdef CONFIG_IPV6_SUBTREES
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002454 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002455#else
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002456 seq_puts(m, "00000000000000000000000000000000 00 ");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002457#endif
2458
2459 if (rt->rt6i_nexthop) {
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002460 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002461 } else {
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002462 seq_puts(m, "00000000000000000000000000000000");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002463 }
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002464 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2465 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2466 rt->u.dst.__use, rt->rt6i_flags,
2467 rt->rt6i_dev ? rt->rt6i_dev->name : "");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002468 return 0;
2469}
2470
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002471static int ipv6_route_show(struct seq_file *m, void *v)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002472{
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002473 struct net *net = (struct net *)m->private;
2474 fib6_clean_all(net, rt6_info_route, 0, m);
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002475 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002476}
2477
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002478static int ipv6_route_open(struct inode *inode, struct file *file)
2479{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002480 return single_open_net(inode, file, ipv6_route_show);
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002481}
2482
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002483static const struct file_operations ipv6_route_proc_fops = {
2484 .owner = THIS_MODULE,
2485 .open = ipv6_route_open,
2486 .read = seq_read,
2487 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002488 .release = single_release_net,
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002489};
2490
Linus Torvalds1da177e2005-04-16 15:20:36 -07002491static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2492{
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002493 struct net *net = (struct net *)seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002494 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002495 net->ipv6.rt6_stats->fib_nodes,
2496 net->ipv6.rt6_stats->fib_route_nodes,
2497 net->ipv6.rt6_stats->fib_rt_alloc,
2498 net->ipv6.rt6_stats->fib_rt_entries,
2499 net->ipv6.rt6_stats->fib_rt_cache,
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002500 atomic_read(&net->ipv6.ip6_dst_ops->entries),
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002501 net->ipv6.rt6_stats->fib_discarded_routes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002502
2503 return 0;
2504}
2505
2506static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2507{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002508 return single_open_net(inode, file, rt6_stats_seq_show);
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002509}
2510
Arjan van de Ven9a321442007-02-12 00:55:35 -08002511static const struct file_operations rt6_stats_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002512 .owner = THIS_MODULE,
2513 .open = rt6_stats_seq_open,
2514 .read = seq_read,
2515 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002516 .release = single_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002517};
2518#endif /* CONFIG_PROC_FS */
2519
2520#ifdef CONFIG_SYSCTL
2521
Linus Torvalds1da177e2005-04-16 15:20:36 -07002522static
2523int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2524 void __user *buffer, size_t *lenp, loff_t *ppos)
2525{
Daniel Lezcano5b7c9312008-03-03 23:28:58 -08002526 struct net *net = current->nsproxy->net_ns;
2527 int delay = net->ipv6.sysctl.flush_delay;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002528 if (write) {
2529 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
Daniel Lezcano5b7c9312008-03-03 23:28:58 -08002530 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002531 return 0;
2532 } else
2533 return -EINVAL;
2534}
2535
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002536ctl_table ipv6_route_table_template[] = {
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002537 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002538 .procname = "flush",
Daniel Lezcano49905092008-01-10 03:01:01 -08002539 .data = &init_net.ipv6.sysctl.flush_delay,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002540 .maxlen = sizeof(int),
Dave Jones89c8b3a12005-04-28 12:11:49 -07002541 .mode = 0200,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002542 .proc_handler = ipv6_sysctl_rtcache_flush
Linus Torvalds1da177e2005-04-16 15:20:36 -07002543 },
2544 {
2545 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2546 .procname = "gc_thresh",
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002547 .data = &ip6_dst_ops_template.gc_thresh,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002548 .maxlen = sizeof(int),
2549 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002550 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002551 },
2552 {
2553 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2554 .procname = "max_size",
Daniel Lezcano49905092008-01-10 03:01:01 -08002555 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002556 .maxlen = sizeof(int),
2557 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002558 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002559 },
2560 {
2561 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2562 .procname = "gc_min_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002563 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002564 .maxlen = sizeof(int),
2565 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002566 .proc_handler = proc_dointvec_jiffies,
2567 .strategy = sysctl_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002568 },
2569 {
2570 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2571 .procname = "gc_timeout",
Daniel Lezcano49905092008-01-10 03:01:01 -08002572 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002573 .maxlen = sizeof(int),
2574 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002575 .proc_handler = proc_dointvec_jiffies,
2576 .strategy = sysctl_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002577 },
2578 {
2579 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2580 .procname = "gc_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002581 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002582 .maxlen = sizeof(int),
2583 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002584 .proc_handler = proc_dointvec_jiffies,
2585 .strategy = sysctl_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002586 },
2587 {
2588 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2589 .procname = "gc_elasticity",
Daniel Lezcano49905092008-01-10 03:01:01 -08002590 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002591 .maxlen = sizeof(int),
2592 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002593 .proc_handler = proc_dointvec_jiffies,
2594 .strategy = sysctl_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002595 },
2596 {
2597 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2598 .procname = "mtu_expires",
Daniel Lezcano49905092008-01-10 03:01:01 -08002599 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002600 .maxlen = sizeof(int),
2601 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002602 .proc_handler = proc_dointvec_jiffies,
2603 .strategy = sysctl_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002604 },
2605 {
2606 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2607 .procname = "min_adv_mss",
Daniel Lezcano49905092008-01-10 03:01:01 -08002608 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002609 .maxlen = sizeof(int),
2610 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002611 .proc_handler = proc_dointvec_jiffies,
2612 .strategy = sysctl_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002613 },
2614 {
2615 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2616 .procname = "gc_min_interval_ms",
Daniel Lezcano49905092008-01-10 03:01:01 -08002617 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002618 .maxlen = sizeof(int),
2619 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002620 .proc_handler = proc_dointvec_ms_jiffies,
2621 .strategy = sysctl_ms_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002622 },
2623 { .ctl_name = 0 }
2624};
2625
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002626struct ctl_table *ipv6_route_sysctl_init(struct net *net)
2627{
2628 struct ctl_table *table;
2629
2630 table = kmemdup(ipv6_route_table_template,
2631 sizeof(ipv6_route_table_template),
2632 GFP_KERNEL);
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002633
2634 if (table) {
2635 table[0].data = &net->ipv6.sysctl.flush_delay;
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002636 table[1].data = &net->ipv6.ip6_dst_ops->gc_thresh;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002637 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2638 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2639 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2640 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2641 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2642 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2643 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2644 }
2645
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002646 return table;
2647}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002648#endif
2649
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002650static int ip6_route_net_init(struct net *net)
2651{
Pavel Emelyanov633d424b2008-04-21 14:25:23 -07002652 int ret = -ENOMEM;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002653
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002654 net->ipv6.ip6_dst_ops = kmemdup(&ip6_dst_ops_template,
2655 sizeof(*net->ipv6.ip6_dst_ops),
2656 GFP_KERNEL);
2657 if (!net->ipv6.ip6_dst_ops)
2658 goto out;
Denis V. Lunev48115be2008-04-16 02:01:34 -07002659 net->ipv6.ip6_dst_ops->dst_net = hold_net(net);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002660
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002661 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2662 sizeof(*net->ipv6.ip6_null_entry),
2663 GFP_KERNEL);
2664 if (!net->ipv6.ip6_null_entry)
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002665 goto out_ip6_dst_ops;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002666 net->ipv6.ip6_null_entry->u.dst.path =
2667 (struct dst_entry *)net->ipv6.ip6_null_entry;
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002668 net->ipv6.ip6_null_entry->u.dst.ops = net->ipv6.ip6_dst_ops;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002669
2670#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2671 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2672 sizeof(*net->ipv6.ip6_prohibit_entry),
2673 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002674 if (!net->ipv6.ip6_prohibit_entry)
2675 goto out_ip6_null_entry;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002676 net->ipv6.ip6_prohibit_entry->u.dst.path =
2677 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002678 net->ipv6.ip6_prohibit_entry->u.dst.ops = net->ipv6.ip6_dst_ops;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002679
2680 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2681 sizeof(*net->ipv6.ip6_blk_hole_entry),
2682 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002683 if (!net->ipv6.ip6_blk_hole_entry)
2684 goto out_ip6_prohibit_entry;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002685 net->ipv6.ip6_blk_hole_entry->u.dst.path =
2686 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002687 net->ipv6.ip6_blk_hole_entry->u.dst.ops = net->ipv6.ip6_dst_ops;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002688#endif
2689
Peter Zijlstrab339a47c2008-10-07 14:15:00 -07002690 net->ipv6.sysctl.flush_delay = 0;
2691 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2692 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2693 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2694 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2695 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2696 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2697 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2698
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002699#ifdef CONFIG_PROC_FS
2700 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2701 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2702#endif
Benjamin Thery6891a342008-03-04 13:49:47 -08002703 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2704
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002705 ret = 0;
2706out:
2707 return ret;
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002708
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002709#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2710out_ip6_prohibit_entry:
2711 kfree(net->ipv6.ip6_prohibit_entry);
2712out_ip6_null_entry:
2713 kfree(net->ipv6.ip6_null_entry);
2714#endif
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002715out_ip6_dst_ops:
Denis V. Lunev48115be2008-04-16 02:01:34 -07002716 release_net(net->ipv6.ip6_dst_ops->dst_net);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002717 kfree(net->ipv6.ip6_dst_ops);
2718 goto out;
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002719}
2720
2721static void ip6_route_net_exit(struct net *net)
2722{
2723#ifdef CONFIG_PROC_FS
2724 proc_net_remove(net, "ipv6_route");
2725 proc_net_remove(net, "rt6_stats");
2726#endif
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002727 kfree(net->ipv6.ip6_null_entry);
2728#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2729 kfree(net->ipv6.ip6_prohibit_entry);
2730 kfree(net->ipv6.ip6_blk_hole_entry);
2731#endif
Denis V. Lunev48115be2008-04-16 02:01:34 -07002732 release_net(net->ipv6.ip6_dst_ops->dst_net);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002733 kfree(net->ipv6.ip6_dst_ops);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002734}
2735
2736static struct pernet_operations ip6_route_net_ops = {
2737 .init = ip6_route_net_init,
2738 .exit = ip6_route_net_exit,
2739};
2740
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002741static struct notifier_block ip6_route_dev_notifier = {
2742 .notifier_call = ip6_route_dev_notify,
2743 .priority = 0,
2744};
2745
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002746int __init ip6_route_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002747{
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002748 int ret;
2749
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002750 ret = -ENOMEM;
2751 ip6_dst_ops_template.kmem_cachep =
2752 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2753 SLAB_HWCACHE_ALIGN, NULL);
2754 if (!ip6_dst_ops_template.kmem_cachep)
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002755 goto out;;
David S. Miller14e50e52007-05-24 18:17:54 -07002756
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002757 ret = register_pernet_subsys(&ip6_route_net_ops);
2758 if (ret)
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002759 goto out_kmem_cache;
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002760
Arnaud Ebalard5dc121e2008-10-01 02:37:56 -07002761 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2762
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002763 /* Registering of the loopback is done before this portion of code,
2764 * the loopback reference in rt6_info will not be taken, do it
2765 * manually for init_net */
2766 init_net.ipv6.ip6_null_entry->u.dst.dev = init_net.loopback_dev;
2767 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2768 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2769 init_net.ipv6.ip6_prohibit_entry->u.dst.dev = init_net.loopback_dev;
2770 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2771 init_net.ipv6.ip6_blk_hole_entry->u.dst.dev = init_net.loopback_dev;
2772 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2773 #endif
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002774 ret = fib6_init();
2775 if (ret)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002776 goto out_register_subsys;
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002777
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002778 ret = xfrm6_init();
2779 if (ret)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002780 goto out_fib6_init;
Daniel Lezcanoc35b7e72007-12-08 00:14:11 -08002781
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002782 ret = fib6_rules_init();
2783 if (ret)
2784 goto xfrm6_init;
Daniel Lezcano7e5449c2007-12-08 00:14:54 -08002785
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002786 ret = -ENOBUFS;
2787 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2788 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2789 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2790 goto fib6_rules_init;
2791
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002792 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002793 if (ret)
2794 goto fib6_rules_init;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002795
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002796out:
2797 return ret;
2798
2799fib6_rules_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002800 fib6_rules_cleanup();
2801xfrm6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002802 xfrm6_fini();
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002803out_fib6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002804 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002805out_register_subsys:
2806 unregister_pernet_subsys(&ip6_route_net_ops);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002807out_kmem_cache:
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002808 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002809 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002810}
2811
2812void ip6_route_cleanup(void)
2813{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002814 unregister_netdevice_notifier(&ip6_route_dev_notifier);
Thomas Graf101367c2006-08-04 03:39:02 -07002815 fib6_rules_cleanup();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002816 xfrm6_fini();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002817 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002818 unregister_pernet_subsys(&ip6_route_net_ops);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002819 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002820}