blob: a415ac610e2d76ca6a06da566c4e2c6f847b2945 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09006 * Pedro Roque <roque@di.fc.ul.pt>
Linus Torvalds1da177e2005-04-16 15:20:36 -07007 *
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16/* Changes:
17 *
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
YOSHIFUJI Hideakic0bece92006-08-23 17:23:25 -070025 * Ville Nuorvala
26 * Fixed routing subtrees.
Linus Torvalds1da177e2005-04-16 15:20:36 -070027 */
28
Randy Dunlap4fc268d2006-01-11 12:17:47 -080029#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070030#include <linux/errno.h>
31#include <linux/types.h>
32#include <linux/times.h>
33#include <linux/socket.h>
34#include <linux/sockios.h>
35#include <linux/net.h>
36#include <linux/route.h>
37#include <linux/netdevice.h>
38#include <linux/in6.h>
39#include <linux/init.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070040#include <linux/if_arp.h>
41
42#ifdef CONFIG_PROC_FS
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
45#endif
46
47#include <net/snmp.h>
48#include <net/ipv6.h>
49#include <net/ip6_fib.h>
50#include <net/ip6_route.h>
51#include <net/ndisc.h>
52#include <net/addrconf.h>
53#include <net/tcp.h>
54#include <linux/rtnetlink.h>
55#include <net/dst.h>
56#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -070057#include <net/netevent.h>
Thomas Graf21713eb2006-08-15 00:35:24 -070058#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070059
60#include <asm/uaccess.h>
61
62#ifdef CONFIG_SYSCTL
63#include <linux/sysctl.h>
64#endif
65
/*
 * Compile-time tracing switch.  Raise RT6_DEBUG to 3 (or more) to turn
 * RDBG() and RT6_TRACE() into real printk() calls; below that both
 * expand to nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif
76
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -080077#define CLONE_OFFLINK_ROUTE 0
Linus Torvalds1da177e2005-04-16 15:20:36 -070078
79static int ip6_rt_max_size = 4096;
80static int ip6_rt_gc_min_interval = HZ / 2;
81static int ip6_rt_gc_timeout = 60*HZ;
82int ip6_rt_gc_interval = 30*HZ;
83static int ip6_rt_gc_elasticity = 9;
84static int ip6_rt_mtu_expires = 10*60*HZ;
85static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
86
87static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
88static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
89static struct dst_entry *ip6_negative_advice(struct dst_entry *);
90static void ip6_dst_destroy(struct dst_entry *);
91static void ip6_dst_ifdown(struct dst_entry *,
92 struct net_device *dev, int how);
93static int ip6_dst_gc(void);
94
95static int ip6_pkt_discard(struct sk_buff *skb);
96static int ip6_pkt_discard_out(struct sk_buff *skb);
97static void ip6_link_failure(struct sk_buff *skb);
98static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
99
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800100#ifdef CONFIG_IPV6_ROUTE_INFO
101static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
102 struct in6_addr *gwaddr, int ifindex,
103 unsigned pref);
104static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
105 struct in6_addr *gwaddr, int ifindex);
106#endif
107
Linus Torvalds1da177e2005-04-16 15:20:36 -0700108static struct dst_ops ip6_dst_ops = {
109 .family = AF_INET6,
110 .protocol = __constant_htons(ETH_P_IPV6),
111 .gc = ip6_dst_gc,
112 .gc_thresh = 1024,
113 .check = ip6_dst_check,
114 .destroy = ip6_dst_destroy,
115 .ifdown = ip6_dst_ifdown,
116 .negative_advice = ip6_negative_advice,
117 .link_failure = ip6_link_failure,
118 .update_pmtu = ip6_rt_update_pmtu,
119 .entry_size = sizeof(struct rt6_info),
120};
121
122struct rt6_info ip6_null_entry = {
123 .u = {
124 .dst = {
125 .__refcnt = ATOMIC_INIT(1),
126 .__use = 1,
127 .dev = &loopback_dev,
128 .obsolete = -1,
129 .error = -ENETUNREACH,
130 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
131 .input = ip6_pkt_discard,
132 .output = ip6_pkt_discard_out,
133 .ops = &ip6_dst_ops,
134 .path = (struct dst_entry*)&ip6_null_entry,
135 }
136 },
137 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
138 .rt6i_metric = ~(u32) 0,
139 .rt6i_ref = ATOMIC_INIT(1),
140};
141
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);
static int ip6_pkt_blk_hole(struct sk_buff *skb);

/* Static reject entry carrying -EACCES; packets go to ip6_pkt_prohibit*(). */
struct rt6_info ip6_prohibit_entry = {
	.u = {
		.dst = {
			.ops		= &ip6_dst_ops,
			.path		= (struct dst_entry *)&ip6_prohibit_entry,
			.dev		= &loopback_dev,
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
			.obsolete	= -1,
			.error		= -EACCES,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.input		= ip6_pkt_prohibit,
			.output		= ip6_pkt_prohibit_out,
		}
	},
	.rt6i_ref	= ATOMIC_INIT(1),
	.rt6i_metric	= ~(u32)0,
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
};

/*
 * Static reject entry carrying -EINVAL; the same handler,
 * ip6_pkt_blk_hole(), serves both the input and the output path.
 */
struct rt6_info ip6_blk_hole_entry = {
	.u = {
		.dst = {
			.ops		= &ip6_dst_ops,
			.path		= (struct dst_entry *)&ip6_blk_hole_entry,
			.dev		= &loopback_dev,
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
			.obsolete	= -1,
			.error		= -EINVAL,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.input		= ip6_pkt_blk_hole,
			.output		= ip6_pkt_blk_hole,
		}
	},
	.rt6i_ref	= ATOMIC_INIT(1),
	.rt6i_metric	= ~(u32)0,
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
};

#endif
189
Linus Torvalds1da177e2005-04-16 15:20:36 -0700190/* allocate dst with ip6_dst_ops */
191static __inline__ struct rt6_info *ip6_dst_alloc(void)
192{
193 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
194}
195
196static void ip6_dst_destroy(struct dst_entry *dst)
197{
198 struct rt6_info *rt = (struct rt6_info *)dst;
199 struct inet6_dev *idev = rt->rt6i_idev;
200
201 if (idev != NULL) {
202 rt->rt6i_idev = NULL;
203 in6_dev_put(idev);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900204 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700205}
206
207static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
208 int how)
209{
210 struct rt6_info *rt = (struct rt6_info *)dst;
211 struct inet6_dev *idev = rt->rt6i_idev;
212
213 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
214 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
215 if (loopback_idev != NULL) {
216 rt->rt6i_idev = loopback_idev;
217 in6_dev_put(idev);
218 }
219 }
220}
221
222static __inline__ int rt6_check_expired(const struct rt6_info *rt)
223{
224 return (rt->rt6i_flags & RTF_EXPIRES &&
225 time_after(jiffies, rt->rt6i_expires));
226}
227
Thomas Grafc71099a2006-08-04 23:20:06 -0700228static inline int rt6_need_strict(struct in6_addr *daddr)
229{
230 return (ipv6_addr_type(daddr) &
231 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
232}
233
Linus Torvalds1da177e2005-04-16 15:20:36 -0700234/*
Thomas Grafc71099a2006-08-04 23:20:06 -0700235 * Route lookup. Any table->tb6_lock is implied.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700236 */
237
238static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
239 int oif,
240 int strict)
241{
242 struct rt6_info *local = NULL;
243 struct rt6_info *sprt;
244
245 if (oif) {
Eric Dumazet7cc48262007-02-09 16:22:57 -0800246 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700247 struct net_device *dev = sprt->rt6i_dev;
248 if (dev->ifindex == oif)
249 return sprt;
250 if (dev->flags & IFF_LOOPBACK) {
251 if (sprt->rt6i_idev == NULL ||
252 sprt->rt6i_idev->dev->ifindex != oif) {
253 if (strict && oif)
254 continue;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900255 if (local && (!oif ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700256 local->rt6i_idev->dev->ifindex == oif))
257 continue;
258 }
259 local = sprt;
260 }
261 }
262
263 if (local)
264 return local;
265
266 if (strict)
267 return &ip6_null_entry;
268 }
269 return rt;
270}
271
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: send a neighbour solicitation towards
 * the next hop of @rt when that neighbour is not in a NUD_VALID state.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one
 * per minute; here the limit is the per-device rtr_probe_interval,
 * measured against neigh->updated.
 *
 * @rt may be NULL, in which case nothing happens.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;

	/* Cheap unlocked test first; it is repeated under the lock. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	/*
	 * neigh->updated is modified below, so the lock must be taken
	 * for writing: with only the read side held, two CPUs could both
	 * pass the interval test and send duplicate probes.
	 */
	write_lock_bh(&neigh->lock);
	if ((neigh->nud_state & NUD_VALID) ||
	    !time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		write_unlock_bh(&neigh->lock);
		return;
	}
	neigh->updated = jiffies;
	write_unlock_bh(&neigh->lock);

	/* Send the solicitation with the lock dropped. */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
	return;
}
#endif
307
Linus Torvalds1da177e2005-04-16 15:20:36 -0700308/*
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800309 * Default Router Selection (RFC 2461 6.3.6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700310 */
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800311static int inline rt6_check_dev(struct rt6_info *rt, int oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700312{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800313 struct net_device *dev = rt->rt6i_dev;
YOSHIFUJI Hideakia0d78eb2007-02-04 20:15:04 -0800314 int ret = 0;
315
316 if (!oif)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800317 return 2;
YOSHIFUJI Hideakia0d78eb2007-02-04 20:15:04 -0800318 if (dev->flags & IFF_LOOPBACK) {
319 if (!WARN_ON(rt->rt6i_idev == NULL) &&
320 rt->rt6i_idev->dev->ifindex == oif)
321 ret = 1;
322 else
323 return 0;
324 }
325 if (dev->ifindex == oif)
326 return 2;
327
328 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700329}
330
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800331static int inline rt6_check_neigh(struct rt6_info *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700332{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800333 struct neighbour *neigh = rt->rt6i_nexthop;
334 int m = 0;
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700335 if (rt->rt6i_flags & RTF_NONEXTHOP ||
336 !(rt->rt6i_flags & RTF_GATEWAY))
337 m = 1;
338 else if (neigh) {
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800339 read_lock_bh(&neigh->lock);
340 if (neigh->nud_state & NUD_VALID)
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700341 m = 2;
YOSHIFUJI Hideakiea73ee22006-11-06 09:45:44 -0800342 else if (!(neigh->nud_state & NUD_FAILED))
343 m = 1;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800344 read_unlock_bh(&neigh->lock);
345 }
346 return m;
347}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700348
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800349static int rt6_score_route(struct rt6_info *rt, int oif,
350 int strict)
351{
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700352 int m, n;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900353
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700354 m = rt6_check_dev(rt, oif);
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700355 if (!m && (strict & RT6_LOOKUP_F_IFACE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800356 return -1;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800357#ifdef CONFIG_IPV6_ROUTER_PREF
358 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
359#endif
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700360 n = rt6_check_neigh(rt);
YOSHIFUJI Hideaki557e92e2006-11-06 09:45:45 -0800361 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800362 return -1;
363 return m;
364}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700365
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800366static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
367 int strict)
368{
369 struct rt6_info *match = NULL, *last = NULL;
370 struct rt6_info *rt, *rt0 = *head;
371 u32 metric;
372 int mpri = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700373
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800374 RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
375 __FUNCTION__, head, head ? *head : NULL, oif);
376
377 for (rt = rt0, metric = rt0->rt6i_metric;
YOSHIFUJI Hideakic302e6d2006-04-28 15:59:15 -0700378 rt && rt->rt6i_metric == metric && (!last || rt != rt0);
Eric Dumazet7cc48262007-02-09 16:22:57 -0800379 rt = rt->u.dst.rt6_next) {
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800380 int m;
381
382 if (rt6_check_expired(rt))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700383 continue;
384
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800385 last = rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700386
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800387 m = rt6_score_route(rt, oif, strict);
388 if (m < 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700389 continue;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700390
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800391 if (m > mpri) {
YOSHIFUJI Hideakiea659e02006-11-06 09:45:45 -0800392 if (strict & RT6_LOOKUP_F_REACHABLE)
393 rt6_probe(match);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800394 match = rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700395 mpri = m;
YOSHIFUJI Hideakiea659e02006-11-06 09:45:45 -0800396 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
YOSHIFUJI Hideaki27097252006-03-20 17:05:13 -0800397 rt6_probe(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700398 }
399 }
400
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800401 if (!match &&
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700402 (strict & RT6_LOOKUP_F_REACHABLE) &&
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800403 last && last != rt0) {
404 /* no entries matched; do round-robin */
Ingo Molnar34af9462006-06-27 02:53:55 -0700405 static DEFINE_SPINLOCK(lock);
YOSHIFUJI Hideakic302e6d2006-04-28 15:59:15 -0700406 spin_lock(&lock);
Eric Dumazet7cc48262007-02-09 16:22:57 -0800407 *head = rt0->u.dst.rt6_next;
408 rt0->u.dst.rt6_next = last->u.dst.rt6_next;
409 last->u.dst.rt6_next = rt0;
YOSHIFUJI Hideakic302e6d2006-04-28 15:59:15 -0700410 spin_unlock(&lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700411 }
412
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800413 RT6_TRACE("%s() => %p, score=%d\n",
414 __FUNCTION__, match, mpri);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700415
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800416 return (match ? match : &ip6_null_entry);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700417}
418
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Handle one Route Information option.
 *
 * @dev:    device the option arrived on
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router
 *
 * Depending on the advertised lifetime the matching route is deleted
 * (lifetime 0), created, or refreshed; 0xffffffff means "never
 * expires".
 *
 * Returns 0 on success (including when nothing had to be done) and
 * -EINVAL when the option is malformed or carries the reserved
 * preference value.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	u32 lifetime;
	struct rt6_info *rt;

	/*
	 * Compare as signed: with an unsigned comparison a negative
	 * length would be converted to a huge value and slip through.
	 */
	if (len < (int)sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	/*
	 * RFC 4191, section 2.3: an option carrying the reserved
	 * preference value MUST be ignored, not treated as "medium".
	 */
	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = ntohl(rinfo->lifetime);
	if (lifetime == 0xffffffff) {
		/* infinity */
	} else if (lifetime > 0x7fffffff/HZ) {
		/* Avoid arithmetic overflow */
		lifetime = 0x7fffffff/HZ - 1;
	}

	if (rinfo->length == 3)
		/* A full 128-bit prefix is present; use it in place. */
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);

	/* Lifetime 0 withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (lifetime == 0xffffffff) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif
496
/*
 * Climb the fib6 tree after a failed match.
 *
 * Expects the enclosing function to provide the locals 'rt' and 'fn'
 * and the labels 'restart' and 'out'.  When 'rt' is &ip6_null_entry the
 * macro walks from 'fn' towards the root: at each step it moves to the
 * parent, or - if the parent has a subtree other than the one we came
 * from - looks up @saddr in that subtree.  It jumps to 'restart' at the
 * first node carrying RTN_RTINFO and to 'out' when 'fn' is a
 * RTN_TL_ROOT node.  For any other 'rt' it does nothing.
 */
#define BACKTRACK(saddr) \
do { \
	if (rt == &ip6_null_entry) { \
		struct fib6_node *pn; \
		for (;;) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (!FIB6_SUBTREE(pn) || FIB6_SUBTREE(pn) == fn) \
				fn = pn; \
			else \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
Thomas Grafc71099a2006-08-04 23:20:06 -0700514
515static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
516 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700517{
518 struct fib6_node *fn;
519 struct rt6_info *rt;
520
Thomas Grafc71099a2006-08-04 23:20:06 -0700521 read_lock_bh(&table->tb6_lock);
522 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
523restart:
524 rt = fn->leaf;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700525 rt = rt6_device_match(rt, fl->oif, flags);
YOSHIFUJI Hideaki982f56f2006-08-23 17:22:39 -0700526 BACKTRACK(&fl->fl6_src);
Thomas Grafc71099a2006-08-04 23:20:06 -0700527out:
YOSHIFUJI Hideaki33cc4892006-08-28 13:19:30 -0700528 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700529 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700530
531 rt->u.dst.lastuse = jiffies;
Thomas Grafc71099a2006-08-04 23:20:06 -0700532 rt->u.dst.__use++;
533
534 return rt;
535
536}
537
538struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
539 int oif, int strict)
540{
541 struct flowi fl = {
542 .oif = oif,
543 .nl_u = {
544 .ip6_u = {
545 .daddr = *daddr,
Thomas Grafc71099a2006-08-04 23:20:06 -0700546 },
547 },
548 };
549 struct dst_entry *dst;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700550 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
Thomas Grafc71099a2006-08-04 23:20:06 -0700551
Thomas Grafadaa70b2006-10-13 15:01:03 -0700552 if (saddr) {
553 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
554 flags |= RT6_LOOKUP_F_HAS_SADDR;
555 }
556
Thomas Grafc71099a2006-08-04 23:20:06 -0700557 dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
558 if (dst->error == 0)
559 return (struct rt6_info *) dst;
560
561 dst_release(dst);
562
Linus Torvalds1da177e2005-04-16 15:20:36 -0700563 return NULL;
564}
565
Thomas Grafc71099a2006-08-04 23:20:06 -0700566/* ip6_ins_rt is called with FREE table->tb6_lock.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700567 It takes new route entry, the addition fails by any reason the
568 route is freed. In any case, if caller does not hold it, it may
569 be destroyed.
570 */
571
Thomas Graf86872cb2006-08-22 00:01:08 -0700572static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700573{
574 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -0700575 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700576
Thomas Grafc71099a2006-08-04 23:20:06 -0700577 table = rt->rt6i_table;
578 write_lock_bh(&table->tb6_lock);
Thomas Graf86872cb2006-08-22 00:01:08 -0700579 err = fib6_add(&table->tb6_root, rt, info);
Thomas Grafc71099a2006-08-04 23:20:06 -0700580 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700581
582 return err;
583}
584
Thomas Graf40e22e82006-08-22 00:00:45 -0700585int ip6_ins_rt(struct rt6_info *rt)
586{
Thomas Graf86872cb2006-08-22 00:01:08 -0700587 return __ip6_ins_rt(rt, NULL);
Thomas Graf40e22e82006-08-22 00:00:45 -0700588}
589
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800590static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
591 struct in6_addr *saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700592{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700593 struct rt6_info *rt;
594
595 /*
596 * Clone the route.
597 */
598
599 rt = ip6_rt_copy(ort);
600
601 if (rt) {
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900602 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
603 if (rt->rt6i_dst.plen != 128 &&
604 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
605 rt->rt6i_flags |= RTF_ANYCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700606 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900607 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700608
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900609 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700610 rt->rt6i_dst.plen = 128;
611 rt->rt6i_flags |= RTF_CACHE;
612 rt->u.dst.flags |= DST_HOST;
613
614#ifdef CONFIG_IPV6_SUBTREES
615 if (rt->rt6i_src.plen && saddr) {
616 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
617 rt->rt6i_src.plen = 128;
618 }
619#endif
620
621 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
622
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800623 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700624
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800625 return rt;
626}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700627
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800628static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
629{
630 struct rt6_info *rt = ip6_rt_copy(ort);
631 if (rt) {
632 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
633 rt->rt6i_dst.plen = 128;
634 rt->rt6i_flags |= RTF_CACHE;
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800635 rt->u.dst.flags |= DST_HOST;
636 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
637 }
638 return rt;
639}
640
Adrian Bunk8ce11e62006-08-07 21:50:48 -0700641static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
642 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700643{
644 struct fib6_node *fn;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800645 struct rt6_info *rt, *nrt;
Thomas Grafc71099a2006-08-04 23:20:06 -0700646 int strict = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700647 int attempts = 3;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800648 int err;
YOSHIFUJI Hideakiea659e02006-11-06 09:45:45 -0800649 int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700650
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700651 strict |= flags & RT6_LOOKUP_F_IFACE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700652
653relookup:
Thomas Grafc71099a2006-08-04 23:20:06 -0700654 read_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700655
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800656restart_2:
Thomas Grafc71099a2006-08-04 23:20:06 -0700657 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700658
659restart:
Thomas Grafc71099a2006-08-04 23:20:06 -0700660 rt = rt6_select(&fn->leaf, fl->iif, strict | reachable);
YOSHIFUJI Hideaki982f56f2006-08-23 17:22:39 -0700661 BACKTRACK(&fl->fl6_src);
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800662 if (rt == &ip6_null_entry ||
663 rt->rt6i_flags & RTF_CACHE)
YOSHIFUJI Hideaki1ddef0442006-03-20 17:01:24 -0800664 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700665
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800666 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700667 read_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800668
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800669 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800670 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800671 else {
672#if CLONE_OFFLINK_ROUTE
673 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
674#else
675 goto out2;
676#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700677 }
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800678
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800679 dst_release(&rt->u.dst);
680 rt = nrt ? : &ip6_null_entry;
681
682 dst_hold(&rt->u.dst);
683 if (nrt) {
Thomas Graf40e22e82006-08-22 00:00:45 -0700684 err = ip6_ins_rt(nrt);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800685 if (!err)
686 goto out2;
687 }
688
689 if (--attempts <= 0)
690 goto out2;
691
692 /*
Thomas Grafc71099a2006-08-04 23:20:06 -0700693 * Race condition! In the gap, when table->tb6_lock was
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800694 * released someone could insert this route. Relookup.
695 */
696 dst_release(&rt->u.dst);
697 goto relookup;
698
699out:
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800700 if (reachable) {
701 reachable = 0;
702 goto restart_2;
703 }
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800704 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700705 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700706out2:
707 rt->u.dst.lastuse = jiffies;
708 rt->u.dst.__use++;
Thomas Grafc71099a2006-08-04 23:20:06 -0700709
710 return rt;
711}
712
713void ip6_route_input(struct sk_buff *skb)
714{
715 struct ipv6hdr *iph = skb->nh.ipv6h;
Thomas Grafadaa70b2006-10-13 15:01:03 -0700716 int flags = RT6_LOOKUP_F_HAS_SADDR;
Thomas Grafc71099a2006-08-04 23:20:06 -0700717 struct flowi fl = {
718 .iif = skb->dev->ifindex,
719 .nl_u = {
720 .ip6_u = {
721 .daddr = iph->daddr,
722 .saddr = iph->saddr,
Al Viro90bcaf72006-11-08 00:25:17 -0800723 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
Thomas Grafc71099a2006-08-04 23:20:06 -0700724 },
725 },
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900726 .mark = skb->mark,
Thomas Grafc71099a2006-08-04 23:20:06 -0700727 .proto = iph->nexthdr,
728 };
Thomas Grafadaa70b2006-10-13 15:01:03 -0700729
730 if (rt6_need_strict(&iph->daddr))
731 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700732
733 skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
734}
735
736static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
737 struct flowi *fl, int flags)
738{
739 struct fib6_node *fn;
740 struct rt6_info *rt, *nrt;
741 int strict = 0;
742 int attempts = 3;
743 int err;
YOSHIFUJI Hideakiea659e02006-11-06 09:45:45 -0800744 int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700745
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700746 strict |= flags & RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700747
748relookup:
749 read_lock_bh(&table->tb6_lock);
750
751restart_2:
752 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
753
754restart:
755 rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
YOSHIFUJI Hideaki982f56f2006-08-23 17:22:39 -0700756 BACKTRACK(&fl->fl6_src);
Thomas Grafc71099a2006-08-04 23:20:06 -0700757 if (rt == &ip6_null_entry ||
758 rt->rt6i_flags & RTF_CACHE)
759 goto out;
760
761 dst_hold(&rt->u.dst);
762 read_unlock_bh(&table->tb6_lock);
763
764 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
765 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
766 else {
767#if CLONE_OFFLINK_ROUTE
768 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
769#else
770 goto out2;
771#endif
772 }
773
774 dst_release(&rt->u.dst);
775 rt = nrt ? : &ip6_null_entry;
776
777 dst_hold(&rt->u.dst);
778 if (nrt) {
Thomas Graf40e22e82006-08-22 00:00:45 -0700779 err = ip6_ins_rt(nrt);
Thomas Grafc71099a2006-08-04 23:20:06 -0700780 if (!err)
781 goto out2;
782 }
783
784 if (--attempts <= 0)
785 goto out2;
786
787 /*
788 * Race condition! In the gap, when table->tb6_lock was
789 * released someone could insert this route. Relookup.
790 */
791 dst_release(&rt->u.dst);
792 goto relookup;
793
794out:
795 if (reachable) {
796 reachable = 0;
797 goto restart_2;
798 }
799 dst_hold(&rt->u.dst);
800 read_unlock_bh(&table->tb6_lock);
801out2:
802 rt->u.dst.lastuse = jiffies;
803 rt->u.dst.__use++;
804 return rt;
805}
806
807struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
808{
809 int flags = 0;
810
811 if (rt6_need_strict(&fl->fl6_dst))
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700812 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700813
Thomas Grafadaa70b2006-10-13 15:01:03 -0700814 if (!ipv6_addr_any(&fl->fl6_src))
815 flags |= RT6_LOOKUP_F_HAS_SADDR;
816
Thomas Grafc71099a2006-08-04 23:20:06 -0700817 return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700818}
819
820
821/*
822 * Destination cache support functions
823 */
824
825static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
826{
827 struct rt6_info *rt;
828
829 rt = (struct rt6_info *) dst;
830
831 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
832 return dst;
833
834 return NULL;
835}
836
837static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
838{
839 struct rt6_info *rt = (struct rt6_info *) dst;
840
841 if (rt) {
842 if (rt->rt6i_flags & RTF_CACHE)
Thomas Grafe0a1ad732006-08-22 00:00:21 -0700843 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700844 else
845 dst_release(dst);
846 }
847 return NULL;
848}
849
850static void ip6_link_failure(struct sk_buff *skb)
851{
852 struct rt6_info *rt;
853
854 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
855
856 rt = (struct rt6_info *) skb->dst;
857 if (rt) {
858 if (rt->rt6i_flags&RTF_CACHE) {
859 dst_set_expires(&rt->u.dst, 0);
860 rt->rt6i_flags |= RTF_EXPIRES;
861 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
862 rt->rt6i_node->fn_sernum = -1;
863 }
864}
865
866static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
867{
868 struct rt6_info *rt6 = (struct rt6_info*)dst;
869
870 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
871 rt6->rt6i_flags |= RTF_MODIFIED;
872 if (mtu < IPV6_MIN_MTU) {
873 mtu = IPV6_MIN_MTU;
874 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
875 }
876 dst->metrics[RTAX_MTU-1] = mtu;
Tom Tucker8d717402006-07-30 20:43:36 -0700877 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700878 }
879}
880
Linus Torvalds1da177e2005-04-16 15:20:36 -0700881static int ipv6_get_mtu(struct net_device *dev);
882
883static inline unsigned int ipv6_advmss(unsigned int mtu)
884{
885 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
886
887 if (mtu < ip6_rt_min_advmss)
888 mtu = ip6_rt_min_advmss;
889
890 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900891 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
892 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
893 * IPV6_MAXPLEN is also valid and means: "any MSS,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700894 * rely only on pmtu discovery"
895 */
896 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
897 mtu = IPV6_MAXPLEN;
898 return mtu;
899}
900
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700901static struct dst_entry *ndisc_dst_gc_list;
Adrian Bunk8ce11e62006-08-07 21:50:48 -0700902static DEFINE_SPINLOCK(ndisc_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700903
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900904struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700905 struct neighbour *neigh,
906 struct in6_addr *addr,
907 int (*output)(struct sk_buff *))
908{
909 struct rt6_info *rt;
910 struct inet6_dev *idev = in6_dev_get(dev);
911
912 if (unlikely(idev == NULL))
913 return NULL;
914
915 rt = ip6_dst_alloc();
916 if (unlikely(rt == NULL)) {
917 in6_dev_put(idev);
918 goto out;
919 }
920
921 dev_hold(dev);
922 if (neigh)
923 neigh_hold(neigh);
924 else
925 neigh = ndisc_get_neigh(dev, addr);
926
927 rt->rt6i_dev = dev;
928 rt->rt6i_idev = idev;
929 rt->rt6i_nexthop = neigh;
930 atomic_set(&rt->u.dst.__refcnt, 1);
931 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
932 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
933 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
934 rt->u.dst.output = output;
935
936#if 0 /* there's no chance to use these for ndisc */
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900937 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
938 ? DST_HOST
Linus Torvalds1da177e2005-04-16 15:20:36 -0700939 : 0;
940 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
941 rt->rt6i_dst.plen = 128;
942#endif
943
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700944 spin_lock_bh(&ndisc_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700945 rt->u.dst.next = ndisc_dst_gc_list;
946 ndisc_dst_gc_list = &rt->u.dst;
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700947 spin_unlock_bh(&ndisc_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700948
949 fib6_force_start_gc();
950
951out:
YOSHIFUJI Hideaki40aa7b92006-10-19 13:50:09 +0900952 return &rt->u.dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700953}
954
955int ndisc_dst_gc(int *more)
956{
957 struct dst_entry *dst, *next, **pprev;
958 int freed;
959
960 next = NULL;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900961 freed = 0;
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700962
963 spin_lock_bh(&ndisc_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700964 pprev = &ndisc_dst_gc_list;
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700965
Linus Torvalds1da177e2005-04-16 15:20:36 -0700966 while ((dst = *pprev) != NULL) {
967 if (!atomic_read(&dst->__refcnt)) {
968 *pprev = dst->next;
969 dst_free(dst);
970 freed++;
971 } else {
972 pprev = &dst->next;
973 (*more)++;
974 }
975 }
976
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700977 spin_unlock_bh(&ndisc_lock);
978
Linus Torvalds1da177e2005-04-16 15:20:36 -0700979 return freed;
980}
981
982static int ip6_dst_gc(void)
983{
984 static unsigned expire = 30*HZ;
985 static unsigned long last_gc;
986 unsigned long now = jiffies;
987
988 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
989 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
990 goto out;
991
992 expire++;
993 fib6_run_gc(expire);
994 last_gc = now;
995 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
996 expire = ip6_rt_gc_timeout>>1;
997
998out:
999 expire -= expire>>ip6_rt_gc_elasticity;
1000 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
1001}
1002
1003/* Clean host part of a prefix. Not necessary in radix tree,
1004 but results in cleaner routing tables.
1005
1006 Remove it only when all the things will work!
1007 */
1008
1009static int ipv6_get_mtu(struct net_device *dev)
1010{
1011 int mtu = IPV6_MIN_MTU;
1012 struct inet6_dev *idev;
1013
1014 idev = in6_dev_get(dev);
1015 if (idev) {
1016 mtu = idev->cnf.mtu6;
1017 in6_dev_put(idev);
1018 }
1019 return mtu;
1020}
1021
1022int ipv6_get_hoplimit(struct net_device *dev)
1023{
1024 int hoplimit = ipv6_devconf.hop_limit;
1025 struct inet6_dev *idev;
1026
1027 idev = in6_dev_get(dev);
1028 if (idev) {
1029 hoplimit = idev->cnf.hop_limit;
1030 in6_dev_put(idev);
1031 }
1032 return hoplimit;
1033}
1034
1035/*
1036 *
1037 */
1038
Thomas Graf86872cb2006-08-22 00:01:08 -07001039int ip6_route_add(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001040{
1041 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001042 struct rt6_info *rt = NULL;
1043 struct net_device *dev = NULL;
1044 struct inet6_dev *idev = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001045 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001046 int addr_type;
1047
Thomas Graf86872cb2006-08-22 00:01:08 -07001048 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001049 return -EINVAL;
1050#ifndef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001051 if (cfg->fc_src_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001052 return -EINVAL;
1053#endif
Thomas Graf86872cb2006-08-22 00:01:08 -07001054 if (cfg->fc_ifindex) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001055 err = -ENODEV;
Thomas Graf86872cb2006-08-22 00:01:08 -07001056 dev = dev_get_by_index(cfg->fc_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001057 if (!dev)
1058 goto out;
1059 idev = in6_dev_get(dev);
1060 if (!idev)
1061 goto out;
1062 }
1063
Thomas Graf86872cb2006-08-22 00:01:08 -07001064 if (cfg->fc_metric == 0)
1065 cfg->fc_metric = IP6_RT_PRIO_USER;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001066
Thomas Graf86872cb2006-08-22 00:01:08 -07001067 table = fib6_new_table(cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001068 if (table == NULL) {
1069 err = -ENOBUFS;
1070 goto out;
1071 }
1072
Linus Torvalds1da177e2005-04-16 15:20:36 -07001073 rt = ip6_dst_alloc();
1074
1075 if (rt == NULL) {
1076 err = -ENOMEM;
1077 goto out;
1078 }
1079
1080 rt->u.dst.obsolete = -1;
Thomas Graf86872cb2006-08-22 00:01:08 -07001081 rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001082
Thomas Graf86872cb2006-08-22 00:01:08 -07001083 if (cfg->fc_protocol == RTPROT_UNSPEC)
1084 cfg->fc_protocol = RTPROT_BOOT;
1085 rt->rt6i_protocol = cfg->fc_protocol;
1086
1087 addr_type = ipv6_addr_type(&cfg->fc_dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001088
1089 if (addr_type & IPV6_ADDR_MULTICAST)
1090 rt->u.dst.input = ip6_mc_input;
1091 else
1092 rt->u.dst.input = ip6_forward;
1093
1094 rt->u.dst.output = ip6_output;
1095
Thomas Graf86872cb2006-08-22 00:01:08 -07001096 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1097 rt->rt6i_dst.plen = cfg->fc_dst_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001098 if (rt->rt6i_dst.plen == 128)
1099 rt->u.dst.flags = DST_HOST;
1100
1101#ifdef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001102 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1103 rt->rt6i_src.plen = cfg->fc_src_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001104#endif
1105
Thomas Graf86872cb2006-08-22 00:01:08 -07001106 rt->rt6i_metric = cfg->fc_metric;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001107
1108 /* We cannot add true routes via loopback here,
1109 they would result in kernel looping; promote them to reject routes
1110 */
Thomas Graf86872cb2006-08-22 00:01:08 -07001111 if ((cfg->fc_flags & RTF_REJECT) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001112 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1113 /* hold loopback dev/idev if we haven't done so. */
1114 if (dev != &loopback_dev) {
1115 if (dev) {
1116 dev_put(dev);
1117 in6_dev_put(idev);
1118 }
1119 dev = &loopback_dev;
1120 dev_hold(dev);
1121 idev = in6_dev_get(dev);
1122 if (!idev) {
1123 err = -ENODEV;
1124 goto out;
1125 }
1126 }
1127 rt->u.dst.output = ip6_pkt_discard_out;
1128 rt->u.dst.input = ip6_pkt_discard;
1129 rt->u.dst.error = -ENETUNREACH;
1130 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1131 goto install_route;
1132 }
1133
Thomas Graf86872cb2006-08-22 00:01:08 -07001134 if (cfg->fc_flags & RTF_GATEWAY) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001135 struct in6_addr *gw_addr;
1136 int gwa_type;
1137
Thomas Graf86872cb2006-08-22 00:01:08 -07001138 gw_addr = &cfg->fc_gateway;
1139 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001140 gwa_type = ipv6_addr_type(gw_addr);
1141
1142 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1143 struct rt6_info *grt;
1144
1145 /* IPv6 strictly inhibits using not link-local
1146 addresses as nexthop address.
1147 Otherwise, router will not able to send redirects.
1148 It is very good, but in some (rare!) circumstances
1149 (SIT, PtP, NBMA NOARP links) it is handy to allow
1150 some exceptions. --ANK
1151 */
1152 err = -EINVAL;
1153 if (!(gwa_type&IPV6_ADDR_UNICAST))
1154 goto out;
1155
Thomas Graf86872cb2006-08-22 00:01:08 -07001156 grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001157
1158 err = -EHOSTUNREACH;
1159 if (grt == NULL)
1160 goto out;
1161 if (dev) {
1162 if (dev != grt->rt6i_dev) {
1163 dst_release(&grt->u.dst);
1164 goto out;
1165 }
1166 } else {
1167 dev = grt->rt6i_dev;
1168 idev = grt->rt6i_idev;
1169 dev_hold(dev);
1170 in6_dev_hold(grt->rt6i_idev);
1171 }
1172 if (!(grt->rt6i_flags&RTF_GATEWAY))
1173 err = 0;
1174 dst_release(&grt->u.dst);
1175
1176 if (err)
1177 goto out;
1178 }
1179 err = -EINVAL;
1180 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1181 goto out;
1182 }
1183
1184 err = -ENODEV;
1185 if (dev == NULL)
1186 goto out;
1187
Thomas Graf86872cb2006-08-22 00:01:08 -07001188 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001189 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1190 if (IS_ERR(rt->rt6i_nexthop)) {
1191 err = PTR_ERR(rt->rt6i_nexthop);
1192 rt->rt6i_nexthop = NULL;
1193 goto out;
1194 }
1195 }
1196
Thomas Graf86872cb2006-08-22 00:01:08 -07001197 rt->rt6i_flags = cfg->fc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001198
1199install_route:
Thomas Graf86872cb2006-08-22 00:01:08 -07001200 if (cfg->fc_mx) {
1201 struct nlattr *nla;
1202 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001203
Thomas Graf86872cb2006-08-22 00:01:08 -07001204 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1205 int type = nla->nla_type;
1206
1207 if (type) {
1208 if (type > RTAX_MAX) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001209 err = -EINVAL;
1210 goto out;
1211 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001212
1213 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001214 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001215 }
1216 }
1217
1218 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1219 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1220 if (!rt->u.dst.metrics[RTAX_MTU-1])
1221 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1222 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1223 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1224 rt->u.dst.dev = dev;
1225 rt->rt6i_idev = idev;
Thomas Grafc71099a2006-08-04 23:20:06 -07001226 rt->rt6i_table = table;
Thomas Graf86872cb2006-08-22 00:01:08 -07001227 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001228
1229out:
1230 if (dev)
1231 dev_put(dev);
1232 if (idev)
1233 in6_dev_put(idev);
1234 if (rt)
YOSHIFUJI Hideaki40aa7b92006-10-19 13:50:09 +09001235 dst_free(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001236 return err;
1237}
1238
Thomas Graf86872cb2006-08-22 00:01:08 -07001239static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001240{
1241 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -07001242 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001243
Patrick McHardy6c813a72006-08-06 22:22:47 -07001244 if (rt == &ip6_null_entry)
1245 return -ENOENT;
1246
Thomas Grafc71099a2006-08-04 23:20:06 -07001247 table = rt->rt6i_table;
1248 write_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001249
Thomas Graf86872cb2006-08-22 00:01:08 -07001250 err = fib6_del(rt, info);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001251 dst_release(&rt->u.dst);
1252
Thomas Grafc71099a2006-08-04 23:20:06 -07001253 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001254
1255 return err;
1256}
1257
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001258int ip6_del_rt(struct rt6_info *rt)
1259{
Thomas Graf86872cb2006-08-22 00:01:08 -07001260 return __ip6_del_rt(rt, NULL);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001261}
1262
Thomas Graf86872cb2006-08-22 00:01:08 -07001263static int ip6_route_del(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001264{
Thomas Grafc71099a2006-08-04 23:20:06 -07001265 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001266 struct fib6_node *fn;
1267 struct rt6_info *rt;
1268 int err = -ESRCH;
1269
Thomas Graf86872cb2006-08-22 00:01:08 -07001270 table = fib6_get_table(cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001271 if (table == NULL)
1272 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001273
Thomas Grafc71099a2006-08-04 23:20:06 -07001274 read_lock_bh(&table->tb6_lock);
1275
1276 fn = fib6_locate(&table->tb6_root,
Thomas Graf86872cb2006-08-22 00:01:08 -07001277 &cfg->fc_dst, cfg->fc_dst_len,
1278 &cfg->fc_src, cfg->fc_src_len);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001279
Linus Torvalds1da177e2005-04-16 15:20:36 -07001280 if (fn) {
Eric Dumazet7cc48262007-02-09 16:22:57 -08001281 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
Thomas Graf86872cb2006-08-22 00:01:08 -07001282 if (cfg->fc_ifindex &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001283 (rt->rt6i_dev == NULL ||
Thomas Graf86872cb2006-08-22 00:01:08 -07001284 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001285 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001286 if (cfg->fc_flags & RTF_GATEWAY &&
1287 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001288 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001289 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001290 continue;
1291 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001292 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001293
Thomas Graf86872cb2006-08-22 00:01:08 -07001294 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001295 }
1296 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001297 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001298
1299 return err;
1300}
1301
1302/*
1303 * Handle redirects
1304 */
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001305struct ip6rd_flowi {
1306 struct flowi fl;
1307 struct in6_addr gateway;
1308};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001309
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001310static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
1311 struct flowi *fl,
1312 int flags)
1313{
1314 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1315 struct rt6_info *rt;
1316 struct fib6_node *fn;
Thomas Grafc71099a2006-08-04 23:20:06 -07001317
Linus Torvalds1da177e2005-04-16 15:20:36 -07001318 /*
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001319 * Get the "current" route for this destination and
1320 * check if the redirect has come from approriate router.
1321 *
1322 * RFC 2461 specifies that redirects should only be
1323 * accepted if they come from the nexthop to the target.
1324 * Due to the way the routes are chosen, this notion
1325 * is a bit fuzzy and one might need to check all possible
1326 * routes.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001327 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001328
Thomas Grafc71099a2006-08-04 23:20:06 -07001329 read_lock_bh(&table->tb6_lock);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001330 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001331restart:
Eric Dumazet7cc48262007-02-09 16:22:57 -08001332 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001333 /*
1334 * Current route is on-link; redirect is always invalid.
1335 *
1336 * Seems, previous statement is not true. It could
1337 * be node, which looks for us as on-link (f.e. proxy ndisc)
1338 * But then router serving it might decide, that we should
1339 * know truth 8)8) --ANK (980726).
1340 */
1341 if (rt6_check_expired(rt))
1342 continue;
1343 if (!(rt->rt6i_flags & RTF_GATEWAY))
1344 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001345 if (fl->oif != rt->rt6i_dev->ifindex)
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001346 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001347 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001348 continue;
1349 break;
1350 }
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001351
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001352 if (!rt)
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001353 rt = &ip6_null_entry;
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001354 BACKTRACK(&fl->fl6_src);
1355out:
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001356 dst_hold(&rt->u.dst);
1357
1358 read_unlock_bh(&table->tb6_lock);
1359
1360 return rt;
1361};
1362
1363static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1364 struct in6_addr *src,
1365 struct in6_addr *gateway,
1366 struct net_device *dev)
1367{
Thomas Grafadaa70b2006-10-13 15:01:03 -07001368 int flags = RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001369 struct ip6rd_flowi rdfl = {
1370 .fl = {
1371 .oif = dev->ifindex,
1372 .nl_u = {
1373 .ip6_u = {
1374 .daddr = *dest,
1375 .saddr = *src,
1376 },
1377 },
1378 },
1379 .gateway = *gateway,
1380 };
Thomas Grafadaa70b2006-10-13 15:01:03 -07001381
1382 if (rt6_need_strict(dest))
1383 flags |= RT6_LOOKUP_F_IFACE;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001384
1385 return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect);
1386}
1387
1388void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1389 struct in6_addr *saddr,
1390 struct neighbour *neigh, u8 *lladdr, int on_link)
1391{
1392 struct rt6_info *rt, *nrt = NULL;
1393 struct netevent_redirect netevent;
1394
1395 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1396
1397 if (rt == &ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001398 if (net_ratelimit())
1399 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1400 "for redirect target\n");
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001401 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001402 }
1403
Linus Torvalds1da177e2005-04-16 15:20:36 -07001404 /*
1405 * We have finally decided to accept it.
1406 */
1407
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001408 neigh_update(neigh, lladdr, NUD_STALE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001409 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1410 NEIGH_UPDATE_F_OVERRIDE|
1411 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1412 NEIGH_UPDATE_F_ISROUTER))
1413 );
1414
1415 /*
1416 * Redirect received -> path was valid.
1417 * Look, redirects are sent only in response to data packets,
1418 * so that this nexthop apparently is reachable. --ANK
1419 */
1420 dst_confirm(&rt->u.dst);
1421
1422 /* Duplicate redirect: silently ignore. */
1423 if (neigh == rt->u.dst.neighbour)
1424 goto out;
1425
1426 nrt = ip6_rt_copy(rt);
1427 if (nrt == NULL)
1428 goto out;
1429
1430 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1431 if (on_link)
1432 nrt->rt6i_flags &= ~RTF_GATEWAY;
1433
1434 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1435 nrt->rt6i_dst.plen = 128;
1436 nrt->u.dst.flags |= DST_HOST;
1437
1438 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1439 nrt->rt6i_nexthop = neigh_clone(neigh);
1440 /* Reset pmtu, it may be better */
1441 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1442 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1443
Thomas Graf40e22e82006-08-22 00:00:45 -07001444 if (ip6_ins_rt(nrt))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001445 goto out;
1446
Tom Tucker8d717402006-07-30 20:43:36 -07001447 netevent.old = &rt->u.dst;
1448 netevent.new = &nrt->u.dst;
1449 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1450
Linus Torvalds1da177e2005-04-16 15:20:36 -07001451 if (rt->rt6i_flags&RTF_CACHE) {
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001452 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001453 return;
1454 }
1455
1456out:
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001457 dst_release(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001458 return;
1459}
1460
1461/*
1462 * Handle ICMP "packet too big" messages
1463 * i.e. Path MTU discovery
1464 */
1465
1466void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1467 struct net_device *dev, u32 pmtu)
1468{
1469 struct rt6_info *rt, *nrt;
1470 int allfrag = 0;
1471
1472 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1473 if (rt == NULL)
1474 return;
1475
1476 if (pmtu >= dst_mtu(&rt->u.dst))
1477 goto out;
1478
1479 if (pmtu < IPV6_MIN_MTU) {
1480 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001481 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
Linus Torvalds1da177e2005-04-16 15:20:36 -07001482 * MTU (1280) and a fragment header should always be included
1483 * after a node receiving Too Big message reporting PMTU is
1484 * less than the IPv6 Minimum Link MTU.
1485 */
1486 pmtu = IPV6_MIN_MTU;
1487 allfrag = 1;
1488 }
1489
1490 /* New mtu received -> path was valid.
1491 They are sent only in response to data packets,
1492 so that this nexthop apparently is reachable. --ANK
1493 */
1494 dst_confirm(&rt->u.dst);
1495
1496 /* Host route. If it is static, it would be better
1497 not to override it, but add new one, so that
1498 when cache entry will expire old pmtu
1499 would return automatically.
1500 */
1501 if (rt->rt6i_flags & RTF_CACHE) {
1502 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1503 if (allfrag)
1504 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1505 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1506 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1507 goto out;
1508 }
1509
1510 /* Network route.
1511 Two cases are possible:
1512 1. It is connected route. Action: COW
1513 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1514 */
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001515 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001516 nrt = rt6_alloc_cow(rt, daddr, saddr);
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001517 else
1518 nrt = rt6_alloc_clone(rt, daddr);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001519
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001520 if (nrt) {
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001521 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1522 if (allfrag)
1523 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1524
1525 /* According to RFC 1981, detecting PMTU increase shouldn't be
1526 * happened within 5 mins, the recommended timer is 10 mins.
1527 * Here this route expiration time is set to ip6_rt_mtu_expires
1528 * which is 10 mins. After 10 mins the decreased pmtu is expired
1529 * and detecting PMTU increase will be automatically happened.
1530 */
1531 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1532 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1533
Thomas Graf40e22e82006-08-22 00:00:45 -07001534 ip6_ins_rt(nrt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001535 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001536out:
1537 dst_release(&rt->u.dst);
1538}
1539
1540/*
1541 * Misc support functions
1542 */
1543
1544static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1545{
1546 struct rt6_info *rt = ip6_dst_alloc();
1547
1548 if (rt) {
1549 rt->u.dst.input = ort->u.dst.input;
1550 rt->u.dst.output = ort->u.dst.output;
1551
1552 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
Ville Nuorvala22e1e4d2006-10-16 22:14:26 -07001553 rt->u.dst.error = ort->u.dst.error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001554 rt->u.dst.dev = ort->u.dst.dev;
1555 if (rt->u.dst.dev)
1556 dev_hold(rt->u.dst.dev);
1557 rt->rt6i_idev = ort->rt6i_idev;
1558 if (rt->rt6i_idev)
1559 in6_dev_hold(rt->rt6i_idev);
1560 rt->u.dst.lastuse = jiffies;
1561 rt->rt6i_expires = 0;
1562
1563 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1564 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1565 rt->rt6i_metric = 0;
1566
1567 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1568#ifdef CONFIG_IPV6_SUBTREES
1569 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1570#endif
Thomas Grafc71099a2006-08-04 23:20:06 -07001571 rt->rt6i_table = ort->rt6i_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001572 }
1573 return rt;
1574}
1575
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001576#ifdef CONFIG_IPV6_ROUTE_INFO
1577static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1578 struct in6_addr *gwaddr, int ifindex)
1579{
1580 struct fib6_node *fn;
1581 struct rt6_info *rt = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001582 struct fib6_table *table;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001583
Thomas Grafc71099a2006-08-04 23:20:06 -07001584 table = fib6_get_table(RT6_TABLE_INFO);
1585 if (table == NULL)
1586 return NULL;
1587
1588 write_lock_bh(&table->tb6_lock);
1589 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001590 if (!fn)
1591 goto out;
1592
Eric Dumazet7cc48262007-02-09 16:22:57 -08001593 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001594 if (rt->rt6i_dev->ifindex != ifindex)
1595 continue;
1596 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1597 continue;
1598 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1599 continue;
1600 dst_hold(&rt->u.dst);
1601 break;
1602 }
1603out:
Thomas Grafc71099a2006-08-04 23:20:06 -07001604 write_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001605 return rt;
1606}
1607
1608static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1609 struct in6_addr *gwaddr, int ifindex,
1610 unsigned pref)
1611{
Thomas Graf86872cb2006-08-22 00:01:08 -07001612 struct fib6_config cfg = {
1613 .fc_table = RT6_TABLE_INFO,
1614 .fc_metric = 1024,
1615 .fc_ifindex = ifindex,
1616 .fc_dst_len = prefixlen,
1617 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1618 RTF_UP | RTF_PREF(pref),
1619 };
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001620
Thomas Graf86872cb2006-08-22 00:01:08 -07001621 ipv6_addr_copy(&cfg.fc_dst, prefix);
1622 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1623
YOSHIFUJI Hideakie317da92006-03-20 17:06:42 -08001624 /* We should treat it as a default route if prefix length is 0. */
1625 if (!prefixlen)
Thomas Graf86872cb2006-08-22 00:01:08 -07001626 cfg.fc_flags |= RTF_DEFAULT;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001627
Thomas Graf86872cb2006-08-22 00:01:08 -07001628 ip6_route_add(&cfg);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001629
1630 return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1631}
1632#endif
1633
Linus Torvalds1da177e2005-04-16 15:20:36 -07001634struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001635{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001636 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001637 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001638
Thomas Grafc71099a2006-08-04 23:20:06 -07001639 table = fib6_get_table(RT6_TABLE_DFLT);
1640 if (table == NULL)
1641 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001642
Thomas Grafc71099a2006-08-04 23:20:06 -07001643 write_lock_bh(&table->tb6_lock);
Eric Dumazet7cc48262007-02-09 16:22:57 -08001644 for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001645 if (dev == rt->rt6i_dev &&
YOSHIFUJI Hideaki045927f2006-03-20 17:00:48 -08001646 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001647 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1648 break;
1649 }
1650 if (rt)
1651 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001652 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001653 return rt;
1654}
1655
1656struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001657 struct net_device *dev,
1658 unsigned int pref)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001659{
Thomas Graf86872cb2006-08-22 00:01:08 -07001660 struct fib6_config cfg = {
1661 .fc_table = RT6_TABLE_DFLT,
1662 .fc_metric = 1024,
1663 .fc_ifindex = dev->ifindex,
1664 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1665 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1666 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001667
Thomas Graf86872cb2006-08-22 00:01:08 -07001668 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001669
Thomas Graf86872cb2006-08-22 00:01:08 -07001670 ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001671
Linus Torvalds1da177e2005-04-16 15:20:36 -07001672 return rt6_get_dflt_router(gwaddr, dev);
1673}
1674
1675void rt6_purge_dflt_routers(void)
1676{
1677 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001678 struct fib6_table *table;
1679
1680 /* NOTE: Keep consistent with rt6_get_dflt_router */
1681 table = fib6_get_table(RT6_TABLE_DFLT);
1682 if (table == NULL)
1683 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001684
1685restart:
Thomas Grafc71099a2006-08-04 23:20:06 -07001686 read_lock_bh(&table->tb6_lock);
Eric Dumazet7cc48262007-02-09 16:22:57 -08001687 for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001688 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1689 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001690 read_unlock_bh(&table->tb6_lock);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001691 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001692 goto restart;
1693 }
1694 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001695 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001696}
1697
Thomas Graf86872cb2006-08-22 00:01:08 -07001698static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
1699 struct fib6_config *cfg)
1700{
1701 memset(cfg, 0, sizeof(*cfg));
1702
1703 cfg->fc_table = RT6_TABLE_MAIN;
1704 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1705 cfg->fc_metric = rtmsg->rtmsg_metric;
1706 cfg->fc_expires = rtmsg->rtmsg_info;
1707 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1708 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1709 cfg->fc_flags = rtmsg->rtmsg_flags;
1710
1711 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1712 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1713 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1714}
1715
Linus Torvalds1da177e2005-04-16 15:20:36 -07001716int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1717{
Thomas Graf86872cb2006-08-22 00:01:08 -07001718 struct fib6_config cfg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001719 struct in6_rtmsg rtmsg;
1720 int err;
1721
1722 switch(cmd) {
1723 case SIOCADDRT: /* Add a route */
1724 case SIOCDELRT: /* Delete a route */
1725 if (!capable(CAP_NET_ADMIN))
1726 return -EPERM;
1727 err = copy_from_user(&rtmsg, arg,
1728 sizeof(struct in6_rtmsg));
1729 if (err)
1730 return -EFAULT;
Thomas Graf86872cb2006-08-22 00:01:08 -07001731
1732 rtmsg_to_fib6_config(&rtmsg, &cfg);
1733
Linus Torvalds1da177e2005-04-16 15:20:36 -07001734 rtnl_lock();
1735 switch (cmd) {
1736 case SIOCADDRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001737 err = ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001738 break;
1739 case SIOCDELRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001740 err = ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001741 break;
1742 default:
1743 err = -EINVAL;
1744 }
1745 rtnl_unlock();
1746
1747 return err;
1748 };
1749
1750 return -EINVAL;
1751}
1752
1753/*
1754 * Drop the packet on the floor
1755 */
1756
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001757static inline int ip6_pkt_drop(struct sk_buff *skb, int code)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001758{
Lv Liangying76d0cc12006-08-29 00:00:47 -07001759 int type = ipv6_addr_type(&skb->nh.ipv6h->daddr);
1760 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED)
YOSHIFUJI Hideakia11d2062006-11-04 20:11:37 +09001761 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
Lv Liangying76d0cc12006-08-29 00:00:47 -07001762
YOSHIFUJI Hideakia11d2062006-11-04 20:11:37 +09001763 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_OUTNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001764 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001765 kfree_skb(skb);
1766 return 0;
1767}
1768
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001769static int ip6_pkt_discard(struct sk_buff *skb)
1770{
1771 return ip6_pkt_drop(skb, ICMPV6_NOROUTE);
1772}
1773
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001774static int ip6_pkt_discard_out(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001775{
1776 skb->dev = skb->dst->dev;
1777 return ip6_pkt_discard(skb);
1778}
1779
David S. Miller6723ab52006-10-18 21:20:57 -07001780#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1781
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001782static int ip6_pkt_prohibit(struct sk_buff *skb)
1783{
1784 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED);
1785}
1786
1787static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1788{
1789 skb->dev = skb->dst->dev;
1790 return ip6_pkt_prohibit(skb);
1791}
1792
/* Free the packet without sending any ICMPv6 error; always returns 0. */
static int ip6_pkt_blk_hole(struct sk_buff *skb)
{
	kfree_skb(skb);

	return 0;
}
1798
David S. Miller6723ab52006-10-18 21:20:57 -07001799#endif
1800
Linus Torvalds1da177e2005-04-16 15:20:36 -07001801/*
1802 * Allocate a dst for local (unicast / anycast) address.
1803 */
1804
1805struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1806 const struct in6_addr *addr,
1807 int anycast)
1808{
1809 struct rt6_info *rt = ip6_dst_alloc();
1810
1811 if (rt == NULL)
1812 return ERR_PTR(-ENOMEM);
1813
1814 dev_hold(&loopback_dev);
1815 in6_dev_hold(idev);
1816
1817 rt->u.dst.flags = DST_HOST;
1818 rt->u.dst.input = ip6_input;
1819 rt->u.dst.output = ip6_output;
1820 rt->rt6i_dev = &loopback_dev;
1821 rt->rt6i_idev = idev;
1822 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1823 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1824 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1825 rt->u.dst.obsolete = -1;
1826
1827 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09001828 if (anycast)
1829 rt->rt6i_flags |= RTF_ANYCAST;
1830 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07001831 rt->rt6i_flags |= RTF_LOCAL;
1832 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1833 if (rt->rt6i_nexthop == NULL) {
YOSHIFUJI Hideaki40aa7b92006-10-19 13:50:09 +09001834 dst_free(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001835 return ERR_PTR(-ENOMEM);
1836 }
1837
1838 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1839 rt->rt6i_dst.plen = 128;
Thomas Grafc71099a2006-08-04 23:20:06 -07001840 rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001841
1842 atomic_set(&rt->u.dst.__refcnt, 1);
1843
1844 return rt;
1845}
1846
1847static int fib6_ifdown(struct rt6_info *rt, void *arg)
1848{
1849 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1850 rt != &ip6_null_entry) {
1851 RT6_TRACE("deleted by ifdown %p\n", rt);
1852 return -1;
1853 }
1854 return 0;
1855}
1856
1857void rt6_ifdown(struct net_device *dev)
1858{
Thomas Grafc71099a2006-08-04 23:20:06 -07001859 fib6_clean_all(fib6_ifdown, 0, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001860}
1861
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg
{
	struct net_device	*dev;	/* device whose MTU changed */
	unsigned int		mtu;	/* new MTU value */
};
1867
1868static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1869{
1870 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1871 struct inet6_dev *idev;
1872
1873 /* In IPv6 pmtu discovery is not optional,
1874 so that RTAX_MTU lock cannot disable it.
1875 We still use this lock to block changes
1876 caused by addrconf/ndisc.
1877 */
1878
1879 idev = __in6_dev_get(arg->dev);
1880 if (idev == NULL)
1881 return 0;
1882
1883 /* For administrative MTU increase, there is no way to discover
1884 IPv6 PMTU increase, so PMTU increase should be updated here.
1885 Since RFC 1981 doesn't include administrative MTU increase
1886 update PMTU increase is a MUST. (i.e. jumbo frame)
1887 */
1888 /*
1889 If new MTU is less than route PMTU, this new MTU will be the
1890 lowest MTU in the path, update the route PMTU to reflect PMTU
1891 decreases; if new MTU is greater than route PMTU, and the
1892 old MTU is the lowest MTU in the path, update the route PMTU
1893 to reflect the increase. In this case if the other nodes' MTU
1894 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1895 PMTU discouvery.
1896 */
1897 if (rt->rt6i_dev == arg->dev &&
1898 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001899 (dst_mtu(&rt->u.dst) > arg->mtu ||
1900 (dst_mtu(&rt->u.dst) < arg->mtu &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001901 dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1902 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1903 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1904 return 0;
1905}
1906
1907void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1908{
Thomas Grafc71099a2006-08-04 23:20:06 -07001909 struct rt6_mtu_change_arg arg = {
1910 .dev = dev,
1911 .mtu = mtu,
1912 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001913
Thomas Grafc71099a2006-08-04 23:20:06 -07001914 fib6_clean_all(rt6_mtu_change_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001915}
1916
Thomas Graf86872cb2006-08-22 00:01:08 -07001917static struct nla_policy rtm_ipv6_policy[RTA_MAX+1] __read_mostly = {
Thomas Graf5176f912006-08-26 20:13:18 -07001918 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
Thomas Graf86872cb2006-08-22 00:01:08 -07001919 [RTA_OIF] = { .type = NLA_U32 },
Thomas Grafab364a62006-08-22 00:01:47 -07001920 [RTA_IIF] = { .type = NLA_U32 },
Thomas Graf86872cb2006-08-22 00:01:08 -07001921 [RTA_PRIORITY] = { .type = NLA_U32 },
1922 [RTA_METRICS] = { .type = NLA_NESTED },
1923};
1924
1925static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
1926 struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001927{
Thomas Graf86872cb2006-08-22 00:01:08 -07001928 struct rtmsg *rtm;
1929 struct nlattr *tb[RTA_MAX+1];
1930 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001931
Thomas Graf86872cb2006-08-22 00:01:08 -07001932 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
1933 if (err < 0)
1934 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001935
Thomas Graf86872cb2006-08-22 00:01:08 -07001936 err = -EINVAL;
1937 rtm = nlmsg_data(nlh);
1938 memset(cfg, 0, sizeof(*cfg));
1939
1940 cfg->fc_table = rtm->rtm_table;
1941 cfg->fc_dst_len = rtm->rtm_dst_len;
1942 cfg->fc_src_len = rtm->rtm_src_len;
1943 cfg->fc_flags = RTF_UP;
1944 cfg->fc_protocol = rtm->rtm_protocol;
1945
1946 if (rtm->rtm_type == RTN_UNREACHABLE)
1947 cfg->fc_flags |= RTF_REJECT;
1948
1949 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
1950 cfg->fc_nlinfo.nlh = nlh;
1951
1952 if (tb[RTA_GATEWAY]) {
1953 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
1954 cfg->fc_flags |= RTF_GATEWAY;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001955 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001956
1957 if (tb[RTA_DST]) {
1958 int plen = (rtm->rtm_dst_len + 7) >> 3;
1959
1960 if (nla_len(tb[RTA_DST]) < plen)
1961 goto errout;
1962
1963 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001964 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001965
1966 if (tb[RTA_SRC]) {
1967 int plen = (rtm->rtm_src_len + 7) >> 3;
1968
1969 if (nla_len(tb[RTA_SRC]) < plen)
1970 goto errout;
1971
1972 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001973 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001974
1975 if (tb[RTA_OIF])
1976 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
1977
1978 if (tb[RTA_PRIORITY])
1979 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
1980
1981 if (tb[RTA_METRICS]) {
1982 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
1983 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001984 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001985
1986 if (tb[RTA_TABLE])
1987 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
1988
1989 err = 0;
1990errout:
1991 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001992}
1993
1994int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1995{
Thomas Graf86872cb2006-08-22 00:01:08 -07001996 struct fib6_config cfg;
1997 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001998
Thomas Graf86872cb2006-08-22 00:01:08 -07001999 err = rtm_to_fib6_config(skb, nlh, &cfg);
2000 if (err < 0)
2001 return err;
2002
2003 return ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002004}
2005
2006int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2007{
Thomas Graf86872cb2006-08-22 00:01:08 -07002008 struct fib6_config cfg;
2009 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002010
Thomas Graf86872cb2006-08-22 00:01:08 -07002011 err = rtm_to_fib6_config(skb, nlh, &cfg);
2012 if (err < 0)
2013 return err;
2014
2015 return ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002016}
2017
Thomas Graf339bf982006-11-10 14:10:15 -08002018static inline size_t rt6_nlmsg_size(void)
2019{
2020 return NLMSG_ALIGN(sizeof(struct rtmsg))
2021 + nla_total_size(16) /* RTA_SRC */
2022 + nla_total_size(16) /* RTA_DST */
2023 + nla_total_size(16) /* RTA_GATEWAY */
2024 + nla_total_size(16) /* RTA_PREFSRC */
2025 + nla_total_size(4) /* RTA_TABLE */
2026 + nla_total_size(4) /* RTA_IIF */
2027 + nla_total_size(4) /* RTA_OIF */
2028 + nla_total_size(4) /* RTA_PRIORITY */
Noriaki TAKAMIYA6a2b9ce2007-01-23 22:09:41 -08002029 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
Thomas Graf339bf982006-11-10 14:10:15 -08002030 + nla_total_size(sizeof(struct rta_cacheinfo));
2031}
2032
Linus Torvalds1da177e2005-04-16 15:20:36 -07002033static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002034 struct in6_addr *dst, struct in6_addr *src,
2035 int iif, int type, u32 pid, u32 seq,
2036 int prefix, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002037{
2038 struct rtmsg *rtm;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002039 struct nlmsghdr *nlh;
Thomas Grafe3703b32006-11-27 09:27:07 -08002040 long expires;
Patrick McHardy9e762a42006-08-10 23:09:48 -07002041 u32 table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002042
2043 if (prefix) { /* user wants prefix routes only */
2044 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2045 /* success since this is not a prefix route */
2046 return 1;
2047 }
2048 }
2049
Thomas Graf2d7202b2006-08-22 00:01:27 -07002050 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2051 if (nlh == NULL)
Patrick McHardy26932562007-01-31 23:16:40 -08002052 return -EMSGSIZE;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002053
2054 rtm = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002055 rtm->rtm_family = AF_INET6;
2056 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2057 rtm->rtm_src_len = rt->rt6i_src.plen;
2058 rtm->rtm_tos = 0;
Thomas Grafc71099a2006-08-04 23:20:06 -07002059 if (rt->rt6i_table)
Patrick McHardy9e762a42006-08-10 23:09:48 -07002060 table = rt->rt6i_table->tb6_id;
Thomas Grafc71099a2006-08-04 23:20:06 -07002061 else
Patrick McHardy9e762a42006-08-10 23:09:48 -07002062 table = RT6_TABLE_UNSPEC;
2063 rtm->rtm_table = table;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002064 NLA_PUT_U32(skb, RTA_TABLE, table);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002065 if (rt->rt6i_flags&RTF_REJECT)
2066 rtm->rtm_type = RTN_UNREACHABLE;
2067 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2068 rtm->rtm_type = RTN_LOCAL;
2069 else
2070 rtm->rtm_type = RTN_UNICAST;
2071 rtm->rtm_flags = 0;
2072 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2073 rtm->rtm_protocol = rt->rt6i_protocol;
2074 if (rt->rt6i_flags&RTF_DYNAMIC)
2075 rtm->rtm_protocol = RTPROT_REDIRECT;
2076 else if (rt->rt6i_flags & RTF_ADDRCONF)
2077 rtm->rtm_protocol = RTPROT_KERNEL;
2078 else if (rt->rt6i_flags&RTF_DEFAULT)
2079 rtm->rtm_protocol = RTPROT_RA;
2080
2081 if (rt->rt6i_flags&RTF_CACHE)
2082 rtm->rtm_flags |= RTM_F_CLONED;
2083
2084 if (dst) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002085 NLA_PUT(skb, RTA_DST, 16, dst);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002086 rtm->rtm_dst_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002087 } else if (rtm->rtm_dst_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002088 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002089#ifdef CONFIG_IPV6_SUBTREES
2090 if (src) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002091 NLA_PUT(skb, RTA_SRC, 16, src);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002092 rtm->rtm_src_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002093 } else if (rtm->rtm_src_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002094 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002095#endif
2096 if (iif)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002097 NLA_PUT_U32(skb, RTA_IIF, iif);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002098 else if (dst) {
2099 struct in6_addr saddr_buf;
2100 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002101 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002102 }
Thomas Graf2d7202b2006-08-22 00:01:27 -07002103
Linus Torvalds1da177e2005-04-16 15:20:36 -07002104 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002105 goto nla_put_failure;
2106
Linus Torvalds1da177e2005-04-16 15:20:36 -07002107 if (rt->u.dst.neighbour)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002108 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2109
Linus Torvalds1da177e2005-04-16 15:20:36 -07002110 if (rt->u.dst.dev)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002111 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2112
2113 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
Thomas Grafe3703b32006-11-27 09:27:07 -08002114
2115 expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0;
2116 if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2117 expires, rt->u.dst.error) < 0)
2118 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002119
Thomas Graf2d7202b2006-08-22 00:01:27 -07002120 return nlmsg_end(skb, nlh);
2121
2122nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08002123 nlmsg_cancel(skb, nlh);
2124 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002125}
2126
Patrick McHardy1b43af52006-08-10 23:11:17 -07002127int rt6_dump_route(struct rt6_info *rt, void *p_arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002128{
2129 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2130 int prefix;
2131
Thomas Graf2d7202b2006-08-22 00:01:27 -07002132 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2133 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002134 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2135 } else
2136 prefix = 0;
2137
2138 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2139 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002140 prefix, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002141}
2142
Linus Torvalds1da177e2005-04-16 15:20:36 -07002143int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2144{
Thomas Grafab364a62006-08-22 00:01:47 -07002145 struct nlattr *tb[RTA_MAX+1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002146 struct rt6_info *rt;
Thomas Grafab364a62006-08-22 00:01:47 -07002147 struct sk_buff *skb;
2148 struct rtmsg *rtm;
2149 struct flowi fl;
2150 int err, iif = 0;
2151
2152 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2153 if (err < 0)
2154 goto errout;
2155
2156 err = -EINVAL;
2157 memset(&fl, 0, sizeof(fl));
2158
2159 if (tb[RTA_SRC]) {
2160 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2161 goto errout;
2162
2163 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2164 }
2165
2166 if (tb[RTA_DST]) {
2167 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2168 goto errout;
2169
2170 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2171 }
2172
2173 if (tb[RTA_IIF])
2174 iif = nla_get_u32(tb[RTA_IIF]);
2175
2176 if (tb[RTA_OIF])
2177 fl.oif = nla_get_u32(tb[RTA_OIF]);
2178
2179 if (iif) {
2180 struct net_device *dev;
2181 dev = __dev_get_by_index(iif);
2182 if (!dev) {
2183 err = -ENODEV;
2184 goto errout;
2185 }
2186 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002187
2188 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
Thomas Grafab364a62006-08-22 00:01:47 -07002189 if (skb == NULL) {
2190 err = -ENOBUFS;
2191 goto errout;
2192 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002193
2194 /* Reserve room for dummy headers, this skb can pass
2195 through good chunk of routing engine.
2196 */
2197 skb->mac.raw = skb->data;
2198 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2199
Thomas Grafab364a62006-08-22 00:01:47 -07002200 rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002201 skb->dst = &rt->u.dst;
2202
Thomas Grafab364a62006-08-22 00:01:47 -07002203 err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002204 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002205 nlh->nlmsg_seq, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002206 if (err < 0) {
Thomas Grafab364a62006-08-22 00:01:47 -07002207 kfree_skb(skb);
2208 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002209 }
2210
Thomas Graf2942e902006-08-15 00:30:25 -07002211 err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
Thomas Grafab364a62006-08-22 00:01:47 -07002212errout:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002213 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002214}
2215
Thomas Graf86872cb2006-08-22 00:01:08 -07002216void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002217{
2218 struct sk_buff *skb;
Thomas Graf86872cb2006-08-22 00:01:08 -07002219 u32 pid = 0, seq = 0;
2220 struct nlmsghdr *nlh = NULL;
Thomas Graf21713eb2006-08-15 00:35:24 -07002221 int err = -ENOBUFS;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002222
Thomas Graf86872cb2006-08-22 00:01:08 -07002223 if (info) {
2224 pid = info->pid;
2225 nlh = info->nlh;
2226 if (nlh)
2227 seq = nlh->nlmsg_seq;
2228 }
2229
Thomas Graf339bf982006-11-10 14:10:15 -08002230 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
Thomas Graf21713eb2006-08-15 00:35:24 -07002231 if (skb == NULL)
2232 goto errout;
2233
2234 err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
Patrick McHardy26932562007-01-31 23:16:40 -08002235 if (err < 0) {
2236 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2237 WARN_ON(err == -EMSGSIZE);
2238 kfree_skb(skb);
2239 goto errout;
2240 }
Thomas Graf21713eb2006-08-15 00:35:24 -07002241 err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
2242errout:
2243 if (err < 0)
2244 rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002245}
2246
2247/*
2248 * /proc
2249 */
2250
2251#ifdef CONFIG_PROC_FS
2252
2253#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
2254
/* State shared between rt6_proc_info() and its rt6_info_route() callback. */
struct rt6_proc_arg
{
	char	*buffer;	/* output buffer */
	int	offset;		/* read offset requested by the caller */
	int	length;		/* number of bytes requested */
	int	skip;		/* routes skipped so far to reach offset */
	int	len;		/* bytes written to buffer so far */
};
2263
2264static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2265{
2266 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002267
2268 if (arg->skip < arg->offset / RT6_INFO_LEN) {
2269 arg->skip++;
2270 return 0;
2271 }
2272
2273 if (arg->len >= arg->length)
2274 return 0;
2275
YOSHIFUJI Hideaki33e93c92006-10-14 02:00:56 +09002276 arg->len += sprintf(arg->buffer + arg->len,
2277 NIP6_SEQFMT " %02x ",
2278 NIP6(rt->rt6i_dst.addr),
Linus Torvalds1da177e2005-04-16 15:20:36 -07002279 rt->rt6i_dst.plen);
2280
2281#ifdef CONFIG_IPV6_SUBTREES
YOSHIFUJI Hideaki33e93c92006-10-14 02:00:56 +09002282 arg->len += sprintf(arg->buffer + arg->len,
2283 NIP6_SEQFMT " %02x ",
2284 NIP6(rt->rt6i_src.addr),
Linus Torvalds1da177e2005-04-16 15:20:36 -07002285 rt->rt6i_src.plen);
2286#else
YOSHIFUJI Hideaki33e93c92006-10-14 02:00:56 +09002287 arg->len += sprintf(arg->buffer + arg->len,
2288 "00000000000000000000000000000000 00 ");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002289#endif
2290
2291 if (rt->rt6i_nexthop) {
YOSHIFUJI Hideaki33e93c92006-10-14 02:00:56 +09002292 arg->len += sprintf(arg->buffer + arg->len,
2293 NIP6_SEQFMT,
2294 NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002295 } else {
YOSHIFUJI Hideaki33e93c92006-10-14 02:00:56 +09002296 arg->len += sprintf(arg->buffer + arg->len,
2297 "00000000000000000000000000000000");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002298 }
2299 arg->len += sprintf(arg->buffer + arg->len,
2300 " %08x %08x %08x %08x %8s\n",
2301 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002302 rt->u.dst.__use, rt->rt6i_flags,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002303 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2304 return 0;
2305}
2306
2307static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2308{
Thomas Grafc71099a2006-08-04 23:20:06 -07002309 struct rt6_proc_arg arg = {
2310 .buffer = buffer,
2311 .offset = offset,
2312 .length = length,
2313 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07002314
Thomas Grafc71099a2006-08-04 23:20:06 -07002315 fib6_clean_all(rt6_info_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002316
2317 *start = buffer;
2318 if (offset)
2319 *start += offset % RT6_INFO_LEN;
2320
2321 arg.len -= offset % RT6_INFO_LEN;
2322
2323 if (arg.len > length)
2324 arg.len = length;
2325 if (arg.len < 0)
2326 arg.len = 0;
2327
2328 return arg.len;
2329}
2330
Linus Torvalds1da177e2005-04-16 15:20:36 -07002331static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2332{
2333 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2334 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2335 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2336 rt6_stats.fib_rt_cache,
2337 atomic_read(&ip6_dst_ops.entries),
2338 rt6_stats.fib_discarded_routes);
2339
2340 return 0;
2341}
2342
2343static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2344{
2345 return single_open(file, rt6_stats_seq_show, NULL);
2346}
2347
2348static struct file_operations rt6_stats_seq_fops = {
2349 .owner = THIS_MODULE,
2350 .open = rt6_stats_seq_open,
2351 .read = seq_read,
2352 .llseek = seq_lseek,
2353 .release = single_release,
2354};
2355#endif /* CONFIG_PROC_FS */
2356
2357#ifdef CONFIG_SYSCTL
2358
2359static int flush_delay;
2360
2361static
2362int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2363 void __user *buffer, size_t *lenp, loff_t *ppos)
2364{
2365 if (write) {
2366 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2367 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2368 return 0;
2369 } else
2370 return -EINVAL;
2371}
2372
2373ctl_table ipv6_route_table[] = {
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002374 {
2375 .ctl_name = NET_IPV6_ROUTE_FLUSH,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002376 .procname = "flush",
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002377 .data = &flush_delay,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002378 .maxlen = sizeof(int),
Dave Jones89c8b3a12005-04-28 12:11:49 -07002379 .mode = 0200,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002380 .proc_handler = &ipv6_sysctl_rtcache_flush
Linus Torvalds1da177e2005-04-16 15:20:36 -07002381 },
2382 {
2383 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2384 .procname = "gc_thresh",
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002385 .data = &ip6_dst_ops.gc_thresh,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002386 .maxlen = sizeof(int),
2387 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002388 .proc_handler = &proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002389 },
2390 {
2391 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2392 .procname = "max_size",
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002393 .data = &ip6_rt_max_size,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002394 .maxlen = sizeof(int),
2395 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002396 .proc_handler = &proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002397 },
2398 {
2399 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2400 .procname = "gc_min_interval",
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002401 .data = &ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002402 .maxlen = sizeof(int),
2403 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002404 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002405 .strategy = &sysctl_jiffies,
2406 },
2407 {
2408 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2409 .procname = "gc_timeout",
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002410 .data = &ip6_rt_gc_timeout,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002411 .maxlen = sizeof(int),
2412 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002413 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002414 .strategy = &sysctl_jiffies,
2415 },
2416 {
2417 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2418 .procname = "gc_interval",
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002419 .data = &ip6_rt_gc_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002420 .maxlen = sizeof(int),
2421 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002422 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002423 .strategy = &sysctl_jiffies,
2424 },
2425 {
2426 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2427 .procname = "gc_elasticity",
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002428 .data = &ip6_rt_gc_elasticity,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002429 .maxlen = sizeof(int),
2430 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002431 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002432 .strategy = &sysctl_jiffies,
2433 },
2434 {
2435 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2436 .procname = "mtu_expires",
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002437 .data = &ip6_rt_mtu_expires,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002438 .maxlen = sizeof(int),
2439 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002440 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002441 .strategy = &sysctl_jiffies,
2442 },
2443 {
2444 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2445 .procname = "min_adv_mss",
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002446 .data = &ip6_rt_min_advmss,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002447 .maxlen = sizeof(int),
2448 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002449 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002450 .strategy = &sysctl_jiffies,
2451 },
2452 {
2453 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2454 .procname = "gc_min_interval_ms",
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002455 .data = &ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002456 .maxlen = sizeof(int),
2457 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002458 .proc_handler = &proc_dointvec_ms_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002459 .strategy = &sysctl_ms_jiffies,
2460 },
2461 { .ctl_name = 0 }
2462};
2463
2464#endif
2465
2466void __init ip6_route_init(void)
2467{
2468 struct proc_dir_entry *p;
2469
Alexey Dobriyane5d679f332006-08-26 19:25:52 -07002470 ip6_dst_ops.kmem_cachep =
2471 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2472 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002473 fib6_init();
2474#ifdef CONFIG_PROC_FS
2475 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2476 if (p)
2477 p->owner = THIS_MODULE;
2478
2479 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2480#endif
2481#ifdef CONFIG_XFRM
2482 xfrm6_init();
2483#endif
Thomas Graf101367c2006-08-04 03:39:02 -07002484#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2485 fib6_rules_init();
2486#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07002487}
2488
2489void ip6_route_cleanup(void)
2490{
Thomas Graf101367c2006-08-04 03:39:02 -07002491#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2492 fib6_rules_cleanup();
2493#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07002494#ifdef CONFIG_PROC_FS
2495 proc_net_remove("ipv6_route");
2496 proc_net_remove("rt6_stats");
2497#endif
2498#ifdef CONFIG_XFRM
2499 xfrm6_fini();
2500#endif
2501 rt6_ifdown(NULL);
2502 fib6_gc_cleanup();
2503 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2504}