blob: 72609f1c61581bfa8e8fbd7cf4e577166f5ade04 [file] [log] [blame]
/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
13
/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		reachable.  otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */
26
Randy Dunlap4fc268d2006-01-11 12:17:47 -080027#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090037#include <linux/mroute6.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070038#include <linux/init.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039#include <linux/if_arp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070040#include <linux/proc_fs.h>
41#include <linux/seq_file.h>
Daniel Lezcano5b7c9312008-03-03 23:28:58 -080042#include <linux/nsproxy.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090043#include <linux/slab.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020044#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070045#include <net/snmp.h>
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#include <net/ndisc.h>
50#include <net/addrconf.h>
51#include <net/tcp.h>
52#include <linux/rtnetlink.h>
53#include <net/dst.h>
54#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -070055#include <net/netevent.h>
Thomas Graf21713eb2006-08-15 00:35:24 -070056#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070057
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
/* Debug verbosity for this file. Set to 3 to enable RDBG()/RT6_TRACE(). */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
/* Tracing disabled: both macros expand to nothing. */
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
/* Tracing enabled: forward the arguments to printk(). */
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif
74
Linus Torvalds1da177e2005-04-16 15:20:36 -070075static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
76static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
David S. Miller0dbaee32010-12-13 12:52:14 -080077static unsigned int ip6_default_advmss(const struct dst_entry *dst);
David S. Millerd33e4552010-12-14 13:01:14 -080078static unsigned int ip6_default_mtu(const struct dst_entry *dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -070079static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80static void ip6_dst_destroy(struct dst_entry *);
81static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
Daniel Lezcano569d3642008-01-18 03:56:57 -080083static int ip6_dst_gc(struct dst_ops *ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -070084
85static int ip6_pkt_discard(struct sk_buff *skb);
86static int ip6_pkt_discard_out(struct sk_buff *skb);
87static void ip6_link_failure(struct sk_buff *skb);
88static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
89
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080090#ifdef CONFIG_IPV6_ROUTE_INFO
/* Route Information option helpers, used by rt6_route_rcv(). */
static struct rt6_info *rt6_add_route_info(struct net *net,
					   struct in6_addr *prefix,
					   int prefixlen,
					   struct in6_addr *gwaddr,
					   int ifindex,
					   unsigned pref);
static struct rt6_info *rt6_get_route_info(struct net *net,
					   struct in6_addr *prefix,
					   int prefixlen,
					   struct in6_addr *gwaddr,
					   int ifindex);
98#endif
99
David S. Miller06582542011-01-27 14:58:42 -0800100static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
101{
102 struct rt6_info *rt = (struct rt6_info *) dst;
103 struct inet_peer *peer;
104 u32 *p = NULL;
105
106 if (!rt->rt6i_peer)
107 rt6_bind_peer(rt, 1);
108
109 peer = rt->rt6i_peer;
110 if (peer) {
111 u32 *old_p = __DST_METRICS_PTR(old);
112 unsigned long prev, new;
113
114 p = peer->metrics;
115 if (inet_metrics_new(peer))
116 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
117
118 new = (unsigned long) p;
119 prev = cmpxchg(&dst->_metrics, old, new);
120
121 if (prev != old) {
122 p = __DST_METRICS_PTR(prev);
123 if (prev & DST_METRICS_READ_ONLY)
124 p = NULL;
125 }
126 }
127 return p;
128}
129
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -0800130static struct dst_ops ip6_dst_ops_template = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700131 .family = AF_INET6,
Harvey Harrison09640e632009-02-01 00:45:17 -0800132 .protocol = cpu_to_be16(ETH_P_IPV6),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700133 .gc = ip6_dst_gc,
134 .gc_thresh = 1024,
135 .check = ip6_dst_check,
David S. Miller0dbaee32010-12-13 12:52:14 -0800136 .default_advmss = ip6_default_advmss,
David S. Millerd33e4552010-12-14 13:01:14 -0800137 .default_mtu = ip6_default_mtu,
David S. Miller06582542011-01-27 14:58:42 -0800138 .cow_metrics = ipv6_cow_metrics,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700139 .destroy = ip6_dst_destroy,
140 .ifdown = ip6_dst_ifdown,
141 .negative_advice = ip6_negative_advice,
142 .link_failure = ip6_link_failure,
143 .update_pmtu = ip6_rt_update_pmtu,
Herbert Xu1ac06e02008-05-20 14:32:14 -0700144 .local_out = __ip6_local_out,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700145};
146
David S. Miller14e50e52007-05-24 18:17:54 -0700147static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
148{
149}
150
151static struct dst_ops ip6_dst_blackhole_ops = {
152 .family = AF_INET6,
Harvey Harrison09640e632009-02-01 00:45:17 -0800153 .protocol = cpu_to_be16(ETH_P_IPV6),
David S. Miller14e50e52007-05-24 18:17:54 -0700154 .destroy = ip6_dst_destroy,
155 .check = ip6_dst_check,
156 .update_pmtu = ip6_rt_blackhole_update_pmtu,
David S. Miller14e50e52007-05-24 18:17:54 -0700157};
158
David S. Miller62fa8a82011-01-26 20:51:05 -0800159static const u32 ip6_template_metrics[RTAX_MAX] = {
160 [RTAX_HOPLIMIT - 1] = 255,
161};
162
Daniel Lezcanobdb32892008-03-04 13:48:10 -0800163static struct rt6_info ip6_null_entry_template = {
Changli Gaod8d1f302010-06-10 23:31:35 -0700164 .dst = {
165 .__refcnt = ATOMIC_INIT(1),
166 .__use = 1,
167 .obsolete = -1,
168 .error = -ENETUNREACH,
Changli Gaod8d1f302010-06-10 23:31:35 -0700169 .input = ip6_pkt_discard,
170 .output = ip6_pkt_discard_out,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700171 },
172 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
Jean-Mickael Guerin4f724272009-05-20 17:38:59 -0700173 .rt6i_protocol = RTPROT_KERNEL,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700174 .rt6i_metric = ~(u32) 0,
175 .rt6i_ref = ATOMIC_INIT(1),
176};
177
Thomas Graf101367c2006-08-04 03:39:02 -0700178#ifdef CONFIG_IPV6_MULTIPLE_TABLES
179
/* Packet handlers used by ip6_prohibit_entry_template below. */
static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);
David S. Miller6723ab52006-10-18 21:20:57 -0700182
Adrian Bunk280a34c2008-04-21 02:29:32 -0700183static struct rt6_info ip6_prohibit_entry_template = {
Changli Gaod8d1f302010-06-10 23:31:35 -0700184 .dst = {
185 .__refcnt = ATOMIC_INIT(1),
186 .__use = 1,
187 .obsolete = -1,
188 .error = -EACCES,
Changli Gaod8d1f302010-06-10 23:31:35 -0700189 .input = ip6_pkt_prohibit,
190 .output = ip6_pkt_prohibit_out,
Thomas Graf101367c2006-08-04 03:39:02 -0700191 },
192 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
Jean-Mickael Guerin4f724272009-05-20 17:38:59 -0700193 .rt6i_protocol = RTPROT_KERNEL,
Thomas Graf101367c2006-08-04 03:39:02 -0700194 .rt6i_metric = ~(u32) 0,
195 .rt6i_ref = ATOMIC_INIT(1),
196};
197
Daniel Lezcanobdb32892008-03-04 13:48:10 -0800198static struct rt6_info ip6_blk_hole_entry_template = {
Changli Gaod8d1f302010-06-10 23:31:35 -0700199 .dst = {
200 .__refcnt = ATOMIC_INIT(1),
201 .__use = 1,
202 .obsolete = -1,
203 .error = -EINVAL,
Changli Gaod8d1f302010-06-10 23:31:35 -0700204 .input = dst_discard,
205 .output = dst_discard,
Thomas Graf101367c2006-08-04 03:39:02 -0700206 },
207 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
Jean-Mickael Guerin4f724272009-05-20 17:38:59 -0700208 .rt6i_protocol = RTPROT_KERNEL,
Thomas Graf101367c2006-08-04 03:39:02 -0700209 .rt6i_metric = ~(u32) 0,
210 .rt6i_ref = ATOMIC_INIT(1),
211};
212
213#endif
214
/*
 * Allocate a dst entry from @ops and hand it back typed as an rt6_info.
 * Returns whatever dst_alloc() returned, cast to struct rt6_info *.
 */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
	void *entry = dst_alloc(ops);

	return (struct rt6_info *)entry;
}
220
221static void ip6_dst_destroy(struct dst_entry *dst)
222{
223 struct rt6_info *rt = (struct rt6_info *)dst;
224 struct inet6_dev *idev = rt->rt6i_idev;
David S. Millerb3419362010-11-30 12:27:11 -0800225 struct inet_peer *peer = rt->rt6i_peer;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700226
227 if (idev != NULL) {
228 rt->rt6i_idev = NULL;
229 in6_dev_put(idev);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900230 }
David S. Millerb3419362010-11-30 12:27:11 -0800231 if (peer) {
David S. Millerb3419362010-11-30 12:27:11 -0800232 rt->rt6i_peer = NULL;
233 inet_putpeer(peer);
234 }
235}
236
237void rt6_bind_peer(struct rt6_info *rt, int create)
238{
239 struct inet_peer *peer;
240
David S. Millerb3419362010-11-30 12:27:11 -0800241 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
242 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
243 inet_putpeer(peer);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700244}
245
246static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
247 int how)
248{
249 struct rt6_info *rt = (struct rt6_info *)dst;
250 struct inet6_dev *idev = rt->rt6i_idev;
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800251 struct net_device *loopback_dev =
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900252 dev_net(dev)->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700253
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800254 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
255 struct inet6_dev *loopback_idev =
256 in6_dev_get(loopback_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700257 if (loopback_idev != NULL) {
258 rt->rt6i_idev = loopback_idev;
259 in6_dev_put(idev);
260 }
261 }
262}
263
264static __inline__ int rt6_check_expired(const struct rt6_info *rt)
265{
Eric Dumazeta02cec22010-09-22 20:43:57 +0000266 return (rt->rt6i_flags & RTF_EXPIRES) &&
267 time_after(jiffies, rt->rt6i_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700268}
269
Thomas Grafc71099a2006-08-04 23:20:06 -0700270static inline int rt6_need_strict(struct in6_addr *daddr)
271{
Eric Dumazeta02cec22010-09-22 20:43:57 +0000272 return ipv6_addr_type(daddr) &
273 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
Thomas Grafc71099a2006-08-04 23:20:06 -0700274}
275
Linus Torvalds1da177e2005-04-16 15:20:36 -0700276/*
Thomas Grafc71099a2006-08-04 23:20:06 -0700277 * Route lookup. Any table->tb6_lock is implied.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700278 */
279
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800280static inline struct rt6_info *rt6_device_match(struct net *net,
281 struct rt6_info *rt,
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900282 struct in6_addr *saddr,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700283 int oif,
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700284 int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700285{
286 struct rt6_info *local = NULL;
287 struct rt6_info *sprt;
288
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900289 if (!oif && ipv6_addr_any(saddr))
290 goto out;
291
Changli Gaod8d1f302010-06-10 23:31:35 -0700292 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900293 struct net_device *dev = sprt->rt6i_dev;
294
295 if (oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700296 if (dev->ifindex == oif)
297 return sprt;
298 if (dev->flags & IFF_LOOPBACK) {
299 if (sprt->rt6i_idev == NULL ||
300 sprt->rt6i_idev->dev->ifindex != oif) {
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700301 if (flags & RT6_LOOKUP_F_IFACE && oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700302 continue;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900303 if (local && (!oif ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700304 local->rt6i_idev->dev->ifindex == oif))
305 continue;
306 }
307 local = sprt;
308 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900309 } else {
310 if (ipv6_chk_addr(net, saddr, dev,
311 flags & RT6_LOOKUP_F_IFACE))
312 return sprt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700313 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900314 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700315
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900316 if (oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700317 if (local)
318 return local;
319
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700320 if (flags & RT6_LOOKUP_F_IFACE)
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800321 return net->ipv6.ip6_null_entry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700322 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900323out:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700324 return rt;
325}
326
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * If the route's next-hop neighbour entry is not in a NUD_VALID state and
 * the last update is older than the interface's rtr_probe_interval, send a
 * neighbour solicitation to the solicited-node multicast address of the
 * next hop. @rt may be NULL, in which case nothing is done.
 *
 * Okay, this does not seem to be appropriate for now, however, we need to
 * check if it is really so.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one per
 * minute. The rate limit is enforced by stamping neigh->updated; because
 * that is a store to the neighbour entry, the check-and-stamp sequence is
 * done under the neighbour's write lock (a read lock would allow several
 * CPUs to pass the check and race on the store).
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;

	/* Unlocked fast path; the state is re-checked under the lock. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	write_lock_bh(&neigh->lock);
	if ((neigh->nud_state & NUD_VALID) ||
	    !time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		write_unlock_bh(&neigh->lock);
		return;
	}
	neigh->updated = jiffies;
	write_unlock_bh(&neigh->lock);

	/* The solicitation is sent with the neighbour lock dropped. */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support disabled: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
361
Linus Torvalds1da177e2005-04-16 15:20:36 -0700362/*
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800363 * Default Router Selection (RFC 2461 6.3.6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700364 */
Dave Jonesb6f99a22007-03-22 12:27:49 -0700365static inline int rt6_check_dev(struct rt6_info *rt, int oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700366{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800367 struct net_device *dev = rt->rt6i_dev;
David S. Miller161980f2007-04-06 11:42:27 -0700368 if (!oif || dev->ifindex == oif)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800369 return 2;
David S. Miller161980f2007-04-06 11:42:27 -0700370 if ((dev->flags & IFF_LOOPBACK) &&
371 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
372 return 1;
373 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700374}
375
Dave Jonesb6f99a22007-03-22 12:27:49 -0700376static inline int rt6_check_neigh(struct rt6_info *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700377{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800378 struct neighbour *neigh = rt->rt6i_nexthop;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800379 int m;
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700380 if (rt->rt6i_flags & RTF_NONEXTHOP ||
381 !(rt->rt6i_flags & RTF_GATEWAY))
382 m = 1;
383 else if (neigh) {
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800384 read_lock_bh(&neigh->lock);
385 if (neigh->nud_state & NUD_VALID)
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700386 m = 2;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800387#ifdef CONFIG_IPV6_ROUTER_PREF
388 else if (neigh->nud_state & NUD_FAILED)
389 m = 0;
390#endif
391 else
YOSHIFUJI Hideakiea73ee22006-11-06 09:45:44 -0800392 m = 1;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800393 read_unlock_bh(&neigh->lock);
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800394 } else
395 m = 0;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800396 return m;
397}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700398
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800399static int rt6_score_route(struct rt6_info *rt, int oif,
400 int strict)
401{
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700402 int m, n;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900403
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700404 m = rt6_check_dev(rt, oif);
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700405 if (!m && (strict & RT6_LOOKUP_F_IFACE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800406 return -1;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800407#ifdef CONFIG_IPV6_ROUTER_PREF
408 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
409#endif
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700410 n = rt6_check_neigh(rt);
YOSHIFUJI Hideaki557e92e2006-11-06 09:45:45 -0800411 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800412 return -1;
413 return m;
414}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700415
David S. Millerf11e6652007-03-24 20:36:25 -0700416static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
417 int *mpri, struct rt6_info *match)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800418{
David S. Millerf11e6652007-03-24 20:36:25 -0700419 int m;
420
421 if (rt6_check_expired(rt))
422 goto out;
423
424 m = rt6_score_route(rt, oif, strict);
425 if (m < 0)
426 goto out;
427
428 if (m > *mpri) {
429 if (strict & RT6_LOOKUP_F_REACHABLE)
430 rt6_probe(match);
431 *mpri = m;
432 match = rt;
433 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
434 rt6_probe(rt);
435 }
436
437out:
438 return match;
439}
440
441static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
442 struct rt6_info *rr_head,
443 u32 metric, int oif, int strict)
444{
445 struct rt6_info *rt, *match;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800446 int mpri = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700447
David S. Millerf11e6652007-03-24 20:36:25 -0700448 match = NULL;
449 for (rt = rr_head; rt && rt->rt6i_metric == metric;
Changli Gaod8d1f302010-06-10 23:31:35 -0700450 rt = rt->dst.rt6_next)
David S. Millerf11e6652007-03-24 20:36:25 -0700451 match = find_match(rt, oif, strict, &mpri, match);
452 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
Changli Gaod8d1f302010-06-10 23:31:35 -0700453 rt = rt->dst.rt6_next)
David S. Millerf11e6652007-03-24 20:36:25 -0700454 match = find_match(rt, oif, strict, &mpri, match);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800455
David S. Millerf11e6652007-03-24 20:36:25 -0700456 return match;
457}
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800458
David S. Millerf11e6652007-03-24 20:36:25 -0700459static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
460{
461 struct rt6_info *match, *rt0;
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800462 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700463
David S. Millerf11e6652007-03-24 20:36:25 -0700464 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
Harvey Harrison0dc47872008-03-05 20:47:47 -0800465 __func__, fn->leaf, oif);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700466
David S. Millerf11e6652007-03-24 20:36:25 -0700467 rt0 = fn->rr_ptr;
468 if (!rt0)
469 fn->rr_ptr = rt0 = fn->leaf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700470
David S. Millerf11e6652007-03-24 20:36:25 -0700471 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700472
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800473 if (!match &&
David S. Millerf11e6652007-03-24 20:36:25 -0700474 (strict & RT6_LOOKUP_F_REACHABLE)) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700475 struct rt6_info *next = rt0->dst.rt6_next;
David S. Millerf11e6652007-03-24 20:36:25 -0700476
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800477 /* no entries matched; do round-robin */
David S. Millerf11e6652007-03-24 20:36:25 -0700478 if (!next || next->rt6i_metric != rt0->rt6i_metric)
479 next = fn->leaf;
480
481 if (next != rt0)
482 fn->rr_ptr = next;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700483 }
484
David S. Millerf11e6652007-03-24 20:36:25 -0700485 RT6_TRACE("%s() => %p\n",
Harvey Harrison0dc47872008-03-05 20:47:47 -0800486 __func__, match);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700487
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900488 net = dev_net(rt0->rt6i_dev);
Eric Dumazeta02cec22010-09-22 20:43:57 +0000489 return match ? match : net->ipv6.ip6_null_entry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700490}
491
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800492#ifdef CONFIG_IPV6_ROUTE_INFO
493int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
494 struct in6_addr *gwaddr)
495{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900496 struct net *net = dev_net(dev);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800497 struct route_info *rinfo = (struct route_info *) opt;
498 struct in6_addr prefix_buf, *prefix;
499 unsigned int pref;
YOSHIFUJI Hideaki4bed72e2008-05-27 17:37:49 +0900500 unsigned long lifetime;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800501 struct rt6_info *rt;
502
503 if (len < sizeof(struct route_info)) {
504 return -EINVAL;
505 }
506
507 /* Sanity check for prefix_len and length */
508 if (rinfo->length > 3) {
509 return -EINVAL;
510 } else if (rinfo->prefix_len > 128) {
511 return -EINVAL;
512 } else if (rinfo->prefix_len > 64) {
513 if (rinfo->length < 2) {
514 return -EINVAL;
515 }
516 } else if (rinfo->prefix_len > 0) {
517 if (rinfo->length < 1) {
518 return -EINVAL;
519 }
520 }
521
522 pref = rinfo->route_pref;
523 if (pref == ICMPV6_ROUTER_PREF_INVALID)
Jens Rosenboom3933fc92009-09-10 06:25:11 +0000524 return -EINVAL;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800525
YOSHIFUJI Hideaki4bed72e2008-05-27 17:37:49 +0900526 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800527
528 if (rinfo->length == 3)
529 prefix = (struct in6_addr *)rinfo->prefix;
530 else {
531 /* this function is safe */
532 ipv6_addr_prefix(&prefix_buf,
533 (struct in6_addr *)rinfo->prefix,
534 rinfo->prefix_len);
535 prefix = &prefix_buf;
536 }
537
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -0800538 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
539 dev->ifindex);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800540
541 if (rt && !lifetime) {
Thomas Grafe0a1ad732006-08-22 00:00:21 -0700542 ip6_del_rt(rt);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800543 rt = NULL;
544 }
545
546 if (!rt && lifetime)
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -0800547 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800548 pref);
549 else if (rt)
550 rt->rt6i_flags = RTF_ROUTEINFO |
551 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
552
553 if (rt) {
YOSHIFUJI Hideaki4bed72e2008-05-27 17:37:49 +0900554 if (!addrconf_finite_timeout(lifetime)) {
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800555 rt->rt6i_flags &= ~RTF_EXPIRES;
556 } else {
557 rt->rt6i_expires = jiffies + HZ * lifetime;
558 rt->rt6i_flags |= RTF_EXPIRES;
559 }
Changli Gaod8d1f302010-06-10 23:31:35 -0700560 dst_release(&rt->dst);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800561 }
562 return 0;
563}
564#endif
565
/*
 * Backtrack in the FIB tree when the current lookup produced the null entry.
 *
 * Relies on the expansion site providing: local variables `rt` and `fn`,
 * and the labels `out` and `restart`. Walks up from `fn` (descending into
 * a parent's source subtree when there is one that is not `fn` itself)
 * until a node with RTN_RTINFO is found (-> goto restart) or the top-level
 * root is reached (-> goto out).
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == (__net)->ipv6.ip6_null_entry) {			\
		struct fib6_node *pn;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn),	\
						 NULL, (saddr));	\
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
Thomas Grafc71099a2006-08-04 23:20:06 -0700583
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800584static struct rt6_info *ip6_pol_route_lookup(struct net *net,
585 struct fib6_table *table,
Thomas Grafc71099a2006-08-04 23:20:06 -0700586 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700587{
588 struct fib6_node *fn;
589 struct rt6_info *rt;
590
Thomas Grafc71099a2006-08-04 23:20:06 -0700591 read_lock_bh(&table->tb6_lock);
592 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
593restart:
594 rt = fn->leaf;
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900595 rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800596 BACKTRACK(net, &fl->fl6_src);
Thomas Grafc71099a2006-08-04 23:20:06 -0700597out:
Changli Gaod8d1f302010-06-10 23:31:35 -0700598 dst_use(&rt->dst, jiffies);
Thomas Grafc71099a2006-08-04 23:20:06 -0700599 read_unlock_bh(&table->tb6_lock);
Thomas Grafc71099a2006-08-04 23:20:06 -0700600 return rt;
601
602}
603
YOSHIFUJI Hideaki9acd9f32008-04-10 15:42:10 +0900604struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
605 const struct in6_addr *saddr, int oif, int strict)
Thomas Grafc71099a2006-08-04 23:20:06 -0700606{
607 struct flowi fl = {
608 .oif = oif,
Changli Gao58116622010-11-12 18:43:55 +0000609 .fl6_dst = *daddr,
Thomas Grafc71099a2006-08-04 23:20:06 -0700610 };
611 struct dst_entry *dst;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700612 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
Thomas Grafc71099a2006-08-04 23:20:06 -0700613
Thomas Grafadaa70b2006-10-13 15:01:03 -0700614 if (saddr) {
615 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
616 flags |= RT6_LOOKUP_F_HAS_SADDR;
617 }
618
Daniel Lezcano606a2b42008-03-04 13:45:59 -0800619 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
Thomas Grafc71099a2006-08-04 23:20:06 -0700620 if (dst->error == 0)
621 return (struct rt6_info *) dst;
622
623 dst_release(dst);
624
Linus Torvalds1da177e2005-04-16 15:20:36 -0700625 return NULL;
626}
627
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900628EXPORT_SYMBOL(rt6_lookup);
629
Thomas Grafc71099a2006-08-04 23:20:06 -0700630/* ip6_ins_rt is called with FREE table->tb6_lock.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700631 It takes new route entry, the addition fails by any reason the
632 route is freed. In any case, if caller does not hold it, it may
633 be destroyed.
634 */
635
Thomas Graf86872cb2006-08-22 00:01:08 -0700636static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700637{
638 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -0700639 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700640
Thomas Grafc71099a2006-08-04 23:20:06 -0700641 table = rt->rt6i_table;
642 write_lock_bh(&table->tb6_lock);
Thomas Graf86872cb2006-08-22 00:01:08 -0700643 err = fib6_add(&table->tb6_root, rt, info);
Thomas Grafc71099a2006-08-04 23:20:06 -0700644 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700645
646 return err;
647}
648
Thomas Graf40e22e82006-08-22 00:00:45 -0700649int ip6_ins_rt(struct rt6_info *rt)
650{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800651 struct nl_info info = {
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900652 .nl_net = dev_net(rt->rt6i_dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800653 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -0800654 return __ip6_ins_rt(rt, &info);
Thomas Graf40e22e82006-08-22 00:00:45 -0700655}
656
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800657static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
658 struct in6_addr *saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700659{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700660 struct rt6_info *rt;
661
662 /*
663 * Clone the route.
664 */
665
666 rt = ip6_rt_copy(ort);
667
668 if (rt) {
David S. Miller14deae42009-01-04 16:04:39 -0800669 struct neighbour *neigh;
670 int attempts = !in_softirq();
671
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900672 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
673 if (rt->rt6i_dst.plen != 128 &&
674 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
675 rt->rt6i_flags |= RTF_ANYCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700676 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900677 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700678
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900679 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700680 rt->rt6i_dst.plen = 128;
681 rt->rt6i_flags |= RTF_CACHE;
Changli Gaod8d1f302010-06-10 23:31:35 -0700682 rt->dst.flags |= DST_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700683
684#ifdef CONFIG_IPV6_SUBTREES
685 if (rt->rt6i_src.plen && saddr) {
686 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
687 rt->rt6i_src.plen = 128;
688 }
689#endif
690
David S. Miller14deae42009-01-04 16:04:39 -0800691 retry:
692 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
693 if (IS_ERR(neigh)) {
694 struct net *net = dev_net(rt->rt6i_dev);
695 int saved_rt_min_interval =
696 net->ipv6.sysctl.ip6_rt_gc_min_interval;
697 int saved_rt_elasticity =
698 net->ipv6.sysctl.ip6_rt_gc_elasticity;
699
700 if (attempts-- > 0) {
701 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
702 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
703
Alexey Dobriyan86393e52009-08-29 01:34:49 +0000704 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
David S. Miller14deae42009-01-04 16:04:39 -0800705
706 net->ipv6.sysctl.ip6_rt_gc_elasticity =
707 saved_rt_elasticity;
708 net->ipv6.sysctl.ip6_rt_gc_min_interval =
709 saved_rt_min_interval;
710 goto retry;
711 }
712
713 if (net_ratelimit())
714 printk(KERN_WARNING
Ulrich Weber7e1b33e2010-09-27 15:02:18 -0700715 "ipv6: Neighbour table overflow.\n");
Changli Gaod8d1f302010-06-10 23:31:35 -0700716 dst_free(&rt->dst);
David S. Miller14deae42009-01-04 16:04:39 -0800717 return NULL;
718 }
719 rt->rt6i_nexthop = neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700720
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800721 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700722
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800723 return rt;
724}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700725
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800726static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
727{
728 struct rt6_info *rt = ip6_rt_copy(ort);
729 if (rt) {
730 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
731 rt->rt6i_dst.plen = 128;
732 rt->rt6i_flags |= RTF_CACHE;
Changli Gaod8d1f302010-06-10 23:31:35 -0700733 rt->dst.flags |= DST_HOST;
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800734 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
735 }
736 return rt;
737}
738
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800739static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
740 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700741{
742 struct fib6_node *fn;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800743 struct rt6_info *rt, *nrt;
Thomas Grafc71099a2006-08-04 23:20:06 -0700744 int strict = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700745 int attempts = 3;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800746 int err;
YOSHIFUJI Hideaki53b79972008-07-19 22:35:03 -0700747 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700748
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700749 strict |= flags & RT6_LOOKUP_F_IFACE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700750
751relookup:
Thomas Grafc71099a2006-08-04 23:20:06 -0700752 read_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700753
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800754restart_2:
Thomas Grafc71099a2006-08-04 23:20:06 -0700755 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700756
757restart:
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700758 rt = rt6_select(fn, oif, strict | reachable);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800759
760 BACKTRACK(net, &fl->fl6_src);
761 if (rt == net->ipv6.ip6_null_entry ||
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800762 rt->rt6i_flags & RTF_CACHE)
YOSHIFUJI Hideaki1ddef0442006-03-20 17:01:24 -0800763 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700764
Changli Gaod8d1f302010-06-10 23:31:35 -0700765 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700766 read_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800767
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800768 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800769 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
David S. Millerd80bc0f2011-01-24 16:01:58 -0800770 else
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800771 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800772
Changli Gaod8d1f302010-06-10 23:31:35 -0700773 dst_release(&rt->dst);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800774 rt = nrt ? : net->ipv6.ip6_null_entry;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800775
Changli Gaod8d1f302010-06-10 23:31:35 -0700776 dst_hold(&rt->dst);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800777 if (nrt) {
Thomas Graf40e22e82006-08-22 00:00:45 -0700778 err = ip6_ins_rt(nrt);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800779 if (!err)
780 goto out2;
781 }
782
783 if (--attempts <= 0)
784 goto out2;
785
786 /*
Thomas Grafc71099a2006-08-04 23:20:06 -0700787 * Race condition! In the gap, when table->tb6_lock was
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800788 * released someone could insert this route. Relookup.
789 */
Changli Gaod8d1f302010-06-10 23:31:35 -0700790 dst_release(&rt->dst);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800791 goto relookup;
792
793out:
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800794 if (reachable) {
795 reachable = 0;
796 goto restart_2;
797 }
Changli Gaod8d1f302010-06-10 23:31:35 -0700798 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700799 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700800out2:
Changli Gaod8d1f302010-06-10 23:31:35 -0700801 rt->dst.lastuse = jiffies;
802 rt->dst.__use++;
Thomas Grafc71099a2006-08-04 23:20:06 -0700803
804 return rt;
805}
806
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800807static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700808 struct flowi *fl, int flags)
809{
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800810 return ip6_pol_route(net, table, fl->iif, fl, flags);
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700811}
812
Thomas Grafc71099a2006-08-04 23:20:06 -0700813void ip6_route_input(struct sk_buff *skb)
814{
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700815 struct ipv6hdr *iph = ipv6_hdr(skb);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900816 struct net *net = dev_net(skb->dev);
Thomas Grafadaa70b2006-10-13 15:01:03 -0700817 int flags = RT6_LOOKUP_F_HAS_SADDR;
Thomas Grafc71099a2006-08-04 23:20:06 -0700818 struct flowi fl = {
819 .iif = skb->dev->ifindex,
Changli Gao58116622010-11-12 18:43:55 +0000820 .fl6_dst = iph->daddr,
821 .fl6_src = iph->saddr,
822 .fl6_flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900823 .mark = skb->mark,
Thomas Grafc71099a2006-08-04 23:20:06 -0700824 .proto = iph->nexthdr,
825 };
Thomas Grafadaa70b2006-10-13 15:01:03 -0700826
Thomas Goff1d6e55f2009-01-27 22:39:59 -0800827 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
Thomas Grafadaa70b2006-10-13 15:01:03 -0700828 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700829
Eric Dumazetadf30902009-06-02 05:19:30 +0000830 skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input));
Thomas Grafc71099a2006-08-04 23:20:06 -0700831}
832
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800833static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
Thomas Grafc71099a2006-08-04 23:20:06 -0700834 struct flowi *fl, int flags)
835{
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800836 return ip6_pol_route(net, table, fl->oif, fl, flags);
Thomas Grafc71099a2006-08-04 23:20:06 -0700837}
838
Daniel Lezcano4591db42008-03-05 10:48:10 -0800839struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
840 struct flowi *fl)
Thomas Grafc71099a2006-08-04 23:20:06 -0700841{
842 int flags = 0;
843
Brian Haley6057fd72010-05-28 23:02:35 -0700844 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl->fl6_dst))
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700845 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700846
Thomas Grafadaa70b2006-10-13 15:01:03 -0700847 if (!ipv6_addr_any(&fl->fl6_src))
848 flags |= RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideaki / 吉藤英明0c9a2ac2010-03-07 00:14:44 +0000849 else if (sk)
850 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
Thomas Grafadaa70b2006-10-13 15:01:03 -0700851
Daniel Lezcano4591db42008-03-05 10:48:10 -0800852 return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700853}
854
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900855EXPORT_SYMBOL(ip6_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700856
David S. Miller14e50e52007-05-24 18:17:54 -0700857int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
858{
859 struct rt6_info *ort = (struct rt6_info *) *dstp;
860 struct rt6_info *rt = (struct rt6_info *)
861 dst_alloc(&ip6_dst_blackhole_ops);
862 struct dst_entry *new = NULL;
863
864 if (rt) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700865 new = &rt->dst;
David S. Miller14e50e52007-05-24 18:17:54 -0700866
867 atomic_set(&new->__refcnt, 1);
868 new->__use = 1;
Herbert Xu352e5122007-11-13 21:34:06 -0800869 new->input = dst_discard;
870 new->output = dst_discard;
David S. Miller14e50e52007-05-24 18:17:54 -0700871
David S. Millerdefb3512010-12-08 21:16:57 -0800872 dst_copy_metrics(new, &ort->dst);
Changli Gaod8d1f302010-06-10 23:31:35 -0700873 new->dev = ort->dst.dev;
David S. Miller14e50e52007-05-24 18:17:54 -0700874 if (new->dev)
875 dev_hold(new->dev);
876 rt->rt6i_idev = ort->rt6i_idev;
877 if (rt->rt6i_idev)
878 in6_dev_hold(rt->rt6i_idev);
879 rt->rt6i_expires = 0;
880
881 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
882 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
883 rt->rt6i_metric = 0;
884
885 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
886#ifdef CONFIG_IPV6_SUBTREES
887 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
888#endif
889
890 dst_free(new);
891 }
892
893 dst_release(*dstp);
894 *dstp = new;
Eric Dumazeta02cec22010-09-22 20:43:57 +0000895 return new ? 0 : -ENOMEM;
David S. Miller14e50e52007-05-24 18:17:54 -0700896}
897EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
898
Linus Torvalds1da177e2005-04-16 15:20:36 -0700899/*
900 * Destination cache support functions
901 */
902
903static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
904{
905 struct rt6_info *rt;
906
907 rt = (struct rt6_info *) dst;
908
Herbert Xu10414442010-03-18 23:00:22 +0000909 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700910 return dst;
911
912 return NULL;
913}
914
915static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
916{
917 struct rt6_info *rt = (struct rt6_info *) dst;
918
919 if (rt) {
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +0000920 if (rt->rt6i_flags & RTF_CACHE) {
921 if (rt6_check_expired(rt)) {
922 ip6_del_rt(rt);
923 dst = NULL;
924 }
925 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700926 dst_release(dst);
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +0000927 dst = NULL;
928 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700929 }
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +0000930 return dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700931}
932
933static void ip6_link_failure(struct sk_buff *skb)
934{
935 struct rt6_info *rt;
936
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +0000937 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700938
Eric Dumazetadf30902009-06-02 05:19:30 +0000939 rt = (struct rt6_info *) skb_dst(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700940 if (rt) {
941 if (rt->rt6i_flags&RTF_CACHE) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700942 dst_set_expires(&rt->dst, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700943 rt->rt6i_flags |= RTF_EXPIRES;
944 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
945 rt->rt6i_node->fn_sernum = -1;
946 }
947}
948
949static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
950{
951 struct rt6_info *rt6 = (struct rt6_info*)dst;
952
953 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
954 rt6->rt6i_flags |= RTF_MODIFIED;
955 if (mtu < IPV6_MIN_MTU) {
David S. Millerdefb3512010-12-08 21:16:57 -0800956 u32 features = dst_metric(dst, RTAX_FEATURES);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700957 mtu = IPV6_MIN_MTU;
David S. Millerdefb3512010-12-08 21:16:57 -0800958 features |= RTAX_FEATURE_ALLFRAG;
959 dst_metric_set(dst, RTAX_FEATURES, features);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700960 }
David S. Millerdefb3512010-12-08 21:16:57 -0800961 dst_metric_set(dst, RTAX_MTU, mtu);
Tom Tucker8d717402006-07-30 20:43:36 -0700962 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700963 }
964}
965
David S. Miller0dbaee32010-12-13 12:52:14 -0800966static unsigned int ip6_default_advmss(const struct dst_entry *dst)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700967{
David S. Miller0dbaee32010-12-13 12:52:14 -0800968 struct net_device *dev = dst->dev;
969 unsigned int mtu = dst_mtu(dst);
970 struct net *net = dev_net(dev);
971
Linus Torvalds1da177e2005-04-16 15:20:36 -0700972 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
973
Daniel Lezcano55786892008-03-04 13:47:47 -0800974 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
975 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700976
977 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900978 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
979 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
980 * IPV6_MAXPLEN is also valid and means: "any MSS,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700981 * rely only on pmtu discovery"
982 */
983 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
984 mtu = IPV6_MAXPLEN;
985 return mtu;
986}
987
David S. Millerd33e4552010-12-14 13:01:14 -0800988static unsigned int ip6_default_mtu(const struct dst_entry *dst)
989{
990 unsigned int mtu = IPV6_MIN_MTU;
991 struct inet6_dev *idev;
992
993 rcu_read_lock();
994 idev = __in6_dev_get(dst->dev);
995 if (idev)
996 mtu = idev->cnf.mtu6;
997 rcu_read_unlock();
998
999 return mtu;
1000}
1001
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001002static struct dst_entry *icmp6_dst_gc_list;
1003static DEFINE_SPINLOCK(icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001004
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001005struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001006 struct neighbour *neigh,
YOSHIFUJI Hideaki9acd9f32008-04-10 15:42:10 +09001007 const struct in6_addr *addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001008{
1009 struct rt6_info *rt;
1010 struct inet6_dev *idev = in6_dev_get(dev);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001011 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001012
1013 if (unlikely(idev == NULL))
1014 return NULL;
1015
Alexey Dobriyan86393e52009-08-29 01:34:49 +00001016 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001017 if (unlikely(rt == NULL)) {
1018 in6_dev_put(idev);
1019 goto out;
1020 }
1021
1022 dev_hold(dev);
1023 if (neigh)
1024 neigh_hold(neigh);
David S. Miller14deae42009-01-04 16:04:39 -08001025 else {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001026 neigh = ndisc_get_neigh(dev, addr);
David S. Miller14deae42009-01-04 16:04:39 -08001027 if (IS_ERR(neigh))
1028 neigh = NULL;
1029 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001030
1031 rt->rt6i_dev = dev;
1032 rt->rt6i_idev = idev;
1033 rt->rt6i_nexthop = neigh;
Changli Gaod8d1f302010-06-10 23:31:35 -07001034 atomic_set(&rt->dst.__refcnt, 1);
David S. Millerdefb3512010-12-08 21:16:57 -08001035 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
Changli Gaod8d1f302010-06-10 23:31:35 -07001036 rt->dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001037
1038#if 0 /* there's no chance to use these for ndisc */
Changli Gaod8d1f302010-06-10 23:31:35 -07001039 rt->dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001040 ? DST_HOST
Linus Torvalds1da177e2005-04-16 15:20:36 -07001041 : 0;
1042 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1043 rt->rt6i_dst.plen = 128;
1044#endif
1045
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001046 spin_lock_bh(&icmp6_dst_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07001047 rt->dst.next = icmp6_dst_gc_list;
1048 icmp6_dst_gc_list = &rt->dst;
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001049 spin_unlock_bh(&icmp6_dst_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001050
Daniel Lezcano55786892008-03-04 13:47:47 -08001051 fib6_force_start_gc(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001052
1053out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001054 return &rt->dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001055}
1056
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001057int icmp6_dst_gc(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001058{
1059 struct dst_entry *dst, *next, **pprev;
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001060 int more = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001061
1062 next = NULL;
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001063
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001064 spin_lock_bh(&icmp6_dst_lock);
1065 pprev = &icmp6_dst_gc_list;
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001066
Linus Torvalds1da177e2005-04-16 15:20:36 -07001067 while ((dst = *pprev) != NULL) {
1068 if (!atomic_read(&dst->__refcnt)) {
1069 *pprev = dst->next;
1070 dst_free(dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001071 } else {
1072 pprev = &dst->next;
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001073 ++more;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001074 }
1075 }
1076
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001077 spin_unlock_bh(&icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001078
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001079 return more;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001080}
1081
David S. Miller1e493d12008-09-10 17:27:15 -07001082static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1083 void *arg)
1084{
1085 struct dst_entry *dst, **pprev;
1086
1087 spin_lock_bh(&icmp6_dst_lock);
1088 pprev = &icmp6_dst_gc_list;
1089 while ((dst = *pprev) != NULL) {
1090 struct rt6_info *rt = (struct rt6_info *) dst;
1091 if (func(rt, arg)) {
1092 *pprev = dst->next;
1093 dst_free(dst);
1094 } else {
1095 pprev = &dst->next;
1096 }
1097 }
1098 spin_unlock_bh(&icmp6_dst_lock);
1099}
1100
Daniel Lezcano569d3642008-01-18 03:56:57 -08001101static int ip6_dst_gc(struct dst_ops *ops)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001102{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001103 unsigned long now = jiffies;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00001104 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
Daniel Lezcano7019b782008-03-04 13:50:14 -08001105 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1106 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1107 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1108 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1109 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001110 int entries;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001111
Eric Dumazetfc66f952010-10-08 06:37:34 +00001112 entries = dst_entries_get_fast(ops);
Daniel Lezcano7019b782008-03-04 13:50:14 -08001113 if (time_after(rt_last_gc + rt_min_interval, now) &&
Eric Dumazetfc66f952010-10-08 06:37:34 +00001114 entries <= rt_max_size)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001115 goto out;
1116
Benjamin Thery6891a342008-03-04 13:49:47 -08001117 net->ipv6.ip6_rt_gc_expire++;
1118 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1119 net->ipv6.ip6_rt_last_gc = now;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001120 entries = dst_entries_get_slow(ops);
1121 if (entries < ops->gc_thresh)
Daniel Lezcano7019b782008-03-04 13:50:14 -08001122 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001123out:
Daniel Lezcano7019b782008-03-04 13:50:14 -08001124 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001125 return entries > rt_max_size;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001126}
1127
1128/* Clean host part of a prefix. Not necessary in radix tree,
1129 but results in cleaner routing tables.
1130
1131 Remove it only when all the things will work!
1132 */
1133
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001134int ip6_dst_hoplimit(struct dst_entry *dst)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001135{
David S. Miller5170ae82010-12-12 21:35:57 -08001136 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
David S. Millera02e4b72010-12-12 21:39:02 -08001137 if (hoplimit == 0) {
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001138 struct net_device *dev = dst->dev;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001139 struct inet6_dev *idev;
1140
1141 rcu_read_lock();
1142 idev = __in6_dev_get(dev);
1143 if (idev)
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001144 hoplimit = idev->cnf.hop_limit;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001145 else
YOSHIFUJI Hideaki53b79972008-07-19 22:35:03 -07001146 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001147 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001148 }
1149 return hoplimit;
1150}
David S. Millerabbf46a2010-12-12 21:14:46 -08001151EXPORT_SYMBOL(ip6_dst_hoplimit);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001152
1153/*
1154 *
1155 */
1156
Thomas Graf86872cb2006-08-22 00:01:08 -07001157int ip6_route_add(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001158{
1159 int err;
Daniel Lezcano55786892008-03-04 13:47:47 -08001160 struct net *net = cfg->fc_nlinfo.nl_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001161 struct rt6_info *rt = NULL;
1162 struct net_device *dev = NULL;
1163 struct inet6_dev *idev = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001164 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001165 int addr_type;
1166
Thomas Graf86872cb2006-08-22 00:01:08 -07001167 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001168 return -EINVAL;
1169#ifndef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001170 if (cfg->fc_src_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001171 return -EINVAL;
1172#endif
Thomas Graf86872cb2006-08-22 00:01:08 -07001173 if (cfg->fc_ifindex) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001174 err = -ENODEV;
Daniel Lezcano55786892008-03-04 13:47:47 -08001175 dev = dev_get_by_index(net, cfg->fc_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001176 if (!dev)
1177 goto out;
1178 idev = in6_dev_get(dev);
1179 if (!idev)
1180 goto out;
1181 }
1182
Thomas Graf86872cb2006-08-22 00:01:08 -07001183 if (cfg->fc_metric == 0)
1184 cfg->fc_metric = IP6_RT_PRIO_USER;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001185
Daniel Lezcano55786892008-03-04 13:47:47 -08001186 table = fib6_new_table(net, cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001187 if (table == NULL) {
1188 err = -ENOBUFS;
1189 goto out;
1190 }
1191
Alexey Dobriyan86393e52009-08-29 01:34:49 +00001192 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001193
1194 if (rt == NULL) {
1195 err = -ENOMEM;
1196 goto out;
1197 }
1198
Changli Gaod8d1f302010-06-10 23:31:35 -07001199 rt->dst.obsolete = -1;
YOSHIFUJI Hideaki6f704992008-05-19 16:56:11 -07001200 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1201 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1202 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001203
Thomas Graf86872cb2006-08-22 00:01:08 -07001204 if (cfg->fc_protocol == RTPROT_UNSPEC)
1205 cfg->fc_protocol = RTPROT_BOOT;
1206 rt->rt6i_protocol = cfg->fc_protocol;
1207
1208 addr_type = ipv6_addr_type(&cfg->fc_dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001209
1210 if (addr_type & IPV6_ADDR_MULTICAST)
Changli Gaod8d1f302010-06-10 23:31:35 -07001211 rt->dst.input = ip6_mc_input;
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00001212 else if (cfg->fc_flags & RTF_LOCAL)
1213 rt->dst.input = ip6_input;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001214 else
Changli Gaod8d1f302010-06-10 23:31:35 -07001215 rt->dst.input = ip6_forward;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001216
Changli Gaod8d1f302010-06-10 23:31:35 -07001217 rt->dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001218
Thomas Graf86872cb2006-08-22 00:01:08 -07001219 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1220 rt->rt6i_dst.plen = cfg->fc_dst_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001221 if (rt->rt6i_dst.plen == 128)
Changli Gaod8d1f302010-06-10 23:31:35 -07001222 rt->dst.flags = DST_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001223
1224#ifdef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001225 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1226 rt->rt6i_src.plen = cfg->fc_src_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001227#endif
1228
Thomas Graf86872cb2006-08-22 00:01:08 -07001229 rt->rt6i_metric = cfg->fc_metric;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001230
1231 /* We cannot add true routes via loopback here,
1232 they would result in kernel looping; promote them to reject routes
1233 */
Thomas Graf86872cb2006-08-22 00:01:08 -07001234 if ((cfg->fc_flags & RTF_REJECT) ||
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00001235 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1236 && !(cfg->fc_flags&RTF_LOCAL))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001237 /* hold loopback dev/idev if we haven't done so. */
Daniel Lezcano55786892008-03-04 13:47:47 -08001238 if (dev != net->loopback_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001239 if (dev) {
1240 dev_put(dev);
1241 in6_dev_put(idev);
1242 }
Daniel Lezcano55786892008-03-04 13:47:47 -08001243 dev = net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001244 dev_hold(dev);
1245 idev = in6_dev_get(dev);
1246 if (!idev) {
1247 err = -ENODEV;
1248 goto out;
1249 }
1250 }
Changli Gaod8d1f302010-06-10 23:31:35 -07001251 rt->dst.output = ip6_pkt_discard_out;
1252 rt->dst.input = ip6_pkt_discard;
1253 rt->dst.error = -ENETUNREACH;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001254 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1255 goto install_route;
1256 }
1257
Thomas Graf86872cb2006-08-22 00:01:08 -07001258 if (cfg->fc_flags & RTF_GATEWAY) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001259 struct in6_addr *gw_addr;
1260 int gwa_type;
1261
Thomas Graf86872cb2006-08-22 00:01:08 -07001262 gw_addr = &cfg->fc_gateway;
1263 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001264 gwa_type = ipv6_addr_type(gw_addr);
1265
1266 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1267 struct rt6_info *grt;
1268
1269 /* IPv6 strictly inhibits using not link-local
1270 addresses as nexthop address.
1271 Otherwise, router will not able to send redirects.
1272 It is very good, but in some (rare!) circumstances
1273 (SIT, PtP, NBMA NOARP links) it is handy to allow
1274 some exceptions. --ANK
1275 */
1276 err = -EINVAL;
1277 if (!(gwa_type&IPV6_ADDR_UNICAST))
1278 goto out;
1279
Daniel Lezcano55786892008-03-04 13:47:47 -08001280 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001281
1282 err = -EHOSTUNREACH;
1283 if (grt == NULL)
1284 goto out;
1285 if (dev) {
1286 if (dev != grt->rt6i_dev) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001287 dst_release(&grt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001288 goto out;
1289 }
1290 } else {
1291 dev = grt->rt6i_dev;
1292 idev = grt->rt6i_idev;
1293 dev_hold(dev);
1294 in6_dev_hold(grt->rt6i_idev);
1295 }
1296 if (!(grt->rt6i_flags&RTF_GATEWAY))
1297 err = 0;
Changli Gaod8d1f302010-06-10 23:31:35 -07001298 dst_release(&grt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001299
1300 if (err)
1301 goto out;
1302 }
1303 err = -EINVAL;
1304 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1305 goto out;
1306 }
1307
1308 err = -ENODEV;
1309 if (dev == NULL)
1310 goto out;
1311
Thomas Graf86872cb2006-08-22 00:01:08 -07001312 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001313 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1314 if (IS_ERR(rt->rt6i_nexthop)) {
1315 err = PTR_ERR(rt->rt6i_nexthop);
1316 rt->rt6i_nexthop = NULL;
1317 goto out;
1318 }
1319 }
1320
Thomas Graf86872cb2006-08-22 00:01:08 -07001321 rt->rt6i_flags = cfg->fc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001322
1323install_route:
Thomas Graf86872cb2006-08-22 00:01:08 -07001324 if (cfg->fc_mx) {
1325 struct nlattr *nla;
1326 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001327
Thomas Graf86872cb2006-08-22 00:01:08 -07001328 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
Thomas Graf8f4c1f92007-09-12 14:44:36 +02001329 int type = nla_type(nla);
Thomas Graf86872cb2006-08-22 00:01:08 -07001330
1331 if (type) {
1332 if (type > RTAX_MAX) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001333 err = -EINVAL;
1334 goto out;
1335 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001336
David S. Millerdefb3512010-12-08 21:16:57 -08001337 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001338 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001339 }
1340 }
1341
Changli Gaod8d1f302010-06-10 23:31:35 -07001342 rt->dst.dev = dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001343 rt->rt6i_idev = idev;
Thomas Grafc71099a2006-08-04 23:20:06 -07001344 rt->rt6i_table = table;
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001345
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001346 cfg->fc_nlinfo.nl_net = dev_net(dev);
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001347
Thomas Graf86872cb2006-08-22 00:01:08 -07001348 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001349
1350out:
1351 if (dev)
1352 dev_put(dev);
1353 if (idev)
1354 in6_dev_put(idev);
1355 if (rt)
Changli Gaod8d1f302010-06-10 23:31:35 -07001356 dst_free(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001357 return err;
1358}
1359
Thomas Graf86872cb2006-08-22 00:01:08 -07001360static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001361{
1362 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -07001363 struct fib6_table *table;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001364 struct net *net = dev_net(rt->rt6i_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001365
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001366 if (rt == net->ipv6.ip6_null_entry)
Patrick McHardy6c813a72006-08-06 22:22:47 -07001367 return -ENOENT;
1368
Thomas Grafc71099a2006-08-04 23:20:06 -07001369 table = rt->rt6i_table;
1370 write_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001371
Thomas Graf86872cb2006-08-22 00:01:08 -07001372 err = fib6_del(rt, info);
Changli Gaod8d1f302010-06-10 23:31:35 -07001373 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001374
Thomas Grafc71099a2006-08-04 23:20:06 -07001375 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001376
1377 return err;
1378}
1379
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001380int ip6_del_rt(struct rt6_info *rt)
1381{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001382 struct nl_info info = {
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001383 .nl_net = dev_net(rt->rt6i_dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001384 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08001385 return __ip6_del_rt(rt, &info);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001386}
1387
Thomas Graf86872cb2006-08-22 00:01:08 -07001388static int ip6_route_del(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001389{
Thomas Grafc71099a2006-08-04 23:20:06 -07001390 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001391 struct fib6_node *fn;
1392 struct rt6_info *rt;
1393 int err = -ESRCH;
1394
Daniel Lezcano55786892008-03-04 13:47:47 -08001395 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001396 if (table == NULL)
1397 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001398
Thomas Grafc71099a2006-08-04 23:20:06 -07001399 read_lock_bh(&table->tb6_lock);
1400
1401 fn = fib6_locate(&table->tb6_root,
Thomas Graf86872cb2006-08-22 00:01:08 -07001402 &cfg->fc_dst, cfg->fc_dst_len,
1403 &cfg->fc_src, cfg->fc_src_len);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001404
Linus Torvalds1da177e2005-04-16 15:20:36 -07001405 if (fn) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001406 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
Thomas Graf86872cb2006-08-22 00:01:08 -07001407 if (cfg->fc_ifindex &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001408 (rt->rt6i_dev == NULL ||
Thomas Graf86872cb2006-08-22 00:01:08 -07001409 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001410 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001411 if (cfg->fc_flags & RTF_GATEWAY &&
1412 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001413 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001414 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001415 continue;
Changli Gaod8d1f302010-06-10 23:31:35 -07001416 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001417 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001418
Thomas Graf86872cb2006-08-22 00:01:08 -07001419 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001420 }
1421 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001422 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001423
1424 return err;
1425}
1426
1427/*
1428 * Handle redirects
1429 */
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001430struct ip6rd_flowi {
1431 struct flowi fl;
1432 struct in6_addr gateway;
1433};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001434
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001435static struct rt6_info *__ip6_route_redirect(struct net *net,
1436 struct fib6_table *table,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001437 struct flowi *fl,
1438 int flags)
1439{
1440 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1441 struct rt6_info *rt;
1442 struct fib6_node *fn;
Thomas Grafc71099a2006-08-04 23:20:06 -07001443
Linus Torvalds1da177e2005-04-16 15:20:36 -07001444 /*
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001445 * Get the "current" route for this destination and
1446 * check if the redirect has come from approriate router.
1447 *
1448 * RFC 2461 specifies that redirects should only be
1449 * accepted if they come from the nexthop to the target.
1450 * Due to the way the routes are chosen, this notion
1451 * is a bit fuzzy and one might need to check all possible
1452 * routes.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001453 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001454
Thomas Grafc71099a2006-08-04 23:20:06 -07001455 read_lock_bh(&table->tb6_lock);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001456 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001457restart:
Changli Gaod8d1f302010-06-10 23:31:35 -07001458 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001459 /*
1460 * Current route is on-link; redirect is always invalid.
1461 *
1462 * Seems, previous statement is not true. It could
1463 * be node, which looks for us as on-link (f.e. proxy ndisc)
1464 * But then router serving it might decide, that we should
1465 * know truth 8)8) --ANK (980726).
1466 */
1467 if (rt6_check_expired(rt))
1468 continue;
1469 if (!(rt->rt6i_flags & RTF_GATEWAY))
1470 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001471 if (fl->oif != rt->rt6i_dev->ifindex)
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001472 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001473 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001474 continue;
1475 break;
1476 }
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001477
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001478 if (!rt)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001479 rt = net->ipv6.ip6_null_entry;
1480 BACKTRACK(net, &fl->fl6_src);
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001481out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001482 dst_hold(&rt->dst);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001483
1484 read_unlock_bh(&table->tb6_lock);
1485
1486 return rt;
1487};
1488
1489static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1490 struct in6_addr *src,
1491 struct in6_addr *gateway,
1492 struct net_device *dev)
1493{
Thomas Grafadaa70b2006-10-13 15:01:03 -07001494 int flags = RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001495 struct net *net = dev_net(dev);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001496 struct ip6rd_flowi rdfl = {
1497 .fl = {
1498 .oif = dev->ifindex,
Changli Gao58116622010-11-12 18:43:55 +00001499 .fl6_dst = *dest,
1500 .fl6_src = *src,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001501 },
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001502 };
Thomas Grafadaa70b2006-10-13 15:01:03 -07001503
Brian Haley86c36ce2009-10-07 13:58:01 -07001504 ipv6_addr_copy(&rdfl.gateway, gateway);
1505
Thomas Grafadaa70b2006-10-13 15:01:03 -07001506 if (rt6_need_strict(dest))
1507 flags |= RT6_LOOKUP_F_IFACE;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001508
Daniel Lezcano55786892008-03-04 13:47:47 -08001509 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
Daniel Lezcano58f09b72008-03-03 23:25:27 -08001510 flags, __ip6_route_redirect);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001511}
1512
1513void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1514 struct in6_addr *saddr,
1515 struct neighbour *neigh, u8 *lladdr, int on_link)
1516{
1517 struct rt6_info *rt, *nrt = NULL;
1518 struct netevent_redirect netevent;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001519 struct net *net = dev_net(neigh->dev);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001520
1521 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1522
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001523 if (rt == net->ipv6.ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001524 if (net_ratelimit())
1525 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1526 "for redirect target\n");
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001527 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001528 }
1529
Linus Torvalds1da177e2005-04-16 15:20:36 -07001530 /*
1531 * We have finally decided to accept it.
1532 */
1533
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001534 neigh_update(neigh, lladdr, NUD_STALE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001535 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1536 NEIGH_UPDATE_F_OVERRIDE|
1537 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1538 NEIGH_UPDATE_F_ISROUTER))
1539 );
1540
1541 /*
1542 * Redirect received -> path was valid.
1543 * Look, redirects are sent only in response to data packets,
1544 * so that this nexthop apparently is reachable. --ANK
1545 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001546 dst_confirm(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001547
1548 /* Duplicate redirect: silently ignore. */
Changli Gaod8d1f302010-06-10 23:31:35 -07001549 if (neigh == rt->dst.neighbour)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001550 goto out;
1551
1552 nrt = ip6_rt_copy(rt);
1553 if (nrt == NULL)
1554 goto out;
1555
1556 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1557 if (on_link)
1558 nrt->rt6i_flags &= ~RTF_GATEWAY;
1559
1560 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1561 nrt->rt6i_dst.plen = 128;
Changli Gaod8d1f302010-06-10 23:31:35 -07001562 nrt->dst.flags |= DST_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001563
1564 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1565 nrt->rt6i_nexthop = neigh_clone(neigh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001566
Thomas Graf40e22e82006-08-22 00:00:45 -07001567 if (ip6_ins_rt(nrt))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001568 goto out;
1569
Changli Gaod8d1f302010-06-10 23:31:35 -07001570 netevent.old = &rt->dst;
1571 netevent.new = &nrt->dst;
Tom Tucker8d717402006-07-30 20:43:36 -07001572 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1573
Linus Torvalds1da177e2005-04-16 15:20:36 -07001574 if (rt->rt6i_flags&RTF_CACHE) {
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001575 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001576 return;
1577 }
1578
1579out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001580 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001581}
1582
1583/*
1584 * Handle ICMP "packet too big" messages
1585 * i.e. Path MTU discovery
1586 */
1587
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001588static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr,
1589 struct net *net, u32 pmtu, int ifindex)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001590{
1591 struct rt6_info *rt, *nrt;
1592 int allfrag = 0;
Andrey Vagind3052b52010-12-11 15:20:11 +00001593again:
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001594 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001595 if (rt == NULL)
1596 return;
1597
Andrey Vagind3052b52010-12-11 15:20:11 +00001598 if (rt6_check_expired(rt)) {
1599 ip6_del_rt(rt);
1600 goto again;
1601 }
1602
Changli Gaod8d1f302010-06-10 23:31:35 -07001603 if (pmtu >= dst_mtu(&rt->dst))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001604 goto out;
1605
1606 if (pmtu < IPV6_MIN_MTU) {
1607 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001608 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
Linus Torvalds1da177e2005-04-16 15:20:36 -07001609 * MTU (1280) and a fragment header should always be included
1610 * after a node receiving Too Big message reporting PMTU is
1611 * less than the IPv6 Minimum Link MTU.
1612 */
1613 pmtu = IPV6_MIN_MTU;
1614 allfrag = 1;
1615 }
1616
1617 /* New mtu received -> path was valid.
1618 They are sent only in response to data packets,
1619 so that this nexthop apparently is reachable. --ANK
1620 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001621 dst_confirm(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001622
1623 /* Host route. If it is static, it would be better
1624 not to override it, but add new one, so that
1625 when cache entry will expire old pmtu
1626 would return automatically.
1627 */
1628 if (rt->rt6i_flags & RTF_CACHE) {
David S. Millerdefb3512010-12-08 21:16:57 -08001629 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1630 if (allfrag) {
1631 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1632 features |= RTAX_FEATURE_ALLFRAG;
1633 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1634 }
Changli Gaod8d1f302010-06-10 23:31:35 -07001635 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001636 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1637 goto out;
1638 }
1639
1640 /* Network route.
1641 Two cases are possible:
1642 1. It is connected route. Action: COW
1643 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1644 */
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001645 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001646 nrt = rt6_alloc_cow(rt, daddr, saddr);
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001647 else
1648 nrt = rt6_alloc_clone(rt, daddr);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001649
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001650 if (nrt) {
David S. Millerdefb3512010-12-08 21:16:57 -08001651 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1652 if (allfrag) {
1653 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1654 features |= RTAX_FEATURE_ALLFRAG;
1655 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1656 }
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001657
1658 /* According to RFC 1981, detecting PMTU increase shouldn't be
1659 * happened within 5 mins, the recommended timer is 10 mins.
1660 * Here this route expiration time is set to ip6_rt_mtu_expires
1661 * which is 10 mins. After 10 mins the decreased pmtu is expired
1662 * and detecting PMTU increase will be automatically happened.
1663 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001664 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001665 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1666
Thomas Graf40e22e82006-08-22 00:00:45 -07001667 ip6_ins_rt(nrt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001668 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001669out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001670 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001671}
1672
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001673void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1674 struct net_device *dev, u32 pmtu)
1675{
1676 struct net *net = dev_net(dev);
1677
1678 /*
1679 * RFC 1981 states that a node "MUST reduce the size of the packets it
1680 * is sending along the path" that caused the Packet Too Big message.
1681 * Since it's not possible in the general case to determine which
1682 * interface was used to send the original packet, we update the MTU
1683 * on the interface that will be used to send future packets. We also
1684 * update the MTU on the interface that received the Packet Too Big in
1685 * case the original packet was forced out that interface with
1686 * SO_BINDTODEVICE or similar. This is the next best thing to the
1687 * correct behaviour, which would be to update the MTU on all
1688 * interfaces.
1689 */
1690 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1691 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1692}
1693
Linus Torvalds1da177e2005-04-16 15:20:36 -07001694/*
1695 * Misc support functions
1696 */
1697
1698static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1699{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001700 struct net *net = dev_net(ort->rt6i_dev);
Alexey Dobriyan86393e52009-08-29 01:34:49 +00001701 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001702
1703 if (rt) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001704 rt->dst.input = ort->dst.input;
1705 rt->dst.output = ort->dst.output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001706
David S. Millerdefb3512010-12-08 21:16:57 -08001707 dst_copy_metrics(&rt->dst, &ort->dst);
Changli Gaod8d1f302010-06-10 23:31:35 -07001708 rt->dst.error = ort->dst.error;
1709 rt->dst.dev = ort->dst.dev;
1710 if (rt->dst.dev)
1711 dev_hold(rt->dst.dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001712 rt->rt6i_idev = ort->rt6i_idev;
1713 if (rt->rt6i_idev)
1714 in6_dev_hold(rt->rt6i_idev);
Changli Gaod8d1f302010-06-10 23:31:35 -07001715 rt->dst.lastuse = jiffies;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001716 rt->rt6i_expires = 0;
1717
1718 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1719 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1720 rt->rt6i_metric = 0;
1721
1722 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1723#ifdef CONFIG_IPV6_SUBTREES
1724 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1725#endif
Thomas Grafc71099a2006-08-04 23:20:06 -07001726 rt->rt6i_table = ort->rt6i_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001727 }
1728 return rt;
1729}
1730
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001731#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001732static struct rt6_info *rt6_get_route_info(struct net *net,
1733 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001734 struct in6_addr *gwaddr, int ifindex)
1735{
1736 struct fib6_node *fn;
1737 struct rt6_info *rt = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001738 struct fib6_table *table;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001739
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001740 table = fib6_get_table(net, RT6_TABLE_INFO);
Thomas Grafc71099a2006-08-04 23:20:06 -07001741 if (table == NULL)
1742 return NULL;
1743
1744 write_lock_bh(&table->tb6_lock);
1745 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001746 if (!fn)
1747 goto out;
1748
Changli Gaod8d1f302010-06-10 23:31:35 -07001749 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001750 if (rt->rt6i_dev->ifindex != ifindex)
1751 continue;
1752 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1753 continue;
1754 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1755 continue;
Changli Gaod8d1f302010-06-10 23:31:35 -07001756 dst_hold(&rt->dst);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001757 break;
1758 }
1759out:
Thomas Grafc71099a2006-08-04 23:20:06 -07001760 write_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001761 return rt;
1762}
1763
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001764static struct rt6_info *rt6_add_route_info(struct net *net,
1765 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001766 struct in6_addr *gwaddr, int ifindex,
1767 unsigned pref)
1768{
Thomas Graf86872cb2006-08-22 00:01:08 -07001769 struct fib6_config cfg = {
1770 .fc_table = RT6_TABLE_INFO,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001771 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001772 .fc_ifindex = ifindex,
1773 .fc_dst_len = prefixlen,
1774 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1775 RTF_UP | RTF_PREF(pref),
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001776 .fc_nlinfo.pid = 0,
1777 .fc_nlinfo.nlh = NULL,
1778 .fc_nlinfo.nl_net = net,
Thomas Graf86872cb2006-08-22 00:01:08 -07001779 };
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001780
Thomas Graf86872cb2006-08-22 00:01:08 -07001781 ipv6_addr_copy(&cfg.fc_dst, prefix);
1782 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1783
YOSHIFUJI Hideakie317da92006-03-20 17:06:42 -08001784 /* We should treat it as a default route if prefix length is 0. */
1785 if (!prefixlen)
Thomas Graf86872cb2006-08-22 00:01:08 -07001786 cfg.fc_flags |= RTF_DEFAULT;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001787
Thomas Graf86872cb2006-08-22 00:01:08 -07001788 ip6_route_add(&cfg);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001789
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001790 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001791}
1792#endif
1793
Linus Torvalds1da177e2005-04-16 15:20:36 -07001794struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001795{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001796 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001797 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001798
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001799 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001800 if (table == NULL)
1801 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001802
Thomas Grafc71099a2006-08-04 23:20:06 -07001803 write_lock_bh(&table->tb6_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07001804 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001805 if (dev == rt->rt6i_dev &&
YOSHIFUJI Hideaki045927f2006-03-20 17:00:48 -08001806 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001807 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1808 break;
1809 }
1810 if (rt)
Changli Gaod8d1f302010-06-10 23:31:35 -07001811 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001812 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001813 return rt;
1814}
1815
1816struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001817 struct net_device *dev,
1818 unsigned int pref)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001819{
Thomas Graf86872cb2006-08-22 00:01:08 -07001820 struct fib6_config cfg = {
1821 .fc_table = RT6_TABLE_DFLT,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001822 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001823 .fc_ifindex = dev->ifindex,
1824 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1825 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
Daniel Lezcano55786892008-03-04 13:47:47 -08001826 .fc_nlinfo.pid = 0,
1827 .fc_nlinfo.nlh = NULL,
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001828 .fc_nlinfo.nl_net = dev_net(dev),
Thomas Graf86872cb2006-08-22 00:01:08 -07001829 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001830
Thomas Graf86872cb2006-08-22 00:01:08 -07001831 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001832
Thomas Graf86872cb2006-08-22 00:01:08 -07001833 ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001834
Linus Torvalds1da177e2005-04-16 15:20:36 -07001835 return rt6_get_dflt_router(gwaddr, dev);
1836}
1837
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001838void rt6_purge_dflt_routers(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001839{
1840 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001841 struct fib6_table *table;
1842
1843 /* NOTE: Keep consistent with rt6_get_dflt_router */
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001844 table = fib6_get_table(net, RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001845 if (table == NULL)
1846 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001847
1848restart:
Thomas Grafc71099a2006-08-04 23:20:06 -07001849 read_lock_bh(&table->tb6_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07001850 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001851 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001852 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001853 read_unlock_bh(&table->tb6_lock);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001854 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001855 goto restart;
1856 }
1857 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001858 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001859}
1860
Daniel Lezcano55786892008-03-04 13:47:47 -08001861static void rtmsg_to_fib6_config(struct net *net,
1862 struct in6_rtmsg *rtmsg,
Thomas Graf86872cb2006-08-22 00:01:08 -07001863 struct fib6_config *cfg)
1864{
1865 memset(cfg, 0, sizeof(*cfg));
1866
1867 cfg->fc_table = RT6_TABLE_MAIN;
1868 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1869 cfg->fc_metric = rtmsg->rtmsg_metric;
1870 cfg->fc_expires = rtmsg->rtmsg_info;
1871 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1872 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1873 cfg->fc_flags = rtmsg->rtmsg_flags;
1874
Daniel Lezcano55786892008-03-04 13:47:47 -08001875 cfg->fc_nlinfo.nl_net = net;
Benjamin Theryf1243c22008-02-26 18:10:03 -08001876
Thomas Graf86872cb2006-08-22 00:01:08 -07001877 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1878 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1879 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1880}
1881
Daniel Lezcano55786892008-03-04 13:47:47 -08001882int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001883{
Thomas Graf86872cb2006-08-22 00:01:08 -07001884 struct fib6_config cfg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001885 struct in6_rtmsg rtmsg;
1886 int err;
1887
1888 switch(cmd) {
1889 case SIOCADDRT: /* Add a route */
1890 case SIOCDELRT: /* Delete a route */
1891 if (!capable(CAP_NET_ADMIN))
1892 return -EPERM;
1893 err = copy_from_user(&rtmsg, arg,
1894 sizeof(struct in6_rtmsg));
1895 if (err)
1896 return -EFAULT;
Thomas Graf86872cb2006-08-22 00:01:08 -07001897
Daniel Lezcano55786892008-03-04 13:47:47 -08001898 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
Thomas Graf86872cb2006-08-22 00:01:08 -07001899
Linus Torvalds1da177e2005-04-16 15:20:36 -07001900 rtnl_lock();
1901 switch (cmd) {
1902 case SIOCADDRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001903 err = ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001904 break;
1905 case SIOCDELRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001906 err = ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001907 break;
1908 default:
1909 err = -EINVAL;
1910 }
1911 rtnl_unlock();
1912
1913 return err;
Stephen Hemminger3ff50b72007-04-20 17:09:22 -07001914 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001915
1916 return -EINVAL;
1917}
1918
1919/*
1920 * Drop the packet on the floor
1921 */
1922
Brian Haleyd5fdd6b2009-06-23 04:31:07 -07001923static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001924{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001925 int type;
Eric Dumazetadf30902009-06-02 05:19:30 +00001926 struct dst_entry *dst = skb_dst(skb);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001927 switch (ipstats_mib_noroutes) {
1928 case IPSTATS_MIB_INNOROUTES:
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -07001929 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
Ulrich Weber45bb0062010-02-25 23:28:58 +00001930 if (type == IPV6_ADDR_ANY) {
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07001931 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1932 IPSTATS_MIB_INADDRERRORS);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001933 break;
1934 }
1935 /* FALLTHROUGH */
1936 case IPSTATS_MIB_OUTNOROUTES:
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07001937 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1938 ipstats_mib_noroutes);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001939 break;
1940 }
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +00001941 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001942 kfree_skb(skb);
1943 return 0;
1944}
1945
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001946static int ip6_pkt_discard(struct sk_buff *skb)
1947{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001948 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001949}
1950
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001951static int ip6_pkt_discard_out(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001952{
Eric Dumazetadf30902009-06-02 05:19:30 +00001953 skb->dev = skb_dst(skb)->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001954 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001955}
1956
David S. Miller6723ab52006-10-18 21:20:57 -07001957#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1958
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001959static int ip6_pkt_prohibit(struct sk_buff *skb)
1960{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001961 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001962}
1963
1964static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1965{
Eric Dumazetadf30902009-06-02 05:19:30 +00001966 skb->dev = skb_dst(skb)->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001967 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001968}
1969
David S. Miller6723ab52006-10-18 21:20:57 -07001970#endif
1971
Linus Torvalds1da177e2005-04-16 15:20:36 -07001972/*
1973 * Allocate a dst for local (unicast / anycast) address.
1974 */
1975
1976struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1977 const struct in6_addr *addr,
1978 int anycast)
1979{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001980 struct net *net = dev_net(idev->dev);
Alexey Dobriyan86393e52009-08-29 01:34:49 +00001981 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
David S. Miller14deae42009-01-04 16:04:39 -08001982 struct neighbour *neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001983
Ben Greear40385652010-11-08 12:33:48 +00001984 if (rt == NULL) {
1985 if (net_ratelimit())
1986 pr_warning("IPv6: Maximum number of routes reached,"
1987 " consider increasing route/max_size.\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001988 return ERR_PTR(-ENOMEM);
Ben Greear40385652010-11-08 12:33:48 +00001989 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001990
Daniel Lezcano55786892008-03-04 13:47:47 -08001991 dev_hold(net->loopback_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001992 in6_dev_hold(idev);
1993
Changli Gaod8d1f302010-06-10 23:31:35 -07001994 rt->dst.flags = DST_HOST;
1995 rt->dst.input = ip6_input;
1996 rt->dst.output = ip6_output;
Daniel Lezcano55786892008-03-04 13:47:47 -08001997 rt->rt6i_dev = net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001998 rt->rt6i_idev = idev;
David S. Millerdefb3512010-12-08 21:16:57 -08001999 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1);
Changli Gaod8d1f302010-06-10 23:31:35 -07002000 rt->dst.obsolete = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002001
2002 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09002003 if (anycast)
2004 rt->rt6i_flags |= RTF_ANYCAST;
2005 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07002006 rt->rt6i_flags |= RTF_LOCAL;
David S. Miller14deae42009-01-04 16:04:39 -08002007 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
2008 if (IS_ERR(neigh)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002009 dst_free(&rt->dst);
David S. Miller14deae42009-01-04 16:04:39 -08002010
2011 /* We are casting this because that is the return
2012 * value type. But an errno encoded pointer is the
2013 * same regardless of the underlying pointer type,
2014 * and that's what we are returning. So this is OK.
2015 */
2016 return (struct rt6_info *) neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002017 }
David S. Miller14deae42009-01-04 16:04:39 -08002018 rt->rt6i_nexthop = neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002019
2020 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
2021 rt->rt6i_dst.plen = 128;
Daniel Lezcano55786892008-03-04 13:47:47 -08002022 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002023
Changli Gaod8d1f302010-06-10 23:31:35 -07002024 atomic_set(&rt->dst.__refcnt, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002025
2026 return rt;
2027}
2028
/*
 *	Argument bundle that rt6_ifdown() passes to the fib6_ifdown()
 *	callback.
 */
struct arg_dev_net {
	struct net_device	*dev;	/* device going down; NULL matches any */
	struct net		*net;	/* namespace whose null entry is spared */
};
2033
Linus Torvalds1da177e2005-04-16 15:20:36 -07002034static int fib6_ifdown(struct rt6_info *rt, void *arg)
2035{
stephen hemmingerbc3ef662010-12-16 17:42:40 +00002036 const struct arg_dev_net *adn = arg;
2037 const struct net_device *dev = adn->dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002038
stephen hemmingerbc3ef662010-12-16 17:42:40 +00002039 if ((rt->rt6i_dev == dev || dev == NULL) &&
2040 rt != adn->net->ipv6.ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002041 RT6_TRACE("deleted by ifdown %p\n", rt);
2042 return -1;
2043 }
2044 return 0;
2045}
2046
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002047void rt6_ifdown(struct net *net, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002048{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002049 struct arg_dev_net adn = {
2050 .dev = dev,
2051 .net = net,
2052 };
2053
2054 fib6_clean_all(net, fib6_ifdown, 0, &adn);
David S. Miller1e493d12008-09-10 17:27:15 -07002055 icmp6_clean_all(fib6_ifdown, &adn);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002056}
2057
/*
 *	Argument bundle that rt6_mtu_change() passes to the
 *	rt6_mtu_change_route() callback.
 */
struct rt6_mtu_change_arg {
	struct net_device	*dev;	/* device whose MTU changed */
	unsigned int		mtu;	/* the new MTU */
};
2063
2064static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2065{
2066 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2067 struct inet6_dev *idev;
2068
2069 /* In IPv6 pmtu discovery is not optional,
2070 so that RTAX_MTU lock cannot disable it.
2071 We still use this lock to block changes
2072 caused by addrconf/ndisc.
2073 */
2074
2075 idev = __in6_dev_get(arg->dev);
2076 if (idev == NULL)
2077 return 0;
2078
2079 /* For administrative MTU increase, there is no way to discover
2080 IPv6 PMTU increase, so PMTU increase should be updated here.
2081 Since RFC 1981 doesn't include administrative MTU increase
2082 update PMTU increase is a MUST. (i.e. jumbo frame)
2083 */
2084 /*
2085 If new MTU is less than route PMTU, this new MTU will be the
2086 lowest MTU in the path, update the route PMTU to reflect PMTU
2087 decreases; if new MTU is greater than route PMTU, and the
2088 old MTU is the lowest MTU in the path, update the route PMTU
2089 to reflect the increase. In this case if the other nodes' MTU
2090 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2091 PMTU discouvery.
2092 */
2093 if (rt->rt6i_dev == arg->dev &&
Changli Gaod8d1f302010-06-10 23:31:35 -07002094 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2095 (dst_mtu(&rt->dst) >= arg->mtu ||
2096 (dst_mtu(&rt->dst) < arg->mtu &&
2097 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
David S. Millerdefb3512010-12-08 21:16:57 -08002098 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
Simon Arlott566cfd82007-07-26 00:09:55 -07002099 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002100 return 0;
2101}
2102
2103void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2104{
Thomas Grafc71099a2006-08-04 23:20:06 -07002105 struct rt6_mtu_change_arg arg = {
2106 .dev = dev,
2107 .mtu = mtu,
2108 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07002109
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002110 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002111}
2112
Patrick McHardyef7c79e2007-06-05 12:38:30 -07002113static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
Thomas Graf5176f912006-08-26 20:13:18 -07002114 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
Thomas Graf86872cb2006-08-22 00:01:08 -07002115 [RTA_OIF] = { .type = NLA_U32 },
Thomas Grafab364a62006-08-22 00:01:47 -07002116 [RTA_IIF] = { .type = NLA_U32 },
Thomas Graf86872cb2006-08-22 00:01:08 -07002117 [RTA_PRIORITY] = { .type = NLA_U32 },
2118 [RTA_METRICS] = { .type = NLA_NESTED },
2119};
2120
2121static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2122 struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002123{
Thomas Graf86872cb2006-08-22 00:01:08 -07002124 struct rtmsg *rtm;
2125 struct nlattr *tb[RTA_MAX+1];
2126 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002127
Thomas Graf86872cb2006-08-22 00:01:08 -07002128 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2129 if (err < 0)
2130 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002131
Thomas Graf86872cb2006-08-22 00:01:08 -07002132 err = -EINVAL;
2133 rtm = nlmsg_data(nlh);
2134 memset(cfg, 0, sizeof(*cfg));
2135
2136 cfg->fc_table = rtm->rtm_table;
2137 cfg->fc_dst_len = rtm->rtm_dst_len;
2138 cfg->fc_src_len = rtm->rtm_src_len;
2139 cfg->fc_flags = RTF_UP;
2140 cfg->fc_protocol = rtm->rtm_protocol;
2141
2142 if (rtm->rtm_type == RTN_UNREACHABLE)
2143 cfg->fc_flags |= RTF_REJECT;
2144
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00002145 if (rtm->rtm_type == RTN_LOCAL)
2146 cfg->fc_flags |= RTF_LOCAL;
2147
Thomas Graf86872cb2006-08-22 00:01:08 -07002148 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2149 cfg->fc_nlinfo.nlh = nlh;
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002150 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
Thomas Graf86872cb2006-08-22 00:01:08 -07002151
2152 if (tb[RTA_GATEWAY]) {
2153 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2154 cfg->fc_flags |= RTF_GATEWAY;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002155 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002156
2157 if (tb[RTA_DST]) {
2158 int plen = (rtm->rtm_dst_len + 7) >> 3;
2159
2160 if (nla_len(tb[RTA_DST]) < plen)
2161 goto errout;
2162
2163 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002164 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002165
2166 if (tb[RTA_SRC]) {
2167 int plen = (rtm->rtm_src_len + 7) >> 3;
2168
2169 if (nla_len(tb[RTA_SRC]) < plen)
2170 goto errout;
2171
2172 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002173 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002174
2175 if (tb[RTA_OIF])
2176 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2177
2178 if (tb[RTA_PRIORITY])
2179 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2180
2181 if (tb[RTA_METRICS]) {
2182 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2183 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002184 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002185
2186 if (tb[RTA_TABLE])
2187 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2188
2189 err = 0;
2190errout:
2191 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002192}
2193
Thomas Grafc127ea22007-03-22 11:58:32 -07002194static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002195{
Thomas Graf86872cb2006-08-22 00:01:08 -07002196 struct fib6_config cfg;
2197 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002198
Thomas Graf86872cb2006-08-22 00:01:08 -07002199 err = rtm_to_fib6_config(skb, nlh, &cfg);
2200 if (err < 0)
2201 return err;
2202
2203 return ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002204}
2205
Thomas Grafc127ea22007-03-22 11:58:32 -07002206static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002207{
Thomas Graf86872cb2006-08-22 00:01:08 -07002208 struct fib6_config cfg;
2209 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002210
Thomas Graf86872cb2006-08-22 00:01:08 -07002211 err = rtm_to_fib6_config(skb, nlh, &cfg);
2212 if (err < 0)
2213 return err;
2214
2215 return ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002216}
2217
Thomas Graf339bf982006-11-10 14:10:15 -08002218static inline size_t rt6_nlmsg_size(void)
2219{
2220 return NLMSG_ALIGN(sizeof(struct rtmsg))
2221 + nla_total_size(16) /* RTA_SRC */
2222 + nla_total_size(16) /* RTA_DST */
2223 + nla_total_size(16) /* RTA_GATEWAY */
2224 + nla_total_size(16) /* RTA_PREFSRC */
2225 + nla_total_size(4) /* RTA_TABLE */
2226 + nla_total_size(4) /* RTA_IIF */
2227 + nla_total_size(4) /* RTA_OIF */
2228 + nla_total_size(4) /* RTA_PRIORITY */
Noriaki TAKAMIYA6a2b9ce2007-01-23 22:09:41 -08002229 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
Thomas Graf339bf982006-11-10 14:10:15 -08002230 + nla_total_size(sizeof(struct rta_cacheinfo));
2231}
2232
Brian Haley191cd582008-08-14 15:33:21 -07002233static int rt6_fill_node(struct net *net,
2234 struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002235 struct in6_addr *dst, struct in6_addr *src,
2236 int iif, int type, u32 pid, u32 seq,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002237 int prefix, int nowait, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002238{
2239 struct rtmsg *rtm;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002240 struct nlmsghdr *nlh;
Thomas Grafe3703b32006-11-27 09:27:07 -08002241 long expires;
Patrick McHardy9e762a42006-08-10 23:09:48 -07002242 u32 table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002243
2244 if (prefix) { /* user wants prefix routes only */
2245 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2246 /* success since this is not a prefix route */
2247 return 1;
2248 }
2249 }
2250
Thomas Graf2d7202b2006-08-22 00:01:27 -07002251 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2252 if (nlh == NULL)
Patrick McHardy26932562007-01-31 23:16:40 -08002253 return -EMSGSIZE;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002254
2255 rtm = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002256 rtm->rtm_family = AF_INET6;
2257 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2258 rtm->rtm_src_len = rt->rt6i_src.plen;
2259 rtm->rtm_tos = 0;
Thomas Grafc71099a2006-08-04 23:20:06 -07002260 if (rt->rt6i_table)
Patrick McHardy9e762a42006-08-10 23:09:48 -07002261 table = rt->rt6i_table->tb6_id;
Thomas Grafc71099a2006-08-04 23:20:06 -07002262 else
Patrick McHardy9e762a42006-08-10 23:09:48 -07002263 table = RT6_TABLE_UNSPEC;
2264 rtm->rtm_table = table;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002265 NLA_PUT_U32(skb, RTA_TABLE, table);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002266 if (rt->rt6i_flags&RTF_REJECT)
2267 rtm->rtm_type = RTN_UNREACHABLE;
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00002268 else if (rt->rt6i_flags&RTF_LOCAL)
2269 rtm->rtm_type = RTN_LOCAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002270 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2271 rtm->rtm_type = RTN_LOCAL;
2272 else
2273 rtm->rtm_type = RTN_UNICAST;
2274 rtm->rtm_flags = 0;
2275 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2276 rtm->rtm_protocol = rt->rt6i_protocol;
2277 if (rt->rt6i_flags&RTF_DYNAMIC)
2278 rtm->rtm_protocol = RTPROT_REDIRECT;
2279 else if (rt->rt6i_flags & RTF_ADDRCONF)
2280 rtm->rtm_protocol = RTPROT_KERNEL;
2281 else if (rt->rt6i_flags&RTF_DEFAULT)
2282 rtm->rtm_protocol = RTPROT_RA;
2283
2284 if (rt->rt6i_flags&RTF_CACHE)
2285 rtm->rtm_flags |= RTM_F_CLONED;
2286
2287 if (dst) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002288 NLA_PUT(skb, RTA_DST, 16, dst);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002289 rtm->rtm_dst_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002290 } else if (rtm->rtm_dst_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002291 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002292#ifdef CONFIG_IPV6_SUBTREES
2293 if (src) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002294 NLA_PUT(skb, RTA_SRC, 16, src);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002295 rtm->rtm_src_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002296 } else if (rtm->rtm_src_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002297 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002298#endif
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002299 if (iif) {
2300#ifdef CONFIG_IPV6_MROUTE
2301 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
Benjamin Thery8229efd2008-12-10 16:30:15 -08002302 int err = ip6mr_get_route(net, skb, rtm, nowait);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002303 if (err <= 0) {
2304 if (!nowait) {
2305 if (err == 0)
2306 return 0;
2307 goto nla_put_failure;
2308 } else {
2309 if (err == -EMSGSIZE)
2310 goto nla_put_failure;
2311 }
2312 }
2313 } else
2314#endif
2315 NLA_PUT_U32(skb, RTA_IIF, iif);
2316 } else if (dst) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002317 struct inet6_dev *idev = ip6_dst_idev(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002318 struct in6_addr saddr_buf;
Brian Haley191cd582008-08-14 15:33:21 -07002319 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
YOSHIFUJI Hideaki7cbca672008-03-25 09:37:42 +09002320 dst, 0, &saddr_buf) == 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002321 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002322 }
Thomas Graf2d7202b2006-08-22 00:01:27 -07002323
David S. Millerdefb3512010-12-08 21:16:57 -08002324 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002325 goto nla_put_failure;
2326
Changli Gaod8d1f302010-06-10 23:31:35 -07002327 if (rt->dst.neighbour)
2328 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key);
Thomas Graf2d7202b2006-08-22 00:01:27 -07002329
Changli Gaod8d1f302010-06-10 23:31:35 -07002330 if (rt->dst.dev)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002331 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2332
2333 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
Thomas Grafe3703b32006-11-27 09:27:07 -08002334
YOSHIFUJI Hideaki36e3dea2008-05-13 02:52:55 +09002335 if (!(rt->rt6i_flags & RTF_EXPIRES))
2336 expires = 0;
2337 else if (rt->rt6i_expires - jiffies < INT_MAX)
2338 expires = rt->rt6i_expires - jiffies;
2339 else
2340 expires = INT_MAX;
YOSHIFUJI Hideaki69cdf8f2008-05-19 16:55:13 -07002341
Changli Gaod8d1f302010-06-10 23:31:35 -07002342 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2343 expires, rt->dst.error) < 0)
Thomas Grafe3703b32006-11-27 09:27:07 -08002344 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002345
Thomas Graf2d7202b2006-08-22 00:01:27 -07002346 return nlmsg_end(skb, nlh);
2347
2348nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08002349 nlmsg_cancel(skb, nlh);
2350 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002351}
2352
Patrick McHardy1b43af52006-08-10 23:11:17 -07002353int rt6_dump_route(struct rt6_info *rt, void *p_arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002354{
2355 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2356 int prefix;
2357
Thomas Graf2d7202b2006-08-22 00:01:27 -07002358 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2359 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002360 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2361 } else
2362 prefix = 0;
2363
Brian Haley191cd582008-08-14 15:33:21 -07002364 return rt6_fill_node(arg->net,
2365 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002366 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002367 prefix, 0, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002368}
2369
Thomas Grafc127ea22007-03-22 11:58:32 -07002370static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002371{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002372 struct net *net = sock_net(in_skb->sk);
Thomas Grafab364a62006-08-22 00:01:47 -07002373 struct nlattr *tb[RTA_MAX+1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002374 struct rt6_info *rt;
Thomas Grafab364a62006-08-22 00:01:47 -07002375 struct sk_buff *skb;
2376 struct rtmsg *rtm;
2377 struct flowi fl;
2378 int err, iif = 0;
2379
2380 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2381 if (err < 0)
2382 goto errout;
2383
2384 err = -EINVAL;
2385 memset(&fl, 0, sizeof(fl));
2386
2387 if (tb[RTA_SRC]) {
2388 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2389 goto errout;
2390
2391 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2392 }
2393
2394 if (tb[RTA_DST]) {
2395 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2396 goto errout;
2397
2398 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2399 }
2400
2401 if (tb[RTA_IIF])
2402 iif = nla_get_u32(tb[RTA_IIF]);
2403
2404 if (tb[RTA_OIF])
2405 fl.oif = nla_get_u32(tb[RTA_OIF]);
2406
2407 if (iif) {
2408 struct net_device *dev;
Daniel Lezcano55786892008-03-04 13:47:47 -08002409 dev = __dev_get_by_index(net, iif);
Thomas Grafab364a62006-08-22 00:01:47 -07002410 if (!dev) {
2411 err = -ENODEV;
2412 goto errout;
2413 }
2414 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002415
2416 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
Thomas Grafab364a62006-08-22 00:01:47 -07002417 if (skb == NULL) {
2418 err = -ENOBUFS;
2419 goto errout;
2420 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002421
2422 /* Reserve room for dummy headers, this skb can pass
2423 through good chunk of routing engine.
2424 */
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07002425 skb_reset_mac_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002426 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2427
Daniel Lezcano8a3edd82008-03-07 11:14:16 -08002428 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
Changli Gaod8d1f302010-06-10 23:31:35 -07002429 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002430
Brian Haley191cd582008-08-14 15:33:21 -07002431 err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002432 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002433 nlh->nlmsg_seq, 0, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002434 if (err < 0) {
Thomas Grafab364a62006-08-22 00:01:47 -07002435 kfree_skb(skb);
2436 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002437 }
2438
Daniel Lezcano55786892008-03-04 13:47:47 -08002439 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
Thomas Grafab364a62006-08-22 00:01:47 -07002440errout:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002441 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002442}
2443
Thomas Graf86872cb2006-08-22 00:01:08 -07002444void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002445{
2446 struct sk_buff *skb;
Daniel Lezcano55786892008-03-04 13:47:47 -08002447 struct net *net = info->nl_net;
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002448 u32 seq;
2449 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002450
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002451 err = -ENOBUFS;
2452 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
Thomas Graf86872cb2006-08-22 00:01:08 -07002453
Thomas Graf339bf982006-11-10 14:10:15 -08002454 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
Thomas Graf21713eb2006-08-15 00:35:24 -07002455 if (skb == NULL)
2456 goto errout;
2457
Brian Haley191cd582008-08-14 15:33:21 -07002458 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002459 event, info->pid, seq, 0, 0, 0);
Patrick McHardy26932562007-01-31 23:16:40 -08002460 if (err < 0) {
2461 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2462 WARN_ON(err == -EMSGSIZE);
2463 kfree_skb(skb);
2464 goto errout;
2465 }
Pablo Neira Ayuso1ce85fe2009-02-24 23:18:28 -08002466 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2467 info->nlh, gfp_any());
2468 return;
Thomas Graf21713eb2006-08-15 00:35:24 -07002469errout:
2470 if (err < 0)
Daniel Lezcano55786892008-03-04 13:47:47 -08002471 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002472}
2473
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002474static int ip6_route_dev_notify(struct notifier_block *this,
2475 unsigned long event, void *data)
2476{
2477 struct net_device *dev = (struct net_device *)data;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002478 struct net *net = dev_net(dev);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002479
2480 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002481 net->ipv6.ip6_null_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002482 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2483#ifdef CONFIG_IPV6_MULTIPLE_TABLES
Changli Gaod8d1f302010-06-10 23:31:35 -07002484 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002485 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
Changli Gaod8d1f302010-06-10 23:31:35 -07002486 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002487 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2488#endif
2489 }
2490
2491 return NOTIFY_OK;
2492}
2493
Linus Torvalds1da177e2005-04-16 15:20:36 -07002494/*
2495 * /proc
2496 */
2497
2498#ifdef CONFIG_PROC_FS
2499
/*
 * Buffer-walk state for /proc output.
 * NOTE(review): no user of this struct is visible in this part of the
 * file (the /proc handlers below use seq_file) -- confirm before relying
 * on it.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2508
2509static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2510{
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002511 struct seq_file *m = p_arg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002512
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002513 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002514
2515#ifdef CONFIG_IPV6_SUBTREES
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002516 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002517#else
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002518 seq_puts(m, "00000000000000000000000000000000 00 ");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002519#endif
2520
2521 if (rt->rt6i_nexthop) {
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002522 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002523 } else {
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002524 seq_puts(m, "00000000000000000000000000000000");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002525 }
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002526 seq_printf(m, " %08x %08x %08x %08x %8s\n",
Changli Gaod8d1f302010-06-10 23:31:35 -07002527 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2528 rt->dst.__use, rt->rt6i_flags,
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002529 rt->rt6i_dev ? rt->rt6i_dev->name : "");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002530 return 0;
2531}
2532
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002533static int ipv6_route_show(struct seq_file *m, void *v)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002534{
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002535 struct net *net = (struct net *)m->private;
2536 fib6_clean_all(net, rt6_info_route, 0, m);
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002537 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002538}
2539
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002540static int ipv6_route_open(struct inode *inode, struct file *file)
2541{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002542 return single_open_net(inode, file, ipv6_route_show);
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002543}
2544
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002545static const struct file_operations ipv6_route_proc_fops = {
2546 .owner = THIS_MODULE,
2547 .open = ipv6_route_open,
2548 .read = seq_read,
2549 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002550 .release = single_release_net,
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002551};
2552
Linus Torvalds1da177e2005-04-16 15:20:36 -07002553static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2554{
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002555 struct net *net = (struct net *)seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002556 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002557 net->ipv6.rt6_stats->fib_nodes,
2558 net->ipv6.rt6_stats->fib_route_nodes,
2559 net->ipv6.rt6_stats->fib_rt_alloc,
2560 net->ipv6.rt6_stats->fib_rt_entries,
2561 net->ipv6.rt6_stats->fib_rt_cache,
Eric Dumazetfc66f952010-10-08 06:37:34 +00002562 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002563 net->ipv6.rt6_stats->fib_discarded_routes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002564
2565 return 0;
2566}
2567
2568static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2569{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002570 return single_open_net(inode, file, rt6_stats_seq_show);
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002571}
2572
Arjan van de Ven9a321442007-02-12 00:55:35 -08002573static const struct file_operations rt6_stats_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002574 .owner = THIS_MODULE,
2575 .open = rt6_stats_seq_open,
2576 .read = seq_read,
2577 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002578 .release = single_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002579};
2580#endif /* CONFIG_PROC_FS */
2581
2582#ifdef CONFIG_SYSCTL
2583
Linus Torvalds1da177e2005-04-16 15:20:36 -07002584static
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07002585int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002586 void __user *buffer, size_t *lenp, loff_t *ppos)
2587{
Daniel Lezcano5b7c9312008-03-03 23:28:58 -08002588 struct net *net = current->nsproxy->net_ns;
2589 int delay = net->ipv6.sysctl.flush_delay;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002590 if (write) {
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07002591 proc_dointvec(ctl, write, buffer, lenp, ppos);
Daniel Lezcano5b7c9312008-03-03 23:28:58 -08002592 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002593 return 0;
2594 } else
2595 return -EINVAL;
2596}
2597
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002598ctl_table ipv6_route_table_template[] = {
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002599 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002600 .procname = "flush",
Daniel Lezcano49905092008-01-10 03:01:01 -08002601 .data = &init_net.ipv6.sysctl.flush_delay,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002602 .maxlen = sizeof(int),
Dave Jones89c8b3a12005-04-28 12:11:49 -07002603 .mode = 0200,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002604 .proc_handler = ipv6_sysctl_rtcache_flush
Linus Torvalds1da177e2005-04-16 15:20:36 -07002605 },
2606 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002607 .procname = "gc_thresh",
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002608 .data = &ip6_dst_ops_template.gc_thresh,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002609 .maxlen = sizeof(int),
2610 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002611 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002612 },
2613 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002614 .procname = "max_size",
Daniel Lezcano49905092008-01-10 03:01:01 -08002615 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002616 .maxlen = sizeof(int),
2617 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002618 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002619 },
2620 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002621 .procname = "gc_min_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002622 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002623 .maxlen = sizeof(int),
2624 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002625 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002626 },
2627 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002628 .procname = "gc_timeout",
Daniel Lezcano49905092008-01-10 03:01:01 -08002629 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002630 .maxlen = sizeof(int),
2631 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002632 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002633 },
2634 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002635 .procname = "gc_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002636 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002637 .maxlen = sizeof(int),
2638 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002639 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002640 },
2641 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002642 .procname = "gc_elasticity",
Daniel Lezcano49905092008-01-10 03:01:01 -08002643 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002644 .maxlen = sizeof(int),
2645 .mode = 0644,
Min Zhangf3d3f612010-08-14 22:42:51 -07002646 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002647 },
2648 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002649 .procname = "mtu_expires",
Daniel Lezcano49905092008-01-10 03:01:01 -08002650 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002651 .maxlen = sizeof(int),
2652 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002653 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002654 },
2655 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002656 .procname = "min_adv_mss",
Daniel Lezcano49905092008-01-10 03:01:01 -08002657 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002658 .maxlen = sizeof(int),
2659 .mode = 0644,
Min Zhangf3d3f612010-08-14 22:42:51 -07002660 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002661 },
2662 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002663 .procname = "gc_min_interval_ms",
Daniel Lezcano49905092008-01-10 03:01:01 -08002664 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002665 .maxlen = sizeof(int),
2666 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002667 .proc_handler = proc_dointvec_ms_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002668 },
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08002669 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002670};
2671
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002672struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002673{
2674 struct ctl_table *table;
2675
2676 table = kmemdup(ipv6_route_table_template,
2677 sizeof(ipv6_route_table_template),
2678 GFP_KERNEL);
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002679
2680 if (table) {
2681 table[0].data = &net->ipv6.sysctl.flush_delay;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002682 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002683 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2684 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2685 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2686 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2687 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2688 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2689 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
Alexey Dobriyan9c69fab2009-12-18 20:11:03 -08002690 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002691 }
2692
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002693 return table;
2694}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002695#endif
2696
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002697static int __net_init ip6_route_net_init(struct net *net)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002698{
Pavel Emelyanov633d424b2008-04-21 14:25:23 -07002699 int ret = -ENOMEM;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002700
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002701 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2702 sizeof(net->ipv6.ip6_dst_ops));
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002703
Eric Dumazetfc66f952010-10-08 06:37:34 +00002704 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2705 goto out_ip6_dst_ops;
2706
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002707 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2708 sizeof(*net->ipv6.ip6_null_entry),
2709 GFP_KERNEL);
2710 if (!net->ipv6.ip6_null_entry)
Eric Dumazetfc66f952010-10-08 06:37:34 +00002711 goto out_ip6_dst_entries;
Changli Gaod8d1f302010-06-10 23:31:35 -07002712 net->ipv6.ip6_null_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002713 (struct dst_entry *)net->ipv6.ip6_null_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002714 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Miller62fa8a82011-01-26 20:51:05 -08002715 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2716 ip6_template_metrics, true);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002717
2718#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2719 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2720 sizeof(*net->ipv6.ip6_prohibit_entry),
2721 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002722 if (!net->ipv6.ip6_prohibit_entry)
2723 goto out_ip6_null_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002724 net->ipv6.ip6_prohibit_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002725 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002726 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Miller62fa8a82011-01-26 20:51:05 -08002727 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2728 ip6_template_metrics, true);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002729
2730 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2731 sizeof(*net->ipv6.ip6_blk_hole_entry),
2732 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002733 if (!net->ipv6.ip6_blk_hole_entry)
2734 goto out_ip6_prohibit_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002735 net->ipv6.ip6_blk_hole_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002736 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002737 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Miller62fa8a82011-01-26 20:51:05 -08002738 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2739 ip6_template_metrics, true);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002740#endif
2741
Peter Zijlstrab339a47c2008-10-07 14:15:00 -07002742 net->ipv6.sysctl.flush_delay = 0;
2743 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2744 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2745 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2746 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2747 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2748 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2749 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2750
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002751#ifdef CONFIG_PROC_FS
2752 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2753 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2754#endif
Benjamin Thery6891a342008-03-04 13:49:47 -08002755 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2756
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002757 ret = 0;
2758out:
2759 return ret;
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002760
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002761#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2762out_ip6_prohibit_entry:
2763 kfree(net->ipv6.ip6_prohibit_entry);
2764out_ip6_null_entry:
2765 kfree(net->ipv6.ip6_null_entry);
2766#endif
Eric Dumazetfc66f952010-10-08 06:37:34 +00002767out_ip6_dst_entries:
2768 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002769out_ip6_dst_ops:
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002770 goto out;
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002771}
2772
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002773static void __net_exit ip6_route_net_exit(struct net *net)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002774{
2775#ifdef CONFIG_PROC_FS
2776 proc_net_remove(net, "ipv6_route");
2777 proc_net_remove(net, "rt6_stats");
2778#endif
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002779 kfree(net->ipv6.ip6_null_entry);
2780#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2781 kfree(net->ipv6.ip6_prohibit_entry);
2782 kfree(net->ipv6.ip6_blk_hole_entry);
2783#endif
Xiaotian Feng41bb78b2010-11-02 16:11:05 +00002784 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002785}
2786
2787static struct pernet_operations ip6_route_net_ops = {
2788 .init = ip6_route_net_init,
2789 .exit = ip6_route_net_exit,
2790};
2791
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002792static struct notifier_block ip6_route_dev_notifier = {
2793 .notifier_call = ip6_route_dev_notify,
2794 .priority = 0,
2795};
2796
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002797int __init ip6_route_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002798{
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002799 int ret;
2800
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002801 ret = -ENOMEM;
2802 ip6_dst_ops_template.kmem_cachep =
2803 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2804 SLAB_HWCACHE_ALIGN, NULL);
2805 if (!ip6_dst_ops_template.kmem_cachep)
Fernando Carrijoc19a28e2009-01-07 18:09:08 -08002806 goto out;
David S. Miller14e50e52007-05-24 18:17:54 -07002807
Eric Dumazetfc66f952010-10-08 06:37:34 +00002808 ret = dst_entries_init(&ip6_dst_blackhole_ops);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002809 if (ret)
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002810 goto out_kmem_cache;
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002811
Eric Dumazetfc66f952010-10-08 06:37:34 +00002812 ret = register_pernet_subsys(&ip6_route_net_ops);
2813 if (ret)
2814 goto out_dst_entries;
2815
Arnaud Ebalard5dc121e2008-10-01 02:37:56 -07002816 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2817
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002818 /* Registering of the loopback is done before this portion of code,
2819 * the loopback reference in rt6_info will not be taken, do it
2820 * manually for init_net */
Changli Gaod8d1f302010-06-10 23:31:35 -07002821 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002822 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2823 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
Changli Gaod8d1f302010-06-10 23:31:35 -07002824 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002825 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
Changli Gaod8d1f302010-06-10 23:31:35 -07002826 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002827 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2828 #endif
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002829 ret = fib6_init();
2830 if (ret)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002831 goto out_register_subsys;
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002832
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002833 ret = xfrm6_init();
2834 if (ret)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002835 goto out_fib6_init;
Daniel Lezcanoc35b7e72007-12-08 00:14:11 -08002836
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002837 ret = fib6_rules_init();
2838 if (ret)
2839 goto xfrm6_init;
Daniel Lezcano7e5449c2007-12-08 00:14:54 -08002840
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002841 ret = -ENOBUFS;
2842 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2843 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2844 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2845 goto fib6_rules_init;
2846
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002847 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002848 if (ret)
2849 goto fib6_rules_init;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002850
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002851out:
2852 return ret;
2853
2854fib6_rules_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002855 fib6_rules_cleanup();
2856xfrm6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002857 xfrm6_fini();
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002858out_fib6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002859 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002860out_register_subsys:
2861 unregister_pernet_subsys(&ip6_route_net_ops);
Eric Dumazetfc66f952010-10-08 06:37:34 +00002862out_dst_entries:
2863 dst_entries_destroy(&ip6_dst_blackhole_ops);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002864out_kmem_cache:
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002865 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002866 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002867}
2868
2869void ip6_route_cleanup(void)
2870{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002871 unregister_netdevice_notifier(&ip6_route_dev_notifier);
Thomas Graf101367c2006-08-04 03:39:02 -07002872 fib6_rules_cleanup();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002873 xfrm6_fini();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002874 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002875 unregister_pernet_subsys(&ip6_route_net_ops);
Xiaotian Feng41bb78b2010-11-02 16:11:05 +00002876 dst_entries_destroy(&ip6_dst_blackhole_ops);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002877 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002878}