blob: ffd986104468050e5798ff57d7ef86a3bb632f50 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * IP multicast routing support for mrouted 3.6/3.8
3 *
Alan Cox113aa832008-10-13 19:01:08 -07004 * (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
Linus Torvalds1da177e2005-04-16 15:20:36 -07005 * Linux Consultancy and Custom Driver Development
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 *
Linus Torvalds1da177e2005-04-16 15:20:36 -070012 * Fixes:
13 * Michael Chastain : Incorrect size of copying.
14 * Alan Cox : Added the cache manager code
15 * Alan Cox : Fixed the clone/copy bug and device race.
16 * Mike McLagan : Routing by source
17 * Malcolm Beattie : Buffer handling fixes.
18 * Alexey Kuznetsov : Double buffer free and other fixes.
19 * SVR Anand : Fixed several multicast bugs and problems.
20 * Alexey Kuznetsov : Status, optimisations and more.
21 * Brad Parker : Better behaviour on mrouted upcall
22 * overflow.
23 * Carlos Picoto : PIMv1 Support
24 * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header
25 * Relax this requirement to work with older peers.
26 *
27 */
28
Linus Torvalds1da177e2005-04-16 15:20:36 -070029#include <asm/system.h>
30#include <asm/uaccess.h>
31#include <linux/types.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080032#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070033#include <linux/errno.h>
34#include <linux/timer.h>
35#include <linux/mm.h>
36#include <linux/kernel.h>
37#include <linux/fcntl.h>
38#include <linux/stat.h>
39#include <linux/socket.h>
40#include <linux/in.h>
41#include <linux/inet.h>
42#include <linux/netdevice.h>
43#include <linux/inetdevice.h>
44#include <linux/igmp.h>
45#include <linux/proc_fs.h>
46#include <linux/seq_file.h>
47#include <linux/mroute.h>
48#include <linux/init.h>
Kris Katterjohn46f25df2006-01-05 16:35:42 -080049#include <linux/if_ether.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020050#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070051#include <net/ip.h>
52#include <net/protocol.h>
53#include <linux/skbuff.h>
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020054#include <net/route.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070055#include <net/sock.h>
56#include <net/icmp.h>
57#include <net/udp.h>
58#include <net/raw.h>
59#include <linux/notifier.h>
60#include <linux/if_arp.h>
61#include <linux/netfilter_ipv4.h>
62#include <net/ipip.h>
63#include <net/checksum.h>
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -070064#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070065
66#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
67#define CONFIG_IP_PIMSM 1
68#endif
69
Linus Torvalds1da177e2005-04-16 15:20:36 -070070/* Big lock, protecting vif table, mrt cache and mroute socket state.
71 Note that the changes are semaphored via rtnl_lock.
72 */
73
74static DEFINE_RWLOCK(mrt_lock);
75
76/*
77 * Multicast router control variables
78 */
79
Benjamin Therycf958ae32009-01-22 04:56:16 +000080#define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -070081
Linus Torvalds1da177e2005-04-16 15:20:36 -070082static struct mfc_cache *mfc_unres_queue; /* Queue of unresolved entries */
Linus Torvalds1da177e2005-04-16 15:20:36 -070083
84/* Special spinlock for queue of unresolved entries */
85static DEFINE_SPINLOCK(mfc_unres_lock);
86
87/* We return to original Alan's scheme. Hash table of resolved
88 entries is changed only in process context and protected
89 with weak lock mrt_lock. Queue of unresolved entries is protected
90 with strong spinlock mfc_unres_lock.
91
92 In this case data path is free of exclusive locks at all.
93 */
94
Christoph Lametere18b8902006-12-06 20:33:20 -080095static struct kmem_cache *mrt_cachep __read_mostly;
Linus Torvalds1da177e2005-04-16 15:20:36 -070096
97static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
Benjamin Thery4feb88e2009-01-22 04:56:23 +000098static int ipmr_cache_report(struct net *net,
99 struct sk_buff *pkt, vifi_t vifi, int assert);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700100static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
101
102#ifdef CONFIG_IP_PIMSM_V2
103static struct net_protocol pim_protocol;
104#endif
105
106static struct timer_list ipmr_expire_timer;
107
108/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
109
Wang Chend6070322008-07-14 20:55:26 -0700110static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
111{
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000112 struct net *net = dev_net(dev);
113
Wang Chend6070322008-07-14 20:55:26 -0700114 dev_close(dev);
115
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000116 dev = __dev_get_by_name(net, "tunl0");
Wang Chend6070322008-07-14 20:55:26 -0700117 if (dev) {
Stephen Hemminger5bc3eb72008-11-19 21:52:05 -0800118 const struct net_device_ops *ops = dev->netdev_ops;
Wang Chend6070322008-07-14 20:55:26 -0700119 struct ifreq ifr;
Wang Chend6070322008-07-14 20:55:26 -0700120 struct ip_tunnel_parm p;
121
122 memset(&p, 0, sizeof(p));
123 p.iph.daddr = v->vifc_rmt_addr.s_addr;
124 p.iph.saddr = v->vifc_lcl_addr.s_addr;
125 p.iph.version = 4;
126 p.iph.ihl = 5;
127 p.iph.protocol = IPPROTO_IPIP;
128 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
129 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
130
Stephen Hemminger5bc3eb72008-11-19 21:52:05 -0800131 if (ops->ndo_do_ioctl) {
132 mm_segment_t oldfs = get_fs();
133
134 set_fs(KERNEL_DS);
135 ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
136 set_fs(oldfs);
137 }
Wang Chend6070322008-07-14 20:55:26 -0700138 }
139}
140
Linus Torvalds1da177e2005-04-16 15:20:36 -0700141static
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000142struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700143{
144 struct net_device *dev;
145
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000146 dev = __dev_get_by_name(net, "tunl0");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700147
148 if (dev) {
Stephen Hemminger5bc3eb72008-11-19 21:52:05 -0800149 const struct net_device_ops *ops = dev->netdev_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700150 int err;
151 struct ifreq ifr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700152 struct ip_tunnel_parm p;
153 struct in_device *in_dev;
154
155 memset(&p, 0, sizeof(p));
156 p.iph.daddr = v->vifc_rmt_addr.s_addr;
157 p.iph.saddr = v->vifc_lcl_addr.s_addr;
158 p.iph.version = 4;
159 p.iph.ihl = 5;
160 p.iph.protocol = IPPROTO_IPIP;
161 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
Stephen Hemmingerba93ef72008-01-21 17:28:59 -0800162 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700163
Stephen Hemminger5bc3eb72008-11-19 21:52:05 -0800164 if (ops->ndo_do_ioctl) {
165 mm_segment_t oldfs = get_fs();
166
167 set_fs(KERNEL_DS);
168 err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
169 set_fs(oldfs);
170 } else
171 err = -EOPNOTSUPP;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700172
173 dev = NULL;
174
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000175 if (err == 0 &&
176 (dev = __dev_get_by_name(net, p.name)) != NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700177 dev->flags |= IFF_MULTICAST;
178
Herbert Xue5ed6392005-10-03 14:35:55 -0700179 in_dev = __in_dev_get_rtnl(dev);
Herbert Xu71e27da2007-06-04 23:36:06 -0700180 if (in_dev == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700181 goto failure;
Herbert Xu71e27da2007-06-04 23:36:06 -0700182
183 ipv4_devconf_setall(in_dev);
184 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700185
186 if (dev_open(dev))
187 goto failure;
Wang Chen7dc00c82008-07-14 20:56:34 -0700188 dev_hold(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700189 }
190 }
191 return dev;
192
193failure:
194 /* allow the register to be completed before unregistering. */
195 rtnl_unlock();
196 rtnl_lock();
197
198 unregister_netdevice(dev);
199 return NULL;
200}
201
202#ifdef CONFIG_IP_PIMSM
203
Linus Torvalds1da177e2005-04-16 15:20:36 -0700204static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
205{
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000206 struct net *net = dev_net(dev);
207
Linus Torvalds1da177e2005-04-16 15:20:36 -0700208 read_lock(&mrt_lock);
Pavel Emelyanovcf3677a2008-05-21 14:17:33 -0700209 dev->stats.tx_bytes += skb->len;
210 dev->stats.tx_packets++;
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000211 ipmr_cache_report(net, skb, net->ipv4.mroute_reg_vif_num,
212 IGMPMSG_WHOLEPKT);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700213 read_unlock(&mrt_lock);
214 kfree_skb(skb);
215 return 0;
216}
217
Stephen Hemminger007c3832008-11-20 20:28:35 -0800218static const struct net_device_ops reg_vif_netdev_ops = {
219 .ndo_start_xmit = reg_vif_xmit,
220};
221
Linus Torvalds1da177e2005-04-16 15:20:36 -0700222static void reg_vif_setup(struct net_device *dev)
223{
224 dev->type = ARPHRD_PIMREG;
Kris Katterjohn46f25df2006-01-05 16:35:42 -0800225 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700226 dev->flags = IFF_NOARP;
Stephen Hemminger007c3832008-11-20 20:28:35 -0800227 dev->netdev_ops = &reg_vif_netdev_ops,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700228 dev->destructor = free_netdev;
229}
230
231static struct net_device *ipmr_reg_vif(void)
232{
233 struct net_device *dev;
234 struct in_device *in_dev;
235
Pavel Emelyanovcf3677a2008-05-21 14:17:33 -0700236 dev = alloc_netdev(0, "pimreg", reg_vif_setup);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700237
238 if (dev == NULL)
239 return NULL;
240
241 if (register_netdevice(dev)) {
242 free_netdev(dev);
243 return NULL;
244 }
245 dev->iflink = 0;
246
Herbert Xu71e27da2007-06-04 23:36:06 -0700247 rcu_read_lock();
248 if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
249 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700250 goto failure;
Herbert Xu71e27da2007-06-04 23:36:06 -0700251 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700252
Herbert Xu71e27da2007-06-04 23:36:06 -0700253 ipv4_devconf_setall(in_dev);
254 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
255 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700256
257 if (dev_open(dev))
258 goto failure;
259
Wang Chen7dc00c82008-07-14 20:56:34 -0700260 dev_hold(dev);
261
Linus Torvalds1da177e2005-04-16 15:20:36 -0700262 return dev;
263
264failure:
265 /* allow the register to be completed before unregistering. */
266 rtnl_unlock();
267 rtnl_lock();
268
269 unregister_netdevice(dev);
270 return NULL;
271}
272#endif
273
274/*
275 * Delete a VIF entry
Wang Chen7dc00c82008-07-14 20:56:34 -0700276 * @notify: Set to 1, if the caller is a notifier_call
Linus Torvalds1da177e2005-04-16 15:20:36 -0700277 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900278
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000279static int vif_delete(struct net *net, int vifi, int notify)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700280{
281 struct vif_device *v;
282 struct net_device *dev;
283 struct in_device *in_dev;
284
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000285 if (vifi < 0 || vifi >= net->ipv4.maxvif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700286 return -EADDRNOTAVAIL;
287
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000288 v = &net->ipv4.vif_table[vifi];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700289
290 write_lock_bh(&mrt_lock);
291 dev = v->dev;
292 v->dev = NULL;
293
294 if (!dev) {
295 write_unlock_bh(&mrt_lock);
296 return -EADDRNOTAVAIL;
297 }
298
299#ifdef CONFIG_IP_PIMSM
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000300 if (vifi == net->ipv4.mroute_reg_vif_num)
301 net->ipv4.mroute_reg_vif_num = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700302#endif
303
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000304 if (vifi+1 == net->ipv4.maxvif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700305 int tmp;
306 for (tmp=vifi-1; tmp>=0; tmp--) {
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000307 if (VIF_EXISTS(net, tmp))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700308 break;
309 }
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000310 net->ipv4.maxvif = tmp+1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700311 }
312
313 write_unlock_bh(&mrt_lock);
314
315 dev_set_allmulti(dev, -1);
316
Herbert Xue5ed6392005-10-03 14:35:55 -0700317 if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
Herbert Xu42f811b2007-06-04 23:34:44 -0700318 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700319 ip_rt_multicast_event(in_dev);
320 }
321
Wang Chen7dc00c82008-07-14 20:56:34 -0700322 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700323 unregister_netdevice(dev);
324
325 dev_put(dev);
326 return 0;
327}
328
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000329static inline void ipmr_cache_free(struct mfc_cache *c)
330{
331 release_net(mfc_net(c));
332 kmem_cache_free(mrt_cachep, c);
333}
334
Linus Torvalds1da177e2005-04-16 15:20:36 -0700335/* Destroy an unresolved cache entry, killing queued skbs
336 and reporting error to netlink readers.
337 */
338
339static void ipmr_destroy_unres(struct mfc_cache *c)
340{
341 struct sk_buff *skb;
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -0700342 struct nlmsgerr *e;
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000343 struct net *net = mfc_net(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700344
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000345 atomic_dec(&net->ipv4.cache_resolve_queue_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700346
Jianjun Kongc354e122008-11-03 00:28:02 -0800347 while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700348 if (ip_hdr(skb)->version == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700349 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
350 nlh->nlmsg_type = NLMSG_ERROR;
351 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
352 skb_trim(skb, nlh->nlmsg_len);
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -0700353 e = NLMSG_DATA(nlh);
354 e->error = -ETIMEDOUT;
355 memset(&e->msg, 0, sizeof(e->msg));
Thomas Graf2942e902006-08-15 00:30:25 -0700356
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000357 rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700358 } else
359 kfree_skb(skb);
360 }
361
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000362 ipmr_cache_free(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700363}
364
365
366/* Single timer process for all the unresolved queue. */
367
368static void ipmr_expire_process(unsigned long dummy)
369{
370 unsigned long now;
371 unsigned long expires;
372 struct mfc_cache *c, **cp;
373
374 if (!spin_trylock(&mfc_unres_lock)) {
375 mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
376 return;
377 }
378
Benjamin Thery1e8fb3b2009-01-22 04:56:19 +0000379 if (mfc_unres_queue == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700380 goto out;
381
382 now = jiffies;
383 expires = 10*HZ;
384 cp = &mfc_unres_queue;
385
386 while ((c=*cp) != NULL) {
387 if (time_after(c->mfc_un.unres.expires, now)) {
388 unsigned long interval = c->mfc_un.unres.expires - now;
389 if (interval < expires)
390 expires = interval;
391 cp = &c->next;
392 continue;
393 }
394
395 *cp = c->next;
396
397 ipmr_destroy_unres(c);
398 }
399
Benjamin Thery1e8fb3b2009-01-22 04:56:19 +0000400 if (mfc_unres_queue != NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700401 mod_timer(&ipmr_expire_timer, jiffies + expires);
402
403out:
404 spin_unlock(&mfc_unres_lock);
405}
406
407/* Fill oifs list. It is called under write locked mrt_lock. */
408
Baruch Evend1b04c02005-07-30 17:41:59 -0700409static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700410{
411 int vifi;
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000412 struct net *net = mfc_net(cache);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700413
414 cache->mfc_un.res.minvif = MAXVIFS;
415 cache->mfc_un.res.maxvif = 0;
416 memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
417
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000418 for (vifi = 0; vifi < net->ipv4.maxvif; vifi++) {
419 if (VIF_EXISTS(net, vifi) &&
Benjamin Therycf958ae32009-01-22 04:56:16 +0000420 ttls[vifi] && ttls[vifi] < 255) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700421 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
422 if (cache->mfc_un.res.minvif > vifi)
423 cache->mfc_un.res.minvif = vifi;
424 if (cache->mfc_un.res.maxvif <= vifi)
425 cache->mfc_un.res.maxvif = vifi + 1;
426 }
427 }
428}
429
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000430static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700431{
432 int vifi = vifc->vifc_vifi;
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000433 struct vif_device *v = &net->ipv4.vif_table[vifi];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700434 struct net_device *dev;
435 struct in_device *in_dev;
Wang Chend6070322008-07-14 20:55:26 -0700436 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700437
438 /* Is vif busy ? */
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000439 if (VIF_EXISTS(net, vifi))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700440 return -EADDRINUSE;
441
442 switch (vifc->vifc_flags) {
443#ifdef CONFIG_IP_PIMSM
444 case VIFF_REGISTER:
445 /*
446 * Special Purpose VIF in PIM
447 * All the packets will be sent to the daemon
448 */
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000449 if (net->ipv4.mroute_reg_vif_num >= 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700450 return -EADDRINUSE;
451 dev = ipmr_reg_vif();
452 if (!dev)
453 return -ENOBUFS;
Wang Chend6070322008-07-14 20:55:26 -0700454 err = dev_set_allmulti(dev, 1);
455 if (err) {
456 unregister_netdevice(dev);
Wang Chen7dc00c82008-07-14 20:56:34 -0700457 dev_put(dev);
Wang Chend6070322008-07-14 20:55:26 -0700458 return err;
459 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700460 break;
461#endif
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900462 case VIFF_TUNNEL:
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000463 dev = ipmr_new_tunnel(net, vifc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700464 if (!dev)
465 return -ENOBUFS;
Wang Chend6070322008-07-14 20:55:26 -0700466 err = dev_set_allmulti(dev, 1);
467 if (err) {
468 ipmr_del_tunnel(dev, vifc);
Wang Chen7dc00c82008-07-14 20:56:34 -0700469 dev_put(dev);
Wang Chend6070322008-07-14 20:55:26 -0700470 return err;
471 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700472 break;
473 case 0:
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000474 dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700475 if (!dev)
476 return -EADDRNOTAVAIL;
Wang Chend6070322008-07-14 20:55:26 -0700477 err = dev_set_allmulti(dev, 1);
Wang Chen7dc00c82008-07-14 20:56:34 -0700478 if (err) {
479 dev_put(dev);
Wang Chend6070322008-07-14 20:55:26 -0700480 return err;
Wang Chen7dc00c82008-07-14 20:56:34 -0700481 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700482 break;
483 default:
484 return -EINVAL;
485 }
486
Herbert Xue5ed6392005-10-03 14:35:55 -0700487 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700488 return -EADDRNOTAVAIL;
Herbert Xu42f811b2007-06-04 23:34:44 -0700489 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700490 ip_rt_multicast_event(in_dev);
491
492 /*
493 * Fill in the VIF structures
494 */
Jianjun Kongc354e122008-11-03 00:28:02 -0800495 v->rate_limit = vifc->vifc_rate_limit;
496 v->local = vifc->vifc_lcl_addr.s_addr;
497 v->remote = vifc->vifc_rmt_addr.s_addr;
498 v->flags = vifc->vifc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700499 if (!mrtsock)
500 v->flags |= VIFF_STATIC;
Jianjun Kongc354e122008-11-03 00:28:02 -0800501 v->threshold = vifc->vifc_threshold;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700502 v->bytes_in = 0;
503 v->bytes_out = 0;
504 v->pkt_in = 0;
505 v->pkt_out = 0;
506 v->link = dev->ifindex;
507 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
508 v->link = dev->iflink;
509
510 /* And finish update writing critical data */
511 write_lock_bh(&mrt_lock);
Jianjun Kongc354e122008-11-03 00:28:02 -0800512 v->dev = dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700513#ifdef CONFIG_IP_PIMSM
514 if (v->flags&VIFF_REGISTER)
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000515 net->ipv4.mroute_reg_vif_num = vifi;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700516#endif
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000517 if (vifi+1 > net->ipv4.maxvif)
518 net->ipv4.maxvif = vifi+1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700519 write_unlock_bh(&mrt_lock);
520 return 0;
521}
522
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000523static struct mfc_cache *ipmr_cache_find(struct net *net,
524 __be32 origin,
525 __be32 mcastgrp)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700526{
Jianjun Kongc354e122008-11-03 00:28:02 -0800527 int line = MFC_HASH(mcastgrp, origin);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700528 struct mfc_cache *c;
529
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000530 for (c = net->ipv4.mfc_cache_array[line]; c; c = c->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700531 if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
532 break;
533 }
534 return c;
535}
536
537/*
538 * Allocate a multicast cache entry
539 */
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000540static struct mfc_cache *ipmr_cache_alloc(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700541{
Jianjun Kongc354e122008-11-03 00:28:02 -0800542 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
543 if (c == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700544 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700545 c->mfc_un.res.minvif = MAXVIFS;
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000546 mfc_net_set(c, net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700547 return c;
548}
549
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000550static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700551{
Jianjun Kongc354e122008-11-03 00:28:02 -0800552 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
553 if (c == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700554 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700555 skb_queue_head_init(&c->mfc_un.unres.unresolved);
556 c->mfc_un.unres.expires = jiffies + 10*HZ;
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000557 mfc_net_set(c, net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700558 return c;
559}
560
561/*
562 * A cache entry has gone into a resolved state from queued
563 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900564
Linus Torvalds1da177e2005-04-16 15:20:36 -0700565static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
566{
567 struct sk_buff *skb;
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -0700568 struct nlmsgerr *e;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700569
570 /*
571 * Play the pending entries through our router
572 */
573
Jianjun Kongc354e122008-11-03 00:28:02 -0800574 while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700575 if (ip_hdr(skb)->version == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700576 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
577
578 if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -0700579 nlh->nlmsg_len = (skb_tail_pointer(skb) -
580 (u8 *)nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700581 } else {
582 nlh->nlmsg_type = NLMSG_ERROR;
583 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
584 skb_trim(skb, nlh->nlmsg_len);
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -0700585 e = NLMSG_DATA(nlh);
586 e->error = -EMSGSIZE;
587 memset(&e->msg, 0, sizeof(e->msg));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700588 }
Thomas Graf2942e902006-08-15 00:30:25 -0700589
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000590 rtnl_unicast(skb, mfc_net(c), NETLINK_CB(skb).pid);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700591 } else
592 ip_mr_forward(skb, c, 0);
593 }
594}
595
596/*
597 * Bounce a cache query up to mrouted. We could use netlink for this but mrouted
598 * expects the following bizarre scheme.
599 *
600 * Called under mrt_lock.
601 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900602
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000603static int ipmr_cache_report(struct net *net,
604 struct sk_buff *pkt, vifi_t vifi, int assert)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700605{
606 struct sk_buff *skb;
Arnaldo Carvalho de Meloc9bdd4b2007-03-12 20:09:15 -0300607 const int ihl = ip_hdrlen(pkt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700608 struct igmphdr *igmp;
609 struct igmpmsg *msg;
610 int ret;
611
612#ifdef CONFIG_IP_PIMSM
613 if (assert == IGMPMSG_WHOLEPKT)
614 skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
615 else
616#endif
617 skb = alloc_skb(128, GFP_ATOMIC);
618
Stephen Hemminger132adf52007-03-08 20:44:43 -0800619 if (!skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700620 return -ENOBUFS;
621
622#ifdef CONFIG_IP_PIMSM
623 if (assert == IGMPMSG_WHOLEPKT) {
624 /* Ugly, but we have no choice with this interface.
625 Duplicate old header, fix ihl, length etc.
626 And all this only to mangle msg->im_msgtype and
627 to set msg->im_mbz to "mbz" :-)
628 */
Arnaldo Carvalho de Melo878c8142007-03-11 22:38:29 -0300629 skb_push(skb, sizeof(struct iphdr));
630 skb_reset_network_header(skb);
Arnaldo Carvalho de Melobadff6d2007-03-13 13:06:52 -0300631 skb_reset_transport_header(skb);
Arnaldo Carvalho de Melo0272ffc2007-03-12 20:05:39 -0300632 msg = (struct igmpmsg *)skb_network_header(skb);
Arnaldo Carvalho de Melod56f90a2007-04-10 20:50:43 -0700633 memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700634 msg->im_msgtype = IGMPMSG_WHOLEPKT;
635 msg->im_mbz = 0;
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000636 msg->im_vif = net->ipv4.mroute_reg_vif_num;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700637 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
638 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
639 sizeof(struct iphdr));
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900640 } else
Linus Torvalds1da177e2005-04-16 15:20:36 -0700641#endif
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900642 {
643
Linus Torvalds1da177e2005-04-16 15:20:36 -0700644 /*
645 * Copy the IP header
646 */
647
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -0700648 skb->network_header = skb->tail;
Arnaldo Carvalho de Meloddc7b8e2007-03-15 21:42:27 -0300649 skb_put(skb, ihl);
Arnaldo Carvalho de Melo27d7ff42007-03-31 11:55:19 -0300650 skb_copy_to_linear_data(skb, pkt->data, ihl);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700651 ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */
652 msg = (struct igmpmsg *)skb_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700653 msg->im_vif = vifi;
Eric Dumazetadf30902009-06-02 05:19:30 +0000654 skb_dst_set(skb, dst_clone(skb_dst(pkt)));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700655
656 /*
657 * Add our header
658 */
659
Jianjun Kongc354e122008-11-03 00:28:02 -0800660 igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700661 igmp->type =
662 msg->im_msgtype = assert;
663 igmp->code = 0;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700664 ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700665 skb->transport_header = skb->network_header;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900666 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700667
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000668 if (net->ipv4.mroute_sk == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700669 kfree_skb(skb);
670 return -EINVAL;
671 }
672
673 /*
674 * Deliver to mrouted
675 */
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000676 ret = sock_queue_rcv_skb(net->ipv4.mroute_sk, skb);
Benjamin Thery70a269e2009-01-22 04:56:15 +0000677 if (ret < 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700678 if (net_ratelimit())
679 printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
680 kfree_skb(skb);
681 }
682
683 return ret;
684}
685
686/*
687 * Queue a packet for resolution. It gets locked cache entry!
688 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900689
Linus Torvalds1da177e2005-04-16 15:20:36 -0700690static int
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000691ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700692{
693 int err;
694 struct mfc_cache *c;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700695 const struct iphdr *iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700696
697 spin_lock_bh(&mfc_unres_lock);
698 for (c=mfc_unres_queue; c; c=c->next) {
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000699 if (net_eq(mfc_net(c), net) &&
Benjamin Thery1e8fb3b2009-01-22 04:56:19 +0000700 c->mfc_mcastgrp == iph->daddr &&
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700701 c->mfc_origin == iph->saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700702 break;
703 }
704
705 if (c == NULL) {
706 /*
707 * Create a new entry if allowable
708 */
709
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000710 if (atomic_read(&net->ipv4.cache_resolve_queue_len) >= 10 ||
711 (c = ipmr_cache_alloc_unres(net)) == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700712 spin_unlock_bh(&mfc_unres_lock);
713
714 kfree_skb(skb);
715 return -ENOBUFS;
716 }
717
718 /*
719 * Fill in the new cache entry
720 */
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700721 c->mfc_parent = -1;
722 c->mfc_origin = iph->saddr;
723 c->mfc_mcastgrp = iph->daddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700724
725 /*
726 * Reflect first query at mrouted.
727 */
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000728 err = ipmr_cache_report(net, skb, vifi, IGMPMSG_NOCACHE);
729 if (err < 0) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900730 /* If the report failed throw the cache entry
Linus Torvalds1da177e2005-04-16 15:20:36 -0700731 out - Brad Parker
732 */
733 spin_unlock_bh(&mfc_unres_lock);
734
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000735 ipmr_cache_free(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700736 kfree_skb(skb);
737 return err;
738 }
739
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000740 atomic_inc(&net->ipv4.cache_resolve_queue_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700741 c->next = mfc_unres_queue;
742 mfc_unres_queue = c;
743
744 mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
745 }
746
747 /*
748 * See if we can append the packet
749 */
750 if (c->mfc_un.unres.unresolved.qlen>3) {
751 kfree_skb(skb);
752 err = -ENOBUFS;
753 } else {
Jianjun Kongc354e122008-11-03 00:28:02 -0800754 skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700755 err = 0;
756 }
757
758 spin_unlock_bh(&mfc_unres_lock);
759 return err;
760}
761
762/*
763 * MFC cache manipulation by user space mroute daemon
764 */
765
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000766static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700767{
768 int line;
769 struct mfc_cache *c, **cp;
770
Jianjun Kongc354e122008-11-03 00:28:02 -0800771 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700772
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000773 for (cp = &net->ipv4.mfc_cache_array[line];
Benjamin Thery2bb8b262009-01-22 04:56:18 +0000774 (c = *cp) != NULL; cp = &c->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700775 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
776 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
777 write_lock_bh(&mrt_lock);
778 *cp = c->next;
779 write_unlock_bh(&mrt_lock);
780
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000781 ipmr_cache_free(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700782 return 0;
783 }
784 }
785 return -ENOENT;
786}
787
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000788static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700789{
790 int line;
791 struct mfc_cache *uc, *c, **cp;
792
Jianjun Kongc354e122008-11-03 00:28:02 -0800793 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700794
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000795 for (cp = &net->ipv4.mfc_cache_array[line];
Benjamin Thery2bb8b262009-01-22 04:56:18 +0000796 (c = *cp) != NULL; cp = &c->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700797 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
798 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
799 break;
800 }
801
802 if (c != NULL) {
803 write_lock_bh(&mrt_lock);
804 c->mfc_parent = mfc->mfcc_parent;
Baruch Evend1b04c02005-07-30 17:41:59 -0700805 ipmr_update_thresholds(c, mfc->mfcc_ttls);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700806 if (!mrtsock)
807 c->mfc_flags |= MFC_STATIC;
808 write_unlock_bh(&mrt_lock);
809 return 0;
810 }
811
Joe Perchesf97c1e02007-12-16 13:45:43 -0800812 if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700813 return -EINVAL;
814
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000815 c = ipmr_cache_alloc(net);
Jianjun Kongc354e122008-11-03 00:28:02 -0800816 if (c == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700817 return -ENOMEM;
818
Jianjun Kongc354e122008-11-03 00:28:02 -0800819 c->mfc_origin = mfc->mfcc_origin.s_addr;
820 c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
821 c->mfc_parent = mfc->mfcc_parent;
Baruch Evend1b04c02005-07-30 17:41:59 -0700822 ipmr_update_thresholds(c, mfc->mfcc_ttls);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700823 if (!mrtsock)
824 c->mfc_flags |= MFC_STATIC;
825
826 write_lock_bh(&mrt_lock);
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000827 c->next = net->ipv4.mfc_cache_array[line];
828 net->ipv4.mfc_cache_array[line] = c;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700829 write_unlock_bh(&mrt_lock);
830
831 /*
832 * Check to see if we resolved a queued list. If so we
833 * need to send on the frames and tidy up.
834 */
835 spin_lock_bh(&mfc_unres_lock);
836 for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
837 cp = &uc->next) {
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000838 if (net_eq(mfc_net(uc), net) &&
Benjamin Thery1e8fb3b2009-01-22 04:56:19 +0000839 uc->mfc_origin == c->mfc_origin &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700840 uc->mfc_mcastgrp == c->mfc_mcastgrp) {
841 *cp = uc->next;
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000842 atomic_dec(&net->ipv4.cache_resolve_queue_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700843 break;
844 }
845 }
Benjamin Thery1e8fb3b2009-01-22 04:56:19 +0000846 if (mfc_unres_queue == NULL)
847 del_timer(&ipmr_expire_timer);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700848 spin_unlock_bh(&mfc_unres_lock);
849
850 if (uc) {
851 ipmr_cache_resolve(uc, c);
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000852 ipmr_cache_free(uc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700853 }
854 return 0;
855}
856
857/*
858 * Close the multicast socket, and clear the vif tables etc
859 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900860
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000861static void mroute_clean_tables(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700862{
863 int i;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900864
Linus Torvalds1da177e2005-04-16 15:20:36 -0700865 /*
866 * Shut down all active vif entries
867 */
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000868 for (i = 0; i < net->ipv4.maxvif; i++) {
869 if (!(net->ipv4.vif_table[i].flags&VIFF_STATIC))
870 vif_delete(net, i, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700871 }
872
873 /*
874 * Wipe the cache
875 */
Jianjun Kongc354e122008-11-03 00:28:02 -0800876 for (i=0; i<MFC_LINES; i++) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700877 struct mfc_cache *c, **cp;
878
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000879 cp = &net->ipv4.mfc_cache_array[i];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700880 while ((c = *cp) != NULL) {
881 if (c->mfc_flags&MFC_STATIC) {
882 cp = &c->next;
883 continue;
884 }
885 write_lock_bh(&mrt_lock);
886 *cp = c->next;
887 write_unlock_bh(&mrt_lock);
888
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000889 ipmr_cache_free(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700890 }
891 }
892
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000893 if (atomic_read(&net->ipv4.cache_resolve_queue_len) != 0) {
Benjamin Thery1e8fb3b2009-01-22 04:56:19 +0000894 struct mfc_cache *c, **cp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700895
896 spin_lock_bh(&mfc_unres_lock);
Benjamin Thery1e8fb3b2009-01-22 04:56:19 +0000897 cp = &mfc_unres_queue;
898 while ((c = *cp) != NULL) {
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000899 if (!net_eq(mfc_net(c), net)) {
Benjamin Thery1e8fb3b2009-01-22 04:56:19 +0000900 cp = &c->next;
901 continue;
902 }
903 *cp = c->next;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700904
905 ipmr_destroy_unres(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700906 }
907 spin_unlock_bh(&mfc_unres_lock);
908 }
909}
910
911static void mrtsock_destruct(struct sock *sk)
912{
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000913 struct net *net = sock_net(sk);
914
Linus Torvalds1da177e2005-04-16 15:20:36 -0700915 rtnl_lock();
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000916 if (sk == net->ipv4.mroute_sk) {
917 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700918
919 write_lock_bh(&mrt_lock);
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000920 net->ipv4.mroute_sk = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700921 write_unlock_bh(&mrt_lock);
922
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000923 mroute_clean_tables(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700924 }
925 rtnl_unlock();
926}
927
928/*
929 * Socket options and virtual interface manipulation. The whole
930 * virtual interface system is a complete heap, but unfortunately
931 * that's how BSD mrouted happens to think. Maybe one day with a proper
932 * MOSPF/PIM router set up we can clean this up.
933 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900934
Jianjun Kongc354e122008-11-03 00:28:02 -0800935int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int optlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700936{
937 int ret;
938 struct vifctl vif;
939 struct mfcctl mfc;
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000940 struct net *net = sock_net(sk);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900941
Stephen Hemminger132adf52007-03-08 20:44:43 -0800942 if (optname != MRT_INIT) {
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000943 if (sk != net->ipv4.mroute_sk && !capable(CAP_NET_ADMIN))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700944 return -EACCES;
945 }
946
Stephen Hemminger132adf52007-03-08 20:44:43 -0800947 switch (optname) {
948 case MRT_INIT:
949 if (sk->sk_type != SOCK_RAW ||
950 inet_sk(sk)->num != IPPROTO_IGMP)
951 return -EOPNOTSUPP;
Jianjun Kongc354e122008-11-03 00:28:02 -0800952 if (optlen != sizeof(int))
Stephen Hemminger132adf52007-03-08 20:44:43 -0800953 return -ENOPROTOOPT;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700954
Stephen Hemminger132adf52007-03-08 20:44:43 -0800955 rtnl_lock();
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000956 if (net->ipv4.mroute_sk) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700957 rtnl_unlock();
Stephen Hemminger132adf52007-03-08 20:44:43 -0800958 return -EADDRINUSE;
959 }
960
961 ret = ip_ra_control(sk, 1, mrtsock_destruct);
962 if (ret == 0) {
963 write_lock_bh(&mrt_lock);
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000964 net->ipv4.mroute_sk = sk;
Stephen Hemminger132adf52007-03-08 20:44:43 -0800965 write_unlock_bh(&mrt_lock);
966
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000967 IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
Stephen Hemminger132adf52007-03-08 20:44:43 -0800968 }
969 rtnl_unlock();
970 return ret;
971 case MRT_DONE:
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000972 if (sk != net->ipv4.mroute_sk)
Stephen Hemminger132adf52007-03-08 20:44:43 -0800973 return -EACCES;
974 return ip_ra_control(sk, 0, NULL);
975 case MRT_ADD_VIF:
976 case MRT_DEL_VIF:
Jianjun Kongc354e122008-11-03 00:28:02 -0800977 if (optlen != sizeof(vif))
Stephen Hemminger132adf52007-03-08 20:44:43 -0800978 return -EINVAL;
Jianjun Kongc354e122008-11-03 00:28:02 -0800979 if (copy_from_user(&vif, optval, sizeof(vif)))
Stephen Hemminger132adf52007-03-08 20:44:43 -0800980 return -EFAULT;
981 if (vif.vifc_vifi >= MAXVIFS)
982 return -ENFILE;
983 rtnl_lock();
Jianjun Kongc354e122008-11-03 00:28:02 -0800984 if (optname == MRT_ADD_VIF) {
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000985 ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk);
Stephen Hemminger132adf52007-03-08 20:44:43 -0800986 } else {
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000987 ret = vif_delete(net, vif.vifc_vifi, 0);
Stephen Hemminger132adf52007-03-08 20:44:43 -0800988 }
989 rtnl_unlock();
990 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700991
992 /*
993 * Manipulate the forwarding caches. These live
994 * in a sort of kernel/user symbiosis.
995 */
Stephen Hemminger132adf52007-03-08 20:44:43 -0800996 case MRT_ADD_MFC:
997 case MRT_DEL_MFC:
Jianjun Kongc354e122008-11-03 00:28:02 -0800998 if (optlen != sizeof(mfc))
Stephen Hemminger132adf52007-03-08 20:44:43 -0800999 return -EINVAL;
Jianjun Kongc354e122008-11-03 00:28:02 -08001000 if (copy_from_user(&mfc, optval, sizeof(mfc)))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001001 return -EFAULT;
1002 rtnl_lock();
Jianjun Kongc354e122008-11-03 00:28:02 -08001003 if (optname == MRT_DEL_MFC)
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001004 ret = ipmr_mfc_delete(net, &mfc);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001005 else
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001006 ret = ipmr_mfc_add(net, &mfc, sk == net->ipv4.mroute_sk);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001007 rtnl_unlock();
1008 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001009 /*
1010 * Control PIM assert.
1011 */
Stephen Hemminger132adf52007-03-08 20:44:43 -08001012 case MRT_ASSERT:
1013 {
1014 int v;
1015 if (get_user(v,(int __user *)optval))
1016 return -EFAULT;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001017 net->ipv4.mroute_do_assert = (v) ? 1 : 0;
Stephen Hemminger132adf52007-03-08 20:44:43 -08001018 return 0;
1019 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001020#ifdef CONFIG_IP_PIMSM
Stephen Hemminger132adf52007-03-08 20:44:43 -08001021 case MRT_PIM:
1022 {
Stephen Hemmingerba93ef72008-01-21 17:28:59 -08001023 int v;
1024
Stephen Hemminger132adf52007-03-08 20:44:43 -08001025 if (get_user(v,(int __user *)optval))
1026 return -EFAULT;
Stephen Hemmingerba93ef72008-01-21 17:28:59 -08001027 v = (v) ? 1 : 0;
1028
Stephen Hemminger132adf52007-03-08 20:44:43 -08001029 rtnl_lock();
1030 ret = 0;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001031 if (v != net->ipv4.mroute_do_pim) {
1032 net->ipv4.mroute_do_pim = v;
1033 net->ipv4.mroute_do_assert = v;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001034#ifdef CONFIG_IP_PIMSM_V2
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001035 if (net->ipv4.mroute_do_pim)
Stephen Hemminger132adf52007-03-08 20:44:43 -08001036 ret = inet_add_protocol(&pim_protocol,
1037 IPPROTO_PIM);
1038 else
1039 ret = inet_del_protocol(&pim_protocol,
1040 IPPROTO_PIM);
1041 if (ret < 0)
1042 ret = -EAGAIN;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001043#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001044 }
Stephen Hemminger132adf52007-03-08 20:44:43 -08001045 rtnl_unlock();
1046 return ret;
1047 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001048#endif
Stephen Hemminger132adf52007-03-08 20:44:43 -08001049 /*
1050 * Spurious command, or MRT_VERSION which you cannot
1051 * set.
1052 */
1053 default:
1054 return -ENOPROTOOPT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001055 }
1056}
1057
1058/*
1059 * Getsock opt support for the multicast routing system.
1060 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001061
Jianjun Kongc354e122008-11-03 00:28:02 -08001062int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001063{
1064 int olr;
1065 int val;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001066 struct net *net = sock_net(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001067
Jianjun Kongc354e122008-11-03 00:28:02 -08001068 if (optname != MRT_VERSION &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001069#ifdef CONFIG_IP_PIMSM
1070 optname!=MRT_PIM &&
1071#endif
1072 optname!=MRT_ASSERT)
1073 return -ENOPROTOOPT;
1074
1075 if (get_user(olr, optlen))
1076 return -EFAULT;
1077
1078 olr = min_t(unsigned int, olr, sizeof(int));
1079 if (olr < 0)
1080 return -EINVAL;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001081
Jianjun Kongc354e122008-11-03 00:28:02 -08001082 if (put_user(olr, optlen))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001083 return -EFAULT;
Jianjun Kongc354e122008-11-03 00:28:02 -08001084 if (optname == MRT_VERSION)
1085 val = 0x0305;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001086#ifdef CONFIG_IP_PIMSM
Jianjun Kongc354e122008-11-03 00:28:02 -08001087 else if (optname == MRT_PIM)
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001088 val = net->ipv4.mroute_do_pim;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001089#endif
1090 else
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001091 val = net->ipv4.mroute_do_assert;
Jianjun Kongc354e122008-11-03 00:28:02 -08001092 if (copy_to_user(optval, &val, olr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001093 return -EFAULT;
1094 return 0;
1095}
1096
1097/*
1098 * The IP multicast ioctl support routines.
1099 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001100
Linus Torvalds1da177e2005-04-16 15:20:36 -07001101int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1102{
1103 struct sioc_sg_req sr;
1104 struct sioc_vif_req vr;
1105 struct vif_device *vif;
1106 struct mfc_cache *c;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001107 struct net *net = sock_net(sk);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001108
Stephen Hemminger132adf52007-03-08 20:44:43 -08001109 switch (cmd) {
1110 case SIOCGETVIFCNT:
Jianjun Kongc354e122008-11-03 00:28:02 -08001111 if (copy_from_user(&vr, arg, sizeof(vr)))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001112 return -EFAULT;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001113 if (vr.vifi >= net->ipv4.maxvif)
Stephen Hemminger132adf52007-03-08 20:44:43 -08001114 return -EINVAL;
1115 read_lock(&mrt_lock);
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001116 vif = &net->ipv4.vif_table[vr.vifi];
1117 if (VIF_EXISTS(net, vr.vifi)) {
Jianjun Kongc354e122008-11-03 00:28:02 -08001118 vr.icount = vif->pkt_in;
1119 vr.ocount = vif->pkt_out;
1120 vr.ibytes = vif->bytes_in;
1121 vr.obytes = vif->bytes_out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001122 read_unlock(&mrt_lock);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001123
Jianjun Kongc354e122008-11-03 00:28:02 -08001124 if (copy_to_user(arg, &vr, sizeof(vr)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001125 return -EFAULT;
Stephen Hemminger132adf52007-03-08 20:44:43 -08001126 return 0;
1127 }
1128 read_unlock(&mrt_lock);
1129 return -EADDRNOTAVAIL;
1130 case SIOCGETSGCNT:
Jianjun Kongc354e122008-11-03 00:28:02 -08001131 if (copy_from_user(&sr, arg, sizeof(sr)))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001132 return -EFAULT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001133
Stephen Hemminger132adf52007-03-08 20:44:43 -08001134 read_lock(&mrt_lock);
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001135 c = ipmr_cache_find(net, sr.src.s_addr, sr.grp.s_addr);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001136 if (c) {
1137 sr.pktcnt = c->mfc_un.res.pkt;
1138 sr.bytecnt = c->mfc_un.res.bytes;
1139 sr.wrong_if = c->mfc_un.res.wrong_if;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001140 read_unlock(&mrt_lock);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001141
Jianjun Kongc354e122008-11-03 00:28:02 -08001142 if (copy_to_user(arg, &sr, sizeof(sr)))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001143 return -EFAULT;
1144 return 0;
1145 }
1146 read_unlock(&mrt_lock);
1147 return -EADDRNOTAVAIL;
1148 default:
1149 return -ENOIOCTLCMD;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001150 }
1151}
1152
1153
1154static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1155{
Eric W. Biedermane9dc8652007-09-12 13:02:17 +02001156 struct net_device *dev = ptr;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001157 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001158 struct vif_device *v;
1159 int ct;
Eric W. Biedermane9dc8652007-09-12 13:02:17 +02001160
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001161 if (!net_eq(dev_net(dev), net))
Eric W. Biedermane9dc8652007-09-12 13:02:17 +02001162 return NOTIFY_DONE;
1163
Linus Torvalds1da177e2005-04-16 15:20:36 -07001164 if (event != NETDEV_UNREGISTER)
1165 return NOTIFY_DONE;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001166 v = &net->ipv4.vif_table[0];
1167 for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) {
Jianjun Kongc354e122008-11-03 00:28:02 -08001168 if (v->dev == dev)
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001169 vif_delete(net, ct, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001170 }
1171 return NOTIFY_DONE;
1172}
1173
1174
Jianjun Kongc354e122008-11-03 00:28:02 -08001175static struct notifier_block ip_mr_notifier = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001176 .notifier_call = ipmr_device_event,
1177};
1178
1179/*
1180 * Encapsulate a packet by attaching a valid IPIP header to it.
1181 * This avoids tunnel drivers and other mess and gives us the speed so
1182 * important for multicast video.
1183 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001184
Al Viro114c7842006-09-27 18:39:29 -07001185static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001186{
Arnaldo Carvalho de Melo8856dfa2007-03-10 19:40:39 -03001187 struct iphdr *iph;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001188 struct iphdr *old_iph = ip_hdr(skb);
Arnaldo Carvalho de Melo8856dfa2007-03-10 19:40:39 -03001189
1190 skb_push(skb, sizeof(struct iphdr));
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -07001191 skb->transport_header = skb->network_header;
Arnaldo Carvalho de Melo8856dfa2007-03-10 19:40:39 -03001192 skb_reset_network_header(skb);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001193 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001194
1195 iph->version = 4;
Arnaldo Carvalho de Meloe023dd62007-03-12 20:09:36 -03001196 iph->tos = old_iph->tos;
1197 iph->ttl = old_iph->ttl;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001198 iph->frag_off = 0;
1199 iph->daddr = daddr;
1200 iph->saddr = saddr;
1201 iph->protocol = IPPROTO_IPIP;
1202 iph->ihl = 5;
1203 iph->tot_len = htons(skb->len);
Eric Dumazetadf30902009-06-02 05:19:30 +00001204 ip_select_ident(iph, skb_dst(skb), NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001205 ip_send_check(iph);
1206
Linus Torvalds1da177e2005-04-16 15:20:36 -07001207 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1208 nf_reset(skb);
1209}
1210
1211static inline int ipmr_forward_finish(struct sk_buff *skb)
1212{
1213 struct ip_options * opt = &(IPCB(skb)->opt);
1214
Eric Dumazetadf30902009-06-02 05:19:30 +00001215 IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001216
1217 if (unlikely(opt->optlen))
1218 ip_forward_options(skb);
1219
1220 return dst_output(skb);
1221}
1222
1223/*
1224 * Processing handlers for ipmr_forward
1225 */
1226
1227static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1228{
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001229 struct net *net = mfc_net(c);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001230 const struct iphdr *iph = ip_hdr(skb);
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001231 struct vif_device *vif = &net->ipv4.vif_table[vifi];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001232 struct net_device *dev;
1233 struct rtable *rt;
1234 int encap = 0;
1235
1236 if (vif->dev == NULL)
1237 goto out_free;
1238
1239#ifdef CONFIG_IP_PIMSM
1240 if (vif->flags & VIFF_REGISTER) {
1241 vif->pkt_out++;
Jianjun Kongc354e122008-11-03 00:28:02 -08001242 vif->bytes_out += skb->len;
Pavel Emelyanovcf3677a2008-05-21 14:17:33 -07001243 vif->dev->stats.tx_bytes += skb->len;
1244 vif->dev->stats.tx_packets++;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001245 ipmr_cache_report(net, skb, vifi, IGMPMSG_WHOLEPKT);
Ilpo Järvinen69ebbf52009-02-06 23:46:51 -08001246 goto out_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001247 }
1248#endif
1249
1250 if (vif->flags&VIFF_TUNNEL) {
1251 struct flowi fl = { .oif = vif->link,
1252 .nl_u = { .ip4_u =
1253 { .daddr = vif->remote,
1254 .saddr = vif->local,
1255 .tos = RT_TOS(iph->tos) } },
1256 .proto = IPPROTO_IPIP };
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001257 if (ip_route_output_key(net, &rt, &fl))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001258 goto out_free;
1259 encap = sizeof(struct iphdr);
1260 } else {
1261 struct flowi fl = { .oif = vif->link,
1262 .nl_u = { .ip4_u =
1263 { .daddr = iph->daddr,
1264 .tos = RT_TOS(iph->tos) } },
1265 .proto = IPPROTO_IPIP };
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001266 if (ip_route_output_key(net, &rt, &fl))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001267 goto out_free;
1268 }
1269
1270 dev = rt->u.dst.dev;
1271
1272 if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
1273 /* Do not fragment multicasts. Alas, IPv4 does not
1274 allow to send ICMP, so that packets will disappear
1275 to blackhole.
1276 */
1277
Pavel Emelyanov7c73a6f2008-07-16 20:20:11 -07001278 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001279 ip_rt_put(rt);
1280 goto out_free;
1281 }
1282
1283 encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;
1284
1285 if (skb_cow(skb, encap)) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001286 ip_rt_put(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001287 goto out_free;
1288 }
1289
1290 vif->pkt_out++;
Jianjun Kongc354e122008-11-03 00:28:02 -08001291 vif->bytes_out += skb->len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001292
Eric Dumazetadf30902009-06-02 05:19:30 +00001293 skb_dst_drop(skb);
1294 skb_dst_set(skb, &rt->u.dst);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001295 ip_decrease_ttl(ip_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001296
1297 /* FIXME: forward and output firewalls used to be called here.
1298 * What do we do with netfilter? -- RR */
1299 if (vif->flags & VIFF_TUNNEL) {
1300 ip_encap(skb, vif->local, vif->remote);
1301 /* FIXME: extra output firewall step used to be here. --RR */
Pavel Emelyanov2f4c02d2008-05-21 14:16:14 -07001302 vif->dev->stats.tx_packets++;
1303 vif->dev->stats.tx_bytes += skb->len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001304 }
1305
1306 IPCB(skb)->flags |= IPSKB_FORWARDED;
1307
1308 /*
1309 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1310 * not only before forwarding, but after forwarding on all output
1311 * interfaces. It is clear, if mrouter runs a multicasting
1312 * program, it should receive packets not depending to what interface
1313 * program is joined.
1314 * If we will not make it, the program will have to join on all
1315 * interfaces. On the other hand, multihoming host (or router, but
1316 * not mrouter) cannot join to more than one interface - it will
1317 * result in receiving multiple packets.
1318 */
Patrick McHardy6e23ae22007-11-19 18:53:30 -08001319 NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001320 ipmr_forward_finish);
1321 return;
1322
1323out_free:
1324 kfree_skb(skb);
1325 return;
1326}
1327
1328static int ipmr_find_vif(struct net_device *dev)
1329{
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001330 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001331 int ct;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001332 for (ct = net->ipv4.maxvif-1; ct >= 0; ct--) {
1333 if (net->ipv4.vif_table[ct].dev == dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001334 break;
1335 }
1336 return ct;
1337}
1338
1339/* "local" means that we should preserve one skb (for local delivery) */
1340
1341static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
1342{
1343 int psend = -1;
1344 int vif, ct;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001345 struct net *net = mfc_net(cache);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001346
1347 vif = cache->mfc_parent;
1348 cache->mfc_un.res.pkt++;
1349 cache->mfc_un.res.bytes += skb->len;
1350
1351 /*
1352 * Wrong interface: drop packet and (maybe) send PIM assert.
1353 */
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001354 if (net->ipv4.vif_table[vif].dev != skb->dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001355 int true_vifi;
1356
Eric Dumazet511c3f92009-06-02 05:14:27 +00001357 if (skb_rtable(skb)->fl.iif == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001358 /* It is our own packet, looped back.
1359 Very complicated situation...
1360
1361 The best workaround until routing daemons will be
1362 fixed is not to redistribute packet, if it was
1363 send through wrong interface. It means, that
1364 multicast applications WILL NOT work for
1365 (S,G), which have default multicast route pointing
1366 to wrong oif. In any case, it is not a good
1367 idea to use multicasting applications on router.
1368 */
1369 goto dont_forward;
1370 }
1371
1372 cache->mfc_un.res.wrong_if++;
1373 true_vifi = ipmr_find_vif(skb->dev);
1374
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001375 if (true_vifi >= 0 && net->ipv4.mroute_do_assert &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001376 /* pimsm uses asserts, when switching from RPT to SPT,
1377 so that we cannot check that packet arrived on an oif.
1378 It is bad, but otherwise we would need to move pretty
1379 large chunk of pimd to kernel. Ough... --ANK
1380 */
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001381 (net->ipv4.mroute_do_pim ||
Benjamin Thery6f9374a2009-01-22 04:56:20 +00001382 cache->mfc_un.res.ttls[true_vifi] < 255) &&
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001383 time_after(jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001384 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1385 cache->mfc_un.res.last_assert = jiffies;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001386 ipmr_cache_report(net, skb, true_vifi, IGMPMSG_WRONGVIF);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001387 }
1388 goto dont_forward;
1389 }
1390
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001391 net->ipv4.vif_table[vif].pkt_in++;
1392 net->ipv4.vif_table[vif].bytes_in += skb->len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001393
1394 /*
1395 * Forward the frame
1396 */
1397 for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001398 if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001399 if (psend != -1) {
1400 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1401 if (skb2)
1402 ipmr_queue_xmit(skb2, cache, psend);
1403 }
Jianjun Kongc354e122008-11-03 00:28:02 -08001404 psend = ct;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001405 }
1406 }
1407 if (psend != -1) {
1408 if (local) {
1409 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1410 if (skb2)
1411 ipmr_queue_xmit(skb2, cache, psend);
1412 } else {
1413 ipmr_queue_xmit(skb, cache, psend);
1414 return 0;
1415 }
1416 }
1417
1418dont_forward:
1419 if (!local)
1420 kfree_skb(skb);
1421 return 0;
1422}
1423
1424
1425/*
1426 * Multicast packets for forwarding arrive here
1427 */
1428
1429int ip_mr_input(struct sk_buff *skb)
1430{
1431 struct mfc_cache *cache;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001432 struct net *net = dev_net(skb->dev);
Eric Dumazet511c3f92009-06-02 05:14:27 +00001433 int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001434
1435 /* Packet is looped back after forward, it should not be
1436 forwarded second time, but still can be delivered locally.
1437 */
1438 if (IPCB(skb)->flags&IPSKB_FORWARDED)
1439 goto dont_forward;
1440
1441 if (!local) {
1442 if (IPCB(skb)->opt.router_alert) {
1443 if (ip_call_ra_chain(skb))
1444 return 0;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001445 } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
Linus Torvalds1da177e2005-04-16 15:20:36 -07001446 /* IGMPv1 (and broken IGMPv2 implementations sort of
1447 Cisco IOS <= 11.2(8)) do not put router alert
1448 option to IGMP packets destined to routable
1449 groups. It is very bad, because it means
1450 that we can forward NO IGMP messages.
1451 */
1452 read_lock(&mrt_lock);
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001453 if (net->ipv4.mroute_sk) {
Patrick McHardy2715bcf2005-06-21 14:06:24 -07001454 nf_reset(skb);
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001455 raw_rcv(net->ipv4.mroute_sk, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001456 read_unlock(&mrt_lock);
1457 return 0;
1458 }
1459 read_unlock(&mrt_lock);
1460 }
1461 }
1462
1463 read_lock(&mrt_lock);
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001464 cache = ipmr_cache_find(net, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001465
1466 /*
1467 * No usable cache entry
1468 */
Jianjun Kongc354e122008-11-03 00:28:02 -08001469 if (cache == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001470 int vif;
1471
1472 if (local) {
1473 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1474 ip_local_deliver(skb);
1475 if (skb2 == NULL) {
1476 read_unlock(&mrt_lock);
1477 return -ENOBUFS;
1478 }
1479 skb = skb2;
1480 }
1481
1482 vif = ipmr_find_vif(skb->dev);
1483 if (vif >= 0) {
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001484 int err = ipmr_cache_unresolved(net, vif, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001485 read_unlock(&mrt_lock);
1486
1487 return err;
1488 }
1489 read_unlock(&mrt_lock);
1490 kfree_skb(skb);
1491 return -ENODEV;
1492 }
1493
1494 ip_mr_forward(skb, cache, local);
1495
1496 read_unlock(&mrt_lock);
1497
1498 if (local)
1499 return ip_local_deliver(skb);
1500
1501 return 0;
1502
1503dont_forward:
1504 if (local)
1505 return ip_local_deliver(skb);
1506 kfree_skb(skb);
1507 return 0;
1508}
1509
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001510#ifdef CONFIG_IP_PIMSM
1511static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001512{
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001513 struct net_device *reg_dev = NULL;
1514 struct iphdr *encap;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001515 struct net *net = dev_net(skb->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001516
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001517 encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001518 /*
1519 Check that:
1520 a. packet is really destinted to a multicast group
1521 b. packet is not a NULL-REGISTER
1522 c. packet is not truncated
1523 */
Joe Perchesf97c1e02007-12-16 13:45:43 -08001524 if (!ipv4_is_multicast(encap->daddr) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001525 encap->tot_len == 0 ||
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001526 ntohs(encap->tot_len) + pimlen > skb->len)
1527 return 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001528
1529 read_lock(&mrt_lock);
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001530 if (net->ipv4.mroute_reg_vif_num >= 0)
1531 reg_dev = net->ipv4.vif_table[net->ipv4.mroute_reg_vif_num].dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001532 if (reg_dev)
1533 dev_hold(reg_dev);
1534 read_unlock(&mrt_lock);
1535
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001536 if (reg_dev == NULL)
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001537 return 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001538
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -07001539 skb->mac_header = skb->network_header;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001540 skb_pull(skb, (u8*)encap - skb->data);
Arnaldo Carvalho de Melo31c77112007-03-10 19:04:55 -03001541 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001542 skb->dev = reg_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001543 skb->protocol = htons(ETH_P_IP);
1544 skb->ip_summed = 0;
1545 skb->pkt_type = PACKET_HOST;
Eric Dumazetadf30902009-06-02 05:19:30 +00001546 skb_dst_drop(skb);
Pavel Emelyanovcf3677a2008-05-21 14:17:33 -07001547 reg_dev->stats.rx_bytes += skb->len;
1548 reg_dev->stats.rx_packets++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001549 nf_reset(skb);
1550 netif_rx(skb);
1551 dev_put(reg_dev);
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001552
Linus Torvalds1da177e2005-04-16 15:20:36 -07001553 return 0;
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001554}
1555#endif
1556
1557#ifdef CONFIG_IP_PIMSM_V1
1558/*
1559 * Handle IGMP messages of PIMv1
1560 */
1561
1562int pim_rcv_v1(struct sk_buff * skb)
1563{
1564 struct igmphdr *pim;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001565 struct net *net = dev_net(skb->dev);
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001566
1567 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1568 goto drop;
1569
1570 pim = igmp_hdr(skb);
1571
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001572 if (!net->ipv4.mroute_do_pim ||
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001573 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1574 goto drop;
1575
1576 if (__pim_rcv(skb, sizeof(*pim))) {
1577drop:
1578 kfree_skb(skb);
1579 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001580 return 0;
1581}
1582#endif
1583
1584#ifdef CONFIG_IP_PIMSM_V2
1585static int pim_rcv(struct sk_buff * skb)
1586{
1587 struct pimreghdr *pim;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001588
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001589 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001590 goto drop;
1591
Arnaldo Carvalho de Melo9c702202007-04-25 18:04:18 -07001592 pim = (struct pimreghdr *)skb_transport_header(skb);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001593 if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001594 (pim->flags&PIM_NULL_REGISTER) ||
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001595 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
Al Virod3bc23e2006-11-14 21:24:49 -08001596 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001597 goto drop;
1598
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001599 if (__pim_rcv(skb, sizeof(*pim))) {
1600drop:
1601 kfree_skb(skb);
1602 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001603 return 0;
1604}
1605#endif
1606
1607static int
1608ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
1609{
1610 int ct;
1611 struct rtnexthop *nhp;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001612 struct net *net = mfc_net(c);
1613 struct net_device *dev = net->ipv4.vif_table[c->mfc_parent].dev;
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001614 u8 *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001615 struct rtattr *mp_head;
1616
1617 if (dev)
1618 RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
1619
Jianjun Kongc354e122008-11-03 00:28:02 -08001620 mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001621
1622 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1623 if (c->mfc_un.res.ttls[ct] < 255) {
1624 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1625 goto rtattr_failure;
Jianjun Kongc354e122008-11-03 00:28:02 -08001626 nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001627 nhp->rtnh_flags = 0;
1628 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001629 nhp->rtnh_ifindex = net->ipv4.vif_table[ct].dev->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001630 nhp->rtnh_len = sizeof(*nhp);
1631 }
1632 }
1633 mp_head->rta_type = RTA_MULTIPATH;
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001634 mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001635 rtm->rtm_type = RTN_MULTICAST;
1636 return 1;
1637
1638rtattr_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001639 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001640 return -EMSGSIZE;
1641}
1642
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001643int ipmr_get_route(struct net *net,
1644 struct sk_buff *skb, struct rtmsg *rtm, int nowait)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001645{
1646 int err;
1647 struct mfc_cache *cache;
Eric Dumazet511c3f92009-06-02 05:14:27 +00001648 struct rtable *rt = skb_rtable(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001649
1650 read_lock(&mrt_lock);
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001651 cache = ipmr_cache_find(net, rt->rt_src, rt->rt_dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001652
Jianjun Kongc354e122008-11-03 00:28:02 -08001653 if (cache == NULL) {
Alexey Kuznetsov72287492006-07-25 16:45:12 -07001654 struct sk_buff *skb2;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001655 struct iphdr *iph;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001656 struct net_device *dev;
1657 int vif;
1658
1659 if (nowait) {
1660 read_unlock(&mrt_lock);
1661 return -EAGAIN;
1662 }
1663
1664 dev = skb->dev;
1665 if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
1666 read_unlock(&mrt_lock);
1667 return -ENODEV;
1668 }
Alexey Kuznetsov72287492006-07-25 16:45:12 -07001669 skb2 = skb_clone(skb, GFP_ATOMIC);
1670 if (!skb2) {
1671 read_unlock(&mrt_lock);
1672 return -ENOMEM;
1673 }
1674
Arnaldo Carvalho de Meloe2d1bca2007-04-10 20:46:21 -07001675 skb_push(skb2, sizeof(struct iphdr));
1676 skb_reset_network_header(skb2);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001677 iph = ip_hdr(skb2);
1678 iph->ihl = sizeof(struct iphdr) >> 2;
1679 iph->saddr = rt->rt_src;
1680 iph->daddr = rt->rt_dst;
1681 iph->version = 0;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001682 err = ipmr_cache_unresolved(net, vif, skb2);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001683 read_unlock(&mrt_lock);
1684 return err;
1685 }
1686
1687 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1688 cache->mfc_flags |= MFC_NOTIFY;
1689 err = ipmr_fill_mroute(skb, cache, rtm);
1690 read_unlock(&mrt_lock);
1691 return err;
1692}
1693
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001694#ifdef CONFIG_PROC_FS
Linus Torvalds1da177e2005-04-16 15:20:36 -07001695/*
1696 * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
1697 */
1698struct ipmr_vif_iter {
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001699 struct seq_net_private p;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001700 int ct;
1701};
1702
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001703static struct vif_device *ipmr_vif_seq_idx(struct net *net,
1704 struct ipmr_vif_iter *iter,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001705 loff_t pos)
1706{
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001707 for (iter->ct = 0; iter->ct < net->ipv4.maxvif; ++iter->ct) {
1708 if (!VIF_EXISTS(net, iter->ct))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001709 continue;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001710 if (pos-- == 0)
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001711 return &net->ipv4.vif_table[iter->ct];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001712 }
1713 return NULL;
1714}
1715
1716static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
Stephen Hemmingerba93ef72008-01-21 17:28:59 -08001717 __acquires(mrt_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001718{
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001719 struct net *net = seq_file_net(seq);
1720
Linus Torvalds1da177e2005-04-16 15:20:36 -07001721 read_lock(&mrt_lock);
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001722 return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001723 : SEQ_START_TOKEN;
1724}
1725
1726static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1727{
1728 struct ipmr_vif_iter *iter = seq->private;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001729 struct net *net = seq_file_net(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001730
1731 ++*pos;
1732 if (v == SEQ_START_TOKEN)
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001733 return ipmr_vif_seq_idx(net, iter, 0);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001734
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001735 while (++iter->ct < net->ipv4.maxvif) {
1736 if (!VIF_EXISTS(net, iter->ct))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001737 continue;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001738 return &net->ipv4.vif_table[iter->ct];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001739 }
1740 return NULL;
1741}
1742
1743static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
Stephen Hemmingerba93ef72008-01-21 17:28:59 -08001744 __releases(mrt_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001745{
1746 read_unlock(&mrt_lock);
1747}
1748
1749static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1750{
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001751 struct net *net = seq_file_net(seq);
1752
Linus Torvalds1da177e2005-04-16 15:20:36 -07001753 if (v == SEQ_START_TOKEN) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001754 seq_puts(seq,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001755 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
1756 } else {
1757 const struct vif_device *vif = v;
1758 const char *name = vif->dev ? vif->dev->name : "none";
1759
1760 seq_printf(seq,
1761 "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001762 vif - net->ipv4.vif_table,
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001763 name, vif->bytes_in, vif->pkt_in,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001764 vif->bytes_out, vif->pkt_out,
1765 vif->flags, vif->local, vif->remote);
1766 }
1767 return 0;
1768}
1769
Stephen Hemmingerf6908082007-03-12 14:34:29 -07001770static const struct seq_operations ipmr_vif_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001771 .start = ipmr_vif_seq_start,
1772 .next = ipmr_vif_seq_next,
1773 .stop = ipmr_vif_seq_stop,
1774 .show = ipmr_vif_seq_show,
1775};
1776
1777static int ipmr_vif_open(struct inode *inode, struct file *file)
1778{
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001779 return seq_open_net(inode, file, &ipmr_vif_seq_ops,
1780 sizeof(struct ipmr_vif_iter));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001781}
1782
Arjan van de Ven9a321442007-02-12 00:55:35 -08001783static const struct file_operations ipmr_vif_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001784 .owner = THIS_MODULE,
1785 .open = ipmr_vif_open,
1786 .read = seq_read,
1787 .llseek = seq_lseek,
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001788 .release = seq_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001789};
1790
1791struct ipmr_mfc_iter {
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001792 struct seq_net_private p;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001793 struct mfc_cache **cache;
1794 int ct;
1795};
1796
1797
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001798static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
1799 struct ipmr_mfc_iter *it, loff_t pos)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001800{
1801 struct mfc_cache *mfc;
1802
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001803 it->cache = net->ipv4.mfc_cache_array;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001804 read_lock(&mrt_lock);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001805 for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001806 for (mfc = net->ipv4.mfc_cache_array[it->ct];
Benjamin Thery2bb8b262009-01-22 04:56:18 +00001807 mfc; mfc = mfc->next)
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001808 if (pos-- == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001809 return mfc;
1810 read_unlock(&mrt_lock);
1811
1812 it->cache = &mfc_unres_queue;
1813 spin_lock_bh(&mfc_unres_lock);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001814 for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001815 if (net_eq(mfc_net(mfc), net) &&
1816 pos-- == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001817 return mfc;
1818 spin_unlock_bh(&mfc_unres_lock);
1819
1820 it->cache = NULL;
1821 return NULL;
1822}
1823
1824
1825static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
1826{
1827 struct ipmr_mfc_iter *it = seq->private;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001828 struct net *net = seq_file_net(seq);
1829
Linus Torvalds1da177e2005-04-16 15:20:36 -07001830 it->cache = NULL;
1831 it->ct = 0;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001832 return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001833 : SEQ_START_TOKEN;
1834}
1835
1836static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1837{
1838 struct mfc_cache *mfc = v;
1839 struct ipmr_mfc_iter *it = seq->private;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001840 struct net *net = seq_file_net(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001841
1842 ++*pos;
1843
1844 if (v == SEQ_START_TOKEN)
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001845 return ipmr_mfc_seq_idx(net, seq->private, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001846
1847 if (mfc->next)
1848 return mfc->next;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001849
1850 if (it->cache == &mfc_unres_queue)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001851 goto end_of_list;
1852
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001853 BUG_ON(it->cache != net->ipv4.mfc_cache_array);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001854
1855 while (++it->ct < MFC_LINES) {
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001856 mfc = net->ipv4.mfc_cache_array[it->ct];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001857 if (mfc)
1858 return mfc;
1859 }
1860
1861 /* exhausted cache_array, show unresolved */
1862 read_unlock(&mrt_lock);
1863 it->cache = &mfc_unres_queue;
1864 it->ct = 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001865
Linus Torvalds1da177e2005-04-16 15:20:36 -07001866 spin_lock_bh(&mfc_unres_lock);
1867 mfc = mfc_unres_queue;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001868 while (mfc && !net_eq(mfc_net(mfc), net))
1869 mfc = mfc->next;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001870 if (mfc)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001871 return mfc;
1872
1873 end_of_list:
1874 spin_unlock_bh(&mfc_unres_lock);
1875 it->cache = NULL;
1876
1877 return NULL;
1878}
1879
1880static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
1881{
1882 struct ipmr_mfc_iter *it = seq->private;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001883 struct net *net = seq_file_net(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001884
1885 if (it->cache == &mfc_unres_queue)
1886 spin_unlock_bh(&mfc_unres_lock);
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001887 else if (it->cache == net->ipv4.mfc_cache_array)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001888 read_unlock(&mrt_lock);
1889}
1890
1891static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1892{
1893 int n;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001894 struct net *net = seq_file_net(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001895
1896 if (v == SEQ_START_TOKEN) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001897 seq_puts(seq,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001898 "Group Origin Iif Pkts Bytes Wrong Oifs\n");
1899 } else {
1900 const struct mfc_cache *mfc = v;
1901 const struct ipmr_mfc_iter *it = seq->private;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001902
Benjamin Thery999890b2008-12-03 22:22:16 -08001903 seq_printf(seq, "%08lX %08lX %-3hd",
Linus Torvalds1da177e2005-04-16 15:20:36 -07001904 (unsigned long) mfc->mfc_mcastgrp,
1905 (unsigned long) mfc->mfc_origin,
Benjamin Thery1ea472e2008-12-03 22:21:47 -08001906 mfc->mfc_parent);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001907
1908 if (it->cache != &mfc_unres_queue) {
Benjamin Thery1ea472e2008-12-03 22:21:47 -08001909 seq_printf(seq, " %8lu %8lu %8lu",
1910 mfc->mfc_un.res.pkt,
1911 mfc->mfc_un.res.bytes,
1912 mfc->mfc_un.res.wrong_if);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001913 for (n = mfc->mfc_un.res.minvif;
1914 n < mfc->mfc_un.res.maxvif; n++ ) {
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001915 if (VIF_EXISTS(net, n) &&
Benjamin Therycf958ae32009-01-22 04:56:16 +00001916 mfc->mfc_un.res.ttls[n] < 255)
1917 seq_printf(seq,
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001918 " %2d:%-3d",
Linus Torvalds1da177e2005-04-16 15:20:36 -07001919 n, mfc->mfc_un.res.ttls[n]);
1920 }
Benjamin Thery1ea472e2008-12-03 22:21:47 -08001921 } else {
1922 /* unresolved mfc_caches don't contain
1923 * pkt, bytes and wrong_if values
1924 */
1925 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001926 }
1927 seq_putc(seq, '\n');
1928 }
1929 return 0;
1930}
1931
Stephen Hemmingerf6908082007-03-12 14:34:29 -07001932static const struct seq_operations ipmr_mfc_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001933 .start = ipmr_mfc_seq_start,
1934 .next = ipmr_mfc_seq_next,
1935 .stop = ipmr_mfc_seq_stop,
1936 .show = ipmr_mfc_seq_show,
1937};
1938
1939static int ipmr_mfc_open(struct inode *inode, struct file *file)
1940{
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001941 return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
1942 sizeof(struct ipmr_mfc_iter));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001943}
1944
Arjan van de Ven9a321442007-02-12 00:55:35 -08001945static const struct file_operations ipmr_mfc_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001946 .owner = THIS_MODULE,
1947 .open = ipmr_mfc_open,
1948 .read = seq_read,
1949 .llseek = seq_lseek,
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001950 .release = seq_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001951};
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001952#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001953
1954#ifdef CONFIG_IP_PIMSM_V2
1955static struct net_protocol pim_protocol = {
1956 .handler = pim_rcv,
1957};
1958#endif
1959
1960
1961/*
1962 * Setup for IP multicast routing
1963 */
Benjamin Therycf958ae32009-01-22 04:56:16 +00001964static int __net_init ipmr_net_init(struct net *net)
1965{
1966 int err = 0;
1967
1968 net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device),
1969 GFP_KERNEL);
1970 if (!net->ipv4.vif_table) {
1971 err = -ENOMEM;
1972 goto fail;
1973 }
Benjamin Thery2bb8b262009-01-22 04:56:18 +00001974
1975 /* Forwarding cache */
1976 net->ipv4.mfc_cache_array = kcalloc(MFC_LINES,
1977 sizeof(struct mfc_cache *),
1978 GFP_KERNEL);
1979 if (!net->ipv4.mfc_cache_array) {
1980 err = -ENOMEM;
1981 goto fail_mfc_cache;
1982 }
Benjamin Thery6c5143d2009-01-22 04:56:21 +00001983
1984#ifdef CONFIG_IP_PIMSM
1985 net->ipv4.mroute_reg_vif_num = -1;
1986#endif
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001987
1988#ifdef CONFIG_PROC_FS
1989 err = -ENOMEM;
1990 if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
1991 goto proc_vif_fail;
1992 if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
1993 goto proc_cache_fail;
1994#endif
Benjamin Thery2bb8b262009-01-22 04:56:18 +00001995 return 0;
1996
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001997#ifdef CONFIG_PROC_FS
1998proc_cache_fail:
1999 proc_net_remove(net, "ip_mr_vif");
2000proc_vif_fail:
2001 kfree(net->ipv4.mfc_cache_array);
2002#endif
Benjamin Thery2bb8b262009-01-22 04:56:18 +00002003fail_mfc_cache:
2004 kfree(net->ipv4.vif_table);
Benjamin Therycf958ae32009-01-22 04:56:16 +00002005fail:
2006 return err;
2007}
2008
2009static void __net_exit ipmr_net_exit(struct net *net)
2010{
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002011#ifdef CONFIG_PROC_FS
2012 proc_net_remove(net, "ip_mr_cache");
2013 proc_net_remove(net, "ip_mr_vif");
2014#endif
Benjamin Thery2bb8b262009-01-22 04:56:18 +00002015 kfree(net->ipv4.mfc_cache_array);
Benjamin Therycf958ae32009-01-22 04:56:16 +00002016 kfree(net->ipv4.vif_table);
2017}
2018
2019static struct pernet_operations ipmr_net_ops = {
2020 .init = ipmr_net_init,
2021 .exit = ipmr_net_exit,
2022};
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002023
Wang Chen03d2f892008-07-03 12:13:36 +08002024int __init ip_mr_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002025{
Wang Chen03d2f892008-07-03 12:13:36 +08002026 int err;
2027
Linus Torvalds1da177e2005-04-16 15:20:36 -07002028 mrt_cachep = kmem_cache_create("ip_mrt_cache",
2029 sizeof(struct mfc_cache),
Alexey Dobriyane5d679f332006-08-26 19:25:52 -07002030 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
Paul Mundt20c2df82007-07-20 10:11:58 +09002031 NULL);
Wang Chen03d2f892008-07-03 12:13:36 +08002032 if (!mrt_cachep)
2033 return -ENOMEM;
2034
Benjamin Therycf958ae32009-01-22 04:56:16 +00002035 err = register_pernet_subsys(&ipmr_net_ops);
2036 if (err)
2037 goto reg_pernet_fail;
2038
Pavel Emelyanovb24b8a22008-01-23 21:20:07 -08002039 setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
Wang Chen03d2f892008-07-03 12:13:36 +08002040 err = register_netdevice_notifier(&ip_mr_notifier);
2041 if (err)
2042 goto reg_notif_fail;
Wang Chen03d2f892008-07-03 12:13:36 +08002043 return 0;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002044
Benjamin Theryc3e38892008-11-19 14:07:41 -08002045reg_notif_fail:
2046 del_timer(&ipmr_expire_timer);
Benjamin Therycf958ae32009-01-22 04:56:16 +00002047 unregister_pernet_subsys(&ipmr_net_ops);
2048reg_pernet_fail:
Benjamin Theryc3e38892008-11-19 14:07:41 -08002049 kmem_cache_destroy(mrt_cachep);
Wang Chen03d2f892008-07-03 12:13:36 +08002050 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002051}