blob: 8428a0fb5c10fdd09300c114ecd2f76aa4cf07da [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * IP multicast routing support for mrouted 3.6/3.8
3 *
Alan Cox113aa832008-10-13 19:01:08 -07004 * (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
Linus Torvalds1da177e2005-04-16 15:20:36 -07005 * Linux Consultancy and Custom Driver Development
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 *
Linus Torvalds1da177e2005-04-16 15:20:36 -070012 * Fixes:
13 * Michael Chastain : Incorrect size of copying.
14 * Alan Cox : Added the cache manager code
15 * Alan Cox : Fixed the clone/copy bug and device race.
16 * Mike McLagan : Routing by source
17 * Malcolm Beattie : Buffer handling fixes.
18 * Alexey Kuznetsov : Double buffer free and other fixes.
19 * SVR Anand : Fixed several multicast bugs and problems.
20 * Alexey Kuznetsov : Status, optimisations and more.
21 * Brad Parker : Better behaviour on mrouted upcall
22 * overflow.
23 * Carlos Picoto : PIMv1 Support
24 * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header
25 * Relax this requirement to work with older peers.
26 *
27 */
28
Linus Torvalds1da177e2005-04-16 15:20:36 -070029#include <asm/system.h>
30#include <asm/uaccess.h>
31#include <linux/types.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080032#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070033#include <linux/errno.h>
34#include <linux/timer.h>
35#include <linux/mm.h>
36#include <linux/kernel.h>
37#include <linux/fcntl.h>
38#include <linux/stat.h>
39#include <linux/socket.h>
40#include <linux/in.h>
41#include <linux/inet.h>
42#include <linux/netdevice.h>
43#include <linux/inetdevice.h>
44#include <linux/igmp.h>
45#include <linux/proc_fs.h>
46#include <linux/seq_file.h>
47#include <linux/mroute.h>
48#include <linux/init.h>
Kris Katterjohn46f25df2006-01-05 16:35:42 -080049#include <linux/if_ether.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020050#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070051#include <net/ip.h>
52#include <net/protocol.h>
53#include <linux/skbuff.h>
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020054#include <net/route.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070055#include <net/sock.h>
56#include <net/icmp.h>
57#include <net/udp.h>
58#include <net/raw.h>
59#include <linux/notifier.h>
60#include <linux/if_arp.h>
61#include <linux/netfilter_ipv4.h>
62#include <net/ipip.h>
63#include <net/checksum.h>
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -070064#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070065
66#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
67#define CONFIG_IP_PIMSM 1
68#endif
69
Linus Torvalds1da177e2005-04-16 15:20:36 -070070/* Big lock, protecting vif table, mrt cache and mroute socket state.
71 Note that the changes are semaphored via rtnl_lock.
72 */
73
74static DEFINE_RWLOCK(mrt_lock);
75
76/*
77 * Multicast router control variables
78 */
79
Benjamin Therycf958ae32009-01-22 04:56:16 +000080#define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -070081
82static int mroute_do_assert; /* Set in PIM assert */
83static int mroute_do_pim;
84
85static struct mfc_cache *mfc_cache_array[MFC_LINES]; /* Forwarding cache */
86
87static struct mfc_cache *mfc_unres_queue; /* Queue of unresolved entries */
88static atomic_t cache_resolve_queue_len; /* Size of unresolved */
89
90/* Special spinlock for queue of unresolved entries */
91static DEFINE_SPINLOCK(mfc_unres_lock);
92
93/* We return to original Alan's scheme. Hash table of resolved
94 entries is changed only in process context and protected
95 with weak lock mrt_lock. Queue of unresolved entries is protected
96 with strong spinlock mfc_unres_lock.
97
98 In this case data path is free of exclusive locks at all.
99 */
100
Christoph Lametere18b8902006-12-06 20:33:20 -0800101static struct kmem_cache *mrt_cachep __read_mostly;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700102
103static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
104static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
105static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
106
107#ifdef CONFIG_IP_PIMSM_V2
108static struct net_protocol pim_protocol;
109#endif
110
111static struct timer_list ipmr_expire_timer;
112
113/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
114
Wang Chend6070322008-07-14 20:55:26 -0700115static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
116{
117 dev_close(dev);
118
119 dev = __dev_get_by_name(&init_net, "tunl0");
120 if (dev) {
Stephen Hemminger5bc3eb72008-11-19 21:52:05 -0800121 const struct net_device_ops *ops = dev->netdev_ops;
Wang Chend6070322008-07-14 20:55:26 -0700122 struct ifreq ifr;
Wang Chend6070322008-07-14 20:55:26 -0700123 struct ip_tunnel_parm p;
124
125 memset(&p, 0, sizeof(p));
126 p.iph.daddr = v->vifc_rmt_addr.s_addr;
127 p.iph.saddr = v->vifc_lcl_addr.s_addr;
128 p.iph.version = 4;
129 p.iph.ihl = 5;
130 p.iph.protocol = IPPROTO_IPIP;
131 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
132 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
133
Stephen Hemminger5bc3eb72008-11-19 21:52:05 -0800134 if (ops->ndo_do_ioctl) {
135 mm_segment_t oldfs = get_fs();
136
137 set_fs(KERNEL_DS);
138 ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
139 set_fs(oldfs);
140 }
Wang Chend6070322008-07-14 20:55:26 -0700141 }
142}
143
Linus Torvalds1da177e2005-04-16 15:20:36 -0700144static
145struct net_device *ipmr_new_tunnel(struct vifctl *v)
146{
147 struct net_device *dev;
148
Eric W. Biederman881d9662007-09-17 11:56:21 -0700149 dev = __dev_get_by_name(&init_net, "tunl0");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700150
151 if (dev) {
Stephen Hemminger5bc3eb72008-11-19 21:52:05 -0800152 const struct net_device_ops *ops = dev->netdev_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700153 int err;
154 struct ifreq ifr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700155 struct ip_tunnel_parm p;
156 struct in_device *in_dev;
157
158 memset(&p, 0, sizeof(p));
159 p.iph.daddr = v->vifc_rmt_addr.s_addr;
160 p.iph.saddr = v->vifc_lcl_addr.s_addr;
161 p.iph.version = 4;
162 p.iph.ihl = 5;
163 p.iph.protocol = IPPROTO_IPIP;
164 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
Stephen Hemmingerba93ef72008-01-21 17:28:59 -0800165 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700166
Stephen Hemminger5bc3eb72008-11-19 21:52:05 -0800167 if (ops->ndo_do_ioctl) {
168 mm_segment_t oldfs = get_fs();
169
170 set_fs(KERNEL_DS);
171 err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
172 set_fs(oldfs);
173 } else
174 err = -EOPNOTSUPP;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700175
176 dev = NULL;
177
Eric W. Biederman881d9662007-09-17 11:56:21 -0700178 if (err == 0 && (dev = __dev_get_by_name(&init_net, p.name)) != NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700179 dev->flags |= IFF_MULTICAST;
180
Herbert Xue5ed6392005-10-03 14:35:55 -0700181 in_dev = __in_dev_get_rtnl(dev);
Herbert Xu71e27da2007-06-04 23:36:06 -0700182 if (in_dev == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700183 goto failure;
Herbert Xu71e27da2007-06-04 23:36:06 -0700184
185 ipv4_devconf_setall(in_dev);
186 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700187
188 if (dev_open(dev))
189 goto failure;
Wang Chen7dc00c82008-07-14 20:56:34 -0700190 dev_hold(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700191 }
192 }
193 return dev;
194
195failure:
196 /* allow the register to be completed before unregistering. */
197 rtnl_unlock();
198 rtnl_lock();
199
200 unregister_netdevice(dev);
201 return NULL;
202}
203
204#ifdef CONFIG_IP_PIMSM
205
206static int reg_vif_num = -1;
207
208static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
209{
210 read_lock(&mrt_lock);
Pavel Emelyanovcf3677a2008-05-21 14:17:33 -0700211 dev->stats.tx_bytes += skb->len;
212 dev->stats.tx_packets++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700213 ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT);
214 read_unlock(&mrt_lock);
215 kfree_skb(skb);
216 return 0;
217}
218
Stephen Hemminger007c3832008-11-20 20:28:35 -0800219static const struct net_device_ops reg_vif_netdev_ops = {
220 .ndo_start_xmit = reg_vif_xmit,
221};
222
Linus Torvalds1da177e2005-04-16 15:20:36 -0700223static void reg_vif_setup(struct net_device *dev)
224{
225 dev->type = ARPHRD_PIMREG;
Kris Katterjohn46f25df2006-01-05 16:35:42 -0800226 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700227 dev->flags = IFF_NOARP;
Stephen Hemminger007c3832008-11-20 20:28:35 -0800228 dev->netdev_ops = &reg_vif_netdev_ops,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700229 dev->destructor = free_netdev;
230}
231
232static struct net_device *ipmr_reg_vif(void)
233{
234 struct net_device *dev;
235 struct in_device *in_dev;
236
Pavel Emelyanovcf3677a2008-05-21 14:17:33 -0700237 dev = alloc_netdev(0, "pimreg", reg_vif_setup);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700238
239 if (dev == NULL)
240 return NULL;
241
242 if (register_netdevice(dev)) {
243 free_netdev(dev);
244 return NULL;
245 }
246 dev->iflink = 0;
247
Herbert Xu71e27da2007-06-04 23:36:06 -0700248 rcu_read_lock();
249 if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
250 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700251 goto failure;
Herbert Xu71e27da2007-06-04 23:36:06 -0700252 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700253
Herbert Xu71e27da2007-06-04 23:36:06 -0700254 ipv4_devconf_setall(in_dev);
255 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
256 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700257
258 if (dev_open(dev))
259 goto failure;
260
Wang Chen7dc00c82008-07-14 20:56:34 -0700261 dev_hold(dev);
262
Linus Torvalds1da177e2005-04-16 15:20:36 -0700263 return dev;
264
265failure:
266 /* allow the register to be completed before unregistering. */
267 rtnl_unlock();
268 rtnl_lock();
269
270 unregister_netdevice(dev);
271 return NULL;
272}
273#endif
274
275/*
276 * Delete a VIF entry
Wang Chen7dc00c82008-07-14 20:56:34 -0700277 * @notify: Set to 1, if the caller is a notifier_call
Linus Torvalds1da177e2005-04-16 15:20:36 -0700278 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900279
Wang Chen7dc00c82008-07-14 20:56:34 -0700280static int vif_delete(int vifi, int notify)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700281{
282 struct vif_device *v;
283 struct net_device *dev;
284 struct in_device *in_dev;
285
Benjamin Therycf958ae32009-01-22 04:56:16 +0000286 if (vifi < 0 || vifi >= init_net.ipv4.maxvif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700287 return -EADDRNOTAVAIL;
288
Benjamin Therycf958ae32009-01-22 04:56:16 +0000289 v = &init_net.ipv4.vif_table[vifi];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700290
291 write_lock_bh(&mrt_lock);
292 dev = v->dev;
293 v->dev = NULL;
294
295 if (!dev) {
296 write_unlock_bh(&mrt_lock);
297 return -EADDRNOTAVAIL;
298 }
299
300#ifdef CONFIG_IP_PIMSM
301 if (vifi == reg_vif_num)
302 reg_vif_num = -1;
303#endif
304
Benjamin Therycf958ae32009-01-22 04:56:16 +0000305 if (vifi+1 == init_net.ipv4.maxvif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700306 int tmp;
307 for (tmp=vifi-1; tmp>=0; tmp--) {
Benjamin Therycf958ae32009-01-22 04:56:16 +0000308 if (VIF_EXISTS(&init_net, tmp))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700309 break;
310 }
Benjamin Therycf958ae32009-01-22 04:56:16 +0000311 init_net.ipv4.maxvif = tmp+1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700312 }
313
314 write_unlock_bh(&mrt_lock);
315
316 dev_set_allmulti(dev, -1);
317
Herbert Xue5ed6392005-10-03 14:35:55 -0700318 if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
Herbert Xu42f811b2007-06-04 23:34:44 -0700319 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700320 ip_rt_multicast_event(in_dev);
321 }
322
Wang Chen7dc00c82008-07-14 20:56:34 -0700323 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700324 unregister_netdevice(dev);
325
326 dev_put(dev);
327 return 0;
328}
329
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000330static inline void ipmr_cache_free(struct mfc_cache *c)
331{
332 release_net(mfc_net(c));
333 kmem_cache_free(mrt_cachep, c);
334}
335
Linus Torvalds1da177e2005-04-16 15:20:36 -0700336/* Destroy an unresolved cache entry, killing queued skbs
337 and reporting error to netlink readers.
338 */
339
340static void ipmr_destroy_unres(struct mfc_cache *c)
341{
342 struct sk_buff *skb;
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -0700343 struct nlmsgerr *e;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700344
345 atomic_dec(&cache_resolve_queue_len);
346
Jianjun Kongc354e122008-11-03 00:28:02 -0800347 while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700348 if (ip_hdr(skb)->version == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700349 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
350 nlh->nlmsg_type = NLMSG_ERROR;
351 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
352 skb_trim(skb, nlh->nlmsg_len);
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -0700353 e = NLMSG_DATA(nlh);
354 e->error = -ETIMEDOUT;
355 memset(&e->msg, 0, sizeof(e->msg));
Thomas Graf2942e902006-08-15 00:30:25 -0700356
Denis V. Lunev97c53ca2007-11-19 22:26:51 -0800357 rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700358 } else
359 kfree_skb(skb);
360 }
361
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000362 ipmr_cache_free(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700363}
364
365
366/* Single timer process for all the unresolved queue. */
367
368static void ipmr_expire_process(unsigned long dummy)
369{
370 unsigned long now;
371 unsigned long expires;
372 struct mfc_cache *c, **cp;
373
374 if (!spin_trylock(&mfc_unres_lock)) {
375 mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
376 return;
377 }
378
379 if (atomic_read(&cache_resolve_queue_len) == 0)
380 goto out;
381
382 now = jiffies;
383 expires = 10*HZ;
384 cp = &mfc_unres_queue;
385
386 while ((c=*cp) != NULL) {
387 if (time_after(c->mfc_un.unres.expires, now)) {
388 unsigned long interval = c->mfc_un.unres.expires - now;
389 if (interval < expires)
390 expires = interval;
391 cp = &c->next;
392 continue;
393 }
394
395 *cp = c->next;
396
397 ipmr_destroy_unres(c);
398 }
399
400 if (atomic_read(&cache_resolve_queue_len))
401 mod_timer(&ipmr_expire_timer, jiffies + expires);
402
403out:
404 spin_unlock(&mfc_unres_lock);
405}
406
407/* Fill oifs list. It is called under write locked mrt_lock. */
408
Baruch Evend1b04c02005-07-30 17:41:59 -0700409static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700410{
411 int vifi;
412
413 cache->mfc_un.res.minvif = MAXVIFS;
414 cache->mfc_un.res.maxvif = 0;
415 memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
416
Benjamin Therycf958ae32009-01-22 04:56:16 +0000417 for (vifi = 0; vifi < init_net.ipv4.maxvif; vifi++) {
418 if (VIF_EXISTS(&init_net, vifi) &&
419 ttls[vifi] && ttls[vifi] < 255) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700420 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
421 if (cache->mfc_un.res.minvif > vifi)
422 cache->mfc_un.res.minvif = vifi;
423 if (cache->mfc_un.res.maxvif <= vifi)
424 cache->mfc_un.res.maxvif = vifi + 1;
425 }
426 }
427}
428
429static int vif_add(struct vifctl *vifc, int mrtsock)
430{
431 int vifi = vifc->vifc_vifi;
Benjamin Therycf958ae32009-01-22 04:56:16 +0000432 struct vif_device *v = &init_net.ipv4.vif_table[vifi];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700433 struct net_device *dev;
434 struct in_device *in_dev;
Wang Chend6070322008-07-14 20:55:26 -0700435 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700436
437 /* Is vif busy ? */
Benjamin Therycf958ae32009-01-22 04:56:16 +0000438 if (VIF_EXISTS(&init_net, vifi))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700439 return -EADDRINUSE;
440
441 switch (vifc->vifc_flags) {
442#ifdef CONFIG_IP_PIMSM
443 case VIFF_REGISTER:
444 /*
445 * Special Purpose VIF in PIM
446 * All the packets will be sent to the daemon
447 */
448 if (reg_vif_num >= 0)
449 return -EADDRINUSE;
450 dev = ipmr_reg_vif();
451 if (!dev)
452 return -ENOBUFS;
Wang Chend6070322008-07-14 20:55:26 -0700453 err = dev_set_allmulti(dev, 1);
454 if (err) {
455 unregister_netdevice(dev);
Wang Chen7dc00c82008-07-14 20:56:34 -0700456 dev_put(dev);
Wang Chend6070322008-07-14 20:55:26 -0700457 return err;
458 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700459 break;
460#endif
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900461 case VIFF_TUNNEL:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700462 dev = ipmr_new_tunnel(vifc);
463 if (!dev)
464 return -ENOBUFS;
Wang Chend6070322008-07-14 20:55:26 -0700465 err = dev_set_allmulti(dev, 1);
466 if (err) {
467 ipmr_del_tunnel(dev, vifc);
Wang Chen7dc00c82008-07-14 20:56:34 -0700468 dev_put(dev);
Wang Chend6070322008-07-14 20:55:26 -0700469 return err;
470 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700471 break;
472 case 0:
Denis V. Lunev1ab35272008-01-22 22:04:30 -0800473 dev = ip_dev_find(&init_net, vifc->vifc_lcl_addr.s_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700474 if (!dev)
475 return -EADDRNOTAVAIL;
Wang Chend6070322008-07-14 20:55:26 -0700476 err = dev_set_allmulti(dev, 1);
Wang Chen7dc00c82008-07-14 20:56:34 -0700477 if (err) {
478 dev_put(dev);
Wang Chend6070322008-07-14 20:55:26 -0700479 return err;
Wang Chen7dc00c82008-07-14 20:56:34 -0700480 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700481 break;
482 default:
483 return -EINVAL;
484 }
485
Herbert Xue5ed6392005-10-03 14:35:55 -0700486 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700487 return -EADDRNOTAVAIL;
Herbert Xu42f811b2007-06-04 23:34:44 -0700488 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700489 ip_rt_multicast_event(in_dev);
490
491 /*
492 * Fill in the VIF structures
493 */
Jianjun Kongc354e122008-11-03 00:28:02 -0800494 v->rate_limit = vifc->vifc_rate_limit;
495 v->local = vifc->vifc_lcl_addr.s_addr;
496 v->remote = vifc->vifc_rmt_addr.s_addr;
497 v->flags = vifc->vifc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700498 if (!mrtsock)
499 v->flags |= VIFF_STATIC;
Jianjun Kongc354e122008-11-03 00:28:02 -0800500 v->threshold = vifc->vifc_threshold;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700501 v->bytes_in = 0;
502 v->bytes_out = 0;
503 v->pkt_in = 0;
504 v->pkt_out = 0;
505 v->link = dev->ifindex;
506 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
507 v->link = dev->iflink;
508
509 /* And finish update writing critical data */
510 write_lock_bh(&mrt_lock);
Jianjun Kongc354e122008-11-03 00:28:02 -0800511 v->dev = dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700512#ifdef CONFIG_IP_PIMSM
513 if (v->flags&VIFF_REGISTER)
514 reg_vif_num = vifi;
515#endif
Benjamin Therycf958ae32009-01-22 04:56:16 +0000516 if (vifi+1 > init_net.ipv4.maxvif)
517 init_net.ipv4.maxvif = vifi+1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700518 write_unlock_bh(&mrt_lock);
519 return 0;
520}
521
Al Viro114c7842006-09-27 18:39:29 -0700522static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700523{
Jianjun Kongc354e122008-11-03 00:28:02 -0800524 int line = MFC_HASH(mcastgrp, origin);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700525 struct mfc_cache *c;
526
527 for (c=mfc_cache_array[line]; c; c = c->next) {
528 if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
529 break;
530 }
531 return c;
532}
533
534/*
535 * Allocate a multicast cache entry
536 */
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000537static struct mfc_cache *ipmr_cache_alloc(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700538{
Jianjun Kongc354e122008-11-03 00:28:02 -0800539 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
540 if (c == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700541 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700542 c->mfc_un.res.minvif = MAXVIFS;
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000543 mfc_net_set(c, net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700544 return c;
545}
546
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000547static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700548{
Jianjun Kongc354e122008-11-03 00:28:02 -0800549 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
550 if (c == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700551 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700552 skb_queue_head_init(&c->mfc_un.unres.unresolved);
553 c->mfc_un.unres.expires = jiffies + 10*HZ;
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000554 mfc_net_set(c, net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700555 return c;
556}
557
558/*
559 * A cache entry has gone into a resolved state from queued
560 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900561
Linus Torvalds1da177e2005-04-16 15:20:36 -0700562static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
563{
564 struct sk_buff *skb;
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -0700565 struct nlmsgerr *e;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700566
567 /*
568 * Play the pending entries through our router
569 */
570
Jianjun Kongc354e122008-11-03 00:28:02 -0800571 while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700572 if (ip_hdr(skb)->version == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700573 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
574
575 if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -0700576 nlh->nlmsg_len = (skb_tail_pointer(skb) -
577 (u8 *)nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700578 } else {
579 nlh->nlmsg_type = NLMSG_ERROR;
580 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
581 skb_trim(skb, nlh->nlmsg_len);
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -0700582 e = NLMSG_DATA(nlh);
583 e->error = -EMSGSIZE;
584 memset(&e->msg, 0, sizeof(e->msg));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700585 }
Thomas Graf2942e902006-08-15 00:30:25 -0700586
Denis V. Lunev97c53ca2007-11-19 22:26:51 -0800587 rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700588 } else
589 ip_mr_forward(skb, c, 0);
590 }
591}
592
593/*
594 * Bounce a cache query up to mrouted. We could use netlink for this but mrouted
595 * expects the following bizarre scheme.
596 *
597 * Called under mrt_lock.
598 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900599
Linus Torvalds1da177e2005-04-16 15:20:36 -0700600static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
601{
602 struct sk_buff *skb;
Arnaldo Carvalho de Meloc9bdd4b2007-03-12 20:09:15 -0300603 const int ihl = ip_hdrlen(pkt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700604 struct igmphdr *igmp;
605 struct igmpmsg *msg;
606 int ret;
607
608#ifdef CONFIG_IP_PIMSM
609 if (assert == IGMPMSG_WHOLEPKT)
610 skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
611 else
612#endif
613 skb = alloc_skb(128, GFP_ATOMIC);
614
Stephen Hemminger132adf52007-03-08 20:44:43 -0800615 if (!skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700616 return -ENOBUFS;
617
618#ifdef CONFIG_IP_PIMSM
619 if (assert == IGMPMSG_WHOLEPKT) {
620 /* Ugly, but we have no choice with this interface.
621 Duplicate old header, fix ihl, length etc.
622 And all this only to mangle msg->im_msgtype and
623 to set msg->im_mbz to "mbz" :-)
624 */
Arnaldo Carvalho de Melo878c8142007-03-11 22:38:29 -0300625 skb_push(skb, sizeof(struct iphdr));
626 skb_reset_network_header(skb);
Arnaldo Carvalho de Melobadff6d2007-03-13 13:06:52 -0300627 skb_reset_transport_header(skb);
Arnaldo Carvalho de Melo0272ffc2007-03-12 20:05:39 -0300628 msg = (struct igmpmsg *)skb_network_header(skb);
Arnaldo Carvalho de Melod56f90a2007-04-10 20:50:43 -0700629 memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700630 msg->im_msgtype = IGMPMSG_WHOLEPKT;
631 msg->im_mbz = 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900632 msg->im_vif = reg_vif_num;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700633 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
634 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
635 sizeof(struct iphdr));
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900636 } else
Linus Torvalds1da177e2005-04-16 15:20:36 -0700637#endif
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900638 {
639
Linus Torvalds1da177e2005-04-16 15:20:36 -0700640 /*
641 * Copy the IP header
642 */
643
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -0700644 skb->network_header = skb->tail;
Arnaldo Carvalho de Meloddc7b8e2007-03-15 21:42:27 -0300645 skb_put(skb, ihl);
Arnaldo Carvalho de Melo27d7ff42007-03-31 11:55:19 -0300646 skb_copy_to_linear_data(skb, pkt->data, ihl);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700647 ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */
648 msg = (struct igmpmsg *)skb_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700649 msg->im_vif = vifi;
650 skb->dst = dst_clone(pkt->dst);
651
652 /*
653 * Add our header
654 */
655
Jianjun Kongc354e122008-11-03 00:28:02 -0800656 igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700657 igmp->type =
658 msg->im_msgtype = assert;
659 igmp->code = 0;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700660 ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700661 skb->transport_header = skb->network_header;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900662 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700663
Benjamin Thery70a269e2009-01-22 04:56:15 +0000664 if (init_net.ipv4.mroute_sk == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700665 kfree_skb(skb);
666 return -EINVAL;
667 }
668
669 /*
670 * Deliver to mrouted
671 */
Benjamin Thery70a269e2009-01-22 04:56:15 +0000672 ret = sock_queue_rcv_skb(init_net.ipv4.mroute_sk, skb);
673 if (ret < 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700674 if (net_ratelimit())
675 printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
676 kfree_skb(skb);
677 }
678
679 return ret;
680}
681
682/*
683 * Queue a packet for resolution. It gets locked cache entry!
684 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900685
Linus Torvalds1da177e2005-04-16 15:20:36 -0700686static int
687ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
688{
689 int err;
690 struct mfc_cache *c;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700691 const struct iphdr *iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700692
693 spin_lock_bh(&mfc_unres_lock);
694 for (c=mfc_unres_queue; c; c=c->next) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700695 if (c->mfc_mcastgrp == iph->daddr &&
696 c->mfc_origin == iph->saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700697 break;
698 }
699
700 if (c == NULL) {
701 /*
702 * Create a new entry if allowable
703 */
704
Jianjun Kongc354e122008-11-03 00:28:02 -0800705 if (atomic_read(&cache_resolve_queue_len) >= 10 ||
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000706 (c = ipmr_cache_alloc_unres(&init_net)) == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700707 spin_unlock_bh(&mfc_unres_lock);
708
709 kfree_skb(skb);
710 return -ENOBUFS;
711 }
712
713 /*
714 * Fill in the new cache entry
715 */
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700716 c->mfc_parent = -1;
717 c->mfc_origin = iph->saddr;
718 c->mfc_mcastgrp = iph->daddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700719
720 /*
721 * Reflect first query at mrouted.
722 */
723 if ((err = ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE))<0) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900724 /* If the report failed throw the cache entry
Linus Torvalds1da177e2005-04-16 15:20:36 -0700725 out - Brad Parker
726 */
727 spin_unlock_bh(&mfc_unres_lock);
728
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000729 ipmr_cache_free(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700730 kfree_skb(skb);
731 return err;
732 }
733
734 atomic_inc(&cache_resolve_queue_len);
735 c->next = mfc_unres_queue;
736 mfc_unres_queue = c;
737
738 mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
739 }
740
741 /*
742 * See if we can append the packet
743 */
744 if (c->mfc_un.unres.unresolved.qlen>3) {
745 kfree_skb(skb);
746 err = -ENOBUFS;
747 } else {
Jianjun Kongc354e122008-11-03 00:28:02 -0800748 skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700749 err = 0;
750 }
751
752 spin_unlock_bh(&mfc_unres_lock);
753 return err;
754}
755
756/*
757 * MFC cache manipulation by user space mroute daemon
758 */
759
760static int ipmr_mfc_delete(struct mfcctl *mfc)
761{
762 int line;
763 struct mfc_cache *c, **cp;
764
Jianjun Kongc354e122008-11-03 00:28:02 -0800765 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700766
767 for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
768 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
769 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
770 write_lock_bh(&mrt_lock);
771 *cp = c->next;
772 write_unlock_bh(&mrt_lock);
773
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000774 ipmr_cache_free(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700775 return 0;
776 }
777 }
778 return -ENOENT;
779}
780
781static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
782{
783 int line;
784 struct mfc_cache *uc, *c, **cp;
785
Jianjun Kongc354e122008-11-03 00:28:02 -0800786 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700787
788 for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
789 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
790 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
791 break;
792 }
793
794 if (c != NULL) {
795 write_lock_bh(&mrt_lock);
796 c->mfc_parent = mfc->mfcc_parent;
Baruch Evend1b04c02005-07-30 17:41:59 -0700797 ipmr_update_thresholds(c, mfc->mfcc_ttls);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700798 if (!mrtsock)
799 c->mfc_flags |= MFC_STATIC;
800 write_unlock_bh(&mrt_lock);
801 return 0;
802 }
803
Joe Perchesf97c1e02007-12-16 13:45:43 -0800804 if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700805 return -EINVAL;
806
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000807 c = ipmr_cache_alloc(&init_net);
Jianjun Kongc354e122008-11-03 00:28:02 -0800808 if (c == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700809 return -ENOMEM;
810
Jianjun Kongc354e122008-11-03 00:28:02 -0800811 c->mfc_origin = mfc->mfcc_origin.s_addr;
812 c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
813 c->mfc_parent = mfc->mfcc_parent;
Baruch Evend1b04c02005-07-30 17:41:59 -0700814 ipmr_update_thresholds(c, mfc->mfcc_ttls);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700815 if (!mrtsock)
816 c->mfc_flags |= MFC_STATIC;
817
818 write_lock_bh(&mrt_lock);
819 c->next = mfc_cache_array[line];
820 mfc_cache_array[line] = c;
821 write_unlock_bh(&mrt_lock);
822
823 /*
824 * Check to see if we resolved a queued list. If so we
825 * need to send on the frames and tidy up.
826 */
827 spin_lock_bh(&mfc_unres_lock);
828 for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
829 cp = &uc->next) {
830 if (uc->mfc_origin == c->mfc_origin &&
831 uc->mfc_mcastgrp == c->mfc_mcastgrp) {
832 *cp = uc->next;
833 if (atomic_dec_and_test(&cache_resolve_queue_len))
834 del_timer(&ipmr_expire_timer);
835 break;
836 }
837 }
838 spin_unlock_bh(&mfc_unres_lock);
839
840 if (uc) {
841 ipmr_cache_resolve(uc, c);
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000842 ipmr_cache_free(uc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700843 }
844 return 0;
845}
846
847/*
848 * Close the multicast socket, and clear the vif tables etc
849 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900850
Linus Torvalds1da177e2005-04-16 15:20:36 -0700851static void mroute_clean_tables(struct sock *sk)
852{
853 int i;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900854
Linus Torvalds1da177e2005-04-16 15:20:36 -0700855 /*
856 * Shut down all active vif entries
857 */
Benjamin Therycf958ae32009-01-22 04:56:16 +0000858 for (i = 0; i < init_net.ipv4.maxvif; i++) {
859 if (!(init_net.ipv4.vif_table[i].flags&VIFF_STATIC))
Wang Chen7dc00c82008-07-14 20:56:34 -0700860 vif_delete(i, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700861 }
862
863 /*
864 * Wipe the cache
865 */
Jianjun Kongc354e122008-11-03 00:28:02 -0800866 for (i=0; i<MFC_LINES; i++) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700867 struct mfc_cache *c, **cp;
868
869 cp = &mfc_cache_array[i];
870 while ((c = *cp) != NULL) {
871 if (c->mfc_flags&MFC_STATIC) {
872 cp = &c->next;
873 continue;
874 }
875 write_lock_bh(&mrt_lock);
876 *cp = c->next;
877 write_unlock_bh(&mrt_lock);
878
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000879 ipmr_cache_free(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700880 }
881 }
882
883 if (atomic_read(&cache_resolve_queue_len) != 0) {
884 struct mfc_cache *c;
885
886 spin_lock_bh(&mfc_unres_lock);
887 while (mfc_unres_queue != NULL) {
888 c = mfc_unres_queue;
889 mfc_unres_queue = c->next;
890 spin_unlock_bh(&mfc_unres_lock);
891
892 ipmr_destroy_unres(c);
893
894 spin_lock_bh(&mfc_unres_lock);
895 }
896 spin_unlock_bh(&mfc_unres_lock);
897 }
898}
899
900static void mrtsock_destruct(struct sock *sk)
901{
902 rtnl_lock();
Benjamin Thery70a269e2009-01-22 04:56:15 +0000903 if (sk == init_net.ipv4.mroute_sk) {
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900904 IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)--;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700905
906 write_lock_bh(&mrt_lock);
Benjamin Thery70a269e2009-01-22 04:56:15 +0000907 init_net.ipv4.mroute_sk = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700908 write_unlock_bh(&mrt_lock);
909
910 mroute_clean_tables(sk);
911 }
912 rtnl_unlock();
913}
914
915/*
916 * Socket options and virtual interface manipulation. The whole
917 * virtual interface system is a complete heap, but unfortunately
918 * that's how BSD mrouted happens to think. Maybe one day with a proper
919 * MOSPF/PIM router set up we can clean this up.
920 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900921
Jianjun Kongc354e122008-11-03 00:28:02 -0800922int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int optlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700923{
924 int ret;
925 struct vifctl vif;
926 struct mfcctl mfc;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900927
Stephen Hemminger132adf52007-03-08 20:44:43 -0800928 if (optname != MRT_INIT) {
Benjamin Thery70a269e2009-01-22 04:56:15 +0000929 if (sk != init_net.ipv4.mroute_sk && !capable(CAP_NET_ADMIN))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700930 return -EACCES;
931 }
932
Stephen Hemminger132adf52007-03-08 20:44:43 -0800933 switch (optname) {
934 case MRT_INIT:
935 if (sk->sk_type != SOCK_RAW ||
936 inet_sk(sk)->num != IPPROTO_IGMP)
937 return -EOPNOTSUPP;
Jianjun Kongc354e122008-11-03 00:28:02 -0800938 if (optlen != sizeof(int))
Stephen Hemminger132adf52007-03-08 20:44:43 -0800939 return -ENOPROTOOPT;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700940
Stephen Hemminger132adf52007-03-08 20:44:43 -0800941 rtnl_lock();
Benjamin Thery70a269e2009-01-22 04:56:15 +0000942 if (init_net.ipv4.mroute_sk) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700943 rtnl_unlock();
Stephen Hemminger132adf52007-03-08 20:44:43 -0800944 return -EADDRINUSE;
945 }
946
947 ret = ip_ra_control(sk, 1, mrtsock_destruct);
948 if (ret == 0) {
949 write_lock_bh(&mrt_lock);
Benjamin Thery70a269e2009-01-22 04:56:15 +0000950 init_net.ipv4.mroute_sk = sk;
Stephen Hemminger132adf52007-03-08 20:44:43 -0800951 write_unlock_bh(&mrt_lock);
952
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900953 IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)++;
Stephen Hemminger132adf52007-03-08 20:44:43 -0800954 }
955 rtnl_unlock();
956 return ret;
957 case MRT_DONE:
Benjamin Thery70a269e2009-01-22 04:56:15 +0000958 if (sk != init_net.ipv4.mroute_sk)
Stephen Hemminger132adf52007-03-08 20:44:43 -0800959 return -EACCES;
960 return ip_ra_control(sk, 0, NULL);
961 case MRT_ADD_VIF:
962 case MRT_DEL_VIF:
Jianjun Kongc354e122008-11-03 00:28:02 -0800963 if (optlen != sizeof(vif))
Stephen Hemminger132adf52007-03-08 20:44:43 -0800964 return -EINVAL;
Jianjun Kongc354e122008-11-03 00:28:02 -0800965 if (copy_from_user(&vif, optval, sizeof(vif)))
Stephen Hemminger132adf52007-03-08 20:44:43 -0800966 return -EFAULT;
967 if (vif.vifc_vifi >= MAXVIFS)
968 return -ENFILE;
969 rtnl_lock();
Jianjun Kongc354e122008-11-03 00:28:02 -0800970 if (optname == MRT_ADD_VIF) {
Benjamin Thery70a269e2009-01-22 04:56:15 +0000971 ret = vif_add(&vif, sk == init_net.ipv4.mroute_sk);
Stephen Hemminger132adf52007-03-08 20:44:43 -0800972 } else {
Wang Chen7dc00c82008-07-14 20:56:34 -0700973 ret = vif_delete(vif.vifc_vifi, 0);
Stephen Hemminger132adf52007-03-08 20:44:43 -0800974 }
975 rtnl_unlock();
976 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700977
978 /*
979 * Manipulate the forwarding caches. These live
980 * in a sort of kernel/user symbiosis.
981 */
Stephen Hemminger132adf52007-03-08 20:44:43 -0800982 case MRT_ADD_MFC:
983 case MRT_DEL_MFC:
Jianjun Kongc354e122008-11-03 00:28:02 -0800984 if (optlen != sizeof(mfc))
Stephen Hemminger132adf52007-03-08 20:44:43 -0800985 return -EINVAL;
Jianjun Kongc354e122008-11-03 00:28:02 -0800986 if (copy_from_user(&mfc, optval, sizeof(mfc)))
Stephen Hemminger132adf52007-03-08 20:44:43 -0800987 return -EFAULT;
988 rtnl_lock();
Jianjun Kongc354e122008-11-03 00:28:02 -0800989 if (optname == MRT_DEL_MFC)
Stephen Hemminger132adf52007-03-08 20:44:43 -0800990 ret = ipmr_mfc_delete(&mfc);
991 else
Benjamin Thery70a269e2009-01-22 04:56:15 +0000992 ret = ipmr_mfc_add(&mfc, sk == init_net.ipv4.mroute_sk);
Stephen Hemminger132adf52007-03-08 20:44:43 -0800993 rtnl_unlock();
994 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700995 /*
996 * Control PIM assert.
997 */
Stephen Hemminger132adf52007-03-08 20:44:43 -0800998 case MRT_ASSERT:
999 {
1000 int v;
1001 if (get_user(v,(int __user *)optval))
1002 return -EFAULT;
1003 mroute_do_assert=(v)?1:0;
1004 return 0;
1005 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001006#ifdef CONFIG_IP_PIMSM
Stephen Hemminger132adf52007-03-08 20:44:43 -08001007 case MRT_PIM:
1008 {
Stephen Hemmingerba93ef72008-01-21 17:28:59 -08001009 int v;
1010
Stephen Hemminger132adf52007-03-08 20:44:43 -08001011 if (get_user(v,(int __user *)optval))
1012 return -EFAULT;
Stephen Hemmingerba93ef72008-01-21 17:28:59 -08001013 v = (v) ? 1 : 0;
1014
Stephen Hemminger132adf52007-03-08 20:44:43 -08001015 rtnl_lock();
1016 ret = 0;
1017 if (v != mroute_do_pim) {
1018 mroute_do_pim = v;
1019 mroute_do_assert = v;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001020#ifdef CONFIG_IP_PIMSM_V2
Stephen Hemminger132adf52007-03-08 20:44:43 -08001021 if (mroute_do_pim)
1022 ret = inet_add_protocol(&pim_protocol,
1023 IPPROTO_PIM);
1024 else
1025 ret = inet_del_protocol(&pim_protocol,
1026 IPPROTO_PIM);
1027 if (ret < 0)
1028 ret = -EAGAIN;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001029#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001030 }
Stephen Hemminger132adf52007-03-08 20:44:43 -08001031 rtnl_unlock();
1032 return ret;
1033 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001034#endif
Stephen Hemminger132adf52007-03-08 20:44:43 -08001035 /*
1036 * Spurious command, or MRT_VERSION which you cannot
1037 * set.
1038 */
1039 default:
1040 return -ENOPROTOOPT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001041 }
1042}
1043
1044/*
1045 * Getsock opt support for the multicast routing system.
1046 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001047
Jianjun Kongc354e122008-11-03 00:28:02 -08001048int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001049{
1050 int olr;
1051 int val;
1052
Jianjun Kongc354e122008-11-03 00:28:02 -08001053 if (optname != MRT_VERSION &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001054#ifdef CONFIG_IP_PIMSM
1055 optname!=MRT_PIM &&
1056#endif
1057 optname!=MRT_ASSERT)
1058 return -ENOPROTOOPT;
1059
1060 if (get_user(olr, optlen))
1061 return -EFAULT;
1062
1063 olr = min_t(unsigned int, olr, sizeof(int));
1064 if (olr < 0)
1065 return -EINVAL;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001066
Jianjun Kongc354e122008-11-03 00:28:02 -08001067 if (put_user(olr, optlen))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001068 return -EFAULT;
Jianjun Kongc354e122008-11-03 00:28:02 -08001069 if (optname == MRT_VERSION)
1070 val = 0x0305;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001071#ifdef CONFIG_IP_PIMSM
Jianjun Kongc354e122008-11-03 00:28:02 -08001072 else if (optname == MRT_PIM)
1073 val = mroute_do_pim;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001074#endif
1075 else
Jianjun Kongc354e122008-11-03 00:28:02 -08001076 val = mroute_do_assert;
1077 if (copy_to_user(optval, &val, olr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001078 return -EFAULT;
1079 return 0;
1080}
1081
1082/*
1083 * The IP multicast ioctl support routines.
1084 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001085
Linus Torvalds1da177e2005-04-16 15:20:36 -07001086int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1087{
1088 struct sioc_sg_req sr;
1089 struct sioc_vif_req vr;
1090 struct vif_device *vif;
1091 struct mfc_cache *c;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001092
Stephen Hemminger132adf52007-03-08 20:44:43 -08001093 switch (cmd) {
1094 case SIOCGETVIFCNT:
Jianjun Kongc354e122008-11-03 00:28:02 -08001095 if (copy_from_user(&vr, arg, sizeof(vr)))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001096 return -EFAULT;
Benjamin Therycf958ae32009-01-22 04:56:16 +00001097 if (vr.vifi >= init_net.ipv4.maxvif)
Stephen Hemminger132adf52007-03-08 20:44:43 -08001098 return -EINVAL;
1099 read_lock(&mrt_lock);
Benjamin Therycf958ae32009-01-22 04:56:16 +00001100 vif = &init_net.ipv4.vif_table[vr.vifi];
1101 if (VIF_EXISTS(&init_net, vr.vifi)) {
Jianjun Kongc354e122008-11-03 00:28:02 -08001102 vr.icount = vif->pkt_in;
1103 vr.ocount = vif->pkt_out;
1104 vr.ibytes = vif->bytes_in;
1105 vr.obytes = vif->bytes_out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001106 read_unlock(&mrt_lock);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001107
Jianjun Kongc354e122008-11-03 00:28:02 -08001108 if (copy_to_user(arg, &vr, sizeof(vr)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001109 return -EFAULT;
Stephen Hemminger132adf52007-03-08 20:44:43 -08001110 return 0;
1111 }
1112 read_unlock(&mrt_lock);
1113 return -EADDRNOTAVAIL;
1114 case SIOCGETSGCNT:
Jianjun Kongc354e122008-11-03 00:28:02 -08001115 if (copy_from_user(&sr, arg, sizeof(sr)))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001116 return -EFAULT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001117
Stephen Hemminger132adf52007-03-08 20:44:43 -08001118 read_lock(&mrt_lock);
1119 c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr);
1120 if (c) {
1121 sr.pktcnt = c->mfc_un.res.pkt;
1122 sr.bytecnt = c->mfc_un.res.bytes;
1123 sr.wrong_if = c->mfc_un.res.wrong_if;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001124 read_unlock(&mrt_lock);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001125
Jianjun Kongc354e122008-11-03 00:28:02 -08001126 if (copy_to_user(arg, &sr, sizeof(sr)))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001127 return -EFAULT;
1128 return 0;
1129 }
1130 read_unlock(&mrt_lock);
1131 return -EADDRNOTAVAIL;
1132 default:
1133 return -ENOIOCTLCMD;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001134 }
1135}
1136
1137
1138static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1139{
Eric W. Biedermane9dc8652007-09-12 13:02:17 +02001140 struct net_device *dev = ptr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001141 struct vif_device *v;
1142 int ct;
Eric W. Biedermane9dc8652007-09-12 13:02:17 +02001143
YOSHIFUJI Hideaki721499e2008-07-19 22:34:43 -07001144 if (!net_eq(dev_net(dev), &init_net))
Eric W. Biedermane9dc8652007-09-12 13:02:17 +02001145 return NOTIFY_DONE;
1146
Linus Torvalds1da177e2005-04-16 15:20:36 -07001147 if (event != NETDEV_UNREGISTER)
1148 return NOTIFY_DONE;
Benjamin Therycf958ae32009-01-22 04:56:16 +00001149 v = &init_net.ipv4.vif_table[0];
1150 for (ct = 0; ct < init_net.ipv4.maxvif; ct++, v++) {
Jianjun Kongc354e122008-11-03 00:28:02 -08001151 if (v->dev == dev)
Wang Chen7dc00c82008-07-14 20:56:34 -07001152 vif_delete(ct, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001153 }
1154 return NOTIFY_DONE;
1155}
1156
1157
Jianjun Kongc354e122008-11-03 00:28:02 -08001158static struct notifier_block ip_mr_notifier = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001159 .notifier_call = ipmr_device_event,
1160};
1161
1162/*
1163 * Encapsulate a packet by attaching a valid IPIP header to it.
1164 * This avoids tunnel drivers and other mess and gives us the speed so
1165 * important for multicast video.
1166 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001167
Al Viro114c7842006-09-27 18:39:29 -07001168static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001169{
Arnaldo Carvalho de Melo8856dfa2007-03-10 19:40:39 -03001170 struct iphdr *iph;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001171 struct iphdr *old_iph = ip_hdr(skb);
Arnaldo Carvalho de Melo8856dfa2007-03-10 19:40:39 -03001172
1173 skb_push(skb, sizeof(struct iphdr));
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -07001174 skb->transport_header = skb->network_header;
Arnaldo Carvalho de Melo8856dfa2007-03-10 19:40:39 -03001175 skb_reset_network_header(skb);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001176 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001177
1178 iph->version = 4;
Arnaldo Carvalho de Meloe023dd62007-03-12 20:09:36 -03001179 iph->tos = old_iph->tos;
1180 iph->ttl = old_iph->ttl;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001181 iph->frag_off = 0;
1182 iph->daddr = daddr;
1183 iph->saddr = saddr;
1184 iph->protocol = IPPROTO_IPIP;
1185 iph->ihl = 5;
1186 iph->tot_len = htons(skb->len);
1187 ip_select_ident(iph, skb->dst, NULL);
1188 ip_send_check(iph);
1189
Linus Torvalds1da177e2005-04-16 15:20:36 -07001190 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1191 nf_reset(skb);
1192}
1193
1194static inline int ipmr_forward_finish(struct sk_buff *skb)
1195{
1196 struct ip_options * opt = &(IPCB(skb)->opt);
1197
Pavel Emelyanov7c73a6f2008-07-16 20:20:11 -07001198 IP_INC_STATS_BH(dev_net(skb->dst->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001199
1200 if (unlikely(opt->optlen))
1201 ip_forward_options(skb);
1202
1203 return dst_output(skb);
1204}
1205
1206/*
1207 * Processing handlers for ipmr_forward
1208 */
1209
1210static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1211{
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001212 const struct iphdr *iph = ip_hdr(skb);
Benjamin Therycf958ae32009-01-22 04:56:16 +00001213 struct vif_device *vif = &init_net.ipv4.vif_table[vifi];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001214 struct net_device *dev;
1215 struct rtable *rt;
1216 int encap = 0;
1217
1218 if (vif->dev == NULL)
1219 goto out_free;
1220
1221#ifdef CONFIG_IP_PIMSM
1222 if (vif->flags & VIFF_REGISTER) {
1223 vif->pkt_out++;
Jianjun Kongc354e122008-11-03 00:28:02 -08001224 vif->bytes_out += skb->len;
Pavel Emelyanovcf3677a2008-05-21 14:17:33 -07001225 vif->dev->stats.tx_bytes += skb->len;
1226 vif->dev->stats.tx_packets++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001227 ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
1228 kfree_skb(skb);
1229 return;
1230 }
1231#endif
1232
1233 if (vif->flags&VIFF_TUNNEL) {
1234 struct flowi fl = { .oif = vif->link,
1235 .nl_u = { .ip4_u =
1236 { .daddr = vif->remote,
1237 .saddr = vif->local,
1238 .tos = RT_TOS(iph->tos) } },
1239 .proto = IPPROTO_IPIP };
Denis V. Lunevf2063512008-01-22 22:07:34 -08001240 if (ip_route_output_key(&init_net, &rt, &fl))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001241 goto out_free;
1242 encap = sizeof(struct iphdr);
1243 } else {
1244 struct flowi fl = { .oif = vif->link,
1245 .nl_u = { .ip4_u =
1246 { .daddr = iph->daddr,
1247 .tos = RT_TOS(iph->tos) } },
1248 .proto = IPPROTO_IPIP };
Denis V. Lunevf2063512008-01-22 22:07:34 -08001249 if (ip_route_output_key(&init_net, &rt, &fl))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001250 goto out_free;
1251 }
1252
1253 dev = rt->u.dst.dev;
1254
1255 if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
1256 /* Do not fragment multicasts. Alas, IPv4 does not
1257 allow to send ICMP, so that packets will disappear
1258 to blackhole.
1259 */
1260
Pavel Emelyanov7c73a6f2008-07-16 20:20:11 -07001261 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001262 ip_rt_put(rt);
1263 goto out_free;
1264 }
1265
1266 encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;
1267
1268 if (skb_cow(skb, encap)) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001269 ip_rt_put(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001270 goto out_free;
1271 }
1272
1273 vif->pkt_out++;
Jianjun Kongc354e122008-11-03 00:28:02 -08001274 vif->bytes_out += skb->len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001275
1276 dst_release(skb->dst);
1277 skb->dst = &rt->u.dst;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001278 ip_decrease_ttl(ip_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001279
1280 /* FIXME: forward and output firewalls used to be called here.
1281 * What do we do with netfilter? -- RR */
1282 if (vif->flags & VIFF_TUNNEL) {
1283 ip_encap(skb, vif->local, vif->remote);
1284 /* FIXME: extra output firewall step used to be here. --RR */
Pavel Emelyanov2f4c02d2008-05-21 14:16:14 -07001285 vif->dev->stats.tx_packets++;
1286 vif->dev->stats.tx_bytes += skb->len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001287 }
1288
1289 IPCB(skb)->flags |= IPSKB_FORWARDED;
1290
1291 /*
1292 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1293 * not only before forwarding, but after forwarding on all output
1294 * interfaces. It is clear, if mrouter runs a multicasting
1295 * program, it should receive packets not depending to what interface
1296 * program is joined.
1297 * If we will not make it, the program will have to join on all
1298 * interfaces. On the other hand, multihoming host (or router, but
1299 * not mrouter) cannot join to more than one interface - it will
1300 * result in receiving multiple packets.
1301 */
Patrick McHardy6e23ae22007-11-19 18:53:30 -08001302 NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001303 ipmr_forward_finish);
1304 return;
1305
1306out_free:
1307 kfree_skb(skb);
1308 return;
1309}
1310
1311static int ipmr_find_vif(struct net_device *dev)
1312{
1313 int ct;
Benjamin Therycf958ae32009-01-22 04:56:16 +00001314 for (ct = init_net.ipv4.maxvif-1; ct >= 0; ct--) {
1315 if (init_net.ipv4.vif_table[ct].dev == dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001316 break;
1317 }
1318 return ct;
1319}
1320
1321/* "local" means that we should preserve one skb (for local delivery) */
1322
1323static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
1324{
1325 int psend = -1;
1326 int vif, ct;
1327
1328 vif = cache->mfc_parent;
1329 cache->mfc_un.res.pkt++;
1330 cache->mfc_un.res.bytes += skb->len;
1331
1332 /*
1333 * Wrong interface: drop packet and (maybe) send PIM assert.
1334 */
Benjamin Therycf958ae32009-01-22 04:56:16 +00001335 if (init_net.ipv4.vif_table[vif].dev != skb->dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001336 int true_vifi;
1337
Eric Dumazetee6b9672008-03-05 18:30:47 -08001338 if (skb->rtable->fl.iif == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001339 /* It is our own packet, looped back.
1340 Very complicated situation...
1341
1342 The best workaround until routing daemons will be
1343 fixed is not to redistribute packet, if it was
1344 send through wrong interface. It means, that
1345 multicast applications WILL NOT work for
1346 (S,G), which have default multicast route pointing
1347 to wrong oif. In any case, it is not a good
1348 idea to use multicasting applications on router.
1349 */
1350 goto dont_forward;
1351 }
1352
1353 cache->mfc_un.res.wrong_if++;
1354 true_vifi = ipmr_find_vif(skb->dev);
1355
1356 if (true_vifi >= 0 && mroute_do_assert &&
1357 /* pimsm uses asserts, when switching from RPT to SPT,
1358 so that we cannot check that packet arrived on an oif.
1359 It is bad, but otherwise we would need to move pretty
1360 large chunk of pimd to kernel. Ough... --ANK
1361 */
1362 (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001363 time_after(jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001364 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1365 cache->mfc_un.res.last_assert = jiffies;
1366 ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF);
1367 }
1368 goto dont_forward;
1369 }
1370
Benjamin Therycf958ae32009-01-22 04:56:16 +00001371 init_net.ipv4.vif_table[vif].pkt_in++;
1372 init_net.ipv4.vif_table[vif].bytes_in += skb->len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001373
1374 /*
1375 * Forward the frame
1376 */
1377 for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001378 if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001379 if (psend != -1) {
1380 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1381 if (skb2)
1382 ipmr_queue_xmit(skb2, cache, psend);
1383 }
Jianjun Kongc354e122008-11-03 00:28:02 -08001384 psend = ct;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001385 }
1386 }
1387 if (psend != -1) {
1388 if (local) {
1389 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1390 if (skb2)
1391 ipmr_queue_xmit(skb2, cache, psend);
1392 } else {
1393 ipmr_queue_xmit(skb, cache, psend);
1394 return 0;
1395 }
1396 }
1397
1398dont_forward:
1399 if (!local)
1400 kfree_skb(skb);
1401 return 0;
1402}
1403
1404
1405/*
1406 * Multicast packets for forwarding arrive here
1407 */
1408
1409int ip_mr_input(struct sk_buff *skb)
1410{
1411 struct mfc_cache *cache;
Eric Dumazetee6b9672008-03-05 18:30:47 -08001412 int local = skb->rtable->rt_flags&RTCF_LOCAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001413
1414 /* Packet is looped back after forward, it should not be
1415 forwarded second time, but still can be delivered locally.
1416 */
1417 if (IPCB(skb)->flags&IPSKB_FORWARDED)
1418 goto dont_forward;
1419
1420 if (!local) {
1421 if (IPCB(skb)->opt.router_alert) {
1422 if (ip_call_ra_chain(skb))
1423 return 0;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001424 } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
Linus Torvalds1da177e2005-04-16 15:20:36 -07001425 /* IGMPv1 (and broken IGMPv2 implementations sort of
1426 Cisco IOS <= 11.2(8)) do not put router alert
1427 option to IGMP packets destined to routable
1428 groups. It is very bad, because it means
1429 that we can forward NO IGMP messages.
1430 */
1431 read_lock(&mrt_lock);
Benjamin Thery70a269e2009-01-22 04:56:15 +00001432 if (init_net.ipv4.mroute_sk) {
Patrick McHardy2715bcf2005-06-21 14:06:24 -07001433 nf_reset(skb);
Benjamin Thery70a269e2009-01-22 04:56:15 +00001434 raw_rcv(init_net.ipv4.mroute_sk, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001435 read_unlock(&mrt_lock);
1436 return 0;
1437 }
1438 read_unlock(&mrt_lock);
1439 }
1440 }
1441
1442 read_lock(&mrt_lock);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001443 cache = ipmr_cache_find(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001444
1445 /*
1446 * No usable cache entry
1447 */
Jianjun Kongc354e122008-11-03 00:28:02 -08001448 if (cache == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001449 int vif;
1450
1451 if (local) {
1452 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1453 ip_local_deliver(skb);
1454 if (skb2 == NULL) {
1455 read_unlock(&mrt_lock);
1456 return -ENOBUFS;
1457 }
1458 skb = skb2;
1459 }
1460
1461 vif = ipmr_find_vif(skb->dev);
1462 if (vif >= 0) {
1463 int err = ipmr_cache_unresolved(vif, skb);
1464 read_unlock(&mrt_lock);
1465
1466 return err;
1467 }
1468 read_unlock(&mrt_lock);
1469 kfree_skb(skb);
1470 return -ENODEV;
1471 }
1472
1473 ip_mr_forward(skb, cache, local);
1474
1475 read_unlock(&mrt_lock);
1476
1477 if (local)
1478 return ip_local_deliver(skb);
1479
1480 return 0;
1481
1482dont_forward:
1483 if (local)
1484 return ip_local_deliver(skb);
1485 kfree_skb(skb);
1486 return 0;
1487}
1488
#ifdef CONFIG_IP_PIMSM
/*
 * Common tail of the PIMv1/PIMv2 register receive paths: strip the outer
 * headers and feed the encapsulated packet back in through the register
 * vif's device.
 *
 * @skb:    received packet
 * @pimlen: length of the PIM header that precedes the inner IP header
 *
 * Returns 0 when the skb was handed to netif_rx(), 1 when it was not
 * accepted; in that case the skb is left untouched for the caller to free.
 */
static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
{
	struct iphdr *inner;
	struct net_device *rdev = NULL;

	inner = (struct iphdr *)(skb_transport_header(skb) + pimlen);
	/*
	   Check that:
	   a. packet is really destined to a multicast group
	   b. packet is not a NULL-REGISTER
	   c. packet is not truncated
	 */
	if (!ipv4_is_multicast(inner->daddr) ||
	    inner->tot_len == 0 ||
	    ntohs(inner->tot_len) + pimlen > skb->len)
		return 1;

	/* Take a reference on the register device while the lock is held. */
	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		rdev = init_net.ipv4.vif_table[reg_vif_num].dev;
	if (rdev)
		dev_hold(rdev);
	read_unlock(&mrt_lock);

	if (rdev == NULL)
		return 1;

	/* Make the inner IP header the packet's network header. */
	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)inner - skb->data);
	skb_reset_network_header(skb);

	skb->dev = rdev;
	skb->protocol = htons(ETH_P_IP);
	skb->ip_summed = 0;
	skb->pkt_type = PACKET_HOST;

	dst_release(skb->dst);
	skb->dst = NULL;

	rdev->stats.rx_bytes += skb->len;
	rdev->stats.rx_packets++;

	nf_reset(skb);
	netif_rx(skb);
	dev_put(rdev);

	return 0;
}
#endif
1535
#ifdef CONFIG_IP_PIMSM_V1
/*
 * Handle IGMP messages of PIMv1
 *
 * Accepts only PIMv1 register messages, and only while PIM is enabled.
 * The skb is consumed in every case; always returns 0.
 */

int pim_rcv_v1(struct sk_buff * skb)
{
	struct igmphdr *hdr;

	if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct iphdr)))
		goto drop;

	hdr = igmp_hdr(skb);

	if (!mroute_do_pim)
		goto drop;
	if (hdr->group != PIM_V1_VERSION || hdr->code != PIM_V1_REGISTER)
		goto drop;

	/* __pim_rcv() returns 0 once it has passed the skb on. */
	if (__pim_rcv(skb, sizeof(*hdr)) == 0)
		return 0;

drop:
	kfree_skb(skb);
	return 0;
}
#endif
1561
#ifdef CONFIG_IP_PIMSM_V2
/*
 * Receive handler for PIMv2 register messages.  The skb is consumed in
 * every case; always returns 0.
 */
static int pim_rcv(struct sk_buff * skb)
{
	struct pimreghdr *hdr;

	if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct iphdr)))
		goto drop;

	hdr = (struct pimreghdr *)skb_transport_header(skb);

	if (hdr->type != ((PIM_VERSION<<4)|(PIM_REGISTER)))
		goto drop;
	if (hdr->flags & PIM_NULL_REGISTER)
		goto drop;

	/*
	 * Accept a checksum over the PIM header alone; if that fails,
	 * fall back to a checksum over the whole packet.
	 */
	if (ip_compute_csum((void *)hdr, sizeof(*hdr)) != 0 &&
	    csum_fold(skb_checksum(skb, 0, skb->len, 0)))
		goto drop;

	/* __pim_rcv() returns 0 once it has passed the skb on. */
	if (__pim_rcv(skb, sizeof(*hdr)) == 0)
		return 0;

drop:
	kfree_skb(skb);
	return 0;
}
#endif
1584
1585static int
1586ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
1587{
1588 int ct;
1589 struct rtnexthop *nhp;
Benjamin Therycf958ae32009-01-22 04:56:16 +00001590 struct net_device *dev = init_net.ipv4.vif_table[c->mfc_parent].dev;
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001591 u8 *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001592 struct rtattr *mp_head;
1593
1594 if (dev)
1595 RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
1596
Jianjun Kongc354e122008-11-03 00:28:02 -08001597 mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001598
1599 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1600 if (c->mfc_un.res.ttls[ct] < 255) {
1601 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1602 goto rtattr_failure;
Jianjun Kongc354e122008-11-03 00:28:02 -08001603 nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001604 nhp->rtnh_flags = 0;
1605 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
Benjamin Therycf958ae32009-01-22 04:56:16 +00001606 nhp->rtnh_ifindex = init_net.ipv4.vif_table[ct].dev->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001607 nhp->rtnh_len = sizeof(*nhp);
1608 }
1609 }
1610 mp_head->rta_type = RTA_MULTIPATH;
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001611 mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001612 rtm->rtm_type = RTN_MULTICAST;
1613 return 1;
1614
1615rtattr_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001616 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001617 return -EMSGSIZE;
1618}
1619
1620int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1621{
1622 int err;
1623 struct mfc_cache *cache;
Eric Dumazetee6b9672008-03-05 18:30:47 -08001624 struct rtable *rt = skb->rtable;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001625
1626 read_lock(&mrt_lock);
1627 cache = ipmr_cache_find(rt->rt_src, rt->rt_dst);
1628
Jianjun Kongc354e122008-11-03 00:28:02 -08001629 if (cache == NULL) {
Alexey Kuznetsov72287492006-07-25 16:45:12 -07001630 struct sk_buff *skb2;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001631 struct iphdr *iph;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001632 struct net_device *dev;
1633 int vif;
1634
1635 if (nowait) {
1636 read_unlock(&mrt_lock);
1637 return -EAGAIN;
1638 }
1639
1640 dev = skb->dev;
1641 if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
1642 read_unlock(&mrt_lock);
1643 return -ENODEV;
1644 }
Alexey Kuznetsov72287492006-07-25 16:45:12 -07001645 skb2 = skb_clone(skb, GFP_ATOMIC);
1646 if (!skb2) {
1647 read_unlock(&mrt_lock);
1648 return -ENOMEM;
1649 }
1650
Arnaldo Carvalho de Meloe2d1bca2007-04-10 20:46:21 -07001651 skb_push(skb2, sizeof(struct iphdr));
1652 skb_reset_network_header(skb2);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001653 iph = ip_hdr(skb2);
1654 iph->ihl = sizeof(struct iphdr) >> 2;
1655 iph->saddr = rt->rt_src;
1656 iph->daddr = rt->rt_dst;
1657 iph->version = 0;
Alexey Kuznetsov72287492006-07-25 16:45:12 -07001658 err = ipmr_cache_unresolved(vif, skb2);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001659 read_unlock(&mrt_lock);
1660 return err;
1661 }
1662
1663 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1664 cache->mfc_flags |= MFC_NOTIFY;
1665 err = ipmr_fill_mroute(skb, cache, rtm);
1666 read_unlock(&mrt_lock);
1667 return err;
1668}
1669
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001670#ifdef CONFIG_PROC_FS
/*
 *	The /proc interfaces to multicast routing:
 *	/proc/net/ip_mr_cache and /proc/net/ip_mr_vif
 */
1674struct ipmr_vif_iter {
1675 int ct;
1676};
1677
1678static struct vif_device *ipmr_vif_seq_idx(struct ipmr_vif_iter *iter,
1679 loff_t pos)
1680{
Benjamin Therycf958ae32009-01-22 04:56:16 +00001681 for (iter->ct = 0; iter->ct < init_net.ipv4.maxvif; ++iter->ct) {
1682 if (!VIF_EXISTS(&init_net, iter->ct))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001683 continue;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001684 if (pos-- == 0)
Benjamin Therycf958ae32009-01-22 04:56:16 +00001685 return &init_net.ipv4.vif_table[iter->ct];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001686 }
1687 return NULL;
1688}
1689
1690static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
Stephen Hemmingerba93ef72008-01-21 17:28:59 -08001691 __acquires(mrt_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001692{
1693 read_lock(&mrt_lock);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001694 return *pos ? ipmr_vif_seq_idx(seq->private, *pos - 1)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001695 : SEQ_START_TOKEN;
1696}
1697
1698static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1699{
1700 struct ipmr_vif_iter *iter = seq->private;
1701
1702 ++*pos;
1703 if (v == SEQ_START_TOKEN)
1704 return ipmr_vif_seq_idx(iter, 0);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001705
Benjamin Therycf958ae32009-01-22 04:56:16 +00001706 while (++iter->ct < init_net.ipv4.maxvif) {
1707 if (!VIF_EXISTS(&init_net, iter->ct))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001708 continue;
Benjamin Therycf958ae32009-01-22 04:56:16 +00001709 return &init_net.ipv4.vif_table[iter->ct];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001710 }
1711 return NULL;
1712}
1713
1714static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
Stephen Hemmingerba93ef72008-01-21 17:28:59 -08001715 __releases(mrt_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001716{
1717 read_unlock(&mrt_lock);
1718}
1719
1720static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1721{
1722 if (v == SEQ_START_TOKEN) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001723 seq_puts(seq,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001724 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
1725 } else {
1726 const struct vif_device *vif = v;
1727 const char *name = vif->dev ? vif->dev->name : "none";
1728
1729 seq_printf(seq,
1730 "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
Benjamin Therycf958ae32009-01-22 04:56:16 +00001731 vif - init_net.ipv4.vif_table,
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001732 name, vif->bytes_in, vif->pkt_in,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001733 vif->bytes_out, vif->pkt_out,
1734 vif->flags, vif->local, vif->remote);
1735 }
1736 return 0;
1737}
1738
Stephen Hemmingerf6908082007-03-12 14:34:29 -07001739static const struct seq_operations ipmr_vif_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001740 .start = ipmr_vif_seq_start,
1741 .next = ipmr_vif_seq_next,
1742 .stop = ipmr_vif_seq_stop,
1743 .show = ipmr_vif_seq_show,
1744};
1745
1746static int ipmr_vif_open(struct inode *inode, struct file *file)
1747{
Pavel Emelyanovcf7732e2007-10-10 02:29:29 -07001748 return seq_open_private(file, &ipmr_vif_seq_ops,
1749 sizeof(struct ipmr_vif_iter));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001750}
1751
Arjan van de Ven9a321442007-02-12 00:55:35 -08001752static const struct file_operations ipmr_vif_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001753 .owner = THIS_MODULE,
1754 .open = ipmr_vif_open,
1755 .read = seq_read,
1756 .llseek = seq_lseek,
1757 .release = seq_release_private,
1758};
1759
1760struct ipmr_mfc_iter {
1761 struct mfc_cache **cache;
1762 int ct;
1763};
1764
1765
1766static struct mfc_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
1767{
1768 struct mfc_cache *mfc;
1769
1770 it->cache = mfc_cache_array;
1771 read_lock(&mrt_lock);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001772 for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
Stephen Hemminger132adf52007-03-08 20:44:43 -08001773 for (mfc = mfc_cache_array[it->ct]; mfc; mfc = mfc->next)
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001774 if (pos-- == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001775 return mfc;
1776 read_unlock(&mrt_lock);
1777
1778 it->cache = &mfc_unres_queue;
1779 spin_lock_bh(&mfc_unres_lock);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001780 for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001781 if (pos-- == 0)
1782 return mfc;
1783 spin_unlock_bh(&mfc_unres_lock);
1784
1785 it->cache = NULL;
1786 return NULL;
1787}
1788
1789
1790static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
1791{
1792 struct ipmr_mfc_iter *it = seq->private;
1793 it->cache = NULL;
1794 it->ct = 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001795 return *pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001796 : SEQ_START_TOKEN;
1797}
1798
1799static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1800{
1801 struct mfc_cache *mfc = v;
1802 struct ipmr_mfc_iter *it = seq->private;
1803
1804 ++*pos;
1805
1806 if (v == SEQ_START_TOKEN)
1807 return ipmr_mfc_seq_idx(seq->private, 0);
1808
1809 if (mfc->next)
1810 return mfc->next;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001811
1812 if (it->cache == &mfc_unres_queue)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001813 goto end_of_list;
1814
1815 BUG_ON(it->cache != mfc_cache_array);
1816
1817 while (++it->ct < MFC_LINES) {
1818 mfc = mfc_cache_array[it->ct];
1819 if (mfc)
1820 return mfc;
1821 }
1822
1823 /* exhausted cache_array, show unresolved */
1824 read_unlock(&mrt_lock);
1825 it->cache = &mfc_unres_queue;
1826 it->ct = 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001827
Linus Torvalds1da177e2005-04-16 15:20:36 -07001828 spin_lock_bh(&mfc_unres_lock);
1829 mfc = mfc_unres_queue;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001830 if (mfc)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001831 return mfc;
1832
1833 end_of_list:
1834 spin_unlock_bh(&mfc_unres_lock);
1835 it->cache = NULL;
1836
1837 return NULL;
1838}
1839
1840static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
1841{
1842 struct ipmr_mfc_iter *it = seq->private;
1843
1844 if (it->cache == &mfc_unres_queue)
1845 spin_unlock_bh(&mfc_unres_lock);
1846 else if (it->cache == mfc_cache_array)
1847 read_unlock(&mrt_lock);
1848}
1849
1850static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1851{
1852 int n;
1853
1854 if (v == SEQ_START_TOKEN) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001855 seq_puts(seq,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001856 "Group Origin Iif Pkts Bytes Wrong Oifs\n");
1857 } else {
1858 const struct mfc_cache *mfc = v;
1859 const struct ipmr_mfc_iter *it = seq->private;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001860
Benjamin Thery999890b2008-12-03 22:22:16 -08001861 seq_printf(seq, "%08lX %08lX %-3hd",
Linus Torvalds1da177e2005-04-16 15:20:36 -07001862 (unsigned long) mfc->mfc_mcastgrp,
1863 (unsigned long) mfc->mfc_origin,
Benjamin Thery1ea472e2008-12-03 22:21:47 -08001864 mfc->mfc_parent);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001865
1866 if (it->cache != &mfc_unres_queue) {
Benjamin Thery1ea472e2008-12-03 22:21:47 -08001867 seq_printf(seq, " %8lu %8lu %8lu",
1868 mfc->mfc_un.res.pkt,
1869 mfc->mfc_un.res.bytes,
1870 mfc->mfc_un.res.wrong_if);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001871 for (n = mfc->mfc_un.res.minvif;
1872 n < mfc->mfc_un.res.maxvif; n++ ) {
Benjamin Therycf958ae32009-01-22 04:56:16 +00001873 if (VIF_EXISTS(&init_net, n) &&
1874 mfc->mfc_un.res.ttls[n] < 255)
1875 seq_printf(seq,
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001876 " %2d:%-3d",
Linus Torvalds1da177e2005-04-16 15:20:36 -07001877 n, mfc->mfc_un.res.ttls[n]);
1878 }
Benjamin Thery1ea472e2008-12-03 22:21:47 -08001879 } else {
1880 /* unresolved mfc_caches don't contain
1881 * pkt, bytes and wrong_if values
1882 */
1883 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001884 }
1885 seq_putc(seq, '\n');
1886 }
1887 return 0;
1888}
1889
Stephen Hemmingerf6908082007-03-12 14:34:29 -07001890static const struct seq_operations ipmr_mfc_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001891 .start = ipmr_mfc_seq_start,
1892 .next = ipmr_mfc_seq_next,
1893 .stop = ipmr_mfc_seq_stop,
1894 .show = ipmr_mfc_seq_show,
1895};
1896
1897static int ipmr_mfc_open(struct inode *inode, struct file *file)
1898{
Pavel Emelyanovcf7732e2007-10-10 02:29:29 -07001899 return seq_open_private(file, &ipmr_mfc_seq_ops,
1900 sizeof(struct ipmr_mfc_iter));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001901}
1902
Arjan van de Ven9a321442007-02-12 00:55:35 -08001903static const struct file_operations ipmr_mfc_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001904 .owner = THIS_MODULE,
1905 .open = ipmr_mfc_open,
1906 .read = seq_read,
1907 .llseek = seq_lseek,
1908 .release = seq_release_private,
1909};
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001910#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001911
1912#ifdef CONFIG_IP_PIMSM_V2
1913static struct net_protocol pim_protocol = {
1914 .handler = pim_rcv,
1915};
1916#endif
1917
1918
1919/*
1920 * Setup for IP multicast routing
1921 */
Benjamin Therycf958ae32009-01-22 04:56:16 +00001922static int __net_init ipmr_net_init(struct net *net)
1923{
1924 int err = 0;
1925
1926 net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device),
1927 GFP_KERNEL);
1928 if (!net->ipv4.vif_table) {
1929 err = -ENOMEM;
1930 goto fail;
1931 }
1932fail:
1933 return err;
1934}
1935
1936static void __net_exit ipmr_net_exit(struct net *net)
1937{
1938 kfree(net->ipv4.vif_table);
1939}
1940
1941static struct pernet_operations ipmr_net_ops = {
1942 .init = ipmr_net_init,
1943 .exit = ipmr_net_exit,
1944};
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001945
Wang Chen03d2f892008-07-03 12:13:36 +08001946int __init ip_mr_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001947{
Wang Chen03d2f892008-07-03 12:13:36 +08001948 int err;
1949
Linus Torvalds1da177e2005-04-16 15:20:36 -07001950 mrt_cachep = kmem_cache_create("ip_mrt_cache",
1951 sizeof(struct mfc_cache),
Alexey Dobriyane5d679f332006-08-26 19:25:52 -07001952 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
Paul Mundt20c2df82007-07-20 10:11:58 +09001953 NULL);
Wang Chen03d2f892008-07-03 12:13:36 +08001954 if (!mrt_cachep)
1955 return -ENOMEM;
1956
Benjamin Therycf958ae32009-01-22 04:56:16 +00001957 err = register_pernet_subsys(&ipmr_net_ops);
1958 if (err)
1959 goto reg_pernet_fail;
1960
Pavel Emelyanovb24b8a22008-01-23 21:20:07 -08001961 setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
Wang Chen03d2f892008-07-03 12:13:36 +08001962 err = register_netdevice_notifier(&ip_mr_notifier);
1963 if (err)
1964 goto reg_notif_fail;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001965#ifdef CONFIG_PROC_FS
Wang Chen03d2f892008-07-03 12:13:36 +08001966 err = -ENOMEM;
1967 if (!proc_net_fops_create(&init_net, "ip_mr_vif", 0, &ipmr_vif_fops))
1968 goto proc_vif_fail;
1969 if (!proc_net_fops_create(&init_net, "ip_mr_cache", 0, &ipmr_mfc_fops))
1970 goto proc_cache_fail;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001971#endif
Wang Chen03d2f892008-07-03 12:13:36 +08001972 return 0;
Wang Chen03d2f892008-07-03 12:13:36 +08001973#ifdef CONFIG_PROC_FS
Wang Chen03d2f892008-07-03 12:13:36 +08001974proc_cache_fail:
1975 proc_net_remove(&init_net, "ip_mr_vif");
Benjamin Theryc3e38892008-11-19 14:07:41 -08001976proc_vif_fail:
1977 unregister_netdevice_notifier(&ip_mr_notifier);
Wang Chen03d2f892008-07-03 12:13:36 +08001978#endif
Benjamin Theryc3e38892008-11-19 14:07:41 -08001979reg_notif_fail:
1980 del_timer(&ipmr_expire_timer);
Benjamin Therycf958ae32009-01-22 04:56:16 +00001981 unregister_pernet_subsys(&ipmr_net_ops);
1982reg_pernet_fail:
Benjamin Theryc3e38892008-11-19 14:07:41 -08001983 kmem_cache_destroy(mrt_cachep);
Wang Chen03d2f892008-07-03 12:13:36 +08001984 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001985}