blob: a4fd97f1920cea90bde2e255d0ccc19c0555000f [file] [log] [blame]
/*
 *	IP multicast routing support for mrouted 3.6/3.8
 *
 *		(c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *	  Linux Consultancy and Custom Driver Development
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Michael Chastain	:	Incorrect size of copying.
 *	Alan Cox		:	Added the cache manager code
 *	Alan Cox		:	Fixed the clone/copy bug and device race.
 *	Mike McLagan		:	Routing by source
 *	Malcolm Beattie		:	Buffer handling fixes.
 *	Alexey Kuznetsov	:	Double buffer free and other fixes.
 *	SVR Anand		:	Fixed several multicast bugs and problems.
 *	Alexey Kuznetsov	:	Status, optimisations and more.
 *	Brad Parker		:	Better behaviour on mrouted upcall
 *					overflow.
 *	Carlos Picoto		:	PIMv1 Support
 *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
 *					Relax this requirement to work with older peers.
 *
 */
28
Linus Torvalds1da177e2005-04-16 15:20:36 -070029#include <asm/system.h>
30#include <asm/uaccess.h>
31#include <linux/types.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080032#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070033#include <linux/errno.h>
34#include <linux/timer.h>
35#include <linux/mm.h>
36#include <linux/kernel.h>
37#include <linux/fcntl.h>
38#include <linux/stat.h>
39#include <linux/socket.h>
40#include <linux/in.h>
41#include <linux/inet.h>
42#include <linux/netdevice.h>
43#include <linux/inetdevice.h>
44#include <linux/igmp.h>
45#include <linux/proc_fs.h>
46#include <linux/seq_file.h>
47#include <linux/mroute.h>
48#include <linux/init.h>
Kris Katterjohn46f25df2006-01-05 16:35:42 -080049#include <linux/if_ether.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020050#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070051#include <net/ip.h>
52#include <net/protocol.h>
53#include <linux/skbuff.h>
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020054#include <net/route.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070055#include <net/sock.h>
56#include <net/icmp.h>
57#include <net/udp.h>
58#include <net/raw.h>
59#include <linux/notifier.h>
60#include <linux/if_arp.h>
61#include <linux/netfilter_ipv4.h>
62#include <net/ipip.h>
63#include <net/checksum.h>
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -070064#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070065
66#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
67#define CONFIG_IP_PIMSM 1
68#endif
69
Linus Torvalds1da177e2005-04-16 15:20:36 -070070/* Big lock, protecting vif table, mrt cache and mroute socket state.
71 Note that the changes are semaphored via rtnl_lock.
72 */
73
74static DEFINE_RWLOCK(mrt_lock);
75
76/*
77 * Multicast router control variables
78 */
79
Benjamin Therycf958ae32009-01-22 04:56:16 +000080#define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -070081
Linus Torvalds1da177e2005-04-16 15:20:36 -070082static struct mfc_cache *mfc_unres_queue; /* Queue of unresolved entries */
Linus Torvalds1da177e2005-04-16 15:20:36 -070083
84/* Special spinlock for queue of unresolved entries */
85static DEFINE_SPINLOCK(mfc_unres_lock);
86
87/* We return to original Alan's scheme. Hash table of resolved
88 entries is changed only in process context and protected
89 with weak lock mrt_lock. Queue of unresolved entries is protected
90 with strong spinlock mfc_unres_lock.
91
92 In this case data path is free of exclusive locks at all.
93 */
94
Christoph Lametere18b8902006-12-06 20:33:20 -080095static struct kmem_cache *mrt_cachep __read_mostly;
Linus Torvalds1da177e2005-04-16 15:20:36 -070096
97static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
98static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
99static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
100
101#ifdef CONFIG_IP_PIMSM_V2
102static struct net_protocol pim_protocol;
103#endif
104
105static struct timer_list ipmr_expire_timer;
106
/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
108
Wang Chend6070322008-07-14 20:55:26 -0700109static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
110{
111 dev_close(dev);
112
113 dev = __dev_get_by_name(&init_net, "tunl0");
114 if (dev) {
Stephen Hemminger5bc3eb72008-11-19 21:52:05 -0800115 const struct net_device_ops *ops = dev->netdev_ops;
Wang Chend6070322008-07-14 20:55:26 -0700116 struct ifreq ifr;
Wang Chend6070322008-07-14 20:55:26 -0700117 struct ip_tunnel_parm p;
118
119 memset(&p, 0, sizeof(p));
120 p.iph.daddr = v->vifc_rmt_addr.s_addr;
121 p.iph.saddr = v->vifc_lcl_addr.s_addr;
122 p.iph.version = 4;
123 p.iph.ihl = 5;
124 p.iph.protocol = IPPROTO_IPIP;
125 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
126 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
127
Stephen Hemminger5bc3eb72008-11-19 21:52:05 -0800128 if (ops->ndo_do_ioctl) {
129 mm_segment_t oldfs = get_fs();
130
131 set_fs(KERNEL_DS);
132 ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
133 set_fs(oldfs);
134 }
Wang Chend6070322008-07-14 20:55:26 -0700135 }
136}
137
Linus Torvalds1da177e2005-04-16 15:20:36 -0700138static
139struct net_device *ipmr_new_tunnel(struct vifctl *v)
140{
141 struct net_device *dev;
142
Eric W. Biederman881d9662007-09-17 11:56:21 -0700143 dev = __dev_get_by_name(&init_net, "tunl0");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700144
145 if (dev) {
Stephen Hemminger5bc3eb72008-11-19 21:52:05 -0800146 const struct net_device_ops *ops = dev->netdev_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700147 int err;
148 struct ifreq ifr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700149 struct ip_tunnel_parm p;
150 struct in_device *in_dev;
151
152 memset(&p, 0, sizeof(p));
153 p.iph.daddr = v->vifc_rmt_addr.s_addr;
154 p.iph.saddr = v->vifc_lcl_addr.s_addr;
155 p.iph.version = 4;
156 p.iph.ihl = 5;
157 p.iph.protocol = IPPROTO_IPIP;
158 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
Stephen Hemmingerba93ef72008-01-21 17:28:59 -0800159 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700160
Stephen Hemminger5bc3eb72008-11-19 21:52:05 -0800161 if (ops->ndo_do_ioctl) {
162 mm_segment_t oldfs = get_fs();
163
164 set_fs(KERNEL_DS);
165 err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
166 set_fs(oldfs);
167 } else
168 err = -EOPNOTSUPP;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700169
170 dev = NULL;
171
Eric W. Biederman881d9662007-09-17 11:56:21 -0700172 if (err == 0 && (dev = __dev_get_by_name(&init_net, p.name)) != NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700173 dev->flags |= IFF_MULTICAST;
174
Herbert Xue5ed6392005-10-03 14:35:55 -0700175 in_dev = __in_dev_get_rtnl(dev);
Herbert Xu71e27da2007-06-04 23:36:06 -0700176 if (in_dev == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700177 goto failure;
Herbert Xu71e27da2007-06-04 23:36:06 -0700178
179 ipv4_devconf_setall(in_dev);
180 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700181
182 if (dev_open(dev))
183 goto failure;
Wang Chen7dc00c82008-07-14 20:56:34 -0700184 dev_hold(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700185 }
186 }
187 return dev;
188
189failure:
190 /* allow the register to be completed before unregistering. */
191 rtnl_unlock();
192 rtnl_lock();
193
194 unregister_netdevice(dev);
195 return NULL;
196}
197
198#ifdef CONFIG_IP_PIMSM
199
Linus Torvalds1da177e2005-04-16 15:20:36 -0700200static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
201{
202 read_lock(&mrt_lock);
Pavel Emelyanovcf3677a2008-05-21 14:17:33 -0700203 dev->stats.tx_bytes += skb->len;
204 dev->stats.tx_packets++;
Benjamin Thery6c5143d2009-01-22 04:56:21 +0000205 ipmr_cache_report(skb, init_net.ipv4.mroute_reg_vif_num, IGMPMSG_WHOLEPKT);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700206 read_unlock(&mrt_lock);
207 kfree_skb(skb);
208 return 0;
209}
210
Stephen Hemminger007c3832008-11-20 20:28:35 -0800211static const struct net_device_ops reg_vif_netdev_ops = {
212 .ndo_start_xmit = reg_vif_xmit,
213};
214
Linus Torvalds1da177e2005-04-16 15:20:36 -0700215static void reg_vif_setup(struct net_device *dev)
216{
217 dev->type = ARPHRD_PIMREG;
Kris Katterjohn46f25df2006-01-05 16:35:42 -0800218 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700219 dev->flags = IFF_NOARP;
Stephen Hemminger007c3832008-11-20 20:28:35 -0800220 dev->netdev_ops = &reg_vif_netdev_ops,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700221 dev->destructor = free_netdev;
222}
223
224static struct net_device *ipmr_reg_vif(void)
225{
226 struct net_device *dev;
227 struct in_device *in_dev;
228
Pavel Emelyanovcf3677a2008-05-21 14:17:33 -0700229 dev = alloc_netdev(0, "pimreg", reg_vif_setup);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700230
231 if (dev == NULL)
232 return NULL;
233
234 if (register_netdevice(dev)) {
235 free_netdev(dev);
236 return NULL;
237 }
238 dev->iflink = 0;
239
Herbert Xu71e27da2007-06-04 23:36:06 -0700240 rcu_read_lock();
241 if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
242 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700243 goto failure;
Herbert Xu71e27da2007-06-04 23:36:06 -0700244 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700245
Herbert Xu71e27da2007-06-04 23:36:06 -0700246 ipv4_devconf_setall(in_dev);
247 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
248 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700249
250 if (dev_open(dev))
251 goto failure;
252
Wang Chen7dc00c82008-07-14 20:56:34 -0700253 dev_hold(dev);
254
Linus Torvalds1da177e2005-04-16 15:20:36 -0700255 return dev;
256
257failure:
258 /* allow the register to be completed before unregistering. */
259 rtnl_unlock();
260 rtnl_lock();
261
262 unregister_netdevice(dev);
263 return NULL;
264}
265#endif
266
/*
 *	Delete a VIF entry
 *	@notify: Set to 1 if the caller is a notifier_call
 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900271
Wang Chen7dc00c82008-07-14 20:56:34 -0700272static int vif_delete(int vifi, int notify)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700273{
274 struct vif_device *v;
275 struct net_device *dev;
276 struct in_device *in_dev;
277
Benjamin Therycf958ae32009-01-22 04:56:16 +0000278 if (vifi < 0 || vifi >= init_net.ipv4.maxvif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700279 return -EADDRNOTAVAIL;
280
Benjamin Therycf958ae32009-01-22 04:56:16 +0000281 v = &init_net.ipv4.vif_table[vifi];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700282
283 write_lock_bh(&mrt_lock);
284 dev = v->dev;
285 v->dev = NULL;
286
287 if (!dev) {
288 write_unlock_bh(&mrt_lock);
289 return -EADDRNOTAVAIL;
290 }
291
292#ifdef CONFIG_IP_PIMSM
Benjamin Thery6c5143d2009-01-22 04:56:21 +0000293 if (vifi == init_net.ipv4.mroute_reg_vif_num)
294 init_net.ipv4.mroute_reg_vif_num = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700295#endif
296
Benjamin Therycf958ae32009-01-22 04:56:16 +0000297 if (vifi+1 == init_net.ipv4.maxvif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700298 int tmp;
299 for (tmp=vifi-1; tmp>=0; tmp--) {
Benjamin Therycf958ae32009-01-22 04:56:16 +0000300 if (VIF_EXISTS(&init_net, tmp))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700301 break;
302 }
Benjamin Therycf958ae32009-01-22 04:56:16 +0000303 init_net.ipv4.maxvif = tmp+1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700304 }
305
306 write_unlock_bh(&mrt_lock);
307
308 dev_set_allmulti(dev, -1);
309
Herbert Xue5ed6392005-10-03 14:35:55 -0700310 if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
Herbert Xu42f811b2007-06-04 23:34:44 -0700311 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700312 ip_rt_multicast_event(in_dev);
313 }
314
Wang Chen7dc00c82008-07-14 20:56:34 -0700315 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700316 unregister_netdevice(dev);
317
318 dev_put(dev);
319 return 0;
320}
321
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000322static inline void ipmr_cache_free(struct mfc_cache *c)
323{
324 release_net(mfc_net(c));
325 kmem_cache_free(mrt_cachep, c);
326}
327
/* Destroy an unresolved cache entry, killing queued skbs
   and reporting an error to netlink readers.
 */
331
332static void ipmr_destroy_unres(struct mfc_cache *c)
333{
334 struct sk_buff *skb;
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -0700335 struct nlmsgerr *e;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700336
Benjamin Thery1e8fb3b2009-01-22 04:56:19 +0000337 atomic_dec(&init_net.ipv4.cache_resolve_queue_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700338
Jianjun Kongc354e122008-11-03 00:28:02 -0800339 while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700340 if (ip_hdr(skb)->version == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700341 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
342 nlh->nlmsg_type = NLMSG_ERROR;
343 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
344 skb_trim(skb, nlh->nlmsg_len);
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -0700345 e = NLMSG_DATA(nlh);
346 e->error = -ETIMEDOUT;
347 memset(&e->msg, 0, sizeof(e->msg));
Thomas Graf2942e902006-08-15 00:30:25 -0700348
Denis V. Lunev97c53ca2007-11-19 22:26:51 -0800349 rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700350 } else
351 kfree_skb(skb);
352 }
353
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000354 ipmr_cache_free(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700355}
356
357
/* A single timer handles expiry for the whole unresolved queue. */
359
360static void ipmr_expire_process(unsigned long dummy)
361{
362 unsigned long now;
363 unsigned long expires;
364 struct mfc_cache *c, **cp;
365
366 if (!spin_trylock(&mfc_unres_lock)) {
367 mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
368 return;
369 }
370
Benjamin Thery1e8fb3b2009-01-22 04:56:19 +0000371 if (mfc_unres_queue == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700372 goto out;
373
374 now = jiffies;
375 expires = 10*HZ;
376 cp = &mfc_unres_queue;
377
378 while ((c=*cp) != NULL) {
379 if (time_after(c->mfc_un.unres.expires, now)) {
380 unsigned long interval = c->mfc_un.unres.expires - now;
381 if (interval < expires)
382 expires = interval;
383 cp = &c->next;
384 continue;
385 }
386
387 *cp = c->next;
388
389 ipmr_destroy_unres(c);
390 }
391
Benjamin Thery1e8fb3b2009-01-22 04:56:19 +0000392 if (mfc_unres_queue != NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700393 mod_timer(&ipmr_expire_timer, jiffies + expires);
394
395out:
396 spin_unlock(&mfc_unres_lock);
397}
398
/* Fill the oifs list. Called with mrt_lock write-locked. */
400
Baruch Evend1b04c02005-07-30 17:41:59 -0700401static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700402{
403 int vifi;
404
405 cache->mfc_un.res.minvif = MAXVIFS;
406 cache->mfc_un.res.maxvif = 0;
407 memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
408
Benjamin Therycf958ae32009-01-22 04:56:16 +0000409 for (vifi = 0; vifi < init_net.ipv4.maxvif; vifi++) {
410 if (VIF_EXISTS(&init_net, vifi) &&
411 ttls[vifi] && ttls[vifi] < 255) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700412 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
413 if (cache->mfc_un.res.minvif > vifi)
414 cache->mfc_un.res.minvif = vifi;
415 if (cache->mfc_un.res.maxvif <= vifi)
416 cache->mfc_un.res.maxvif = vifi + 1;
417 }
418 }
419}
420
421static int vif_add(struct vifctl *vifc, int mrtsock)
422{
423 int vifi = vifc->vifc_vifi;
Benjamin Therycf958ae32009-01-22 04:56:16 +0000424 struct vif_device *v = &init_net.ipv4.vif_table[vifi];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700425 struct net_device *dev;
426 struct in_device *in_dev;
Wang Chend6070322008-07-14 20:55:26 -0700427 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700428
429 /* Is vif busy ? */
Benjamin Therycf958ae32009-01-22 04:56:16 +0000430 if (VIF_EXISTS(&init_net, vifi))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700431 return -EADDRINUSE;
432
433 switch (vifc->vifc_flags) {
434#ifdef CONFIG_IP_PIMSM
435 case VIFF_REGISTER:
436 /*
437 * Special Purpose VIF in PIM
438 * All the packets will be sent to the daemon
439 */
Benjamin Thery6c5143d2009-01-22 04:56:21 +0000440 if (init_net.ipv4.mroute_reg_vif_num >= 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700441 return -EADDRINUSE;
442 dev = ipmr_reg_vif();
443 if (!dev)
444 return -ENOBUFS;
Wang Chend6070322008-07-14 20:55:26 -0700445 err = dev_set_allmulti(dev, 1);
446 if (err) {
447 unregister_netdevice(dev);
Wang Chen7dc00c82008-07-14 20:56:34 -0700448 dev_put(dev);
Wang Chend6070322008-07-14 20:55:26 -0700449 return err;
450 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700451 break;
452#endif
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900453 case VIFF_TUNNEL:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700454 dev = ipmr_new_tunnel(vifc);
455 if (!dev)
456 return -ENOBUFS;
Wang Chend6070322008-07-14 20:55:26 -0700457 err = dev_set_allmulti(dev, 1);
458 if (err) {
459 ipmr_del_tunnel(dev, vifc);
Wang Chen7dc00c82008-07-14 20:56:34 -0700460 dev_put(dev);
Wang Chend6070322008-07-14 20:55:26 -0700461 return err;
462 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700463 break;
464 case 0:
Denis V. Lunev1ab35272008-01-22 22:04:30 -0800465 dev = ip_dev_find(&init_net, vifc->vifc_lcl_addr.s_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700466 if (!dev)
467 return -EADDRNOTAVAIL;
Wang Chend6070322008-07-14 20:55:26 -0700468 err = dev_set_allmulti(dev, 1);
Wang Chen7dc00c82008-07-14 20:56:34 -0700469 if (err) {
470 dev_put(dev);
Wang Chend6070322008-07-14 20:55:26 -0700471 return err;
Wang Chen7dc00c82008-07-14 20:56:34 -0700472 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700473 break;
474 default:
475 return -EINVAL;
476 }
477
Herbert Xue5ed6392005-10-03 14:35:55 -0700478 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700479 return -EADDRNOTAVAIL;
Herbert Xu42f811b2007-06-04 23:34:44 -0700480 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700481 ip_rt_multicast_event(in_dev);
482
483 /*
484 * Fill in the VIF structures
485 */
Jianjun Kongc354e122008-11-03 00:28:02 -0800486 v->rate_limit = vifc->vifc_rate_limit;
487 v->local = vifc->vifc_lcl_addr.s_addr;
488 v->remote = vifc->vifc_rmt_addr.s_addr;
489 v->flags = vifc->vifc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700490 if (!mrtsock)
491 v->flags |= VIFF_STATIC;
Jianjun Kongc354e122008-11-03 00:28:02 -0800492 v->threshold = vifc->vifc_threshold;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700493 v->bytes_in = 0;
494 v->bytes_out = 0;
495 v->pkt_in = 0;
496 v->pkt_out = 0;
497 v->link = dev->ifindex;
498 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
499 v->link = dev->iflink;
500
501 /* And finish update writing critical data */
502 write_lock_bh(&mrt_lock);
Jianjun Kongc354e122008-11-03 00:28:02 -0800503 v->dev = dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700504#ifdef CONFIG_IP_PIMSM
505 if (v->flags&VIFF_REGISTER)
Benjamin Thery6c5143d2009-01-22 04:56:21 +0000506 init_net.ipv4.mroute_reg_vif_num = vifi;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700507#endif
Benjamin Therycf958ae32009-01-22 04:56:16 +0000508 if (vifi+1 > init_net.ipv4.maxvif)
509 init_net.ipv4.maxvif = vifi+1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700510 write_unlock_bh(&mrt_lock);
511 return 0;
512}
513
Al Viro114c7842006-09-27 18:39:29 -0700514static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700515{
Jianjun Kongc354e122008-11-03 00:28:02 -0800516 int line = MFC_HASH(mcastgrp, origin);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700517 struct mfc_cache *c;
518
Benjamin Thery2bb8b262009-01-22 04:56:18 +0000519 for (c = init_net.ipv4.mfc_cache_array[line]; c; c = c->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700520 if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
521 break;
522 }
523 return c;
524}
525
/*
 *	Allocate a multicast cache entry
 */
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000529static struct mfc_cache *ipmr_cache_alloc(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700530{
Jianjun Kongc354e122008-11-03 00:28:02 -0800531 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
532 if (c == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700533 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700534 c->mfc_un.res.minvif = MAXVIFS;
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000535 mfc_net_set(c, net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700536 return c;
537}
538
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000539static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700540{
Jianjun Kongc354e122008-11-03 00:28:02 -0800541 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
542 if (c == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700543 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700544 skb_queue_head_init(&c->mfc_un.unres.unresolved);
545 c->mfc_un.unres.expires = jiffies + 10*HZ;
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000546 mfc_net_set(c, net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700547 return c;
548}
549
/*
 *	A cache entry has gone from the queued state into the resolved state
 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900553
Linus Torvalds1da177e2005-04-16 15:20:36 -0700554static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
555{
556 struct sk_buff *skb;
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -0700557 struct nlmsgerr *e;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700558
559 /*
560 * Play the pending entries through our router
561 */
562
Jianjun Kongc354e122008-11-03 00:28:02 -0800563 while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700564 if (ip_hdr(skb)->version == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700565 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
566
567 if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -0700568 nlh->nlmsg_len = (skb_tail_pointer(skb) -
569 (u8 *)nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700570 } else {
571 nlh->nlmsg_type = NLMSG_ERROR;
572 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
573 skb_trim(skb, nlh->nlmsg_len);
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -0700574 e = NLMSG_DATA(nlh);
575 e->error = -EMSGSIZE;
576 memset(&e->msg, 0, sizeof(e->msg));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700577 }
Thomas Graf2942e902006-08-15 00:30:25 -0700578
Denis V. Lunev97c53ca2007-11-19 22:26:51 -0800579 rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700580 } else
581 ip_mr_forward(skb, c, 0);
582 }
583}
584
/*
 *	Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900591
Linus Torvalds1da177e2005-04-16 15:20:36 -0700592static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
593{
594 struct sk_buff *skb;
Arnaldo Carvalho de Meloc9bdd4b2007-03-12 20:09:15 -0300595 const int ihl = ip_hdrlen(pkt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700596 struct igmphdr *igmp;
597 struct igmpmsg *msg;
598 int ret;
599
600#ifdef CONFIG_IP_PIMSM
601 if (assert == IGMPMSG_WHOLEPKT)
602 skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
603 else
604#endif
605 skb = alloc_skb(128, GFP_ATOMIC);
606
Stephen Hemminger132adf52007-03-08 20:44:43 -0800607 if (!skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700608 return -ENOBUFS;
609
610#ifdef CONFIG_IP_PIMSM
611 if (assert == IGMPMSG_WHOLEPKT) {
612 /* Ugly, but we have no choice with this interface.
613 Duplicate old header, fix ihl, length etc.
614 And all this only to mangle msg->im_msgtype and
615 to set msg->im_mbz to "mbz" :-)
616 */
Arnaldo Carvalho de Melo878c8142007-03-11 22:38:29 -0300617 skb_push(skb, sizeof(struct iphdr));
618 skb_reset_network_header(skb);
Arnaldo Carvalho de Melobadff6d2007-03-13 13:06:52 -0300619 skb_reset_transport_header(skb);
Arnaldo Carvalho de Melo0272ffc2007-03-12 20:05:39 -0300620 msg = (struct igmpmsg *)skb_network_header(skb);
Arnaldo Carvalho de Melod56f90a2007-04-10 20:50:43 -0700621 memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700622 msg->im_msgtype = IGMPMSG_WHOLEPKT;
623 msg->im_mbz = 0;
Benjamin Thery6c5143d2009-01-22 04:56:21 +0000624 msg->im_vif = init_net.ipv4.mroute_reg_vif_num;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700625 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
626 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
627 sizeof(struct iphdr));
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900628 } else
Linus Torvalds1da177e2005-04-16 15:20:36 -0700629#endif
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900630 {
631
Linus Torvalds1da177e2005-04-16 15:20:36 -0700632 /*
633 * Copy the IP header
634 */
635
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -0700636 skb->network_header = skb->tail;
Arnaldo Carvalho de Meloddc7b8e2007-03-15 21:42:27 -0300637 skb_put(skb, ihl);
Arnaldo Carvalho de Melo27d7ff42007-03-31 11:55:19 -0300638 skb_copy_to_linear_data(skb, pkt->data, ihl);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700639 ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */
640 msg = (struct igmpmsg *)skb_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700641 msg->im_vif = vifi;
642 skb->dst = dst_clone(pkt->dst);
643
644 /*
645 * Add our header
646 */
647
Jianjun Kongc354e122008-11-03 00:28:02 -0800648 igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700649 igmp->type =
650 msg->im_msgtype = assert;
651 igmp->code = 0;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700652 ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700653 skb->transport_header = skb->network_header;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900654 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700655
Benjamin Thery70a269e2009-01-22 04:56:15 +0000656 if (init_net.ipv4.mroute_sk == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700657 kfree_skb(skb);
658 return -EINVAL;
659 }
660
661 /*
662 * Deliver to mrouted
663 */
Benjamin Thery70a269e2009-01-22 04:56:15 +0000664 ret = sock_queue_rcv_skb(init_net.ipv4.mroute_sk, skb);
665 if (ret < 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700666 if (net_ratelimit())
667 printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
668 kfree_skb(skb);
669 }
670
671 return ret;
672}
673
/*
 *	Queue a packet for resolution. It gets locked cache entry!
 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900677
Linus Torvalds1da177e2005-04-16 15:20:36 -0700678static int
679ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
680{
681 int err;
682 struct mfc_cache *c;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700683 const struct iphdr *iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700684
685 spin_lock_bh(&mfc_unres_lock);
686 for (c=mfc_unres_queue; c; c=c->next) {
Benjamin Thery1e8fb3b2009-01-22 04:56:19 +0000687 if (net_eq(mfc_net(c), &init_net) &&
688 c->mfc_mcastgrp == iph->daddr &&
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700689 c->mfc_origin == iph->saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700690 break;
691 }
692
693 if (c == NULL) {
694 /*
695 * Create a new entry if allowable
696 */
697
Benjamin Thery1e8fb3b2009-01-22 04:56:19 +0000698 if (atomic_read(&init_net.ipv4.cache_resolve_queue_len) >= 10 ||
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000699 (c = ipmr_cache_alloc_unres(&init_net)) == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700700 spin_unlock_bh(&mfc_unres_lock);
701
702 kfree_skb(skb);
703 return -ENOBUFS;
704 }
705
706 /*
707 * Fill in the new cache entry
708 */
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700709 c->mfc_parent = -1;
710 c->mfc_origin = iph->saddr;
711 c->mfc_mcastgrp = iph->daddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700712
713 /*
714 * Reflect first query at mrouted.
715 */
716 if ((err = ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE))<0) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900717 /* If the report failed throw the cache entry
Linus Torvalds1da177e2005-04-16 15:20:36 -0700718 out - Brad Parker
719 */
720 spin_unlock_bh(&mfc_unres_lock);
721
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000722 ipmr_cache_free(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700723 kfree_skb(skb);
724 return err;
725 }
726
Benjamin Thery1e8fb3b2009-01-22 04:56:19 +0000727 atomic_inc(&init_net.ipv4.cache_resolve_queue_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700728 c->next = mfc_unres_queue;
729 mfc_unres_queue = c;
730
731 mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
732 }
733
734 /*
735 * See if we can append the packet
736 */
737 if (c->mfc_un.unres.unresolved.qlen>3) {
738 kfree_skb(skb);
739 err = -ENOBUFS;
740 } else {
Jianjun Kongc354e122008-11-03 00:28:02 -0800741 skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700742 err = 0;
743 }
744
745 spin_unlock_bh(&mfc_unres_lock);
746 return err;
747}
748
/*
 *	MFC cache manipulation by user space mroute daemon
 */
752
753static int ipmr_mfc_delete(struct mfcctl *mfc)
754{
755 int line;
756 struct mfc_cache *c, **cp;
757
Jianjun Kongc354e122008-11-03 00:28:02 -0800758 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700759
Benjamin Thery2bb8b262009-01-22 04:56:18 +0000760 for (cp = &init_net.ipv4.mfc_cache_array[line];
761 (c = *cp) != NULL; cp = &c->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700762 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
763 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
764 write_lock_bh(&mrt_lock);
765 *cp = c->next;
766 write_unlock_bh(&mrt_lock);
767
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000768 ipmr_cache_free(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700769 return 0;
770 }
771 }
772 return -ENOENT;
773}
774
775static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
776{
777 int line;
778 struct mfc_cache *uc, *c, **cp;
779
Jianjun Kongc354e122008-11-03 00:28:02 -0800780 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700781
Benjamin Thery2bb8b262009-01-22 04:56:18 +0000782 for (cp = &init_net.ipv4.mfc_cache_array[line];
783 (c = *cp) != NULL; cp = &c->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700784 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
785 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
786 break;
787 }
788
789 if (c != NULL) {
790 write_lock_bh(&mrt_lock);
791 c->mfc_parent = mfc->mfcc_parent;
Baruch Evend1b04c02005-07-30 17:41:59 -0700792 ipmr_update_thresholds(c, mfc->mfcc_ttls);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700793 if (!mrtsock)
794 c->mfc_flags |= MFC_STATIC;
795 write_unlock_bh(&mrt_lock);
796 return 0;
797 }
798
Joe Perchesf97c1e02007-12-16 13:45:43 -0800799 if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700800 return -EINVAL;
801
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000802 c = ipmr_cache_alloc(&init_net);
Jianjun Kongc354e122008-11-03 00:28:02 -0800803 if (c == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700804 return -ENOMEM;
805
Jianjun Kongc354e122008-11-03 00:28:02 -0800806 c->mfc_origin = mfc->mfcc_origin.s_addr;
807 c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
808 c->mfc_parent = mfc->mfcc_parent;
Baruch Evend1b04c02005-07-30 17:41:59 -0700809 ipmr_update_thresholds(c, mfc->mfcc_ttls);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700810 if (!mrtsock)
811 c->mfc_flags |= MFC_STATIC;
812
813 write_lock_bh(&mrt_lock);
Benjamin Thery2bb8b262009-01-22 04:56:18 +0000814 c->next = init_net.ipv4.mfc_cache_array[line];
815 init_net.ipv4.mfc_cache_array[line] = c;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700816 write_unlock_bh(&mrt_lock);
817
818 /*
819 * Check to see if we resolved a queued list. If so we
820 * need to send on the frames and tidy up.
821 */
822 spin_lock_bh(&mfc_unres_lock);
823 for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
824 cp = &uc->next) {
Benjamin Thery1e8fb3b2009-01-22 04:56:19 +0000825 if (net_eq(mfc_net(uc), &init_net) &&
826 uc->mfc_origin == c->mfc_origin &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700827 uc->mfc_mcastgrp == c->mfc_mcastgrp) {
828 *cp = uc->next;
Benjamin Thery1e8fb3b2009-01-22 04:56:19 +0000829 atomic_dec(&init_net.ipv4.cache_resolve_queue_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700830 break;
831 }
832 }
Benjamin Thery1e8fb3b2009-01-22 04:56:19 +0000833 if (mfc_unres_queue == NULL)
834 del_timer(&ipmr_expire_timer);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700835 spin_unlock_bh(&mfc_unres_lock);
836
837 if (uc) {
838 ipmr_cache_resolve(uc, c);
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000839 ipmr_cache_free(uc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700840 }
841 return 0;
842}
843
844/*
845 * Close the multicast socket, and clear the vif tables etc
846 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900847
Linus Torvalds1da177e2005-04-16 15:20:36 -0700848static void mroute_clean_tables(struct sock *sk)
849{
850 int i;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900851
Linus Torvalds1da177e2005-04-16 15:20:36 -0700852 /*
853 * Shut down all active vif entries
854 */
Benjamin Therycf958ae32009-01-22 04:56:16 +0000855 for (i = 0; i < init_net.ipv4.maxvif; i++) {
856 if (!(init_net.ipv4.vif_table[i].flags&VIFF_STATIC))
Wang Chen7dc00c82008-07-14 20:56:34 -0700857 vif_delete(i, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700858 }
859
860 /*
861 * Wipe the cache
862 */
Jianjun Kongc354e122008-11-03 00:28:02 -0800863 for (i=0; i<MFC_LINES; i++) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700864 struct mfc_cache *c, **cp;
865
Benjamin Thery2bb8b262009-01-22 04:56:18 +0000866 cp = &init_net.ipv4.mfc_cache_array[i];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700867 while ((c = *cp) != NULL) {
868 if (c->mfc_flags&MFC_STATIC) {
869 cp = &c->next;
870 continue;
871 }
872 write_lock_bh(&mrt_lock);
873 *cp = c->next;
874 write_unlock_bh(&mrt_lock);
875
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000876 ipmr_cache_free(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700877 }
878 }
879
Benjamin Thery1e8fb3b2009-01-22 04:56:19 +0000880 if (atomic_read(&init_net.ipv4.cache_resolve_queue_len) != 0) {
881 struct mfc_cache *c, **cp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700882
883 spin_lock_bh(&mfc_unres_lock);
Benjamin Thery1e8fb3b2009-01-22 04:56:19 +0000884 cp = &mfc_unres_queue;
885 while ((c = *cp) != NULL) {
886 if (!net_eq(mfc_net(c), &init_net)) {
887 cp = &c->next;
888 continue;
889 }
890 *cp = c->next;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700891
892 ipmr_destroy_unres(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700893 }
894 spin_unlock_bh(&mfc_unres_lock);
895 }
896}
897
898static void mrtsock_destruct(struct sock *sk)
899{
900 rtnl_lock();
Benjamin Thery70a269e2009-01-22 04:56:15 +0000901 if (sk == init_net.ipv4.mroute_sk) {
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900902 IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)--;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700903
904 write_lock_bh(&mrt_lock);
Benjamin Thery70a269e2009-01-22 04:56:15 +0000905 init_net.ipv4.mroute_sk = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700906 write_unlock_bh(&mrt_lock);
907
908 mroute_clean_tables(sk);
909 }
910 rtnl_unlock();
911}
912
913/*
914 * Socket options and virtual interface manipulation. The whole
915 * virtual interface system is a complete heap, but unfortunately
916 * that's how BSD mrouted happens to think. Maybe one day with a proper
917 * MOSPF/PIM router set up we can clean this up.
918 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900919
Jianjun Kongc354e122008-11-03 00:28:02 -0800920int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int optlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700921{
922 int ret;
923 struct vifctl vif;
924 struct mfcctl mfc;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900925
Stephen Hemminger132adf52007-03-08 20:44:43 -0800926 if (optname != MRT_INIT) {
Benjamin Thery70a269e2009-01-22 04:56:15 +0000927 if (sk != init_net.ipv4.mroute_sk && !capable(CAP_NET_ADMIN))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700928 return -EACCES;
929 }
930
Stephen Hemminger132adf52007-03-08 20:44:43 -0800931 switch (optname) {
932 case MRT_INIT:
933 if (sk->sk_type != SOCK_RAW ||
934 inet_sk(sk)->num != IPPROTO_IGMP)
935 return -EOPNOTSUPP;
Jianjun Kongc354e122008-11-03 00:28:02 -0800936 if (optlen != sizeof(int))
Stephen Hemminger132adf52007-03-08 20:44:43 -0800937 return -ENOPROTOOPT;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700938
Stephen Hemminger132adf52007-03-08 20:44:43 -0800939 rtnl_lock();
Benjamin Thery70a269e2009-01-22 04:56:15 +0000940 if (init_net.ipv4.mroute_sk) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700941 rtnl_unlock();
Stephen Hemminger132adf52007-03-08 20:44:43 -0800942 return -EADDRINUSE;
943 }
944
945 ret = ip_ra_control(sk, 1, mrtsock_destruct);
946 if (ret == 0) {
947 write_lock_bh(&mrt_lock);
Benjamin Thery70a269e2009-01-22 04:56:15 +0000948 init_net.ipv4.mroute_sk = sk;
Stephen Hemminger132adf52007-03-08 20:44:43 -0800949 write_unlock_bh(&mrt_lock);
950
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900951 IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)++;
Stephen Hemminger132adf52007-03-08 20:44:43 -0800952 }
953 rtnl_unlock();
954 return ret;
955 case MRT_DONE:
Benjamin Thery70a269e2009-01-22 04:56:15 +0000956 if (sk != init_net.ipv4.mroute_sk)
Stephen Hemminger132adf52007-03-08 20:44:43 -0800957 return -EACCES;
958 return ip_ra_control(sk, 0, NULL);
959 case MRT_ADD_VIF:
960 case MRT_DEL_VIF:
Jianjun Kongc354e122008-11-03 00:28:02 -0800961 if (optlen != sizeof(vif))
Stephen Hemminger132adf52007-03-08 20:44:43 -0800962 return -EINVAL;
Jianjun Kongc354e122008-11-03 00:28:02 -0800963 if (copy_from_user(&vif, optval, sizeof(vif)))
Stephen Hemminger132adf52007-03-08 20:44:43 -0800964 return -EFAULT;
965 if (vif.vifc_vifi >= MAXVIFS)
966 return -ENFILE;
967 rtnl_lock();
Jianjun Kongc354e122008-11-03 00:28:02 -0800968 if (optname == MRT_ADD_VIF) {
Benjamin Thery70a269e2009-01-22 04:56:15 +0000969 ret = vif_add(&vif, sk == init_net.ipv4.mroute_sk);
Stephen Hemminger132adf52007-03-08 20:44:43 -0800970 } else {
Wang Chen7dc00c82008-07-14 20:56:34 -0700971 ret = vif_delete(vif.vifc_vifi, 0);
Stephen Hemminger132adf52007-03-08 20:44:43 -0800972 }
973 rtnl_unlock();
974 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700975
976 /*
977 * Manipulate the forwarding caches. These live
978 * in a sort of kernel/user symbiosis.
979 */
Stephen Hemminger132adf52007-03-08 20:44:43 -0800980 case MRT_ADD_MFC:
981 case MRT_DEL_MFC:
Jianjun Kongc354e122008-11-03 00:28:02 -0800982 if (optlen != sizeof(mfc))
Stephen Hemminger132adf52007-03-08 20:44:43 -0800983 return -EINVAL;
Jianjun Kongc354e122008-11-03 00:28:02 -0800984 if (copy_from_user(&mfc, optval, sizeof(mfc)))
Stephen Hemminger132adf52007-03-08 20:44:43 -0800985 return -EFAULT;
986 rtnl_lock();
Jianjun Kongc354e122008-11-03 00:28:02 -0800987 if (optname == MRT_DEL_MFC)
Stephen Hemminger132adf52007-03-08 20:44:43 -0800988 ret = ipmr_mfc_delete(&mfc);
989 else
Benjamin Thery70a269e2009-01-22 04:56:15 +0000990 ret = ipmr_mfc_add(&mfc, sk == init_net.ipv4.mroute_sk);
Stephen Hemminger132adf52007-03-08 20:44:43 -0800991 rtnl_unlock();
992 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700993 /*
994 * Control PIM assert.
995 */
Stephen Hemminger132adf52007-03-08 20:44:43 -0800996 case MRT_ASSERT:
997 {
998 int v;
999 if (get_user(v,(int __user *)optval))
1000 return -EFAULT;
Benjamin Thery6f9374a2009-01-22 04:56:20 +00001001 init_net.ipv4.mroute_do_assert = (v) ? 1 : 0;
Stephen Hemminger132adf52007-03-08 20:44:43 -08001002 return 0;
1003 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001004#ifdef CONFIG_IP_PIMSM
Stephen Hemminger132adf52007-03-08 20:44:43 -08001005 case MRT_PIM:
1006 {
Stephen Hemmingerba93ef72008-01-21 17:28:59 -08001007 int v;
1008
Stephen Hemminger132adf52007-03-08 20:44:43 -08001009 if (get_user(v,(int __user *)optval))
1010 return -EFAULT;
Stephen Hemmingerba93ef72008-01-21 17:28:59 -08001011 v = (v) ? 1 : 0;
1012
Stephen Hemminger132adf52007-03-08 20:44:43 -08001013 rtnl_lock();
1014 ret = 0;
Benjamin Thery6f9374a2009-01-22 04:56:20 +00001015 if (v != init_net.ipv4.mroute_do_pim) {
1016 init_net.ipv4.mroute_do_pim = v;
1017 init_net.ipv4.mroute_do_assert = v;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001018#ifdef CONFIG_IP_PIMSM_V2
Benjamin Thery6f9374a2009-01-22 04:56:20 +00001019 if (init_net.ipv4.mroute_do_pim)
Stephen Hemminger132adf52007-03-08 20:44:43 -08001020 ret = inet_add_protocol(&pim_protocol,
1021 IPPROTO_PIM);
1022 else
1023 ret = inet_del_protocol(&pim_protocol,
1024 IPPROTO_PIM);
1025 if (ret < 0)
1026 ret = -EAGAIN;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001027#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001028 }
Stephen Hemminger132adf52007-03-08 20:44:43 -08001029 rtnl_unlock();
1030 return ret;
1031 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001032#endif
Stephen Hemminger132adf52007-03-08 20:44:43 -08001033 /*
1034 * Spurious command, or MRT_VERSION which you cannot
1035 * set.
1036 */
1037 default:
1038 return -ENOPROTOOPT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001039 }
1040}
1041
1042/*
1043 * Getsock opt support for the multicast routing system.
1044 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001045
Jianjun Kongc354e122008-11-03 00:28:02 -08001046int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001047{
1048 int olr;
1049 int val;
1050
Jianjun Kongc354e122008-11-03 00:28:02 -08001051 if (optname != MRT_VERSION &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001052#ifdef CONFIG_IP_PIMSM
1053 optname!=MRT_PIM &&
1054#endif
1055 optname!=MRT_ASSERT)
1056 return -ENOPROTOOPT;
1057
1058 if (get_user(olr, optlen))
1059 return -EFAULT;
1060
1061 olr = min_t(unsigned int, olr, sizeof(int));
1062 if (olr < 0)
1063 return -EINVAL;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001064
Jianjun Kongc354e122008-11-03 00:28:02 -08001065 if (put_user(olr, optlen))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001066 return -EFAULT;
Jianjun Kongc354e122008-11-03 00:28:02 -08001067 if (optname == MRT_VERSION)
1068 val = 0x0305;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001069#ifdef CONFIG_IP_PIMSM
Jianjun Kongc354e122008-11-03 00:28:02 -08001070 else if (optname == MRT_PIM)
Benjamin Thery6f9374a2009-01-22 04:56:20 +00001071 val = init_net.ipv4.mroute_do_pim;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001072#endif
1073 else
Benjamin Thery6f9374a2009-01-22 04:56:20 +00001074 val = init_net.ipv4.mroute_do_assert;
Jianjun Kongc354e122008-11-03 00:28:02 -08001075 if (copy_to_user(optval, &val, olr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001076 return -EFAULT;
1077 return 0;
1078}
1079
1080/*
1081 * The IP multicast ioctl support routines.
1082 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001083
Linus Torvalds1da177e2005-04-16 15:20:36 -07001084int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1085{
1086 struct sioc_sg_req sr;
1087 struct sioc_vif_req vr;
1088 struct vif_device *vif;
1089 struct mfc_cache *c;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001090
Stephen Hemminger132adf52007-03-08 20:44:43 -08001091 switch (cmd) {
1092 case SIOCGETVIFCNT:
Jianjun Kongc354e122008-11-03 00:28:02 -08001093 if (copy_from_user(&vr, arg, sizeof(vr)))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001094 return -EFAULT;
Benjamin Therycf958ae32009-01-22 04:56:16 +00001095 if (vr.vifi >= init_net.ipv4.maxvif)
Stephen Hemminger132adf52007-03-08 20:44:43 -08001096 return -EINVAL;
1097 read_lock(&mrt_lock);
Benjamin Therycf958ae32009-01-22 04:56:16 +00001098 vif = &init_net.ipv4.vif_table[vr.vifi];
1099 if (VIF_EXISTS(&init_net, vr.vifi)) {
Jianjun Kongc354e122008-11-03 00:28:02 -08001100 vr.icount = vif->pkt_in;
1101 vr.ocount = vif->pkt_out;
1102 vr.ibytes = vif->bytes_in;
1103 vr.obytes = vif->bytes_out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001104 read_unlock(&mrt_lock);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001105
Jianjun Kongc354e122008-11-03 00:28:02 -08001106 if (copy_to_user(arg, &vr, sizeof(vr)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001107 return -EFAULT;
Stephen Hemminger132adf52007-03-08 20:44:43 -08001108 return 0;
1109 }
1110 read_unlock(&mrt_lock);
1111 return -EADDRNOTAVAIL;
1112 case SIOCGETSGCNT:
Jianjun Kongc354e122008-11-03 00:28:02 -08001113 if (copy_from_user(&sr, arg, sizeof(sr)))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001114 return -EFAULT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001115
Stephen Hemminger132adf52007-03-08 20:44:43 -08001116 read_lock(&mrt_lock);
1117 c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr);
1118 if (c) {
1119 sr.pktcnt = c->mfc_un.res.pkt;
1120 sr.bytecnt = c->mfc_un.res.bytes;
1121 sr.wrong_if = c->mfc_un.res.wrong_if;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001122 read_unlock(&mrt_lock);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001123
Jianjun Kongc354e122008-11-03 00:28:02 -08001124 if (copy_to_user(arg, &sr, sizeof(sr)))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001125 return -EFAULT;
1126 return 0;
1127 }
1128 read_unlock(&mrt_lock);
1129 return -EADDRNOTAVAIL;
1130 default:
1131 return -ENOIOCTLCMD;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001132 }
1133}
1134
1135
1136static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1137{
Eric W. Biedermane9dc8652007-09-12 13:02:17 +02001138 struct net_device *dev = ptr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001139 struct vif_device *v;
1140 int ct;
Eric W. Biedermane9dc8652007-09-12 13:02:17 +02001141
YOSHIFUJI Hideaki721499e2008-07-19 22:34:43 -07001142 if (!net_eq(dev_net(dev), &init_net))
Eric W. Biedermane9dc8652007-09-12 13:02:17 +02001143 return NOTIFY_DONE;
1144
Linus Torvalds1da177e2005-04-16 15:20:36 -07001145 if (event != NETDEV_UNREGISTER)
1146 return NOTIFY_DONE;
Benjamin Therycf958ae32009-01-22 04:56:16 +00001147 v = &init_net.ipv4.vif_table[0];
1148 for (ct = 0; ct < init_net.ipv4.maxvif; ct++, v++) {
Jianjun Kongc354e122008-11-03 00:28:02 -08001149 if (v->dev == dev)
Wang Chen7dc00c82008-07-14 20:56:34 -07001150 vif_delete(ct, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001151 }
1152 return NOTIFY_DONE;
1153}
1154
1155
Jianjun Kongc354e122008-11-03 00:28:02 -08001156static struct notifier_block ip_mr_notifier = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001157 .notifier_call = ipmr_device_event,
1158};
1159
1160/*
1161 * Encapsulate a packet by attaching a valid IPIP header to it.
1162 * This avoids tunnel drivers and other mess and gives us the speed so
1163 * important for multicast video.
1164 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001165
Al Viro114c7842006-09-27 18:39:29 -07001166static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001167{
Arnaldo Carvalho de Melo8856dfa2007-03-10 19:40:39 -03001168 struct iphdr *iph;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001169 struct iphdr *old_iph = ip_hdr(skb);
Arnaldo Carvalho de Melo8856dfa2007-03-10 19:40:39 -03001170
1171 skb_push(skb, sizeof(struct iphdr));
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -07001172 skb->transport_header = skb->network_header;
Arnaldo Carvalho de Melo8856dfa2007-03-10 19:40:39 -03001173 skb_reset_network_header(skb);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001174 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001175
1176 iph->version = 4;
Arnaldo Carvalho de Meloe023dd62007-03-12 20:09:36 -03001177 iph->tos = old_iph->tos;
1178 iph->ttl = old_iph->ttl;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001179 iph->frag_off = 0;
1180 iph->daddr = daddr;
1181 iph->saddr = saddr;
1182 iph->protocol = IPPROTO_IPIP;
1183 iph->ihl = 5;
1184 iph->tot_len = htons(skb->len);
1185 ip_select_ident(iph, skb->dst, NULL);
1186 ip_send_check(iph);
1187
Linus Torvalds1da177e2005-04-16 15:20:36 -07001188 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1189 nf_reset(skb);
1190}
1191
1192static inline int ipmr_forward_finish(struct sk_buff *skb)
1193{
1194 struct ip_options * opt = &(IPCB(skb)->opt);
1195
Pavel Emelyanov7c73a6f2008-07-16 20:20:11 -07001196 IP_INC_STATS_BH(dev_net(skb->dst->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001197
1198 if (unlikely(opt->optlen))
1199 ip_forward_options(skb);
1200
1201 return dst_output(skb);
1202}
1203
1204/*
1205 * Processing handlers for ipmr_forward
1206 */
1207
1208static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1209{
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001210 const struct iphdr *iph = ip_hdr(skb);
Benjamin Therycf958ae32009-01-22 04:56:16 +00001211 struct vif_device *vif = &init_net.ipv4.vif_table[vifi];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001212 struct net_device *dev;
1213 struct rtable *rt;
1214 int encap = 0;
1215
1216 if (vif->dev == NULL)
1217 goto out_free;
1218
1219#ifdef CONFIG_IP_PIMSM
1220 if (vif->flags & VIFF_REGISTER) {
1221 vif->pkt_out++;
Jianjun Kongc354e122008-11-03 00:28:02 -08001222 vif->bytes_out += skb->len;
Pavel Emelyanovcf3677a2008-05-21 14:17:33 -07001223 vif->dev->stats.tx_bytes += skb->len;
1224 vif->dev->stats.tx_packets++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001225 ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
1226 kfree_skb(skb);
1227 return;
1228 }
1229#endif
1230
1231 if (vif->flags&VIFF_TUNNEL) {
1232 struct flowi fl = { .oif = vif->link,
1233 .nl_u = { .ip4_u =
1234 { .daddr = vif->remote,
1235 .saddr = vif->local,
1236 .tos = RT_TOS(iph->tos) } },
1237 .proto = IPPROTO_IPIP };
Denis V. Lunevf2063512008-01-22 22:07:34 -08001238 if (ip_route_output_key(&init_net, &rt, &fl))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001239 goto out_free;
1240 encap = sizeof(struct iphdr);
1241 } else {
1242 struct flowi fl = { .oif = vif->link,
1243 .nl_u = { .ip4_u =
1244 { .daddr = iph->daddr,
1245 .tos = RT_TOS(iph->tos) } },
1246 .proto = IPPROTO_IPIP };
Denis V. Lunevf2063512008-01-22 22:07:34 -08001247 if (ip_route_output_key(&init_net, &rt, &fl))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001248 goto out_free;
1249 }
1250
1251 dev = rt->u.dst.dev;
1252
1253 if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
1254 /* Do not fragment multicasts. Alas, IPv4 does not
1255 allow to send ICMP, so that packets will disappear
1256 to blackhole.
1257 */
1258
Pavel Emelyanov7c73a6f2008-07-16 20:20:11 -07001259 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001260 ip_rt_put(rt);
1261 goto out_free;
1262 }
1263
1264 encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;
1265
1266 if (skb_cow(skb, encap)) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001267 ip_rt_put(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001268 goto out_free;
1269 }
1270
1271 vif->pkt_out++;
Jianjun Kongc354e122008-11-03 00:28:02 -08001272 vif->bytes_out += skb->len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001273
1274 dst_release(skb->dst);
1275 skb->dst = &rt->u.dst;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001276 ip_decrease_ttl(ip_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001277
1278 /* FIXME: forward and output firewalls used to be called here.
1279 * What do we do with netfilter? -- RR */
1280 if (vif->flags & VIFF_TUNNEL) {
1281 ip_encap(skb, vif->local, vif->remote);
1282 /* FIXME: extra output firewall step used to be here. --RR */
Pavel Emelyanov2f4c02d2008-05-21 14:16:14 -07001283 vif->dev->stats.tx_packets++;
1284 vif->dev->stats.tx_bytes += skb->len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001285 }
1286
1287 IPCB(skb)->flags |= IPSKB_FORWARDED;
1288
1289 /*
1290 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1291 * not only before forwarding, but after forwarding on all output
1292 * interfaces. It is clear, if mrouter runs a multicasting
1293 * program, it should receive packets not depending to what interface
1294 * program is joined.
1295 * If we will not make it, the program will have to join on all
1296 * interfaces. On the other hand, multihoming host (or router, but
1297 * not mrouter) cannot join to more than one interface - it will
1298 * result in receiving multiple packets.
1299 */
Patrick McHardy6e23ae22007-11-19 18:53:30 -08001300 NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001301 ipmr_forward_finish);
1302 return;
1303
1304out_free:
1305 kfree_skb(skb);
1306 return;
1307}
1308
1309static int ipmr_find_vif(struct net_device *dev)
1310{
1311 int ct;
Benjamin Therycf958ae32009-01-22 04:56:16 +00001312 for (ct = init_net.ipv4.maxvif-1; ct >= 0; ct--) {
1313 if (init_net.ipv4.vif_table[ct].dev == dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001314 break;
1315 }
1316 return ct;
1317}
1318
1319/* "local" means that we should preserve one skb (for local delivery) */
1320
1321static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
1322{
1323 int psend = -1;
1324 int vif, ct;
1325
1326 vif = cache->mfc_parent;
1327 cache->mfc_un.res.pkt++;
1328 cache->mfc_un.res.bytes += skb->len;
1329
1330 /*
1331 * Wrong interface: drop packet and (maybe) send PIM assert.
1332 */
Benjamin Therycf958ae32009-01-22 04:56:16 +00001333 if (init_net.ipv4.vif_table[vif].dev != skb->dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001334 int true_vifi;
1335
Eric Dumazetee6b9672008-03-05 18:30:47 -08001336 if (skb->rtable->fl.iif == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001337 /* It is our own packet, looped back.
1338 Very complicated situation...
1339
1340 The best workaround until routing daemons will be
1341 fixed is not to redistribute packet, if it was
1342 send through wrong interface. It means, that
1343 multicast applications WILL NOT work for
1344 (S,G), which have default multicast route pointing
1345 to wrong oif. In any case, it is not a good
1346 idea to use multicasting applications on router.
1347 */
1348 goto dont_forward;
1349 }
1350
1351 cache->mfc_un.res.wrong_if++;
1352 true_vifi = ipmr_find_vif(skb->dev);
1353
Benjamin Thery6f9374a2009-01-22 04:56:20 +00001354 if (true_vifi >= 0 && init_net.ipv4.mroute_do_assert &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001355 /* pimsm uses asserts, when switching from RPT to SPT,
1356 so that we cannot check that packet arrived on an oif.
1357 It is bad, but otherwise we would need to move pretty
1358 large chunk of pimd to kernel. Ough... --ANK
1359 */
Benjamin Thery6f9374a2009-01-22 04:56:20 +00001360 (init_net.ipv4.mroute_do_pim ||
1361 cache->mfc_un.res.ttls[true_vifi] < 255) &&
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001362 time_after(jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001363 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1364 cache->mfc_un.res.last_assert = jiffies;
1365 ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF);
1366 }
1367 goto dont_forward;
1368 }
1369
Benjamin Therycf958ae32009-01-22 04:56:16 +00001370 init_net.ipv4.vif_table[vif].pkt_in++;
1371 init_net.ipv4.vif_table[vif].bytes_in += skb->len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001372
1373 /*
1374 * Forward the frame
1375 */
1376 for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001377 if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001378 if (psend != -1) {
1379 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1380 if (skb2)
1381 ipmr_queue_xmit(skb2, cache, psend);
1382 }
Jianjun Kongc354e122008-11-03 00:28:02 -08001383 psend = ct;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001384 }
1385 }
1386 if (psend != -1) {
1387 if (local) {
1388 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1389 if (skb2)
1390 ipmr_queue_xmit(skb2, cache, psend);
1391 } else {
1392 ipmr_queue_xmit(skb, cache, psend);
1393 return 0;
1394 }
1395 }
1396
1397dont_forward:
1398 if (!local)
1399 kfree_skb(skb);
1400 return 0;
1401}
1402
1403
1404/*
1405 * Multicast packets for forwarding arrive here
1406 */
1407
1408int ip_mr_input(struct sk_buff *skb)
1409{
1410 struct mfc_cache *cache;
Eric Dumazetee6b9672008-03-05 18:30:47 -08001411 int local = skb->rtable->rt_flags&RTCF_LOCAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001412
1413 /* Packet is looped back after forward, it should not be
1414 forwarded second time, but still can be delivered locally.
1415 */
1416 if (IPCB(skb)->flags&IPSKB_FORWARDED)
1417 goto dont_forward;
1418
1419 if (!local) {
1420 if (IPCB(skb)->opt.router_alert) {
1421 if (ip_call_ra_chain(skb))
1422 return 0;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001423 } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
Linus Torvalds1da177e2005-04-16 15:20:36 -07001424 /* IGMPv1 (and broken IGMPv2 implementations sort of
1425 Cisco IOS <= 11.2(8)) do not put router alert
1426 option to IGMP packets destined to routable
1427 groups. It is very bad, because it means
1428 that we can forward NO IGMP messages.
1429 */
1430 read_lock(&mrt_lock);
Benjamin Thery70a269e2009-01-22 04:56:15 +00001431 if (init_net.ipv4.mroute_sk) {
Patrick McHardy2715bcf2005-06-21 14:06:24 -07001432 nf_reset(skb);
Benjamin Thery70a269e2009-01-22 04:56:15 +00001433 raw_rcv(init_net.ipv4.mroute_sk, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001434 read_unlock(&mrt_lock);
1435 return 0;
1436 }
1437 read_unlock(&mrt_lock);
1438 }
1439 }
1440
1441 read_lock(&mrt_lock);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001442 cache = ipmr_cache_find(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001443
1444 /*
1445 * No usable cache entry
1446 */
Jianjun Kongc354e122008-11-03 00:28:02 -08001447 if (cache == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001448 int vif;
1449
1450 if (local) {
1451 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1452 ip_local_deliver(skb);
1453 if (skb2 == NULL) {
1454 read_unlock(&mrt_lock);
1455 return -ENOBUFS;
1456 }
1457 skb = skb2;
1458 }
1459
1460 vif = ipmr_find_vif(skb->dev);
1461 if (vif >= 0) {
1462 int err = ipmr_cache_unresolved(vif, skb);
1463 read_unlock(&mrt_lock);
1464
1465 return err;
1466 }
1467 read_unlock(&mrt_lock);
1468 kfree_skb(skb);
1469 return -ENODEV;
1470 }
1471
1472 ip_mr_forward(skb, cache, local);
1473
1474 read_unlock(&mrt_lock);
1475
1476 if (local)
1477 return ip_local_deliver(skb);
1478
1479 return 0;
1480
1481dont_forward:
1482 if (local)
1483 return ip_local_deliver(skb);
1484 kfree_skb(skb);
1485 return 0;
1486}
1487
#ifdef CONFIG_IP_PIMSM
/*
 * Common tail of the PIM register receive paths.
 *
 * @skb:    packet whose transport header points at the PIM header
 * @pimlen: length of that PIM header; the encapsulated IP header follows
 *
 * Strips everything in front of the encapsulated packet and re-injects
 * it through netif_rx() as if it had arrived on the register vif device.
 * Returns 0 when the skb was re-injected and 1 when it was rejected; in
 * the latter case the skb is not freed here.
 */
static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
{
	struct iphdr *inner = (struct iphdr *)(skb_transport_header(skb) + pimlen);
	struct net_device *reg_dev = NULL;

	/*
	   Check that:
	   a. packet is really destined to a multicast group
	   b. packet is not a NULL-REGISTER
	   c. packet is not truncated
	 */
	if (!ipv4_is_multicast(inner->daddr))
		return 1;
	if (inner->tot_len == 0)
		return 1;
	if (ntohs(inner->tot_len) + pimlen > skb->len)
		return 1;

	/* Take a reference on the register device while under the lock. */
	read_lock(&mrt_lock);
	if (init_net.ipv4.mroute_reg_vif_num >= 0) {
		reg_dev = init_net.ipv4.vif_table[init_net.ipv4.mroute_reg_vif_num].dev;
		if (reg_dev)
			dev_hold(reg_dev);
	}
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		return 1;

	/* Make the encapsulated IP header the new network header. */
	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)inner - skb->data);
	skb_reset_network_header(skb);

	skb->dev = reg_dev;
	skb->protocol = htons(ETH_P_IP);
	skb->ip_summed = 0;
	skb->pkt_type = PACKET_HOST;

	dst_release(skb->dst);
	skb->dst = NULL;

	reg_dev->stats.rx_bytes += skb->len;
	reg_dev->stats.rx_packets++;

	nf_reset(skb);
	netif_rx(skb);
	dev_put(reg_dev);

	return 0;
}
#endif
1534
#ifdef CONFIG_IP_PIMSM_V1
/*
 * Handle IGMP messages of PIMv1
 *
 * Only PIMv1 REGISTER messages are accepted, and only while PIM mode is
 * enabled; everything else is freed.  Always returns 0.
 */

int pim_rcv_v1(struct sk_buff * skb)
{
	struct igmphdr *hdr;

	if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct iphdr)))
		goto drop;

	hdr = igmp_hdr(skb);

	if (!init_net.ipv4.mroute_do_pim)
		goto drop;
	if (hdr->group != PIM_V1_VERSION || hdr->code != PIM_V1_REGISTER)
		goto drop;

	/* A zero return means __pim_rcv() took the skb. */
	if (__pim_rcv(skb, sizeof(*hdr)) == 0)
		return 0;

drop:
	kfree_skb(skb);
	return 0;
}
#endif
1560
#ifdef CONFIG_IP_PIMSM_V2
/*
 * Receive handler for PIMv2 packets.  Accepts non-NULL REGISTER messages
 * whose checksum is valid over either the PIM header alone or the whole
 * packet, and passes them to __pim_rcv(); everything else is freed.
 * Always returns 0.
 */
static int pim_rcv(struct sk_buff * skb)
{
	struct pimreghdr *reg;

	if (!pskb_may_pull(skb, sizeof(*reg) + sizeof(struct iphdr)))
		goto drop;

	reg = (struct pimreghdr *)skb_transport_header(skb);

	if (reg->type != ((PIM_VERSION << 4) | (PIM_REGISTER)))
		goto drop;
	if (reg->flags & PIM_NULL_REGISTER)
		goto drop;
	/* Drop only when both checksum variants fail. */
	if (ip_compute_csum((void *)reg, sizeof(*reg)) != 0 &&
	    csum_fold(skb_checksum(skb, 0, skb->len, 0)))
		goto drop;

	/* A zero return means __pim_rcv() took the skb. */
	if (__pim_rcv(skb, sizeof(*reg)) == 0)
		return 0;

drop:
	kfree_skb(skb);
	return 0;
}
#endif
1583
1584static int
1585ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
1586{
1587 int ct;
1588 struct rtnexthop *nhp;
Benjamin Therycf958ae32009-01-22 04:56:16 +00001589 struct net_device *dev = init_net.ipv4.vif_table[c->mfc_parent].dev;
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001590 u8 *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001591 struct rtattr *mp_head;
1592
1593 if (dev)
1594 RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
1595
Jianjun Kongc354e122008-11-03 00:28:02 -08001596 mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001597
1598 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1599 if (c->mfc_un.res.ttls[ct] < 255) {
1600 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1601 goto rtattr_failure;
Jianjun Kongc354e122008-11-03 00:28:02 -08001602 nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001603 nhp->rtnh_flags = 0;
1604 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
Benjamin Therycf958ae32009-01-22 04:56:16 +00001605 nhp->rtnh_ifindex = init_net.ipv4.vif_table[ct].dev->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001606 nhp->rtnh_len = sizeof(*nhp);
1607 }
1608 }
1609 mp_head->rta_type = RTA_MULTIPATH;
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001610 mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001611 rtm->rtm_type = RTN_MULTICAST;
1612 return 1;
1613
1614rtattr_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001615 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001616 return -EMSGSIZE;
1617}
1618
1619int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1620{
1621 int err;
1622 struct mfc_cache *cache;
Eric Dumazetee6b9672008-03-05 18:30:47 -08001623 struct rtable *rt = skb->rtable;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001624
1625 read_lock(&mrt_lock);
1626 cache = ipmr_cache_find(rt->rt_src, rt->rt_dst);
1627
Jianjun Kongc354e122008-11-03 00:28:02 -08001628 if (cache == NULL) {
Alexey Kuznetsov72287492006-07-25 16:45:12 -07001629 struct sk_buff *skb2;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001630 struct iphdr *iph;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001631 struct net_device *dev;
1632 int vif;
1633
1634 if (nowait) {
1635 read_unlock(&mrt_lock);
1636 return -EAGAIN;
1637 }
1638
1639 dev = skb->dev;
1640 if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
1641 read_unlock(&mrt_lock);
1642 return -ENODEV;
1643 }
Alexey Kuznetsov72287492006-07-25 16:45:12 -07001644 skb2 = skb_clone(skb, GFP_ATOMIC);
1645 if (!skb2) {
1646 read_unlock(&mrt_lock);
1647 return -ENOMEM;
1648 }
1649
Arnaldo Carvalho de Meloe2d1bca2007-04-10 20:46:21 -07001650 skb_push(skb2, sizeof(struct iphdr));
1651 skb_reset_network_header(skb2);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001652 iph = ip_hdr(skb2);
1653 iph->ihl = sizeof(struct iphdr) >> 2;
1654 iph->saddr = rt->rt_src;
1655 iph->daddr = rt->rt_dst;
1656 iph->version = 0;
Alexey Kuznetsov72287492006-07-25 16:45:12 -07001657 err = ipmr_cache_unresolved(vif, skb2);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001658 read_unlock(&mrt_lock);
1659 return err;
1660 }
1661
1662 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1663 cache->mfc_flags |= MFC_NOTIFY;
1664 err = ipmr_fill_mroute(skb, cache, rtm);
1665 read_unlock(&mrt_lock);
1666 return err;
1667}
1668
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001669#ifdef CONFIG_PROC_FS
Linus Torvalds1da177e2005-04-16 15:20:36 -07001670/*
1671 * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
1672 */
1673struct ipmr_vif_iter {
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001674 struct seq_net_private p;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001675 int ct;
1676};
1677
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001678static struct vif_device *ipmr_vif_seq_idx(struct net *net,
1679 struct ipmr_vif_iter *iter,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001680 loff_t pos)
1681{
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001682 for (iter->ct = 0; iter->ct < net->ipv4.maxvif; ++iter->ct) {
1683 if (!VIF_EXISTS(net, iter->ct))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001684 continue;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001685 if (pos-- == 0)
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001686 return &net->ipv4.vif_table[iter->ct];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001687 }
1688 return NULL;
1689}
1690
1691static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
Stephen Hemmingerba93ef72008-01-21 17:28:59 -08001692 __acquires(mrt_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001693{
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001694 struct net *net = seq_file_net(seq);
1695
Linus Torvalds1da177e2005-04-16 15:20:36 -07001696 read_lock(&mrt_lock);
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001697 return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001698 : SEQ_START_TOKEN;
1699}
1700
1701static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1702{
1703 struct ipmr_vif_iter *iter = seq->private;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001704 struct net *net = seq_file_net(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001705
1706 ++*pos;
1707 if (v == SEQ_START_TOKEN)
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001708 return ipmr_vif_seq_idx(net, iter, 0);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001709
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001710 while (++iter->ct < net->ipv4.maxvif) {
1711 if (!VIF_EXISTS(net, iter->ct))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001712 continue;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001713 return &net->ipv4.vif_table[iter->ct];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001714 }
1715 return NULL;
1716}
1717
1718static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
Stephen Hemmingerba93ef72008-01-21 17:28:59 -08001719 __releases(mrt_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001720{
1721 read_unlock(&mrt_lock);
1722}
1723
1724static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1725{
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001726 struct net *net = seq_file_net(seq);
1727
Linus Torvalds1da177e2005-04-16 15:20:36 -07001728 if (v == SEQ_START_TOKEN) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001729 seq_puts(seq,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001730 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
1731 } else {
1732 const struct vif_device *vif = v;
1733 const char *name = vif->dev ? vif->dev->name : "none";
1734
1735 seq_printf(seq,
1736 "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001737 vif - net->ipv4.vif_table,
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001738 name, vif->bytes_in, vif->pkt_in,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001739 vif->bytes_out, vif->pkt_out,
1740 vif->flags, vif->local, vif->remote);
1741 }
1742 return 0;
1743}
1744
Stephen Hemmingerf6908082007-03-12 14:34:29 -07001745static const struct seq_operations ipmr_vif_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001746 .start = ipmr_vif_seq_start,
1747 .next = ipmr_vif_seq_next,
1748 .stop = ipmr_vif_seq_stop,
1749 .show = ipmr_vif_seq_show,
1750};
1751
1752static int ipmr_vif_open(struct inode *inode, struct file *file)
1753{
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001754 return seq_open_net(inode, file, &ipmr_vif_seq_ops,
1755 sizeof(struct ipmr_vif_iter));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001756}
1757
Arjan van de Ven9a321442007-02-12 00:55:35 -08001758static const struct file_operations ipmr_vif_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001759 .owner = THIS_MODULE,
1760 .open = ipmr_vif_open,
1761 .read = seq_read,
1762 .llseek = seq_lseek,
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001763 .release = seq_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001764};
1765
1766struct ipmr_mfc_iter {
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001767 struct seq_net_private p;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001768 struct mfc_cache **cache;
1769 int ct;
1770};
1771
1772
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001773static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
1774 struct ipmr_mfc_iter *it, loff_t pos)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001775{
1776 struct mfc_cache *mfc;
1777
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001778 it->cache = net->ipv4.mfc_cache_array;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001779 read_lock(&mrt_lock);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001780 for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001781 for (mfc = net->ipv4.mfc_cache_array[it->ct];
Benjamin Thery2bb8b262009-01-22 04:56:18 +00001782 mfc; mfc = mfc->next)
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001783 if (pos-- == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001784 return mfc;
1785 read_unlock(&mrt_lock);
1786
1787 it->cache = &mfc_unres_queue;
1788 spin_lock_bh(&mfc_unres_lock);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001789 for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001790 if (net_eq(mfc_net(mfc), net) &&
1791 pos-- == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001792 return mfc;
1793 spin_unlock_bh(&mfc_unres_lock);
1794
1795 it->cache = NULL;
1796 return NULL;
1797}
1798
1799
1800static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
1801{
1802 struct ipmr_mfc_iter *it = seq->private;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001803 struct net *net = seq_file_net(seq);
1804
Linus Torvalds1da177e2005-04-16 15:20:36 -07001805 it->cache = NULL;
1806 it->ct = 0;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001807 return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001808 : SEQ_START_TOKEN;
1809}
1810
1811static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1812{
1813 struct mfc_cache *mfc = v;
1814 struct ipmr_mfc_iter *it = seq->private;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001815 struct net *net = seq_file_net(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001816
1817 ++*pos;
1818
1819 if (v == SEQ_START_TOKEN)
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001820 return ipmr_mfc_seq_idx(net, seq->private, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001821
1822 if (mfc->next)
1823 return mfc->next;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001824
1825 if (it->cache == &mfc_unres_queue)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001826 goto end_of_list;
1827
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001828 BUG_ON(it->cache != net->ipv4.mfc_cache_array);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001829
1830 while (++it->ct < MFC_LINES) {
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001831 mfc = net->ipv4.mfc_cache_array[it->ct];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001832 if (mfc)
1833 return mfc;
1834 }
1835
1836 /* exhausted cache_array, show unresolved */
1837 read_unlock(&mrt_lock);
1838 it->cache = &mfc_unres_queue;
1839 it->ct = 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001840
Linus Torvalds1da177e2005-04-16 15:20:36 -07001841 spin_lock_bh(&mfc_unres_lock);
1842 mfc = mfc_unres_queue;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001843 while (mfc && !net_eq(mfc_net(mfc), net))
1844 mfc = mfc->next;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001845 if (mfc)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001846 return mfc;
1847
1848 end_of_list:
1849 spin_unlock_bh(&mfc_unres_lock);
1850 it->cache = NULL;
1851
1852 return NULL;
1853}
1854
1855static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
1856{
1857 struct ipmr_mfc_iter *it = seq->private;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001858 struct net *net = seq_file_net(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001859
1860 if (it->cache == &mfc_unres_queue)
1861 spin_unlock_bh(&mfc_unres_lock);
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001862 else if (it->cache == net->ipv4.mfc_cache_array)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001863 read_unlock(&mrt_lock);
1864}
1865
1866static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1867{
1868 int n;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001869 struct net *net = seq_file_net(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001870
1871 if (v == SEQ_START_TOKEN) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001872 seq_puts(seq,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001873 "Group Origin Iif Pkts Bytes Wrong Oifs\n");
1874 } else {
1875 const struct mfc_cache *mfc = v;
1876 const struct ipmr_mfc_iter *it = seq->private;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001877
Benjamin Thery999890b2008-12-03 22:22:16 -08001878 seq_printf(seq, "%08lX %08lX %-3hd",
Linus Torvalds1da177e2005-04-16 15:20:36 -07001879 (unsigned long) mfc->mfc_mcastgrp,
1880 (unsigned long) mfc->mfc_origin,
Benjamin Thery1ea472e2008-12-03 22:21:47 -08001881 mfc->mfc_parent);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001882
1883 if (it->cache != &mfc_unres_queue) {
Benjamin Thery1ea472e2008-12-03 22:21:47 -08001884 seq_printf(seq, " %8lu %8lu %8lu",
1885 mfc->mfc_un.res.pkt,
1886 mfc->mfc_un.res.bytes,
1887 mfc->mfc_un.res.wrong_if);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001888 for (n = mfc->mfc_un.res.minvif;
1889 n < mfc->mfc_un.res.maxvif; n++ ) {
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001890 if (VIF_EXISTS(net, n) &&
Benjamin Therycf958ae32009-01-22 04:56:16 +00001891 mfc->mfc_un.res.ttls[n] < 255)
1892 seq_printf(seq,
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001893 " %2d:%-3d",
Linus Torvalds1da177e2005-04-16 15:20:36 -07001894 n, mfc->mfc_un.res.ttls[n]);
1895 }
Benjamin Thery1ea472e2008-12-03 22:21:47 -08001896 } else {
1897 /* unresolved mfc_caches don't contain
1898 * pkt, bytes and wrong_if values
1899 */
1900 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001901 }
1902 seq_putc(seq, '\n');
1903 }
1904 return 0;
1905}
1906
Stephen Hemmingerf6908082007-03-12 14:34:29 -07001907static const struct seq_operations ipmr_mfc_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001908 .start = ipmr_mfc_seq_start,
1909 .next = ipmr_mfc_seq_next,
1910 .stop = ipmr_mfc_seq_stop,
1911 .show = ipmr_mfc_seq_show,
1912};
1913
1914static int ipmr_mfc_open(struct inode *inode, struct file *file)
1915{
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001916 return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
1917 sizeof(struct ipmr_mfc_iter));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001918}
1919
Arjan van de Ven9a321442007-02-12 00:55:35 -08001920static const struct file_operations ipmr_mfc_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001921 .owner = THIS_MODULE,
1922 .open = ipmr_mfc_open,
1923 .read = seq_read,
1924 .llseek = seq_lseek,
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001925 .release = seq_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001926};
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001927#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001928
1929#ifdef CONFIG_IP_PIMSM_V2
1930static struct net_protocol pim_protocol = {
1931 .handler = pim_rcv,
1932};
1933#endif
1934
1935
1936/*
1937 * Setup for IP multicast routing
1938 */
Benjamin Therycf958ae32009-01-22 04:56:16 +00001939static int __net_init ipmr_net_init(struct net *net)
1940{
1941 int err = 0;
1942
1943 net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device),
1944 GFP_KERNEL);
1945 if (!net->ipv4.vif_table) {
1946 err = -ENOMEM;
1947 goto fail;
1948 }
Benjamin Thery2bb8b262009-01-22 04:56:18 +00001949
1950 /* Forwarding cache */
1951 net->ipv4.mfc_cache_array = kcalloc(MFC_LINES,
1952 sizeof(struct mfc_cache *),
1953 GFP_KERNEL);
1954 if (!net->ipv4.mfc_cache_array) {
1955 err = -ENOMEM;
1956 goto fail_mfc_cache;
1957 }
Benjamin Thery6c5143d2009-01-22 04:56:21 +00001958
1959#ifdef CONFIG_IP_PIMSM
1960 net->ipv4.mroute_reg_vif_num = -1;
1961#endif
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001962
1963#ifdef CONFIG_PROC_FS
1964 err = -ENOMEM;
1965 if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
1966 goto proc_vif_fail;
1967 if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
1968 goto proc_cache_fail;
1969#endif
Benjamin Thery2bb8b262009-01-22 04:56:18 +00001970 return 0;
1971
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001972#ifdef CONFIG_PROC_FS
1973proc_cache_fail:
1974 proc_net_remove(net, "ip_mr_vif");
1975proc_vif_fail:
1976 kfree(net->ipv4.mfc_cache_array);
1977#endif
Benjamin Thery2bb8b262009-01-22 04:56:18 +00001978fail_mfc_cache:
1979 kfree(net->ipv4.vif_table);
Benjamin Therycf958ae32009-01-22 04:56:16 +00001980fail:
1981 return err;
1982}
1983
1984static void __net_exit ipmr_net_exit(struct net *net)
1985{
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001986#ifdef CONFIG_PROC_FS
1987 proc_net_remove(net, "ip_mr_cache");
1988 proc_net_remove(net, "ip_mr_vif");
1989#endif
Benjamin Thery2bb8b262009-01-22 04:56:18 +00001990 kfree(net->ipv4.mfc_cache_array);
Benjamin Therycf958ae32009-01-22 04:56:16 +00001991 kfree(net->ipv4.vif_table);
1992}
1993
1994static struct pernet_operations ipmr_net_ops = {
1995 .init = ipmr_net_init,
1996 .exit = ipmr_net_exit,
1997};
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001998
Wang Chen03d2f892008-07-03 12:13:36 +08001999int __init ip_mr_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002000{
Wang Chen03d2f892008-07-03 12:13:36 +08002001 int err;
2002
Linus Torvalds1da177e2005-04-16 15:20:36 -07002003 mrt_cachep = kmem_cache_create("ip_mrt_cache",
2004 sizeof(struct mfc_cache),
Alexey Dobriyane5d679f332006-08-26 19:25:52 -07002005 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
Paul Mundt20c2df82007-07-20 10:11:58 +09002006 NULL);
Wang Chen03d2f892008-07-03 12:13:36 +08002007 if (!mrt_cachep)
2008 return -ENOMEM;
2009
Benjamin Therycf958ae32009-01-22 04:56:16 +00002010 err = register_pernet_subsys(&ipmr_net_ops);
2011 if (err)
2012 goto reg_pernet_fail;
2013
Pavel Emelyanovb24b8a22008-01-23 21:20:07 -08002014 setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
Wang Chen03d2f892008-07-03 12:13:36 +08002015 err = register_netdevice_notifier(&ip_mr_notifier);
2016 if (err)
2017 goto reg_notif_fail;
Wang Chen03d2f892008-07-03 12:13:36 +08002018 return 0;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002019
Benjamin Theryc3e38892008-11-19 14:07:41 -08002020reg_notif_fail:
2021 del_timer(&ipmr_expire_timer);
Benjamin Therycf958ae32009-01-22 04:56:16 +00002022 unregister_pernet_subsys(&ipmr_net_ops);
2023reg_pernet_fail:
Benjamin Theryc3e38892008-11-19 14:07:41 -08002024 kmem_cache_destroy(mrt_cachep);
Wang Chen03d2f892008-07-03 12:13:36 +08002025 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002026}