blob: cfac26d674ede45d76fb199ce154297c7cf59609 [file] [log] [blame]
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001/*
2 * Linux IPv6 multicast routing support for BSD pim6sd
3 * Based on net/ipv4/ipmr.c.
4 *
5 * (c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
6 * LSIIT Laboratory, Strasbourg, France
7 * (c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
8 * 6WIND, Paris, France
9 * Copyright (C)2007,2008 USAGI/WIDE Project
10 * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 */
18
19#include <asm/system.h>
20#include <asm/uaccess.h>
21#include <linux/types.h>
22#include <linux/sched.h>
23#include <linux/errno.h>
24#include <linux/timer.h>
25#include <linux/mm.h>
26#include <linux/kernel.h>
27#include <linux/fcntl.h>
28#include <linux/stat.h>
29#include <linux/socket.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090030#include <linux/inet.h>
31#include <linux/netdevice.h>
32#include <linux/inetdevice.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090033#include <linux/proc_fs.h>
34#include <linux/seq_file.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090035#include <linux/init.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090036#include <net/protocol.h>
37#include <linux/skbuff.h>
38#include <net/sock.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090039#include <net/raw.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090040#include <linux/notifier.h>
41#include <linux/if_arp.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090042#include <net/checksum.h>
43#include <net/netlink.h>
44
45#include <net/ipv6.h>
46#include <net/ip6_route.h>
47#include <linux/mroute6.h>
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +090048#include <linux/pim.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090049#include <net/addrconf.h>
50#include <linux/netfilter_ipv6.h>
51
/* Socket that cache reports are queued to; installed by ip6mr_sk_init()
 * and cleared by ip6mr_sk_done(). NULL when no socket is registered.
 */
struct sock *mroute6_socket;


/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are serialized via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

static struct mif_device vif6_table[MAXMIFS];		/* Devices 		*/
static int maxvif;	/* One past the highest in-use slot of vif6_table */

/* True when slot idx of vif6_table has a device attached. */
#define MIF_EXISTS(idx) (vif6_table[idx].dev != NULL)

static int mroute_do_assert;				/* Set in PIM assert	*/
#ifdef CONFIG_IPV6_PIMSM_V2
static int mroute_do_pim;
#else
#define mroute_do_pim 0
#endif

static struct mfc6_cache *mfc6_cache_array[MFC6_LINES];	/* Forwarding cache	*/

static struct mfc6_cache *mfc_unres_queue;		/* Queue of unresolved entries */
static atomic_t cache_resolve_queue_len;		/* Size of unresolved	*/

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
   entries is changed only in process context and protected
   with weak lock mrt_lock. Queue of unresolved entries is protected
   with strong spinlock mfc_unres_lock.

   In this case data path is free of exclusive locks at all.
 */

/* Slab cache from which every struct mfc6_cache is allocated. */
static struct kmem_cache *mrt_cachep __read_mostly;

static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache);
static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert);
static int ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm);

#ifdef CONFIG_IPV6_PIMSM_V2
static struct inet6_protocol pim6_protocol;
#endif

/* Single timer that expires entries on mfc_unres_queue. */
static struct timer_list ipmr_expire_timer;
104
105
106#ifdef CONFIG_PROC_FS
107
/* Iteration state for the /proc listing of the multicast forwarding cache. */
struct ipmr_mfc_iter {
	/* Table currently being walked: mfc6_cache_array, &mfc_unres_queue,
	 * or NULL when no table (and therefore no lock) is held.
	 */
	struct mfc6_cache **cache;
	/* Current hash line while walking mfc6_cache_array. */
	int ct;
};
112
113
114static struct mfc6_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
115{
116 struct mfc6_cache *mfc;
117
118 it->cache = mfc6_cache_array;
119 read_lock(&mrt_lock);
120 for (it->ct = 0; it->ct < ARRAY_SIZE(mfc6_cache_array); it->ct++)
121 for (mfc = mfc6_cache_array[it->ct]; mfc; mfc = mfc->next)
122 if (pos-- == 0)
123 return mfc;
124 read_unlock(&mrt_lock);
125
126 it->cache = &mfc_unres_queue;
127 spin_lock_bh(&mfc_unres_lock);
128 for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
129 if (pos-- == 0)
130 return mfc;
131 spin_unlock_bh(&mfc_unres_lock);
132
133 it->cache = NULL;
134 return NULL;
135}
136
137
138
139
140/*
141 * The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
142 */
143
/* Iteration state for the /proc listing of multicast interfaces. */
struct ipmr_vif_iter {
	int ct;		/* index into vif6_table of the current entry */
};
147
148static struct mif_device *ip6mr_vif_seq_idx(struct ipmr_vif_iter *iter,
149 loff_t pos)
150{
151 for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) {
152 if (!MIF_EXISTS(iter->ct))
153 continue;
154 if (pos-- == 0)
155 return &vif6_table[iter->ct];
156 }
157 return NULL;
158}
159
160static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
161 __acquires(mrt_lock)
162{
163 read_lock(&mrt_lock);
164 return (*pos ? ip6mr_vif_seq_idx(seq->private, *pos - 1)
165 : SEQ_START_TOKEN);
166}
167
168static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
169{
170 struct ipmr_vif_iter *iter = seq->private;
171
172 ++*pos;
173 if (v == SEQ_START_TOKEN)
174 return ip6mr_vif_seq_idx(iter, 0);
175
176 while (++iter->ct < maxvif) {
177 if (!MIF_EXISTS(iter->ct))
178 continue;
179 return &vif6_table[iter->ct];
180 }
181 return NULL;
182}
183
/* seq_file ->stop: drop the read lock taken by ip6mr_vif_seq_start(). */
static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}
189
190static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
191{
192 if (v == SEQ_START_TOKEN) {
193 seq_puts(seq,
194 "Interface BytesIn PktsIn BytesOut PktsOut Flags\n");
195 } else {
196 const struct mif_device *vif = v;
197 const char *name = vif->dev ? vif->dev->name : "none";
198
199 seq_printf(seq,
Al Virod430a222008-06-02 10:59:02 +0100200 "%2td %-10s %8ld %7ld %8ld %7ld %05X\n",
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900201 vif - vif6_table,
202 name, vif->bytes_in, vif->pkt_in,
203 vif->bytes_out, vif->pkt_out,
204 vif->flags);
205 }
206 return 0;
207}
208
/* seq_file callbacks for the "ip6_mr_vif" /proc entry. */
static struct seq_operations ip6mr_vif_seq_ops = {
	.start = ip6mr_vif_seq_start,
	.next  = ip6mr_vif_seq_next,
	.stop  = ip6mr_vif_seq_stop,
	.show  = ip6mr_vif_seq_show,
};
215
/* ->open: start a seq_file session with a private struct ipmr_vif_iter. */
static int ip6mr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_private(file, &ip6mr_vif_seq_ops,
				sizeof(struct ipmr_vif_iter));
}
221
/* File operations for the "ip6_mr_vif" /proc entry (see ip6_mr_init()). */
static struct file_operations ip6mr_vif_fops = {
	.owner	 = THIS_MODULE,
	.open    = ip6mr_vif_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release,
};
229
230static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
231{
232 return (*pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1)
233 : SEQ_START_TOKEN);
234}
235
236static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
237{
238 struct mfc6_cache *mfc = v;
239 struct ipmr_mfc_iter *it = seq->private;
240
241 ++*pos;
242
243 if (v == SEQ_START_TOKEN)
244 return ipmr_mfc_seq_idx(seq->private, 0);
245
246 if (mfc->next)
247 return mfc->next;
248
249 if (it->cache == &mfc_unres_queue)
250 goto end_of_list;
251
252 BUG_ON(it->cache != mfc6_cache_array);
253
254 while (++it->ct < ARRAY_SIZE(mfc6_cache_array)) {
255 mfc = mfc6_cache_array[it->ct];
256 if (mfc)
257 return mfc;
258 }
259
260 /* exhausted cache_array, show unresolved */
261 read_unlock(&mrt_lock);
262 it->cache = &mfc_unres_queue;
263 it->ct = 0;
264
265 spin_lock_bh(&mfc_unres_lock);
266 mfc = mfc_unres_queue;
267 if (mfc)
268 return mfc;
269
270 end_of_list:
271 spin_unlock_bh(&mfc_unres_lock);
272 it->cache = NULL;
273
274 return NULL;
275}
276
277static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
278{
279 struct ipmr_mfc_iter *it = seq->private;
280
281 if (it->cache == &mfc_unres_queue)
282 spin_unlock_bh(&mfc_unres_lock);
283 else if (it->cache == mfc6_cache_array)
284 read_unlock(&mrt_lock);
285}
286
287static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
288{
289 int n;
290
291 if (v == SEQ_START_TOKEN) {
292 seq_puts(seq,
293 "Group "
294 "Origin "
295 "Iif Pkts Bytes Wrong Oifs\n");
296 } else {
297 const struct mfc6_cache *mfc = v;
298 const struct ipmr_mfc_iter *it = seq->private;
299
300 seq_printf(seq,
301 NIP6_FMT " " NIP6_FMT " %-3d %8ld %8ld %8ld",
302 NIP6(mfc->mf6c_mcastgrp), NIP6(mfc->mf6c_origin),
303 mfc->mf6c_parent,
304 mfc->mfc_un.res.pkt,
305 mfc->mfc_un.res.bytes,
306 mfc->mfc_un.res.wrong_if);
307
308 if (it->cache != &mfc_unres_queue) {
309 for (n = mfc->mfc_un.res.minvif;
310 n < mfc->mfc_un.res.maxvif; n++) {
311 if (MIF_EXISTS(n) &&
312 mfc->mfc_un.res.ttls[n] < 255)
313 seq_printf(seq,
314 " %2d:%-3d",
315 n, mfc->mfc_un.res.ttls[n]);
316 }
317 }
318 seq_putc(seq, '\n');
319 }
320 return 0;
321}
322
/* seq_file callbacks for the "ip6_mr_cache" /proc entry. */
static struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};
329
/* ->open: start a seq_file session with a private struct ipmr_mfc_iter. */
static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_private(file, &ipmr_mfc_seq_ops,
				sizeof(struct ipmr_mfc_iter));
}
335
/* File operations for the "ip6_mr_cache" /proc entry (see ip6_mr_init()). */
static struct file_operations ip6mr_mfc_fops = {
	.owner	 = THIS_MODULE,
	.open    = ipmr_mfc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release,
};
343#endif
344
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900345#ifdef CONFIG_IPV6_PIMSM_V2
346static int reg_vif_num = -1;
347
348static int pim6_rcv(struct sk_buff *skb)
349{
350 struct pimreghdr *pim;
351 struct ipv6hdr *encap;
352 struct net_device *reg_dev = NULL;
353
354 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
355 goto drop;
356
357 pim = (struct pimreghdr *)skb_transport_header(skb);
358 if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
359 (pim->flags & PIM_NULL_REGISTER) ||
360 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
Al Viroec6b4862008-04-26 22:28:58 -0700361 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900362 goto drop;
363
364 /* check if the inner packet is destined to mcast group */
365 encap = (struct ipv6hdr *)(skb_transport_header(skb) +
366 sizeof(*pim));
367
368 if (!ipv6_addr_is_multicast(&encap->daddr) ||
369 encap->payload_len == 0 ||
370 ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
371 goto drop;
372
373 read_lock(&mrt_lock);
374 if (reg_vif_num >= 0)
375 reg_dev = vif6_table[reg_vif_num].dev;
376 if (reg_dev)
377 dev_hold(reg_dev);
378 read_unlock(&mrt_lock);
379
380 if (reg_dev == NULL)
381 goto drop;
382
383 skb->mac_header = skb->network_header;
384 skb_pull(skb, (u8 *)encap - skb->data);
385 skb_reset_network_header(skb);
386 skb->dev = reg_dev;
387 skb->protocol = htons(ETH_P_IP);
388 skb->ip_summed = 0;
389 skb->pkt_type = PACKET_HOST;
390 dst_release(skb->dst);
Pavel Emelyanovdc58c782008-05-21 14:17:54 -0700391 reg_dev->stats.rx_bytes += skb->len;
392 reg_dev->stats.rx_packets++;
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900393 skb->dst = NULL;
394 nf_reset(skb);
395 netif_rx(skb);
396 dev_put(reg_dev);
397 return 0;
398 drop:
399 kfree_skb(skb);
400 return 0;
401}
402
/* Protocol descriptor whose receive handler is pim6_rcv(). */
static struct inet6_protocol pim6_protocol = {
	.handler	=	pim6_rcv,
};
406
407/* Service routines creating virtual interfaces: PIMREG */
408
409static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
410{
411 read_lock(&mrt_lock);
Pavel Emelyanovdc58c782008-05-21 14:17:54 -0700412 dev->stats.tx_bytes += skb->len;
413 dev->stats.tx_packets++;
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900414 ip6mr_cache_report(skb, reg_vif_num, MRT6MSG_WHOLEPKT);
415 read_unlock(&mrt_lock);
416 kfree_skb(skb);
417 return 0;
418}
419
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900420static void reg_vif_setup(struct net_device *dev)
421{
422 dev->type = ARPHRD_PIMREG;
423 dev->mtu = 1500 - sizeof(struct ipv6hdr) - 8;
424 dev->flags = IFF_NOARP;
425 dev->hard_start_xmit = reg_vif_xmit;
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900426 dev->destructor = free_netdev;
427}
428
429static struct net_device *ip6mr_reg_vif(void)
430{
431 struct net_device *dev;
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900432
Pavel Emelyanovdc58c782008-05-21 14:17:54 -0700433 dev = alloc_netdev(0, "pim6reg", reg_vif_setup);
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900434 if (dev == NULL)
435 return NULL;
436
437 if (register_netdevice(dev)) {
438 free_netdev(dev);
439 return NULL;
440 }
441 dev->iflink = 0;
442
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900443 if (dev_open(dev))
444 goto failure;
445
446 return dev;
447
448failure:
449 /* allow the register to be completed before unregistering. */
450 rtnl_unlock();
451 rtnl_lock();
452
453 unregister_netdevice(dev);
454 return NULL;
455}
456#endif
457
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900458/*
459 * Delete a VIF entry
460 */
461
462static int mif6_delete(int vifi)
463{
464 struct mif_device *v;
465 struct net_device *dev;
466 if (vifi < 0 || vifi >= maxvif)
467 return -EADDRNOTAVAIL;
468
469 v = &vif6_table[vifi];
470
471 write_lock_bh(&mrt_lock);
472 dev = v->dev;
473 v->dev = NULL;
474
475 if (!dev) {
476 write_unlock_bh(&mrt_lock);
477 return -EADDRNOTAVAIL;
478 }
479
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900480#ifdef CONFIG_IPV6_PIMSM_V2
481 if (vifi == reg_vif_num)
482 reg_vif_num = -1;
483#endif
484
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900485 if (vifi + 1 == maxvif) {
486 int tmp;
487 for (tmp = vifi - 1; tmp >= 0; tmp--) {
488 if (MIF_EXISTS(tmp))
489 break;
490 }
491 maxvif = tmp + 1;
492 }
493
494 write_unlock_bh(&mrt_lock);
495
496 dev_set_allmulti(dev, -1);
497
498 if (v->flags & MIFF_REGISTER)
499 unregister_netdevice(dev);
500
501 dev_put(dev);
502 return 0;
503}
504
505/* Destroy an unresolved cache entry, killing queued skbs
506 and reporting error to netlink readers.
507 */
508
509static void ip6mr_destroy_unres(struct mfc6_cache *c)
510{
511 struct sk_buff *skb;
512
513 atomic_dec(&cache_resolve_queue_len);
514
515 while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
516 if (ipv6_hdr(skb)->version == 0) {
517 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
518 nlh->nlmsg_type = NLMSG_ERROR;
519 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
520 skb_trim(skb, nlh->nlmsg_len);
521 ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
522 rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
523 } else
524 kfree_skb(skb);
525 }
526
527 kmem_cache_free(mrt_cachep, c);
528}
529
530
531/* Single timer process for all the unresolved queue. */
532
533static void ipmr_do_expire_process(unsigned long dummy)
534{
535 unsigned long now = jiffies;
536 unsigned long expires = 10 * HZ;
537 struct mfc6_cache *c, **cp;
538
539 cp = &mfc_unres_queue;
540
541 while ((c = *cp) != NULL) {
542 if (time_after(c->mfc_un.unres.expires, now)) {
543 /* not yet... */
544 unsigned long interval = c->mfc_un.unres.expires - now;
545 if (interval < expires)
546 expires = interval;
547 cp = &c->next;
548 continue;
549 }
550
551 *cp = c->next;
552 ip6mr_destroy_unres(c);
553 }
554
555 if (atomic_read(&cache_resolve_queue_len))
556 mod_timer(&ipmr_expire_timer, jiffies + expires);
557}
558
559static void ipmr_expire_process(unsigned long dummy)
560{
561 if (!spin_trylock(&mfc_unres_lock)) {
562 mod_timer(&ipmr_expire_timer, jiffies + 1);
563 return;
564 }
565
566 if (atomic_read(&cache_resolve_queue_len))
567 ipmr_do_expire_process(dummy);
568
569 spin_unlock(&mfc_unres_lock);
570}
571
572/* Fill oifs list. It is called under write locked mrt_lock. */
573
574static void ip6mr_update_thresholds(struct mfc6_cache *cache, unsigned char *ttls)
575{
576 int vifi;
577
Rami Rosen6ac7eb02008-04-10 12:40:10 +0300578 cache->mfc_un.res.minvif = MAXMIFS;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900579 cache->mfc_un.res.maxvif = 0;
Rami Rosen6ac7eb02008-04-10 12:40:10 +0300580 memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900581
582 for (vifi = 0; vifi < maxvif; vifi++) {
583 if (MIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) {
584 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
585 if (cache->mfc_un.res.minvif > vifi)
586 cache->mfc_un.res.minvif = vifi;
587 if (cache->mfc_un.res.maxvif <= vifi)
588 cache->mfc_un.res.maxvif = vifi + 1;
589 }
590 }
591}
592
593static int mif6_add(struct mif6ctl *vifc, int mrtsock)
594{
595 int vifi = vifc->mif6c_mifi;
596 struct mif_device *v = &vif6_table[vifi];
597 struct net_device *dev;
598
599 /* Is vif busy ? */
600 if (MIF_EXISTS(vifi))
601 return -EADDRINUSE;
602
603 switch (vifc->mif6c_flags) {
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900604#ifdef CONFIG_IPV6_PIMSM_V2
605 case MIFF_REGISTER:
606 /*
607 * Special Purpose VIF in PIM
608 * All the packets will be sent to the daemon
609 */
610 if (reg_vif_num >= 0)
611 return -EADDRINUSE;
612 dev = ip6mr_reg_vif();
613 if (!dev)
614 return -ENOBUFS;
615 break;
616#endif
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900617 case 0:
618 dev = dev_get_by_index(&init_net, vifc->mif6c_pifi);
619 if (!dev)
620 return -EADDRNOTAVAIL;
621 dev_put(dev);
622 break;
623 default:
624 return -EINVAL;
625 }
626
627 dev_set_allmulti(dev, 1);
628
629 /*
630 * Fill in the VIF structures
631 */
632 v->rate_limit = vifc->vifc_rate_limit;
633 v->flags = vifc->mif6c_flags;
634 if (!mrtsock)
635 v->flags |= VIFF_STATIC;
636 v->threshold = vifc->vifc_threshold;
637 v->bytes_in = 0;
638 v->bytes_out = 0;
639 v->pkt_in = 0;
640 v->pkt_out = 0;
641 v->link = dev->ifindex;
642 if (v->flags & MIFF_REGISTER)
643 v->link = dev->iflink;
644
645 /* And finish update writing critical data */
646 write_lock_bh(&mrt_lock);
647 dev_hold(dev);
648 v->dev = dev;
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900649#ifdef CONFIG_IPV6_PIMSM_V2
650 if (v->flags & MIFF_REGISTER)
651 reg_vif_num = vifi;
652#endif
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900653 if (vifi + 1 > maxvif)
654 maxvif = vifi + 1;
655 write_unlock_bh(&mrt_lock);
656 return 0;
657}
658
659static struct mfc6_cache *ip6mr_cache_find(struct in6_addr *origin, struct in6_addr *mcastgrp)
660{
661 int line = MFC6_HASH(mcastgrp, origin);
662 struct mfc6_cache *c;
663
664 for (c = mfc6_cache_array[line]; c; c = c->next) {
665 if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
666 ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
667 break;
668 }
669 return c;
670}
671
672/*
673 * Allocate a multicast cache entry
674 */
675static struct mfc6_cache *ip6mr_cache_alloc(void)
676{
677 struct mfc6_cache *c = kmem_cache_alloc(mrt_cachep, GFP_KERNEL);
678 if (c == NULL)
679 return NULL;
680 memset(c, 0, sizeof(*c));
Rami Rosen6ac7eb02008-04-10 12:40:10 +0300681 c->mfc_un.res.minvif = MAXMIFS;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900682 return c;
683}
684
685static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
686{
687 struct mfc6_cache *c = kmem_cache_alloc(mrt_cachep, GFP_ATOMIC);
688 if (c == NULL)
689 return NULL;
690 memset(c, 0, sizeof(*c));
691 skb_queue_head_init(&c->mfc_un.unres.unresolved);
692 c->mfc_un.unres.expires = jiffies + 10 * HZ;
693 return c;
694}
695
696/*
697 * A cache entry has gone into a resolved state from queued
698 */
699
700static void ip6mr_cache_resolve(struct mfc6_cache *uc, struct mfc6_cache *c)
701{
702 struct sk_buff *skb;
703
704 /*
705 * Play the pending entries through our router
706 */
707
708 while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
709 if (ipv6_hdr(skb)->version == 0) {
710 int err;
711 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
712
713 if (ip6mr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
YOSHIFUJI Hideaki549e0282008-04-05 22:17:39 +0900714 nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900715 } else {
716 nlh->nlmsg_type = NLMSG_ERROR;
717 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
718 skb_trim(skb, nlh->nlmsg_len);
719 ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
720 }
721 err = rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
722 } else
723 ip6_mr_forward(skb, c);
724 }
725}
726
727/*
728 * Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
729 * expects the following bizarre scheme.
730 *
731 * Called under mrt_lock.
732 */
733
Rami Rosen6ac7eb02008-04-10 12:40:10 +0300734static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert)
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900735{
736 struct sk_buff *skb;
737 struct mrt6msg *msg;
738 int ret;
739
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900740#ifdef CONFIG_IPV6_PIMSM_V2
741 if (assert == MRT6MSG_WHOLEPKT)
742 skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
743 +sizeof(*msg));
744 else
745#endif
746 skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900747
748 if (!skb)
749 return -ENOBUFS;
750
751 /* I suppose that internal messages
752 * do not require checksums */
753
754 skb->ip_summed = CHECKSUM_UNNECESSARY;
755
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900756#ifdef CONFIG_IPV6_PIMSM_V2
757 if (assert == MRT6MSG_WHOLEPKT) {
758 /* Ugly, but we have no choice with this interface.
759 Duplicate old header, fix length etc.
760 And all this only to mangle msg->im6_msgtype and
761 to set msg->im6_mbz to "mbz" :-)
762 */
763 skb_push(skb, -skb_network_offset(pkt));
764
765 skb_push(skb, sizeof(*msg));
766 skb_reset_transport_header(skb);
767 msg = (struct mrt6msg *)skb_transport_header(skb);
768 msg->im6_mbz = 0;
769 msg->im6_msgtype = MRT6MSG_WHOLEPKT;
770 msg->im6_mif = reg_vif_num;
771 msg->im6_pad = 0;
772 ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
773 ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
774
775 skb->ip_summed = CHECKSUM_UNNECESSARY;
776 } else
777#endif
778 {
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900779 /*
780 * Copy the IP header
781 */
782
783 skb_put(skb, sizeof(struct ipv6hdr));
784 skb_reset_network_header(skb);
785 skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
786
787 /*
788 * Add our header
789 */
790 skb_put(skb, sizeof(*msg));
791 skb_reset_transport_header(skb);
792 msg = (struct mrt6msg *)skb_transport_header(skb);
793
794 msg->im6_mbz = 0;
795 msg->im6_msgtype = assert;
Rami Rosen6ac7eb02008-04-10 12:40:10 +0300796 msg->im6_mif = mifi;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900797 msg->im6_pad = 0;
798 ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
799 ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
800
801 skb->dst = dst_clone(pkt->dst);
802 skb->ip_summed = CHECKSUM_UNNECESSARY;
803
804 skb_pull(skb, sizeof(struct ipv6hdr));
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900805 }
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900806
807 if (mroute6_socket == NULL) {
808 kfree_skb(skb);
809 return -EINVAL;
810 }
811
812 /*
813 * Deliver to user space multicast routing algorithms
814 */
815 if ((ret = sock_queue_rcv_skb(mroute6_socket, skb)) < 0) {
816 if (net_ratelimit())
817 printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
818 kfree_skb(skb);
819 }
820
821 return ret;
822}
823
824/*
825 * Queue a packet for resolution. It gets locked cache entry!
826 */
827
828static int
Rami Rosen6ac7eb02008-04-10 12:40:10 +0300829ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb)
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900830{
831 int err;
832 struct mfc6_cache *c;
833
834 spin_lock_bh(&mfc_unres_lock);
835 for (c = mfc_unres_queue; c; c = c->next) {
836 if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
837 ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr))
838 break;
839 }
840
841 if (c == NULL) {
842 /*
843 * Create a new entry if allowable
844 */
845
846 if (atomic_read(&cache_resolve_queue_len) >= 10 ||
847 (c = ip6mr_cache_alloc_unres()) == NULL) {
848 spin_unlock_bh(&mfc_unres_lock);
849
850 kfree_skb(skb);
851 return -ENOBUFS;
852 }
853
854 /*
855 * Fill in the new cache entry
856 */
857 c->mf6c_parent = -1;
858 c->mf6c_origin = ipv6_hdr(skb)->saddr;
859 c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
860
861 /*
862 * Reflect first query at pim6sd
863 */
Rami Rosen6ac7eb02008-04-10 12:40:10 +0300864 if ((err = ip6mr_cache_report(skb, mifi, MRT6MSG_NOCACHE)) < 0) {
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900865 /* If the report failed throw the cache entry
866 out - Brad Parker
867 */
868 spin_unlock_bh(&mfc_unres_lock);
869
870 kmem_cache_free(mrt_cachep, c);
871 kfree_skb(skb);
872 return err;
873 }
874
875 atomic_inc(&cache_resolve_queue_len);
876 c->next = mfc_unres_queue;
877 mfc_unres_queue = c;
878
879 ipmr_do_expire_process(1);
880 }
881
882 /*
883 * See if we can append the packet
884 */
885 if (c->mfc_un.unres.unresolved.qlen > 3) {
886 kfree_skb(skb);
887 err = -ENOBUFS;
888 } else {
889 skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
890 err = 0;
891 }
892
893 spin_unlock_bh(&mfc_unres_lock);
894 return err;
895}
896
897/*
898 * MFC6 cache manipulation by user space
899 */
900
901static int ip6mr_mfc_delete(struct mf6cctl *mfc)
902{
903 int line;
904 struct mfc6_cache *c, **cp;
905
906 line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
907
908 for (cp = &mfc6_cache_array[line]; (c = *cp) != NULL; cp = &c->next) {
909 if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
910 ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
911 write_lock_bh(&mrt_lock);
912 *cp = c->next;
913 write_unlock_bh(&mrt_lock);
914
915 kmem_cache_free(mrt_cachep, c);
916 return 0;
917 }
918 }
919 return -ENOENT;
920}
921
922static int ip6mr_device_event(struct notifier_block *this,
923 unsigned long event, void *ptr)
924{
925 struct net_device *dev = ptr;
926 struct mif_device *v;
927 int ct;
928
929 if (dev_net(dev) != &init_net)
930 return NOTIFY_DONE;
931
932 if (event != NETDEV_UNREGISTER)
933 return NOTIFY_DONE;
934
935 v = &vif6_table[0];
936 for (ct = 0; ct < maxvif; ct++, v++) {
937 if (v->dev == dev)
938 mif6_delete(ct);
939 }
940 return NOTIFY_DONE;
941}
942
/* Netdevice notifier block; registered in ip6_mr_init(). */
static struct notifier_block ip6_mr_notifier = {
	.notifier_call = ip6mr_device_event
};
946
947/*
948 * Setup for IP multicast routing
949 */
950
Wang Chen623d1a12008-07-03 12:13:30 +0800951int __init ip6_mr_init(void)
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900952{
Wang Chen623d1a12008-07-03 12:13:30 +0800953 int err;
954
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900955 mrt_cachep = kmem_cache_create("ip6_mrt_cache",
956 sizeof(struct mfc6_cache),
957 0, SLAB_HWCACHE_ALIGN,
958 NULL);
959 if (!mrt_cachep)
Wang Chen623d1a12008-07-03 12:13:30 +0800960 return -ENOMEM;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900961
962 setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
Wang Chen623d1a12008-07-03 12:13:30 +0800963 err = register_netdevice_notifier(&ip6_mr_notifier);
964 if (err)
965 goto reg_notif_fail;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900966#ifdef CONFIG_PROC_FS
Wang Chen623d1a12008-07-03 12:13:30 +0800967 err = -ENOMEM;
968 if (!proc_net_fops_create(&init_net, "ip6_mr_vif", 0, &ip6mr_vif_fops))
969 goto proc_vif_fail;
970 if (!proc_net_fops_create(&init_net, "ip6_mr_cache",
971 0, &ip6mr_mfc_fops))
972 goto proc_cache_fail;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900973#endif
Wang Chen623d1a12008-07-03 12:13:30 +0800974 return 0;
975reg_notif_fail:
976 kmem_cache_destroy(mrt_cachep);
977#ifdef CONFIG_PROC_FS
978proc_vif_fail:
979 unregister_netdevice_notifier(&ip6_mr_notifier);
980proc_cache_fail:
981 proc_net_remove(&init_net, "ip6_mr_vif");
982#endif
983 return err;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900984}
985
/*
 * Tear down what ip6_mr_init() set up: remove the /proc entries,
 * unregister the netdevice notifier, delete the expiry timer and destroy
 * the MFC slab cache.
 */
void ip6_mr_cleanup(void)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(&init_net, "ip6_mr_cache");
	proc_net_remove(&init_net, "ip6_mr_vif");
#endif
	unregister_netdevice_notifier(&ip6_mr_notifier);
	del_timer(&ipmr_expire_timer);
	kmem_cache_destroy(mrt_cachep);
}
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900996
997static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock)
998{
999 int line;
1000 struct mfc6_cache *uc, *c, **cp;
Rami Rosen6ac7eb02008-04-10 12:40:10 +03001001 unsigned char ttls[MAXMIFS];
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001002 int i;
1003
Rami Rosen6ac7eb02008-04-10 12:40:10 +03001004 memset(ttls, 255, MAXMIFS);
1005 for (i = 0; i < MAXMIFS; i++) {
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001006 if (IF_ISSET(i, &mfc->mf6cc_ifset))
1007 ttls[i] = 1;
1008
1009 }
1010
1011 line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1012
1013 for (cp = &mfc6_cache_array[line]; (c = *cp) != NULL; cp = &c->next) {
1014 if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1015 ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr))
1016 break;
1017 }
1018
1019 if (c != NULL) {
1020 write_lock_bh(&mrt_lock);
1021 c->mf6c_parent = mfc->mf6cc_parent;
1022 ip6mr_update_thresholds(c, ttls);
1023 if (!mrtsock)
1024 c->mfc_flags |= MFC_STATIC;
1025 write_unlock_bh(&mrt_lock);
1026 return 0;
1027 }
1028
1029 if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1030 return -EINVAL;
1031
1032 c = ip6mr_cache_alloc();
1033 if (c == NULL)
1034 return -ENOMEM;
1035
1036 c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1037 c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1038 c->mf6c_parent = mfc->mf6cc_parent;
1039 ip6mr_update_thresholds(c, ttls);
1040 if (!mrtsock)
1041 c->mfc_flags |= MFC_STATIC;
1042
1043 write_lock_bh(&mrt_lock);
1044 c->next = mfc6_cache_array[line];
1045 mfc6_cache_array[line] = c;
1046 write_unlock_bh(&mrt_lock);
1047
1048 /*
1049 * Check to see if we resolved a queued list. If so we
1050 * need to send on the frames and tidy up.
1051 */
1052 spin_lock_bh(&mfc_unres_lock);
1053 for (cp = &mfc_unres_queue; (uc = *cp) != NULL;
1054 cp = &uc->next) {
1055 if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1056 ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1057 *cp = uc->next;
1058 if (atomic_dec_and_test(&cache_resolve_queue_len))
1059 del_timer(&ipmr_expire_timer);
1060 break;
1061 }
1062 }
1063 spin_unlock_bh(&mfc_unres_lock);
1064
1065 if (uc) {
1066 ip6mr_cache_resolve(uc, c);
1067 kmem_cache_free(mrt_cachep, uc);
1068 }
1069 return 0;
1070}
1071
1072/*
1073 * Close the multicast socket, and clear the vif tables etc
1074 */
1075
1076static void mroute_clean_tables(struct sock *sk)
1077{
1078 int i;
1079
1080 /*
1081 * Shut down all active vif entries
1082 */
1083 for (i = 0; i < maxvif; i++) {
1084 if (!(vif6_table[i].flags & VIFF_STATIC))
1085 mif6_delete(i);
1086 }
1087
1088 /*
1089 * Wipe the cache
1090 */
1091 for (i = 0; i < ARRAY_SIZE(mfc6_cache_array); i++) {
1092 struct mfc6_cache *c, **cp;
1093
1094 cp = &mfc6_cache_array[i];
1095 while ((c = *cp) != NULL) {
1096 if (c->mfc_flags & MFC_STATIC) {
1097 cp = &c->next;
1098 continue;
1099 }
1100 write_lock_bh(&mrt_lock);
1101 *cp = c->next;
1102 write_unlock_bh(&mrt_lock);
1103
1104 kmem_cache_free(mrt_cachep, c);
1105 }
1106 }
1107
1108 if (atomic_read(&cache_resolve_queue_len) != 0) {
1109 struct mfc6_cache *c;
1110
1111 spin_lock_bh(&mfc_unres_lock);
1112 while (mfc_unres_queue != NULL) {
1113 c = mfc_unres_queue;
1114 mfc_unres_queue = c->next;
1115 spin_unlock_bh(&mfc_unres_lock);
1116
1117 ip6mr_destroy_unres(c);
1118
1119 spin_lock_bh(&mfc_unres_lock);
1120 }
1121 spin_unlock_bh(&mfc_unres_lock);
1122 }
1123}
1124
1125static int ip6mr_sk_init(struct sock *sk)
1126{
1127 int err = 0;
1128
1129 rtnl_lock();
1130 write_lock_bh(&mrt_lock);
1131 if (likely(mroute6_socket == NULL))
1132 mroute6_socket = sk;
1133 else
1134 err = -EADDRINUSE;
1135 write_unlock_bh(&mrt_lock);
1136
1137 rtnl_unlock();
1138
1139 return err;
1140}
1141
1142int ip6mr_sk_done(struct sock *sk)
1143{
1144 int err = 0;
1145
1146 rtnl_lock();
1147 if (sk == mroute6_socket) {
1148 write_lock_bh(&mrt_lock);
1149 mroute6_socket = NULL;
1150 write_unlock_bh(&mrt_lock);
1151
1152 mroute_clean_tables(sk);
1153 } else
1154 err = -EACCES;
1155 rtnl_unlock();
1156
1157 return err;
1158}
1159
/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */
1166
1167int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int optlen)
1168{
1169 int ret;
1170 struct mif6ctl vif;
1171 struct mf6cctl mfc;
1172 mifi_t mifi;
1173
1174 if (optname != MRT6_INIT) {
1175 if (sk != mroute6_socket && !capable(CAP_NET_ADMIN))
1176 return -EACCES;
1177 }
1178
1179 switch (optname) {
1180 case MRT6_INIT:
1181 if (sk->sk_type != SOCK_RAW ||
1182 inet_sk(sk)->num != IPPROTO_ICMPV6)
1183 return -EOPNOTSUPP;
1184 if (optlen < sizeof(int))
1185 return -EINVAL;
1186
1187 return ip6mr_sk_init(sk);
1188
1189 case MRT6_DONE:
1190 return ip6mr_sk_done(sk);
1191
1192 case MRT6_ADD_MIF:
1193 if (optlen < sizeof(vif))
1194 return -EINVAL;
1195 if (copy_from_user(&vif, optval, sizeof(vif)))
1196 return -EFAULT;
Rami Rosen6ac7eb02008-04-10 12:40:10 +03001197 if (vif.mif6c_mifi >= MAXMIFS)
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001198 return -ENFILE;
1199 rtnl_lock();
1200 ret = mif6_add(&vif, sk == mroute6_socket);
1201 rtnl_unlock();
1202 return ret;
1203
1204 case MRT6_DEL_MIF:
1205 if (optlen < sizeof(mifi_t))
1206 return -EINVAL;
1207 if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1208 return -EFAULT;
1209 rtnl_lock();
1210 ret = mif6_delete(mifi);
1211 rtnl_unlock();
1212 return ret;
1213
1214 /*
1215 * Manipulate the forwarding caches. These live
1216 * in a sort of kernel/user symbiosis.
1217 */
1218 case MRT6_ADD_MFC:
1219 case MRT6_DEL_MFC:
1220 if (optlen < sizeof(mfc))
1221 return -EINVAL;
1222 if (copy_from_user(&mfc, optval, sizeof(mfc)))
1223 return -EFAULT;
1224 rtnl_lock();
1225 if (optname == MRT6_DEL_MFC)
1226 ret = ip6mr_mfc_delete(&mfc);
1227 else
1228 ret = ip6mr_mfc_add(&mfc, sk == mroute6_socket);
1229 rtnl_unlock();
1230 return ret;
1231
1232 /*
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +09001233 * Control PIM assert (to activate pim will activate assert)
1234 */
1235 case MRT6_ASSERT:
1236 {
1237 int v;
1238 if (get_user(v, (int __user *)optval))
1239 return -EFAULT;
1240 mroute_do_assert = !!v;
1241 return 0;
1242 }
1243
1244#ifdef CONFIG_IPV6_PIMSM_V2
1245 case MRT6_PIM:
1246 {
YOSHIFUJI Hideakia9f83bf2008-04-10 15:41:28 +09001247 int v;
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +09001248 if (get_user(v, (int __user *)optval))
1249 return -EFAULT;
1250 v = !!v;
1251 rtnl_lock();
1252 ret = 0;
1253 if (v != mroute_do_pim) {
1254 mroute_do_pim = v;
1255 mroute_do_assert = v;
1256 if (mroute_do_pim)
1257 ret = inet6_add_protocol(&pim6_protocol,
1258 IPPROTO_PIM);
1259 else
1260 ret = inet6_del_protocol(&pim6_protocol,
1261 IPPROTO_PIM);
1262 if (ret < 0)
1263 ret = -EAGAIN;
1264 }
1265 rtnl_unlock();
1266 return ret;
1267 }
1268
1269#endif
1270 /*
Rami Rosen7d120c52008-04-23 14:35:13 +03001271 * Spurious command, or MRT6_VERSION which you cannot
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001272 * set.
1273 */
1274 default:
1275 return -ENOPROTOOPT;
1276 }
1277}
1278
/*
 *	getsockopt() support for the multicast routing system.
 */
1282
1283int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1284 int __user *optlen)
1285{
1286 int olr;
1287 int val;
1288
1289 switch (optname) {
1290 case MRT6_VERSION:
1291 val = 0x0305;
1292 break;
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +09001293#ifdef CONFIG_IPV6_PIMSM_V2
1294 case MRT6_PIM:
1295 val = mroute_do_pim;
1296 break;
1297#endif
1298 case MRT6_ASSERT:
1299 val = mroute_do_assert;
1300 break;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001301 default:
1302 return -ENOPROTOOPT;
1303 }
1304
1305 if (get_user(olr, optlen))
1306 return -EFAULT;
1307
1308 olr = min_t(int, olr, sizeof(int));
1309 if (olr < 0)
1310 return -EINVAL;
1311
1312 if (put_user(olr, optlen))
1313 return -EFAULT;
1314 if (copy_to_user(optval, &val, olr))
1315 return -EFAULT;
1316 return 0;
1317}
1318
/*
 *	The IPv6 multicast ioctl support routines.
 */
1322
1323int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1324{
1325 struct sioc_sg_req6 sr;
1326 struct sioc_mif_req6 vr;
1327 struct mif_device *vif;
1328 struct mfc6_cache *c;
1329
1330 switch (cmd) {
1331 case SIOCGETMIFCNT_IN6:
1332 if (copy_from_user(&vr, arg, sizeof(vr)))
1333 return -EFAULT;
1334 if (vr.mifi >= maxvif)
1335 return -EINVAL;
1336 read_lock(&mrt_lock);
1337 vif = &vif6_table[vr.mifi];
1338 if (MIF_EXISTS(vr.mifi)) {
1339 vr.icount = vif->pkt_in;
1340 vr.ocount = vif->pkt_out;
1341 vr.ibytes = vif->bytes_in;
1342 vr.obytes = vif->bytes_out;
1343 read_unlock(&mrt_lock);
1344
1345 if (copy_to_user(arg, &vr, sizeof(vr)))
1346 return -EFAULT;
1347 return 0;
1348 }
1349 read_unlock(&mrt_lock);
1350 return -EADDRNOTAVAIL;
1351 case SIOCGETSGCNT_IN6:
1352 if (copy_from_user(&sr, arg, sizeof(sr)))
1353 return -EFAULT;
1354
1355 read_lock(&mrt_lock);
1356 c = ip6mr_cache_find(&sr.src.sin6_addr, &sr.grp.sin6_addr);
1357 if (c) {
1358 sr.pktcnt = c->mfc_un.res.pkt;
1359 sr.bytecnt = c->mfc_un.res.bytes;
1360 sr.wrong_if = c->mfc_un.res.wrong_if;
1361 read_unlock(&mrt_lock);
1362
1363 if (copy_to_user(arg, &sr, sizeof(sr)))
1364 return -EFAULT;
1365 return 0;
1366 }
1367 read_unlock(&mrt_lock);
1368 return -EADDRNOTAVAIL;
1369 default:
1370 return -ENOIOCTLCMD;
1371 }
1372}
1373
1374
1375static inline int ip6mr_forward2_finish(struct sk_buff *skb)
1376{
Rami Rosen0912ea32008-04-13 23:59:13 -07001377 IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001378 return dst_output(skb);
1379}
1380
/*
 *	Processing handlers for ip6_mr_forward
 */
1384
1385static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi)
1386{
1387 struct ipv6hdr *ipv6h;
1388 struct mif_device *vif = &vif6_table[vifi];
1389 struct net_device *dev;
1390 struct dst_entry *dst;
1391 struct flowi fl;
1392
1393 if (vif->dev == NULL)
1394 goto out_free;
1395
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +09001396#ifdef CONFIG_IPV6_PIMSM_V2
1397 if (vif->flags & MIFF_REGISTER) {
1398 vif->pkt_out++;
1399 vif->bytes_out += skb->len;
Pavel Emelyanovdc58c782008-05-21 14:17:54 -07001400 vif->dev->stats.tx_bytes += skb->len;
1401 vif->dev->stats.tx_packets++;
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +09001402 ip6mr_cache_report(skb, vifi, MRT6MSG_WHOLEPKT);
1403 kfree_skb(skb);
1404 return 0;
1405 }
1406#endif
1407
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001408 ipv6h = ipv6_hdr(skb);
1409
1410 fl = (struct flowi) {
1411 .oif = vif->link,
1412 .nl_u = { .ip6_u =
1413 { .daddr = ipv6h->daddr, }
1414 }
1415 };
1416
1417 dst = ip6_route_output(&init_net, NULL, &fl);
1418 if (!dst)
1419 goto out_free;
1420
1421 dst_release(skb->dst);
1422 skb->dst = dst;
1423
1424 /*
1425 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1426 * not only before forwarding, but after forwarding on all output
1427 * interfaces. It is clear, if mrouter runs a multicasting
1428 * program, it should receive packets not depending to what interface
1429 * program is joined.
1430 * If we will not make it, the program will have to join on all
1431 * interfaces. On the other hand, multihoming host (or router, but
1432 * not mrouter) cannot join to more than one interface - it will
1433 * result in receiving multiple packets.
1434 */
1435 dev = vif->dev;
1436 skb->dev = dev;
1437 vif->pkt_out++;
1438 vif->bytes_out += skb->len;
1439
1440 /* We are about to write */
1441 /* XXX: extension headers? */
1442 if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
1443 goto out_free;
1444
1445 ipv6h = ipv6_hdr(skb);
1446 ipv6h->hop_limit--;
1447
1448 IP6CB(skb)->flags |= IP6SKB_FORWARDED;
1449
1450 return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dev,
1451 ip6mr_forward2_finish);
1452
1453out_free:
1454 kfree_skb(skb);
1455 return 0;
1456}
1457
1458static int ip6mr_find_vif(struct net_device *dev)
1459{
1460 int ct;
1461 for (ct = maxvif - 1; ct >= 0; ct--) {
1462 if (vif6_table[ct].dev == dev)
1463 break;
1464 }
1465 return ct;
1466}
1467
1468static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache)
1469{
1470 int psend = -1;
1471 int vif, ct;
1472
1473 vif = cache->mf6c_parent;
1474 cache->mfc_un.res.pkt++;
1475 cache->mfc_un.res.bytes += skb->len;
1476
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +09001477 /*
1478 * Wrong interface: drop packet and (maybe) send PIM assert.
1479 */
1480 if (vif6_table[vif].dev != skb->dev) {
1481 int true_vifi;
1482
1483 cache->mfc_un.res.wrong_if++;
1484 true_vifi = ip6mr_find_vif(skb->dev);
1485
1486 if (true_vifi >= 0 && mroute_do_assert &&
1487 /* pimsm uses asserts, when switching from RPT to SPT,
1488 so that we cannot check that packet arrived on an oif.
1489 It is bad, but otherwise we would need to move pretty
1490 large chunk of pimd to kernel. Ough... --ANK
1491 */
1492 (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
1493 time_after(jiffies,
1494 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1495 cache->mfc_un.res.last_assert = jiffies;
1496 ip6mr_cache_report(skb, true_vifi, MRT6MSG_WRONGMIF);
1497 }
1498 goto dont_forward;
1499 }
1500
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001501 vif6_table[vif].pkt_in++;
1502 vif6_table[vif].bytes_in += skb->len;
1503
1504 /*
1505 * Forward the frame
1506 */
1507 for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
1508 if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
1509 if (psend != -1) {
1510 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1511 if (skb2)
1512 ip6mr_forward2(skb2, cache, psend);
1513 }
1514 psend = ct;
1515 }
1516 }
1517 if (psend != -1) {
1518 ip6mr_forward2(skb, cache, psend);
1519 return 0;
1520 }
1521
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +09001522dont_forward:
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001523 kfree_skb(skb);
1524 return 0;
1525}
1526
1527
/*
 *	Multicast packets for forwarding arrive here
 */
1531
1532int ip6_mr_input(struct sk_buff *skb)
1533{
1534 struct mfc6_cache *cache;
1535
1536 read_lock(&mrt_lock);
1537 cache = ip6mr_cache_find(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
1538
1539 /*
1540 * No usable cache entry
1541 */
1542 if (cache == NULL) {
1543 int vif;
1544
1545 vif = ip6mr_find_vif(skb->dev);
1546 if (vif >= 0) {
1547 int err = ip6mr_cache_unresolved(vif, skb);
1548 read_unlock(&mrt_lock);
1549
1550 return err;
1551 }
1552 read_unlock(&mrt_lock);
1553 kfree_skb(skb);
1554 return -ENODEV;
1555 }
1556
1557 ip6_mr_forward(skb, cache);
1558
1559 read_unlock(&mrt_lock);
1560
1561 return 0;
1562}
1563
1564
1565static int
1566ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm)
1567{
1568 int ct;
1569 struct rtnexthop *nhp;
1570 struct net_device *dev = vif6_table[c->mf6c_parent].dev;
YOSHIFUJI Hideaki549e0282008-04-05 22:17:39 +09001571 u8 *b = skb_tail_pointer(skb);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001572 struct rtattr *mp_head;
1573
1574 if (dev)
1575 RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
1576
1577 mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1578
1579 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1580 if (c->mfc_un.res.ttls[ct] < 255) {
1581 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1582 goto rtattr_failure;
1583 nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1584 nhp->rtnh_flags = 0;
1585 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1586 nhp->rtnh_ifindex = vif6_table[ct].dev->ifindex;
1587 nhp->rtnh_len = sizeof(*nhp);
1588 }
1589 }
1590 mp_head->rta_type = RTA_MULTIPATH;
YOSHIFUJI Hideaki549e0282008-04-05 22:17:39 +09001591 mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001592 rtm->rtm_type = RTN_MULTICAST;
1593 return 1;
1594
1595rtattr_failure:
1596 nlmsg_trim(skb, b);
1597 return -EMSGSIZE;
1598}
1599
1600int ip6mr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1601{
1602 int err;
1603 struct mfc6_cache *cache;
1604 struct rt6_info *rt = (struct rt6_info *)skb->dst;
1605
1606 read_lock(&mrt_lock);
1607 cache = ip6mr_cache_find(&rt->rt6i_src.addr, &rt->rt6i_dst.addr);
1608
1609 if (!cache) {
1610 struct sk_buff *skb2;
1611 struct ipv6hdr *iph;
1612 struct net_device *dev;
1613 int vif;
1614
1615 if (nowait) {
1616 read_unlock(&mrt_lock);
1617 return -EAGAIN;
1618 }
1619
1620 dev = skb->dev;
1621 if (dev == NULL || (vif = ip6mr_find_vif(dev)) < 0) {
1622 read_unlock(&mrt_lock);
1623 return -ENODEV;
1624 }
1625
1626 /* really correct? */
1627 skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
1628 if (!skb2) {
1629 read_unlock(&mrt_lock);
1630 return -ENOMEM;
1631 }
1632
1633 skb_reset_transport_header(skb2);
1634
1635 skb_put(skb2, sizeof(struct ipv6hdr));
1636 skb_reset_network_header(skb2);
1637
1638 iph = ipv6_hdr(skb2);
1639 iph->version = 0;
1640 iph->priority = 0;
1641 iph->flow_lbl[0] = 0;
1642 iph->flow_lbl[1] = 0;
1643 iph->flow_lbl[2] = 0;
1644 iph->payload_len = 0;
1645 iph->nexthdr = IPPROTO_NONE;
1646 iph->hop_limit = 0;
1647 ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr);
1648 ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr);
1649
1650 err = ip6mr_cache_unresolved(vif, skb2);
1651 read_unlock(&mrt_lock);
1652
1653 return err;
1654 }
1655
1656 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1657 cache->mfc_flags |= MFC_NOTIFY;
1658
1659 err = ip6mr_fill_mroute(skb, cache, rtm);
1660 read_unlock(&mrt_lock);
1661 return err;
1662}
1663