blob: 0b11b378d89a1b7fa710aeba2aaa7cebae061f5b [file] [log] [blame]
/*
 *	Linux IPv6 multicast routing support for BSD pim6sd
 *	Based on net/ipv4/ipmr.c.
 *
 *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
 *		LSIIT Laboratory, Strasbourg, France
 *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
 *		6WIND, Paris, France
 *	Copyright (C)2007,2008 USAGI/WIDE Project
 *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */
18
19#include <asm/system.h>
20#include <asm/uaccess.h>
21#include <linux/types.h>
22#include <linux/sched.h>
23#include <linux/errno.h>
24#include <linux/timer.h>
25#include <linux/mm.h>
26#include <linux/kernel.h>
27#include <linux/fcntl.h>
28#include <linux/stat.h>
29#include <linux/socket.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090030#include <linux/inet.h>
31#include <linux/netdevice.h>
32#include <linux/inetdevice.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090033#include <linux/proc_fs.h>
34#include <linux/seq_file.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090035#include <linux/init.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090036#include <net/protocol.h>
37#include <linux/skbuff.h>
38#include <net/sock.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090039#include <net/raw.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090040#include <linux/notifier.h>
41#include <linux/if_arp.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090042#include <net/checksum.h>
43#include <net/netlink.h>
44
45#include <net/ipv6.h>
46#include <net/ip6_route.h>
47#include <linux/mroute6.h>
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +090048#include <linux/pim.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090049#include <net/addrconf.h>
50#include <linux/netfilter_ipv6.h>
51
/* Socket registered through MRT6_INIT; cache reports are queued to it. */
struct sock *mroute6_socket;


/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

static struct mif_device vif6_table[MAXMIFS];		/* Devices 		*/
static int maxvif;		/* One more than the highest in-use vif6_table index */

/* True when slot idx of vif6_table has a device attached. */
#define MIF_EXISTS(idx) (vif6_table[idx].dev != NULL)

static int mroute_do_assert;				/* Set in PIM assert	*/
#ifdef CONFIG_IPV6_PIMSM_V2
static int mroute_do_pim;
#else
#define mroute_do_pim 0
#endif

static struct mfc6_cache *mfc6_cache_array[MFC6_LINES];	/* Forwarding cache	*/

static struct mfc6_cache *mfc_unres_queue;		/* Queue of unresolved entries */
static atomic_t cache_resolve_queue_len;		/* Size of unresolved	*/

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
   entries is changed only in process context and protected
   with weak lock mrt_lock. Queue of unresolved entries is protected
   with strong spinlock mfc_unres_lock.

   In this case data path is free of exclusive locks at all.
 */

/* Slab cache from which every struct mfc6_cache is allocated. */
static struct kmem_cache *mrt_cachep __read_mostly;

static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache);
static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert);
static int ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm);

#ifdef CONFIG_IPV6_PIMSM_V2
static struct inet6_protocol pim6_protocol;
#endif

/* Single timer that expires entries on mfc_unres_queue. */
static struct timer_list ipmr_expire_timer;
104
105
106#ifdef CONFIG_PROC_FS
107
/* Cursor for the /proc walk over the multicast forwarding cache. */
struct ipmr_mfc_iter {
	/* List being walked: mfc6_cache_array, &mfc_unres_queue, or NULL
	 * once the walk is finished. ->stop uses it to pick the lock to drop.
	 */
	struct mfc6_cache **cache;
	int ct;		/* current bucket index within mfc6_cache_array */
};
112
113
114static struct mfc6_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
115{
116 struct mfc6_cache *mfc;
117
118 it->cache = mfc6_cache_array;
119 read_lock(&mrt_lock);
120 for (it->ct = 0; it->ct < ARRAY_SIZE(mfc6_cache_array); it->ct++)
121 for (mfc = mfc6_cache_array[it->ct]; mfc; mfc = mfc->next)
122 if (pos-- == 0)
123 return mfc;
124 read_unlock(&mrt_lock);
125
126 it->cache = &mfc_unres_queue;
127 spin_lock_bh(&mfc_unres_lock);
128 for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
129 if (pos-- == 0)
130 return mfc;
131 spin_unlock_bh(&mfc_unres_lock);
132
133 it->cache = NULL;
134 return NULL;
135}
136
137
138
139
140/*
141 * The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
142 */
143
/* Cursor for the /proc walk over vif6_table. */
struct ipmr_vif_iter {
	int ct;		/* index into vif6_table of the current position */
};
147
148static struct mif_device *ip6mr_vif_seq_idx(struct ipmr_vif_iter *iter,
149 loff_t pos)
150{
151 for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) {
152 if (!MIF_EXISTS(iter->ct))
153 continue;
154 if (pos-- == 0)
155 return &vif6_table[iter->ct];
156 }
157 return NULL;
158}
159
160static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
161 __acquires(mrt_lock)
162{
163 read_lock(&mrt_lock);
164 return (*pos ? ip6mr_vif_seq_idx(seq->private, *pos - 1)
165 : SEQ_START_TOKEN);
166}
167
168static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
169{
170 struct ipmr_vif_iter *iter = seq->private;
171
172 ++*pos;
173 if (v == SEQ_START_TOKEN)
174 return ip6mr_vif_seq_idx(iter, 0);
175
176 while (++iter->ct < maxvif) {
177 if (!MIF_EXISTS(iter->ct))
178 continue;
179 return &vif6_table[iter->ct];
180 }
181 return NULL;
182}
183
/*
 * seq_file ->stop for the vif table: drops the mrt_lock read lock taken
 * in ip6mr_vif_seq_start().
 */
static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}
189
190static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
191{
192 if (v == SEQ_START_TOKEN) {
193 seq_puts(seq,
194 "Interface BytesIn PktsIn BytesOut PktsOut Flags\n");
195 } else {
196 const struct mif_device *vif = v;
197 const char *name = vif->dev ? vif->dev->name : "none";
198
199 seq_printf(seq,
200 "%2Zd %-10s %8ld %7ld %8ld %7ld %05X\n",
201 vif - vif6_table,
202 name, vif->bytes_in, vif->pkt_in,
203 vif->bytes_out, vif->pkt_out,
204 vif->flags);
205 }
206 return 0;
207}
208
209static struct seq_operations ip6mr_vif_seq_ops = {
210 .start = ip6mr_vif_seq_start,
211 .next = ip6mr_vif_seq_next,
212 .stop = ip6mr_vif_seq_stop,
213 .show = ip6mr_vif_seq_show,
214};
215
/*
 * open() handler for the vif table proc file: sets up a seq_file with a
 * private struct ipmr_vif_iter cursor. Returns what seq_open_private()
 * returns.
 */
static int ip6mr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_private(file, &ip6mr_vif_seq_ops,
				sizeof(struct ipmr_vif_iter));
}
221
222static struct file_operations ip6mr_vif_fops = {
223 .owner = THIS_MODULE,
224 .open = ip6mr_vif_open,
225 .read = seq_read,
226 .llseek = seq_lseek,
227 .release = seq_release,
228};
229
230static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
231{
232 return (*pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1)
233 : SEQ_START_TOKEN);
234}
235
236static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
237{
238 struct mfc6_cache *mfc = v;
239 struct ipmr_mfc_iter *it = seq->private;
240
241 ++*pos;
242
243 if (v == SEQ_START_TOKEN)
244 return ipmr_mfc_seq_idx(seq->private, 0);
245
246 if (mfc->next)
247 return mfc->next;
248
249 if (it->cache == &mfc_unres_queue)
250 goto end_of_list;
251
252 BUG_ON(it->cache != mfc6_cache_array);
253
254 while (++it->ct < ARRAY_SIZE(mfc6_cache_array)) {
255 mfc = mfc6_cache_array[it->ct];
256 if (mfc)
257 return mfc;
258 }
259
260 /* exhausted cache_array, show unresolved */
261 read_unlock(&mrt_lock);
262 it->cache = &mfc_unres_queue;
263 it->ct = 0;
264
265 spin_lock_bh(&mfc_unres_lock);
266 mfc = mfc_unres_queue;
267 if (mfc)
268 return mfc;
269
270 end_of_list:
271 spin_unlock_bh(&mfc_unres_lock);
272 it->cache = NULL;
273
274 return NULL;
275}
276
277static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
278{
279 struct ipmr_mfc_iter *it = seq->private;
280
281 if (it->cache == &mfc_unres_queue)
282 spin_unlock_bh(&mfc_unres_lock);
283 else if (it->cache == mfc6_cache_array)
284 read_unlock(&mrt_lock);
285}
286
287static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
288{
289 int n;
290
291 if (v == SEQ_START_TOKEN) {
292 seq_puts(seq,
293 "Group "
294 "Origin "
295 "Iif Pkts Bytes Wrong Oifs\n");
296 } else {
297 const struct mfc6_cache *mfc = v;
298 const struct ipmr_mfc_iter *it = seq->private;
299
300 seq_printf(seq,
301 NIP6_FMT " " NIP6_FMT " %-3d %8ld %8ld %8ld",
302 NIP6(mfc->mf6c_mcastgrp), NIP6(mfc->mf6c_origin),
303 mfc->mf6c_parent,
304 mfc->mfc_un.res.pkt,
305 mfc->mfc_un.res.bytes,
306 mfc->mfc_un.res.wrong_if);
307
308 if (it->cache != &mfc_unres_queue) {
309 for (n = mfc->mfc_un.res.minvif;
310 n < mfc->mfc_un.res.maxvif; n++) {
311 if (MIF_EXISTS(n) &&
312 mfc->mfc_un.res.ttls[n] < 255)
313 seq_printf(seq,
314 " %2d:%-3d",
315 n, mfc->mfc_un.res.ttls[n]);
316 }
317 }
318 seq_putc(seq, '\n');
319 }
320 return 0;
321}
322
323static struct seq_operations ipmr_mfc_seq_ops = {
324 .start = ipmr_mfc_seq_start,
325 .next = ipmr_mfc_seq_next,
326 .stop = ipmr_mfc_seq_stop,
327 .show = ipmr_mfc_seq_show,
328};
329
/*
 * open() handler for the MFC cache proc file: sets up a seq_file with a
 * private struct ipmr_mfc_iter cursor. Returns what seq_open_private()
 * returns.
 */
static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_private(file, &ipmr_mfc_seq_ops,
				sizeof(struct ipmr_mfc_iter));
}
335
336static struct file_operations ip6mr_mfc_fops = {
337 .owner = THIS_MODULE,
338 .open = ipmr_mfc_open,
339 .read = seq_read,
340 .llseek = seq_lseek,
341 .release = seq_release,
342};
343#endif
344
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900345#ifdef CONFIG_IPV6_PIMSM_V2
346static int reg_vif_num = -1;
347
348static int pim6_rcv(struct sk_buff *skb)
349{
350 struct pimreghdr *pim;
351 struct ipv6hdr *encap;
352 struct net_device *reg_dev = NULL;
353
354 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
355 goto drop;
356
357 pim = (struct pimreghdr *)skb_transport_header(skb);
358 if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
359 (pim->flags & PIM_NULL_REGISTER) ||
360 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
Al Viroec6b4862008-04-26 22:28:58 -0700361 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900362 goto drop;
363
364 /* check if the inner packet is destined to mcast group */
365 encap = (struct ipv6hdr *)(skb_transport_header(skb) +
366 sizeof(*pim));
367
368 if (!ipv6_addr_is_multicast(&encap->daddr) ||
369 encap->payload_len == 0 ||
370 ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
371 goto drop;
372
373 read_lock(&mrt_lock);
374 if (reg_vif_num >= 0)
375 reg_dev = vif6_table[reg_vif_num].dev;
376 if (reg_dev)
377 dev_hold(reg_dev);
378 read_unlock(&mrt_lock);
379
380 if (reg_dev == NULL)
381 goto drop;
382
383 skb->mac_header = skb->network_header;
384 skb_pull(skb, (u8 *)encap - skb->data);
385 skb_reset_network_header(skb);
386 skb->dev = reg_dev;
387 skb->protocol = htons(ETH_P_IP);
388 skb->ip_summed = 0;
389 skb->pkt_type = PACKET_HOST;
390 dst_release(skb->dst);
Pavel Emelyanovdc58c782008-05-21 14:17:54 -0700391 reg_dev->stats.rx_bytes += skb->len;
392 reg_dev->stats.rx_packets++;
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900393 skb->dst = NULL;
394 nf_reset(skb);
395 netif_rx(skb);
396 dev_put(reg_dev);
397 return 0;
398 drop:
399 kfree_skb(skb);
400 return 0;
401}
402
/* IPv6 protocol descriptor whose receive handler is pim6_rcv(). */
static struct inet6_protocol pim6_protocol = {
	.handler	=	pim6_rcv,
};
406
407/* Service routines creating virtual interfaces: PIMREG */
408
409static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
410{
411 read_lock(&mrt_lock);
Pavel Emelyanovdc58c782008-05-21 14:17:54 -0700412 dev->stats.tx_bytes += skb->len;
413 dev->stats.tx_packets++;
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900414 ip6mr_cache_report(skb, reg_vif_num, MRT6MSG_WHOLEPKT);
415 read_unlock(&mrt_lock);
416 kfree_skb(skb);
417 return 0;
418}
419
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900420static void reg_vif_setup(struct net_device *dev)
421{
422 dev->type = ARPHRD_PIMREG;
423 dev->mtu = 1500 - sizeof(struct ipv6hdr) - 8;
424 dev->flags = IFF_NOARP;
425 dev->hard_start_xmit = reg_vif_xmit;
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900426 dev->destructor = free_netdev;
427}
428
429static struct net_device *ip6mr_reg_vif(void)
430{
431 struct net_device *dev;
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900432
Pavel Emelyanovdc58c782008-05-21 14:17:54 -0700433 dev = alloc_netdev(0, "pim6reg", reg_vif_setup);
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900434 if (dev == NULL)
435 return NULL;
436
437 if (register_netdevice(dev)) {
438 free_netdev(dev);
439 return NULL;
440 }
441 dev->iflink = 0;
442
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900443 if (dev_open(dev))
444 goto failure;
445
446 return dev;
447
448failure:
449 /* allow the register to be completed before unregistering. */
450 rtnl_unlock();
451 rtnl_lock();
452
453 unregister_netdevice(dev);
454 return NULL;
455}
456#endif
457
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900458/*
459 * Delete a VIF entry
460 */
461
462static int mif6_delete(int vifi)
463{
464 struct mif_device *v;
465 struct net_device *dev;
466 if (vifi < 0 || vifi >= maxvif)
467 return -EADDRNOTAVAIL;
468
469 v = &vif6_table[vifi];
470
471 write_lock_bh(&mrt_lock);
472 dev = v->dev;
473 v->dev = NULL;
474
475 if (!dev) {
476 write_unlock_bh(&mrt_lock);
477 return -EADDRNOTAVAIL;
478 }
479
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900480#ifdef CONFIG_IPV6_PIMSM_V2
481 if (vifi == reg_vif_num)
482 reg_vif_num = -1;
483#endif
484
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900485 if (vifi + 1 == maxvif) {
486 int tmp;
487 for (tmp = vifi - 1; tmp >= 0; tmp--) {
488 if (MIF_EXISTS(tmp))
489 break;
490 }
491 maxvif = tmp + 1;
492 }
493
494 write_unlock_bh(&mrt_lock);
495
496 dev_set_allmulti(dev, -1);
497
498 if (v->flags & MIFF_REGISTER)
499 unregister_netdevice(dev);
500
501 dev_put(dev);
502 return 0;
503}
504
505/* Destroy an unresolved cache entry, killing queued skbs
506 and reporting error to netlink readers.
507 */
508
509static void ip6mr_destroy_unres(struct mfc6_cache *c)
510{
511 struct sk_buff *skb;
512
513 atomic_dec(&cache_resolve_queue_len);
514
515 while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
516 if (ipv6_hdr(skb)->version == 0) {
517 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
518 nlh->nlmsg_type = NLMSG_ERROR;
519 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
520 skb_trim(skb, nlh->nlmsg_len);
521 ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
522 rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
523 } else
524 kfree_skb(skb);
525 }
526
527 kmem_cache_free(mrt_cachep, c);
528}
529
530
531/* Single timer process for all the unresolved queue. */
532
533static void ipmr_do_expire_process(unsigned long dummy)
534{
535 unsigned long now = jiffies;
536 unsigned long expires = 10 * HZ;
537 struct mfc6_cache *c, **cp;
538
539 cp = &mfc_unres_queue;
540
541 while ((c = *cp) != NULL) {
542 if (time_after(c->mfc_un.unres.expires, now)) {
543 /* not yet... */
544 unsigned long interval = c->mfc_un.unres.expires - now;
545 if (interval < expires)
546 expires = interval;
547 cp = &c->next;
548 continue;
549 }
550
551 *cp = c->next;
552 ip6mr_destroy_unres(c);
553 }
554
555 if (atomic_read(&cache_resolve_queue_len))
556 mod_timer(&ipmr_expire_timer, jiffies + expires);
557}
558
559static void ipmr_expire_process(unsigned long dummy)
560{
561 if (!spin_trylock(&mfc_unres_lock)) {
562 mod_timer(&ipmr_expire_timer, jiffies + 1);
563 return;
564 }
565
566 if (atomic_read(&cache_resolve_queue_len))
567 ipmr_do_expire_process(dummy);
568
569 spin_unlock(&mfc_unres_lock);
570}
571
572/* Fill oifs list. It is called under write locked mrt_lock. */
573
574static void ip6mr_update_thresholds(struct mfc6_cache *cache, unsigned char *ttls)
575{
576 int vifi;
577
Rami Rosen6ac7eb02008-04-10 12:40:10 +0300578 cache->mfc_un.res.minvif = MAXMIFS;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900579 cache->mfc_un.res.maxvif = 0;
Rami Rosen6ac7eb02008-04-10 12:40:10 +0300580 memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900581
582 for (vifi = 0; vifi < maxvif; vifi++) {
583 if (MIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) {
584 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
585 if (cache->mfc_un.res.minvif > vifi)
586 cache->mfc_un.res.minvif = vifi;
587 if (cache->mfc_un.res.maxvif <= vifi)
588 cache->mfc_un.res.maxvif = vifi + 1;
589 }
590 }
591}
592
593static int mif6_add(struct mif6ctl *vifc, int mrtsock)
594{
595 int vifi = vifc->mif6c_mifi;
596 struct mif_device *v = &vif6_table[vifi];
597 struct net_device *dev;
598
599 /* Is vif busy ? */
600 if (MIF_EXISTS(vifi))
601 return -EADDRINUSE;
602
603 switch (vifc->mif6c_flags) {
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900604#ifdef CONFIG_IPV6_PIMSM_V2
605 case MIFF_REGISTER:
606 /*
607 * Special Purpose VIF in PIM
608 * All the packets will be sent to the daemon
609 */
610 if (reg_vif_num >= 0)
611 return -EADDRINUSE;
612 dev = ip6mr_reg_vif();
613 if (!dev)
614 return -ENOBUFS;
615 break;
616#endif
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900617 case 0:
618 dev = dev_get_by_index(&init_net, vifc->mif6c_pifi);
619 if (!dev)
620 return -EADDRNOTAVAIL;
621 dev_put(dev);
622 break;
623 default:
624 return -EINVAL;
625 }
626
627 dev_set_allmulti(dev, 1);
628
629 /*
630 * Fill in the VIF structures
631 */
632 v->rate_limit = vifc->vifc_rate_limit;
633 v->flags = vifc->mif6c_flags;
634 if (!mrtsock)
635 v->flags |= VIFF_STATIC;
636 v->threshold = vifc->vifc_threshold;
637 v->bytes_in = 0;
638 v->bytes_out = 0;
639 v->pkt_in = 0;
640 v->pkt_out = 0;
641 v->link = dev->ifindex;
642 if (v->flags & MIFF_REGISTER)
643 v->link = dev->iflink;
644
645 /* And finish update writing critical data */
646 write_lock_bh(&mrt_lock);
647 dev_hold(dev);
648 v->dev = dev;
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900649#ifdef CONFIG_IPV6_PIMSM_V2
650 if (v->flags & MIFF_REGISTER)
651 reg_vif_num = vifi;
652#endif
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900653 if (vifi + 1 > maxvif)
654 maxvif = vifi + 1;
655 write_unlock_bh(&mrt_lock);
656 return 0;
657}
658
659static struct mfc6_cache *ip6mr_cache_find(struct in6_addr *origin, struct in6_addr *mcastgrp)
660{
661 int line = MFC6_HASH(mcastgrp, origin);
662 struct mfc6_cache *c;
663
664 for (c = mfc6_cache_array[line]; c; c = c->next) {
665 if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
666 ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
667 break;
668 }
669 return c;
670}
671
672/*
673 * Allocate a multicast cache entry
674 */
675static struct mfc6_cache *ip6mr_cache_alloc(void)
676{
677 struct mfc6_cache *c = kmem_cache_alloc(mrt_cachep, GFP_KERNEL);
678 if (c == NULL)
679 return NULL;
680 memset(c, 0, sizeof(*c));
Rami Rosen6ac7eb02008-04-10 12:40:10 +0300681 c->mfc_un.res.minvif = MAXMIFS;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900682 return c;
683}
684
685static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
686{
687 struct mfc6_cache *c = kmem_cache_alloc(mrt_cachep, GFP_ATOMIC);
688 if (c == NULL)
689 return NULL;
690 memset(c, 0, sizeof(*c));
691 skb_queue_head_init(&c->mfc_un.unres.unresolved);
692 c->mfc_un.unres.expires = jiffies + 10 * HZ;
693 return c;
694}
695
696/*
697 * A cache entry has gone into a resolved state from queued
698 */
699
700static void ip6mr_cache_resolve(struct mfc6_cache *uc, struct mfc6_cache *c)
701{
702 struct sk_buff *skb;
703
704 /*
705 * Play the pending entries through our router
706 */
707
708 while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
709 if (ipv6_hdr(skb)->version == 0) {
710 int err;
711 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
712
713 if (ip6mr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
YOSHIFUJI Hideaki549e0282008-04-05 22:17:39 +0900714 nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900715 } else {
716 nlh->nlmsg_type = NLMSG_ERROR;
717 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
718 skb_trim(skb, nlh->nlmsg_len);
719 ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
720 }
721 err = rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
722 } else
723 ip6_mr_forward(skb, c);
724 }
725}
726
/*
 *	Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 *
 *	@pkt:    packet that triggered the report; its IPv6 header supplies
 *	         the source and destination addresses copied into the message
 *	@mifi:   interface index stored in im6_mif (the WHOLEPKT path stores
 *	         reg_vif_num instead)
 *	@assert: message type, stored in im6_msgtype
 *
 *	Returns -ENOBUFS if no skb could be obtained, -EINVAL if no mroute
 *	socket is registered, otherwise the result of sock_queue_rcv_skb().
 */

static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert)
{
	struct sk_buff *skb;
	struct mrt6msg *msg;
	int ret;

	/* WHOLEPKT reports carry the original packet, so the skb is a copy of
	 * pkt with room for the message header in front; all other reports
	 * get a fresh skb sized for one IPv6 header plus the message.
	 */
#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
						+sizeof(*msg));
	else
#endif
		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	/* I suppose that internal messages
	 * do not require checksums */

	skb->ip_summed = CHECKSUM_UNNECESSARY;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix length etc.
		   And all this only to mangle msg->im6_msgtype and
		   to set msg->im6_mbz to "mbz" :-)
		 */
		skb_push(skb, -skb_network_offset(pkt));

		skb_push(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);
		msg->im6_mbz = 0;
		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
		msg->im6_mif = reg_vif_num;
		msg->im6_pad = 0;
		ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
		ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);

		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else
#endif
	{
	/*
	 *	Copy the IP header
	 */

	skb_put(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));

	/*
	 *	Add our header
	 */
	skb_put(skb, sizeof(*msg));
	skb_reset_transport_header(skb);
	msg = (struct mrt6msg *)skb_transport_header(skb);

	msg->im6_mbz = 0;
	msg->im6_msgtype = assert;
	msg->im6_mif = mifi;
	msg->im6_pad = 0;
	ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
	ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);

	skb->dst = dst_clone(pkt->dst);
	skb->ip_summed = CHECKSUM_UNNECESSARY;

	/* skip past the copied IPv6 header so the data starts at the message */
	skb_pull(skb, sizeof(struct ipv6hdr));
	}

	/* The socket is only checked after the message has been built. */
	if (mroute6_socket == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to user space multicast routing algorithms
	 */
	if ((ret = sock_queue_rcv_skb(mroute6_socket, skb)) < 0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}
823
824/*
825 * Queue a packet for resolution. It gets locked cache entry!
826 */
827
828static int
Rami Rosen6ac7eb02008-04-10 12:40:10 +0300829ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb)
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900830{
831 int err;
832 struct mfc6_cache *c;
833
834 spin_lock_bh(&mfc_unres_lock);
835 for (c = mfc_unres_queue; c; c = c->next) {
836 if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
837 ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr))
838 break;
839 }
840
841 if (c == NULL) {
842 /*
843 * Create a new entry if allowable
844 */
845
846 if (atomic_read(&cache_resolve_queue_len) >= 10 ||
847 (c = ip6mr_cache_alloc_unres()) == NULL) {
848 spin_unlock_bh(&mfc_unres_lock);
849
850 kfree_skb(skb);
851 return -ENOBUFS;
852 }
853
854 /*
855 * Fill in the new cache entry
856 */
857 c->mf6c_parent = -1;
858 c->mf6c_origin = ipv6_hdr(skb)->saddr;
859 c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
860
861 /*
862 * Reflect first query at pim6sd
863 */
Rami Rosen6ac7eb02008-04-10 12:40:10 +0300864 if ((err = ip6mr_cache_report(skb, mifi, MRT6MSG_NOCACHE)) < 0) {
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900865 /* If the report failed throw the cache entry
866 out - Brad Parker
867 */
868 spin_unlock_bh(&mfc_unres_lock);
869
870 kmem_cache_free(mrt_cachep, c);
871 kfree_skb(skb);
872 return err;
873 }
874
875 atomic_inc(&cache_resolve_queue_len);
876 c->next = mfc_unres_queue;
877 mfc_unres_queue = c;
878
879 ipmr_do_expire_process(1);
880 }
881
882 /*
883 * See if we can append the packet
884 */
885 if (c->mfc_un.unres.unresolved.qlen > 3) {
886 kfree_skb(skb);
887 err = -ENOBUFS;
888 } else {
889 skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
890 err = 0;
891 }
892
893 spin_unlock_bh(&mfc_unres_lock);
894 return err;
895}
896
897/*
898 * MFC6 cache manipulation by user space
899 */
900
901static int ip6mr_mfc_delete(struct mf6cctl *mfc)
902{
903 int line;
904 struct mfc6_cache *c, **cp;
905
906 line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
907
908 for (cp = &mfc6_cache_array[line]; (c = *cp) != NULL; cp = &c->next) {
909 if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
910 ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
911 write_lock_bh(&mrt_lock);
912 *cp = c->next;
913 write_unlock_bh(&mrt_lock);
914
915 kmem_cache_free(mrt_cachep, c);
916 return 0;
917 }
918 }
919 return -ENOENT;
920}
921
922static int ip6mr_device_event(struct notifier_block *this,
923 unsigned long event, void *ptr)
924{
925 struct net_device *dev = ptr;
926 struct mif_device *v;
927 int ct;
928
929 if (dev_net(dev) != &init_net)
930 return NOTIFY_DONE;
931
932 if (event != NETDEV_UNREGISTER)
933 return NOTIFY_DONE;
934
935 v = &vif6_table[0];
936 for (ct = 0; ct < maxvif; ct++, v++) {
937 if (v->dev == dev)
938 mif6_delete(ct);
939 }
940 return NOTIFY_DONE;
941}
942
/* Notifier block registered in ip6_mr_init(); dispatches to ip6mr_device_event(). */
static struct notifier_block ip6_mr_notifier = {
	.notifier_call = ip6mr_device_event
};
946
947/*
948 * Setup for IP multicast routing
949 */
950
951void __init ip6_mr_init(void)
952{
953 mrt_cachep = kmem_cache_create("ip6_mrt_cache",
954 sizeof(struct mfc6_cache),
955 0, SLAB_HWCACHE_ALIGN,
956 NULL);
957 if (!mrt_cachep)
958 panic("cannot allocate ip6_mrt_cache");
959
960 setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
961 register_netdevice_notifier(&ip6_mr_notifier);
962#ifdef CONFIG_PROC_FS
963 proc_net_fops_create(&init_net, "ip6_mr_vif", 0, &ip6mr_vif_fops);
964 proc_net_fops_create(&init_net, "ip6_mr_cache", 0, &ip6mr_mfc_fops);
965#endif
966}
967
968
969static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock)
970{
971 int line;
972 struct mfc6_cache *uc, *c, **cp;
Rami Rosen6ac7eb02008-04-10 12:40:10 +0300973 unsigned char ttls[MAXMIFS];
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900974 int i;
975
Rami Rosen6ac7eb02008-04-10 12:40:10 +0300976 memset(ttls, 255, MAXMIFS);
977 for (i = 0; i < MAXMIFS; i++) {
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900978 if (IF_ISSET(i, &mfc->mf6cc_ifset))
979 ttls[i] = 1;
980
981 }
982
983 line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
984
985 for (cp = &mfc6_cache_array[line]; (c = *cp) != NULL; cp = &c->next) {
986 if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
987 ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr))
988 break;
989 }
990
991 if (c != NULL) {
992 write_lock_bh(&mrt_lock);
993 c->mf6c_parent = mfc->mf6cc_parent;
994 ip6mr_update_thresholds(c, ttls);
995 if (!mrtsock)
996 c->mfc_flags |= MFC_STATIC;
997 write_unlock_bh(&mrt_lock);
998 return 0;
999 }
1000
1001 if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1002 return -EINVAL;
1003
1004 c = ip6mr_cache_alloc();
1005 if (c == NULL)
1006 return -ENOMEM;
1007
1008 c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1009 c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1010 c->mf6c_parent = mfc->mf6cc_parent;
1011 ip6mr_update_thresholds(c, ttls);
1012 if (!mrtsock)
1013 c->mfc_flags |= MFC_STATIC;
1014
1015 write_lock_bh(&mrt_lock);
1016 c->next = mfc6_cache_array[line];
1017 mfc6_cache_array[line] = c;
1018 write_unlock_bh(&mrt_lock);
1019
1020 /*
1021 * Check to see if we resolved a queued list. If so we
1022 * need to send on the frames and tidy up.
1023 */
1024 spin_lock_bh(&mfc_unres_lock);
1025 for (cp = &mfc_unres_queue; (uc = *cp) != NULL;
1026 cp = &uc->next) {
1027 if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1028 ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1029 *cp = uc->next;
1030 if (atomic_dec_and_test(&cache_resolve_queue_len))
1031 del_timer(&ipmr_expire_timer);
1032 break;
1033 }
1034 }
1035 spin_unlock_bh(&mfc_unres_lock);
1036
1037 if (uc) {
1038 ip6mr_cache_resolve(uc, c);
1039 kmem_cache_free(mrt_cachep, uc);
1040 }
1041 return 0;
1042}
1043
1044/*
1045 * Close the multicast socket, and clear the vif tables etc
1046 */
1047
1048static void mroute_clean_tables(struct sock *sk)
1049{
1050 int i;
1051
1052 /*
1053 * Shut down all active vif entries
1054 */
1055 for (i = 0; i < maxvif; i++) {
1056 if (!(vif6_table[i].flags & VIFF_STATIC))
1057 mif6_delete(i);
1058 }
1059
1060 /*
1061 * Wipe the cache
1062 */
1063 for (i = 0; i < ARRAY_SIZE(mfc6_cache_array); i++) {
1064 struct mfc6_cache *c, **cp;
1065
1066 cp = &mfc6_cache_array[i];
1067 while ((c = *cp) != NULL) {
1068 if (c->mfc_flags & MFC_STATIC) {
1069 cp = &c->next;
1070 continue;
1071 }
1072 write_lock_bh(&mrt_lock);
1073 *cp = c->next;
1074 write_unlock_bh(&mrt_lock);
1075
1076 kmem_cache_free(mrt_cachep, c);
1077 }
1078 }
1079
1080 if (atomic_read(&cache_resolve_queue_len) != 0) {
1081 struct mfc6_cache *c;
1082
1083 spin_lock_bh(&mfc_unres_lock);
1084 while (mfc_unres_queue != NULL) {
1085 c = mfc_unres_queue;
1086 mfc_unres_queue = c->next;
1087 spin_unlock_bh(&mfc_unres_lock);
1088
1089 ip6mr_destroy_unres(c);
1090
1091 spin_lock_bh(&mfc_unres_lock);
1092 }
1093 spin_unlock_bh(&mfc_unres_lock);
1094 }
1095}
1096
1097static int ip6mr_sk_init(struct sock *sk)
1098{
1099 int err = 0;
1100
1101 rtnl_lock();
1102 write_lock_bh(&mrt_lock);
1103 if (likely(mroute6_socket == NULL))
1104 mroute6_socket = sk;
1105 else
1106 err = -EADDRINUSE;
1107 write_unlock_bh(&mrt_lock);
1108
1109 rtnl_unlock();
1110
1111 return err;
1112}
1113
1114int ip6mr_sk_done(struct sock *sk)
1115{
1116 int err = 0;
1117
1118 rtnl_lock();
1119 if (sk == mroute6_socket) {
1120 write_lock_bh(&mrt_lock);
1121 mroute6_socket = NULL;
1122 write_unlock_bh(&mrt_lock);
1123
1124 mroute_clean_tables(sk);
1125 } else
1126 err = -EACCES;
1127 rtnl_unlock();
1128
1129 return err;
1130}
1131
1132/*
1133 * Socket options and virtual interface manipulation. The whole
1134 * virtual interface system is a complete heap, but unfortunately
1135 * that's how BSD mrouted happens to think. Maybe one day with a proper
1136 * MOSPF/PIM router set up we can clean this up.
1137 */
1138
1139int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int optlen)
1140{
1141 int ret;
1142 struct mif6ctl vif;
1143 struct mf6cctl mfc;
1144 mifi_t mifi;
1145
1146 if (optname != MRT6_INIT) {
1147 if (sk != mroute6_socket && !capable(CAP_NET_ADMIN))
1148 return -EACCES;
1149 }
1150
1151 switch (optname) {
1152 case MRT6_INIT:
1153 if (sk->sk_type != SOCK_RAW ||
1154 inet_sk(sk)->num != IPPROTO_ICMPV6)
1155 return -EOPNOTSUPP;
1156 if (optlen < sizeof(int))
1157 return -EINVAL;
1158
1159 return ip6mr_sk_init(sk);
1160
1161 case MRT6_DONE:
1162 return ip6mr_sk_done(sk);
1163
1164 case MRT6_ADD_MIF:
1165 if (optlen < sizeof(vif))
1166 return -EINVAL;
1167 if (copy_from_user(&vif, optval, sizeof(vif)))
1168 return -EFAULT;
Rami Rosen6ac7eb02008-04-10 12:40:10 +03001169 if (vif.mif6c_mifi >= MAXMIFS)
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001170 return -ENFILE;
1171 rtnl_lock();
1172 ret = mif6_add(&vif, sk == mroute6_socket);
1173 rtnl_unlock();
1174 return ret;
1175
1176 case MRT6_DEL_MIF:
1177 if (optlen < sizeof(mifi_t))
1178 return -EINVAL;
1179 if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1180 return -EFAULT;
1181 rtnl_lock();
1182 ret = mif6_delete(mifi);
1183 rtnl_unlock();
1184 return ret;
1185
1186 /*
1187 * Manipulate the forwarding caches. These live
1188 * in a sort of kernel/user symbiosis.
1189 */
1190 case MRT6_ADD_MFC:
1191 case MRT6_DEL_MFC:
1192 if (optlen < sizeof(mfc))
1193 return -EINVAL;
1194 if (copy_from_user(&mfc, optval, sizeof(mfc)))
1195 return -EFAULT;
1196 rtnl_lock();
1197 if (optname == MRT6_DEL_MFC)
1198 ret = ip6mr_mfc_delete(&mfc);
1199 else
1200 ret = ip6mr_mfc_add(&mfc, sk == mroute6_socket);
1201 rtnl_unlock();
1202 return ret;
1203
1204 /*
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +09001205 * Control PIM assert (to activate pim will activate assert)
1206 */
1207 case MRT6_ASSERT:
1208 {
1209 int v;
1210 if (get_user(v, (int __user *)optval))
1211 return -EFAULT;
1212 mroute_do_assert = !!v;
1213 return 0;
1214 }
1215
1216#ifdef CONFIG_IPV6_PIMSM_V2
1217 case MRT6_PIM:
1218 {
YOSHIFUJI Hideakia9f83bf2008-04-10 15:41:28 +09001219 int v;
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +09001220 if (get_user(v, (int __user *)optval))
1221 return -EFAULT;
1222 v = !!v;
1223 rtnl_lock();
1224 ret = 0;
1225 if (v != mroute_do_pim) {
1226 mroute_do_pim = v;
1227 mroute_do_assert = v;
1228 if (mroute_do_pim)
1229 ret = inet6_add_protocol(&pim6_protocol,
1230 IPPROTO_PIM);
1231 else
1232 ret = inet6_del_protocol(&pim6_protocol,
1233 IPPROTO_PIM);
1234 if (ret < 0)
1235 ret = -EAGAIN;
1236 }
1237 rtnl_unlock();
1238 return ret;
1239 }
1240
1241#endif
1242 /*
Rami Rosen7d120c52008-04-23 14:35:13 +03001243 * Spurious command, or MRT6_VERSION which you cannot
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001244 * set.
1245 */
1246 default:
1247 return -ENOPROTOOPT;
1248 }
1249}
1250
/*
 *	getsockopt() support for the multicast routing system.
 */
1254
1255int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1256 int __user *optlen)
1257{
1258 int olr;
1259 int val;
1260
1261 switch (optname) {
1262 case MRT6_VERSION:
1263 val = 0x0305;
1264 break;
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +09001265#ifdef CONFIG_IPV6_PIMSM_V2
1266 case MRT6_PIM:
1267 val = mroute_do_pim;
1268 break;
1269#endif
1270 case MRT6_ASSERT:
1271 val = mroute_do_assert;
1272 break;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001273 default:
1274 return -ENOPROTOOPT;
1275 }
1276
1277 if (get_user(olr, optlen))
1278 return -EFAULT;
1279
1280 olr = min_t(int, olr, sizeof(int));
1281 if (olr < 0)
1282 return -EINVAL;
1283
1284 if (put_user(olr, optlen))
1285 return -EFAULT;
1286 if (copy_to_user(optval, &val, olr))
1287 return -EFAULT;
1288 return 0;
1289}
1290
/*
 *	The IPv6 multicast ioctl support routines.
 */
1294
1295int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1296{
1297 struct sioc_sg_req6 sr;
1298 struct sioc_mif_req6 vr;
1299 struct mif_device *vif;
1300 struct mfc6_cache *c;
1301
1302 switch (cmd) {
1303 case SIOCGETMIFCNT_IN6:
1304 if (copy_from_user(&vr, arg, sizeof(vr)))
1305 return -EFAULT;
1306 if (vr.mifi >= maxvif)
1307 return -EINVAL;
1308 read_lock(&mrt_lock);
1309 vif = &vif6_table[vr.mifi];
1310 if (MIF_EXISTS(vr.mifi)) {
1311 vr.icount = vif->pkt_in;
1312 vr.ocount = vif->pkt_out;
1313 vr.ibytes = vif->bytes_in;
1314 vr.obytes = vif->bytes_out;
1315 read_unlock(&mrt_lock);
1316
1317 if (copy_to_user(arg, &vr, sizeof(vr)))
1318 return -EFAULT;
1319 return 0;
1320 }
1321 read_unlock(&mrt_lock);
1322 return -EADDRNOTAVAIL;
1323 case SIOCGETSGCNT_IN6:
1324 if (copy_from_user(&sr, arg, sizeof(sr)))
1325 return -EFAULT;
1326
1327 read_lock(&mrt_lock);
1328 c = ip6mr_cache_find(&sr.src.sin6_addr, &sr.grp.sin6_addr);
1329 if (c) {
1330 sr.pktcnt = c->mfc_un.res.pkt;
1331 sr.bytecnt = c->mfc_un.res.bytes;
1332 sr.wrong_if = c->mfc_un.res.wrong_if;
1333 read_unlock(&mrt_lock);
1334
1335 if (copy_to_user(arg, &sr, sizeof(sr)))
1336 return -EFAULT;
1337 return 0;
1338 }
1339 read_unlock(&mrt_lock);
1340 return -EADDRNOTAVAIL;
1341 default:
1342 return -ENOIOCTLCMD;
1343 }
1344}
1345
1346
1347static inline int ip6mr_forward2_finish(struct sk_buff *skb)
1348{
Rami Rosen0912ea32008-04-13 23:59:13 -07001349 IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001350 return dst_output(skb);
1351}
1352
/*
 *	Processing handlers for ip6_mr_forward()
 */
1356
1357static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi)
1358{
1359 struct ipv6hdr *ipv6h;
1360 struct mif_device *vif = &vif6_table[vifi];
1361 struct net_device *dev;
1362 struct dst_entry *dst;
1363 struct flowi fl;
1364
1365 if (vif->dev == NULL)
1366 goto out_free;
1367
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +09001368#ifdef CONFIG_IPV6_PIMSM_V2
1369 if (vif->flags & MIFF_REGISTER) {
1370 vif->pkt_out++;
1371 vif->bytes_out += skb->len;
Pavel Emelyanovdc58c782008-05-21 14:17:54 -07001372 vif->dev->stats.tx_bytes += skb->len;
1373 vif->dev->stats.tx_packets++;
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +09001374 ip6mr_cache_report(skb, vifi, MRT6MSG_WHOLEPKT);
1375 kfree_skb(skb);
1376 return 0;
1377 }
1378#endif
1379
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001380 ipv6h = ipv6_hdr(skb);
1381
1382 fl = (struct flowi) {
1383 .oif = vif->link,
1384 .nl_u = { .ip6_u =
1385 { .daddr = ipv6h->daddr, }
1386 }
1387 };
1388
1389 dst = ip6_route_output(&init_net, NULL, &fl);
1390 if (!dst)
1391 goto out_free;
1392
1393 dst_release(skb->dst);
1394 skb->dst = dst;
1395
1396 /*
1397 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1398 * not only before forwarding, but after forwarding on all output
1399 * interfaces. It is clear, if mrouter runs a multicasting
1400 * program, it should receive packets not depending to what interface
1401 * program is joined.
1402 * If we will not make it, the program will have to join on all
1403 * interfaces. On the other hand, multihoming host (or router, but
1404 * not mrouter) cannot join to more than one interface - it will
1405 * result in receiving multiple packets.
1406 */
1407 dev = vif->dev;
1408 skb->dev = dev;
1409 vif->pkt_out++;
1410 vif->bytes_out += skb->len;
1411
1412 /* We are about to write */
1413 /* XXX: extension headers? */
1414 if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
1415 goto out_free;
1416
1417 ipv6h = ipv6_hdr(skb);
1418 ipv6h->hop_limit--;
1419
1420 IP6CB(skb)->flags |= IP6SKB_FORWARDED;
1421
1422 return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dev,
1423 ip6mr_forward2_finish);
1424
1425out_free:
1426 kfree_skb(skb);
1427 return 0;
1428}
1429
1430static int ip6mr_find_vif(struct net_device *dev)
1431{
1432 int ct;
1433 for (ct = maxvif - 1; ct >= 0; ct--) {
1434 if (vif6_table[ct].dev == dev)
1435 break;
1436 }
1437 return ct;
1438}
1439
1440static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache)
1441{
1442 int psend = -1;
1443 int vif, ct;
1444
1445 vif = cache->mf6c_parent;
1446 cache->mfc_un.res.pkt++;
1447 cache->mfc_un.res.bytes += skb->len;
1448
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +09001449 /*
1450 * Wrong interface: drop packet and (maybe) send PIM assert.
1451 */
1452 if (vif6_table[vif].dev != skb->dev) {
1453 int true_vifi;
1454
1455 cache->mfc_un.res.wrong_if++;
1456 true_vifi = ip6mr_find_vif(skb->dev);
1457
1458 if (true_vifi >= 0 && mroute_do_assert &&
1459 /* pimsm uses asserts, when switching from RPT to SPT,
1460 so that we cannot check that packet arrived on an oif.
1461 It is bad, but otherwise we would need to move pretty
1462 large chunk of pimd to kernel. Ough... --ANK
1463 */
1464 (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
1465 time_after(jiffies,
1466 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1467 cache->mfc_un.res.last_assert = jiffies;
1468 ip6mr_cache_report(skb, true_vifi, MRT6MSG_WRONGMIF);
1469 }
1470 goto dont_forward;
1471 }
1472
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001473 vif6_table[vif].pkt_in++;
1474 vif6_table[vif].bytes_in += skb->len;
1475
1476 /*
1477 * Forward the frame
1478 */
1479 for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
1480 if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
1481 if (psend != -1) {
1482 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1483 if (skb2)
1484 ip6mr_forward2(skb2, cache, psend);
1485 }
1486 psend = ct;
1487 }
1488 }
1489 if (psend != -1) {
1490 ip6mr_forward2(skb, cache, psend);
1491 return 0;
1492 }
1493
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +09001494dont_forward:
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001495 kfree_skb(skb);
1496 return 0;
1497}
1498
1499
/*
 *	Multicast packets to be forwarded arrive here.
 */
1503
1504int ip6_mr_input(struct sk_buff *skb)
1505{
1506 struct mfc6_cache *cache;
1507
1508 read_lock(&mrt_lock);
1509 cache = ip6mr_cache_find(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
1510
1511 /*
1512 * No usable cache entry
1513 */
1514 if (cache == NULL) {
1515 int vif;
1516
1517 vif = ip6mr_find_vif(skb->dev);
1518 if (vif >= 0) {
1519 int err = ip6mr_cache_unresolved(vif, skb);
1520 read_unlock(&mrt_lock);
1521
1522 return err;
1523 }
1524 read_unlock(&mrt_lock);
1525 kfree_skb(skb);
1526 return -ENODEV;
1527 }
1528
1529 ip6_mr_forward(skb, cache);
1530
1531 read_unlock(&mrt_lock);
1532
1533 return 0;
1534}
1535
1536
1537static int
1538ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm)
1539{
1540 int ct;
1541 struct rtnexthop *nhp;
1542 struct net_device *dev = vif6_table[c->mf6c_parent].dev;
YOSHIFUJI Hideaki549e0282008-04-05 22:17:39 +09001543 u8 *b = skb_tail_pointer(skb);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001544 struct rtattr *mp_head;
1545
1546 if (dev)
1547 RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
1548
1549 mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1550
1551 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1552 if (c->mfc_un.res.ttls[ct] < 255) {
1553 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1554 goto rtattr_failure;
1555 nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1556 nhp->rtnh_flags = 0;
1557 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1558 nhp->rtnh_ifindex = vif6_table[ct].dev->ifindex;
1559 nhp->rtnh_len = sizeof(*nhp);
1560 }
1561 }
1562 mp_head->rta_type = RTA_MULTIPATH;
YOSHIFUJI Hideaki549e0282008-04-05 22:17:39 +09001563 mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001564 rtm->rtm_type = RTN_MULTICAST;
1565 return 1;
1566
1567rtattr_failure:
1568 nlmsg_trim(skb, b);
1569 return -EMSGSIZE;
1570}
1571
1572int ip6mr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1573{
1574 int err;
1575 struct mfc6_cache *cache;
1576 struct rt6_info *rt = (struct rt6_info *)skb->dst;
1577
1578 read_lock(&mrt_lock);
1579 cache = ip6mr_cache_find(&rt->rt6i_src.addr, &rt->rt6i_dst.addr);
1580
1581 if (!cache) {
1582 struct sk_buff *skb2;
1583 struct ipv6hdr *iph;
1584 struct net_device *dev;
1585 int vif;
1586
1587 if (nowait) {
1588 read_unlock(&mrt_lock);
1589 return -EAGAIN;
1590 }
1591
1592 dev = skb->dev;
1593 if (dev == NULL || (vif = ip6mr_find_vif(dev)) < 0) {
1594 read_unlock(&mrt_lock);
1595 return -ENODEV;
1596 }
1597
1598 /* really correct? */
1599 skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
1600 if (!skb2) {
1601 read_unlock(&mrt_lock);
1602 return -ENOMEM;
1603 }
1604
1605 skb_reset_transport_header(skb2);
1606
1607 skb_put(skb2, sizeof(struct ipv6hdr));
1608 skb_reset_network_header(skb2);
1609
1610 iph = ipv6_hdr(skb2);
1611 iph->version = 0;
1612 iph->priority = 0;
1613 iph->flow_lbl[0] = 0;
1614 iph->flow_lbl[1] = 0;
1615 iph->flow_lbl[2] = 0;
1616 iph->payload_len = 0;
1617 iph->nexthdr = IPPROTO_NONE;
1618 iph->hop_limit = 0;
1619 ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr);
1620 ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr);
1621
1622 err = ip6mr_cache_unresolved(vif, skb2);
1623 read_unlock(&mrt_lock);
1624
1625 return err;
1626 }
1627
1628 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1629 cache->mfc_flags |= MFC_NOTIFY;
1630
1631 err = ip6mr_fill_mroute(skb, cache, rtm);
1632 read_unlock(&mrt_lock);
1633 return err;
1634}
1635