2 * IP multicast routing support for mrouted 3.6/3.8
4 * (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
5 * Linux Consultancy and Custom Driver Development
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
13 * Michael Chastain : Incorrect size of copying.
14 * Alan Cox : Added the cache manager code
15 * Alan Cox : Fixed the clone/copy bug and device race.
16 * Mike McLagan : Routing by source
17 * Malcolm Beattie : Buffer handling fixes.
18 * Alexey Kuznetsov : Double buffer free and other fixes.
19 * SVR Anand : Fixed several multicast bugs and problems.
20 * Alexey Kuznetsov : Status, optimisations and more.
21 * Brad Parker : Better behaviour on mrouted upcall
23 * Carlos Picoto : PIMv1 Support
24 * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header
25 * Relax this requirement to work with older peers.
29 #include <asm/system.h>
30 #include <asm/uaccess.h>
31 #include <linux/types.h>
32 #include <linux/capability.h>
33 #include <linux/errno.h>
34 #include <linux/timer.h>
36 #include <linux/kernel.h>
37 #include <linux/fcntl.h>
38 #include <linux/stat.h>
39 #include <linux/socket.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/inetdevice.h>
44 #include <linux/igmp.h>
45 #include <linux/proc_fs.h>
46 #include <linux/seq_file.h>
47 #include <linux/mroute.h>
48 #include <linux/init.h>
49 #include <linux/if_ether.h>
50 #include <linux/slab.h>
51 #include <net/net_namespace.h>
53 #include <net/protocol.h>
54 #include <linux/skbuff.h>
55 #include <net/route.h>
60 #include <linux/notifier.h>
61 #include <linux/if_arp.h>
62 #include <linux/netfilter_ipv4.h>
64 #include <net/checksum.h>
65 #include <net/netlink.h>
66 #include <net/fib_rules.h>
68 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
69 #define CONFIG_IP_PIMSM 1
73 struct list_head list;
78 struct sock *mroute_sk;
79 struct timer_list ipmr_expire_timer;
80 struct list_head mfc_unres_queue;
81 struct list_head mfc_cache_array[MFC_LINES];
82 struct vif_device vif_table[MAXVIFS];
84 atomic_t cache_resolve_queue_len;
87 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
88 int mroute_reg_vif_num;
93 struct fib_rule common;
100 /* Big lock, protecting vif table, mrt cache and mroute socket state.
101 Note that the changes are semaphored via rtnl_lock.
104 static DEFINE_RWLOCK(mrt_lock);
107 * Multicast router control variables
110 #define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL)
112 /* Special spinlock for queue of unresolved entries */
113 static DEFINE_SPINLOCK(mfc_unres_lock);
115 /* We return to original Alan's scheme. Hash table of resolved
116 entries is changed only in process context and protected
117 with weak lock mrt_lock. Queue of unresolved entries is protected
118 with strong spinlock mfc_unres_lock.
120 In this case data path is free of exclusive locks at all.
123 static struct kmem_cache *mrt_cachep __read_mostly;
125 static struct mr_table *ipmr_new_table(struct net *net, u32 id);
126 static int ip_mr_forward(struct net *net, struct mr_table *mrt,
127 struct sk_buff *skb, struct mfc_cache *cache,
129 static int ipmr_cache_report(struct mr_table *mrt,
130 struct sk_buff *pkt, vifi_t vifi, int assert);
131 static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
132 struct mfc_cache *c, struct rtmsg *rtm);
133 static void ipmr_expire_process(unsigned long arg);
135 #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
136 #define ipmr_for_each_table(mrt, net) \
137 list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)
139 static struct mr_table *ipmr_get_table(struct net *net, u32 id)
141 struct mr_table *mrt;
143 ipmr_for_each_table(mrt, net) {
150 static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
151 struct mr_table **mrt)
153 struct ipmr_result res;
154 struct fib_lookup_arg arg = { .result = &res, };
157 err = fib_rules_lookup(net->ipv4.mr_rules_ops, flp, 0, &arg);
164 static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp,
165 int flags, struct fib_lookup_arg *arg)
167 struct ipmr_result *res = arg->result;
168 struct mr_table *mrt;
170 switch (rule->action) {
173 case FR_ACT_UNREACHABLE:
175 case FR_ACT_PROHIBIT:
177 case FR_ACT_BLACKHOLE:
182 mrt = ipmr_get_table(rule->fr_net, rule->table);
189 static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
194 static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = {
198 static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
199 struct fib_rule_hdr *frh, struct nlattr **tb)
204 static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
210 static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
211 struct fib_rule_hdr *frh)
219 static const struct fib_rules_ops __net_initdata ipmr_rules_ops_template = {
220 .family = RTNL_FAMILY_IPMR,
221 .rule_size = sizeof(struct ipmr_rule),
222 .addr_size = sizeof(u32),
223 .action = ipmr_rule_action,
224 .match = ipmr_rule_match,
225 .configure = ipmr_rule_configure,
226 .compare = ipmr_rule_compare,
227 .default_pref = fib_default_rule_pref,
228 .fill = ipmr_rule_fill,
229 .nlgroup = RTNLGRP_IPV4_RULE,
230 .policy = ipmr_rule_policy,
231 .owner = THIS_MODULE,
234 static int __net_init ipmr_rules_init(struct net *net)
236 struct fib_rules_ops *ops;
237 struct mr_table *mrt;
240 ops = fib_rules_register(&ipmr_rules_ops_template, net);
244 INIT_LIST_HEAD(&net->ipv4.mr_tables);
246 mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
252 err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0);
256 net->ipv4.mr_rules_ops = ops;
262 fib_rules_unregister(ops);
266 static void __net_exit ipmr_rules_exit(struct net *net)
268 struct mr_table *mrt, *next;
270 list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list)
272 fib_rules_unregister(net->ipv4.mr_rules_ops);
275 #define ipmr_for_each_table(mrt, net) \
276 for (mrt = net->ipv4.mrt; mrt; mrt = NULL)
278 static struct mr_table *ipmr_get_table(struct net *net, u32 id)
280 return net->ipv4.mrt;
283 static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
284 struct mr_table **mrt)
286 *mrt = net->ipv4.mrt;
290 static int __net_init ipmr_rules_init(struct net *net)
292 net->ipv4.mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
293 return net->ipv4.mrt ? 0 : -ENOMEM;
296 static void __net_exit ipmr_rules_exit(struct net *net)
298 kfree(net->ipv4.mrt);
302 static struct mr_table *ipmr_new_table(struct net *net, u32 id)
304 struct mr_table *mrt;
307 mrt = ipmr_get_table(net, id);
311 mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
314 write_pnet(&mrt->net, net);
317 /* Forwarding cache */
318 for (i = 0; i < MFC_LINES; i++)
319 INIT_LIST_HEAD(&mrt->mfc_cache_array[i]);
321 INIT_LIST_HEAD(&mrt->mfc_unres_queue);
323 setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
326 #ifdef CONFIG_IP_PIMSM
327 mrt->mroute_reg_vif_num = -1;
329 #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
330 list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
335 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
337 static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
339 struct net *net = dev_net(dev);
343 dev = __dev_get_by_name(net, "tunl0");
345 const struct net_device_ops *ops = dev->netdev_ops;
347 struct ip_tunnel_parm p;
349 memset(&p, 0, sizeof(p));
350 p.iph.daddr = v->vifc_rmt_addr.s_addr;
351 p.iph.saddr = v->vifc_lcl_addr.s_addr;
354 p.iph.protocol = IPPROTO_IPIP;
355 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
356 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
358 if (ops->ndo_do_ioctl) {
359 mm_segment_t oldfs = get_fs();
362 ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
369 struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
371 struct net_device *dev;
373 dev = __dev_get_by_name(net, "tunl0");
376 const struct net_device_ops *ops = dev->netdev_ops;
379 struct ip_tunnel_parm p;
380 struct in_device *in_dev;
382 memset(&p, 0, sizeof(p));
383 p.iph.daddr = v->vifc_rmt_addr.s_addr;
384 p.iph.saddr = v->vifc_lcl_addr.s_addr;
387 p.iph.protocol = IPPROTO_IPIP;
388 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
389 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
391 if (ops->ndo_do_ioctl) {
392 mm_segment_t oldfs = get_fs();
395 err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
403 (dev = __dev_get_by_name(net, p.name)) != NULL) {
404 dev->flags |= IFF_MULTICAST;
406 in_dev = __in_dev_get_rtnl(dev);
410 ipv4_devconf_setall(in_dev);
411 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
421 /* allow the register to be completed before unregistering. */
425 unregister_netdevice(dev);
429 #ifdef CONFIG_IP_PIMSM
431 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
433 struct net *net = dev_net(dev);
434 struct mr_table *mrt;
442 err = ipmr_fib_lookup(net, &fl, &mrt);
446 read_lock(&mrt_lock);
447 dev->stats.tx_bytes += skb->len;
448 dev->stats.tx_packets++;
449 ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT);
450 read_unlock(&mrt_lock);
455 static const struct net_device_ops reg_vif_netdev_ops = {
456 .ndo_start_xmit = reg_vif_xmit,
459 static void reg_vif_setup(struct net_device *dev)
461 dev->type = ARPHRD_PIMREG;
462 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
463 dev->flags = IFF_NOARP;
464 dev->netdev_ops = ®_vif_netdev_ops,
465 dev->destructor = free_netdev;
466 dev->features |= NETIF_F_NETNS_LOCAL;
469 static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
471 struct net_device *dev;
472 struct in_device *in_dev;
475 if (mrt->id == RT_TABLE_DEFAULT)
476 sprintf(name, "pimreg");
478 sprintf(name, "pimreg%u", mrt->id);
480 dev = alloc_netdev(0, name, reg_vif_setup);
485 dev_net_set(dev, net);
487 if (register_netdevice(dev)) {
494 if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
499 ipv4_devconf_setall(in_dev);
500 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
511 /* allow the register to be completed before unregistering. */
515 unregister_netdevice(dev);
522 * @notify: Set to 1, if the caller is a notifier_call
525 static int vif_delete(struct mr_table *mrt, int vifi, int notify,
526 struct list_head *head)
528 struct vif_device *v;
529 struct net_device *dev;
530 struct in_device *in_dev;
532 if (vifi < 0 || vifi >= mrt->maxvif)
533 return -EADDRNOTAVAIL;
535 v = &mrt->vif_table[vifi];
537 write_lock_bh(&mrt_lock);
542 write_unlock_bh(&mrt_lock);
543 return -EADDRNOTAVAIL;
546 #ifdef CONFIG_IP_PIMSM
547 if (vifi == mrt->mroute_reg_vif_num)
548 mrt->mroute_reg_vif_num = -1;
551 if (vifi+1 == mrt->maxvif) {
553 for (tmp=vifi-1; tmp>=0; tmp--) {
554 if (VIF_EXISTS(mrt, tmp))
560 write_unlock_bh(&mrt_lock);
562 dev_set_allmulti(dev, -1);
564 if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
565 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
566 ip_rt_multicast_event(in_dev);
569 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
570 unregister_netdevice_queue(dev, head);
576 static inline void ipmr_cache_free(struct mfc_cache *c)
578 kmem_cache_free(mrt_cachep, c);
581 /* Destroy an unresolved cache entry, killing queued skbs
582 and reporting error to netlink readers.
585 static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
587 struct net *net = read_pnet(&mrt->net);
591 atomic_dec(&mrt->cache_resolve_queue_len);
593 while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
594 if (ip_hdr(skb)->version == 0) {
595 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
596 nlh->nlmsg_type = NLMSG_ERROR;
597 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
598 skb_trim(skb, nlh->nlmsg_len);
600 e->error = -ETIMEDOUT;
601 memset(&e->msg, 0, sizeof(e->msg));
603 rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
612 /* Timer process for the unresolved queue. */
614 static void ipmr_expire_process(unsigned long arg)
616 struct mr_table *mrt = (struct mr_table *)arg;
618 unsigned long expires;
619 struct mfc_cache *c, *next;
621 if (!spin_trylock(&mfc_unres_lock)) {
622 mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10);
626 if (list_empty(&mrt->mfc_unres_queue))
632 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
633 if (time_after(c->mfc_un.unres.expires, now)) {
634 unsigned long interval = c->mfc_un.unres.expires - now;
635 if (interval < expires)
641 ipmr_destroy_unres(mrt, c);
644 if (!list_empty(&mrt->mfc_unres_queue))
645 mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
648 spin_unlock(&mfc_unres_lock);
651 /* Fill oifs list. It is called under write locked mrt_lock. */
653 static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
658 cache->mfc_un.res.minvif = MAXVIFS;
659 cache->mfc_un.res.maxvif = 0;
660 memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
662 for (vifi = 0; vifi < mrt->maxvif; vifi++) {
663 if (VIF_EXISTS(mrt, vifi) &&
664 ttls[vifi] && ttls[vifi] < 255) {
665 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
666 if (cache->mfc_un.res.minvif > vifi)
667 cache->mfc_un.res.minvif = vifi;
668 if (cache->mfc_un.res.maxvif <= vifi)
669 cache->mfc_un.res.maxvif = vifi + 1;
674 static int vif_add(struct net *net, struct mr_table *mrt,
675 struct vifctl *vifc, int mrtsock)
677 int vifi = vifc->vifc_vifi;
678 struct vif_device *v = &mrt->vif_table[vifi];
679 struct net_device *dev;
680 struct in_device *in_dev;
684 if (VIF_EXISTS(mrt, vifi))
687 switch (vifc->vifc_flags) {
688 #ifdef CONFIG_IP_PIMSM
691 * Special Purpose VIF in PIM
692 * All the packets will be sent to the daemon
694 if (mrt->mroute_reg_vif_num >= 0)
696 dev = ipmr_reg_vif(net, mrt);
699 err = dev_set_allmulti(dev, 1);
701 unregister_netdevice(dev);
708 dev = ipmr_new_tunnel(net, vifc);
711 err = dev_set_allmulti(dev, 1);
713 ipmr_del_tunnel(dev, vifc);
719 case VIFF_USE_IFINDEX:
721 if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
722 dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
723 if (dev && dev->ip_ptr == NULL) {
725 return -EADDRNOTAVAIL;
728 dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
731 return -EADDRNOTAVAIL;
732 err = dev_set_allmulti(dev, 1);
742 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
744 return -EADDRNOTAVAIL;
746 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
747 ip_rt_multicast_event(in_dev);
750 * Fill in the VIF structures
752 v->rate_limit = vifc->vifc_rate_limit;
753 v->local = vifc->vifc_lcl_addr.s_addr;
754 v->remote = vifc->vifc_rmt_addr.s_addr;
755 v->flags = vifc->vifc_flags;
757 v->flags |= VIFF_STATIC;
758 v->threshold = vifc->vifc_threshold;
763 v->link = dev->ifindex;
764 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
765 v->link = dev->iflink;
767 /* And finish update writing critical data */
768 write_lock_bh(&mrt_lock);
770 #ifdef CONFIG_IP_PIMSM
771 if (v->flags&VIFF_REGISTER)
772 mrt->mroute_reg_vif_num = vifi;
774 if (vifi+1 > mrt->maxvif)
775 mrt->maxvif = vifi+1;
776 write_unlock_bh(&mrt_lock);
780 static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
784 int line = MFC_HASH(mcastgrp, origin);
787 list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
788 if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
795 * Allocate a multicast cache entry
797 static struct mfc_cache *ipmr_cache_alloc(void)
799 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
802 c->mfc_un.res.minvif = MAXVIFS;
806 static struct mfc_cache *ipmr_cache_alloc_unres(void)
808 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
811 skb_queue_head_init(&c->mfc_un.unres.unresolved);
812 c->mfc_un.unres.expires = jiffies + 10*HZ;
817 * A cache entry has gone into a resolved state from queued
820 static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
821 struct mfc_cache *uc, struct mfc_cache *c)
827 * Play the pending entries through our router
830 while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
831 if (ip_hdr(skb)->version == 0) {
832 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
834 if (__ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
835 nlh->nlmsg_len = (skb_tail_pointer(skb) -
838 nlh->nlmsg_type = NLMSG_ERROR;
839 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
840 skb_trim(skb, nlh->nlmsg_len);
842 e->error = -EMSGSIZE;
843 memset(&e->msg, 0, sizeof(e->msg));
846 rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
848 ip_mr_forward(net, mrt, skb, c, 0);
853 * Bounce a cache query up to mrouted. We could use netlink for this but mrouted
854 * expects the following bizarre scheme.
856 * Called under mrt_lock.
859 static int ipmr_cache_report(struct mr_table *mrt,
860 struct sk_buff *pkt, vifi_t vifi, int assert)
863 const int ihl = ip_hdrlen(pkt);
864 struct igmphdr *igmp;
868 #ifdef CONFIG_IP_PIMSM
869 if (assert == IGMPMSG_WHOLEPKT)
870 skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
873 skb = alloc_skb(128, GFP_ATOMIC);
878 #ifdef CONFIG_IP_PIMSM
879 if (assert == IGMPMSG_WHOLEPKT) {
880 /* Ugly, but we have no choice with this interface.
881 Duplicate old header, fix ihl, length etc.
882 And all this only to mangle msg->im_msgtype and
883 to set msg->im_mbz to "mbz" :-)
885 skb_push(skb, sizeof(struct iphdr));
886 skb_reset_network_header(skb);
887 skb_reset_transport_header(skb);
888 msg = (struct igmpmsg *)skb_network_header(skb);
889 memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
890 msg->im_msgtype = IGMPMSG_WHOLEPKT;
892 msg->im_vif = mrt->mroute_reg_vif_num;
893 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
894 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
895 sizeof(struct iphdr));
904 skb->network_header = skb->tail;
906 skb_copy_to_linear_data(skb, pkt->data, ihl);
907 ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */
908 msg = (struct igmpmsg *)skb_network_header(skb);
910 skb_dst_set(skb, dst_clone(skb_dst(pkt)));
916 igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
918 msg->im_msgtype = assert;
920 ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */
921 skb->transport_header = skb->network_header;
924 if (mrt->mroute_sk == NULL) {
932 ret = sock_queue_rcv_skb(mrt->mroute_sk, skb);
935 printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
943 * Queue a packet for resolution. It gets locked cache entry!
947 ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
952 const struct iphdr *iph = ip_hdr(skb);
954 spin_lock_bh(&mfc_unres_lock);
955 list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
956 if (c->mfc_mcastgrp == iph->daddr &&
957 c->mfc_origin == iph->saddr) {
965 * Create a new entry if allowable
968 if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
969 (c = ipmr_cache_alloc_unres()) == NULL) {
970 spin_unlock_bh(&mfc_unres_lock);
977 * Fill in the new cache entry
980 c->mfc_origin = iph->saddr;
981 c->mfc_mcastgrp = iph->daddr;
984 * Reflect first query at mrouted.
986 err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
988 /* If the report failed throw the cache entry
991 spin_unlock_bh(&mfc_unres_lock);
998 atomic_inc(&mrt->cache_resolve_queue_len);
999 list_add(&c->list, &mrt->mfc_unres_queue);
1001 if (atomic_read(&mrt->cache_resolve_queue_len) == 1)
1002 mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
1006 * See if we can append the packet
1008 if (c->mfc_un.unres.unresolved.qlen>3) {
1012 skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
1016 spin_unlock_bh(&mfc_unres_lock);
1021 * MFC cache manipulation by user space mroute daemon
1024 static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc)
1027 struct mfc_cache *c, *next;
1029 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
1031 list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) {
1032 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
1033 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
1034 write_lock_bh(&mrt_lock);
1036 write_unlock_bh(&mrt_lock);
1045 static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
1046 struct mfcctl *mfc, int mrtsock)
1050 struct mfc_cache *uc, *c;
1052 if (mfc->mfcc_parent >= MAXVIFS)
1055 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
1057 list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
1058 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
1059 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
1066 write_lock_bh(&mrt_lock);
1067 c->mfc_parent = mfc->mfcc_parent;
1068 ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
1070 c->mfc_flags |= MFC_STATIC;
1071 write_unlock_bh(&mrt_lock);
1075 if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
1078 c = ipmr_cache_alloc();
1082 c->mfc_origin = mfc->mfcc_origin.s_addr;
1083 c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
1084 c->mfc_parent = mfc->mfcc_parent;
1085 ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
1087 c->mfc_flags |= MFC_STATIC;
1089 write_lock_bh(&mrt_lock);
1090 list_add(&c->list, &mrt->mfc_cache_array[line]);
1091 write_unlock_bh(&mrt_lock);
1094 * Check to see if we resolved a queued list. If so we
1095 * need to send on the frames and tidy up.
1098 spin_lock_bh(&mfc_unres_lock);
1099 list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
1100 if (uc->mfc_origin == c->mfc_origin &&
1101 uc->mfc_mcastgrp == c->mfc_mcastgrp) {
1102 list_del(&uc->list);
1103 atomic_dec(&mrt->cache_resolve_queue_len);
1108 if (list_empty(&mrt->mfc_unres_queue))
1109 del_timer(&mrt->ipmr_expire_timer);
1110 spin_unlock_bh(&mfc_unres_lock);
1113 ipmr_cache_resolve(net, mrt, uc, c);
1114 ipmr_cache_free(uc);
1120 * Close the multicast socket, and clear the vif tables etc
1123 static void mroute_clean_tables(struct mr_table *mrt)
1127 struct mfc_cache *c, *next;
1130 * Shut down all active vif entries
1132 for (i = 0; i < mrt->maxvif; i++) {
1133 if (!(mrt->vif_table[i].flags&VIFF_STATIC))
1134 vif_delete(mrt, i, 0, &list);
1136 unregister_netdevice_many(&list);
1141 for (i = 0; i < MFC_LINES; i++) {
1142 list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) {
1143 if (c->mfc_flags&MFC_STATIC)
1145 write_lock_bh(&mrt_lock);
1147 write_unlock_bh(&mrt_lock);
1153 if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1154 spin_lock_bh(&mfc_unres_lock);
1155 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
1157 ipmr_destroy_unres(mrt, c);
1159 spin_unlock_bh(&mfc_unres_lock);
1163 static void mrtsock_destruct(struct sock *sk)
1165 struct net *net = sock_net(sk);
1166 struct mr_table *mrt;
1169 ipmr_for_each_table(mrt, net) {
1170 if (sk == mrt->mroute_sk) {
1171 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
1173 write_lock_bh(&mrt_lock);
1174 mrt->mroute_sk = NULL;
1175 write_unlock_bh(&mrt_lock);
1177 mroute_clean_tables(mrt);
1184 * Socket options and virtual interface manipulation. The whole
1185 * virtual interface system is a complete heap, but unfortunately
1186 * that's how BSD mrouted happens to think. Maybe one day with a proper
1187 * MOSPF/PIM router set up we can clean this up.
1190 int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1195 struct net *net = sock_net(sk);
1196 struct mr_table *mrt;
1198 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1202 if (optname != MRT_INIT) {
1203 if (sk != mrt->mroute_sk && !capable(CAP_NET_ADMIN))
1209 if (sk->sk_type != SOCK_RAW ||
1210 inet_sk(sk)->inet_num != IPPROTO_IGMP)
1212 if (optlen != sizeof(int))
1213 return -ENOPROTOOPT;
1216 if (mrt->mroute_sk) {
1221 ret = ip_ra_control(sk, 1, mrtsock_destruct);
1223 write_lock_bh(&mrt_lock);
1224 mrt->mroute_sk = sk;
1225 write_unlock_bh(&mrt_lock);
1227 IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
1232 if (sk != mrt->mroute_sk)
1234 return ip_ra_control(sk, 0, NULL);
1237 if (optlen != sizeof(vif))
1239 if (copy_from_user(&vif, optval, sizeof(vif)))
1241 if (vif.vifc_vifi >= MAXVIFS)
1244 if (optname == MRT_ADD_VIF) {
1245 ret = vif_add(net, mrt, &vif, sk == mrt->mroute_sk);
1247 ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
1253 * Manipulate the forwarding caches. These live
1254 * in a sort of kernel/user symbiosis.
1258 if (optlen != sizeof(mfc))
1260 if (copy_from_user(&mfc, optval, sizeof(mfc)))
1263 if (optname == MRT_DEL_MFC)
1264 ret = ipmr_mfc_delete(mrt, &mfc);
1266 ret = ipmr_mfc_add(net, mrt, &mfc, sk == mrt->mroute_sk);
1270 * Control PIM assert.
1275 if (get_user(v,(int __user *)optval))
1277 mrt->mroute_do_assert = (v) ? 1 : 0;
1280 #ifdef CONFIG_IP_PIMSM
1285 if (get_user(v,(int __user *)optval))
1291 if (v != mrt->mroute_do_pim) {
1292 mrt->mroute_do_pim = v;
1293 mrt->mroute_do_assert = v;
1299 #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
1304 if (optlen != sizeof(u32))
1306 if (get_user(v, (u32 __user *)optval))
1308 if (sk == mrt->mroute_sk)
1313 if (!ipmr_new_table(net, v))
1315 raw_sk(sk)->ipmr_table = v;
1321 * Spurious command, or MRT_VERSION which you cannot
1325 return -ENOPROTOOPT;
1330 * Getsock opt support for the multicast routing system.
1333 int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
1337 struct net *net = sock_net(sk);
1338 struct mr_table *mrt;
1340 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1344 if (optname != MRT_VERSION &&
1345 #ifdef CONFIG_IP_PIMSM
1348 optname!=MRT_ASSERT)
1349 return -ENOPROTOOPT;
1351 if (get_user(olr, optlen))
1354 olr = min_t(unsigned int, olr, sizeof(int));
1358 if (put_user(olr, optlen))
1360 if (optname == MRT_VERSION)
1362 #ifdef CONFIG_IP_PIMSM
1363 else if (optname == MRT_PIM)
1364 val = mrt->mroute_do_pim;
1367 val = mrt->mroute_do_assert;
1368 if (copy_to_user(optval, &val, olr))
1374 * The IP multicast ioctl support routines.
1377 int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1379 struct sioc_sg_req sr;
1380 struct sioc_vif_req vr;
1381 struct vif_device *vif;
1382 struct mfc_cache *c;
1383 struct net *net = sock_net(sk);
1384 struct mr_table *mrt;
1386 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1392 if (copy_from_user(&vr, arg, sizeof(vr)))
1394 if (vr.vifi >= mrt->maxvif)
1396 read_lock(&mrt_lock);
1397 vif = &mrt->vif_table[vr.vifi];
1398 if (VIF_EXISTS(mrt, vr.vifi)) {
1399 vr.icount = vif->pkt_in;
1400 vr.ocount = vif->pkt_out;
1401 vr.ibytes = vif->bytes_in;
1402 vr.obytes = vif->bytes_out;
1403 read_unlock(&mrt_lock);
1405 if (copy_to_user(arg, &vr, sizeof(vr)))
1409 read_unlock(&mrt_lock);
1410 return -EADDRNOTAVAIL;
1412 if (copy_from_user(&sr, arg, sizeof(sr)))
1415 read_lock(&mrt_lock);
1416 c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
1418 sr.pktcnt = c->mfc_un.res.pkt;
1419 sr.bytecnt = c->mfc_un.res.bytes;
1420 sr.wrong_if = c->mfc_un.res.wrong_if;
1421 read_unlock(&mrt_lock);
1423 if (copy_to_user(arg, &sr, sizeof(sr)))
1427 read_unlock(&mrt_lock);
1428 return -EADDRNOTAVAIL;
1430 return -ENOIOCTLCMD;
1435 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1437 struct net_device *dev = ptr;
1438 struct net *net = dev_net(dev);
1439 struct mr_table *mrt;
1440 struct vif_device *v;
1444 if (event != NETDEV_UNREGISTER)
1447 ipmr_for_each_table(mrt, net) {
1448 v = &mrt->vif_table[0];
1449 for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1451 vif_delete(mrt, ct, 1, &list);
1454 unregister_netdevice_many(&list);
1459 static struct notifier_block ip_mr_notifier = {
1460 .notifier_call = ipmr_device_event,
1464 * Encapsulate a packet by attaching a valid IPIP header to it.
1465 * This avoids tunnel drivers and other mess and gives us the speed so
1466 * important for multicast video.
1469 static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
1472 struct iphdr *old_iph = ip_hdr(skb);
1474 skb_push(skb, sizeof(struct iphdr));
1475 skb->transport_header = skb->network_header;
1476 skb_reset_network_header(skb);
1480 iph->tos = old_iph->tos;
1481 iph->ttl = old_iph->ttl;
1485 iph->protocol = IPPROTO_IPIP;
1487 iph->tot_len = htons(skb->len);
1488 ip_select_ident(iph, skb_dst(skb), NULL);
1491 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1495 static inline int ipmr_forward_finish(struct sk_buff *skb)
1497 struct ip_options * opt = &(IPCB(skb)->opt);
1499 IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
1501 if (unlikely(opt->optlen))
1502 ip_forward_options(skb);
1504 return dst_output(skb);
1508 * Processing handlers for ipmr_forward
1511 static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1512 struct sk_buff *skb, struct mfc_cache *c, int vifi)
1514 const struct iphdr *iph = ip_hdr(skb);
1515 struct vif_device *vif = &mrt->vif_table[vifi];
1516 struct net_device *dev;
1520 if (vif->dev == NULL)
1523 #ifdef CONFIG_IP_PIMSM
1524 if (vif->flags & VIFF_REGISTER) {
1526 vif->bytes_out += skb->len;
1527 vif->dev->stats.tx_bytes += skb->len;
1528 vif->dev->stats.tx_packets++;
1529 ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
1534 if (vif->flags&VIFF_TUNNEL) {
1535 struct flowi fl = { .oif = vif->link,
1537 { .daddr = vif->remote,
1538 .saddr = vif->local,
1539 .tos = RT_TOS(iph->tos) } },
1540 .proto = IPPROTO_IPIP };
1541 if (ip_route_output_key(net, &rt, &fl))
1543 encap = sizeof(struct iphdr);
1545 struct flowi fl = { .oif = vif->link,
1547 { .daddr = iph->daddr,
1548 .tos = RT_TOS(iph->tos) } },
1549 .proto = IPPROTO_IPIP };
1550 if (ip_route_output_key(net, &rt, &fl))
1554 dev = rt->u.dst.dev;
1556 if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
1557 /* Do not fragment multicasts. Alas, IPv4 does not
1558 allow to send ICMP, so that packets will disappear
1562 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
1567 encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;
1569 if (skb_cow(skb, encap)) {
1575 vif->bytes_out += skb->len;
1578 skb_dst_set(skb, &rt->u.dst);
1579 ip_decrease_ttl(ip_hdr(skb));
1581 /* FIXME: forward and output firewalls used to be called here.
1582 * What do we do with netfilter? -- RR */
1583 if (vif->flags & VIFF_TUNNEL) {
1584 ip_encap(skb, vif->local, vif->remote);
1585 /* FIXME: extra output firewall step used to be here. --RR */
1586 vif->dev->stats.tx_packets++;
1587 vif->dev->stats.tx_bytes += skb->len;
1590 IPCB(skb)->flags |= IPSKB_FORWARDED;
1593 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1594 * not only before forwarding, but after forwarding on all output
1595 * interfaces. It is clear, if mrouter runs a multicasting
1596 * program, it should receive packets not depending to what interface
1597 * program is joined.
1598 * If we will not make it, the program will have to join on all
1599 * interfaces. On the other hand, multihoming host (or router, but
1600 * not mrouter) cannot join to more than one interface - it will
1601 * result in receiving multiple packets.
1603 NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev, dev,
1604 ipmr_forward_finish);
1612 static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
1616 for (ct = mrt->maxvif-1; ct >= 0; ct--) {
1617 if (mrt->vif_table[ct].dev == dev)
1623 /* "local" means that we should preserve one skb (for local delivery) */
/*
 * ip_mr_forward - forward a multicast packet to all outgoing VIFs of a
 * resolved (S,G) cache entry, cloning the skb per interface.
 * @net:   owning network namespace
 * @mrt:   multicast routing table the entry belongs to
 * @skb:   the packet; consumed by this function (last xmit uses it directly)
 * @cache: resolved mfc_cache entry for (saddr, daddr)
 * The "local" flag (declaration elided here) preserves one skb for local
 * delivery by the caller.
 */
1625 static int ip_mr_forward(struct net *net, struct mr_table *mrt,
1626 struct sk_buff *skb, struct mfc_cache *cache,
/* Account this packet against the cache entry; vif is the expected iif. */
1632 vif = cache->mfc_parent;
1633 cache->mfc_un.res.pkt++;
1634 cache->mfc_un.res.bytes += skb->len;
1637 * Wrong interface: drop packet and (maybe) send PIM assert.
/* RPF check: packet must arrive on the entry's parent interface. */
1639 if (mrt->vif_table[vif].dev != skb->dev) {
/* iif == 0: this is our own transmit, looped back to us. */
1642 if (skb_rtable(skb)->fl.iif == 0) {
1643 /* It is our own packet, looped back.
1644 Very complicated situation...
1646 The best workaround until routing daemons will be
1647 fixed is not to redistribute packet, if it was
1648 send through wrong interface. It means, that
1649 multicast applications WILL NOT work for
1650 (S,G), which have default multicast route pointing
1651 to wrong oif. In any case, it is not a good
1652 idea to use multicasting applications on router.
1657 cache->mfc_un.res.wrong_if++;
1658 true_vifi = ipmr_find_vif(mrt, skb->dev);
/* Rate-limited WRONGVIF upcall so the daemon can send a PIM assert. */
1660 if (true_vifi >= 0 && mrt->mroute_do_assert &&
1661 /* pimsm uses asserts, when switching from RPT to SPT,
1662 so that we cannot check that packet arrived on an oif.
1663 It is bad, but otherwise we would need to move pretty
1664 large chunk of pimd to kernel. Ough... --ANK
1666 (mrt->mroute_do_pim ||
1667 cache->mfc_un.res.ttls[true_vifi] < 255) &&
1669 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1670 cache->mfc_un.res.last_assert = jiffies;
1671 ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
/* RPF passed: update input counters on the parent VIF. */
1676 mrt->vif_table[vif].pkt_in++;
1677 mrt->vif_table[vif].bytes_in += skb->len;
/*
 * Forward to every oif whose TTL threshold the packet clears.
 * A "psend" index (assignment elided) delays each xmit one iteration
 * so the final interface can consume the original skb without a clone.
 */
1682 for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
1683 if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
1685 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1687 ipmr_queue_xmit(net, mrt, skb2, cache,
/* Tail: either clone for local preservation or send the original skb. */
1695 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1697 ipmr_queue_xmit(net, mrt, skb2, cache, psend);
1699 ipmr_queue_xmit(net, mrt, skb, cache, psend);
1712 * Multicast packets for forwarding arrive here
/*
 * ip_mr_input - entry point for received multicast packets that may need
 * forwarding. Handles IGMP delivery to the routing daemon, resolved-cache
 * forwarding, unresolved-cache queueing, and optional local delivery when
 * the route carries RTCF_LOCAL.
 */
1715 int ip_mr_input(struct sk_buff *skb)
1717 struct mfc_cache *cache;
1718 struct net *net = dev_net(skb->dev);
1719 int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
1720 struct mr_table *mrt;
1723 /* Packet is looped back after forward, it should not be
1724 forwarded second time, but still can be delivered locally.
1726 if (IPCB(skb)->flags&IPSKB_FORWARDED)
/* Pick the mr_table for this packet's flow (multiple-table support). */
1729 err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt);
/* Router-alert option: hand to the RA chain (IGMP snoopers etc). */
1734 if (IPCB(skb)->opt.router_alert) {
1735 if (ip_call_ra_chain(skb))
1737 } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
1738 /* IGMPv1 (and broken IGMPv2 implementations sort of
1739 Cisco IOS <= 11.2(8)) do not put router alert
1740 option to IGMP packets destined to routable
1741 groups. It is very bad, because it means
1742 that we can forward NO IGMP messages.
/* Deliver raw IGMP to the mrouted control socket if one is attached. */
1744 read_lock(&mrt_lock);
1745 if (mrt->mroute_sk) {
1747 raw_rcv(mrt->mroute_sk, skb);
1748 read_unlock(&mrt_lock);
1751 read_unlock(&mrt_lock);
/* Look up the resolved (S,G) forwarding cache under mrt_lock. */
1755 read_lock(&mrt_lock);
1756 cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
1759 * No usable cache entry
1761 if (cache == NULL) {
/* Unresolved: if local delivery is wanted, deliver original and
 * continue with a clone (clone-use path elided in this extract). */
1765 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1766 ip_local_deliver(skb);
1768 read_unlock(&mrt_lock);
/* Queue the packet on the unresolved list and upcall the daemon. */
1774 vif = ipmr_find_vif(mrt, skb->dev);
1776 int err2 = ipmr_cache_unresolved(mrt, vif, skb);
1777 read_unlock(&mrt_lock);
1781 read_unlock(&mrt_lock);
/* Resolved: forward; "local" tells it to keep one copy for us. */
1786 ip_mr_forward(net, mrt, skb, cache, local);
1788 read_unlock(&mrt_lock);
1791 return ip_local_deliver(skb);
1797 return ip_local_deliver(skb);
1802 #ifdef CONFIG_IP_PIMSM
/*
 * __pim_rcv - common tail of PIMv1/PIMv2 REGISTER processing.
 * Strips the outer PIM header of @pimlen bytes, validates the
 * encapsulated IP packet, and re-injects it as if received on the
 * pimreg register VIF. Return semantics (0 on success) are in the
 * elided tail; callers free the skb on failure.
 */
1803 static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
1804 unsigned int pimlen)
1806 struct net_device *reg_dev = NULL;
1807 struct iphdr *encap;
/* Inner IP header sits right after the PIM header. */
1809 encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
1812 a. packet is really destinted to a multicast group
1813 b. packet is not a NULL-REGISTER
1814 c. packet is not truncated
1816 if (!ipv4_is_multicast(encap->daddr) ||
1817 encap->tot_len == 0 ||
1818 ntohs(encap->tot_len) + pimlen > skb->len)
/* Resolve the register VIF's device under mrt_lock. */
1821 read_lock(&mrt_lock);
1822 if (mrt->mroute_reg_vif_num >= 0)
1823 reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
1826 read_unlock(&mrt_lock);
1828 if (reg_dev == NULL)
/* Rewrite skb headers so the decapsulated packet looks freshly
 * received on reg_dev. */
1831 skb->mac_header = skb->network_header;
1832 skb_pull(skb, (u8*)encap - skb->data);
1833 skb_reset_network_header(skb);
1835 skb->protocol = htons(ETH_P_IP);
1837 skb->pkt_type = PACKET_HOST;
1839 reg_dev->stats.rx_bytes += skb->len;
1840 reg_dev->stats.rx_packets++;
1849 #ifdef CONFIG_IP_PIMSM_V1
1851 * Handle IGMP messages of PIMv1
/*
 * pim_rcv_v1 - receive a PIMv1 REGISTER carried inside an IGMP header.
 * Validates header length and message type, then hands the packet to
 * __pim_rcv() for decapsulation onto the register VIF. Error/drop
 * paths are elided in this extract.
 */
1854 int pim_rcv_v1(struct sk_buff * skb)
1856 struct igmphdr *pim;
1857 struct net *net = dev_net(skb->dev);
1858 struct mr_table *mrt;
/* Need the IGMP header plus at least an inner IP header. */
1860 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1863 pim = igmp_hdr(skb);
1865 if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
/* Only accept when the daemon enabled PIM and this is a V1 REGISTER. */
1868 if (!mrt->mroute_do_pim ||
1869 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1872 if (__pim_rcv(mrt, skb, sizeof(*pim))) {
1880 #ifdef CONFIG_IP_PIMSM_V2
/*
 * pim_rcv - receive a PIMv2 REGISTER message (IPPROTO_PIM handler).
 * Checks the pimreghdr type/flags and checksum, then decapsulates via
 * __pim_rcv(). Per the file header note, the checksum is accepted if
 * EITHER the PIM-header-only sum or the whole-packet sum verifies,
 * to interoperate with older peers.
 */
1881 static int pim_rcv(struct sk_buff * skb)
1883 struct pimreghdr *pim;
1884 struct net *net = dev_net(skb->dev);
1885 struct mr_table *mrt;
1887 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1890 pim = (struct pimreghdr *)skb_transport_header(skb);
/* Must be a REGISTER, not a NULL-REGISTER, with a valid checksum. */
1891 if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
1892 (pim->flags&PIM_NULL_REGISTER) ||
1893 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
1894 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
1897 if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
1900 if (__pim_rcv(mrt, skb, sizeof(*pim))) {
/*
 * __ipmr_fill_mroute - append RTA_IIF and an RTA_MULTIPATH attribute
 * (one rtnexthop per active oif) describing @c into @skb, and mark the
 * rtmsg as RTN_MULTICAST. Uses the legacy RTA_PUT/rtattr_failure
 * pattern; on overflow it trims back to @b (trim path elided).
 */
1908 static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
1909 struct mfc_cache *c, struct rtmsg *rtm)
1912 struct rtnexthop *nhp;
/* Remember the tail so a failed put can be rolled back. */
1913 u8 *b = skb_tail_pointer(skb);
1914 struct rtattr *mp_head;
1916 /* If cache is unresolved, don't try to parse IIF and OIF */
1917 if (c->mfc_parent > MAXVIFS)
1920 if (VIF_EXISTS(mrt, c->mfc_parent))
1921 RTA_PUT(skb, RTA_IIF, 4, &mrt->vif_table[c->mfc_parent].dev->ifindex);
/* Reserve the RTA_MULTIPATH header; length is patched in below. */
1923 mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
/* One nexthop per oif whose TTL threshold is active (< 255). */
1925 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1926 if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
1927 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1928 goto rtattr_failure;
1929 nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1930 nhp->rtnh_flags = 0;
/* Abuse rtnh_hops to carry the oif's TTL threshold. */
1931 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1932 nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
1933 nhp->rtnh_len = sizeof(*nhp);
/* Patch the multipath attribute's type and final length. */
1936 mp_head->rta_type = RTA_MULTIPATH;
1937 mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
1938 rtm->rtm_type = RTN_MULTICAST;
/*
 * ipmr_get_route - fill @rtm for an RTM_GETROUTE query on a multicast
 * route. If the (S,G) entry is unresolved, synthesizes a minimal IP
 * header on a clone and queues it as an unresolved-cache upcall
 * (unless @nowait forbids sleeping for resolution).
 * Always operates on the default table in this version.
 */
1946 int ipmr_get_route(struct net *net,
1947 struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1950 struct mr_table *mrt;
1951 struct mfc_cache *cache;
1952 struct rtable *rt = skb_rtable(skb);
1954 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
1958 read_lock(&mrt_lock);
1959 cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst);
1961 if (cache == NULL) {
1962 struct sk_buff *skb2;
1964 struct net_device *dev;
/* nowait path: cannot block on resolution, bail out. */
1968 read_unlock(&mrt_lock);
/* Need a VIF for the incoming device to queue an upcall. */
1973 if (dev == NULL || (vif = ipmr_find_vif(mrt, dev)) < 0) {
1974 read_unlock(&mrt_lock);
1977 skb2 = skb_clone(skb, GFP_ATOMIC);
1979 read_unlock(&mrt_lock);
/* Build a skeleton IP header on the clone so the unresolved-queue
 * machinery can parse saddr/daddr. (Some field writes elided.) */
1983 skb_push(skb2, sizeof(struct iphdr));
1984 skb_reset_network_header(skb2);
1986 iph->ihl = sizeof(struct iphdr) >> 2;
1987 iph->saddr = rt->rt_src;
1988 iph->daddr = rt->rt_dst;
1990 err = ipmr_cache_unresolved(mrt, vif, skb2);
1991 read_unlock(&mrt_lock);
/* Resolved entry: optionally flag for notification, then dump it. */
1995 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1996 cache->mfc_flags |= MFC_NOTIFY;
1997 err = __ipmr_fill_mroute(mrt, skb, cache, rtm);
1998 read_unlock(&mrt_lock);
/*
 * ipmr_fill_mroute - emit one RTM_NEWROUTE netlink message describing
 * mfc_cache @c (family RTNL_FAMILY_IPMR) into @skb for a dump.
 * Returns the nlmsg_end() result, or cancels the message on overflow.
 */
2002 static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2003 u32 pid, u32 seq, struct mfc_cache *c)
2005 struct nlmsghdr *nlh;
2008 nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
2012 rtm = nlmsg_data(nlh);
2013 rtm->rtm_family = RTNL_FAMILY_IPMR;
/* (S,G) routes are always host-specific on both sides. */
2014 rtm->rtm_dst_len = 32;
2015 rtm->rtm_src_len = 32;
2017 rtm->rtm_table = mrt->id;
2018 NLA_PUT_U32(skb, RTA_TABLE, mrt->id);
2019 rtm->rtm_type = RTN_MULTICAST;
2020 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2021 rtm->rtm_protocol = RTPROT_UNSPEC;
/* Source is the origin, destination is the multicast group. */
2024 NLA_PUT_BE32(skb, RTA_SRC, c->mfc_origin);
2025 NLA_PUT_BE32(skb, RTA_DST, c->mfc_mcastgrp);
/* Delegate IIF/OIF attributes to the shared fill helper. */
2027 if (__ipmr_fill_mroute(mrt, skb, c, rtm) < 0)
2028 goto nla_put_failure;
2030 return nlmsg_end(skb, nlh);
2033 nlmsg_cancel(skb, nlh);
/*
 * ipmr_rtm_dumproute - netlink dump callback for RTNL_FAMILY_IPMR
 * routes. Walks every mr_table in the namespace, every hash line,
 * every cache entry, resuming from the (t, h, e) position saved in
 * cb->args by a previous partial dump (save/restore lines elided).
 */
2037 static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2039 struct net *net = sock_net(skb->sk);
2040 struct mr_table *mrt;
2041 struct mfc_cache *mfc;
/* t/h/e: table, hash-line, entry indices; s_* are resume points. */
2042 unsigned int t = 0, s_t;
2043 unsigned int h = 0, s_h;
2044 unsigned int e = 0, s_e;
2050 read_lock(&mrt_lock);
2051 ipmr_for_each_table(mrt, net) {
2056 for (h = s_h; h < MFC_LINES; h++) {
2057 list_for_each_entry(mfc, &mrt->mfc_cache_array[h], list) {
/* Stop when the skb fills; position is saved for the next call. */
2060 if (ipmr_fill_mroute(mrt, skb,
2061 NETLINK_CB(cb->skb).pid,
2075 read_unlock(&mrt_lock);
2084 #ifdef CONFIG_PROC_FS
2086 * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
/*
 * Iterator state for the /proc/net/ip_mr_vif seq_file walk.
 * (The VIF index member, "int ct;", is elided in this extract.)
 */
2088 struct ipmr_vif_iter {
2089 struct seq_net_private p;	/* must be first for seq_file_net() */
2090 struct mr_table *mrt;	/* table being iterated */
/*
 * ipmr_vif_seq_idx - advance the iterator to the pos'th existing VIF
 * (the loff_t pos parameter line is elided). Returns a pointer to that
 * vif_device, or NULL past the end. Called with mrt_lock held by
 * seq_start/seq_next.
 */
2094 static struct vif_device *ipmr_vif_seq_idx(struct net *net,
2095 struct ipmr_vif_iter *iter,
2098 struct mr_table *mrt = iter->mrt;
/* Skip holes in vif_table left by deleted VIFs. */
2100 for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
2101 if (!VIF_EXISTS(mrt, iter->ct))
2104 return &mrt->vif_table[iter->ct];
/*
 * ipmr_vif_seq_start - seq_file .start for /proc/net/ip_mr_vif.
 * Resolves the default table, takes mrt_lock (released in .stop),
 * and returns either SEQ_START_TOKEN (branch elided) or the VIF at
 * *pos - 1.
 */
2109 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
2110 __acquires(mrt_lock)
2112 struct ipmr_vif_iter *iter = seq->private;
2113 struct net *net = seq_file_net(seq);
2114 struct mr_table *mrt;
2116 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
2118 return ERR_PTR(-ENOENT);
/* Held across the whole traversal; dropped in ipmr_vif_seq_stop(). */
2122 read_lock(&mrt_lock);
2123 return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
/*
 * ipmr_vif_seq_next - seq_file .next: step to the following existing
 * VIF, skipping deleted slots; NULL return at end is elided.
 */
2127 static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2129 struct ipmr_vif_iter *iter = seq->private;
2130 struct net *net = seq_file_net(seq);
2131 struct mr_table *mrt = iter->mrt;
/* First real element after the header token. */
2134 if (v == SEQ_START_TOKEN)
2135 return ipmr_vif_seq_idx(net, iter, 0);
2137 while (++iter->ct < mrt->maxvif) {
2138 if (!VIF_EXISTS(mrt, iter->ct))
2140 return &mrt->vif_table[iter->ct];
/* seq_file .stop: drop the mrt_lock taken by ipmr_vif_seq_start(). */
2145 static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
2146 __releases(mrt_lock)
2148 read_unlock(&mrt_lock);
/*
 * ipmr_vif_seq_show - print one /proc/net/ip_mr_vif row: the column
 * header for SEQ_START_TOKEN, otherwise index, device name, byte and
 * packet counters, flags, and tunnel endpoints for one VIF.
 */
2151 static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
2153 struct ipmr_vif_iter *iter = seq->private;
2154 struct mr_table *mrt = iter->mrt;
2156 if (v == SEQ_START_TOKEN) {
2158 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
2160 const struct vif_device *vif = v;
/* Register VIFs may have no underlying device. */
2161 const char *name = vif->dev ? vif->dev->name : "none";
2164 "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
/* Pointer arithmetic recovers the VIF's table index. */
2165 vif - mrt->vif_table,
2166 name, vif->bytes_in, vif->pkt_in,
2167 vif->bytes_out, vif->pkt_out,
2168 vif->flags, vif->local, vif->remote);
/* seq_file operations backing /proc/net/ip_mr_vif. */
2173 static const struct seq_operations ipmr_vif_seq_ops = {
2174 .start = ipmr_vif_seq_start,
2175 .next = ipmr_vif_seq_next,
2176 .stop = ipmr_vif_seq_stop,
2177 .show = ipmr_vif_seq_show,
/* open() for /proc/net/ip_mr_vif: per-netns seq_file with iter state. */
2180 static int ipmr_vif_open(struct inode *inode, struct file *file)
2182 return seq_open_net(inode, file, &ipmr_vif_seq_ops,
2183 sizeof(struct ipmr_vif_iter))
/* file_operations for /proc/net/ip_mr_vif (.read line elided). */
2186 static const struct file_operations ipmr_vif_fops = {
2187 .owner = THIS_MODULE,
2188 .open = ipmr_vif_open,
2190 .llseek = seq_lseek,
2191 .release = seq_release_net,
/*
 * Iterator state for the /proc/net/ip_mr_cache seq_file walk.
 * (The hash-line index member, "int ct;", is elided in this extract.)
 */
2194 struct ipmr_mfc_iter {
2195 struct seq_net_private p;	/* must be first for seq_file_net() */
2196 struct mr_table *mrt;	/* table being iterated */
2197 struct list_head *cache;	/* current list: a hash line or the unresolved queue */
/*
 * ipmr_mfc_seq_idx - position the iterator on the pos'th cache entry:
 * first all resolved hash lines under mrt_lock, then the unresolved
 * queue under mfc_unres_lock. The matching lock is deliberately left
 * held when an entry is returned (released in seq_stop); the early
 * returns are in elided lines.
 */
2202 static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
2203 struct ipmr_mfc_iter *it, loff_t pos)
2205 struct mr_table *mrt = it->mrt;
2206 struct mfc_cache *mfc;
/* Phase 1: resolved entries, protected by mrt_lock. */
2208 read_lock(&mrt_lock);
2209 for (it->ct = 0; it->ct < MFC_LINES; it->ct++) {
2210 it->cache = &mrt->mfc_cache_array[it->ct];
2211 list_for_each_entry(mfc, it->cache, list)
2215 read_unlock(&mrt_lock);
/* Phase 2: unresolved entries, protected by mfc_unres_lock. */
2217 spin_lock_bh(&mfc_unres_lock);
2218 it->cache = &mrt->mfc_unres_queue;
2219 list_for_each_entry(mfc, it->cache, list)
2222 spin_unlock_bh(&mfc_unres_lock);
/*
 * ipmr_mfc_seq_start - seq_file .start for /proc/net/ip_mr_cache.
 * Resolves the default table and returns SEQ_START_TOKEN (branch
 * elided) or the entry at *pos - 1; locking is done inside
 * ipmr_mfc_seq_idx().
 */
2229 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
2231 struct ipmr_mfc_iter *it = seq->private;
2232 struct net *net = seq_file_net(seq);
2233 struct mr_table *mrt;
2235 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
2237 return ERR_PTR(-ENOENT);
2242 return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
/*
 * ipmr_mfc_seq_next - seq_file .next: advance within the current list,
 * then across remaining hash lines, then hand over from mrt_lock to
 * mfc_unres_lock for the unresolved queue. End-of-iteration returns
 * are in elided lines.
 */
2246 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2248 struct mfc_cache *mfc = v;
2249 struct ipmr_mfc_iter *it = seq->private;
2250 struct net *net = seq_file_net(seq);
2251 struct mr_table *mrt = it->mrt;
2255 if (v == SEQ_START_TOKEN)
2256 return ipmr_mfc_seq_idx(net, seq->private, 0);
/* More entries left on the current list? */
2258 if (mfc->list.next != it->cache)
2259 return list_entry(mfc->list.next, struct mfc_cache, list);
/* Already walking the unresolved queue: nothing after it. */
2261 if (it->cache == &mrt->mfc_unres_queue)
2264 BUG_ON(it->cache != &mrt->mfc_cache_array[it->ct]);
/* Try the next non-empty hash line. */
2266 while (++it->ct < MFC_LINES) {
2267 it->cache = &mrt->mfc_cache_array[it->ct];
2268 if (list_empty(it->cache))
2270 return list_first_entry(it->cache, struct mfc_cache, list);
2273 /* exhausted cache_array, show unresolved */
/* Lock handover: resolved entries done, switch to unresolved. */
2274 read_unlock(&mrt_lock);
2275 it->cache = &mrt->mfc_unres_queue;
2278 spin_lock_bh(&mfc_unres_lock);
2279 if (!list_empty(it->cache))
2280 return list_first_entry(it->cache, struct mfc_cache, list);
2283 spin_unlock_bh(&mfc_unres_lock);
/*
 * ipmr_mfc_seq_stop - release whichever lock the iterator still holds:
 * mfc_unres_lock if it ended on the unresolved queue, mrt_lock if it
 * ended mid-way through the resolved hash lines, neither otherwise.
 */
2289 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
2291 struct ipmr_mfc_iter *it = seq->private;
2292 struct mr_table *mrt = it->mrt;
2294 if (it->cache == &mrt->mfc_unres_queue)
2295 spin_unlock_bh(&mfc_unres_lock);
2296 else if (it->cache == &mrt->mfc_cache_array[it->ct])
2297 read_unlock(&mrt_lock);
/*
 * ipmr_mfc_seq_show - print one /proc/net/ip_mr_cache row: group,
 * origin, parent iif, packet/byte/wrong-iif counters, and the per-oif
 * "ifindex:ttl" pairs; unresolved entries show zeroed counters.
 */
2300 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
2304 if (v == SEQ_START_TOKEN) {
2306 "Group Origin Iif Pkts Bytes Wrong Oifs\n");
2308 const struct mfc_cache *mfc = v;
2309 const struct ipmr_mfc_iter *it = seq->private;
2310 const struct mr_table *mrt = it->mrt;
/* Addresses printed as raw hex in network byte order. */
2312 seq_printf(seq, "%08X %08X %-3hd",
2313 (__force u32) mfc->mfc_mcastgrp,
2314 (__force u32) mfc->mfc_origin,
/* Resolved entries carry real statistics and oif thresholds. */
2317 if (it->cache != &mrt->mfc_unres_queue) {
2318 seq_printf(seq, " %8lu %8lu %8lu",
2319 mfc->mfc_un.res.pkt,
2320 mfc->mfc_un.res.bytes,
2321 mfc->mfc_un.res.wrong_if);
2322 for (n = mfc->mfc_un.res.minvif;
2323 n < mfc->mfc_un.res.maxvif; n++ ) {
2324 if (VIF_EXISTS(mrt, n) &&
2325 mfc->mfc_un.res.ttls[n] < 255)
2328 n, mfc->mfc_un.res.ttls[n]);
2331 /* unresolved mfc_caches don't contain
2332 * pkt, bytes and wrong_if values
2334 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
2336 seq_putc(seq, '\n');
/* seq_file operations backing /proc/net/ip_mr_cache. */
2341 static const struct seq_operations ipmr_mfc_seq_ops = {
2342 .start = ipmr_mfc_seq_start,
2343 .next = ipmr_mfc_seq_next,
2344 .stop = ipmr_mfc_seq_stop,
2345 .show = ipmr_mfc_seq_show,
/* open() for /proc/net/ip_mr_cache: per-netns seq_file with iter state. */
2348 static int ipmr_mfc_open(struct inode *inode, struct file *file)
2350 return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
2351 sizeof(struct ipmr_mfc_iter))
/* file_operations for /proc/net/ip_mr_cache (.read line elided). */
2354 static const struct file_operations ipmr_mfc_fops = {
2355 .owner = THIS_MODULE,
2356 .open = ipmr_mfc_open,
2358 .llseek = seq_lseek,
2359 .release = seq_release_net,
2363 #ifdef CONFIG_IP_PIMSM_V2
/* IPPROTO_PIM handler registration; initializers (.handler etc.) are
 * elided in this extract — presumably pointing at pim_rcv above. */
2364 static const struct net_protocol pim_protocol = {
2372 * Setup for IP multicast routing
/*
 * ipmr_net_init - per-namespace setup: routing tables/rules, then the
 * two proc entries. Error paths unwind in reverse order via the
 * goto-cleanup labels (some labels elided in this extract).
 */
2374 static int __net_init ipmr_net_init(struct net *net)
2378 err = ipmr_rules_init(net);
2382 #ifdef CONFIG_PROC_FS
2384 if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
2386 if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
2387 goto proc_cache_fail;
/* Cleanup labels: undo proc entry then rules on failure. */
2391 #ifdef CONFIG_PROC_FS
2393 proc_net_remove(net, "ip_mr_vif");
2395 ipmr_rules_exit(net);
/* Per-namespace teardown: remove proc entries, then rules/tables
 * (reverse of ipmr_net_init). */
2401 static void __net_exit ipmr_net_exit(struct net *net)
2403 #ifdef CONFIG_PROC_FS
2404 proc_net_remove(net, "ip_mr_cache");
2405 proc_net_remove(net, "ip_mr_vif");
2407 ipmr_rules_exit(net);
/* Pernet hooks so each namespace gets its own multicast routing state. */
2410 static struct pernet_operations ipmr_net_ops = {
2411 .init = ipmr_net_init,
2412 .exit = ipmr_net_exit,
2415 int __init ip_mr_init(void)
2419 mrt_cachep = kmem_cache_create("ip_mrt_cache",
2420 sizeof(struct mfc_cache),
2421 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
2426 err = register_pernet_subsys(&ipmr_net_ops);
2428 goto reg_pernet_fail;
2430 err = register_netdevice_notifier(&ip_mr_notifier);
2432 goto reg_notif_fail;
2433 #ifdef CONFIG_IP_PIMSM_V2
2434 if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
2435 printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
2437 goto add_proto_fail;
2440 rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, NULL, ipmr_rtm_dumproute);
2443 #ifdef CONFIG_IP_PIMSM_V2
2445 unregister_netdevice_notifier(&ip_mr_notifier);
2448 unregister_pernet_subsys(&ipmr_net_ops);
2450 kmem_cache_destroy(mrt_cachep);