2 * NET3 IP device support routines.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Derived from the IP parts of dev.c 1.0.19
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Mark Evans, <evansmp@uhura.aston.ac.uk>
15 * Alan Cox, <gw4pts@gw4pts.ampr.org>
16 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
19 * Alexey Kuznetsov: pa_* fields are replaced with ifaddr
21 * Cyrus Durgin: updated for kmod
22 * Matthias Andree: in devinet_ioctl, compare label and
23 * address (4.4BSD alias style support),
24 * fall back to comparing just the label
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
55 #include <linux/sysctl.h>
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
63 #include <net/route.h>
64 #include <net/ip_fib.h>
65 #include <net/rtnetlink.h>
66 #include <net/net_namespace.h>
67 #include <net/addrconf.h>
69 #include "fib_lookup.h"
/*
 * ipv4_devconf: file-local struct ipv4_devconf instance.  The entries
 * visible here set ACCEPT_REDIRECTS, SEND_REDIRECTS, SECURE_REDIRECTS and
 * SHARED_MEDIA to 1; each IPV4_DEVCONF_* value is stored at index value - 1.
 * NOTE(review): the enclosing member designator and the closing braces are
 * not present in this listing.
 */
71 static struct ipv4_devconf ipv4_devconf = {
73 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
74 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
75 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
76 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
/*
 * ipv4_devconf_dflt: second file-local struct ipv4_devconf instance.  Same
 * visible settings as ipv4_devconf, plus ACCEPT_SOURCE_ROUTE set to 1.
 * NOTE(review): presumably the template behind net->ipv4.devconf_dflt, which
 * inetdev_init() copies into every new in_device -- confirm where it is
 * attached to the namespace.
 */
80 static struct ipv4_devconf ipv4_devconf_dflt = {
82 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
83 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
84 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
85 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
86 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
/*
 * IPV4_DEVCONF_DFLT(net, attr): read configuration attribute attr from the
 * default devconf of namespace net by applying IPV4_DEVCONF() to
 * *net->ipv4.devconf_dflt.
 */
90 #define IPV4_DEVCONF_DFLT(net, attr) \
91 IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
/*
 * Netlink attribute policy for IPv4 address messages; passed to
 * nlmsg_parse() by inet_rtm_deladdr() and rtm_to_ifaddr().
 * IFA_LOCAL, IFA_ADDRESS and IFA_BROADCAST are NLA_U32, IFA_LABEL is a
 * string with .len = IFNAMSIZ - 1, and IFA_CACHEINFO has
 * .len = sizeof(struct ifa_cacheinfo).
 */
93 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
94 [IFA_LOCAL] = { .type = NLA_U32 },
95 [IFA_ADDRESS] = { .type = NLA_U32 },
96 [IFA_BROADCAST] = { .type = NLA_U32 },
97 [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
98 [IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) },
/*
 * Hash table of in_ifaddr entries, linked through their 'hash' member.
 * The table has 1 << IN4_ADDR_HSIZE_SHIFT (256) buckets.
 * inet_addr_hash_lock is taken by inet_hash_insert() and inet_hash_remove()
 * around list modifications; lookups in this file walk buckets with the
 * _rcu list iterators.
 */
101 #define IN4_ADDR_HSIZE_SHIFT 8
102 #define IN4_ADDR_HSIZE (1U << IN4_ADDR_HSIZE_SHIFT)
104 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
105 static DEFINE_SPINLOCK(inet_addr_hash_lock);
/*
 * Compute the inet_addr_lst bucket for an address within a namespace:
 * the address is XORed with net_hash_mix(net) and reduced with hash_32()
 * to IN4_ADDR_HSIZE_SHIFT bits.
 */
107 static u32 inet_addr_hash(struct net *net, __be32 addr)
109 u32 val = (__force u32) addr ^ net_hash_mix(net);
111 return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
/*
 * Add ifa to the address hash.  The bucket is chosen from ifa->ifa_local
 * and net; the insertion uses hlist_add_head_rcu() while holding
 * inet_addr_hash_lock.
 */
114 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
116 u32 hash = inet_addr_hash(net, ifa->ifa_local);
118 spin_lock(&inet_addr_hash_lock);
119 hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
120 spin_unlock(&inet_addr_hash_lock);
/*
 * Unlink ifa from the address hash with hlist_del_init_rcu() while holding
 * inet_addr_hash_lock.
 */
123 static void inet_hash_remove(struct in_ifaddr *ifa)
125 spin_lock(&inet_addr_hash_lock);
126 hlist_del_init_rcu(&ifa->hash);
127 spin_unlock(&inet_addr_hash_lock);
131 * __ip_dev_find - find the first device with a given source address.
132 * @net: the net namespace
133 * @addr: the source address
134 * @devref: if true, take a reference on the found device
136 * If a caller uses devref=false, it should be protected by RCU, or RTNL
/*
 * Find a device owning local address addr in namespace net.
 *
 * The address hash bucket for (net, addr) is walked with the RCU iterator;
 * an entry matches when ifa_local equals addr and its device belongs to net.
 * The visible fallback looks addr up in the RT_TABLE_LOCAL FIB table
 * (FIB_LOOKUP_NOREF) and, for an RTN_LOCAL result, uses FIB_RES_DEV(res).
 * The symbol is exported.
 *
 * NOTE(review): this listing omits several short lines of the body (loop
 * exits, the condition selecting the fallback, what happens under
 * "result && devref", and the return statement); presumably a device
 * reference is taken when devref is true -- confirm against the full file.
 */
138 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
140 u32 hash = inet_addr_hash(net, addr);
141 struct net_device *result = NULL;
142 struct in_ifaddr *ifa;
145 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
146 if (ifa->ifa_local == addr) {
147 struct net_device *dev = ifa->ifa_dev->dev;
149 if (!net_eq(dev_net(dev), net))
156 struct flowi4 fl4 = { .daddr = addr };
157 struct fib_result res = { 0 };
158 struct fib_table *local;
160 /* Fallback to FIB local table so that communication
161 * over loopback subnets work.
163 local = fib_get_table(net, RT_TABLE_LOCAL);
165 !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
166 res.type == RTN_LOCAL)
167 result = FIB_RES_DEV(res);
169 if (result && devref)
174 EXPORT_SYMBOL(__ip_dev_find);
/*
 * Forward declarations used before their definitions: rtmsg_ifa(),
 * inet_del_ifa() and the devinet sysctl register/unregister helpers, plus
 * the blocking notifier chain head inetaddr_chain that address events are
 * published on.
 * NOTE(review): devinet_sysctl_register()/devinet_sysctl_unregister() appear
 * twice below; presumably one pair is the prototype and the other an empty
 * stub selected by a preprocessor conditional that is not visible in this
 * listing.
 */
176 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
178 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
179 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
182 static void devinet_sysctl_register(struct in_device *idev);
183 static void devinet_sysctl_unregister(struct in_device *idev);
185 static void devinet_sysctl_register(struct in_device *idev)
188 static void devinet_sysctl_unregister(struct in_device *idev)
193 /* Locks all the inet devices. */
/*
 * Allocate a zero-filled struct in_ifaddr with GFP_KERNEL and return the
 * kzalloc() result unchanged (callers must check it for NULL).
 */
195 static struct in_ifaddr *inet_alloc_ifa(void)
197 return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
/*
 * RCU callback queued by inet_free_ifa(): recovers the in_ifaddr from its
 * embedded rcu_head and drops the address's reference on ifa->ifa_dev with
 * in_dev_put().
 * NOTE(review): the release of the ifa memory itself is presumably on a line
 * missing from this listing.
 */
200 static void inet_rcu_free_ifa(struct rcu_head *head)
202 struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
204 in_dev_put(ifa->ifa_dev);
/*
 * Release ifa after an RCU grace period by queueing inet_rcu_free_ifa()
 * through call_rcu().
 */
208 static void inet_free_ifa(struct in_ifaddr *ifa)
210 call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
/*
 * Final teardown step for an in_device (exported).
 * Warns if the address list or the multicast list is still non-empty.
 * When NET_REFCNT_DEBUG is defined, logs the in_device pointer and the
 * device name ("NIL" when there is no device).  The pr_err() reports an
 * attempt to free an in_device that is still alive.
 * NOTE(review): the condition guarding the pr_err() and the statements that
 * actually release the object are not visible in this listing.
 */
213 void in_dev_finish_destroy(struct in_device *idev)
215 struct net_device *dev = idev->dev;
217 WARN_ON(idev->ifa_list);
218 WARN_ON(idev->mc_list);
219 #ifdef NET_REFCNT_DEBUG
220 pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
224 pr_err("Freeing alive in_device %p\n", idev);
228 EXPORT_SYMBOL(in_dev_finish_destroy);
/*
 * Create the IPv4 per-device state (struct in_device) for dev.
 *
 * Visible steps, in order: zeroed allocation; cnf copied from the
 * namespace's devconf_dflt with its sysctl pointer cleared; ARP neighbour
 * parameters allocated from arp_tbl; LRO disabled on the device when the
 * FORWARDING setting is on; sysctl registration; multicast initialisation;
 * and finally dev->ip_ptr published with rcu_assign_pointer().  Publishing
 * is done last because, per the in-line comment, packets can be received as
 * soon as ip_ptr is set.
 *
 * NOTE(review): error handling, the reference-count operations announced by
 * the two in-line comments, the IFF_UP branch body and the return statement
 * are not visible in this listing.
 */
230 static struct in_device *inetdev_init(struct net_device *dev)
232 struct in_device *in_dev;
236 in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
239 memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
240 sizeof(in_dev->cnf));
241 in_dev->cnf.sysctl = NULL;
243 in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
244 if (!in_dev->arp_parms)
246 if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
247 dev_disable_lro(dev);
248 /* Reference in_dev->dev */
250 /* Account for reference dev->ip_ptr (below) */
253 devinet_sysctl_register(in_dev);
254 ip_mc_init_dev(in_dev);
255 if (dev->flags & IFF_UP)
258 /* we can receive as soon as ip_ptr is set -- do this last */
259 rcu_assign_pointer(dev->ip_ptr, in_dev);
/*
 * RCU callback queued by inetdev_destroy(): recovers the in_device from its
 * embedded rcu_head.
 * NOTE(review): the statement acting on idev (presumably dropping a
 * reference) is missing from this listing.
 */
268 static void in_dev_rcu_put(struct rcu_head *head)
270 struct in_device *idev = container_of(head, struct in_device, rcu_head);
/*
 * Tear down the IPv4 state of a device.
 * Visible steps: destroy multicast state; repeatedly delete the head of
 * ifa_list through inet_del_ifa() until the list is empty; clear
 * dev->ip_ptr; unregister the sysctl entries; release the ARP parameters;
 * and queue in_dev_rcu_put() via call_rcu().
 * NOTE(review): the assignment of 'dev' and any other short statements are
 * not visible in this listing.
 */
274 static void inetdev_destroy(struct in_device *in_dev)
276 struct in_ifaddr *ifa;
277 struct net_device *dev;
285 ip_mc_destroy_dev(in_dev);
287 while ((ifa = in_dev->ifa_list) != NULL) {
288 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
292 RCU_INIT_POINTER(dev->ip_ptr, NULL);
294 devinet_sysctl_unregister(in_dev);
295 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
298 call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
/*
 * Test addresses a and b against the primary addresses of in_dev: for a
 * primary ifa whose subnet matches a, the inner test succeeds when b is
 * zero or b matches the same ifa.
 * NOTE(review): the return statements are missing from this listing;
 * presumably non-zero is returned on such a match and 0 otherwise.
 */
301 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
304 for_primary_ifa(in_dev) {
305 if (inet_ifa_match(a, ifa)) {
306 if (!b || inet_ifa_match(b, ifa)) {
311 } endfor_ifa(in_dev);
/*
 * Remove the address *ifap (called ifa1 below) from in_dev.
 *
 * @nlh and @portid are passed through to rtmsg_ifa() for the netlink
 * notifications.  do_promote caches IN_DEV_PROMOTE_SECONDARIES(in_dev).
 *
 * Visible phases:
 *  1. If ifa1 is a primary address, the entries following it are scanned.
 *     Entries that are not secondary, use another mask, or do not match
 *     ifa1's subnet are stepped over; other primaries with scope >= ifa1's
 *     update state on a line not shown.  Matching secondaries are unhashed,
 *     unlinked and announced with RTM_DELADDR plus the notifier chain.
 *     NOTE(review): the branch choosing between deletion and promotion is
 *     not visible; presumably deletion happens only when promotion is off.
 *  2. While all addresses are still listed, routes of secondaries in ifa1's
 *     subnet (starting at 'promote') are removed with fib_del_ifaddr(), as
 *     explained by the in-line comment.
 *  3. ifa1 is unlinked and unhashed; RTM_DELADDR is sent first and the
 *     NETDEV_DOWN notifier called second (ordering rationale in-line).
 *  4. A promoted address is relinked after last_prim, loses
 *     IFA_F_SECONDARY and is announced with RTM_NEWADDR and the notifier
 *     chain; the remaining subnet secondaries are then walked.
 *
 * NOTE(review): the use of @destroy and several short statements are not
 * visible in this listing.
 */
316 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
317 int destroy, struct nlmsghdr *nlh, u32 portid)
319 struct in_ifaddr *promote = NULL;
320 struct in_ifaddr *ifa, *ifa1 = *ifap;
321 struct in_ifaddr *last_prim = in_dev->ifa_list;
322 struct in_ifaddr *prev_prom = NULL;
323 int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
327 /* 1. Deleting primary ifaddr forces deletion all secondaries
328 * unless alias promotion is set
331 if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
332 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
334 while ((ifa = *ifap1) != NULL) {
335 if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
336 ifa1->ifa_scope <= ifa->ifa_scope)
339 if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
340 ifa1->ifa_mask != ifa->ifa_mask ||
341 !inet_ifa_match(ifa1->ifa_address, ifa)) {
342 ifap1 = &ifa->ifa_next;
348 inet_hash_remove(ifa);
349 *ifap1 = ifa->ifa_next;
351 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
352 blocking_notifier_call_chain(&inetaddr_chain,
362 /* On promotion all secondaries from subnet are changing
363 * the primary IP, we must remove all their routes silently
364 * and later to add them back with new prefsrc. Do this
365 * while all addresses are on the device list.
367 for (ifa = promote; ifa; ifa = ifa->ifa_next) {
368 if (ifa1->ifa_mask == ifa->ifa_mask &&
369 inet_ifa_match(ifa1->ifa_address, ifa))
370 fib_del_ifaddr(ifa, ifa1);
375 *ifap = ifa1->ifa_next;
376 inet_hash_remove(ifa1);
378 /* 3. Announce address deletion */
380 /* Send message first, then call notifier.
381 At first sight, FIB update triggered by notifier
382 will refer to already deleted ifaddr, that could confuse
383 netlink listeners. It is not true: look, gated sees
384 that route deleted and if it still thinks that ifaddr
385 is valid, it will try to restore deleted routes... Grr.
386 So that, this order is correct.
388 rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
389 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
392 struct in_ifaddr *next_sec = promote->ifa_next;
395 prev_prom->ifa_next = promote->ifa_next;
396 promote->ifa_next = last_prim->ifa_next;
397 last_prim->ifa_next = promote;
400 promote->ifa_flags &= ~IFA_F_SECONDARY;
401 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
402 blocking_notifier_call_chain(&inetaddr_chain,
404 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
405 if (ifa1->ifa_mask != ifa->ifa_mask ||
406 !inet_ifa_match(ifa1->ifa_address, ifa))
/*
 * Convenience wrapper: delete *ifap through __inet_del_ifa() with no
 * originating netlink header (NULL) and portid 0.
 */
416 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
419 __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
/*
 * Forward declaration of check_lifetime() and the delayed work item
 * check_lifetime_work that runs it.  Other code in this file cancels and
 * reschedules this work item with a zero delay after changing addresses or
 * lifetimes.
 */
422 static void check_lifetime(struct work_struct *work);
424 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
/*
 * Link a prepared ifa into its device's address list and publish it.
 *
 * @nlh and (per the call below) portid are forwarded to rtmsg_ifa().
 *
 * Visible logic: an ifa without a local address is handled specially (body
 * not shown).  IFA_F_SECONDARY is cleared, then the list is scanned:
 * last_primary tracks the link after the last primary whose scope is >=
 * the new address's scope; when an existing entry covers the same subnet
 * (same mask and matching address), an identical ifa_local or a differing
 * scope is treated specially (bodies not shown), otherwise the new address
 * is marked IFA_F_SECONDARY.  A primary address seeds the network random
 * state via net_srandom().  The ifa is linked at *ifap, inserted into the
 * address hash, check_lifetime_work is rescheduled to run immediately, and
 * RTM_NEWADDR is sent before the NETDEV_UP notifier so netlink listeners
 * learn about the address before the FIB update.
 *
 * NOTE(review): error returns, the freeing of rejected addresses and the
 * final return value are not visible in this listing.
 */
426 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
429 struct in_device *in_dev = ifa->ifa_dev;
430 struct in_ifaddr *ifa1, **ifap, **last_primary;
434 if (!ifa->ifa_local) {
439 ifa->ifa_flags &= ~IFA_F_SECONDARY;
440 last_primary = &in_dev->ifa_list;
442 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
443 ifap = &ifa1->ifa_next) {
444 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
445 ifa->ifa_scope <= ifa1->ifa_scope)
446 last_primary = &ifa1->ifa_next;
447 if (ifa1->ifa_mask == ifa->ifa_mask &&
448 inet_ifa_match(ifa1->ifa_address, ifa)) {
449 if (ifa1->ifa_local == ifa->ifa_local) {
453 if (ifa1->ifa_scope != ifa->ifa_scope) {
457 ifa->ifa_flags |= IFA_F_SECONDARY;
461 if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
462 net_srandom(ifa->ifa_local);
466 ifa->ifa_next = *ifap;
469 inet_hash_insert(dev_net(in_dev->dev), ifa);
471 cancel_delayed_work(&check_lifetime_work);
472 schedule_delayed_work(&check_lifetime_work, 0);
474 /* Send message first, then call notifier.
475 Notifier will trigger FIB update, so that
476 listeners of netlink will know about new ifaddr */
477 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
478 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
/*
 * Convenience wrapper: insert ifa through __inet_insert_ifa() with no
 * originating netlink header (NULL) and portid 0; returns its result.
 */
483 static int inet_insert_ifa(struct in_ifaddr *ifa)
485 return __inet_insert_ifa(ifa, NULL, 0);
/*
 * Attach ifa to dev's in_device and insert it.
 * Visible steps: fetch the in_device under RTNL; mark all devconf entries
 * as set (ipv4_devconf_setall); if the ifa is not yet bound to this
 * in_device, warn when it is bound to another one and bind it; force
 * RT_SCOPE_HOST for loopback addresses; return inet_insert_ifa()'s result.
 * NOTE(review): the handling of a missing in_device and any reference
 * taken on it are not visible in this listing.
 */
488 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
490 struct in_device *in_dev = __in_dev_get_rtnl(dev);
498 ipv4_devconf_setall(in_dev);
499 if (ifa->ifa_dev != in_dev) {
500 WARN_ON(ifa->ifa_dev);
502 ifa->ifa_dev = in_dev;
504 if (ipv4_is_loopback(ifa->ifa_local))
505 ifa->ifa_scope = RT_SCOPE_HOST;
506 return inet_insert_ifa(ifa);
509 /* Caller must hold RCU or RTNL :
510 * We don't take a reference on the found in_device
/*
 * Return the in_device of the interface with index ifindex in namespace
 * net.  The device is looked up with dev_get_by_index_rcu() and its ip_ptr
 * read with rcu_dereference_rtnl(); in_dev starts as NULL.  Exported.
 * NOTE(review): the NULL check on dev, the locking statements and the
 * return are not visible in this listing.
 */
512 struct in_device *inetdev_by_index(struct net *net, int ifindex)
514 struct net_device *dev;
515 struct in_device *in_dev = NULL;
518 dev = dev_get_by_index_rcu(net, ifindex);
520 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
524 EXPORT_SYMBOL(inetdev_by_index);
526 /* Called only from RTNL semaphored context. No locks. */
/*
 * Search the primary addresses of in_dev for one whose mask equals 'mask'
 * and whose subnet matches 'prefix'.
 * NOTE(review): the rest of the parameter list and the return statements
 * are missing from this listing; presumably the matching ifa is returned,
 * or NULL when none matches.
 */
528 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
533 for_primary_ifa(in_dev) {
534 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
536 } endfor_ifa(in_dev);
/*
 * Netlink handler that deletes an IPv4 address.
 *
 * Parses the message with ifa_ipv4_policy, finds the in_device by
 * ifm->ifa_index, then scans its address list.  An entry is skipped when a
 * supplied IFA_LOCAL differs from ifa_local, a supplied IFA_LABEL differs
 * from ifa_label, or a supplied IFA_ADDRESS does not agree (prefix length
 * differs or the address is outside the ifa's subnet).  The first entry
 * that passes is removed with __inet_del_ifa(..., 1, nlh, sender portid).
 * -EADDRNOTAVAIL is assigned after the scan, i.e. for the no-match case.
 *
 * NOTE(review): the other error codes and the return statements are not
 * visible in this listing.
 */
540 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
542 struct net *net = sock_net(skb->sk);
543 struct nlattr *tb[IFA_MAX+1];
544 struct in_device *in_dev;
545 struct ifaddrmsg *ifm;
546 struct in_ifaddr *ifa, **ifap;
551 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
555 ifm = nlmsg_data(nlh);
556 in_dev = inetdev_by_index(net, ifm->ifa_index);
557 if (in_dev == NULL) {
562 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
563 ifap = &ifa->ifa_next) {
565 ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
568 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
571 if (tb[IFA_ADDRESS] &&
572 (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
573 !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
576 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
580 err = -EADDRNOTAVAIL;
/*
 * Lifetime value meaning "never expires": check_lifetime() performs its
 * expiry comparisons only for lifetimes different from this value, and it
 * is the default for valid/preferred lifetimes of new addresses.
 */
585 #define INFINITY_LIFE_TIME 0xFFFFFFFF
/*
 * Delayed-work handler (check_lifetime_work) that ages addresses.
 *
 * For every bucket of inet_addr_lst:
 *  - A first pass, using the RCU iterator, inspects entries for which
 *    IFA_F_PERMANENT is tested (skip statement not shown).  The age is the
 *    time since ifa_tstamp in seconds, with ADDRCONF_TIMER_FUZZ_MINUS added
 *    so that nearby events are batched.  change_needed is set when the
 *    valid lifetime has run out, or when the preferred lifetime has run out
 *    on an address not yet flagged IFA_F_DEPRECATED.  'next' is pulled
 *    forward to the earliest upcoming valid/preferred expiry.
 *  - A second pass (hlist_for_each_entry_safe) repeats the age computation,
 *    removes expired addresses from their device list via inet_del_ifa()
 *    and flags newly deprecated ones with IFA_F_DEPRECATED, announcing them
 *    with RTM_NEWADDR.
 *
 * The work is then rescheduled: the rounded-up time is used when it lies
 * within ADDRCONF_TIMER_FUZZ of 'next', and the delay is never shorter than
 * ADDRCONF_TIMER_FUZZ_MAX from now.
 *
 * NOTE(review): locking around the passes, the test that gates the second
 * pass on change_needed, and several short statements are not visible in
 * this listing.
 */
587 static void check_lifetime(struct work_struct *work)
589 unsigned long now, next, next_sec, next_sched;
590 struct in_ifaddr *ifa;
591 struct hlist_node *n;
595 next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
597 for (i = 0; i < IN4_ADDR_HSIZE; i++) {
598 bool change_needed = false;
601 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
604 if (ifa->ifa_flags & IFA_F_PERMANENT)
607 /* We try to batch several events at once. */
608 age = (now - ifa->ifa_tstamp +
609 ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
611 if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
612 age >= ifa->ifa_valid_lft) {
613 change_needed = true;
614 } else if (ifa->ifa_preferred_lft ==
615 INFINITY_LIFE_TIME) {
617 } else if (age >= ifa->ifa_preferred_lft) {
618 if (time_before(ifa->ifa_tstamp +
619 ifa->ifa_valid_lft * HZ, next))
620 next = ifa->ifa_tstamp +
621 ifa->ifa_valid_lft * HZ;
623 if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
624 change_needed = true;
625 } else if (time_before(ifa->ifa_tstamp +
626 ifa->ifa_preferred_lft * HZ,
628 next = ifa->ifa_tstamp +
629 ifa->ifa_preferred_lft * HZ;
636 hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
639 if (ifa->ifa_flags & IFA_F_PERMANENT)
642 /* We try to batch several events at once. */
643 age = (now - ifa->ifa_tstamp +
644 ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
646 if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
647 age >= ifa->ifa_valid_lft) {
648 struct in_ifaddr **ifap;
650 for (ifap = &ifa->ifa_dev->ifa_list;
651 *ifap != NULL; ifap = &(*ifap)->ifa_next) {
653 inet_del_ifa(ifa->ifa_dev,
658 } else if (ifa->ifa_preferred_lft !=
659 INFINITY_LIFE_TIME &&
660 age >= ifa->ifa_preferred_lft &&
661 !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
662 ifa->ifa_flags |= IFA_F_DEPRECATED;
663 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
669 next_sec = round_jiffies_up(next);
672 /* If rounded timeout is accurate enough, accept it. */
673 if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
674 next_sched = next_sec;
677 /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
678 if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
679 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
681 schedule_delayed_work(&check_lifetime_work, next_sched - now);
/*
 * Record valid and preferred lifetimes on ifa.
 *
 * IFA_F_PERMANENT and IFA_F_DEPRECATED are cleared first.  Each lifetime is
 * normalised with addrconf_timeout_fixup(); a finite valid timeout is
 * stored in ifa_valid_lft, and a finite preferred timeout in
 * ifa_preferred_lft.  IFA_F_PERMANENT and IFA_F_DEPRECATED are set on
 * branches whose conditions are not fully visible.  ifa_tstamp is set to
 * the current jiffies, and ifa_cstamp is initialised to the same value
 * when it was still zero.
 *
 * NOTE(review): presumably IFA_F_PERMANENT is set for a non-finite valid
 * lifetime and IFA_F_DEPRECATED for a zero preferred timeout -- confirm
 * against the full file.
 */
684 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
687 unsigned long timeout;
689 ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
691 timeout = addrconf_timeout_fixup(valid_lft, HZ);
692 if (addrconf_finite_timeout(timeout))
693 ifa->ifa_valid_lft = timeout;
695 ifa->ifa_flags |= IFA_F_PERMANENT;
697 timeout = addrconf_timeout_fixup(prefered_lft, HZ);
698 if (addrconf_finite_timeout(timeout)) {
700 ifa->ifa_flags |= IFA_F_DEPRECATED;
701 ifa->ifa_preferred_lft = timeout;
703 ifa->ifa_tstamp = jiffies;
704 if (!ifa->ifa_cstamp)
705 ifa->ifa_cstamp = ifa->ifa_tstamp;
/*
 * Build a new in_ifaddr from a netlink address message.
 *
 * @pvalid_lft / @pprefered_lft: written only when IFA_CACHEINFO is present.
 *
 * Visible steps: parse attributes with ifa_ipv4_policy; test for a prefix
 * length above 32 or a missing IFA_LOCAL; look the device up by
 * ifm->ifa_index and fetch its in_device; allocate the ifa; mark all
 * devconf entries as set; default IFA_ADDRESS to IFA_LOCAL; initialise the
 * hash node and fill prefix length, mask, flags, scope, owning in_device,
 * local and peer address, an optional broadcast address, and the label
 * (from IFA_LABEL via nla_strlcpy, or a copy of dev->name).  With
 * IFA_CACHEINFO, a zero ifa_valid or ifa_prefered > ifa_valid is tested
 * for, and the two lifetimes are handed back through the out parameters.
 *
 * NOTE(review): the error codes, cleanup paths and return statements are
 * not visible in this listing.
 */
708 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
709 __u32 *pvalid_lft, __u32 *pprefered_lft)
711 struct nlattr *tb[IFA_MAX+1];
712 struct in_ifaddr *ifa;
713 struct ifaddrmsg *ifm;
714 struct net_device *dev;
715 struct in_device *in_dev;
718 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
722 ifm = nlmsg_data(nlh);
724 if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
727 dev = __dev_get_by_index(net, ifm->ifa_index);
732 in_dev = __in_dev_get_rtnl(dev);
737 ifa = inet_alloc_ifa();
740 * A potential indev allocation can be left alive, it stays
741 * assigned to its device and is destroy with it.
745 ipv4_devconf_setall(in_dev);
748 if (tb[IFA_ADDRESS] == NULL)
749 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
751 INIT_HLIST_NODE(&ifa->hash);
752 ifa->ifa_prefixlen = ifm->ifa_prefixlen;
753 ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
754 ifa->ifa_flags = ifm->ifa_flags;
755 ifa->ifa_scope = ifm->ifa_scope;
756 ifa->ifa_dev = in_dev;
758 ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
759 ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
761 if (tb[IFA_BROADCAST])
762 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
765 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
767 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
769 if (tb[IFA_CACHEINFO]) {
770 struct ifa_cacheinfo *ci;
772 ci = nla_data(tb[IFA_CACHEINFO]);
773 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
777 *pvalid_lft = ci->ifa_valid;
778 *pprefered_lft = ci->ifa_prefered;
/*
 * Look on ifa's device for an already-configured address equal to ifa:
 * same mask, same subnet (inet_ifa_match on ifa_address) and same
 * ifa_local.
 * NOTE(review): the return statements are missing from this listing;
 * presumably the matching entry is returned, or NULL when there is none.
 */
787 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
789 struct in_device *in_dev = ifa->ifa_dev;
790 struct in_ifaddr *ifa1, **ifap;
795 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
796 ifap = &ifa1->ifa_next) {
797 if (ifa1->ifa_mask == ifa->ifa_mask &&
798 inet_ifa_match(ifa1->ifa_address, ifa) &&
799 ifa1->ifa_local == ifa->ifa_local)
/*
 * Netlink handler that adds an IPv4 address or refreshes an existing one.
 *
 * Lifetimes default to INFINITY_LIFE_TIME and may be overridden by
 * rtm_to_ifaddr().  find_matching_ifa() then decides between two paths:
 *  - one sets the lifetimes on the freshly built ifa and returns the
 *    result of __inet_insert_ifa() (the in-line comment notes that
 *    NLM_F_CREATE is deliberately not required);
 *  - the other tests NLM_F_EXCL / missing NLM_F_REPLACE, then updates
 *    lifetimes, reschedules check_lifetime_work to run immediately and
 *    emits RTM_NEWADDR followed by the NETDEV_UP notifier.
 *
 * NOTE(review): the branch conditions, which object the second path
 * operates on after short missing lines, the error codes and the final
 * return are not visible in this listing.
 */
805 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
807 struct net *net = sock_net(skb->sk);
808 struct in_ifaddr *ifa;
809 struct in_ifaddr *ifa_existing;
810 __u32 valid_lft = INFINITY_LIFE_TIME;
811 __u32 prefered_lft = INFINITY_LIFE_TIME;
815 ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
819 ifa_existing = find_matching_ifa(ifa);
821 /* It would be best to check for !NLM_F_CREATE here but
822 * userspace alreay relies on not having to provide this.
824 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
825 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
829 if (nlh->nlmsg_flags & NLM_F_EXCL ||
830 !(nlh->nlmsg_flags & NLM_F_REPLACE))
833 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
834 cancel_delayed_work(&check_lifetime_work);
835 schedule_delayed_work(&check_lifetime_work, 0);
836 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
837 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
843 * Determine a default network mask, based on the IP address.
/*
 * Classful default prefix length for addr.  rc starts at -1, which the
 * in-line comment describes as "something else, probably a multicast".
 * The zero network is tested first; otherwise the address is converted to
 * host order and classified with IN_CLASSA / IN_CLASSB / IN_CLASSC.
 * Callers in this file treat a negative result as an invalid address.
 * NOTE(review): the values assigned in each branch and the return are not
 * visible in this listing.
 */
846 static int inet_abc_len(__be32 addr)
848 int rc = -1; /* Something else, probably a multicast. */
850 if (ipv4_is_zeronet(addr))
853 __u32 haddr = ntohl(addr);
855 if (IN_CLASSA(haddr))
857 else if (IN_CLASSB(haddr))
859 else if (IN_CLASSC(haddr))
/*
 * Handle the address-related interface ioctls for IPv4.
 *
 * Setup: the caller's struct ifreq is copied in and its name
 * NUL-terminated; the original sockaddr is saved in sin_orig for later
 * comparison; a ':' alias separator is searched for in the name; and
 * dev_load() is called for the name.
 *
 * Command pre-checks:
 *  - Get commands (SIOCGIFADDR, SIOCGIFBRDADDR, SIOCGIFDSTADDR,
 *    SIOCGIFNETMASK) remember whether the caller passed an AF_INET address
 *    (tryaddrmatch) and reset the sockaddr to an empty AF_INET one.
 *  - Set commands and SIOCKILLADDR are guarded by
 *    ns_capable(net->user_ns, CAP_NET_ADMIN); a sin_family != AF_INET test
 *    follows.
 *
 * Address selection: the device is found by name, then its address list is
 * searched first for an entry matching both label and the original address
 * (4.4BSD alias style) and, failing that, by label alone (4.3BSD style).
 * A missing address yields -EADDRNOTAVAIL except for SIOCSIFADDR,
 * SIOCSIFFLAGS and SIOCKILLADDR.
 *
 * Per command (visible parts):
 *  - Get commands return ifa_local / ifa_broadcast / ifa_address /
 *    ifa_mask in the sockaddr.
 *  - A flags path deletes the address with inet_del_ifa(..., 1) when
 *    IFF_UP is being cleared, and calls dev_change_flags().
 *  - SIOCSIFADDR validates the address with inet_abc_len(), works on a
 *    newly allocated or the existing ifa, derives a classful prefix (and a
 *    broadcast address on IFF_BROADCAST devices with prefix < 31) for
 *    non-point-to-point devices, uses /32 otherwise, sets infinite
 *    lifetimes and installs it through inet_set_ifa().
 *  - SIOCSIFBRDADDR, SIOCSIFDSTADDR and SIOCSIFNETMASK delete the ifa with
 *    inet_del_ifa(..., 0), change the field and re-insert it; the netmask
 *    case rejects bad masks and recomputes the broadcast address only when
 *    the old one was the default for the old mask.
 *  - SIOCKILLADDR calls tcp_nuke_addr().
 *
 * The final visible statement copies the ifreq back to user space and
 * yields -EFAULT when that fails.
 *
 * NOTE(review): locking, the switch/case framing, most error codes and the
 * exits are missing from this listing, so the exact control flow between
 * the visible statements cannot be confirmed here.
 */
867 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
870 struct sockaddr_in sin_orig;
871 struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
872 struct in_device *in_dev;
873 struct in_ifaddr **ifap = NULL;
874 struct in_ifaddr *ifa = NULL;
875 struct net_device *dev;
878 int tryaddrmatch = 0;
881 * Fetch the caller's info block into kernel space
884 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
886 ifr.ifr_name[IFNAMSIZ - 1] = 0;
888 /* save original address for comparison */
889 memcpy(&sin_orig, sin, sizeof(*sin));
891 colon = strchr(ifr.ifr_name, ':');
895 dev_load(net, ifr.ifr_name);
898 case SIOCGIFADDR: /* Get interface address */
899 case SIOCGIFBRDADDR: /* Get the broadcast address */
900 case SIOCGIFDSTADDR: /* Get the destination address */
901 case SIOCGIFNETMASK: /* Get the netmask for the interface */
902 /* Note that these ioctls will not sleep,
903 so that we do not impose a lock.
904 One day we will be forced to put shlock here (I mean SMP)
906 tryaddrmatch = (sin_orig.sin_family == AF_INET);
907 memset(sin, 0, sizeof(*sin));
908 sin->sin_family = AF_INET;
913 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
916 case SIOCSIFADDR: /* Set interface address (and family) */
917 case SIOCSIFBRDADDR: /* Set the broadcast address */
918 case SIOCSIFDSTADDR: /* Set the destination address */
919 case SIOCSIFNETMASK: /* Set the netmask for the interface */
920 case SIOCKILLADDR: /* Nuke all sockets on this address */
922 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
925 if (sin->sin_family != AF_INET)
936 dev = __dev_get_by_name(net, ifr.ifr_name);
943 in_dev = __in_dev_get_rtnl(dev);
946 /* Matthias Andree */
947 /* compare label and address (4.4BSD style) */
948 /* note: we only do this for a limited set of ioctls
949 and only if the original address family was AF_INET.
950 This is checked above. */
951 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
952 ifap = &ifa->ifa_next) {
953 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
954 sin_orig.sin_addr.s_addr ==
960 /* we didn't get a match, maybe the application is
961 4.3BSD-style and passed in junk so we fall back to
962 comparing just the label */
964 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
965 ifap = &ifa->ifa_next)
966 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
971 ret = -EADDRNOTAVAIL;
972 if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS
973 && cmd != SIOCKILLADDR)
977 case SIOCGIFADDR: /* Get interface address */
978 sin->sin_addr.s_addr = ifa->ifa_local;
981 case SIOCGIFBRDADDR: /* Get the broadcast address */
982 sin->sin_addr.s_addr = ifa->ifa_broadcast;
985 case SIOCGIFDSTADDR: /* Get the destination address */
986 sin->sin_addr.s_addr = ifa->ifa_address;
989 case SIOCGIFNETMASK: /* Get the netmask for the interface */
990 sin->sin_addr.s_addr = ifa->ifa_mask;
995 ret = -EADDRNOTAVAIL;
999 if (!(ifr.ifr_flags & IFF_UP))
1000 inet_del_ifa(in_dev, ifap, 1);
1003 ret = dev_change_flags(dev, ifr.ifr_flags);
1006 case SIOCSIFADDR: /* Set interface address (and family) */
1008 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1013 ifa = inet_alloc_ifa();
1016 INIT_HLIST_NODE(&ifa->hash);
1018 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
1020 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1023 if (ifa->ifa_local == sin->sin_addr.s_addr)
1025 inet_del_ifa(in_dev, ifap, 0);
1026 ifa->ifa_broadcast = 0;
1030 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1032 if (!(dev->flags & IFF_POINTOPOINT)) {
1033 ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1034 ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1035 if ((dev->flags & IFF_BROADCAST) &&
1036 ifa->ifa_prefixlen < 31)
1037 ifa->ifa_broadcast = ifa->ifa_address |
1040 ifa->ifa_prefixlen = 32;
1041 ifa->ifa_mask = inet_make_mask(32);
1043 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1044 ret = inet_set_ifa(dev, ifa);
1047 case SIOCSIFBRDADDR: /* Set the broadcast address */
1049 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1050 inet_del_ifa(in_dev, ifap, 0);
1051 ifa->ifa_broadcast = sin->sin_addr.s_addr;
1052 inet_insert_ifa(ifa);
1056 case SIOCSIFDSTADDR: /* Set the destination address */
1058 if (ifa->ifa_address == sin->sin_addr.s_addr)
1061 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1064 inet_del_ifa(in_dev, ifap, 0);
1065 ifa->ifa_address = sin->sin_addr.s_addr;
1066 inet_insert_ifa(ifa);
1069 case SIOCSIFNETMASK: /* Set the netmask for the interface */
1072 * The mask we set must be legal.
1075 if (bad_mask(sin->sin_addr.s_addr, 0))
1078 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1079 __be32 old_mask = ifa->ifa_mask;
1080 inet_del_ifa(in_dev, ifap, 0);
1081 ifa->ifa_mask = sin->sin_addr.s_addr;
1082 ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1084 /* See if current broadcast address matches
1085 * with current netmask, then recalculate
1086 * the broadcast address. Otherwise it's a
1087 * funny address, so don't touch it since
1088 * the user seems to know what (s)he's doing...
1090 if ((dev->flags & IFF_BROADCAST) &&
1091 (ifa->ifa_prefixlen < 31) &&
1092 (ifa->ifa_broadcast ==
1093 (ifa->ifa_local|~old_mask))) {
1094 ifa->ifa_broadcast = (ifa->ifa_local |
1095 ~sin->sin_addr.s_addr);
1097 inet_insert_ifa(ifa);
1100 case SIOCKILLADDR: /* Nuke all connections on this address */
1101 ret = tcp_nuke_addr(net, (struct sockaddr *) sin);
1110 ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
/*
 * Emit one struct ifreq per IPv4 address of dev into the user buffer buf of
 * size len, keeping a running byte count in 'done'.
 *
 * Visible logic: one path only adds sizeof(ifr) to 'done' (presumably the
 * size-query case when no buffer is supplied -- TODO confirm); the loop
 * stops filling when fewer than sizeof(ifr) bytes remain; each record is
 * zeroed, named from ifa_label or dev->name, given family AF_INET and an
 * address, and written with copy_to_user(); buf, len and done then advance
 * by one record.
 *
 * NOTE(review): the branch conditions, the address value assigned, the
 * error value on a failed copy and the return are not visible here.
 */
1114 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1116 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1117 struct in_ifaddr *ifa;
1124 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1126 done += sizeof(ifr);
1129 if (len < (int) sizeof(ifr))
1131 memset(&ifr, 0, sizeof(struct ifreq));
1133 strcpy(ifr.ifr_name, ifa->ifa_label);
1135 strcpy(ifr.ifr_name, dev->name);
1137 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1138 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1141 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1145 buf += sizeof(struct ifreq);
1146 len -= sizeof(struct ifreq);
1147 done += sizeof(struct ifreq);
/*
 * Choose a local source address for traffic leaving dev towards dst, using
 * only addresses whose scope value does not exceed 'scope'.  Exported.
 *
 * First pass: dev's primary addresses are scanned; entries with a larger
 * scope are tested for and skipped (skip statement not shown); an entry is
 * taken when dst is zero or lies in its subnet, and ifa_local is also
 * recorded on a second visible assignment whose guard is not shown.
 *
 * Fallback: every device of the namespace is scanned (RCU iterator) for a
 * primary address whose scope is not RT_SCOPE_LINK and is <= scope.  The
 * in-line comment explains that this relies on the loopback device being
 * first so that its non-loopback addresses are preferred.
 *
 * NOTE(review): locking, loop exits, the condition leading to the fallback
 * and the return are not visible in this listing.
 */
1153 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1156 struct in_device *in_dev;
1157 struct net *net = dev_net(dev);
1160 in_dev = __in_dev_get_rcu(dev);
1164 for_primary_ifa(in_dev) {
1165 if (ifa->ifa_scope > scope)
1167 if (!dst || inet_ifa_match(dst, ifa)) {
1168 addr = ifa->ifa_local;
1172 addr = ifa->ifa_local;
1173 } endfor_ifa(in_dev);
1179 /* Not loopback addresses on loopback should be preferred
1180 in this case. It is importnat that lo is the first interface
1183 for_each_netdev_rcu(net, dev) {
1184 in_dev = __in_dev_get_rcu(dev);
1188 for_primary_ifa(in_dev) {
1189 if (ifa->ifa_scope != RT_SCOPE_LINK &&
1190 ifa->ifa_scope <= scope) {
1191 addr = ifa->ifa_local;
1194 } endfor_ifa(in_dev);
/*
 * Per-device helper for inet_confirm_addr(): scan in_dev's addresses for
 * one that satisfies the (dst, local, scope) wildcards.
 *
 * Visible logic: a candidate addr is taken from an ifa whose ifa_local
 * equals 'local' (or any when local is zero) and whose scope is <= scope;
 * 'same' records whether both local and dst (each optional) fall into the
 * ifa's subnet.  Later visible tests ask whether the selected addr lies in
 * the current ifa's subnet and, if not, whether the current ifa can supply
 * a new source within scope.  The function returns addr when 'same' is
 * true and 0 otherwise.
 *
 * NOTE(review): the loop header, the guards around the visible tests and
 * the loop exits are missing from this listing.
 */
1202 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1203 __be32 local, int scope)
1210 (local == ifa->ifa_local || !local) &&
1211 ifa->ifa_scope <= scope) {
1212 addr = ifa->ifa_local;
1217 same = (!local || inet_ifa_match(local, ifa)) &&
1218 (!dst || inet_ifa_match(dst, ifa));
1222 /* Is the selected addr into dst subnet? */
1223 if (inet_ifa_match(addr, ifa))
1225 /* No, then can we use new local src? */
1226 if (ifa->ifa_scope <= scope) {
1227 addr = ifa->ifa_local;
1230 /* search for large dst subnet for addr */
1234 } endfor_ifa(in_dev);
1236 return same ? addr : 0;
1240 * Confirm that local IP address exists using wildcards:
1241 * - in_dev: only on this interface, 0=any interface
1242 * - dst: only in the same subnet as dst, 0=any dst
1243 * - local: address, 0=autoselect the local address
1244 * - scope: maximum allowed scope value for the local address
/*
 * Confirm that a local address matching the (dst, local, scope) wildcards
 * exists.  Exported.
 *
 * For any scope other than RT_SCOPE_LINK the answer comes directly from
 * confirm_addr_indev() on the given in_dev.  Otherwise the namespace is
 * taken from in_dev->dev and every device in it is checked with
 * confirm_addr_indev() using the RCU device iterator.
 *
 * NOTE(review): the NULL checks, locking, loop exit and return of the
 * all-devices path are not visible in this listing.
 */
1246 __be32 inet_confirm_addr(struct in_device *in_dev,
1247 __be32 dst, __be32 local, int scope)
1250 struct net_device *dev;
1253 if (scope != RT_SCOPE_LINK)
1254 return confirm_addr_indev(in_dev, dst, local, scope);
1256 net = dev_net(in_dev->dev);
1258 for_each_netdev_rcu(net, dev) {
1259 in_dev = __in_dev_get_rcu(dev);
1261 addr = confirm_addr_indev(in_dev, dst, local, scope);
1270 EXPORT_SYMBOL(inet_confirm_addr);
/*
 * Register nb on the IPv4 address notifier chain (inetaddr_chain) and
 * return the result of blocking_notifier_chain_register().  Exported.
 */
1276 int register_inetaddr_notifier(struct notifier_block *nb)
1278 return blocking_notifier_chain_register(&inetaddr_chain, nb);
1280 EXPORT_SYMBOL(register_inetaddr_notifier);
/*
 * Remove nb from the IPv4 address notifier chain (inetaddr_chain) and
 * return the result of blocking_notifier_chain_unregister().  Exported.
 */
1282 int unregister_inetaddr_notifier(struct notifier_block *nb)
1284 return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1286 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1288 /* Rename ifa_labels for a device name change. Make some effort to preserve
1289 * existing alias numbering and to create unique labels if possible.
/*
 * Relabel every address of in_dev after dev has been renamed.
 *
 * For each address the old label is saved and the label reset to
 * dev->name.  An alias suffix is located in the old label (text starting
 * at ':'), or a ":<number>" suffix is generated from 'named'.  The suffix
 * is appended with strcat() when name plus suffix fit in IFNAMSIZ;
 * otherwise it is written at the tail of the label so that the result
 * still ends within IFNAMSIZ.  RTM_NEWADDR is sent for the address.
 *
 * NOTE(review): the declaration/update of 'named', the conditions choosing
 * between the visible branches and any skip path are not visible in this
 * listing.
 */
1291 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1293 struct in_ifaddr *ifa;
1296 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1297 char old[IFNAMSIZ], *dot;
1299 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1300 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1303 dot = strchr(old, ':');
1305 sprintf(old, ":%d", named);
1308 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1309 strcat(ifa->ifa_label, dot);
1311 strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1313 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
/*
 * Predicate on an MTU value; inetdev_event() uses it to decide whether IPv4
 * stays enabled on a device.
 * NOTE(review): the body (the actual threshold) is not visible in this
 * listing.
 */
1317 static bool inetdev_valid_mtu(unsigned int mtu)
/*
 * Send a gratuitous ARP for every address configured on in_dev: an
 * ARPOP_REQUEST via arp_send() in which ifa_local is passed as both of the
 * IP arguments and dev->dev_addr as the single non-NULL hardware address
 * argument.
 */
1322 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1323 struct in_device *in_dev)
1326 struct in_ifaddr *ifa;
1328 for (ifa = in_dev->ifa_list; ifa;
1329 ifa = ifa->ifa_next) {
1330 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1331 ifa->ifa_local, dev,
1332 ifa->ifa_local, NULL,
1333 dev->dev_addr, NULL);
1337 /* Called only under RTNL semaphore */
/*
 * Netdevice notifier callback for the IPv4 layer (installed through
 * ip_netdev_notifier).  ptr is the affected net_device.
 *
 * Early handling (presumably when the device has no in_device yet -- the
 * guard is not shown): NETDEV_REGISTER creates the in_device with
 * inetdev_init(), returns notifier_from_errno(-ENOMEM) on a visible error
 * path, and sets NOXFRM and NOPOLICY on loopback devices;
 * NETDEV_CHANGEMTU re-creates the in_device when the MTU is valid
 * ("re-enabling IP").
 *
 * Event switch (visible parts):
 *  - NETDEV_REGISTER: reaching this case is logged as a bug and
 *    dev->ip_ptr is cleared.
 *  - A case guarded by inetdev_valid_mtu(): on loopback devices an address
 *    INADDR_LOOPBACK/8 with host scope and infinite lifetimes is created,
 *    holding a reference on in_dev, and inserted.
 *  - NETDEV_CHANGEADDR: tests IN_DEV_ARP_NOTIFY before the gratuitous ARP
 *    that NETDEV_NOTIFY_PEERS always sends.
 *  - NETDEV_PRE_TYPE_CHANGE / NETDEV_POST_TYPE_CHANGE: unmap / remap
 *    multicast state.
 *  - NETDEV_CHANGEMTU: tests the MTU; the in-line comment says IP is
 *    disabled when the MTU is not enough, via the NETDEV_UNREGISTER case
 *    that calls inetdev_destroy().
 *  - NETDEV_CHANGENAME: relabels the addresses and re-registers the sysctl
 *    entries under the new name.
 *
 * NOTE(review): several case labels, break statements and the return value
 * are missing from this listing.
 */
1339 static int inetdev_event(struct notifier_block *this, unsigned long event,
1342 struct net_device *dev = ptr;
1343 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1348 if (event == NETDEV_REGISTER) {
1349 in_dev = inetdev_init(dev);
1351 return notifier_from_errno(-ENOMEM);
1352 if (dev->flags & IFF_LOOPBACK) {
1353 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1354 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1356 } else if (event == NETDEV_CHANGEMTU) {
1357 /* Re-enabling IP */
1358 if (inetdev_valid_mtu(dev->mtu))
1359 in_dev = inetdev_init(dev);
1365 case NETDEV_REGISTER:
1366 pr_debug("%s: bug\n", __func__);
1367 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1370 if (!inetdev_valid_mtu(dev->mtu))
1372 if (dev->flags & IFF_LOOPBACK) {
1373 struct in_ifaddr *ifa = inet_alloc_ifa();
1376 INIT_HLIST_NODE(&ifa->hash);
1378 ifa->ifa_address = htonl(INADDR_LOOPBACK);
1379 ifa->ifa_prefixlen = 8;
1380 ifa->ifa_mask = inet_make_mask(8);
1381 in_dev_hold(in_dev);
1382 ifa->ifa_dev = in_dev;
1383 ifa->ifa_scope = RT_SCOPE_HOST;
1384 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1385 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1386 INFINITY_LIFE_TIME);
1387 inet_insert_ifa(ifa);
1392 case NETDEV_CHANGEADDR:
1393 if (!IN_DEV_ARP_NOTIFY(in_dev))
1396 case NETDEV_NOTIFY_PEERS:
1397 /* Send gratuitous ARP to notify of link change */
1398 inetdev_send_gratuitous_arp(dev, in_dev);
1403 case NETDEV_PRE_TYPE_CHANGE:
1404 ip_mc_unmap(in_dev);
1406 case NETDEV_POST_TYPE_CHANGE:
1407 ip_mc_remap(in_dev);
1409 case NETDEV_CHANGEMTU:
1410 if (inetdev_valid_mtu(dev->mtu))
1412 /* disable IP when MTU is not enough */
1413 case NETDEV_UNREGISTER:
1414 inetdev_destroy(in_dev);
1416 case NETDEV_CHANGENAME:
1417 /* Do not notify about label change, this event is
1418 * not interesting to applications using netlink.
1420 inetdev_changename(dev, in_dev);
1422 devinet_sysctl_unregister(in_dev);
1423 devinet_sysctl_register(in_dev);
/*
 * Notifier block whose callback is inetdev_event().
 * NOTE(review): where it is registered is not visible in this listing.
 */
1430 static struct notifier_block ip_netdev_notifier = {
1431 .notifier_call = inetdev_event,
/*
 * Size estimate for a netlink address message: the aligned ifaddrmsg header
 * plus room for IFA_ADDRESS, IFA_LOCAL and IFA_BROADCAST (4 bytes of
 * payload each) and IFA_LABEL (IFNAMSIZ bytes).  rtmsg_ifa() uses it to
 * size the skb it allocates.
 * NOTE(review): no term for IFA_CACHEINFO is visible here although
 * inet_fill_ifaddr() emits that attribute -- confirm whether the estimate
 * is meant to cover it.
 */
1434 static size_t inet_nlmsg_size(void)
1436 return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1437 + nla_total_size(4) /* IFA_ADDRESS */
1438 + nla_total_size(4) /* IFA_LOCAL */
1439 + nla_total_size(4) /* IFA_BROADCAST */
1440 + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
/*
 * Convert a jiffies timestamp into hundredths of a second elapsed since
 * INITIAL_JIFFIES: (cstamp - INITIAL_JIFFIES) * 100 / HZ, truncated to u32.
 */
1443 static inline u32 cstamp_delta(unsigned long cstamp)
1445 return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
/*
 * Append an IFA_CACHEINFO attribute to skb.  The creation and update
 * timestamps are converted with cstamp_delta(); the preferred and valid
 * lifetimes are stored as given.  Returns the result of nla_put().
 */
1448 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1449 unsigned long tstamp, u32 preferred, u32 valid)
1451 struct ifa_cacheinfo ci;
1453 ci.cstamp = cstamp_delta(cstamp);
1454 ci.tstamp = cstamp_delta(tstamp);
1455 ci.ifa_prefered = preferred;
1456 ci.ifa_valid = valid;
1458 return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
/*
 * Serialise ifa as a netlink address message of type 'event' into skb.
 *
 * The header is created with nlmsg_put() and the ifaddrmsg filled with
 * AF_INET, prefix length, flags, scope and the owning device's ifindex.
 * For addresses without IFA_F_PERMANENT the preferred/valid lifetimes are
 * read from the ifa and, when finite, adjusted using the seconds elapsed
 * since ifa_tstamp (adjustment statements partly missing); the other
 * visible assignments report both as INFINITY_LIFE_TIME.  Attributes
 * IFA_ADDRESS, IFA_LOCAL, IFA_BROADCAST and IFA_LABEL are added only when
 * the corresponding field is non-zero / non-empty, followed by the cache
 * info; any failure jumps to nla_put_failure, where the partial message is
 * cancelled with nlmsg_cancel().  On success the result of nlmsg_end() is
 * returned.
 *
 * NOTE(review): the value returned on the failure paths is not visible in
 * this listing.
 */
1461 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1462 u32 portid, u32 seq, int event, unsigned int flags)
1464 struct ifaddrmsg *ifm;
1465 struct nlmsghdr *nlh;
1466 u32 preferred, valid;
1468 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1472 ifm = nlmsg_data(nlh);
1473 ifm->ifa_family = AF_INET;
1474 ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1475 ifm->ifa_flags = ifa->ifa_flags;
1476 ifm->ifa_scope = ifa->ifa_scope;
1477 ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1479 if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1480 preferred = ifa->ifa_preferred_lft;
1481 valid = ifa->ifa_valid_lft;
1482 if (preferred != INFINITY_LIFE_TIME) {
1483 long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1485 if (preferred > tval)
1489 if (valid != INFINITY_LIFE_TIME) {
1497 preferred = INFINITY_LIFE_TIME;
1498 valid = INFINITY_LIFE_TIME;
1500 if ((ifa->ifa_address &&
1501 nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1503 nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1504 (ifa->ifa_broadcast &&
1505 nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1506 (ifa->ifa_label[0] &&
1507 nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1508 put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1510 goto nla_put_failure;
1512 return nlmsg_end(skb, nlh);
1515 nlmsg_cancel(skb, nlh);
/*
 * Netlink dump callback for IPv4 addresses (registered for RTM_GETADDR in
 * devinet_init()).
 *
 * Walks the net->dev_index_head[] hash buckets; for every device with an
 * in_device it iterates the ifa_list and emits one RTM_NEWADDR message
 * flagged NLM_F_MULTI per address through inet_fill_ifaddr().
 *
 * The position reached is kept in cb->args[] so that a later call can
 * resume: args[1] is the device index and args[2] the address index.
 * NOTE(review): the initialisation of s_h and the final bookkeeping are on
 * lines missing from this listing.
 */
1519 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1521 struct net *net = sock_net(skb->sk);
1524 int ip_idx, s_ip_idx;
1525 struct net_device *dev;
1526 struct in_device *in_dev;
1527 struct in_ifaddr *ifa;
1528 struct hlist_head *head;
/* Resume point stored by the previous invocation. */
1531 s_idx = idx = cb->args[1];
1532 s_ip_idx = ip_idx = cb->args[2];
/* s_idx only applies to the first bucket; it is reset for later ones. */
1534 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1536 head = &net->dev_index_head[h];
/* Sequence value derived from the address generation id; checked below. */
1538 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1540 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1543 if (h > s_h || idx > s_idx)
1545 in_dev = __in_dev_get_rcu(dev);
1549 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1550 ifa = ifa->ifa_next, ip_idx++) {
/* NOTE(review): presumably skips addresses dumped by an earlier call */
1551 if (ip_idx < s_ip_idx)
/* A non-positive result from the fill ends this pass (handling not visible). */
1553 if (inet_fill_ifaddr(skb, ifa,
1554 NETLINK_CB(cb->skb).portid,
1556 RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1560 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1571 cb->args[2] = ip_idx;
/*
 * Send an address notification for @ifa to the RTNLGRP_IPV4_IFADDR group.
 *
 * An skb sized by inet_nlmsg_size() is allocated, filled by
 * inet_fill_ifaddr() with @event as message type, and passed to
 * rtnl_notify(). The sequence number is taken from @nlh when one is given,
 * otherwise 0.
 * NOTE(review): rtnl_set_sk_err() is presumably only reached on the error
 * path; the labels and checks separating the paths are not visible here.
 */
1576 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1579 struct sk_buff *skb;
1580 u32 seq = nlh ? nlh->nlmsg_seq : 0;
1584 net = dev_net(ifa->ifa_dev->dev);
1585 skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1589 err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1591 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1592 WARN_ON(err == -EMSGSIZE);
1596 rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1600 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
/*
 * get_link_af_size hook (see inet_af_ops): room needed for the
 * IFLA_INET_CONF attribute, i.e. IPV4_DEVCONF_MAX values of 4 bytes each.
 * NOTE(review): in_dev is read from dev->ip_ptr, but how a NULL in_dev is
 * handled is on lines missing from this listing.
 */
1603 static size_t inet_get_link_af_size(const struct net_device *dev)
1605 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1610 return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
/*
 * fill_link_af hook (see inet_af_ops): reserve an IFLA_INET_CONF attribute
 * of IPV4_DEVCONF_MAX * 4 bytes in @skb and copy the device's cnf.data[]
 * entries into it as u32 values.
 * NOTE(review): the NULL checks for in_dev and nla and the return values
 * are not visible in this listing.
 */
1613 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1615 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1622 nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1626 for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1627 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
/*
 * Attribute policy used by inet_validate_link_af(): IFLA_INET_CONF has to
 * be a nested attribute.
 */
1632 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1633 [IFLA_INET_CONF] = { .type = NLA_NESTED },
/*
 * validate_link_af hook (see inet_af_ops).
 *
 * Returns -EAFNOSUPPORT when @dev is given but has no in_device. Otherwise
 * @nla is parsed against inet_af_policy and every entry nested under
 * IFLA_INET_CONF has its type checked against the range
 * 1..IPV4_DEVCONF_MAX.
 * NOTE(review): the result of a failed range check and any further
 * per-entry checks are on lines missing from this listing.
 */
1636 static int inet_validate_link_af(const struct net_device *dev,
1637 const struct nlattr *nla)
1639 struct nlattr *a, *tb[IFLA_INET_MAX+1];
1642 if (dev && !__in_dev_get_rtnl(dev))
1643 return -EAFNOSUPPORT;
1645 err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1649 if (tb[IFLA_INET_CONF]) {
1650 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
/* The attribute type doubles as the devconf id. */
1651 int cfgid = nla_type(a);
1656 if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
/*
 * set_link_af hook (see inet_af_ops): apply every entry nested under
 * IFLA_INET_CONF to the device by calling ipv4_devconf_set() with the
 * attribute type as the devconf id and its u32 payload as the value.
 *
 * -EAFNOSUPPORT is one of the possible returns.
 * NOTE(review): presumably returned when the device has no in_device; the
 * condition is not visible. Parsing is done without a policy here, so this
 * appears to rely on inet_validate_link_af() having run first; confirm.
 */
1664 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1666 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1667 struct nlattr *a, *tb[IFLA_INET_MAX+1];
1671 return -EAFNOSUPPORT;
1673 if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1676 if (tb[IFLA_INET_CONF]) {
1677 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1678 ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
/*
 * Size estimate for a netconf message: the netconfmsg header, the
 * NETCONFA_IFINDEX attribute, and one 4-byte attribute for the requested
 * @type (forwarding, rp_filter or mc_forwarding). With @type == -1 all
 * three are counted.
 */
1684 static int inet_netconf_msgsize_devconf(int type)
1686 int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1687 + nla_total_size(4); /* NETCONFA_IFINDEX */
1689 /* type -1 is used for ALL */
1690 if (type == -1 || type == NETCONFA_FORWARDING)
1691 size += nla_total_size(4);
1692 if (type == -1 || type == NETCONFA_RP_FILTER)
1693 size += nla_total_size(4);
1694 if (type == -1 || type == NETCONFA_MC_FORWARDING)
1695 size += nla_total_size(4);
/*
 * Build one netconf message in @skb describing @devconf.
 *
 * The message carries a netconfmsg header with family AF_INET, then
 * NETCONFA_IFINDEX, then the attribute selected by the trailing type
 * argument: NETCONFA_FORWARDING, NETCONFA_RP_FILTER or
 * NETCONFA_MC_FORWARDING, or all three when it is -1. Values are read from
 * @devconf through IPV4_DEVCONF().
 *
 * Returns nlmsg_end() on success. A failed attribute put jumps to
 * nla_put_failure, where the message is cancelled.
 * NOTE(review): the error return values are on lines not visible here.
 */
1700 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1701 struct ipv4_devconf *devconf, u32 portid,
1702 u32 seq, int event, unsigned int flags,
1705 struct nlmsghdr *nlh;
1706 struct netconfmsg *ncm;
1708 nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1713 ncm = nlmsg_data(nlh);
1714 ncm->ncm_family = AF_INET;
1716 if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1717 goto nla_put_failure;
1719 /* type -1 is used for ALL */
1720 if ((type == -1 || type == NETCONFA_FORWARDING) &&
1721 nla_put_s32(skb, NETCONFA_FORWARDING,
1722 IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1723 goto nla_put_failure;
1724 if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1725 nla_put_s32(skb, NETCONFA_RP_FILTER,
1726 IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1727 goto nla_put_failure;
1728 if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1729 nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1730 IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1731 goto nla_put_failure;
1733 return nlmsg_end(skb, nlh);
1736 nlmsg_cancel(skb, nlh);
/*
 * Notify RTNLGRP_IPV4_NETCONF listeners about a devconf setting.
 *
 * Allocates an skb sized for @type with GFP_ATOMIC, fills it as an
 * RTM_NEWNETCONF message for @ifindex/@devconf (portid and seq 0) and
 * sends it with rtnl_notify().
 * NOTE(review): rtnl_set_sk_err() is presumably the error path; the labels
 * and checks are on lines missing from this listing.
 */
1740 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1741 struct ipv4_devconf *devconf)
1743 struct sk_buff *skb;
1746 skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1750 err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1751 RTM_NEWNETCONF, 0, type);
1753 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1754 WARN_ON(err == -EMSGSIZE);
1758 rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1762 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
/*
 * Attribute policy applied by inet_netconf_get_devconf() when parsing a
 * request: each listed attribute is given a length of sizeof(int).
 */
1765 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1766 [NETCONFA_IFINDEX] = { .len = sizeof(int) },
1767 [NETCONFA_FORWARDING] = { .len = sizeof(int) },
1768 [NETCONFA_RP_FILTER] = { .len = sizeof(int) },
/*
 * Handler for a single netconf query (registered for RTM_GETNETCONF in
 * devinet_init()).
 *
 * The request is parsed against devconf_ipv4_policy and NETCONFA_IFINDEX
 * is read from it. The ifindex selects the configuration to report:
 * NETCONFA_IFINDEX_ALL and NETCONFA_IFINDEX_DEFAULT map to the per-net
 * "all" and "default" sets, any other value is looked up as a device and
 * its in_device's cnf is used. The reply is an RTM_NEWNETCONF message
 * unicast back to the requesting portid.
 * NOTE(review): the error codes for parse failure, a missing ifindex
 * attribute, an unknown device or a failed allocation are on lines
 * missing from this listing.
 */
1771 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1772 struct nlmsghdr *nlh)
1774 struct net *net = sock_net(in_skb->sk);
1775 struct nlattr *tb[NETCONFA_MAX+1];
1776 struct netconfmsg *ncm;
1777 struct sk_buff *skb;
1778 struct ipv4_devconf *devconf;
1779 struct in_device *in_dev;
1780 struct net_device *dev;
1784 err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1785 devconf_ipv4_policy);
1790 if (!tb[NETCONFA_IFINDEX])
1793 ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1795 case NETCONFA_IFINDEX_ALL:
1796 devconf = net->ipv4.devconf_all;
1798 case NETCONFA_IFINDEX_DEFAULT:
1799 devconf = net->ipv4.devconf_dflt;
1802 dev = __dev_get_by_index(net, ifindex);
1805 in_dev = __in_dev_get_rtnl(dev);
1808 devconf = &in_dev->cnf;
/* The reply buffer is sized for all attribute types (-1). */
1813 skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1817 err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1818 NETLINK_CB(in_skb).portid,
1819 nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1822 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1823 WARN_ON(err == -EMSGSIZE);
1827 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
/*
 * Netlink dump callback for netconf (registered for RTM_GETNETCONF in
 * devinet_init()).
 *
 * Walks the net->dev_index_head[] hash buckets and emits one message per
 * device through inet_netconf_fill_devconf(). Two further steps follow the
 * buckets: h == NETDEV_HASHENTRIES emits the NETCONFA_IFINDEX_ALL entry
 * and h == NETDEV_HASHENTRIES + 1 the NETCONFA_IFINDEX_DEFAULT entry, both
 * as RTM_NEWNETCONF flagged NLM_F_MULTI.
 * NOTE(review): the resume bookkeeping beyond cb->args[1] and the handling
 * of a failed fill are on lines missing from this listing.
 */
1832 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1833 struct netlink_callback *cb)
1835 struct net *net = sock_net(skb->sk);
1838 struct net_device *dev;
1839 struct in_device *in_dev;
1840 struct hlist_head *head;
1843 s_idx = idx = cb->args[1];
/* s_idx only applies to the first bucket; it is reset for later ones. */
1845 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1847 head = &net->dev_index_head[h];
1849 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1851 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1854 in_dev = __in_dev_get_rcu(dev);
1858 if (inet_netconf_fill_devconf(skb, dev->ifindex,
1860 NETLINK_CB(cb->skb).portid,
1868 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
/* Pseudo entry for the "all" configuration. */
1874 if (h == NETDEV_HASHENTRIES) {
1875 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
1876 net->ipv4.devconf_all,
1877 NETLINK_CB(cb->skb).portid,
1879 RTM_NEWNETCONF, NLM_F_MULTI,
/* Pseudo entry for the "default" configuration. */
1885 if (h == NETDEV_HASHENTRIES + 1) {
1886 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
1887 net->ipv4.devconf_dflt,
1888 NETLINK_CB(cb->skb).portid,
1890 RTM_NEWNETCONF, NLM_F_MULTI,
1903 #ifdef CONFIG_SYSCTL
/*
 * Copy default setting @i to every device in @net whose cnf.state bit @i
 * is clear. Devices with the bit set keep their own value;
 * devinet_conf_proc() is the visible place where that bit gets set.
 */
1905 static void devinet_copy_dflt_conf(struct net *net, int i)
1907 struct net_device *dev;
1910 for_each_netdev_rcu(net, dev) {
1911 struct in_device *in_dev;
1913 in_dev = __in_dev_get_rcu(dev);
1914 if (in_dev && !test_bit(i, in_dev->cnf.state))
1915 in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
/*
 * Propagate the "all" forwarding setting of @net.
 *
 * The "all" accept_redirects value becomes the inverse of forwarding, the
 * "default" forwarding value follows it, and a NETCONFA_FORWARDING
 * notification is sent for both. Each device then has its FORWARDING
 * setting updated, followed by a per-device notification.
 * NOTE(review): dev_disable_lro() is called inside the loop; any condition
 * guarding it, and the check that in_dev is non-NULL, are on lines missing
 * from this listing.
 */
1920 /* called with RTNL locked */
1921 static void inet_forward_change(struct net *net)
1923 struct net_device *dev;
1924 int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1926 IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1927 IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1928 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1929 NETCONFA_IFINDEX_ALL,
1930 net->ipv4.devconf_all);
1931 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1932 NETCONFA_IFINDEX_DEFAULT,
1933 net->ipv4.devconf_dflt);
1935 for_each_netdev(net, dev) {
1936 struct in_device *in_dev;
1938 dev_disable_lro(dev);
1940 in_dev = __in_dev_get_rcu(dev);
1942 IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1943 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1944 dev->ifindex, &in_dev->cnf);
/*
 * sysctl handler used by the plain DEVINET_SYSCTL_RW/RO entries.
 *
 * The value is read or written through proc_dointvec(), with the old and
 * new values captured around the call. The follow-up work is:
 *  - mark the entry's bit in cnf->state;
 *  - when the "default" set was changed, push the value to devices via
 *    devinet_copy_dflt_conf();
 *  - flush the route cache when accept_local or route_localnet goes from
 *    non-zero to zero;
 *  - send a NETCONFA_RP_FILTER notification when rp_filter changed.
 * ctl->extra1 is the owning ipv4_devconf and ctl->extra2 the struct net.
 * NOTE(review): the follow-up work is presumably done only for writes; the
 * guarding condition and the return statement are on lines missing from
 * this listing.
 */
1950 static int devinet_conf_proc(ctl_table *ctl, int write,
1951 void __user *buffer,
1952 size_t *lenp, loff_t *ppos)
1954 int old_value = *(int *)ctl->data;
1955 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1956 int new_value = *(int *)ctl->data;
1959 struct ipv4_devconf *cnf = ctl->extra1;
1960 struct net *net = ctl->extra2;
/* Index of this entry inside cnf->data[]. */
1961 int i = (int *)ctl->data - cnf->data;
1963 set_bit(i, cnf->state);
1965 if (cnf == net->ipv4.devconf_dflt)
1966 devinet_copy_dflt_conf(net, i);
1967 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
1968 i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
1969 if ((new_value == 0) && (old_value != 0))
1970 rt_cache_flush(net);
1971 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
1972 new_value != old_value) {
/* Work out which ifindex the notification should carry. */
1975 if (cnf == net->ipv4.devconf_dflt)
1976 ifindex = NETCONFA_IFINDEX_DEFAULT;
1977 else if (cnf == net->ipv4.devconf_all)
1978 ifindex = NETCONFA_IFINDEX_ALL;
1980 struct in_device *idev =
1981 container_of(cnf, struct in_device,
1983 ifindex = idev->dev->ifindex;
1985 inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
/*
 * sysctl handler for the forwarding entries.
 *
 * After proc_dointvec(), a write that changed the value is handled in one
 * of three ways:
 *  - "default" entry: only a NETCONFA_FORWARDING notification for
 *    NETCONFA_IFINDEX_DEFAULT is sent;
 *  - "all" entry: inet_forward_change() propagates the value;
 *  - otherwise (a per-device entry): the owning in_device is found with
 *    container_of() and a per-device notification is sent.
 * The non-default cases need the RTNL lock. When rtnl_trylock() fails the
 * syscall is restarted, after the original values are restored as the
 * existing comment says.
 * NOTE(review): the restore statements, any condition around
 * dev_disable_lro(), the unlock and the return are on lines missing from
 * this listing.
 */
1993 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1994 void __user *buffer,
1995 size_t *lenp, loff_t *ppos)
1997 int *valp = ctl->data;
2000 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2002 if (write && *valp != val) {
2003 struct net *net = ctl->extra2;
2005 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2006 if (!rtnl_trylock()) {
2007 /* Restore the original values before restarting */
2010 return restart_syscall();
2012 if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2013 inet_forward_change(net);
2015 struct ipv4_devconf *cnf = ctl->extra1;
2016 struct in_device *idev =
2017 container_of(cnf, struct in_device, cnf);
2019 dev_disable_lro(idev->dev);
2020 inet_netconf_notify_devconf(net,
2021 NETCONFA_FORWARDING,
2026 rt_cache_flush(net);
2028 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2029 NETCONFA_IFINDEX_DEFAULT,
2030 net->ipv4.devconf_dflt);
/*
 * sysctl handler used by the DEVINET_SYSCTL_FLUSHING entries: read or
 * write the integer through proc_dointvec() and flush the route cache of
 * the owning net (ctl->extra2) when a write changed the value.
 * NOTE(review): the declaration of val (presumably the value before the
 * write) and the return statement are on lines missing from this listing.
 */
2036 static int ipv4_doint_and_flush(ctl_table *ctl, int write,
2037 void __user *buffer,
2038 size_t *lenp, loff_t *ppos)
2040 int *valp = ctl->data;
2042 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2043 struct net *net = ctl->extra2;
2045 if (write && *valp != val)
2046 rt_cache_flush(net);
/*
 * Helpers for building the devinet_sysctl template table.
 *
 * DEVINET_SYSCTL_ENTRY produces one ctl_table initialiser whose .data
 * points at slot IPV4_DEVCONF_<attr> - 1 of the global ipv4_devconf, with
 * .extra1 set to that same structure and proc as its handler. The other
 * macros are shorthands for it:
 *   RW_ENTRY       - passes 0644 and devinet_conf_proc
 *   RO_ENTRY       - passes 0444 and devinet_conf_proc
 *   COMPLEX_ENTRY  - passes 0644 and a caller-supplied handler
 *   FLUSHING_ENTRY - COMPLEX_ENTRY with ipv4_doint_and_flush
 * NOTE(review): mval is presumably the file mode and name the procname;
 * the lines using them are missing from this listing.
 */
2051 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
2054 .data = ipv4_devconf.data + \
2055 IPV4_DEVCONF_ ## attr - 1, \
2056 .maxlen = sizeof(int), \
2058 .proc_handler = proc, \
2059 .extra1 = &ipv4_devconf, \
2062 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
2063 DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
2065 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
2066 DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
2068 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
2069 DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
2071 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
2072 DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
/*
 * Template sysctl table for one ipv4_devconf instance.
 *
 * __devinet_sysctl_register() duplicates this structure and re-points the
 * entries at the configuration being registered. "forwarding" uses its own
 * handler, "mc_forwarding" is created with the read-only macro, and the
 * entries declared with DEVINET_SYSCTL_FLUSHING_ENTRY use
 * ipv4_doint_and_flush as their handler.
 */
2074 static struct devinet_sysctl_table {
2075 struct ctl_table_header *sysctl_header;
2076 struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2077 } devinet_sysctl = {
2079 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2080 devinet_sysctl_forward),
2081 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2083 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2084 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2085 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2086 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2087 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2088 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2089 "accept_source_route"),
2090 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2091 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2092 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2093 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2094 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2095 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2096 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2097 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2098 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2099 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2100 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2101 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2102 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2104 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2105 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2106 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
2107 "force_igmp_version"),
2108 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2109 "promote_secondaries"),
2110 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
/*
 * Register the sysctl directory "net/ipv4/conf/<dev_name>" for the
 * configuration @p in @net.
 *
 * The devinet_sysctl template is duplicated with kmemdup(). Each entry's
 * .data pointer is then shifted by the distance between @p and the global
 * ipv4_devconf so that it refers to the same slot inside @p, and extra1
 * and extra2 are set to @p and @net. The loop stops one short of the array
 * size.
 * NOTE(review): the untouched last entry is presumably the terminator.
 * The allocation-failure handling, the store into p->sysctl and the return
 * values are on lines missing from this listing.
 */
2115 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2116 struct ipv4_devconf *p)
2119 struct devinet_sysctl_table *t;
2120 char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2122 t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2126 for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2127 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2128 t->devinet_vars[i].extra1 = p;
2129 t->devinet_vars[i].extra2 = net;
2132 snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2134 t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2135 if (!t->sysctl_header)
/*
 * Undo __devinet_sysctl_register() for @cnf: the table stored in
 * cnf->sysctl has its header unregistered.
 * NOTE(review): a NULL check, clearing cnf->sysctl and freeing the table
 * are presumably on the lines missing from this listing.
 */
2147 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2149 struct devinet_sysctl_table *t = cnf->sysctl;
2155 unregister_net_sysctl_table(t->sysctl_header);
/*
 * Create the sysctl entries of one in_device: the neighbour parameters are
 * registered under "ipv4", and the per-device devconf table is registered
 * under the device's name. The return value of the second registration is
 * not used in this void function.
 */
2159 static void devinet_sysctl_register(struct in_device *idev)
2161 neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
2162 __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
/*
 * Remove the sysctl entries of one in_device: the devconf table goes
 * first, then the neighbour parameter entries.
 */
2166 static void devinet_sysctl_unregister(struct in_device *idev)
2168 __devinet_sysctl_unregister(&idev->cnf);
2169 neigh_sysctl_unregister(idev->arp_parms);
/*
 * Template for the "ip_forward" sysctl, which devinet_init_net() registers
 * under "net/ipv4". It exposes the FORWARDING slot of the global
 * ipv4_devconf and is handled by devinet_sysctl_forward(). extra1 and
 * extra2 point at the init_net instances here; devinet_init_net() rewrites
 * them in its copy for other namespaces.
 */
2172 static struct ctl_table ctl_forward_entry[] = {
2174 .procname = "ip_forward",
2175 .data = &ipv4_devconf.data[
2176 IPV4_DEVCONF_FORWARDING - 1],
2177 .maxlen = sizeof(int),
2179 .proc_handler = devinet_sysctl_forward,
2180 .extra1 = &ipv4_devconf,
2181 .extra2 = &init_net,
/*
 * Per-namespace initialisation (the .init hook of devinet_ops).
 *
 * init_net uses the static ipv4_devconf, ipv4_devconf_dflt and
 * ctl_forward_entry objects directly. Every other namespace gets
 * kmemdup() copies, and the copied forward entry is re-pointed at the
 * namespace's own "all" configuration. With CONFIG_SYSCTL the "all" and
 * "default" tables and the "net/ipv4" forward entry are registered. The
 * resulting pointers are stored in net->ipv4.
 *
 * The tail of the function unwinds on failure: tables are unregistered
 * and only objects that differ from the static ones are considered for
 * release.
 * NOTE(review): the error labels, the kfree() calls and the return values
 * are on lines missing from this listing.
 */
2187 static __net_init int devinet_init_net(struct net *net)
2190 struct ipv4_devconf *all, *dflt;
2191 #ifdef CONFIG_SYSCTL
2192 struct ctl_table *tbl = ctl_forward_entry;
2193 struct ctl_table_header *forw_hdr;
2197 all = &ipv4_devconf;
2198 dflt = &ipv4_devconf_dflt;
/* Namespaces other than init_net work on private copies. */
2200 if (!net_eq(net, &init_net)) {
2201 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2205 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2207 goto err_alloc_dflt;
2209 #ifdef CONFIG_SYSCTL
2210 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2214 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2215 tbl[0].extra1 = all;
2216 tbl[0].extra2 = net;
2220 #ifdef CONFIG_SYSCTL
2221 err = __devinet_sysctl_register(net, "all", all);
2225 err = __devinet_sysctl_register(net, "default", dflt);
2230 forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2231 if (forw_hdr == NULL)
2233 net->ipv4.forw_hdr = forw_hdr;
2236 net->ipv4.devconf_all = all;
2237 net->ipv4.devconf_dflt = dflt;
/* Error unwinding: undo the registrations in reverse order. */
2240 #ifdef CONFIG_SYSCTL
2242 __devinet_sysctl_unregister(dflt);
2244 __devinet_sysctl_unregister(all);
2246 if (tbl != ctl_forward_entry)
2250 if (dflt != &ipv4_devconf_dflt)
2253 if (all != &ipv4_devconf)
/*
 * Per-namespace teardown (the .exit hook of devinet_ops).
 *
 * With CONFIG_SYSCTL the forward entry and the "default" and "all" tables
 * are unregistered. The two devconf structures are then freed.
 * NOTE(review): tbl is fetched from the header before it is unregistered,
 * presumably so that it can be freed afterwards; that line is missing from
 * this listing.
 */
2259 static __net_exit void devinet_exit_net(struct net *net)
2261 #ifdef CONFIG_SYSCTL
2262 struct ctl_table *tbl;
2264 tbl = net->ipv4.forw_hdr->ctl_table_arg;
2265 unregister_net_sysctl_table(net->ipv4.forw_hdr);
2266 __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2267 __devinet_sysctl_unregister(net->ipv4.devconf_all);
2270 kfree(net->ipv4.devconf_dflt);
2271 kfree(net->ipv4.devconf_all);
/*
 * Per-namespace operations, registered by devinet_init() through
 * register_pernet_subsys().
 */
2274 static __net_initdata struct pernet_operations devinet_ops = {
2275 .init = devinet_init_net,
2276 .exit = devinet_exit_net,
/*
 * Address-family link operations for IPv4, registered by devinet_init()
 * through rtnl_af_register(). They size, fill, validate and apply the
 * IFLA_INET_CONF attribute.
 * NOTE(review): the .family initialiser is presumably on the line missing
 * from this listing.
 */
2279 static struct rtnl_af_ops inet_af_ops = {
2281 .fill_link_af = inet_fill_link_af,
2282 .get_link_af_size = inet_get_link_af_size,
2283 .validate_link_af = inet_validate_link_af,
2284 .set_link_af = inet_set_link_af,
/*
 * Boot-time initialisation of the IPv4 device layer. In the order shown:
 *  - initialise the inet_addr_lst[] hash heads;
 *  - register the per-namespace operations, the gifconf handler for
 *    PF_INET and the netdevice notifier;
 *  - schedule check_lifetime_work with a delay of 0;
 *  - register the address-family link ops and the rtnetlink handlers for
 *    RTM_NEWADDR, RTM_DELADDR, RTM_GETADDR (dump only) and RTM_GETNETCONF
 *    (both a doit and a dump handler).
 */
2287 void __init devinet_init(void)
2291 for (i = 0; i < IN4_ADDR_HSIZE; i++)
2292 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2294 register_pernet_subsys(&devinet_ops);
2296 register_gifconf(PF_INET, inet_gifconf);
2297 register_netdevice_notifier(&ip_netdev_notifier);
2299 schedule_delayed_work(&check_lifetime_work, 0);
2301 rtnl_af_register(&inet_af_ops);
2303 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2304 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2305 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2306 rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2307 inet_netconf_dump_devconf, NULL);