Merge branch 'linux-linaro-lsk' into linux-linaro-lsk-android
[firefly-linux-kernel-4.4.55.git] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
59
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/tcp.h>
63 #include <net/route.h>
64 #include <net/ip_fib.h>
65 #include <net/rtnetlink.h>
66 #include <net/net_namespace.h>
67 #include <net/addrconf.h>
68
69 #include "fib_lookup.h"
70
/*
 * File-local IPv4 device configuration template.  The .data array is
 * indexed by (IPV4_DEVCONF_<attr> - 1); every entry not listed here is
 * zero-initialized.
 */
static struct ipv4_devconf ipv4_devconf = {
        .data = {
                [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
                [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
                [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
                [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
        },
};
79
/*
 * Second file-local configuration template: the same four attributes as
 * ipv4_devconf are enabled, plus ACCEPT_SOURCE_ROUTE.  Indexed by
 * (IPV4_DEVCONF_<attr> - 1) as well.
 * NOTE(review): presumably this is what net->ipv4.devconf_dflt points at
 * for the initial namespace -- confirm where it is assigned.
 */
static struct ipv4_devconf ipv4_devconf_dflt = {
        .data = {
                [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
                [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
                [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
                [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
                [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
        },
};
89
/*
 * Read devconf attribute @attr from the per-namespace default
 * configuration (net->ipv4.devconf_dflt) through IPV4_DEVCONF().
 * @net is parenthesized in the expansion so that any pointer-valued
 * expression, not just a plain identifier, expands correctly.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
        IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
92
/*
 * Netlink attribute policy for IFA_* attributes.  It is handed to
 * nlmsg_parse() by inet_rtm_deladdr() and rtm_to_ifaddr().
 */
static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
        /* The three address attributes are 32-bit values. */
        [IFA_LOCAL]             = { .type = NLA_U32 },
        [IFA_ADDRESS]           = { .type = NLA_U32 },
        [IFA_BROADCAST]         = { .type = NLA_U32 },
        /* Label is a string bounded by IFNAMSIZ - 1. */
        [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
        /* Length derived from struct ifa_cacheinfo; no explicit type. */
        [IFA_CACHEINFO]         = { .len = sizeof(struct ifa_cacheinfo) },
};
100
/*
 * Hash table of in_ifaddr entries: 1 << 8 = 256 buckets, indexed by
 * inet_addr_hash().  Insertions and removals are serialized by
 * inet_addr_hash_lock; lookups in this file walk the buckets under
 * rcu_read_lock() (or under rtnl_lock() in check_lifetime()).
 */
#define IN4_ADDR_HSIZE_SHIFT    8
#define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)

static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
static DEFINE_SPINLOCK(inet_addr_hash_lock);
106
107 static u32 inet_addr_hash(struct net *net, __be32 addr)
108 {
109         u32 val = (__force u32) addr ^ net_hash_mix(net);
110
111         return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
112 }
113
114 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
115 {
116         u32 hash = inet_addr_hash(net, ifa->ifa_local);
117
118         spin_lock(&inet_addr_hash_lock);
119         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
120         spin_unlock(&inet_addr_hash_lock);
121 }
122
/*
 * Unlink @ifa from the address hash table.  The removal is done with the
 * RCU-aware hlist primitive while holding inet_addr_hash_lock, the same
 * lock inet_hash_insert() takes.
 */
static void inet_hash_remove(struct in_ifaddr *ifa)
{
        spin_lock(&inet_addr_hash_lock);
        hlist_del_init_rcu(&ifa->hash);
        spin_unlock(&inet_addr_hash_lock);
}
129
130 /**
131  * __ip_dev_find - find the first device with a given source address.
132  * @net: the net namespace
133  * @addr: the source address
134  * @devref: if true, take a reference on the found device
135  *
136  * If a caller uses devref=false, it should be protected by RCU, or RTNL
137  */
138 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
139 {
140         u32 hash = inet_addr_hash(net, addr);
141         struct net_device *result = NULL;
142         struct in_ifaddr *ifa;
143
144         rcu_read_lock();
145         hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
146                 if (ifa->ifa_local == addr) {
147                         struct net_device *dev = ifa->ifa_dev->dev;
148
149                         if (!net_eq(dev_net(dev), net))
150                                 continue;
151                         result = dev;
152                         break;
153                 }
154         }
155         if (!result) {
156                 struct flowi4 fl4 = { .daddr = addr };
157                 struct fib_result res = { 0 };
158                 struct fib_table *local;
159
160                 /* Fallback to FIB local table so that communication
161                  * over loopback subnets work.
162                  */
163                 local = fib_get_table(net, RT_TABLE_LOCAL);
164                 if (local &&
165                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
166                     res.type == RTN_LOCAL)
167                         result = FIB_RES_DEV(res);
168         }
169         if (result && devref)
170                 dev_hold(result);
171         rcu_read_unlock();
172         return result;
173 }
174 EXPORT_SYMBOL(__ip_dev_find);
175
/*
 * Forward declaration; called in this file with RTM_NEWADDR and
 * RTM_DELADDR events whenever an address is added, changed or removed.
 */
static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);

/*
 * Notifier chain invoked with NETDEV_UP / NETDEV_DOWN and the affected
 * in_ifaddr when an address is inserted or deleted.
 */
static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
                         int destroy);
/*
 * With CONFIG_SYSCTL the sysctl hooks are only declared here; without it
 * they are empty stubs so callers need no #ifdef of their own.
 */
#ifdef CONFIG_SYSCTL
static void devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
static void devinet_sysctl_register(struct in_device *idev)
{
}
static void devinet_sysctl_unregister(struct in_device *idev)
{
}
#endif
192
193 /* Locks all the inet devices. */
194
195 static struct in_ifaddr *inet_alloc_ifa(void)
196 {
197         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
198 }
199
200 static void inet_rcu_free_ifa(struct rcu_head *head)
201 {
202         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
203         if (ifa->ifa_dev)
204                 in_dev_put(ifa->ifa_dev);
205         kfree(ifa);
206 }
207
/*
 * Release @ifa.  The actual teardown is deferred through call_rcu():
 * inet_rcu_free_ifa() runs as the callback, drops the in_device
 * reference (if any) and kfree()s the address.
 */
static void inet_free_ifa(struct in_ifaddr *ifa)
{
        call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
}
212
213 void in_dev_finish_destroy(struct in_device *idev)
214 {
215         struct net_device *dev = idev->dev;
216
217         WARN_ON(idev->ifa_list);
218         WARN_ON(idev->mc_list);
219 #ifdef NET_REFCNT_DEBUG
220         pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
221 #endif
222         dev_put(dev);
223         if (!idev->dead)
224                 pr_err("Freeing alive in_device %p\n", idev);
225         else
226                 kfree(idev);
227 }
228 EXPORT_SYMBOL(in_dev_finish_destroy);
229
230 static struct in_device *inetdev_init(struct net_device *dev)
231 {
232         struct in_device *in_dev;
233
234         ASSERT_RTNL();
235
236         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
237         if (!in_dev)
238                 goto out;
239         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
240                         sizeof(in_dev->cnf));
241         in_dev->cnf.sysctl = NULL;
242         in_dev->dev = dev;
243         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
244         if (!in_dev->arp_parms)
245                 goto out_kfree;
246         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
247                 dev_disable_lro(dev);
248         /* Reference in_dev->dev */
249         dev_hold(dev);
250         /* Account for reference dev->ip_ptr (below) */
251         in_dev_hold(in_dev);
252
253         devinet_sysctl_register(in_dev);
254         ip_mc_init_dev(in_dev);
255         if (dev->flags & IFF_UP)
256                 ip_mc_up(in_dev);
257
258         /* we can receive as soon as ip_ptr is set -- do this last */
259         rcu_assign_pointer(dev->ip_ptr, in_dev);
260 out:
261         return in_dev;
262 out_kfree:
263         kfree(in_dev);
264         in_dev = NULL;
265         goto out;
266 }
267
268 static void in_dev_rcu_put(struct rcu_head *head)
269 {
270         struct in_device *idev = container_of(head, struct in_device, rcu_head);
271         in_dev_put(idev);
272 }
273
274 static void inetdev_destroy(struct in_device *in_dev)
275 {
276         struct in_ifaddr *ifa;
277         struct net_device *dev;
278
279         ASSERT_RTNL();
280
281         dev = in_dev->dev;
282
283         in_dev->dead = 1;
284
285         ip_mc_destroy_dev(in_dev);
286
287         while ((ifa = in_dev->ifa_list) != NULL) {
288                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
289                 inet_free_ifa(ifa);
290         }
291
292         RCU_INIT_POINTER(dev->ip_ptr, NULL);
293
294         devinet_sysctl_unregister(in_dev);
295         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
296         arp_ifdown(dev);
297
298         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
299 }
300
301 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
302 {
303         rcu_read_lock();
304         for_primary_ifa(in_dev) {
305                 if (inet_ifa_match(a, ifa)) {
306                         if (!b || inet_ifa_match(b, ifa)) {
307                                 rcu_read_unlock();
308                                 return 1;
309                         }
310                 }
311         } endfor_ifa(in_dev);
312         rcu_read_unlock();
313         return 0;
314 }
315
/*
 * Remove the address *ifap from in_dev's address list.
 *
 * @in_dev:  device whose ifa_list contains the address
 * @ifap:    the link (ifa_list or a predecessor's ifa_next slot) that
 *           currently points at the address being removed
 * @destroy: if non-zero, the removed address is handed to inet_free_ifa()
 * @nlh, @portid: passed through to rtmsg_ifa() for every notification
 *
 * When the removed address is a primary one, each secondary with the same
 * mask that matches its subnet is either deleted as well or, if
 * IN_DEV_PROMOTE_SECONDARIES(in_dev) is set, the first such secondary is
 * promoted to primary.  Caller must hold RTNL (see ASSERT_RTNL()).
 */
static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
                         int destroy, struct nlmsghdr *nlh, u32 portid)
{
        struct in_ifaddr *promote = NULL;
        struct in_ifaddr *ifa, *ifa1 = *ifap;
        struct in_ifaddr *last_prim = in_dev->ifa_list;
        struct in_ifaddr *prev_prom = NULL;
        int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);

        ASSERT_RTNL();

        /* 1. Deleting primary ifaddr forces deletion all secondaries
         * unless alias promotion is set
         **/

        if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
                struct in_ifaddr **ifap1 = &ifa1->ifa_next;

                while ((ifa = *ifap1) != NULL) {
                        /* Remember the last primary whose scope value is
                         * not below ifa1's; a promoted address is
                         * re-linked right after it further down.
                         */
                        if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
                            ifa1->ifa_scope <= ifa->ifa_scope)
                                last_prim = ifa;

                        /* Not a secondary of ifa1's subnet: step over it
                         * and remember it as the possible predecessor of
                         * the address to promote.
                         */
                        if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
                            ifa1->ifa_mask != ifa->ifa_mask ||
                            !inet_ifa_match(ifa1->ifa_address, ifa)) {
                                ifap1 = &ifa->ifa_next;
                                prev_prom = ifa;
                                continue;
                        }

                        if (!do_promote) {
                                /* No promotion: unlink, announce and free
                                 * this secondary; ifap1 stays put because
                                 * *ifap1 now already points at the next
                                 * entry.
                                 */
                                inet_hash_remove(ifa);
                                *ifap1 = ifa->ifa_next;

                                rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
                                blocking_notifier_call_chain(&inetaddr_chain,
                                                NETDEV_DOWN, ifa);
                                inet_free_ifa(ifa);
                        } else {
                                /* First matching secondary becomes the
                                 * promotion candidate; stop scanning.
                                 */
                                promote = ifa;
                                break;
                        }
                }
        }

        /* On promotion all secondaries from subnet are changing
         * the primary IP, we must remove all their routes silently
         * and later to add them back with new prefsrc. Do this
         * while all addresses are on the device list.
         */
        for (ifa = promote; ifa; ifa = ifa->ifa_next) {
                if (ifa1->ifa_mask == ifa->ifa_mask &&
                    inet_ifa_match(ifa1->ifa_address, ifa))
                        fib_del_ifaddr(ifa, ifa1);
        }

        /* 2. Unlink it */

        *ifap = ifa1->ifa_next;
        inet_hash_remove(ifa1);

        /* 3. Announce address deletion */

        /* Send message first, then call notifier.
           At first sight, FIB update triggered by notifier
           will refer to already deleted ifaddr, that could confuse
           netlink listeners. It is not true: look, gated sees
           that route deleted and if it still thinks that ifaddr
           is valid, it will try to restore deleted routes... Grr.
           So that, this order is correct.
         */
        rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
        blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);

        if (promote) {
                /* Saved before promote is re-linked, so the loop below
                 * still starts from promote's original successor.
                 */
                struct in_ifaddr *next_sec = promote->ifa_next;

                /* If the scan stepped over any entry before reaching
                 * promote, unlink promote from its current position and
                 * re-insert it directly after last_prim.
                 */
                if (prev_prom) {
                        prev_prom->ifa_next = promote->ifa_next;
                        promote->ifa_next = last_prim->ifa_next;
                        last_prim->ifa_next = promote;
                }

                promote->ifa_flags &= ~IFA_F_SECONDARY;
                rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
                blocking_notifier_call_chain(&inetaddr_chain,
                                NETDEV_UP, promote);
                /* Add back the entries of the same subnet whose routes
                 * were removed by fib_del_ifaddr() above.
                 */
                for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
                        if (ifa1->ifa_mask != ifa->ifa_mask ||
                            !inet_ifa_match(ifa1->ifa_address, ifa))
                                        continue;
                        fib_add_ifaddr(ifa);
                }

        }
        if (destroy)
                inet_free_ifa(ifa1);
}
415
/*
 * Convenience wrapper around __inet_del_ifa() for callers that have no
 * originating netlink request: passes a NULL nlmsghdr and portid 0.
 */
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
                         int destroy)
{
        __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
}
421
/* Handler for the delayed work declared below; defined further down. */
static void check_lifetime(struct work_struct *work);

/*
 * Delayed work item running check_lifetime().  It is rescheduled with a
 * zero delay whenever an address is inserted or its lifetime is updated,
 * and re-arms itself at the end of each run.
 */
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
425
426 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
427                              u32 portid)
428 {
429         struct in_device *in_dev = ifa->ifa_dev;
430         struct in_ifaddr *ifa1, **ifap, **last_primary;
431
432         ASSERT_RTNL();
433
434         if (!ifa->ifa_local) {
435                 inet_free_ifa(ifa);
436                 return 0;
437         }
438
439         ifa->ifa_flags &= ~IFA_F_SECONDARY;
440         last_primary = &in_dev->ifa_list;
441
442         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
443              ifap = &ifa1->ifa_next) {
444                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
445                     ifa->ifa_scope <= ifa1->ifa_scope)
446                         last_primary = &ifa1->ifa_next;
447                 if (ifa1->ifa_mask == ifa->ifa_mask &&
448                     inet_ifa_match(ifa1->ifa_address, ifa)) {
449                         if (ifa1->ifa_local == ifa->ifa_local) {
450                                 inet_free_ifa(ifa);
451                                 return -EEXIST;
452                         }
453                         if (ifa1->ifa_scope != ifa->ifa_scope) {
454                                 inet_free_ifa(ifa);
455                                 return -EINVAL;
456                         }
457                         ifa->ifa_flags |= IFA_F_SECONDARY;
458                 }
459         }
460
461         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
462                 net_srandom(ifa->ifa_local);
463                 ifap = last_primary;
464         }
465
466         ifa->ifa_next = *ifap;
467         *ifap = ifa;
468
469         inet_hash_insert(dev_net(in_dev->dev), ifa);
470
471         cancel_delayed_work(&check_lifetime_work);
472         schedule_delayed_work(&check_lifetime_work, 0);
473
474         /* Send message first, then call notifier.
475            Notifier will trigger FIB update, so that
476            listeners of netlink will know about new ifaddr */
477         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
478         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
479
480         return 0;
481 }
482
/*
 * Convenience wrapper around __inet_insert_ifa() for callers that have no
 * originating netlink request: passes a NULL nlmsghdr and portid 0.
 * Returns whatever __inet_insert_ifa() returns.
 */
static int inet_insert_ifa(struct in_ifaddr *ifa)
{
        return __inet_insert_ifa(ifa, NULL, 0);
}
487
488 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
489 {
490         struct in_device *in_dev = __in_dev_get_rtnl(dev);
491
492         ASSERT_RTNL();
493
494         if (!in_dev) {
495                 inet_free_ifa(ifa);
496                 return -ENOBUFS;
497         }
498         ipv4_devconf_setall(in_dev);
499         if (ifa->ifa_dev != in_dev) {
500                 WARN_ON(ifa->ifa_dev);
501                 in_dev_hold(in_dev);
502                 ifa->ifa_dev = in_dev;
503         }
504         if (ipv4_is_loopback(ifa->ifa_local))
505                 ifa->ifa_scope = RT_SCOPE_HOST;
506         return inet_insert_ifa(ifa);
507 }
508
509 /* Caller must hold RCU or RTNL :
510  * We dont take a reference on found in_device
511  */
512 struct in_device *inetdev_by_index(struct net *net, int ifindex)
513 {
514         struct net_device *dev;
515         struct in_device *in_dev = NULL;
516
517         rcu_read_lock();
518         dev = dev_get_by_index_rcu(net, ifindex);
519         if (dev)
520                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
521         rcu_read_unlock();
522         return in_dev;
523 }
524 EXPORT_SYMBOL(inetdev_by_index);
525
526 /* Called only from RTNL semaphored context. No locks. */
527
528 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
529                                     __be32 mask)
530 {
531         ASSERT_RTNL();
532
533         for_primary_ifa(in_dev) {
534                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
535                         return ifa;
536         } endfor_ifa(in_dev);
537         return NULL;
538 }
539
540 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
541 {
542         struct net *net = sock_net(skb->sk);
543         struct nlattr *tb[IFA_MAX+1];
544         struct in_device *in_dev;
545         struct ifaddrmsg *ifm;
546         struct in_ifaddr *ifa, **ifap;
547         int err = -EINVAL;
548
549         ASSERT_RTNL();
550
551         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
552         if (err < 0)
553                 goto errout;
554
555         ifm = nlmsg_data(nlh);
556         in_dev = inetdev_by_index(net, ifm->ifa_index);
557         if (in_dev == NULL) {
558                 err = -ENODEV;
559                 goto errout;
560         }
561
562         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
563              ifap = &ifa->ifa_next) {
564                 if (tb[IFA_LOCAL] &&
565                     ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
566                         continue;
567
568                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
569                         continue;
570
571                 if (tb[IFA_ADDRESS] &&
572                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
573                     !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
574                         continue;
575
576                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
577                 return 0;
578         }
579
580         err = -EADDRNOTAVAIL;
581 errout:
582         return err;
583 }
584
/*
 * Sentinel lifetime value: check_lifetime() compares ifa_valid_lft and
 * ifa_preferred_lft against it, and inet_rtm_newaddr() uses it as the
 * default for both lifetimes.
 */
#define INFINITY_LIFE_TIME      0xFFFFFFFF
586
/*
 * Delayed-work handler that enforces address lifetimes.
 *
 * Each bucket of inet_addr_lst is first scanned under rcu_read_lock() to
 * find out whether any non-permanent address has reached its valid
 * lifetime or must be marked deprecated, and to track the earliest
 * upcoming deadline in "next".  Only when a change is needed is
 * rtnl_lock() taken and the bucket walked a second time: expired
 * addresses are deleted through inet_del_ifa(), and addresses past their
 * preferred lifetime get IFA_F_DEPRECATED and an RTM_NEWADDR message.
 * At the end the work re-arms itself for the computed deadline.
 *
 * @work: unused; the function always operates on check_lifetime_work.
 */
static void check_lifetime(struct work_struct *work)
{
        unsigned long now, next, next_sec, next_sched;
        struct in_ifaddr *ifa;
        struct hlist_node *n;
        int i;

        now = jiffies;
        /* Latest time the next run may happen if nothing expires sooner. */
        next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);

        for (i = 0; i < IN4_ADDR_HSIZE; i++) {
                bool change_needed = false;

                /* Pass 1: read-only scan, no RTNL needed. */
                rcu_read_lock();
                hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
                        unsigned long age;

                        if (ifa->ifa_flags & IFA_F_PERMANENT)
                                continue;

                        /* We try to batch several events at once. */
                        /* age is in seconds: jiffies delta divided by HZ. */
                        age = (now - ifa->ifa_tstamp +
                               ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

                        if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
                            age >= ifa->ifa_valid_lft) {
                                /* Valid lifetime over: must be deleted. */
                                change_needed = true;
                        } else if (ifa->ifa_preferred_lft ==
                                   INFINITY_LIFE_TIME) {
                                continue;
                        } else if (age >= ifa->ifa_preferred_lft) {
                                /* Preferred lifetime over: the next event
                                 * for this address is its valid-lifetime
                                 * expiry.
                                 */
                                if (time_before(ifa->ifa_tstamp +
                                                ifa->ifa_valid_lft * HZ, next))
                                        next = ifa->ifa_tstamp +
                                               ifa->ifa_valid_lft * HZ;

                                if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
                                        change_needed = true;
                        } else if (time_before(ifa->ifa_tstamp +
                                               ifa->ifa_preferred_lft * HZ,
                                               next)) {
                                /* Still preferred: wake up when that ends. */
                                next = ifa->ifa_tstamp +
                                       ifa->ifa_preferred_lft * HZ;
                        }
                }
                rcu_read_unlock();
                if (!change_needed)
                        continue;
                /* Pass 2: modify the bucket's addresses under RTNL.  The
                 * conditions are re-evaluated because pass 1 ran without
                 * RTNL.
                 */
                rtnl_lock();
                hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
                        unsigned long age;

                        if (ifa->ifa_flags & IFA_F_PERMANENT)
                                continue;

                        /* We try to batch several events at once. */
                        age = (now - ifa->ifa_tstamp +
                               ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

                        if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
                            age >= ifa->ifa_valid_lft) {
                                struct in_ifaddr **ifap;

                                /* inet_del_ifa() needs the link pointing
                                 * at ifa, so locate it in the device's
                                 * address list first.
                                 */
                                for (ifap = &ifa->ifa_dev->ifa_list;
                                     *ifap != NULL; ifap = &(*ifap)->ifa_next) {
                                        if (*ifap == ifa) {
                                                inet_del_ifa(ifa->ifa_dev,
                                                             ifap, 1);
                                                break;
                                        }
                                }
                        } else if (ifa->ifa_preferred_lft !=
                                   INFINITY_LIFE_TIME &&
                                   age >= ifa->ifa_preferred_lft &&
                                   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
                                ifa->ifa_flags |= IFA_F_DEPRECATED;
                                rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
                        }
                }
                rtnl_unlock();
        }

        next_sec = round_jiffies_up(next);
        next_sched = next;

        /* If rounded timeout is accurate enough, accept it. */
        if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
                next_sched = next_sec;

        /* Re-read the clock: the loops above may have taken a while. */
        now = jiffies;
        /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
        if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
                next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;

        schedule_delayed_work(&check_lifetime_work, next_sched - now);
}
683
684 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
685                              __u32 prefered_lft)
686 {
687         unsigned long timeout;
688
689         ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
690
691         timeout = addrconf_timeout_fixup(valid_lft, HZ);
692         if (addrconf_finite_timeout(timeout))
693                 ifa->ifa_valid_lft = timeout;
694         else
695                 ifa->ifa_flags |= IFA_F_PERMANENT;
696
697         timeout = addrconf_timeout_fixup(prefered_lft, HZ);
698         if (addrconf_finite_timeout(timeout)) {
699                 if (timeout == 0)
700                         ifa->ifa_flags |= IFA_F_DEPRECATED;
701                 ifa->ifa_preferred_lft = timeout;
702         }
703         ifa->ifa_tstamp = jiffies;
704         if (!ifa->ifa_cstamp)
705                 ifa->ifa_cstamp = ifa->ifa_tstamp;
706 }
707
708 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
709                                        __u32 *pvalid_lft, __u32 *pprefered_lft)
710 {
711         struct nlattr *tb[IFA_MAX+1];
712         struct in_ifaddr *ifa;
713         struct ifaddrmsg *ifm;
714         struct net_device *dev;
715         struct in_device *in_dev;
716         int err;
717
718         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
719         if (err < 0)
720                 goto errout;
721
722         ifm = nlmsg_data(nlh);
723         err = -EINVAL;
724         if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
725                 goto errout;
726
727         dev = __dev_get_by_index(net, ifm->ifa_index);
728         err = -ENODEV;
729         if (dev == NULL)
730                 goto errout;
731
732         in_dev = __in_dev_get_rtnl(dev);
733         err = -ENOBUFS;
734         if (in_dev == NULL)
735                 goto errout;
736
737         ifa = inet_alloc_ifa();
738         if (ifa == NULL)
739                 /*
740                  * A potential indev allocation can be left alive, it stays
741                  * assigned to its device and is destroy with it.
742                  */
743                 goto errout;
744
745         ipv4_devconf_setall(in_dev);
746         in_dev_hold(in_dev);
747
748         if (tb[IFA_ADDRESS] == NULL)
749                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
750
751         INIT_HLIST_NODE(&ifa->hash);
752         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
753         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
754         ifa->ifa_flags = ifm->ifa_flags;
755         ifa->ifa_scope = ifm->ifa_scope;
756         ifa->ifa_dev = in_dev;
757
758         ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
759         ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
760
761         if (tb[IFA_BROADCAST])
762                 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
763
764         if (tb[IFA_LABEL])
765                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
766         else
767                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
768
769         if (tb[IFA_CACHEINFO]) {
770                 struct ifa_cacheinfo *ci;
771
772                 ci = nla_data(tb[IFA_CACHEINFO]);
773                 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
774                         err = -EINVAL;
775                         goto errout_free;
776                 }
777                 *pvalid_lft = ci->ifa_valid;
778                 *pprefered_lft = ci->ifa_prefered;
779         }
780
781         return ifa;
782
783 errout_free:
784         inet_free_ifa(ifa);
785 errout:
786         return ERR_PTR(err);
787 }
788
789 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
790 {
791         struct in_device *in_dev = ifa->ifa_dev;
792         struct in_ifaddr *ifa1, **ifap;
793
794         if (!ifa->ifa_local)
795                 return NULL;
796
797         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
798              ifap = &ifa1->ifa_next) {
799                 if (ifa1->ifa_mask == ifa->ifa_mask &&
800                     inet_ifa_match(ifa1->ifa_address, ifa) &&
801                     ifa1->ifa_local == ifa->ifa_local)
802                         return ifa1;
803         }
804         return NULL;
805 }
806
807 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
808 {
809         struct net *net = sock_net(skb->sk);
810         struct in_ifaddr *ifa;
811         struct in_ifaddr *ifa_existing;
812         __u32 valid_lft = INFINITY_LIFE_TIME;
813         __u32 prefered_lft = INFINITY_LIFE_TIME;
814
815         ASSERT_RTNL();
816
817         ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
818         if (IS_ERR(ifa))
819                 return PTR_ERR(ifa);
820
821         ifa_existing = find_matching_ifa(ifa);
822         if (!ifa_existing) {
823                 /* It would be best to check for !NLM_F_CREATE here but
824                  * userspace alreay relies on not having to provide this.
825                  */
826                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
827                 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
828         } else {
829                 inet_free_ifa(ifa);
830
831                 if (nlh->nlmsg_flags & NLM_F_EXCL ||
832                     !(nlh->nlmsg_flags & NLM_F_REPLACE))
833                         return -EEXIST;
834                 ifa = ifa_existing;
835                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
836                 cancel_delayed_work(&check_lifetime_work);
837                 schedule_delayed_work(&check_lifetime_work, 0);
838                 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
839                 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
840         }
841         return 0;
842 }
843
844 /*
845  *      Determine a default network mask, based on the IP address.
846  */
847
848 static int inet_abc_len(__be32 addr)
849 {
850         int rc = -1;    /* Something else, probably a multicast. */
851
852         if (ipv4_is_zeronet(addr))
853                 rc = 0;
854         else {
855                 __u32 haddr = ntohl(addr);
856
857                 if (IN_CLASSA(haddr))
858                         rc = 8;
859                 else if (IN_CLASSB(haddr))
860                         rc = 16;
861                 else if (IN_CLASSC(haddr))
862                         rc = 24;
863         }
864
865         return rc;
866 }
867
868
869 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
870 {
871         struct ifreq ifr;
872         struct sockaddr_in sin_orig;
873         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
874         struct in_device *in_dev;
875         struct in_ifaddr **ifap = NULL;
876         struct in_ifaddr *ifa = NULL;
877         struct net_device *dev;
878         char *colon;
879         int ret = -EFAULT;
880         int tryaddrmatch = 0;
881
882         /*
883          *      Fetch the caller's info block into kernel space
884          */
885
886         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
887                 goto out;
888         ifr.ifr_name[IFNAMSIZ - 1] = 0;
889
890         /* save original address for comparison */
891         memcpy(&sin_orig, sin, sizeof(*sin));
892
893         colon = strchr(ifr.ifr_name, ':');
894         if (colon)
895                 *colon = 0;
896
897         dev_load(net, ifr.ifr_name);
898
899         switch (cmd) {
900         case SIOCGIFADDR:       /* Get interface address */
901         case SIOCGIFBRDADDR:    /* Get the broadcast address */
902         case SIOCGIFDSTADDR:    /* Get the destination address */
903         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
904                 /* Note that these ioctls will not sleep,
905                    so that we do not impose a lock.
906                    One day we will be forced to put shlock here (I mean SMP)
907                  */
908                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
909                 memset(sin, 0, sizeof(*sin));
910                 sin->sin_family = AF_INET;
911                 break;
912
913         case SIOCSIFFLAGS:
914                 ret = -EPERM;
915                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
916                         goto out;
917                 break;
918         case SIOCSIFADDR:       /* Set interface address (and family) */
919         case SIOCSIFBRDADDR:    /* Set the broadcast address */
920         case SIOCSIFDSTADDR:    /* Set the destination address */
921         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
922         case SIOCKILLADDR:      /* Nuke all sockets on this address */
923                 ret = -EPERM;
924                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
925                         goto out;
926                 ret = -EINVAL;
927                 if (sin->sin_family != AF_INET)
928                         goto out;
929                 break;
930         default:
931                 ret = -EINVAL;
932                 goto out;
933         }
934
935         rtnl_lock();
936
937         ret = -ENODEV;
938         dev = __dev_get_by_name(net, ifr.ifr_name);
939         if (!dev)
940                 goto done;
941
942         if (colon)
943                 *colon = ':';
944
945         in_dev = __in_dev_get_rtnl(dev);
946         if (in_dev) {
947                 if (tryaddrmatch) {
948                         /* Matthias Andree */
949                         /* compare label and address (4.4BSD style) */
950                         /* note: we only do this for a limited set of ioctls
951                            and only if the original address family was AF_INET.
952                            This is checked above. */
953                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
954                              ifap = &ifa->ifa_next) {
955                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
956                                     sin_orig.sin_addr.s_addr ==
957                                                         ifa->ifa_local) {
958                                         break; /* found */
959                                 }
960                         }
961                 }
962                 /* we didn't get a match, maybe the application is
963                    4.3BSD-style and passed in junk so we fall back to
964                    comparing just the label */
965                 if (!ifa) {
966                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
967                              ifap = &ifa->ifa_next)
968                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
969                                         break;
970                 }
971         }
972
973         ret = -EADDRNOTAVAIL;
974         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS
975             && cmd != SIOCKILLADDR)
976                 goto done;
977
978         switch (cmd) {
979         case SIOCGIFADDR:       /* Get interface address */
980                 sin->sin_addr.s_addr = ifa->ifa_local;
981                 goto rarok;
982
983         case SIOCGIFBRDADDR:    /* Get the broadcast address */
984                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
985                 goto rarok;
986
987         case SIOCGIFDSTADDR:    /* Get the destination address */
988                 sin->sin_addr.s_addr = ifa->ifa_address;
989                 goto rarok;
990
991         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
992                 sin->sin_addr.s_addr = ifa->ifa_mask;
993                 goto rarok;
994
995         case SIOCSIFFLAGS:
996                 if (colon) {
997                         ret = -EADDRNOTAVAIL;
998                         if (!ifa)
999                                 break;
1000                         ret = 0;
1001                         if (!(ifr.ifr_flags & IFF_UP))
1002                                 inet_del_ifa(in_dev, ifap, 1);
1003                         break;
1004                 }
1005                 ret = dev_change_flags(dev, ifr.ifr_flags);
1006                 break;
1007
1008         case SIOCSIFADDR:       /* Set interface address (and family) */
1009                 ret = -EINVAL;
1010                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1011                         break;
1012
1013                 if (!ifa) {
1014                         ret = -ENOBUFS;
1015                         ifa = inet_alloc_ifa();
1016                         if (!ifa)
1017                                 break;
1018                         INIT_HLIST_NODE(&ifa->hash);
1019                         if (colon)
1020                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
1021                         else
1022                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1023                 } else {
1024                         ret = 0;
1025                         if (ifa->ifa_local == sin->sin_addr.s_addr)
1026                                 break;
1027                         inet_del_ifa(in_dev, ifap, 0);
1028                         ifa->ifa_broadcast = 0;
1029                         ifa->ifa_scope = 0;
1030                 }
1031
1032                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1033
1034                 if (!(dev->flags & IFF_POINTOPOINT)) {
1035                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1036                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1037                         if ((dev->flags & IFF_BROADCAST) &&
1038                             ifa->ifa_prefixlen < 31)
1039                                 ifa->ifa_broadcast = ifa->ifa_address |
1040                                                      ~ifa->ifa_mask;
1041                 } else {
1042                         ifa->ifa_prefixlen = 32;
1043                         ifa->ifa_mask = inet_make_mask(32);
1044                 }
1045                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1046                 ret = inet_set_ifa(dev, ifa);
1047                 break;
1048
1049         case SIOCSIFBRDADDR:    /* Set the broadcast address */
1050                 ret = 0;
1051                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1052                         inet_del_ifa(in_dev, ifap, 0);
1053                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
1054                         inet_insert_ifa(ifa);
1055                 }
1056                 break;
1057
1058         case SIOCSIFDSTADDR:    /* Set the destination address */
1059                 ret = 0;
1060                 if (ifa->ifa_address == sin->sin_addr.s_addr)
1061                         break;
1062                 ret = -EINVAL;
1063                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1064                         break;
1065                 ret = 0;
1066                 inet_del_ifa(in_dev, ifap, 0);
1067                 ifa->ifa_address = sin->sin_addr.s_addr;
1068                 inet_insert_ifa(ifa);
1069                 break;
1070
1071         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1072
1073                 /*
1074                  *      The mask we set must be legal.
1075                  */
1076                 ret = -EINVAL;
1077                 if (bad_mask(sin->sin_addr.s_addr, 0))
1078                         break;
1079                 ret = 0;
1080                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1081                         __be32 old_mask = ifa->ifa_mask;
1082                         inet_del_ifa(in_dev, ifap, 0);
1083                         ifa->ifa_mask = sin->sin_addr.s_addr;
1084                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1085
1086                         /* See if current broadcast address matches
1087                          * with current netmask, then recalculate
1088                          * the broadcast address. Otherwise it's a
1089                          * funny address, so don't touch it since
1090                          * the user seems to know what (s)he's doing...
1091                          */
1092                         if ((dev->flags & IFF_BROADCAST) &&
1093                             (ifa->ifa_prefixlen < 31) &&
1094                             (ifa->ifa_broadcast ==
1095                              (ifa->ifa_local|~old_mask))) {
1096                                 ifa->ifa_broadcast = (ifa->ifa_local |
1097                                                       ~sin->sin_addr.s_addr);
1098                         }
1099                         inet_insert_ifa(ifa);
1100                 }
1101                 break;
1102         case SIOCKILLADDR:      /* Nuke all connections on this address */
1103                 ret = tcp_nuke_addr(net, (struct sockaddr *) sin);
1104                 break;
1105         }
1106 done:
1107         rtnl_unlock();
1108 out:
1109         return ret;
1110 rarok:
1111         rtnl_unlock();
1112         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
1113         goto out;
1114 }
1115
1116 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1117 {
1118         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1119         struct in_ifaddr *ifa;
1120         struct ifreq ifr;
1121         int done = 0;
1122
1123         if (!in_dev)
1124                 goto out;
1125
1126         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1127                 if (!buf) {
1128                         done += sizeof(ifr);
1129                         continue;
1130                 }
1131                 if (len < (int) sizeof(ifr))
1132                         break;
1133                 memset(&ifr, 0, sizeof(struct ifreq));
1134                 if (ifa->ifa_label)
1135                         strcpy(ifr.ifr_name, ifa->ifa_label);
1136                 else
1137                         strcpy(ifr.ifr_name, dev->name);
1138
1139                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1140                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1141                                                                 ifa->ifa_local;
1142
1143                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1144                         done = -EFAULT;
1145                         break;
1146                 }
1147                 buf  += sizeof(struct ifreq);
1148                 len  -= sizeof(struct ifreq);
1149                 done += sizeof(struct ifreq);
1150         }
1151 out:
1152         return done;
1153 }
1154
1155 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1156 {
1157         __be32 addr = 0;
1158         struct in_device *in_dev;
1159         struct net *net = dev_net(dev);
1160
1161         rcu_read_lock();
1162         in_dev = __in_dev_get_rcu(dev);
1163         if (!in_dev)
1164                 goto no_in_dev;
1165
1166         for_primary_ifa(in_dev) {
1167                 if (ifa->ifa_scope > scope)
1168                         continue;
1169                 if (!dst || inet_ifa_match(dst, ifa)) {
1170                         addr = ifa->ifa_local;
1171                         break;
1172                 }
1173                 if (!addr)
1174                         addr = ifa->ifa_local;
1175         } endfor_ifa(in_dev);
1176
1177         if (addr)
1178                 goto out_unlock;
1179 no_in_dev:
1180
1181         /* Not loopback addresses on loopback should be preferred
1182            in this case. It is importnat that lo is the first interface
1183            in dev_base list.
1184          */
1185         for_each_netdev_rcu(net, dev) {
1186                 in_dev = __in_dev_get_rcu(dev);
1187                 if (!in_dev)
1188                         continue;
1189
1190                 for_primary_ifa(in_dev) {
1191                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
1192                             ifa->ifa_scope <= scope) {
1193                                 addr = ifa->ifa_local;
1194                                 goto out_unlock;
1195                         }
1196                 } endfor_ifa(in_dev);
1197         }
1198 out_unlock:
1199         rcu_read_unlock();
1200         return addr;
1201 }
1202 EXPORT_SYMBOL(inet_select_addr);
1203
1204 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1205                               __be32 local, int scope)
1206 {
1207         int same = 0;
1208         __be32 addr = 0;
1209
1210         for_ifa(in_dev) {
1211                 if (!addr &&
1212                     (local == ifa->ifa_local || !local) &&
1213                     ifa->ifa_scope <= scope) {
1214                         addr = ifa->ifa_local;
1215                         if (same)
1216                                 break;
1217                 }
1218                 if (!same) {
1219                         same = (!local || inet_ifa_match(local, ifa)) &&
1220                                 (!dst || inet_ifa_match(dst, ifa));
1221                         if (same && addr) {
1222                                 if (local || !dst)
1223                                         break;
1224                                 /* Is the selected addr into dst subnet? */
1225                                 if (inet_ifa_match(addr, ifa))
1226                                         break;
1227                                 /* No, then can we use new local src? */
1228                                 if (ifa->ifa_scope <= scope) {
1229                                         addr = ifa->ifa_local;
1230                                         break;
1231                                 }
1232                                 /* search for large dst subnet for addr */
1233                                 same = 0;
1234                         }
1235                 }
1236         } endfor_ifa(in_dev);
1237
1238         return same ? addr : 0;
1239 }
1240
1241 /*
1242  * Confirm that local IP address exists using wildcards:
1243  * - in_dev: only on this interface, 0=any interface
1244  * - dst: only in the same subnet as dst, 0=any dst
1245  * - local: address, 0=autoselect the local address
1246  * - scope: maximum allowed scope value for the local address
1247  */
1248 __be32 inet_confirm_addr(struct in_device *in_dev,
1249                          __be32 dst, __be32 local, int scope)
1250 {
1251         __be32 addr = 0;
1252         struct net_device *dev;
1253         struct net *net;
1254
1255         if (scope != RT_SCOPE_LINK)
1256                 return confirm_addr_indev(in_dev, dst, local, scope);
1257
1258         net = dev_net(in_dev->dev);
1259         rcu_read_lock();
1260         for_each_netdev_rcu(net, dev) {
1261                 in_dev = __in_dev_get_rcu(dev);
1262                 if (in_dev) {
1263                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1264                         if (addr)
1265                                 break;
1266                 }
1267         }
1268         rcu_read_unlock();
1269
1270         return addr;
1271 }
1272 EXPORT_SYMBOL(inet_confirm_addr);
1273
1274 /*
1275  *      Device notifier
1276  */
1277
1278 int register_inetaddr_notifier(struct notifier_block *nb)
1279 {
1280         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1281 }
1282 EXPORT_SYMBOL(register_inetaddr_notifier);
1283
1284 int unregister_inetaddr_notifier(struct notifier_block *nb)
1285 {
1286         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1287 }
1288 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1289
1290 /* Rename ifa_labels for a device name change. Make some effort to preserve
1291  * existing alias numbering and to create unique labels if possible.
1292 */
1293 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1294 {
1295         struct in_ifaddr *ifa;
1296         int named = 0;
1297
1298         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1299                 char old[IFNAMSIZ], *dot;
1300
1301                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1302                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1303                 if (named++ == 0)
1304                         goto skip;
1305                 dot = strchr(old, ':');
1306                 if (dot == NULL) {
1307                         sprintf(old, ":%d", named);
1308                         dot = old;
1309                 }
1310                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1311                         strcat(ifa->ifa_label, dot);
1312                 else
1313                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1314 skip:
1315                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1316         }
1317 }
1318
/*
 * Tell whether @mtu is large enough for IPv4 to run on the device.
 * The threshold is 68 bytes; smaller values are rejected.
 */
static bool inetdev_valid_mtu(unsigned int mtu)
{
	if (mtu < 68)
		return false;

	return true;
}
1323
1324 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1325                                         struct in_device *in_dev)
1326
1327 {
1328         struct in_ifaddr *ifa;
1329
1330         for (ifa = in_dev->ifa_list; ifa;
1331              ifa = ifa->ifa_next) {
1332                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1333                          ifa->ifa_local, dev,
1334                          ifa->ifa_local, NULL,
1335                          dev->dev_addr, NULL);
1336         }
1337 }
1338
1339 /* Called only under RTNL semaphore */
1340
1341 static int inetdev_event(struct notifier_block *this, unsigned long event,
1342                          void *ptr)
1343 {
1344         struct net_device *dev = ptr;
1345         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1346
1347         ASSERT_RTNL();
1348
1349         if (!in_dev) {
1350                 if (event == NETDEV_REGISTER) {
1351                         in_dev = inetdev_init(dev);
1352                         if (!in_dev)
1353                                 return notifier_from_errno(-ENOMEM);
1354                         if (dev->flags & IFF_LOOPBACK) {
1355                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1356                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1357                         }
1358                 } else if (event == NETDEV_CHANGEMTU) {
1359                         /* Re-enabling IP */
1360                         if (inetdev_valid_mtu(dev->mtu))
1361                                 in_dev = inetdev_init(dev);
1362                 }
1363                 goto out;
1364         }
1365
1366         switch (event) {
1367         case NETDEV_REGISTER:
1368                 pr_debug("%s: bug\n", __func__);
1369                 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1370                 break;
1371         case NETDEV_UP:
1372                 if (!inetdev_valid_mtu(dev->mtu))
1373                         break;
1374                 if (dev->flags & IFF_LOOPBACK) {
1375                         struct in_ifaddr *ifa = inet_alloc_ifa();
1376
1377                         if (ifa) {
1378                                 INIT_HLIST_NODE(&ifa->hash);
1379                                 ifa->ifa_local =
1380                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1381                                 ifa->ifa_prefixlen = 8;
1382                                 ifa->ifa_mask = inet_make_mask(8);
1383                                 in_dev_hold(in_dev);
1384                                 ifa->ifa_dev = in_dev;
1385                                 ifa->ifa_scope = RT_SCOPE_HOST;
1386                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1387                                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1388                                                  INFINITY_LIFE_TIME);
1389                                 inet_insert_ifa(ifa);
1390                         }
1391                 }
1392                 ip_mc_up(in_dev);
1393                 /* fall through */
1394         case NETDEV_CHANGEADDR:
1395                 if (!IN_DEV_ARP_NOTIFY(in_dev))
1396                         break;
1397                 /* fall through */
1398         case NETDEV_NOTIFY_PEERS:
1399                 /* Send gratuitous ARP to notify of link change */
1400                 inetdev_send_gratuitous_arp(dev, in_dev);
1401                 break;
1402         case NETDEV_DOWN:
1403                 ip_mc_down(in_dev);
1404                 break;
1405         case NETDEV_PRE_TYPE_CHANGE:
1406                 ip_mc_unmap(in_dev);
1407                 break;
1408         case NETDEV_POST_TYPE_CHANGE:
1409                 ip_mc_remap(in_dev);
1410                 break;
1411         case NETDEV_CHANGEMTU:
1412                 if (inetdev_valid_mtu(dev->mtu))
1413                         break;
1414                 /* disable IP when MTU is not enough */
1415         case NETDEV_UNREGISTER:
1416                 inetdev_destroy(in_dev);
1417                 break;
1418         case NETDEV_CHANGENAME:
1419                 /* Do not notify about label change, this event is
1420                  * not interesting to applications using netlink.
1421                  */
1422                 inetdev_changename(dev, in_dev);
1423
1424                 devinet_sysctl_unregister(in_dev);
1425                 devinet_sysctl_register(in_dev);
1426                 break;
1427         }
1428 out:
1429         return NOTIFY_DONE;
1430 }
1431
/* Notifier block whose callback is inetdev_event(); where it gets
 * registered is not visible in this part of the file.
 */
static struct notifier_block ip_netdev_notifier = {
	.notifier_call = inetdev_event,
};
1435
1436 static size_t inet_nlmsg_size(void)
1437 {
1438         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1439                + nla_total_size(4) /* IFA_ADDRESS */
1440                + nla_total_size(4) /* IFA_LOCAL */
1441                + nla_total_size(4) /* IFA_BROADCAST */
1442                + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1443                + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1444 }
1445
1446 static inline u32 cstamp_delta(unsigned long cstamp)
1447 {
1448         return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1449 }
1450
1451 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1452                          unsigned long tstamp, u32 preferred, u32 valid)
1453 {
1454         struct ifa_cacheinfo ci;
1455
1456         ci.cstamp = cstamp_delta(cstamp);
1457         ci.tstamp = cstamp_delta(tstamp);
1458         ci.ifa_prefered = preferred;
1459         ci.ifa_valid = valid;
1460
1461         return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1462 }
1463
1464 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1465                             u32 portid, u32 seq, int event, unsigned int flags)
1466 {
1467         struct ifaddrmsg *ifm;
1468         struct nlmsghdr  *nlh;
1469         u32 preferred, valid;
1470
1471         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1472         if (nlh == NULL)
1473                 return -EMSGSIZE;
1474
1475         ifm = nlmsg_data(nlh);
1476         ifm->ifa_family = AF_INET;
1477         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1478         ifm->ifa_flags = ifa->ifa_flags;
1479         ifm->ifa_scope = ifa->ifa_scope;
1480         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1481
1482         if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1483                 preferred = ifa->ifa_preferred_lft;
1484                 valid = ifa->ifa_valid_lft;
1485                 if (preferred != INFINITY_LIFE_TIME) {
1486                         long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1487
1488                         if (preferred > tval)
1489                                 preferred -= tval;
1490                         else
1491                                 preferred = 0;
1492                         if (valid != INFINITY_LIFE_TIME) {
1493                                 if (valid > tval)
1494                                         valid -= tval;
1495                                 else
1496                                         valid = 0;
1497                         }
1498                 }
1499         } else {
1500                 preferred = INFINITY_LIFE_TIME;
1501                 valid = INFINITY_LIFE_TIME;
1502         }
1503         if ((ifa->ifa_address &&
1504              nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1505             (ifa->ifa_local &&
1506              nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1507             (ifa->ifa_broadcast &&
1508              nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1509             (ifa->ifa_label[0] &&
1510              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1511             put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1512                           preferred, valid))
1513                 goto nla_put_failure;
1514
1515         return nlmsg_end(skb, nlh);
1516
1517 nla_put_failure:
1518         nlmsg_cancel(skb, nlh);
1519         return -EMSGSIZE;
1520 }
1521
1522 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1523 {
1524         struct net *net = sock_net(skb->sk);
1525         int h, s_h;
1526         int idx, s_idx;
1527         int ip_idx, s_ip_idx;
1528         struct net_device *dev;
1529         struct in_device *in_dev;
1530         struct in_ifaddr *ifa;
1531         struct hlist_head *head;
1532
1533         s_h = cb->args[0];
1534         s_idx = idx = cb->args[1];
1535         s_ip_idx = ip_idx = cb->args[2];
1536
1537         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1538                 idx = 0;
1539                 head = &net->dev_index_head[h];
1540                 rcu_read_lock();
1541                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1542                           net->dev_base_seq;
1543                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1544                         if (idx < s_idx)
1545                                 goto cont;
1546                         if (h > s_h || idx > s_idx)
1547                                 s_ip_idx = 0;
1548                         in_dev = __in_dev_get_rcu(dev);
1549                         if (!in_dev)
1550                                 goto cont;
1551
1552                         for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1553                              ifa = ifa->ifa_next, ip_idx++) {
1554                                 if (ip_idx < s_ip_idx)
1555                                         continue;
1556                                 if (inet_fill_ifaddr(skb, ifa,
1557                                              NETLINK_CB(cb->skb).portid,
1558                                              cb->nlh->nlmsg_seq,
1559                                              RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1560                                         rcu_read_unlock();
1561                                         goto done;
1562                                 }
1563                                 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1564                         }
1565 cont:
1566                         idx++;
1567                 }
1568                 rcu_read_unlock();
1569         }
1570
1571 done:
1572         cb->args[0] = h;
1573         cb->args[1] = idx;
1574         cb->args[2] = ip_idx;
1575
1576         return skb->len;
1577 }
1578
1579 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1580                       u32 portid)
1581 {
1582         struct sk_buff *skb;
1583         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1584         int err = -ENOBUFS;
1585         struct net *net;
1586
1587         net = dev_net(ifa->ifa_dev->dev);
1588         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1589         if (skb == NULL)
1590                 goto errout;
1591
1592         err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1593         if (err < 0) {
1594                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1595                 WARN_ON(err == -EMSGSIZE);
1596                 kfree_skb(skb);
1597                 goto errout;
1598         }
1599         rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1600         return;
1601 errout:
1602         if (err < 0)
1603                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1604 }
1605
1606 static size_t inet_get_link_af_size(const struct net_device *dev)
1607 {
1608         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1609
1610         if (!in_dev)
1611                 return 0;
1612
1613         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1614 }
1615
1616 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1617 {
1618         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1619         struct nlattr *nla;
1620         int i;
1621
1622         if (!in_dev)
1623                 return -ENODATA;
1624
1625         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1626         if (nla == NULL)
1627                 return -EMSGSIZE;
1628
1629         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1630                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1631
1632         return 0;
1633 }
1634
/* Attribute policy handed to nla_parse_nested() by inet_validate_link_af():
 * IFLA_INET_CONF must itself be a nested attribute.
 */
static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
};
1638
1639 static int inet_validate_link_af(const struct net_device *dev,
1640                                  const struct nlattr *nla)
1641 {
1642         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1643         int err, rem;
1644
1645         if (dev && !__in_dev_get_rtnl(dev))
1646                 return -EAFNOSUPPORT;
1647
1648         err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1649         if (err < 0)
1650                 return err;
1651
1652         if (tb[IFLA_INET_CONF]) {
1653                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1654                         int cfgid = nla_type(a);
1655
1656                         if (nla_len(a) < 4)
1657                                 return -EINVAL;
1658
1659                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1660                                 return -EINVAL;
1661                 }
1662         }
1663
1664         return 0;
1665 }
1666
1667 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1668 {
1669         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1670         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1671         int rem;
1672
1673         if (!in_dev)
1674                 return -EAFNOSUPPORT;
1675
1676         if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1677                 BUG();
1678
1679         if (tb[IFLA_INET_CONF]) {
1680                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1681                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1682         }
1683
1684         return 0;
1685 }
1686
1687 static int inet_netconf_msgsize_devconf(int type)
1688 {
1689         int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1690                    + nla_total_size(4); /* NETCONFA_IFINDEX */
1691
1692         /* type -1 is used for ALL */
1693         if (type == -1 || type == NETCONFA_FORWARDING)
1694                 size += nla_total_size(4);
1695         if (type == -1 || type == NETCONFA_RP_FILTER)
1696                 size += nla_total_size(4);
1697         if (type == -1 || type == NETCONFA_MC_FORWARDING)
1698                 size += nla_total_size(4);
1699
1700         return size;
1701 }
1702
1703 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1704                                      struct ipv4_devconf *devconf, u32 portid,
1705                                      u32 seq, int event, unsigned int flags,
1706                                      int type)
1707 {
1708         struct nlmsghdr  *nlh;
1709         struct netconfmsg *ncm;
1710
1711         nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1712                         flags);
1713         if (nlh == NULL)
1714                 return -EMSGSIZE;
1715
1716         ncm = nlmsg_data(nlh);
1717         ncm->ncm_family = AF_INET;
1718
1719         if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1720                 goto nla_put_failure;
1721
1722         /* type -1 is used for ALL */
1723         if ((type == -1 || type == NETCONFA_FORWARDING) &&
1724             nla_put_s32(skb, NETCONFA_FORWARDING,
1725                         IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1726                 goto nla_put_failure;
1727         if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1728             nla_put_s32(skb, NETCONFA_RP_FILTER,
1729                         IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1730                 goto nla_put_failure;
1731         if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1732             nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1733                         IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1734                 goto nla_put_failure;
1735
1736         return nlmsg_end(skb, nlh);
1737
1738 nla_put_failure:
1739         nlmsg_cancel(skb, nlh);
1740         return -EMSGSIZE;
1741 }
1742
1743 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1744                                  struct ipv4_devconf *devconf)
1745 {
1746         struct sk_buff *skb;
1747         int err = -ENOBUFS;
1748
1749         skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1750         if (skb == NULL)
1751                 goto errout;
1752
1753         err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1754                                         RTM_NEWNETCONF, 0, type);
1755         if (err < 0) {
1756                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1757                 WARN_ON(err == -EMSGSIZE);
1758                 kfree_skb(skb);
1759                 goto errout;
1760         }
1761         rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1762         return;
1763 errout:
1764         if (err < 0)
1765                 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1766 }
1767
1768 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1769         [NETCONFA_IFINDEX]      = { .len = sizeof(int) },
1770         [NETCONFA_FORWARDING]   = { .len = sizeof(int) },
1771         [NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
1772 };
1773
1774 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1775                                     struct nlmsghdr *nlh)
1776 {
1777         struct net *net = sock_net(in_skb->sk);
1778         struct nlattr *tb[NETCONFA_MAX+1];
1779         struct netconfmsg *ncm;
1780         struct sk_buff *skb;
1781         struct ipv4_devconf *devconf;
1782         struct in_device *in_dev;
1783         struct net_device *dev;
1784         int ifindex;
1785         int err;
1786
1787         err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1788                           devconf_ipv4_policy);
1789         if (err < 0)
1790                 goto errout;
1791
1792         err = EINVAL;
1793         if (!tb[NETCONFA_IFINDEX])
1794                 goto errout;
1795
1796         ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1797         switch (ifindex) {
1798         case NETCONFA_IFINDEX_ALL:
1799                 devconf = net->ipv4.devconf_all;
1800                 break;
1801         case NETCONFA_IFINDEX_DEFAULT:
1802                 devconf = net->ipv4.devconf_dflt;
1803                 break;
1804         default:
1805                 dev = __dev_get_by_index(net, ifindex);
1806                 if (dev == NULL)
1807                         goto errout;
1808                 in_dev = __in_dev_get_rtnl(dev);
1809                 if (in_dev == NULL)
1810                         goto errout;
1811                 devconf = &in_dev->cnf;
1812                 break;
1813         }
1814
1815         err = -ENOBUFS;
1816         skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1817         if (skb == NULL)
1818                 goto errout;
1819
1820         err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1821                                         NETLINK_CB(in_skb).portid,
1822                                         nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1823                                         -1);
1824         if (err < 0) {
1825                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1826                 WARN_ON(err == -EMSGSIZE);
1827                 kfree_skb(skb);
1828                 goto errout;
1829         }
1830         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1831 errout:
1832         return err;
1833 }
1834
1835 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1836                                      struct netlink_callback *cb)
1837 {
1838         struct net *net = sock_net(skb->sk);
1839         int h, s_h;
1840         int idx, s_idx;
1841         struct net_device *dev;
1842         struct in_device *in_dev;
1843         struct hlist_head *head;
1844
1845         s_h = cb->args[0];
1846         s_idx = idx = cb->args[1];
1847
1848         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1849                 idx = 0;
1850                 head = &net->dev_index_head[h];
1851                 rcu_read_lock();
1852                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1853                           net->dev_base_seq;
1854                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1855                         if (idx < s_idx)
1856                                 goto cont;
1857                         in_dev = __in_dev_get_rcu(dev);
1858                         if (!in_dev)
1859                                 goto cont;
1860
1861                         if (inet_netconf_fill_devconf(skb, dev->ifindex,
1862                                                       &in_dev->cnf,
1863                                                       NETLINK_CB(cb->skb).portid,
1864                                                       cb->nlh->nlmsg_seq,
1865                                                       RTM_NEWNETCONF,
1866                                                       NLM_F_MULTI,
1867                                                       -1) <= 0) {
1868                                 rcu_read_unlock();
1869                                 goto done;
1870                         }
1871                         nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1872 cont:
1873                         idx++;
1874                 }
1875                 rcu_read_unlock();
1876         }
1877         if (h == NETDEV_HASHENTRIES) {
1878                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
1879                                               net->ipv4.devconf_all,
1880                                               NETLINK_CB(cb->skb).portid,
1881                                               cb->nlh->nlmsg_seq,
1882                                               RTM_NEWNETCONF, NLM_F_MULTI,
1883                                               -1) <= 0)
1884                         goto done;
1885                 else
1886                         h++;
1887         }
1888         if (h == NETDEV_HASHENTRIES + 1) {
1889                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
1890                                               net->ipv4.devconf_dflt,
1891                                               NETLINK_CB(cb->skb).portid,
1892                                               cb->nlh->nlmsg_seq,
1893                                               RTM_NEWNETCONF, NLM_F_MULTI,
1894                                               -1) <= 0)
1895                         goto done;
1896                 else
1897                         h++;
1898         }
1899 done:
1900         cb->args[0] = h;
1901         cb->args[1] = idx;
1902
1903         return skb->len;
1904 }
1905
1906 #ifdef CONFIG_SYSCTL
1907
1908 static void devinet_copy_dflt_conf(struct net *net, int i)
1909 {
1910         struct net_device *dev;
1911
1912         rcu_read_lock();
1913         for_each_netdev_rcu(net, dev) {
1914                 struct in_device *in_dev;
1915
1916                 in_dev = __in_dev_get_rcu(dev);
1917                 if (in_dev && !test_bit(i, in_dev->cnf.state))
1918                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1919         }
1920         rcu_read_unlock();
1921 }
1922
1923 /* called with RTNL locked */
1924 static void inet_forward_change(struct net *net)
1925 {
1926         struct net_device *dev;
1927         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1928
1929         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1930         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1931         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1932                                     NETCONFA_IFINDEX_ALL,
1933                                     net->ipv4.devconf_all);
1934         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1935                                     NETCONFA_IFINDEX_DEFAULT,
1936                                     net->ipv4.devconf_dflt);
1937
1938         for_each_netdev(net, dev) {
1939                 struct in_device *in_dev;
1940                 if (on)
1941                         dev_disable_lro(dev);
1942                 rcu_read_lock();
1943                 in_dev = __in_dev_get_rcu(dev);
1944                 if (in_dev) {
1945                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1946                         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1947                                                     dev->ifindex, &in_dev->cnf);
1948                 }
1949                 rcu_read_unlock();
1950         }
1951 }
1952
1953 static int devinet_conf_proc(ctl_table *ctl, int write,
1954                              void __user *buffer,
1955                              size_t *lenp, loff_t *ppos)
1956 {
1957         int old_value = *(int *)ctl->data;
1958         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1959         int new_value = *(int *)ctl->data;
1960
1961         if (write) {
1962                 struct ipv4_devconf *cnf = ctl->extra1;
1963                 struct net *net = ctl->extra2;
1964                 int i = (int *)ctl->data - cnf->data;
1965
1966                 set_bit(i, cnf->state);
1967
1968                 if (cnf == net->ipv4.devconf_dflt)
1969                         devinet_copy_dflt_conf(net, i);
1970                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
1971                     i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
1972                         if ((new_value == 0) && (old_value != 0))
1973                                 rt_cache_flush(net);
1974                 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
1975                     new_value != old_value) {
1976                         int ifindex;
1977
1978                         if (cnf == net->ipv4.devconf_dflt)
1979                                 ifindex = NETCONFA_IFINDEX_DEFAULT;
1980                         else if (cnf == net->ipv4.devconf_all)
1981                                 ifindex = NETCONFA_IFINDEX_ALL;
1982                         else {
1983                                 struct in_device *idev =
1984                                         container_of(cnf, struct in_device,
1985                                                      cnf);
1986                                 ifindex = idev->dev->ifindex;
1987                         }
1988                         inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
1989                                                     ifindex, cnf);
1990                 }
1991         }
1992
1993         return ret;
1994 }
1995
1996 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1997                                   void __user *buffer,
1998                                   size_t *lenp, loff_t *ppos)
1999 {
2000         int *valp = ctl->data;
2001         int val = *valp;
2002         loff_t pos = *ppos;
2003         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2004
2005         if (write && *valp != val) {
2006                 struct net *net = ctl->extra2;
2007
2008                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2009                         if (!rtnl_trylock()) {
2010                                 /* Restore the original values before restarting */
2011                                 *valp = val;
2012                                 *ppos = pos;
2013                                 return restart_syscall();
2014                         }
2015                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2016                                 inet_forward_change(net);
2017                         } else {
2018                                 struct ipv4_devconf *cnf = ctl->extra1;
2019                                 struct in_device *idev =
2020                                         container_of(cnf, struct in_device, cnf);
2021                                 if (*valp)
2022                                         dev_disable_lro(idev->dev);
2023                                 inet_netconf_notify_devconf(net,
2024                                                             NETCONFA_FORWARDING,
2025                                                             idev->dev->ifindex,
2026                                                             cnf);
2027                         }
2028                         rtnl_unlock();
2029                         rt_cache_flush(net);
2030                 } else
2031                         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2032                                                     NETCONFA_IFINDEX_DEFAULT,
2033                                                     net->ipv4.devconf_dflt);
2034         }
2035
2036         return ret;
2037 }
2038
2039 static int ipv4_doint_and_flush(ctl_table *ctl, int write,
2040                                 void __user *buffer,
2041                                 size_t *lenp, loff_t *ppos)
2042 {
2043         int *valp = ctl->data;
2044         int val = *valp;
2045         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2046         struct net *net = ctl->extra2;
2047
2048         if (write && *valp != val)
2049                 rt_cache_flush(net);
2050
2051         return ret;
2052 }
2053
/*
 * Initialiser for one ctl_table entry of the template table.
 *
 * @attr is the IPV4_DEVCONF_* suffix, @name the procfs file name, @mval
 * the file mode and @proc the handler.  .data points into the global
 * ipv4_devconf at slot IPV4_DEVCONF_<attr> - 1 and .extra1 at ipv4_devconf
 * itself.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc)			\
	{								\
		.procname	= name,					\
		.data		= &ipv4_devconf.data[			\
					IPV4_DEVCONF_ ## attr - 1],	\
		.maxlen		= sizeof(int),				\
		.mode		= mval,					\
		.proc_handler	= proc,					\
		.extra1		= &ipv4_devconf,			\
	}

/* Read-write entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name)				\
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Read-only entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name)				\
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Read-write entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc)			\
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Read-write entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name)			\
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, ipv4_doint_and_flush)
2076
2077 static struct devinet_sysctl_table {
2078         struct ctl_table_header *sysctl_header;
2079         struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2080 } devinet_sysctl = {
2081         .devinet_vars = {
2082                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2083                                              devinet_sysctl_forward),
2084                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2085
2086                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2087                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2088                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2089                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2090                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2091                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2092                                         "accept_source_route"),
2093                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2094                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2095                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2096                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2097                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2098                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2099                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2100                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2101                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2102                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2103                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2104                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2105                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2106
2107                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2108                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2109                 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
2110                                               "force_igmp_version"),
2111                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2112                                               "promote_secondaries"),
2113                 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2114                                               "route_localnet"),
2115         },
2116 };
2117
2118 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2119                                         struct ipv4_devconf *p)
2120 {
2121         int i;
2122         struct devinet_sysctl_table *t;
2123         char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2124
2125         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2126         if (!t)
2127                 goto out;
2128
2129         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2130                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2131                 t->devinet_vars[i].extra1 = p;
2132                 t->devinet_vars[i].extra2 = net;
2133         }
2134
2135         snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2136
2137         t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2138         if (!t->sysctl_header)
2139                 goto free;
2140
2141         p->sysctl = t;
2142         return 0;
2143
2144 free:
2145         kfree(t);
2146 out:
2147         return -ENOBUFS;
2148 }
2149
2150 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2151 {
2152         struct devinet_sysctl_table *t = cnf->sysctl;
2153
2154         if (t == NULL)
2155                 return;
2156
2157         cnf->sysctl = NULL;
2158         unregister_net_sysctl_table(t->sysctl_header);
2159         kfree(t);
2160 }
2161
2162 static void devinet_sysctl_register(struct in_device *idev)
2163 {
2164         neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
2165         __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2166                                         &idev->cnf);
2167 }
2168
2169 static void devinet_sysctl_unregister(struct in_device *idev)
2170 {
2171         __devinet_sysctl_unregister(&idev->cnf);
2172         neigh_sysctl_unregister(idev->arp_parms);
2173 }
2174
2175 static struct ctl_table ctl_forward_entry[] = {
2176         {
2177                 .procname       = "ip_forward",
2178                 .data           = &ipv4_devconf.data[
2179                                         IPV4_DEVCONF_FORWARDING - 1],
2180                 .maxlen         = sizeof(int),
2181                 .mode           = 0644,
2182                 .proc_handler   = devinet_sysctl_forward,
2183                 .extra1         = &ipv4_devconf,
2184                 .extra2         = &init_net,
2185         },
2186         { },
2187 };
2188 #endif
2189
2190 static __net_init int devinet_init_net(struct net *net)
2191 {
2192         int err;
2193         struct ipv4_devconf *all, *dflt;
2194 #ifdef CONFIG_SYSCTL
2195         struct ctl_table *tbl = ctl_forward_entry;
2196         struct ctl_table_header *forw_hdr;
2197 #endif
2198
2199         err = -ENOMEM;
2200         all = &ipv4_devconf;
2201         dflt = &ipv4_devconf_dflt;
2202
2203         if (!net_eq(net, &init_net)) {
2204                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2205                 if (all == NULL)
2206                         goto err_alloc_all;
2207
2208                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2209                 if (dflt == NULL)
2210                         goto err_alloc_dflt;
2211
2212 #ifdef CONFIG_SYSCTL
2213                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2214                 if (tbl == NULL)
2215                         goto err_alloc_ctl;
2216
2217                 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2218                 tbl[0].extra1 = all;
2219                 tbl[0].extra2 = net;
2220 #endif
2221         }
2222
2223 #ifdef CONFIG_SYSCTL
2224         err = __devinet_sysctl_register(net, "all", all);
2225         if (err < 0)
2226                 goto err_reg_all;
2227
2228         err = __devinet_sysctl_register(net, "default", dflt);
2229         if (err < 0)
2230                 goto err_reg_dflt;
2231
2232         err = -ENOMEM;
2233         forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2234         if (forw_hdr == NULL)
2235                 goto err_reg_ctl;
2236         net->ipv4.forw_hdr = forw_hdr;
2237 #endif
2238
2239         net->ipv4.devconf_all = all;
2240         net->ipv4.devconf_dflt = dflt;
2241         return 0;
2242
2243 #ifdef CONFIG_SYSCTL
2244 err_reg_ctl:
2245         __devinet_sysctl_unregister(dflt);
2246 err_reg_dflt:
2247         __devinet_sysctl_unregister(all);
2248 err_reg_all:
2249         if (tbl != ctl_forward_entry)
2250                 kfree(tbl);
2251 err_alloc_ctl:
2252 #endif
2253         if (dflt != &ipv4_devconf_dflt)
2254                 kfree(dflt);
2255 err_alloc_dflt:
2256         if (all != &ipv4_devconf)
2257                 kfree(all);
2258 err_alloc_all:
2259         return err;
2260 }
2261
2262 static __net_exit void devinet_exit_net(struct net *net)
2263 {
2264 #ifdef CONFIG_SYSCTL
2265         struct ctl_table *tbl;
2266
2267         tbl = net->ipv4.forw_hdr->ctl_table_arg;
2268         unregister_net_sysctl_table(net->ipv4.forw_hdr);
2269         __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2270         __devinet_sysctl_unregister(net->ipv4.devconf_all);
2271         kfree(tbl);
2272 #endif
2273         kfree(net->ipv4.devconf_dflt);
2274         kfree(net->ipv4.devconf_all);
2275 }
2276
2277 static __net_initdata struct pernet_operations devinet_ops = {
2278         .init = devinet_init_net,
2279         .exit = devinet_exit_net,
2280 };
2281
2282 static struct rtnl_af_ops inet_af_ops = {
2283         .family           = AF_INET,
2284         .fill_link_af     = inet_fill_link_af,
2285         .get_link_af_size = inet_get_link_af_size,
2286         .validate_link_af = inet_validate_link_af,
2287         .set_link_af      = inet_set_link_af,
2288 };
2289
2290 void __init devinet_init(void)
2291 {
2292         int i;
2293
2294         for (i = 0; i < IN4_ADDR_HSIZE; i++)
2295                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2296
2297         register_pernet_subsys(&devinet_ops);
2298
2299         register_gifconf(PF_INET, inet_gifconf);
2300         register_netdevice_notifier(&ip_netdev_notifier);
2301
2302         schedule_delayed_work(&check_lifetime_work, 0);
2303
2304         rtnl_af_register(&inet_af_ops);
2305
2306         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2307         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2308         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2309         rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2310                       inet_netconf_dump_devconf, NULL);
2311 }
2312