Merge branch 'linux-linaro-lsk-v4.4' into linux-linaro-lsk-v4.4-android
[firefly-linux-kernel-4.4.55.git] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
59
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/tcp.h>
63 #include <net/route.h>
64 #include <net/ip_fib.h>
65 #include <net/rtnetlink.h>
66 #include <net/net_namespace.h>
67 #include <net/addrconf.h>
68
69 #include "fib_lookup.h"
70
71 static struct ipv4_devconf ipv4_devconf = {
72         .data = {
73                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
74                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
75                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
76                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
77                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
78                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
79         },
80 };
81
82 static struct ipv4_devconf ipv4_devconf_dflt = {
83         .data = {
84                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
85                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
86                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
87                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
88                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
89                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
90                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
91         },
92 };
93
/*
 * Read attribute @attr from the devconf_dflt block of namespace @net.
 * @net is parenthesised so that any pointer-valued expression (not just a
 * plain identifier) can be passed safely.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
96
97 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
98         [IFA_LOCAL]             = { .type = NLA_U32 },
99         [IFA_ADDRESS]           = { .type = NLA_U32 },
100         [IFA_BROADCAST]         = { .type = NLA_U32 },
101         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
102         [IFA_CACHEINFO]         = { .len = sizeof(struct ifa_cacheinfo) },
103         [IFA_FLAGS]             = { .type = NLA_U32 },
104 };
105
106 #define IN4_ADDR_HSIZE_SHIFT    8
107 #define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)
108
109 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
110
111 static u32 inet_addr_hash(const struct net *net, __be32 addr)
112 {
113         u32 val = (__force u32) addr ^ net_hash_mix(net);
114
115         return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
116 }
117
118 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
119 {
120         u32 hash = inet_addr_hash(net, ifa->ifa_local);
121
122         ASSERT_RTNL();
123         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
124 }
125
126 static void inet_hash_remove(struct in_ifaddr *ifa)
127 {
128         ASSERT_RTNL();
129         hlist_del_init_rcu(&ifa->hash);
130 }
131
132 /**
133  * __ip_dev_find - find the first device with a given source address.
134  * @net: the net namespace
135  * @addr: the source address
136  * @devref: if true, take a reference on the found device
137  *
138  * If a caller uses devref=false, it should be protected by RCU, or RTNL
139  */
140 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
141 {
142         u32 hash = inet_addr_hash(net, addr);
143         struct net_device *result = NULL;
144         struct in_ifaddr *ifa;
145
146         rcu_read_lock();
147         hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
148                 if (ifa->ifa_local == addr) {
149                         struct net_device *dev = ifa->ifa_dev->dev;
150
151                         if (!net_eq(dev_net(dev), net))
152                                 continue;
153                         result = dev;
154                         break;
155                 }
156         }
157         if (!result) {
158                 struct flowi4 fl4 = { .daddr = addr };
159                 struct fib_result res = { 0 };
160                 struct fib_table *local;
161
162                 /* Fallback to FIB local table so that communication
163                  * over loopback subnets work.
164                  */
165                 local = fib_get_table(net, RT_TABLE_LOCAL);
166                 if (local &&
167                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
168                     res.type == RTN_LOCAL)
169                         result = FIB_RES_DEV(res);
170         }
171         if (result && devref)
172                 dev_hold(result);
173         rcu_read_unlock();
174         return result;
175 }
176 EXPORT_SYMBOL(__ip_dev_find);
177
178 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
179
180 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
181 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
182                          int destroy);
183 #ifdef CONFIG_SYSCTL
184 static int devinet_sysctl_register(struct in_device *idev);
185 static void devinet_sysctl_unregister(struct in_device *idev);
186 #else
187 static int devinet_sysctl_register(struct in_device *idev)
188 {
189         return 0;
190 }
191 static void devinet_sysctl_unregister(struct in_device *idev)
192 {
193 }
194 #endif
195
196 /* Locks all the inet devices. */
197
198 static struct in_ifaddr *inet_alloc_ifa(void)
199 {
200         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
201 }
202
203 static void inet_rcu_free_ifa(struct rcu_head *head)
204 {
205         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
206         if (ifa->ifa_dev)
207                 in_dev_put(ifa->ifa_dev);
208         kfree(ifa);
209 }
210
211 static void inet_free_ifa(struct in_ifaddr *ifa)
212 {
213         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
214 }
215
216 void in_dev_finish_destroy(struct in_device *idev)
217 {
218         struct net_device *dev = idev->dev;
219
220         WARN_ON(idev->ifa_list);
221         WARN_ON(idev->mc_list);
222         kfree(rcu_dereference_protected(idev->mc_hash, 1));
223 #ifdef NET_REFCNT_DEBUG
224         pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
225 #endif
226         dev_put(dev);
227         if (!idev->dead)
228                 pr_err("Freeing alive in_device %p\n", idev);
229         else
230                 kfree(idev);
231 }
232 EXPORT_SYMBOL(in_dev_finish_destroy);
233
234 static struct in_device *inetdev_init(struct net_device *dev)
235 {
236         struct in_device *in_dev;
237         int err = -ENOMEM;
238
239         ASSERT_RTNL();
240
241         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
242         if (!in_dev)
243                 goto out;
244         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
245                         sizeof(in_dev->cnf));
246         in_dev->cnf.sysctl = NULL;
247         in_dev->dev = dev;
248         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
249         if (!in_dev->arp_parms)
250                 goto out_kfree;
251         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
252                 dev_disable_lro(dev);
253         /* Reference in_dev->dev */
254         dev_hold(dev);
255         /* Account for reference dev->ip_ptr (below) */
256         in_dev_hold(in_dev);
257
258         err = devinet_sysctl_register(in_dev);
259         if (err) {
260                 in_dev->dead = 1;
261                 in_dev_put(in_dev);
262                 in_dev = NULL;
263                 goto out;
264         }
265         ip_mc_init_dev(in_dev);
266         if (dev->flags & IFF_UP)
267                 ip_mc_up(in_dev);
268
269         /* we can receive as soon as ip_ptr is set -- do this last */
270         rcu_assign_pointer(dev->ip_ptr, in_dev);
271 out:
272         return in_dev ?: ERR_PTR(err);
273 out_kfree:
274         kfree(in_dev);
275         in_dev = NULL;
276         goto out;
277 }
278
279 static void in_dev_rcu_put(struct rcu_head *head)
280 {
281         struct in_device *idev = container_of(head, struct in_device, rcu_head);
282         in_dev_put(idev);
283 }
284
285 static void inetdev_destroy(struct in_device *in_dev)
286 {
287         struct in_ifaddr *ifa;
288         struct net_device *dev;
289
290         ASSERT_RTNL();
291
292         dev = in_dev->dev;
293
294         in_dev->dead = 1;
295
296         ip_mc_destroy_dev(in_dev);
297
298         while ((ifa = in_dev->ifa_list) != NULL) {
299                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
300                 inet_free_ifa(ifa);
301         }
302
303         RCU_INIT_POINTER(dev->ip_ptr, NULL);
304
305         devinet_sysctl_unregister(in_dev);
306         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
307         arp_ifdown(dev);
308
309         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
310 }
311
312 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
313 {
314         rcu_read_lock();
315         for_primary_ifa(in_dev) {
316                 if (inet_ifa_match(a, ifa)) {
317                         if (!b || inet_ifa_match(b, ifa)) {
318                                 rcu_read_unlock();
319                                 return 1;
320                         }
321                 }
322         } endfor_ifa(in_dev);
323         rcu_read_unlock();
324         return 0;
325 }
326
327 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
328                          int destroy, struct nlmsghdr *nlh, u32 portid)
329 {
330         struct in_ifaddr *promote = NULL;
331         struct in_ifaddr *ifa, *ifa1 = *ifap;
332         struct in_ifaddr *last_prim = in_dev->ifa_list;
333         struct in_ifaddr *prev_prom = NULL;
334         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
335
336         ASSERT_RTNL();
337
338         /* 1. Deleting primary ifaddr forces deletion all secondaries
339          * unless alias promotion is set
340          **/
341
342         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
343                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
344
345                 while ((ifa = *ifap1) != NULL) {
346                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
347                             ifa1->ifa_scope <= ifa->ifa_scope)
348                                 last_prim = ifa;
349
350                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
351                             ifa1->ifa_mask != ifa->ifa_mask ||
352                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
353                                 ifap1 = &ifa->ifa_next;
354                                 prev_prom = ifa;
355                                 continue;
356                         }
357
358                         if (!do_promote) {
359                                 inet_hash_remove(ifa);
360                                 *ifap1 = ifa->ifa_next;
361
362                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
363                                 blocking_notifier_call_chain(&inetaddr_chain,
364                                                 NETDEV_DOWN, ifa);
365                                 inet_free_ifa(ifa);
366                         } else {
367                                 promote = ifa;
368                                 break;
369                         }
370                 }
371         }
372
373         /* On promotion all secondaries from subnet are changing
374          * the primary IP, we must remove all their routes silently
375          * and later to add them back with new prefsrc. Do this
376          * while all addresses are on the device list.
377          */
378         for (ifa = promote; ifa; ifa = ifa->ifa_next) {
379                 if (ifa1->ifa_mask == ifa->ifa_mask &&
380                     inet_ifa_match(ifa1->ifa_address, ifa))
381                         fib_del_ifaddr(ifa, ifa1);
382         }
383
384         /* 2. Unlink it */
385
386         *ifap = ifa1->ifa_next;
387         inet_hash_remove(ifa1);
388
389         /* 3. Announce address deletion */
390
391         /* Send message first, then call notifier.
392            At first sight, FIB update triggered by notifier
393            will refer to already deleted ifaddr, that could confuse
394            netlink listeners. It is not true: look, gated sees
395            that route deleted and if it still thinks that ifaddr
396            is valid, it will try to restore deleted routes... Grr.
397            So that, this order is correct.
398          */
399         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
400         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
401
402         if (promote) {
403                 struct in_ifaddr *next_sec = promote->ifa_next;
404
405                 if (prev_prom) {
406                         prev_prom->ifa_next = promote->ifa_next;
407                         promote->ifa_next = last_prim->ifa_next;
408                         last_prim->ifa_next = promote;
409                 }
410
411                 promote->ifa_flags &= ~IFA_F_SECONDARY;
412                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
413                 blocking_notifier_call_chain(&inetaddr_chain,
414                                 NETDEV_UP, promote);
415                 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
416                         if (ifa1->ifa_mask != ifa->ifa_mask ||
417                             !inet_ifa_match(ifa1->ifa_address, ifa))
418                                         continue;
419                         fib_add_ifaddr(ifa);
420                 }
421
422         }
423         if (destroy)
424                 inet_free_ifa(ifa1);
425 }
426
427 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
428                          int destroy)
429 {
430         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
431 }
432
/* Delayed work item that runs check_lifetime() (defined below). */
static void check_lifetime(struct work_struct *work);
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
436
437 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
438                              u32 portid)
439 {
440         struct in_device *in_dev = ifa->ifa_dev;
441         struct in_ifaddr *ifa1, **ifap, **last_primary;
442
443         ASSERT_RTNL();
444
445         if (!ifa->ifa_local) {
446                 inet_free_ifa(ifa);
447                 return 0;
448         }
449
450         ifa->ifa_flags &= ~IFA_F_SECONDARY;
451         last_primary = &in_dev->ifa_list;
452
453         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
454              ifap = &ifa1->ifa_next) {
455                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
456                     ifa->ifa_scope <= ifa1->ifa_scope)
457                         last_primary = &ifa1->ifa_next;
458                 if (ifa1->ifa_mask == ifa->ifa_mask &&
459                     inet_ifa_match(ifa1->ifa_address, ifa)) {
460                         if (ifa1->ifa_local == ifa->ifa_local) {
461                                 inet_free_ifa(ifa);
462                                 return -EEXIST;
463                         }
464                         if (ifa1->ifa_scope != ifa->ifa_scope) {
465                                 inet_free_ifa(ifa);
466                                 return -EINVAL;
467                         }
468                         ifa->ifa_flags |= IFA_F_SECONDARY;
469                 }
470         }
471
472         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
473                 prandom_seed((__force u32) ifa->ifa_local);
474                 ifap = last_primary;
475         }
476
477         ifa->ifa_next = *ifap;
478         *ifap = ifa;
479
480         inet_hash_insert(dev_net(in_dev->dev), ifa);
481
482         cancel_delayed_work(&check_lifetime_work);
483         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
484
485         /* Send message first, then call notifier.
486            Notifier will trigger FIB update, so that
487            listeners of netlink will know about new ifaddr */
488         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
489         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
490
491         return 0;
492 }
493
494 static int inet_insert_ifa(struct in_ifaddr *ifa)
495 {
496         return __inet_insert_ifa(ifa, NULL, 0);
497 }
498
499 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
500 {
501         struct in_device *in_dev = __in_dev_get_rtnl(dev);
502
503         ASSERT_RTNL();
504
505         if (!in_dev) {
506                 inet_free_ifa(ifa);
507                 return -ENOBUFS;
508         }
509         ipv4_devconf_setall(in_dev);
510         neigh_parms_data_state_setall(in_dev->arp_parms);
511         if (ifa->ifa_dev != in_dev) {
512                 WARN_ON(ifa->ifa_dev);
513                 in_dev_hold(in_dev);
514                 ifa->ifa_dev = in_dev;
515         }
516         if (ipv4_is_loopback(ifa->ifa_local))
517                 ifa->ifa_scope = RT_SCOPE_HOST;
518         return inet_insert_ifa(ifa);
519 }
520
521 /* Caller must hold RCU or RTNL :
522  * We dont take a reference on found in_device
523  */
524 struct in_device *inetdev_by_index(struct net *net, int ifindex)
525 {
526         struct net_device *dev;
527         struct in_device *in_dev = NULL;
528
529         rcu_read_lock();
530         dev = dev_get_by_index_rcu(net, ifindex);
531         if (dev)
532                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
533         rcu_read_unlock();
534         return in_dev;
535 }
536 EXPORT_SYMBOL(inetdev_by_index);
537
538 /* Called only from RTNL semaphored context. No locks. */
539
540 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
541                                     __be32 mask)
542 {
543         ASSERT_RTNL();
544
545         for_primary_ifa(in_dev) {
546                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
547                         return ifa;
548         } endfor_ifa(in_dev);
549         return NULL;
550 }
551
552 static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
553 {
554         struct ip_mreqn mreq = {
555                 .imr_multiaddr.s_addr = ifa->ifa_address,
556                 .imr_ifindex = ifa->ifa_dev->dev->ifindex,
557         };
558         int ret;
559
560         ASSERT_RTNL();
561
562         lock_sock(sk);
563         if (join)
564                 ret = ip_mc_join_group(sk, &mreq);
565         else
566                 ret = ip_mc_leave_group(sk, &mreq);
567         release_sock(sk);
568
569         return ret;
570 }
571
572 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
573 {
574         struct net *net = sock_net(skb->sk);
575         struct nlattr *tb[IFA_MAX+1];
576         struct in_device *in_dev;
577         struct ifaddrmsg *ifm;
578         struct in_ifaddr *ifa, **ifap;
579         int err = -EINVAL;
580
581         ASSERT_RTNL();
582
583         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
584         if (err < 0)
585                 goto errout;
586
587         ifm = nlmsg_data(nlh);
588         in_dev = inetdev_by_index(net, ifm->ifa_index);
589         if (!in_dev) {
590                 err = -ENODEV;
591                 goto errout;
592         }
593
594         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
595              ifap = &ifa->ifa_next) {
596                 if (tb[IFA_LOCAL] &&
597                     ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
598                         continue;
599
600                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
601                         continue;
602
603                 if (tb[IFA_ADDRESS] &&
604                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
605                     !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
606                         continue;
607
608                 if (ipv4_is_multicast(ifa->ifa_address))
609                         ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
610                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
611                 return 0;
612         }
613
614         err = -EADDRNOTAVAIL;
615 errout:
616         return err;
617 }
618
619 #define INFINITY_LIFE_TIME      0xFFFFFFFF
620
621 static void check_lifetime(struct work_struct *work)
622 {
623         unsigned long now, next, next_sec, next_sched;
624         struct in_ifaddr *ifa;
625         struct hlist_node *n;
626         int i;
627
628         now = jiffies;
629         next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
630
631         for (i = 0; i < IN4_ADDR_HSIZE; i++) {
632                 bool change_needed = false;
633
634                 rcu_read_lock();
635                 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
636                         unsigned long age;
637
638                         if (ifa->ifa_flags & IFA_F_PERMANENT)
639                                 continue;
640
641                         /* We try to batch several events at once. */
642                         age = (now - ifa->ifa_tstamp +
643                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
644
645                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
646                             age >= ifa->ifa_valid_lft) {
647                                 change_needed = true;
648                         } else if (ifa->ifa_preferred_lft ==
649                                    INFINITY_LIFE_TIME) {
650                                 continue;
651                         } else if (age >= ifa->ifa_preferred_lft) {
652                                 if (time_before(ifa->ifa_tstamp +
653                                                 ifa->ifa_valid_lft * HZ, next))
654                                         next = ifa->ifa_tstamp +
655                                                ifa->ifa_valid_lft * HZ;
656
657                                 if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
658                                         change_needed = true;
659                         } else if (time_before(ifa->ifa_tstamp +
660                                                ifa->ifa_preferred_lft * HZ,
661                                                next)) {
662                                 next = ifa->ifa_tstamp +
663                                        ifa->ifa_preferred_lft * HZ;
664                         }
665                 }
666                 rcu_read_unlock();
667                 if (!change_needed)
668                         continue;
669                 rtnl_lock();
670                 hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
671                         unsigned long age;
672
673                         if (ifa->ifa_flags & IFA_F_PERMANENT)
674                                 continue;
675
676                         /* We try to batch several events at once. */
677                         age = (now - ifa->ifa_tstamp +
678                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
679
680                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
681                             age >= ifa->ifa_valid_lft) {
682                                 struct in_ifaddr **ifap;
683
684                                 for (ifap = &ifa->ifa_dev->ifa_list;
685                                      *ifap != NULL; ifap = &(*ifap)->ifa_next) {
686                                         if (*ifap == ifa) {
687                                                 inet_del_ifa(ifa->ifa_dev,
688                                                              ifap, 1);
689                                                 break;
690                                         }
691                                 }
692                         } else if (ifa->ifa_preferred_lft !=
693                                    INFINITY_LIFE_TIME &&
694                                    age >= ifa->ifa_preferred_lft &&
695                                    !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
696                                 ifa->ifa_flags |= IFA_F_DEPRECATED;
697                                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
698                         }
699                 }
700                 rtnl_unlock();
701         }
702
703         next_sec = round_jiffies_up(next);
704         next_sched = next;
705
706         /* If rounded timeout is accurate enough, accept it. */
707         if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
708                 next_sched = next_sec;
709
710         now = jiffies;
711         /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
712         if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
713                 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
714
715         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
716                         next_sched - now);
717 }
718
719 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
720                              __u32 prefered_lft)
721 {
722         unsigned long timeout;
723
724         ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
725
726         timeout = addrconf_timeout_fixup(valid_lft, HZ);
727         if (addrconf_finite_timeout(timeout))
728                 ifa->ifa_valid_lft = timeout;
729         else
730                 ifa->ifa_flags |= IFA_F_PERMANENT;
731
732         timeout = addrconf_timeout_fixup(prefered_lft, HZ);
733         if (addrconf_finite_timeout(timeout)) {
734                 if (timeout == 0)
735                         ifa->ifa_flags |= IFA_F_DEPRECATED;
736                 ifa->ifa_preferred_lft = timeout;
737         }
738         ifa->ifa_tstamp = jiffies;
739         if (!ifa->ifa_cstamp)
740                 ifa->ifa_cstamp = ifa->ifa_tstamp;
741 }
742
743 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
744                                        __u32 *pvalid_lft, __u32 *pprefered_lft)
745 {
746         struct nlattr *tb[IFA_MAX+1];
747         struct in_ifaddr *ifa;
748         struct ifaddrmsg *ifm;
749         struct net_device *dev;
750         struct in_device *in_dev;
751         int err;
752
753         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
754         if (err < 0)
755                 goto errout;
756
757         ifm = nlmsg_data(nlh);
758         err = -EINVAL;
759         if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
760                 goto errout;
761
762         dev = __dev_get_by_index(net, ifm->ifa_index);
763         err = -ENODEV;
764         if (!dev)
765                 goto errout;
766
767         in_dev = __in_dev_get_rtnl(dev);
768         err = -ENOBUFS;
769         if (!in_dev)
770                 goto errout;
771
772         ifa = inet_alloc_ifa();
773         if (!ifa)
774                 /*
775                  * A potential indev allocation can be left alive, it stays
776                  * assigned to its device and is destroy with it.
777                  */
778                 goto errout;
779
780         ipv4_devconf_setall(in_dev);
781         neigh_parms_data_state_setall(in_dev->arp_parms);
782         in_dev_hold(in_dev);
783
784         if (!tb[IFA_ADDRESS])
785                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
786
787         INIT_HLIST_NODE(&ifa->hash);
788         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
789         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
790         ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
791                                          ifm->ifa_flags;
792         ifa->ifa_scope = ifm->ifa_scope;
793         ifa->ifa_dev = in_dev;
794
795         ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
796         ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
797
798         if (tb[IFA_BROADCAST])
799                 ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
800
801         if (tb[IFA_LABEL])
802                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
803         else
804                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
805
806         if (tb[IFA_CACHEINFO]) {
807                 struct ifa_cacheinfo *ci;
808
809                 ci = nla_data(tb[IFA_CACHEINFO]);
810                 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
811                         err = -EINVAL;
812                         goto errout_free;
813                 }
814                 *pvalid_lft = ci->ifa_valid;
815                 *pprefered_lft = ci->ifa_prefered;
816         }
817
818         return ifa;
819
820 errout_free:
821         inet_free_ifa(ifa);
822 errout:
823         return ERR_PTR(err);
824 }
825
826 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
827 {
828         struct in_device *in_dev = ifa->ifa_dev;
829         struct in_ifaddr *ifa1, **ifap;
830
831         if (!ifa->ifa_local)
832                 return NULL;
833
834         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
835              ifap = &ifa1->ifa_next) {
836                 if (ifa1->ifa_mask == ifa->ifa_mask &&
837                     inet_ifa_match(ifa1->ifa_address, ifa) &&
838                     ifa1->ifa_local == ifa->ifa_local)
839                         return ifa1;
840         }
841         return NULL;
842 }
843
844 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
845 {
846         struct net *net = sock_net(skb->sk);
847         struct in_ifaddr *ifa;
848         struct in_ifaddr *ifa_existing;
849         __u32 valid_lft = INFINITY_LIFE_TIME;
850         __u32 prefered_lft = INFINITY_LIFE_TIME;
851
852         ASSERT_RTNL();
853
854         ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
855         if (IS_ERR(ifa))
856                 return PTR_ERR(ifa);
857
858         ifa_existing = find_matching_ifa(ifa);
859         if (!ifa_existing) {
860                 /* It would be best to check for !NLM_F_CREATE here but
861                  * userspace already relies on not having to provide this.
862                  */
863                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
864                 if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
865                         int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
866                                                true, ifa);
867
868                         if (ret < 0) {
869                                 inet_free_ifa(ifa);
870                                 return ret;
871                         }
872                 }
873                 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
874         } else {
875                 inet_free_ifa(ifa);
876
877                 if (nlh->nlmsg_flags & NLM_F_EXCL ||
878                     !(nlh->nlmsg_flags & NLM_F_REPLACE))
879                         return -EEXIST;
880                 ifa = ifa_existing;
881                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
882                 cancel_delayed_work(&check_lifetime_work);
883                 queue_delayed_work(system_power_efficient_wq,
884                                 &check_lifetime_work, 0);
885                 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
886         }
887         return 0;
888 }
889
890 /*
891  *      Determine a default network mask, based on the IP address.
892  */
893
894 static int inet_abc_len(__be32 addr)
895 {
896         int rc = -1;    /* Something else, probably a multicast. */
897
898         if (ipv4_is_zeronet(addr))
899                 rc = 0;
900         else {
901                 __u32 haddr = ntohl(addr);
902
903                 if (IN_CLASSA(haddr))
904                         rc = 8;
905                 else if (IN_CLASSB(haddr))
906                         rc = 16;
907                 else if (IN_CLASSC(haddr))
908                         rc = 24;
909         }
910
911         return rc;
912 }
913
914
/*
 * devinet_ioctl - handle the per-interface IPv4 address ioctls.
 * @net: network namespace used for the device lookup
 * @cmd: one of SIOCGIF{ADDR,BRDADDR,DSTADDR,NETMASK}, SIOCSIFFLAGS,
 *       SIOCSIF{ADDR,BRDADDR,DSTADDR,NETMASK} or SIOCKILLADDR
 * @arg: user pointer to a struct ifreq; always read, and written back
 *       for the "get" commands
 *
 * The interface name may be an alias label of the form "dev:alias": the
 * part before the colon selects the device, the full label selects the
 * address on that device.
 *
 * Returns 0 on success or a negative errno:
 *   -EFAULT        copying the ifreq from/to user space failed
 *   -EPERM         a "set"/kill command without CAP_NET_ADMIN in the
 *                  namespace's user_ns
 *   -EINVAL        unknown command, non-AF_INET family on a "set"
 *                  command, or an address/mask rejected by
 *                  inet_abc_len()/bad_mask()
 *   -ENODEV        no device with that name
 *   -EADDRNOTAVAIL no matching address for a command that needs one
 *   -ENOBUFS       allocating a new address for SIOCSIFADDR failed
 * or whatever dev_change_flags(), inet_set_ifa() or tcp_nuke_addr()
 * return.
 */
int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct ifreq ifr;
	struct sockaddr_in sin_orig;
	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
	struct in_device *in_dev;
	struct in_ifaddr **ifap = NULL;
	struct in_ifaddr *ifa = NULL;
	struct net_device *dev;
	char *colon;
	int ret = -EFAULT;
	int tryaddrmatch = 0;

	/*
	 *	Fetch the caller's info block into kernel space
	 */

	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
		goto out;
	/* Force termination: the name came from user space. */
	ifr.ifr_name[IFNAMSIZ - 1] = 0;

	/* save original address for comparison */
	memcpy(&sin_orig, sin, sizeof(*sin));

	/* Temporarily cut the name at the alias separator so that only the
	 * device part is used for dev_load() and the device lookup below.
	 */
	colon = strchr(ifr.ifr_name, ':');
	if (colon)
		*colon = 0;

	dev_load(net, ifr.ifr_name);

	/* First pass over cmd: permission and argument checks only. */
	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
	case SIOCGIFBRDADDR:	/* Get the broadcast address */
	case SIOCGIFDSTADDR:	/* Get the destination address */
	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		/* Note that these ioctls will not sleep,
		   so that we do not impose a lock.
		   One day we will be forced to put shlock here (I mean SMP)
		 */
		tryaddrmatch = (sin_orig.sin_family == AF_INET);
		/* Start the reply from a clean AF_INET sockaddr. */
		memset(sin, 0, sizeof(*sin));
		sin->sin_family = AF_INET;
		break;

	case SIOCSIFFLAGS:
		ret = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto out;
		break;
	case SIOCSIFADDR:	/* Set interface address (and family) */
	case SIOCSIFBRDADDR:	/* Set the broadcast address */
	case SIOCSIFDSTADDR:	/* Set the destination address */
	case SIOCSIFNETMASK:	/* Set the netmask for the interface */
	case SIOCKILLADDR:	/* Nuke all sockets on this address */
		ret = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto out;
		ret = -EINVAL;
		if (sin->sin_family != AF_INET)
			goto out;
		break;
	default:
		ret = -EINVAL;
		goto out;
	}

	rtnl_lock();

	ret = -ENODEV;
	dev = __dev_get_by_name(net, ifr.ifr_name);
	if (!dev)
		goto done;

	/* Restore the full label for the address comparisons below. */
	if (colon)
		*colon = ':';

	in_dev = __in_dev_get_rtnl(dev);
	if (in_dev) {
		if (tryaddrmatch) {
			/* Matthias Andree */
			/* compare label and address (4.4BSD style) */
			/* note: we only do this for a limited set of ioctls
			   and only if the original address family was AF_INET.
			   This is checked above. */
			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
			     ifap = &ifa->ifa_next) {
				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
				    sin_orig.sin_addr.s_addr ==
							ifa->ifa_local) {
					break; /* found */
				}
			}
		}
		/* we didn't get a match, maybe the application is
		   4.3BSD-style and passed in junk so we fall back to
		   comparing just the label */
		if (!ifa) {
			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
			     ifap = &ifa->ifa_next)
				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
					break;
		}
	}

	/* Only SIOCSIFADDR, SIOCSIFFLAGS and SIOCKILLADDR may proceed
	 * without an existing address.
	 */
	ret = -EADDRNOTAVAIL;
	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS
	    && cmd != SIOCKILLADDR)
		goto done;

	/* Second pass over cmd: do the work. The "get" cases leave through
	 * rarok, which copies the ifreq back to user space.
	 */
	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
		sin->sin_addr.s_addr = ifa->ifa_local;
		goto rarok;

	case SIOCGIFBRDADDR:	/* Get the broadcast address */
		sin->sin_addr.s_addr = ifa->ifa_broadcast;
		goto rarok;

	case SIOCGIFDSTADDR:	/* Get the destination address */
		sin->sin_addr.s_addr = ifa->ifa_address;
		goto rarok;

	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		sin->sin_addr.s_addr = ifa->ifa_mask;
		goto rarok;

	case SIOCSIFFLAGS:
		if (colon) {
			/* Alias label: the flags apply to that address only;
			 * clearing IFF_UP deletes it.
			 */
			ret = -EADDRNOTAVAIL;
			if (!ifa)
				break;
			ret = 0;
			if (!(ifr.ifr_flags & IFF_UP))
				inet_del_ifa(in_dev, ifap, 1);
			break;
		}
		ret = dev_change_flags(dev, ifr.ifr_flags);
		break;

	case SIOCSIFADDR:	/* Set interface address (and family) */
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;

		if (!ifa) {
			/* No address under this label yet: allocate one. */
			ret = -ENOBUFS;
			ifa = inet_alloc_ifa();
			if (!ifa)
				break;
			INIT_HLIST_NODE(&ifa->hash);
			if (colon)
				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
			else
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
		} else {
			/* Existing address: nothing to do if unchanged,
			 * otherwise unlink it (third argument 0) and reuse
			 * the structure.
			 */
			ret = 0;
			if (ifa->ifa_local == sin->sin_addr.s_addr)
				break;
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = 0;
			ifa->ifa_scope = 0;
		}

		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;

		if (!(dev->flags & IFF_POINTOPOINT)) {
			/* Classful default prefix and derived broadcast. */
			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
			if ((dev->flags & IFF_BROADCAST) &&
			    ifa->ifa_prefixlen < 31)
				ifa->ifa_broadcast = ifa->ifa_address |
						     ~ifa->ifa_mask;
		} else {
			ifa->ifa_prefixlen = 32;
			ifa->ifa_mask = inet_make_mask(32);
		}
		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
		ret = inet_set_ifa(dev, ifa);
		break;

	case SIOCSIFBRDADDR:	/* Set the broadcast address */
		ret = 0;
		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
			/* Unlink, modify, re-insert. */
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = sin->sin_addr.s_addr;
			inet_insert_ifa(ifa);
		}
		break;

	case SIOCSIFDSTADDR:	/* Set the destination address */
		ret = 0;
		if (ifa->ifa_address == sin->sin_addr.s_addr)
			break;
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;
		ret = 0;
		inet_del_ifa(in_dev, ifap, 0);
		ifa->ifa_address = sin->sin_addr.s_addr;
		inet_insert_ifa(ifa);
		break;

	case SIOCSIFNETMASK:	/* Set the netmask for the interface */

		/*
		 *	The mask we set must be legal.
		 */
		ret = -EINVAL;
		if (bad_mask(sin->sin_addr.s_addr, 0))
			break;
		ret = 0;
		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
			__be32 old_mask = ifa->ifa_mask;
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_mask = sin->sin_addr.s_addr;
			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);

			/* See if current broadcast address matches
			 * with current netmask, then recalculate
			 * the broadcast address. Otherwise it's a
			 * funny address, so don't touch it since
			 * the user seems to know what (s)he's doing...
			 */
			if ((dev->flags & IFF_BROADCAST) &&
			    (ifa->ifa_prefixlen < 31) &&
			    (ifa->ifa_broadcast ==
			     (ifa->ifa_local|~old_mask))) {
				ifa->ifa_broadcast = (ifa->ifa_local |
						      ~sin->sin_addr.s_addr);
			}
			inet_insert_ifa(ifa);
		}
		break;
	case SIOCKILLADDR:	/* Nuke all connections on this address */
		ret = tcp_nuke_addr(net, (struct sockaddr *) sin);
		break;
	}
done:
	rtnl_unlock();
out:
	return ret;
rarok:
	/* Drop RTNL before the user copy of the reply. */
	rtnl_unlock();
	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
	goto out;
}
1161
1162 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1163 {
1164         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1165         struct in_ifaddr *ifa;
1166         struct ifreq ifr;
1167         int done = 0;
1168
1169         if (!in_dev)
1170                 goto out;
1171
1172         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1173                 if (!buf) {
1174                         done += sizeof(ifr);
1175                         continue;
1176                 }
1177                 if (len < (int) sizeof(ifr))
1178                         break;
1179                 memset(&ifr, 0, sizeof(struct ifreq));
1180                 strcpy(ifr.ifr_name, ifa->ifa_label);
1181
1182                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1183                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1184                                                                 ifa->ifa_local;
1185
1186                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1187                         done = -EFAULT;
1188                         break;
1189                 }
1190                 buf  += sizeof(struct ifreq);
1191                 len  -= sizeof(struct ifreq);
1192                 done += sizeof(struct ifreq);
1193         }
1194 out:
1195         return done;
1196 }
1197
1198 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1199 {
1200         __be32 addr = 0;
1201         struct in_device *in_dev;
1202         struct net *net = dev_net(dev);
1203
1204         rcu_read_lock();
1205         in_dev = __in_dev_get_rcu(dev);
1206         if (!in_dev)
1207                 goto no_in_dev;
1208
1209         for_primary_ifa(in_dev) {
1210                 if (ifa->ifa_scope > scope)
1211                         continue;
1212                 if (!dst || inet_ifa_match(dst, ifa)) {
1213                         addr = ifa->ifa_local;
1214                         break;
1215                 }
1216                 if (!addr)
1217                         addr = ifa->ifa_local;
1218         } endfor_ifa(in_dev);
1219
1220         if (addr)
1221                 goto out_unlock;
1222 no_in_dev:
1223
1224         /* Not loopback addresses on loopback should be preferred
1225            in this case. It is important that lo is the first interface
1226            in dev_base list.
1227          */
1228         for_each_netdev_rcu(net, dev) {
1229                 in_dev = __in_dev_get_rcu(dev);
1230                 if (!in_dev)
1231                         continue;
1232
1233                 for_primary_ifa(in_dev) {
1234                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
1235                             ifa->ifa_scope <= scope) {
1236                                 addr = ifa->ifa_local;
1237                                 goto out_unlock;
1238                         }
1239                 } endfor_ifa(in_dev);
1240         }
1241 out_unlock:
1242         rcu_read_unlock();
1243         return addr;
1244 }
1245 EXPORT_SYMBOL(inet_select_addr);
1246
/*
 * Scan the addresses of @in_dev for a usable local address.
 *
 * @dst:   if non-zero, some address on the device must be in the same
 *         subnet as @dst (per inet_ifa_match())
 * @local: if non-zero, the address to confirm; if zero, any address may
 *         be picked
 * @scope: maximum allowed scope value for the picked address
 *
 * Two things are tracked while walking the list: "addr", the candidate
 * local address, and "same", whether an entry satisfying the subnet
 * conditions on @local and @dst has been seen. The candidate is returned
 * only if "same" ended up set; otherwise the result is 0.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
			      __be32 local, int scope)
{
	int same = 0;
	__be32 addr = 0;

	for_ifa(in_dev) {
		/* First entry that equals @local (or any entry when @local
		 * is 0) within the allowed scope becomes the candidate.
		 */
		if (!addr &&
		    (local == ifa->ifa_local || !local) &&
		    ifa->ifa_scope <= scope) {
			addr = ifa->ifa_local;
			/* Subnet condition already met earlier: done. */
			if (same)
				break;
		}
		if (!same) {
			same = (!local || inet_ifa_match(local, ifa)) &&
				(!dst || inet_ifa_match(dst, ifa));
			if (same && addr) {
				if (local || !dst)
					break;
				/* Is the selected addr into dst subnet? */
				if (inet_ifa_match(addr, ifa))
					break;
				/* No, then can we use new local src? */
				if (ifa->ifa_scope <= scope) {
					addr = ifa->ifa_local;
					break;
				}
				/* search for large dst subnet for addr */
				same = 0;
			}
		}
	} endfor_ifa(in_dev);

	return same ? addr : 0;
}
1283
1284 /*
1285  * Confirm that local IP address exists using wildcards:
1286  * - net: netns to check, cannot be NULL
1287  * - in_dev: only on this interface, NULL=any interface
1288  * - dst: only in the same subnet as dst, 0=any dst
1289  * - local: address, 0=autoselect the local address
1290  * - scope: maximum allowed scope value for the local address
1291  */
1292 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1293                          __be32 dst, __be32 local, int scope)
1294 {
1295         __be32 addr = 0;
1296         struct net_device *dev;
1297
1298         if (in_dev)
1299                 return confirm_addr_indev(in_dev, dst, local, scope);
1300
1301         rcu_read_lock();
1302         for_each_netdev_rcu(net, dev) {
1303                 in_dev = __in_dev_get_rcu(dev);
1304                 if (in_dev) {
1305                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1306                         if (addr)
1307                                 break;
1308                 }
1309         }
1310         rcu_read_unlock();
1311
1312         return addr;
1313 }
1314 EXPORT_SYMBOL(inet_confirm_addr);
1315
1316 /*
1317  *      Device notifier
1318  */
1319
1320 int register_inetaddr_notifier(struct notifier_block *nb)
1321 {
1322         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1323 }
1324 EXPORT_SYMBOL(register_inetaddr_notifier);
1325
1326 int unregister_inetaddr_notifier(struct notifier_block *nb)
1327 {
1328         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1329 }
1330 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1331
1332 /* Rename ifa_labels for a device name change. Make some effort to preserve
1333  * existing alias numbering and to create unique labels if possible.
1334 */
1335 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1336 {
1337         struct in_ifaddr *ifa;
1338         int named = 0;
1339
1340         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1341                 char old[IFNAMSIZ], *dot;
1342
1343                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1344                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1345                 if (named++ == 0)
1346                         goto skip;
1347                 dot = strchr(old, ':');
1348                 if (!dot) {
1349                         sprintf(old, ":%d", named);
1350                         dot = old;
1351                 }
1352                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1353                         strcat(ifa->ifa_label, dot);
1354                 else
1355                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1356 skip:
1357                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1358         }
1359 }
1360
/*
 * Tell whether @mtu is large enough for IPv4 on a device.
 * The threshold is 68 bytes; anything smaller is rejected.
 */
static bool inetdev_valid_mtu(unsigned int mtu)
{
	const unsigned int min_mtu = 68;

	return !(mtu < min_mtu);
}
1365
1366 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1367                                         struct in_device *in_dev)
1368
1369 {
1370         struct in_ifaddr *ifa;
1371
1372         for (ifa = in_dev->ifa_list; ifa;
1373              ifa = ifa->ifa_next) {
1374                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1375                          ifa->ifa_local, dev,
1376                          ifa->ifa_local, NULL,
1377                          dev->dev_addr, NULL);
1378         }
1379 }
1380
1381 /* Called only under RTNL semaphore */
1382
1383 static int inetdev_event(struct notifier_block *this, unsigned long event,
1384                          void *ptr)
1385 {
1386         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1387         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1388
1389         ASSERT_RTNL();
1390
1391         if (!in_dev) {
1392                 if (event == NETDEV_REGISTER) {
1393                         in_dev = inetdev_init(dev);
1394                         if (IS_ERR(in_dev))
1395                                 return notifier_from_errno(PTR_ERR(in_dev));
1396                         if (dev->flags & IFF_LOOPBACK) {
1397                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1398                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1399                         }
1400                 } else if (event == NETDEV_CHANGEMTU) {
1401                         /* Re-enabling IP */
1402                         if (inetdev_valid_mtu(dev->mtu))
1403                                 in_dev = inetdev_init(dev);
1404                 }
1405                 goto out;
1406         }
1407
1408         switch (event) {
1409         case NETDEV_REGISTER:
1410                 pr_debug("%s: bug\n", __func__);
1411                 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1412                 break;
1413         case NETDEV_UP:
1414                 if (!inetdev_valid_mtu(dev->mtu))
1415                         break;
1416                 if (dev->flags & IFF_LOOPBACK) {
1417                         struct in_ifaddr *ifa = inet_alloc_ifa();
1418
1419                         if (ifa) {
1420                                 INIT_HLIST_NODE(&ifa->hash);
1421                                 ifa->ifa_local =
1422                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1423                                 ifa->ifa_prefixlen = 8;
1424                                 ifa->ifa_mask = inet_make_mask(8);
1425                                 in_dev_hold(in_dev);
1426                                 ifa->ifa_dev = in_dev;
1427                                 ifa->ifa_scope = RT_SCOPE_HOST;
1428                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1429                                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1430                                                  INFINITY_LIFE_TIME);
1431                                 ipv4_devconf_setall(in_dev);
1432                                 neigh_parms_data_state_setall(in_dev->arp_parms);
1433                                 inet_insert_ifa(ifa);
1434                         }
1435                 }
1436                 ip_mc_up(in_dev);
1437                 /* fall through */
1438         case NETDEV_CHANGEADDR:
1439                 if (!IN_DEV_ARP_NOTIFY(in_dev))
1440                         break;
1441                 /* fall through */
1442         case NETDEV_NOTIFY_PEERS:
1443                 /* Send gratuitous ARP to notify of link change */
1444                 inetdev_send_gratuitous_arp(dev, in_dev);
1445                 break;
1446         case NETDEV_DOWN:
1447                 ip_mc_down(in_dev);
1448                 break;
1449         case NETDEV_PRE_TYPE_CHANGE:
1450                 ip_mc_unmap(in_dev);
1451                 break;
1452         case NETDEV_POST_TYPE_CHANGE:
1453                 ip_mc_remap(in_dev);
1454                 break;
1455         case NETDEV_CHANGEMTU:
1456                 if (inetdev_valid_mtu(dev->mtu))
1457                         break;
1458                 /* disable IP when MTU is not enough */
1459         case NETDEV_UNREGISTER:
1460                 inetdev_destroy(in_dev);
1461                 break;
1462         case NETDEV_CHANGENAME:
1463                 /* Do not notify about label change, this event is
1464                  * not interesting to applications using netlink.
1465                  */
1466                 inetdev_changename(dev, in_dev);
1467
1468                 devinet_sysctl_unregister(in_dev);
1469                 devinet_sysctl_register(in_dev);
1470                 break;
1471         }
1472 out:
1473         return NOTIFY_DONE;
1474 }
1475
/* Notifier block whose callback is inetdev_event(). */
static struct notifier_block ip_netdev_notifier = {
	.notifier_call = inetdev_event,
};
1479
1480 static size_t inet_nlmsg_size(void)
1481 {
1482         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1483                + nla_total_size(4) /* IFA_ADDRESS */
1484                + nla_total_size(4) /* IFA_LOCAL */
1485                + nla_total_size(4) /* IFA_BROADCAST */
1486                + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1487                + nla_total_size(4)  /* IFA_FLAGS */
1488                + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1489 }
1490
1491 static inline u32 cstamp_delta(unsigned long cstamp)
1492 {
1493         return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1494 }
1495
1496 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1497                          unsigned long tstamp, u32 preferred, u32 valid)
1498 {
1499         struct ifa_cacheinfo ci;
1500
1501         ci.cstamp = cstamp_delta(cstamp);
1502         ci.tstamp = cstamp_delta(tstamp);
1503         ci.ifa_prefered = preferred;
1504         ci.ifa_valid = valid;
1505
1506         return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1507 }
1508
1509 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1510                             u32 portid, u32 seq, int event, unsigned int flags)
1511 {
1512         struct ifaddrmsg *ifm;
1513         struct nlmsghdr  *nlh;
1514         u32 preferred, valid;
1515
1516         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1517         if (!nlh)
1518                 return -EMSGSIZE;
1519
1520         ifm = nlmsg_data(nlh);
1521         ifm->ifa_family = AF_INET;
1522         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1523         ifm->ifa_flags = ifa->ifa_flags;
1524         ifm->ifa_scope = ifa->ifa_scope;
1525         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1526
1527         if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1528                 preferred = ifa->ifa_preferred_lft;
1529                 valid = ifa->ifa_valid_lft;
1530                 if (preferred != INFINITY_LIFE_TIME) {
1531                         long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1532
1533                         if (preferred > tval)
1534                                 preferred -= tval;
1535                         else
1536                                 preferred = 0;
1537                         if (valid != INFINITY_LIFE_TIME) {
1538                                 if (valid > tval)
1539                                         valid -= tval;
1540                                 else
1541                                         valid = 0;
1542                         }
1543                 }
1544         } else {
1545                 preferred = INFINITY_LIFE_TIME;
1546                 valid = INFINITY_LIFE_TIME;
1547         }
1548         if ((ifa->ifa_address &&
1549              nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1550             (ifa->ifa_local &&
1551              nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1552             (ifa->ifa_broadcast &&
1553              nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1554             (ifa->ifa_label[0] &&
1555              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1556             nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1557             put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1558                           preferred, valid))
1559                 goto nla_put_failure;
1560
1561         nlmsg_end(skb, nlh);
1562         return 0;
1563
1564 nla_put_failure:
1565         nlmsg_cancel(skb, nlh);
1566         return -EMSGSIZE;
1567 }
1568
/*
 * Netlink dump callback: emit one RTM_NEWADDR message (NLM_F_MULTI) per
 * IPv4 address of every device in the namespace of the requesting socket.
 *
 * The position reached is kept in cb->args[] so that the next call can
 * resume where this one stopped:
 *   args[0]  hash bucket in net->dev_index_head
 *   args[1]  index of the device within that bucket
 *   args[2]  index of the address within that device
 *
 * Returns skb->len.
 */
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int h, s_h;
	int idx, s_idx;
	int ip_idx, s_ip_idx;
	struct net_device *dev;
	struct in_device *in_dev;
	struct in_ifaddr *ifa;
	struct hlist_head *head;

	/* Resume point saved by the previous invocation. */
	s_h = cb->args[0];
	s_idx = idx = cb->args[1];
	s_ip_idx = ip_idx = cb->args[2];

	/* s_idx only applies to the first bucket visited. */
	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		idx = 0;
		head = &net->dev_index_head[h];
		rcu_read_lock();
		/* Sequence value checked by nl_dump_check_consistent(). */
		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
			  net->dev_base_seq;
		hlist_for_each_entry_rcu(dev, head, index_hlist) {
			/* Skip devices already dumped. */
			if (idx < s_idx)
				goto cont;
			/* Past the resume device: start at its first address. */
			if (h > s_h || idx > s_idx)
				s_ip_idx = 0;
			in_dev = __in_dev_get_rcu(dev);
			if (!in_dev)
				goto cont;

			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
			     ifa = ifa->ifa_next, ip_idx++) {
				if (ip_idx < s_ip_idx)
					continue;
				if (inet_fill_ifaddr(skb, ifa,
					     NETLINK_CB(cb->skb).portid,
					     cb->nlh->nlmsg_seq,
					     RTM_NEWADDR, NLM_F_MULTI) < 0) {
					/* Fill failed: stop and save position. */
					rcu_read_unlock();
					goto done;
				}
				nl_dump_check_consistent(cb, nlmsg_hdr(skb));
			}
cont:
			idx++;
		}
		rcu_read_unlock();
	}

done:
	cb->args[0] = h;
	cb->args[1] = idx;
	cb->args[2] = ip_idx;

	return skb->len;
}
1625
1626 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1627                       u32 portid)
1628 {
1629         struct sk_buff *skb;
1630         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1631         int err = -ENOBUFS;
1632         struct net *net;
1633
1634         net = dev_net(ifa->ifa_dev->dev);
1635         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1636         if (!skb)
1637                 goto errout;
1638
1639         err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1640         if (err < 0) {
1641                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1642                 WARN_ON(err == -EMSGSIZE);
1643                 kfree_skb(skb);
1644                 goto errout;
1645         }
1646         rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1647         return;
1648 errout:
1649         if (err < 0)
1650                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1651 }
1652
1653 static size_t inet_get_link_af_size(const struct net_device *dev,
1654                                     u32 ext_filter_mask)
1655 {
1656         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1657
1658         if (!in_dev)
1659                 return 0;
1660
1661         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1662 }
1663
1664 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1665                              u32 ext_filter_mask)
1666 {
1667         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1668         struct nlattr *nla;
1669         int i;
1670
1671         if (!in_dev)
1672                 return -ENODATA;
1673
1674         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1675         if (!nla)
1676                 return -EMSGSIZE;
1677
1678         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1679                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1680
1681         return 0;
1682 }
1683
1684 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1685         [IFLA_INET_CONF]        = { .type = NLA_NESTED },
1686 };
1687
1688 static int inet_validate_link_af(const struct net_device *dev,
1689                                  const struct nlattr *nla)
1690 {
1691         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1692         int err, rem;
1693
1694         if (dev && !__in_dev_get_rtnl(dev))
1695                 return -EAFNOSUPPORT;
1696
1697         err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1698         if (err < 0)
1699                 return err;
1700
1701         if (tb[IFLA_INET_CONF]) {
1702                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1703                         int cfgid = nla_type(a);
1704
1705                         if (nla_len(a) < 4)
1706                                 return -EINVAL;
1707
1708                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1709                                 return -EINVAL;
1710                 }
1711         }
1712
1713         return 0;
1714 }
1715
1716 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1717 {
1718         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1719         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1720         int rem;
1721
1722         if (!in_dev)
1723                 return -EAFNOSUPPORT;
1724
1725         if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1726                 BUG();
1727
1728         if (tb[IFLA_INET_CONF]) {
1729                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1730                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1731         }
1732
1733         return 0;
1734 }
1735
1736 static int inet_netconf_msgsize_devconf(int type)
1737 {
1738         int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1739                    + nla_total_size(4); /* NETCONFA_IFINDEX */
1740
1741         /* type -1 is used for ALL */
1742         if (type == -1 || type == NETCONFA_FORWARDING)
1743                 size += nla_total_size(4);
1744         if (type == -1 || type == NETCONFA_RP_FILTER)
1745                 size += nla_total_size(4);
1746         if (type == -1 || type == NETCONFA_MC_FORWARDING)
1747                 size += nla_total_size(4);
1748         if (type == -1 || type == NETCONFA_PROXY_NEIGH)
1749                 size += nla_total_size(4);
1750         if (type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
1751                 size += nla_total_size(4);
1752
1753         return size;
1754 }
1755
1756 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1757                                      struct ipv4_devconf *devconf, u32 portid,
1758                                      u32 seq, int event, unsigned int flags,
1759                                      int type)
1760 {
1761         struct nlmsghdr  *nlh;
1762         struct netconfmsg *ncm;
1763
1764         nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1765                         flags);
1766         if (!nlh)
1767                 return -EMSGSIZE;
1768
1769         ncm = nlmsg_data(nlh);
1770         ncm->ncm_family = AF_INET;
1771
1772         if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1773                 goto nla_put_failure;
1774
1775         /* type -1 is used for ALL */
1776         if ((type == -1 || type == NETCONFA_FORWARDING) &&
1777             nla_put_s32(skb, NETCONFA_FORWARDING,
1778                         IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1779                 goto nla_put_failure;
1780         if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1781             nla_put_s32(skb, NETCONFA_RP_FILTER,
1782                         IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1783                 goto nla_put_failure;
1784         if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1785             nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1786                         IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1787                 goto nla_put_failure;
1788         if ((type == -1 || type == NETCONFA_PROXY_NEIGH) &&
1789             nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1790                         IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1791                 goto nla_put_failure;
1792         if ((type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
1793             nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
1794                         IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
1795                 goto nla_put_failure;
1796
1797         nlmsg_end(skb, nlh);
1798         return 0;
1799
1800 nla_put_failure:
1801         nlmsg_cancel(skb, nlh);
1802         return -EMSGSIZE;
1803 }
1804
1805 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1806                                  struct ipv4_devconf *devconf)
1807 {
1808         struct sk_buff *skb;
1809         int err = -ENOBUFS;
1810
1811         skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1812         if (!skb)
1813                 goto errout;
1814
1815         err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1816                                         RTM_NEWNETCONF, 0, type);
1817         if (err < 0) {
1818                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1819                 WARN_ON(err == -EMSGSIZE);
1820                 kfree_skb(skb);
1821                 goto errout;
1822         }
1823         rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1824         return;
1825 errout:
1826         if (err < 0)
1827                 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1828 }
1829
1830 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1831         [NETCONFA_IFINDEX]      = { .len = sizeof(int) },
1832         [NETCONFA_FORWARDING]   = { .len = sizeof(int) },
1833         [NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
1834         [NETCONFA_PROXY_NEIGH]  = { .len = sizeof(int) },
1835         [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]  = { .len = sizeof(int) },
1836 };
1837
1838 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1839                                     struct nlmsghdr *nlh)
1840 {
1841         struct net *net = sock_net(in_skb->sk);
1842         struct nlattr *tb[NETCONFA_MAX+1];
1843         struct netconfmsg *ncm;
1844         struct sk_buff *skb;
1845         struct ipv4_devconf *devconf;
1846         struct in_device *in_dev;
1847         struct net_device *dev;
1848         int ifindex;
1849         int err;
1850
1851         err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1852                           devconf_ipv4_policy);
1853         if (err < 0)
1854                 goto errout;
1855
1856         err = -EINVAL;
1857         if (!tb[NETCONFA_IFINDEX])
1858                 goto errout;
1859
1860         ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1861         switch (ifindex) {
1862         case NETCONFA_IFINDEX_ALL:
1863                 devconf = net->ipv4.devconf_all;
1864                 break;
1865         case NETCONFA_IFINDEX_DEFAULT:
1866                 devconf = net->ipv4.devconf_dflt;
1867                 break;
1868         default:
1869                 dev = __dev_get_by_index(net, ifindex);
1870                 if (!dev)
1871                         goto errout;
1872                 in_dev = __in_dev_get_rtnl(dev);
1873                 if (!in_dev)
1874                         goto errout;
1875                 devconf = &in_dev->cnf;
1876                 break;
1877         }
1878
1879         err = -ENOBUFS;
1880         skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1881         if (!skb)
1882                 goto errout;
1883
1884         err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1885                                         NETLINK_CB(in_skb).portid,
1886                                         nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1887                                         -1);
1888         if (err < 0) {
1889                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1890                 WARN_ON(err == -EMSGSIZE);
1891                 kfree_skb(skb);
1892                 goto errout;
1893         }
1894         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1895 errout:
1896         return err;
1897 }
1898
1899 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1900                                      struct netlink_callback *cb)
1901 {
1902         struct net *net = sock_net(skb->sk);
1903         int h, s_h;
1904         int idx, s_idx;
1905         struct net_device *dev;
1906         struct in_device *in_dev;
1907         struct hlist_head *head;
1908
1909         s_h = cb->args[0];
1910         s_idx = idx = cb->args[1];
1911
1912         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1913                 idx = 0;
1914                 head = &net->dev_index_head[h];
1915                 rcu_read_lock();
1916                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1917                           net->dev_base_seq;
1918                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1919                         if (idx < s_idx)
1920                                 goto cont;
1921                         in_dev = __in_dev_get_rcu(dev);
1922                         if (!in_dev)
1923                                 goto cont;
1924
1925                         if (inet_netconf_fill_devconf(skb, dev->ifindex,
1926                                                       &in_dev->cnf,
1927                                                       NETLINK_CB(cb->skb).portid,
1928                                                       cb->nlh->nlmsg_seq,
1929                                                       RTM_NEWNETCONF,
1930                                                       NLM_F_MULTI,
1931                                                       -1) < 0) {
1932                                 rcu_read_unlock();
1933                                 goto done;
1934                         }
1935                         nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1936 cont:
1937                         idx++;
1938                 }
1939                 rcu_read_unlock();
1940         }
1941         if (h == NETDEV_HASHENTRIES) {
1942                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
1943                                               net->ipv4.devconf_all,
1944                                               NETLINK_CB(cb->skb).portid,
1945                                               cb->nlh->nlmsg_seq,
1946                                               RTM_NEWNETCONF, NLM_F_MULTI,
1947                                               -1) < 0)
1948                         goto done;
1949                 else
1950                         h++;
1951         }
1952         if (h == NETDEV_HASHENTRIES + 1) {
1953                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
1954                                               net->ipv4.devconf_dflt,
1955                                               NETLINK_CB(cb->skb).portid,
1956                                               cb->nlh->nlmsg_seq,
1957                                               RTM_NEWNETCONF, NLM_F_MULTI,
1958                                               -1) < 0)
1959                         goto done;
1960                 else
1961                         h++;
1962         }
1963 done:
1964         cb->args[0] = h;
1965         cb->args[1] = idx;
1966
1967         return skb->len;
1968 }
1969
1970 #ifdef CONFIG_SYSCTL
1971
1972 static void devinet_copy_dflt_conf(struct net *net, int i)
1973 {
1974         struct net_device *dev;
1975
1976         rcu_read_lock();
1977         for_each_netdev_rcu(net, dev) {
1978                 struct in_device *in_dev;
1979
1980                 in_dev = __in_dev_get_rcu(dev);
1981                 if (in_dev && !test_bit(i, in_dev->cnf.state))
1982                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1983         }
1984         rcu_read_unlock();
1985 }
1986
1987 /* called with RTNL locked */
1988 static void inet_forward_change(struct net *net)
1989 {
1990         struct net_device *dev;
1991         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1992
1993         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1994         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1995         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1996                                     NETCONFA_IFINDEX_ALL,
1997                                     net->ipv4.devconf_all);
1998         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1999                                     NETCONFA_IFINDEX_DEFAULT,
2000                                     net->ipv4.devconf_dflt);
2001
2002         for_each_netdev(net, dev) {
2003                 struct in_device *in_dev;
2004                 if (on)
2005                         dev_disable_lro(dev);
2006                 rcu_read_lock();
2007                 in_dev = __in_dev_get_rcu(dev);
2008                 if (in_dev) {
2009                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2010                         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2011                                                     dev->ifindex, &in_dev->cnf);
2012                 }
2013                 rcu_read_unlock();
2014         }
2015 }
2016
2017 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2018 {
2019         if (cnf == net->ipv4.devconf_dflt)
2020                 return NETCONFA_IFINDEX_DEFAULT;
2021         else if (cnf == net->ipv4.devconf_all)
2022                 return NETCONFA_IFINDEX_ALL;
2023         else {
2024                 struct in_device *idev
2025                         = container_of(cnf, struct in_device, cnf);
2026                 return idev->dev->ifindex;
2027         }
2028 }
2029
2030 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2031                              void __user *buffer,
2032                              size_t *lenp, loff_t *ppos)
2033 {
2034         int old_value = *(int *)ctl->data;
2035         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2036         int new_value = *(int *)ctl->data;
2037
2038         if (write) {
2039                 struct ipv4_devconf *cnf = ctl->extra1;
2040                 struct net *net = ctl->extra2;
2041                 int i = (int *)ctl->data - cnf->data;
2042                 int ifindex;
2043
2044                 set_bit(i, cnf->state);
2045
2046                 if (cnf == net->ipv4.devconf_dflt)
2047                         devinet_copy_dflt_conf(net, i);
2048                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2049                     i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2050                         if ((new_value == 0) && (old_value != 0))
2051                                 rt_cache_flush(net);
2052
2053                 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2054                     new_value != old_value) {
2055                         ifindex = devinet_conf_ifindex(net, cnf);
2056                         inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
2057                                                     ifindex, cnf);
2058                 }
2059                 if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2060                     new_value != old_value) {
2061                         ifindex = devinet_conf_ifindex(net, cnf);
2062                         inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
2063                                                     ifindex, cnf);
2064                 }
2065                 if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2066                     new_value != old_value) {
2067                         ifindex = devinet_conf_ifindex(net, cnf);
2068                         inet_netconf_notify_devconf(net, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2069                                                     ifindex, cnf);
2070                 }
2071         }
2072
2073         return ret;
2074 }
2075
2076 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2077                                   void __user *buffer,
2078                                   size_t *lenp, loff_t *ppos)
2079 {
2080         int *valp = ctl->data;
2081         int val = *valp;
2082         loff_t pos = *ppos;
2083         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2084
2085         if (write && *valp != val) {
2086                 struct net *net = ctl->extra2;
2087
2088                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2089                         if (!rtnl_trylock()) {
2090                                 /* Restore the original values before restarting */
2091                                 *valp = val;
2092                                 *ppos = pos;
2093                                 return restart_syscall();
2094                         }
2095                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2096                                 inet_forward_change(net);
2097                         } else {
2098                                 struct ipv4_devconf *cnf = ctl->extra1;
2099                                 struct in_device *idev =
2100                                         container_of(cnf, struct in_device, cnf);
2101                                 if (*valp)
2102                                         dev_disable_lro(idev->dev);
2103                                 inet_netconf_notify_devconf(net,
2104                                                             NETCONFA_FORWARDING,
2105                                                             idev->dev->ifindex,
2106                                                             cnf);
2107                         }
2108                         rtnl_unlock();
2109                         rt_cache_flush(net);
2110                 } else
2111                         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2112                                                     NETCONFA_IFINDEX_DEFAULT,
2113                                                     net->ipv4.devconf_dflt);
2114         }
2115
2116         return ret;
2117 }
2118
2119 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2120                                 void __user *buffer,
2121                                 size_t *lenp, loff_t *ppos)
2122 {
2123         int *valp = ctl->data;
2124         int val = *valp;
2125         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2126         struct net *net = ctl->extra2;
2127
2128         if (write && *valp != val)
2129                 rt_cache_flush(net);
2130
2131         return ret;
2132 }
2133
/*
 * Build one ctl_table initializer for devconf slot IPV4_DEVCONF_<conf_id>.
 * .data points at the matching element of the global ipv4_devconf
 * (slots are stored at index id - 1) and .extra1 at ipv4_devconf itself.
 */
#define DEVINET_SYSCTL_ENTRY(conf_id, fname, perm, handler)		\
	{								\
		.procname	= fname,				\
		.data		= &ipv4_devconf.data[			\
					IPV4_DEVCONF_ ## conf_id - 1],	\
		.maxlen		= sizeof(int),				\
		.mode		= perm,					\
		.proc_handler	= handler,				\
		.extra1		= &ipv4_devconf,			\
	}

/* Mode 0644 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(conf_id, fname)				\
	DEVINET_SYSCTL_ENTRY(conf_id, fname, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(conf_id, fname)				\
	DEVINET_SYSCTL_ENTRY(conf_id, fname, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(conf_id, fname, handler)		\
	DEVINET_SYSCTL_ENTRY(conf_id, fname, 0644, handler)

/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(conf_id, fname)			\
	DEVINET_SYSCTL_COMPLEX_ENTRY(conf_id, fname, ipv4_doint_and_flush)
2156
2157 static struct devinet_sysctl_table {
2158         struct ctl_table_header *sysctl_header;
2159         struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2160 } devinet_sysctl = {
2161         .devinet_vars = {
2162                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2163                                              devinet_sysctl_forward),
2164                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2165
2166                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2167                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2168                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2169                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2170                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2171                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2172                                         "accept_source_route"),
2173                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2174                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2175                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2176                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2177                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2178                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2179                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2180                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2181                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2182                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2183                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2184                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2185                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2186                 DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2187                                         "force_igmp_version"),
2188                 DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2189                                         "igmpv2_unsolicited_report_interval"),
2190                 DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2191                                         "igmpv3_unsolicited_report_interval"),
2192                 DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2193                                         "ignore_routes_with_linkdown"),
2194
2195                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2196                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2197                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2198                                               "promote_secondaries"),
2199                 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2200                                               "route_localnet"),
2201         },
2202 };
2203
2204 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2205                                         struct ipv4_devconf *p)
2206 {
2207         int i;
2208         struct devinet_sysctl_table *t;
2209         char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2210
2211         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2212         if (!t)
2213                 goto out;
2214
2215         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2216                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2217                 t->devinet_vars[i].extra1 = p;
2218                 t->devinet_vars[i].extra2 = net;
2219         }
2220
2221         snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2222
2223         t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2224         if (!t->sysctl_header)
2225                 goto free;
2226
2227         p->sysctl = t;
2228         return 0;
2229
2230 free:
2231         kfree(t);
2232 out:
2233         return -ENOBUFS;
2234 }
2235
2236 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2237 {
2238         struct devinet_sysctl_table *t = cnf->sysctl;
2239
2240         if (!t)
2241                 return;
2242
2243         cnf->sysctl = NULL;
2244         unregister_net_sysctl_table(t->sysctl_header);
2245         kfree(t);
2246 }
2247
2248 static int devinet_sysctl_register(struct in_device *idev)
2249 {
2250         int err;
2251
2252         if (!sysctl_dev_name_is_allowed(idev->dev->name))
2253                 return -EINVAL;
2254
2255         err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2256         if (err)
2257                 return err;
2258         err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2259                                         &idev->cnf);
2260         if (err)
2261                 neigh_sysctl_unregister(idev->arp_parms);
2262         return err;
2263 }
2264
2265 static void devinet_sysctl_unregister(struct in_device *idev)
2266 {
2267         __devinet_sysctl_unregister(&idev->cnf);
2268         neigh_sysctl_unregister(idev->arp_parms);
2269 }
2270
2271 static struct ctl_table ctl_forward_entry[] = {
2272         {
2273                 .procname       = "ip_forward",
2274                 .data           = &ipv4_devconf.data[
2275                                         IPV4_DEVCONF_FORWARDING - 1],
2276                 .maxlen         = sizeof(int),
2277                 .mode           = 0644,
2278                 .proc_handler   = devinet_sysctl_forward,
2279                 .extra1         = &ipv4_devconf,
2280                 .extra2         = &init_net,
2281         },
2282         { },
2283 };
2284 #endif
2285
2286 static __net_init int devinet_init_net(struct net *net)
2287 {
2288         int err;
2289         struct ipv4_devconf *all, *dflt;
2290 #ifdef CONFIG_SYSCTL
2291         struct ctl_table *tbl = ctl_forward_entry;
2292         struct ctl_table_header *forw_hdr;
2293 #endif
2294
2295         err = -ENOMEM;
2296         all = &ipv4_devconf;
2297         dflt = &ipv4_devconf_dflt;
2298
2299         if (!net_eq(net, &init_net)) {
2300                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2301                 if (!all)
2302                         goto err_alloc_all;
2303
2304                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2305                 if (!dflt)
2306                         goto err_alloc_dflt;
2307
2308 #ifdef CONFIG_SYSCTL
2309                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2310                 if (!tbl)
2311                         goto err_alloc_ctl;
2312
2313                 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2314                 tbl[0].extra1 = all;
2315                 tbl[0].extra2 = net;
2316 #endif
2317         }
2318
2319 #ifdef CONFIG_SYSCTL
2320         err = __devinet_sysctl_register(net, "all", all);
2321         if (err < 0)
2322                 goto err_reg_all;
2323
2324         err = __devinet_sysctl_register(net, "default", dflt);
2325         if (err < 0)
2326                 goto err_reg_dflt;
2327
2328         err = -ENOMEM;
2329         forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2330         if (!forw_hdr)
2331                 goto err_reg_ctl;
2332         net->ipv4.forw_hdr = forw_hdr;
2333 #endif
2334
2335         net->ipv4.devconf_all = all;
2336         net->ipv4.devconf_dflt = dflt;
2337         return 0;
2338
2339 #ifdef CONFIG_SYSCTL
2340 err_reg_ctl:
2341         __devinet_sysctl_unregister(dflt);
2342 err_reg_dflt:
2343         __devinet_sysctl_unregister(all);
2344 err_reg_all:
2345         if (tbl != ctl_forward_entry)
2346                 kfree(tbl);
2347 err_alloc_ctl:
2348 #endif
2349         if (dflt != &ipv4_devconf_dflt)
2350                 kfree(dflt);
2351 err_alloc_dflt:
2352         if (all != &ipv4_devconf)
2353                 kfree(all);
2354 err_alloc_all:
2355         return err;
2356 }
2357
2358 static __net_exit void devinet_exit_net(struct net *net)
2359 {
2360 #ifdef CONFIG_SYSCTL
2361         struct ctl_table *tbl;
2362
2363         tbl = net->ipv4.forw_hdr->ctl_table_arg;
2364         unregister_net_sysctl_table(net->ipv4.forw_hdr);
2365         __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2366         __devinet_sysctl_unregister(net->ipv4.devconf_all);
2367         kfree(tbl);
2368 #endif
2369         kfree(net->ipv4.devconf_dflt);
2370         kfree(net->ipv4.devconf_all);
2371 }
2372
2373 static __net_initdata struct pernet_operations devinet_ops = {
2374         .init = devinet_init_net,
2375         .exit = devinet_exit_net,
2376 };
2377
2378 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2379         .family           = AF_INET,
2380         .fill_link_af     = inet_fill_link_af,
2381         .get_link_af_size = inet_get_link_af_size,
2382         .validate_link_af = inet_validate_link_af,
2383         .set_link_af      = inet_set_link_af,
2384 };
2385
2386 void __init devinet_init(void)
2387 {
2388         int i;
2389
2390         for (i = 0; i < IN4_ADDR_HSIZE; i++)
2391                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2392
2393         register_pernet_subsys(&devinet_ops);
2394
2395         register_gifconf(PF_INET, inet_gifconf);
2396         register_netdevice_notifier(&ip_netdev_notifier);
2397
2398         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2399
2400         rtnl_af_register(&inet_af_ops);
2401
2402         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2403         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2404         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2405         rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2406                       inet_netconf_dump_devconf, NULL);
2407 }