Merge branch 'lsk-v4.4-eas-v5.2' of git://git.linaro.org/arm/eas/kernel.git
[firefly-linux-kernel-4.4.55.git] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
59
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/tcp.h>
63 #include <net/route.h>
64 #include <net/ip_fib.h>
65 #include <net/rtnetlink.h>
66 #include <net/net_namespace.h>
67 #include <net/addrconf.h>
68
69 #include "fib_lookup.h"
70
71 static struct ipv4_devconf ipv4_devconf = {
72         .data = {
73                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
74                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
75                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
76                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
77                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
78                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
79         },
80 };
81
82 static struct ipv4_devconf ipv4_devconf_dflt = {
83         .data = {
84                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
85                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
86                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
87                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
88                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
89                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
90                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
91         },
92 };
93
/*
 * Read configuration attribute @attr from @net->ipv4.devconf_dflt.
 * @net is parenthesised so that any pointer-valued expression (not only a
 * plain identifier) expands with the intended precedence.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
        IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
96
97 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
98         [IFA_LOCAL]             = { .type = NLA_U32 },
99         [IFA_ADDRESS]           = { .type = NLA_U32 },
100         [IFA_BROADCAST]         = { .type = NLA_U32 },
101         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
102         [IFA_CACHEINFO]         = { .len = sizeof(struct ifa_cacheinfo) },
103         [IFA_FLAGS]             = { .type = NLA_U32 },
104 };
105
106 #define IN4_ADDR_HSIZE_SHIFT    8
107 #define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)
108
109 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
110
111 static u32 inet_addr_hash(const struct net *net, __be32 addr)
112 {
113         u32 val = (__force u32) addr ^ net_hash_mix(net);
114
115         return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
116 }
117
118 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
119 {
120         u32 hash = inet_addr_hash(net, ifa->ifa_local);
121
122         ASSERT_RTNL();
123         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
124 }
125
126 static void inet_hash_remove(struct in_ifaddr *ifa)
127 {
128         ASSERT_RTNL();
129         hlist_del_init_rcu(&ifa->hash);
130 }
131
132 /**
133  * __ip_dev_find - find the first device with a given source address.
134  * @net: the net namespace
135  * @addr: the source address
136  * @devref: if true, take a reference on the found device
137  *
138  * If a caller uses devref=false, it should be protected by RCU, or RTNL
139  */
140 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
141 {
142         u32 hash = inet_addr_hash(net, addr);
143         struct net_device *result = NULL;
144         struct in_ifaddr *ifa;
145
146         rcu_read_lock();
147         hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
148                 if (ifa->ifa_local == addr) {
149                         struct net_device *dev = ifa->ifa_dev->dev;
150
151                         if (!net_eq(dev_net(dev), net))
152                                 continue;
153                         result = dev;
154                         break;
155                 }
156         }
157         if (!result) {
158                 struct flowi4 fl4 = { .daddr = addr };
159                 struct fib_result res = { 0 };
160                 struct fib_table *local;
161
162                 /* Fallback to FIB local table so that communication
163                  * over loopback subnets work.
164                  */
165                 local = fib_get_table(net, RT_TABLE_LOCAL);
166                 if (local &&
167                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
168                     res.type == RTN_LOCAL)
169                         result = FIB_RES_DEV(res);
170         }
171         if (result && devref)
172                 dev_hold(result);
173         rcu_read_unlock();
174         return result;
175 }
176 EXPORT_SYMBOL(__ip_dev_find);
177
178 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
179
180 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
181 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
182                          int destroy);
183 #ifdef CONFIG_SYSCTL
184 static int devinet_sysctl_register(struct in_device *idev);
185 static void devinet_sysctl_unregister(struct in_device *idev);
186 #else
187 static int devinet_sysctl_register(struct in_device *idev)
188 {
189         return 0;
190 }
191 static void devinet_sysctl_unregister(struct in_device *idev)
192 {
193 }
194 #endif
195
196 /* Locks all the inet devices. */
197
198 static struct in_ifaddr *inet_alloc_ifa(void)
199 {
200         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
201 }
202
203 static void inet_rcu_free_ifa(struct rcu_head *head)
204 {
205         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
206         if (ifa->ifa_dev)
207                 in_dev_put(ifa->ifa_dev);
208         kfree(ifa);
209 }
210
211 static void inet_free_ifa(struct in_ifaddr *ifa)
212 {
213         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
214 }
215
216 void in_dev_finish_destroy(struct in_device *idev)
217 {
218         struct net_device *dev = idev->dev;
219
220         WARN_ON(idev->ifa_list);
221         WARN_ON(idev->mc_list);
222         kfree(rcu_dereference_protected(idev->mc_hash, 1));
223 #ifdef NET_REFCNT_DEBUG
224         pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
225 #endif
226         dev_put(dev);
227         if (!idev->dead)
228                 pr_err("Freeing alive in_device %p\n", idev);
229         else
230                 kfree(idev);
231 }
232 EXPORT_SYMBOL(in_dev_finish_destroy);
233
234 static struct in_device *inetdev_init(struct net_device *dev)
235 {
236         struct in_device *in_dev;
237         int err = -ENOMEM;
238
239         ASSERT_RTNL();
240
241         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
242         if (!in_dev)
243                 goto out;
244         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
245                         sizeof(in_dev->cnf));
246         in_dev->cnf.sysctl = NULL;
247         in_dev->dev = dev;
248         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
249         if (!in_dev->arp_parms)
250                 goto out_kfree;
251         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
252                 dev_disable_lro(dev);
253         /* Reference in_dev->dev */
254         dev_hold(dev);
255         /* Account for reference dev->ip_ptr (below) */
256         in_dev_hold(in_dev);
257
258         err = devinet_sysctl_register(in_dev);
259         if (err) {
260                 in_dev->dead = 1;
261                 in_dev_put(in_dev);
262                 in_dev = NULL;
263                 goto out;
264         }
265         ip_mc_init_dev(in_dev);
266         if (dev->flags & IFF_UP)
267                 ip_mc_up(in_dev);
268
269         /* we can receive as soon as ip_ptr is set -- do this last */
270         rcu_assign_pointer(dev->ip_ptr, in_dev);
271 out:
272         return in_dev ?: ERR_PTR(err);
273 out_kfree:
274         kfree(in_dev);
275         in_dev = NULL;
276         goto out;
277 }
278
279 static void in_dev_rcu_put(struct rcu_head *head)
280 {
281         struct in_device *idev = container_of(head, struct in_device, rcu_head);
282         in_dev_put(idev);
283 }
284
285 static void inetdev_destroy(struct in_device *in_dev)
286 {
287         struct in_ifaddr *ifa;
288         struct net_device *dev;
289
290         ASSERT_RTNL();
291
292         dev = in_dev->dev;
293
294         in_dev->dead = 1;
295
296         ip_mc_destroy_dev(in_dev);
297
298         while ((ifa = in_dev->ifa_list) != NULL) {
299                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
300                 inet_free_ifa(ifa);
301         }
302
303         RCU_INIT_POINTER(dev->ip_ptr, NULL);
304
305         devinet_sysctl_unregister(in_dev);
306         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
307         arp_ifdown(dev);
308
309         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
310 }
311
312 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
313 {
314         rcu_read_lock();
315         for_primary_ifa(in_dev) {
316                 if (inet_ifa_match(a, ifa)) {
317                         if (!b || inet_ifa_match(b, ifa)) {
318                                 rcu_read_unlock();
319                                 return 1;
320                         }
321                 }
322         } endfor_ifa(in_dev);
323         rcu_read_unlock();
324         return 0;
325 }
326
327 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
328                          int destroy, struct nlmsghdr *nlh, u32 portid)
329 {
330         struct in_ifaddr *promote = NULL;
331         struct in_ifaddr *ifa, *ifa1 = *ifap;
332         struct in_ifaddr *last_prim = in_dev->ifa_list;
333         struct in_ifaddr *prev_prom = NULL;
334         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
335
336         ASSERT_RTNL();
337
338         if (in_dev->dead)
339                 goto no_promotions;
340
341         /* 1. Deleting primary ifaddr forces deletion all secondaries
342          * unless alias promotion is set
343          **/
344
345         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
346                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
347
348                 while ((ifa = *ifap1) != NULL) {
349                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
350                             ifa1->ifa_scope <= ifa->ifa_scope)
351                                 last_prim = ifa;
352
353                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
354                             ifa1->ifa_mask != ifa->ifa_mask ||
355                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
356                                 ifap1 = &ifa->ifa_next;
357                                 prev_prom = ifa;
358                                 continue;
359                         }
360
361                         if (!do_promote) {
362                                 inet_hash_remove(ifa);
363                                 *ifap1 = ifa->ifa_next;
364
365                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
366                                 blocking_notifier_call_chain(&inetaddr_chain,
367                                                 NETDEV_DOWN, ifa);
368                                 inet_free_ifa(ifa);
369                         } else {
370                                 promote = ifa;
371                                 break;
372                         }
373                 }
374         }
375
376         /* On promotion all secondaries from subnet are changing
377          * the primary IP, we must remove all their routes silently
378          * and later to add them back with new prefsrc. Do this
379          * while all addresses are on the device list.
380          */
381         for (ifa = promote; ifa; ifa = ifa->ifa_next) {
382                 if (ifa1->ifa_mask == ifa->ifa_mask &&
383                     inet_ifa_match(ifa1->ifa_address, ifa))
384                         fib_del_ifaddr(ifa, ifa1);
385         }
386
387 no_promotions:
388         /* 2. Unlink it */
389
390         *ifap = ifa1->ifa_next;
391         inet_hash_remove(ifa1);
392
393         /* 3. Announce address deletion */
394
395         /* Send message first, then call notifier.
396            At first sight, FIB update triggered by notifier
397            will refer to already deleted ifaddr, that could confuse
398            netlink listeners. It is not true: look, gated sees
399            that route deleted and if it still thinks that ifaddr
400            is valid, it will try to restore deleted routes... Grr.
401            So that, this order is correct.
402          */
403         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
404         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
405
406         if (promote) {
407                 struct in_ifaddr *next_sec = promote->ifa_next;
408
409                 if (prev_prom) {
410                         prev_prom->ifa_next = promote->ifa_next;
411                         promote->ifa_next = last_prim->ifa_next;
412                         last_prim->ifa_next = promote;
413                 }
414
415                 promote->ifa_flags &= ~IFA_F_SECONDARY;
416                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
417                 blocking_notifier_call_chain(&inetaddr_chain,
418                                 NETDEV_UP, promote);
419                 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
420                         if (ifa1->ifa_mask != ifa->ifa_mask ||
421                             !inet_ifa_match(ifa1->ifa_address, ifa))
422                                         continue;
423                         fib_add_ifaddr(ifa);
424                 }
425
426         }
427         if (destroy)
428                 inet_free_ifa(ifa1);
429 }
430
431 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
432                          int destroy)
433 {
434         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
435 }
436
/* Handler of the delayed work below; defined further down in this file. */
static void check_lifetime(struct work_struct *work);

/* Delayed work item that runs check_lifetime(). */
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
440
441 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
442                              u32 portid)
443 {
444         struct in_device *in_dev = ifa->ifa_dev;
445         struct in_ifaddr *ifa1, **ifap, **last_primary;
446
447         ASSERT_RTNL();
448
449         if (!ifa->ifa_local) {
450                 inet_free_ifa(ifa);
451                 return 0;
452         }
453
454         ifa->ifa_flags &= ~IFA_F_SECONDARY;
455         last_primary = &in_dev->ifa_list;
456
457         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
458              ifap = &ifa1->ifa_next) {
459                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
460                     ifa->ifa_scope <= ifa1->ifa_scope)
461                         last_primary = &ifa1->ifa_next;
462                 if (ifa1->ifa_mask == ifa->ifa_mask &&
463                     inet_ifa_match(ifa1->ifa_address, ifa)) {
464                         if (ifa1->ifa_local == ifa->ifa_local) {
465                                 inet_free_ifa(ifa);
466                                 return -EEXIST;
467                         }
468                         if (ifa1->ifa_scope != ifa->ifa_scope) {
469                                 inet_free_ifa(ifa);
470                                 return -EINVAL;
471                         }
472                         ifa->ifa_flags |= IFA_F_SECONDARY;
473                 }
474         }
475
476         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
477                 prandom_seed((__force u32) ifa->ifa_local);
478                 ifap = last_primary;
479         }
480
481         ifa->ifa_next = *ifap;
482         *ifap = ifa;
483
484         inet_hash_insert(dev_net(in_dev->dev), ifa);
485
486         cancel_delayed_work(&check_lifetime_work);
487         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
488
489         /* Send message first, then call notifier.
490            Notifier will trigger FIB update, so that
491            listeners of netlink will know about new ifaddr */
492         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
493         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
494
495         return 0;
496 }
497
498 static int inet_insert_ifa(struct in_ifaddr *ifa)
499 {
500         return __inet_insert_ifa(ifa, NULL, 0);
501 }
502
503 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
504 {
505         struct in_device *in_dev = __in_dev_get_rtnl(dev);
506
507         ASSERT_RTNL();
508
509         if (!in_dev) {
510                 inet_free_ifa(ifa);
511                 return -ENOBUFS;
512         }
513         ipv4_devconf_setall(in_dev);
514         neigh_parms_data_state_setall(in_dev->arp_parms);
515         if (ifa->ifa_dev != in_dev) {
516                 WARN_ON(ifa->ifa_dev);
517                 in_dev_hold(in_dev);
518                 ifa->ifa_dev = in_dev;
519         }
520         if (ipv4_is_loopback(ifa->ifa_local))
521                 ifa->ifa_scope = RT_SCOPE_HOST;
522         return inet_insert_ifa(ifa);
523 }
524
525 /* Caller must hold RCU or RTNL :
526  * We dont take a reference on found in_device
527  */
528 struct in_device *inetdev_by_index(struct net *net, int ifindex)
529 {
530         struct net_device *dev;
531         struct in_device *in_dev = NULL;
532
533         rcu_read_lock();
534         dev = dev_get_by_index_rcu(net, ifindex);
535         if (dev)
536                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
537         rcu_read_unlock();
538         return in_dev;
539 }
540 EXPORT_SYMBOL(inetdev_by_index);
541
542 /* Called only from RTNL semaphored context. No locks. */
543
544 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
545                                     __be32 mask)
546 {
547         ASSERT_RTNL();
548
549         for_primary_ifa(in_dev) {
550                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
551                         return ifa;
552         } endfor_ifa(in_dev);
553         return NULL;
554 }
555
556 static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
557 {
558         struct ip_mreqn mreq = {
559                 .imr_multiaddr.s_addr = ifa->ifa_address,
560                 .imr_ifindex = ifa->ifa_dev->dev->ifindex,
561         };
562         int ret;
563
564         ASSERT_RTNL();
565
566         lock_sock(sk);
567         if (join)
568                 ret = ip_mc_join_group(sk, &mreq);
569         else
570                 ret = ip_mc_leave_group(sk, &mreq);
571         release_sock(sk);
572
573         return ret;
574 }
575
576 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
577 {
578         struct net *net = sock_net(skb->sk);
579         struct nlattr *tb[IFA_MAX+1];
580         struct in_device *in_dev;
581         struct ifaddrmsg *ifm;
582         struct in_ifaddr *ifa, **ifap;
583         int err = -EINVAL;
584
585         ASSERT_RTNL();
586
587         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
588         if (err < 0)
589                 goto errout;
590
591         ifm = nlmsg_data(nlh);
592         in_dev = inetdev_by_index(net, ifm->ifa_index);
593         if (!in_dev) {
594                 err = -ENODEV;
595                 goto errout;
596         }
597
598         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
599              ifap = &ifa->ifa_next) {
600                 if (tb[IFA_LOCAL] &&
601                     ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
602                         continue;
603
604                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
605                         continue;
606
607                 if (tb[IFA_ADDRESS] &&
608                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
609                     !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
610                         continue;
611
612                 if (ipv4_is_multicast(ifa->ifa_address))
613                         ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
614                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
615                 return 0;
616         }
617
618         err = -EADDRNOTAVAIL;
619 errout:
620         return err;
621 }
622
623 #define INFINITY_LIFE_TIME      0xFFFFFFFF
624
625 static void check_lifetime(struct work_struct *work)
626 {
627         unsigned long now, next, next_sec, next_sched;
628         struct in_ifaddr *ifa;
629         struct hlist_node *n;
630         int i;
631
632         now = jiffies;
633         next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
634
635         for (i = 0; i < IN4_ADDR_HSIZE; i++) {
636                 bool change_needed = false;
637
638                 rcu_read_lock();
639                 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
640                         unsigned long age;
641
642                         if (ifa->ifa_flags & IFA_F_PERMANENT)
643                                 continue;
644
645                         /* We try to batch several events at once. */
646                         age = (now - ifa->ifa_tstamp +
647                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
648
649                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
650                             age >= ifa->ifa_valid_lft) {
651                                 change_needed = true;
652                         } else if (ifa->ifa_preferred_lft ==
653                                    INFINITY_LIFE_TIME) {
654                                 continue;
655                         } else if (age >= ifa->ifa_preferred_lft) {
656                                 if (time_before(ifa->ifa_tstamp +
657                                                 ifa->ifa_valid_lft * HZ, next))
658                                         next = ifa->ifa_tstamp +
659                                                ifa->ifa_valid_lft * HZ;
660
661                                 if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
662                                         change_needed = true;
663                         } else if (time_before(ifa->ifa_tstamp +
664                                                ifa->ifa_preferred_lft * HZ,
665                                                next)) {
666                                 next = ifa->ifa_tstamp +
667                                        ifa->ifa_preferred_lft * HZ;
668                         }
669                 }
670                 rcu_read_unlock();
671                 if (!change_needed)
672                         continue;
673                 rtnl_lock();
674                 hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
675                         unsigned long age;
676
677                         if (ifa->ifa_flags & IFA_F_PERMANENT)
678                                 continue;
679
680                         /* We try to batch several events at once. */
681                         age = (now - ifa->ifa_tstamp +
682                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
683
684                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
685                             age >= ifa->ifa_valid_lft) {
686                                 struct in_ifaddr **ifap;
687
688                                 for (ifap = &ifa->ifa_dev->ifa_list;
689                                      *ifap != NULL; ifap = &(*ifap)->ifa_next) {
690                                         if (*ifap == ifa) {
691                                                 inet_del_ifa(ifa->ifa_dev,
692                                                              ifap, 1);
693                                                 break;
694                                         }
695                                 }
696                         } else if (ifa->ifa_preferred_lft !=
697                                    INFINITY_LIFE_TIME &&
698                                    age >= ifa->ifa_preferred_lft &&
699                                    !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
700                                 ifa->ifa_flags |= IFA_F_DEPRECATED;
701                                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
702                         }
703                 }
704                 rtnl_unlock();
705         }
706
707         next_sec = round_jiffies_up(next);
708         next_sched = next;
709
710         /* If rounded timeout is accurate enough, accept it. */
711         if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
712                 next_sched = next_sec;
713
714         now = jiffies;
715         /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
716         if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
717                 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
718
719         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
720                         next_sched - now);
721 }
722
723 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
724                              __u32 prefered_lft)
725 {
726         unsigned long timeout;
727
728         ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
729
730         timeout = addrconf_timeout_fixup(valid_lft, HZ);
731         if (addrconf_finite_timeout(timeout))
732                 ifa->ifa_valid_lft = timeout;
733         else
734                 ifa->ifa_flags |= IFA_F_PERMANENT;
735
736         timeout = addrconf_timeout_fixup(prefered_lft, HZ);
737         if (addrconf_finite_timeout(timeout)) {
738                 if (timeout == 0)
739                         ifa->ifa_flags |= IFA_F_DEPRECATED;
740                 ifa->ifa_preferred_lft = timeout;
741         }
742         ifa->ifa_tstamp = jiffies;
743         if (!ifa->ifa_cstamp)
744                 ifa->ifa_cstamp = ifa->ifa_tstamp;
745 }
746
747 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
748                                        __u32 *pvalid_lft, __u32 *pprefered_lft)
749 {
750         struct nlattr *tb[IFA_MAX+1];
751         struct in_ifaddr *ifa;
752         struct ifaddrmsg *ifm;
753         struct net_device *dev;
754         struct in_device *in_dev;
755         int err;
756
757         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
758         if (err < 0)
759                 goto errout;
760
761         ifm = nlmsg_data(nlh);
762         err = -EINVAL;
763         if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
764                 goto errout;
765
766         dev = __dev_get_by_index(net, ifm->ifa_index);
767         err = -ENODEV;
768         if (!dev)
769                 goto errout;
770
771         in_dev = __in_dev_get_rtnl(dev);
772         err = -ENOBUFS;
773         if (!in_dev)
774                 goto errout;
775
776         ifa = inet_alloc_ifa();
777         if (!ifa)
778                 /*
779                  * A potential indev allocation can be left alive, it stays
780                  * assigned to its device and is destroy with it.
781                  */
782                 goto errout;
783
784         ipv4_devconf_setall(in_dev);
785         neigh_parms_data_state_setall(in_dev->arp_parms);
786         in_dev_hold(in_dev);
787
788         if (!tb[IFA_ADDRESS])
789                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
790
791         INIT_HLIST_NODE(&ifa->hash);
792         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
793         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
794         ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
795                                          ifm->ifa_flags;
796         ifa->ifa_scope = ifm->ifa_scope;
797         ifa->ifa_dev = in_dev;
798
799         ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
800         ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
801
802         if (tb[IFA_BROADCAST])
803                 ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
804
805         if (tb[IFA_LABEL])
806                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
807         else
808                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
809
810         if (tb[IFA_CACHEINFO]) {
811                 struct ifa_cacheinfo *ci;
812
813                 ci = nla_data(tb[IFA_CACHEINFO]);
814                 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
815                         err = -EINVAL;
816                         goto errout_free;
817                 }
818                 *pvalid_lft = ci->ifa_valid;
819                 *pprefered_lft = ci->ifa_prefered;
820         }
821
822         return ifa;
823
824 errout_free:
825         inet_free_ifa(ifa);
826 errout:
827         return ERR_PTR(err);
828 }
829
830 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
831 {
832         struct in_device *in_dev = ifa->ifa_dev;
833         struct in_ifaddr *ifa1, **ifap;
834
835         if (!ifa->ifa_local)
836                 return NULL;
837
838         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
839              ifap = &ifa1->ifa_next) {
840                 if (ifa1->ifa_mask == ifa->ifa_mask &&
841                     inet_ifa_match(ifa1->ifa_address, ifa) &&
842                     ifa1->ifa_local == ifa->ifa_local)
843                         return ifa1;
844         }
845         return NULL;
846 }
847
848 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
849 {
850         struct net *net = sock_net(skb->sk);
851         struct in_ifaddr *ifa;
852         struct in_ifaddr *ifa_existing;
853         __u32 valid_lft = INFINITY_LIFE_TIME;
854         __u32 prefered_lft = INFINITY_LIFE_TIME;
855
856         ASSERT_RTNL();
857
858         ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
859         if (IS_ERR(ifa))
860                 return PTR_ERR(ifa);
861
862         ifa_existing = find_matching_ifa(ifa);
863         if (!ifa_existing) {
864                 /* It would be best to check for !NLM_F_CREATE here but
865                  * userspace already relies on not having to provide this.
866                  */
867                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
868                 if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
869                         int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
870                                                true, ifa);
871
872                         if (ret < 0) {
873                                 inet_free_ifa(ifa);
874                                 return ret;
875                         }
876                 }
877                 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
878         } else {
879                 inet_free_ifa(ifa);
880
881                 if (nlh->nlmsg_flags & NLM_F_EXCL ||
882                     !(nlh->nlmsg_flags & NLM_F_REPLACE))
883                         return -EEXIST;
884                 ifa = ifa_existing;
885                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
886                 cancel_delayed_work(&check_lifetime_work);
887                 queue_delayed_work(system_power_efficient_wq,
888                                 &check_lifetime_work, 0);
889                 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
890         }
891         return 0;
892 }
893
894 /*
895  *      Determine a default network mask, based on the IP address.
896  */
897
898 static int inet_abc_len(__be32 addr)
899 {
900         int rc = -1;    /* Something else, probably a multicast. */
901
902         if (ipv4_is_zeronet(addr))
903                 rc = 0;
904         else {
905                 __u32 haddr = ntohl(addr);
906
907                 if (IN_CLASSA(haddr))
908                         rc = 8;
909                 else if (IN_CLASSB(haddr))
910                         rc = 16;
911                 else if (IN_CLASSC(haddr))
912                         rc = 24;
913         }
914
915         return rc;
916 }
917
918
919 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
920 {
921         struct ifreq ifr;
922         struct sockaddr_in sin_orig;
923         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
924         struct in_device *in_dev;
925         struct in_ifaddr **ifap = NULL;
926         struct in_ifaddr *ifa = NULL;
927         struct net_device *dev;
928         char *colon;
929         int ret = -EFAULT;
930         int tryaddrmatch = 0;
931
932         /*
933          *      Fetch the caller's info block into kernel space
934          */
935
936         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
937                 goto out;
938         ifr.ifr_name[IFNAMSIZ - 1] = 0;
939
940         /* save original address for comparison */
941         memcpy(&sin_orig, sin, sizeof(*sin));
942
943         colon = strchr(ifr.ifr_name, ':');
944         if (colon)
945                 *colon = 0;
946
947         dev_load(net, ifr.ifr_name);
948
949         switch (cmd) {
950         case SIOCGIFADDR:       /* Get interface address */
951         case SIOCGIFBRDADDR:    /* Get the broadcast address */
952         case SIOCGIFDSTADDR:    /* Get the destination address */
953         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
954                 /* Note that these ioctls will not sleep,
955                    so that we do not impose a lock.
956                    One day we will be forced to put shlock here (I mean SMP)
957                  */
958                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
959                 memset(sin, 0, sizeof(*sin));
960                 sin->sin_family = AF_INET;
961                 break;
962
963         case SIOCSIFFLAGS:
964                 ret = -EPERM;
965                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
966                         goto out;
967                 break;
968         case SIOCSIFADDR:       /* Set interface address (and family) */
969         case SIOCSIFBRDADDR:    /* Set the broadcast address */
970         case SIOCSIFDSTADDR:    /* Set the destination address */
971         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
972         case SIOCKILLADDR:      /* Nuke all sockets on this address */
973                 ret = -EPERM;
974                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
975                         goto out;
976                 ret = -EINVAL;
977                 if (sin->sin_family != AF_INET)
978                         goto out;
979                 break;
980         default:
981                 ret = -EINVAL;
982                 goto out;
983         }
984
985         rtnl_lock();
986
987         ret = -ENODEV;
988         dev = __dev_get_by_name(net, ifr.ifr_name);
989         if (!dev)
990                 goto done;
991
992         if (colon)
993                 *colon = ':';
994
995         in_dev = __in_dev_get_rtnl(dev);
996         if (in_dev) {
997                 if (tryaddrmatch) {
998                         /* Matthias Andree */
999                         /* compare label and address (4.4BSD style) */
1000                         /* note: we only do this for a limited set of ioctls
1001                            and only if the original address family was AF_INET.
1002                            This is checked above. */
1003                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1004                              ifap = &ifa->ifa_next) {
1005                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
1006                                     sin_orig.sin_addr.s_addr ==
1007                                                         ifa->ifa_local) {
1008                                         break; /* found */
1009                                 }
1010                         }
1011                 }
1012                 /* we didn't get a match, maybe the application is
1013                    4.3BSD-style and passed in junk so we fall back to
1014                    comparing just the label */
1015                 if (!ifa) {
1016                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1017                              ifap = &ifa->ifa_next)
1018                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
1019                                         break;
1020                 }
1021         }
1022
1023         ret = -EADDRNOTAVAIL;
1024         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS
1025             && cmd != SIOCKILLADDR)
1026                 goto done;
1027
1028         switch (cmd) {
1029         case SIOCGIFADDR:       /* Get interface address */
1030                 sin->sin_addr.s_addr = ifa->ifa_local;
1031                 goto rarok;
1032
1033         case SIOCGIFBRDADDR:    /* Get the broadcast address */
1034                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
1035                 goto rarok;
1036
1037         case SIOCGIFDSTADDR:    /* Get the destination address */
1038                 sin->sin_addr.s_addr = ifa->ifa_address;
1039                 goto rarok;
1040
1041         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
1042                 sin->sin_addr.s_addr = ifa->ifa_mask;
1043                 goto rarok;
1044
1045         case SIOCSIFFLAGS:
1046                 if (colon) {
1047                         ret = -EADDRNOTAVAIL;
1048                         if (!ifa)
1049                                 break;
1050                         ret = 0;
1051                         if (!(ifr.ifr_flags & IFF_UP))
1052                                 inet_del_ifa(in_dev, ifap, 1);
1053                         break;
1054                 }
1055                 ret = dev_change_flags(dev, ifr.ifr_flags);
1056                 break;
1057
1058         case SIOCSIFADDR:       /* Set interface address (and family) */
1059                 ret = -EINVAL;
1060                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1061                         break;
1062
1063                 if (!ifa) {
1064                         ret = -ENOBUFS;
1065                         ifa = inet_alloc_ifa();
1066                         if (!ifa)
1067                                 break;
1068                         INIT_HLIST_NODE(&ifa->hash);
1069                         if (colon)
1070                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
1071                         else
1072                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1073                 } else {
1074                         ret = 0;
1075                         if (ifa->ifa_local == sin->sin_addr.s_addr)
1076                                 break;
1077                         inet_del_ifa(in_dev, ifap, 0);
1078                         ifa->ifa_broadcast = 0;
1079                         ifa->ifa_scope = 0;
1080                 }
1081
1082                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1083
1084                 if (!(dev->flags & IFF_POINTOPOINT)) {
1085                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1086                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1087                         if ((dev->flags & IFF_BROADCAST) &&
1088                             ifa->ifa_prefixlen < 31)
1089                                 ifa->ifa_broadcast = ifa->ifa_address |
1090                                                      ~ifa->ifa_mask;
1091                 } else {
1092                         ifa->ifa_prefixlen = 32;
1093                         ifa->ifa_mask = inet_make_mask(32);
1094                 }
1095                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1096                 ret = inet_set_ifa(dev, ifa);
1097                 break;
1098
1099         case SIOCSIFBRDADDR:    /* Set the broadcast address */
1100                 ret = 0;
1101                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1102                         inet_del_ifa(in_dev, ifap, 0);
1103                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
1104                         inet_insert_ifa(ifa);
1105                 }
1106                 break;
1107
1108         case SIOCSIFDSTADDR:    /* Set the destination address */
1109                 ret = 0;
1110                 if (ifa->ifa_address == sin->sin_addr.s_addr)
1111                         break;
1112                 ret = -EINVAL;
1113                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1114                         break;
1115                 ret = 0;
1116                 inet_del_ifa(in_dev, ifap, 0);
1117                 ifa->ifa_address = sin->sin_addr.s_addr;
1118                 inet_insert_ifa(ifa);
1119                 break;
1120
1121         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1122
1123                 /*
1124                  *      The mask we set must be legal.
1125                  */
1126                 ret = -EINVAL;
1127                 if (bad_mask(sin->sin_addr.s_addr, 0))
1128                         break;
1129                 ret = 0;
1130                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1131                         __be32 old_mask = ifa->ifa_mask;
1132                         inet_del_ifa(in_dev, ifap, 0);
1133                         ifa->ifa_mask = sin->sin_addr.s_addr;
1134                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1135
1136                         /* See if current broadcast address matches
1137                          * with current netmask, then recalculate
1138                          * the broadcast address. Otherwise it's a
1139                          * funny address, so don't touch it since
1140                          * the user seems to know what (s)he's doing...
1141                          */
1142                         if ((dev->flags & IFF_BROADCAST) &&
1143                             (ifa->ifa_prefixlen < 31) &&
1144                             (ifa->ifa_broadcast ==
1145                              (ifa->ifa_local|~old_mask))) {
1146                                 ifa->ifa_broadcast = (ifa->ifa_local |
1147                                                       ~sin->sin_addr.s_addr);
1148                         }
1149                         inet_insert_ifa(ifa);
1150                 }
1151                 break;
1152         case SIOCKILLADDR:      /* Nuke all connections on this address */
1153                 ret = tcp_nuke_addr(net, (struct sockaddr *) sin);
1154                 break;
1155         }
1156 done:
1157         rtnl_unlock();
1158 out:
1159         return ret;
1160 rarok:
1161         rtnl_unlock();
1162         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
1163         goto out;
1164 }
1165
1166 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1167 {
1168         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1169         struct in_ifaddr *ifa;
1170         struct ifreq ifr;
1171         int done = 0;
1172
1173         if (!in_dev)
1174                 goto out;
1175
1176         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1177                 if (!buf) {
1178                         done += sizeof(ifr);
1179                         continue;
1180                 }
1181                 if (len < (int) sizeof(ifr))
1182                         break;
1183                 memset(&ifr, 0, sizeof(struct ifreq));
1184                 strcpy(ifr.ifr_name, ifa->ifa_label);
1185
1186                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1187                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1188                                                                 ifa->ifa_local;
1189
1190                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1191                         done = -EFAULT;
1192                         break;
1193                 }
1194                 buf  += sizeof(struct ifreq);
1195                 len  -= sizeof(struct ifreq);
1196                 done += sizeof(struct ifreq);
1197         }
1198 out:
1199         return done;
1200 }
1201
1202 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1203 {
1204         __be32 addr = 0;
1205         struct in_device *in_dev;
1206         struct net *net = dev_net(dev);
1207
1208         rcu_read_lock();
1209         in_dev = __in_dev_get_rcu(dev);
1210         if (!in_dev)
1211                 goto no_in_dev;
1212
1213         for_primary_ifa(in_dev) {
1214                 if (ifa->ifa_scope > scope)
1215                         continue;
1216                 if (!dst || inet_ifa_match(dst, ifa)) {
1217                         addr = ifa->ifa_local;
1218                         break;
1219                 }
1220                 if (!addr)
1221                         addr = ifa->ifa_local;
1222         } endfor_ifa(in_dev);
1223
1224         if (addr)
1225                 goto out_unlock;
1226 no_in_dev:
1227
1228         /* Not loopback addresses on loopback should be preferred
1229            in this case. It is important that lo is the first interface
1230            in dev_base list.
1231          */
1232         for_each_netdev_rcu(net, dev) {
1233                 in_dev = __in_dev_get_rcu(dev);
1234                 if (!in_dev)
1235                         continue;
1236
1237                 for_primary_ifa(in_dev) {
1238                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
1239                             ifa->ifa_scope <= scope) {
1240                                 addr = ifa->ifa_local;
1241                                 goto out_unlock;
1242                         }
1243                 } endfor_ifa(in_dev);
1244         }
1245 out_unlock:
1246         rcu_read_unlock();
1247         return addr;
1248 }
1249 EXPORT_SYMBOL(inet_select_addr);
1250
1251 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1252                               __be32 local, int scope)
1253 {
1254         int same = 0;
1255         __be32 addr = 0;
1256
1257         for_ifa(in_dev) {
1258                 if (!addr &&
1259                     (local == ifa->ifa_local || !local) &&
1260                     ifa->ifa_scope <= scope) {
1261                         addr = ifa->ifa_local;
1262                         if (same)
1263                                 break;
1264                 }
1265                 if (!same) {
1266                         same = (!local || inet_ifa_match(local, ifa)) &&
1267                                 (!dst || inet_ifa_match(dst, ifa));
1268                         if (same && addr) {
1269                                 if (local || !dst)
1270                                         break;
1271                                 /* Is the selected addr into dst subnet? */
1272                                 if (inet_ifa_match(addr, ifa))
1273                                         break;
1274                                 /* No, then can we use new local src? */
1275                                 if (ifa->ifa_scope <= scope) {
1276                                         addr = ifa->ifa_local;
1277                                         break;
1278                                 }
1279                                 /* search for large dst subnet for addr */
1280                                 same = 0;
1281                         }
1282                 }
1283         } endfor_ifa(in_dev);
1284
1285         return same ? addr : 0;
1286 }
1287
1288 /*
1289  * Confirm that local IP address exists using wildcards:
1290  * - net: netns to check, cannot be NULL
1291  * - in_dev: only on this interface, NULL=any interface
1292  * - dst: only in the same subnet as dst, 0=any dst
1293  * - local: address, 0=autoselect the local address
1294  * - scope: maximum allowed scope value for the local address
1295  */
1296 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1297                          __be32 dst, __be32 local, int scope)
1298 {
1299         __be32 addr = 0;
1300         struct net_device *dev;
1301
1302         if (in_dev)
1303                 return confirm_addr_indev(in_dev, dst, local, scope);
1304
1305         rcu_read_lock();
1306         for_each_netdev_rcu(net, dev) {
1307                 in_dev = __in_dev_get_rcu(dev);
1308                 if (in_dev) {
1309                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1310                         if (addr)
1311                                 break;
1312                 }
1313         }
1314         rcu_read_unlock();
1315
1316         return addr;
1317 }
1318 EXPORT_SYMBOL(inet_confirm_addr);
1319
1320 /*
1321  *      Device notifier
1322  */
1323
1324 int register_inetaddr_notifier(struct notifier_block *nb)
1325 {
1326         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1327 }
1328 EXPORT_SYMBOL(register_inetaddr_notifier);
1329
1330 int unregister_inetaddr_notifier(struct notifier_block *nb)
1331 {
1332         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1333 }
1334 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1335
1336 /* Rename ifa_labels for a device name change. Make some effort to preserve
1337  * existing alias numbering and to create unique labels if possible.
1338 */
1339 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1340 {
1341         struct in_ifaddr *ifa;
1342         int named = 0;
1343
1344         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1345                 char old[IFNAMSIZ], *dot;
1346
1347                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1348                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1349                 if (named++ == 0)
1350                         goto skip;
1351                 dot = strchr(old, ':');
1352                 if (!dot) {
1353                         sprintf(old, ":%d", named);
1354                         dot = old;
1355                 }
1356                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1357                         strcat(ifa->ifa_label, dot);
1358                 else
1359                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1360 skip:
1361                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1362         }
1363 }
1364
/*
 * Tell whether @mtu is large enough for IPv4 to run on the device.
 * 68 octets is the smallest datagram size every IPv4 link has to carry
 * (RFC 791).
 */
static bool inetdev_valid_mtu(unsigned int mtu)
{
	const unsigned int min_mtu = 68;

	return !(mtu < min_mtu);
}
1369
1370 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1371                                         struct in_device *in_dev)
1372
1373 {
1374         struct in_ifaddr *ifa;
1375
1376         for (ifa = in_dev->ifa_list; ifa;
1377              ifa = ifa->ifa_next) {
1378                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1379                          ifa->ifa_local, dev,
1380                          ifa->ifa_local, NULL,
1381                          dev->dev_addr, NULL);
1382         }
1383 }
1384
1385 /* Called only under RTNL semaphore */
1386
1387 static int inetdev_event(struct notifier_block *this, unsigned long event,
1388                          void *ptr)
1389 {
1390         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1391         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1392
1393         ASSERT_RTNL();
1394
1395         if (!in_dev) {
1396                 if (event == NETDEV_REGISTER) {
1397                         in_dev = inetdev_init(dev);
1398                         if (IS_ERR(in_dev))
1399                                 return notifier_from_errno(PTR_ERR(in_dev));
1400                         if (dev->flags & IFF_LOOPBACK) {
1401                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1402                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1403                         }
1404                 } else if (event == NETDEV_CHANGEMTU) {
1405                         /* Re-enabling IP */
1406                         if (inetdev_valid_mtu(dev->mtu))
1407                                 in_dev = inetdev_init(dev);
1408                 }
1409                 goto out;
1410         }
1411
1412         switch (event) {
1413         case NETDEV_REGISTER:
1414                 pr_debug("%s: bug\n", __func__);
1415                 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1416                 break;
1417         case NETDEV_UP:
1418                 if (!inetdev_valid_mtu(dev->mtu))
1419                         break;
1420                 if (dev->flags & IFF_LOOPBACK) {
1421                         struct in_ifaddr *ifa = inet_alloc_ifa();
1422
1423                         if (ifa) {
1424                                 INIT_HLIST_NODE(&ifa->hash);
1425                                 ifa->ifa_local =
1426                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1427                                 ifa->ifa_prefixlen = 8;
1428                                 ifa->ifa_mask = inet_make_mask(8);
1429                                 in_dev_hold(in_dev);
1430                                 ifa->ifa_dev = in_dev;
1431                                 ifa->ifa_scope = RT_SCOPE_HOST;
1432                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1433                                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1434                                                  INFINITY_LIFE_TIME);
1435                                 ipv4_devconf_setall(in_dev);
1436                                 neigh_parms_data_state_setall(in_dev->arp_parms);
1437                                 inet_insert_ifa(ifa);
1438                         }
1439                 }
1440                 ip_mc_up(in_dev);
1441                 /* fall through */
1442         case NETDEV_CHANGEADDR:
1443                 if (!IN_DEV_ARP_NOTIFY(in_dev))
1444                         break;
1445                 /* fall through */
1446         case NETDEV_NOTIFY_PEERS:
1447                 /* Send gratuitous ARP to notify of link change */
1448                 inetdev_send_gratuitous_arp(dev, in_dev);
1449                 break;
1450         case NETDEV_DOWN:
1451                 ip_mc_down(in_dev);
1452                 break;
1453         case NETDEV_PRE_TYPE_CHANGE:
1454                 ip_mc_unmap(in_dev);
1455                 break;
1456         case NETDEV_POST_TYPE_CHANGE:
1457                 ip_mc_remap(in_dev);
1458                 break;
1459         case NETDEV_CHANGEMTU:
1460                 if (inetdev_valid_mtu(dev->mtu))
1461                         break;
1462                 /* disable IP when MTU is not enough */
1463         case NETDEV_UNREGISTER:
1464                 inetdev_destroy(in_dev);
1465                 break;
1466         case NETDEV_CHANGENAME:
1467                 /* Do not notify about label change, this event is
1468                  * not interesting to applications using netlink.
1469                  */
1470                 inetdev_changename(dev, in_dev);
1471
1472                 devinet_sysctl_unregister(in_dev);
1473                 devinet_sysctl_register(in_dev);
1474                 break;
1475         }
1476 out:
1477         return NOTIFY_DONE;
1478 }
1479
1480 static struct notifier_block ip_netdev_notifier = {
1481         .notifier_call = inetdev_event,
1482 };
1483
1484 static size_t inet_nlmsg_size(void)
1485 {
1486         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1487                + nla_total_size(4) /* IFA_ADDRESS */
1488                + nla_total_size(4) /* IFA_LOCAL */
1489                + nla_total_size(4) /* IFA_BROADCAST */
1490                + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1491                + nla_total_size(4)  /* IFA_FLAGS */
1492                + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1493 }
1494
1495 static inline u32 cstamp_delta(unsigned long cstamp)
1496 {
1497         return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1498 }
1499
1500 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1501                          unsigned long tstamp, u32 preferred, u32 valid)
1502 {
1503         struct ifa_cacheinfo ci;
1504
1505         ci.cstamp = cstamp_delta(cstamp);
1506         ci.tstamp = cstamp_delta(tstamp);
1507         ci.ifa_prefered = preferred;
1508         ci.ifa_valid = valid;
1509
1510         return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1511 }
1512
1513 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1514                             u32 portid, u32 seq, int event, unsigned int flags)
1515 {
1516         struct ifaddrmsg *ifm;
1517         struct nlmsghdr  *nlh;
1518         u32 preferred, valid;
1519
1520         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1521         if (!nlh)
1522                 return -EMSGSIZE;
1523
1524         ifm = nlmsg_data(nlh);
1525         ifm->ifa_family = AF_INET;
1526         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1527         ifm->ifa_flags = ifa->ifa_flags;
1528         ifm->ifa_scope = ifa->ifa_scope;
1529         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1530
1531         if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1532                 preferred = ifa->ifa_preferred_lft;
1533                 valid = ifa->ifa_valid_lft;
1534                 if (preferred != INFINITY_LIFE_TIME) {
1535                         long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1536
1537                         if (preferred > tval)
1538                                 preferred -= tval;
1539                         else
1540                                 preferred = 0;
1541                         if (valid != INFINITY_LIFE_TIME) {
1542                                 if (valid > tval)
1543                                         valid -= tval;
1544                                 else
1545                                         valid = 0;
1546                         }
1547                 }
1548         } else {
1549                 preferred = INFINITY_LIFE_TIME;
1550                 valid = INFINITY_LIFE_TIME;
1551         }
1552         if ((ifa->ifa_address &&
1553              nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1554             (ifa->ifa_local &&
1555              nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1556             (ifa->ifa_broadcast &&
1557              nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1558             (ifa->ifa_label[0] &&
1559              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1560             nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1561             put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1562                           preferred, valid))
1563                 goto nla_put_failure;
1564
1565         nlmsg_end(skb, nlh);
1566         return 0;
1567
1568 nla_put_failure:
1569         nlmsg_cancel(skb, nlh);
1570         return -EMSGSIZE;
1571 }
1572
1573 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1574 {
1575         struct net *net = sock_net(skb->sk);
1576         int h, s_h;
1577         int idx, s_idx;
1578         int ip_idx, s_ip_idx;
1579         struct net_device *dev;
1580         struct in_device *in_dev;
1581         struct in_ifaddr *ifa;
1582         struct hlist_head *head;
1583
1584         s_h = cb->args[0];
1585         s_idx = idx = cb->args[1];
1586         s_ip_idx = ip_idx = cb->args[2];
1587
1588         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1589                 idx = 0;
1590                 head = &net->dev_index_head[h];
1591                 rcu_read_lock();
1592                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1593                           net->dev_base_seq;
1594                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1595                         if (idx < s_idx)
1596                                 goto cont;
1597                         if (h > s_h || idx > s_idx)
1598                                 s_ip_idx = 0;
1599                         in_dev = __in_dev_get_rcu(dev);
1600                         if (!in_dev)
1601                                 goto cont;
1602
1603                         for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1604                              ifa = ifa->ifa_next, ip_idx++) {
1605                                 if (ip_idx < s_ip_idx)
1606                                         continue;
1607                                 if (inet_fill_ifaddr(skb, ifa,
1608                                              NETLINK_CB(cb->skb).portid,
1609                                              cb->nlh->nlmsg_seq,
1610                                              RTM_NEWADDR, NLM_F_MULTI) < 0) {
1611                                         rcu_read_unlock();
1612                                         goto done;
1613                                 }
1614                                 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1615                         }
1616 cont:
1617                         idx++;
1618                 }
1619                 rcu_read_unlock();
1620         }
1621
1622 done:
1623         cb->args[0] = h;
1624         cb->args[1] = idx;
1625         cb->args[2] = ip_idx;
1626
1627         return skb->len;
1628 }
1629
1630 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1631                       u32 portid)
1632 {
1633         struct sk_buff *skb;
1634         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1635         int err = -ENOBUFS;
1636         struct net *net;
1637
1638         net = dev_net(ifa->ifa_dev->dev);
1639         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1640         if (!skb)
1641                 goto errout;
1642
1643         err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1644         if (err < 0) {
1645                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1646                 WARN_ON(err == -EMSGSIZE);
1647                 kfree_skb(skb);
1648                 goto errout;
1649         }
1650         rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1651         return;
1652 errout:
1653         if (err < 0)
1654                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1655 }
1656
1657 static size_t inet_get_link_af_size(const struct net_device *dev,
1658                                     u32 ext_filter_mask)
1659 {
1660         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1661
1662         if (!in_dev)
1663                 return 0;
1664
1665         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1666 }
1667
1668 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1669                              u32 ext_filter_mask)
1670 {
1671         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1672         struct nlattr *nla;
1673         int i;
1674
1675         if (!in_dev)
1676                 return -ENODATA;
1677
1678         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1679         if (!nla)
1680                 return -EMSGSIZE;
1681
1682         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1683                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1684
1685         return 0;
1686 }
1687
1688 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1689         [IFLA_INET_CONF]        = { .type = NLA_NESTED },
1690 };
1691
1692 static int inet_validate_link_af(const struct net_device *dev,
1693                                  const struct nlattr *nla)
1694 {
1695         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1696         int err, rem;
1697
1698         if (dev && !__in_dev_get_rtnl(dev))
1699                 return -EAFNOSUPPORT;
1700
1701         err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1702         if (err < 0)
1703                 return err;
1704
1705         if (tb[IFLA_INET_CONF]) {
1706                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1707                         int cfgid = nla_type(a);
1708
1709                         if (nla_len(a) < 4)
1710                                 return -EINVAL;
1711
1712                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1713                                 return -EINVAL;
1714                 }
1715         }
1716
1717         return 0;
1718 }
1719
1720 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1721 {
1722         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1723         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1724         int rem;
1725
1726         if (!in_dev)
1727                 return -EAFNOSUPPORT;
1728
1729         if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1730                 BUG();
1731
1732         if (tb[IFLA_INET_CONF]) {
1733                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1734                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1735         }
1736
1737         return 0;
1738 }
1739
1740 static int inet_netconf_msgsize_devconf(int type)
1741 {
1742         int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1743                    + nla_total_size(4); /* NETCONFA_IFINDEX */
1744
1745         /* type -1 is used for ALL */
1746         if (type == -1 || type == NETCONFA_FORWARDING)
1747                 size += nla_total_size(4);
1748         if (type == -1 || type == NETCONFA_RP_FILTER)
1749                 size += nla_total_size(4);
1750         if (type == -1 || type == NETCONFA_MC_FORWARDING)
1751                 size += nla_total_size(4);
1752         if (type == -1 || type == NETCONFA_PROXY_NEIGH)
1753                 size += nla_total_size(4);
1754         if (type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
1755                 size += nla_total_size(4);
1756
1757         return size;
1758 }
1759
1760 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1761                                      struct ipv4_devconf *devconf, u32 portid,
1762                                      u32 seq, int event, unsigned int flags,
1763                                      int type)
1764 {
1765         struct nlmsghdr  *nlh;
1766         struct netconfmsg *ncm;
1767
1768         nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1769                         flags);
1770         if (!nlh)
1771                 return -EMSGSIZE;
1772
1773         ncm = nlmsg_data(nlh);
1774         ncm->ncm_family = AF_INET;
1775
1776         if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1777                 goto nla_put_failure;
1778
1779         /* type -1 is used for ALL */
1780         if ((type == -1 || type == NETCONFA_FORWARDING) &&
1781             nla_put_s32(skb, NETCONFA_FORWARDING,
1782                         IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1783                 goto nla_put_failure;
1784         if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1785             nla_put_s32(skb, NETCONFA_RP_FILTER,
1786                         IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1787                 goto nla_put_failure;
1788         if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1789             nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1790                         IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1791                 goto nla_put_failure;
1792         if ((type == -1 || type == NETCONFA_PROXY_NEIGH) &&
1793             nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1794                         IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1795                 goto nla_put_failure;
1796         if ((type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
1797             nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
1798                         IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
1799                 goto nla_put_failure;
1800
1801         nlmsg_end(skb, nlh);
1802         return 0;
1803
1804 nla_put_failure:
1805         nlmsg_cancel(skb, nlh);
1806         return -EMSGSIZE;
1807 }
1808
1809 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1810                                  struct ipv4_devconf *devconf)
1811 {
1812         struct sk_buff *skb;
1813         int err = -ENOBUFS;
1814
1815         skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1816         if (!skb)
1817                 goto errout;
1818
1819         err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1820                                         RTM_NEWNETCONF, 0, type);
1821         if (err < 0) {
1822                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1823                 WARN_ON(err == -EMSGSIZE);
1824                 kfree_skb(skb);
1825                 goto errout;
1826         }
1827         rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1828         return;
1829 errout:
1830         if (err < 0)
1831                 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1832 }
1833
1834 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1835         [NETCONFA_IFINDEX]      = { .len = sizeof(int) },
1836         [NETCONFA_FORWARDING]   = { .len = sizeof(int) },
1837         [NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
1838         [NETCONFA_PROXY_NEIGH]  = { .len = sizeof(int) },
1839         [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]  = { .len = sizeof(int) },
1840 };
1841
1842 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1843                                     struct nlmsghdr *nlh)
1844 {
1845         struct net *net = sock_net(in_skb->sk);
1846         struct nlattr *tb[NETCONFA_MAX+1];
1847         struct netconfmsg *ncm;
1848         struct sk_buff *skb;
1849         struct ipv4_devconf *devconf;
1850         struct in_device *in_dev;
1851         struct net_device *dev;
1852         int ifindex;
1853         int err;
1854
1855         err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1856                           devconf_ipv4_policy);
1857         if (err < 0)
1858                 goto errout;
1859
1860         err = -EINVAL;
1861         if (!tb[NETCONFA_IFINDEX])
1862                 goto errout;
1863
1864         ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1865         switch (ifindex) {
1866         case NETCONFA_IFINDEX_ALL:
1867                 devconf = net->ipv4.devconf_all;
1868                 break;
1869         case NETCONFA_IFINDEX_DEFAULT:
1870                 devconf = net->ipv4.devconf_dflt;
1871                 break;
1872         default:
1873                 dev = __dev_get_by_index(net, ifindex);
1874                 if (!dev)
1875                         goto errout;
1876                 in_dev = __in_dev_get_rtnl(dev);
1877                 if (!in_dev)
1878                         goto errout;
1879                 devconf = &in_dev->cnf;
1880                 break;
1881         }
1882
1883         err = -ENOBUFS;
1884         skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1885         if (!skb)
1886                 goto errout;
1887
1888         err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1889                                         NETLINK_CB(in_skb).portid,
1890                                         nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1891                                         -1);
1892         if (err < 0) {
1893                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1894                 WARN_ON(err == -EMSGSIZE);
1895                 kfree_skb(skb);
1896                 goto errout;
1897         }
1898         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1899 errout:
1900         return err;
1901 }
1902
1903 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1904                                      struct netlink_callback *cb)
1905 {
1906         struct net *net = sock_net(skb->sk);
1907         int h, s_h;
1908         int idx, s_idx;
1909         struct net_device *dev;
1910         struct in_device *in_dev;
1911         struct hlist_head *head;
1912
1913         s_h = cb->args[0];
1914         s_idx = idx = cb->args[1];
1915
1916         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1917                 idx = 0;
1918                 head = &net->dev_index_head[h];
1919                 rcu_read_lock();
1920                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1921                           net->dev_base_seq;
1922                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1923                         if (idx < s_idx)
1924                                 goto cont;
1925                         in_dev = __in_dev_get_rcu(dev);
1926                         if (!in_dev)
1927                                 goto cont;
1928
1929                         if (inet_netconf_fill_devconf(skb, dev->ifindex,
1930                                                       &in_dev->cnf,
1931                                                       NETLINK_CB(cb->skb).portid,
1932                                                       cb->nlh->nlmsg_seq,
1933                                                       RTM_NEWNETCONF,
1934                                                       NLM_F_MULTI,
1935                                                       -1) < 0) {
1936                                 rcu_read_unlock();
1937                                 goto done;
1938                         }
1939                         nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1940 cont:
1941                         idx++;
1942                 }
1943                 rcu_read_unlock();
1944         }
1945         if (h == NETDEV_HASHENTRIES) {
1946                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
1947                                               net->ipv4.devconf_all,
1948                                               NETLINK_CB(cb->skb).portid,
1949                                               cb->nlh->nlmsg_seq,
1950                                               RTM_NEWNETCONF, NLM_F_MULTI,
1951                                               -1) < 0)
1952                         goto done;
1953                 else
1954                         h++;
1955         }
1956         if (h == NETDEV_HASHENTRIES + 1) {
1957                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
1958                                               net->ipv4.devconf_dflt,
1959                                               NETLINK_CB(cb->skb).portid,
1960                                               cb->nlh->nlmsg_seq,
1961                                               RTM_NEWNETCONF, NLM_F_MULTI,
1962                                               -1) < 0)
1963                         goto done;
1964                 else
1965                         h++;
1966         }
1967 done:
1968         cb->args[0] = h;
1969         cb->args[1] = idx;
1970
1971         return skb->len;
1972 }
1973
1974 #ifdef CONFIG_SYSCTL
1975
1976 static void devinet_copy_dflt_conf(struct net *net, int i)
1977 {
1978         struct net_device *dev;
1979
1980         rcu_read_lock();
1981         for_each_netdev_rcu(net, dev) {
1982                 struct in_device *in_dev;
1983
1984                 in_dev = __in_dev_get_rcu(dev);
1985                 if (in_dev && !test_bit(i, in_dev->cnf.state))
1986                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1987         }
1988         rcu_read_unlock();
1989 }
1990
1991 /* called with RTNL locked */
1992 static void inet_forward_change(struct net *net)
1993 {
1994         struct net_device *dev;
1995         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1996
1997         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1998         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1999         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2000                                     NETCONFA_IFINDEX_ALL,
2001                                     net->ipv4.devconf_all);
2002         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2003                                     NETCONFA_IFINDEX_DEFAULT,
2004                                     net->ipv4.devconf_dflt);
2005
2006         for_each_netdev(net, dev) {
2007                 struct in_device *in_dev;
2008                 if (on)
2009                         dev_disable_lro(dev);
2010                 rcu_read_lock();
2011                 in_dev = __in_dev_get_rcu(dev);
2012                 if (in_dev) {
2013                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2014                         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2015                                                     dev->ifindex, &in_dev->cnf);
2016                 }
2017                 rcu_read_unlock();
2018         }
2019 }
2020
2021 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2022 {
2023         if (cnf == net->ipv4.devconf_dflt)
2024                 return NETCONFA_IFINDEX_DEFAULT;
2025         else if (cnf == net->ipv4.devconf_all)
2026                 return NETCONFA_IFINDEX_ALL;
2027         else {
2028                 struct in_device *idev
2029                         = container_of(cnf, struct in_device, cnf);
2030                 return idev->dev->ifindex;
2031         }
2032 }
2033
2034 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2035                              void __user *buffer,
2036                              size_t *lenp, loff_t *ppos)
2037 {
2038         int old_value = *(int *)ctl->data;
2039         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2040         int new_value = *(int *)ctl->data;
2041
2042         if (write) {
2043                 struct ipv4_devconf *cnf = ctl->extra1;
2044                 struct net *net = ctl->extra2;
2045                 int i = (int *)ctl->data - cnf->data;
2046                 int ifindex;
2047
2048                 set_bit(i, cnf->state);
2049
2050                 if (cnf == net->ipv4.devconf_dflt)
2051                         devinet_copy_dflt_conf(net, i);
2052                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2053                     i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2054                         if ((new_value == 0) && (old_value != 0))
2055                                 rt_cache_flush(net);
2056
2057                 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2058                     new_value != old_value) {
2059                         ifindex = devinet_conf_ifindex(net, cnf);
2060                         inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
2061                                                     ifindex, cnf);
2062                 }
2063                 if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2064                     new_value != old_value) {
2065                         ifindex = devinet_conf_ifindex(net, cnf);
2066                         inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
2067                                                     ifindex, cnf);
2068                 }
2069                 if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2070                     new_value != old_value) {
2071                         ifindex = devinet_conf_ifindex(net, cnf);
2072                         inet_netconf_notify_devconf(net, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2073                                                     ifindex, cnf);
2074                 }
2075         }
2076
2077         return ret;
2078 }
2079
2080 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2081                                   void __user *buffer,
2082                                   size_t *lenp, loff_t *ppos)
2083 {
2084         int *valp = ctl->data;
2085         int val = *valp;
2086         loff_t pos = *ppos;
2087         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2088
2089         if (write && *valp != val) {
2090                 struct net *net = ctl->extra2;
2091
2092                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2093                         if (!rtnl_trylock()) {
2094                                 /* Restore the original values before restarting */
2095                                 *valp = val;
2096                                 *ppos = pos;
2097                                 return restart_syscall();
2098                         }
2099                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2100                                 inet_forward_change(net);
2101                         } else {
2102                                 struct ipv4_devconf *cnf = ctl->extra1;
2103                                 struct in_device *idev =
2104                                         container_of(cnf, struct in_device, cnf);
2105                                 if (*valp)
2106                                         dev_disable_lro(idev->dev);
2107                                 inet_netconf_notify_devconf(net,
2108                                                             NETCONFA_FORWARDING,
2109                                                             idev->dev->ifindex,
2110                                                             cnf);
2111                         }
2112                         rtnl_unlock();
2113                         rt_cache_flush(net);
2114                 } else
2115                         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2116                                                     NETCONFA_IFINDEX_DEFAULT,
2117                                                     net->ipv4.devconf_dflt);
2118         }
2119
2120         return ret;
2121 }
2122
2123 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2124                                 void __user *buffer,
2125                                 size_t *lenp, loff_t *ppos)
2126 {
2127         int *valp = ctl->data;
2128         int val = *valp;
2129         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2130         struct net *net = ctl->extra2;
2131
2132         if (write && *valp != val)
2133                 rt_cache_flush(net);
2134
2135         return ret;
2136 }
2137
/*
 * Initializer for one ctl_table entry bound to the IPV4_DEVCONF_<id> slot
 * of the global ipv4_devconf (ids are 1-based, data[] is 0-based).
 *
 *   id      - suffix of the IPV4_DEVCONF_* constant
 *   fname   - file name of the entry
 *   perm    - file mode
 *   handler - proc handler
 *
 * Parameter names deliberately differ from the ctl_table member names so
 * that the designators are not rewritten by macro substitution.
 */
#define DEVINET_SYSCTL_ENTRY(id, fname, perm, handler) \
	{ \
		.procname	= fname, \
		.data		= &ipv4_devconf.data[IPV4_DEVCONF_ ## id - 1], \
		.maxlen		= sizeof(int), \
		.mode		= perm, \
		.proc_handler	= handler, \
		.extra1		= &ipv4_devconf, \
	}

/* Read-write entry (0644) served by devinet_conf_proc */
#define DEVINET_SYSCTL_RW_ENTRY(id, fname) \
	DEVINET_SYSCTL_ENTRY(id, fname, 0644, devinet_conf_proc)

/* Read-only entry (0444) served by devinet_conf_proc */
#define DEVINET_SYSCTL_RO_ENTRY(id, fname) \
	DEVINET_SYSCTL_ENTRY(id, fname, 0444, devinet_conf_proc)

/* Read-write entry (0644) with a caller-supplied handler */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(id, fname, handler) \
	DEVINET_SYSCTL_ENTRY(id, fname, 0644, handler)

/* Read-write entry (0644) served by ipv4_doint_and_flush */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(id, fname) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(id, fname, ipv4_doint_and_flush)
2160
2161 static struct devinet_sysctl_table {
2162         struct ctl_table_header *sysctl_header;
2163         struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2164 } devinet_sysctl = {
2165         .devinet_vars = {
2166                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2167                                              devinet_sysctl_forward),
2168                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2169
2170                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2171                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2172                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2173                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2174                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2175                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2176                                         "accept_source_route"),
2177                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2178                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2179                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2180                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2181                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2182                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2183                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2184                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2185                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2186                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2187                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2188                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2189                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2190                 DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2191                                         "force_igmp_version"),
2192                 DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2193                                         "igmpv2_unsolicited_report_interval"),
2194                 DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2195                                         "igmpv3_unsolicited_report_interval"),
2196                 DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2197                                         "ignore_routes_with_linkdown"),
2198
2199                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2200                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2201                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2202                                               "promote_secondaries"),
2203                 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2204                                               "route_localnet"),
2205         },
2206 };
2207
2208 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2209                                         struct ipv4_devconf *p)
2210 {
2211         int i;
2212         struct devinet_sysctl_table *t;
2213         char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2214
2215         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2216         if (!t)
2217                 goto out;
2218
2219         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2220                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2221                 t->devinet_vars[i].extra1 = p;
2222                 t->devinet_vars[i].extra2 = net;
2223         }
2224
2225         snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2226
2227         t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2228         if (!t->sysctl_header)
2229                 goto free;
2230
2231         p->sysctl = t;
2232         return 0;
2233
2234 free:
2235         kfree(t);
2236 out:
2237         return -ENOBUFS;
2238 }
2239
2240 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2241 {
2242         struct devinet_sysctl_table *t = cnf->sysctl;
2243
2244         if (!t)
2245                 return;
2246
2247         cnf->sysctl = NULL;
2248         unregister_net_sysctl_table(t->sysctl_header);
2249         kfree(t);
2250 }
2251
2252 static int devinet_sysctl_register(struct in_device *idev)
2253 {
2254         int err;
2255
2256         if (!sysctl_dev_name_is_allowed(idev->dev->name))
2257                 return -EINVAL;
2258
2259         err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2260         if (err)
2261                 return err;
2262         err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2263                                         &idev->cnf);
2264         if (err)
2265                 neigh_sysctl_unregister(idev->arp_parms);
2266         return err;
2267 }
2268
2269 static void devinet_sysctl_unregister(struct in_device *idev)
2270 {
2271         __devinet_sysctl_unregister(&idev->cnf);
2272         neigh_sysctl_unregister(idev->arp_parms);
2273 }
2274
2275 static struct ctl_table ctl_forward_entry[] = {
2276         {
2277                 .procname       = "ip_forward",
2278                 .data           = &ipv4_devconf.data[
2279                                         IPV4_DEVCONF_FORWARDING - 1],
2280                 .maxlen         = sizeof(int),
2281                 .mode           = 0644,
2282                 .proc_handler   = devinet_sysctl_forward,
2283                 .extra1         = &ipv4_devconf,
2284                 .extra2         = &init_net,
2285         },
2286         { },
2287 };
2288 #endif
2289
2290 static __net_init int devinet_init_net(struct net *net)
2291 {
2292         int err;
2293         struct ipv4_devconf *all, *dflt;
2294 #ifdef CONFIG_SYSCTL
2295         struct ctl_table *tbl = ctl_forward_entry;
2296         struct ctl_table_header *forw_hdr;
2297 #endif
2298
2299         err = -ENOMEM;
2300         all = &ipv4_devconf;
2301         dflt = &ipv4_devconf_dflt;
2302
2303         if (!net_eq(net, &init_net)) {
2304                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2305                 if (!all)
2306                         goto err_alloc_all;
2307
2308                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2309                 if (!dflt)
2310                         goto err_alloc_dflt;
2311
2312 #ifdef CONFIG_SYSCTL
2313                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2314                 if (!tbl)
2315                         goto err_alloc_ctl;
2316
2317                 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2318                 tbl[0].extra1 = all;
2319                 tbl[0].extra2 = net;
2320 #endif
2321         }
2322
2323 #ifdef CONFIG_SYSCTL
2324         err = __devinet_sysctl_register(net, "all", all);
2325         if (err < 0)
2326                 goto err_reg_all;
2327
2328         err = __devinet_sysctl_register(net, "default", dflt);
2329         if (err < 0)
2330                 goto err_reg_dflt;
2331
2332         err = -ENOMEM;
2333         forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2334         if (!forw_hdr)
2335                 goto err_reg_ctl;
2336         net->ipv4.forw_hdr = forw_hdr;
2337 #endif
2338
2339         net->ipv4.devconf_all = all;
2340         net->ipv4.devconf_dflt = dflt;
2341         return 0;
2342
2343 #ifdef CONFIG_SYSCTL
2344 err_reg_ctl:
2345         __devinet_sysctl_unregister(dflt);
2346 err_reg_dflt:
2347         __devinet_sysctl_unregister(all);
2348 err_reg_all:
2349         if (tbl != ctl_forward_entry)
2350                 kfree(tbl);
2351 err_alloc_ctl:
2352 #endif
2353         if (dflt != &ipv4_devconf_dflt)
2354                 kfree(dflt);
2355 err_alloc_dflt:
2356         if (all != &ipv4_devconf)
2357                 kfree(all);
2358 err_alloc_all:
2359         return err;
2360 }
2361
2362 static __net_exit void devinet_exit_net(struct net *net)
2363 {
2364 #ifdef CONFIG_SYSCTL
2365         struct ctl_table *tbl;
2366
2367         tbl = net->ipv4.forw_hdr->ctl_table_arg;
2368         unregister_net_sysctl_table(net->ipv4.forw_hdr);
2369         __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2370         __devinet_sysctl_unregister(net->ipv4.devconf_all);
2371         kfree(tbl);
2372 #endif
2373         kfree(net->ipv4.devconf_dflt);
2374         kfree(net->ipv4.devconf_all);
2375 }
2376
2377 static __net_initdata struct pernet_operations devinet_ops = {
2378         .init = devinet_init_net,
2379         .exit = devinet_exit_net,
2380 };
2381
2382 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2383         .family           = AF_INET,
2384         .fill_link_af     = inet_fill_link_af,
2385         .get_link_af_size = inet_get_link_af_size,
2386         .validate_link_af = inet_validate_link_af,
2387         .set_link_af      = inet_set_link_af,
2388 };
2389
2390 void __init devinet_init(void)
2391 {
2392         int i;
2393
2394         for (i = 0; i < IN4_ADDR_HSIZE; i++)
2395                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2396
2397         register_pernet_subsys(&devinet_ops);
2398
2399         register_gifconf(PF_INET, inet_gifconf);
2400         register_netdevice_notifier(&ip_netdev_notifier);
2401
2402         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2403
2404         rtnl_af_register(&inet_af_ops);
2405
2406         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2407         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2408         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2409         rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2410                       inet_netconf_dump_devconf, NULL);
2411 }