netlink: make nlmsg_end() and genlmsg_end() void
[firefly-linux-kernel-4.4.55.git] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
59
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 #include <net/addrconf.h>
67
68 #include "fib_lookup.h"
69
/*
 * Statically initialised ipv4_devconf instance.  The .data array is indexed
 * by the IPV4_DEVCONF_* id minus one; every entry that is not named here is
 * zero because the object has static storage duration.
 * NOTE(review): the users of this object are outside the visible part of the
 * file; presumably it holds the "all interfaces" settings -- confirm.
 */
70 static struct ipv4_devconf ipv4_devconf = {
71         .data = {
72                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
77                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
78         },
79 };
80
/*
 * Statically initialised "default" ipv4_devconf instance.  Same layout as
 * ipv4_devconf (index = IPV4_DEVCONF_* id minus one, unnamed entries zero);
 * the only difference in the initial values is that ACCEPT_SOURCE_ROUTE is
 * enabled here.
 * NOTE(review): inetdev_init() copies dev_net(dev)->ipv4.devconf_dflt into
 * each new in_device; presumably that pointer refers to this object (or a
 * per-namespace copy of it) -- confirm against the namespace init code.
 */
81 static struct ipv4_devconf ipv4_devconf_dflt = {
82         .data = {
83                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
84                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
85                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
86                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
87                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
88                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
89                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
90         },
91 };
92
/*
 * Apply IPV4_DEVCONF() to the per-namespace default configuration
 * (*net->ipv4.devconf_dflt) for attribute @attr.
 * Note: @net is expanded without surrounding parentheses, so callers must
 * pass a simple pointer expression.
 */
93 #define IPV4_DEVCONF_DFLT(net, attr) \
94         IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
95
/*
 * Netlink attribute policy for IPv4 address messages.  It is handed to
 * nlmsg_parse() by inet_rtm_deladdr() and rtm_to_ifaddr() in this file.
 * The addresses and IFA_FLAGS are 32-bit values, IFA_LABEL is a string of at
 * most IFNAMSIZ - 1 bytes, and IFA_CACHEINFO is described only by its length
 * (sizeof(struct ifa_cacheinfo)).
 */
96 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
97         [IFA_LOCAL]             = { .type = NLA_U32 },
98         [IFA_ADDRESS]           = { .type = NLA_U32 },
99         [IFA_BROADCAST]         = { .type = NLA_U32 },
100         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
101         [IFA_CACHEINFO]         = { .len = sizeof(struct ifa_cacheinfo) },
102         [IFA_FLAGS]             = { .type = NLA_U32 },
103 };
104
/*
 * Geometry of the address hash table: the bucket index is
 * IN4_ADDR_HSIZE_SHIFT (8) bits wide, giving 1 << 8 = 256 buckets.
 */
105 #define IN4_ADDR_HSIZE_SHIFT    8
106 #define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)
107
/*
 * Hash table of in_ifaddr entries (linked through in_ifaddr::hash), bucketed
 * by inet_addr_hash() of the local address.  Entries are added and removed
 * with RTNL held (inet_hash_insert()/inet_hash_remove()) and walked under
 * rcu_read_lock() by __ip_dev_find() and check_lifetime().
 */
108 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
109
110 static u32 inet_addr_hash(struct net *net, __be32 addr)
111 {
112         u32 val = (__force u32) addr ^ net_hash_mix(net);
113
114         return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
115 }
116
117 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
118 {
119         u32 hash = inet_addr_hash(net, ifa->ifa_local);
120
121         ASSERT_RTNL();
122         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
123 }
124
/*
 * Unlink @ifa from the address hash table it was placed in by
 * inet_hash_insert().  Must be called with RTNL held; the node is removed
 * with the RCU-aware hlist_del_init_rcu().
 */
125 static void inet_hash_remove(struct in_ifaddr *ifa)
126 {
127         ASSERT_RTNL();
128         hlist_del_init_rcu(&ifa->hash);
129 }
130
131 /**
132  * __ip_dev_find - find the first device with a given source address.
133  * @net: the net namespace
134  * @addr: the source address
135  * @devref: if true, take a reference on the found device
136  *
137  * If a caller uses devref=false, it should be protected by RCU, or RTNL
138  */
139 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
140 {
141         u32 hash = inet_addr_hash(net, addr);
142         struct net_device *result = NULL;
143         struct in_ifaddr *ifa;
144
145         rcu_read_lock();
146         hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
147                 if (ifa->ifa_local == addr) {
148                         struct net_device *dev = ifa->ifa_dev->dev;
149
150                         if (!net_eq(dev_net(dev), net))
151                                 continue;
152                         result = dev;
153                         break;
154                 }
155         }
156         if (!result) {
157                 struct flowi4 fl4 = { .daddr = addr };
158                 struct fib_result res = { 0 };
159                 struct fib_table *local;
160
161                 /* Fallback to FIB local table so that communication
162                  * over loopback subnets work.
163                  */
164                 local = fib_get_table(net, RT_TABLE_LOCAL);
165                 if (local &&
166                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
167                     res.type == RTN_LOCAL)
168                         result = FIB_RES_DEV(res);
169         }
170         if (result && devref)
171                 dev_hold(result);
172         rcu_read_unlock();
173         return result;
174 }
175 EXPORT_SYMBOL(__ip_dev_find);
176
/*
 * Forward declaration; being static, rtmsg_ifa() is defined further down in
 * this translation unit.  It is called here with an RTM_* event, the address,
 * and the originating netlink header / port id (NULL / 0 when there is none).
 */
177 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
178
/*
 * Notifier chain that this file invokes with NETDEV_UP / NETDEV_DOWN and the
 * affected in_ifaddr whenever an address is added or removed.
 */
179 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
180 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
181                          int destroy);
/*
 * Per-device sysctl hooks.  With CONFIG_SYSCTL they are real functions
 * defined elsewhere in this file; without it they collapse to stubs
 * (register always succeeds with 0, unregister does nothing).
 */
182 #ifdef CONFIG_SYSCTL
183 static int devinet_sysctl_register(struct in_device *idev);
184 static void devinet_sysctl_unregister(struct in_device *idev);
185 #else
186 static int devinet_sysctl_register(struct in_device *idev)
187 {
188         return 0;
189 }
190 static void devinet_sysctl_unregister(struct in_device *idev)
191 {
192 }
193 #endif
194
195 /* Locks all the inet devices. */
196
197 static struct in_ifaddr *inet_alloc_ifa(void)
198 {
199         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
200 }
201
202 static void inet_rcu_free_ifa(struct rcu_head *head)
203 {
204         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
205         if (ifa->ifa_dev)
206                 in_dev_put(ifa->ifa_dev);
207         kfree(ifa);
208 }
209
/*
 * Release @ifa.  The actual work (dropping the in_device reference and
 * kfree()) is deferred to inet_rcu_free_ifa() via call_rcu(), so the memory
 * stays valid until that callback runs.
 */
210 static void inet_free_ifa(struct in_ifaddr *ifa)
211 {
212         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
213 }
214
215 void in_dev_finish_destroy(struct in_device *idev)
216 {
217         struct net_device *dev = idev->dev;
218
219         WARN_ON(idev->ifa_list);
220         WARN_ON(idev->mc_list);
221         kfree(rcu_dereference_protected(idev->mc_hash, 1));
222 #ifdef NET_REFCNT_DEBUG
223         pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
224 #endif
225         dev_put(dev);
226         if (!idev->dead)
227                 pr_err("Freeing alive in_device %p\n", idev);
228         else
229                 kfree(idev);
230 }
231 EXPORT_SYMBOL(in_dev_finish_destroy);
232
233 static struct in_device *inetdev_init(struct net_device *dev)
234 {
235         struct in_device *in_dev;
236         int err = -ENOMEM;
237
238         ASSERT_RTNL();
239
240         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
241         if (!in_dev)
242                 goto out;
243         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
244                         sizeof(in_dev->cnf));
245         in_dev->cnf.sysctl = NULL;
246         in_dev->dev = dev;
247         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
248         if (!in_dev->arp_parms)
249                 goto out_kfree;
250         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
251                 dev_disable_lro(dev);
252         /* Reference in_dev->dev */
253         dev_hold(dev);
254         /* Account for reference dev->ip_ptr (below) */
255         in_dev_hold(in_dev);
256
257         err = devinet_sysctl_register(in_dev);
258         if (err) {
259                 in_dev->dead = 1;
260                 in_dev_put(in_dev);
261                 in_dev = NULL;
262                 goto out;
263         }
264         ip_mc_init_dev(in_dev);
265         if (dev->flags & IFF_UP)
266                 ip_mc_up(in_dev);
267
268         /* we can receive as soon as ip_ptr is set -- do this last */
269         rcu_assign_pointer(dev->ip_ptr, in_dev);
270 out:
271         return in_dev ?: ERR_PTR(err);
272 out_kfree:
273         kfree(in_dev);
274         in_dev = NULL;
275         goto out;
276 }
277
278 static void in_dev_rcu_put(struct rcu_head *head)
279 {
280         struct in_device *idev = container_of(head, struct in_device, rcu_head);
281         in_dev_put(idev);
282 }
283
284 static void inetdev_destroy(struct in_device *in_dev)
285 {
286         struct in_ifaddr *ifa;
287         struct net_device *dev;
288
289         ASSERT_RTNL();
290
291         dev = in_dev->dev;
292
293         in_dev->dead = 1;
294
295         ip_mc_destroy_dev(in_dev);
296
297         while ((ifa = in_dev->ifa_list) != NULL) {
298                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
299                 inet_free_ifa(ifa);
300         }
301
302         RCU_INIT_POINTER(dev->ip_ptr, NULL);
303
304         devinet_sysctl_unregister(in_dev);
305         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
306         arp_ifdown(dev);
307
308         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
309 }
310
311 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
312 {
313         rcu_read_lock();
314         for_primary_ifa(in_dev) {
315                 if (inet_ifa_match(a, ifa)) {
316                         if (!b || inet_ifa_match(b, ifa)) {
317                                 rcu_read_unlock();
318                                 return 1;
319                         }
320                 }
321         } endfor_ifa(in_dev);
322         rcu_read_unlock();
323         return 0;
324 }
325
/*
 * __inet_del_ifa - remove one address from a device's address list.
 * @in_dev:  in_device that owns the list
 * @ifap:    address of the link (list head or some ->ifa_next) that currently
 *           points at the address to delete
 * @destroy: when non-zero the deleted address is handed to inet_free_ifa()
 *           at the end; otherwise freeing is left to the caller
 * @nlh, @portid: passed through to rtmsg_ifa() for every notification sent
 *
 * Must be called with RTNL held.  Deleting a primary address also affects
 * the secondaries of the same subnet: depending on
 * IN_DEV_PROMOTE_SECONDARIES() they are either deleted as well, or the first
 * of them is promoted to primary and the routes of the others are re-added.
 */
326 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
327                          int destroy, struct nlmsghdr *nlh, u32 portid)
328 {
329         struct in_ifaddr *promote = NULL;
330         struct in_ifaddr *ifa, *ifa1 = *ifap;
331         struct in_ifaddr *last_prim = in_dev->ifa_list;
332         struct in_ifaddr *prev_prom = NULL;
333         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
334
335         ASSERT_RTNL();
336
337         /* 1. Deleting primary ifaddr forces deletion all secondaries
338          * unless alias promotion is set
339          **/
340
341         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
342                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
343
344                 while ((ifa = *ifap1) != NULL) {
                        /* Track the last primary whose scope is not
                         * narrower than ifa1's: a promoted address is
                         * re-inserted right after it (see below).
                         */
345                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
346                             ifa1->ifa_scope <= ifa->ifa_scope)
347                                 last_prim = ifa;
348
                        /* Skip entries that are not secondaries of
                         * ifa1's subnet; prev_prom remembers the entry
                         * preceding a possible promotion candidate.
                         */
349                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
350                             ifa1->ifa_mask != ifa->ifa_mask ||
351                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
352                                 ifap1 = &ifa->ifa_next;
353                                 prev_prom = ifa;
354                                 continue;
355                         }
356
357                         if (!do_promote) {
                                /* No promotion: the secondary is removed
                                 * together with its primary.
                                 */
358                                 inet_hash_remove(ifa);
359                                 *ifap1 = ifa->ifa_next;
360
361                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
362                                 blocking_notifier_call_chain(&inetaddr_chain,
363                                                 NETDEV_DOWN, ifa);
364                                 inet_free_ifa(ifa);
365                         } else {
                                /* First matching secondary becomes the
                                 * promotion candidate; stop scanning.
                                 */
366                                 promote = ifa;
367                                 break;
368                         }
369                 }
370         }
371
372         /* On promotion all secondaries from subnet are changing
373          * the primary IP, we must remove all their routes silently
374          * and later to add them back with new prefsrc. Do this
375          * while all addresses are on the device list.
376          */
377         for (ifa = promote; ifa; ifa = ifa->ifa_next) {
378                 if (ifa1->ifa_mask == ifa->ifa_mask &&
379                     inet_ifa_match(ifa1->ifa_address, ifa))
380                         fib_del_ifaddr(ifa, ifa1);
381         }
382
383         /* 2. Unlink it */
384
385         *ifap = ifa1->ifa_next;
386         inet_hash_remove(ifa1);
387
388         /* 3. Announce address deletion */
389
390         /* Send message first, then call notifier.
391            At first sight, FIB update triggered by notifier
392            will refer to already deleted ifaddr, that could confuse
393            netlink listeners. It is not true: look, gated sees
394            that route deleted and if it still thinks that ifaddr
395            is valid, it will try to restore deleted routes... Grr.
396            So that, this order is correct.
397          */
398         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
399         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
400
401         if (promote) {
                /* Saved before the list is rearranged: the entries that
                 * followed the promoted address are revisited below.
                 */
402                 struct in_ifaddr *next_sec = promote->ifa_next;
403
404                 if (prev_prom) {
                        /* Move the promoted entry from its current place
                         * to directly behind last_prim.
                         */
405                         prev_prom->ifa_next = promote->ifa_next;
406                         promote->ifa_next = last_prim->ifa_next;
407                         last_prim->ifa_next = promote;
408                 }
409
410                 promote->ifa_flags &= ~IFA_F_SECONDARY;
411                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
412                 blocking_notifier_call_chain(&inetaddr_chain,
413                                 NETDEV_UP, promote);
                /* Re-add the routes of the remaining secondaries of the
                 * subnet (they were removed by fib_del_ifaddr() above).
                 */
414                 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
415                         if (ifa1->ifa_mask != ifa->ifa_mask ||
416                             !inet_ifa_match(ifa1->ifa_address, ifa))
417                                         continue;
418                         fib_add_ifaddr(ifa);
419                 }
420
421         }
422         if (destroy)
423                 inet_free_ifa(ifa1);
424 }
425
/*
 * Convenience wrapper around __inet_del_ifa() for deletions that are not
 * triggered by a netlink request: no netlink header (NULL) and port id 0 are
 * passed on for the notifications.
 */
426 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
427                          int destroy)
428 {
429         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
430 }
431
/* Handler of check_lifetime_work; defined further down in this file. */
432 static void check_lifetime(struct work_struct *work);
433
/*
 * Delayed work item that runs check_lifetime().  It is (re)queued on
 * system_power_efficient_wq by __inet_insert_ifa(), inet_rtm_newaddr() and
 * by check_lifetime() itself.
 */
434 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
435
/*
 * __inet_insert_ifa - link a prepared address into its device's list.
 * @ifa:    address to insert; ifa->ifa_dev must already point at the device
 * @nlh, @portid: passed through to rtmsg_ifa() for the RTM_NEWADDR message
 *
 * Must be called with RTNL held.  The function owns @ifa on every early
 * exit: the address is released with inet_free_ifa() in all paths that do
 * not insert it.
 *
 * Returns 0 on success (and also when ifa_local is zero, in which case the
 * address is simply freed), -EEXIST if the same local address already exists
 * in the same subnet, or -EINVAL if an address of the same subnet exists
 * with a different scope.
 */
436 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
437                              u32 portid)
438 {
439         struct in_device *in_dev = ifa->ifa_dev;
440         struct in_ifaddr *ifa1, **ifap, **last_primary;
441
442         ASSERT_RTNL();
443
444         if (!ifa->ifa_local) {
445                 inet_free_ifa(ifa);
446                 return 0;
447         }
448
        /* Start out as primary; the scan below decides otherwise. */
449         ifa->ifa_flags &= ~IFA_F_SECONDARY;
450         last_primary = &in_dev->ifa_list;
451
        /* Walk the whole list.  When the loop ends ifap points at the tail
         * link, last_primary at the link following the last primary whose
         * scope is not narrower than the new address's.
         */
452         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
453              ifap = &ifa1->ifa_next) {
454                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
455                     ifa->ifa_scope <= ifa1->ifa_scope)
456                         last_primary = &ifa1->ifa_next;
457                 if (ifa1->ifa_mask == ifa->ifa_mask &&
458                     inet_ifa_match(ifa1->ifa_address, ifa)) {
459                         if (ifa1->ifa_local == ifa->ifa_local) {
460                                 inet_free_ifa(ifa);
461                                 return -EEXIST;
462                         }
463                         if (ifa1->ifa_scope != ifa->ifa_scope) {
464                                 inet_free_ifa(ifa);
465                                 return -EINVAL;
466                         }
                        /* Subnet already present: new address is secondary. */
467                         ifa->ifa_flags |= IFA_F_SECONDARY;
468                 }
469         }
470
        /* Primaries go behind the last suitable primary, secondaries are
         * appended at the tail (ifap is left at the tail link by the loop).
         */
471         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
472                 prandom_seed((__force u32) ifa->ifa_local);
473                 ifap = last_primary;
474         }
475
476         ifa->ifa_next = *ifap;
477         *ifap = ifa;
478
479         inet_hash_insert(dev_net(in_dev->dev), ifa);
480
        /* Run the lifetime check right away (delay 0). */
481         cancel_delayed_work(&check_lifetime_work);
482         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
483
484         /* Send message first, then call notifier.
485            Notifier will trigger FIB update, so that
486            listeners of netlink will know about new ifaddr */
487         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
488         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
489
490         return 0;
491 }
492
/*
 * Convenience wrapper around __inet_insert_ifa() for insertions that are
 * not triggered by a netlink request (NULL header, port id 0).  Returns
 * whatever __inet_insert_ifa() returns.
 */
493 static int inet_insert_ifa(struct in_ifaddr *ifa)
494 {
495         return __inet_insert_ifa(ifa, NULL, 0);
496 }
497
498 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
499 {
500         struct in_device *in_dev = __in_dev_get_rtnl(dev);
501
502         ASSERT_RTNL();
503
504         if (!in_dev) {
505                 inet_free_ifa(ifa);
506                 return -ENOBUFS;
507         }
508         ipv4_devconf_setall(in_dev);
509         neigh_parms_data_state_setall(in_dev->arp_parms);
510         if (ifa->ifa_dev != in_dev) {
511                 WARN_ON(ifa->ifa_dev);
512                 in_dev_hold(in_dev);
513                 ifa->ifa_dev = in_dev;
514         }
515         if (ipv4_is_loopback(ifa->ifa_local))
516                 ifa->ifa_scope = RT_SCOPE_HOST;
517         return inet_insert_ifa(ifa);
518 }
519
520 /* Caller must hold RCU or RTNL :
521  * We dont take a reference on found in_device
522  */
523 struct in_device *inetdev_by_index(struct net *net, int ifindex)
524 {
525         struct net_device *dev;
526         struct in_device *in_dev = NULL;
527
528         rcu_read_lock();
529         dev = dev_get_by_index_rcu(net, ifindex);
530         if (dev)
531                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
532         rcu_read_unlock();
533         return in_dev;
534 }
535 EXPORT_SYMBOL(inetdev_by_index);
536
537 /* Called only from RTNL semaphored context. No locks. */
538
539 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
540                                     __be32 mask)
541 {
542         ASSERT_RTNL();
543
544         for_primary_ifa(in_dev) {
545                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
546                         return ifa;
547         } endfor_ifa(in_dev);
548         return NULL;
549 }
550
551 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
552 {
553         struct net *net = sock_net(skb->sk);
554         struct nlattr *tb[IFA_MAX+1];
555         struct in_device *in_dev;
556         struct ifaddrmsg *ifm;
557         struct in_ifaddr *ifa, **ifap;
558         int err = -EINVAL;
559
560         ASSERT_RTNL();
561
562         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
563         if (err < 0)
564                 goto errout;
565
566         ifm = nlmsg_data(nlh);
567         in_dev = inetdev_by_index(net, ifm->ifa_index);
568         if (in_dev == NULL) {
569                 err = -ENODEV;
570                 goto errout;
571         }
572
573         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
574              ifap = &ifa->ifa_next) {
575                 if (tb[IFA_LOCAL] &&
576                     ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
577                         continue;
578
579                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
580                         continue;
581
582                 if (tb[IFA_ADDRESS] &&
583                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
584                     !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
585                         continue;
586
587                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
588                 return 0;
589         }
590
591         err = -EADDRNOTAVAIL;
592 errout:
593         return err;
594 }
595
/*
 * Lifetime value meaning "never expires": check_lifetime() skips the expiry
 * tests for lifetimes equal to it, and inet_rtm_newaddr() uses it as the
 * default for both the valid and the preferred lifetime.
 */
596 #define INFINITY_LIFE_TIME      0xFFFFFFFF
597
/*
 * check_lifetime - handler of check_lifetime_work: expire and deprecate
 * addresses whose lifetimes have run out.
 * @work: the work item (not used by the body)
 *
 * Every bucket of inet_addr_lst is processed in two passes:
 *   1. under rcu_read_lock() only: find out whether any non-permanent
 *      address needs a change, and track in "next" the earliest time at
 *      which some address will need attention;
 *   2. only if a change is needed, under rtnl_lock(): delete addresses whose
 *      valid lifetime has passed and mark addresses whose preferred lifetime
 *      has passed as IFA_F_DEPRECATED (announced with RTM_NEWADDR).
 * Ages are compared in seconds (jiffies difference divided by HZ).
 * Finally the work re-queues itself, not sooner than
 * ADDRCONF_TIMER_FUZZ_MAX jiffies from now.
 */
598 static void check_lifetime(struct work_struct *work)
599 {
600         unsigned long now, next, next_sec, next_sched;
601         struct in_ifaddr *ifa;
602         struct hlist_node *n;
603         int i;
604
605         now = jiffies;
606         next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
607
608         for (i = 0; i < IN4_ADDR_HSIZE; i++) {
609                 bool change_needed = false;
610
                /* Pass 1: read-only scan, no RTNL taken. */
611                 rcu_read_lock();
612                 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
613                         unsigned long age;
614
615                         if (ifa->ifa_flags & IFA_F_PERMANENT)
616                                 continue;
617
618                         /* We try to batch several events at once. */
619                         age = (now - ifa->ifa_tstamp +
620                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
621
622                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
623                             age >= ifa->ifa_valid_lft) {
                                /* Valid lifetime over: must be deleted. */
624                                 change_needed = true;
625                         } else if (ifa->ifa_preferred_lft ==
626                                    INFINITY_LIFE_TIME) {
627                                 continue;
628                         } else if (age >= ifa->ifa_preferred_lft) {
                                /* Preferred lifetime over: the next event
                                 * for this address is its expiry.
                                 */
629                                 if (time_before(ifa->ifa_tstamp +
630                                                 ifa->ifa_valid_lft * HZ, next))
631                                         next = ifa->ifa_tstamp +
632                                                ifa->ifa_valid_lft * HZ;
633
634                                 if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
635                                         change_needed = true;
636                         } else if (time_before(ifa->ifa_tstamp +
637                                                ifa->ifa_preferred_lft * HZ,
638                                                next)) {
                                /* Still preferred: next event is the end
                                 * of the preferred lifetime.
                                 */
639                                 next = ifa->ifa_tstamp +
640                                        ifa->ifa_preferred_lft * HZ;
641                         }
642                 }
643                 rcu_read_unlock();
644                 if (!change_needed)
645                         continue;
                /* Pass 2: apply the changes with RTNL held.  The _safe
                 * iterator is used because entries may be unlinked.
                 */
646                 rtnl_lock();
647                 hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
648                         unsigned long age;
649
650                         if (ifa->ifa_flags & IFA_F_PERMANENT)
651                                 continue;
652
653                         /* We try to batch several events at once. */
654                         age = (now - ifa->ifa_tstamp +
655                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
656
657                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
658                             age >= ifa->ifa_valid_lft) {
659                                 struct in_ifaddr **ifap;
660
                                /* inet_del_ifa() needs the link pointing at
                                 * the address, so look it up in the
                                 * device's list first.
                                 */
661                                 for (ifap = &ifa->ifa_dev->ifa_list;
662                                      *ifap != NULL; ifap = &(*ifap)->ifa_next) {
663                                         if (*ifap == ifa) {
664                                                 inet_del_ifa(ifa->ifa_dev,
665                                                              ifap, 1);
666                                                 break;
667                                         }
668                                 }
669                         } else if (ifa->ifa_preferred_lft !=
670                                    INFINITY_LIFE_TIME &&
671                                    age >= ifa->ifa_preferred_lft &&
672                                    !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
673                                 ifa->ifa_flags |= IFA_F_DEPRECATED;
674                                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
675                         }
676                 }
677                 rtnl_unlock();
678         }
679
680         next_sec = round_jiffies_up(next);
681         next_sched = next;
682
683         /* If rounded timeout is accurate enough, accept it. */
684         if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
685                 next_sched = next_sec;
686
687         now = jiffies;
688         /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
689         if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
690                 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
691
692         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
693                         next_sched - now);
694 }
695
696 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
697                              __u32 prefered_lft)
698 {
699         unsigned long timeout;
700
701         ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
702
703         timeout = addrconf_timeout_fixup(valid_lft, HZ);
704         if (addrconf_finite_timeout(timeout))
705                 ifa->ifa_valid_lft = timeout;
706         else
707                 ifa->ifa_flags |= IFA_F_PERMANENT;
708
709         timeout = addrconf_timeout_fixup(prefered_lft, HZ);
710         if (addrconf_finite_timeout(timeout)) {
711                 if (timeout == 0)
712                         ifa->ifa_flags |= IFA_F_DEPRECATED;
713                 ifa->ifa_preferred_lft = timeout;
714         }
715         ifa->ifa_tstamp = jiffies;
716         if (!ifa->ifa_cstamp)
717                 ifa->ifa_cstamp = ifa->ifa_tstamp;
718 }
719
720 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
721                                        __u32 *pvalid_lft, __u32 *pprefered_lft)
722 {
723         struct nlattr *tb[IFA_MAX+1];
724         struct in_ifaddr *ifa;
725         struct ifaddrmsg *ifm;
726         struct net_device *dev;
727         struct in_device *in_dev;
728         int err;
729
730         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
731         if (err < 0)
732                 goto errout;
733
734         ifm = nlmsg_data(nlh);
735         err = -EINVAL;
736         if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
737                 goto errout;
738
739         dev = __dev_get_by_index(net, ifm->ifa_index);
740         err = -ENODEV;
741         if (dev == NULL)
742                 goto errout;
743
744         in_dev = __in_dev_get_rtnl(dev);
745         err = -ENOBUFS;
746         if (in_dev == NULL)
747                 goto errout;
748
749         ifa = inet_alloc_ifa();
750         if (ifa == NULL)
751                 /*
752                  * A potential indev allocation can be left alive, it stays
753                  * assigned to its device and is destroy with it.
754                  */
755                 goto errout;
756
757         ipv4_devconf_setall(in_dev);
758         neigh_parms_data_state_setall(in_dev->arp_parms);
759         in_dev_hold(in_dev);
760
761         if (tb[IFA_ADDRESS] == NULL)
762                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
763
764         INIT_HLIST_NODE(&ifa->hash);
765         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
766         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
767         ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
768                                          ifm->ifa_flags;
769         ifa->ifa_scope = ifm->ifa_scope;
770         ifa->ifa_dev = in_dev;
771
772         ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
773         ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
774
775         if (tb[IFA_BROADCAST])
776                 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
777
778         if (tb[IFA_LABEL])
779                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
780         else
781                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
782
783         if (tb[IFA_CACHEINFO]) {
784                 struct ifa_cacheinfo *ci;
785
786                 ci = nla_data(tb[IFA_CACHEINFO]);
787                 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
788                         err = -EINVAL;
789                         goto errout_free;
790                 }
791                 *pvalid_lft = ci->ifa_valid;
792                 *pprefered_lft = ci->ifa_prefered;
793         }
794
795         return ifa;
796
797 errout_free:
798         inet_free_ifa(ifa);
799 errout:
800         return ERR_PTR(err);
801 }
802
803 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
804 {
805         struct in_device *in_dev = ifa->ifa_dev;
806         struct in_ifaddr *ifa1, **ifap;
807
808         if (!ifa->ifa_local)
809                 return NULL;
810
811         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
812              ifap = &ifa1->ifa_next) {
813                 if (ifa1->ifa_mask == ifa->ifa_mask &&
814                     inet_ifa_match(ifa1->ifa_address, ifa) &&
815                     ifa1->ifa_local == ifa->ifa_local)
816                         return ifa1;
817         }
818         return NULL;
819 }
820
821 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
822 {
823         struct net *net = sock_net(skb->sk);
824         struct in_ifaddr *ifa;
825         struct in_ifaddr *ifa_existing;
826         __u32 valid_lft = INFINITY_LIFE_TIME;
827         __u32 prefered_lft = INFINITY_LIFE_TIME;
828
829         ASSERT_RTNL();
830
831         ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
832         if (IS_ERR(ifa))
833                 return PTR_ERR(ifa);
834
835         ifa_existing = find_matching_ifa(ifa);
836         if (!ifa_existing) {
837                 /* It would be best to check for !NLM_F_CREATE here but
838                  * userspace already relies on not having to provide this.
839                  */
840                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
841                 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
842         } else {
843                 inet_free_ifa(ifa);
844
845                 if (nlh->nlmsg_flags & NLM_F_EXCL ||
846                     !(nlh->nlmsg_flags & NLM_F_REPLACE))
847                         return -EEXIST;
848                 ifa = ifa_existing;
849                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
850                 cancel_delayed_work(&check_lifetime_work);
851                 queue_delayed_work(system_power_efficient_wq,
852                                 &check_lifetime_work, 0);
853                 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
854                 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
855         }
856         return 0;
857 }
858
859 /*
860  *      Determine a default network mask, based on the IP address.
861  */
862
863 static int inet_abc_len(__be32 addr)
864 {
865         int rc = -1;    /* Something else, probably a multicast. */
866
867         if (ipv4_is_zeronet(addr))
868                 rc = 0;
869         else {
870                 __u32 haddr = ntohl(addr);
871
872                 if (IN_CLASSA(haddr))
873                         rc = 8;
874                 else if (IN_CLASSB(haddr))
875                         rc = 16;
876                 else if (IN_CLASSC(haddr))
877                         rc = 24;
878         }
879
880         return rc;
881 }
882
883
884 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
885 {
886         struct ifreq ifr;
887         struct sockaddr_in sin_orig;
888         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
889         struct in_device *in_dev;
890         struct in_ifaddr **ifap = NULL;
891         struct in_ifaddr *ifa = NULL;
892         struct net_device *dev;
893         char *colon;
894         int ret = -EFAULT;
895         int tryaddrmatch = 0;
896
897         /*
898          *      Fetch the caller's info block into kernel space
899          */
900
901         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
902                 goto out;
903         ifr.ifr_name[IFNAMSIZ - 1] = 0;
904
905         /* save original address for comparison */
906         memcpy(&sin_orig, sin, sizeof(*sin));
907
908         colon = strchr(ifr.ifr_name, ':');
909         if (colon)
910                 *colon = 0;
911
912         dev_load(net, ifr.ifr_name);
913
914         switch (cmd) {
915         case SIOCGIFADDR:       /* Get interface address */
916         case SIOCGIFBRDADDR:    /* Get the broadcast address */
917         case SIOCGIFDSTADDR:    /* Get the destination address */
918         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
919                 /* Note that these ioctls will not sleep,
920                    so that we do not impose a lock.
921                    One day we will be forced to put shlock here (I mean SMP)
922                  */
923                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
924                 memset(sin, 0, sizeof(*sin));
925                 sin->sin_family = AF_INET;
926                 break;
927
928         case SIOCSIFFLAGS:
929                 ret = -EPERM;
930                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
931                         goto out;
932                 break;
933         case SIOCSIFADDR:       /* Set interface address (and family) */
934         case SIOCSIFBRDADDR:    /* Set the broadcast address */
935         case SIOCSIFDSTADDR:    /* Set the destination address */
936         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
937                 ret = -EPERM;
938                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
939                         goto out;
940                 ret = -EINVAL;
941                 if (sin->sin_family != AF_INET)
942                         goto out;
943                 break;
944         default:
945                 ret = -EINVAL;
946                 goto out;
947         }
948
949         rtnl_lock();
950
951         ret = -ENODEV;
952         dev = __dev_get_by_name(net, ifr.ifr_name);
953         if (!dev)
954                 goto done;
955
956         if (colon)
957                 *colon = ':';
958
959         in_dev = __in_dev_get_rtnl(dev);
960         if (in_dev) {
961                 if (tryaddrmatch) {
962                         /* Matthias Andree */
963                         /* compare label and address (4.4BSD style) */
964                         /* note: we only do this for a limited set of ioctls
965                            and only if the original address family was AF_INET.
966                            This is checked above. */
967                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
968                              ifap = &ifa->ifa_next) {
969                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
970                                     sin_orig.sin_addr.s_addr ==
971                                                         ifa->ifa_local) {
972                                         break; /* found */
973                                 }
974                         }
975                 }
976                 /* we didn't get a match, maybe the application is
977                    4.3BSD-style and passed in junk so we fall back to
978                    comparing just the label */
979                 if (!ifa) {
980                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
981                              ifap = &ifa->ifa_next)
982                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
983                                         break;
984                 }
985         }
986
987         ret = -EADDRNOTAVAIL;
988         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
989                 goto done;
990
991         switch (cmd) {
992         case SIOCGIFADDR:       /* Get interface address */
993                 sin->sin_addr.s_addr = ifa->ifa_local;
994                 goto rarok;
995
996         case SIOCGIFBRDADDR:    /* Get the broadcast address */
997                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
998                 goto rarok;
999
1000         case SIOCGIFDSTADDR:    /* Get the destination address */
1001                 sin->sin_addr.s_addr = ifa->ifa_address;
1002                 goto rarok;
1003
1004         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
1005                 sin->sin_addr.s_addr = ifa->ifa_mask;
1006                 goto rarok;
1007
1008         case SIOCSIFFLAGS:
1009                 if (colon) {
1010                         ret = -EADDRNOTAVAIL;
1011                         if (!ifa)
1012                                 break;
1013                         ret = 0;
1014                         if (!(ifr.ifr_flags & IFF_UP))
1015                                 inet_del_ifa(in_dev, ifap, 1);
1016                         break;
1017                 }
1018                 ret = dev_change_flags(dev, ifr.ifr_flags);
1019                 break;
1020
1021         case SIOCSIFADDR:       /* Set interface address (and family) */
1022                 ret = -EINVAL;
1023                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1024                         break;
1025
1026                 if (!ifa) {
1027                         ret = -ENOBUFS;
1028                         ifa = inet_alloc_ifa();
1029                         if (!ifa)
1030                                 break;
1031                         INIT_HLIST_NODE(&ifa->hash);
1032                         if (colon)
1033                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
1034                         else
1035                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1036                 } else {
1037                         ret = 0;
1038                         if (ifa->ifa_local == sin->sin_addr.s_addr)
1039                                 break;
1040                         inet_del_ifa(in_dev, ifap, 0);
1041                         ifa->ifa_broadcast = 0;
1042                         ifa->ifa_scope = 0;
1043                 }
1044
1045                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1046
1047                 if (!(dev->flags & IFF_POINTOPOINT)) {
1048                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1049                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1050                         if ((dev->flags & IFF_BROADCAST) &&
1051                             ifa->ifa_prefixlen < 31)
1052                                 ifa->ifa_broadcast = ifa->ifa_address |
1053                                                      ~ifa->ifa_mask;
1054                 } else {
1055                         ifa->ifa_prefixlen = 32;
1056                         ifa->ifa_mask = inet_make_mask(32);
1057                 }
1058                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1059                 ret = inet_set_ifa(dev, ifa);
1060                 break;
1061
1062         case SIOCSIFBRDADDR:    /* Set the broadcast address */
1063                 ret = 0;
1064                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1065                         inet_del_ifa(in_dev, ifap, 0);
1066                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
1067                         inet_insert_ifa(ifa);
1068                 }
1069                 break;
1070
1071         case SIOCSIFDSTADDR:    /* Set the destination address */
1072                 ret = 0;
1073                 if (ifa->ifa_address == sin->sin_addr.s_addr)
1074                         break;
1075                 ret = -EINVAL;
1076                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1077                         break;
1078                 ret = 0;
1079                 inet_del_ifa(in_dev, ifap, 0);
1080                 ifa->ifa_address = sin->sin_addr.s_addr;
1081                 inet_insert_ifa(ifa);
1082                 break;
1083
1084         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1085
1086                 /*
1087                  *      The mask we set must be legal.
1088                  */
1089                 ret = -EINVAL;
1090                 if (bad_mask(sin->sin_addr.s_addr, 0))
1091                         break;
1092                 ret = 0;
1093                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1094                         __be32 old_mask = ifa->ifa_mask;
1095                         inet_del_ifa(in_dev, ifap, 0);
1096                         ifa->ifa_mask = sin->sin_addr.s_addr;
1097                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1098
1099                         /* See if current broadcast address matches
1100                          * with current netmask, then recalculate
1101                          * the broadcast address. Otherwise it's a
1102                          * funny address, so don't touch it since
1103                          * the user seems to know what (s)he's doing...
1104                          */
1105                         if ((dev->flags & IFF_BROADCAST) &&
1106                             (ifa->ifa_prefixlen < 31) &&
1107                             (ifa->ifa_broadcast ==
1108                              (ifa->ifa_local|~old_mask))) {
1109                                 ifa->ifa_broadcast = (ifa->ifa_local |
1110                                                       ~sin->sin_addr.s_addr);
1111                         }
1112                         inet_insert_ifa(ifa);
1113                 }
1114                 break;
1115         }
1116 done:
1117         rtnl_unlock();
1118 out:
1119         return ret;
1120 rarok:
1121         rtnl_unlock();
1122         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
1123         goto out;
1124 }
1125
1126 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1127 {
1128         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1129         struct in_ifaddr *ifa;
1130         struct ifreq ifr;
1131         int done = 0;
1132
1133         if (!in_dev)
1134                 goto out;
1135
1136         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1137                 if (!buf) {
1138                         done += sizeof(ifr);
1139                         continue;
1140                 }
1141                 if (len < (int) sizeof(ifr))
1142                         break;
1143                 memset(&ifr, 0, sizeof(struct ifreq));
1144                 strcpy(ifr.ifr_name, ifa->ifa_label);
1145
1146                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1147                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1148                                                                 ifa->ifa_local;
1149
1150                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1151                         done = -EFAULT;
1152                         break;
1153                 }
1154                 buf  += sizeof(struct ifreq);
1155                 len  -= sizeof(struct ifreq);
1156                 done += sizeof(struct ifreq);
1157         }
1158 out:
1159         return done;
1160 }
1161
1162 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1163 {
1164         __be32 addr = 0;
1165         struct in_device *in_dev;
1166         struct net *net = dev_net(dev);
1167
1168         rcu_read_lock();
1169         in_dev = __in_dev_get_rcu(dev);
1170         if (!in_dev)
1171                 goto no_in_dev;
1172
1173         for_primary_ifa(in_dev) {
1174                 if (ifa->ifa_scope > scope)
1175                         continue;
1176                 if (!dst || inet_ifa_match(dst, ifa)) {
1177                         addr = ifa->ifa_local;
1178                         break;
1179                 }
1180                 if (!addr)
1181                         addr = ifa->ifa_local;
1182         } endfor_ifa(in_dev);
1183
1184         if (addr)
1185                 goto out_unlock;
1186 no_in_dev:
1187
1188         /* Not loopback addresses on loopback should be preferred
1189            in this case. It is importnat that lo is the first interface
1190            in dev_base list.
1191          */
1192         for_each_netdev_rcu(net, dev) {
1193                 in_dev = __in_dev_get_rcu(dev);
1194                 if (!in_dev)
1195                         continue;
1196
1197                 for_primary_ifa(in_dev) {
1198                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
1199                             ifa->ifa_scope <= scope) {
1200                                 addr = ifa->ifa_local;
1201                                 goto out_unlock;
1202                         }
1203                 } endfor_ifa(in_dev);
1204         }
1205 out_unlock:
1206         rcu_read_unlock();
1207         return addr;
1208 }
1209 EXPORT_SYMBOL(inet_select_addr);
1210
1211 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1212                               __be32 local, int scope)
1213 {
1214         int same = 0;
1215         __be32 addr = 0;
1216
1217         for_ifa(in_dev) {
1218                 if (!addr &&
1219                     (local == ifa->ifa_local || !local) &&
1220                     ifa->ifa_scope <= scope) {
1221                         addr = ifa->ifa_local;
1222                         if (same)
1223                                 break;
1224                 }
1225                 if (!same) {
1226                         same = (!local || inet_ifa_match(local, ifa)) &&
1227                                 (!dst || inet_ifa_match(dst, ifa));
1228                         if (same && addr) {
1229                                 if (local || !dst)
1230                                         break;
1231                                 /* Is the selected addr into dst subnet? */
1232                                 if (inet_ifa_match(addr, ifa))
1233                                         break;
1234                                 /* No, then can we use new local src? */
1235                                 if (ifa->ifa_scope <= scope) {
1236                                         addr = ifa->ifa_local;
1237                                         break;
1238                                 }
1239                                 /* search for large dst subnet for addr */
1240                                 same = 0;
1241                         }
1242                 }
1243         } endfor_ifa(in_dev);
1244
1245         return same ? addr : 0;
1246 }
1247
1248 /*
1249  * Confirm that local IP address exists using wildcards:
1250  * - net: netns to check, cannot be NULL
1251  * - in_dev: only on this interface, NULL=any interface
1252  * - dst: only in the same subnet as dst, 0=any dst
1253  * - local: address, 0=autoselect the local address
1254  * - scope: maximum allowed scope value for the local address
1255  */
1256 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1257                          __be32 dst, __be32 local, int scope)
1258 {
1259         __be32 addr = 0;
1260         struct net_device *dev;
1261
1262         if (in_dev != NULL)
1263                 return confirm_addr_indev(in_dev, dst, local, scope);
1264
1265         rcu_read_lock();
1266         for_each_netdev_rcu(net, dev) {
1267                 in_dev = __in_dev_get_rcu(dev);
1268                 if (in_dev) {
1269                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1270                         if (addr)
1271                                 break;
1272                 }
1273         }
1274         rcu_read_unlock();
1275
1276         return addr;
1277 }
1278 EXPORT_SYMBOL(inet_confirm_addr);
1279
1280 /*
1281  *      Device notifier
1282  */
1283
1284 int register_inetaddr_notifier(struct notifier_block *nb)
1285 {
1286         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1287 }
1288 EXPORT_SYMBOL(register_inetaddr_notifier);
1289
1290 int unregister_inetaddr_notifier(struct notifier_block *nb)
1291 {
1292         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1293 }
1294 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1295
1296 /* Rename ifa_labels for a device name change. Make some effort to preserve
1297  * existing alias numbering and to create unique labels if possible.
1298 */
1299 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1300 {
1301         struct in_ifaddr *ifa;
1302         int named = 0;
1303
1304         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1305                 char old[IFNAMSIZ], *dot;
1306
1307                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1308                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1309                 if (named++ == 0)
1310                         goto skip;
1311                 dot = strchr(old, ':');
1312                 if (dot == NULL) {
1313                         sprintf(old, ":%d", named);
1314                         dot = old;
1315                 }
1316                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1317                         strcat(ifa->ifa_label, dot);
1318                 else
1319                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1320 skip:
1321                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1322         }
1323 }
1324
/*
 * Tell whether @mtu is large enough for IPv4 to run on a device.
 * The threshold is 68 bytes, the smallest datagram size RFC 791 requires
 * every link to carry without fragmentation.
 */
static bool inetdev_valid_mtu(unsigned int mtu)
{
	if (mtu < 68)
		return false;

	return true;
}
1329
1330 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1331                                         struct in_device *in_dev)
1332
1333 {
1334         struct in_ifaddr *ifa;
1335
1336         for (ifa = in_dev->ifa_list; ifa;
1337              ifa = ifa->ifa_next) {
1338                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1339                          ifa->ifa_local, dev,
1340                          ifa->ifa_local, NULL,
1341                          dev->dev_addr, NULL);
1342         }
1343 }
1344
1345 /* Called only under RTNL semaphore */
1346
1347 static int inetdev_event(struct notifier_block *this, unsigned long event,
1348                          void *ptr)
1349 {
1350         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1351         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1352
1353         ASSERT_RTNL();
1354
1355         if (!in_dev) {
1356                 if (event == NETDEV_REGISTER) {
1357                         in_dev = inetdev_init(dev);
1358                         if (IS_ERR(in_dev))
1359                                 return notifier_from_errno(PTR_ERR(in_dev));
1360                         if (dev->flags & IFF_LOOPBACK) {
1361                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1362                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1363                         }
1364                 } else if (event == NETDEV_CHANGEMTU) {
1365                         /* Re-enabling IP */
1366                         if (inetdev_valid_mtu(dev->mtu))
1367                                 in_dev = inetdev_init(dev);
1368                 }
1369                 goto out;
1370         }
1371
1372         switch (event) {
1373         case NETDEV_REGISTER:
1374                 pr_debug("%s: bug\n", __func__);
1375                 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1376                 break;
1377         case NETDEV_UP:
1378                 if (!inetdev_valid_mtu(dev->mtu))
1379                         break;
1380                 if (dev->flags & IFF_LOOPBACK) {
1381                         struct in_ifaddr *ifa = inet_alloc_ifa();
1382
1383                         if (ifa) {
1384                                 INIT_HLIST_NODE(&ifa->hash);
1385                                 ifa->ifa_local =
1386                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1387                                 ifa->ifa_prefixlen = 8;
1388                                 ifa->ifa_mask = inet_make_mask(8);
1389                                 in_dev_hold(in_dev);
1390                                 ifa->ifa_dev = in_dev;
1391                                 ifa->ifa_scope = RT_SCOPE_HOST;
1392                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1393                                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1394                                                  INFINITY_LIFE_TIME);
1395                                 ipv4_devconf_setall(in_dev);
1396                                 neigh_parms_data_state_setall(in_dev->arp_parms);
1397                                 inet_insert_ifa(ifa);
1398                         }
1399                 }
1400                 ip_mc_up(in_dev);
1401                 /* fall through */
1402         case NETDEV_CHANGEADDR:
1403                 if (!IN_DEV_ARP_NOTIFY(in_dev))
1404                         break;
1405                 /* fall through */
1406         case NETDEV_NOTIFY_PEERS:
1407                 /* Send gratuitous ARP to notify of link change */
1408                 inetdev_send_gratuitous_arp(dev, in_dev);
1409                 break;
1410         case NETDEV_DOWN:
1411                 ip_mc_down(in_dev);
1412                 break;
1413         case NETDEV_PRE_TYPE_CHANGE:
1414                 ip_mc_unmap(in_dev);
1415                 break;
1416         case NETDEV_POST_TYPE_CHANGE:
1417                 ip_mc_remap(in_dev);
1418                 break;
1419         case NETDEV_CHANGEMTU:
1420                 if (inetdev_valid_mtu(dev->mtu))
1421                         break;
1422                 /* disable IP when MTU is not enough */
1423         case NETDEV_UNREGISTER:
1424                 inetdev_destroy(in_dev);
1425                 break;
1426         case NETDEV_CHANGENAME:
1427                 /* Do not notify about label change, this event is
1428                  * not interesting to applications using netlink.
1429                  */
1430                 inetdev_changename(dev, in_dev);
1431
1432                 devinet_sysctl_unregister(in_dev);
1433                 devinet_sysctl_register(in_dev);
1434                 break;
1435         }
1436 out:
1437         return NOTIFY_DONE;
1438 }
1439
1440 static struct notifier_block ip_netdev_notifier = {
1441         .notifier_call = inetdev_event,
1442 };
1443
1444 static size_t inet_nlmsg_size(void)
1445 {
1446         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1447                + nla_total_size(4) /* IFA_ADDRESS */
1448                + nla_total_size(4) /* IFA_LOCAL */
1449                + nla_total_size(4) /* IFA_BROADCAST */
1450                + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1451                + nla_total_size(4)  /* IFA_FLAGS */
1452                + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1453 }
1454
1455 static inline u32 cstamp_delta(unsigned long cstamp)
1456 {
1457         return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1458 }
1459
1460 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1461                          unsigned long tstamp, u32 preferred, u32 valid)
1462 {
1463         struct ifa_cacheinfo ci;
1464
1465         ci.cstamp = cstamp_delta(cstamp);
1466         ci.tstamp = cstamp_delta(tstamp);
1467         ci.ifa_prefered = preferred;
1468         ci.ifa_valid = valid;
1469
1470         return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1471 }
1472
1473 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1474                             u32 portid, u32 seq, int event, unsigned int flags)
1475 {
1476         struct ifaddrmsg *ifm;
1477         struct nlmsghdr  *nlh;
1478         u32 preferred, valid;
1479
1480         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1481         if (nlh == NULL)
1482                 return -EMSGSIZE;
1483
1484         ifm = nlmsg_data(nlh);
1485         ifm->ifa_family = AF_INET;
1486         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1487         ifm->ifa_flags = ifa->ifa_flags;
1488         ifm->ifa_scope = ifa->ifa_scope;
1489         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1490
1491         if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1492                 preferred = ifa->ifa_preferred_lft;
1493                 valid = ifa->ifa_valid_lft;
1494                 if (preferred != INFINITY_LIFE_TIME) {
1495                         long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1496
1497                         if (preferred > tval)
1498                                 preferred -= tval;
1499                         else
1500                                 preferred = 0;
1501                         if (valid != INFINITY_LIFE_TIME) {
1502                                 if (valid > tval)
1503                                         valid -= tval;
1504                                 else
1505                                         valid = 0;
1506                         }
1507                 }
1508         } else {
1509                 preferred = INFINITY_LIFE_TIME;
1510                 valid = INFINITY_LIFE_TIME;
1511         }
1512         if ((ifa->ifa_address &&
1513              nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1514             (ifa->ifa_local &&
1515              nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1516             (ifa->ifa_broadcast &&
1517              nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1518             (ifa->ifa_label[0] &&
1519              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1520             nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1521             put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1522                           preferred, valid))
1523                 goto nla_put_failure;
1524
1525         nlmsg_end(skb, nlh);
1526         return 0;
1527
1528 nla_put_failure:
1529         nlmsg_cancel(skb, nlh);
1530         return -EMSGSIZE;
1531 }
1532
1533 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1534 {
1535         struct net *net = sock_net(skb->sk);
1536         int h, s_h;
1537         int idx, s_idx;
1538         int ip_idx, s_ip_idx;
1539         struct net_device *dev;
1540         struct in_device *in_dev;
1541         struct in_ifaddr *ifa;
1542         struct hlist_head *head;
1543
1544         s_h = cb->args[0];
1545         s_idx = idx = cb->args[1];
1546         s_ip_idx = ip_idx = cb->args[2];
1547
1548         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1549                 idx = 0;
1550                 head = &net->dev_index_head[h];
1551                 rcu_read_lock();
1552                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1553                           net->dev_base_seq;
1554                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1555                         if (idx < s_idx)
1556                                 goto cont;
1557                         if (h > s_h || idx > s_idx)
1558                                 s_ip_idx = 0;
1559                         in_dev = __in_dev_get_rcu(dev);
1560                         if (!in_dev)
1561                                 goto cont;
1562
1563                         for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1564                              ifa = ifa->ifa_next, ip_idx++) {
1565                                 if (ip_idx < s_ip_idx)
1566                                         continue;
1567                                 if (inet_fill_ifaddr(skb, ifa,
1568                                              NETLINK_CB(cb->skb).portid,
1569                                              cb->nlh->nlmsg_seq,
1570                                              RTM_NEWADDR, NLM_F_MULTI) < 0) {
1571                                         rcu_read_unlock();
1572                                         goto done;
1573                                 }
1574                                 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1575                         }
1576 cont:
1577                         idx++;
1578                 }
1579                 rcu_read_unlock();
1580         }
1581
1582 done:
1583         cb->args[0] = h;
1584         cb->args[1] = idx;
1585         cb->args[2] = ip_idx;
1586
1587         return skb->len;
1588 }
1589
1590 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1591                       u32 portid)
1592 {
1593         struct sk_buff *skb;
1594         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1595         int err = -ENOBUFS;
1596         struct net *net;
1597
1598         net = dev_net(ifa->ifa_dev->dev);
1599         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1600         if (skb == NULL)
1601                 goto errout;
1602
1603         err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1604         if (err < 0) {
1605                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1606                 WARN_ON(err == -EMSGSIZE);
1607                 kfree_skb(skb);
1608                 goto errout;
1609         }
1610         rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1611         return;
1612 errout:
1613         if (err < 0)
1614                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1615 }
1616
1617 static size_t inet_get_link_af_size(const struct net_device *dev)
1618 {
1619         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1620
1621         if (!in_dev)
1622                 return 0;
1623
1624         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1625 }
1626
1627 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1628 {
1629         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1630         struct nlattr *nla;
1631         int i;
1632
1633         if (!in_dev)
1634                 return -ENODATA;
1635
1636         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1637         if (nla == NULL)
1638                 return -EMSGSIZE;
1639
1640         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1641                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1642
1643         return 0;
1644 }
1645
1646 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1647         [IFLA_INET_CONF]        = { .type = NLA_NESTED },
1648 };
1649
1650 static int inet_validate_link_af(const struct net_device *dev,
1651                                  const struct nlattr *nla)
1652 {
1653         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1654         int err, rem;
1655
1656         if (dev && !__in_dev_get_rtnl(dev))
1657                 return -EAFNOSUPPORT;
1658
1659         err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1660         if (err < 0)
1661                 return err;
1662
1663         if (tb[IFLA_INET_CONF]) {
1664                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1665                         int cfgid = nla_type(a);
1666
1667                         if (nla_len(a) < 4)
1668                                 return -EINVAL;
1669
1670                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1671                                 return -EINVAL;
1672                 }
1673         }
1674
1675         return 0;
1676 }
1677
1678 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1679 {
1680         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1681         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1682         int rem;
1683
1684         if (!in_dev)
1685                 return -EAFNOSUPPORT;
1686
1687         if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1688                 BUG();
1689
1690         if (tb[IFLA_INET_CONF]) {
1691                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1692                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1693         }
1694
1695         return 0;
1696 }
1697
1698 static int inet_netconf_msgsize_devconf(int type)
1699 {
1700         int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1701                    + nla_total_size(4); /* NETCONFA_IFINDEX */
1702
1703         /* type -1 is used for ALL */
1704         if (type == -1 || type == NETCONFA_FORWARDING)
1705                 size += nla_total_size(4);
1706         if (type == -1 || type == NETCONFA_RP_FILTER)
1707                 size += nla_total_size(4);
1708         if (type == -1 || type == NETCONFA_MC_FORWARDING)
1709                 size += nla_total_size(4);
1710         if (type == -1 || type == NETCONFA_PROXY_NEIGH)
1711                 size += nla_total_size(4);
1712
1713         return size;
1714 }
1715
1716 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1717                                      struct ipv4_devconf *devconf, u32 portid,
1718                                      u32 seq, int event, unsigned int flags,
1719                                      int type)
1720 {
1721         struct nlmsghdr  *nlh;
1722         struct netconfmsg *ncm;
1723
1724         nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1725                         flags);
1726         if (nlh == NULL)
1727                 return -EMSGSIZE;
1728
1729         ncm = nlmsg_data(nlh);
1730         ncm->ncm_family = AF_INET;
1731
1732         if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1733                 goto nla_put_failure;
1734
1735         /* type -1 is used for ALL */
1736         if ((type == -1 || type == NETCONFA_FORWARDING) &&
1737             nla_put_s32(skb, NETCONFA_FORWARDING,
1738                         IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1739                 goto nla_put_failure;
1740         if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1741             nla_put_s32(skb, NETCONFA_RP_FILTER,
1742                         IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1743                 goto nla_put_failure;
1744         if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1745             nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1746                         IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1747                 goto nla_put_failure;
1748         if ((type == -1 || type == NETCONFA_PROXY_NEIGH) &&
1749             nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1750                         IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1751                 goto nla_put_failure;
1752
1753         nlmsg_end(skb, nlh);
1754         return 0;
1755
1756 nla_put_failure:
1757         nlmsg_cancel(skb, nlh);
1758         return -EMSGSIZE;
1759 }
1760
1761 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1762                                  struct ipv4_devconf *devconf)
1763 {
1764         struct sk_buff *skb;
1765         int err = -ENOBUFS;
1766
1767         skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1768         if (skb == NULL)
1769                 goto errout;
1770
1771         err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1772                                         RTM_NEWNETCONF, 0, type);
1773         if (err < 0) {
1774                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1775                 WARN_ON(err == -EMSGSIZE);
1776                 kfree_skb(skb);
1777                 goto errout;
1778         }
1779         rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1780         return;
1781 errout:
1782         if (err < 0)
1783                 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1784 }
1785
1786 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1787         [NETCONFA_IFINDEX]      = { .len = sizeof(int) },
1788         [NETCONFA_FORWARDING]   = { .len = sizeof(int) },
1789         [NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
1790         [NETCONFA_PROXY_NEIGH]  = { .len = sizeof(int) },
1791 };
1792
1793 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1794                                     struct nlmsghdr *nlh)
1795 {
1796         struct net *net = sock_net(in_skb->sk);
1797         struct nlattr *tb[NETCONFA_MAX+1];
1798         struct netconfmsg *ncm;
1799         struct sk_buff *skb;
1800         struct ipv4_devconf *devconf;
1801         struct in_device *in_dev;
1802         struct net_device *dev;
1803         int ifindex;
1804         int err;
1805
1806         err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1807                           devconf_ipv4_policy);
1808         if (err < 0)
1809                 goto errout;
1810
1811         err = EINVAL;
1812         if (!tb[NETCONFA_IFINDEX])
1813                 goto errout;
1814
1815         ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1816         switch (ifindex) {
1817         case NETCONFA_IFINDEX_ALL:
1818                 devconf = net->ipv4.devconf_all;
1819                 break;
1820         case NETCONFA_IFINDEX_DEFAULT:
1821                 devconf = net->ipv4.devconf_dflt;
1822                 break;
1823         default:
1824                 dev = __dev_get_by_index(net, ifindex);
1825                 if (dev == NULL)
1826                         goto errout;
1827                 in_dev = __in_dev_get_rtnl(dev);
1828                 if (in_dev == NULL)
1829                         goto errout;
1830                 devconf = &in_dev->cnf;
1831                 break;
1832         }
1833
1834         err = -ENOBUFS;
1835         skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1836         if (skb == NULL)
1837                 goto errout;
1838
1839         err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1840                                         NETLINK_CB(in_skb).portid,
1841                                         nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1842                                         -1);
1843         if (err < 0) {
1844                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1845                 WARN_ON(err == -EMSGSIZE);
1846                 kfree_skb(skb);
1847                 goto errout;
1848         }
1849         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1850 errout:
1851         return err;
1852 }
1853
1854 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1855                                      struct netlink_callback *cb)
1856 {
1857         struct net *net = sock_net(skb->sk);
1858         int h, s_h;
1859         int idx, s_idx;
1860         struct net_device *dev;
1861         struct in_device *in_dev;
1862         struct hlist_head *head;
1863
1864         s_h = cb->args[0];
1865         s_idx = idx = cb->args[1];
1866
1867         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1868                 idx = 0;
1869                 head = &net->dev_index_head[h];
1870                 rcu_read_lock();
1871                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1872                           net->dev_base_seq;
1873                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1874                         if (idx < s_idx)
1875                                 goto cont;
1876                         in_dev = __in_dev_get_rcu(dev);
1877                         if (!in_dev)
1878                                 goto cont;
1879
1880                         if (inet_netconf_fill_devconf(skb, dev->ifindex,
1881                                                       &in_dev->cnf,
1882                                                       NETLINK_CB(cb->skb).portid,
1883                                                       cb->nlh->nlmsg_seq,
1884                                                       RTM_NEWNETCONF,
1885                                                       NLM_F_MULTI,
1886                                                       -1) <= 0) {
1887                                 rcu_read_unlock();
1888                                 goto done;
1889                         }
1890                         nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1891 cont:
1892                         idx++;
1893                 }
1894                 rcu_read_unlock();
1895         }
1896         if (h == NETDEV_HASHENTRIES) {
1897                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
1898                                               net->ipv4.devconf_all,
1899                                               NETLINK_CB(cb->skb).portid,
1900                                               cb->nlh->nlmsg_seq,
1901                                               RTM_NEWNETCONF, NLM_F_MULTI,
1902                                               -1) <= 0)
1903                         goto done;
1904                 else
1905                         h++;
1906         }
1907         if (h == NETDEV_HASHENTRIES + 1) {
1908                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
1909                                               net->ipv4.devconf_dflt,
1910                                               NETLINK_CB(cb->skb).portid,
1911                                               cb->nlh->nlmsg_seq,
1912                                               RTM_NEWNETCONF, NLM_F_MULTI,
1913                                               -1) <= 0)
1914                         goto done;
1915                 else
1916                         h++;
1917         }
1918 done:
1919         cb->args[0] = h;
1920         cb->args[1] = idx;
1921
1922         return skb->len;
1923 }
1924
1925 #ifdef CONFIG_SYSCTL
1926
1927 static void devinet_copy_dflt_conf(struct net *net, int i)
1928 {
1929         struct net_device *dev;
1930
1931         rcu_read_lock();
1932         for_each_netdev_rcu(net, dev) {
1933                 struct in_device *in_dev;
1934
1935                 in_dev = __in_dev_get_rcu(dev);
1936                 if (in_dev && !test_bit(i, in_dev->cnf.state))
1937                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1938         }
1939         rcu_read_unlock();
1940 }
1941
1942 /* called with RTNL locked */
1943 static void inet_forward_change(struct net *net)
1944 {
1945         struct net_device *dev;
1946         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1947
1948         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1949         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1950         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1951                                     NETCONFA_IFINDEX_ALL,
1952                                     net->ipv4.devconf_all);
1953         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1954                                     NETCONFA_IFINDEX_DEFAULT,
1955                                     net->ipv4.devconf_dflt);
1956
1957         for_each_netdev(net, dev) {
1958                 struct in_device *in_dev;
1959                 if (on)
1960                         dev_disable_lro(dev);
1961                 rcu_read_lock();
1962                 in_dev = __in_dev_get_rcu(dev);
1963                 if (in_dev) {
1964                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1965                         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1966                                                     dev->ifindex, &in_dev->cnf);
1967                 }
1968                 rcu_read_unlock();
1969         }
1970 }
1971
1972 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
1973 {
1974         if (cnf == net->ipv4.devconf_dflt)
1975                 return NETCONFA_IFINDEX_DEFAULT;
1976         else if (cnf == net->ipv4.devconf_all)
1977                 return NETCONFA_IFINDEX_ALL;
1978         else {
1979                 struct in_device *idev
1980                         = container_of(cnf, struct in_device, cnf);
1981                 return idev->dev->ifindex;
1982         }
1983 }
1984
1985 static int devinet_conf_proc(struct ctl_table *ctl, int write,
1986                              void __user *buffer,
1987                              size_t *lenp, loff_t *ppos)
1988 {
1989         int old_value = *(int *)ctl->data;
1990         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1991         int new_value = *(int *)ctl->data;
1992
1993         if (write) {
1994                 struct ipv4_devconf *cnf = ctl->extra1;
1995                 struct net *net = ctl->extra2;
1996                 int i = (int *)ctl->data - cnf->data;
1997                 int ifindex;
1998
1999                 set_bit(i, cnf->state);
2000
2001                 if (cnf == net->ipv4.devconf_dflt)
2002                         devinet_copy_dflt_conf(net, i);
2003                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2004                     i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2005                         if ((new_value == 0) && (old_value != 0))
2006                                 rt_cache_flush(net);
2007
2008                 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2009                     new_value != old_value) {
2010                         ifindex = devinet_conf_ifindex(net, cnf);
2011                         inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
2012                                                     ifindex, cnf);
2013                 }
2014                 if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2015                     new_value != old_value) {
2016                         ifindex = devinet_conf_ifindex(net, cnf);
2017                         inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
2018                                                     ifindex, cnf);
2019                 }
2020         }
2021
2022         return ret;
2023 }
2024
2025 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2026                                   void __user *buffer,
2027                                   size_t *lenp, loff_t *ppos)
2028 {
2029         int *valp = ctl->data;
2030         int val = *valp;
2031         loff_t pos = *ppos;
2032         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2033
2034         if (write && *valp != val) {
2035                 struct net *net = ctl->extra2;
2036
2037                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2038                         if (!rtnl_trylock()) {
2039                                 /* Restore the original values before restarting */
2040                                 *valp = val;
2041                                 *ppos = pos;
2042                                 return restart_syscall();
2043                         }
2044                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2045                                 inet_forward_change(net);
2046                         } else {
2047                                 struct ipv4_devconf *cnf = ctl->extra1;
2048                                 struct in_device *idev =
2049                                         container_of(cnf, struct in_device, cnf);
2050                                 if (*valp)
2051                                         dev_disable_lro(idev->dev);
2052                                 inet_netconf_notify_devconf(net,
2053                                                             NETCONFA_FORWARDING,
2054                                                             idev->dev->ifindex,
2055                                                             cnf);
2056                         }
2057                         rtnl_unlock();
2058                         rt_cache_flush(net);
2059                 } else
2060                         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2061                                                     NETCONFA_IFINDEX_DEFAULT,
2062                                                     net->ipv4.devconf_dflt);
2063         }
2064
2065         return ret;
2066 }
2067
2068 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2069                                 void __user *buffer,
2070                                 size_t *lenp, loff_t *ppos)
2071 {
2072         int *valp = ctl->data;
2073         int val = *valp;
2074         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2075         struct net *net = ctl->extra2;
2076
2077         if (write && *valp != val)
2078                 rt_cache_flush(net);
2079
2080         return ret;
2081 }
2082
/*
 * Builders for the entries of the devinet sysctl template.
 *
 * Every entry initially points into the global ipv4_devconf; the slot
 * for IPV4_DEVCONF_<attr> is data[IPV4_DEVCONF_<attr> - 1].
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= &ipv4_devconf.data[IPV4_DEVCONF_ ## attr - 1], \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Read-write entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Read-only entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Read-write entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Read-write entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2105
2106 static struct devinet_sysctl_table {
2107         struct ctl_table_header *sysctl_header;
2108         struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2109 } devinet_sysctl = {
2110         .devinet_vars = {
2111                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2112                                              devinet_sysctl_forward),
2113                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2114
2115                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2116                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2117                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2118                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2119                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2120                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2121                                         "accept_source_route"),
2122                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2123                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2124                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2125                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2126                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2127                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2128                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2129                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2130                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2131                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2132                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2133                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2134                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2135                 DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2136                                         "force_igmp_version"),
2137                 DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2138                                         "igmpv2_unsolicited_report_interval"),
2139                 DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2140                                         "igmpv3_unsolicited_report_interval"),
2141
2142                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2143                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2144                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2145                                               "promote_secondaries"),
2146                 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2147                                               "route_localnet"),
2148         },
2149 };
2150
2151 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2152                                         struct ipv4_devconf *p)
2153 {
2154         int i;
2155         struct devinet_sysctl_table *t;
2156         char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2157
2158         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2159         if (!t)
2160                 goto out;
2161
2162         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2163                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2164                 t->devinet_vars[i].extra1 = p;
2165                 t->devinet_vars[i].extra2 = net;
2166         }
2167
2168         snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2169
2170         t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2171         if (!t->sysctl_header)
2172                 goto free;
2173
2174         p->sysctl = t;
2175         return 0;
2176
2177 free:
2178         kfree(t);
2179 out:
2180         return -ENOBUFS;
2181 }
2182
2183 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2184 {
2185         struct devinet_sysctl_table *t = cnf->sysctl;
2186
2187         if (t == NULL)
2188                 return;
2189
2190         cnf->sysctl = NULL;
2191         unregister_net_sysctl_table(t->sysctl_header);
2192         kfree(t);
2193 }
2194
2195 static int devinet_sysctl_register(struct in_device *idev)
2196 {
2197         int err;
2198
2199         if (!sysctl_dev_name_is_allowed(idev->dev->name))
2200                 return -EINVAL;
2201
2202         err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2203         if (err)
2204                 return err;
2205         err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2206                                         &idev->cnf);
2207         if (err)
2208                 neigh_sysctl_unregister(idev->arp_parms);
2209         return err;
2210 }
2211
2212 static void devinet_sysctl_unregister(struct in_device *idev)
2213 {
2214         __devinet_sysctl_unregister(&idev->cnf);
2215         neigh_sysctl_unregister(idev->arp_parms);
2216 }
2217
2218 static struct ctl_table ctl_forward_entry[] = {
2219         {
2220                 .procname       = "ip_forward",
2221                 .data           = &ipv4_devconf.data[
2222                                         IPV4_DEVCONF_FORWARDING - 1],
2223                 .maxlen         = sizeof(int),
2224                 .mode           = 0644,
2225                 .proc_handler   = devinet_sysctl_forward,
2226                 .extra1         = &ipv4_devconf,
2227                 .extra2         = &init_net,
2228         },
2229         { },
2230 };
2231 #endif
2232
2233 static __net_init int devinet_init_net(struct net *net)
2234 {
2235         int err;
2236         struct ipv4_devconf *all, *dflt;
2237 #ifdef CONFIG_SYSCTL
2238         struct ctl_table *tbl = ctl_forward_entry;
2239         struct ctl_table_header *forw_hdr;
2240 #endif
2241
2242         err = -ENOMEM;
2243         all = &ipv4_devconf;
2244         dflt = &ipv4_devconf_dflt;
2245
2246         if (!net_eq(net, &init_net)) {
2247                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2248                 if (all == NULL)
2249                         goto err_alloc_all;
2250
2251                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2252                 if (dflt == NULL)
2253                         goto err_alloc_dflt;
2254
2255 #ifdef CONFIG_SYSCTL
2256                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2257                 if (tbl == NULL)
2258                         goto err_alloc_ctl;
2259
2260                 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2261                 tbl[0].extra1 = all;
2262                 tbl[0].extra2 = net;
2263 #endif
2264         }
2265
2266 #ifdef CONFIG_SYSCTL
2267         err = __devinet_sysctl_register(net, "all", all);
2268         if (err < 0)
2269                 goto err_reg_all;
2270
2271         err = __devinet_sysctl_register(net, "default", dflt);
2272         if (err < 0)
2273                 goto err_reg_dflt;
2274
2275         err = -ENOMEM;
2276         forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2277         if (forw_hdr == NULL)
2278                 goto err_reg_ctl;
2279         net->ipv4.forw_hdr = forw_hdr;
2280 #endif
2281
2282         net->ipv4.devconf_all = all;
2283         net->ipv4.devconf_dflt = dflt;
2284         return 0;
2285
2286 #ifdef CONFIG_SYSCTL
2287 err_reg_ctl:
2288         __devinet_sysctl_unregister(dflt);
2289 err_reg_dflt:
2290         __devinet_sysctl_unregister(all);
2291 err_reg_all:
2292         if (tbl != ctl_forward_entry)
2293                 kfree(tbl);
2294 err_alloc_ctl:
2295 #endif
2296         if (dflt != &ipv4_devconf_dflt)
2297                 kfree(dflt);
2298 err_alloc_dflt:
2299         if (all != &ipv4_devconf)
2300                 kfree(all);
2301 err_alloc_all:
2302         return err;
2303 }
2304
2305 static __net_exit void devinet_exit_net(struct net *net)
2306 {
2307 #ifdef CONFIG_SYSCTL
2308         struct ctl_table *tbl;
2309
2310         tbl = net->ipv4.forw_hdr->ctl_table_arg;
2311         unregister_net_sysctl_table(net->ipv4.forw_hdr);
2312         __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2313         __devinet_sysctl_unregister(net->ipv4.devconf_all);
2314         kfree(tbl);
2315 #endif
2316         kfree(net->ipv4.devconf_dflt);
2317         kfree(net->ipv4.devconf_all);
2318 }
2319
2320 static __net_initdata struct pernet_operations devinet_ops = {
2321         .init = devinet_init_net,
2322         .exit = devinet_exit_net,
2323 };
2324
2325 static struct rtnl_af_ops inet_af_ops = {
2326         .family           = AF_INET,
2327         .fill_link_af     = inet_fill_link_af,
2328         .get_link_af_size = inet_get_link_af_size,
2329         .validate_link_af = inet_validate_link_af,
2330         .set_link_af      = inet_set_link_af,
2331 };
2332
2333 void __init devinet_init(void)
2334 {
2335         int i;
2336
2337         for (i = 0; i < IN4_ADDR_HSIZE; i++)
2338                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2339
2340         register_pernet_subsys(&devinet_ops);
2341
2342         register_gifconf(PF_INET, inet_gifconf);
2343         register_netdevice_notifier(&ip_netdev_notifier);
2344
2345         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2346
2347         rtnl_af_register(&inet_af_ops);
2348
2349         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2350         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2351         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2352         rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2353                       inet_netconf_dump_devconf, NULL);
2354 }
2355