ipv4: L3 hash-based multipath
[firefly-linux-kernel-4.4.55.git] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
59
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 #include <net/addrconf.h>
67
68 #include "fib_lookup.h"
69
70 static struct ipv4_devconf ipv4_devconf = {
71         .data = {
72                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
77                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
78         },
79 };
80
81 static struct ipv4_devconf ipv4_devconf_dflt = {
82         .data = {
83                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
84                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
85                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
86                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
87                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
88                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
89                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
90         },
91 };
92
/*
 * Read attribute @attr from the devconf_dflt block of namespace @net.
 * @net is parenthesised so that any pointer expression (not just a plain
 * identifier) can be passed; @attr is forwarded verbatim to IPV4_DEVCONF().
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
95
96 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
97         [IFA_LOCAL]             = { .type = NLA_U32 },
98         [IFA_ADDRESS]           = { .type = NLA_U32 },
99         [IFA_BROADCAST]         = { .type = NLA_U32 },
100         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
101         [IFA_CACHEINFO]         = { .len = sizeof(struct ifa_cacheinfo) },
102         [IFA_FLAGS]             = { .type = NLA_U32 },
103 };
104
105 #define IN4_ADDR_HSIZE_SHIFT    8
106 #define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)
107
108 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
109
110 static u32 inet_addr_hash(const struct net *net, __be32 addr)
111 {
112         u32 val = (__force u32) addr ^ net_hash_mix(net);
113
114         return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
115 }
116
117 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
118 {
119         u32 hash = inet_addr_hash(net, ifa->ifa_local);
120
121         ASSERT_RTNL();
122         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
123 }
124
125 static void inet_hash_remove(struct in_ifaddr *ifa)
126 {
127         ASSERT_RTNL();
128         hlist_del_init_rcu(&ifa->hash);
129 }
130
131 /**
132  * __ip_dev_find - find the first device with a given source address.
133  * @net: the net namespace
134  * @addr: the source address
135  * @devref: if true, take a reference on the found device
136  *
137  * If a caller uses devref=false, it should be protected by RCU, or RTNL
138  */
139 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
140 {
141         u32 hash = inet_addr_hash(net, addr);
142         struct net_device *result = NULL;
143         struct in_ifaddr *ifa;
144
145         rcu_read_lock();
146         hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
147                 if (ifa->ifa_local == addr) {
148                         struct net_device *dev = ifa->ifa_dev->dev;
149
150                         if (!net_eq(dev_net(dev), net))
151                                 continue;
152                         result = dev;
153                         break;
154                 }
155         }
156         if (!result) {
157                 struct flowi4 fl4 = { .daddr = addr };
158                 struct fib_result res = { 0 };
159                 struct fib_table *local;
160
161                 /* Fallback to FIB local table so that communication
162                  * over loopback subnets work.
163                  */
164                 local = fib_get_table(net, RT_TABLE_LOCAL);
165                 if (local &&
166                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
167                     res.type == RTN_LOCAL)
168                         result = FIB_RES_DEV(res);
169         }
170         if (result && devref)
171                 dev_hold(result);
172         rcu_read_unlock();
173         return result;
174 }
175 EXPORT_SYMBOL(__ip_dev_find);
176
177 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
178
179 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
180 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
181                          int destroy);
182 #ifdef CONFIG_SYSCTL
183 static int devinet_sysctl_register(struct in_device *idev);
184 static void devinet_sysctl_unregister(struct in_device *idev);
185 #else
186 static int devinet_sysctl_register(struct in_device *idev)
187 {
188         return 0;
189 }
190 static void devinet_sysctl_unregister(struct in_device *idev)
191 {
192 }
193 #endif
194
195 /* Locks all the inet devices. */
196
197 static struct in_ifaddr *inet_alloc_ifa(void)
198 {
199         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
200 }
201
202 static void inet_rcu_free_ifa(struct rcu_head *head)
203 {
204         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
205         if (ifa->ifa_dev)
206                 in_dev_put(ifa->ifa_dev);
207         kfree(ifa);
208 }
209
210 static void inet_free_ifa(struct in_ifaddr *ifa)
211 {
212         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
213 }
214
215 void in_dev_finish_destroy(struct in_device *idev)
216 {
217         struct net_device *dev = idev->dev;
218
219         WARN_ON(idev->ifa_list);
220         WARN_ON(idev->mc_list);
221         kfree(rcu_dereference_protected(idev->mc_hash, 1));
222 #ifdef NET_REFCNT_DEBUG
223         pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
224 #endif
225         dev_put(dev);
226         if (!idev->dead)
227                 pr_err("Freeing alive in_device %p\n", idev);
228         else
229                 kfree(idev);
230 }
231 EXPORT_SYMBOL(in_dev_finish_destroy);
232
233 static struct in_device *inetdev_init(struct net_device *dev)
234 {
235         struct in_device *in_dev;
236         int err = -ENOMEM;
237
238         ASSERT_RTNL();
239
240         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
241         if (!in_dev)
242                 goto out;
243         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
244                         sizeof(in_dev->cnf));
245         in_dev->cnf.sysctl = NULL;
246         in_dev->dev = dev;
247         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
248         if (!in_dev->arp_parms)
249                 goto out_kfree;
250         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
251                 dev_disable_lro(dev);
252         /* Reference in_dev->dev */
253         dev_hold(dev);
254         /* Account for reference dev->ip_ptr (below) */
255         in_dev_hold(in_dev);
256
257         err = devinet_sysctl_register(in_dev);
258         if (err) {
259                 in_dev->dead = 1;
260                 in_dev_put(in_dev);
261                 in_dev = NULL;
262                 goto out;
263         }
264         ip_mc_init_dev(in_dev);
265         if (dev->flags & IFF_UP)
266                 ip_mc_up(in_dev);
267
268         /* we can receive as soon as ip_ptr is set -- do this last */
269         rcu_assign_pointer(dev->ip_ptr, in_dev);
270 out:
271         return in_dev ?: ERR_PTR(err);
272 out_kfree:
273         kfree(in_dev);
274         in_dev = NULL;
275         goto out;
276 }
277
278 static void in_dev_rcu_put(struct rcu_head *head)
279 {
280         struct in_device *idev = container_of(head, struct in_device, rcu_head);
281         in_dev_put(idev);
282 }
283
284 static void inetdev_destroy(struct in_device *in_dev)
285 {
286         struct in_ifaddr *ifa;
287         struct net_device *dev;
288
289         ASSERT_RTNL();
290
291         dev = in_dev->dev;
292
293         in_dev->dead = 1;
294
295         ip_mc_destroy_dev(in_dev);
296
297         while ((ifa = in_dev->ifa_list) != NULL) {
298                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
299                 inet_free_ifa(ifa);
300         }
301
302         RCU_INIT_POINTER(dev->ip_ptr, NULL);
303
304         devinet_sysctl_unregister(in_dev);
305         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
306         arp_ifdown(dev);
307
308         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
309 }
310
311 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
312 {
313         rcu_read_lock();
314         for_primary_ifa(in_dev) {
315                 if (inet_ifa_match(a, ifa)) {
316                         if (!b || inet_ifa_match(b, ifa)) {
317                                 rcu_read_unlock();
318                                 return 1;
319                         }
320                 }
321         } endfor_ifa(in_dev);
322         rcu_read_unlock();
323         return 0;
324 }
325
/*
 * __inet_del_ifa - unlink one address from a device's address list.
 * @in_dev:  device whose ifa_list holds the address
 * @ifap:    the link (list head or some ->ifa_next slot) that currently
 *           points at the address to remove
 * @destroy: when non-zero the removed address is released with
 *           inet_free_ifa(); otherwise the caller keeps ownership
 * @nlh:     passed through to rtmsg_ifa() for every notification sent
 * @portid:  passed through to rtmsg_ifa() for every notification sent
 *
 * Must be called with RTNL held (ASSERT_RTNL()).
 *
 * If the address is a primary one (IFA_F_SECONDARY clear), the
 * secondaries behind it that have the same mask and match its subnet are
 * handled first: without IN_DEV_PROMOTE_SECONDARIES() each of them is
 * unhashed, unlinked, announced (RTM_DELADDR + NETDEV_DOWN) and freed;
 * with it, the first such secondary is remembered and later promoted to
 * primary (RTM_NEWADDR + NETDEV_UP).
 */
326 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
327                          int destroy, struct nlmsghdr *nlh, u32 portid)
328 {
329         struct in_ifaddr *promote = NULL;
330         struct in_ifaddr *ifa, *ifa1 = *ifap;
331         struct in_ifaddr *last_prim = in_dev->ifa_list;
332         struct in_ifaddr *prev_prom = NULL;
333         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
334
335         ASSERT_RTNL();
336
337         /* 1. Deleting primary ifaddr forces deletion all secondaries
338          * unless alias promotion is set
339          **/
340
341         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
342                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
343
344                 while ((ifa = *ifap1) != NULL) {
                        /* Track the last primary seen whose scope is not
                         * smaller than ifa1's; a promoted address is later
                         * re-linked right after it.
                         */
345                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
346                             ifa1->ifa_scope <= ifa->ifa_scope)
347                                 last_prim = ifa;
348
                        /* Skip entries that are not secondaries of ifa1's
                         * subnet, remembering the last one skipped.
                         */
349                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
350                             ifa1->ifa_mask != ifa->ifa_mask ||
351                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
352                                 ifap1 = &ifa->ifa_next;
353                                 prev_prom = ifa;
354                                 continue;
355                         }
356
357                         if (!do_promote) {
358                                 inet_hash_remove(ifa);
359                                 *ifap1 = ifa->ifa_next;
360
361                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
362                                 blocking_notifier_call_chain(&inetaddr_chain,
363                                                 NETDEV_DOWN, ifa);
364                                 inet_free_ifa(ifa);
365                         } else {
366                                 promote = ifa;
367                                 break;
368                         }
369                 }
370         }
371
372         /* On promotion all secondaries from subnet are changing
373          * the primary IP, we must remove all their routes silently
374          * and later to add them back with new prefsrc. Do this
375          * while all addresses are on the device list.
376          */
377         for (ifa = promote; ifa; ifa = ifa->ifa_next) {
378                 if (ifa1->ifa_mask == ifa->ifa_mask &&
379                     inet_ifa_match(ifa1->ifa_address, ifa))
380                         fib_del_ifaddr(ifa, ifa1);
381         }
382
383         /* 2. Unlink it */
384
385         *ifap = ifa1->ifa_next;
386         inet_hash_remove(ifa1);
387
388         /* 3. Announce address deletion */
389
390         /* Send message first, then call notifier.
391            At first sight, FIB update triggered by notifier
392            will refer to already deleted ifaddr, that could confuse
393            netlink listeners. It is not true: look, gated sees
394            that route deleted and if it still thinks that ifaddr
395            is valid, it will try to restore deleted routes... Grr.
396            So that, this order is correct.
397          */
398         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
399         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
400
401         if (promote) {
                /* Saved before the re-link below changes promote->ifa_next. */
402                 struct in_ifaddr *next_sec = promote->ifa_next;
403
                /* If entries were skipped before reaching promote, move it
                 * out of its current position and insert it after last_prim.
                 */
404                 if (prev_prom) {
405                         prev_prom->ifa_next = promote->ifa_next;
406                         promote->ifa_next = last_prim->ifa_next;
407                         last_prim->ifa_next = promote;
408                 }
409
410                 promote->ifa_flags &= ~IFA_F_SECONDARY;
411                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
412                 blocking_notifier_call_chain(&inetaddr_chain,
413                                 NETDEV_UP, promote);
                /* Re-add FIB state for the remaining addresses of the subnet. */
414                 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
415                         if (ifa1->ifa_mask != ifa->ifa_mask ||
416                             !inet_ifa_match(ifa1->ifa_address, ifa))
417                                         continue;
418                         fib_add_ifaddr(ifa);
419                 }
420
421         }
422         if (destroy)
423                 inet_free_ifa(ifa1);
424 }
425
426 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
427                          int destroy)
428 {
429         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
430 }
431
/* Handler is defined further down; declared here so the work item can name it. */
static void check_lifetime(struct work_struct *work);

/* Delayed work item that runs check_lifetime(). */
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
435
436 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
437                              u32 portid)
438 {
439         struct in_device *in_dev = ifa->ifa_dev;
440         struct in_ifaddr *ifa1, **ifap, **last_primary;
441
442         ASSERT_RTNL();
443
444         if (!ifa->ifa_local) {
445                 inet_free_ifa(ifa);
446                 return 0;
447         }
448
449         ifa->ifa_flags &= ~IFA_F_SECONDARY;
450         last_primary = &in_dev->ifa_list;
451
452         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
453              ifap = &ifa1->ifa_next) {
454                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
455                     ifa->ifa_scope <= ifa1->ifa_scope)
456                         last_primary = &ifa1->ifa_next;
457                 if (ifa1->ifa_mask == ifa->ifa_mask &&
458                     inet_ifa_match(ifa1->ifa_address, ifa)) {
459                         if (ifa1->ifa_local == ifa->ifa_local) {
460                                 inet_free_ifa(ifa);
461                                 return -EEXIST;
462                         }
463                         if (ifa1->ifa_scope != ifa->ifa_scope) {
464                                 inet_free_ifa(ifa);
465                                 return -EINVAL;
466                         }
467                         ifa->ifa_flags |= IFA_F_SECONDARY;
468                 }
469         }
470
471         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
472                 prandom_seed((__force u32) ifa->ifa_local);
473                 ifap = last_primary;
474         }
475
476         ifa->ifa_next = *ifap;
477         *ifap = ifa;
478
479         inet_hash_insert(dev_net(in_dev->dev), ifa);
480
481         cancel_delayed_work(&check_lifetime_work);
482         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
483
484         /* Send message first, then call notifier.
485            Notifier will trigger FIB update, so that
486            listeners of netlink will know about new ifaddr */
487         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
488         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
489
490         return 0;
491 }
492
493 static int inet_insert_ifa(struct in_ifaddr *ifa)
494 {
495         return __inet_insert_ifa(ifa, NULL, 0);
496 }
497
498 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
499 {
500         struct in_device *in_dev = __in_dev_get_rtnl(dev);
501
502         ASSERT_RTNL();
503
504         if (!in_dev) {
505                 inet_free_ifa(ifa);
506                 return -ENOBUFS;
507         }
508         ipv4_devconf_setall(in_dev);
509         neigh_parms_data_state_setall(in_dev->arp_parms);
510         if (ifa->ifa_dev != in_dev) {
511                 WARN_ON(ifa->ifa_dev);
512                 in_dev_hold(in_dev);
513                 ifa->ifa_dev = in_dev;
514         }
515         if (ipv4_is_loopback(ifa->ifa_local))
516                 ifa->ifa_scope = RT_SCOPE_HOST;
517         return inet_insert_ifa(ifa);
518 }
519
520 /* Caller must hold RCU or RTNL :
521  * We dont take a reference on found in_device
522  */
523 struct in_device *inetdev_by_index(struct net *net, int ifindex)
524 {
525         struct net_device *dev;
526         struct in_device *in_dev = NULL;
527
528         rcu_read_lock();
529         dev = dev_get_by_index_rcu(net, ifindex);
530         if (dev)
531                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
532         rcu_read_unlock();
533         return in_dev;
534 }
535 EXPORT_SYMBOL(inetdev_by_index);
536
537 /* Called only from RTNL semaphored context. No locks. */
538
539 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
540                                     __be32 mask)
541 {
542         ASSERT_RTNL();
543
544         for_primary_ifa(in_dev) {
545                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
546                         return ifa;
547         } endfor_ifa(in_dev);
548         return NULL;
549 }
550
551 static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
552 {
553         struct ip_mreqn mreq = {
554                 .imr_multiaddr.s_addr = ifa->ifa_address,
555                 .imr_ifindex = ifa->ifa_dev->dev->ifindex,
556         };
557         int ret;
558
559         ASSERT_RTNL();
560
561         lock_sock(sk);
562         if (join)
563                 ret = ip_mc_join_group(sk, &mreq);
564         else
565                 ret = ip_mc_leave_group(sk, &mreq);
566         release_sock(sk);
567
568         return ret;
569 }
570
571 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
572 {
573         struct net *net = sock_net(skb->sk);
574         struct nlattr *tb[IFA_MAX+1];
575         struct in_device *in_dev;
576         struct ifaddrmsg *ifm;
577         struct in_ifaddr *ifa, **ifap;
578         int err = -EINVAL;
579
580         ASSERT_RTNL();
581
582         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
583         if (err < 0)
584                 goto errout;
585
586         ifm = nlmsg_data(nlh);
587         in_dev = inetdev_by_index(net, ifm->ifa_index);
588         if (!in_dev) {
589                 err = -ENODEV;
590                 goto errout;
591         }
592
593         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
594              ifap = &ifa->ifa_next) {
595                 if (tb[IFA_LOCAL] &&
596                     ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
597                         continue;
598
599                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
600                         continue;
601
602                 if (tb[IFA_ADDRESS] &&
603                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
604                     !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
605                         continue;
606
607                 if (ipv4_is_multicast(ifa->ifa_address))
608                         ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
609                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
610                 return 0;
611         }
612
613         err = -EADDRNOTAVAIL;
614 errout:
615         return err;
616 }
617
/* Lifetime value (all 32 bits set) that this file treats as "never expires". */
#define INFINITY_LIFE_TIME	0xFFFFFFFF
619
/*
 * check_lifetime - delayed-work handler that expires and deprecates
 * addresses.
 * @work: not used by the function body
 *
 * Every bucket of inet_addr_lst is visited.  A bucket is first scanned
 * under rcu_read_lock() to find out whether any non-permanent address has
 * outlived its valid lifetime or still has to be marked deprecated, and
 * to pull 'next' forward to the earliest upcoming deadline.  Only when a
 * change is needed is the bucket scanned again under rtnl_lock(): expired
 * addresses are removed with inet_del_ifa(), and addresses past their
 * preferred lifetime get IFA_F_DEPRECATED plus an RTM_NEWADDR message.
 *
 * Finally the work re-queues itself on system_power_efficient_wq for
 * 'next' (using the rounded value when it is close enough), but never
 * sooner than ADDRCONF_TIMER_FUZZ_MAX jiffies from now.
 */
620 static void check_lifetime(struct work_struct *work)
621 {
622         unsigned long now, next, next_sec, next_sched;
623         struct in_ifaddr *ifa;
624         struct hlist_node *n;
625         int i;
626
627         now = jiffies;
628         next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
629
630         for (i = 0; i < IN4_ADDR_HSIZE; i++) {
631                 bool change_needed = false;
632
                /* Pass 1: read-only scan, decides whether RTNL is needed. */
633                 rcu_read_lock();
634                 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
635                         unsigned long age;
636
637                         if (ifa->ifa_flags & IFA_F_PERMANENT)
638                                 continue;
639
640                         /* We try to batch several events at once. */
641                         age = (now - ifa->ifa_tstamp +
642                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
643
644                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
645                             age >= ifa->ifa_valid_lft) {
646                                 change_needed = true;
647                         } else if (ifa->ifa_preferred_lft ==
648                                    INFINITY_LIFE_TIME) {
649                                 continue;
650                         } else if (age >= ifa->ifa_preferred_lft) {
651                                 if (time_before(ifa->ifa_tstamp +
652                                                 ifa->ifa_valid_lft * HZ, next))
653                                         next = ifa->ifa_tstamp +
654                                                ifa->ifa_valid_lft * HZ;
655
656                                 if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
657                                         change_needed = true;
658                         } else if (time_before(ifa->ifa_tstamp +
659                                                ifa->ifa_preferred_lft * HZ,
660                                                next)) {
661                                 next = ifa->ifa_tstamp +
662                                        ifa->ifa_preferred_lft * HZ;
663                         }
664                 }
665                 rcu_read_unlock();
666                 if (!change_needed)
667                         continue;
                /* Pass 2: apply deletions / deprecations under RTNL. */
668                 rtnl_lock();
669                 hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
670                         unsigned long age;
671
672                         if (ifa->ifa_flags & IFA_F_PERMANENT)
673                                 continue;
674
675                         /* We try to batch several events at once. */
676                         age = (now - ifa->ifa_tstamp +
677                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
678
679                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
680                             age >= ifa->ifa_valid_lft) {
681                                 struct in_ifaddr **ifap;
682
                                /* Find the list link pointing at ifa, as
                                 * inet_del_ifa() needs that link.
                                 */
683                                 for (ifap = &ifa->ifa_dev->ifa_list;
684                                      *ifap != NULL; ifap = &(*ifap)->ifa_next) {
685                                         if (*ifap == ifa) {
686                                                 inet_del_ifa(ifa->ifa_dev,
687                                                              ifap, 1);
688                                                 break;
689                                         }
690                                 }
691                         } else if (ifa->ifa_preferred_lft !=
692                                    INFINITY_LIFE_TIME &&
693                                    age >= ifa->ifa_preferred_lft &&
694                                    !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
695                                 ifa->ifa_flags |= IFA_F_DEPRECATED;
696                                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
697                         }
698                 }
699                 rtnl_unlock();
700         }
701
702         next_sec = round_jiffies_up(next);
703         next_sched = next;
704
705         /* If rounded timeout is accurate enough, accept it. */
706         if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
707                 next_sched = next_sec;
708
709         now = jiffies;
710         /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
711         if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
712                 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
713
714         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
715                         next_sched - now);
716 }
717
718 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
719                              __u32 prefered_lft)
720 {
721         unsigned long timeout;
722
723         ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
724
725         timeout = addrconf_timeout_fixup(valid_lft, HZ);
726         if (addrconf_finite_timeout(timeout))
727                 ifa->ifa_valid_lft = timeout;
728         else
729                 ifa->ifa_flags |= IFA_F_PERMANENT;
730
731         timeout = addrconf_timeout_fixup(prefered_lft, HZ);
732         if (addrconf_finite_timeout(timeout)) {
733                 if (timeout == 0)
734                         ifa->ifa_flags |= IFA_F_DEPRECATED;
735                 ifa->ifa_preferred_lft = timeout;
736         }
737         ifa->ifa_tstamp = jiffies;
738         if (!ifa->ifa_cstamp)
739                 ifa->ifa_cstamp = ifa->ifa_tstamp;
740 }
741
742 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
743                                        __u32 *pvalid_lft, __u32 *pprefered_lft)
744 {
745         struct nlattr *tb[IFA_MAX+1];
746         struct in_ifaddr *ifa;
747         struct ifaddrmsg *ifm;
748         struct net_device *dev;
749         struct in_device *in_dev;
750         int err;
751
752         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
753         if (err < 0)
754                 goto errout;
755
756         ifm = nlmsg_data(nlh);
757         err = -EINVAL;
758         if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
759                 goto errout;
760
761         dev = __dev_get_by_index(net, ifm->ifa_index);
762         err = -ENODEV;
763         if (!dev)
764                 goto errout;
765
766         in_dev = __in_dev_get_rtnl(dev);
767         err = -ENOBUFS;
768         if (!in_dev)
769                 goto errout;
770
771         ifa = inet_alloc_ifa();
772         if (!ifa)
773                 /*
774                  * A potential indev allocation can be left alive, it stays
775                  * assigned to its device and is destroy with it.
776                  */
777                 goto errout;
778
779         ipv4_devconf_setall(in_dev);
780         neigh_parms_data_state_setall(in_dev->arp_parms);
781         in_dev_hold(in_dev);
782
783         if (!tb[IFA_ADDRESS])
784                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
785
786         INIT_HLIST_NODE(&ifa->hash);
787         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
788         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
789         ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
790                                          ifm->ifa_flags;
791         ifa->ifa_scope = ifm->ifa_scope;
792         ifa->ifa_dev = in_dev;
793
794         ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
795         ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
796
797         if (tb[IFA_BROADCAST])
798                 ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
799
800         if (tb[IFA_LABEL])
801                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
802         else
803                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
804
805         if (tb[IFA_CACHEINFO]) {
806                 struct ifa_cacheinfo *ci;
807
808                 ci = nla_data(tb[IFA_CACHEINFO]);
809                 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
810                         err = -EINVAL;
811                         goto errout_free;
812                 }
813                 *pvalid_lft = ci->ifa_valid;
814                 *pprefered_lft = ci->ifa_prefered;
815         }
816
817         return ifa;
818
819 errout_free:
820         inet_free_ifa(ifa);
821 errout:
822         return ERR_PTR(err);
823 }
824
825 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
826 {
827         struct in_device *in_dev = ifa->ifa_dev;
828         struct in_ifaddr *ifa1, **ifap;
829
830         if (!ifa->ifa_local)
831                 return NULL;
832
833         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
834              ifap = &ifa1->ifa_next) {
835                 if (ifa1->ifa_mask == ifa->ifa_mask &&
836                     inet_ifa_match(ifa1->ifa_address, ifa) &&
837                     ifa1->ifa_local == ifa->ifa_local)
838                         return ifa1;
839         }
840         return NULL;
841 }
842
843 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
844 {
845         struct net *net = sock_net(skb->sk);
846         struct in_ifaddr *ifa;
847         struct in_ifaddr *ifa_existing;
848         __u32 valid_lft = INFINITY_LIFE_TIME;
849         __u32 prefered_lft = INFINITY_LIFE_TIME;
850
851         ASSERT_RTNL();
852
853         ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
854         if (IS_ERR(ifa))
855                 return PTR_ERR(ifa);
856
857         ifa_existing = find_matching_ifa(ifa);
858         if (!ifa_existing) {
859                 /* It would be best to check for !NLM_F_CREATE here but
860                  * userspace already relies on not having to provide this.
861                  */
862                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
863                 if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
864                         int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
865                                                true, ifa);
866
867                         if (ret < 0) {
868                                 inet_free_ifa(ifa);
869                                 return ret;
870                         }
871                 }
872                 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
873         } else {
874                 inet_free_ifa(ifa);
875
876                 if (nlh->nlmsg_flags & NLM_F_EXCL ||
877                     !(nlh->nlmsg_flags & NLM_F_REPLACE))
878                         return -EEXIST;
879                 ifa = ifa_existing;
880                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
881                 cancel_delayed_work(&check_lifetime_work);
882                 queue_delayed_work(system_power_efficient_wq,
883                                 &check_lifetime_work, 0);
884                 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
885         }
886         return 0;
887 }
888
889 /*
890  *      Determine a default network mask, based on the IP address.
891  */
892
893 static int inet_abc_len(__be32 addr)
894 {
895         int rc = -1;    /* Something else, probably a multicast. */
896
897         if (ipv4_is_zeronet(addr))
898                 rc = 0;
899         else {
900                 __u32 haddr = ntohl(addr);
901
902                 if (IN_CLASSA(haddr))
903                         rc = 8;
904                 else if (IN_CLASSB(haddr))
905                         rc = 16;
906                 else if (IN_CLASSC(haddr))
907                         rc = 24;
908         }
909
910         return rc;
911 }
912
913
/*
 * Handle the IPv4 address ioctls on an interface.
 *
 * Supported commands: SIOCGIFADDR, SIOCGIFBRDADDR, SIOCGIFDSTADDR,
 * SIOCGIFNETMASK, SIOCSIFFLAGS, SIOCSIFADDR, SIOCSIFBRDADDR, SIOCSIFDSTADDR
 * and SIOCSIFNETMASK; any other command yields -EINVAL.
 *
 * @net: network namespace the device is looked up in
 * @cmd: ioctl command
 * @arg: user pointer to a struct ifreq; copied in on entry and, for the
 *       "get" commands, copied back out with the requested address
 *
 * Returns 0 on success or a negative errno:
 *   -EFAULT         the ifreq could not be copied from/to user space
 *   -EPERM          a "set" command without CAP_NET_ADMIN in net->user_ns
 *   -EINVAL         unknown command, non-AF_INET address on a "set" command,
 *                   or an address/netmask rejected by the checks below
 *   -ENODEV         no device with the given name
 *   -EADDRNOTAVAIL  no matching address on the device (all commands except
 *                   SIOCSIFADDR and non-alias SIOCSIFFLAGS)
 *   -ENOBUFS        a new address could not be allocated (SIOCSIFADDR)
 *
 * The device and address work is done under rtnl_lock().
 */
914 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
915 {
916         struct ifreq ifr;
917         struct sockaddr_in sin_orig;
918         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
919         struct in_device *in_dev;
920         struct in_ifaddr **ifap = NULL;
921         struct in_ifaddr *ifa = NULL;
922         struct net_device *dev;
923         char *colon;
924         int ret = -EFAULT;
925         int tryaddrmatch = 0;
926
927         /*
928          *      Fetch the caller's info block into kernel space
929          */
930
931         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
932                 goto out;
            /* Force NUL termination of the user-supplied name. */
933         ifr.ifr_name[IFNAMSIZ - 1] = 0;
934
935         /* save original address for comparison */
936         memcpy(&sin_orig, sin, sizeof(*sin));
937
            /* Cut the name at the first ':' so that dev_load() and the device
             * lookup below only see the part before it; the ':' is put back
             * once the device has been found.
             */
938         colon = strchr(ifr.ifr_name, ':');
939         if (colon)
940                 *colon = 0;
941
942         dev_load(net, ifr.ifr_name);
943
944         switch (cmd) {
945         case SIOCGIFADDR:       /* Get interface address */
946         case SIOCGIFBRDADDR:    /* Get the broadcast address */
947         case SIOCGIFDSTADDR:    /* Get the destination address */
948         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
949                 /* Note that these ioctls will not sleep,
950                    so that we do not impose a lock.
951                    One day we will be forced to put shlock here (I mean SMP)
952                  */
953                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
954                 memset(sin, 0, sizeof(*sin));
955                 sin->sin_family = AF_INET;
956                 break;
957
958         case SIOCSIFFLAGS:
959                 ret = -EPERM;
960                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
961                         goto out;
962                 break;
963         case SIOCSIFADDR:       /* Set interface address (and family) */
964         case SIOCSIFBRDADDR:    /* Set the broadcast address */
965         case SIOCSIFDSTADDR:    /* Set the destination address */
966         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
967                 ret = -EPERM;
968                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
969                         goto out;
970                 ret = -EINVAL;
971                 if (sin->sin_family != AF_INET)
972                         goto out;
973                 break;
974         default:
975                 ret = -EINVAL;
976                 goto out;
977         }
978
979         rtnl_lock();
980
981         ret = -ENODEV;
982         dev = __dev_get_by_name(net, ifr.ifr_name);
983         if (!dev)
984                 goto done;
985
            /* Restore the full label for the label comparisons below. */
986         if (colon)
987                 *colon = ':';
988
989         in_dev = __in_dev_get_rtnl(dev);
990         if (in_dev) {
991                 if (tryaddrmatch) {
992                         /* Matthias Andree */
993                         /* compare label and address (4.4BSD style) */
994                         /* note: we only do this for a limited set of ioctls
995                            and only if the original address family was AF_INET.
996                            This is checked above. */
997                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
998                              ifap = &ifa->ifa_next) {
999                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
1000                                     sin_orig.sin_addr.s_addr ==
1001                                                         ifa->ifa_local) {
1002                                         break; /* found */
1003                                 }
1004                         }
1005                 }
1006                 /* we didn't get a match, maybe the application is
1007                    4.3BSD-style and passed in junk so we fall back to
1008                    comparing just the label */
1009                 if (!ifa) {
1010                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1011                              ifap = &ifa->ifa_next)
1012                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
1013                                         break;
1014                 }
1015         }
1016
             /* Only SIOCSIFADDR (which allocates a new address when none
              * matched) and SIOCSIFFLAGS may continue without a matching
              * address.
              */
1017         ret = -EADDRNOTAVAIL;
1018         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1019                 goto done;
1020
1021         switch (cmd) {
1022         case SIOCGIFADDR:       /* Get interface address */
1023                 sin->sin_addr.s_addr = ifa->ifa_local;
1024                 goto rarok;
1025
1026         case SIOCGIFBRDADDR:    /* Get the broadcast address */
1027                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
1028                 goto rarok;
1029
1030         case SIOCGIFDSTADDR:    /* Get the destination address */
1031                 sin->sin_addr.s_addr = ifa->ifa_address;
1032                 goto rarok;
1033
1034         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
1035                 sin->sin_addr.s_addr = ifa->ifa_mask;
1036                 goto rarok;
1037
1038         case SIOCSIFFLAGS:
                     /* A name containing ':' addresses a single labelled
                      * address: clearing IFF_UP deletes that address instead
                      * of changing the device flags.
                      */
1039                 if (colon) {
1040                         ret = -EADDRNOTAVAIL;
1041                         if (!ifa)
1042                                 break;
1043                         ret = 0;
1044                         if (!(ifr.ifr_flags & IFF_UP))
1045                                 inet_del_ifa(in_dev, ifap, 1);
1046                         break;
1047                 }
1048                 ret = dev_change_flags(dev, ifr.ifr_flags);
1049                 break;
1050
1051         case SIOCSIFADDR:       /* Set interface address (and family) */
1052                 ret = -EINVAL;
1053                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1054                         break;
1055
1056                 if (!ifa) {
1057                         ret = -ENOBUFS;
1058                         ifa = inet_alloc_ifa();
1059                         if (!ifa)
1060                                 break;
1061                         INIT_HLIST_NODE(&ifa->hash);
1062                         if (colon)
1063                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
1064                         else
1065                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1066                 } else {
1067                         ret = 0;
1068                         if (ifa->ifa_local == sin->sin_addr.s_addr)
1069                                 break;
1070                         inet_del_ifa(in_dev, ifap, 0);
1071                         ifa->ifa_broadcast = 0;
1072                         ifa->ifa_scope = 0;
1073                 }
1074
1075                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1076
                     /* Non point-to-point devices get the classful default
                      * prefix; point-to-point devices get a /32.
                      */
1077                 if (!(dev->flags & IFF_POINTOPOINT)) {
1078                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1079                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1080                         if ((dev->flags & IFF_BROADCAST) &&
1081                             ifa->ifa_prefixlen < 31)
1082                                 ifa->ifa_broadcast = ifa->ifa_address |
1083                                                      ~ifa->ifa_mask;
1084                 } else {
1085                         ifa->ifa_prefixlen = 32;
1086                         ifa->ifa_mask = inet_make_mask(32);
1087                 }
1088                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1089                 ret = inet_set_ifa(dev, ifa);
1090                 break;
1091
1092         case SIOCSIFBRDADDR:    /* Set the broadcast address */
1093                 ret = 0;
1094                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
                             /* Remove, modify, then re-insert the address. */
1095                         inet_del_ifa(in_dev, ifap, 0);
1096                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
1097                         inet_insert_ifa(ifa);
1098                 }
1099                 break;
1100
1101         case SIOCSIFDSTADDR:    /* Set the destination address */
1102                 ret = 0;
1103                 if (ifa->ifa_address == sin->sin_addr.s_addr)
1104                         break;
1105                 ret = -EINVAL;
1106                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1107                         break;
1108                 ret = 0;
1109                 inet_del_ifa(in_dev, ifap, 0);
1110                 ifa->ifa_address = sin->sin_addr.s_addr;
1111                 inet_insert_ifa(ifa);
1112                 break;
1113
1114         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1115
1116                 /*
1117                  *      The mask we set must be legal.
1118                  */
1119                 ret = -EINVAL;
1120                 if (bad_mask(sin->sin_addr.s_addr, 0))
1121                         break;
1122                 ret = 0;
1123                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1124                         __be32 old_mask = ifa->ifa_mask;
1125                         inet_del_ifa(in_dev, ifap, 0);
1126                         ifa->ifa_mask = sin->sin_addr.s_addr;
1127                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1128
1129                         /* See if current broadcast address matches
1130                          * with current netmask, then recalculate
1131                          * the broadcast address. Otherwise it's a
1132                          * funny address, so don't touch it since
1133                          * the user seems to know what (s)he's doing...
1134                          */
1135                         if ((dev->flags & IFF_BROADCAST) &&
1136                             (ifa->ifa_prefixlen < 31) &&
1137                             (ifa->ifa_broadcast ==
1138                              (ifa->ifa_local|~old_mask))) {
1139                                 ifa->ifa_broadcast = (ifa->ifa_local |
1140                                                       ~sin->sin_addr.s_addr);
1141                         }
1142                         inet_insert_ifa(ifa);
1143                 }
1144                 break;
1145         }
1146 done:
1147         rtnl_unlock();
1148 out:
1149         return ret;
             /* Successful "get": drop RTNL first, then copy the ifreq back. */
1150 rarok:
1151         rtnl_unlock();
1152         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
1153         goto out;
1154 }
1155
1156 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1157 {
1158         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1159         struct in_ifaddr *ifa;
1160         struct ifreq ifr;
1161         int done = 0;
1162
1163         if (!in_dev)
1164                 goto out;
1165
1166         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1167                 if (!buf) {
1168                         done += sizeof(ifr);
1169                         continue;
1170                 }
1171                 if (len < (int) sizeof(ifr))
1172                         break;
1173                 memset(&ifr, 0, sizeof(struct ifreq));
1174                 strcpy(ifr.ifr_name, ifa->ifa_label);
1175
1176                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1177                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1178                                                                 ifa->ifa_local;
1179
1180                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1181                         done = -EFAULT;
1182                         break;
1183                 }
1184                 buf  += sizeof(struct ifreq);
1185                 len  -= sizeof(struct ifreq);
1186                 done += sizeof(struct ifreq);
1187         }
1188 out:
1189         return done;
1190 }
1191
/*
 * Pick a local IPv4 address to use on @dev.
 *
 * @dev:   device to look at first
 * @dst:   destination the address should share a subnet with, or 0 for none
 * @scope: largest ifa_scope value an acceptable address may have
 *
 * First the primary addresses of @dev are scanned: an address whose subnet
 * contains @dst (or the first eligible one when @dst is 0) wins outright,
 * otherwise the first address with an acceptable scope is used.  If @dev has
 * no in_device or yields nothing, every device in @dev's namespace is scanned
 * for a primary address whose scope is not RT_SCOPE_LINK and does not exceed
 * @scope.
 *
 * Runs under rcu_read_lock().  Returns the chosen address, or 0 if none.
 */
1192 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1193 {
1194         __be32 addr = 0;
1195         struct in_device *in_dev;
1196         struct net *net = dev_net(dev);
1197
1198         rcu_read_lock();
1199         in_dev = __in_dev_get_rcu(dev);
1200         if (!in_dev)
1201                 goto no_in_dev;
1202
1203         for_primary_ifa(in_dev) {
1204                 if (ifa->ifa_scope > scope)
1205                         continue;
1206                 if (!dst || inet_ifa_match(dst, ifa)) {
1207                         addr = ifa->ifa_local;
1208                         break;
1209                 }
                     /* Remember the first in-scope address as a fallback. */
1210                 if (!addr)
1211                         addr = ifa->ifa_local;
1212         } endfor_ifa(in_dev);
1213
1214         if (addr)
1215                 goto out_unlock;
1216 no_in_dev:
1217
1218         /* Not loopback addresses on loopback should be preferred
1219            in this case. It is important that lo is the first interface
1220            in dev_base list.
1221          */
1222         for_each_netdev_rcu(net, dev) {
1223                 in_dev = __in_dev_get_rcu(dev);
1224                 if (!in_dev)
1225                         continue;
1226
1227                 for_primary_ifa(in_dev) {
1228                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
1229                             ifa->ifa_scope <= scope) {
1230                                 addr = ifa->ifa_local;
1231                                 goto out_unlock;
1232                         }
1233                 } endfor_ifa(in_dev);
1234         }
1235 out_unlock:
1236         rcu_read_unlock();
1237         return addr;
1238 }
1239 EXPORT_SYMBOL(inet_select_addr);
1240
/*
 * Per-device worker for inet_confirm_addr().
 *
 * Walks the addresses of @in_dev tracking two things:
 *   addr - the first local address equal to @local (or any address when
 *          @local is 0) whose scope does not exceed @scope;
 *   same - whether some address's subnet contains both @local and @dst
 *          (each check is skipped when the respective value is 0).
 *
 * Returns the selected address when a subnet match was found, otherwise 0.
 */
1241 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1242                               __be32 local, int scope)
1243 {
1244         int same = 0;
1245         __be32 addr = 0;
1246
1247         for_ifa(in_dev) {
1248                 if (!addr &&
1249                     (local == ifa->ifa_local || !local) &&
1250                     ifa->ifa_scope <= scope) {
1251                         addr = ifa->ifa_local;
                             /* Subnet already confirmed earlier: done. */
1252                         if (same)
1253                                 break;
1254                 }
1255                 if (!same) {
1256                         same = (!local || inet_ifa_match(local, ifa)) &&
1257                                 (!dst || inet_ifa_match(dst, ifa));
1258                         if (same && addr) {
1259                                 if (local || !dst)
1260                                         break;
1261                                 /* Is the selected addr into dst subnet? */
1262                                 if (inet_ifa_match(addr, ifa))
1263                                         break;
1264                                 /* No, then can we use new local src? */
1265                                 if (ifa->ifa_scope <= scope) {
1266                                         addr = ifa->ifa_local;
1267                                         break;
1268                                 }
1269                                 /* search for large dst subnet for addr */
1270                                 same = 0;
1271                         }
1272                 }
1273         } endfor_ifa(in_dev);
1274
1275         return same ? addr : 0;
1276 }
1277
1278 /*
1279  * Confirm that local IP address exists using wildcards:
1280  * - net: netns to check, cannot be NULL
1281  * - in_dev: only on this interface, NULL=any interface
1282  * - dst: only in the same subnet as dst, 0=any dst
1283  * - local: address, 0=autoselect the local address
1284  * - scope: maximum allowed scope value for the local address
1285  */
1286 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1287                          __be32 dst, __be32 local, int scope)
1288 {
1289         __be32 addr = 0;
1290         struct net_device *dev;
1291
1292         if (in_dev)
1293                 return confirm_addr_indev(in_dev, dst, local, scope);
1294
1295         rcu_read_lock();
1296         for_each_netdev_rcu(net, dev) {
1297                 in_dev = __in_dev_get_rcu(dev);
1298                 if (in_dev) {
1299                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1300                         if (addr)
1301                                 break;
1302                 }
1303         }
1304         rcu_read_unlock();
1305
1306         return addr;
1307 }
1308 EXPORT_SYMBOL(inet_confirm_addr);
1309
1310 /*
1311  *      Device notifier
1312  */
1313
1314 int register_inetaddr_notifier(struct notifier_block *nb)
1315 {
1316         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1317 }
1318 EXPORT_SYMBOL(register_inetaddr_notifier);
1319
1320 int unregister_inetaddr_notifier(struct notifier_block *nb)
1321 {
1322         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1323 }
1324 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1325
1326 /* Rename ifa_labels for a device name change. Make some effort to preserve
1327  * existing alias numbering and to create unique labels if possible.
1328 */
1329 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1330 {
1331         struct in_ifaddr *ifa;
1332         int named = 0;
1333
1334         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1335                 char old[IFNAMSIZ], *dot;
1336
1337                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1338                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1339                 if (named++ == 0)
1340                         goto skip;
1341                 dot = strchr(old, ':');
1342                 if (!dot) {
1343                         sprintf(old, ":%d", named);
1344                         dot = old;
1345                 }
1346                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1347                         strcat(ifa->ifa_label, dot);
1348                 else
1349                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1350 skip:
1351                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1352         }
1353 }
1354
/* Tell whether @mtu is acceptable for IPv4 on a device: at least 68 bytes. */
static bool inetdev_valid_mtu(unsigned int mtu)
{
        enum { INETDEV_MIN_MTU = 68 };

        return !(mtu < INETDEV_MIN_MTU);
}
1359
1360 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1361                                         struct in_device *in_dev)
1362
1363 {
1364         struct in_ifaddr *ifa;
1365
1366         for (ifa = in_dev->ifa_list; ifa;
1367              ifa = ifa->ifa_next) {
1368                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1369                          ifa->ifa_local, dev,
1370                          ifa->ifa_local, NULL,
1371                          dev->dev_addr, NULL);
1372         }
1373 }
1374
1375 /* Called only under RTNL semaphore */
1376
/*
 * Netdevice notifier callback that keeps a device's IPv4 state (in_device)
 * in step with device events.
 *
 * Without an in_device, one is created on NETDEV_REGISTER (a failure is
 * reported through notifier_from_errno()) and on NETDEV_CHANGEMTU when the
 * new MTU is valid; every other event is ignored.
 *
 * With an in_device:
 *   NETDEV_UP                brings multicast up and, on a loopback device,
 *                            first adds INADDR_LOOPBACK/8; then continues as
 *                            NETDEV_CHANGEADDR
 *   NETDEV_CHANGEADDR        sends gratuitous ARP if IN_DEV_ARP_NOTIFY is set
 *   NETDEV_NOTIFY_PEERS      always sends gratuitous ARP
 *   NETDEV_DOWN              brings multicast down
 *   NETDEV_PRE/POST_TYPE_CHANGE  unmaps / remaps multicast
 *   NETDEV_CHANGEMTU         destroys the in_device if the MTU became invalid
 *   NETDEV_UNREGISTER        destroys the in_device
 *   NETDEV_CHANGENAME        relabels addresses and re-registers sysctls
 *
 * Returns NOTIFY_DONE except for the NETDEV_REGISTER failure noted above.
 */
1377 static int inetdev_event(struct notifier_block *this, unsigned long event,
1378                          void *ptr)
1379 {
1380         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1381         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1382
1383         ASSERT_RTNL();
1384
1385         if (!in_dev) {
1386                 if (event == NETDEV_REGISTER) {
1387                         in_dev = inetdev_init(dev);
1388                         if (IS_ERR(in_dev))
1389                                 return notifier_from_errno(PTR_ERR(in_dev));
1390                         if (dev->flags & IFF_LOOPBACK) {
1391                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1392                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1393                         }
1394                 } else if (event == NETDEV_CHANGEMTU) {
1395                         /* Re-enabling IP */
1396                         if (inetdev_valid_mtu(dev->mtu))
1397                                 in_dev = inetdev_init(dev);
1398                 }
1399                 goto out;
1400         }
1401
1402         switch (event) {
1403         case NETDEV_REGISTER:
                     /* REGISTER for a device that already has an in_device
                      * is unexpected; log it and clear the pointer.
                      */
1404                 pr_debug("%s: bug\n", __func__);
1405                 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1406                 break;
1407         case NETDEV_UP:
1408                 if (!inetdev_valid_mtu(dev->mtu))
1409                         break;
1410                 if (dev->flags & IFF_LOOPBACK) {
1411                         struct in_ifaddr *ifa = inet_alloc_ifa();
1412
1413                         if (ifa) {
1414                                 INIT_HLIST_NODE(&ifa->hash);
1415                                 ifa->ifa_local =
1416                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1417                                 ifa->ifa_prefixlen = 8;
1418                                 ifa->ifa_mask = inet_make_mask(8);
1419                                 in_dev_hold(in_dev);
1420                                 ifa->ifa_dev = in_dev;
1421                                 ifa->ifa_scope = RT_SCOPE_HOST;
1422                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1423                                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1424                                                  INFINITY_LIFE_TIME);
1425                                 ipv4_devconf_setall(in_dev);
1426                                 neigh_parms_data_state_setall(in_dev->arp_parms);
1427                                 inet_insert_ifa(ifa);
1428                         }
1429                 }
1430                 ip_mc_up(in_dev);
1431                 /* fall through */
1432         case NETDEV_CHANGEADDR:
1433                 if (!IN_DEV_ARP_NOTIFY(in_dev))
1434                         break;
1435                 /* fall through */
1436         case NETDEV_NOTIFY_PEERS:
1437                 /* Send gratuitous ARP to notify of link change */
1438                 inetdev_send_gratuitous_arp(dev, in_dev);
1439                 break;
1440         case NETDEV_DOWN:
1441                 ip_mc_down(in_dev);
1442                 break;
1443         case NETDEV_PRE_TYPE_CHANGE:
1444                 ip_mc_unmap(in_dev);
1445                 break;
1446         case NETDEV_POST_TYPE_CHANGE:
1447                 ip_mc_remap(in_dev);
1448                 break;
1449         case NETDEV_CHANGEMTU:
1450                 if (inetdev_valid_mtu(dev->mtu))
1451                         break;
1452                 /* disable IP when MTU is not enough */
                     /* fall through */
1453         case NETDEV_UNREGISTER:
1454                 inetdev_destroy(in_dev);
1455                 break;
1456         case NETDEV_CHANGENAME:
1457                 /* Do not notify about label change, this event is
1458                  * not interesting to applications using netlink.
1459                  */
1460                 inetdev_changename(dev, in_dev);
1461
1462                 devinet_sysctl_unregister(in_dev);
1463                 devinet_sysctl_register(in_dev);
1464                 break;
1465         }
1466 out:
1467         return NOTIFY_DONE;
1468 }
1469
/* Notifier block whose callback is inetdev_event(). */
1470 static struct notifier_block ip_netdev_notifier = {
1471         .notifier_call = inetdev_event,
1472 };
1473
1474 static size_t inet_nlmsg_size(void)
1475 {
1476         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1477                + nla_total_size(4) /* IFA_ADDRESS */
1478                + nla_total_size(4) /* IFA_LOCAL */
1479                + nla_total_size(4) /* IFA_BROADCAST */
1480                + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1481                + nla_total_size(4)  /* IFA_FLAGS */
1482                + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1483 }
1484
1485 static inline u32 cstamp_delta(unsigned long cstamp)
1486 {
1487         return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1488 }
1489
1490 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1491                          unsigned long tstamp, u32 preferred, u32 valid)
1492 {
1493         struct ifa_cacheinfo ci;
1494
1495         ci.cstamp = cstamp_delta(cstamp);
1496         ci.tstamp = cstamp_delta(tstamp);
1497         ci.ifa_prefered = preferred;
1498         ci.ifa_valid = valid;
1499
1500         return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1501 }
1502
1503 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1504                             u32 portid, u32 seq, int event, unsigned int flags)
1505 {
1506         struct ifaddrmsg *ifm;
1507         struct nlmsghdr  *nlh;
1508         u32 preferred, valid;
1509
1510         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1511         if (!nlh)
1512                 return -EMSGSIZE;
1513
1514         ifm = nlmsg_data(nlh);
1515         ifm->ifa_family = AF_INET;
1516         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1517         ifm->ifa_flags = ifa->ifa_flags;
1518         ifm->ifa_scope = ifa->ifa_scope;
1519         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1520
1521         if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1522                 preferred = ifa->ifa_preferred_lft;
1523                 valid = ifa->ifa_valid_lft;
1524                 if (preferred != INFINITY_LIFE_TIME) {
1525                         long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1526
1527                         if (preferred > tval)
1528                                 preferred -= tval;
1529                         else
1530                                 preferred = 0;
1531                         if (valid != INFINITY_LIFE_TIME) {
1532                                 if (valid > tval)
1533                                         valid -= tval;
1534                                 else
1535                                         valid = 0;
1536                         }
1537                 }
1538         } else {
1539                 preferred = INFINITY_LIFE_TIME;
1540                 valid = INFINITY_LIFE_TIME;
1541         }
1542         if ((ifa->ifa_address &&
1543              nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1544             (ifa->ifa_local &&
1545              nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1546             (ifa->ifa_broadcast &&
1547              nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1548             (ifa->ifa_label[0] &&
1549              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1550             nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1551             put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1552                           preferred, valid))
1553                 goto nla_put_failure;
1554
1555         nlmsg_end(skb, nlh);
1556         return 0;
1557
1558 nla_put_failure:
1559         nlmsg_cancel(skb, nlh);
1560         return -EMSGSIZE;
1561 }
1562
/*
 * Netlink dump callback: emit an RTM_NEWADDR message (NLM_F_MULTI) for every
 * IPv4 address of every device in the requesting socket's namespace.
 *
 * The dump is resumable.  cb->args[] carries the position between calls:
 *   args[0]  index of the device hash bucket
 *   args[1]  index of the device within that bucket
 *   args[2]  index of the address within that device
 * When @skb fills up, the current position is stored and the next call
 * continues from it.
 *
 * Returns skb->len.
 */
1563 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1564 {
1565         struct net *net = sock_net(skb->sk);
1566         int h, s_h;
1567         int idx, s_idx;
1568         int ip_idx, s_ip_idx;
1569         struct net_device *dev;
1570         struct in_device *in_dev;
1571         struct in_ifaddr *ifa;
1572         struct hlist_head *head;
1573
             /* Pick up where the previous call stopped. */
1574         s_h = cb->args[0];
1575         s_idx = idx = cb->args[1];
1576         s_ip_idx = ip_idx = cb->args[2];
1577
1578         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1579                 idx = 0;
1580                 head = &net->dev_index_head[h];
1581                 rcu_read_lock();
                     /* Sequence value used by nl_dump_check_consistent(). */
1582                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1583                           net->dev_base_seq;
1584                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1585                         if (idx < s_idx)
1586                                 goto cont;
                             /* Past the resume device: start at its first
                              * address again.
                              */
1587                         if (h > s_h || idx > s_idx)
1588                                 s_ip_idx = 0;
1589                         in_dev = __in_dev_get_rcu(dev);
1590                         if (!in_dev)
1591                                 goto cont;
1592
1593                         for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1594                              ifa = ifa->ifa_next, ip_idx++) {
1595                                 if (ip_idx < s_ip_idx)
1596                                         continue;
1597                                 if (inet_fill_ifaddr(skb, ifa,
1598                                              NETLINK_CB(cb->skb).portid,
1599                                              cb->nlh->nlmsg_seq,
1600                                              RTM_NEWADDR, NLM_F_MULTI) < 0) {
1601                                         rcu_read_unlock();
1602                                         goto done;
1603                                 }
1604                                 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1605                         }
1606 cont:
1607                         idx++;
1608                 }
1609                 rcu_read_unlock();
1610         }
1611
1612 done:
             /* Save the position for the next call. */
1613         cb->args[0] = h;
1614         cb->args[1] = idx;
1615         cb->args[2] = ip_idx;
1616
1617         return skb->len;
1618 }
1619
1620 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1621                       u32 portid)
1622 {
1623         struct sk_buff *skb;
1624         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1625         int err = -ENOBUFS;
1626         struct net *net;
1627
1628         net = dev_net(ifa->ifa_dev->dev);
1629         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1630         if (!skb)
1631                 goto errout;
1632
1633         err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1634         if (err < 0) {
1635                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1636                 WARN_ON(err == -EMSGSIZE);
1637                 kfree_skb(skb);
1638                 goto errout;
1639         }
1640         rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1641         return;
1642 errout:
1643         if (err < 0)
1644                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1645 }
1646
1647 static size_t inet_get_link_af_size(const struct net_device *dev)
1648 {
1649         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1650
1651         if (!in_dev)
1652                 return 0;
1653
1654         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1655 }
1656
1657 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1658                              u32 ext_filter_mask)
1659 {
1660         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1661         struct nlattr *nla;
1662         int i;
1663
1664         if (!in_dev)
1665                 return -ENODATA;
1666
1667         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1668         if (!nla)
1669                 return -EMSGSIZE;
1670
1671         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1672                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1673
1674         return 0;
1675 }
1676
1677 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1678         [IFLA_INET_CONF]        = { .type = NLA_NESTED },
1679 };
1680
1681 static int inet_validate_link_af(const struct net_device *dev,
1682                                  const struct nlattr *nla)
1683 {
1684         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1685         int err, rem;
1686
1687         if (dev && !__in_dev_get_rtnl(dev))
1688                 return -EAFNOSUPPORT;
1689
1690         err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1691         if (err < 0)
1692                 return err;
1693
1694         if (tb[IFLA_INET_CONF]) {
1695                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1696                         int cfgid = nla_type(a);
1697
1698                         if (nla_len(a) < 4)
1699                                 return -EINVAL;
1700
1701                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1702                                 return -EINVAL;
1703                 }
1704         }
1705
1706         return 0;
1707 }
1708
1709 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1710 {
1711         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1712         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1713         int rem;
1714
1715         if (!in_dev)
1716                 return -EAFNOSUPPORT;
1717
1718         if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1719                 BUG();
1720
1721         if (tb[IFLA_INET_CONF]) {
1722                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1723                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1724         }
1725
1726         return 0;
1727 }
1728
1729 static int inet_netconf_msgsize_devconf(int type)
1730 {
1731         int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1732                    + nla_total_size(4); /* NETCONFA_IFINDEX */
1733
1734         /* type -1 is used for ALL */
1735         if (type == -1 || type == NETCONFA_FORWARDING)
1736                 size += nla_total_size(4);
1737         if (type == -1 || type == NETCONFA_RP_FILTER)
1738                 size += nla_total_size(4);
1739         if (type == -1 || type == NETCONFA_MC_FORWARDING)
1740                 size += nla_total_size(4);
1741         if (type == -1 || type == NETCONFA_PROXY_NEIGH)
1742                 size += nla_total_size(4);
1743         if (type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
1744                 size += nla_total_size(4);
1745
1746         return size;
1747 }
1748
1749 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1750                                      struct ipv4_devconf *devconf, u32 portid,
1751                                      u32 seq, int event, unsigned int flags,
1752                                      int type)
1753 {
1754         struct nlmsghdr  *nlh;
1755         struct netconfmsg *ncm;
1756
1757         nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1758                         flags);
1759         if (!nlh)
1760                 return -EMSGSIZE;
1761
1762         ncm = nlmsg_data(nlh);
1763         ncm->ncm_family = AF_INET;
1764
1765         if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1766                 goto nla_put_failure;
1767
1768         /* type -1 is used for ALL */
1769         if ((type == -1 || type == NETCONFA_FORWARDING) &&
1770             nla_put_s32(skb, NETCONFA_FORWARDING,
1771                         IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1772                 goto nla_put_failure;
1773         if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1774             nla_put_s32(skb, NETCONFA_RP_FILTER,
1775                         IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1776                 goto nla_put_failure;
1777         if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1778             nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1779                         IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1780                 goto nla_put_failure;
1781         if ((type == -1 || type == NETCONFA_PROXY_NEIGH) &&
1782             nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1783                         IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1784                 goto nla_put_failure;
1785         if ((type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
1786             nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
1787                         IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
1788                 goto nla_put_failure;
1789
1790         nlmsg_end(skb, nlh);
1791         return 0;
1792
1793 nla_put_failure:
1794         nlmsg_cancel(skb, nlh);
1795         return -EMSGSIZE;
1796 }
1797
1798 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1799                                  struct ipv4_devconf *devconf)
1800 {
1801         struct sk_buff *skb;
1802         int err = -ENOBUFS;
1803
1804         skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1805         if (!skb)
1806                 goto errout;
1807
1808         err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1809                                         RTM_NEWNETCONF, 0, type);
1810         if (err < 0) {
1811                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1812                 WARN_ON(err == -EMSGSIZE);
1813                 kfree_skb(skb);
1814                 goto errout;
1815         }
1816         rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1817         return;
1818 errout:
1819         if (err < 0)
1820                 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1821 }
1822
1823 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1824         [NETCONFA_IFINDEX]      = { .len = sizeof(int) },
1825         [NETCONFA_FORWARDING]   = { .len = sizeof(int) },
1826         [NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
1827         [NETCONFA_PROXY_NEIGH]  = { .len = sizeof(int) },
1828         [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]  = { .len = sizeof(int) },
1829 };
1830
1831 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1832                                     struct nlmsghdr *nlh)
1833 {
1834         struct net *net = sock_net(in_skb->sk);
1835         struct nlattr *tb[NETCONFA_MAX+1];
1836         struct netconfmsg *ncm;
1837         struct sk_buff *skb;
1838         struct ipv4_devconf *devconf;
1839         struct in_device *in_dev;
1840         struct net_device *dev;
1841         int ifindex;
1842         int err;
1843
1844         err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1845                           devconf_ipv4_policy);
1846         if (err < 0)
1847                 goto errout;
1848
1849         err = EINVAL;
1850         if (!tb[NETCONFA_IFINDEX])
1851                 goto errout;
1852
1853         ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1854         switch (ifindex) {
1855         case NETCONFA_IFINDEX_ALL:
1856                 devconf = net->ipv4.devconf_all;
1857                 break;
1858         case NETCONFA_IFINDEX_DEFAULT:
1859                 devconf = net->ipv4.devconf_dflt;
1860                 break;
1861         default:
1862                 dev = __dev_get_by_index(net, ifindex);
1863                 if (!dev)
1864                         goto errout;
1865                 in_dev = __in_dev_get_rtnl(dev);
1866                 if (!in_dev)
1867                         goto errout;
1868                 devconf = &in_dev->cnf;
1869                 break;
1870         }
1871
1872         err = -ENOBUFS;
1873         skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1874         if (!skb)
1875                 goto errout;
1876
1877         err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1878                                         NETLINK_CB(in_skb).portid,
1879                                         nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1880                                         -1);
1881         if (err < 0) {
1882                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1883                 WARN_ON(err == -EMSGSIZE);
1884                 kfree_skb(skb);
1885                 goto errout;
1886         }
1887         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1888 errout:
1889         return err;
1890 }
1891
1892 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1893                                      struct netlink_callback *cb)
1894 {
1895         struct net *net = sock_net(skb->sk);
1896         int h, s_h;
1897         int idx, s_idx;
1898         struct net_device *dev;
1899         struct in_device *in_dev;
1900         struct hlist_head *head;
1901
1902         s_h = cb->args[0];
1903         s_idx = idx = cb->args[1];
1904
1905         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1906                 idx = 0;
1907                 head = &net->dev_index_head[h];
1908                 rcu_read_lock();
1909                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1910                           net->dev_base_seq;
1911                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1912                         if (idx < s_idx)
1913                                 goto cont;
1914                         in_dev = __in_dev_get_rcu(dev);
1915                         if (!in_dev)
1916                                 goto cont;
1917
1918                         if (inet_netconf_fill_devconf(skb, dev->ifindex,
1919                                                       &in_dev->cnf,
1920                                                       NETLINK_CB(cb->skb).portid,
1921                                                       cb->nlh->nlmsg_seq,
1922                                                       RTM_NEWNETCONF,
1923                                                       NLM_F_MULTI,
1924                                                       -1) < 0) {
1925                                 rcu_read_unlock();
1926                                 goto done;
1927                         }
1928                         nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1929 cont:
1930                         idx++;
1931                 }
1932                 rcu_read_unlock();
1933         }
1934         if (h == NETDEV_HASHENTRIES) {
1935                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
1936                                               net->ipv4.devconf_all,
1937                                               NETLINK_CB(cb->skb).portid,
1938                                               cb->nlh->nlmsg_seq,
1939                                               RTM_NEWNETCONF, NLM_F_MULTI,
1940                                               -1) < 0)
1941                         goto done;
1942                 else
1943                         h++;
1944         }
1945         if (h == NETDEV_HASHENTRIES + 1) {
1946                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
1947                                               net->ipv4.devconf_dflt,
1948                                               NETLINK_CB(cb->skb).portid,
1949                                               cb->nlh->nlmsg_seq,
1950                                               RTM_NEWNETCONF, NLM_F_MULTI,
1951                                               -1) < 0)
1952                         goto done;
1953                 else
1954                         h++;
1955         }
1956 done:
1957         cb->args[0] = h;
1958         cb->args[1] = idx;
1959
1960         return skb->len;
1961 }
1962
1963 #ifdef CONFIG_SYSCTL
1964
1965 static void devinet_copy_dflt_conf(struct net *net, int i)
1966 {
1967         struct net_device *dev;
1968
1969         rcu_read_lock();
1970         for_each_netdev_rcu(net, dev) {
1971                 struct in_device *in_dev;
1972
1973                 in_dev = __in_dev_get_rcu(dev);
1974                 if (in_dev && !test_bit(i, in_dev->cnf.state))
1975                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1976         }
1977         rcu_read_unlock();
1978 }
1979
1980 /* called with RTNL locked */
1981 static void inet_forward_change(struct net *net)
1982 {
1983         struct net_device *dev;
1984         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1985
1986         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1987         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1988         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1989                                     NETCONFA_IFINDEX_ALL,
1990                                     net->ipv4.devconf_all);
1991         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1992                                     NETCONFA_IFINDEX_DEFAULT,
1993                                     net->ipv4.devconf_dflt);
1994
1995         for_each_netdev(net, dev) {
1996                 struct in_device *in_dev;
1997                 if (on)
1998                         dev_disable_lro(dev);
1999                 rcu_read_lock();
2000                 in_dev = __in_dev_get_rcu(dev);
2001                 if (in_dev) {
2002                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2003                         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2004                                                     dev->ifindex, &in_dev->cnf);
2005                 }
2006                 rcu_read_unlock();
2007         }
2008 }
2009
2010 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2011 {
2012         if (cnf == net->ipv4.devconf_dflt)
2013                 return NETCONFA_IFINDEX_DEFAULT;
2014         else if (cnf == net->ipv4.devconf_all)
2015                 return NETCONFA_IFINDEX_ALL;
2016         else {
2017                 struct in_device *idev
2018                         = container_of(cnf, struct in_device, cnf);
2019                 return idev->dev->ifindex;
2020         }
2021 }
2022
2023 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2024                              void __user *buffer,
2025                              size_t *lenp, loff_t *ppos)
2026 {
2027         int old_value = *(int *)ctl->data;
2028         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2029         int new_value = *(int *)ctl->data;
2030
2031         if (write) {
2032                 struct ipv4_devconf *cnf = ctl->extra1;
2033                 struct net *net = ctl->extra2;
2034                 int i = (int *)ctl->data - cnf->data;
2035                 int ifindex;
2036
2037                 set_bit(i, cnf->state);
2038
2039                 if (cnf == net->ipv4.devconf_dflt)
2040                         devinet_copy_dflt_conf(net, i);
2041                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2042                     i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2043                         if ((new_value == 0) && (old_value != 0))
2044                                 rt_cache_flush(net);
2045
2046                 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2047                     new_value != old_value) {
2048                         ifindex = devinet_conf_ifindex(net, cnf);
2049                         inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
2050                                                     ifindex, cnf);
2051                 }
2052                 if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2053                     new_value != old_value) {
2054                         ifindex = devinet_conf_ifindex(net, cnf);
2055                         inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
2056                                                     ifindex, cnf);
2057                 }
2058                 if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2059                     new_value != old_value) {
2060                         ifindex = devinet_conf_ifindex(net, cnf);
2061                         inet_netconf_notify_devconf(net, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2062                                                     ifindex, cnf);
2063                 }
2064         }
2065
2066         return ret;
2067 }
2068
2069 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2070                                   void __user *buffer,
2071                                   size_t *lenp, loff_t *ppos)
2072 {
2073         int *valp = ctl->data;
2074         int val = *valp;
2075         loff_t pos = *ppos;
2076         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2077
2078         if (write && *valp != val) {
2079                 struct net *net = ctl->extra2;
2080
2081                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2082                         if (!rtnl_trylock()) {
2083                                 /* Restore the original values before restarting */
2084                                 *valp = val;
2085                                 *ppos = pos;
2086                                 return restart_syscall();
2087                         }
2088                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2089                                 inet_forward_change(net);
2090                         } else {
2091                                 struct ipv4_devconf *cnf = ctl->extra1;
2092                                 struct in_device *idev =
2093                                         container_of(cnf, struct in_device, cnf);
2094                                 if (*valp)
2095                                         dev_disable_lro(idev->dev);
2096                                 inet_netconf_notify_devconf(net,
2097                                                             NETCONFA_FORWARDING,
2098                                                             idev->dev->ifindex,
2099                                                             cnf);
2100                         }
2101                         rtnl_unlock();
2102                         rt_cache_flush(net);
2103                 } else
2104                         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2105                                                     NETCONFA_IFINDEX_DEFAULT,
2106                                                     net->ipv4.devconf_dflt);
2107         }
2108
2109         return ret;
2110 }
2111
2112 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2113                                 void __user *buffer,
2114                                 size_t *lenp, loff_t *ppos)
2115 {
2116         int *valp = ctl->data;
2117         int val = *valp;
2118         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2119         struct net *net = ctl->extra2;
2120
2121         if (write && *valp != val)
2122                 rt_cache_flush(net);
2123
2124         return ret;
2125 }
2126
/*
 * Initializer for one ctl_table entry backed by ipv4_devconf.data[].
 * @attr is the IPV4_DEVCONF_* suffix (ids are 1-based, hence the "- 1"),
 * @name the procname, @mval the file mode and @proc the handler.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc)			\
	{								\
		.procname	= name,					\
		.data		= &ipv4_devconf.data[			\
					IPV4_DEVCONF_ ## attr - 1],	\
		.maxlen		= sizeof(int),				\
		.mode		= mval,					\
		.proc_handler	= proc,					\
		.extra1		= &ipv4_devconf,			\
	}

/* Read-write entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name)				\
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Read-only entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name)				\
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Read-write entry with a caller-chosen handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc)			\
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Read-write entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name)			\
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2149
2150 static struct devinet_sysctl_table {
2151         struct ctl_table_header *sysctl_header;
2152         struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2153 } devinet_sysctl = {
2154         .devinet_vars = {
2155                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2156                                              devinet_sysctl_forward),
2157                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2158
2159                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2160                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2161                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2162                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2163                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2164                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2165                                         "accept_source_route"),
2166                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2167                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2168                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2169                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2170                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2171                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2172                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2173                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2174                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2175                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2176                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2177                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2178                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2179                 DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2180                                         "force_igmp_version"),
2181                 DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2182                                         "igmpv2_unsolicited_report_interval"),
2183                 DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2184                                         "igmpv3_unsolicited_report_interval"),
2185                 DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2186                                         "ignore_routes_with_linkdown"),
2187
2188                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2189                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2190                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2191                                               "promote_secondaries"),
2192                 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2193                                               "route_localnet"),
2194         },
2195 };
2196
2197 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2198                                         struct ipv4_devconf *p)
2199 {
2200         int i;
2201         struct devinet_sysctl_table *t;
2202         char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2203
2204         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2205         if (!t)
2206                 goto out;
2207
2208         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2209                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2210                 t->devinet_vars[i].extra1 = p;
2211                 t->devinet_vars[i].extra2 = net;
2212         }
2213
2214         snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2215
2216         t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2217         if (!t->sysctl_header)
2218                 goto free;
2219
2220         p->sysctl = t;
2221         return 0;
2222
2223 free:
2224         kfree(t);
2225 out:
2226         return -ENOBUFS;
2227 }
2228
2229 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2230 {
2231         struct devinet_sysctl_table *t = cnf->sysctl;
2232
2233         if (!t)
2234                 return;
2235
2236         cnf->sysctl = NULL;
2237         unregister_net_sysctl_table(t->sysctl_header);
2238         kfree(t);
2239 }
2240
2241 static int devinet_sysctl_register(struct in_device *idev)
2242 {
2243         int err;
2244
2245         if (!sysctl_dev_name_is_allowed(idev->dev->name))
2246                 return -EINVAL;
2247
2248         err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2249         if (err)
2250                 return err;
2251         err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2252                                         &idev->cnf);
2253         if (err)
2254                 neigh_sysctl_unregister(idev->arp_parms);
2255         return err;
2256 }
2257
2258 static void devinet_sysctl_unregister(struct in_device *idev)
2259 {
2260         __devinet_sysctl_unregister(&idev->cnf);
2261         neigh_sysctl_unregister(idev->arp_parms);
2262 }
2263
2264 static struct ctl_table ctl_forward_entry[] = {
2265         {
2266                 .procname       = "ip_forward",
2267                 .data           = &ipv4_devconf.data[
2268                                         IPV4_DEVCONF_FORWARDING - 1],
2269                 .maxlen         = sizeof(int),
2270                 .mode           = 0644,
2271                 .proc_handler   = devinet_sysctl_forward,
2272                 .extra1         = &ipv4_devconf,
2273                 .extra2         = &init_net,
2274         },
2275         { },
2276 };
2277 #endif
2278
2279 static __net_init int devinet_init_net(struct net *net)
2280 {
2281         int err;
2282         struct ipv4_devconf *all, *dflt;
2283 #ifdef CONFIG_SYSCTL
2284         struct ctl_table *tbl = ctl_forward_entry;
2285         struct ctl_table_header *forw_hdr;
2286 #endif
2287
2288         err = -ENOMEM;
2289         all = &ipv4_devconf;
2290         dflt = &ipv4_devconf_dflt;
2291
2292         if (!net_eq(net, &init_net)) {
2293                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2294                 if (!all)
2295                         goto err_alloc_all;
2296
2297                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2298                 if (!dflt)
2299                         goto err_alloc_dflt;
2300
2301 #ifdef CONFIG_SYSCTL
2302                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2303                 if (!tbl)
2304                         goto err_alloc_ctl;
2305
2306                 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2307                 tbl[0].extra1 = all;
2308                 tbl[0].extra2 = net;
2309 #endif
2310         }
2311
2312 #ifdef CONFIG_SYSCTL
2313         err = __devinet_sysctl_register(net, "all", all);
2314         if (err < 0)
2315                 goto err_reg_all;
2316
2317         err = __devinet_sysctl_register(net, "default", dflt);
2318         if (err < 0)
2319                 goto err_reg_dflt;
2320
2321         err = -ENOMEM;
2322         forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2323         if (!forw_hdr)
2324                 goto err_reg_ctl;
2325         net->ipv4.forw_hdr = forw_hdr;
2326 #endif
2327
2328         net->ipv4.devconf_all = all;
2329         net->ipv4.devconf_dflt = dflt;
2330         return 0;
2331
2332 #ifdef CONFIG_SYSCTL
2333 err_reg_ctl:
2334         __devinet_sysctl_unregister(dflt);
2335 err_reg_dflt:
2336         __devinet_sysctl_unregister(all);
2337 err_reg_all:
2338         if (tbl != ctl_forward_entry)
2339                 kfree(tbl);
2340 err_alloc_ctl:
2341 #endif
2342         if (dflt != &ipv4_devconf_dflt)
2343                 kfree(dflt);
2344 err_alloc_dflt:
2345         if (all != &ipv4_devconf)
2346                 kfree(all);
2347 err_alloc_all:
2348         return err;
2349 }
2350
2351 static __net_exit void devinet_exit_net(struct net *net)
2352 {
2353 #ifdef CONFIG_SYSCTL
2354         struct ctl_table *tbl;
2355
2356         tbl = net->ipv4.forw_hdr->ctl_table_arg;
2357         unregister_net_sysctl_table(net->ipv4.forw_hdr);
2358         __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2359         __devinet_sysctl_unregister(net->ipv4.devconf_all);
2360         kfree(tbl);
2361 #endif
2362         kfree(net->ipv4.devconf_dflt);
2363         kfree(net->ipv4.devconf_all);
2364 }
2365
2366 static __net_initdata struct pernet_operations devinet_ops = {
2367         .init = devinet_init_net,
2368         .exit = devinet_exit_net,
2369 };
2370
2371 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2372         .family           = AF_INET,
2373         .fill_link_af     = inet_fill_link_af,
2374         .get_link_af_size = inet_get_link_af_size,
2375         .validate_link_af = inet_validate_link_af,
2376         .set_link_af      = inet_set_link_af,
2377 };
2378
2379 void __init devinet_init(void)
2380 {
2381         int i;
2382
2383         for (i = 0; i < IN4_ADDR_HSIZE; i++)
2384                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2385
2386         register_pernet_subsys(&devinet_ops);
2387
2388         register_gifconf(PF_INET, inet_gifconf);
2389         register_netdevice_notifier(&ip_netdev_notifier);
2390
2391         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2392
2393         rtnl_af_register(&inet_af_ops);
2394
2395         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2396         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2397         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2398         rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2399                       inet_netconf_dump_devconf, NULL);
2400 }
2401