Merge commit 'ed30f24e8d07d30aa3e69d1f508f4d7bd2e8ea14' of git://git.linaro.org/landi...
[firefly-linux-kernel-4.4.55.git] / net / core / neighbour.c
1 /*
2  *      Generic address resolution entity
3  *
4  *      Authors:
5  *      Pedro Roque             <roque@di.fc.ul.pt>
6  *      Alexey Kuznetsov        <kuznet@ms2.inr.ac.ru>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  *
13  *      Fixes:
14  *      Vitaly E. Lavrov        releasing NULL neighbor in neigh_add.
15  *      Harald Welte            Add neighbour cache statistics like rtstat
16  */
17
18 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19
20 #include <linux/slab.h>
21 #include <linux/types.h>
22 #include <linux/kernel.h>
23 #include <linux/module.h>
24 #include <linux/socket.h>
25 #include <linux/netdevice.h>
26 #include <linux/proc_fs.h>
27 #ifdef CONFIG_SYSCTL
28 #include <linux/sysctl.h>
29 #endif
30 #include <linux/times.h>
31 #include <net/net_namespace.h>
32 #include <net/neighbour.h>
33 #include <net/dst.h>
34 #include <net/sock.h>
35 #include <net/netevent.h>
36 #include <net/netlink.h>
37 #include <linux/rtnetlink.h>
38 #include <linux/random.h>
39 #include <linux/string.h>
40 #include <linux/log2.h>
41
/*
 * Debug logging helper.
 *
 * neigh_dbg(level, fmt, ...) forwards to pr_debug() only when @level does
 * not exceed the compile-time verbosity NEIGH_DEBUG.  @level is
 * parenthesised in the expansion so that an expression argument
 * (e.g. "a & b") is compared as a whole instead of being split by the
 * higher precedence of "<=".
 */
#define DEBUG
#define NEIGH_DEBUG 1
#define neigh_dbg(level, fmt, ...)              \
do {                                            \
        if ((level) <= NEIGH_DEBUG)             \
                pr_debug(fmt, ##__VA_ARGS__);   \
} while (0)
49
/* Mask applied to the proxy-neighbour hash; buckets are 0..PNEIGH_HASHMASK. */
#define PNEIGH_HASHMASK         0xF

/* Forward declarations of static helpers used before their definitions. */
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
static void neigh_update_notify(struct neighbour *neigh);
static void __neigh_notify(struct neighbour *n, int type, int flags);
static void neigh_timer_handler(unsigned long arg);

#ifdef CONFIG_PROC_FS
static const struct file_operations neigh_stat_seq_fops;
#endif
static struct neigh_table *neigh_tables;

/*
 * Locking overview
 *
 * The neighbour hash table buckets are protected by the rwlock tbl->lock.
 *
 *  - Every scan or update of the hash buckets MUST be made under this lock.
 *  - NOTHING clever may be done under this lock: no callbacks into
 *    protocol backends and no attempts to send anything to the network.
 *    Doing so results in deadlocks if the backend/driver wants to use the
 *    neighbour cache.
 *  - If an entry requires some non-trivial action, increase its reference
 *    count and release the table lock first.
 *
 * Neighbour entries are protected:
 *  - by a reference count, which prevents destruction;
 *  - by the rwlock neigh->lock.
 *
 * neigh->lock mainly serializes the link-layer address data and its
 * validity state.  The same lock also protects other entry fields:
 *  - the timer
 *  - the resolution queue
 *
 * Again, nothing clever may be done under neigh->lock; the most
 * complicated procedure allowed is dev->hard_header.  dev->hard_header is
 * assumed to be simple and not to call back into the neighbour tables.
 *
 * The last lock is neigh_tbl_lock.  It is a pure SMP lock protecting the
 * list of neighbour tables.  This list is used only in process context.
 */

static DEFINE_RWLOCK(neigh_tbl_lock);
94
/*
 * Output handler that discards traffic: frees @skb and always reports
 * -ENETDOWN.  @neigh is not used.
 */
static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
{
        int err = -ENETDOWN;

        kfree_skb(skb);
        return err;
}
100
101 static void neigh_cleanup_and_release(struct neighbour *neigh)
102 {
103         if (neigh->parms->neigh_cleanup)
104                 neigh->parms->neigh_cleanup(neigh);
105
106         __neigh_notify(neigh, RTM_DELNEIGH, 0);
107         neigh_release(neigh);
108 }
109
/*
 * Pick a random reachable time around @base.
 *
 * The result is (base / 2) plus a random value in 0..base-1, i.e. it is
 * spread over the interval (1/2)*base ... (3/2)*base.  This corresponds
 * to the default IPv6 settings and is deliberately not overridable,
 * because it is a really reasonable choice.
 *
 * Returns 0 when @base is 0.
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
        if (!base)
                return 0;

        return (net_random() % base) + (base >> 1);
}
EXPORT_SYMBOL(neigh_rand_reach_time);
121
122
123 static int neigh_forced_gc(struct neigh_table *tbl)
124 {
125         int shrunk = 0;
126         int i;
127         struct neigh_hash_table *nht;
128
129         NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
130
131         write_lock_bh(&tbl->lock);
132         nht = rcu_dereference_protected(tbl->nht,
133                                         lockdep_is_held(&tbl->lock));
134         for (i = 0; i < (1 << nht->hash_shift); i++) {
135                 struct neighbour *n;
136                 struct neighbour __rcu **np;
137
138                 np = &nht->hash_buckets[i];
139                 while ((n = rcu_dereference_protected(*np,
140                                         lockdep_is_held(&tbl->lock))) != NULL) {
141                         /* Neighbour record may be discarded if:
142                          * - nobody refers to it.
143                          * - it is not permanent
144                          */
145                         write_lock(&n->lock);
146                         if (atomic_read(&n->refcnt) == 1 &&
147                             !(n->nud_state & NUD_PERMANENT)) {
148                                 rcu_assign_pointer(*np,
149                                         rcu_dereference_protected(n->next,
150                                                   lockdep_is_held(&tbl->lock)));
151                                 n->dead = 1;
152                                 shrunk  = 1;
153                                 write_unlock(&n->lock);
154                                 neigh_cleanup_and_release(n);
155                                 continue;
156                         }
157                         write_unlock(&n->lock);
158                         np = &n->next;
159                 }
160         }
161
162         tbl->last_flush = jiffies;
163
164         write_unlock_bh(&tbl->lock);
165
166         return shrunk;
167 }
168
169 static void neigh_add_timer(struct neighbour *n, unsigned long when)
170 {
171         neigh_hold(n);
172         if (unlikely(mod_timer(&n->timer, when))) {
173                 printk("NEIGH: BUG, double timer add, state is %x\n",
174                        n->nud_state);
175                 dump_stack();
176         }
177 }
178
179 static int neigh_del_timer(struct neighbour *n)
180 {
181         if ((n->nud_state & NUD_IN_TIMER) &&
182             del_timer(&n->timer)) {
183                 neigh_release(n);
184                 return 1;
185         }
186         return 0;
187 }
188
189 static void pneigh_queue_purge(struct sk_buff_head *list)
190 {
191         struct sk_buff *skb;
192
193         while ((skb = skb_dequeue(list)) != NULL) {
194                 dev_put(skb->dev);
195                 kfree_skb(skb);
196         }
197 }
198
199 static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
200 {
201         int i;
202         struct neigh_hash_table *nht;
203
204         nht = rcu_dereference_protected(tbl->nht,
205                                         lockdep_is_held(&tbl->lock));
206
207         for (i = 0; i < (1 << nht->hash_shift); i++) {
208                 struct neighbour *n;
209                 struct neighbour __rcu **np = &nht->hash_buckets[i];
210
211                 while ((n = rcu_dereference_protected(*np,
212                                         lockdep_is_held(&tbl->lock))) != NULL) {
213                         if (dev && n->dev != dev) {
214                                 np = &n->next;
215                                 continue;
216                         }
217                         rcu_assign_pointer(*np,
218                                    rcu_dereference_protected(n->next,
219                                                 lockdep_is_held(&tbl->lock)));
220                         write_lock(&n->lock);
221                         neigh_del_timer(n);
222                         n->dead = 1;
223
224                         if (atomic_read(&n->refcnt) != 1) {
225                                 /* The most unpleasant situation.
226                                    We must destroy neighbour entry,
227                                    but someone still uses it.
228
229                                    The destroy will be delayed until
230                                    the last user releases us, but
231                                    we must kill timers etc. and move
232                                    it to safe state.
233                                  */
234                                 skb_queue_purge(&n->arp_queue);
235                                 n->arp_queue_len_bytes = 0;
236                                 n->output = neigh_blackhole;
237                                 if (n->nud_state & NUD_VALID)
238                                         n->nud_state = NUD_NOARP;
239                                 else
240                                         n->nud_state = NUD_NONE;
241                                 neigh_dbg(2, "neigh %p is stray\n", n);
242                         }
243                         write_unlock(&n->lock);
244                         neigh_cleanup_and_release(n);
245                 }
246         }
247 }
248
249 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
250 {
251         write_lock_bh(&tbl->lock);
252         neigh_flush_dev(tbl, dev);
253         write_unlock_bh(&tbl->lock);
254 }
255 EXPORT_SYMBOL(neigh_changeaddr);
256
257 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
258 {
259         write_lock_bh(&tbl->lock);
260         neigh_flush_dev(tbl, dev);
261         pneigh_ifdown(tbl, dev);
262         write_unlock_bh(&tbl->lock);
263
264         del_timer_sync(&tbl->proxy_timer);
265         pneigh_queue_purge(&tbl->proxy_queue);
266         return 0;
267 }
268 EXPORT_SYMBOL(neigh_ifdown);
269
270 static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
271 {
272         struct neighbour *n = NULL;
273         unsigned long now = jiffies;
274         int entries;
275
276         entries = atomic_inc_return(&tbl->entries) - 1;
277         if (entries >= tbl->gc_thresh3 ||
278             (entries >= tbl->gc_thresh2 &&
279              time_after(now, tbl->last_flush + 5 * HZ))) {
280                 if (!neigh_forced_gc(tbl) &&
281                     entries >= tbl->gc_thresh3)
282                         goto out_entries;
283         }
284
285         n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
286         if (!n)
287                 goto out_entries;
288
289         skb_queue_head_init(&n->arp_queue);
290         rwlock_init(&n->lock);
291         seqlock_init(&n->ha_lock);
292         n->updated        = n->used = now;
293         n->nud_state      = NUD_NONE;
294         n->output         = neigh_blackhole;
295         seqlock_init(&n->hh.hh_lock);
296         n->parms          = neigh_parms_clone(&tbl->parms);
297         setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);
298
299         NEIGH_CACHE_STAT_INC(tbl, allocs);
300         n->tbl            = tbl;
301         atomic_set(&n->refcnt, 1);
302         n->dead           = 1;
303 out:
304         return n;
305
306 out_entries:
307         atomic_dec(&tbl->entries);
308         goto out;
309 }
310
311 static void neigh_get_hash_rnd(u32 *x)
312 {
313         get_random_bytes(x, sizeof(*x));
314         *x |= 1;
315 }
316
317 static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
318 {
319         size_t size = (1 << shift) * sizeof(struct neighbour *);
320         struct neigh_hash_table *ret;
321         struct neighbour __rcu **buckets;
322         int i;
323
324         ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
325         if (!ret)
326                 return NULL;
327         if (size <= PAGE_SIZE)
328                 buckets = kzalloc(size, GFP_ATOMIC);
329         else
330                 buckets = (struct neighbour __rcu **)
331                           __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
332                                            get_order(size));
333         if (!buckets) {
334                 kfree(ret);
335                 return NULL;
336         }
337         ret->hash_buckets = buckets;
338         ret->hash_shift = shift;
339         for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
340                 neigh_get_hash_rnd(&ret->hash_rnd[i]);
341         return ret;
342 }
343
344 static void neigh_hash_free_rcu(struct rcu_head *head)
345 {
346         struct neigh_hash_table *nht = container_of(head,
347                                                     struct neigh_hash_table,
348                                                     rcu);
349         size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
350         struct neighbour __rcu **buckets = nht->hash_buckets;
351
352         if (size <= PAGE_SIZE)
353                 kfree(buckets);
354         else
355                 free_pages((unsigned long)buckets, get_order(size));
356         kfree(nht);
357 }
358
359 static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
360                                                 unsigned long new_shift)
361 {
362         unsigned int i, hash;
363         struct neigh_hash_table *new_nht, *old_nht;
364
365         NEIGH_CACHE_STAT_INC(tbl, hash_grows);
366
367         old_nht = rcu_dereference_protected(tbl->nht,
368                                             lockdep_is_held(&tbl->lock));
369         new_nht = neigh_hash_alloc(new_shift);
370         if (!new_nht)
371                 return old_nht;
372
373         for (i = 0; i < (1 << old_nht->hash_shift); i++) {
374                 struct neighbour *n, *next;
375
376                 for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
377                                                    lockdep_is_held(&tbl->lock));
378                      n != NULL;
379                      n = next) {
380                         hash = tbl->hash(n->primary_key, n->dev,
381                                          new_nht->hash_rnd);
382
383                         hash >>= (32 - new_nht->hash_shift);
384                         next = rcu_dereference_protected(n->next,
385                                                 lockdep_is_held(&tbl->lock));
386
387                         rcu_assign_pointer(n->next,
388                                            rcu_dereference_protected(
389                                                 new_nht->hash_buckets[hash],
390                                                 lockdep_is_held(&tbl->lock)));
391                         rcu_assign_pointer(new_nht->hash_buckets[hash], n);
392                 }
393         }
394
395         rcu_assign_pointer(tbl->nht, new_nht);
396         call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
397         return new_nht;
398 }
399
400 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
401                                struct net_device *dev)
402 {
403         struct neighbour *n;
404         int key_len = tbl->key_len;
405         u32 hash_val;
406         struct neigh_hash_table *nht;
407
408         NEIGH_CACHE_STAT_INC(tbl, lookups);
409
410         rcu_read_lock_bh();
411         nht = rcu_dereference_bh(tbl->nht);
412         hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
413
414         for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
415              n != NULL;
416              n = rcu_dereference_bh(n->next)) {
417                 if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
418                         if (!atomic_inc_not_zero(&n->refcnt))
419                                 n = NULL;
420                         NEIGH_CACHE_STAT_INC(tbl, hits);
421                         break;
422                 }
423         }
424
425         rcu_read_unlock_bh();
426         return n;
427 }
428 EXPORT_SYMBOL(neigh_lookup);
429
430 struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
431                                      const void *pkey)
432 {
433         struct neighbour *n;
434         int key_len = tbl->key_len;
435         u32 hash_val;
436         struct neigh_hash_table *nht;
437
438         NEIGH_CACHE_STAT_INC(tbl, lookups);
439
440         rcu_read_lock_bh();
441         nht = rcu_dereference_bh(tbl->nht);
442         hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);
443
444         for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
445              n != NULL;
446              n = rcu_dereference_bh(n->next)) {
447                 if (!memcmp(n->primary_key, pkey, key_len) &&
448                     net_eq(dev_net(n->dev), net)) {
449                         if (!atomic_inc_not_zero(&n->refcnt))
450                                 n = NULL;
451                         NEIGH_CACHE_STAT_INC(tbl, hits);
452                         break;
453                 }
454         }
455
456         rcu_read_unlock_bh();
457         return n;
458 }
459 EXPORT_SYMBOL(neigh_lookup_nodev);
460
461 struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
462                                  struct net_device *dev, bool want_ref)
463 {
464         u32 hash_val;
465         int key_len = tbl->key_len;
466         int error;
467         struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
468         struct neigh_hash_table *nht;
469
470         if (!n) {
471                 rc = ERR_PTR(-ENOBUFS);
472                 goto out;
473         }
474
475         memcpy(n->primary_key, pkey, key_len);
476         n->dev = dev;
477         dev_hold(dev);
478
479         /* Protocol specific setup. */
480         if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
481                 rc = ERR_PTR(error);
482                 goto out_neigh_release;
483         }
484
485         if (dev->netdev_ops->ndo_neigh_construct) {
486                 error = dev->netdev_ops->ndo_neigh_construct(n);
487                 if (error < 0) {
488                         rc = ERR_PTR(error);
489                         goto out_neigh_release;
490                 }
491         }
492
493         /* Device specific setup. */
494         if (n->parms->neigh_setup &&
495             (error = n->parms->neigh_setup(n)) < 0) {
496                 rc = ERR_PTR(error);
497                 goto out_neigh_release;
498         }
499
500         n->confirmed = jiffies - (n->parms->base_reachable_time << 1);
501
502         write_lock_bh(&tbl->lock);
503         nht = rcu_dereference_protected(tbl->nht,
504                                         lockdep_is_held(&tbl->lock));
505
506         if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
507                 nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
508
509         hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
510
511         if (n->parms->dead) {
512                 rc = ERR_PTR(-EINVAL);
513                 goto out_tbl_unlock;
514         }
515
516         for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
517                                             lockdep_is_held(&tbl->lock));
518              n1 != NULL;
519              n1 = rcu_dereference_protected(n1->next,
520                         lockdep_is_held(&tbl->lock))) {
521                 if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
522                         if (want_ref)
523                                 neigh_hold(n1);
524                         rc = n1;
525                         goto out_tbl_unlock;
526                 }
527         }
528
529         n->dead = 0;
530         if (want_ref)
531                 neigh_hold(n);
532         rcu_assign_pointer(n->next,
533                            rcu_dereference_protected(nht->hash_buckets[hash_val],
534                                                      lockdep_is_held(&tbl->lock)));
535         rcu_assign_pointer(nht->hash_buckets[hash_val], n);
536         write_unlock_bh(&tbl->lock);
537         neigh_dbg(2, "neigh %p is created\n", n);
538         rc = n;
539 out:
540         return rc;
541 out_tbl_unlock:
542         write_unlock_bh(&tbl->lock);
543 out_neigh_release:
544         neigh_release(n);
545         goto out;
546 }
547 EXPORT_SYMBOL(__neigh_create);
548
549 static u32 pneigh_hash(const void *pkey, int key_len)
550 {
551         u32 hash_val = *(u32 *)(pkey + key_len - 4);
552         hash_val ^= (hash_val >> 16);
553         hash_val ^= hash_val >> 8;
554         hash_val ^= hash_val >> 4;
555         hash_val &= PNEIGH_HASHMASK;
556         return hash_val;
557 }
558
559 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
560                                               struct net *net,
561                                               const void *pkey,
562                                               int key_len,
563                                               struct net_device *dev)
564 {
565         while (n) {
566                 if (!memcmp(n->key, pkey, key_len) &&
567                     net_eq(pneigh_net(n), net) &&
568                     (n->dev == dev || !n->dev))
569                         return n;
570                 n = n->next;
571         }
572         return NULL;
573 }
574
575 struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
576                 struct net *net, const void *pkey, struct net_device *dev)
577 {
578         int key_len = tbl->key_len;
579         u32 hash_val = pneigh_hash(pkey, key_len);
580
581         return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
582                                  net, pkey, key_len, dev);
583 }
584 EXPORT_SYMBOL_GPL(__pneigh_lookup);
585
586 struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
587                                     struct net *net, const void *pkey,
588                                     struct net_device *dev, int creat)
589 {
590         struct pneigh_entry *n;
591         int key_len = tbl->key_len;
592         u32 hash_val = pneigh_hash(pkey, key_len);
593
594         read_lock_bh(&tbl->lock);
595         n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
596                               net, pkey, key_len, dev);
597         read_unlock_bh(&tbl->lock);
598
599         if (n || !creat)
600                 goto out;
601
602         ASSERT_RTNL();
603
604         n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
605         if (!n)
606                 goto out;
607
608         write_pnet(&n->net, hold_net(net));
609         memcpy(n->key, pkey, key_len);
610         n->dev = dev;
611         if (dev)
612                 dev_hold(dev);
613
614         if (tbl->pconstructor && tbl->pconstructor(n)) {
615                 if (dev)
616                         dev_put(dev);
617                 release_net(net);
618                 kfree(n);
619                 n = NULL;
620                 goto out;
621         }
622
623         write_lock_bh(&tbl->lock);
624         n->next = tbl->phash_buckets[hash_val];
625         tbl->phash_buckets[hash_val] = n;
626         write_unlock_bh(&tbl->lock);
627 out:
628         return n;
629 }
630 EXPORT_SYMBOL(pneigh_lookup);
631
632
633 int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
634                   struct net_device *dev)
635 {
636         struct pneigh_entry *n, **np;
637         int key_len = tbl->key_len;
638         u32 hash_val = pneigh_hash(pkey, key_len);
639
640         write_lock_bh(&tbl->lock);
641         for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
642              np = &n->next) {
643                 if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
644                     net_eq(pneigh_net(n), net)) {
645                         *np = n->next;
646                         write_unlock_bh(&tbl->lock);
647                         if (tbl->pdestructor)
648                                 tbl->pdestructor(n);
649                         if (n->dev)
650                                 dev_put(n->dev);
651                         release_net(pneigh_net(n));
652                         kfree(n);
653                         return 0;
654                 }
655         }
656         write_unlock_bh(&tbl->lock);
657         return -ENOENT;
658 }
659
660 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
661 {
662         struct pneigh_entry *n, **np;
663         u32 h;
664
665         for (h = 0; h <= PNEIGH_HASHMASK; h++) {
666                 np = &tbl->phash_buckets[h];
667                 while ((n = *np) != NULL) {
668                         if (!dev || n->dev == dev) {
669                                 *np = n->next;
670                                 if (tbl->pdestructor)
671                                         tbl->pdestructor(n);
672                                 if (n->dev)
673                                         dev_put(n->dev);
674                                 release_net(pneigh_net(n));
675                                 kfree(n);
676                                 continue;
677                         }
678                         np = &n->next;
679                 }
680         }
681         return -ENOENT;
682 }
683
684 static void neigh_parms_destroy(struct neigh_parms *parms);
685
686 static inline void neigh_parms_put(struct neigh_parms *parms)
687 {
688         if (atomic_dec_and_test(&parms->refcnt))
689                 neigh_parms_destroy(parms);
690 }
691
692 /*
693  *      neighbour must already be out of the table;
694  *
695  */
696 void neigh_destroy(struct neighbour *neigh)
697 {
698         struct net_device *dev = neigh->dev;
699
700         NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
701
702         if (!neigh->dead) {
703                 pr_warn("Destroying alive neighbour %p\n", neigh);
704                 dump_stack();
705                 return;
706         }
707
708         if (neigh_del_timer(neigh))
709                 pr_warn("Impossible event\n");
710
711         skb_queue_purge(&neigh->arp_queue);
712         neigh->arp_queue_len_bytes = 0;
713
714         if (dev->netdev_ops->ndo_neigh_destroy)
715                 dev->netdev_ops->ndo_neigh_destroy(neigh);
716
717         dev_put(dev);
718         neigh_parms_put(neigh->parms);
719
720         neigh_dbg(2, "neigh %p is destroyed\n", neigh);
721
722         atomic_dec(&neigh->tbl->entries);
723         kfree_rcu(neigh, rcu);
724 }
725 EXPORT_SYMBOL(neigh_destroy);
726
727 /* Neighbour state is suspicious;
728    disable fast path.
729
730    Called with write_locked neigh.
731  */
732 static void neigh_suspect(struct neighbour *neigh)
733 {
734         neigh_dbg(2, "neigh %p is suspected\n", neigh);
735
736         neigh->output = neigh->ops->output;
737 }
738
739 /* Neighbour state is OK;
740    enable fast path.
741
742    Called with write_locked neigh.
743  */
744 static void neigh_connect(struct neighbour *neigh)
745 {
746         neigh_dbg(2, "neigh %p is connected\n", neigh);
747
748         neigh->output = neigh->ops->connected_output;
749 }
750
751 static void neigh_periodic_work(struct work_struct *work)
752 {
753         struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
754         struct neighbour *n;
755         struct neighbour __rcu **np;
756         unsigned int i;
757         struct neigh_hash_table *nht;
758
759         NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
760
761         write_lock_bh(&tbl->lock);
762         nht = rcu_dereference_protected(tbl->nht,
763                                         lockdep_is_held(&tbl->lock));
764
765         if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
766                 goto out;
767
768         /*
769          *      periodically recompute ReachableTime from random function
770          */
771
772         if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
773                 struct neigh_parms *p;
774                 tbl->last_rand = jiffies;
775                 for (p = &tbl->parms; p; p = p->next)
776                         p->reachable_time =
777                                 neigh_rand_reach_time(p->base_reachable_time);
778         }
779
780         for (i = 0 ; i < (1 << nht->hash_shift); i++) {
781                 np = &nht->hash_buckets[i];
782
783                 while ((n = rcu_dereference_protected(*np,
784                                 lockdep_is_held(&tbl->lock))) != NULL) {
785                         unsigned int state;
786
787                         write_lock(&n->lock);
788
789                         state = n->nud_state;
790                         if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
791                                 write_unlock(&n->lock);
792                                 goto next_elt;
793                         }
794
795                         if (time_before(n->used, n->confirmed))
796                                 n->used = n->confirmed;
797
798                         if (atomic_read(&n->refcnt) == 1 &&
799                             (state == NUD_FAILED ||
800                              time_after(jiffies, n->used + n->parms->gc_staletime))) {
801                                 *np = n->next;
802                                 n->dead = 1;
803                                 write_unlock(&n->lock);
804                                 neigh_cleanup_and_release(n);
805                                 continue;
806                         }
807                         write_unlock(&n->lock);
808
809 next_elt:
810                         np = &n->next;
811                 }
812                 /*
813                  * It's fine to release lock here, even if hash table
814                  * grows while we are preempted.
815                  */
816                 write_unlock_bh(&tbl->lock);
817                 cond_resched();
818                 write_lock_bh(&tbl->lock);
819                 nht = rcu_dereference_protected(tbl->nht,
820                                                 lockdep_is_held(&tbl->lock));
821         }
822 out:
823         /* Cycle through all hash buckets every base_reachable_time/2 ticks.
824          * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
825          * base_reachable_time.
826          */
827         schedule_delayed_work(&tbl->gc_work,
828                               tbl->parms.base_reachable_time >> 1);
829         write_unlock_bh(&tbl->lock);
830 }
831
832 static __inline__ int neigh_max_probes(struct neighbour *n)
833 {
834         struct neigh_parms *p = n->parms;
835         return (n->nud_state & NUD_PROBE) ?
836                 p->ucast_probes :
837                 p->ucast_probes + p->app_probes + p->mcast_probes;
838 }
839
840 static void neigh_invalidate(struct neighbour *neigh)
841         __releases(neigh->lock)
842         __acquires(neigh->lock)
843 {
844         struct sk_buff *skb;
845
846         NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
847         neigh_dbg(2, "neigh %p is failed\n", neigh);
848         neigh->updated = jiffies;
849
850         /* It is very thin place. report_unreachable is very complicated
851            routine. Particularly, it can hit the same neighbour entry!
852
853            So that, we try to be accurate and avoid dead loop. --ANK
854          */
855         while (neigh->nud_state == NUD_FAILED &&
856                (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
857                 write_unlock(&neigh->lock);
858                 neigh->ops->error_report(neigh, skb);
859                 write_lock(&neigh->lock);
860         }
861         skb_queue_purge(&neigh->arp_queue);
862         neigh->arp_queue_len_bytes = 0;
863 }
864
865 static void neigh_probe(struct neighbour *neigh)
866         __releases(neigh->lock)
867 {
868         struct sk_buff *skb = skb_peek(&neigh->arp_queue);
869         /* keep skb alive even if arp_queue overflows */
870         if (skb)
871                 skb = skb_copy(skb, GFP_ATOMIC);
872         write_unlock(&neigh->lock);
873         neigh->ops->solicit(neigh, skb);
874         atomic_inc(&neigh->probes);
875         kfree_skb(skb);
876 }
877
878 /* Called when a timer expires for a neighbour entry. */
879
880 static void neigh_timer_handler(unsigned long arg)
881 {
882         unsigned long now, next;
883         struct neighbour *neigh = (struct neighbour *)arg;
884         unsigned int state;
885         int notify = 0;
886
887         write_lock(&neigh->lock);
888
889         state = neigh->nud_state;
890         now = jiffies;
891         next = now + HZ;
892
893         if (!(state & NUD_IN_TIMER))
894                 goto out;
895
896         if (state & NUD_REACHABLE) {
897                 if (time_before_eq(now,
898                                    neigh->confirmed + neigh->parms->reachable_time)) {
899                         neigh_dbg(2, "neigh %p is still alive\n", neigh);
900                         next = neigh->confirmed + neigh->parms->reachable_time;
901                 } else if (time_before_eq(now,
902                                           neigh->used + neigh->parms->delay_probe_time)) {
903                         neigh_dbg(2, "neigh %p is delayed\n", neigh);
904                         neigh->nud_state = NUD_DELAY;
905                         neigh->updated = jiffies;
906                         neigh_suspect(neigh);
907                         next = now + neigh->parms->delay_probe_time;
908                 } else {
909                         neigh_dbg(2, "neigh %p is suspected\n", neigh);
910                         neigh->nud_state = NUD_STALE;
911                         neigh->updated = jiffies;
912                         neigh_suspect(neigh);
913                         notify = 1;
914                 }
915         } else if (state & NUD_DELAY) {
916                 if (time_before_eq(now,
917                                    neigh->confirmed + neigh->parms->delay_probe_time)) {
918                         neigh_dbg(2, "neigh %p is now reachable\n", neigh);
919                         neigh->nud_state = NUD_REACHABLE;
920                         neigh->updated = jiffies;
921                         neigh_connect(neigh);
922                         notify = 1;
923                         next = neigh->confirmed + neigh->parms->reachable_time;
924                 } else {
925                         neigh_dbg(2, "neigh %p is probed\n", neigh);
926                         neigh->nud_state = NUD_PROBE;
927                         neigh->updated = jiffies;
928                         atomic_set(&neigh->probes, 0);
929                         next = now + neigh->parms->retrans_time;
930                 }
931         } else {
932                 /* NUD_PROBE|NUD_INCOMPLETE */
933                 next = now + neigh->parms->retrans_time;
934         }
935
936         if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
937             atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
938                 neigh->nud_state = NUD_FAILED;
939                 notify = 1;
940                 neigh_invalidate(neigh);
941         }
942
943         if (neigh->nud_state & NUD_IN_TIMER) {
944                 if (time_before(next, jiffies + HZ/2))
945                         next = jiffies + HZ/2;
946                 if (!mod_timer(&neigh->timer, next))
947                         neigh_hold(neigh);
948         }
949         if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
950                 neigh_probe(neigh);
951         } else {
952 out:
953                 write_unlock(&neigh->lock);
954         }
955
956         if (notify)
957                 neigh_update_notify(neigh);
958
959         neigh_release(neigh);
960 }
961
962 int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
963 {
964         int rc;
965         bool immediate_probe = false;
966
967         write_lock_bh(&neigh->lock);
968
969         rc = 0;
970         if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
971                 goto out_unlock_bh;
972
973         if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
974                 if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
975                         unsigned long next, now = jiffies;
976
977                         atomic_set(&neigh->probes, neigh->parms->ucast_probes);
978                         neigh->nud_state     = NUD_INCOMPLETE;
979                         neigh->updated = now;
980                         next = now + max(neigh->parms->retrans_time, HZ/2);
981                         neigh_add_timer(neigh, next);
982                         immediate_probe = true;
983                 } else {
984                         neigh->nud_state = NUD_FAILED;
985                         neigh->updated = jiffies;
986                         write_unlock_bh(&neigh->lock);
987
988                         kfree_skb(skb);
989                         return 1;
990                 }
991         } else if (neigh->nud_state & NUD_STALE) {
992                 neigh_dbg(2, "neigh %p is delayed\n", neigh);
993                 neigh->nud_state = NUD_DELAY;
994                 neigh->updated = jiffies;
995                 neigh_add_timer(neigh,
996                                 jiffies + neigh->parms->delay_probe_time);
997         }
998
999         if (neigh->nud_state == NUD_INCOMPLETE) {
1000                 if (skb) {
1001                         while (neigh->arp_queue_len_bytes + skb->truesize >
1002                                neigh->parms->queue_len_bytes) {
1003                                 struct sk_buff *buff;
1004
1005                                 buff = __skb_dequeue(&neigh->arp_queue);
1006                                 if (!buff)
1007                                         break;
1008                                 neigh->arp_queue_len_bytes -= buff->truesize;
1009                                 kfree_skb(buff);
1010                                 NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
1011                         }
1012                         skb_dst_force(skb);
1013                         __skb_queue_tail(&neigh->arp_queue, skb);
1014                         neigh->arp_queue_len_bytes += skb->truesize;
1015                 }
1016                 rc = 1;
1017         }
1018 out_unlock_bh:
1019         if (immediate_probe)
1020                 neigh_probe(neigh);
1021         else
1022                 write_unlock(&neigh->lock);
1023         local_bh_enable();
1024         return rc;
1025 }
1026 EXPORT_SYMBOL(__neigh_event_send);
1027
1028 static void neigh_update_hhs(struct neighbour *neigh)
1029 {
1030         struct hh_cache *hh;
1031         void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1032                 = NULL;
1033
1034         if (neigh->dev->header_ops)
1035                 update = neigh->dev->header_ops->cache_update;
1036
1037         if (update) {
1038                 hh = &neigh->hh;
1039                 if (hh->hh_len) {
1040                         write_seqlock_bh(&hh->hh_lock);
1041                         update(hh, neigh->dev, neigh->ha);
1042                         write_sequnlock_bh(&hh->hh_lock);
1043                 }
1044         }
1045 }
1046
1047
1048
1049 /* Generic update routine.
1050    -- lladdr is new lladdr or NULL, if it is not supplied.
1051    -- new    is new state.
1052    -- flags
1053         NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
1054                                 if it is different.
1055         NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
1056                                 lladdr instead of overriding it
1057                                 if it is different.
1058                                 It also allows to retain current state
1059                                 if lladdr is unchanged.
1060         NEIGH_UPDATE_F_ADMIN    means that the change is administrative.
1061
1062         NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
1063                                 NTF_ROUTER flag.
1064         NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
1065                                 a router.
1066
1067    Caller MUST hold reference count on the entry.
1068  */
1069
1070 int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
1071                  u32 flags)
1072 {
1073         u8 old;
1074         int err;
1075         int notify = 0;
1076         struct net_device *dev;
1077         int update_isrouter = 0;
1078
1079         write_lock_bh(&neigh->lock);
1080
1081         dev    = neigh->dev;
1082         old    = neigh->nud_state;
1083         err    = -EPERM;
1084
1085         if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
1086             (old & (NUD_NOARP | NUD_PERMANENT)))
1087                 goto out;
1088
1089         if (!(new & NUD_VALID)) {
1090                 neigh_del_timer(neigh);
1091                 if (old & NUD_CONNECTED)
1092                         neigh_suspect(neigh);
1093                 neigh->nud_state = new;
1094                 err = 0;
1095                 notify = old & NUD_VALID;
1096                 if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
1097                     (new & NUD_FAILED)) {
1098                         neigh_invalidate(neigh);
1099                         notify = 1;
1100                 }
1101                 goto out;
1102         }
1103
1104         /* Compare new lladdr with cached one */
1105         if (!dev->addr_len) {
1106                 /* First case: device needs no address. */
1107                 lladdr = neigh->ha;
1108         } else if (lladdr) {
1109                 /* The second case: if something is already cached
1110                    and a new address is proposed:
1111                    - compare new & old
1112                    - if they are different, check override flag
1113                  */
1114                 if ((old & NUD_VALID) &&
1115                     !memcmp(lladdr, neigh->ha, dev->addr_len))
1116                         lladdr = neigh->ha;
1117         } else {
1118                 /* No address is supplied; if we know something,
1119                    use it, otherwise discard the request.
1120                  */
1121                 err = -EINVAL;
1122                 if (!(old & NUD_VALID))
1123                         goto out;
1124                 lladdr = neigh->ha;
1125         }
1126
1127         if (new & NUD_CONNECTED)
1128                 neigh->confirmed = jiffies;
1129         neigh->updated = jiffies;
1130
1131         /* If entry was valid and address is not changed,
1132            do not change entry state, if new one is STALE.
1133          */
1134         err = 0;
1135         update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
1136         if (old & NUD_VALID) {
1137                 if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
1138                         update_isrouter = 0;
1139                         if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
1140                             (old & NUD_CONNECTED)) {
1141                                 lladdr = neigh->ha;
1142                                 new = NUD_STALE;
1143                         } else
1144                                 goto out;
1145                 } else {
1146                         if (lladdr == neigh->ha && new == NUD_STALE &&
1147                             ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
1148                              (old & NUD_CONNECTED))
1149                             )
1150                                 new = old;
1151                 }
1152         }
1153
1154         if (new != old) {
1155                 neigh_del_timer(neigh);
1156                 if (new & NUD_IN_TIMER)
1157                         neigh_add_timer(neigh, (jiffies +
1158                                                 ((new & NUD_REACHABLE) ?
1159                                                  neigh->parms->reachable_time :
1160                                                  0)));
1161                 neigh->nud_state = new;
1162         }
1163
1164         if (lladdr != neigh->ha) {
1165                 write_seqlock(&neigh->ha_lock);
1166                 memcpy(&neigh->ha, lladdr, dev->addr_len);
1167                 write_sequnlock(&neigh->ha_lock);
1168                 neigh_update_hhs(neigh);
1169                 if (!(new & NUD_CONNECTED))
1170                         neigh->confirmed = jiffies -
1171                                       (neigh->parms->base_reachable_time << 1);
1172                 notify = 1;
1173         }
1174         if (new == old)
1175                 goto out;
1176         if (new & NUD_CONNECTED)
1177                 neigh_connect(neigh);
1178         else
1179                 neigh_suspect(neigh);
1180         if (!(old & NUD_VALID)) {
1181                 struct sk_buff *skb;
1182
1183                 /* Again: avoid dead loop if something went wrong */
1184
1185                 while (neigh->nud_state & NUD_VALID &&
1186                        (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
1187                         struct dst_entry *dst = skb_dst(skb);
1188                         struct neighbour *n2, *n1 = neigh;
1189                         write_unlock_bh(&neigh->lock);
1190
1191                         rcu_read_lock();
1192
1193                         /* Why not just use 'neigh' as-is?  The problem is that
1194                          * things such as shaper, eql, and sch_teql can end up
1195                          * using alternative, different, neigh objects to output
1196                          * the packet in the output path.  So what we need to do
1197                          * here is re-lookup the top-level neigh in the path so
1198                          * we can reinject the packet there.
1199                          */
1200                         n2 = NULL;
1201                         if (dst) {
1202                                 n2 = dst_neigh_lookup_skb(dst, skb);
1203                                 if (n2)
1204                                         n1 = n2;
1205                         }
1206                         n1->output(n1, skb);
1207                         if (n2)
1208                                 neigh_release(n2);
1209                         rcu_read_unlock();
1210
1211                         write_lock_bh(&neigh->lock);
1212                 }
1213                 skb_queue_purge(&neigh->arp_queue);
1214                 neigh->arp_queue_len_bytes = 0;
1215         }
1216 out:
1217         if (update_isrouter) {
1218                 neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
1219                         (neigh->flags | NTF_ROUTER) :
1220                         (neigh->flags & ~NTF_ROUTER);
1221         }
1222         write_unlock_bh(&neigh->lock);
1223
1224         if (notify)
1225                 neigh_update_notify(neigh);
1226
1227         return err;
1228 }
1229 EXPORT_SYMBOL(neigh_update);
1230
1231 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1232                                  u8 *lladdr, void *saddr,
1233                                  struct net_device *dev)
1234 {
1235         struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1236                                                  lladdr || !dev->addr_len);
1237         if (neigh)
1238                 neigh_update(neigh, lladdr, NUD_STALE,
1239                              NEIGH_UPDATE_F_OVERRIDE);
1240         return neigh;
1241 }
1242 EXPORT_SYMBOL(neigh_event_ns);
1243
1244 /* called with read_lock_bh(&n->lock); */
1245 static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
1246 {
1247         struct net_device *dev = dst->dev;
1248         __be16 prot = dst->ops->protocol;
1249         struct hh_cache *hh = &n->hh;
1250
1251         write_lock_bh(&n->lock);
1252
1253         /* Only one thread can come in here and initialize the
1254          * hh_cache entry.
1255          */
1256         if (!hh->hh_len)
1257                 dev->header_ops->cache(n, hh, prot);
1258
1259         write_unlock_bh(&n->lock);
1260 }
1261
1262 /* This function can be used in contexts, where only old dev_queue_xmit
1263  * worked, f.e. if you want to override normal output path (eql, shaper),
1264  * but resolution is not made yet.
1265  */
1266
1267 int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
1268 {
1269         struct net_device *dev = skb->dev;
1270
1271         __skb_pull(skb, skb_network_offset(skb));
1272
1273         if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
1274                             skb->len) < 0 &&
1275             dev->header_ops->rebuild(skb))
1276                 return 0;
1277
1278         return dev_queue_xmit(skb);
1279 }
1280 EXPORT_SYMBOL(neigh_compat_output);
1281
1282 /* Slow and careful. */
1283
1284 int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
1285 {
1286         struct dst_entry *dst = skb_dst(skb);
1287         int rc = 0;
1288
1289         if (!dst)
1290                 goto discard;
1291
1292         if (!neigh_event_send(neigh, skb)) {
1293                 int err;
1294                 struct net_device *dev = neigh->dev;
1295                 unsigned int seq;
1296
1297                 if (dev->header_ops->cache && !neigh->hh.hh_len)
1298                         neigh_hh_init(neigh, dst);
1299
1300                 do {
1301                         __skb_pull(skb, skb_network_offset(skb));
1302                         seq = read_seqbegin(&neigh->ha_lock);
1303                         err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1304                                               neigh->ha, NULL, skb->len);
1305                 } while (read_seqretry(&neigh->ha_lock, seq));
1306
1307                 if (err >= 0)
1308                         rc = dev_queue_xmit(skb);
1309                 else
1310                         goto out_kfree_skb;
1311         }
1312 out:
1313         return rc;
1314 discard:
1315         neigh_dbg(1, "%s: dst=%p neigh=%p\n", __func__, dst, neigh);
1316 out_kfree_skb:
1317         rc = -EINVAL;
1318         kfree_skb(skb);
1319         goto out;
1320 }
1321 EXPORT_SYMBOL(neigh_resolve_output);
1322
1323 /* As fast as possible without hh cache */
1324
1325 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1326 {
1327         struct net_device *dev = neigh->dev;
1328         unsigned int seq;
1329         int err;
1330
1331         do {
1332                 __skb_pull(skb, skb_network_offset(skb));
1333                 seq = read_seqbegin(&neigh->ha_lock);
1334                 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1335                                       neigh->ha, NULL, skb->len);
1336         } while (read_seqretry(&neigh->ha_lock, seq));
1337
1338         if (err >= 0)
1339                 err = dev_queue_xmit(skb);
1340         else {
1341                 err = -EINVAL;
1342                 kfree_skb(skb);
1343         }
1344         return err;
1345 }
1346 EXPORT_SYMBOL(neigh_connected_output);
1347
/* Output path that needs no neighbour processing at all: @neigh is not
 * looked at and @skb goes straight to dev_queue_xmit().
 */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
        int rc = dev_queue_xmit(skb);

        return rc;
}
1352 EXPORT_SYMBOL(neigh_direct_output);
1353
1354 static void neigh_proxy_process(unsigned long arg)
1355 {
1356         struct neigh_table *tbl = (struct neigh_table *)arg;
1357         long sched_next = 0;
1358         unsigned long now = jiffies;
1359         struct sk_buff *skb, *n;
1360
1361         spin_lock(&tbl->proxy_queue.lock);
1362
1363         skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
1364                 long tdif = NEIGH_CB(skb)->sched_next - now;
1365
1366                 if (tdif <= 0) {
1367                         struct net_device *dev = skb->dev;
1368
1369                         __skb_unlink(skb, &tbl->proxy_queue);
1370                         if (tbl->proxy_redo && netif_running(dev)) {
1371                                 rcu_read_lock();
1372                                 tbl->proxy_redo(skb);
1373                                 rcu_read_unlock();
1374                         } else {
1375                                 kfree_skb(skb);
1376                         }
1377
1378                         dev_put(dev);
1379                 } else if (!sched_next || tdif < sched_next)
1380                         sched_next = tdif;
1381         }
1382         del_timer(&tbl->proxy_timer);
1383         if (sched_next)
1384                 mod_timer(&tbl->proxy_timer, jiffies + sched_next);
1385         spin_unlock(&tbl->proxy_queue.lock);
1386 }
1387
1388 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1389                     struct sk_buff *skb)
1390 {
1391         unsigned long now = jiffies;
1392         unsigned long sched_next = now + (net_random() % p->proxy_delay);
1393
1394         if (tbl->proxy_queue.qlen > p->proxy_qlen) {
1395                 kfree_skb(skb);
1396                 return;
1397         }
1398
1399         NEIGH_CB(skb)->sched_next = sched_next;
1400         NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1401
1402         spin_lock(&tbl->proxy_queue.lock);
1403         if (del_timer(&tbl->proxy_timer)) {
1404                 if (time_before(tbl->proxy_timer.expires, sched_next))
1405                         sched_next = tbl->proxy_timer.expires;
1406         }
1407         skb_dst_drop(skb);
1408         dev_hold(skb->dev);
1409         __skb_queue_tail(&tbl->proxy_queue, skb);
1410         mod_timer(&tbl->proxy_timer, sched_next);
1411         spin_unlock(&tbl->proxy_queue.lock);
1412 }
1413 EXPORT_SYMBOL(pneigh_enqueue);
1414
1415 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1416                                                       struct net *net, int ifindex)
1417 {
1418         struct neigh_parms *p;
1419
1420         for (p = &tbl->parms; p; p = p->next) {
1421                 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1422                     (!p->dev && !ifindex))
1423                         return p;
1424         }
1425
1426         return NULL;
1427 }
1428
1429 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1430                                       struct neigh_table *tbl)
1431 {
1432         struct neigh_parms *p, *ref;
1433         struct net *net = dev_net(dev);
1434         const struct net_device_ops *ops = dev->netdev_ops;
1435
1436         ref = lookup_neigh_parms(tbl, net, 0);
1437         if (!ref)
1438                 return NULL;
1439
1440         p = kmemdup(ref, sizeof(*p), GFP_KERNEL);
1441         if (p) {
1442                 p->tbl            = tbl;
1443                 atomic_set(&p->refcnt, 1);
1444                 p->reachable_time =
1445                                 neigh_rand_reach_time(p->base_reachable_time);
1446
1447                 if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1448                         kfree(p);
1449                         return NULL;
1450                 }
1451
1452                 dev_hold(dev);
1453                 p->dev = dev;
1454                 write_pnet(&p->net, hold_net(net));
1455                 p->sysctl_table = NULL;
1456                 write_lock_bh(&tbl->lock);
1457                 p->next         = tbl->parms.next;
1458                 tbl->parms.next = p;
1459                 write_unlock_bh(&tbl->lock);
1460         }
1461         return p;
1462 }
1463 EXPORT_SYMBOL(neigh_parms_alloc);
1464
1465 static void neigh_rcu_free_parms(struct rcu_head *head)
1466 {
1467         struct neigh_parms *parms =
1468                 container_of(head, struct neigh_parms, rcu_head);
1469
1470         neigh_parms_put(parms);
1471 }
1472
1473 void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1474 {
1475         struct neigh_parms **p;
1476
1477         if (!parms || parms == &tbl->parms)
1478                 return;
1479         write_lock_bh(&tbl->lock);
1480         for (p = &tbl->parms.next; *p; p = &(*p)->next) {
1481                 if (*p == parms) {
1482                         *p = parms->next;
1483                         parms->dead = 1;
1484                         write_unlock_bh(&tbl->lock);
1485                         if (parms->dev)
1486                                 dev_put(parms->dev);
1487                         call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
1488                         return;
1489                 }
1490         }
1491         write_unlock_bh(&tbl->lock);
1492         neigh_dbg(1, "%s: not found\n", __func__);
1493 }
1494 EXPORT_SYMBOL(neigh_parms_release);
1495
/* Final teardown of @parms: drop the namespace reference they hold and
 * free the memory.
 */
static void neigh_parms_destroy(struct neigh_parms *parms)
{
        struct net *net = neigh_parms_net(parms);

        release_net(net);
        kfree(parms);
}
1501
1502 static struct lock_class_key neigh_table_proxy_queue_class;
1503
1504 static void neigh_table_init_no_netlink(struct neigh_table *tbl)
1505 {
1506         unsigned long now = jiffies;
1507         unsigned long phsize;
1508
1509         write_pnet(&tbl->parms.net, &init_net);
1510         atomic_set(&tbl->parms.refcnt, 1);
1511         tbl->parms.reachable_time =
1512                           neigh_rand_reach_time(tbl->parms.base_reachable_time);
1513
1514         tbl->stats = alloc_percpu(struct neigh_statistics);
1515         if (!tbl->stats)
1516                 panic("cannot create neighbour cache statistics");
1517
1518 #ifdef CONFIG_PROC_FS
1519         if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
1520                               &neigh_stat_seq_fops, tbl))
1521                 panic("cannot create neighbour proc dir entry");
1522 #endif
1523
1524         RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));
1525
1526         phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
1527         tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
1528
1529         if (!tbl->nht || !tbl->phash_buckets)
1530                 panic("cannot allocate neighbour cache hashes");
1531
1532         if (!tbl->entry_size)
1533                 tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
1534                                         tbl->key_len, NEIGH_PRIV_ALIGN);
1535         else
1536                 WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);
1537
1538         rwlock_init(&tbl->lock);
1539         INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
1540         schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
1541         setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
1542         skb_queue_head_init_class(&tbl->proxy_queue,
1543                         &neigh_table_proxy_queue_class);
1544
1545         tbl->last_flush = now;
1546         tbl->last_rand  = now + tbl->parms.reachable_time * 20;
1547 }
1548
1549 void neigh_table_init(struct neigh_table *tbl)
1550 {
1551         struct neigh_table *tmp;
1552
1553         neigh_table_init_no_netlink(tbl);
1554         write_lock(&neigh_tbl_lock);
1555         for (tmp = neigh_tables; tmp; tmp = tmp->next) {
1556                 if (tmp->family == tbl->family)
1557                         break;
1558         }
1559         tbl->next       = neigh_tables;
1560         neigh_tables    = tbl;
1561         write_unlock(&neigh_tbl_lock);
1562
1563         if (unlikely(tmp)) {
1564                 pr_err("Registering multiple tables for family %d\n",
1565                        tbl->family);
1566                 dump_stack();
1567         }
1568 }
1569 EXPORT_SYMBOL(neigh_table_init);
1570
1571 int neigh_table_clear(struct neigh_table *tbl)
1572 {
1573         struct neigh_table **tp;
1574
1575         /* It is not clean... Fix it to unload IPv6 module safely */
1576         cancel_delayed_work_sync(&tbl->gc_work);
1577         del_timer_sync(&tbl->proxy_timer);
1578         pneigh_queue_purge(&tbl->proxy_queue);
1579         neigh_ifdown(tbl, NULL);
1580         if (atomic_read(&tbl->entries))
1581                 pr_crit("neighbour leakage\n");
1582         write_lock(&neigh_tbl_lock);
1583         for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
1584                 if (*tp == tbl) {
1585                         *tp = tbl->next;
1586                         break;
1587                 }
1588         }
1589         write_unlock(&neigh_tbl_lock);
1590
1591         call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
1592                  neigh_hash_free_rcu);
1593         tbl->nht = NULL;
1594
1595         kfree(tbl->phash_buckets);
1596         tbl->phash_buckets = NULL;
1597
1598         remove_proc_entry(tbl->id, init_net.proc_net_stat);
1599
1600         free_percpu(tbl->stats);
1601         tbl->stats = NULL;
1602
1603         return 0;
1604 }
1605 EXPORT_SYMBOL(neigh_table_clear);
1606
1607 static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh)
1608 {
1609         struct net *net = sock_net(skb->sk);
1610         struct ndmsg *ndm;
1611         struct nlattr *dst_attr;
1612         struct neigh_table *tbl;
1613         struct net_device *dev = NULL;
1614         int err = -EINVAL;
1615
1616         ASSERT_RTNL();
1617         if (nlmsg_len(nlh) < sizeof(*ndm))
1618                 goto out;
1619
1620         dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
1621         if (dst_attr == NULL)
1622                 goto out;
1623
1624         ndm = nlmsg_data(nlh);
1625         if (ndm->ndm_ifindex) {
1626                 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1627                 if (dev == NULL) {
1628                         err = -ENODEV;
1629                         goto out;
1630                 }
1631         }
1632
1633         read_lock(&neigh_tbl_lock);
1634         for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1635                 struct neighbour *neigh;
1636
1637                 if (tbl->family != ndm->ndm_family)
1638                         continue;
1639                 read_unlock(&neigh_tbl_lock);
1640
1641                 if (nla_len(dst_attr) < tbl->key_len)
1642                         goto out;
1643
1644                 if (ndm->ndm_flags & NTF_PROXY) {
1645                         err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1646                         goto out;
1647                 }
1648
1649                 if (dev == NULL)
1650                         goto out;
1651
1652                 neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
1653                 if (neigh == NULL) {
1654                         err = -ENOENT;
1655                         goto out;
1656                 }
1657
1658                 err = neigh_update(neigh, NULL, NUD_FAILED,
1659                                    NEIGH_UPDATE_F_OVERRIDE |
1660                                    NEIGH_UPDATE_F_ADMIN);
1661                 neigh_release(neigh);
1662                 goto out;
1663         }
1664         read_unlock(&neigh_tbl_lock);
1665         err = -EAFNOSUPPORT;
1666
1667 out:
1668         return err;
1669 }
1670
1671 static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh)
1672 {
1673         struct net *net = sock_net(skb->sk);
1674         struct ndmsg *ndm;
1675         struct nlattr *tb[NDA_MAX+1];
1676         struct neigh_table *tbl;
1677         struct net_device *dev = NULL;
1678         int err;
1679
1680         ASSERT_RTNL();
1681         err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
1682         if (err < 0)
1683                 goto out;
1684
1685         err = -EINVAL;
1686         if (tb[NDA_DST] == NULL)
1687                 goto out;
1688
1689         ndm = nlmsg_data(nlh);
1690         if (ndm->ndm_ifindex) {
1691                 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1692                 if (dev == NULL) {
1693                         err = -ENODEV;
1694                         goto out;
1695                 }
1696
1697                 if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
1698                         goto out;
1699         }
1700
1701         read_lock(&neigh_tbl_lock);
1702         for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1703                 int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
1704                 struct neighbour *neigh;
1705                 void *dst, *lladdr;
1706
1707                 if (tbl->family != ndm->ndm_family)
1708                         continue;
1709                 read_unlock(&neigh_tbl_lock);
1710
1711                 if (nla_len(tb[NDA_DST]) < tbl->key_len)
1712                         goto out;
1713                 dst = nla_data(tb[NDA_DST]);
1714                 lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
1715
1716                 if (ndm->ndm_flags & NTF_PROXY) {
1717                         struct pneigh_entry *pn;
1718
1719                         err = -ENOBUFS;
1720                         pn = pneigh_lookup(tbl, net, dst, dev, 1);
1721                         if (pn) {
1722                                 pn->flags = ndm->ndm_flags;
1723                                 err = 0;
1724                         }
1725                         goto out;
1726                 }
1727
1728                 if (dev == NULL)
1729                         goto out;
1730
1731                 neigh = neigh_lookup(tbl, dst, dev);
1732                 if (neigh == NULL) {
1733                         if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
1734                                 err = -ENOENT;
1735                                 goto out;
1736                         }
1737
1738                         neigh = __neigh_lookup_errno(tbl, dst, dev);
1739                         if (IS_ERR(neigh)) {
1740                                 err = PTR_ERR(neigh);
1741                                 goto out;
1742                         }
1743                 } else {
1744                         if (nlh->nlmsg_flags & NLM_F_EXCL) {
1745                                 err = -EEXIST;
1746                                 neigh_release(neigh);
1747                                 goto out;
1748                         }
1749
1750                         if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
1751                                 flags &= ~NEIGH_UPDATE_F_OVERRIDE;
1752                 }
1753
1754                 if (ndm->ndm_flags & NTF_USE) {
1755                         neigh_event_send(neigh, NULL);
1756                         err = 0;
1757                 } else
1758                         err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
1759                 neigh_release(neigh);
1760                 goto out;
1761         }
1762
1763         read_unlock(&neigh_tbl_lock);
1764         err = -EAFNOSUPPORT;
1765 out:
1766         return err;
1767 }
1768
1769 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1770 {
1771         struct nlattr *nest;
1772
1773         nest = nla_nest_start(skb, NDTA_PARMS);
1774         if (nest == NULL)
1775                 return -ENOBUFS;
1776
1777         if ((parms->dev &&
1778              nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
1779             nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) ||
1780             nla_put_u32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes) ||
1781             /* approximative value for deprecated QUEUE_LEN (in packets) */
1782             nla_put_u32(skb, NDTPA_QUEUE_LEN,
1783                         parms->queue_len_bytes / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
1784             nla_put_u32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen) ||
1785             nla_put_u32(skb, NDTPA_APP_PROBES, parms->app_probes) ||
1786             nla_put_u32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes) ||
1787             nla_put_u32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes) ||
1788             nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) ||
1789             nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
1790                           parms->base_reachable_time) ||
1791             nla_put_msecs(skb, NDTPA_GC_STALETIME, parms->gc_staletime) ||
1792             nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
1793                           parms->delay_probe_time) ||
1794             nla_put_msecs(skb, NDTPA_RETRANS_TIME, parms->retrans_time) ||
1795             nla_put_msecs(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay) ||
1796             nla_put_msecs(skb, NDTPA_PROXY_DELAY, parms->proxy_delay) ||
1797             nla_put_msecs(skb, NDTPA_LOCKTIME, parms->locktime))
1798                 goto nla_put_failure;
1799         return nla_nest_end(skb, nest);
1800
1801 nla_put_failure:
1802         nla_nest_cancel(skb, nest);
1803         return -EMSGSIZE;
1804 }
1805
1806 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
1807                               u32 pid, u32 seq, int type, int flags)
1808 {
1809         struct nlmsghdr *nlh;
1810         struct ndtmsg *ndtmsg;
1811
1812         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1813         if (nlh == NULL)
1814                 return -EMSGSIZE;
1815
1816         ndtmsg = nlmsg_data(nlh);
1817
1818         read_lock_bh(&tbl->lock);
1819         ndtmsg->ndtm_family = tbl->family;
1820         ndtmsg->ndtm_pad1   = 0;
1821         ndtmsg->ndtm_pad2   = 0;
1822
1823         if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
1824             nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval) ||
1825             nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
1826             nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
1827             nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
1828                 goto nla_put_failure;
1829         {
1830                 unsigned long now = jiffies;
1831                 unsigned int flush_delta = now - tbl->last_flush;
1832                 unsigned int rand_delta = now - tbl->last_rand;
1833                 struct neigh_hash_table *nht;
1834                 struct ndt_config ndc = {
1835                         .ndtc_key_len           = tbl->key_len,
1836                         .ndtc_entry_size        = tbl->entry_size,
1837                         .ndtc_entries           = atomic_read(&tbl->entries),
1838                         .ndtc_last_flush        = jiffies_to_msecs(flush_delta),
1839                         .ndtc_last_rand         = jiffies_to_msecs(rand_delta),
1840                         .ndtc_proxy_qlen        = tbl->proxy_queue.qlen,
1841                 };
1842
1843                 rcu_read_lock_bh();
1844                 nht = rcu_dereference_bh(tbl->nht);
1845                 ndc.ndtc_hash_rnd = nht->hash_rnd[0];
1846                 ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
1847                 rcu_read_unlock_bh();
1848
1849                 if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
1850                         goto nla_put_failure;
1851         }
1852
1853         {
1854                 int cpu;
1855                 struct ndt_stats ndst;
1856
1857                 memset(&ndst, 0, sizeof(ndst));
1858
1859                 for_each_possible_cpu(cpu) {
1860                         struct neigh_statistics *st;
1861
1862                         st = per_cpu_ptr(tbl->stats, cpu);
1863                         ndst.ndts_allocs                += st->allocs;
1864                         ndst.ndts_destroys              += st->destroys;
1865                         ndst.ndts_hash_grows            += st->hash_grows;
1866                         ndst.ndts_res_failed            += st->res_failed;
1867                         ndst.ndts_lookups               += st->lookups;
1868                         ndst.ndts_hits                  += st->hits;
1869                         ndst.ndts_rcv_probes_mcast      += st->rcv_probes_mcast;
1870                         ndst.ndts_rcv_probes_ucast      += st->rcv_probes_ucast;
1871                         ndst.ndts_periodic_gc_runs      += st->periodic_gc_runs;
1872                         ndst.ndts_forced_gc_runs        += st->forced_gc_runs;
1873                 }
1874
1875                 if (nla_put(skb, NDTA_STATS, sizeof(ndst), &ndst))
1876                         goto nla_put_failure;
1877         }
1878
1879         BUG_ON(tbl->parms.dev);
1880         if (neightbl_fill_parms(skb, &tbl->parms) < 0)
1881                 goto nla_put_failure;
1882
1883         read_unlock_bh(&tbl->lock);
1884         return nlmsg_end(skb, nlh);
1885
1886 nla_put_failure:
1887         read_unlock_bh(&tbl->lock);
1888         nlmsg_cancel(skb, nlh);
1889         return -EMSGSIZE;
1890 }
1891
1892 static int neightbl_fill_param_info(struct sk_buff *skb,
1893                                     struct neigh_table *tbl,
1894                                     struct neigh_parms *parms,
1895                                     u32 pid, u32 seq, int type,
1896                                     unsigned int flags)
1897 {
1898         struct ndtmsg *ndtmsg;
1899         struct nlmsghdr *nlh;
1900
1901         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1902         if (nlh == NULL)
1903                 return -EMSGSIZE;
1904
1905         ndtmsg = nlmsg_data(nlh);
1906
1907         read_lock_bh(&tbl->lock);
1908         ndtmsg->ndtm_family = tbl->family;
1909         ndtmsg->ndtm_pad1   = 0;
1910         ndtmsg->ndtm_pad2   = 0;
1911
1912         if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1913             neightbl_fill_parms(skb, parms) < 0)
1914                 goto errout;
1915
1916         read_unlock_bh(&tbl->lock);
1917         return nlmsg_end(skb, nlh);
1918 errout:
1919         read_unlock_bh(&tbl->lock);
1920         nlmsg_cancel(skb, nlh);
1921         return -EMSGSIZE;
1922 }
1923
1924 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
1925         [NDTA_NAME]             = { .type = NLA_STRING },
1926         [NDTA_THRESH1]          = { .type = NLA_U32 },
1927         [NDTA_THRESH2]          = { .type = NLA_U32 },
1928         [NDTA_THRESH3]          = { .type = NLA_U32 },
1929         [NDTA_GC_INTERVAL]      = { .type = NLA_U64 },
1930         [NDTA_PARMS]            = { .type = NLA_NESTED },
1931 };
1932
1933 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
1934         [NDTPA_IFINDEX]                 = { .type = NLA_U32 },
1935         [NDTPA_QUEUE_LEN]               = { .type = NLA_U32 },
1936         [NDTPA_PROXY_QLEN]              = { .type = NLA_U32 },
1937         [NDTPA_APP_PROBES]              = { .type = NLA_U32 },
1938         [NDTPA_UCAST_PROBES]            = { .type = NLA_U32 },
1939         [NDTPA_MCAST_PROBES]            = { .type = NLA_U32 },
1940         [NDTPA_BASE_REACHABLE_TIME]     = { .type = NLA_U64 },
1941         [NDTPA_GC_STALETIME]            = { .type = NLA_U64 },
1942         [NDTPA_DELAY_PROBE_TIME]        = { .type = NLA_U64 },
1943         [NDTPA_RETRANS_TIME]            = { .type = NLA_U64 },
1944         [NDTPA_ANYCAST_DELAY]           = { .type = NLA_U64 },
1945         [NDTPA_PROXY_DELAY]             = { .type = NLA_U64 },
1946         [NDTPA_LOCKTIME]                = { .type = NLA_U64 },
1947 };
1948
1949 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
1950 {
1951         struct net *net = sock_net(skb->sk);
1952         struct neigh_table *tbl;
1953         struct ndtmsg *ndtmsg;
1954         struct nlattr *tb[NDTA_MAX+1];
1955         int err;
1956
1957         err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
1958                           nl_neightbl_policy);
1959         if (err < 0)
1960                 goto errout;
1961
1962         if (tb[NDTA_NAME] == NULL) {
1963                 err = -EINVAL;
1964                 goto errout;
1965         }
1966
1967         ndtmsg = nlmsg_data(nlh);
1968         read_lock(&neigh_tbl_lock);
1969         for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1970                 if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
1971                         continue;
1972
1973                 if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
1974                         break;
1975         }
1976
1977         if (tbl == NULL) {
1978                 err = -ENOENT;
1979                 goto errout_locked;
1980         }
1981
1982         /*
1983          * We acquire tbl->lock to be nice to the periodic timers and
1984          * make sure they always see a consistent set of values.
1985          */
1986         write_lock_bh(&tbl->lock);
1987
1988         if (tb[NDTA_PARMS]) {
1989                 struct nlattr *tbp[NDTPA_MAX+1];
1990                 struct neigh_parms *p;
1991                 int i, ifindex = 0;
1992
1993                 err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
1994                                        nl_ntbl_parm_policy);
1995                 if (err < 0)
1996                         goto errout_tbl_lock;
1997
1998                 if (tbp[NDTPA_IFINDEX])
1999                         ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
2000
2001                 p = lookup_neigh_parms(tbl, net, ifindex);
2002                 if (p == NULL) {
2003                         err = -ENOENT;
2004                         goto errout_tbl_lock;
2005                 }
2006
2007                 for (i = 1; i <= NDTPA_MAX; i++) {
2008                         if (tbp[i] == NULL)
2009                                 continue;
2010
2011                         switch (i) {
2012                         case NDTPA_QUEUE_LEN:
2013                                 p->queue_len_bytes = nla_get_u32(tbp[i]) *
2014                                                      SKB_TRUESIZE(ETH_FRAME_LEN);
2015                                 break;
2016                         case NDTPA_QUEUE_LENBYTES:
2017                                 p->queue_len_bytes = nla_get_u32(tbp[i]);
2018                                 break;
2019                         case NDTPA_PROXY_QLEN:
2020                                 p->proxy_qlen = nla_get_u32(tbp[i]);
2021                                 break;
2022                         case NDTPA_APP_PROBES:
2023                                 p->app_probes = nla_get_u32(tbp[i]);
2024                                 break;
2025                         case NDTPA_UCAST_PROBES:
2026                                 p->ucast_probes = nla_get_u32(tbp[i]);
2027                                 break;
2028                         case NDTPA_MCAST_PROBES:
2029                                 p->mcast_probes = nla_get_u32(tbp[i]);
2030                                 break;
2031                         case NDTPA_BASE_REACHABLE_TIME:
2032                                 p->base_reachable_time = nla_get_msecs(tbp[i]);
2033                                 break;
2034                         case NDTPA_GC_STALETIME:
2035                                 p->gc_staletime = nla_get_msecs(tbp[i]);
2036                                 break;
2037                         case NDTPA_DELAY_PROBE_TIME:
2038                                 p->delay_probe_time = nla_get_msecs(tbp[i]);
2039                                 break;
2040                         case NDTPA_RETRANS_TIME:
2041                                 p->retrans_time = nla_get_msecs(tbp[i]);
2042                                 break;
2043                         case NDTPA_ANYCAST_DELAY:
2044                                 p->anycast_delay = nla_get_msecs(tbp[i]);
2045                                 break;
2046                         case NDTPA_PROXY_DELAY:
2047                                 p->proxy_delay = nla_get_msecs(tbp[i]);
2048                                 break;
2049                         case NDTPA_LOCKTIME:
2050                                 p->locktime = nla_get_msecs(tbp[i]);
2051                                 break;
2052                         }
2053                 }
2054         }
2055
2056         if (tb[NDTA_THRESH1])
2057                 tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
2058
2059         if (tb[NDTA_THRESH2])
2060                 tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
2061
2062         if (tb[NDTA_THRESH3])
2063                 tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
2064
2065         if (tb[NDTA_GC_INTERVAL])
2066                 tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
2067
2068         err = 0;
2069
2070 errout_tbl_lock:
2071         write_unlock_bh(&tbl->lock);
2072 errout_locked:
2073         read_unlock(&neigh_tbl_lock);
2074 errout:
2075         return err;
2076 }
2077
2078 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2079 {
2080         struct net *net = sock_net(skb->sk);
2081         int family, tidx, nidx = 0;
2082         int tbl_skip = cb->args[0];
2083         int neigh_skip = cb->args[1];
2084         struct neigh_table *tbl;
2085
2086         family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2087
2088         read_lock(&neigh_tbl_lock);
2089         for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
2090                 struct neigh_parms *p;
2091
2092                 if (tidx < tbl_skip || (family && tbl->family != family))
2093                         continue;
2094
2095                 if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
2096                                        cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
2097                                        NLM_F_MULTI) <= 0)
2098                         break;
2099
2100                 for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
2101                         if (!net_eq(neigh_parms_net(p), net))
2102                                 continue;
2103
2104                         if (nidx < neigh_skip)
2105                                 goto next;
2106
2107                         if (neightbl_fill_param_info(skb, tbl, p,
2108                                                      NETLINK_CB(cb->skb).portid,
2109                                                      cb->nlh->nlmsg_seq,
2110                                                      RTM_NEWNEIGHTBL,
2111                                                      NLM_F_MULTI) <= 0)
2112                                 goto out;
2113                 next:
2114                         nidx++;
2115                 }
2116
2117                 neigh_skip = 0;
2118         }
2119 out:
2120         read_unlock(&neigh_tbl_lock);
2121         cb->args[0] = tidx;
2122         cb->args[1] = nidx;
2123
2124         return skb->len;
2125 }
2126
2127 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2128                            u32 pid, u32 seq, int type, unsigned int flags)
2129 {
2130         unsigned long now = jiffies;
2131         struct nda_cacheinfo ci;
2132         struct nlmsghdr *nlh;
2133         struct ndmsg *ndm;
2134
2135         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2136         if (nlh == NULL)
2137                 return -EMSGSIZE;
2138
2139         ndm = nlmsg_data(nlh);
2140         ndm->ndm_family  = neigh->ops->family;
2141         ndm->ndm_pad1    = 0;
2142         ndm->ndm_pad2    = 0;
2143         ndm->ndm_flags   = neigh->flags;
2144         ndm->ndm_type    = neigh->type;
2145         ndm->ndm_ifindex = neigh->dev->ifindex;
2146
2147         if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
2148                 goto nla_put_failure;
2149
2150         read_lock_bh(&neigh->lock);
2151         ndm->ndm_state   = neigh->nud_state;
2152         if (neigh->nud_state & NUD_VALID) {
2153                 char haddr[MAX_ADDR_LEN];
2154
2155                 neigh_ha_snapshot(haddr, neigh, neigh->dev);
2156                 if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
2157                         read_unlock_bh(&neigh->lock);
2158                         goto nla_put_failure;
2159                 }
2160         }
2161
2162         ci.ndm_used      = jiffies_to_clock_t(now - neigh->used);
2163         ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
2164         ci.ndm_updated   = jiffies_to_clock_t(now - neigh->updated);
2165         ci.ndm_refcnt    = atomic_read(&neigh->refcnt) - 1;
2166         read_unlock_bh(&neigh->lock);
2167
2168         if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
2169             nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
2170                 goto nla_put_failure;
2171
2172         return nlmsg_end(skb, nlh);
2173
2174 nla_put_failure:
2175         nlmsg_cancel(skb, nlh);
2176         return -EMSGSIZE;
2177 }
2178
2179 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2180                             u32 pid, u32 seq, int type, unsigned int flags,
2181                             struct neigh_table *tbl)
2182 {
2183         struct nlmsghdr *nlh;
2184         struct ndmsg *ndm;
2185
2186         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2187         if (nlh == NULL)
2188                 return -EMSGSIZE;
2189
2190         ndm = nlmsg_data(nlh);
2191         ndm->ndm_family  = tbl->family;
2192         ndm->ndm_pad1    = 0;
2193         ndm->ndm_pad2    = 0;
2194         ndm->ndm_flags   = pn->flags | NTF_PROXY;
2195         ndm->ndm_type    = NDA_DST;
2196         ndm->ndm_ifindex = pn->dev->ifindex;
2197         ndm->ndm_state   = NUD_NONE;
2198
2199         if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2200                 goto nla_put_failure;
2201
2202         return nlmsg_end(skb, nlh);
2203
2204 nla_put_failure:
2205         nlmsg_cancel(skb, nlh);
2206         return -EMSGSIZE;
2207 }
2208
2209 static void neigh_update_notify(struct neighbour *neigh)
2210 {
2211         call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
2212         __neigh_notify(neigh, RTM_NEWNEIGH, 0);
2213 }
2214
2215 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2216                             struct netlink_callback *cb)
2217 {
2218         struct net *net = sock_net(skb->sk);
2219         struct neighbour *n;
2220         int rc, h, s_h = cb->args[1];
2221         int idx, s_idx = idx = cb->args[2];
2222         struct neigh_hash_table *nht;
2223
2224         rcu_read_lock_bh();
2225         nht = rcu_dereference_bh(tbl->nht);
2226
2227         for (h = s_h; h < (1 << nht->hash_shift); h++) {
2228                 if (h > s_h)
2229                         s_idx = 0;
2230                 for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
2231                      n != NULL;
2232                      n = rcu_dereference_bh(n->next)) {
2233                         if (!net_eq(dev_net(n->dev), net))
2234                                 continue;
2235                         if (idx < s_idx)
2236                                 goto next;
2237                         if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2238                                             cb->nlh->nlmsg_seq,
2239                                             RTM_NEWNEIGH,
2240                                             NLM_F_MULTI) <= 0) {
2241                                 rc = -1;
2242                                 goto out;
2243                         }
2244 next:
2245                         idx++;
2246                 }
2247         }
2248         rc = skb->len;
2249 out:
2250         rcu_read_unlock_bh();
2251         cb->args[1] = h;
2252         cb->args[2] = idx;
2253         return rc;
2254 }
2255
2256 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2257                              struct netlink_callback *cb)
2258 {
2259         struct pneigh_entry *n;
2260         struct net *net = sock_net(skb->sk);
2261         int rc, h, s_h = cb->args[3];
2262         int idx, s_idx = idx = cb->args[4];
2263
2264         read_lock_bh(&tbl->lock);
2265
2266         for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
2267                 if (h > s_h)
2268                         s_idx = 0;
2269                 for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
2270                         if (dev_net(n->dev) != net)
2271                                 continue;
2272                         if (idx < s_idx)
2273                                 goto next;
2274                         if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2275                                             cb->nlh->nlmsg_seq,
2276                                             RTM_NEWNEIGH,
2277                                             NLM_F_MULTI, tbl) <= 0) {
2278                                 read_unlock_bh(&tbl->lock);
2279                                 rc = -1;
2280                                 goto out;
2281                         }
2282                 next:
2283                         idx++;
2284                 }
2285         }
2286
2287         read_unlock_bh(&tbl->lock);
2288         rc = skb->len;
2289 out:
2290         cb->args[3] = h;
2291         cb->args[4] = idx;
2292         return rc;
2293
2294 }
2295
2296 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2297 {
2298         struct neigh_table *tbl;
2299         int t, family, s_t;
2300         int proxy = 0;
2301         int err;
2302
2303         read_lock(&neigh_tbl_lock);
2304         family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2305
2306         /* check for full ndmsg structure presence, family member is
2307          * the same for both structures
2308          */
2309         if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
2310             ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
2311                 proxy = 1;
2312
2313         s_t = cb->args[0];
2314
2315         for (tbl = neigh_tables, t = 0; tbl;
2316              tbl = tbl->next, t++) {
2317                 if (t < s_t || (family && tbl->family != family))
2318                         continue;
2319                 if (t > s_t)
2320                         memset(&cb->args[1], 0, sizeof(cb->args) -
2321                                                 sizeof(cb->args[0]));
2322                 if (proxy)
2323                         err = pneigh_dump_table(tbl, skb, cb);
2324                 else
2325                         err = neigh_dump_table(tbl, skb, cb);
2326                 if (err < 0)
2327                         break;
2328         }
2329         read_unlock(&neigh_tbl_lock);
2330
2331         cb->args[0] = t;
2332         return skb->len;
2333 }
2334
2335 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2336 {
2337         int chain;
2338         struct neigh_hash_table *nht;
2339
2340         rcu_read_lock_bh();
2341         nht = rcu_dereference_bh(tbl->nht);
2342
2343         read_lock(&tbl->lock); /* avoid resizes */
2344         for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2345                 struct neighbour *n;
2346
2347                 for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2348                      n != NULL;
2349                      n = rcu_dereference_bh(n->next))
2350                         cb(n, cookie);
2351         }
2352         read_unlock(&tbl->lock);
2353         rcu_read_unlock_bh();
2354 }
2355 EXPORT_SYMBOL(neigh_for_each);
2356
2357 /* The tbl->lock must be held as a writer and BH disabled. */
2358 void __neigh_for_each_release(struct neigh_table *tbl,
2359                               int (*cb)(struct neighbour *))
2360 {
2361         int chain;
2362         struct neigh_hash_table *nht;
2363
2364         nht = rcu_dereference_protected(tbl->nht,
2365                                         lockdep_is_held(&tbl->lock));
2366         for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2367                 struct neighbour *n;
2368                 struct neighbour __rcu **np;
2369
2370                 np = &nht->hash_buckets[chain];
2371                 while ((n = rcu_dereference_protected(*np,
2372                                         lockdep_is_held(&tbl->lock))) != NULL) {
2373                         int release;
2374
2375                         write_lock(&n->lock);
2376                         release = cb(n);
2377                         if (release) {
2378                                 rcu_assign_pointer(*np,
2379                                         rcu_dereference_protected(n->next,
2380                                                 lockdep_is_held(&tbl->lock)));
2381                                 n->dead = 1;
2382                         } else
2383                                 np = &n->next;
2384                         write_unlock(&n->lock);
2385                         if (release)
2386                                 neigh_cleanup_and_release(n);
2387                 }
2388         }
2389 }
2390 EXPORT_SYMBOL(__neigh_for_each_release);
2391
2392 #ifdef CONFIG_PROC_FS
2393
2394 static struct neighbour *neigh_get_first(struct seq_file *seq)
2395 {
2396         struct neigh_seq_state *state = seq->private;
2397         struct net *net = seq_file_net(seq);
2398         struct neigh_hash_table *nht = state->nht;
2399         struct neighbour *n = NULL;
2400         int bucket = state->bucket;
2401
2402         state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
2403         for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
2404                 n = rcu_dereference_bh(nht->hash_buckets[bucket]);
2405
2406                 while (n) {
2407                         if (!net_eq(dev_net(n->dev), net))
2408                                 goto next;
2409                         if (state->neigh_sub_iter) {
2410                                 loff_t fakep = 0;
2411                                 void *v;
2412
2413                                 v = state->neigh_sub_iter(state, n, &fakep);
2414                                 if (!v)
2415                                         goto next;
2416                         }
2417                         if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2418                                 break;
2419                         if (n->nud_state & ~NUD_NOARP)
2420                                 break;
2421 next:
2422                         n = rcu_dereference_bh(n->next);
2423                 }
2424
2425                 if (n)
2426                         break;
2427         }
2428         state->bucket = bucket;
2429
2430         return n;
2431 }
2432
2433 static struct neighbour *neigh_get_next(struct seq_file *seq,
2434                                         struct neighbour *n,
2435                                         loff_t *pos)
2436 {
2437         struct neigh_seq_state *state = seq->private;
2438         struct net *net = seq_file_net(seq);
2439         struct neigh_hash_table *nht = state->nht;
2440
2441         if (state->neigh_sub_iter) {
2442                 void *v = state->neigh_sub_iter(state, n, pos);
2443                 if (v)
2444                         return n;
2445         }
2446         n = rcu_dereference_bh(n->next);
2447
2448         while (1) {
2449                 while (n) {
2450                         if (!net_eq(dev_net(n->dev), net))
2451                                 goto next;
2452                         if (state->neigh_sub_iter) {
2453                                 void *v = state->neigh_sub_iter(state, n, pos);
2454                                 if (v)
2455                                         return n;
2456                                 goto next;
2457                         }
2458                         if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2459                                 break;
2460
2461                         if (n->nud_state & ~NUD_NOARP)
2462                                 break;
2463 next:
2464                         n = rcu_dereference_bh(n->next);
2465                 }
2466
2467                 if (n)
2468                         break;
2469
2470                 if (++state->bucket >= (1 << nht->hash_shift))
2471                         break;
2472
2473                 n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
2474         }
2475
2476         if (n && pos)
2477                 --(*pos);
2478         return n;
2479 }
2480
2481 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2482 {
2483         struct neighbour *n = neigh_get_first(seq);
2484
2485         if (n) {
2486                 --(*pos);
2487                 while (*pos) {
2488                         n = neigh_get_next(seq, n, pos);
2489                         if (!n)
2490                                 break;
2491                 }
2492         }
2493         return *pos ? NULL : n;
2494 }
2495
2496 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2497 {
2498         struct neigh_seq_state *state = seq->private;
2499         struct net *net = seq_file_net(seq);
2500         struct neigh_table *tbl = state->tbl;
2501         struct pneigh_entry *pn = NULL;
2502         int bucket = state->bucket;
2503
2504         state->flags |= NEIGH_SEQ_IS_PNEIGH;
2505         for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2506                 pn = tbl->phash_buckets[bucket];
2507                 while (pn && !net_eq(pneigh_net(pn), net))
2508                         pn = pn->next;
2509                 if (pn)
2510                         break;
2511         }
2512         state->bucket = bucket;
2513
2514         return pn;
2515 }
2516
2517 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2518                                             struct pneigh_entry *pn,
2519                                             loff_t *pos)
2520 {
2521         struct neigh_seq_state *state = seq->private;
2522         struct net *net = seq_file_net(seq);
2523         struct neigh_table *tbl = state->tbl;
2524
2525         do {
2526                 pn = pn->next;
2527         } while (pn && !net_eq(pneigh_net(pn), net));
2528
2529         while (!pn) {
2530                 if (++state->bucket > PNEIGH_HASHMASK)
2531                         break;
2532                 pn = tbl->phash_buckets[state->bucket];
2533                 while (pn && !net_eq(pneigh_net(pn), net))
2534                         pn = pn->next;
2535                 if (pn)
2536                         break;
2537         }
2538
2539         if (pn && pos)
2540                 --(*pos);
2541
2542         return pn;
2543 }
2544
2545 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2546 {
2547         struct pneigh_entry *pn = pneigh_get_first(seq);
2548
2549         if (pn) {
2550                 --(*pos);
2551                 while (*pos) {
2552                         pn = pneigh_get_next(seq, pn, pos);
2553                         if (!pn)
2554                                 break;
2555                 }
2556         }
2557         return *pos ? NULL : pn;
2558 }
2559
2560 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2561 {
2562         struct neigh_seq_state *state = seq->private;
2563         void *rc;
2564         loff_t idxpos = *pos;
2565
2566         rc = neigh_get_idx(seq, &idxpos);
2567         if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2568                 rc = pneigh_get_idx(seq, &idxpos);
2569
2570         return rc;
2571 }
2572
2573 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
2574         __acquires(rcu_bh)
2575 {
2576         struct neigh_seq_state *state = seq->private;
2577
2578         state->tbl = tbl;
2579         state->bucket = 0;
2580         state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
2581
2582         rcu_read_lock_bh();
2583         state->nht = rcu_dereference_bh(tbl->nht);
2584
2585         return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
2586 }
2587 EXPORT_SYMBOL(neigh_seq_start);
2588
2589 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2590 {
2591         struct neigh_seq_state *state;
2592         void *rc;
2593
2594         if (v == SEQ_START_TOKEN) {
2595                 rc = neigh_get_first(seq);
2596                 goto out;
2597         }
2598
2599         state = seq->private;
2600         if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
2601                 rc = neigh_get_next(seq, v, NULL);
2602                 if (rc)
2603                         goto out;
2604                 if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2605                         rc = pneigh_get_first(seq);
2606         } else {
2607                 BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
2608                 rc = pneigh_get_next(seq, v, NULL);
2609         }
2610 out:
2611         ++(*pos);
2612         return rc;
2613 }
2614 EXPORT_SYMBOL(neigh_seq_next);
2615
2616 void neigh_seq_stop(struct seq_file *seq, void *v)
2617         __releases(rcu_bh)
2618 {
2619         rcu_read_unlock_bh();
2620 }
2621 EXPORT_SYMBOL(neigh_seq_stop);
2622
2623 /* statistics via seq_file */
2624
2625 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2626 {
2627         struct neigh_table *tbl = seq->private;
2628         int cpu;
2629
2630         if (*pos == 0)
2631                 return SEQ_START_TOKEN;
2632
2633         for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
2634                 if (!cpu_possible(cpu))
2635                         continue;
2636                 *pos = cpu+1;
2637                 return per_cpu_ptr(tbl->stats, cpu);
2638         }
2639         return NULL;
2640 }
2641
2642 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2643 {
2644         struct neigh_table *tbl = seq->private;
2645         int cpu;
2646
2647         for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2648                 if (!cpu_possible(cpu))
2649                         continue;
2650                 *pos = cpu+1;
2651                 return per_cpu_ptr(tbl->stats, cpu);
2652         }
2653         return NULL;
2654 }
2655
/*
 * seq_file ->stop for the per-CPU statistics file.  The start/next callbacks
 * acquire nothing, so there is nothing to release here.
 */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{
}
2660
2661 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
2662 {
2663         struct neigh_table *tbl = seq->private;
2664         struct neigh_statistics *st = v;
2665
2666         if (v == SEQ_START_TOKEN) {
2667                 seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards\n");
2668                 return 0;
2669         }
2670
2671         seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
2672                         "%08lx %08lx  %08lx %08lx %08lx\n",
2673                    atomic_read(&tbl->entries),
2674
2675                    st->allocs,
2676                    st->destroys,
2677                    st->hash_grows,
2678
2679                    st->lookups,
2680                    st->hits,
2681
2682                    st->res_failed,
2683
2684                    st->rcv_probes_mcast,
2685                    st->rcv_probes_ucast,
2686
2687                    st->periodic_gc_runs,
2688                    st->forced_gc_runs,
2689                    st->unres_discards
2690                    );
2691
2692         return 0;
2693 }
2694
2695 static const struct seq_operations neigh_stat_seq_ops = {
2696         .start  = neigh_stat_seq_start,
2697         .next   = neigh_stat_seq_next,
2698         .stop   = neigh_stat_seq_stop,
2699         .show   = neigh_stat_seq_show,
2700 };
2701
2702 static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2703 {
2704         int ret = seq_open(file, &neigh_stat_seq_ops);
2705
2706         if (!ret) {
2707                 struct seq_file *sf = file->private_data;
2708                 sf->private = PDE_DATA(inode);
2709         }
2710         return ret;
2711 };
2712
2713 static const struct file_operations neigh_stat_seq_fops = {
2714         .owner   = THIS_MODULE,
2715         .open    = neigh_stat_seq_open,
2716         .read    = seq_read,
2717         .llseek  = seq_lseek,
2718         .release = seq_release,
2719 };
2720
2721 #endif /* CONFIG_PROC_FS */
2722
2723 static inline size_t neigh_nlmsg_size(void)
2724 {
2725         return NLMSG_ALIGN(sizeof(struct ndmsg))
2726                + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2727                + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2728                + nla_total_size(sizeof(struct nda_cacheinfo))
2729                + nla_total_size(4); /* NDA_PROBES */
2730 }
2731
2732 static void __neigh_notify(struct neighbour *n, int type, int flags)
2733 {
2734         struct net *net = dev_net(n->dev);
2735         struct sk_buff *skb;
2736         int err = -ENOBUFS;
2737
2738         skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
2739         if (skb == NULL)
2740                 goto errout;
2741
2742         err = neigh_fill_info(skb, n, 0, 0, type, flags);
2743         if (err < 0) {
2744                 /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
2745                 WARN_ON(err == -EMSGSIZE);
2746                 kfree_skb(skb);
2747                 goto errout;
2748         }
2749         rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
2750         return;
2751 errout:
2752         if (err < 0)
2753                 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
2754 }
2755
#ifdef CONFIG_ARPD
/*
 * Broadcast an RTM_GETNEIGH request (NLM_F_REQUEST) for neighbour @n on the
 * neighbour netlink group.  Only built when CONFIG_ARPD is enabled.
 */
void neigh_app_ns(struct neighbour *n)
{
	const int msg_type = RTM_GETNEIGH;
	const int msg_flags = NLM_F_REQUEST;

	__neigh_notify(n, msg_type, msg_flags);
}
EXPORT_SYMBOL(neigh_app_ns);
#endif /* CONFIG_ARPD */
2763
2764 #ifdef CONFIG_SYSCTL
2765 static int zero;
2766 static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
2767
2768 static int proc_unres_qlen(ctl_table *ctl, int write, void __user *buffer,
2769                            size_t *lenp, loff_t *ppos)
2770 {
2771         int size, ret;
2772         ctl_table tmp = *ctl;
2773
2774         tmp.extra1 = &zero;
2775         tmp.extra2 = &unres_qlen_max;
2776         tmp.data = &size;
2777
2778         size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
2779         ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
2780
2781         if (write && !ret)
2782                 *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
2783         return ret;
2784 }
2785
/*
 * Indices into neigh_sysctl_table.neigh_vars[].  The order matters:
 * neigh_sysctl_register() cuts the table off at NEIGH_VAR_GC_INTERVAL for
 * per-device directories, so everything from there on only exists in the
 * "default" directory.  NEIGH_VAR_MAX is the number of real entries.
 */
enum {
	/* per-parms entries */
	NEIGH_VAR_MCAST_PROBE = 0,
	NEIGH_VAR_UCAST_PROBE,
	NEIGH_VAR_APP_PROBE,
	NEIGH_VAR_RETRANS_TIME,
	NEIGH_VAR_BASE_REACHABLE_TIME,
	NEIGH_VAR_DELAY_PROBE_TIME,
	NEIGH_VAR_GC_STALETIME,
	NEIGH_VAR_QUEUE_LEN,
	NEIGH_VAR_QUEUE_LEN_BYTES,
	NEIGH_VAR_PROXY_QLEN,
	NEIGH_VAR_ANYCAST_DELAY,
	NEIGH_VAR_PROXY_DELAY,
	NEIGH_VAR_LOCKTIME,
	NEIGH_VAR_RETRANS_TIME_MS,
	NEIGH_VAR_BASE_REACHABLE_TIME_MS,

	/* entries present only in the "default" directory */
	NEIGH_VAR_GC_INTERVAL,
	NEIGH_VAR_GC_THRESH1,
	NEIGH_VAR_GC_THRESH2,
	NEIGH_VAR_GC_THRESH3,

	NEIGH_VAR_MAX
};
2808
2809 static struct neigh_sysctl_table {
2810         struct ctl_table_header *sysctl_header;
2811         struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
2812 } neigh_sysctl_template __read_mostly = {
2813         .neigh_vars = {
2814                 [NEIGH_VAR_MCAST_PROBE] = {
2815                         .procname       = "mcast_solicit",
2816                         .maxlen         = sizeof(int),
2817                         .mode           = 0644,
2818                         .proc_handler   = proc_dointvec,
2819                 },
2820                 [NEIGH_VAR_UCAST_PROBE] = {
2821                         .procname       = "ucast_solicit",
2822                         .maxlen         = sizeof(int),
2823                         .mode           = 0644,
2824                         .proc_handler   = proc_dointvec,
2825                 },
2826                 [NEIGH_VAR_APP_PROBE] = {
2827                         .procname       = "app_solicit",
2828                         .maxlen         = sizeof(int),
2829                         .mode           = 0644,
2830                         .proc_handler   = proc_dointvec,
2831                 },
2832                 [NEIGH_VAR_RETRANS_TIME] = {
2833                         .procname       = "retrans_time",
2834                         .maxlen         = sizeof(int),
2835                         .mode           = 0644,
2836                         .proc_handler   = proc_dointvec_userhz_jiffies,
2837                 },
2838                 [NEIGH_VAR_BASE_REACHABLE_TIME] = {
2839                         .procname       = "base_reachable_time",
2840                         .maxlen         = sizeof(int),
2841                         .mode           = 0644,
2842                         .proc_handler   = proc_dointvec_jiffies,
2843                 },
2844                 [NEIGH_VAR_DELAY_PROBE_TIME] = {
2845                         .procname       = "delay_first_probe_time",
2846                         .maxlen         = sizeof(int),
2847                         .mode           = 0644,
2848                         .proc_handler   = proc_dointvec_jiffies,
2849                 },
2850                 [NEIGH_VAR_GC_STALETIME] = {
2851                         .procname       = "gc_stale_time",
2852                         .maxlen         = sizeof(int),
2853                         .mode           = 0644,
2854                         .proc_handler   = proc_dointvec_jiffies,
2855                 },
2856                 [NEIGH_VAR_QUEUE_LEN] = {
2857                         .procname       = "unres_qlen",
2858                         .maxlen         = sizeof(int),
2859                         .mode           = 0644,
2860                         .proc_handler   = proc_unres_qlen,
2861                 },
2862                 [NEIGH_VAR_QUEUE_LEN_BYTES] = {
2863                         .procname       = "unres_qlen_bytes",
2864                         .maxlen         = sizeof(int),
2865                         .mode           = 0644,
2866                         .extra1         = &zero,
2867                         .proc_handler   = proc_dointvec_minmax,
2868                 },
2869                 [NEIGH_VAR_PROXY_QLEN] = {
2870                         .procname       = "proxy_qlen",
2871                         .maxlen         = sizeof(int),
2872                         .mode           = 0644,
2873                         .proc_handler   = proc_dointvec,
2874                 },
2875                 [NEIGH_VAR_ANYCAST_DELAY] = {
2876                         .procname       = "anycast_delay",
2877                         .maxlen         = sizeof(int),
2878                         .mode           = 0644,
2879                         .proc_handler   = proc_dointvec_userhz_jiffies,
2880                 },
2881                 [NEIGH_VAR_PROXY_DELAY] = {
2882                         .procname       = "proxy_delay",
2883                         .maxlen         = sizeof(int),
2884                         .mode           = 0644,
2885                         .proc_handler   = proc_dointvec_userhz_jiffies,
2886                 },
2887                 [NEIGH_VAR_LOCKTIME] = {
2888                         .procname       = "locktime",
2889                         .maxlen         = sizeof(int),
2890                         .mode           = 0644,
2891                         .proc_handler   = proc_dointvec_userhz_jiffies,
2892                 },
2893                 [NEIGH_VAR_RETRANS_TIME_MS] = {
2894                         .procname       = "retrans_time_ms",
2895                         .maxlen         = sizeof(int),
2896                         .mode           = 0644,
2897                         .proc_handler   = proc_dointvec_ms_jiffies,
2898                 },
2899                 [NEIGH_VAR_BASE_REACHABLE_TIME_MS] = {
2900                         .procname       = "base_reachable_time_ms",
2901                         .maxlen         = sizeof(int),
2902                         .mode           = 0644,
2903                         .proc_handler   = proc_dointvec_ms_jiffies,
2904                 },
2905                 [NEIGH_VAR_GC_INTERVAL] = {
2906                         .procname       = "gc_interval",
2907                         .maxlen         = sizeof(int),
2908                         .mode           = 0644,
2909                         .proc_handler   = proc_dointvec_jiffies,
2910                 },
2911                 [NEIGH_VAR_GC_THRESH1] = {
2912                         .procname       = "gc_thresh1",
2913                         .maxlen         = sizeof(int),
2914                         .mode           = 0644,
2915                         .proc_handler   = proc_dointvec,
2916                 },
2917                 [NEIGH_VAR_GC_THRESH2] = {
2918                         .procname       = "gc_thresh2",
2919                         .maxlen         = sizeof(int),
2920                         .mode           = 0644,
2921                         .proc_handler   = proc_dointvec,
2922                 },
2923                 [NEIGH_VAR_GC_THRESH3] = {
2924                         .procname       = "gc_thresh3",
2925                         .maxlen         = sizeof(int),
2926                         .mode           = 0644,
2927                         .proc_handler   = proc_dointvec,
2928                 },
2929                 {},
2930         },
2931 };
2932
2933 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
2934                           char *p_name, proc_handler *handler)
2935 {
2936         struct neigh_sysctl_table *t;
2937         const char *dev_name_source = NULL;
2938         char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
2939
2940         t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
2941         if (!t)
2942                 goto err;
2943
2944         t->neigh_vars[NEIGH_VAR_MCAST_PROBE].data  = &p->mcast_probes;
2945         t->neigh_vars[NEIGH_VAR_UCAST_PROBE].data  = &p->ucast_probes;
2946         t->neigh_vars[NEIGH_VAR_APP_PROBE].data  = &p->app_probes;
2947         t->neigh_vars[NEIGH_VAR_RETRANS_TIME].data  = &p->retrans_time;
2948         t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].data  = &p->base_reachable_time;
2949         t->neigh_vars[NEIGH_VAR_DELAY_PROBE_TIME].data  = &p->delay_probe_time;
2950         t->neigh_vars[NEIGH_VAR_GC_STALETIME].data  = &p->gc_staletime;
2951         t->neigh_vars[NEIGH_VAR_QUEUE_LEN].data  = &p->queue_len_bytes;
2952         t->neigh_vars[NEIGH_VAR_QUEUE_LEN_BYTES].data  = &p->queue_len_bytes;
2953         t->neigh_vars[NEIGH_VAR_PROXY_QLEN].data  = &p->proxy_qlen;
2954         t->neigh_vars[NEIGH_VAR_ANYCAST_DELAY].data  = &p->anycast_delay;
2955         t->neigh_vars[NEIGH_VAR_PROXY_DELAY].data = &p->proxy_delay;
2956         t->neigh_vars[NEIGH_VAR_LOCKTIME].data = &p->locktime;
2957         t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].data  = &p->retrans_time;
2958         t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].data  = &p->base_reachable_time;
2959
2960         if (dev) {
2961                 dev_name_source = dev->name;
2962                 /* Terminate the table early */
2963                 memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
2964                        sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
2965         } else {
2966                 dev_name_source = "default";
2967                 t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = (int *)(p + 1);
2968                 t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = (int *)(p + 1) + 1;
2969                 t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = (int *)(p + 1) + 2;
2970                 t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3;
2971         }
2972
2973
2974         if (handler) {
2975                 /* RetransTime */
2976                 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
2977                 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].extra1 = dev;
2978                 /* ReachableTime */
2979                 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
2980                 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].extra1 = dev;
2981                 /* RetransTime (in milliseconds)*/
2982                 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
2983                 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].extra1 = dev;
2984                 /* ReachableTime (in milliseconds) */
2985                 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
2986                 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev;
2987         }
2988
2989         /* Don't export sysctls to unprivileged users */
2990         if (neigh_parms_net(p)->user_ns != &init_user_ns)
2991                 t->neigh_vars[0].procname = NULL;
2992
2993         snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
2994                 p_name, dev_name_source);
2995         t->sysctl_header =
2996                 register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
2997         if (!t->sysctl_header)
2998                 goto free;
2999
3000         p->sysctl_table = t;
3001         return 0;
3002
3003 free:
3004         kfree(t);
3005 err:
3006         return -ENOBUFS;
3007 }
3008 EXPORT_SYMBOL(neigh_sysctl_register);
3009
3010 void neigh_sysctl_unregister(struct neigh_parms *p)
3011 {
3012         if (p->sysctl_table) {
3013                 struct neigh_sysctl_table *t = p->sysctl_table;
3014                 p->sysctl_table = NULL;
3015                 unregister_net_sysctl_table(t->sysctl_header);
3016                 kfree(t);
3017         }
3018 }
3019 EXPORT_SYMBOL(neigh_sysctl_unregister);
3020
3021 #endif  /* CONFIG_SYSCTL */
3022
3023 static int __init neigh_init(void)
3024 {
3025         rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
3026         rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
3027         rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);
3028
3029         rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
3030                       NULL);
3031         rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);
3032
3033         return 0;
3034 }
3035
3036 subsys_initcall(neigh_init);
3037