Merge remote-tracking branch 'lsk/v3.10/topic/arm64-be' into linux-linaro-lsk
[firefly-linux-kernel-4.4.55.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13
14 /*      Changes:
15  *
16  *      YOSHIFUJI Hideaki @USAGI
17  *              reworked default router selection.
18  *              - respect outgoing interface
19  *              - select from (probably) reachable routers (i.e.
20  *              routers in REACHABLE, STALE, DELAY or PROBE states).
21  *              - always select the same router if it is (probably)
22  *              reachable.  otherwise, round-robin the list.
23  *      Ville Nuorvala
24  *              Fixed routing subtrees.
25  */
26
/* Prepend "IPv6: " to every format string that is passed through pr_fmt(). */
#define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60 #include <net/nexthop.h>
61
62 #include <asm/uaccess.h>
63
64 #ifdef CONFIG_SYSCTL
65 #include <linux/sysctl.h>
66 #endif
67
/*
 * Result of checking a route's next-hop neighbour.
 * Failures are negative so callers can test them with "< 0".
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD	= -2,
	RT6_NUD_FAIL_SOFT	= -1,
	RT6_NUD_SUCCEED		= 1,
};
73
74 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
75                                     const struct in6_addr *dest);
76 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
77 static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
78 static unsigned int      ip6_mtu(const struct dst_entry *dst);
79 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80 static void             ip6_dst_destroy(struct dst_entry *);
81 static void             ip6_dst_ifdown(struct dst_entry *,
82                                        struct net_device *dev, int how);
83 static int               ip6_dst_gc(struct dst_ops *ops);
84
85 static int              ip6_pkt_discard(struct sk_buff *skb);
86 static int              ip6_pkt_discard_out(struct sk_buff *skb);
87 static int              ip6_pkt_prohibit(struct sk_buff *skb);
88 static int              ip6_pkt_prohibit_out(struct sk_buff *skb);
89 static void             ip6_link_failure(struct sk_buff *skb);
90 static void             ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
91                                            struct sk_buff *skb, u32 mtu);
92 static void             rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
93                                         struct sk_buff *skb);
94
95 #ifdef CONFIG_IPV6_ROUTE_INFO
96 static struct rt6_info *rt6_add_route_info(struct net *net,
97                                            const struct in6_addr *prefix, int prefixlen,
98                                            const struct in6_addr *gwaddr, int ifindex,
99                                            unsigned int pref);
100 static struct rt6_info *rt6_get_route_info(struct net *net,
101                                            const struct in6_addr *prefix, int prefixlen,
102                                            const struct in6_addr *gwaddr, int ifindex);
103 #endif
104
105 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
106 {
107         struct rt6_info *rt = (struct rt6_info *) dst;
108         struct inet_peer *peer;
109         u32 *p = NULL;
110
111         if (!(rt->dst.flags & DST_HOST))
112                 return NULL;
113
114         peer = rt6_get_peer_create(rt);
115         if (peer) {
116                 u32 *old_p = __DST_METRICS_PTR(old);
117                 unsigned long prev, new;
118
119                 p = peer->metrics;
120                 if (inet_metrics_new(peer))
121                         memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
122
123                 new = (unsigned long) p;
124                 prev = cmpxchg(&dst->_metrics, old, new);
125
126                 if (prev != old) {
127                         p = __DST_METRICS_PTR(prev);
128                         if (prev & DST_METRICS_READ_ONLY)
129                                 p = NULL;
130                 }
131         }
132         return p;
133 }
134
135 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
136                                              struct sk_buff *skb,
137                                              const void *daddr)
138 {
139         struct in6_addr *p = &rt->rt6i_gateway;
140
141         if (!ipv6_addr_any(p))
142                 return (const void *) p;
143         else if (skb)
144                 return &ipv6_hdr(skb)->daddr;
145         return daddr;
146 }
147
148 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
149                                           struct sk_buff *skb,
150                                           const void *daddr)
151 {
152         struct rt6_info *rt = (struct rt6_info *) dst;
153         struct neighbour *n;
154
155         daddr = choose_neigh_daddr(rt, skb, daddr);
156         n = __ipv6_neigh_lookup(dst->dev, daddr);
157         if (n)
158                 return n;
159         return neigh_create(&nd_tbl, daddr, dst->dev);
160 }
161
162 static struct dst_ops ip6_dst_ops_template = {
163         .family                 =       AF_INET6,
164         .protocol               =       cpu_to_be16(ETH_P_IPV6),
165         .gc                     =       ip6_dst_gc,
166         .gc_thresh              =       1024,
167         .check                  =       ip6_dst_check,
168         .default_advmss         =       ip6_default_advmss,
169         .mtu                    =       ip6_mtu,
170         .cow_metrics            =       ipv6_cow_metrics,
171         .destroy                =       ip6_dst_destroy,
172         .ifdown                 =       ip6_dst_ifdown,
173         .negative_advice        =       ip6_negative_advice,
174         .link_failure           =       ip6_link_failure,
175         .update_pmtu            =       ip6_rt_update_pmtu,
176         .redirect               =       rt6_do_redirect,
177         .local_out              =       __ip6_local_out,
178         .neigh_lookup           =       ip6_neigh_lookup,
179 };
180
181 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
182 {
183         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
184
185         return mtu ? : dst->dev->mtu;
186 }
187
188 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
189                                          struct sk_buff *skb, u32 mtu)
190 {
191 }
192
/* redirect callback for blackhole dsts: deliberately does nothing. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
}
197
198 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
199                                          unsigned long old)
200 {
201         return NULL;
202 }
203
204 static struct dst_ops ip6_dst_blackhole_ops = {
205         .family                 =       AF_INET6,
206         .protocol               =       cpu_to_be16(ETH_P_IPV6),
207         .destroy                =       ip6_dst_destroy,
208         .check                  =       ip6_dst_check,
209         .mtu                    =       ip6_blackhole_mtu,
210         .default_advmss         =       ip6_default_advmss,
211         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
212         .redirect               =       ip6_rt_blackhole_redirect,
213         .cow_metrics            =       ip6_rt_blackhole_cow_metrics,
214         .neigh_lookup           =       ip6_neigh_lookup,
215 };
216
/*
 * Metrics array of RTAX_MAX entries; only the hop-limit slot is named
 * explicitly and it, like every other entry, is zero.
 */
static const u32 ip6_template_metrics[RTAX_MAX] = {
	[RTAX_HOPLIMIT - 1] = 0,
};
220
221 static const struct rt6_info ip6_null_entry_template = {
222         .dst = {
223                 .__refcnt       = ATOMIC_INIT(1),
224                 .__use          = 1,
225                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
226                 .error          = -ENETUNREACH,
227                 .input          = ip6_pkt_discard,
228                 .output         = ip6_pkt_discard_out,
229         },
230         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
231         .rt6i_protocol  = RTPROT_KERNEL,
232         .rt6i_metric    = ~(u32) 0,
233         .rt6i_ref       = ATOMIC_INIT(1),
234 };
235
236 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
237
238 static const struct rt6_info ip6_prohibit_entry_template = {
239         .dst = {
240                 .__refcnt       = ATOMIC_INIT(1),
241                 .__use          = 1,
242                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
243                 .error          = -EACCES,
244                 .input          = ip6_pkt_prohibit,
245                 .output         = ip6_pkt_prohibit_out,
246         },
247         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
248         .rt6i_protocol  = RTPROT_KERNEL,
249         .rt6i_metric    = ~(u32) 0,
250         .rt6i_ref       = ATOMIC_INIT(1),
251 };
252
253 static const struct rt6_info ip6_blk_hole_entry_template = {
254         .dst = {
255                 .__refcnt       = ATOMIC_INIT(1),
256                 .__use          = 1,
257                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
258                 .error          = -EINVAL,
259                 .input          = dst_discard,
260                 .output         = dst_discard,
261         },
262         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
263         .rt6i_protocol  = RTPROT_KERNEL,
264         .rt6i_metric    = ~(u32) 0,
265         .rt6i_ref       = ATOMIC_INIT(1),
266 };
267
268 #endif
269
270 /* allocate dst with ip6_dst_ops */
271 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
272                                              struct net_device *dev,
273                                              int flags,
274                                              struct fib6_table *table)
275 {
276         struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
277                                         0, DST_OBSOLETE_FORCE_CHK, flags);
278
279         if (rt) {
280                 struct dst_entry *dst = &rt->dst;
281
282                 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
283                 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
284                 rt->rt6i_genid = rt_genid(net);
285                 INIT_LIST_HEAD(&rt->rt6i_siblings);
286                 rt->rt6i_nsiblings = 0;
287         }
288         return rt;
289 }
290
291 static void ip6_dst_destroy(struct dst_entry *dst)
292 {
293         struct rt6_info *rt = (struct rt6_info *)dst;
294         struct inet6_dev *idev = rt->rt6i_idev;
295         struct dst_entry *from = dst->from;
296
297         if (!(rt->dst.flags & DST_HOST))
298                 dst_destroy_metrics_generic(dst);
299
300         if (idev) {
301                 rt->rt6i_idev = NULL;
302                 in6_dev_put(idev);
303         }
304
305         dst->from = NULL;
306         dst_release(from);
307
308         if (rt6_has_peer(rt)) {
309                 struct inet_peer *peer = rt6_peer_ptr(rt);
310                 inet_putpeer(peer);
311         }
312 }
313
314 void rt6_bind_peer(struct rt6_info *rt, int create)
315 {
316         struct inet_peer_base *base;
317         struct inet_peer *peer;
318
319         base = inetpeer_base_ptr(rt->_rt6i_peer);
320         if (!base)
321                 return;
322
323         peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
324         if (peer) {
325                 if (!rt6_set_peer(rt, peer))
326                         inet_putpeer(peer);
327         }
328 }
329
330 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
331                            int how)
332 {
333         struct rt6_info *rt = (struct rt6_info *)dst;
334         struct inet6_dev *idev = rt->rt6i_idev;
335         struct net_device *loopback_dev =
336                 dev_net(dev)->loopback_dev;
337
338         if (dev != loopback_dev) {
339                 if (idev && idev->dev == dev) {
340                         struct inet6_dev *loopback_idev =
341                                 in6_dev_get(loopback_dev);
342                         if (loopback_idev) {
343                                 rt->rt6i_idev = loopback_idev;
344                                 in6_dev_put(idev);
345                         }
346                 }
347         }
348 }
349
350 static bool rt6_check_expired(const struct rt6_info *rt)
351 {
352         if (rt->rt6i_flags & RTF_EXPIRES) {
353                 if (time_after(jiffies, rt->dst.expires))
354                         return true;
355         } else if (rt->dst.from) {
356                 return rt6_check_expired((struct rt6_info *) rt->dst.from);
357         }
358         return false;
359 }
360
361 static bool rt6_need_strict(const struct in6_addr *daddr)
362 {
363         return ipv6_addr_type(daddr) &
364                 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
365 }
366
367 /* Multipath route selection:
368  *   Hash based function using packet header and flowlabel.
369  * Adapted from fib_info_hashfn()
370  */
371 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
372                                const struct flowi6 *fl6)
373 {
374         unsigned int val = fl6->flowi6_proto;
375
376         val ^= ipv6_addr_hash(&fl6->daddr);
377         val ^= ipv6_addr_hash(&fl6->saddr);
378
379         /* Work only if this not encapsulated */
380         switch (fl6->flowi6_proto) {
381         case IPPROTO_UDP:
382         case IPPROTO_TCP:
383         case IPPROTO_SCTP:
384                 val ^= (__force u16)fl6->fl6_sport;
385                 val ^= (__force u16)fl6->fl6_dport;
386                 break;
387
388         case IPPROTO_ICMPV6:
389                 val ^= (__force u16)fl6->fl6_icmp_type;
390                 val ^= (__force u16)fl6->fl6_icmp_code;
391                 break;
392         }
393         /* RFC6438 recommands to use flowlabel */
394         val ^= (__force u32)fl6->flowlabel;
395
396         /* Perhaps, we need to tune, this function? */
397         val = val ^ (val >> 7) ^ (val >> 12);
398         return val % candidate_count;
399 }
400
401 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
402                                              struct flowi6 *fl6)
403 {
404         struct rt6_info *sibling, *next_sibling;
405         int route_choosen;
406
407         route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
408         /* Don't change the route, if route_choosen == 0
409          * (siblings does not include ourself)
410          */
411         if (route_choosen)
412                 list_for_each_entry_safe(sibling, next_sibling,
413                                 &match->rt6i_siblings, rt6i_siblings) {
414                         route_choosen--;
415                         if (route_choosen == 0) {
416                                 match = sibling;
417                                 break;
418                         }
419                 }
420         return match;
421 }
422
423 /*
424  *      Route lookup. Any table->tb6_lock is implied.
425  */
426
427 static inline struct rt6_info *rt6_device_match(struct net *net,
428                                                     struct rt6_info *rt,
429                                                     const struct in6_addr *saddr,
430                                                     int oif,
431                                                     int flags)
432 {
433         struct rt6_info *local = NULL;
434         struct rt6_info *sprt;
435
436         if (!oif && ipv6_addr_any(saddr))
437                 goto out;
438
439         for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
440                 struct net_device *dev = sprt->dst.dev;
441
442                 if (oif) {
443                         if (dev->ifindex == oif)
444                                 return sprt;
445                         if (dev->flags & IFF_LOOPBACK) {
446                                 if (!sprt->rt6i_idev ||
447                                     sprt->rt6i_idev->dev->ifindex != oif) {
448                                         if (flags & RT6_LOOKUP_F_IFACE && oif)
449                                                 continue;
450                                         if (local && (!oif ||
451                                                       local->rt6i_idev->dev->ifindex == oif))
452                                                 continue;
453                                 }
454                                 local = sprt;
455                         }
456                 } else {
457                         if (ipv6_chk_addr(net, saddr, dev,
458                                           flags & RT6_LOOKUP_F_IFACE))
459                                 return sprt;
460                 }
461         }
462
463         if (oif) {
464                 if (local)
465                         return local;
466
467                 if (flags & RT6_LOOKUP_F_IFACE)
468                         return net->ipv6.ip6_null_entry;
469         }
470 out:
471         return rt;
472 }
473
474 #ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Deferred router probe request, allocated in rt6_probe() and consumed
 * by rt6_probe_deferred(), which recovers it from @work via
 * container_of().
 */
struct __rt6_probe_work {
	struct work_struct work;	/* work item handed to schedule_work() */
	struct in6_addr target;		/* gateway address to solicit */
	struct net_device *dev;		/* device to send on; held by rt6_probe() */
};
480
481 static void rt6_probe_deferred(struct work_struct *w)
482 {
483         struct in6_addr mcaddr;
484         struct __rt6_probe_work *work =
485                 container_of(w, struct __rt6_probe_work, work);
486
487         addrconf_addr_solict_mult(&work->target, &mcaddr);
488         ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL);
489         dev_put(work->dev);
490         kfree(w);
491 }
492
493 static void rt6_probe(struct rt6_info *rt)
494 {
495         struct neighbour *neigh;
496         /*
497          * Okay, this does not seem to be appropriate
498          * for now, however, we need to check if it
499          * is really so; aka Router Reachability Probing.
500          *
501          * Router Reachability Probe MUST be rate-limited
502          * to no more than one per minute.
503          */
504         if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
505                 return;
506         rcu_read_lock_bh();
507         neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
508         if (neigh) {
509                 write_lock(&neigh->lock);
510                 if (neigh->nud_state & NUD_VALID)
511                         goto out;
512         }
513
514         if (!neigh ||
515             time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
516                 struct __rt6_probe_work *work;
517
518                 work = kmalloc(sizeof(*work), GFP_ATOMIC);
519
520                 if (neigh && work)
521                         neigh->updated = jiffies;
522
523                 if (neigh)
524                         write_unlock(&neigh->lock);
525
526                 if (work) {
527                         INIT_WORK(&work->work, rt6_probe_deferred);
528                         work->target = rt->rt6i_gateway;
529                         dev_hold(rt->dst.dev);
530                         work->dev = rt->dst.dev;
531                         schedule_work(&work->work);
532                 }
533         } else {
534 out:
535                 write_unlock(&neigh->lock);
536         }
537         rcu_read_unlock_bh();
538 }
539 #else
/* No router probing without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt)
{
	/* intentionally empty */
}
543 #endif
544
545 /*
546  * Default Router Selection (RFC 2461 6.3.6)
547  */
548 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
549 {
550         struct net_device *dev = rt->dst.dev;
551         if (!oif || dev->ifindex == oif)
552                 return 2;
553         if ((dev->flags & IFF_LOOPBACK) &&
554             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
555                 return 1;
556         return 0;
557 }
558
559 static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
560 {
561         struct neighbour *neigh;
562         enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
563
564         if (rt->rt6i_flags & RTF_NONEXTHOP ||
565             !(rt->rt6i_flags & RTF_GATEWAY))
566                 return RT6_NUD_SUCCEED;
567
568         rcu_read_lock_bh();
569         neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
570         if (neigh) {
571                 read_lock(&neigh->lock);
572                 if (neigh->nud_state & NUD_VALID)
573                         ret = RT6_NUD_SUCCEED;
574 #ifdef CONFIG_IPV6_ROUTER_PREF
575                 else if (!(neigh->nud_state & NUD_FAILED))
576                         ret = RT6_NUD_SUCCEED;
577 #endif
578                 read_unlock(&neigh->lock);
579         } else {
580                 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
581                       RT6_NUD_SUCCEED : RT6_NUD_FAIL_SOFT;
582         }
583         rcu_read_unlock_bh();
584
585         return ret;
586 }
587
588 static int rt6_score_route(struct rt6_info *rt, int oif,
589                            int strict)
590 {
591         int m;
592
593         m = rt6_check_dev(rt, oif);
594         if (!m && (strict & RT6_LOOKUP_F_IFACE))
595                 return RT6_NUD_FAIL_HARD;
596 #ifdef CONFIG_IPV6_ROUTER_PREF
597         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
598 #endif
599         if (strict & RT6_LOOKUP_F_REACHABLE) {
600                 int n = rt6_check_neigh(rt);
601                 if (n < 0)
602                         return n;
603         }
604         return m;
605 }
606
607 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
608                                    int *mpri, struct rt6_info *match,
609                                    bool *do_rr)
610 {
611         int m;
612         bool match_do_rr = false;
613
614         if (rt6_check_expired(rt))
615                 goto out;
616
617         m = rt6_score_route(rt, oif, strict);
618         if (m == RT6_NUD_FAIL_SOFT && !IS_ENABLED(CONFIG_IPV6_ROUTER_PREF)) {
619                 match_do_rr = true;
620                 m = 0; /* lowest valid score */
621         } else if (m < 0) {
622                 goto out;
623         }
624
625         if (strict & RT6_LOOKUP_F_REACHABLE)
626                 rt6_probe(rt);
627
628         if (m > *mpri) {
629                 *do_rr = match_do_rr;
630                 *mpri = m;
631                 match = rt;
632         }
633 out:
634         return match;
635 }
636
637 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
638                                      struct rt6_info *rr_head,
639                                      u32 metric, int oif, int strict,
640                                      bool *do_rr)
641 {
642         struct rt6_info *rt, *match;
643         int mpri = -1;
644
645         match = NULL;
646         for (rt = rr_head; rt && rt->rt6i_metric == metric;
647              rt = rt->dst.rt6_next)
648                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
649         for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
650              rt = rt->dst.rt6_next)
651                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
652
653         return match;
654 }
655
656 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
657 {
658         struct rt6_info *match, *rt0;
659         struct net *net;
660         bool do_rr = false;
661
662         rt0 = fn->rr_ptr;
663         if (!rt0)
664                 fn->rr_ptr = rt0 = fn->leaf;
665
666         match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
667                              &do_rr);
668
669         if (do_rr) {
670                 struct rt6_info *next = rt0->dst.rt6_next;
671
672                 /* no entries matched; do round-robin */
673                 if (!next || next->rt6i_metric != rt0->rt6i_metric)
674                         next = fn->leaf;
675
676                 if (next != rt0)
677                         fn->rr_ptr = next;
678         }
679
680         net = dev_net(rt0->dst.dev);
681         return match ? match : net->ipv6.ip6_null_entry;
682 }
683
684 #ifdef CONFIG_IPV6_ROUTE_INFO
685 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
686                   const struct in6_addr *gwaddr)
687 {
688         struct net *net = dev_net(dev);
689         struct route_info *rinfo = (struct route_info *) opt;
690         struct in6_addr prefix_buf, *prefix;
691         unsigned int pref;
692         unsigned long lifetime;
693         struct rt6_info *rt;
694
695         if (len < sizeof(struct route_info)) {
696                 return -EINVAL;
697         }
698
699         /* Sanity check for prefix_len and length */
700         if (rinfo->length > 3) {
701                 return -EINVAL;
702         } else if (rinfo->prefix_len > 128) {
703                 return -EINVAL;
704         } else if (rinfo->prefix_len > 64) {
705                 if (rinfo->length < 2) {
706                         return -EINVAL;
707                 }
708         } else if (rinfo->prefix_len > 0) {
709                 if (rinfo->length < 1) {
710                         return -EINVAL;
711                 }
712         }
713
714         pref = rinfo->route_pref;
715         if (pref == ICMPV6_ROUTER_PREF_INVALID)
716                 return -EINVAL;
717
718         lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
719
720         if (rinfo->length == 3)
721                 prefix = (struct in6_addr *)rinfo->prefix;
722         else {
723                 /* this function is safe */
724                 ipv6_addr_prefix(&prefix_buf,
725                                  (struct in6_addr *)rinfo->prefix,
726                                  rinfo->prefix_len);
727                 prefix = &prefix_buf;
728         }
729
730         if (rinfo->prefix_len == 0)
731                 rt = rt6_get_dflt_router(gwaddr, dev);
732         else
733                 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
734                                         gwaddr, dev->ifindex);
735
736         if (rt && !lifetime) {
737                 ip6_del_rt(rt);
738                 rt = NULL;
739         }
740
741         if (!rt && lifetime)
742                 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
743                                         pref);
744         else if (rt)
745                 rt->rt6i_flags = RTF_ROUTEINFO |
746                                  (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
747
748         if (rt) {
749                 if (!addrconf_finite_timeout(lifetime))
750                         rt6_clean_expires(rt);
751                 else
752                         rt6_set_expires(rt, jiffies + HZ * lifetime);
753
754                 ip6_rt_put(rt);
755         }
756         return 0;
757 }
758 #endif
759
/*
 * BACKTRACK(__net, saddr) - retry a failed lookup higher up the FIB tree.
 *
 * Must be expanded inside a function that has locals named 'rt' and
 * 'fn' and labels 'out' and 'restart'.
 *
 * Does nothing unless rt is __net's ip6_null_entry.  Otherwise it loops:
 * jump to 'out' if fn is a top-level root (RTN_TL_ROOT); else move fn to
 * its parent, or - when the parent has a subtree that is not fn itself -
 * to the result of fib6_lookup() in that subtree keyed by saddr; then
 * jump to 'restart' as soon as fn carries route info (RTN_RTINFO).
 */
#define BACKTRACK(__net, saddr)			\
do { \
	if (rt == __net->ipv6.ip6_null_entry) {	\
		struct fib6_node *pn; \
		while (1) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
777
/*
 * Look up a route for @fl6 in @table.
 *
 * Takes table->tb6_lock for reading, finds the FIB node for the flow's
 * destination/source, picks an entry with rt6_device_match() (and
 * rt6_multipath_select() when the entry has siblings and no output
 * interface was requested) and backtracks through parent nodes while
 * the result is the null entry.  The returned route has been passed
 * through dst_use() before the lock is dropped.
 *
 * The locals 'fn' and 'rt' and the labels 'restart' and 'out' are
 * referenced by the BACKTRACK() macro and must keep their names.
 */
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	/* Re-entered from BACKTRACK() with fn moved up the tree. */
	rt = fn->leaf;
	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
		rt = rt6_multipath_select(rt, fl6);
	BACKTRACK(net, &fl6->saddr);
out:
	dst_use(&rt->dst, jiffies);
	read_unlock_bh(&table->tb6_lock);
	return rt;

}
799
800 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
801                                     int flags)
802 {
803         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
804 }
805 EXPORT_SYMBOL_GPL(ip6_route_lookup);
806
807 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
808                             const struct in6_addr *saddr, int oif, int strict)
809 {
810         struct flowi6 fl6 = {
811                 .flowi6_oif = oif,
812                 .daddr = *daddr,
813         };
814         struct dst_entry *dst;
815         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
816
817         if (saddr) {
818                 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
819                 flags |= RT6_LOOKUP_F_HAS_SADDR;
820         }
821
822         dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
823         if (dst->error == 0)
824                 return (struct rt6_info *) dst;
825
826         dst_release(dst);
827
828         return NULL;
829 }
830
831 EXPORT_SYMBOL(rt6_lookup);
832
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason, the
   route is freed. In any case, if the caller does not hold it, it may
   be destroyed.
 */
838
839 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
840 {
841         int err;
842         struct fib6_table *table;
843
844         table = rt->rt6i_table;
845         write_lock_bh(&table->tb6_lock);
846         err = fib6_add(&table->tb6_root, rt, info);
847         write_unlock_bh(&table->tb6_lock);
848
849         return err;
850 }
851
852 int ip6_ins_rt(struct rt6_info *rt)
853 {
854         struct nl_info info = {
855                 .nl_net = dev_net(rt->dst.dev),
856         };
857         return __ip6_ins_rt(rt, &info);
858 }
859
860 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
861                                       const struct in6_addr *daddr,
862                                       const struct in6_addr *saddr)
863 {
864         struct rt6_info *rt;
865
866         /*
867          *      Clone the route.
868          */
869
870         rt = ip6_rt_copy(ort, daddr);
871
872         if (rt) {
873                 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
874                         if (ort->rt6i_dst.plen != 128 &&
875                             ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
876                                 rt->rt6i_flags |= RTF_ANYCAST;
877                 }
878
879                 rt->rt6i_flags |= RTF_CACHE;
880
881 #ifdef CONFIG_IPV6_SUBTREES
882                 if (rt->rt6i_src.plen && saddr) {
883                         rt->rt6i_src.addr = *saddr;
884                         rt->rt6i_src.plen = 128;
885                 }
886 #endif
887         }
888
889         return rt;
890 }
891
892 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
893                                         const struct in6_addr *daddr)
894 {
895         struct rt6_info *rt = ip6_rt_copy(ort, daddr);
896
897         if (rt)
898                 rt->rt6i_flags |= RTF_CACHE;
899         return rt;
900 }
901
902 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
903                                       struct flowi6 *fl6, int flags)
904 {
905         struct fib6_node *fn;
906         struct rt6_info *rt, *nrt;
907         int strict = 0;
908         int attempts = 3;
909         int err;
910         int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
911
912         strict |= flags & RT6_LOOKUP_F_IFACE;
913
914 relookup:
915         read_lock_bh(&table->tb6_lock);
916
917 restart_2:
918         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
919
920 restart:
921         rt = rt6_select(fn, oif, strict | reachable);
922         if (rt->rt6i_nsiblings && oif == 0)
923                 rt = rt6_multipath_select(rt, fl6);
924         BACKTRACK(net, &fl6->saddr);
925         if (rt == net->ipv6.ip6_null_entry ||
926             rt->rt6i_flags & RTF_CACHE)
927                 goto out;
928
929         dst_hold(&rt->dst);
930         read_unlock_bh(&table->tb6_lock);
931
932         if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
933                 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
934         else if (!(rt->dst.flags & DST_HOST))
935                 nrt = rt6_alloc_clone(rt, &fl6->daddr);
936         else
937                 goto out2;
938
939         ip6_rt_put(rt);
940         rt = nrt ? : net->ipv6.ip6_null_entry;
941
942         dst_hold(&rt->dst);
943         if (nrt) {
944                 err = ip6_ins_rt(nrt);
945                 if (!err)
946                         goto out2;
947         }
948
949         if (--attempts <= 0)
950                 goto out2;
951
952         /*
953          * Race condition! In the gap, when table->tb6_lock was
954          * released someone could insert this route.  Relookup.
955          */
956         ip6_rt_put(rt);
957         goto relookup;
958
959 out:
960         if (reachable) {
961                 reachable = 0;
962                 goto restart_2;
963         }
964         dst_hold(&rt->dst);
965         read_unlock_bh(&table->tb6_lock);
966 out2:
967         rt->dst.lastuse = jiffies;
968         rt->dst.__use++;
969
970         return rt;
971 }
972
973 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
974                                             struct flowi6 *fl6, int flags)
975 {
976         return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
977 }
978
979 static struct dst_entry *ip6_route_input_lookup(struct net *net,
980                                                 struct net_device *dev,
981                                                 struct flowi6 *fl6, int flags)
982 {
983         if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
984                 flags |= RT6_LOOKUP_F_IFACE;
985
986         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
987 }
988
989 void ip6_route_input(struct sk_buff *skb)
990 {
991         const struct ipv6hdr *iph = ipv6_hdr(skb);
992         struct net *net = dev_net(skb->dev);
993         int flags = RT6_LOOKUP_F_HAS_SADDR;
994         struct flowi6 fl6 = {
995                 .flowi6_iif = skb->dev->ifindex,
996                 .daddr = iph->daddr,
997                 .saddr = iph->saddr,
998                 .flowlabel = ip6_flowinfo(iph),
999                 .flowi6_mark = skb->mark,
1000                 .flowi6_proto = iph->nexthdr,
1001         };
1002
1003         skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1004 }
1005
1006 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1007                                              struct flowi6 *fl6, int flags)
1008 {
1009         return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1010 }
1011
1012 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
1013                                     struct flowi6 *fl6)
1014 {
1015         int flags = 0;
1016
1017         fl6->flowi6_iif = LOOPBACK_IFINDEX;
1018
1019         if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
1020                 flags |= RT6_LOOKUP_F_IFACE;
1021
1022         if (!ipv6_addr_any(&fl6->saddr))
1023                 flags |= RT6_LOOKUP_F_HAS_SADDR;
1024         else if (sk)
1025                 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1026
1027         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1028 }
1029
1030 EXPORT_SYMBOL(ip6_route_output);
1031
1032 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1033 {
1034         struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1035         struct dst_entry *new = NULL;
1036
1037         rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1038         if (rt) {
1039                 new = &rt->dst;
1040
1041                 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
1042                 rt6_init_peer(rt, net->ipv6.peers);
1043
1044                 new->__use = 1;
1045                 new->input = dst_discard;
1046                 new->output = dst_discard;
1047
1048                 if (dst_metrics_read_only(&ort->dst))
1049                         new->_metrics = ort->dst._metrics;
1050                 else
1051                         dst_copy_metrics(new, &ort->dst);
1052                 rt->rt6i_idev = ort->rt6i_idev;
1053                 if (rt->rt6i_idev)
1054                         in6_dev_hold(rt->rt6i_idev);
1055
1056                 rt->rt6i_gateway = ort->rt6i_gateway;
1057                 rt->rt6i_flags = ort->rt6i_flags;
1058                 rt->rt6i_metric = 0;
1059
1060                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1061 #ifdef CONFIG_IPV6_SUBTREES
1062                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1063 #endif
1064
1065                 dst_free(new);
1066         }
1067
1068         dst_release(dst_orig);
1069         return new ? new : ERR_PTR(-ENOMEM);
1070 }
1071
1072 /*
1073  *      Destination cache support functions
1074  */
1075
1076 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1077 {
1078         struct rt6_info *rt;
1079
1080         rt = (struct rt6_info *) dst;
1081
1082         /* All IPV6 dsts are created with ->obsolete set to the value
1083          * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1084          * into this function always.
1085          */
1086         if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
1087                 return NULL;
1088
1089         if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1090                 return NULL;
1091
1092         if (rt6_check_expired(rt))
1093                 return NULL;
1094
1095         return dst;
1096 }
1097
1098 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1099 {
1100         struct rt6_info *rt = (struct rt6_info *) dst;
1101
1102         if (rt) {
1103                 if (rt->rt6i_flags & RTF_CACHE) {
1104                         if (rt6_check_expired(rt)) {
1105                                 ip6_del_rt(rt);
1106                                 dst = NULL;
1107                         }
1108                 } else {
1109                         dst_release(dst);
1110                         dst = NULL;
1111                 }
1112         }
1113         return dst;
1114 }
1115
1116 static void ip6_link_failure(struct sk_buff *skb)
1117 {
1118         struct rt6_info *rt;
1119
1120         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1121
1122         rt = (struct rt6_info *) skb_dst(skb);
1123         if (rt) {
1124                 if (rt->rt6i_flags & RTF_CACHE) {
1125                         dst_hold(&rt->dst);
1126                         if (ip6_del_rt(rt))
1127                                 dst_free(&rt->dst);
1128                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1129                         rt->rt6i_node->fn_sernum = -1;
1130                 }
1131         }
1132 }
1133
1134 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1135                                struct sk_buff *skb, u32 mtu)
1136 {
1137         struct rt6_info *rt6 = (struct rt6_info*)dst;
1138
1139         dst_confirm(dst);
1140         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1141                 struct net *net = dev_net(dst->dev);
1142
1143                 rt6->rt6i_flags |= RTF_MODIFIED;
1144                 if (mtu < IPV6_MIN_MTU) {
1145                         u32 features = dst_metric(dst, RTAX_FEATURES);
1146                         mtu = IPV6_MIN_MTU;
1147                         features |= RTAX_FEATURE_ALLFRAG;
1148                         dst_metric_set(dst, RTAX_FEATURES, features);
1149                 }
1150                 dst_metric_set(dst, RTAX_MTU, mtu);
1151                 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1152         }
1153 }
1154
1155 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1156                      int oif, u32 mark)
1157 {
1158         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1159         struct dst_entry *dst;
1160         struct flowi6 fl6;
1161
1162         memset(&fl6, 0, sizeof(fl6));
1163         fl6.flowi6_oif = oif;
1164         fl6.flowi6_mark = mark;
1165         fl6.flowi6_flags = 0;
1166         fl6.daddr = iph->daddr;
1167         fl6.saddr = iph->saddr;
1168         fl6.flowlabel = ip6_flowinfo(iph);
1169
1170         dst = ip6_route_output(net, NULL, &fl6);
1171         if (!dst->error)
1172                 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1173         dst_release(dst);
1174 }
1175 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1176
1177 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1178 {
1179         ip6_update_pmtu(skb, sock_net(sk), mtu,
1180                         sk->sk_bound_dev_if, sk->sk_mark);
1181 }
1182 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1183
1184 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1185 {
1186         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1187         struct dst_entry *dst;
1188         struct flowi6 fl6;
1189
1190         memset(&fl6, 0, sizeof(fl6));
1191         fl6.flowi6_oif = oif;
1192         fl6.flowi6_mark = mark;
1193         fl6.flowi6_flags = 0;
1194         fl6.daddr = iph->daddr;
1195         fl6.saddr = iph->saddr;
1196         fl6.flowlabel = ip6_flowinfo(iph);
1197
1198         dst = ip6_route_output(net, NULL, &fl6);
1199         if (!dst->error)
1200                 rt6_do_redirect(dst, NULL, skb);
1201         dst_release(dst);
1202 }
1203 EXPORT_SYMBOL_GPL(ip6_redirect);
1204
1205 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1206 {
1207         ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1208 }
1209 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1210
1211 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1212 {
1213         struct net_device *dev = dst->dev;
1214         unsigned int mtu = dst_mtu(dst);
1215         struct net *net = dev_net(dev);
1216
1217         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1218
1219         if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1220                 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1221
1222         /*
1223          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1224          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1225          * IPV6_MAXPLEN is also valid and means: "any MSS,
1226          * rely only on pmtu discovery"
1227          */
1228         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1229                 mtu = IPV6_MAXPLEN;
1230         return mtu;
1231 }
1232
1233 static unsigned int ip6_mtu(const struct dst_entry *dst)
1234 {
1235         struct inet6_dev *idev;
1236         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1237
1238         if (mtu)
1239                 return mtu;
1240
1241         mtu = IPV6_MIN_MTU;
1242
1243         rcu_read_lock();
1244         idev = __in6_dev_get(dst->dev);
1245         if (idev)
1246                 mtu = idev->cnf.mtu6;
1247         rcu_read_unlock();
1248
1249         return mtu;
1250 }
1251
1252 static struct dst_entry *icmp6_dst_gc_list;
1253 static DEFINE_SPINLOCK(icmp6_dst_lock);
1254
1255 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1256                                   struct flowi6 *fl6)
1257 {
1258         struct dst_entry *dst;
1259         struct rt6_info *rt;
1260         struct inet6_dev *idev = in6_dev_get(dev);
1261         struct net *net = dev_net(dev);
1262
1263         if (unlikely(!idev))
1264                 return ERR_PTR(-ENODEV);
1265
1266         rt = ip6_dst_alloc(net, dev, 0, NULL);
1267         if (unlikely(!rt)) {
1268                 in6_dev_put(idev);
1269                 dst = ERR_PTR(-ENOMEM);
1270                 goto out;
1271         }
1272
1273         rt->dst.flags |= DST_HOST;
1274         rt->dst.output  = ip6_output;
1275         atomic_set(&rt->dst.__refcnt, 1);
1276         rt->rt6i_gateway  = fl6->daddr;
1277         rt->rt6i_dst.addr = fl6->daddr;
1278         rt->rt6i_dst.plen = 128;
1279         rt->rt6i_idev     = idev;
1280         dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1281
1282         spin_lock_bh(&icmp6_dst_lock);
1283         rt->dst.next = icmp6_dst_gc_list;
1284         icmp6_dst_gc_list = &rt->dst;
1285         spin_unlock_bh(&icmp6_dst_lock);
1286
1287         fib6_force_start_gc(net);
1288
1289         dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1290
1291 out:
1292         return dst;
1293 }
1294
1295 int icmp6_dst_gc(void)
1296 {
1297         struct dst_entry *dst, **pprev;
1298         int more = 0;
1299
1300         spin_lock_bh(&icmp6_dst_lock);
1301         pprev = &icmp6_dst_gc_list;
1302
1303         while ((dst = *pprev) != NULL) {
1304                 if (!atomic_read(&dst->__refcnt)) {
1305                         *pprev = dst->next;
1306                         dst_free(dst);
1307                 } else {
1308                         pprev = &dst->next;
1309                         ++more;
1310                 }
1311         }
1312
1313         spin_unlock_bh(&icmp6_dst_lock);
1314
1315         return more;
1316 }
1317
1318 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1319                             void *arg)
1320 {
1321         struct dst_entry *dst, **pprev;
1322
1323         spin_lock_bh(&icmp6_dst_lock);
1324         pprev = &icmp6_dst_gc_list;
1325         while ((dst = *pprev) != NULL) {
1326                 struct rt6_info *rt = (struct rt6_info *) dst;
1327                 if (func(rt, arg)) {
1328                         *pprev = dst->next;
1329                         dst_free(dst);
1330                 } else {
1331                         pprev = &dst->next;
1332                 }
1333         }
1334         spin_unlock_bh(&icmp6_dst_lock);
1335 }
1336
1337 static int ip6_dst_gc(struct dst_ops *ops)
1338 {
1339         unsigned long now = jiffies;
1340         struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1341         int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1342         int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1343         int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1344         int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1345         unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1346         int entries;
1347
1348         entries = dst_entries_get_fast(ops);
1349         if (time_after(rt_last_gc + rt_min_interval, now) &&
1350             entries <= rt_max_size)
1351                 goto out;
1352
1353         net->ipv6.ip6_rt_gc_expire++;
1354         fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1355         net->ipv6.ip6_rt_last_gc = now;
1356         entries = dst_entries_get_slow(ops);
1357         if (entries < ops->gc_thresh)
1358                 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1359 out:
1360         net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1361         return entries > rt_max_size;
1362 }
1363
1364 int ip6_dst_hoplimit(struct dst_entry *dst)
1365 {
1366         int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1367         if (hoplimit == 0) {
1368                 struct net_device *dev = dst->dev;
1369                 struct inet6_dev *idev;
1370
1371                 rcu_read_lock();
1372                 idev = __in6_dev_get(dev);
1373                 if (idev)
1374                         hoplimit = idev->cnf.hop_limit;
1375                 else
1376                         hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1377                 rcu_read_unlock();
1378         }
1379         return hoplimit;
1380 }
1381 EXPORT_SYMBOL(ip6_dst_hoplimit);
1382
1383 /*
1384  *
1385  */
1386
1387 int ip6_route_add(struct fib6_config *cfg)
1388 {
1389         int err;
1390         struct net *net = cfg->fc_nlinfo.nl_net;
1391         struct rt6_info *rt = NULL;
1392         struct net_device *dev = NULL;
1393         struct inet6_dev *idev = NULL;
1394         struct fib6_table *table;
1395         int addr_type;
1396
1397         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1398                 return -EINVAL;
1399 #ifndef CONFIG_IPV6_SUBTREES
1400         if (cfg->fc_src_len)
1401                 return -EINVAL;
1402 #endif
1403         if (cfg->fc_ifindex) {
1404                 err = -ENODEV;
1405                 dev = dev_get_by_index(net, cfg->fc_ifindex);
1406                 if (!dev)
1407                         goto out;
1408                 idev = in6_dev_get(dev);
1409                 if (!idev)
1410                         goto out;
1411         }
1412
1413         if (cfg->fc_metric == 0)
1414                 cfg->fc_metric = IP6_RT_PRIO_USER;
1415
1416         err = -ENOBUFS;
1417         if (cfg->fc_nlinfo.nlh &&
1418             !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1419                 table = fib6_get_table(net, cfg->fc_table);
1420                 if (!table) {
1421                         pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1422                         table = fib6_new_table(net, cfg->fc_table);
1423                 }
1424         } else {
1425                 table = fib6_new_table(net, cfg->fc_table);
1426         }
1427
1428         if (!table)
1429                 goto out;
1430
1431         rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table);
1432
1433         if (!rt) {
1434                 err = -ENOMEM;
1435                 goto out;
1436         }
1437
1438         if (cfg->fc_flags & RTF_EXPIRES)
1439                 rt6_set_expires(rt, jiffies +
1440                                 clock_t_to_jiffies(cfg->fc_expires));
1441         else
1442                 rt6_clean_expires(rt);
1443
1444         if (cfg->fc_protocol == RTPROT_UNSPEC)
1445                 cfg->fc_protocol = RTPROT_BOOT;
1446         rt->rt6i_protocol = cfg->fc_protocol;
1447
1448         addr_type = ipv6_addr_type(&cfg->fc_dst);
1449
1450         if (addr_type & IPV6_ADDR_MULTICAST)
1451                 rt->dst.input = ip6_mc_input;
1452         else if (cfg->fc_flags & RTF_LOCAL)
1453                 rt->dst.input = ip6_input;
1454         else
1455                 rt->dst.input = ip6_forward;
1456
1457         rt->dst.output = ip6_output;
1458
1459         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1460         rt->rt6i_dst.plen = cfg->fc_dst_len;
1461         if (rt->rt6i_dst.plen == 128)
1462                rt->dst.flags |= DST_HOST;
1463
1464         if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1465                 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1466                 if (!metrics) {
1467                         err = -ENOMEM;
1468                         goto out;
1469                 }
1470                 dst_init_metrics(&rt->dst, metrics, 0);
1471         }
1472 #ifdef CONFIG_IPV6_SUBTREES
1473         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1474         rt->rt6i_src.plen = cfg->fc_src_len;
1475 #endif
1476
1477         rt->rt6i_metric = cfg->fc_metric;
1478
1479         /* We cannot add true routes via loopback here,
1480            they would result in kernel looping; promote them to reject routes
1481          */
1482         if ((cfg->fc_flags & RTF_REJECT) ||
1483             (dev && (dev->flags & IFF_LOOPBACK) &&
1484              !(addr_type & IPV6_ADDR_LOOPBACK) &&
1485              !(cfg->fc_flags & RTF_LOCAL))) {
1486                 /* hold loopback dev/idev if we haven't done so. */
1487                 if (dev != net->loopback_dev) {
1488                         if (dev) {
1489                                 dev_put(dev);
1490                                 in6_dev_put(idev);
1491                         }
1492                         dev = net->loopback_dev;
1493                         dev_hold(dev);
1494                         idev = in6_dev_get(dev);
1495                         if (!idev) {
1496                                 err = -ENODEV;
1497                                 goto out;
1498                         }
1499                 }
1500                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1501                 switch (cfg->fc_type) {
1502                 case RTN_BLACKHOLE:
1503                         rt->dst.error = -EINVAL;
1504                         rt->dst.output = dst_discard;
1505                         rt->dst.input = dst_discard;
1506                         break;
1507                 case RTN_PROHIBIT:
1508                         rt->dst.error = -EACCES;
1509                         rt->dst.output = ip6_pkt_prohibit_out;
1510                         rt->dst.input = ip6_pkt_prohibit;
1511                         break;
1512                 case RTN_THROW:
1513                 default:
1514                         rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1515                                         : -ENETUNREACH;
1516                         rt->dst.output = ip6_pkt_discard_out;
1517                         rt->dst.input = ip6_pkt_discard;
1518                         break;
1519                 }
1520                 goto install_route;
1521         }
1522
1523         if (cfg->fc_flags & RTF_GATEWAY) {
1524                 const struct in6_addr *gw_addr;
1525                 int gwa_type;
1526
1527                 gw_addr = &cfg->fc_gateway;
1528                 rt->rt6i_gateway = *gw_addr;
1529                 gwa_type = ipv6_addr_type(gw_addr);
1530
1531                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1532                         struct rt6_info *grt;
1533
1534                         /* IPv6 strictly inhibits using not link-local
1535                            addresses as nexthop address.
1536                            Otherwise, router will not able to send redirects.
1537                            It is very good, but in some (rare!) circumstances
1538                            (SIT, PtP, NBMA NOARP links) it is handy to allow
1539                            some exceptions. --ANK
1540                          */
1541                         err = -EINVAL;
1542                         if (!(gwa_type & IPV6_ADDR_UNICAST))
1543                                 goto out;
1544
1545                         grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1546
1547                         err = -EHOSTUNREACH;
1548                         if (!grt)
1549                                 goto out;
1550                         if (dev) {
1551                                 if (dev != grt->dst.dev) {
1552                                         ip6_rt_put(grt);
1553                                         goto out;
1554                                 }
1555                         } else {
1556                                 dev = grt->dst.dev;
1557                                 idev = grt->rt6i_idev;
1558                                 dev_hold(dev);
1559                                 in6_dev_hold(grt->rt6i_idev);
1560                         }
1561                         if (!(grt->rt6i_flags & RTF_GATEWAY))
1562                                 err = 0;
1563                         ip6_rt_put(grt);
1564
1565                         if (err)
1566                                 goto out;
1567                 }
1568                 err = -EINVAL;
1569                 if (!dev || (dev->flags & IFF_LOOPBACK))
1570                         goto out;
1571         }
1572
1573         err = -ENODEV;
1574         if (!dev)
1575                 goto out;
1576
1577         if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1578                 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1579                         err = -EINVAL;
1580                         goto out;
1581                 }
1582                 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1583                 rt->rt6i_prefsrc.plen = 128;
1584         } else
1585                 rt->rt6i_prefsrc.plen = 0;
1586
1587         rt->rt6i_flags = cfg->fc_flags;
1588
1589 install_route:
1590         if (cfg->fc_mx) {
1591                 struct nlattr *nla;
1592                 int remaining;
1593
1594                 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1595                         int type = nla_type(nla);
1596
1597                         if (type) {
1598                                 if (type > RTAX_MAX) {
1599                                         err = -EINVAL;
1600                                         goto out;
1601                                 }
1602
1603                                 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1604                         }
1605                 }
1606         }
1607
1608         rt->dst.dev = dev;
1609         rt->rt6i_idev = idev;
1610         rt->rt6i_table = table;
1611
1612         cfg->fc_nlinfo.nl_net = dev_net(dev);
1613
1614         return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1615
1616 out:
1617         if (dev)
1618                 dev_put(dev);
1619         if (idev)
1620                 in6_dev_put(idev);
1621         if (rt)
1622                 dst_free(&rt->dst);
1623         return err;
1624 }
1625
1626 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1627 {
1628         int err;
1629         struct fib6_table *table;
1630         struct net *net = dev_net(rt->dst.dev);
1631
1632         if (rt == net->ipv6.ip6_null_entry) {
1633                 err = -ENOENT;
1634                 goto out;
1635         }
1636
1637         table = rt->rt6i_table;
1638         write_lock_bh(&table->tb6_lock);
1639         err = fib6_del(rt, info);
1640         write_unlock_bh(&table->tb6_lock);
1641
1642 out:
1643         ip6_rt_put(rt);
1644         return err;
1645 }
1646
1647 int ip6_del_rt(struct rt6_info *rt)
1648 {
1649         struct nl_info info = {
1650                 .nl_net = dev_net(rt->dst.dev),
1651         };
1652         return __ip6_del_rt(rt, &info);
1653 }
1654
1655 static int ip6_route_del(struct fib6_config *cfg)
1656 {
1657         struct fib6_table *table;
1658         struct fib6_node *fn;
1659         struct rt6_info *rt;
1660         int err = -ESRCH;
1661
1662         table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1663         if (!table)
1664                 return err;
1665
1666         read_lock_bh(&table->tb6_lock);
1667
1668         fn = fib6_locate(&table->tb6_root,
1669                          &cfg->fc_dst, cfg->fc_dst_len,
1670                          &cfg->fc_src, cfg->fc_src_len);
1671
1672         if (fn) {
1673                 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1674                         if (cfg->fc_ifindex &&
1675                             (!rt->dst.dev ||
1676                              rt->dst.dev->ifindex != cfg->fc_ifindex))
1677                                 continue;
1678                         if (cfg->fc_flags & RTF_GATEWAY &&
1679                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1680                                 continue;
1681                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1682                                 continue;
1683                         dst_hold(&rt->dst);
1684                         read_unlock_bh(&table->tb6_lock);
1685
1686                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1687                 }
1688         }
1689         read_unlock_bh(&table->tb6_lock);
1690
1691         return err;
1692 }
1693
1694 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1695 {
1696         struct net *net = dev_net(skb->dev);
1697         struct netevent_redirect netevent;
1698         struct rt6_info *rt, *nrt = NULL;
1699         struct ndisc_options ndopts;
1700         struct inet6_dev *in6_dev;
1701         struct neighbour *neigh;
1702         struct rd_msg *msg;
1703         int optlen, on_link;
1704         u8 *lladdr;
1705
1706         optlen = skb->tail - skb->transport_header;
1707         optlen -= sizeof(*msg);
1708
1709         if (optlen < 0) {
1710                 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1711                 return;
1712         }
1713
1714         msg = (struct rd_msg *)icmp6_hdr(skb);
1715
1716         if (ipv6_addr_is_multicast(&msg->dest)) {
1717                 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1718                 return;
1719         }
1720
1721         on_link = 0;
1722         if (ipv6_addr_equal(&msg->dest, &msg->target)) {
1723                 on_link = 1;
1724         } else if (ipv6_addr_type(&msg->target) !=
1725                    (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1726                 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1727                 return;
1728         }
1729
1730         in6_dev = __in6_dev_get(skb->dev);
1731         if (!in6_dev)
1732                 return;
1733         if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1734                 return;
1735
1736         /* RFC2461 8.1:
1737          *      The IP source address of the Redirect MUST be the same as the current
1738          *      first-hop router for the specified ICMP Destination Address.
1739          */
1740
1741         if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
1742                 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1743                 return;
1744         }
1745
1746         lladdr = NULL;
1747         if (ndopts.nd_opts_tgt_lladdr) {
1748                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1749                                              skb->dev);
1750                 if (!lladdr) {
1751                         net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1752                         return;
1753                 }
1754         }
1755
1756         rt = (struct rt6_info *) dst;
1757         if (rt == net->ipv6.ip6_null_entry) {
1758                 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1759                 return;
1760         }
1761
1762         /* Redirect received -> path was valid.
1763          * Look, redirects are sent only in response to data packets,
1764          * so that this nexthop apparently is reachable. --ANK
1765          */
1766         dst_confirm(&rt->dst);
1767
1768         neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
1769         if (!neigh)
1770                 return;
1771
1772         /*
1773          *      We have finally decided to accept it.
1774          */
1775
1776         neigh_update(neigh, lladdr, NUD_STALE,
1777                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1778                      NEIGH_UPDATE_F_OVERRIDE|
1779                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1780                                      NEIGH_UPDATE_F_ISROUTER))
1781                      );
1782
1783         nrt = ip6_rt_copy(rt, &msg->dest);
1784         if (!nrt)
1785                 goto out;
1786
1787         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1788         if (on_link)
1789                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1790
1791         nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1792
1793         if (ip6_ins_rt(nrt))
1794                 goto out;
1795
1796         netevent.old = &rt->dst;
1797         netevent.new = &nrt->dst;
1798         netevent.daddr = &msg->dest;
1799         netevent.neigh = neigh;
1800         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1801
1802         if (rt->rt6i_flags & RTF_CACHE) {
1803                 rt = (struct rt6_info *) dst_clone(&rt->dst);
1804                 ip6_del_rt(rt);
1805         }
1806
1807 out:
1808         neigh_release(neigh);
1809 }
1810
1811 /*
1812  *      Misc support functions
1813  */
1814
1815 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1816                                     const struct in6_addr *dest)
1817 {
1818         struct net *net = dev_net(ort->dst.dev);
1819         struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1820                                             ort->rt6i_table);
1821
1822         if (rt) {
1823                 rt->dst.input = ort->dst.input;
1824                 rt->dst.output = ort->dst.output;
1825                 rt->dst.flags |= DST_HOST;
1826
1827                 rt->rt6i_dst.addr = *dest;
1828                 rt->rt6i_dst.plen = 128;
1829                 dst_copy_metrics(&rt->dst, &ort->dst);
1830                 rt->dst.error = ort->dst.error;
1831                 rt->rt6i_idev = ort->rt6i_idev;
1832                 if (rt->rt6i_idev)
1833                         in6_dev_hold(rt->rt6i_idev);
1834                 rt->dst.lastuse = jiffies;
1835
1836                 if (ort->rt6i_flags & RTF_GATEWAY)
1837                         rt->rt6i_gateway = ort->rt6i_gateway;
1838                 else
1839                         rt->rt6i_gateway = *dest;
1840                 rt->rt6i_flags = ort->rt6i_flags;
1841                 rt6_set_from(rt, ort);
1842                 rt->rt6i_metric = 0;
1843
1844 #ifdef CONFIG_IPV6_SUBTREES
1845                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1846 #endif
1847                 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1848                 rt->rt6i_table = ort->rt6i_table;
1849         }
1850         return rt;
1851 }
1852
1853 #ifdef CONFIG_IPV6_ROUTE_INFO
1854 static struct rt6_info *rt6_get_route_info(struct net *net,
1855                                            const struct in6_addr *prefix, int prefixlen,
1856                                            const struct in6_addr *gwaddr, int ifindex)
1857 {
1858         struct fib6_node *fn;
1859         struct rt6_info *rt = NULL;
1860         struct fib6_table *table;
1861
1862         table = fib6_get_table(net, RT6_TABLE_INFO);
1863         if (!table)
1864                 return NULL;
1865
1866         read_lock_bh(&table->tb6_lock);
1867         fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1868         if (!fn)
1869                 goto out;
1870
1871         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1872                 if (rt->dst.dev->ifindex != ifindex)
1873                         continue;
1874                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1875                         continue;
1876                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1877                         continue;
1878                 dst_hold(&rt->dst);
1879                 break;
1880         }
1881 out:
1882         read_unlock_bh(&table->tb6_lock);
1883         return rt;
1884 }
1885
1886 static struct rt6_info *rt6_add_route_info(struct net *net,
1887                                            const struct in6_addr *prefix, int prefixlen,
1888                                            const struct in6_addr *gwaddr, int ifindex,
1889                                            unsigned int pref)
1890 {
1891         struct fib6_config cfg = {
1892                 .fc_table       = RT6_TABLE_INFO,
1893                 .fc_metric      = IP6_RT_PRIO_USER,
1894                 .fc_ifindex     = ifindex,
1895                 .fc_dst_len     = prefixlen,
1896                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1897                                   RTF_UP | RTF_PREF(pref),
1898                 .fc_nlinfo.portid = 0,
1899                 .fc_nlinfo.nlh = NULL,
1900                 .fc_nlinfo.nl_net = net,
1901         };
1902
1903         cfg.fc_dst = *prefix;
1904         cfg.fc_gateway = *gwaddr;
1905
1906         /* We should treat it as a default route if prefix length is 0. */
1907         if (!prefixlen)
1908                 cfg.fc_flags |= RTF_DEFAULT;
1909
1910         ip6_route_add(&cfg);
1911
1912         return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1913 }
1914 #endif
1915
1916 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1917 {
1918         struct rt6_info *rt;
1919         struct fib6_table *table;
1920
1921         table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1922         if (!table)
1923                 return NULL;
1924
1925         read_lock_bh(&table->tb6_lock);
1926         for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1927                 if (dev == rt->dst.dev &&
1928                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1929                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1930                         break;
1931         }
1932         if (rt)
1933                 dst_hold(&rt->dst);
1934         read_unlock_bh(&table->tb6_lock);
1935         return rt;
1936 }
1937
1938 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1939                                      struct net_device *dev,
1940                                      unsigned int pref)
1941 {
1942         struct fib6_config cfg = {
1943                 .fc_table       = RT6_TABLE_DFLT,
1944                 .fc_metric      = IP6_RT_PRIO_USER,
1945                 .fc_ifindex     = dev->ifindex,
1946                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1947                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1948                 .fc_nlinfo.portid = 0,
1949                 .fc_nlinfo.nlh = NULL,
1950                 .fc_nlinfo.nl_net = dev_net(dev),
1951         };
1952
1953         cfg.fc_gateway = *gwaddr;
1954
1955         ip6_route_add(&cfg);
1956
1957         return rt6_get_dflt_router(gwaddr, dev);
1958 }
1959
1960 void rt6_purge_dflt_routers(struct net *net)
1961 {
1962         struct rt6_info *rt;
1963         struct fib6_table *table;
1964
1965         /* NOTE: Keep consistent with rt6_get_dflt_router */
1966         table = fib6_get_table(net, RT6_TABLE_DFLT);
1967         if (!table)
1968                 return;
1969
1970 restart:
1971         read_lock_bh(&table->tb6_lock);
1972         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1973                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
1974                     (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
1975                         dst_hold(&rt->dst);
1976                         read_unlock_bh(&table->tb6_lock);
1977                         ip6_del_rt(rt);
1978                         goto restart;
1979                 }
1980         }
1981         read_unlock_bh(&table->tb6_lock);
1982 }
1983
1984 static void rtmsg_to_fib6_config(struct net *net,
1985                                  struct in6_rtmsg *rtmsg,
1986                                  struct fib6_config *cfg)
1987 {
1988         memset(cfg, 0, sizeof(*cfg));
1989
1990         cfg->fc_table = RT6_TABLE_MAIN;
1991         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1992         cfg->fc_metric = rtmsg->rtmsg_metric;
1993         cfg->fc_expires = rtmsg->rtmsg_info;
1994         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1995         cfg->fc_src_len = rtmsg->rtmsg_src_len;
1996         cfg->fc_flags = rtmsg->rtmsg_flags;
1997
1998         cfg->fc_nlinfo.nl_net = net;
1999
2000         cfg->fc_dst = rtmsg->rtmsg_dst;
2001         cfg->fc_src = rtmsg->rtmsg_src;
2002         cfg->fc_gateway = rtmsg->rtmsg_gateway;
2003 }
2004
2005 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2006 {
2007         struct fib6_config cfg;
2008         struct in6_rtmsg rtmsg;
2009         int err;
2010
2011         switch(cmd) {
2012         case SIOCADDRT:         /* Add a route */
2013         case SIOCDELRT:         /* Delete a route */
2014                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2015                         return -EPERM;
2016                 err = copy_from_user(&rtmsg, arg,
2017                                      sizeof(struct in6_rtmsg));
2018                 if (err)
2019                         return -EFAULT;
2020
2021                 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2022
2023                 rtnl_lock();
2024                 switch (cmd) {
2025                 case SIOCADDRT:
2026                         err = ip6_route_add(&cfg);
2027                         break;
2028                 case SIOCDELRT:
2029                         err = ip6_route_del(&cfg);
2030                         break;
2031                 default:
2032                         err = -EINVAL;
2033                 }
2034                 rtnl_unlock();
2035
2036                 return err;
2037         }
2038
2039         return -EINVAL;
2040 }
2041
2042 /*
2043  *      Drop the packet on the floor
2044  */
2045
2046 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2047 {
2048         int type;
2049         struct dst_entry *dst = skb_dst(skb);
2050         switch (ipstats_mib_noroutes) {
2051         case IPSTATS_MIB_INNOROUTES:
2052                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2053                 if (type == IPV6_ADDR_ANY) {
2054                         IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2055                                       IPSTATS_MIB_INADDRERRORS);
2056                         break;
2057                 }
2058                 /* FALLTHROUGH */
2059         case IPSTATS_MIB_OUTNOROUTES:
2060                 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2061                               ipstats_mib_noroutes);
2062                 break;
2063         }
2064         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2065         kfree_skb(skb);
2066         return 0;
2067 }
2068
2069 static int ip6_pkt_discard(struct sk_buff *skb)
2070 {
2071         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2072 }
2073
2074 static int ip6_pkt_discard_out(struct sk_buff *skb)
2075 {
2076         skb->dev = skb_dst(skb)->dev;
2077         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2078 }
2079
2080 static int ip6_pkt_prohibit(struct sk_buff *skb)
2081 {
2082         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2083 }
2084
2085 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2086 {
2087         skb->dev = skb_dst(skb)->dev;
2088         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2089 }
2090
2091 /*
2092  *      Allocate a dst for local (unicast / anycast) address.
2093  */
2094
2095 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2096                                     const struct in6_addr *addr,
2097                                     bool anycast)
2098 {
2099         struct net *net = dev_net(idev->dev);
2100         struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2101                                             DST_NOCOUNT, NULL);
2102         if (!rt)
2103                 return ERR_PTR(-ENOMEM);
2104
2105         in6_dev_hold(idev);
2106
2107         rt->dst.flags |= DST_HOST;
2108         rt->dst.input = ip6_input;
2109         rt->dst.output = ip6_output;
2110         rt->rt6i_idev = idev;
2111
2112         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2113         if (anycast)
2114                 rt->rt6i_flags |= RTF_ANYCAST;
2115         else
2116                 rt->rt6i_flags |= RTF_LOCAL;
2117
2118         rt->rt6i_gateway  = *addr;
2119         rt->rt6i_dst.addr = *addr;
2120         rt->rt6i_dst.plen = 128;
2121         rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2122
2123         atomic_set(&rt->dst.__refcnt, 1);
2124
2125         return rt;
2126 }
2127
2128 int ip6_route_get_saddr(struct net *net,
2129                         struct rt6_info *rt,
2130                         const struct in6_addr *daddr,
2131                         unsigned int prefs,
2132                         struct in6_addr *saddr)
2133 {
2134         struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2135         int err = 0;
2136         if (rt->rt6i_prefsrc.plen)
2137                 *saddr = rt->rt6i_prefsrc.addr;
2138         else
2139                 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2140                                          daddr, prefs, saddr);
2141         return err;
2142 }
2143
2144 /* remove deleted ip from prefsrc entries */
/* Argument bundle handed to fib6_remove_prefsrc() via fib6_clean_all(). */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device to match; NULL matches any */
	struct net *net;		/* namespace owning the null entry */
	struct in6_addr *addr;		/* address being removed */
};
2150
2151 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2152 {
2153         struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2154         struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2155         struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2156
2157         if (((void *)rt->dst.dev == dev || !dev) &&
2158             rt != net->ipv6.ip6_null_entry &&
2159             ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2160                 /* remove prefsrc entry */
2161                 rt->rt6i_prefsrc.plen = 0;
2162         }
2163         return 0;
2164 }
2165
2166 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2167 {
2168         struct net *net = dev_net(ifp->idev->dev);
2169         struct arg_dev_net_ip adni = {
2170                 .dev = ifp->idev->dev,
2171                 .net = net,
2172                 .addr = &ifp->addr,
2173         };
2174         fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2175 }
2176
/* Argument bundle handed to fib6_ifdown() by rt6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;		/* device to match; NULL matches any */
	struct net *net;		/* namespace owning the null entry */
};
2181
2182 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2183 {
2184         const struct arg_dev_net *adn = arg;
2185         const struct net_device *dev = adn->dev;
2186
2187         if ((rt->dst.dev == dev || !dev) &&
2188             rt != adn->net->ipv6.ip6_null_entry)
2189                 return -1;
2190
2191         return 0;
2192 }
2193
2194 void rt6_ifdown(struct net *net, struct net_device *dev)
2195 {
2196         struct arg_dev_net adn = {
2197                 .dev = dev,
2198                 .net = net,
2199         };
2200
2201         fib6_clean_all(net, fib6_ifdown, 0, &adn);
2202         icmp6_clean_all(fib6_ifdown, &adn);
2203 }
2204
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
2209
2210 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2211 {
2212         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2213         struct inet6_dev *idev;
2214
2215         /* In IPv6 pmtu discovery is not optional,
2216            so that RTAX_MTU lock cannot disable it.
2217            We still use this lock to block changes
2218            caused by addrconf/ndisc.
2219         */
2220
2221         idev = __in6_dev_get(arg->dev);
2222         if (!idev)
2223                 return 0;
2224
2225         /* For administrative MTU increase, there is no way to discover
2226            IPv6 PMTU increase, so PMTU increase should be updated here.
2227            Since RFC 1981 doesn't include administrative MTU increase
2228            update PMTU increase is a MUST. (i.e. jumbo frame)
2229          */
2230         /*
2231            If new MTU is less than route PMTU, this new MTU will be the
2232            lowest MTU in the path, update the route PMTU to reflect PMTU
2233            decreases; if new MTU is greater than route PMTU, and the
2234            old MTU is the lowest MTU in the path, update the route PMTU
2235            to reflect the increase. In this case if the other nodes' MTU
2236            also have the lowest MTU, TOO BIG MESSAGE will be lead to
2237            PMTU discouvery.
2238          */
2239         if (rt->dst.dev == arg->dev &&
2240             !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2241             (dst_mtu(&rt->dst) >= arg->mtu ||
2242              (dst_mtu(&rt->dst) < arg->mtu &&
2243               dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2244                 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2245         }
2246         return 0;
2247 }
2248
2249 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2250 {
2251         struct rt6_mtu_change_arg arg = {
2252                 .dev = dev,
2253                 .mtu = mtu,
2254         };
2255
2256         fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2257 }
2258
2259 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2260         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2261         [RTA_OIF]               = { .type = NLA_U32 },
2262         [RTA_IIF]               = { .type = NLA_U32 },
2263         [RTA_PRIORITY]          = { .type = NLA_U32 },
2264         [RTA_METRICS]           = { .type = NLA_NESTED },
2265         [RTA_MULTIPATH]         = { .len = sizeof(struct rtnexthop) },
2266 };
2267
2268 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2269                               struct fib6_config *cfg)
2270 {
2271         struct rtmsg *rtm;
2272         struct nlattr *tb[RTA_MAX+1];
2273         int err;
2274
2275         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2276         if (err < 0)
2277                 goto errout;
2278
2279         err = -EINVAL;
2280         rtm = nlmsg_data(nlh);
2281         memset(cfg, 0, sizeof(*cfg));
2282
2283         cfg->fc_table = rtm->rtm_table;
2284         cfg->fc_dst_len = rtm->rtm_dst_len;
2285         cfg->fc_src_len = rtm->rtm_src_len;
2286         cfg->fc_flags = RTF_UP;
2287         cfg->fc_protocol = rtm->rtm_protocol;
2288         cfg->fc_type = rtm->rtm_type;
2289
2290         if (rtm->rtm_type == RTN_UNREACHABLE ||
2291             rtm->rtm_type == RTN_BLACKHOLE ||
2292             rtm->rtm_type == RTN_PROHIBIT ||
2293             rtm->rtm_type == RTN_THROW)
2294                 cfg->fc_flags |= RTF_REJECT;
2295
2296         if (rtm->rtm_type == RTN_LOCAL)
2297                 cfg->fc_flags |= RTF_LOCAL;
2298
2299         cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2300         cfg->fc_nlinfo.nlh = nlh;
2301         cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2302
2303         if (tb[RTA_GATEWAY]) {
2304                 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2305                 cfg->fc_flags |= RTF_GATEWAY;
2306         }
2307
2308         if (tb[RTA_DST]) {
2309                 int plen = (rtm->rtm_dst_len + 7) >> 3;
2310
2311                 if (nla_len(tb[RTA_DST]) < plen)
2312                         goto errout;
2313
2314                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2315         }
2316
2317         if (tb[RTA_SRC]) {
2318                 int plen = (rtm->rtm_src_len + 7) >> 3;
2319
2320                 if (nla_len(tb[RTA_SRC]) < plen)
2321                         goto errout;
2322
2323                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2324         }
2325
2326         if (tb[RTA_PREFSRC])
2327                 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2328
2329         if (tb[RTA_OIF])
2330                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2331
2332         if (tb[RTA_PRIORITY])
2333                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2334
2335         if (tb[RTA_METRICS]) {
2336                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2337                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2338         }
2339
2340         if (tb[RTA_TABLE])
2341                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2342
2343         if (tb[RTA_MULTIPATH]) {
2344                 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2345                 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2346         }
2347
2348         err = 0;
2349 errout:
2350         return err;
2351 }
2352
2353 static int ip6_route_multipath(struct fib6_config *cfg, int add)
2354 {
2355         struct fib6_config r_cfg;
2356         struct rtnexthop *rtnh;
2357         int remaining;
2358         int attrlen;
2359         int err = 0, last_err = 0;
2360
2361 beginning:
2362         rtnh = (struct rtnexthop *)cfg->fc_mp;
2363         remaining = cfg->fc_mp_len;
2364
2365         /* Parse a Multipath Entry */
2366         while (rtnh_ok(rtnh, remaining)) {
2367                 memcpy(&r_cfg, cfg, sizeof(*cfg));
2368                 if (rtnh->rtnh_ifindex)
2369                         r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2370
2371                 attrlen = rtnh_attrlen(rtnh);
2372                 if (attrlen > 0) {
2373                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2374
2375                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2376                         if (nla) {
2377                                 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2378                                 r_cfg.fc_flags |= RTF_GATEWAY;
2379                         }
2380                 }
2381                 err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2382                 if (err) {
2383                         last_err = err;
2384                         /* If we are trying to remove a route, do not stop the
2385                          * loop when ip6_route_del() fails (because next hop is
2386                          * already gone), we should try to remove all next hops.
2387                          */
2388                         if (add) {
2389                                 /* If add fails, we should try to delete all
2390                                  * next hops that have been already added.
2391                                  */
2392                                 add = 0;
2393                                 goto beginning;
2394                         }
2395                 }
2396                 /* Because each route is added like a single route we remove
2397                  * this flag after the first nexthop (if there is a collision,
2398                  * we have already fail to add the first nexthop:
2399                  * fib6_add_rt2node() has reject it).
2400                  */
2401                 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
2402                 rtnh = rtnh_next(rtnh, &remaining);
2403         }
2404
2405         return last_err;
2406 }
2407
2408 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh)
2409 {
2410         struct fib6_config cfg;
2411         int err;
2412
2413         err = rtm_to_fib6_config(skb, nlh, &cfg);
2414         if (err < 0)
2415                 return err;
2416
2417         if (cfg.fc_mp)
2418                 return ip6_route_multipath(&cfg, 0);
2419         else
2420                 return ip6_route_del(&cfg);
2421 }
2422
2423 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh)
2424 {
2425         struct fib6_config cfg;
2426         int err;
2427
2428         err = rtm_to_fib6_config(skb, nlh, &cfg);
2429         if (err < 0)
2430                 return err;
2431
2432         if (cfg.fc_mp)
2433                 return ip6_route_multipath(&cfg, 1);
2434         else
2435                 return ip6_route_add(&cfg);
2436 }
2437
2438 static inline size_t rt6_nlmsg_size(void)
2439 {
2440         return NLMSG_ALIGN(sizeof(struct rtmsg))
2441                + nla_total_size(16) /* RTA_SRC */
2442                + nla_total_size(16) /* RTA_DST */
2443                + nla_total_size(16) /* RTA_GATEWAY */
2444                + nla_total_size(16) /* RTA_PREFSRC */
2445                + nla_total_size(4) /* RTA_TABLE */
2446                + nla_total_size(4) /* RTA_IIF */
2447                + nla_total_size(4) /* RTA_OIF */
2448                + nla_total_size(4) /* RTA_PRIORITY */
2449                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2450                + nla_total_size(sizeof(struct rta_cacheinfo));
2451 }
2452
2453 static int rt6_fill_node(struct net *net,
2454                          struct sk_buff *skb, struct rt6_info *rt,
2455                          struct in6_addr *dst, struct in6_addr *src,
2456                          int iif, int type, u32 portid, u32 seq,
2457                          int prefix, int nowait, unsigned int flags)
2458 {
2459         struct rtmsg *rtm;
2460         struct nlmsghdr *nlh;
2461         long expires;
2462         u32 table;
2463
2464         if (prefix) {   /* user wants prefix routes only */
2465                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2466                         /* success since this is not a prefix route */
2467                         return 1;
2468                 }
2469         }
2470
2471         nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2472         if (!nlh)
2473                 return -EMSGSIZE;
2474
2475         rtm = nlmsg_data(nlh);
2476         rtm->rtm_family = AF_INET6;
2477         rtm->rtm_dst_len = rt->rt6i_dst.plen;
2478         rtm->rtm_src_len = rt->rt6i_src.plen;
2479         rtm->rtm_tos = 0;
2480         if (rt->rt6i_table)
2481                 table = rt->rt6i_table->tb6_id;
2482         else
2483                 table = RT6_TABLE_UNSPEC;
2484         rtm->rtm_table = table;
2485         if (nla_put_u32(skb, RTA_TABLE, table))
2486                 goto nla_put_failure;
2487         if (rt->rt6i_flags & RTF_REJECT) {
2488                 switch (rt->dst.error) {
2489                 case -EINVAL:
2490                         rtm->rtm_type = RTN_BLACKHOLE;
2491                         break;
2492                 case -EACCES:
2493                         rtm->rtm_type = RTN_PROHIBIT;
2494                         break;
2495                 case -EAGAIN:
2496                         rtm->rtm_type = RTN_THROW;
2497                         break;
2498                 default:
2499                         rtm->rtm_type = RTN_UNREACHABLE;
2500                         break;
2501                 }
2502         }
2503         else if (rt->rt6i_flags & RTF_LOCAL)
2504                 rtm->rtm_type = RTN_LOCAL;
2505         else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2506                 rtm->rtm_type = RTN_LOCAL;
2507         else
2508                 rtm->rtm_type = RTN_UNICAST;
2509         rtm->rtm_flags = 0;
2510         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2511         rtm->rtm_protocol = rt->rt6i_protocol;
2512         if (rt->rt6i_flags & RTF_DYNAMIC)
2513                 rtm->rtm_protocol = RTPROT_REDIRECT;
2514         else if (rt->rt6i_flags & RTF_ADDRCONF) {
2515                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2516                         rtm->rtm_protocol = RTPROT_RA;
2517                 else
2518                         rtm->rtm_protocol = RTPROT_KERNEL;
2519         }
2520
2521         if (rt->rt6i_flags & RTF_CACHE)
2522                 rtm->rtm_flags |= RTM_F_CLONED;
2523
2524         if (dst) {
2525                 if (nla_put(skb, RTA_DST, 16, dst))
2526                         goto nla_put_failure;
2527                 rtm->rtm_dst_len = 128;
2528         } else if (rtm->rtm_dst_len)
2529                 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2530                         goto nla_put_failure;
2531 #ifdef CONFIG_IPV6_SUBTREES
2532         if (src) {
2533                 if (nla_put(skb, RTA_SRC, 16, src))
2534                         goto nla_put_failure;
2535                 rtm->rtm_src_len = 128;
2536         } else if (rtm->rtm_src_len &&
2537                    nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2538                 goto nla_put_failure;
2539 #endif
2540         if (iif) {
2541 #ifdef CONFIG_IPV6_MROUTE
2542                 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2543                         int err = ip6mr_get_route(net, skb, rtm, nowait);
2544                         if (err <= 0) {
2545                                 if (!nowait) {
2546                                         if (err == 0)
2547                                                 return 0;
2548                                         goto nla_put_failure;
2549                                 } else {
2550                                         if (err == -EMSGSIZE)
2551                                                 goto nla_put_failure;
2552                                 }
2553                         }
2554                 } else
2555 #endif
2556                         if (nla_put_u32(skb, RTA_IIF, iif))
2557                                 goto nla_put_failure;
2558         } else if (dst) {
2559                 struct in6_addr saddr_buf;
2560                 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2561                     nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2562                         goto nla_put_failure;
2563         }
2564
2565         if (rt->rt6i_prefsrc.plen) {
2566                 struct in6_addr saddr_buf;
2567                 saddr_buf = rt->rt6i_prefsrc.addr;
2568                 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2569                         goto nla_put_failure;
2570         }
2571
2572         if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2573                 goto nla_put_failure;
2574
2575         if (rt->rt6i_flags & RTF_GATEWAY) {
2576                 if (nla_put(skb, RTA_GATEWAY, 16, &rt->rt6i_gateway) < 0)
2577                         goto nla_put_failure;
2578         }
2579
2580         if (rt->dst.dev &&
2581             nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2582                 goto nla_put_failure;
2583         if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2584                 goto nla_put_failure;
2585
2586         expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2587
2588         if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2589                 goto nla_put_failure;
2590
2591         return nlmsg_end(skb, nlh);
2592
2593 nla_put_failure:
2594         nlmsg_cancel(skb, nlh);
2595         return -EMSGSIZE;
2596 }
2597
2598 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2599 {
2600         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2601         int prefix;
2602
2603         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2604                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2605                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2606         } else
2607                 prefix = 0;
2608
2609         return rt6_fill_node(arg->net,
2610                      arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2611                      NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2612                      prefix, 0, NLM_F_MULTI);
2613 }
2614
2615 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh)
2616 {
2617         struct net *net = sock_net(in_skb->sk);
2618         struct nlattr *tb[RTA_MAX+1];
2619         struct rt6_info *rt;
2620         struct sk_buff *skb;
2621         struct rtmsg *rtm;
2622         struct flowi6 fl6;
2623         int err, iif = 0, oif = 0;
2624
2625         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2626         if (err < 0)
2627                 goto errout;
2628
2629         err = -EINVAL;
2630         memset(&fl6, 0, sizeof(fl6));
2631
2632         if (tb[RTA_SRC]) {
2633                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2634                         goto errout;
2635
2636                 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2637         }
2638
2639         if (tb[RTA_DST]) {
2640                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2641                         goto errout;
2642
2643                 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2644         }
2645
2646         if (tb[RTA_IIF])
2647                 iif = nla_get_u32(tb[RTA_IIF]);
2648
2649         if (tb[RTA_OIF])
2650                 oif = nla_get_u32(tb[RTA_OIF]);
2651
2652         if (iif) {
2653                 struct net_device *dev;
2654                 int flags = 0;
2655
2656                 dev = __dev_get_by_index(net, iif);
2657                 if (!dev) {
2658                         err = -ENODEV;
2659                         goto errout;
2660                 }
2661
2662                 fl6.flowi6_iif = iif;
2663
2664                 if (!ipv6_addr_any(&fl6.saddr))
2665                         flags |= RT6_LOOKUP_F_HAS_SADDR;
2666
2667                 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2668                                                                flags);
2669         } else {
2670                 fl6.flowi6_oif = oif;
2671
2672                 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2673         }
2674
2675         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2676         if (!skb) {
2677                 ip6_rt_put(rt);
2678                 err = -ENOBUFS;
2679                 goto errout;
2680         }
2681
2682         /* Reserve room for dummy headers, this skb can pass
2683            through good chunk of routing engine.
2684          */
2685         skb_reset_mac_header(skb);
2686         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2687
2688         skb_dst_set(skb, &rt->dst);
2689
2690         err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2691                             RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2692                             nlh->nlmsg_seq, 0, 0, 0);
2693         if (err < 0) {
2694                 kfree_skb(skb);
2695                 goto errout;
2696         }
2697
2698         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2699 errout:
2700         return err;
2701 }
2702
2703 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2704 {
2705         struct sk_buff *skb;
2706         struct net *net = info->nl_net;
2707         u32 seq;
2708         int err;
2709
2710         err = -ENOBUFS;
2711         seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2712
2713         skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2714         if (!skb)
2715                 goto errout;
2716
2717         err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2718                                 event, info->portid, seq, 0, 0, 0);
2719         if (err < 0) {
2720                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2721                 WARN_ON(err == -EMSGSIZE);
2722                 kfree_skb(skb);
2723                 goto errout;
2724         }
2725         rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2726                     info->nlh, gfp_any());
2727         return;
2728 errout:
2729         if (err < 0)
2730                 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2731 }
2732
2733 static int ip6_route_dev_notify(struct notifier_block *this,
2734                                 unsigned long event, void *data)
2735 {
2736         struct net_device *dev = (struct net_device *)data;
2737         struct net *net = dev_net(dev);
2738
2739         if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2740                 net->ipv6.ip6_null_entry->dst.dev = dev;
2741                 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2742 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2743                 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2744                 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2745                 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2746                 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2747 #endif
2748         }
2749
2750         return NOTIFY_OK;
2751 }
2752
2753 /*
2754  *      /proc
2755  */
2756
2757 #ifdef CONFIG_PROC_FS
2758
/*
 * Buffer/position bookkeeping for /proc output.
 * NOTE(review): no user of this struct is visible in this part of the file;
 * the seq_file based handlers below do not reference it -- confirm whether
 * it is still needed.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2767
2768 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2769 {
2770         struct seq_file *m = p_arg;
2771
2772         seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2773
2774 #ifdef CONFIG_IPV6_SUBTREES
2775         seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2776 #else
2777         seq_puts(m, "00000000000000000000000000000000 00 ");
2778 #endif
2779         if (rt->rt6i_flags & RTF_GATEWAY) {
2780                 seq_printf(m, "%pi6", &rt->rt6i_gateway);
2781         } else {
2782                 seq_puts(m, "00000000000000000000000000000000");
2783         }
2784         seq_printf(m, " %08x %08x %08x %08x %8s\n",
2785                    rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2786                    rt->dst.__use, rt->rt6i_flags,
2787                    rt->dst.dev ? rt->dst.dev->name : "");
2788         return 0;
2789 }
2790
2791 static int ipv6_route_show(struct seq_file *m, void *v)
2792 {
2793         struct net *net = (struct net *)m->private;
2794         fib6_clean_all_ro(net, rt6_info_route, 0, m);
2795         return 0;
2796 }
2797
2798 static int ipv6_route_open(struct inode *inode, struct file *file)
2799 {
2800         return single_open_net(inode, file, ipv6_route_show);
2801 }
2802
2803 static const struct file_operations ipv6_route_proc_fops = {
2804         .owner          = THIS_MODULE,
2805         .open           = ipv6_route_open,
2806         .read           = seq_read,
2807         .llseek         = seq_lseek,
2808         .release        = single_release_net,
2809 };
2810
2811 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2812 {
2813         struct net *net = (struct net *)seq->private;
2814         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2815                    net->ipv6.rt6_stats->fib_nodes,
2816                    net->ipv6.rt6_stats->fib_route_nodes,
2817                    net->ipv6.rt6_stats->fib_rt_alloc,
2818                    net->ipv6.rt6_stats->fib_rt_entries,
2819                    net->ipv6.rt6_stats->fib_rt_cache,
2820                    dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2821                    net->ipv6.rt6_stats->fib_discarded_routes);
2822
2823         return 0;
2824 }
2825
2826 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2827 {
2828         return single_open_net(inode, file, rt6_stats_seq_show);
2829 }
2830
2831 static const struct file_operations rt6_stats_seq_fops = {
2832         .owner   = THIS_MODULE,
2833         .open    = rt6_stats_seq_open,
2834         .read    = seq_read,
2835         .llseek  = seq_lseek,
2836         .release = single_release_net,
2837 };
2838 #endif  /* CONFIG_PROC_FS */
2839
2840 #ifdef CONFIG_SYSCTL
2841
2842 static
2843 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2844                               void __user *buffer, size_t *lenp, loff_t *ppos)
2845 {
2846         struct net *net;
2847         int delay;
2848         if (!write)
2849                 return -EINVAL;
2850
2851         net = (struct net *)ctl->extra1;
2852         delay = net->ipv6.sysctl.flush_delay;
2853         proc_dointvec(ctl, write, buffer, lenp, ppos);
2854         fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2855         return 0;
2856 }
2857
2858 ctl_table ipv6_route_table_template[] = {
2859         {
2860                 .procname       =       "flush",
2861                 .data           =       &init_net.ipv6.sysctl.flush_delay,
2862                 .maxlen         =       sizeof(int),
2863                 .mode           =       0200,
2864                 .proc_handler   =       ipv6_sysctl_rtcache_flush
2865         },
2866         {
2867                 .procname       =       "gc_thresh",
2868                 .data           =       &ip6_dst_ops_template.gc_thresh,
2869                 .maxlen         =       sizeof(int),
2870                 .mode           =       0644,
2871                 .proc_handler   =       proc_dointvec,
2872         },
2873         {
2874                 .procname       =       "max_size",
2875                 .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
2876                 .maxlen         =       sizeof(int),
2877                 .mode           =       0644,
2878                 .proc_handler   =       proc_dointvec,
2879         },
2880         {
2881                 .procname       =       "gc_min_interval",
2882                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2883                 .maxlen         =       sizeof(int),
2884                 .mode           =       0644,
2885                 .proc_handler   =       proc_dointvec_jiffies,
2886         },
2887         {
2888                 .procname       =       "gc_timeout",
2889                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2890                 .maxlen         =       sizeof(int),
2891                 .mode           =       0644,
2892                 .proc_handler   =       proc_dointvec_jiffies,
2893         },
2894         {
2895                 .procname       =       "gc_interval",
2896                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2897                 .maxlen         =       sizeof(int),
2898                 .mode           =       0644,
2899                 .proc_handler   =       proc_dointvec_jiffies,
2900         },
2901         {
2902                 .procname       =       "gc_elasticity",
2903                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2904                 .maxlen         =       sizeof(int),
2905                 .mode           =       0644,
2906                 .proc_handler   =       proc_dointvec,
2907         },
2908         {
2909                 .procname       =       "mtu_expires",
2910                 .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2911                 .maxlen         =       sizeof(int),
2912                 .mode           =       0644,
2913                 .proc_handler   =       proc_dointvec_jiffies,
2914         },
2915         {
2916                 .procname       =       "min_adv_mss",
2917                 .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2918                 .maxlen         =       sizeof(int),
2919                 .mode           =       0644,
2920                 .proc_handler   =       proc_dointvec,
2921         },
2922         {
2923                 .procname       =       "gc_min_interval_ms",
2924                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2925                 .maxlen         =       sizeof(int),
2926                 .mode           =       0644,
2927                 .proc_handler   =       proc_dointvec_ms_jiffies,
2928         },
2929         { }
2930 };
2931
2932 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2933 {
2934         struct ctl_table *table;
2935
2936         table = kmemdup(ipv6_route_table_template,
2937                         sizeof(ipv6_route_table_template),
2938                         GFP_KERNEL);
2939
2940         if (table) {
2941                 table[0].data = &net->ipv6.sysctl.flush_delay;
2942                 table[0].extra1 = net;
2943                 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2944                 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2945                 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2946                 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2947                 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2948                 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2949                 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2950                 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2951                 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2952
2953                 /* Don't export sysctls to unprivileged users */
2954                 if (net->user_ns != &init_user_ns)
2955                         table[0].procname = NULL;
2956         }
2957
2958         return table;
2959 }
2960 #endif
2961
2962 static int __net_init ip6_route_net_init(struct net *net)
2963 {
2964         int ret = -ENOMEM;
2965
2966         memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2967                sizeof(net->ipv6.ip6_dst_ops));
2968
2969         if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2970                 goto out_ip6_dst_ops;
2971
2972         net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2973                                            sizeof(*net->ipv6.ip6_null_entry),
2974                                            GFP_KERNEL);
2975         if (!net->ipv6.ip6_null_entry)
2976                 goto out_ip6_dst_entries;
2977         net->ipv6.ip6_null_entry->dst.path =
2978                 (struct dst_entry *)net->ipv6.ip6_null_entry;
2979         net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2980         dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2981                          ip6_template_metrics, true);
2982
2983 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2984         net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2985                                                sizeof(*net->ipv6.ip6_prohibit_entry),
2986                                                GFP_KERNEL);
2987         if (!net->ipv6.ip6_prohibit_entry)
2988                 goto out_ip6_null_entry;
2989         net->ipv6.ip6_prohibit_entry->dst.path =
2990                 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2991         net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2992         dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2993                          ip6_template_metrics, true);
2994
2995         net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2996                                                sizeof(*net->ipv6.ip6_blk_hole_entry),
2997                                                GFP_KERNEL);
2998         if (!net->ipv6.ip6_blk_hole_entry)
2999                 goto out_ip6_prohibit_entry;
3000         net->ipv6.ip6_blk_hole_entry->dst.path =
3001                 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3002         net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3003         dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3004                          ip6_template_metrics, true);
3005 #endif
3006
3007         net->ipv6.sysctl.flush_delay = 0;
3008         net->ipv6.sysctl.ip6_rt_max_size = 4096;
3009         net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3010         net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3011         net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3012         net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3013         net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3014         net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3015
3016         net->ipv6.ip6_rt_gc_expire = 30*HZ;
3017
3018         ret = 0;
3019 out:
3020         return ret;
3021
3022 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3023 out_ip6_prohibit_entry:
3024         kfree(net->ipv6.ip6_prohibit_entry);
3025 out_ip6_null_entry:
3026         kfree(net->ipv6.ip6_null_entry);
3027 #endif
3028 out_ip6_dst_entries:
3029         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3030 out_ip6_dst_ops:
3031         goto out;
3032 }
3033
3034 static void __net_exit ip6_route_net_exit(struct net *net)
3035 {
3036         kfree(net->ipv6.ip6_null_entry);
3037 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3038         kfree(net->ipv6.ip6_prohibit_entry);
3039         kfree(net->ipv6.ip6_blk_hole_entry);
3040 #endif
3041         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3042 }
3043
3044 static int __net_init ip6_route_net_init_late(struct net *net)
3045 {
3046 #ifdef CONFIG_PROC_FS
3047         proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3048         proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3049 #endif
3050         return 0;
3051 }
3052
3053 static void __net_exit ip6_route_net_exit_late(struct net *net)
3054 {
3055 #ifdef CONFIG_PROC_FS
3056         remove_proc_entry("ipv6_route", net->proc_net);
3057         remove_proc_entry("rt6_stats", net->proc_net);
3058 #endif
3059 }
3060
3061 static struct pernet_operations ip6_route_net_ops = {
3062         .init = ip6_route_net_init,
3063         .exit = ip6_route_net_exit,
3064 };
3065
3066 static int __net_init ipv6_inetpeer_init(struct net *net)
3067 {
3068         struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3069
3070         if (!bp)
3071                 return -ENOMEM;
3072         inet_peer_base_init(bp);
3073         net->ipv6.peers = bp;
3074         return 0;
3075 }
3076
3077 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3078 {
3079         struct inet_peer_base *bp = net->ipv6.peers;
3080
3081         net->ipv6.peers = NULL;
3082         inetpeer_invalidate_tree(bp);
3083         kfree(bp);
3084 }
3085
3086 static struct pernet_operations ipv6_inetpeer_ops = {
3087         .init   =       ipv6_inetpeer_init,
3088         .exit   =       ipv6_inetpeer_exit,
3089 };
3090
3091 static struct pernet_operations ip6_route_net_late_ops = {
3092         .init = ip6_route_net_init_late,
3093         .exit = ip6_route_net_exit_late,
3094 };
3095
3096 static struct notifier_block ip6_route_dev_notifier = {
3097         .notifier_call = ip6_route_dev_notify,
3098         .priority = 0,
3099 };
3100
3101 int __init ip6_route_init(void)
3102 {
3103         int ret;
3104
3105         ret = -ENOMEM;
3106         ip6_dst_ops_template.kmem_cachep =
3107                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3108                                   SLAB_HWCACHE_ALIGN, NULL);
3109         if (!ip6_dst_ops_template.kmem_cachep)
3110                 goto out;
3111
3112         ret = dst_entries_init(&ip6_dst_blackhole_ops);
3113         if (ret)
3114                 goto out_kmem_cache;
3115
3116         ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3117         if (ret)
3118                 goto out_dst_entries;
3119
3120         ret = register_pernet_subsys(&ip6_route_net_ops);
3121         if (ret)
3122                 goto out_register_inetpeer;
3123
3124         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3125
3126         /* Registering of the loopback is done before this portion of code,
3127          * the loopback reference in rt6_info will not be taken, do it
3128          * manually for init_net */
3129         init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3130         init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3131   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3132         init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3133         init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3134         init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3135         init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3136   #endif
3137         ret = fib6_init();
3138         if (ret)
3139                 goto out_register_subsys;
3140
3141         ret = xfrm6_init();
3142         if (ret)
3143                 goto out_fib6_init;
3144
3145         ret = fib6_rules_init();
3146         if (ret)
3147                 goto xfrm6_init;
3148
3149         ret = register_pernet_subsys(&ip6_route_net_late_ops);
3150         if (ret)
3151                 goto fib6_rules_init;
3152
3153         ret = -ENOBUFS;
3154         if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3155             __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3156             __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3157                 goto out_register_late_subsys;
3158
3159         ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3160         if (ret)
3161                 goto out_register_late_subsys;
3162
3163 out:
3164         return ret;
3165
3166 out_register_late_subsys:
3167         unregister_pernet_subsys(&ip6_route_net_late_ops);
3168 fib6_rules_init:
3169         fib6_rules_cleanup();
3170 xfrm6_init:
3171         xfrm6_fini();
3172 out_fib6_init:
3173         fib6_gc_cleanup();
3174 out_register_subsys:
3175         unregister_pernet_subsys(&ip6_route_net_ops);
3176 out_register_inetpeer:
3177         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3178 out_dst_entries:
3179         dst_entries_destroy(&ip6_dst_blackhole_ops);
3180 out_kmem_cache:
3181         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3182         goto out;
3183 }
3184
3185 void ip6_route_cleanup(void)
3186 {
3187         unregister_netdevice_notifier(&ip6_route_dev_notifier);
3188         unregister_pernet_subsys(&ip6_route_net_late_ops);
3189         fib6_rules_cleanup();
3190         xfrm6_fini();
3191         fib6_gc_cleanup();
3192         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3193         unregister_pernet_subsys(&ip6_route_net_ops);
3194         dst_entries_destroy(&ip6_dst_blackhole_ops);
3195         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3196 }