d94d224f7e6868c57a5cdf4048f5c6022698b325
[firefly-linux-kernel-4.4.55.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13
14 /*      Changes:
15  *
16  *      YOSHIFUJI Hideaki @USAGI
17  *              reworked default router selection.
18  *              - respect outgoing interface
19  *              - select from (probably) reachable routers (i.e.
20  *              routers in REACHABLE, STALE, DELAY or PROBE states).
21  *              - always select the same router if it is (probably)
22  *              reachable.  otherwise, round-robin the list.
23  *      Ville Nuorvala
24  *              Fixed routing subtrees.
25  */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60 #include <net/nexthop.h>
61
62 #include <asm/uaccess.h>
63
64 #ifdef CONFIG_SYSCTL
65 #include <linux/sysctl.h>
66 #endif
67
/*
 * Result of the neighbour reachability check for a route
 * (see rt6_check_neigh() and rt6_score_route()).
 *
 * Negative values are failures, so callers can test "< 0"; route scores
 * themselves are never negative.
 */
enum rt6_nud_state {
	/* route must not be used (also the initial value in rt6_check_neigh) */
	RT6_NUD_FAIL_HARD = -2,
	/* no neighbour entry and router preference support is compiled out;
	 * find_match() keeps the route at the lowest score and requests
	 * round-robin
	 */
	RT6_NUD_FAIL_SOFT = -1,
	/* next hop is considered usable */
	RT6_NUD_SUCCEED = 1
};
73
74 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
75                                     const struct in6_addr *dest);
76 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
77 static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
78 static unsigned int      ip6_mtu(const struct dst_entry *dst);
79 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80 static void             ip6_dst_destroy(struct dst_entry *);
81 static void             ip6_dst_ifdown(struct dst_entry *,
82                                        struct net_device *dev, int how);
83 static int               ip6_dst_gc(struct dst_ops *ops);
84
85 static int              ip6_pkt_discard(struct sk_buff *skb);
86 static int              ip6_pkt_discard_out(struct sk_buff *skb);
87 static int              ip6_pkt_prohibit(struct sk_buff *skb);
88 static int              ip6_pkt_prohibit_out(struct sk_buff *skb);
89 static void             ip6_link_failure(struct sk_buff *skb);
90 static void             ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
91                                            struct sk_buff *skb, u32 mtu);
92 static void             rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
93                                         struct sk_buff *skb);
94
95 #ifdef CONFIG_IPV6_ROUTE_INFO
/* Add a route learned from a Route Information option (see rt6_route_rcv). */
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix,
					   int prefixlen,
					   const struct in6_addr *gwaddr,
					   int ifindex,
					   unsigned int pref);

/* Look up an existing route for the given prefix, gateway and interface. */
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix,
					   int prefixlen,
					   const struct in6_addr *gwaddr,
					   int ifindex);
103 #endif
104
105 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
106 {
107         struct rt6_info *rt = (struct rt6_info *) dst;
108         struct inet_peer *peer;
109         u32 *p = NULL;
110
111         if (!(rt->dst.flags & DST_HOST))
112                 return dst_cow_metrics_generic(dst, old);
113
114         peer = rt6_get_peer_create(rt);
115         if (peer) {
116                 u32 *old_p = __DST_METRICS_PTR(old);
117                 unsigned long prev, new;
118
119                 p = peer->metrics;
120                 if (inet_metrics_new(peer))
121                         memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
122
123                 new = (unsigned long) p;
124                 prev = cmpxchg(&dst->_metrics, old, new);
125
126                 if (prev != old) {
127                         p = __DST_METRICS_PTR(prev);
128                         if (prev & DST_METRICS_READ_ONLY)
129                                 p = NULL;
130                 }
131         }
132         return p;
133 }
134
135 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
136                                              struct sk_buff *skb,
137                                              const void *daddr)
138 {
139         struct in6_addr *p = &rt->rt6i_gateway;
140
141         if (!ipv6_addr_any(p))
142                 return (const void *) p;
143         else if (skb)
144                 return &ipv6_hdr(skb)->daddr;
145         return daddr;
146 }
147
148 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
149                                           struct sk_buff *skb,
150                                           const void *daddr)
151 {
152         struct rt6_info *rt = (struct rt6_info *) dst;
153         struct neighbour *n;
154
155         daddr = choose_neigh_daddr(rt, skb, daddr);
156         n = __ipv6_neigh_lookup(dst->dev, daddr);
157         if (n)
158                 return n;
159         return neigh_create(&nd_tbl, daddr, dst->dev);
160 }
161
162 static struct dst_ops ip6_dst_ops_template = {
163         .family                 =       AF_INET6,
164         .protocol               =       cpu_to_be16(ETH_P_IPV6),
165         .gc                     =       ip6_dst_gc,
166         .gc_thresh              =       1024,
167         .check                  =       ip6_dst_check,
168         .default_advmss         =       ip6_default_advmss,
169         .mtu                    =       ip6_mtu,
170         .cow_metrics            =       ipv6_cow_metrics,
171         .destroy                =       ip6_dst_destroy,
172         .ifdown                 =       ip6_dst_ifdown,
173         .negative_advice        =       ip6_negative_advice,
174         .link_failure           =       ip6_link_failure,
175         .update_pmtu            =       ip6_rt_update_pmtu,
176         .redirect               =       rt6_do_redirect,
177         .local_out              =       __ip6_local_out,
178         .neigh_lookup           =       ip6_neigh_lookup,
179 };
180
181 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
182 {
183         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
184
185         return mtu ? : dst->dev->mtu;
186 }
187
188 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
189                                          struct sk_buff *skb, u32 mtu)
190 {
191 }
192
/* dst_ops->redirect handler for blackhole routes: deliberately a no-op. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
	/* redirects are ignored */
}
197
198 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
199                                          unsigned long old)
200 {
201         return NULL;
202 }
203
204 static struct dst_ops ip6_dst_blackhole_ops = {
205         .family                 =       AF_INET6,
206         .protocol               =       cpu_to_be16(ETH_P_IPV6),
207         .destroy                =       ip6_dst_destroy,
208         .check                  =       ip6_dst_check,
209         .mtu                    =       ip6_blackhole_mtu,
210         .default_advmss         =       ip6_default_advmss,
211         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
212         .redirect               =       ip6_rt_blackhole_redirect,
213         .cow_metrics            =       ip6_rt_blackhole_cow_metrics,
214         .neigh_lookup           =       ip6_neigh_lookup,
215 };
216
217 static const u32 ip6_template_metrics[RTAX_MAX] = {
218         [RTAX_HOPLIMIT - 1] = 0,
219 };
220
221 static const struct rt6_info ip6_null_entry_template = {
222         .dst = {
223                 .__refcnt       = ATOMIC_INIT(1),
224                 .__use          = 1,
225                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
226                 .error          = -ENETUNREACH,
227                 .input          = ip6_pkt_discard,
228                 .output         = ip6_pkt_discard_out,
229         },
230         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
231         .rt6i_protocol  = RTPROT_KERNEL,
232         .rt6i_metric    = ~(u32) 0,
233         .rt6i_ref       = ATOMIC_INIT(1),
234 };
235
236 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
237
238 static const struct rt6_info ip6_prohibit_entry_template = {
239         .dst = {
240                 .__refcnt       = ATOMIC_INIT(1),
241                 .__use          = 1,
242                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
243                 .error          = -EACCES,
244                 .input          = ip6_pkt_prohibit,
245                 .output         = ip6_pkt_prohibit_out,
246         },
247         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
248         .rt6i_protocol  = RTPROT_KERNEL,
249         .rt6i_metric    = ~(u32) 0,
250         .rt6i_ref       = ATOMIC_INIT(1),
251 };
252
253 static const struct rt6_info ip6_blk_hole_entry_template = {
254         .dst = {
255                 .__refcnt       = ATOMIC_INIT(1),
256                 .__use          = 1,
257                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
258                 .error          = -EINVAL,
259                 .input          = dst_discard,
260                 .output         = dst_discard,
261         },
262         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
263         .rt6i_protocol  = RTPROT_KERNEL,
264         .rt6i_metric    = ~(u32) 0,
265         .rt6i_ref       = ATOMIC_INIT(1),
266 };
267
268 #endif
269
270 /* allocate dst with ip6_dst_ops */
271 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
272                                              struct net_device *dev,
273                                              int flags,
274                                              struct fib6_table *table)
275 {
276         struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
277                                         0, DST_OBSOLETE_FORCE_CHK, flags);
278
279         if (rt) {
280                 struct dst_entry *dst = &rt->dst;
281
282                 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
283                 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
284                 rt->rt6i_genid = rt_genid(net);
285                 INIT_LIST_HEAD(&rt->rt6i_siblings);
286                 rt->rt6i_nsiblings = 0;
287         }
288         return rt;
289 }
290
291 static void ip6_dst_destroy(struct dst_entry *dst)
292 {
293         struct rt6_info *rt = (struct rt6_info *)dst;
294         struct inet6_dev *idev = rt->rt6i_idev;
295         struct dst_entry *from = dst->from;
296
297         if (!(rt->dst.flags & DST_HOST))
298                 dst_destroy_metrics_generic(dst);
299
300         if (idev) {
301                 rt->rt6i_idev = NULL;
302                 in6_dev_put(idev);
303         }
304
305         dst->from = NULL;
306         dst_release(from);
307
308         if (rt6_has_peer(rt)) {
309                 struct inet_peer *peer = rt6_peer_ptr(rt);
310                 inet_putpeer(peer);
311         }
312 }
313
314 void rt6_bind_peer(struct rt6_info *rt, int create)
315 {
316         struct inet_peer_base *base;
317         struct inet_peer *peer;
318
319         base = inetpeer_base_ptr(rt->_rt6i_peer);
320         if (!base)
321                 return;
322
323         peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
324         if (peer) {
325                 if (!rt6_set_peer(rt, peer))
326                         inet_putpeer(peer);
327         }
328 }
329
330 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
331                            int how)
332 {
333         struct rt6_info *rt = (struct rt6_info *)dst;
334         struct inet6_dev *idev = rt->rt6i_idev;
335         struct net_device *loopback_dev =
336                 dev_net(dev)->loopback_dev;
337
338         if (dev != loopback_dev) {
339                 if (idev && idev->dev == dev) {
340                         struct inet6_dev *loopback_idev =
341                                 in6_dev_get(loopback_dev);
342                         if (loopback_idev) {
343                                 rt->rt6i_idev = loopback_idev;
344                                 in6_dev_put(idev);
345                         }
346                 }
347         }
348 }
349
350 static bool rt6_check_expired(const struct rt6_info *rt)
351 {
352         if (rt->rt6i_flags & RTF_EXPIRES) {
353                 if (time_after(jiffies, rt->dst.expires))
354                         return true;
355         } else if (rt->dst.from) {
356                 return rt6_check_expired((struct rt6_info *) rt->dst.from);
357         }
358         return false;
359 }
360
361 static bool rt6_need_strict(const struct in6_addr *daddr)
362 {
363         return ipv6_addr_type(daddr) &
364                 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
365 }
366
367 /* Multipath route selection:
368  *   Hash based function using packet header and flowlabel.
369  * Adapted from fib_info_hashfn()
370  */
371 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
372                                const struct flowi6 *fl6)
373 {
374         unsigned int val = fl6->flowi6_proto;
375
376         val ^= ipv6_addr_hash(&fl6->daddr);
377         val ^= ipv6_addr_hash(&fl6->saddr);
378
379         /* Work only if this not encapsulated */
380         switch (fl6->flowi6_proto) {
381         case IPPROTO_UDP:
382         case IPPROTO_TCP:
383         case IPPROTO_SCTP:
384                 val ^= (__force u16)fl6->fl6_sport;
385                 val ^= (__force u16)fl6->fl6_dport;
386                 break;
387
388         case IPPROTO_ICMPV6:
389                 val ^= (__force u16)fl6->fl6_icmp_type;
390                 val ^= (__force u16)fl6->fl6_icmp_code;
391                 break;
392         }
393         /* RFC6438 recommands to use flowlabel */
394         val ^= (__force u32)fl6->flowlabel;
395
396         /* Perhaps, we need to tune, this function? */
397         val = val ^ (val >> 7) ^ (val >> 12);
398         return val % candidate_count;
399 }
400
401 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
402                                              struct flowi6 *fl6)
403 {
404         struct rt6_info *sibling, *next_sibling;
405         int route_choosen;
406
407         route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
408         /* Don't change the route, if route_choosen == 0
409          * (siblings does not include ourself)
410          */
411         if (route_choosen)
412                 list_for_each_entry_safe(sibling, next_sibling,
413                                 &match->rt6i_siblings, rt6i_siblings) {
414                         route_choosen--;
415                         if (route_choosen == 0) {
416                                 match = sibling;
417                                 break;
418                         }
419                 }
420         return match;
421 }
422
423 /*
424  *      Route lookup. Any table->tb6_lock is implied.
425  */
426
427 static inline struct rt6_info *rt6_device_match(struct net *net,
428                                                     struct rt6_info *rt,
429                                                     const struct in6_addr *saddr,
430                                                     int oif,
431                                                     int flags)
432 {
433         struct rt6_info *local = NULL;
434         struct rt6_info *sprt;
435
436         if (!oif && ipv6_addr_any(saddr))
437                 goto out;
438
439         for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
440                 struct net_device *dev = sprt->dst.dev;
441
442                 if (oif) {
443                         if (dev->ifindex == oif)
444                                 return sprt;
445                         if (dev->flags & IFF_LOOPBACK) {
446                                 if (!sprt->rt6i_idev ||
447                                     sprt->rt6i_idev->dev->ifindex != oif) {
448                                         if (flags & RT6_LOOKUP_F_IFACE && oif)
449                                                 continue;
450                                         if (local && (!oif ||
451                                                       local->rt6i_idev->dev->ifindex == oif))
452                                                 continue;
453                                 }
454                                 local = sprt;
455                         }
456                 } else {
457                         if (ipv6_chk_addr(net, saddr, dev,
458                                           flags & RT6_LOOKUP_F_IFACE))
459                                 return sprt;
460                 }
461         }
462
463         if (oif) {
464                 if (local)
465                         return local;
466
467                 if (flags & RT6_LOOKUP_F_IFACE)
468                         return net->ipv6.ip6_null_entry;
469         }
470 out:
471         return rt;
472 }
473
474 #ifdef CONFIG_IPV6_ROUTER_PREF
475 struct __rt6_probe_work {
476         struct work_struct work;
477         struct in6_addr target;
478         struct net_device *dev;
479 };
480
481 static void rt6_probe_deferred(struct work_struct *w)
482 {
483         struct in6_addr mcaddr;
484         struct __rt6_probe_work *work =
485                 container_of(w, struct __rt6_probe_work, work);
486
487         addrconf_addr_solict_mult(&work->target, &mcaddr);
488         ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL);
489         dev_put(work->dev);
490         kfree(w);
491 }
492
493 static void rt6_probe(struct rt6_info *rt)
494 {
495         struct neighbour *neigh;
496         /*
497          * Okay, this does not seem to be appropriate
498          * for now, however, we need to check if it
499          * is really so; aka Router Reachability Probing.
500          *
501          * Router Reachability Probe MUST be rate-limited
502          * to no more than one per minute.
503          */
504         if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
505                 return;
506         rcu_read_lock_bh();
507         neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
508         if (neigh) {
509                 write_lock(&neigh->lock);
510                 if (neigh->nud_state & NUD_VALID)
511                         goto out;
512         }
513
514         if (!neigh ||
515             time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
516                 struct __rt6_probe_work *work;
517
518                 work = kmalloc(sizeof(*work), GFP_ATOMIC);
519
520                 if (neigh && work)
521                         neigh->updated = jiffies;
522
523                 if (neigh)
524                         write_unlock(&neigh->lock);
525
526                 if (work) {
527                         INIT_WORK(&work->work, rt6_probe_deferred);
528                         work->target = rt->rt6i_gateway;
529                         dev_hold(rt->dst.dev);
530                         work->dev = rt->dst.dev;
531                         schedule_work(&work->work);
532                 }
533         } else {
534 out:
535                 write_unlock(&neigh->lock);
536         }
537         rcu_read_unlock_bh();
538 }
539 #else
/* Without CONFIG_IPV6_ROUTER_PREF there is no router probing. */
static inline void rt6_probe(struct rt6_info *rt)
{
	/* intentionally empty */
}
543 #endif
544
545 /*
546  * Default Router Selection (RFC 2461 6.3.6)
547  */
548 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
549 {
550         struct net_device *dev = rt->dst.dev;
551         if (!oif || dev->ifindex == oif)
552                 return 2;
553         if ((dev->flags & IFF_LOOPBACK) &&
554             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
555                 return 1;
556         return 0;
557 }
558
559 static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
560 {
561         struct neighbour *neigh;
562         enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
563
564         if (rt->rt6i_flags & RTF_NONEXTHOP ||
565             !(rt->rt6i_flags & RTF_GATEWAY))
566                 return RT6_NUD_SUCCEED;
567
568         rcu_read_lock_bh();
569         neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
570         if (neigh) {
571                 read_lock(&neigh->lock);
572                 if (neigh->nud_state & NUD_VALID)
573                         ret = RT6_NUD_SUCCEED;
574 #ifdef CONFIG_IPV6_ROUTER_PREF
575                 else if (!(neigh->nud_state & NUD_FAILED))
576                         ret = RT6_NUD_SUCCEED;
577 #endif
578                 read_unlock(&neigh->lock);
579         } else {
580                 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
581                       RT6_NUD_SUCCEED : RT6_NUD_FAIL_SOFT;
582         }
583         rcu_read_unlock_bh();
584
585         return ret;
586 }
587
588 static int rt6_score_route(struct rt6_info *rt, int oif,
589                            int strict)
590 {
591         int m;
592
593         m = rt6_check_dev(rt, oif);
594         if (!m && (strict & RT6_LOOKUP_F_IFACE))
595                 return RT6_NUD_FAIL_HARD;
596 #ifdef CONFIG_IPV6_ROUTER_PREF
597         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
598 #endif
599         if (strict & RT6_LOOKUP_F_REACHABLE) {
600                 int n = rt6_check_neigh(rt);
601                 if (n < 0)
602                         return n;
603         }
604         return m;
605 }
606
607 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
608                                    int *mpri, struct rt6_info *match,
609                                    bool *do_rr)
610 {
611         int m;
612         bool match_do_rr = false;
613
614         if (rt6_check_expired(rt))
615                 goto out;
616
617         m = rt6_score_route(rt, oif, strict);
618         if (m == RT6_NUD_FAIL_SOFT && !IS_ENABLED(CONFIG_IPV6_ROUTER_PREF)) {
619                 match_do_rr = true;
620                 m = 0; /* lowest valid score */
621         } else if (m < 0) {
622                 goto out;
623         }
624
625         if (strict & RT6_LOOKUP_F_REACHABLE)
626                 rt6_probe(rt);
627
628         if (m > *mpri) {
629                 *do_rr = match_do_rr;
630                 *mpri = m;
631                 match = rt;
632         }
633 out:
634         return match;
635 }
636
637 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
638                                      struct rt6_info *rr_head,
639                                      u32 metric, int oif, int strict,
640                                      bool *do_rr)
641 {
642         struct rt6_info *rt, *match;
643         int mpri = -1;
644
645         match = NULL;
646         for (rt = rr_head; rt && rt->rt6i_metric == metric;
647              rt = rt->dst.rt6_next)
648                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
649         for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
650              rt = rt->dst.rt6_next)
651                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
652
653         return match;
654 }
655
656 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
657 {
658         struct rt6_info *match, *rt0;
659         struct net *net;
660         bool do_rr = false;
661
662         rt0 = fn->rr_ptr;
663         if (!rt0)
664                 fn->rr_ptr = rt0 = fn->leaf;
665
666         match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
667                              &do_rr);
668
669         if (do_rr) {
670                 struct rt6_info *next = rt0->dst.rt6_next;
671
672                 /* no entries matched; do round-robin */
673                 if (!next || next->rt6i_metric != rt0->rt6i_metric)
674                         next = fn->leaf;
675
676                 if (next != rt0)
677                         fn->rr_ptr = next;
678         }
679
680         net = dev_net(rt0->dst.dev);
681         return match ? match : net->ipv6.ip6_null_entry;
682 }
683
684 #ifdef CONFIG_IPV6_ROUTE_INFO
685 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
686                   const struct in6_addr *gwaddr)
687 {
688         struct net *net = dev_net(dev);
689         struct route_info *rinfo = (struct route_info *) opt;
690         struct in6_addr prefix_buf, *prefix;
691         unsigned int pref;
692         unsigned long lifetime;
693         struct rt6_info *rt;
694
695         if (len < sizeof(struct route_info)) {
696                 return -EINVAL;
697         }
698
699         /* Sanity check for prefix_len and length */
700         if (rinfo->length > 3) {
701                 return -EINVAL;
702         } else if (rinfo->prefix_len > 128) {
703                 return -EINVAL;
704         } else if (rinfo->prefix_len > 64) {
705                 if (rinfo->length < 2) {
706                         return -EINVAL;
707                 }
708         } else if (rinfo->prefix_len > 0) {
709                 if (rinfo->length < 1) {
710                         return -EINVAL;
711                 }
712         }
713
714         pref = rinfo->route_pref;
715         if (pref == ICMPV6_ROUTER_PREF_INVALID)
716                 return -EINVAL;
717
718         lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
719
720         if (rinfo->length == 3)
721                 prefix = (struct in6_addr *)rinfo->prefix;
722         else {
723                 /* this function is safe */
724                 ipv6_addr_prefix(&prefix_buf,
725                                  (struct in6_addr *)rinfo->prefix,
726                                  rinfo->prefix_len);
727                 prefix = &prefix_buf;
728         }
729
730         if (rinfo->prefix_len == 0)
731                 rt = rt6_get_dflt_router(gwaddr, dev);
732         else
733                 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
734                                         gwaddr, dev->ifindex);
735
736         if (rt && !lifetime) {
737                 ip6_del_rt(rt);
738                 rt = NULL;
739         }
740
741         if (!rt && lifetime)
742                 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
743                                         pref);
744         else if (rt)
745                 rt->rt6i_flags = RTF_ROUTEINFO |
746                                  (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
747
748         if (rt) {
749                 if (!addrconf_finite_timeout(lifetime))
750                         rt6_clean_expires(rt);
751                 else
752                         rt6_set_expires(rt, jiffies + HZ * lifetime);
753
754                 ip6_rt_put(rt);
755         }
756         return 0;
757 }
758 #endif
759
/*
 * BACKTRACK - retry a failed lookup higher up in the FIB tree.
 *
 * If the current result is the null entry, walk from the current node
 * towards the root: when the parent has a source-address subtree that is
 * not the node we came from, look @saddr up in that subtree, otherwise
 * step to the parent.  The first node carrying RTN_RTINFO restarts the
 * lookup; reaching the top-level root ends it.
 *
 * This macro is not self-contained.  The expanding function must provide:
 *   - locals "rt" (struct rt6_info *) and "fn" (struct fib6_node *);
 *   - a label "restart" that re-runs the selection on "fn";
 *   - a label "out" that finishes the lookup with the current "rt".
 */
#define BACKTRACK(__net, saddr)			\
do { \
	if (rt == (__net)->ipv6.ip6_null_entry) { \
		struct fib6_node *pn; \
		for (;;) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, (saddr)); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
777
778 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
779                                              struct fib6_table *table,
780                                              struct flowi6 *fl6, int flags)
781 {
782         struct fib6_node *fn;
783         struct rt6_info *rt;
784
785         read_lock_bh(&table->tb6_lock);
786         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
787 restart:
788         rt = fn->leaf;
789         rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
790         if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
791                 rt = rt6_multipath_select(rt, fl6);
792         BACKTRACK(net, &fl6->saddr);
793 out:
794         dst_use(&rt->dst, jiffies);
795         read_unlock_bh(&table->tb6_lock);
796         return rt;
797
798 }
799
800 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
801                                     int flags)
802 {
803         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
804 }
805 EXPORT_SYMBOL_GPL(ip6_route_lookup);
806
807 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
808                             const struct in6_addr *saddr, int oif, int strict)
809 {
810         struct flowi6 fl6 = {
811                 .flowi6_oif = oif,
812                 .daddr = *daddr,
813         };
814         struct dst_entry *dst;
815         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
816
817         if (saddr) {
818                 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
819                 flags |= RT6_LOOKUP_F_HAS_SADDR;
820         }
821
822         dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
823         if (dst->error == 0)
824                 return (struct rt6_info *) dst;
825
826         dst_release(dst);
827
828         return NULL;
829 }
830
831 EXPORT_SYMBOL(rt6_lookup);
832
/* ip6_ins_rt is called with table->tb6_lock NOT held (it takes the lock
   itself).  It takes a new route entry; if the addition fails for any
   reason, the route is freed.  In any case, if the caller does not hold
   a reference to the route, it may be destroyed.
 */
838
839 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
840 {
841         int err;
842         struct fib6_table *table;
843
844         table = rt->rt6i_table;
845         write_lock_bh(&table->tb6_lock);
846         err = fib6_add(&table->tb6_root, rt, info);
847         write_unlock_bh(&table->tb6_lock);
848
849         return err;
850 }
851
852 int ip6_ins_rt(struct rt6_info *rt)
853 {
854         struct nl_info info = {
855                 .nl_net = dev_net(rt->dst.dev),
856         };
857         return __ip6_ins_rt(rt, &info);
858 }
859
860 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
861                                       const struct in6_addr *daddr,
862                                       const struct in6_addr *saddr)
863 {
864         struct rt6_info *rt;
865
866         /*
867          *      Clone the route.
868          */
869
870         rt = ip6_rt_copy(ort, daddr);
871
872         if (rt) {
873                 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
874                         if (ort->rt6i_dst.plen != 128 &&
875                             ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
876                                 rt->rt6i_flags |= RTF_ANYCAST;
877                 }
878
879                 rt->rt6i_flags |= RTF_CACHE;
880
881 #ifdef CONFIG_IPV6_SUBTREES
882                 if (rt->rt6i_src.plen && saddr) {
883                         rt->rt6i_src.addr = *saddr;
884                         rt->rt6i_src.plen = 128;
885                 }
886 #endif
887         }
888
889         return rt;
890 }
891
892 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
893                                         const struct in6_addr *daddr)
894 {
895         struct rt6_info *rt = ip6_rt_copy(ort, daddr);
896
897         if (rt)
898                 rt->rt6i_flags |= RTF_CACHE;
899         return rt;
900 }
901
902 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
903                                       struct flowi6 *fl6, int flags)
904 {
905         struct fib6_node *fn;
906         struct rt6_info *rt, *nrt;
907         int strict = 0;
908         int attempts = 3;
909         int err;
910         int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
911
912         strict |= flags & RT6_LOOKUP_F_IFACE;
913
914 relookup:
915         read_lock_bh(&table->tb6_lock);
916
917 restart_2:
918         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
919
920 restart:
921         rt = rt6_select(fn, oif, strict | reachable);
922         if (rt->rt6i_nsiblings && oif == 0)
923                 rt = rt6_multipath_select(rt, fl6);
924         BACKTRACK(net, &fl6->saddr);
925         if (rt == net->ipv6.ip6_null_entry ||
926             rt->rt6i_flags & RTF_CACHE)
927                 goto out;
928
929         dst_hold(&rt->dst);
930         read_unlock_bh(&table->tb6_lock);
931
932         if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
933                 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
934         else if (!(rt->dst.flags & DST_HOST))
935                 nrt = rt6_alloc_clone(rt, &fl6->daddr);
936         else
937                 goto out2;
938
939         ip6_rt_put(rt);
940         rt = nrt ? : net->ipv6.ip6_null_entry;
941
942         dst_hold(&rt->dst);
943         if (nrt) {
944                 err = ip6_ins_rt(nrt);
945                 if (!err)
946                         goto out2;
947         }
948
949         if (--attempts <= 0)
950                 goto out2;
951
952         /*
953          * Race condition! In the gap, when table->tb6_lock was
954          * released someone could insert this route.  Relookup.
955          */
956         ip6_rt_put(rt);
957         goto relookup;
958
959 out:
960         if (reachable) {
961                 reachable = 0;
962                 goto restart_2;
963         }
964         dst_hold(&rt->dst);
965         read_unlock_bh(&table->tb6_lock);
966 out2:
967         rt->dst.lastuse = jiffies;
968         rt->dst.__use++;
969
970         return rt;
971 }
972
973 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
974                                             struct flowi6 *fl6, int flags)
975 {
976         return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
977 }
978
979 static struct dst_entry *ip6_route_input_lookup(struct net *net,
980                                                 struct net_device *dev,
981                                                 struct flowi6 *fl6, int flags)
982 {
983         if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
984                 flags |= RT6_LOOKUP_F_IFACE;
985
986         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
987 }
988
989 void ip6_route_input(struct sk_buff *skb)
990 {
991         const struct ipv6hdr *iph = ipv6_hdr(skb);
992         struct net *net = dev_net(skb->dev);
993         int flags = RT6_LOOKUP_F_HAS_SADDR;
994         struct flowi6 fl6 = {
995                 .flowi6_iif = skb->dev->ifindex,
996                 .daddr = iph->daddr,
997                 .saddr = iph->saddr,
998                 .flowlabel = ip6_flowinfo(iph),
999                 .flowi6_mark = skb->mark,
1000                 .flowi6_proto = iph->nexthdr,
1001         };
1002
1003         skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1004 }
1005
1006 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1007                                              struct flowi6 *fl6, int flags)
1008 {
1009         return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1010 }
1011
1012 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
1013                                     struct flowi6 *fl6)
1014 {
1015         int flags = 0;
1016
1017         fl6->flowi6_iif = LOOPBACK_IFINDEX;
1018
1019         if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
1020                 flags |= RT6_LOOKUP_F_IFACE;
1021
1022         if (!ipv6_addr_any(&fl6->saddr))
1023                 flags |= RT6_LOOKUP_F_HAS_SADDR;
1024         else if (sk)
1025                 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1026
1027         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1028 }
1029
1030 EXPORT_SYMBOL(ip6_route_output);
1031
1032 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1033 {
1034         struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1035         struct dst_entry *new = NULL;
1036
1037         rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1038         if (rt) {
1039                 new = &rt->dst;
1040
1041                 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
1042                 rt6_init_peer(rt, net->ipv6.peers);
1043
1044                 new->__use = 1;
1045                 new->input = dst_discard;
1046                 new->output = dst_discard;
1047
1048                 if (dst_metrics_read_only(&ort->dst))
1049                         new->_metrics = ort->dst._metrics;
1050                 else
1051                         dst_copy_metrics(new, &ort->dst);
1052                 rt->rt6i_idev = ort->rt6i_idev;
1053                 if (rt->rt6i_idev)
1054                         in6_dev_hold(rt->rt6i_idev);
1055
1056                 rt->rt6i_gateway = ort->rt6i_gateway;
1057                 rt->rt6i_flags = ort->rt6i_flags;
1058                 rt->rt6i_metric = 0;
1059
1060                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1061 #ifdef CONFIG_IPV6_SUBTREES
1062                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1063 #endif
1064
1065                 dst_free(new);
1066         }
1067
1068         dst_release(dst_orig);
1069         return new ? new : ERR_PTR(-ENOMEM);
1070 }
1071
1072 /*
1073  *      Destination cache support functions
1074  */
1075
1076 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1077 {
1078         struct rt6_info *rt;
1079
1080         rt = (struct rt6_info *) dst;
1081
1082         /* All IPV6 dsts are created with ->obsolete set to the value
1083          * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1084          * into this function always.
1085          */
1086         if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
1087                 return NULL;
1088
1089         if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1090                 return NULL;
1091
1092         if (rt6_check_expired(rt))
1093                 return NULL;
1094
1095         return dst;
1096 }
1097
1098 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1099 {
1100         struct rt6_info *rt = (struct rt6_info *) dst;
1101
1102         if (rt) {
1103                 if (rt->rt6i_flags & RTF_CACHE) {
1104                         if (rt6_check_expired(rt)) {
1105                                 ip6_del_rt(rt);
1106                                 dst = NULL;
1107                         }
1108                 } else {
1109                         dst_release(dst);
1110                         dst = NULL;
1111                 }
1112         }
1113         return dst;
1114 }
1115
1116 static void ip6_link_failure(struct sk_buff *skb)
1117 {
1118         struct rt6_info *rt;
1119
1120         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1121
1122         rt = (struct rt6_info *) skb_dst(skb);
1123         if (rt) {
1124                 if (rt->rt6i_flags & RTF_CACHE) {
1125                         dst_hold(&rt->dst);
1126                         if (ip6_del_rt(rt))
1127                                 dst_free(&rt->dst);
1128                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1129                         rt->rt6i_node->fn_sernum = -1;
1130                 }
1131         }
1132 }
1133
1134 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1135                                struct sk_buff *skb, u32 mtu)
1136 {
1137         struct rt6_info *rt6 = (struct rt6_info*)dst;
1138
1139         dst_confirm(dst);
1140         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1141                 struct net *net = dev_net(dst->dev);
1142
1143                 rt6->rt6i_flags |= RTF_MODIFIED;
1144                 if (mtu < IPV6_MIN_MTU)
1145                         mtu = IPV6_MIN_MTU;
1146
1147                 dst_metric_set(dst, RTAX_MTU, mtu);
1148                 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1149         }
1150 }
1151
1152 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1153                      int oif, u32 mark)
1154 {
1155         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1156         struct dst_entry *dst;
1157         struct flowi6 fl6;
1158
1159         memset(&fl6, 0, sizeof(fl6));
1160         fl6.flowi6_oif = oif;
1161         fl6.flowi6_mark = mark;
1162         fl6.flowi6_flags = 0;
1163         fl6.daddr = iph->daddr;
1164         fl6.saddr = iph->saddr;
1165         fl6.flowlabel = ip6_flowinfo(iph);
1166
1167         dst = ip6_route_output(net, NULL, &fl6);
1168         if (!dst->error)
1169                 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1170         dst_release(dst);
1171 }
1172 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1173
1174 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1175 {
1176         ip6_update_pmtu(skb, sock_net(sk), mtu,
1177                         sk->sk_bound_dev_if, sk->sk_mark);
1178 }
1179 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1180
1181 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1182 {
1183         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1184         struct dst_entry *dst;
1185         struct flowi6 fl6;
1186
1187         memset(&fl6, 0, sizeof(fl6));
1188         fl6.flowi6_oif = oif;
1189         fl6.flowi6_mark = mark;
1190         fl6.flowi6_flags = 0;
1191         fl6.daddr = iph->daddr;
1192         fl6.saddr = iph->saddr;
1193         fl6.flowlabel = ip6_flowinfo(iph);
1194
1195         dst = ip6_route_output(net, NULL, &fl6);
1196         if (!dst->error)
1197                 rt6_do_redirect(dst, NULL, skb);
1198         dst_release(dst);
1199 }
1200 EXPORT_SYMBOL_GPL(ip6_redirect);
1201
1202 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1203 {
1204         ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1205 }
1206 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1207
1208 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1209 {
1210         struct net_device *dev = dst->dev;
1211         unsigned int mtu = dst_mtu(dst);
1212         struct net *net = dev_net(dev);
1213
1214         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1215
1216         if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1217                 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1218
1219         /*
1220          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1221          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1222          * IPV6_MAXPLEN is also valid and means: "any MSS,
1223          * rely only on pmtu discovery"
1224          */
1225         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1226                 mtu = IPV6_MAXPLEN;
1227         return mtu;
1228 }
1229
1230 static unsigned int ip6_mtu(const struct dst_entry *dst)
1231 {
1232         struct inet6_dev *idev;
1233         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1234
1235         if (mtu)
1236                 goto out;
1237
1238         mtu = IPV6_MIN_MTU;
1239
1240         rcu_read_lock();
1241         idev = __in6_dev_get(dst->dev);
1242         if (idev)
1243                 mtu = idev->cnf.mtu6;
1244         rcu_read_unlock();
1245
1246 out:
1247         return min_t(unsigned int, mtu, IP6_MAX_MTU);
1248 }
1249
1250 static struct dst_entry *icmp6_dst_gc_list;
1251 static DEFINE_SPINLOCK(icmp6_dst_lock);
1252
1253 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1254                                   struct flowi6 *fl6)
1255 {
1256         struct dst_entry *dst;
1257         struct rt6_info *rt;
1258         struct inet6_dev *idev = in6_dev_get(dev);
1259         struct net *net = dev_net(dev);
1260
1261         if (unlikely(!idev))
1262                 return ERR_PTR(-ENODEV);
1263
1264         rt = ip6_dst_alloc(net, dev, 0, NULL);
1265         if (unlikely(!rt)) {
1266                 in6_dev_put(idev);
1267                 dst = ERR_PTR(-ENOMEM);
1268                 goto out;
1269         }
1270
1271         rt->dst.flags |= DST_HOST;
1272         rt->dst.output  = ip6_output;
1273         atomic_set(&rt->dst.__refcnt, 1);
1274         rt->rt6i_gateway  = fl6->daddr;
1275         rt->rt6i_dst.addr = fl6->daddr;
1276         rt->rt6i_dst.plen = 128;
1277         rt->rt6i_idev     = idev;
1278         dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1279
1280         spin_lock_bh(&icmp6_dst_lock);
1281         rt->dst.next = icmp6_dst_gc_list;
1282         icmp6_dst_gc_list = &rt->dst;
1283         spin_unlock_bh(&icmp6_dst_lock);
1284
1285         fib6_force_start_gc(net);
1286
1287         dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1288
1289 out:
1290         return dst;
1291 }
1292
1293 int icmp6_dst_gc(void)
1294 {
1295         struct dst_entry *dst, **pprev;
1296         int more = 0;
1297
1298         spin_lock_bh(&icmp6_dst_lock);
1299         pprev = &icmp6_dst_gc_list;
1300
1301         while ((dst = *pprev) != NULL) {
1302                 if (!atomic_read(&dst->__refcnt)) {
1303                         *pprev = dst->next;
1304                         dst_free(dst);
1305                 } else {
1306                         pprev = &dst->next;
1307                         ++more;
1308                 }
1309         }
1310
1311         spin_unlock_bh(&icmp6_dst_lock);
1312
1313         return more;
1314 }
1315
1316 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1317                             void *arg)
1318 {
1319         struct dst_entry *dst, **pprev;
1320
1321         spin_lock_bh(&icmp6_dst_lock);
1322         pprev = &icmp6_dst_gc_list;
1323         while ((dst = *pprev) != NULL) {
1324                 struct rt6_info *rt = (struct rt6_info *) dst;
1325                 if (func(rt, arg)) {
1326                         *pprev = dst->next;
1327                         dst_free(dst);
1328                 } else {
1329                         pprev = &dst->next;
1330                 }
1331         }
1332         spin_unlock_bh(&icmp6_dst_lock);
1333 }
1334
1335 static int ip6_dst_gc(struct dst_ops *ops)
1336 {
1337         unsigned long now = jiffies;
1338         struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1339         int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1340         int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1341         int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1342         int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1343         unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1344         int entries;
1345
1346         entries = dst_entries_get_fast(ops);
1347         if (time_after(rt_last_gc + rt_min_interval, now) &&
1348             entries <= rt_max_size)
1349                 goto out;
1350
1351         net->ipv6.ip6_rt_gc_expire++;
1352         fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1353         net->ipv6.ip6_rt_last_gc = now;
1354         entries = dst_entries_get_slow(ops);
1355         if (entries < ops->gc_thresh)
1356                 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1357 out:
1358         net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1359         return entries > rt_max_size;
1360 }
1361
1362 int ip6_dst_hoplimit(struct dst_entry *dst)
1363 {
1364         int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1365         if (hoplimit == 0) {
1366                 struct net_device *dev = dst->dev;
1367                 struct inet6_dev *idev;
1368
1369                 rcu_read_lock();
1370                 idev = __in6_dev_get(dev);
1371                 if (idev)
1372                         hoplimit = idev->cnf.hop_limit;
1373                 else
1374                         hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1375                 rcu_read_unlock();
1376         }
1377         return hoplimit;
1378 }
1379 EXPORT_SYMBOL(ip6_dst_hoplimit);
1380
1381 /*
1382  *
1383  */
1384
1385 int ip6_route_add(struct fib6_config *cfg)
1386 {
1387         int err;
1388         struct net *net = cfg->fc_nlinfo.nl_net;
1389         struct rt6_info *rt = NULL;
1390         struct net_device *dev = NULL;
1391         struct inet6_dev *idev = NULL;
1392         struct fib6_table *table;
1393         int addr_type;
1394
1395         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1396                 return -EINVAL;
1397 #ifndef CONFIG_IPV6_SUBTREES
1398         if (cfg->fc_src_len)
1399                 return -EINVAL;
1400 #endif
1401         if (cfg->fc_ifindex) {
1402                 err = -ENODEV;
1403                 dev = dev_get_by_index(net, cfg->fc_ifindex);
1404                 if (!dev)
1405                         goto out;
1406                 idev = in6_dev_get(dev);
1407                 if (!idev)
1408                         goto out;
1409         }
1410
1411         if (cfg->fc_metric == 0)
1412                 cfg->fc_metric = IP6_RT_PRIO_USER;
1413
1414         err = -ENOBUFS;
1415         if (cfg->fc_nlinfo.nlh &&
1416             !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1417                 table = fib6_get_table(net, cfg->fc_table);
1418                 if (!table) {
1419                         pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1420                         table = fib6_new_table(net, cfg->fc_table);
1421                 }
1422         } else {
1423                 table = fib6_new_table(net, cfg->fc_table);
1424         }
1425
1426         if (!table)
1427                 goto out;
1428
1429         rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table);
1430
1431         if (!rt) {
1432                 err = -ENOMEM;
1433                 goto out;
1434         }
1435
1436         if (cfg->fc_flags & RTF_EXPIRES)
1437                 rt6_set_expires(rt, jiffies +
1438                                 clock_t_to_jiffies(cfg->fc_expires));
1439         else
1440                 rt6_clean_expires(rt);
1441
1442         if (cfg->fc_protocol == RTPROT_UNSPEC)
1443                 cfg->fc_protocol = RTPROT_BOOT;
1444         rt->rt6i_protocol = cfg->fc_protocol;
1445
1446         addr_type = ipv6_addr_type(&cfg->fc_dst);
1447
1448         if (addr_type & IPV6_ADDR_MULTICAST)
1449                 rt->dst.input = ip6_mc_input;
1450         else if (cfg->fc_flags & RTF_LOCAL)
1451                 rt->dst.input = ip6_input;
1452         else
1453                 rt->dst.input = ip6_forward;
1454
1455         rt->dst.output = ip6_output;
1456
1457         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1458         rt->rt6i_dst.plen = cfg->fc_dst_len;
1459         if (rt->rt6i_dst.plen == 128)
1460                rt->dst.flags |= DST_HOST;
1461
1462         if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1463                 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1464                 if (!metrics) {
1465                         err = -ENOMEM;
1466                         goto out;
1467                 }
1468                 dst_init_metrics(&rt->dst, metrics, 0);
1469         }
1470 #ifdef CONFIG_IPV6_SUBTREES
1471         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1472         rt->rt6i_src.plen = cfg->fc_src_len;
1473 #endif
1474
1475         rt->rt6i_metric = cfg->fc_metric;
1476
1477         /* We cannot add true routes via loopback here,
1478            they would result in kernel looping; promote them to reject routes
1479          */
1480         if ((cfg->fc_flags & RTF_REJECT) ||
1481             (dev && (dev->flags & IFF_LOOPBACK) &&
1482              !(addr_type & IPV6_ADDR_LOOPBACK) &&
1483              !(cfg->fc_flags & RTF_LOCAL))) {
1484                 /* hold loopback dev/idev if we haven't done so. */
1485                 if (dev != net->loopback_dev) {
1486                         if (dev) {
1487                                 dev_put(dev);
1488                                 in6_dev_put(idev);
1489                         }
1490                         dev = net->loopback_dev;
1491                         dev_hold(dev);
1492                         idev = in6_dev_get(dev);
1493                         if (!idev) {
1494                                 err = -ENODEV;
1495                                 goto out;
1496                         }
1497                 }
1498                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1499                 switch (cfg->fc_type) {
1500                 case RTN_BLACKHOLE:
1501                         rt->dst.error = -EINVAL;
1502                         rt->dst.output = dst_discard;
1503                         rt->dst.input = dst_discard;
1504                         break;
1505                 case RTN_PROHIBIT:
1506                         rt->dst.error = -EACCES;
1507                         rt->dst.output = ip6_pkt_prohibit_out;
1508                         rt->dst.input = ip6_pkt_prohibit;
1509                         break;
1510                 case RTN_THROW:
1511                 default:
1512                         rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1513                                         : -ENETUNREACH;
1514                         rt->dst.output = ip6_pkt_discard_out;
1515                         rt->dst.input = ip6_pkt_discard;
1516                         break;
1517                 }
1518                 goto install_route;
1519         }
1520
1521         if (cfg->fc_flags & RTF_GATEWAY) {
1522                 const struct in6_addr *gw_addr;
1523                 int gwa_type;
1524
1525                 gw_addr = &cfg->fc_gateway;
1526                 rt->rt6i_gateway = *gw_addr;
1527                 gwa_type = ipv6_addr_type(gw_addr);
1528
1529                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1530                         struct rt6_info *grt;
1531
1532                         /* IPv6 strictly inhibits using not link-local
1533                            addresses as nexthop address.
1534                            Otherwise, router will not able to send redirects.
1535                            It is very good, but in some (rare!) circumstances
1536                            (SIT, PtP, NBMA NOARP links) it is handy to allow
1537                            some exceptions. --ANK
1538                          */
1539                         err = -EINVAL;
1540                         if (!(gwa_type & IPV6_ADDR_UNICAST))
1541                                 goto out;
1542
1543                         grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1544
1545                         err = -EHOSTUNREACH;
1546                         if (!grt)
1547                                 goto out;
1548                         if (dev) {
1549                                 if (dev != grt->dst.dev) {
1550                                         ip6_rt_put(grt);
1551                                         goto out;
1552                                 }
1553                         } else {
1554                                 dev = grt->dst.dev;
1555                                 idev = grt->rt6i_idev;
1556                                 dev_hold(dev);
1557                                 in6_dev_hold(grt->rt6i_idev);
1558                         }
1559                         if (!(grt->rt6i_flags & RTF_GATEWAY))
1560                                 err = 0;
1561                         ip6_rt_put(grt);
1562
1563                         if (err)
1564                                 goto out;
1565                 }
1566                 err = -EINVAL;
1567                 if (!dev || (dev->flags & IFF_LOOPBACK))
1568                         goto out;
1569         }
1570
1571         err = -ENODEV;
1572         if (!dev)
1573                 goto out;
1574
1575         if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1576                 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1577                         err = -EINVAL;
1578                         goto out;
1579                 }
1580                 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1581                 rt->rt6i_prefsrc.plen = 128;
1582         } else
1583                 rt->rt6i_prefsrc.plen = 0;
1584
1585         rt->rt6i_flags = cfg->fc_flags;
1586
1587 install_route:
1588         if (cfg->fc_mx) {
1589                 struct nlattr *nla;
1590                 int remaining;
1591
1592                 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1593                         int type = nla_type(nla);
1594
1595                         if (type) {
1596                                 if (type > RTAX_MAX) {
1597                                         err = -EINVAL;
1598                                         goto out;
1599                                 }
1600
1601                                 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1602                         }
1603                 }
1604         }
1605
1606         rt->dst.dev = dev;
1607         rt->rt6i_idev = idev;
1608         rt->rt6i_table = table;
1609
1610         cfg->fc_nlinfo.nl_net = dev_net(dev);
1611
1612         return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1613
1614 out:
1615         if (dev)
1616                 dev_put(dev);
1617         if (idev)
1618                 in6_dev_put(idev);
1619         if (rt)
1620                 dst_free(&rt->dst);
1621         return err;
1622 }
1623
1624 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1625 {
1626         int err;
1627         struct fib6_table *table;
1628         struct net *net = dev_net(rt->dst.dev);
1629
1630         if (rt == net->ipv6.ip6_null_entry) {
1631                 err = -ENOENT;
1632                 goto out;
1633         }
1634
1635         table = rt->rt6i_table;
1636         write_lock_bh(&table->tb6_lock);
1637         err = fib6_del(rt, info);
1638         write_unlock_bh(&table->tb6_lock);
1639
1640 out:
1641         ip6_rt_put(rt);
1642         return err;
1643 }
1644
1645 int ip6_del_rt(struct rt6_info *rt)
1646 {
1647         struct nl_info info = {
1648                 .nl_net = dev_net(rt->dst.dev),
1649         };
1650         return __ip6_del_rt(rt, &info);
1651 }
1652
1653 static int ip6_route_del(struct fib6_config *cfg)
1654 {
1655         struct fib6_table *table;
1656         struct fib6_node *fn;
1657         struct rt6_info *rt;
1658         int err = -ESRCH;
1659
1660         table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1661         if (!table)
1662                 return err;
1663
1664         read_lock_bh(&table->tb6_lock);
1665
1666         fn = fib6_locate(&table->tb6_root,
1667                          &cfg->fc_dst, cfg->fc_dst_len,
1668                          &cfg->fc_src, cfg->fc_src_len);
1669
1670         if (fn) {
1671                 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1672                         if (cfg->fc_ifindex &&
1673                             (!rt->dst.dev ||
1674                              rt->dst.dev->ifindex != cfg->fc_ifindex))
1675                                 continue;
1676                         if (cfg->fc_flags & RTF_GATEWAY &&
1677                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1678                                 continue;
1679                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1680                                 continue;
1681                         dst_hold(&rt->dst);
1682                         read_unlock_bh(&table->tb6_lock);
1683
1684                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1685                 }
1686         }
1687         read_unlock_bh(&table->tb6_lock);
1688
1689         return err;
1690 }
1691
1692 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1693 {
1694         struct net *net = dev_net(skb->dev);
1695         struct netevent_redirect netevent;
1696         struct rt6_info *rt, *nrt = NULL;
1697         struct ndisc_options ndopts;
1698         struct inet6_dev *in6_dev;
1699         struct neighbour *neigh;
1700         struct rd_msg *msg;
1701         int optlen, on_link;
1702         u8 *lladdr;
1703
1704         optlen = skb->tail - skb->transport_header;
1705         optlen -= sizeof(*msg);
1706
1707         if (optlen < 0) {
1708                 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1709                 return;
1710         }
1711
1712         msg = (struct rd_msg *)icmp6_hdr(skb);
1713
1714         if (ipv6_addr_is_multicast(&msg->dest)) {
1715                 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1716                 return;
1717         }
1718
1719         on_link = 0;
1720         if (ipv6_addr_equal(&msg->dest, &msg->target)) {
1721                 on_link = 1;
1722         } else if (ipv6_addr_type(&msg->target) !=
1723                    (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1724                 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1725                 return;
1726         }
1727
1728         in6_dev = __in6_dev_get(skb->dev);
1729         if (!in6_dev)
1730                 return;
1731         if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1732                 return;
1733
1734         /* RFC2461 8.1:
1735          *      The IP source address of the Redirect MUST be the same as the current
1736          *      first-hop router for the specified ICMP Destination Address.
1737          */
1738
1739         if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
1740                 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1741                 return;
1742         }
1743
1744         lladdr = NULL;
1745         if (ndopts.nd_opts_tgt_lladdr) {
1746                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1747                                              skb->dev);
1748                 if (!lladdr) {
1749                         net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1750                         return;
1751                 }
1752         }
1753
1754         rt = (struct rt6_info *) dst;
1755         if (rt == net->ipv6.ip6_null_entry) {
1756                 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1757                 return;
1758         }
1759
1760         /* Redirect received -> path was valid.
1761          * Look, redirects are sent only in response to data packets,
1762          * so that this nexthop apparently is reachable. --ANK
1763          */
1764         dst_confirm(&rt->dst);
1765
1766         neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
1767         if (!neigh)
1768                 return;
1769
1770         /*
1771          *      We have finally decided to accept it.
1772          */
1773
1774         neigh_update(neigh, lladdr, NUD_STALE,
1775                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1776                      NEIGH_UPDATE_F_OVERRIDE|
1777                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1778                                      NEIGH_UPDATE_F_ISROUTER))
1779                      );
1780
1781         nrt = ip6_rt_copy(rt, &msg->dest);
1782         if (!nrt)
1783                 goto out;
1784
1785         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1786         if (on_link)
1787                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1788
1789         nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1790
1791         if (ip6_ins_rt(nrt))
1792                 goto out;
1793
1794         netevent.old = &rt->dst;
1795         netevent.new = &nrt->dst;
1796         netevent.daddr = &msg->dest;
1797         netevent.neigh = neigh;
1798         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1799
1800         if (rt->rt6i_flags & RTF_CACHE) {
1801                 rt = (struct rt6_info *) dst_clone(&rt->dst);
1802                 ip6_del_rt(rt);
1803         }
1804
1805 out:
1806         neigh_release(neigh);
1807 }
1808
1809 /*
1810  *      Misc support functions
1811  */
1812
1813 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1814                                     const struct in6_addr *dest)
1815 {
1816         struct net *net = dev_net(ort->dst.dev);
1817         struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1818                                             ort->rt6i_table);
1819
1820         if (rt) {
1821                 rt->dst.input = ort->dst.input;
1822                 rt->dst.output = ort->dst.output;
1823                 rt->dst.flags |= DST_HOST;
1824
1825                 rt->rt6i_dst.addr = *dest;
1826                 rt->rt6i_dst.plen = 128;
1827                 dst_copy_metrics(&rt->dst, &ort->dst);
1828                 rt->dst.error = ort->dst.error;
1829                 rt->rt6i_idev = ort->rt6i_idev;
1830                 if (rt->rt6i_idev)
1831                         in6_dev_hold(rt->rt6i_idev);
1832                 rt->dst.lastuse = jiffies;
1833
1834                 if (ort->rt6i_flags & RTF_GATEWAY)
1835                         rt->rt6i_gateway = ort->rt6i_gateway;
1836                 else
1837                         rt->rt6i_gateway = *dest;
1838                 rt->rt6i_flags = ort->rt6i_flags;
1839                 rt6_set_from(rt, ort);
1840                 rt->rt6i_metric = 0;
1841
1842 #ifdef CONFIG_IPV6_SUBTREES
1843                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1844 #endif
1845                 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1846                 rt->rt6i_table = ort->rt6i_table;
1847         }
1848         return rt;
1849 }
1850
1851 #ifdef CONFIG_IPV6_ROUTE_INFO
1852 static struct rt6_info *rt6_get_route_info(struct net *net,
1853                                            const struct in6_addr *prefix, int prefixlen,
1854                                            const struct in6_addr *gwaddr, int ifindex)
1855 {
1856         struct fib6_node *fn;
1857         struct rt6_info *rt = NULL;
1858         struct fib6_table *table;
1859
1860         table = fib6_get_table(net, RT6_TABLE_INFO);
1861         if (!table)
1862                 return NULL;
1863
1864         read_lock_bh(&table->tb6_lock);
1865         fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1866         if (!fn)
1867                 goto out;
1868
1869         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1870                 if (rt->dst.dev->ifindex != ifindex)
1871                         continue;
1872                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1873                         continue;
1874                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1875                         continue;
1876                 dst_hold(&rt->dst);
1877                 break;
1878         }
1879 out:
1880         read_unlock_bh(&table->tb6_lock);
1881         return rt;
1882 }
1883
1884 static struct rt6_info *rt6_add_route_info(struct net *net,
1885                                            const struct in6_addr *prefix, int prefixlen,
1886                                            const struct in6_addr *gwaddr, int ifindex,
1887                                            unsigned int pref)
1888 {
1889         struct fib6_config cfg = {
1890                 .fc_table       = RT6_TABLE_INFO,
1891                 .fc_metric      = IP6_RT_PRIO_USER,
1892                 .fc_ifindex     = ifindex,
1893                 .fc_dst_len     = prefixlen,
1894                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1895                                   RTF_UP | RTF_PREF(pref),
1896                 .fc_nlinfo.portid = 0,
1897                 .fc_nlinfo.nlh = NULL,
1898                 .fc_nlinfo.nl_net = net,
1899         };
1900
1901         cfg.fc_dst = *prefix;
1902         cfg.fc_gateway = *gwaddr;
1903
1904         /* We should treat it as a default route if prefix length is 0. */
1905         if (!prefixlen)
1906                 cfg.fc_flags |= RTF_DEFAULT;
1907
1908         ip6_route_add(&cfg);
1909
1910         return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1911 }
1912 #endif
1913
1914 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1915 {
1916         struct rt6_info *rt;
1917         struct fib6_table *table;
1918
1919         table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1920         if (!table)
1921                 return NULL;
1922
1923         read_lock_bh(&table->tb6_lock);
1924         for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1925                 if (dev == rt->dst.dev &&
1926                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1927                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1928                         break;
1929         }
1930         if (rt)
1931                 dst_hold(&rt->dst);
1932         read_unlock_bh(&table->tb6_lock);
1933         return rt;
1934 }
1935
1936 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1937                                      struct net_device *dev,
1938                                      unsigned int pref)
1939 {
1940         struct fib6_config cfg = {
1941                 .fc_table       = RT6_TABLE_DFLT,
1942                 .fc_metric      = IP6_RT_PRIO_USER,
1943                 .fc_ifindex     = dev->ifindex,
1944                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1945                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1946                 .fc_nlinfo.portid = 0,
1947                 .fc_nlinfo.nlh = NULL,
1948                 .fc_nlinfo.nl_net = dev_net(dev),
1949         };
1950
1951         cfg.fc_gateway = *gwaddr;
1952
1953         ip6_route_add(&cfg);
1954
1955         return rt6_get_dflt_router(gwaddr, dev);
1956 }
1957
1958 void rt6_purge_dflt_routers(struct net *net)
1959 {
1960         struct rt6_info *rt;
1961         struct fib6_table *table;
1962
1963         /* NOTE: Keep consistent with rt6_get_dflt_router */
1964         table = fib6_get_table(net, RT6_TABLE_DFLT);
1965         if (!table)
1966                 return;
1967
1968 restart:
1969         read_lock_bh(&table->tb6_lock);
1970         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1971                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
1972                     (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
1973                         dst_hold(&rt->dst);
1974                         read_unlock_bh(&table->tb6_lock);
1975                         ip6_del_rt(rt);
1976                         goto restart;
1977                 }
1978         }
1979         read_unlock_bh(&table->tb6_lock);
1980 }
1981
1982 static void rtmsg_to_fib6_config(struct net *net,
1983                                  struct in6_rtmsg *rtmsg,
1984                                  struct fib6_config *cfg)
1985 {
1986         memset(cfg, 0, sizeof(*cfg));
1987
1988         cfg->fc_table = RT6_TABLE_MAIN;
1989         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1990         cfg->fc_metric = rtmsg->rtmsg_metric;
1991         cfg->fc_expires = rtmsg->rtmsg_info;
1992         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1993         cfg->fc_src_len = rtmsg->rtmsg_src_len;
1994         cfg->fc_flags = rtmsg->rtmsg_flags;
1995
1996         cfg->fc_nlinfo.nl_net = net;
1997
1998         cfg->fc_dst = rtmsg->rtmsg_dst;
1999         cfg->fc_src = rtmsg->rtmsg_src;
2000         cfg->fc_gateway = rtmsg->rtmsg_gateway;
2001 }
2002
2003 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2004 {
2005         struct fib6_config cfg;
2006         struct in6_rtmsg rtmsg;
2007         int err;
2008
2009         switch(cmd) {
2010         case SIOCADDRT:         /* Add a route */
2011         case SIOCDELRT:         /* Delete a route */
2012                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2013                         return -EPERM;
2014                 err = copy_from_user(&rtmsg, arg,
2015                                      sizeof(struct in6_rtmsg));
2016                 if (err)
2017                         return -EFAULT;
2018
2019                 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2020
2021                 rtnl_lock();
2022                 switch (cmd) {
2023                 case SIOCADDRT:
2024                         err = ip6_route_add(&cfg);
2025                         break;
2026                 case SIOCDELRT:
2027                         err = ip6_route_del(&cfg);
2028                         break;
2029                 default:
2030                         err = -EINVAL;
2031                 }
2032                 rtnl_unlock();
2033
2034                 return err;
2035         }
2036
2037         return -EINVAL;
2038 }
2039
2040 /*
2041  *      Drop the packet on the floor
2042  */
2043
2044 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2045 {
2046         int type;
2047         struct dst_entry *dst = skb_dst(skb);
2048         switch (ipstats_mib_noroutes) {
2049         case IPSTATS_MIB_INNOROUTES:
2050                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2051                 if (type == IPV6_ADDR_ANY) {
2052                         IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2053                                       IPSTATS_MIB_INADDRERRORS);
2054                         break;
2055                 }
2056                 /* FALLTHROUGH */
2057         case IPSTATS_MIB_OUTNOROUTES:
2058                 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2059                               ipstats_mib_noroutes);
2060                 break;
2061         }
2062         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2063         kfree_skb(skb);
2064         return 0;
2065 }
2066
2067 static int ip6_pkt_discard(struct sk_buff *skb)
2068 {
2069         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2070 }
2071
2072 static int ip6_pkt_discard_out(struct sk_buff *skb)
2073 {
2074         skb->dev = skb_dst(skb)->dev;
2075         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2076 }
2077
2078 static int ip6_pkt_prohibit(struct sk_buff *skb)
2079 {
2080         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2081 }
2082
2083 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2084 {
2085         skb->dev = skb_dst(skb)->dev;
2086         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2087 }
2088
2089 /*
2090  *      Allocate a dst for local (unicast / anycast) address.
2091  */
2092
2093 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2094                                     const struct in6_addr *addr,
2095                                     bool anycast)
2096 {
2097         struct net *net = dev_net(idev->dev);
2098         struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2099                                             DST_NOCOUNT, NULL);
2100         if (!rt)
2101                 return ERR_PTR(-ENOMEM);
2102
2103         in6_dev_hold(idev);
2104
2105         rt->dst.flags |= DST_HOST;
2106         rt->dst.input = ip6_input;
2107         rt->dst.output = ip6_output;
2108         rt->rt6i_idev = idev;
2109
2110         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2111         if (anycast)
2112                 rt->rt6i_flags |= RTF_ANYCAST;
2113         else
2114                 rt->rt6i_flags |= RTF_LOCAL;
2115
2116         rt->rt6i_gateway  = *addr;
2117         rt->rt6i_dst.addr = *addr;
2118         rt->rt6i_dst.plen = 128;
2119         rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2120
2121         atomic_set(&rt->dst.__refcnt, 1);
2122
2123         return rt;
2124 }
2125
2126 int ip6_route_get_saddr(struct net *net,
2127                         struct rt6_info *rt,
2128                         const struct in6_addr *daddr,
2129                         unsigned int prefs,
2130                         struct in6_addr *saddr)
2131 {
2132         struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2133         int err = 0;
2134         if (rt->rt6i_prefsrc.plen)
2135                 *saddr = rt->rt6i_prefsrc.addr;
2136         else
2137                 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2138                                          daddr, prefs, saddr);
2139         return err;
2140 }
2141
2142 /* remove deleted ip from prefsrc entries */
/* Argument bundle that rt6_remove_prefsrc() hands to fib6_remove_prefsrc(). */
struct arg_dev_net_ip {
        struct net_device *dev;         /* device filter; NULL matches every device */
        struct net *net;                /* namespace whose ip6_null_entry is skipped */
        struct in6_addr *addr;          /* address compared with each route's prefsrc */
};
2148
2149 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2150 {
2151         struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2152         struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2153         struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2154
2155         if (((void *)rt->dst.dev == dev || !dev) &&
2156             rt != net->ipv6.ip6_null_entry &&
2157             ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2158                 /* remove prefsrc entry */
2159                 rt->rt6i_prefsrc.plen = 0;
2160         }
2161         return 0;
2162 }
2163
2164 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2165 {
2166         struct net *net = dev_net(ifp->idev->dev);
2167         struct arg_dev_net_ip adni = {
2168                 .dev = ifp->idev->dev,
2169                 .net = net,
2170                 .addr = &ifp->addr,
2171         };
2172         fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2173 }
2174
/* Argument bundle that rt6_ifdown() hands to fib6_ifdown(). */
struct arg_dev_net {
        struct net_device *dev;         /* device filter; NULL matches every device */
        struct net *net;                /* namespace whose ip6_null_entry is spared */
};
2179
2180 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2181 {
2182         const struct arg_dev_net *adn = arg;
2183         const struct net_device *dev = adn->dev;
2184
2185         if ((rt->dst.dev == dev || !dev) &&
2186             rt != adn->net->ipv6.ip6_null_entry)
2187                 return -1;
2188
2189         return 0;
2190 }
2191
2192 void rt6_ifdown(struct net *net, struct net_device *dev)
2193 {
2194         struct arg_dev_net adn = {
2195                 .dev = dev,
2196                 .net = net,
2197         };
2198
2199         fib6_clean_all(net, fib6_ifdown, 0, &adn);
2200         icmp6_clean_all(fib6_ifdown, &adn);
2201 }
2202
/* Argument bundle that rt6_mtu_change() hands to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
        struct net_device *dev;         /* device whose MTU changed */
        unsigned int mtu;               /* the new MTU */
};
2207
2208 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2209 {
2210         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2211         struct inet6_dev *idev;
2212
2213         /* In IPv6 pmtu discovery is not optional,
2214            so that RTAX_MTU lock cannot disable it.
2215            We still use this lock to block changes
2216            caused by addrconf/ndisc.
2217         */
2218
2219         idev = __in6_dev_get(arg->dev);
2220         if (!idev)
2221                 return 0;
2222
2223         /* For administrative MTU increase, there is no way to discover
2224            IPv6 PMTU increase, so PMTU increase should be updated here.
2225            Since RFC 1981 doesn't include administrative MTU increase
2226            update PMTU increase is a MUST. (i.e. jumbo frame)
2227          */
2228         /*
2229            If new MTU is less than route PMTU, this new MTU will be the
2230            lowest MTU in the path, update the route PMTU to reflect PMTU
2231            decreases; if new MTU is greater than route PMTU, and the
2232            old MTU is the lowest MTU in the path, update the route PMTU
2233            to reflect the increase. In this case if the other nodes' MTU
2234            also have the lowest MTU, TOO BIG MESSAGE will be lead to
2235            PMTU discouvery.
2236          */
2237         if (rt->dst.dev == arg->dev &&
2238             !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2239             (dst_mtu(&rt->dst) >= arg->mtu ||
2240              (dst_mtu(&rt->dst) < arg->mtu &&
2241               dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2242                 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2243         }
2244         return 0;
2245 }
2246
2247 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2248 {
2249         struct rt6_mtu_change_arg arg = {
2250                 .dev = dev,
2251                 .mtu = mtu,
2252         };
2253
2254         fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2255 }
2256
2257 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2258         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2259         [RTA_OIF]               = { .type = NLA_U32 },
2260         [RTA_IIF]               = { .type = NLA_U32 },
2261         [RTA_PRIORITY]          = { .type = NLA_U32 },
2262         [RTA_METRICS]           = { .type = NLA_NESTED },
2263         [RTA_MULTIPATH]         = { .len = sizeof(struct rtnexthop) },
2264 };
2265
2266 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2267                               struct fib6_config *cfg)
2268 {
2269         struct rtmsg *rtm;
2270         struct nlattr *tb[RTA_MAX+1];
2271         int err;
2272
2273         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2274         if (err < 0)
2275                 goto errout;
2276
2277         err = -EINVAL;
2278         rtm = nlmsg_data(nlh);
2279         memset(cfg, 0, sizeof(*cfg));
2280
2281         cfg->fc_table = rtm->rtm_table;
2282         cfg->fc_dst_len = rtm->rtm_dst_len;
2283         cfg->fc_src_len = rtm->rtm_src_len;
2284         cfg->fc_flags = RTF_UP;
2285         cfg->fc_protocol = rtm->rtm_protocol;
2286         cfg->fc_type = rtm->rtm_type;
2287
2288         if (rtm->rtm_type == RTN_UNREACHABLE ||
2289             rtm->rtm_type == RTN_BLACKHOLE ||
2290             rtm->rtm_type == RTN_PROHIBIT ||
2291             rtm->rtm_type == RTN_THROW)
2292                 cfg->fc_flags |= RTF_REJECT;
2293
2294         if (rtm->rtm_type == RTN_LOCAL)
2295                 cfg->fc_flags |= RTF_LOCAL;
2296
2297         cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2298         cfg->fc_nlinfo.nlh = nlh;
2299         cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2300
2301         if (tb[RTA_GATEWAY]) {
2302                 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2303                 cfg->fc_flags |= RTF_GATEWAY;
2304         }
2305
2306         if (tb[RTA_DST]) {
2307                 int plen = (rtm->rtm_dst_len + 7) >> 3;
2308
2309                 if (nla_len(tb[RTA_DST]) < plen)
2310                         goto errout;
2311
2312                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2313         }
2314
2315         if (tb[RTA_SRC]) {
2316                 int plen = (rtm->rtm_src_len + 7) >> 3;
2317
2318                 if (nla_len(tb[RTA_SRC]) < plen)
2319                         goto errout;
2320
2321                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2322         }
2323
2324         if (tb[RTA_PREFSRC])
2325                 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2326
2327         if (tb[RTA_OIF])
2328                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2329
2330         if (tb[RTA_PRIORITY])
2331                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2332
2333         if (tb[RTA_METRICS]) {
2334                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2335                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2336         }
2337
2338         if (tb[RTA_TABLE])
2339                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2340
2341         if (tb[RTA_MULTIPATH]) {
2342                 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2343                 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2344         }
2345
2346         err = 0;
2347 errout:
2348         return err;
2349 }
2350
2351 static int ip6_route_multipath(struct fib6_config *cfg, int add)
2352 {
2353         struct fib6_config r_cfg;
2354         struct rtnexthop *rtnh;
2355         int remaining;
2356         int attrlen;
2357         int err = 0, last_err = 0;
2358
2359 beginning:
2360         rtnh = (struct rtnexthop *)cfg->fc_mp;
2361         remaining = cfg->fc_mp_len;
2362
2363         /* Parse a Multipath Entry */
2364         while (rtnh_ok(rtnh, remaining)) {
2365                 memcpy(&r_cfg, cfg, sizeof(*cfg));
2366                 if (rtnh->rtnh_ifindex)
2367                         r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2368
2369                 attrlen = rtnh_attrlen(rtnh);
2370                 if (attrlen > 0) {
2371                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2372
2373                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2374                         if (nla) {
2375                                 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2376                                 r_cfg.fc_flags |= RTF_GATEWAY;
2377                         }
2378                 }
2379                 err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2380                 if (err) {
2381                         last_err = err;
2382                         /* If we are trying to remove a route, do not stop the
2383                          * loop when ip6_route_del() fails (because next hop is
2384                          * already gone), we should try to remove all next hops.
2385                          */
2386                         if (add) {
2387                                 /* If add fails, we should try to delete all
2388                                  * next hops that have been already added.
2389                                  */
2390                                 add = 0;
2391                                 goto beginning;
2392                         }
2393                 }
2394                 /* Because each route is added like a single route we remove
2395                  * this flag after the first nexthop (if there is a collision,
2396                  * we have already fail to add the first nexthop:
2397                  * fib6_add_rt2node() has reject it).
2398                  */
2399                 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
2400                 rtnh = rtnh_next(rtnh, &remaining);
2401         }
2402
2403         return last_err;
2404 }
2405
2406 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh)
2407 {
2408         struct fib6_config cfg;
2409         int err;
2410
2411         err = rtm_to_fib6_config(skb, nlh, &cfg);
2412         if (err < 0)
2413                 return err;
2414
2415         if (cfg.fc_mp)
2416                 return ip6_route_multipath(&cfg, 0);
2417         else
2418                 return ip6_route_del(&cfg);
2419 }
2420
2421 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh)
2422 {
2423         struct fib6_config cfg;
2424         int err;
2425
2426         err = rtm_to_fib6_config(skb, nlh, &cfg);
2427         if (err < 0)
2428                 return err;
2429
2430         if (cfg.fc_mp)
2431                 return ip6_route_multipath(&cfg, 1);
2432         else
2433                 return ip6_route_add(&cfg);
2434 }
2435
2436 static inline size_t rt6_nlmsg_size(void)
2437 {
2438         return NLMSG_ALIGN(sizeof(struct rtmsg))
2439                + nla_total_size(16) /* RTA_SRC */
2440                + nla_total_size(16) /* RTA_DST */
2441                + nla_total_size(16) /* RTA_GATEWAY */
2442                + nla_total_size(16) /* RTA_PREFSRC */
2443                + nla_total_size(4) /* RTA_TABLE */
2444                + nla_total_size(4) /* RTA_IIF */
2445                + nla_total_size(4) /* RTA_OIF */
2446                + nla_total_size(4) /* RTA_PRIORITY */
2447                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2448                + nla_total_size(sizeof(struct rta_cacheinfo));
2449 }
2450
2451 static int rt6_fill_node(struct net *net,
2452                          struct sk_buff *skb, struct rt6_info *rt,
2453                          struct in6_addr *dst, struct in6_addr *src,
2454                          int iif, int type, u32 portid, u32 seq,
2455                          int prefix, int nowait, unsigned int flags)
2456 {
2457         struct rtmsg *rtm;
2458         struct nlmsghdr *nlh;
2459         long expires;
2460         u32 table;
2461
2462         if (prefix) {   /* user wants prefix routes only */
2463                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2464                         /* success since this is not a prefix route */
2465                         return 1;
2466                 }
2467         }
2468
2469         nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2470         if (!nlh)
2471                 return -EMSGSIZE;
2472
2473         rtm = nlmsg_data(nlh);
2474         rtm->rtm_family = AF_INET6;
2475         rtm->rtm_dst_len = rt->rt6i_dst.plen;
2476         rtm->rtm_src_len = rt->rt6i_src.plen;
2477         rtm->rtm_tos = 0;
2478         if (rt->rt6i_table)
2479                 table = rt->rt6i_table->tb6_id;
2480         else
2481                 table = RT6_TABLE_UNSPEC;
2482         rtm->rtm_table = table;
2483         if (nla_put_u32(skb, RTA_TABLE, table))
2484                 goto nla_put_failure;
2485         if (rt->rt6i_flags & RTF_REJECT) {
2486                 switch (rt->dst.error) {
2487                 case -EINVAL:
2488                         rtm->rtm_type = RTN_BLACKHOLE;
2489                         break;
2490                 case -EACCES:
2491                         rtm->rtm_type = RTN_PROHIBIT;
2492                         break;
2493                 case -EAGAIN:
2494                         rtm->rtm_type = RTN_THROW;
2495                         break;
2496                 default:
2497                         rtm->rtm_type = RTN_UNREACHABLE;
2498                         break;
2499                 }
2500         }
2501         else if (rt->rt6i_flags & RTF_LOCAL)
2502                 rtm->rtm_type = RTN_LOCAL;
2503         else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2504                 rtm->rtm_type = RTN_LOCAL;
2505         else
2506                 rtm->rtm_type = RTN_UNICAST;
2507         rtm->rtm_flags = 0;
2508         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2509         rtm->rtm_protocol = rt->rt6i_protocol;
2510         if (rt->rt6i_flags & RTF_DYNAMIC)
2511                 rtm->rtm_protocol = RTPROT_REDIRECT;
2512         else if (rt->rt6i_flags & RTF_ADDRCONF) {
2513                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2514                         rtm->rtm_protocol = RTPROT_RA;
2515                 else
2516                         rtm->rtm_protocol = RTPROT_KERNEL;
2517         }
2518
2519         if (rt->rt6i_flags & RTF_CACHE)
2520                 rtm->rtm_flags |= RTM_F_CLONED;
2521
2522         if (dst) {
2523                 if (nla_put(skb, RTA_DST, 16, dst))
2524                         goto nla_put_failure;
2525                 rtm->rtm_dst_len = 128;
2526         } else if (rtm->rtm_dst_len)
2527                 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2528                         goto nla_put_failure;
2529 #ifdef CONFIG_IPV6_SUBTREES
2530         if (src) {
2531                 if (nla_put(skb, RTA_SRC, 16, src))
2532                         goto nla_put_failure;
2533                 rtm->rtm_src_len = 128;
2534         } else if (rtm->rtm_src_len &&
2535                    nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2536                 goto nla_put_failure;
2537 #endif
2538         if (iif) {
2539 #ifdef CONFIG_IPV6_MROUTE
2540                 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2541                         int err = ip6mr_get_route(net, skb, rtm, nowait);
2542                         if (err <= 0) {
2543                                 if (!nowait) {
2544                                         if (err == 0)
2545                                                 return 0;
2546                                         goto nla_put_failure;
2547                                 } else {
2548                                         if (err == -EMSGSIZE)
2549                                                 goto nla_put_failure;
2550                                 }
2551                         }
2552                 } else
2553 #endif
2554                         if (nla_put_u32(skb, RTA_IIF, iif))
2555                                 goto nla_put_failure;
2556         } else if (dst) {
2557                 struct in6_addr saddr_buf;
2558                 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2559                     nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2560                         goto nla_put_failure;
2561         }
2562
2563         if (rt->rt6i_prefsrc.plen) {
2564                 struct in6_addr saddr_buf;
2565                 saddr_buf = rt->rt6i_prefsrc.addr;
2566                 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2567                         goto nla_put_failure;
2568         }
2569
2570         if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2571                 goto nla_put_failure;
2572
2573         if (rt->rt6i_flags & RTF_GATEWAY) {
2574                 if (nla_put(skb, RTA_GATEWAY, 16, &rt->rt6i_gateway) < 0)
2575                         goto nla_put_failure;
2576         }
2577
2578         if (rt->dst.dev &&
2579             nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2580                 goto nla_put_failure;
2581         if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2582                 goto nla_put_failure;
2583
2584         expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2585
2586         if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2587                 goto nla_put_failure;
2588
2589         return nlmsg_end(skb, nlh);
2590
2591 nla_put_failure:
2592         nlmsg_cancel(skb, nlh);
2593         return -EMSGSIZE;
2594 }
2595
2596 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2597 {
2598         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2599         int prefix;
2600
2601         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2602                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2603                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2604         } else
2605                 prefix = 0;
2606
2607         return rt6_fill_node(arg->net,
2608                      arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2609                      NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2610                      prefix, 0, NLM_F_MULTI);
2611 }
2612
2613 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh)
2614 {
2615         struct net *net = sock_net(in_skb->sk);
2616         struct nlattr *tb[RTA_MAX+1];
2617         struct rt6_info *rt;
2618         struct sk_buff *skb;
2619         struct rtmsg *rtm;
2620         struct flowi6 fl6;
2621         int err, iif = 0, oif = 0;
2622
2623         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2624         if (err < 0)
2625                 goto errout;
2626
2627         err = -EINVAL;
2628         memset(&fl6, 0, sizeof(fl6));
2629
2630         if (tb[RTA_SRC]) {
2631                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2632                         goto errout;
2633
2634                 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2635         }
2636
2637         if (tb[RTA_DST]) {
2638                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2639                         goto errout;
2640
2641                 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2642         }
2643
2644         if (tb[RTA_IIF])
2645                 iif = nla_get_u32(tb[RTA_IIF]);
2646
2647         if (tb[RTA_OIF])
2648                 oif = nla_get_u32(tb[RTA_OIF]);
2649
2650         if (iif) {
2651                 struct net_device *dev;
2652                 int flags = 0;
2653
2654                 dev = __dev_get_by_index(net, iif);
2655                 if (!dev) {
2656                         err = -ENODEV;
2657                         goto errout;
2658                 }
2659
2660                 fl6.flowi6_iif = iif;
2661
2662                 if (!ipv6_addr_any(&fl6.saddr))
2663                         flags |= RT6_LOOKUP_F_HAS_SADDR;
2664
2665                 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2666                                                                flags);
2667         } else {
2668                 fl6.flowi6_oif = oif;
2669
2670                 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2671         }
2672
2673         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2674         if (!skb) {
2675                 ip6_rt_put(rt);
2676                 err = -ENOBUFS;
2677                 goto errout;
2678         }
2679
2680         /* Reserve room for dummy headers, this skb can pass
2681            through good chunk of routing engine.
2682          */
2683         skb_reset_mac_header(skb);
2684         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2685
2686         skb_dst_set(skb, &rt->dst);
2687
2688         err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2689                             RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2690                             nlh->nlmsg_seq, 0, 0, 0);
2691         if (err < 0) {
2692                 kfree_skb(skb);
2693                 goto errout;
2694         }
2695
2696         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2697 errout:
2698         return err;
2699 }
2700
2701 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2702 {
2703         struct sk_buff *skb;
2704         struct net *net = info->nl_net;
2705         u32 seq;
2706         int err;
2707
2708         err = -ENOBUFS;
2709         seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2710
2711         skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2712         if (!skb)
2713                 goto errout;
2714
2715         err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2716                                 event, info->portid, seq, 0, 0, 0);
2717         if (err < 0) {
2718                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2719                 WARN_ON(err == -EMSGSIZE);
2720                 kfree_skb(skb);
2721                 goto errout;
2722         }
2723         rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2724                     info->nlh, gfp_any());
2725         return;
2726 errout:
2727         if (err < 0)
2728                 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2729 }
2730
2731 static int ip6_route_dev_notify(struct notifier_block *this,
2732                                 unsigned long event, void *data)
2733 {
2734         struct net_device *dev = (struct net_device *)data;
2735         struct net *net = dev_net(dev);
2736
2737         if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2738                 net->ipv6.ip6_null_entry->dst.dev = dev;
2739                 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2740 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2741                 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2742                 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2743                 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2744                 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2745 #endif
2746         }
2747
2748         return NOTIFY_OK;
2749 }
2750
2751 /*
2752  *      /proc
2753  */
2754
2755 #ifdef CONFIG_PROC_FS
2756
/*
 * Buffer bookkeeping for a /proc reader.
 *
 * NOTE(review): nothing in the visible part of this file references this
 * structure (the /proc handlers below use seq_file); it looks like a
 * leftover -- confirm before removing.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2765
2766 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2767 {
2768         struct seq_file *m = p_arg;
2769
2770         seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2771
2772 #ifdef CONFIG_IPV6_SUBTREES
2773         seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2774 #else
2775         seq_puts(m, "00000000000000000000000000000000 00 ");
2776 #endif
2777         if (rt->rt6i_flags & RTF_GATEWAY) {
2778                 seq_printf(m, "%pi6", &rt->rt6i_gateway);
2779         } else {
2780                 seq_puts(m, "00000000000000000000000000000000");
2781         }
2782         seq_printf(m, " %08x %08x %08x %08x %8s\n",
2783                    rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2784                    rt->dst.__use, rt->rt6i_flags,
2785                    rt->dst.dev ? rt->dst.dev->name : "");
2786         return 0;
2787 }
2788
2789 static int ipv6_route_show(struct seq_file *m, void *v)
2790 {
2791         struct net *net = (struct net *)m->private;
2792         fib6_clean_all_ro(net, rt6_info_route, 0, m);
2793         return 0;
2794 }
2795
2796 static int ipv6_route_open(struct inode *inode, struct file *file)
2797 {
2798         return single_open_net(inode, file, ipv6_route_show);
2799 }
2800
2801 static const struct file_operations ipv6_route_proc_fops = {
2802         .owner          = THIS_MODULE,
2803         .open           = ipv6_route_open,
2804         .read           = seq_read,
2805         .llseek         = seq_lseek,
2806         .release        = single_release_net,
2807 };
2808
2809 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2810 {
2811         struct net *net = (struct net *)seq->private;
2812         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2813                    net->ipv6.rt6_stats->fib_nodes,
2814                    net->ipv6.rt6_stats->fib_route_nodes,
2815                    net->ipv6.rt6_stats->fib_rt_alloc,
2816                    net->ipv6.rt6_stats->fib_rt_entries,
2817                    net->ipv6.rt6_stats->fib_rt_cache,
2818                    dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2819                    net->ipv6.rt6_stats->fib_discarded_routes);
2820
2821         return 0;
2822 }
2823
2824 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2825 {
2826         return single_open_net(inode, file, rt6_stats_seq_show);
2827 }
2828
2829 static const struct file_operations rt6_stats_seq_fops = {
2830         .owner   = THIS_MODULE,
2831         .open    = rt6_stats_seq_open,
2832         .read    = seq_read,
2833         .llseek  = seq_lseek,
2834         .release = single_release_net,
2835 };
2836 #endif  /* CONFIG_PROC_FS */
2837
2838 #ifdef CONFIG_SYSCTL
2839
2840 static
2841 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2842                               void __user *buffer, size_t *lenp, loff_t *ppos)
2843 {
2844         struct net *net;
2845         int delay;
2846         if (!write)
2847                 return -EINVAL;
2848
2849         net = (struct net *)ctl->extra1;
2850         delay = net->ipv6.sysctl.flush_delay;
2851         proc_dointvec(ctl, write, buffer, lenp, ppos);
2852         fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2853         return 0;
2854 }
2855
2856 ctl_table ipv6_route_table_template[] = {
2857         {
2858                 .procname       =       "flush",
2859                 .data           =       &init_net.ipv6.sysctl.flush_delay,
2860                 .maxlen         =       sizeof(int),
2861                 .mode           =       0200,
2862                 .proc_handler   =       ipv6_sysctl_rtcache_flush
2863         },
2864         {
2865                 .procname       =       "gc_thresh",
2866                 .data           =       &ip6_dst_ops_template.gc_thresh,
2867                 .maxlen         =       sizeof(int),
2868                 .mode           =       0644,
2869                 .proc_handler   =       proc_dointvec,
2870         },
2871         {
2872                 .procname       =       "max_size",
2873                 .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
2874                 .maxlen         =       sizeof(int),
2875                 .mode           =       0644,
2876                 .proc_handler   =       proc_dointvec,
2877         },
2878         {
2879                 .procname       =       "gc_min_interval",
2880                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2881                 .maxlen         =       sizeof(int),
2882                 .mode           =       0644,
2883                 .proc_handler   =       proc_dointvec_jiffies,
2884         },
2885         {
2886                 .procname       =       "gc_timeout",
2887                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2888                 .maxlen         =       sizeof(int),
2889                 .mode           =       0644,
2890                 .proc_handler   =       proc_dointvec_jiffies,
2891         },
2892         {
2893                 .procname       =       "gc_interval",
2894                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2895                 .maxlen         =       sizeof(int),
2896                 .mode           =       0644,
2897                 .proc_handler   =       proc_dointvec_jiffies,
2898         },
2899         {
2900                 .procname       =       "gc_elasticity",
2901                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2902                 .maxlen         =       sizeof(int),
2903                 .mode           =       0644,
2904                 .proc_handler   =       proc_dointvec,
2905         },
2906         {
2907                 .procname       =       "mtu_expires",
2908                 .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2909                 .maxlen         =       sizeof(int),
2910                 .mode           =       0644,
2911                 .proc_handler   =       proc_dointvec_jiffies,
2912         },
2913         {
2914                 .procname       =       "min_adv_mss",
2915                 .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2916                 .maxlen         =       sizeof(int),
2917                 .mode           =       0644,
2918                 .proc_handler   =       proc_dointvec,
2919         },
2920         {
2921                 .procname       =       "gc_min_interval_ms",
2922                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2923                 .maxlen         =       sizeof(int),
2924                 .mode           =       0644,
2925                 .proc_handler   =       proc_dointvec_ms_jiffies,
2926         },
2927         { }
2928 };
2929
2930 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2931 {
2932         struct ctl_table *table;
2933
2934         table = kmemdup(ipv6_route_table_template,
2935                         sizeof(ipv6_route_table_template),
2936                         GFP_KERNEL);
2937
2938         if (table) {
2939                 table[0].data = &net->ipv6.sysctl.flush_delay;
2940                 table[0].extra1 = net;
2941                 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2942                 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2943                 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2944                 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2945                 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2946                 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2947                 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2948                 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2949                 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2950
2951                 /* Don't export sysctls to unprivileged users */
2952                 if (net->user_ns != &init_user_ns)
2953                         table[0].procname = NULL;
2954         }
2955
2956         return table;
2957 }
2958 #endif
2959
2960 static int __net_init ip6_route_net_init(struct net *net)
2961 {
2962         int ret = -ENOMEM;
2963
2964         memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2965                sizeof(net->ipv6.ip6_dst_ops));
2966
2967         if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2968                 goto out_ip6_dst_ops;
2969
2970         net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2971                                            sizeof(*net->ipv6.ip6_null_entry),
2972                                            GFP_KERNEL);
2973         if (!net->ipv6.ip6_null_entry)
2974                 goto out_ip6_dst_entries;
2975         net->ipv6.ip6_null_entry->dst.path =
2976                 (struct dst_entry *)net->ipv6.ip6_null_entry;
2977         net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2978         dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2979                          ip6_template_metrics, true);
2980
2981 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2982         net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2983                                                sizeof(*net->ipv6.ip6_prohibit_entry),
2984                                                GFP_KERNEL);
2985         if (!net->ipv6.ip6_prohibit_entry)
2986                 goto out_ip6_null_entry;
2987         net->ipv6.ip6_prohibit_entry->dst.path =
2988                 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2989         net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2990         dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2991                          ip6_template_metrics, true);
2992
2993         net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2994                                                sizeof(*net->ipv6.ip6_blk_hole_entry),
2995                                                GFP_KERNEL);
2996         if (!net->ipv6.ip6_blk_hole_entry)
2997                 goto out_ip6_prohibit_entry;
2998         net->ipv6.ip6_blk_hole_entry->dst.path =
2999                 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3000         net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3001         dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3002                          ip6_template_metrics, true);
3003 #endif
3004
3005         net->ipv6.sysctl.flush_delay = 0;
3006         net->ipv6.sysctl.ip6_rt_max_size = 4096;
3007         net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3008         net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3009         net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3010         net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3011         net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3012         net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3013
3014         net->ipv6.ip6_rt_gc_expire = 30*HZ;
3015
3016         ret = 0;
3017 out:
3018         return ret;
3019
3020 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3021 out_ip6_prohibit_entry:
3022         kfree(net->ipv6.ip6_prohibit_entry);
3023 out_ip6_null_entry:
3024         kfree(net->ipv6.ip6_null_entry);
3025 #endif
3026 out_ip6_dst_entries:
3027         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3028 out_ip6_dst_ops:
3029         goto out;
3030 }
3031
3032 static void __net_exit ip6_route_net_exit(struct net *net)
3033 {
3034         kfree(net->ipv6.ip6_null_entry);
3035 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3036         kfree(net->ipv6.ip6_prohibit_entry);
3037         kfree(net->ipv6.ip6_blk_hole_entry);
3038 #endif
3039         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3040 }
3041
3042 static int __net_init ip6_route_net_init_late(struct net *net)
3043 {
3044 #ifdef CONFIG_PROC_FS
3045         proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3046         proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3047 #endif
3048         return 0;
3049 }
3050
3051 static void __net_exit ip6_route_net_exit_late(struct net *net)
3052 {
3053 #ifdef CONFIG_PROC_FS
3054         remove_proc_entry("ipv6_route", net->proc_net);
3055         remove_proc_entry("rt6_stats", net->proc_net);
3056 #endif
3057 }
3058
3059 static struct pernet_operations ip6_route_net_ops = {
3060         .init = ip6_route_net_init,
3061         .exit = ip6_route_net_exit,
3062 };
3063
3064 static int __net_init ipv6_inetpeer_init(struct net *net)
3065 {
3066         struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3067
3068         if (!bp)
3069                 return -ENOMEM;
3070         inet_peer_base_init(bp);
3071         net->ipv6.peers = bp;
3072         return 0;
3073 }
3074
3075 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3076 {
3077         struct inet_peer_base *bp = net->ipv6.peers;
3078
3079         net->ipv6.peers = NULL;
3080         inetpeer_invalidate_tree(bp);
3081         kfree(bp);
3082 }
3083
3084 static struct pernet_operations ipv6_inetpeer_ops = {
3085         .init   =       ipv6_inetpeer_init,
3086         .exit   =       ipv6_inetpeer_exit,
3087 };
3088
3089 static struct pernet_operations ip6_route_net_late_ops = {
3090         .init = ip6_route_net_init_late,
3091         .exit = ip6_route_net_exit_late,
3092 };
3093
3094 static struct notifier_block ip6_route_dev_notifier = {
3095         .notifier_call = ip6_route_dev_notify,
3096         .priority = 0,
3097 };
3098
3099 int __init ip6_route_init(void)
3100 {
3101         int ret;
3102
3103         ret = -ENOMEM;
3104         ip6_dst_ops_template.kmem_cachep =
3105                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3106                                   SLAB_HWCACHE_ALIGN, NULL);
3107         if (!ip6_dst_ops_template.kmem_cachep)
3108                 goto out;
3109
3110         ret = dst_entries_init(&ip6_dst_blackhole_ops);
3111         if (ret)
3112                 goto out_kmem_cache;
3113
3114         ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3115         if (ret)
3116                 goto out_dst_entries;
3117
3118         ret = register_pernet_subsys(&ip6_route_net_ops);
3119         if (ret)
3120                 goto out_register_inetpeer;
3121
3122         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3123
3124         /* Registering of the loopback is done before this portion of code,
3125          * the loopback reference in rt6_info will not be taken, do it
3126          * manually for init_net */
3127         init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3128         init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3129   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3130         init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3131         init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3132         init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3133         init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3134   #endif
3135         ret = fib6_init();
3136         if (ret)
3137                 goto out_register_subsys;
3138
3139         ret = xfrm6_init();
3140         if (ret)
3141                 goto out_fib6_init;
3142
3143         ret = fib6_rules_init();
3144         if (ret)
3145                 goto xfrm6_init;
3146
3147         ret = register_pernet_subsys(&ip6_route_net_late_ops);
3148         if (ret)
3149                 goto fib6_rules_init;
3150
3151         ret = -ENOBUFS;
3152         if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3153             __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3154             __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3155                 goto out_register_late_subsys;
3156
3157         ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3158         if (ret)
3159                 goto out_register_late_subsys;
3160
3161 out:
3162         return ret;
3163
3164 out_register_late_subsys:
3165         unregister_pernet_subsys(&ip6_route_net_late_ops);
3166 fib6_rules_init:
3167         fib6_rules_cleanup();
3168 xfrm6_init:
3169         xfrm6_fini();
3170 out_fib6_init:
3171         fib6_gc_cleanup();
3172 out_register_subsys:
3173         unregister_pernet_subsys(&ip6_route_net_ops);
3174 out_register_inetpeer:
3175         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3176 out_dst_entries:
3177         dst_entries_destroy(&ip6_dst_blackhole_ops);
3178 out_kmem_cache:
3179         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3180         goto out;
3181 }
3182
3183 void ip6_route_cleanup(void)
3184 {
3185         unregister_netdevice_notifier(&ip6_route_dev_notifier);
3186         unregister_pernet_subsys(&ip6_route_net_late_ops);
3187         fib6_rules_cleanup();
3188         xfrm6_fini();
3189         fib6_gc_cleanup();
3190         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3191         unregister_pernet_subsys(&ip6_route_net_ops);
3192         dst_entries_destroy(&ip6_dst_blackhole_ops);
3193         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3194 }