ipv6: Limit mtu to 65575 bytes
[firefly-linux-kernel-4.4.55.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13
14 /*      Changes:
15  *
16  *      YOSHIFUJI Hideaki @USAGI
17  *              reworked default router selection.
18  *              - respect outgoing interface
19  *              - select from (probably) reachable routers (i.e.
20  *              routers in REACHABLE, STALE, DELAY or PROBE states).
21  *              - always select the same router if it is (probably)
22  *              reachable.  otherwise, round-robin the list.
23  *      Ville Nuorvala
24  *              Fixed routing subtrees.
25  */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60 #include <net/nexthop.h>
61
62 #include <asm/uaccess.h>
63
64 #ifdef CONFIG_SYSCTL
65 #include <linux/sysctl.h>
66 #endif
67
/*
 * Result codes used while scoring candidate routes.  Negative values are
 * failures; rt6_score_route() hands them straight back to find_match().
 */
enum rt6_nud_state {
	/* route must not be used (also rt6_check_neigh()'s initial value) */
	RT6_NUD_FAIL_HARD = -2,
	/* no neighbour entry found; find_match() may fall back to round-robin */
	RT6_NUD_FAIL_SOFT = -1,
	/* next hop is acceptable */
	RT6_NUD_SUCCEED = 1
};
73
74 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
75                                     const struct in6_addr *dest);
76 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
77 static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
78 static unsigned int      ip6_mtu(const struct dst_entry *dst);
79 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80 static void             ip6_dst_destroy(struct dst_entry *);
81 static void             ip6_dst_ifdown(struct dst_entry *,
82                                        struct net_device *dev, int how);
83 static int               ip6_dst_gc(struct dst_ops *ops);
84
85 static int              ip6_pkt_discard(struct sk_buff *skb);
86 static int              ip6_pkt_discard_out(struct sk_buff *skb);
87 static int              ip6_pkt_prohibit(struct sk_buff *skb);
88 static int              ip6_pkt_prohibit_out(struct sk_buff *skb);
89 static void             ip6_link_failure(struct sk_buff *skb);
90 static void             ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
91                                            struct sk_buff *skb, u32 mtu);
92 static void             rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
93                                         struct sk_buff *skb);
94
95 #ifdef CONFIG_IPV6_ROUTE_INFO
96 static struct rt6_info *rt6_add_route_info(struct net *net,
97                                            const struct in6_addr *prefix, int prefixlen,
98                                            const struct in6_addr *gwaddr, int ifindex,
99                                            unsigned int pref);
100 static struct rt6_info *rt6_get_route_info(struct net *net,
101                                            const struct in6_addr *prefix, int prefixlen,
102                                            const struct in6_addr *gwaddr, int ifindex);
103 #endif
104
105 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
106 {
107         struct rt6_info *rt = (struct rt6_info *) dst;
108         struct inet_peer *peer;
109         u32 *p = NULL;
110
111         if (!(rt->dst.flags & DST_HOST))
112                 return NULL;
113
114         peer = rt6_get_peer_create(rt);
115         if (peer) {
116                 u32 *old_p = __DST_METRICS_PTR(old);
117                 unsigned long prev, new;
118
119                 p = peer->metrics;
120                 if (inet_metrics_new(peer))
121                         memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
122
123                 new = (unsigned long) p;
124                 prev = cmpxchg(&dst->_metrics, old, new);
125
126                 if (prev != old) {
127                         p = __DST_METRICS_PTR(prev);
128                         if (prev & DST_METRICS_READ_ONLY)
129                                 p = NULL;
130                 }
131         }
132         return p;
133 }
134
135 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
136                                              struct sk_buff *skb,
137                                              const void *daddr)
138 {
139         struct in6_addr *p = &rt->rt6i_gateway;
140
141         if (!ipv6_addr_any(p))
142                 return (const void *) p;
143         else if (skb)
144                 return &ipv6_hdr(skb)->daddr;
145         return daddr;
146 }
147
148 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
149                                           struct sk_buff *skb,
150                                           const void *daddr)
151 {
152         struct rt6_info *rt = (struct rt6_info *) dst;
153         struct neighbour *n;
154
155         daddr = choose_neigh_daddr(rt, skb, daddr);
156         n = __ipv6_neigh_lookup(dst->dev, daddr);
157         if (n)
158                 return n;
159         return neigh_create(&nd_tbl, daddr, dst->dev);
160 }
161
162 static struct dst_ops ip6_dst_ops_template = {
163         .family                 =       AF_INET6,
164         .protocol               =       cpu_to_be16(ETH_P_IPV6),
165         .gc                     =       ip6_dst_gc,
166         .gc_thresh              =       1024,
167         .check                  =       ip6_dst_check,
168         .default_advmss         =       ip6_default_advmss,
169         .mtu                    =       ip6_mtu,
170         .cow_metrics            =       ipv6_cow_metrics,
171         .destroy                =       ip6_dst_destroy,
172         .ifdown                 =       ip6_dst_ifdown,
173         .negative_advice        =       ip6_negative_advice,
174         .link_failure           =       ip6_link_failure,
175         .update_pmtu            =       ip6_rt_update_pmtu,
176         .redirect               =       rt6_do_redirect,
177         .local_out              =       __ip6_local_out,
178         .neigh_lookup           =       ip6_neigh_lookup,
179 };
180
181 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
182 {
183         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
184
185         return mtu ? : dst->dev->mtu;
186 }
187
188 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
189                                          struct sk_buff *skb, u32 mtu)
190 {
191 }
192
/* .redirect callback for blackhole dsts: intentionally does nothing. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
}
197
198 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
199                                          unsigned long old)
200 {
201         return NULL;
202 }
203
204 static struct dst_ops ip6_dst_blackhole_ops = {
205         .family                 =       AF_INET6,
206         .protocol               =       cpu_to_be16(ETH_P_IPV6),
207         .destroy                =       ip6_dst_destroy,
208         .check                  =       ip6_dst_check,
209         .mtu                    =       ip6_blackhole_mtu,
210         .default_advmss         =       ip6_default_advmss,
211         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
212         .redirect               =       ip6_rt_blackhole_redirect,
213         .cow_metrics            =       ip6_rt_blackhole_cow_metrics,
214         .neigh_lookup           =       ip6_neigh_lookup,
215 };
216
217 static const u32 ip6_template_metrics[RTAX_MAX] = {
218         [RTAX_HOPLIMIT - 1] = 0,
219 };
220
221 static const struct rt6_info ip6_null_entry_template = {
222         .dst = {
223                 .__refcnt       = ATOMIC_INIT(1),
224                 .__use          = 1,
225                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
226                 .error          = -ENETUNREACH,
227                 .input          = ip6_pkt_discard,
228                 .output         = ip6_pkt_discard_out,
229         },
230         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
231         .rt6i_protocol  = RTPROT_KERNEL,
232         .rt6i_metric    = ~(u32) 0,
233         .rt6i_ref       = ATOMIC_INIT(1),
234 };
235
236 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
237
238 static const struct rt6_info ip6_prohibit_entry_template = {
239         .dst = {
240                 .__refcnt       = ATOMIC_INIT(1),
241                 .__use          = 1,
242                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
243                 .error          = -EACCES,
244                 .input          = ip6_pkt_prohibit,
245                 .output         = ip6_pkt_prohibit_out,
246         },
247         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
248         .rt6i_protocol  = RTPROT_KERNEL,
249         .rt6i_metric    = ~(u32) 0,
250         .rt6i_ref       = ATOMIC_INIT(1),
251 };
252
253 static const struct rt6_info ip6_blk_hole_entry_template = {
254         .dst = {
255                 .__refcnt       = ATOMIC_INIT(1),
256                 .__use          = 1,
257                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
258                 .error          = -EINVAL,
259                 .input          = dst_discard,
260                 .output         = dst_discard,
261         },
262         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
263         .rt6i_protocol  = RTPROT_KERNEL,
264         .rt6i_metric    = ~(u32) 0,
265         .rt6i_ref       = ATOMIC_INIT(1),
266 };
267
268 #endif
269
270 /* allocate dst with ip6_dst_ops */
271 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
272                                              struct net_device *dev,
273                                              int flags,
274                                              struct fib6_table *table)
275 {
276         struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
277                                         0, DST_OBSOLETE_FORCE_CHK, flags);
278
279         if (rt) {
280                 struct dst_entry *dst = &rt->dst;
281
282                 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
283                 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
284                 rt->rt6i_genid = rt_genid(net);
285                 INIT_LIST_HEAD(&rt->rt6i_siblings);
286                 rt->rt6i_nsiblings = 0;
287         }
288         return rt;
289 }
290
291 static void ip6_dst_destroy(struct dst_entry *dst)
292 {
293         struct rt6_info *rt = (struct rt6_info *)dst;
294         struct inet6_dev *idev = rt->rt6i_idev;
295         struct dst_entry *from = dst->from;
296
297         if (!(rt->dst.flags & DST_HOST))
298                 dst_destroy_metrics_generic(dst);
299
300         if (idev) {
301                 rt->rt6i_idev = NULL;
302                 in6_dev_put(idev);
303         }
304
305         dst->from = NULL;
306         dst_release(from);
307
308         if (rt6_has_peer(rt)) {
309                 struct inet_peer *peer = rt6_peer_ptr(rt);
310                 inet_putpeer(peer);
311         }
312 }
313
314 void rt6_bind_peer(struct rt6_info *rt, int create)
315 {
316         struct inet_peer_base *base;
317         struct inet_peer *peer;
318
319         base = inetpeer_base_ptr(rt->_rt6i_peer);
320         if (!base)
321                 return;
322
323         peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
324         if (peer) {
325                 if (!rt6_set_peer(rt, peer))
326                         inet_putpeer(peer);
327         }
328 }
329
330 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
331                            int how)
332 {
333         struct rt6_info *rt = (struct rt6_info *)dst;
334         struct inet6_dev *idev = rt->rt6i_idev;
335         struct net_device *loopback_dev =
336                 dev_net(dev)->loopback_dev;
337
338         if (dev != loopback_dev) {
339                 if (idev && idev->dev == dev) {
340                         struct inet6_dev *loopback_idev =
341                                 in6_dev_get(loopback_dev);
342                         if (loopback_idev) {
343                                 rt->rt6i_idev = loopback_idev;
344                                 in6_dev_put(idev);
345                         }
346                 }
347         }
348 }
349
350 static bool rt6_check_expired(const struct rt6_info *rt)
351 {
352         if (rt->rt6i_flags & RTF_EXPIRES) {
353                 if (time_after(jiffies, rt->dst.expires))
354                         return true;
355         } else if (rt->dst.from) {
356                 return rt6_check_expired((struct rt6_info *) rt->dst.from);
357         }
358         return false;
359 }
360
361 static bool rt6_need_strict(const struct in6_addr *daddr)
362 {
363         return ipv6_addr_type(daddr) &
364                 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
365 }
366
367 /* Multipath route selection:
368  *   Hash based function using packet header and flowlabel.
369  * Adapted from fib_info_hashfn()
370  */
371 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
372                                const struct flowi6 *fl6)
373 {
374         unsigned int val = fl6->flowi6_proto;
375
376         val ^= ipv6_addr_hash(&fl6->daddr);
377         val ^= ipv6_addr_hash(&fl6->saddr);
378
379         /* Work only if this not encapsulated */
380         switch (fl6->flowi6_proto) {
381         case IPPROTO_UDP:
382         case IPPROTO_TCP:
383         case IPPROTO_SCTP:
384                 val ^= (__force u16)fl6->fl6_sport;
385                 val ^= (__force u16)fl6->fl6_dport;
386                 break;
387
388         case IPPROTO_ICMPV6:
389                 val ^= (__force u16)fl6->fl6_icmp_type;
390                 val ^= (__force u16)fl6->fl6_icmp_code;
391                 break;
392         }
393         /* RFC6438 recommands to use flowlabel */
394         val ^= (__force u32)fl6->flowlabel;
395
396         /* Perhaps, we need to tune, this function? */
397         val = val ^ (val >> 7) ^ (val >> 12);
398         return val % candidate_count;
399 }
400
401 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
402                                              struct flowi6 *fl6)
403 {
404         struct rt6_info *sibling, *next_sibling;
405         int route_choosen;
406
407         route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
408         /* Don't change the route, if route_choosen == 0
409          * (siblings does not include ourself)
410          */
411         if (route_choosen)
412                 list_for_each_entry_safe(sibling, next_sibling,
413                                 &match->rt6i_siblings, rt6i_siblings) {
414                         route_choosen--;
415                         if (route_choosen == 0) {
416                                 match = sibling;
417                                 break;
418                         }
419                 }
420         return match;
421 }
422
423 /*
424  *      Route lookup. Any table->tb6_lock is implied.
425  */
426
427 static inline struct rt6_info *rt6_device_match(struct net *net,
428                                                     struct rt6_info *rt,
429                                                     const struct in6_addr *saddr,
430                                                     int oif,
431                                                     int flags)
432 {
433         struct rt6_info *local = NULL;
434         struct rt6_info *sprt;
435
436         if (!oif && ipv6_addr_any(saddr))
437                 goto out;
438
439         for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
440                 struct net_device *dev = sprt->dst.dev;
441
442                 if (oif) {
443                         if (dev->ifindex == oif)
444                                 return sprt;
445                         if (dev->flags & IFF_LOOPBACK) {
446                                 if (!sprt->rt6i_idev ||
447                                     sprt->rt6i_idev->dev->ifindex != oif) {
448                                         if (flags & RT6_LOOKUP_F_IFACE && oif)
449                                                 continue;
450                                         if (local && (!oif ||
451                                                       local->rt6i_idev->dev->ifindex == oif))
452                                                 continue;
453                                 }
454                                 local = sprt;
455                         }
456                 } else {
457                         if (ipv6_chk_addr(net, saddr, dev,
458                                           flags & RT6_LOOKUP_F_IFACE))
459                                 return sprt;
460                 }
461         }
462
463         if (oif) {
464                 if (local)
465                         return local;
466
467                 if (flags & RT6_LOOKUP_F_IFACE)
468                         return net->ipv6.ip6_null_entry;
469         }
470 out:
471         return rt;
472 }
473
474 #ifdef CONFIG_IPV6_ROUTER_PREF
475 struct __rt6_probe_work {
476         struct work_struct work;
477         struct in6_addr target;
478         struct net_device *dev;
479 };
480
481 static void rt6_probe_deferred(struct work_struct *w)
482 {
483         struct in6_addr mcaddr;
484         struct __rt6_probe_work *work =
485                 container_of(w, struct __rt6_probe_work, work);
486
487         addrconf_addr_solict_mult(&work->target, &mcaddr);
488         ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL);
489         dev_put(work->dev);
490         kfree(w);
491 }
492
493 static void rt6_probe(struct rt6_info *rt)
494 {
495         struct neighbour *neigh;
496         /*
497          * Okay, this does not seem to be appropriate
498          * for now, however, we need to check if it
499          * is really so; aka Router Reachability Probing.
500          *
501          * Router Reachability Probe MUST be rate-limited
502          * to no more than one per minute.
503          */
504         if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
505                 return;
506         rcu_read_lock_bh();
507         neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
508         if (neigh) {
509                 write_lock(&neigh->lock);
510                 if (neigh->nud_state & NUD_VALID)
511                         goto out;
512         }
513
514         if (!neigh ||
515             time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
516                 struct __rt6_probe_work *work;
517
518                 work = kmalloc(sizeof(*work), GFP_ATOMIC);
519
520                 if (neigh && work)
521                         neigh->updated = jiffies;
522
523                 if (neigh)
524                         write_unlock(&neigh->lock);
525
526                 if (work) {
527                         INIT_WORK(&work->work, rt6_probe_deferred);
528                         work->target = rt->rt6i_gateway;
529                         dev_hold(rt->dst.dev);
530                         work->dev = rt->dst.dev;
531                         schedule_work(&work->work);
532                 }
533         } else {
534 out:
535                 write_unlock(&neigh->lock);
536         }
537         rcu_read_unlock_bh();
538 }
539 #else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
543 #endif
544
545 /*
546  * Default Router Selection (RFC 2461 6.3.6)
547  */
548 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
549 {
550         struct net_device *dev = rt->dst.dev;
551         if (!oif || dev->ifindex == oif)
552                 return 2;
553         if ((dev->flags & IFF_LOOPBACK) &&
554             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
555                 return 1;
556         return 0;
557 }
558
559 static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
560 {
561         struct neighbour *neigh;
562         enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
563
564         if (rt->rt6i_flags & RTF_NONEXTHOP ||
565             !(rt->rt6i_flags & RTF_GATEWAY))
566                 return RT6_NUD_SUCCEED;
567
568         rcu_read_lock_bh();
569         neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
570         if (neigh) {
571                 read_lock(&neigh->lock);
572                 if (neigh->nud_state & NUD_VALID)
573                         ret = RT6_NUD_SUCCEED;
574 #ifdef CONFIG_IPV6_ROUTER_PREF
575                 else if (!(neigh->nud_state & NUD_FAILED))
576                         ret = RT6_NUD_SUCCEED;
577 #endif
578                 read_unlock(&neigh->lock);
579         } else {
580                 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
581                       RT6_NUD_SUCCEED : RT6_NUD_FAIL_SOFT;
582         }
583         rcu_read_unlock_bh();
584
585         return ret;
586 }
587
588 static int rt6_score_route(struct rt6_info *rt, int oif,
589                            int strict)
590 {
591         int m;
592
593         m = rt6_check_dev(rt, oif);
594         if (!m && (strict & RT6_LOOKUP_F_IFACE))
595                 return RT6_NUD_FAIL_HARD;
596 #ifdef CONFIG_IPV6_ROUTER_PREF
597         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
598 #endif
599         if (strict & RT6_LOOKUP_F_REACHABLE) {
600                 int n = rt6_check_neigh(rt);
601                 if (n < 0)
602                         return n;
603         }
604         return m;
605 }
606
607 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
608                                    int *mpri, struct rt6_info *match,
609                                    bool *do_rr)
610 {
611         int m;
612         bool match_do_rr = false;
613
614         if (rt6_check_expired(rt))
615                 goto out;
616
617         m = rt6_score_route(rt, oif, strict);
618         if (m == RT6_NUD_FAIL_SOFT && !IS_ENABLED(CONFIG_IPV6_ROUTER_PREF)) {
619                 match_do_rr = true;
620                 m = 0; /* lowest valid score */
621         } else if (m < 0) {
622                 goto out;
623         }
624
625         if (strict & RT6_LOOKUP_F_REACHABLE)
626                 rt6_probe(rt);
627
628         if (m > *mpri) {
629                 *do_rr = match_do_rr;
630                 *mpri = m;
631                 match = rt;
632         }
633 out:
634         return match;
635 }
636
637 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
638                                      struct rt6_info *rr_head,
639                                      u32 metric, int oif, int strict,
640                                      bool *do_rr)
641 {
642         struct rt6_info *rt, *match;
643         int mpri = -1;
644
645         match = NULL;
646         for (rt = rr_head; rt && rt->rt6i_metric == metric;
647              rt = rt->dst.rt6_next)
648                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
649         for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
650              rt = rt->dst.rt6_next)
651                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
652
653         return match;
654 }
655
656 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
657 {
658         struct rt6_info *match, *rt0;
659         struct net *net;
660         bool do_rr = false;
661
662         rt0 = fn->rr_ptr;
663         if (!rt0)
664                 fn->rr_ptr = rt0 = fn->leaf;
665
666         match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
667                              &do_rr);
668
669         if (do_rr) {
670                 struct rt6_info *next = rt0->dst.rt6_next;
671
672                 /* no entries matched; do round-robin */
673                 if (!next || next->rt6i_metric != rt0->rt6i_metric)
674                         next = fn->leaf;
675
676                 if (next != rt0)
677                         fn->rr_ptr = next;
678         }
679
680         net = dev_net(rt0->dst.dev);
681         return match ? match : net->ipv6.ip6_null_entry;
682 }
683
684 #ifdef CONFIG_IPV6_ROUTE_INFO
685 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
686                   const struct in6_addr *gwaddr)
687 {
688         struct net *net = dev_net(dev);
689         struct route_info *rinfo = (struct route_info *) opt;
690         struct in6_addr prefix_buf, *prefix;
691         unsigned int pref;
692         unsigned long lifetime;
693         struct rt6_info *rt;
694
695         if (len < sizeof(struct route_info)) {
696                 return -EINVAL;
697         }
698
699         /* Sanity check for prefix_len and length */
700         if (rinfo->length > 3) {
701                 return -EINVAL;
702         } else if (rinfo->prefix_len > 128) {
703                 return -EINVAL;
704         } else if (rinfo->prefix_len > 64) {
705                 if (rinfo->length < 2) {
706                         return -EINVAL;
707                 }
708         } else if (rinfo->prefix_len > 0) {
709                 if (rinfo->length < 1) {
710                         return -EINVAL;
711                 }
712         }
713
714         pref = rinfo->route_pref;
715         if (pref == ICMPV6_ROUTER_PREF_INVALID)
716                 return -EINVAL;
717
718         lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
719
720         if (rinfo->length == 3)
721                 prefix = (struct in6_addr *)rinfo->prefix;
722         else {
723                 /* this function is safe */
724                 ipv6_addr_prefix(&prefix_buf,
725                                  (struct in6_addr *)rinfo->prefix,
726                                  rinfo->prefix_len);
727                 prefix = &prefix_buf;
728         }
729
730         if (rinfo->prefix_len == 0)
731                 rt = rt6_get_dflt_router(gwaddr, dev);
732         else
733                 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
734                                         gwaddr, dev->ifindex);
735
736         if (rt && !lifetime) {
737                 ip6_del_rt(rt);
738                 rt = NULL;
739         }
740
741         if (!rt && lifetime)
742                 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
743                                         pref);
744         else if (rt)
745                 rt->rt6i_flags = RTF_ROUTEINFO |
746                                  (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
747
748         if (rt) {
749                 if (!addrconf_finite_timeout(lifetime))
750                         rt6_clean_expires(rt);
751                 else
752                         rt6_set_expires(rt, jiffies + HZ * lifetime);
753
754                 ip6_rt_put(rt);
755         }
756         return 0;
757 }
758 #endif
759
/*
 * Helper for the lookup functions in this file.  It relies on the caller
 * having local variables 'rt' and 'fn' plus the labels 'restart' and 'out'.
 *
 * If the current result is the namespace's null entry, climb towards the
 * tree root (descending into a parent's source subtree where one exists)
 * and jump to 'restart' at the first node flagged RTN_RTINFO, or to 'out'
 * once a node flagged RTN_TL_ROOT is reached.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == (__net)->ipv6.ip6_null_entry) {			\
		struct fib6_node *pn;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn),	\
						 NULL, (saddr));	\
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
777
778 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
779                                              struct fib6_table *table,
780                                              struct flowi6 *fl6, int flags)
781 {
782         struct fib6_node *fn;
783         struct rt6_info *rt;
784
785         read_lock_bh(&table->tb6_lock);
786         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
787 restart:
788         rt = fn->leaf;
789         rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
790         if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
791                 rt = rt6_multipath_select(rt, fl6);
792         BACKTRACK(net, &fl6->saddr);
793 out:
794         dst_use(&rt->dst, jiffies);
795         read_unlock_bh(&table->tb6_lock);
796         return rt;
797
798 }
799
800 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
801                                     int flags)
802 {
803         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
804 }
805 EXPORT_SYMBOL_GPL(ip6_route_lookup);
806
807 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
808                             const struct in6_addr *saddr, int oif, int strict)
809 {
810         struct flowi6 fl6 = {
811                 .flowi6_oif = oif,
812                 .daddr = *daddr,
813         };
814         struct dst_entry *dst;
815         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
816
817         if (saddr) {
818                 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
819                 flags |= RT6_LOOKUP_F_HAS_SADDR;
820         }
821
822         dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
823         if (dst->error == 0)
824                 return (struct rt6_info *) dst;
825
826         dst_release(dst);
827
828         return NULL;
829 }
830
831 EXPORT_SYMBOL(rt6_lookup);
832
833 /* ip6_ins_rt is called with FREE table->tb6_lock.
834    It takes new route entry, the addition fails by any reason the
835    route is freed. In any case, if caller does not hold it, it may
836    be destroyed.
837  */
838
839 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
840 {
841         int err;
842         struct fib6_table *table;
843
844         table = rt->rt6i_table;
845         write_lock_bh(&table->tb6_lock);
846         err = fib6_add(&table->tb6_root, rt, info);
847         write_unlock_bh(&table->tb6_lock);
848
849         return err;
850 }
851
852 int ip6_ins_rt(struct rt6_info *rt)
853 {
854         struct nl_info info = {
855                 .nl_net = dev_net(rt->dst.dev),
856         };
857         return __ip6_ins_rt(rt, &info);
858 }
859
860 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
861                                       const struct in6_addr *daddr,
862                                       const struct in6_addr *saddr)
863 {
864         struct rt6_info *rt;
865
866         /*
867          *      Clone the route.
868          */
869
870         rt = ip6_rt_copy(ort, daddr);
871
872         if (rt) {
873                 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
874                         if (ort->rt6i_dst.plen != 128 &&
875                             ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
876                                 rt->rt6i_flags |= RTF_ANYCAST;
877                 }
878
879                 rt->rt6i_flags |= RTF_CACHE;
880
881 #ifdef CONFIG_IPV6_SUBTREES
882                 if (rt->rt6i_src.plen && saddr) {
883                         rt->rt6i_src.addr = *saddr;
884                         rt->rt6i_src.plen = 128;
885                 }
886 #endif
887         }
888
889         return rt;
890 }
891
892 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
893                                         const struct in6_addr *daddr)
894 {
895         struct rt6_info *rt = ip6_rt_copy(ort, daddr);
896
897         if (rt)
898                 rt->rt6i_flags |= RTF_CACHE;
899         return rt;
900 }
901
/*
 * Core policy-route lookup shared by the input and output paths.
 *
 * Looks up fl6->daddr/fl6->saddr in @table, selects a route for interface
 * @oif, and, when the selected route is neither the null entry nor already
 * an RTF_CACHE entry, tries to create a per-destination copy and insert it
 * into the table.  The returned route has had dst_hold() applied on every
 * path through this function and has its lastuse/__use statistics updated.
 *
 * @flags: only RT6_LOOKUP_F_IFACE is consulted here (as the "strict" bit).
 */
static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
                                      struct flowi6 *fl6, int flags)
{
        struct fib6_node *fn;
        struct rt6_info *rt, *nrt;
        int strict = 0;
        /* Upper bound on clone-and-insert rounds (decremented before each relookup). */
        int attempts = 3;
        int err;
        /* Only ask for reachable routers when forwarding is disabled. */
        int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;

        strict |= flags & RT6_LOOKUP_F_IFACE;

relookup:
        read_lock_bh(&table->tb6_lock);

restart_2:
        fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);

restart:
        rt = rt6_select(fn, oif, strict | reachable);
        if (rt->rt6i_nsiblings && oif == 0)
                rt = rt6_multipath_select(rt, fl6);
        /* NOTE(review): BACKTRACK is a macro defined elsewhere in this file;
         * it presumably walks up the tree and jumps to the labels in this
         * function -- confirm against its definition.
         */
        BACKTRACK(net, &fl6->saddr);
        if (rt == net->ipv6.ip6_null_entry ||
            rt->rt6i_flags & RTF_CACHE)
                goto out;

        /* Pin the selected route before dropping the table lock. */
        dst_hold(&rt->dst);
        read_unlock_bh(&table->tb6_lock);

        /* Routes with neither NONEXTHOP nor GATEWAY get a COW copy; other
         * non-host routes get a plain clone; host routes are returned as-is.
         */
        if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
                nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
        else if (!(rt->dst.flags & DST_HOST))
                nrt = rt6_alloc_clone(rt, &fl6->daddr);
        else
                goto out2;

        /* Swap the reference from the original route to the copy (or to
         * the null entry when the copy could not be allocated).
         */
        ip6_rt_put(rt);
        rt = nrt ? : net->ipv6.ip6_null_entry;

        dst_hold(&rt->dst);
        if (nrt) {
                err = ip6_ins_rt(nrt);
                if (!err)
                        goto out2;
        }

        if (--attempts <= 0)
                goto out2;

        /*
         * Race condition! In the gap, when table->tb6_lock was
         * released someone could insert this route.  Relookup.
         */
        ip6_rt_put(rt);
        goto relookup;

out:
        /* First arrival here with the reachability requirement still set:
         * drop the requirement and redo the lookup (lock is still held).
         */
        if (reachable) {
                reachable = 0;
                goto restart_2;
        }
        dst_hold(&rt->dst);
        read_unlock_bh(&table->tb6_lock);
out2:
        rt->dst.lastuse = jiffies;
        rt->dst.__use++;

        return rt;
}
972
973 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
974                                             struct flowi6 *fl6, int flags)
975 {
976         return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
977 }
978
979 static struct dst_entry *ip6_route_input_lookup(struct net *net,
980                                                 struct net_device *dev,
981                                                 struct flowi6 *fl6, int flags)
982 {
983         if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
984                 flags |= RT6_LOOKUP_F_IFACE;
985
986         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
987 }
988
989 void ip6_route_input(struct sk_buff *skb)
990 {
991         const struct ipv6hdr *iph = ipv6_hdr(skb);
992         struct net *net = dev_net(skb->dev);
993         int flags = RT6_LOOKUP_F_HAS_SADDR;
994         struct flowi6 fl6 = {
995                 .flowi6_iif = skb->dev->ifindex,
996                 .daddr = iph->daddr,
997                 .saddr = iph->saddr,
998                 .flowlabel = ip6_flowinfo(iph),
999                 .flowi6_mark = skb->mark,
1000                 .flowi6_proto = iph->nexthdr,
1001         };
1002
1003         skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1004 }
1005
1006 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1007                                              struct flowi6 *fl6, int flags)
1008 {
1009         return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1010 }
1011
1012 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
1013                                     struct flowi6 *fl6)
1014 {
1015         int flags = 0;
1016
1017         fl6->flowi6_iif = LOOPBACK_IFINDEX;
1018
1019         if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
1020                 flags |= RT6_LOOKUP_F_IFACE;
1021
1022         if (!ipv6_addr_any(&fl6->saddr))
1023                 flags |= RT6_LOOKUP_F_HAS_SADDR;
1024         else if (sk)
1025                 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1026
1027         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1028 }
1029
1030 EXPORT_SYMBOL(ip6_route_output);
1031
1032 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1033 {
1034         struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1035         struct dst_entry *new = NULL;
1036
1037         rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1038         if (rt) {
1039                 new = &rt->dst;
1040
1041                 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
1042                 rt6_init_peer(rt, net->ipv6.peers);
1043
1044                 new->__use = 1;
1045                 new->input = dst_discard;
1046                 new->output = dst_discard;
1047
1048                 if (dst_metrics_read_only(&ort->dst))
1049                         new->_metrics = ort->dst._metrics;
1050                 else
1051                         dst_copy_metrics(new, &ort->dst);
1052                 rt->rt6i_idev = ort->rt6i_idev;
1053                 if (rt->rt6i_idev)
1054                         in6_dev_hold(rt->rt6i_idev);
1055
1056                 rt->rt6i_gateway = ort->rt6i_gateway;
1057                 rt->rt6i_flags = ort->rt6i_flags;
1058                 rt->rt6i_metric = 0;
1059
1060                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1061 #ifdef CONFIG_IPV6_SUBTREES
1062                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1063 #endif
1064
1065                 dst_free(new);
1066         }
1067
1068         dst_release(dst_orig);
1069         return new ? new : ERR_PTR(-ENOMEM);
1070 }
1071
1072 /*
1073  *      Destination cache support functions
1074  */
1075
1076 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1077 {
1078         struct rt6_info *rt;
1079
1080         rt = (struct rt6_info *) dst;
1081
1082         /* All IPV6 dsts are created with ->obsolete set to the value
1083          * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1084          * into this function always.
1085          */
1086         if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
1087                 return NULL;
1088
1089         if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1090                 return NULL;
1091
1092         if (rt6_check_expired(rt))
1093                 return NULL;
1094
1095         return dst;
1096 }
1097
1098 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1099 {
1100         struct rt6_info *rt = (struct rt6_info *) dst;
1101
1102         if (rt) {
1103                 if (rt->rt6i_flags & RTF_CACHE) {
1104                         if (rt6_check_expired(rt)) {
1105                                 ip6_del_rt(rt);
1106                                 dst = NULL;
1107                         }
1108                 } else {
1109                         dst_release(dst);
1110                         dst = NULL;
1111                 }
1112         }
1113         return dst;
1114 }
1115
1116 static void ip6_link_failure(struct sk_buff *skb)
1117 {
1118         struct rt6_info *rt;
1119
1120         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1121
1122         rt = (struct rt6_info *) skb_dst(skb);
1123         if (rt) {
1124                 if (rt->rt6i_flags & RTF_CACHE) {
1125                         dst_hold(&rt->dst);
1126                         if (ip6_del_rt(rt))
1127                                 dst_free(&rt->dst);
1128                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1129                         rt->rt6i_node->fn_sernum = -1;
1130                 }
1131         }
1132 }
1133
1134 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1135                                struct sk_buff *skb, u32 mtu)
1136 {
1137         struct rt6_info *rt6 = (struct rt6_info*)dst;
1138
1139         dst_confirm(dst);
1140         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1141                 struct net *net = dev_net(dst->dev);
1142
1143                 rt6->rt6i_flags |= RTF_MODIFIED;
1144                 if (mtu < IPV6_MIN_MTU) {
1145                         u32 features = dst_metric(dst, RTAX_FEATURES);
1146                         mtu = IPV6_MIN_MTU;
1147                         features |= RTAX_FEATURE_ALLFRAG;
1148                         dst_metric_set(dst, RTAX_FEATURES, features);
1149                 }
1150                 dst_metric_set(dst, RTAX_MTU, mtu);
1151                 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1152         }
1153 }
1154
1155 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1156                      int oif, u32 mark)
1157 {
1158         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1159         struct dst_entry *dst;
1160         struct flowi6 fl6;
1161
1162         memset(&fl6, 0, sizeof(fl6));
1163         fl6.flowi6_oif = oif;
1164         fl6.flowi6_mark = mark;
1165         fl6.flowi6_flags = 0;
1166         fl6.daddr = iph->daddr;
1167         fl6.saddr = iph->saddr;
1168         fl6.flowlabel = ip6_flowinfo(iph);
1169
1170         dst = ip6_route_output(net, NULL, &fl6);
1171         if (!dst->error)
1172                 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1173         dst_release(dst);
1174 }
1175 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1176
1177 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1178 {
1179         ip6_update_pmtu(skb, sock_net(sk), mtu,
1180                         sk->sk_bound_dev_if, sk->sk_mark);
1181 }
1182 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1183
1184 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1185 {
1186         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1187         struct dst_entry *dst;
1188         struct flowi6 fl6;
1189
1190         memset(&fl6, 0, sizeof(fl6));
1191         fl6.flowi6_oif = oif;
1192         fl6.flowi6_mark = mark;
1193         fl6.flowi6_flags = 0;
1194         fl6.daddr = iph->daddr;
1195         fl6.saddr = iph->saddr;
1196         fl6.flowlabel = ip6_flowinfo(iph);
1197
1198         dst = ip6_route_output(net, NULL, &fl6);
1199         if (!dst->error)
1200                 rt6_do_redirect(dst, NULL, skb);
1201         dst_release(dst);
1202 }
1203 EXPORT_SYMBOL_GPL(ip6_redirect);
1204
1205 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1206 {
1207         ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1208 }
1209 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1210
1211 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1212 {
1213         struct net_device *dev = dst->dev;
1214         unsigned int mtu = dst_mtu(dst);
1215         struct net *net = dev_net(dev);
1216
1217         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1218
1219         if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1220                 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1221
1222         /*
1223          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1224          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1225          * IPV6_MAXPLEN is also valid and means: "any MSS,
1226          * rely only on pmtu discovery"
1227          */
1228         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1229                 mtu = IPV6_MAXPLEN;
1230         return mtu;
1231 }
1232
1233 static unsigned int ip6_mtu(const struct dst_entry *dst)
1234 {
1235         struct inet6_dev *idev;
1236         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1237
1238         if (mtu)
1239                 goto out;
1240
1241         mtu = IPV6_MIN_MTU;
1242
1243         rcu_read_lock();
1244         idev = __in6_dev_get(dst->dev);
1245         if (idev)
1246                 mtu = idev->cnf.mtu6;
1247         rcu_read_unlock();
1248
1249 out:
1250         return min_t(unsigned int, mtu, IP6_MAX_MTU);
1251 }
1252
1253 static struct dst_entry *icmp6_dst_gc_list;
1254 static DEFINE_SPINLOCK(icmp6_dst_lock);
1255
1256 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1257                                   struct flowi6 *fl6)
1258 {
1259         struct dst_entry *dst;
1260         struct rt6_info *rt;
1261         struct inet6_dev *idev = in6_dev_get(dev);
1262         struct net *net = dev_net(dev);
1263
1264         if (unlikely(!idev))
1265                 return ERR_PTR(-ENODEV);
1266
1267         rt = ip6_dst_alloc(net, dev, 0, NULL);
1268         if (unlikely(!rt)) {
1269                 in6_dev_put(idev);
1270                 dst = ERR_PTR(-ENOMEM);
1271                 goto out;
1272         }
1273
1274         rt->dst.flags |= DST_HOST;
1275         rt->dst.output  = ip6_output;
1276         atomic_set(&rt->dst.__refcnt, 1);
1277         rt->rt6i_gateway  = fl6->daddr;
1278         rt->rt6i_dst.addr = fl6->daddr;
1279         rt->rt6i_dst.plen = 128;
1280         rt->rt6i_idev     = idev;
1281         dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1282
1283         spin_lock_bh(&icmp6_dst_lock);
1284         rt->dst.next = icmp6_dst_gc_list;
1285         icmp6_dst_gc_list = &rt->dst;
1286         spin_unlock_bh(&icmp6_dst_lock);
1287
1288         fib6_force_start_gc(net);
1289
1290         dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1291
1292 out:
1293         return dst;
1294 }
1295
1296 int icmp6_dst_gc(void)
1297 {
1298         struct dst_entry *dst, **pprev;
1299         int more = 0;
1300
1301         spin_lock_bh(&icmp6_dst_lock);
1302         pprev = &icmp6_dst_gc_list;
1303
1304         while ((dst = *pprev) != NULL) {
1305                 if (!atomic_read(&dst->__refcnt)) {
1306                         *pprev = dst->next;
1307                         dst_free(dst);
1308                 } else {
1309                         pprev = &dst->next;
1310                         ++more;
1311                 }
1312         }
1313
1314         spin_unlock_bh(&icmp6_dst_lock);
1315
1316         return more;
1317 }
1318
1319 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1320                             void *arg)
1321 {
1322         struct dst_entry *dst, **pprev;
1323
1324         spin_lock_bh(&icmp6_dst_lock);
1325         pprev = &icmp6_dst_gc_list;
1326         while ((dst = *pprev) != NULL) {
1327                 struct rt6_info *rt = (struct rt6_info *) dst;
1328                 if (func(rt, arg)) {
1329                         *pprev = dst->next;
1330                         dst_free(dst);
1331                 } else {
1332                         pprev = &dst->next;
1333                 }
1334         }
1335         spin_unlock_bh(&icmp6_dst_lock);
1336 }
1337
1338 static int ip6_dst_gc(struct dst_ops *ops)
1339 {
1340         unsigned long now = jiffies;
1341         struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1342         int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1343         int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1344         int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1345         int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1346         unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1347         int entries;
1348
1349         entries = dst_entries_get_fast(ops);
1350         if (time_after(rt_last_gc + rt_min_interval, now) &&
1351             entries <= rt_max_size)
1352                 goto out;
1353
1354         net->ipv6.ip6_rt_gc_expire++;
1355         fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1356         net->ipv6.ip6_rt_last_gc = now;
1357         entries = dst_entries_get_slow(ops);
1358         if (entries < ops->gc_thresh)
1359                 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1360 out:
1361         net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1362         return entries > rt_max_size;
1363 }
1364
1365 int ip6_dst_hoplimit(struct dst_entry *dst)
1366 {
1367         int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1368         if (hoplimit == 0) {
1369                 struct net_device *dev = dst->dev;
1370                 struct inet6_dev *idev;
1371
1372                 rcu_read_lock();
1373                 idev = __in6_dev_get(dev);
1374                 if (idev)
1375                         hoplimit = idev->cnf.hop_limit;
1376                 else
1377                         hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1378                 rcu_read_unlock();
1379         }
1380         return hoplimit;
1381 }
1382 EXPORT_SYMBOL(ip6_dst_hoplimit);
1383
1384 /*
1385  *
1386  */
1387
1388 int ip6_route_add(struct fib6_config *cfg)
1389 {
1390         int err;
1391         struct net *net = cfg->fc_nlinfo.nl_net;
1392         struct rt6_info *rt = NULL;
1393         struct net_device *dev = NULL;
1394         struct inet6_dev *idev = NULL;
1395         struct fib6_table *table;
1396         int addr_type;
1397
1398         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1399                 return -EINVAL;
1400 #ifndef CONFIG_IPV6_SUBTREES
1401         if (cfg->fc_src_len)
1402                 return -EINVAL;
1403 #endif
1404         if (cfg->fc_ifindex) {
1405                 err = -ENODEV;
1406                 dev = dev_get_by_index(net, cfg->fc_ifindex);
1407                 if (!dev)
1408                         goto out;
1409                 idev = in6_dev_get(dev);
1410                 if (!idev)
1411                         goto out;
1412         }
1413
1414         if (cfg->fc_metric == 0)
1415                 cfg->fc_metric = IP6_RT_PRIO_USER;
1416
1417         err = -ENOBUFS;
1418         if (cfg->fc_nlinfo.nlh &&
1419             !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1420                 table = fib6_get_table(net, cfg->fc_table);
1421                 if (!table) {
1422                         pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1423                         table = fib6_new_table(net, cfg->fc_table);
1424                 }
1425         } else {
1426                 table = fib6_new_table(net, cfg->fc_table);
1427         }
1428
1429         if (!table)
1430                 goto out;
1431
1432         rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table);
1433
1434         if (!rt) {
1435                 err = -ENOMEM;
1436                 goto out;
1437         }
1438
1439         if (cfg->fc_flags & RTF_EXPIRES)
1440                 rt6_set_expires(rt, jiffies +
1441                                 clock_t_to_jiffies(cfg->fc_expires));
1442         else
1443                 rt6_clean_expires(rt);
1444
1445         if (cfg->fc_protocol == RTPROT_UNSPEC)
1446                 cfg->fc_protocol = RTPROT_BOOT;
1447         rt->rt6i_protocol = cfg->fc_protocol;
1448
1449         addr_type = ipv6_addr_type(&cfg->fc_dst);
1450
1451         if (addr_type & IPV6_ADDR_MULTICAST)
1452                 rt->dst.input = ip6_mc_input;
1453         else if (cfg->fc_flags & RTF_LOCAL)
1454                 rt->dst.input = ip6_input;
1455         else
1456                 rt->dst.input = ip6_forward;
1457
1458         rt->dst.output = ip6_output;
1459
1460         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1461         rt->rt6i_dst.plen = cfg->fc_dst_len;
1462         if (rt->rt6i_dst.plen == 128)
1463                rt->dst.flags |= DST_HOST;
1464
1465         if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1466                 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1467                 if (!metrics) {
1468                         err = -ENOMEM;
1469                         goto out;
1470                 }
1471                 dst_init_metrics(&rt->dst, metrics, 0);
1472         }
1473 #ifdef CONFIG_IPV6_SUBTREES
1474         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1475         rt->rt6i_src.plen = cfg->fc_src_len;
1476 #endif
1477
1478         rt->rt6i_metric = cfg->fc_metric;
1479
1480         /* We cannot add true routes via loopback here,
1481            they would result in kernel looping; promote them to reject routes
1482          */
1483         if ((cfg->fc_flags & RTF_REJECT) ||
1484             (dev && (dev->flags & IFF_LOOPBACK) &&
1485              !(addr_type & IPV6_ADDR_LOOPBACK) &&
1486              !(cfg->fc_flags & RTF_LOCAL))) {
1487                 /* hold loopback dev/idev if we haven't done so. */
1488                 if (dev != net->loopback_dev) {
1489                         if (dev) {
1490                                 dev_put(dev);
1491                                 in6_dev_put(idev);
1492                         }
1493                         dev = net->loopback_dev;
1494                         dev_hold(dev);
1495                         idev = in6_dev_get(dev);
1496                         if (!idev) {
1497                                 err = -ENODEV;
1498                                 goto out;
1499                         }
1500                 }
1501                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1502                 switch (cfg->fc_type) {
1503                 case RTN_BLACKHOLE:
1504                         rt->dst.error = -EINVAL;
1505                         rt->dst.output = dst_discard;
1506                         rt->dst.input = dst_discard;
1507                         break;
1508                 case RTN_PROHIBIT:
1509                         rt->dst.error = -EACCES;
1510                         rt->dst.output = ip6_pkt_prohibit_out;
1511                         rt->dst.input = ip6_pkt_prohibit;
1512                         break;
1513                 case RTN_THROW:
1514                 default:
1515                         rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1516                                         : -ENETUNREACH;
1517                         rt->dst.output = ip6_pkt_discard_out;
1518                         rt->dst.input = ip6_pkt_discard;
1519                         break;
1520                 }
1521                 goto install_route;
1522         }
1523
1524         if (cfg->fc_flags & RTF_GATEWAY) {
1525                 const struct in6_addr *gw_addr;
1526                 int gwa_type;
1527
1528                 gw_addr = &cfg->fc_gateway;
1529                 rt->rt6i_gateway = *gw_addr;
1530                 gwa_type = ipv6_addr_type(gw_addr);
1531
1532                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1533                         struct rt6_info *grt;
1534
1535                         /* IPv6 strictly inhibits using not link-local
1536                            addresses as nexthop address.
1537                            Otherwise, router will not able to send redirects.
1538                            It is very good, but in some (rare!) circumstances
1539                            (SIT, PtP, NBMA NOARP links) it is handy to allow
1540                            some exceptions. --ANK
1541                          */
1542                         err = -EINVAL;
1543                         if (!(gwa_type & IPV6_ADDR_UNICAST))
1544                                 goto out;
1545
1546                         grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1547
1548                         err = -EHOSTUNREACH;
1549                         if (!grt)
1550                                 goto out;
1551                         if (dev) {
1552                                 if (dev != grt->dst.dev) {
1553                                         ip6_rt_put(grt);
1554                                         goto out;
1555                                 }
1556                         } else {
1557                                 dev = grt->dst.dev;
1558                                 idev = grt->rt6i_idev;
1559                                 dev_hold(dev);
1560                                 in6_dev_hold(grt->rt6i_idev);
1561                         }
1562                         if (!(grt->rt6i_flags & RTF_GATEWAY))
1563                                 err = 0;
1564                         ip6_rt_put(grt);
1565
1566                         if (err)
1567                                 goto out;
1568                 }
1569                 err = -EINVAL;
1570                 if (!dev || (dev->flags & IFF_LOOPBACK))
1571                         goto out;
1572         }
1573
1574         err = -ENODEV;
1575         if (!dev)
1576                 goto out;
1577
1578         if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1579                 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1580                         err = -EINVAL;
1581                         goto out;
1582                 }
1583                 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1584                 rt->rt6i_prefsrc.plen = 128;
1585         } else
1586                 rt->rt6i_prefsrc.plen = 0;
1587
1588         rt->rt6i_flags = cfg->fc_flags;
1589
1590 install_route:
1591         if (cfg->fc_mx) {
1592                 struct nlattr *nla;
1593                 int remaining;
1594
1595                 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1596                         int type = nla_type(nla);
1597
1598                         if (type) {
1599                                 if (type > RTAX_MAX) {
1600                                         err = -EINVAL;
1601                                         goto out;
1602                                 }
1603
1604                                 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1605                         }
1606                 }
1607         }
1608
1609         rt->dst.dev = dev;
1610         rt->rt6i_idev = idev;
1611         rt->rt6i_table = table;
1612
1613         cfg->fc_nlinfo.nl_net = dev_net(dev);
1614
1615         return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1616
1617 out:
1618         if (dev)
1619                 dev_put(dev);
1620         if (idev)
1621                 in6_dev_put(idev);
1622         if (rt)
1623                 dst_free(&rt->dst);
1624         return err;
1625 }
1626
1627 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1628 {
1629         int err;
1630         struct fib6_table *table;
1631         struct net *net = dev_net(rt->dst.dev);
1632
1633         if (rt == net->ipv6.ip6_null_entry) {
1634                 err = -ENOENT;
1635                 goto out;
1636         }
1637
1638         table = rt->rt6i_table;
1639         write_lock_bh(&table->tb6_lock);
1640         err = fib6_del(rt, info);
1641         write_unlock_bh(&table->tb6_lock);
1642
1643 out:
1644         ip6_rt_put(rt);
1645         return err;
1646 }
1647
1648 int ip6_del_rt(struct rt6_info *rt)
1649 {
1650         struct nl_info info = {
1651                 .nl_net = dev_net(rt->dst.dev),
1652         };
1653         return __ip6_del_rt(rt, &info);
1654 }
1655
1656 static int ip6_route_del(struct fib6_config *cfg)
1657 {
1658         struct fib6_table *table;
1659         struct fib6_node *fn;
1660         struct rt6_info *rt;
1661         int err = -ESRCH;
1662
1663         table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1664         if (!table)
1665                 return err;
1666
1667         read_lock_bh(&table->tb6_lock);
1668
1669         fn = fib6_locate(&table->tb6_root,
1670                          &cfg->fc_dst, cfg->fc_dst_len,
1671                          &cfg->fc_src, cfg->fc_src_len);
1672
1673         if (fn) {
1674                 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1675                         if (cfg->fc_ifindex &&
1676                             (!rt->dst.dev ||
1677                              rt->dst.dev->ifindex != cfg->fc_ifindex))
1678                                 continue;
1679                         if (cfg->fc_flags & RTF_GATEWAY &&
1680                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1681                                 continue;
1682                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1683                                 continue;
1684                         dst_hold(&rt->dst);
1685                         read_unlock_bh(&table->tb6_lock);
1686
1687                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1688                 }
1689         }
1690         read_unlock_bh(&table->tb6_lock);
1691
1692         return err;
1693 }
1694
1695 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1696 {
1697         struct net *net = dev_net(skb->dev);
1698         struct netevent_redirect netevent;
1699         struct rt6_info *rt, *nrt = NULL;
1700         struct ndisc_options ndopts;
1701         struct inet6_dev *in6_dev;
1702         struct neighbour *neigh;
1703         struct rd_msg *msg;
1704         int optlen, on_link;
1705         u8 *lladdr;
1706
1707         optlen = skb->tail - skb->transport_header;
1708         optlen -= sizeof(*msg);
1709
1710         if (optlen < 0) {
1711                 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1712                 return;
1713         }
1714
1715         msg = (struct rd_msg *)icmp6_hdr(skb);
1716
1717         if (ipv6_addr_is_multicast(&msg->dest)) {
1718                 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1719                 return;
1720         }
1721
1722         on_link = 0;
1723         if (ipv6_addr_equal(&msg->dest, &msg->target)) {
1724                 on_link = 1;
1725         } else if (ipv6_addr_type(&msg->target) !=
1726                    (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1727                 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1728                 return;
1729         }
1730
1731         in6_dev = __in6_dev_get(skb->dev);
1732         if (!in6_dev)
1733                 return;
1734         if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1735                 return;
1736
1737         /* RFC2461 8.1:
1738          *      The IP source address of the Redirect MUST be the same as the current
1739          *      first-hop router for the specified ICMP Destination Address.
1740          */
1741
1742         if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
1743                 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1744                 return;
1745         }
1746
1747         lladdr = NULL;
1748         if (ndopts.nd_opts_tgt_lladdr) {
1749                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1750                                              skb->dev);
1751                 if (!lladdr) {
1752                         net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1753                         return;
1754                 }
1755         }
1756
1757         rt = (struct rt6_info *) dst;
1758         if (rt == net->ipv6.ip6_null_entry) {
1759                 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1760                 return;
1761         }
1762
1763         /* Redirect received -> path was valid.
1764          * Look, redirects are sent only in response to data packets,
1765          * so that this nexthop apparently is reachable. --ANK
1766          */
1767         dst_confirm(&rt->dst);
1768
1769         neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
1770         if (!neigh)
1771                 return;
1772
1773         /*
1774          *      We have finally decided to accept it.
1775          */
1776
1777         neigh_update(neigh, lladdr, NUD_STALE,
1778                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1779                      NEIGH_UPDATE_F_OVERRIDE|
1780                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1781                                      NEIGH_UPDATE_F_ISROUTER))
1782                      );
1783
1784         nrt = ip6_rt_copy(rt, &msg->dest);
1785         if (!nrt)
1786                 goto out;
1787
1788         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1789         if (on_link)
1790                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1791
1792         nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1793
1794         if (ip6_ins_rt(nrt))
1795                 goto out;
1796
1797         netevent.old = &rt->dst;
1798         netevent.new = &nrt->dst;
1799         netevent.daddr = &msg->dest;
1800         netevent.neigh = neigh;
1801         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1802
1803         if (rt->rt6i_flags & RTF_CACHE) {
1804                 rt = (struct rt6_info *) dst_clone(&rt->dst);
1805                 ip6_del_rt(rt);
1806         }
1807
1808 out:
1809         neigh_release(neigh);
1810 }
1811
1812 /*
1813  *      Misc support functions
1814  */
1815
1816 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1817                                     const struct in6_addr *dest)
1818 {
1819         struct net *net = dev_net(ort->dst.dev);
1820         struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1821                                             ort->rt6i_table);
1822
1823         if (rt) {
1824                 rt->dst.input = ort->dst.input;
1825                 rt->dst.output = ort->dst.output;
1826                 rt->dst.flags |= DST_HOST;
1827
1828                 rt->rt6i_dst.addr = *dest;
1829                 rt->rt6i_dst.plen = 128;
1830                 dst_copy_metrics(&rt->dst, &ort->dst);
1831                 rt->dst.error = ort->dst.error;
1832                 rt->rt6i_idev = ort->rt6i_idev;
1833                 if (rt->rt6i_idev)
1834                         in6_dev_hold(rt->rt6i_idev);
1835                 rt->dst.lastuse = jiffies;
1836
1837                 if (ort->rt6i_flags & RTF_GATEWAY)
1838                         rt->rt6i_gateway = ort->rt6i_gateway;
1839                 else
1840                         rt->rt6i_gateway = *dest;
1841                 rt->rt6i_flags = ort->rt6i_flags;
1842                 rt6_set_from(rt, ort);
1843                 rt->rt6i_metric = 0;
1844
1845 #ifdef CONFIG_IPV6_SUBTREES
1846                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1847 #endif
1848                 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1849                 rt->rt6i_table = ort->rt6i_table;
1850         }
1851         return rt;
1852 }
1853
1854 #ifdef CONFIG_IPV6_ROUTE_INFO
1855 static struct rt6_info *rt6_get_route_info(struct net *net,
1856                                            const struct in6_addr *prefix, int prefixlen,
1857                                            const struct in6_addr *gwaddr, int ifindex)
1858 {
1859         struct fib6_node *fn;
1860         struct rt6_info *rt = NULL;
1861         struct fib6_table *table;
1862
1863         table = fib6_get_table(net, RT6_TABLE_INFO);
1864         if (!table)
1865                 return NULL;
1866
1867         read_lock_bh(&table->tb6_lock);
1868         fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1869         if (!fn)
1870                 goto out;
1871
1872         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1873                 if (rt->dst.dev->ifindex != ifindex)
1874                         continue;
1875                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1876                         continue;
1877                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1878                         continue;
1879                 dst_hold(&rt->dst);
1880                 break;
1881         }
1882 out:
1883         read_unlock_bh(&table->tb6_lock);
1884         return rt;
1885 }
1886
1887 static struct rt6_info *rt6_add_route_info(struct net *net,
1888                                            const struct in6_addr *prefix, int prefixlen,
1889                                            const struct in6_addr *gwaddr, int ifindex,
1890                                            unsigned int pref)
1891 {
1892         struct fib6_config cfg = {
1893                 .fc_table       = RT6_TABLE_INFO,
1894                 .fc_metric      = IP6_RT_PRIO_USER,
1895                 .fc_ifindex     = ifindex,
1896                 .fc_dst_len     = prefixlen,
1897                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1898                                   RTF_UP | RTF_PREF(pref),
1899                 .fc_nlinfo.portid = 0,
1900                 .fc_nlinfo.nlh = NULL,
1901                 .fc_nlinfo.nl_net = net,
1902         };
1903
1904         cfg.fc_dst = *prefix;
1905         cfg.fc_gateway = *gwaddr;
1906
1907         /* We should treat it as a default route if prefix length is 0. */
1908         if (!prefixlen)
1909                 cfg.fc_flags |= RTF_DEFAULT;
1910
1911         ip6_route_add(&cfg);
1912
1913         return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1914 }
1915 #endif
1916
1917 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1918 {
1919         struct rt6_info *rt;
1920         struct fib6_table *table;
1921
1922         table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1923         if (!table)
1924                 return NULL;
1925
1926         read_lock_bh(&table->tb6_lock);
1927         for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1928                 if (dev == rt->dst.dev &&
1929                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1930                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1931                         break;
1932         }
1933         if (rt)
1934                 dst_hold(&rt->dst);
1935         read_unlock_bh(&table->tb6_lock);
1936         return rt;
1937 }
1938
1939 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1940                                      struct net_device *dev,
1941                                      unsigned int pref)
1942 {
1943         struct fib6_config cfg = {
1944                 .fc_table       = RT6_TABLE_DFLT,
1945                 .fc_metric      = IP6_RT_PRIO_USER,
1946                 .fc_ifindex     = dev->ifindex,
1947                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1948                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1949                 .fc_nlinfo.portid = 0,
1950                 .fc_nlinfo.nlh = NULL,
1951                 .fc_nlinfo.nl_net = dev_net(dev),
1952         };
1953
1954         cfg.fc_gateway = *gwaddr;
1955
1956         ip6_route_add(&cfg);
1957
1958         return rt6_get_dflt_router(gwaddr, dev);
1959 }
1960
1961 void rt6_purge_dflt_routers(struct net *net)
1962 {
1963         struct rt6_info *rt;
1964         struct fib6_table *table;
1965
1966         /* NOTE: Keep consistent with rt6_get_dflt_router */
1967         table = fib6_get_table(net, RT6_TABLE_DFLT);
1968         if (!table)
1969                 return;
1970
1971 restart:
1972         read_lock_bh(&table->tb6_lock);
1973         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1974                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
1975                     (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
1976                         dst_hold(&rt->dst);
1977                         read_unlock_bh(&table->tb6_lock);
1978                         ip6_del_rt(rt);
1979                         goto restart;
1980                 }
1981         }
1982         read_unlock_bh(&table->tb6_lock);
1983 }
1984
1985 static void rtmsg_to_fib6_config(struct net *net,
1986                                  struct in6_rtmsg *rtmsg,
1987                                  struct fib6_config *cfg)
1988 {
1989         memset(cfg, 0, sizeof(*cfg));
1990
1991         cfg->fc_table = RT6_TABLE_MAIN;
1992         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1993         cfg->fc_metric = rtmsg->rtmsg_metric;
1994         cfg->fc_expires = rtmsg->rtmsg_info;
1995         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1996         cfg->fc_src_len = rtmsg->rtmsg_src_len;
1997         cfg->fc_flags = rtmsg->rtmsg_flags;
1998
1999         cfg->fc_nlinfo.nl_net = net;
2000
2001         cfg->fc_dst = rtmsg->rtmsg_dst;
2002         cfg->fc_src = rtmsg->rtmsg_src;
2003         cfg->fc_gateway = rtmsg->rtmsg_gateway;
2004 }
2005
2006 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2007 {
2008         struct fib6_config cfg;
2009         struct in6_rtmsg rtmsg;
2010         int err;
2011
2012         switch(cmd) {
2013         case SIOCADDRT:         /* Add a route */
2014         case SIOCDELRT:         /* Delete a route */
2015                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2016                         return -EPERM;
2017                 err = copy_from_user(&rtmsg, arg,
2018                                      sizeof(struct in6_rtmsg));
2019                 if (err)
2020                         return -EFAULT;
2021
2022                 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2023
2024                 rtnl_lock();
2025                 switch (cmd) {
2026                 case SIOCADDRT:
2027                         err = ip6_route_add(&cfg);
2028                         break;
2029                 case SIOCDELRT:
2030                         err = ip6_route_del(&cfg);
2031                         break;
2032                 default:
2033                         err = -EINVAL;
2034                 }
2035                 rtnl_unlock();
2036
2037                 return err;
2038         }
2039
2040         return -EINVAL;
2041 }
2042
2043 /*
2044  *      Drop the packet on the floor
2045  */
2046
2047 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2048 {
2049         int type;
2050         struct dst_entry *dst = skb_dst(skb);
2051         switch (ipstats_mib_noroutes) {
2052         case IPSTATS_MIB_INNOROUTES:
2053                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2054                 if (type == IPV6_ADDR_ANY) {
2055                         IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2056                                       IPSTATS_MIB_INADDRERRORS);
2057                         break;
2058                 }
2059                 /* FALLTHROUGH */
2060         case IPSTATS_MIB_OUTNOROUTES:
2061                 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2062                               ipstats_mib_noroutes);
2063                 break;
2064         }
2065         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2066         kfree_skb(skb);
2067         return 0;
2068 }
2069
2070 static int ip6_pkt_discard(struct sk_buff *skb)
2071 {
2072         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2073 }
2074
2075 static int ip6_pkt_discard_out(struct sk_buff *skb)
2076 {
2077         skb->dev = skb_dst(skb)->dev;
2078         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2079 }
2080
2081 static int ip6_pkt_prohibit(struct sk_buff *skb)
2082 {
2083         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2084 }
2085
2086 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2087 {
2088         skb->dev = skb_dst(skb)->dev;
2089         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2090 }
2091
2092 /*
2093  *      Allocate a dst for local (unicast / anycast) address.
2094  */
2095
2096 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2097                                     const struct in6_addr *addr,
2098                                     bool anycast)
2099 {
2100         struct net *net = dev_net(idev->dev);
2101         struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2102                                             DST_NOCOUNT, NULL);
2103         if (!rt)
2104                 return ERR_PTR(-ENOMEM);
2105
2106         in6_dev_hold(idev);
2107
2108         rt->dst.flags |= DST_HOST;
2109         rt->dst.input = ip6_input;
2110         rt->dst.output = ip6_output;
2111         rt->rt6i_idev = idev;
2112
2113         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2114         if (anycast)
2115                 rt->rt6i_flags |= RTF_ANYCAST;
2116         else
2117                 rt->rt6i_flags |= RTF_LOCAL;
2118
2119         rt->rt6i_gateway  = *addr;
2120         rt->rt6i_dst.addr = *addr;
2121         rt->rt6i_dst.plen = 128;
2122         rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2123
2124         atomic_set(&rt->dst.__refcnt, 1);
2125
2126         return rt;
2127 }
2128
2129 int ip6_route_get_saddr(struct net *net,
2130                         struct rt6_info *rt,
2131                         const struct in6_addr *daddr,
2132                         unsigned int prefs,
2133                         struct in6_addr *saddr)
2134 {
2135         struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2136         int err = 0;
2137         if (rt->rt6i_prefsrc.plen)
2138                 *saddr = rt->rt6i_prefsrc.addr;
2139         else
2140                 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2141                                          daddr, prefs, saddr);
2142         return err;
2143 }
2144
/*
 * Argument passed to fib6_remove_prefsrc(): identifies the address that
 * must be removed from routes' prefsrc entries.
 */
struct arg_dev_net_ip {
	struct net_device *dev;	/* device filter; NULL matches any device */
	struct net *net;	/* namespace whose ip6_null_entry is skipped */
	struct in6_addr *addr;	/* address compared against rt6i_prefsrc */
};
2151
2152 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2153 {
2154         struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2155         struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2156         struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2157
2158         if (((void *)rt->dst.dev == dev || !dev) &&
2159             rt != net->ipv6.ip6_null_entry &&
2160             ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2161                 /* remove prefsrc entry */
2162                 rt->rt6i_prefsrc.plen = 0;
2163         }
2164         return 0;
2165 }
2166
2167 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2168 {
2169         struct net *net = dev_net(ifp->idev->dev);
2170         struct arg_dev_net_ip adni = {
2171                 .dev = ifp->idev->dev,
2172                 .net = net,
2173                 .addr = &ifp->addr,
2174         };
2175         fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2176 }
2177
/* Argument passed to fib6_ifdown() by rt6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;	/* device filter; NULL matches any device */
	struct net *net;	/* namespace whose ip6_null_entry is skipped */
};
2182
2183 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2184 {
2185         const struct arg_dev_net *adn = arg;
2186         const struct net_device *dev = adn->dev;
2187
2188         if ((rt->dst.dev == dev || !dev) &&
2189             rt != adn->net->ipv6.ip6_null_entry)
2190                 return -1;
2191
2192         return 0;
2193 }
2194
2195 void rt6_ifdown(struct net *net, struct net_device *dev)
2196 {
2197         struct arg_dev_net adn = {
2198                 .dev = dev,
2199                 .net = net,
2200         };
2201
2202         fib6_clean_all(net, fib6_ifdown, 0, &adn);
2203         icmp6_clean_all(fib6_ifdown, &adn);
2204 }
2205
/* Argument passed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* new MTU value */
};
2210
2211 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2212 {
2213         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2214         struct inet6_dev *idev;
2215
2216         /* In IPv6 pmtu discovery is not optional,
2217            so that RTAX_MTU lock cannot disable it.
2218            We still use this lock to block changes
2219            caused by addrconf/ndisc.
2220         */
2221
2222         idev = __in6_dev_get(arg->dev);
2223         if (!idev)
2224                 return 0;
2225
2226         /* For administrative MTU increase, there is no way to discover
2227            IPv6 PMTU increase, so PMTU increase should be updated here.
2228            Since RFC 1981 doesn't include administrative MTU increase
2229            update PMTU increase is a MUST. (i.e. jumbo frame)
2230          */
2231         /*
2232            If new MTU is less than route PMTU, this new MTU will be the
2233            lowest MTU in the path, update the route PMTU to reflect PMTU
2234            decreases; if new MTU is greater than route PMTU, and the
2235            old MTU is the lowest MTU in the path, update the route PMTU
2236            to reflect the increase. In this case if the other nodes' MTU
2237            also have the lowest MTU, TOO BIG MESSAGE will be lead to
2238            PMTU discouvery.
2239          */
2240         if (rt->dst.dev == arg->dev &&
2241             !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2242             (dst_mtu(&rt->dst) >= arg->mtu ||
2243              (dst_mtu(&rt->dst) < arg->mtu &&
2244               dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2245                 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2246         }
2247         return 0;
2248 }
2249
2250 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2251 {
2252         struct rt6_mtu_change_arg arg = {
2253                 .dev = dev,
2254                 .mtu = mtu,
2255         };
2256
2257         fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2258 }
2259
2260 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2261         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2262         [RTA_OIF]               = { .type = NLA_U32 },
2263         [RTA_IIF]               = { .type = NLA_U32 },
2264         [RTA_PRIORITY]          = { .type = NLA_U32 },
2265         [RTA_METRICS]           = { .type = NLA_NESTED },
2266         [RTA_MULTIPATH]         = { .len = sizeof(struct rtnexthop) },
2267 };
2268
2269 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2270                               struct fib6_config *cfg)
2271 {
2272         struct rtmsg *rtm;
2273         struct nlattr *tb[RTA_MAX+1];
2274         int err;
2275
2276         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2277         if (err < 0)
2278                 goto errout;
2279
2280         err = -EINVAL;
2281         rtm = nlmsg_data(nlh);
2282         memset(cfg, 0, sizeof(*cfg));
2283
2284         cfg->fc_table = rtm->rtm_table;
2285         cfg->fc_dst_len = rtm->rtm_dst_len;
2286         cfg->fc_src_len = rtm->rtm_src_len;
2287         cfg->fc_flags = RTF_UP;
2288         cfg->fc_protocol = rtm->rtm_protocol;
2289         cfg->fc_type = rtm->rtm_type;
2290
2291         if (rtm->rtm_type == RTN_UNREACHABLE ||
2292             rtm->rtm_type == RTN_BLACKHOLE ||
2293             rtm->rtm_type == RTN_PROHIBIT ||
2294             rtm->rtm_type == RTN_THROW)
2295                 cfg->fc_flags |= RTF_REJECT;
2296
2297         if (rtm->rtm_type == RTN_LOCAL)
2298                 cfg->fc_flags |= RTF_LOCAL;
2299
2300         cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2301         cfg->fc_nlinfo.nlh = nlh;
2302         cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2303
2304         if (tb[RTA_GATEWAY]) {
2305                 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2306                 cfg->fc_flags |= RTF_GATEWAY;
2307         }
2308
2309         if (tb[RTA_DST]) {
2310                 int plen = (rtm->rtm_dst_len + 7) >> 3;
2311
2312                 if (nla_len(tb[RTA_DST]) < plen)
2313                         goto errout;
2314
2315                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2316         }
2317
2318         if (tb[RTA_SRC]) {
2319                 int plen = (rtm->rtm_src_len + 7) >> 3;
2320
2321                 if (nla_len(tb[RTA_SRC]) < plen)
2322                         goto errout;
2323
2324                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2325         }
2326
2327         if (tb[RTA_PREFSRC])
2328                 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2329
2330         if (tb[RTA_OIF])
2331                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2332
2333         if (tb[RTA_PRIORITY])
2334                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2335
2336         if (tb[RTA_METRICS]) {
2337                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2338                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2339         }
2340
2341         if (tb[RTA_TABLE])
2342                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2343
2344         if (tb[RTA_MULTIPATH]) {
2345                 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2346                 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2347         }
2348
2349         err = 0;
2350 errout:
2351         return err;
2352 }
2353
2354 static int ip6_route_multipath(struct fib6_config *cfg, int add)
2355 {
2356         struct fib6_config r_cfg;
2357         struct rtnexthop *rtnh;
2358         int remaining;
2359         int attrlen;
2360         int err = 0, last_err = 0;
2361
2362 beginning:
2363         rtnh = (struct rtnexthop *)cfg->fc_mp;
2364         remaining = cfg->fc_mp_len;
2365
2366         /* Parse a Multipath Entry */
2367         while (rtnh_ok(rtnh, remaining)) {
2368                 memcpy(&r_cfg, cfg, sizeof(*cfg));
2369                 if (rtnh->rtnh_ifindex)
2370                         r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2371
2372                 attrlen = rtnh_attrlen(rtnh);
2373                 if (attrlen > 0) {
2374                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2375
2376                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2377                         if (nla) {
2378                                 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2379                                 r_cfg.fc_flags |= RTF_GATEWAY;
2380                         }
2381                 }
2382                 err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2383                 if (err) {
2384                         last_err = err;
2385                         /* If we are trying to remove a route, do not stop the
2386                          * loop when ip6_route_del() fails (because next hop is
2387                          * already gone), we should try to remove all next hops.
2388                          */
2389                         if (add) {
2390                                 /* If add fails, we should try to delete all
2391                                  * next hops that have been already added.
2392                                  */
2393                                 add = 0;
2394                                 goto beginning;
2395                         }
2396                 }
2397                 /* Because each route is added like a single route we remove
2398                  * this flag after the first nexthop (if there is a collision,
2399                  * we have already fail to add the first nexthop:
2400                  * fib6_add_rt2node() has reject it).
2401                  */
2402                 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
2403                 rtnh = rtnh_next(rtnh, &remaining);
2404         }
2405
2406         return last_err;
2407 }
2408
2409 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh)
2410 {
2411         struct fib6_config cfg;
2412         int err;
2413
2414         err = rtm_to_fib6_config(skb, nlh, &cfg);
2415         if (err < 0)
2416                 return err;
2417
2418         if (cfg.fc_mp)
2419                 return ip6_route_multipath(&cfg, 0);
2420         else
2421                 return ip6_route_del(&cfg);
2422 }
2423
2424 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh)
2425 {
2426         struct fib6_config cfg;
2427         int err;
2428
2429         err = rtm_to_fib6_config(skb, nlh, &cfg);
2430         if (err < 0)
2431                 return err;
2432
2433         if (cfg.fc_mp)
2434                 return ip6_route_multipath(&cfg, 1);
2435         else
2436                 return ip6_route_add(&cfg);
2437 }
2438
2439 static inline size_t rt6_nlmsg_size(void)
2440 {
2441         return NLMSG_ALIGN(sizeof(struct rtmsg))
2442                + nla_total_size(16) /* RTA_SRC */
2443                + nla_total_size(16) /* RTA_DST */
2444                + nla_total_size(16) /* RTA_GATEWAY */
2445                + nla_total_size(16) /* RTA_PREFSRC */
2446                + nla_total_size(4) /* RTA_TABLE */
2447                + nla_total_size(4) /* RTA_IIF */
2448                + nla_total_size(4) /* RTA_OIF */
2449                + nla_total_size(4) /* RTA_PRIORITY */
2450                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2451                + nla_total_size(sizeof(struct rta_cacheinfo));
2452 }
2453
2454 static int rt6_fill_node(struct net *net,
2455                          struct sk_buff *skb, struct rt6_info *rt,
2456                          struct in6_addr *dst, struct in6_addr *src,
2457                          int iif, int type, u32 portid, u32 seq,
2458                          int prefix, int nowait, unsigned int flags)
2459 {
2460         struct rtmsg *rtm;
2461         struct nlmsghdr *nlh;
2462         long expires;
2463         u32 table;
2464
2465         if (prefix) {   /* user wants prefix routes only */
2466                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2467                         /* success since this is not a prefix route */
2468                         return 1;
2469                 }
2470         }
2471
2472         nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2473         if (!nlh)
2474                 return -EMSGSIZE;
2475
2476         rtm = nlmsg_data(nlh);
2477         rtm->rtm_family = AF_INET6;
2478         rtm->rtm_dst_len = rt->rt6i_dst.plen;
2479         rtm->rtm_src_len = rt->rt6i_src.plen;
2480         rtm->rtm_tos = 0;
2481         if (rt->rt6i_table)
2482                 table = rt->rt6i_table->tb6_id;
2483         else
2484                 table = RT6_TABLE_UNSPEC;
2485         rtm->rtm_table = table;
2486         if (nla_put_u32(skb, RTA_TABLE, table))
2487                 goto nla_put_failure;
2488         if (rt->rt6i_flags & RTF_REJECT) {
2489                 switch (rt->dst.error) {
2490                 case -EINVAL:
2491                         rtm->rtm_type = RTN_BLACKHOLE;
2492                         break;
2493                 case -EACCES:
2494                         rtm->rtm_type = RTN_PROHIBIT;
2495                         break;
2496                 case -EAGAIN:
2497                         rtm->rtm_type = RTN_THROW;
2498                         break;
2499                 default:
2500                         rtm->rtm_type = RTN_UNREACHABLE;
2501                         break;
2502                 }
2503         }
2504         else if (rt->rt6i_flags & RTF_LOCAL)
2505                 rtm->rtm_type = RTN_LOCAL;
2506         else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2507                 rtm->rtm_type = RTN_LOCAL;
2508         else
2509                 rtm->rtm_type = RTN_UNICAST;
2510         rtm->rtm_flags = 0;
2511         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2512         rtm->rtm_protocol = rt->rt6i_protocol;
2513         if (rt->rt6i_flags & RTF_DYNAMIC)
2514                 rtm->rtm_protocol = RTPROT_REDIRECT;
2515         else if (rt->rt6i_flags & RTF_ADDRCONF) {
2516                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2517                         rtm->rtm_protocol = RTPROT_RA;
2518                 else
2519                         rtm->rtm_protocol = RTPROT_KERNEL;
2520         }
2521
2522         if (rt->rt6i_flags & RTF_CACHE)
2523                 rtm->rtm_flags |= RTM_F_CLONED;
2524
2525         if (dst) {
2526                 if (nla_put(skb, RTA_DST, 16, dst))
2527                         goto nla_put_failure;
2528                 rtm->rtm_dst_len = 128;
2529         } else if (rtm->rtm_dst_len)
2530                 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2531                         goto nla_put_failure;
2532 #ifdef CONFIG_IPV6_SUBTREES
2533         if (src) {
2534                 if (nla_put(skb, RTA_SRC, 16, src))
2535                         goto nla_put_failure;
2536                 rtm->rtm_src_len = 128;
2537         } else if (rtm->rtm_src_len &&
2538                    nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2539                 goto nla_put_failure;
2540 #endif
2541         if (iif) {
2542 #ifdef CONFIG_IPV6_MROUTE
2543                 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2544                         int err = ip6mr_get_route(net, skb, rtm, nowait);
2545                         if (err <= 0) {
2546                                 if (!nowait) {
2547                                         if (err == 0)
2548                                                 return 0;
2549                                         goto nla_put_failure;
2550                                 } else {
2551                                         if (err == -EMSGSIZE)
2552                                                 goto nla_put_failure;
2553                                 }
2554                         }
2555                 } else
2556 #endif
2557                         if (nla_put_u32(skb, RTA_IIF, iif))
2558                                 goto nla_put_failure;
2559         } else if (dst) {
2560                 struct in6_addr saddr_buf;
2561                 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2562                     nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2563                         goto nla_put_failure;
2564         }
2565
2566         if (rt->rt6i_prefsrc.plen) {
2567                 struct in6_addr saddr_buf;
2568                 saddr_buf = rt->rt6i_prefsrc.addr;
2569                 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2570                         goto nla_put_failure;
2571         }
2572
2573         if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2574                 goto nla_put_failure;
2575
2576         if (rt->rt6i_flags & RTF_GATEWAY) {
2577                 if (nla_put(skb, RTA_GATEWAY, 16, &rt->rt6i_gateway) < 0)
2578                         goto nla_put_failure;
2579         }
2580
2581         if (rt->dst.dev &&
2582             nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2583                 goto nla_put_failure;
2584         if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2585                 goto nla_put_failure;
2586
2587         expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2588
2589         if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2590                 goto nla_put_failure;
2591
2592         return nlmsg_end(skb, nlh);
2593
2594 nla_put_failure:
2595         nlmsg_cancel(skb, nlh);
2596         return -EMSGSIZE;
2597 }
2598
2599 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2600 {
2601         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2602         int prefix;
2603
2604         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2605                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2606                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2607         } else
2608                 prefix = 0;
2609
2610         return rt6_fill_node(arg->net,
2611                      arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2612                      NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2613                      prefix, 0, NLM_F_MULTI);
2614 }
2615
2616 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh)
2617 {
2618         struct net *net = sock_net(in_skb->sk);
2619         struct nlattr *tb[RTA_MAX+1];
2620         struct rt6_info *rt;
2621         struct sk_buff *skb;
2622         struct rtmsg *rtm;
2623         struct flowi6 fl6;
2624         int err, iif = 0, oif = 0;
2625
2626         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2627         if (err < 0)
2628                 goto errout;
2629
2630         err = -EINVAL;
2631         memset(&fl6, 0, sizeof(fl6));
2632
2633         if (tb[RTA_SRC]) {
2634                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2635                         goto errout;
2636
2637                 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2638         }
2639
2640         if (tb[RTA_DST]) {
2641                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2642                         goto errout;
2643
2644                 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2645         }
2646
2647         if (tb[RTA_IIF])
2648                 iif = nla_get_u32(tb[RTA_IIF]);
2649
2650         if (tb[RTA_OIF])
2651                 oif = nla_get_u32(tb[RTA_OIF]);
2652
2653         if (iif) {
2654                 struct net_device *dev;
2655                 int flags = 0;
2656
2657                 dev = __dev_get_by_index(net, iif);
2658                 if (!dev) {
2659                         err = -ENODEV;
2660                         goto errout;
2661                 }
2662
2663                 fl6.flowi6_iif = iif;
2664
2665                 if (!ipv6_addr_any(&fl6.saddr))
2666                         flags |= RT6_LOOKUP_F_HAS_SADDR;
2667
2668                 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2669                                                                flags);
2670         } else {
2671                 fl6.flowi6_oif = oif;
2672
2673                 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2674         }
2675
2676         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2677         if (!skb) {
2678                 ip6_rt_put(rt);
2679                 err = -ENOBUFS;
2680                 goto errout;
2681         }
2682
2683         /* Reserve room for dummy headers, this skb can pass
2684            through good chunk of routing engine.
2685          */
2686         skb_reset_mac_header(skb);
2687         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2688
2689         skb_dst_set(skb, &rt->dst);
2690
2691         err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2692                             RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2693                             nlh->nlmsg_seq, 0, 0, 0);
2694         if (err < 0) {
2695                 kfree_skb(skb);
2696                 goto errout;
2697         }
2698
2699         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2700 errout:
2701         return err;
2702 }
2703
2704 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2705 {
2706         struct sk_buff *skb;
2707         struct net *net = info->nl_net;
2708         u32 seq;
2709         int err;
2710
2711         err = -ENOBUFS;
2712         seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2713
2714         skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2715         if (!skb)
2716                 goto errout;
2717
2718         err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2719                                 event, info->portid, seq, 0, 0, 0);
2720         if (err < 0) {
2721                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2722                 WARN_ON(err == -EMSGSIZE);
2723                 kfree_skb(skb);
2724                 goto errout;
2725         }
2726         rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2727                     info->nlh, gfp_any());
2728         return;
2729 errout:
2730         if (err < 0)
2731                 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2732 }
2733
2734 static int ip6_route_dev_notify(struct notifier_block *this,
2735                                 unsigned long event, void *data)
2736 {
2737         struct net_device *dev = (struct net_device *)data;
2738         struct net *net = dev_net(dev);
2739
2740         if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2741                 net->ipv6.ip6_null_entry->dst.dev = dev;
2742                 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2743 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2744                 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2745                 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2746                 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2747                 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2748 #endif
2749         }
2750
2751         return NOTIFY_OK;
2752 }
2753
2754 /*
2755  *      /proc
2756  */
2757
2758 #ifdef CONFIG_PROC_FS
2759
/*
 * Bookkeeping for a buffer-based /proc reader.
 * NOTE(review): nothing in the visible part of this file uses this
 * structure; confirm whether it is still needed.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2768
2769 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2770 {
2771         struct seq_file *m = p_arg;
2772
2773         seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2774
2775 #ifdef CONFIG_IPV6_SUBTREES
2776         seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2777 #else
2778         seq_puts(m, "00000000000000000000000000000000 00 ");
2779 #endif
2780         if (rt->rt6i_flags & RTF_GATEWAY) {
2781                 seq_printf(m, "%pi6", &rt->rt6i_gateway);
2782         } else {
2783                 seq_puts(m, "00000000000000000000000000000000");
2784         }
2785         seq_printf(m, " %08x %08x %08x %08x %8s\n",
2786                    rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2787                    rt->dst.__use, rt->rt6i_flags,
2788                    rt->dst.dev ? rt->dst.dev->name : "");
2789         return 0;
2790 }
2791
2792 static int ipv6_route_show(struct seq_file *m, void *v)
2793 {
2794         struct net *net = (struct net *)m->private;
2795         fib6_clean_all_ro(net, rt6_info_route, 0, m);
2796         return 0;
2797 }
2798
2799 static int ipv6_route_open(struct inode *inode, struct file *file)
2800 {
2801         return single_open_net(inode, file, ipv6_route_show);
2802 }
2803
2804 static const struct file_operations ipv6_route_proc_fops = {
2805         .owner          = THIS_MODULE,
2806         .open           = ipv6_route_open,
2807         .read           = seq_read,
2808         .llseek         = seq_lseek,
2809         .release        = single_release_net,
2810 };
2811
2812 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2813 {
2814         struct net *net = (struct net *)seq->private;
2815         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2816                    net->ipv6.rt6_stats->fib_nodes,
2817                    net->ipv6.rt6_stats->fib_route_nodes,
2818                    net->ipv6.rt6_stats->fib_rt_alloc,
2819                    net->ipv6.rt6_stats->fib_rt_entries,
2820                    net->ipv6.rt6_stats->fib_rt_cache,
2821                    dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2822                    net->ipv6.rt6_stats->fib_discarded_routes);
2823
2824         return 0;
2825 }
2826
2827 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2828 {
2829         return single_open_net(inode, file, rt6_stats_seq_show);
2830 }
2831
2832 static const struct file_operations rt6_stats_seq_fops = {
2833         .owner   = THIS_MODULE,
2834         .open    = rt6_stats_seq_open,
2835         .read    = seq_read,
2836         .llseek  = seq_lseek,
2837         .release = single_release_net,
2838 };
2839 #endif  /* CONFIG_PROC_FS */
2840
2841 #ifdef CONFIG_SYSCTL
2842
2843 static
2844 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2845                               void __user *buffer, size_t *lenp, loff_t *ppos)
2846 {
2847         struct net *net;
2848         int delay;
2849         if (!write)
2850                 return -EINVAL;
2851
2852         net = (struct net *)ctl->extra1;
2853         delay = net->ipv6.sysctl.flush_delay;
2854         proc_dointvec(ctl, write, buffer, lenp, ppos);
2855         fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2856         return 0;
2857 }
2858
2859 ctl_table ipv6_route_table_template[] = {
2860         {
2861                 .procname       =       "flush",
2862                 .data           =       &init_net.ipv6.sysctl.flush_delay,
2863                 .maxlen         =       sizeof(int),
2864                 .mode           =       0200,
2865                 .proc_handler   =       ipv6_sysctl_rtcache_flush
2866         },
2867         {
2868                 .procname       =       "gc_thresh",
2869                 .data           =       &ip6_dst_ops_template.gc_thresh,
2870                 .maxlen         =       sizeof(int),
2871                 .mode           =       0644,
2872                 .proc_handler   =       proc_dointvec,
2873         },
2874         {
2875                 .procname       =       "max_size",
2876                 .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
2877                 .maxlen         =       sizeof(int),
2878                 .mode           =       0644,
2879                 .proc_handler   =       proc_dointvec,
2880         },
2881         {
2882                 .procname       =       "gc_min_interval",
2883                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2884                 .maxlen         =       sizeof(int),
2885                 .mode           =       0644,
2886                 .proc_handler   =       proc_dointvec_jiffies,
2887         },
2888         {
2889                 .procname       =       "gc_timeout",
2890                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2891                 .maxlen         =       sizeof(int),
2892                 .mode           =       0644,
2893                 .proc_handler   =       proc_dointvec_jiffies,
2894         },
2895         {
2896                 .procname       =       "gc_interval",
2897                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2898                 .maxlen         =       sizeof(int),
2899                 .mode           =       0644,
2900                 .proc_handler   =       proc_dointvec_jiffies,
2901         },
2902         {
2903                 .procname       =       "gc_elasticity",
2904                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2905                 .maxlen         =       sizeof(int),
2906                 .mode           =       0644,
2907                 .proc_handler   =       proc_dointvec,
2908         },
2909         {
2910                 .procname       =       "mtu_expires",
2911                 .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2912                 .maxlen         =       sizeof(int),
2913                 .mode           =       0644,
2914                 .proc_handler   =       proc_dointvec_jiffies,
2915         },
2916         {
2917                 .procname       =       "min_adv_mss",
2918                 .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2919                 .maxlen         =       sizeof(int),
2920                 .mode           =       0644,
2921                 .proc_handler   =       proc_dointvec,
2922         },
2923         {
2924                 .procname       =       "gc_min_interval_ms",
2925                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2926                 .maxlen         =       sizeof(int),
2927                 .mode           =       0644,
2928                 .proc_handler   =       proc_dointvec_ms_jiffies,
2929         },
2930         { }
2931 };
2932
2933 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2934 {
2935         struct ctl_table *table;
2936
2937         table = kmemdup(ipv6_route_table_template,
2938                         sizeof(ipv6_route_table_template),
2939                         GFP_KERNEL);
2940
2941         if (table) {
2942                 table[0].data = &net->ipv6.sysctl.flush_delay;
2943                 table[0].extra1 = net;
2944                 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2945                 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2946                 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2947                 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2948                 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2949                 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2950                 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2951                 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2952                 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2953
2954                 /* Don't export sysctls to unprivileged users */
2955                 if (net->user_ns != &init_user_ns)
2956                         table[0].procname = NULL;
2957         }
2958
2959         return table;
2960 }
2961 #endif
2962
2963 static int __net_init ip6_route_net_init(struct net *net)
2964 {
2965         int ret = -ENOMEM;
2966
2967         memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2968                sizeof(net->ipv6.ip6_dst_ops));
2969
2970         if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2971                 goto out_ip6_dst_ops;
2972
2973         net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2974                                            sizeof(*net->ipv6.ip6_null_entry),
2975                                            GFP_KERNEL);
2976         if (!net->ipv6.ip6_null_entry)
2977                 goto out_ip6_dst_entries;
2978         net->ipv6.ip6_null_entry->dst.path =
2979                 (struct dst_entry *)net->ipv6.ip6_null_entry;
2980         net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2981         dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2982                          ip6_template_metrics, true);
2983
2984 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2985         net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2986                                                sizeof(*net->ipv6.ip6_prohibit_entry),
2987                                                GFP_KERNEL);
2988         if (!net->ipv6.ip6_prohibit_entry)
2989                 goto out_ip6_null_entry;
2990         net->ipv6.ip6_prohibit_entry->dst.path =
2991                 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2992         net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2993         dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2994                          ip6_template_metrics, true);
2995
2996         net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2997                                                sizeof(*net->ipv6.ip6_blk_hole_entry),
2998                                                GFP_KERNEL);
2999         if (!net->ipv6.ip6_blk_hole_entry)
3000                 goto out_ip6_prohibit_entry;
3001         net->ipv6.ip6_blk_hole_entry->dst.path =
3002                 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3003         net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3004         dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3005                          ip6_template_metrics, true);
3006 #endif
3007
3008         net->ipv6.sysctl.flush_delay = 0;
3009         net->ipv6.sysctl.ip6_rt_max_size = 4096;
3010         net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3011         net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3012         net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3013         net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3014         net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3015         net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3016
3017         net->ipv6.ip6_rt_gc_expire = 30*HZ;
3018
3019         ret = 0;
3020 out:
3021         return ret;
3022
3023 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3024 out_ip6_prohibit_entry:
3025         kfree(net->ipv6.ip6_prohibit_entry);
3026 out_ip6_null_entry:
3027         kfree(net->ipv6.ip6_null_entry);
3028 #endif
3029 out_ip6_dst_entries:
3030         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3031 out_ip6_dst_ops:
3032         goto out;
3033 }
3034
3035 static void __net_exit ip6_route_net_exit(struct net *net)
3036 {
3037         kfree(net->ipv6.ip6_null_entry);
3038 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3039         kfree(net->ipv6.ip6_prohibit_entry);
3040         kfree(net->ipv6.ip6_blk_hole_entry);
3041 #endif
3042         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3043 }
3044
3045 static int __net_init ip6_route_net_init_late(struct net *net)
3046 {
3047 #ifdef CONFIG_PROC_FS
3048         proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3049         proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3050 #endif
3051         return 0;
3052 }
3053
3054 static void __net_exit ip6_route_net_exit_late(struct net *net)
3055 {
3056 #ifdef CONFIG_PROC_FS
3057         remove_proc_entry("ipv6_route", net->proc_net);
3058         remove_proc_entry("rt6_stats", net->proc_net);
3059 #endif
3060 }
3061
3062 static struct pernet_operations ip6_route_net_ops = {
3063         .init = ip6_route_net_init,
3064         .exit = ip6_route_net_exit,
3065 };
3066
3067 static int __net_init ipv6_inetpeer_init(struct net *net)
3068 {
3069         struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3070
3071         if (!bp)
3072                 return -ENOMEM;
3073         inet_peer_base_init(bp);
3074         net->ipv6.peers = bp;
3075         return 0;
3076 }
3077
3078 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3079 {
3080         struct inet_peer_base *bp = net->ipv6.peers;
3081
3082         net->ipv6.peers = NULL;
3083         inetpeer_invalidate_tree(bp);
3084         kfree(bp);
3085 }
3086
3087 static struct pernet_operations ipv6_inetpeer_ops = {
3088         .init   =       ipv6_inetpeer_init,
3089         .exit   =       ipv6_inetpeer_exit,
3090 };
3091
3092 static struct pernet_operations ip6_route_net_late_ops = {
3093         .init = ip6_route_net_init_late,
3094         .exit = ip6_route_net_exit_late,
3095 };
3096
3097 static struct notifier_block ip6_route_dev_notifier = {
3098         .notifier_call = ip6_route_dev_notify,
3099         .priority = 0,
3100 };
3101
3102 int __init ip6_route_init(void)
3103 {
3104         int ret;
3105
3106         ret = -ENOMEM;
3107         ip6_dst_ops_template.kmem_cachep =
3108                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3109                                   SLAB_HWCACHE_ALIGN, NULL);
3110         if (!ip6_dst_ops_template.kmem_cachep)
3111                 goto out;
3112
3113         ret = dst_entries_init(&ip6_dst_blackhole_ops);
3114         if (ret)
3115                 goto out_kmem_cache;
3116
3117         ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3118         if (ret)
3119                 goto out_dst_entries;
3120
3121         ret = register_pernet_subsys(&ip6_route_net_ops);
3122         if (ret)
3123                 goto out_register_inetpeer;
3124
3125         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3126
3127         /* Registering of the loopback is done before this portion of code,
3128          * the loopback reference in rt6_info will not be taken, do it
3129          * manually for init_net */
3130         init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3131         init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3132   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3133         init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3134         init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3135         init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3136         init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3137   #endif
3138         ret = fib6_init();
3139         if (ret)
3140                 goto out_register_subsys;
3141
3142         ret = xfrm6_init();
3143         if (ret)
3144                 goto out_fib6_init;
3145
3146         ret = fib6_rules_init();
3147         if (ret)
3148                 goto xfrm6_init;
3149
3150         ret = register_pernet_subsys(&ip6_route_net_late_ops);
3151         if (ret)
3152                 goto fib6_rules_init;
3153
3154         ret = -ENOBUFS;
3155         if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3156             __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3157             __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3158                 goto out_register_late_subsys;
3159
3160         ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3161         if (ret)
3162                 goto out_register_late_subsys;
3163
3164 out:
3165         return ret;
3166
3167 out_register_late_subsys:
3168         unregister_pernet_subsys(&ip6_route_net_late_ops);
3169 fib6_rules_init:
3170         fib6_rules_cleanup();
3171 xfrm6_init:
3172         xfrm6_fini();
3173 out_fib6_init:
3174         fib6_gc_cleanup();
3175 out_register_subsys:
3176         unregister_pernet_subsys(&ip6_route_net_ops);
3177 out_register_inetpeer:
3178         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3179 out_dst_entries:
3180         dst_entries_destroy(&ip6_dst_blackhole_ops);
3181 out_kmem_cache:
3182         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3183         goto out;
3184 }
3185
3186 void ip6_route_cleanup(void)
3187 {
3188         unregister_netdevice_notifier(&ip6_route_dev_notifier);
3189         unregister_pernet_subsys(&ip6_route_net_late_ops);
3190         fib6_rules_cleanup();
3191         xfrm6_fini();
3192         fib6_gc_cleanup();
3193         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3194         unregister_pernet_subsys(&ip6_route_net_ops);
3195         dst_entries_destroy(&ip6_dst_blackhole_ops);
3196         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3197 }