Merge tag 'lsk-v4.4-16.05-android'
[firefly-linux-kernel-4.4.55.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13
14 /*      Changes:
15  *
16  *      YOSHIFUJI Hideaki @USAGI
17  *              reworked default router selection.
18  *              - respect outgoing interface
19  *              - select from (probably) reachable routers (i.e.
20  *              routers in REACHABLE, STALE, DELAY or PROBE states).
21  *              - always select the same router if it is (probably)
22  *              reachable.  otherwise, round-robin the list.
23  *      Ville Nuorvala
24  *              Fixed routing subtrees.
25  */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/dst_metadata.h>
58 #include <net/xfrm.h>
59 #include <net/netevent.h>
60 #include <net/netlink.h>
61 #include <net/nexthop.h>
62 #include <net/lwtunnel.h>
63 #include <net/ip_tunnels.h>
64 #include <net/l3mdev.h>
65
66 #include <asm/uaccess.h>
67
68 #ifdef CONFIG_SYSCTL
69 #include <linux/sysctl.h>
70 #endif
71
/* Result codes from next-hop reachability checks (rt6_check_neigh()).
 * Negative values are failures; RT6_NUD_FAIL_DO_RR additionally asks
 * the caller (find_match()) to fall back to round-robin selection.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD = -3,		/* route unusable, skip it */
	RT6_NUD_FAIL_PROBE = -2,	/* neighbour in NUD_FAILED, needs probing */
	RT6_NUD_FAIL_DO_RR = -1,	/* no neighbour entry; round-robin instead */
	RT6_NUD_SUCCEED = 1		/* next hop (probably) reachable */
};
78
/* Forward declarations for the dst_ops callbacks and routing helpers
 * defined later in this file.
 */
static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
static unsigned int	 ip6_mtu(const struct dst_entry *dst);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void		ip6_dst_destroy(struct dst_entry *);
static void		ip6_dst_ifdown(struct dst_entry *,
				       struct net_device *dev, int how);
static int		 ip6_dst_gc(struct dst_ops *ops);

static int		ip6_pkt_discard(struct sk_buff *skb);
static int		ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
static int		ip6_pkt_prohibit(struct sk_buff *skb);
static int		ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
static void		ip6_link_failure(struct sk_buff *skb);
static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
					   struct sk_buff *skb, u32 mtu);
static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
					struct sk_buff *skb);
static void		rt6_dst_from_metrics_check(struct rt6_info *rt);
static int rt6_score_route(struct rt6_info *rt, int oif, int strict);

#ifdef CONFIG_IPV6_ROUTE_INFO
/* RFC 4191 Route Information option handling (see rt6_route_rcv()). */
static struct rt6_info *rt6_add_route_info(struct net_device *dev,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, unsigned int pref);
static struct rt6_info *rt6_get_route_info(struct net_device *dev,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr);
#endif
109
/* Per-cpu list of uncached (DST_NOCACHE) rt6_info entries, kept so
 * that device teardown can find them and re-point them at the loopback
 * device (see rt6_uncached_list_flush_dev()).
 */
struct uncached_list {
	spinlock_t		lock;	/* protects head */
	struct list_head	head;
};

static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
116
117 static void rt6_uncached_list_add(struct rt6_info *rt)
118 {
119         struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
120
121         rt->dst.flags |= DST_NOCACHE;
122         rt->rt6i_uncached_list = ul;
123
124         spin_lock_bh(&ul->lock);
125         list_add_tail(&rt->rt6i_uncached, &ul->head);
126         spin_unlock_bh(&ul->lock);
127 }
128
129 static void rt6_uncached_list_del(struct rt6_info *rt)
130 {
131         if (!list_empty(&rt->rt6i_uncached)) {
132                 struct uncached_list *ul = rt->rt6i_uncached_list;
133
134                 spin_lock_bh(&ul->lock);
135                 list_del(&rt->rt6i_uncached);
136                 spin_unlock_bh(&ul->lock);
137         }
138 }
139
140 static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
141 {
142         struct net_device *loopback_dev = net->loopback_dev;
143         int cpu;
144
145         if (dev == loopback_dev)
146                 return;
147
148         for_each_possible_cpu(cpu) {
149                 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
150                 struct rt6_info *rt;
151
152                 spin_lock_bh(&ul->lock);
153                 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
154                         struct inet6_dev *rt_idev = rt->rt6i_idev;
155                         struct net_device *rt_dev = rt->dst.dev;
156
157                         if (rt_idev->dev == dev) {
158                                 rt->rt6i_idev = in6_dev_get(loopback_dev);
159                                 in6_dev_put(rt_idev);
160                         }
161
162                         if (rt_dev == dev) {
163                                 rt->dst.dev = loopback_dev;
164                                 dev_hold(rt->dst.dev);
165                                 dev_put(rt_dev);
166                         }
167                 }
168                 spin_unlock_bh(&ul->lock);
169         }
170 }
171
172 static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
173 {
174         return dst_metrics_write_ptr(rt->dst.from);
175 }
176
177 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
178 {
179         struct rt6_info *rt = (struct rt6_info *)dst;
180
181         if (rt->rt6i_flags & RTF_PCPU)
182                 return rt6_pcpu_cow_metrics(rt);
183         else if (rt->rt6i_flags & RTF_CACHE)
184                 return NULL;
185         else
186                 return dst_cow_metrics_generic(dst, old);
187 }
188
189 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
190                                              struct sk_buff *skb,
191                                              const void *daddr)
192 {
193         struct in6_addr *p = &rt->rt6i_gateway;
194
195         if (!ipv6_addr_any(p))
196                 return (const void *) p;
197         else if (skb)
198                 return &ipv6_hdr(skb)->daddr;
199         return daddr;
200 }
201
202 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
203                                           struct sk_buff *skb,
204                                           const void *daddr)
205 {
206         struct rt6_info *rt = (struct rt6_info *) dst;
207         struct neighbour *n;
208
209         daddr = choose_neigh_daddr(rt, skb, daddr);
210         n = __ipv6_neigh_lookup(dst->dev, daddr);
211         if (n)
212                 return n;
213         return neigh_create(&nd_tbl, daddr, dst->dev);
214 }
215
/* dst_ops callbacks for ordinary IPv6 routes.  Named "_template":
 * presumably copied into net->ipv6.ip6_dst_ops per namespace (routes
 * are allocated from that copy in __ip6_dst_alloc()) — the per-netns
 * init is outside this chunk; verify there.
 */
static struct dst_ops ip6_dst_ops_template = {
	.family			=	AF_INET6,
	.gc			=	ip6_dst_gc,
	.gc_thresh		=	1024,
	.check			=	ip6_dst_check,
	.default_advmss		=	ip6_default_advmss,
	.mtu			=	ip6_mtu,
	.cow_metrics		=	ipv6_cow_metrics,
	.destroy		=	ip6_dst_destroy,
	.ifdown			=	ip6_dst_ifdown,
	.negative_advice	=	ip6_negative_advice,
	.link_failure		=	ip6_link_failure,
	.update_pmtu		=	ip6_rt_update_pmtu,
	.redirect		=	rt6_do_redirect,
	.local_out		=	__ip6_local_out,
	.neigh_lookup		=	ip6_neigh_lookup,
};
233
234 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
235 {
236         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
237
238         return mtu ? : dst->dev->mtu;
239 }
240
/* Blackhole dsts intentionally ignore PMTU updates... */
static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					 struct sk_buff *skb, u32 mtu)
{
}

/* ...and redirects. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
}
250
/* dst_ops for blackhole copies of routes: no gc, and PMTU/redirect
 * events are deliberately dropped (see the stubs above).
 */
static struct dst_ops ip6_dst_blackhole_ops = {
	.family			=	AF_INET6,
	.destroy		=	ip6_dst_destroy,
	.check			=	ip6_dst_check,
	.mtu			=	ip6_blackhole_mtu,
	.default_advmss		=	ip6_default_advmss,
	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
	.redirect		=	ip6_rt_blackhole_redirect,
	.cow_metrics		=	dst_cow_metrics_generic,
	.neigh_lookup		=	ip6_neigh_lookup,
};
262
/* Metrics for the template routes below; hop limit left at 0
 * (presumably meaning "use the default" — confirm against how
 * RTAX_HOPLIMIT is read).
 */
static const u32 ip6_template_metrics[RTAX_MAX] = {
	[RTAX_HOPLIMIT - 1] = 0,
};
266
/* Template for the per-namespace null route: terminates lookups that
 * match nothing, discarding packets with -ENETUNREACH.  Permanent
 * (initial refs, maximal metric).
 */
static const struct rt6_info ip6_null_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -ENETUNREACH,
		.input		= ip6_pkt_discard,
		.output		= ip6_pkt_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,	/* worst possible metric */
	.rt6i_ref	= ATOMIC_INIT(1),
};
281
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

/* Template for the "prohibit" policy-routing action: packets are
 * rejected with -EACCES (ICMPv6 adm. prohibited is generated by
 * ip6_pkt_prohibit*).
 */
static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/* Template for the "blackhole" policy-routing action: packets are
 * silently discarded, error -EINVAL.
 */
static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
315
/* Zero the rt6_info-specific part of @rt (everything after the
 * embedded dst_entry, which dst_alloc() already set up) and initialise
 * its list heads.
 */
static void rt6_info_init(struct rt6_info *rt)
{
	struct dst_entry *dst = &rt->dst;

	/* dst must be the first member of rt6_info for this to work */
	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
	INIT_LIST_HEAD(&rt->rt6i_siblings);
	INIT_LIST_HEAD(&rt->rt6i_uncached);
}
324
325 /* allocate dst with ip6_dst_ops */
326 static struct rt6_info *__ip6_dst_alloc(struct net *net,
327                                         struct net_device *dev,
328                                         int flags)
329 {
330         struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
331                                         0, DST_OBSOLETE_FORCE_CHK, flags);
332
333         if (rt)
334                 rt6_info_init(rt);
335
336         return rt;
337 }
338
339 static struct rt6_info *ip6_dst_alloc(struct net *net,
340                                       struct net_device *dev,
341                                       int flags)
342 {
343         struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
344
345         if (rt) {
346                 rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
347                 if (rt->rt6i_pcpu) {
348                         int cpu;
349
350                         for_each_possible_cpu(cpu) {
351                                 struct rt6_info **p;
352
353                                 p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
354                                 /* no one shares rt */
355                                 *p =  NULL;
356                         }
357                 } else {
358                         dst_destroy((struct dst_entry *)rt);
359                         return NULL;
360                 }
361         }
362
363         return rt;
364 }
365
/* dst_ops->destroy: release everything an rt6_info owns — metrics,
 * the per-cpu clone array, its slot on the uncached list, its idev
 * reference, and finally the reference on its "from" parent dst.
 */
static void ip6_dst_destroy(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct dst_entry *from = dst->from;
	struct inet6_dev *idev;

	dst_destroy_metrics_generic(dst);
	free_percpu(rt->rt6i_pcpu);
	rt6_uncached_list_del(rt);

	idev = rt->rt6i_idev;
	if (idev) {
		rt->rt6i_idev = NULL;
		in6_dev_put(idev);
	}

	/* clear the pointer before dropping the parent reference */
	dst->from = NULL;
	dst_release(from);
}
385
386 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
387                            int how)
388 {
389         struct rt6_info *rt = (struct rt6_info *)dst;
390         struct inet6_dev *idev = rt->rt6i_idev;
391         struct net_device *loopback_dev =
392                 dev_net(dev)->loopback_dev;
393
394         if (dev != loopback_dev) {
395                 if (idev && idev->dev == dev) {
396                         struct inet6_dev *loopback_idev =
397                                 in6_dev_get(loopback_dev);
398                         if (loopback_idev) {
399                                 rt->rt6i_idev = loopback_idev;
400                                 in6_dev_put(idev);
401                         }
402                 }
403         }
404 }
405
406 static bool __rt6_check_expired(const struct rt6_info *rt)
407 {
408         if (rt->rt6i_flags & RTF_EXPIRES)
409                 return time_after(jiffies, rt->dst.expires);
410         else
411                 return false;
412 }
413
414 static bool rt6_check_expired(const struct rt6_info *rt)
415 {
416         if (rt->rt6i_flags & RTF_EXPIRES) {
417                 if (time_after(jiffies, rt->dst.expires))
418                         return true;
419         } else if (rt->dst.from) {
420                 return rt6_check_expired((struct rt6_info *) rt->dst.from);
421         }
422         return false;
423 }
424
/* Multipath route selection:
 *   Hash based function using packet header and flowlabel.
 * Adapted from fib_info_hashfn()
 */
static int rt6_info_hash_nhsfn(unsigned int candidate_count,
			       const struct flowi6 *fl6)
{
	unsigned int hash = get_hash_from_flowi6(fl6);

	return hash % candidate_count;
}
434
435 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
436                                              struct flowi6 *fl6, int oif,
437                                              int strict)
438 {
439         struct rt6_info *sibling, *next_sibling;
440         int route_choosen;
441
442         route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
443         /* Don't change the route, if route_choosen == 0
444          * (siblings does not include ourself)
445          */
446         if (route_choosen)
447                 list_for_each_entry_safe(sibling, next_sibling,
448                                 &match->rt6i_siblings, rt6i_siblings) {
449                         route_choosen--;
450                         if (route_choosen == 0) {
451                                 if (rt6_score_route(sibling, oif, strict) < 0)
452                                         break;
453                                 match = sibling;
454                                 break;
455                         }
456                 }
457         return match;
458 }
459
/*
 *	Route lookup. Any table->tb6_lock is implied.
 */

/* Walk the leaf chain starting at @rt and return the entry best
 * matching the requested output interface (@oif) or, when no oif is
 * given, the source address @saddr.
 *
 * Returns the match; net->ipv6.ip6_null_entry when @oif was mandatory
 * (RT6_LOOKUP_F_IFACE) and nothing matched; or @rt unchanged when
 * there was nothing to match on.
 */
static inline struct rt6_info *rt6_device_match(struct net *net,
						    struct rt6_info *rt,
						    const struct in6_addr *saddr,
						    int oif,
						    int flags)
{
	struct rt6_info *local = NULL;
	struct rt6_info *sprt;

	/* no constraints: keep the head of the chain */
	if (!oif && ipv6_addr_any(saddr))
		goto out;

	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
		struct net_device *dev = sprt->dst.dev;

		if (oif) {
			if (dev->ifindex == oif)
				return sprt;	/* exact device match */
			if (dev->flags & IFF_LOOPBACK) {
				/* keep a loopback route as fallback,
				 * preferring one whose idev matches oif
				 */
				if (!sprt->rt6i_idev ||
				    sprt->rt6i_idev->dev->ifindex != oif) {
					if (flags & RT6_LOOKUP_F_IFACE)
						continue;
					if (local &&
					    local->rt6i_idev->dev->ifindex == oif)
						continue;
				}
				local = sprt;
			}
		} else {
			/* no oif: match on the source address instead */
			if (ipv6_chk_addr(net, saddr, dev,
					  flags & RT6_LOOKUP_F_IFACE))
				return sprt;
		}
	}

	if (oif) {
		if (local)
			return local;

		if (flags & RT6_LOOKUP_F_IFACE)
			return net->ipv6.ip6_null_entry;
	}
out:
	return rt;
}
510
#ifdef CONFIG_IPV6_ROUTER_PREF
/* Deferred neighbour-solicitation request, queued by rt6_probe() and
 * run from the workqueue by rt6_probe_deferred().
 */
struct __rt6_probe_work {
	struct work_struct work;
	struct in6_addr target;		/* gateway address to solicit */
	struct net_device *dev;		/* held (dev_hold) until the work runs */
};
517
518 static void rt6_probe_deferred(struct work_struct *w)
519 {
520         struct in6_addr mcaddr;
521         struct __rt6_probe_work *work =
522                 container_of(w, struct __rt6_probe_work, work);
523
524         addrconf_addr_solict_mult(&work->target, &mcaddr);
525         ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL);
526         dev_put(work->dev);
527         kfree(work);
528 }
529
/* Router Reachability Probing: when the gateway of @rt has no valid
 * neighbour entry, queue a deferred neighbour solicitation towards it.
 * For an existing neighbour the probe is rate-limited by
 * rtr_probe_interval via __neigh_set_probe_once().
 *
 * NOTE(review): the !neigh branch below queues a probe with no rate
 * limiting at all — confirm against upstream (later kernels added a
 * per-route limit here).
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct __rt6_probe_work *work;
	struct neighbour *neigh;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
		return;
	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		if (neigh->nud_state & NUD_VALID)
			goto out;

		/* re-check state under the neighbour lock before arming */
		work = NULL;
		write_lock(&neigh->lock);
		if (!(neigh->nud_state & NUD_VALID) &&
		    time_after(jiffies,
			       neigh->updated +
			       rt->rt6i_idev->cnf.rtr_probe_interval)) {
			work = kmalloc(sizeof(*work), GFP_ATOMIC);
			if (work)
				__neigh_set_probe_once(neigh);
		}
		write_unlock(&neigh->lock);
	} else {
		work = kmalloc(sizeof(*work), GFP_ATOMIC);
	}

	if (work) {
		INIT_WORK(&work->work, rt6_probe_deferred);
		work->target = rt->rt6i_gateway;
		dev_hold(rt->dst.dev);	/* released in rt6_probe_deferred() */
		work->dev = rt->dst.dev;
		schedule_work(&work->work);
	}

out:
	rcu_read_unlock_bh();
}
#else
/* Router Reachability Probing requires CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
581
582 /*
583  * Default Router Selection (RFC 2461 6.3.6)
584  */
585 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
586 {
587         struct net_device *dev = rt->dst.dev;
588         if (!oif || dev->ifindex == oif)
589                 return 2;
590         if ((dev->flags & IFF_LOOPBACK) &&
591             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
592                 return 1;
593         return 0;
594 }
595
/* Classify the reachability of @rt's next hop for route scoring.
 * Routes without a gateway (or flagged RTF_NONEXTHOP) always succeed.
 */
static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
{
	struct neighbour *neigh;
	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;

	if (rt->rt6i_flags & RTF_NONEXTHOP ||
	    !(rt->rt6i_flags & RTF_GATEWAY))
		return RT6_NUD_SUCCEED;

	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		read_lock(&neigh->lock);
		if (neigh->nud_state & NUD_VALID)
			ret = RT6_NUD_SUCCEED;
#ifdef CONFIG_IPV6_ROUTER_PREF
		/* with router preferences, anything short of NUD_FAILED
		 * is acceptable; NUD_FAILED demands a probe
		 */
		else if (!(neigh->nud_state & NUD_FAILED))
			ret = RT6_NUD_SUCCEED;
		else
			ret = RT6_NUD_FAIL_PROBE;
#endif
		read_unlock(&neigh->lock);
	} else {
		/* no neighbour entry yet: succeed when we can probe
		 * asynchronously, otherwise request round-robin
		 */
		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
	}
	rcu_read_unlock_bh();

	return ret;
}
626
/* Compute a selection score for @rt: device match (rt6_check_dev) in
 * the low bits, plus — with CONFIG_IPV6_ROUTER_PREF — the decoded RA
 * router preference from bit 2.  Returns a negative rt6_nud_state
 * value when the route must not be used.
 */
static int rt6_score_route(struct rt6_info *rt, int oif,
			   int strict)
{
	int m;

	m = rt6_check_dev(rt, oif);
	/* device mismatch is fatal when the oif is mandatory */
	if (!m && (strict & RT6_LOOKUP_F_IFACE))
		return RT6_NUD_FAIL_HARD;
#ifdef CONFIG_IPV6_ROUTER_PREF
	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
#endif
	if (strict & RT6_LOOKUP_F_REACHABLE) {
		int n = rt6_check_neigh(rt);
		if (n < 0)
			return n;
	}
	return m;
}
645
/* Consider @rt as a candidate and return the better of @rt and @match.
 * @mpri: in/out — best score seen so far.
 * @do_rr: set when the winning route's neighbour check asked for
 *         round-robin rotation (RT6_NUD_FAIL_DO_RR).
 */
static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
				   int *mpri, struct rt6_info *match,
				   bool *do_rr)
{
	int m;
	bool match_do_rr = false;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct net_device *dev = rt->dst.dev;

	/* optionally skip routes whose device has lost carrier */
	if (dev && !netif_carrier_ok(dev) &&
	    idev->cnf.ignore_routes_with_linkdown)
		goto out;

	if (rt6_check_expired(rt))
		goto out;

	m = rt6_score_route(rt, oif, strict);
	if (m == RT6_NUD_FAIL_DO_RR) {
		match_do_rr = true;
		m = 0; /* lowest valid score */
	} else if (m == RT6_NUD_FAIL_HARD) {
		goto out;
	}

	if (strict & RT6_LOOKUP_F_REACHABLE)
		rt6_probe(rt);

	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
	if (m > *mpri) {
		*do_rr = match_do_rr;
		*mpri = m;
		match = rt;
	}
out:
	return match;
}
682
/* Scan @fn's routes sharing @metric, round-robin style: first from
 * @rr_head to the end of the same-metric run, then wrapping from the
 * head of the leaf list back to @rr_head.  Routes with a different
 * metric (remembered in "cont") are only tried when no same-metric
 * route matched.
 */
static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
				     struct rt6_info *rr_head,
				     u32 metric, int oif, int strict,
				     bool *do_rr)
{
	struct rt6_info *rt, *match, *cont;
	int mpri = -1;

	match = NULL;
	cont = NULL;
	for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	/* wrap around: head of the leaf list up to rr_head */
	for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
		if (rt->rt6i_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	if (match || !cont)
		return match;

	/* nothing usable at @metric: fall back to the remaining routes */
	for (rt = cont; rt; rt = rt->dst.rt6_next)
		match = find_match(rt, oif, strict, &mpri, match, do_rr);

	return match;
}
719
/* Default router selection for fib node @fn (RFC 2461 6.3.6 style):
 * score the routes at the current round-robin position and rotate
 * fn->rr_ptr when the chosen route requested it.  Never returns NULL —
 * falls back to the namespace null entry.
 */
static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
{
	struct rt6_info *match, *rt0;
	struct net *net;
	bool do_rr = false;

	rt0 = fn->rr_ptr;
	if (!rt0)
		fn->rr_ptr = rt0 = fn->leaf;

	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
			     &do_rr);

	if (do_rr) {
		struct rt6_info *next = rt0->dst.rt6_next;

		/* no entries matched; do round-robin */
		if (!next || next->rt6i_metric != rt0->rt6i_metric)
			next = fn->leaf;

		if (next != rt0)
			fn->rr_ptr = next;
	}

	net = dev_net(rt0->dst.dev);
	return match ? match : net->ipv6.ip6_null_entry;
}
747
748 static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
749 {
750         return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
751 }
752
#ifdef CONFIG_IPV6_ROUTE_INFO
/* Process a Route Information option (@opt/@len) received in a Router
 * Advertisement from @gwaddr on @dev: validate it, then add, refresh,
 * or (on zero lifetime) delete the corresponding route.
 *
 * Returns 0 on success, -EINVAL on a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		/* >64-bit prefixes need at least length 2 (16 prefix bytes) */
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	/* a zero-length prefix names the advertising default router */
	if (rinfo->prefix_len == 0)
		rt = rt6_get_dflt_router(gwaddr, dev);
	else
		rt = rt6_get_route_info(dev, prefix, rinfo->prefix_len, gwaddr);

	/* zero lifetime: withdraw an existing route */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(dev, prefix, rinfo->prefix_len, gwaddr, pref);
	else if (rt)
		/* existing route: refresh its preference */
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime))
			rt6_clean_expires(rt);
		else
			rt6_set_expires(rt, jiffies + HZ * lifetime);

		ip6_rt_put(rt);
	}
	return 0;
}
#endif
825
/* Walk back up the fib trie from @fn until a node carrying routes
 * (RTN_RTINFO) is found, descending into a parent's source-address
 * subtree (looked up with @saddr) when one exists.  Returns NULL once
 * the tree root is reached.
 */
static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
					struct in6_addr *saddr)
{
	struct fib6_node *pn;
	while (1) {
		if (fn->fn_flags & RTN_TL_ROOT)
			return NULL;
		pn = fn->parent;
		if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
			fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
		else
			fn = pn;
		if (fn->fn_flags & RTN_RTINFO)
			return fn;
	}
}
842
/* Policy-routing lookup in a single table, without cloning: find the
 * fib node for the flow, match device/multipath, and backtrack up the
 * trie when only the null entry matched.  Takes tb6_lock for reading.
 * Returns a referenced route (via dst_use); never NULL — the null
 * entry is returned on failure.
 */
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	rt = fn->leaf;
	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
	/* ECMP selection only when no output device was forced */
	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto restart;
	}
	dst_use(&rt->dst, jiffies);
	read_unlock_bh(&table->tb6_lock);
	return rt;

}
867
/* Flow-based route lookup across policy rules, using the non-cloning
 * per-table lookup above.
 */
struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
				    int flags)
{
	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
}
EXPORT_SYMBOL_GPL(ip6_route_lookup);
874
875 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
876                             const struct in6_addr *saddr, int oif, int strict)
877 {
878         struct flowi6 fl6 = {
879                 .flowi6_oif = oif,
880                 .daddr = *daddr,
881         };
882         struct dst_entry *dst;
883         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
884
885         if (saddr) {
886                 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
887                 flags |= RT6_LOOKUP_F_HAS_SADDR;
888         }
889
890         dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
891         if (dst->error == 0)
892                 return (struct rt6_info *) dst;
893
894         dst_release(dst);
895
896         return NULL;
897 }
898 EXPORT_SYMBOL(rt6_lookup);
899
/* ip6_ins_rt is called with FREE table->tb6_lock.
   It takes a new route entry; if the addition fails for any reason, the
   route is freed.  In any case, if the caller does not hold a reference
   to it, the route may be destroyed.
 */
905
906 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
907                         struct mx6_config *mxc)
908 {
909         int err;
910         struct fib6_table *table;
911
912         table = rt->rt6i_table;
913         write_lock_bh(&table->tb6_lock);
914         err = fib6_add(&table->tb6_root, rt, info, mxc);
915         write_unlock_bh(&table->tb6_lock);
916
917         return err;
918 }
919
920 int ip6_ins_rt(struct rt6_info *rt)
921 {
922         struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
923         struct mx6_config mxc = { .mx = NULL, };
924
925         return __ip6_ins_rt(rt, &info, &mxc);
926 }
927
/* Allocate an RTF_CACHE clone of @ort for the exact destination @daddr
 * (and, with subtrees, the exact source @saddr).  The clone is a /128
 * host route not owned by the fib6 tree.  Returns NULL on allocation
 * failure.
 */
static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
                                           const struct in6_addr *daddr,
                                           const struct in6_addr *saddr)
{
        struct rt6_info *rt;

        /*
         *      Clone the route.
         */

        /* Clone from the original fib entry, never from another clone
         * or a per-cpu copy.
         */
        if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
                ort = (struct rt6_info *)ort->dst.from;

        rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);

        if (!rt)
                return NULL;

        ip6_rt_copy_init(rt, ort);
        rt->rt6i_flags |= RTF_CACHE;
        rt->rt6i_metric = 0;
        rt->dst.flags |= DST_HOST;
        rt->rt6i_dst.addr = *daddr;
        rt->rt6i_dst.plen = 128;

        if (!rt6_is_gw_or_nonexthop(ort)) {
                /* A non-host route whose prefix address equals @daddr is
                 * treated as anycast.
                 */
                if (ort->rt6i_dst.plen != 128 &&
                    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
                        rt->rt6i_flags |= RTF_ANYCAST;
#ifdef CONFIG_IPV6_SUBTREES
                if (rt->rt6i_src.plen && saddr) {
                        rt->rt6i_src.addr = *saddr;
                        rt->rt6i_src.plen = 128;
                }
#endif
        }

        return rt;
}
967
968 static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
969 {
970         struct rt6_info *pcpu_rt;
971
972         pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
973                                   rt->dst.dev, rt->dst.flags);
974
975         if (!pcpu_rt)
976                 return NULL;
977         ip6_rt_copy_init(pcpu_rt, rt);
978         pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
979         pcpu_rt->rt6i_flags |= RTF_PCPU;
980         return pcpu_rt;
981 }
982
/* It should be called with read_lock_bh(&tb6_lock) acquired */
/* Return this cpu's cached clone of @rt with a reference held, or NULL
 * if no clone has been made yet on this cpu.
 */
static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
{
        struct rt6_info *pcpu_rt, **p;

        p = this_cpu_ptr(rt->rt6i_pcpu);
        pcpu_rt = *p;

        if (pcpu_rt) {
                dst_hold(&pcpu_rt->dst);
                /* Re-sync inherited metrics before handing the dst out */
                rt6_dst_from_metrics_check(pcpu_rt);
        }
        return pcpu_rt;
}
997
/* Create and install this cpu's clone of @rt.  On allocation failure
 * the null entry is returned instead.  If another context installed a
 * clone first, that winner is returned.  The result always carries a
 * reference.
 */
static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
{
        struct fib6_table *table = rt->rt6i_table;
        struct rt6_info *pcpu_rt, *prev, **p;

        pcpu_rt = ip6_rt_pcpu_alloc(rt);
        if (!pcpu_rt) {
                struct net *net = dev_net(rt->dst.dev);

                dst_hold(&net->ipv6.ip6_null_entry->dst);
                return net->ipv6.ip6_null_entry;
        }

        read_lock_bh(&table->tb6_lock);
        if (rt->rt6i_pcpu) {
                p = this_cpu_ptr(rt->rt6i_pcpu);
                /* Atomically publish; only one installer can win */
                prev = cmpxchg(p, NULL, pcpu_rt);
                if (prev) {
                        /* If someone did it before us, return prev instead */
                        dst_destroy(&pcpu_rt->dst);
                        pcpu_rt = prev;
                }
        } else {
                /* rt has been removed from the fib6 tree
                 * before we have a chance to acquire the read_lock.
                 * In this case, don't bother to create a pcpu rt
                 * since rt is going away anyway.  The next
                 * dst_check() will trigger a re-lookup.
                 */
                dst_destroy(&pcpu_rt->dst);
                pcpu_rt = rt;
        }
        dst_hold(&pcpu_rt->dst);
        rt6_dst_from_metrics_check(pcpu_rt);
        read_unlock_bh(&table->tb6_lock);
        return pcpu_rt;
}
1035
/* Core policy routing: find a route for @fl6 in @table.
 *
 * Three outcomes are possible:
 *   - the null entry or an existing RTF_CACHE clone is returned as-is;
 *   - with FLOWI_FLAG_KNOWN_NH and no gateway, a fresh uncached
 *     RTF_CACHE clone is created;
 *   - otherwise a per-cpu copy of the fib entry is returned.
 * The returned dst always carries a reference.
 */
static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
                                      struct flowi6 *fl6, int flags)
{
        struct fib6_node *fn, *saved_fn;
        struct rt6_info *rt;
        int strict = 0;

        strict |= flags & RT6_LOOKUP_F_IFACE;
        /* Hosts (not forwarding) prefer routers known to be reachable */
        if (net->ipv6.devconf_all->forwarding == 0)
                strict |= RT6_LOOKUP_F_REACHABLE;

        read_lock_bh(&table->tb6_lock);

        fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
        saved_fn = fn;

        if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
                oif = 0;

redo_rt6_select:
        rt = rt6_select(fn, oif, strict);
        if (rt->rt6i_nsiblings)
                rt = rt6_multipath_select(rt, fl6, oif, strict);
        if (rt == net->ipv6.ip6_null_entry) {
                /* Walk back up the tree; if that fails, retry from the
                 * original node without the reachability requirement.
                 */
                fn = fib6_backtrack(fn, &fl6->saddr);
                if (fn)
                        goto redo_rt6_select;
                else if (strict & RT6_LOOKUP_F_REACHABLE) {
                        /* also consider unreachable route */
                        strict &= ~RT6_LOOKUP_F_REACHABLE;
                        fn = saved_fn;
                        goto redo_rt6_select;
                }
        }


        if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
                dst_use(&rt->dst, jiffies);
                read_unlock_bh(&table->tb6_lock);

                rt6_dst_from_metrics_check(rt);
                return rt;
        } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
                            !(rt->rt6i_flags & RTF_GATEWAY))) {
                /* Create a RTF_CACHE clone which will not be
                 * owned by the fib6 tree.  It is for the special case where
                 * the daddr in the skb during the neighbor look-up is different
                 * from the fl6->daddr used to look-up route here.
                 */

                struct rt6_info *uncached_rt;

                dst_use(&rt->dst, jiffies);
                read_unlock_bh(&table->tb6_lock);

                uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
                dst_release(&rt->dst);

                if (uncached_rt)
                        rt6_uncached_list_add(uncached_rt);
                else
                        uncached_rt = net->ipv6.ip6_null_entry;

                dst_hold(&uncached_rt->dst);
                return uncached_rt;

        } else {
                /* Get a percpu copy */

                struct rt6_info *pcpu_rt;

                rt->dst.lastuse = jiffies;
                rt->dst.__use++;
                pcpu_rt = rt6_get_pcpu_route(rt);

                if (pcpu_rt) {
                        read_unlock_bh(&table->tb6_lock);
                } else {
                        /* We have to do the read_unlock first
                         * because rt6_make_pcpu_route() may trigger
                         * ip6_dst_gc() which will take the write_lock.
                         */
                        dst_hold(&rt->dst);
                        read_unlock_bh(&table->tb6_lock);
                        pcpu_rt = rt6_make_pcpu_route(rt);
                        dst_release(&rt->dst);
                }

                return pcpu_rt;

        }
}
1128
1129 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
1130                                             struct flowi6 *fl6, int flags)
1131 {
1132         return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
1133 }
1134
1135 static struct dst_entry *ip6_route_input_lookup(struct net *net,
1136                                                 struct net_device *dev,
1137                                                 struct flowi6 *fl6, int flags)
1138 {
1139         if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1140                 flags |= RT6_LOOKUP_F_IFACE;
1141
1142         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1143 }
1144
/* Route an incoming IPv6 packet: build a flow key from its header and
 * attach the looked-up dst to the skb.
 */
void ip6_route_input(struct sk_buff *skb)
{
        const struct ipv6hdr *iph = ipv6_hdr(skb);
        struct net *net = dev_net(skb->dev);
        int flags = RT6_LOOKUP_F_HAS_SADDR;
        struct ip_tunnel_info *tun_info;
        struct flowi6 fl6 = {
                .flowi6_iif = l3mdev_fib_oif(skb->dev),
                .daddr = iph->daddr,
                .saddr = iph->saddr,
                .flowlabel = ip6_flowinfo(iph),
                .flowi6_mark = skb->mark,
                .flowi6_proto = iph->nexthdr,
        };

        /* For RX-side collected tunnel metadata, also key on tunnel id */
        tun_info = skb_tunnel_info(skb);
        if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
                fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
        /* Drop any stale dst before attaching the fresh result */
        skb_dst_drop(skb);
        skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
}
1166
1167 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1168                                              struct flowi6 *fl6, int flags)
1169 {
1170         return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1171 }
1172
/* Resolve an output route for @fl6 on behalf of @sk (may be NULL).
 * An l3mdev (VRF) bound to the requested oif gets first crack at it.
 */
struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
                                         struct flowi6 *fl6, int flags)
{
        struct dst_entry *dst;
        bool any_src;

        dst = l3mdev_rt6_dst_by_oif(net, fl6);
        if (dst)
                return dst;

        fl6->flowi6_iif = LOOPBACK_IFINDEX;

        any_src = ipv6_addr_any(&fl6->saddr);
        /* Force a device match when the socket is bound to a device,
         * the destination needs a scope, or an oif was given without a
         * source address.
         */
        if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
            (fl6->flowi6_oif && any_src))
                flags |= RT6_LOOKUP_F_IFACE;

        if (!any_src)
                flags |= RT6_LOOKUP_F_HAS_SADDR;
        else if (sk)
                /* honour the socket's source-address preferences */
                flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);

        return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
}
EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1198
/* Build a blackhole dst mirroring @dst_orig's addressing but discarding
 * every packet (input and output).  Consumes the reference on
 * @dst_orig.  Returns the new dst or ERR_PTR(-ENOMEM).
 */
struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
        struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
        struct dst_entry *new = NULL;

        rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
        if (rt) {
                rt6_info_init(rt);

                new = &rt->dst;
                new->__use = 1;
                new->input = dst_discard;
                new->output = dst_discard_out;

                dst_copy_metrics(new, &ort->dst);
                rt->rt6i_idev = ort->rt6i_idev;
                if (rt->rt6i_idev)
                        in6_dev_hold(rt->rt6i_idev);

                rt->rt6i_gateway = ort->rt6i_gateway;
                rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
                rt->rt6i_metric = 0;

                memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
                memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif

                /* NOTE(review): relies on dst_free() deferring the actual
                 * free until the last reference (the one taken by
                 * dst_alloc above and returned to the caller) is dropped.
                 */
                dst_free(new);
        }

        dst_release(dst_orig);
        return new ? new : ERR_PTR(-ENOMEM);
}
1233
1234 /*
1235  *      Destination cache support functions
1236  */
1237
/* Clones inherit metrics from their parent (dst.from).  If the parent's
 * metrics array was replaced, repoint this dst at the new array
 * (read-only).
 */
static void rt6_dst_from_metrics_check(struct rt6_info *rt)
{
        if (rt->dst.from &&
            dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
                dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
}
1244
1245 static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1246 {
1247         if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1248                 return NULL;
1249
1250         if (rt6_check_expired(rt))
1251                 return NULL;
1252
1253         return &rt->dst;
1254 }
1255
1256 static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1257 {
1258         if (!__rt6_check_expired(rt) &&
1259             rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1260             rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1261                 return &rt->dst;
1262         else
1263                 return NULL;
1264 }
1265
/* dst_ops->check: revalidate a cached dst against @cookie.  Returns the
 * dst if still usable, NULL to force a fresh route lookup.
 */
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
{
        struct rt6_info *rt;

        rt = (struct rt6_info *) dst;

        /* All IPV6 dsts are created with ->obsolete set to the value
         * DST_OBSOLETE_FORCE_CHK which forces validation calls down
         * into this function always.
         */

        rt6_dst_from_metrics_check(rt);

        /* Per-cpu copies and uncached clones validate through their
         * parent (dst.from); tree-owned routes validate directly
         * against the fib serial number in the cookie.
         */
        if (rt->rt6i_flags & RTF_PCPU ||
            (unlikely(dst->flags & DST_NOCACHE) && rt->dst.from))
                return rt6_dst_from_check(rt, cookie);
        else
                return rt6_check(rt, cookie);
}
1285
/* dst_ops->negative_advice: a socket suspects its cached route is bad.
 * Expired cache clones are removed from the tree; non-cache dsts are
 * released so the next lookup starts fresh.  Returns the dst to keep
 * using, or NULL.
 */
static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
{
        struct rt6_info *rt = (struct rt6_info *) dst;

        if (rt) {
                if (rt->rt6i_flags & RTF_CACHE) {
                        if (rt6_check_expired(rt)) {
                                ip6_del_rt(rt);
                                dst = NULL;
                        }
                } else {
                        dst_release(dst);
                        dst = NULL;
                }
        }
        return dst;
}
1303
/* dst_ops->link_failure: the next hop became unreachable.  Report an
 * address-unreachable ICMPv6 error, then either delete the cached clone
 * or, for a default route, invalidate the fib node's serial number so
 * cached dsts fail their next rt6_check().
 */
static void ip6_link_failure(struct sk_buff *skb)
{
        struct rt6_info *rt;

        icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);

        rt = (struct rt6_info *) skb_dst(skb);
        if (rt) {
                if (rt->rt6i_flags & RTF_CACHE) {
                        /* extra hold balances the put inside ip6_del_rt */
                        dst_hold(&rt->dst);
                        ip6_del_rt(rt);
                } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
                        /* -1 can never match a lookup-time cookie */
                        rt->rt6i_node->fn_sernum = -1;
                }
        }
}
1320
1321 static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1322 {
1323         struct net *net = dev_net(rt->dst.dev);
1324
1325         rt->rt6i_flags |= RTF_MODIFIED;
1326         rt->rt6i_pmtu = mtu;
1327         rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1328 }
1329
1330 static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1331 {
1332         return !(rt->rt6i_flags & RTF_CACHE) &&
1333                 (rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
1334 }
1335
/* Apply a PMTU update (host byte order) to @dst.  Routes that may not
 * store a pmtu directly get a dedicated RTF_CACHE clone carrying the
 * new value; the destination is taken from @iph or, failing that, @sk.
 */
static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
                                 const struct ipv6hdr *iph, u32 mtu)
{
        struct rt6_info *rt6 = (struct rt6_info *)dst;

        if (rt6->rt6i_flags & RTF_LOCAL)
                return;

        dst_confirm(dst);
        /* never shrink below the IPv6 minimum MTU */
        mtu = max_t(u32, mtu, IPV6_MIN_MTU);
        if (mtu >= dst_mtu(dst))
                return;

        if (!rt6_cache_allowed_for_pmtu(rt6)) {
                rt6_do_update_pmtu(rt6, mtu);
        } else {
                const struct in6_addr *daddr, *saddr;
                struct rt6_info *nrt6;

                if (iph) {
                        daddr = &iph->daddr;
                        saddr = &iph->saddr;
                } else if (sk) {
                        daddr = &sk->sk_v6_daddr;
                        saddr = &inet6_sk(sk)->saddr;
                } else {
                        return;
                }
                nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
                if (nrt6) {
                        rt6_do_update_pmtu(nrt6, mtu);

                        /* ip6_ins_rt(nrt6) will bump the
                         * rt6->rt6i_node->fn_sernum
                         * which will fail the next rt6_check() and
                         * invalidate the sk->sk_dst_cache.
                         */
                        ip6_ins_rt(nrt6);
                }
        }
}
1377
1378 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1379                                struct sk_buff *skb, u32 mtu)
1380 {
1381         __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1382 }
1383
/* Apply a PMTU update for the flow described by the IPv6 header at the
 * start of @skb's data.  @mtu is in network byte order (see ntohl
 * below).  A zero @mark falls back to the netns reply-mark policy.
 */
void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
                     int oif, u32 mark, kuid_t uid)
{
        const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
        struct dst_entry *dst;
        struct flowi6 fl6;

        memset(&fl6, 0, sizeof(fl6));
        fl6.flowi6_oif = oif;
        fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
        fl6.daddr = iph->daddr;
        fl6.saddr = iph->saddr;
        fl6.flowlabel = ip6_flowinfo(iph);
        fl6.flowi6_uid = uid;

        dst = ip6_route_output(net, NULL, &fl6);
        if (!dst->error)
                __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
        dst_release(dst);
}
EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1405
1406 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1407 {
1408         ip6_update_pmtu(skb, sock_net(sk), mtu,
1409                         sk->sk_bound_dev_if, sk->sk_mark, sock_i_uid(sk));
1410 }
1411 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1412
/* Handle redirects */
/* Extended flow key for redirect validation: carries the address of the
 * router that sent the redirect alongside the ordinary flow.
 */
struct ip6rd_flowi {
        struct flowi6 fl6;       /* must stay first: cast from struct flowi6 */
        struct in6_addr gateway; /* router the redirect came from */
};
1418
1419 static struct rt6_info *__ip6_route_redirect(struct net *net,
1420                                              struct fib6_table *table,
1421                                              struct flowi6 *fl6,
1422                                              int flags)
1423 {
1424         struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1425         struct rt6_info *rt;
1426         struct fib6_node *fn;
1427
1428         /* Get the "current" route for this destination and
1429          * check if the redirect has come from approriate router.
1430          *
1431          * RFC 4861 specifies that redirects should only be
1432          * accepted if they come from the nexthop to the target.
1433          * Due to the way the routes are chosen, this notion
1434          * is a bit fuzzy and one might need to check all possible
1435          * routes.
1436          */
1437
1438         read_lock_bh(&table->tb6_lock);
1439         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1440 restart:
1441         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1442                 if (rt6_check_expired(rt))
1443                         continue;
1444                 if (rt->dst.error)
1445                         break;
1446                 if (!(rt->rt6i_flags & RTF_GATEWAY))
1447                         continue;
1448                 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1449                         continue;
1450                 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1451                         continue;
1452                 break;
1453         }
1454
1455         if (!rt)
1456                 rt = net->ipv6.ip6_null_entry;
1457         else if (rt->dst.error) {
1458                 rt = net->ipv6.ip6_null_entry;
1459                 goto out;
1460         }
1461
1462         if (rt == net->ipv6.ip6_null_entry) {
1463                 fn = fib6_backtrack(fn, &fl6->saddr);
1464                 if (fn)
1465                         goto restart;
1466         }
1467
1468 out:
1469         dst_hold(&rt->dst);
1470
1471         read_unlock_bh(&table->tb6_lock);
1472
1473         return rt;
1474 };
1475
1476 static struct dst_entry *ip6_route_redirect(struct net *net,
1477                                         const struct flowi6 *fl6,
1478                                         const struct in6_addr *gateway)
1479 {
1480         int flags = RT6_LOOKUP_F_HAS_SADDR;
1481         struct ip6rd_flowi rdfl;
1482
1483         rdfl.fl6 = *fl6;
1484         rdfl.gateway = *gateway;
1485
1486         return fib6_rule_lookup(net, &rdfl.fl6,
1487                                 flags, __ip6_route_redirect);
1488 }
1489
/* Process a redirect for the flow described by the IPv6 header at the
 * start of @skb's data; @skb is the ICMPv6 redirect message itself.
 */
void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
{
        const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
        struct dst_entry *dst;
        struct flowi6 fl6;

        memset(&fl6, 0, sizeof(fl6));
        fl6.flowi6_iif = LOOPBACK_IFINDEX;
        fl6.flowi6_oif = oif;
        fl6.flowi6_mark = mark;
        fl6.daddr = iph->daddr;
        fl6.saddr = iph->saddr;
        fl6.flowlabel = ip6_flowinfo(iph);

        /* the outer header's source is the announcing router */
        dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
        rt6_do_redirect(dst, NULL, skb);
        dst_release(dst);
}
EXPORT_SYMBOL_GPL(ip6_redirect);
1509
/* Process a redirect when the embedded packet header is unavailable:
 * build the flow from the redirect message's destination field and the
 * outer header instead.
 */
void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
                            u32 mark)
{
        const struct ipv6hdr *iph = ipv6_hdr(skb);
        const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
        struct dst_entry *dst;
        struct flowi6 fl6;

        memset(&fl6, 0, sizeof(fl6));
        fl6.flowi6_iif = LOOPBACK_IFINDEX;
        fl6.flowi6_oif = oif;
        fl6.flowi6_mark = mark;
        fl6.daddr = msg->dest;
        /* the redirect was addressed to us: outer daddr is our source */
        fl6.saddr = iph->daddr;

        dst = ip6_route_redirect(net, &fl6, &iph->saddr);
        rt6_do_redirect(dst, NULL, skb);
        dst_release(dst);
}
1529
1530 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1531 {
1532         ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1533 }
1534 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1535
1536 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1537 {
1538         struct net_device *dev = dst->dev;
1539         unsigned int mtu = dst_mtu(dst);
1540         struct net *net = dev_net(dev);
1541
1542         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1543
1544         if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1545                 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1546
1547         /*
1548          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1549          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1550          * IPV6_MAXPLEN is also valid and means: "any MSS,
1551          * rely only on pmtu discovery"
1552          */
1553         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1554                 mtu = IPV6_MAXPLEN;
1555         return mtu;
1556 }
1557
/* dst_ops->mtu: effective MTU for this dst, preferring a learned path
 * MTU, then the route's MTU metric, then the interface's IPv6 MTU
 * (IPV6_MIN_MTU if no inet6 device), capped at IP6_MAX_MTU.
 */
static unsigned int ip6_mtu(const struct dst_entry *dst)
{
        const struct rt6_info *rt = (const struct rt6_info *)dst;
        unsigned int mtu = rt->rt6i_pmtu;
        struct inet6_dev *idev;

        if (mtu)
                goto out;

        mtu = dst_metric_raw(dst, RTAX_MTU);
        if (mtu)
                goto out;

        mtu = IPV6_MIN_MTU;

        rcu_read_lock();
        idev = __in6_dev_get(dst->dev);
        if (idev)
                mtu = idev->cnf.mtu6;
        rcu_read_unlock();

out:
        return min_t(unsigned int, mtu, IP6_MAX_MTU);
}
1582
/* Singly-linked list (via dst.next) of dsts handed out by
 * icmp6_dst_alloc() and reaped by icmp6_dst_gc(); protected by
 * icmp6_dst_lock.
 */
static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);
1585
/* Allocate a dedicated dst for sending an ICMPv6 message described by
 * @fl6 out of @dev.  The dst is chained onto icmp6_dst_gc_list for
 * later garbage collection and passed through xfrm policy lookup.
 * Returns the dst or an ERR_PTR.
 */
struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
                                  struct flowi6 *fl6)
{
        struct dst_entry *dst;
        struct rt6_info *rt;
        struct inet6_dev *idev = in6_dev_get(dev);
        struct net *net = dev_net(dev);

        if (unlikely(!idev))
                return ERR_PTR(-ENODEV);

        rt = ip6_dst_alloc(net, dev, 0);
        if (unlikely(!rt)) {
                in6_dev_put(idev);
                dst = ERR_PTR(-ENOMEM);
                goto out;
        }

        rt->dst.flags |= DST_HOST;
        rt->dst.output  = ip6_output;
        /* caller's reference; dropped counts are what icmp6_dst_gc reaps */
        atomic_set(&rt->dst.__refcnt, 1);
        rt->rt6i_gateway  = fl6->daddr;
        rt->rt6i_dst.addr = fl6->daddr;
        rt->rt6i_dst.plen = 128;
        rt->rt6i_idev     = idev;
        dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);

        /* make the dst reapable by icmp6_dst_gc() */
        spin_lock_bh(&icmp6_dst_lock);
        rt->dst.next = icmp6_dst_gc_list;
        icmp6_dst_gc_list = &rt->dst;
        spin_unlock_bh(&icmp6_dst_lock);

        fib6_force_start_gc(net);

        dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);

out:
        return dst;
}
1625
/* Reap unreferenced entries from icmp6_dst_gc_list.  Returns nonzero
 * when referenced entries remain (more GC passes will be needed).
 */
int icmp6_dst_gc(void)
{
        struct dst_entry *dst, **pprev;
        int more = 0;

        spin_lock_bh(&icmp6_dst_lock);
        pprev = &icmp6_dst_gc_list;

        while ((dst = *pprev) != NULL) {
                if (!atomic_read(&dst->__refcnt)) {
                        /* unlink and free the dead entry */
                        *pprev = dst->next;
                        dst_free(dst);
                } else {
                        /* still in use: skip over it */
                        pprev = &dst->next;
                        ++more;
                }
        }

        spin_unlock_bh(&icmp6_dst_lock);

        return more;
}
1648
/* Unlink and free every ICMPv6 dst for which @func(rt, arg) returns
 * nonzero, regardless of its reference count.
 */
static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
                            void *arg)
{
        struct dst_entry *dst, **pprev;

        spin_lock_bh(&icmp6_dst_lock);
        pprev = &icmp6_dst_gc_list;
        while ((dst = *pprev) != NULL) {
                struct rt6_info *rt = (struct rt6_info *) dst;
                if (func(rt, arg)) {
                        *pprev = dst->next;
                        dst_free(dst);
                } else {
                        pprev = &dst->next;
                }
        }
        spin_unlock_bh(&icmp6_dst_lock);
}
1667
/* dst_ops->gc: run on dst allocation pressure.  Triggers a fib6 GC pass
 * when the interval has elapsed or the table is over rt_max_size.
 * Returns nonzero when still over rt_max_size (the allocation should
 * then fail).
 */
static int ip6_dst_gc(struct dst_ops *ops)
{
        struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
        int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
        int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
        int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
        int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
        unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
        int entries;

        entries = dst_entries_get_fast(ops);
        if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
            entries <= rt_max_size)
                goto out;

        /* each consecutive pass GCs with a more aggressive expiry */
        net->ipv6.ip6_rt_gc_expire++;
        fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
        entries = dst_entries_get_slow(ops);
        if (entries < ops->gc_thresh)
                net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
out:
        /* decay the aggressiveness by the configured elasticity */
        net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
        return entries > rt_max_size;
}
1692
/* Translate the RTA_METRICS netlink attribute stream in @cfg into an
 * RTAX_MAX-sized metrics array stored in @mxc (caller frees mxc->mx).
 * Returns 0 on success, -ENOMEM on allocation failure, -EINVAL on a
 * malformed attribute.  A missing fc_mx is not an error.
 */
static int ip6_convert_metrics(struct mx6_config *mxc,
                               const struct fib6_config *cfg)
{
        bool ecn_ca = false;
        struct nlattr *nla;
        int remaining;
        u32 *mp;

        if (!cfg->fc_mx)
                return 0;

        mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
        if (unlikely(!mp))
                return -ENOMEM;

        nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
                int type = nla_type(nla);
                u32 val;

                if (!type)
                        continue;
                if (unlikely(type > RTAX_MAX))
                        goto err;

                if (type == RTAX_CC_ALGO) {
                        /* congestion-control metric arrives as a name
                         * string; map it to the algorithm's key
                         */
                        char tmp[TCP_CA_NAME_MAX];

                        nla_strlcpy(tmp, nla, sizeof(tmp));
                        val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
                        if (val == TCP_CA_UNSPEC)
                                goto err;
                } else {
                        val = nla_get_u32(nla);
                }
                /* hop limit is an 8-bit field on the wire */
                if (type == RTAX_HOPLIMIT && val > 255)
                        val = 255;
                if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
                        goto err;

                mp[type - 1] = val;
                __set_bit(type - 1, mxc->mx_valid);
        }

        if (ecn_ca) {
                /* the chosen CC algorithm implies ECN capability */
                __set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
                mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
        }

        mxc->mx = mp;
        return 0;
 err:
        kfree(mp);
        return -EINVAL;
}
1747
/* Allocate and initialise a rt6_info from a fib6_config, resolving the
 * output device, the FIB table and the next-hop gateway.
 *
 * On success the route is NOT yet inserted into the FIB: the caller owns
 * it (together with the dev/idev references stored in it) and must either
 * insert it or dst_free() it.  On failure an ERR_PTR() is returned and
 * all temporary references are dropped here.
 */
static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
{
	struct net *net = cfg->fc_nlinfo.nl_net;
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	struct fib6_table *table;
	int addr_type;
	int err = -EINVAL;

	/* Prefix lengths beyond 128 bits are malformed. */
	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
		goto out;
#ifndef CONFIG_IPV6_SUBTREES
	/* Source-prefix routing needs FIB subtree support. */
	if (cfg->fc_src_len)
		goto out;
#endif
	if (cfg->fc_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(net, cfg->fc_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (cfg->fc_metric == 0)
		cfg->fc_metric = IP6_RT_PRIO_USER;

	err = -ENOBUFS;
	/* Without NLM_F_CREATE only look the table up; warn (but still
	 * create it) when it does not exist yet.
	 */
	if (cfg->fc_nlinfo.nlh &&
	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
		table = fib6_get_table(net, cfg->fc_table);
		if (!table) {
			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
			table = fib6_new_table(net, cfg->fc_table);
		}
	} else {
		table = fib6_new_table(net, cfg->fc_table);
	}

	if (!table)
		goto out;

	rt = ip6_dst_alloc(net, NULL,
			   (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);

	if (!rt) {
		err = -ENOMEM;
		goto out;
	}

	if (cfg->fc_flags & RTF_EXPIRES)
		rt6_set_expires(rt, jiffies +
				clock_t_to_jiffies(cfg->fc_expires));
	else
		rt6_clean_expires(rt);

	if (cfg->fc_protocol == RTPROT_UNSPEC)
		cfg->fc_protocol = RTPROT_BOOT;
	rt->rt6i_protocol = cfg->fc_protocol;

	addr_type = ipv6_addr_type(&cfg->fc_dst);

	/* Pick the input handler from the destination's address class. */
	if (addr_type & IPV6_ADDR_MULTICAST)
		rt->dst.input = ip6_mc_input;
	else if (cfg->fc_flags & RTF_LOCAL)
		rt->dst.input = ip6_input;
	else
		rt->dst.input = ip6_forward;

	rt->dst.output = ip6_output;

	if (cfg->fc_encap) {
		struct lwtunnel_state *lwtstate;

		err = lwtunnel_build_state(dev, cfg->fc_encap_type,
					   cfg->fc_encap, AF_INET6, cfg,
					   &lwtstate);
		if (err)
			goto out;
		rt->dst.lwtstate = lwtstate_get(lwtstate);
		/* Let the lightweight tunnel wrap the I/O paths, keeping
		 * the original handlers for it to chain to.
		 */
		if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
			rt->dst.lwtstate->orig_output = rt->dst.output;
			rt->dst.output = lwtunnel_output;
		}
		if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
			rt->dst.lwtstate->orig_input = rt->dst.input;
			rt->dst.input = lwtunnel_input;
		}
	}

	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
	rt->rt6i_dst.plen = cfg->fc_dst_len;
	if (rt->rt6i_dst.plen == 128)
		rt->dst.flags |= DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
	rt->rt6i_src.plen = cfg->fc_src_len;
#endif

	rt->rt6i_metric = cfg->fc_metric;

	/* We cannot add true routes via loopback here,
	   they would result in kernel looping; promote them to reject routes
	 */
	if ((cfg->fc_flags & RTF_REJECT) ||
	    (dev && (dev->flags & IFF_LOOPBACK) &&
	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
	     !(cfg->fc_flags & RTF_LOCAL))) {
		/* hold loopback dev/idev if we haven't done so. */
		if (dev != net->loopback_dev) {
			if (dev) {
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = net->loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		/* Map the route type onto the error each dropped packet
		 * will report.
		 */
		switch (cfg->fc_type) {
		case RTN_BLACKHOLE:
			rt->dst.error = -EINVAL;
			rt->dst.output = dst_discard_out;
			rt->dst.input = dst_discard;
			break;
		case RTN_PROHIBIT:
			rt->dst.error = -EACCES;
			rt->dst.output = ip6_pkt_prohibit_out;
			rt->dst.input = ip6_pkt_prohibit;
			break;
		case RTN_THROW:
		case RTN_UNREACHABLE:
		default:
			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
					: (cfg->fc_type == RTN_UNREACHABLE)
					? -EHOSTUNREACH : -ENETUNREACH;
			rt->dst.output = ip6_pkt_discard_out;
			rt->dst.input = ip6_pkt_discard;
			break;
		}
		goto install_route;
	}

	if (cfg->fc_flags & RTF_GATEWAY) {
		const struct in6_addr *gw_addr;
		int gwa_type;

		gw_addr = &cfg->fc_gateway;
		gwa_type = ipv6_addr_type(gw_addr);

		/* if gw_addr is local we will fail to detect this in case
		 * address is still TENTATIVE (DAD in progress). rt6_lookup()
		 * will return already-added prefix route via interface that
		 * prefix route was assigned to, which might be non-loopback.
		 */
		err = -EINVAL;
		if (ipv6_chk_addr_and_flags(net, gw_addr,
					    gwa_type & IPV6_ADDR_LINKLOCAL ?
					    dev : NULL, 0, 0))
			goto out;

		rt->rt6i_gateway = *gw_addr;

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt;

			/* IPv6 strictly inhibits using not link-local
			   addresses as nexthop address.
			   Otherwise, router will not able to send redirects.
			   It is very good, but in some (rare!) circumstances
			   (SIT, PtP, NBMA NOARP links) it is handy to allow
			   some exceptions. --ANK
			 */
			if (!(gwa_type & IPV6_ADDR_UNICAST))
				goto out;

			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);

			err = -EHOSTUNREACH;
			if (!grt)
				goto out;
			if (dev) {
				if (dev != grt->dst.dev) {
					ip6_rt_put(grt);
					goto out;
				}
			} else {
				/* No device given: inherit it (and its idev)
				 * from the route that resolves the gateway.
				 */
				dev = grt->dst.dev;
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
			if (!(grt->rt6i_flags & RTF_GATEWAY))
				err = 0;
			ip6_rt_put(grt);

			if (err)
				goto out;
		}
		err = -EINVAL;
		if (!dev || (dev->flags & IFF_LOOPBACK))
			goto out;
	}

	err = -ENODEV;
	if (!dev)
		goto out;

	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
		/* A preferred source must be an address on the device. */
		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
			err = -EINVAL;
			goto out;
		}
		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
		rt->rt6i_prefsrc.plen = 128;
	} else
		rt->rt6i_prefsrc.plen = 0;

	rt->rt6i_flags = cfg->fc_flags;

install_route:
	rt->dst.dev = dev;
	rt->rt6i_idev = idev;
	rt->rt6i_table = table;

	cfg->fc_nlinfo.nl_net = dev_net(dev);

	return rt;
out:
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_free(&rt->dst);

	return ERR_PTR(err);
}
1993
1994 int ip6_route_add(struct fib6_config *cfg)
1995 {
1996         struct mx6_config mxc = { .mx = NULL, };
1997         struct rt6_info *rt;
1998         int err;
1999
2000         rt = ip6_route_info_create(cfg);
2001         if (IS_ERR(rt)) {
2002                 err = PTR_ERR(rt);
2003                 rt = NULL;
2004                 goto out;
2005         }
2006
2007         err = ip6_convert_metrics(&mxc, cfg);
2008         if (err)
2009                 goto out;
2010
2011         err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
2012
2013         kfree(mxc.mx);
2014
2015         return err;
2016 out:
2017         if (rt)
2018                 dst_free(&rt->dst);
2019
2020         return err;
2021 }
2022
2023 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
2024 {
2025         int err;
2026         struct fib6_table *table;
2027         struct net *net = dev_net(rt->dst.dev);
2028
2029         if (rt == net->ipv6.ip6_null_entry ||
2030             rt->dst.flags & DST_NOCACHE) {
2031                 err = -ENOENT;
2032                 goto out;
2033         }
2034
2035         table = rt->rt6i_table;
2036         write_lock_bh(&table->tb6_lock);
2037         err = fib6_del(rt, info);
2038         write_unlock_bh(&table->tb6_lock);
2039
2040 out:
2041         ip6_rt_put(rt);
2042         return err;
2043 }
2044
/* Remove @rt from the FIB with a netlink info block bound to the route's
 * own namespace.  Consumes the caller's reference on @rt (via
 * __ip6_del_rt()'s unconditional ip6_rt_put()).
 */
int ip6_del_rt(struct rt6_info *rt)
{
	struct nl_info info = {
		.nl_net = dev_net(rt->dst.dev),
	};
	return __ip6_del_rt(rt, &info);
}
2052
/* Delete the first route in table cfg->fc_table matching the
 * destination/source prefixes and, when supplied, the ifindex, gateway
 * and metric from @cfg.  RTF_CACHE clones are skipped unless the request
 * itself carries RTF_CACHE.  Returns 0 on success or -ESRCH when
 * nothing matched.
 */
static int ip6_route_del(struct fib6_config *cfg)
{
	struct fib6_table *table;
	struct fib6_node *fn;
	struct rt6_info *rt;
	int err = -ESRCH;

	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
	if (!table)
		return err;

	read_lock_bh(&table->tb6_lock);

	fn = fib6_locate(&table->tb6_root,
			 &cfg->fc_dst, cfg->fc_dst_len,
			 &cfg->fc_src, cfg->fc_src_len);

	if (fn) {
		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
			/* Only delete cached clones when explicitly asked. */
			if ((rt->rt6i_flags & RTF_CACHE) &&
			    !(cfg->fc_flags & RTF_CACHE))
				continue;
			if (cfg->fc_ifindex &&
			    (!rt->dst.dev ||
			     rt->dst.dev->ifindex != cfg->fc_ifindex))
				continue;
			if (cfg->fc_flags & RTF_GATEWAY &&
			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
				continue;
			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
				continue;
			/* Grab a reference before dropping the table lock;
			 * __ip6_del_rt() consumes it.
			 */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);

			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
		}
	}
	read_unlock_bh(&table->tb6_lock);

	return err;
}
2094
/* Process a received ICMPv6 Redirect (RFC 2461/4861 sect. 8): validate
 * the message, update the neighbour cache for the new first hop and
 * install a cached host route to the redirected destination.
 */
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
	struct netevent_redirect netevent;
	struct rt6_info *rt, *nrt = NULL;
	struct ndisc_options ndopts;
	struct inet6_dev *in6_dev;
	struct neighbour *neigh;
	struct rd_msg *msg;
	int optlen, on_link;
	u8 *lladdr;

	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
	optlen -= sizeof(*msg);

	if (optlen < 0) {
		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
		return;
	}

	msg = (struct rd_msg *)icmp6_hdr(skb);

	if (ipv6_addr_is_multicast(&msg->dest)) {
		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
		return;
	}

	/* target == dest means the destination itself is on-link; otherwise
	 * the target must be a link-local unicast router address.
	 */
	on_link = 0;
	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
		on_link = 1;
	} else if (ipv6_addr_type(&msg->target) !=
		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
		return;
	}

	in6_dev = __in6_dev_get(skb->dev);
	if (!in6_dev)
		return;
	/* Forwarding interfaces and interfaces configured to refuse
	 * redirects ignore the message.
	 */
	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
		return;

	/* RFC2461 8.1:
	 *	The IP source address of the Redirect MUST be the same as the current
	 *	first-hop router for the specified ICMP Destination Address.
	 */

	if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
		return;
	}

	lladdr = NULL;
	if (ndopts.nd_opts_tgt_lladdr) {
		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
					     skb->dev);
		if (!lladdr) {
			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
			return;
		}
	}

	rt = (struct rt6_info *) dst;
	if (rt->rt6i_flags & RTF_REJECT) {
		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
		return;
	}

	/* Redirect received -> path was valid.
	 * Look, redirects are sent only in response to data packets,
	 * so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->dst);

	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
	if (!neigh)
		return;

	/*
	 *	We have finally decided to accept it.
	 */

	neigh_update(neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER))
		     );

	/* Clone a cached host route to the destination via the new
	 * next hop announced by the redirect.
	 */
	nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
	if (!nrt)
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;

	if (ip6_ins_rt(nrt))
		goto out;

	netevent.old = &rt->dst;
	netevent.new = &nrt->dst;
	netevent.daddr = &msg->dest;
	netevent.neigh = neigh;
	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

	/* The superseded cached route is stale now; remove it. */
	if (rt->rt6i_flags & RTF_CACHE) {
		rt = (struct rt6_info *) dst_clone(&rt->dst);
		ip6_del_rt(rt);
	}

out:
	neigh_release(neigh);
}
2210
2211 /*
2212  *      Misc support functions
2213  */
2214
/* Record @from as the parent of @rt and share its metrics.
 *
 * The BUG_ON guards against chaining: @from must not itself have a
 * parent.  Clearing RTF_EXPIRES makes @rt track @from's lifetime rather
 * than carry its own expiry.
 */
static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
{
	BUG_ON(from->dst.from);

	rt->rt6i_flags &= ~RTF_EXPIRES;
	dst_hold(&from->dst);
	rt->dst.from = &from->dst;
	dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
}
2224
/* Copy routing state from @ort into the freshly allocated clone @rt.
 *
 * Ordering matters: rt6i_flags is copied before rt6_set_from(), which
 * then clears RTF_EXPIRES from the copied flags so the clone follows
 * its parent's lifetime.
 */
static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
{
	rt->dst.input = ort->dst.input;
	rt->dst.output = ort->dst.output;
	rt->rt6i_dst = ort->rt6i_dst;
	rt->dst.error = ort->dst.error;
	rt->rt6i_idev = ort->rt6i_idev;
	if (rt->rt6i_idev)
		in6_dev_hold(rt->rt6i_idev);
	rt->dst.lastuse = jiffies;
	rt->rt6i_gateway = ort->rt6i_gateway;
	rt->rt6i_flags = ort->rt6i_flags;
	rt6_set_from(rt, ort);
	rt->rt6i_metric = ort->rt6i_metric;
#ifdef CONFIG_IPV6_SUBTREES
	rt->rt6i_src = ort->rt6i_src;
#endif
	rt->rt6i_prefsrc = ort->rt6i_prefsrc;
	rt->rt6i_table = ort->rt6i_table;
	rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
}
2246
2247 #ifdef CONFIG_IPV6_ROUTE_INFO
2248 static struct rt6_info *rt6_get_route_info(struct net_device *dev,
2249                                            const struct in6_addr *prefix, int prefixlen,
2250                                            const struct in6_addr *gwaddr)
2251 {
2252         struct fib6_node *fn;
2253         struct rt6_info *rt = NULL;
2254         struct fib6_table *table;
2255
2256         table = fib6_get_table(dev_net(dev),
2257                                addrconf_rt_table(dev, RT6_TABLE_INFO));
2258         if (!table)
2259                 return NULL;
2260
2261         read_lock_bh(&table->tb6_lock);
2262         fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
2263         if (!fn)
2264                 goto out;
2265
2266         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2267                 if (rt->dst.dev->ifindex != dev->ifindex)
2268                         continue;
2269                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2270                         continue;
2271                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2272                         continue;
2273                 dst_hold(&rt->dst);
2274                 break;
2275         }
2276 out:
2277         read_unlock_bh(&table->tb6_lock);
2278         return rt;
2279 }
2280
2281 static struct rt6_info *rt6_add_route_info(struct net_device *dev,
2282                                            const struct in6_addr *prefix, int prefixlen,
2283                                            const struct in6_addr *gwaddr, unsigned int pref)
2284 {
2285         struct fib6_config cfg = {
2286                 .fc_metric      = IP6_RT_PRIO_USER,
2287                 .fc_ifindex     = dev->ifindex,
2288                 .fc_dst_len     = prefixlen,
2289                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2290                                   RTF_UP | RTF_PREF(pref),
2291                 .fc_nlinfo.portid = 0,
2292                 .fc_nlinfo.nlh = NULL,
2293                 .fc_nlinfo.nl_net = dev_net(dev),
2294         };
2295
2296         cfg.fc_table = l3mdev_fib_table_by_index(dev_net(dev), dev->ifindex) ? : addrconf_rt_table(dev, RT6_TABLE_INFO);
2297         cfg.fc_dst = *prefix;
2298         cfg.fc_gateway = *gwaddr;
2299
2300         /* We should treat it as a default route if prefix length is 0. */
2301         if (!prefixlen)
2302                 cfg.fc_flags |= RTF_DEFAULT;
2303
2304         ip6_route_add(&cfg);
2305
2306         return rt6_get_route_info(dev, prefix, prefixlen, gwaddr);
2307 }
2308 #endif
2309
2310 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2311 {
2312         struct rt6_info *rt;
2313         struct fib6_table *table;
2314
2315         table = fib6_get_table(dev_net(dev),
2316                                addrconf_rt_table(dev, RT6_TABLE_MAIN));
2317         if (!table)
2318                 return NULL;
2319
2320         read_lock_bh(&table->tb6_lock);
2321         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2322                 if (dev == rt->dst.dev &&
2323                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2324                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
2325                         break;
2326         }
2327         if (rt)
2328                 dst_hold(&rt->dst);
2329         read_unlock_bh(&table->tb6_lock);
2330         return rt;
2331 }
2332
2333 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2334                                      struct net_device *dev,
2335                                      unsigned int pref)
2336 {
2337         struct fib6_config cfg = {
2338                 .fc_table       = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_DFLT),
2339                 .fc_metric      = IP6_RT_PRIO_USER,
2340                 .fc_ifindex     = dev->ifindex,
2341                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2342                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2343                 .fc_nlinfo.portid = 0,
2344                 .fc_nlinfo.nlh = NULL,
2345                 .fc_nlinfo.nl_net = dev_net(dev),
2346         };
2347
2348         cfg.fc_gateway = *gwaddr;
2349
2350         ip6_route_add(&cfg);
2351
2352         return rt6_get_dflt_router(gwaddr, dev);
2353 }
2354
2355
2356 int rt6_addrconf_purge(struct rt6_info *rt, void *arg) {
2357         if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2358             (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2))
2359                 return -1;
2360         return 0;
2361 }
2362
/* Remove the addrconf/RA routes selected by rt6_addrconf_purge() from
 * the namespace's FIB.
 */
void rt6_purge_dflt_routers(struct net *net)
{
	fib6_clean_all(net, rt6_addrconf_purge, NULL);
}
2367
2368 static void rtmsg_to_fib6_config(struct net *net,
2369                                  struct in6_rtmsg *rtmsg,
2370                                  struct fib6_config *cfg)
2371 {
2372         memset(cfg, 0, sizeof(*cfg));
2373
2374         cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
2375                          : RT6_TABLE_MAIN;
2376         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2377         cfg->fc_metric = rtmsg->rtmsg_metric;
2378         cfg->fc_expires = rtmsg->rtmsg_info;
2379         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2380         cfg->fc_src_len = rtmsg->rtmsg_src_len;
2381         cfg->fc_flags = rtmsg->rtmsg_flags;
2382
2383         cfg->fc_nlinfo.nl_net = net;
2384
2385         cfg->fc_dst = rtmsg->rtmsg_dst;
2386         cfg->fc_src = rtmsg->rtmsg_src;
2387         cfg->fc_gateway = rtmsg->rtmsg_gateway;
2388 }
2389
2390 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2391 {
2392         struct fib6_config cfg;
2393         struct in6_rtmsg rtmsg;
2394         int err;
2395
2396         switch (cmd) {
2397         case SIOCADDRT:         /* Add a route */
2398         case SIOCDELRT:         /* Delete a route */
2399                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2400                         return -EPERM;
2401                 err = copy_from_user(&rtmsg, arg,
2402                                      sizeof(struct in6_rtmsg));
2403                 if (err)
2404                         return -EFAULT;
2405
2406                 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2407
2408                 rtnl_lock();
2409                 switch (cmd) {
2410                 case SIOCADDRT:
2411                         err = ip6_route_add(&cfg);
2412                         break;
2413                 case SIOCDELRT:
2414                         err = ip6_route_del(&cfg);
2415                         break;
2416                 default:
2417                         err = -EINVAL;
2418                 }
2419                 rtnl_unlock();
2420
2421                 return err;
2422         }
2423
2424         return -EINVAL;
2425 }
2426
2427 /*
2428  *      Drop the packet on the floor
2429  */
2430
2431 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2432 {
2433         int type;
2434         struct dst_entry *dst = skb_dst(skb);
2435         switch (ipstats_mib_noroutes) {
2436         case IPSTATS_MIB_INNOROUTES:
2437                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2438                 if (type == IPV6_ADDR_ANY) {
2439                         IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2440                                       IPSTATS_MIB_INADDRERRORS);
2441                         break;
2442                 }
2443                 /* FALLTHROUGH */
2444         case IPSTATS_MIB_OUTNOROUTES:
2445                 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2446                               ipstats_mib_noroutes);
2447                 break;
2448         }
2449         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2450         kfree_skb(skb);
2451         return 0;
2452 }
2453
/* Input-path no-route drop: ICMPv6 "no route" + IPSTATS_MIB_INNOROUTES. */
static int ip6_pkt_discard(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
}
2458
/* Output-path no-route drop: point skb->dev at the route's device, then
 * drop with ICMPv6 "no route" + IPSTATS_MIB_OUTNOROUTES.
 */
static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
}
2464
/* Input-path prohibit drop: ICMPv6 "administratively prohibited". */
static int ip6_pkt_prohibit(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}
2469
/* Output-path prohibit drop: point skb->dev at the route's device, then
 * drop with ICMPv6 "administratively prohibited".
 */
static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
}
2475
2476 /*
2477  *      Allocate a dst for local (unicast / anycast) address.
2478  */
2479
2480 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2481                                     const struct in6_addr *addr,
2482                                     bool anycast)
2483 {
2484         u32 tb_id;
2485         struct net *net = dev_net(idev->dev);
2486         struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2487                                             DST_NOCOUNT);
2488         if (!rt)
2489                 return ERR_PTR(-ENOMEM);
2490
2491         in6_dev_hold(idev);
2492
2493         rt->dst.flags |= DST_HOST;
2494         rt->dst.input = ip6_input;
2495         rt->dst.output = ip6_output;
2496         rt->rt6i_idev = idev;
2497
2498         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2499         if (anycast)
2500                 rt->rt6i_flags |= RTF_ANYCAST;
2501         else
2502                 rt->rt6i_flags |= RTF_LOCAL;
2503
2504         rt->rt6i_gateway  = *addr;
2505         rt->rt6i_dst.addr = *addr;
2506         rt->rt6i_dst.plen = 128;
2507         tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
2508         rt->rt6i_table = fib6_get_table(net, tb_id);
2509         rt->dst.flags |= DST_NOCACHE;
2510
2511         atomic_set(&rt->dst.__refcnt, 1);
2512
2513         return rt;
2514 }
2515
2516 int ip6_route_get_saddr(struct net *net,
2517                         struct rt6_info *rt,
2518                         const struct in6_addr *daddr,
2519                         unsigned int prefs,
2520                         struct in6_addr *saddr)
2521 {
2522         struct inet6_dev *idev =
2523                 rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
2524         int err = 0;
2525         if (rt && rt->rt6i_prefsrc.plen)
2526                 *saddr = rt->rt6i_prefsrc.addr;
2527         else
2528                 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2529                                          daddr, prefs, saddr);
2530         return err;
2531 }
2532
2533 /* remove deleted ip from prefsrc entries */
/* Argument bundle for fib6_remove_prefsrc(). */
struct arg_dev_net_ip {
	struct net_device *dev;	/* device the address was removed from; NULL matches any */
	struct net *net;	/* namespace being scanned */
	struct in6_addr *addr;	/* the deleted address */
};
2539
2540 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2541 {
2542         struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2543         struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2544         struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2545
2546         if (((void *)rt->dst.dev == dev || !dev) &&
2547             rt != net->ipv6.ip6_null_entry &&
2548             ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2549                 /* remove prefsrc entry */
2550                 rt->rt6i_prefsrc.plen = 0;
2551         }
2552         return 0;
2553 }
2554
/* After address @ifp was deleted, clear the prefsrc of every route that
 * referenced it (see fib6_remove_prefsrc()).
 */
void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
{
	struct net *net = dev_net(ifp->idev->dev);
	struct arg_dev_net_ip adni = {
		.dev = ifp->idev->dev,
		.net = net,
		.addr = &ifp->addr,
	};
	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
}
2565
2566 #define RTF_RA_ROUTER           (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2567 #define RTF_CACHE_GATEWAY       (RTF_GATEWAY | RTF_CACHE)
2568
2569 /* Remove routers and update dst entries when gateway turn into host. */
2570 static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2571 {
2572         struct in6_addr *gateway = (struct in6_addr *)arg;
2573
2574         if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2575              ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2576              ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2577                 return -1;
2578         }
2579         return 0;
2580 }
2581
/* A node at @gateway stopped acting as a router: remove default routes
 * and cached gateway entries that point at it (see fib6_clean_tohost()).
 */
void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
{
	fib6_clean_all(net, fib6_clean_tohost, gateway);
}
2586
/* Argument bundle for fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;	/* device going down; NULL matches every route */
	struct net *net;	/* namespace being cleaned */
};
2591
2592 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2593 {
2594         const struct arg_dev_net *adn = arg;
2595         const struct net_device *dev = adn->dev;
2596
2597         if ((rt->dst.dev == dev || !dev) &&
2598             rt != adn->net->ipv6.ip6_null_entry)
2599                 return -1;
2600
2601         return 0;
2602 }
2603
2604 void rt6_ifdown(struct net *net, struct net_device *dev)
2605 {
2606         struct arg_dev_net adn = {
2607                 .dev = dev,
2608                 .net = net,
2609         };
2610
2611         fib6_clean_all(net, fib6_ifdown, &adn);
2612         icmp6_clean_all(fib6_ifdown, &adn);
2613         if (dev)
2614                 rt6_uncached_list_flush_dev(net, dev);
2615 }
2616
/* Walk argument for rt6_mtu_change_route(): the device whose MTU
 * changed and the new MTU value.
 */
struct rt6_mtu_change_arg {
	struct net_device *dev;
	unsigned int mtu;
};
2621
/* fib6_clean_all() callback invoked on a device MTU change (see
 * rt6_mtu_change()).  Updates the MTU metric of routes going through
 * arg->dev unless RTAX_MTU is administratively locked.  Always returns
 * 0 so the whole table is walked.
 */
static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
	struct inet6_dev *idev;

	/* In IPv6 pmtu discovery is not optional,
	   so that RTAX_MTU lock cannot disable it.
	   We still use this lock to block changes
	   caused by addrconf/ndisc.
	*/

	idev = __in6_dev_get(arg->dev);
	if (!idev)
		return 0;

	/* For administrative MTU increase, there is no way to discover
	   IPv6 PMTU increase, so PMTU increase should be updated here.
	   Since RFC 1981 doesn't include administrative MTU increase
	   update PMTU increase is a MUST. (i.e. jumbo frame)
	 */
	/*
	   If new MTU is less than route PMTU, this new MTU will be the
	   lowest MTU in the path, update the route PMTU to reflect PMTU
	   decreases; if new MTU is greater than route PMTU, and the
	   old MTU is the lowest MTU in the path, update the route PMTU
	   to reflect the increase. In this case if the other nodes' MTU
	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
	   PMTU discovery.
	 */
	if (rt->dst.dev == arg->dev &&
	    !dst_metric_locked(&rt->dst, RTAX_MTU)) {
		if (rt->rt6i_flags & RTF_CACHE) {
			/* For RTF_CACHE with rt6i_pmtu == 0
			 * (i.e. a redirected route),
			 * the metrics of its rt->dst.from has already
			 * been updated.
			 */
			if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
				rt->rt6i_pmtu = arg->mtu;
		} else if (dst_mtu(&rt->dst) >= arg->mtu ||
			   (dst_mtu(&rt->dst) < arg->mtu &&
			    dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
			dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
		}
	}
	return 0;
}
2669
2670 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2671 {
2672         struct rt6_mtu_change_arg arg = {
2673                 .dev = dev,
2674                 .mtu = mtu,
2675         };
2676
2677         fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2678 }
2679
/* Netlink attribute policy for RTM_{NEW,DEL,GET}ROUTE requests;
 * enforced by nlmsg_parse() before any attribute is consumed.
 */
static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
	[RTA_GATEWAY]		= { .len = sizeof(struct in6_addr) },
	[RTA_OIF]		= { .type = NLA_U32 },
	[RTA_IIF]		= { .type = NLA_U32 },
	[RTA_PRIORITY]		= { .type = NLA_U32 },
	[RTA_METRICS]		= { .type = NLA_NESTED },
	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
	[RTA_PREF]		= { .type = NLA_U8 },
	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
	[RTA_ENCAP]		= { .type = NLA_NESTED },
	[RTA_UID]		= { .type = NLA_U32 },
};
2692
/* Translate an RTM_NEWROUTE/RTM_DELROUTE netlink request into a
 * struct fib6_config.  Returns 0 on success or a negative errno when
 * the message fails policy validation or carries truncated addresses.
 *
 * Note: cfg keeps pointers into the request's attribute data
 * (fc_mx, fc_mp, fc_encap), so it must not outlive @skb.
 */
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
			      struct fib6_config *cfg)
{
	struct rtmsg *rtm;
	struct nlattr *tb[RTA_MAX+1];
	unsigned int pref;
	int err;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	rtm = nlmsg_data(nlh);
	memset(cfg, 0, sizeof(*cfg));

	cfg->fc_table = rtm->rtm_table;
	cfg->fc_dst_len = rtm->rtm_dst_len;
	cfg->fc_src_len = rtm->rtm_src_len;
	cfg->fc_flags = RTF_UP;
	cfg->fc_protocol = rtm->rtm_protocol;
	cfg->fc_type = rtm->rtm_type;

	/* All reject-type routes map onto RTF_REJECT; the exact
	 * behavior is recovered from fc_type at insertion time.
	 */
	if (rtm->rtm_type == RTN_UNREACHABLE ||
	    rtm->rtm_type == RTN_BLACKHOLE ||
	    rtm->rtm_type == RTN_PROHIBIT ||
	    rtm->rtm_type == RTN_THROW)
		cfg->fc_flags |= RTF_REJECT;

	if (rtm->rtm_type == RTN_LOCAL)
		cfg->fc_flags |= RTF_LOCAL;

	if (rtm->rtm_flags & RTM_F_CLONED)
		cfg->fc_flags |= RTF_CACHE;

	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
	cfg->fc_nlinfo.nlh = nlh;
	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);

	if (tb[RTA_GATEWAY]) {
		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
		cfg->fc_flags |= RTF_GATEWAY;
	}

	if (tb[RTA_DST]) {
		/* Only rtm_dst_len bits are significant; the attribute
		 * may legitimately be shorter than a full in6_addr.
		 */
		int plen = (rtm->rtm_dst_len + 7) >> 3;

		if (nla_len(tb[RTA_DST]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
	}

	if (tb[RTA_SRC]) {
		int plen = (rtm->rtm_src_len + 7) >> 3;

		if (nla_len(tb[RTA_SRC]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
	}

	if (tb[RTA_PREFSRC])
		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);

	if (tb[RTA_OIF])
		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);

	if (tb[RTA_PRIORITY])
		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);

	if (tb[RTA_METRICS]) {
		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
	}

	/* RTA_TABLE overrides the 8-bit rtm_table field. */
	if (tb[RTA_TABLE])
		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);

	if (tb[RTA_MULTIPATH]) {
		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
	}

	if (tb[RTA_PREF]) {
		/* Unknown preferences fall back to medium (RFC 4191). */
		pref = nla_get_u8(tb[RTA_PREF]);
		if (pref != ICMPV6_ROUTER_PREF_LOW &&
		    pref != ICMPV6_ROUTER_PREF_HIGH)
			pref = ICMPV6_ROUTER_PREF_MEDIUM;
		cfg->fc_flags |= RTF_PREF(pref);
	}

	if (tb[RTA_ENCAP])
		cfg->fc_encap = tb[RTA_ENCAP];

	if (tb[RTA_ENCAP_TYPE])
		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);

	err = 0;
errout:
	return err;
}
2795
/* One pending nexthop while building a multipath route: the created
 * route, the per-nexthop config it was built from, its converted
 * metrics, and the link into the temporary rt6_nh_list.
 */
struct rt6_nh {
	struct rt6_info *rt6_info;
	struct fib6_config r_cfg;
	struct mx6_config mxc;
	struct list_head next;
};
2802
2803 static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
2804 {
2805         struct rt6_nh *nh;
2806
2807         list_for_each_entry(nh, rt6_nh_list, next) {
2808                 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
2809                         &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
2810                         nh->r_cfg.fc_ifindex);
2811         }
2812 }
2813
2814 static int ip6_route_info_append(struct list_head *rt6_nh_list,
2815                                  struct rt6_info *rt, struct fib6_config *r_cfg)
2816 {
2817         struct rt6_nh *nh;
2818         struct rt6_info *rtnh;
2819         int err = -EEXIST;
2820
2821         list_for_each_entry(nh, rt6_nh_list, next) {
2822                 /* check if rt6_info already exists */
2823                 rtnh = nh->rt6_info;
2824
2825                 if (rtnh->dst.dev == rt->dst.dev &&
2826                     rtnh->rt6i_idev == rt->rt6i_idev &&
2827                     ipv6_addr_equal(&rtnh->rt6i_gateway,
2828                                     &rt->rt6i_gateway))
2829                         return err;
2830         }
2831
2832         nh = kzalloc(sizeof(*nh), GFP_KERNEL);
2833         if (!nh)
2834                 return -ENOMEM;
2835         nh->rt6_info = rt;
2836         err = ip6_convert_metrics(&nh->mxc, r_cfg);
2837         if (err) {
2838                 kfree(nh);
2839                 return err;
2840         }
2841         memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
2842         list_add_tail(&nh->next, rt6_nh_list);
2843
2844         return 0;
2845 }
2846
2847 static int ip6_route_multipath_add(struct fib6_config *cfg)
2848 {
2849         struct fib6_config r_cfg;
2850         struct rtnexthop *rtnh;
2851         struct rt6_info *rt;
2852         struct rt6_nh *err_nh;
2853         struct rt6_nh *nh, *nh_safe;
2854         int remaining;
2855         int attrlen;
2856         int err = 1;
2857         int nhn = 0;
2858         int replace = (cfg->fc_nlinfo.nlh &&
2859                        (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
2860         LIST_HEAD(rt6_nh_list);
2861
2862         remaining = cfg->fc_mp_len;
2863         rtnh = (struct rtnexthop *)cfg->fc_mp;
2864
2865         /* Parse a Multipath Entry and build a list (rt6_nh_list) of
2866          * rt6_info structs per nexthop
2867          */
2868         while (rtnh_ok(rtnh, remaining)) {
2869                 memcpy(&r_cfg, cfg, sizeof(*cfg));
2870                 if (rtnh->rtnh_ifindex)
2871                         r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2872
2873                 attrlen = rtnh_attrlen(rtnh);
2874                 if (attrlen > 0) {
2875                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2876
2877                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2878                         if (nla) {
2879                                 r_cfg.fc_gateway = nla_get_in6_addr(nla);
2880                                 r_cfg.fc_flags |= RTF_GATEWAY;
2881                         }
2882                         r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
2883                         nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
2884                         if (nla)
2885                                 r_cfg.fc_encap_type = nla_get_u16(nla);
2886                 }
2887
2888                 rt = ip6_route_info_create(&r_cfg);
2889                 if (IS_ERR(rt)) {
2890                         err = PTR_ERR(rt);
2891                         rt = NULL;
2892                         goto cleanup;
2893                 }
2894
2895                 err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
2896                 if (err) {
2897                         dst_free(&rt->dst);
2898                         goto cleanup;
2899                 }
2900
2901                 rtnh = rtnh_next(rtnh, &remaining);
2902         }
2903
2904         err_nh = NULL;
2905         list_for_each_entry(nh, &rt6_nh_list, next) {
2906                 err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
2907                 /* nh->rt6_info is used or freed at this point, reset to NULL*/
2908                 nh->rt6_info = NULL;
2909                 if (err) {
2910                         if (replace && nhn)
2911                                 ip6_print_replace_route_err(&rt6_nh_list);
2912                         err_nh = nh;
2913                         goto add_errout;
2914                 }
2915
2916                 /* Because each route is added like a single route we remove
2917                  * these flags after the first nexthop: if there is a collision,
2918                  * we have already failed to add the first nexthop:
2919                  * fib6_add_rt2node() has rejected it; when replacing, old
2920                  * nexthops have been replaced by first new, the rest should
2921                  * be added to it.
2922                  */
2923                 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
2924                                                      NLM_F_REPLACE);
2925                 nhn++;
2926         }
2927
2928         goto cleanup;
2929
2930 add_errout:
2931         /* Delete routes that were already added */
2932         list_for_each_entry(nh, &rt6_nh_list, next) {
2933                 if (err_nh == nh)
2934                         break;
2935                 ip6_route_del(&nh->r_cfg);
2936         }
2937
2938 cleanup:
2939         list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
2940                 if (nh->rt6_info)
2941                         dst_free(&nh->rt6_info->dst);
2942                 kfree(nh->mxc.mx);
2943                 list_del(&nh->next);
2944                 kfree(nh);
2945         }
2946
2947         return err;
2948 }
2949
2950 static int ip6_route_multipath_del(struct fib6_config *cfg)
2951 {
2952         struct fib6_config r_cfg;
2953         struct rtnexthop *rtnh;
2954         int remaining;
2955         int attrlen;
2956         int err = 1, last_err = 0;
2957
2958         remaining = cfg->fc_mp_len;
2959         rtnh = (struct rtnexthop *)cfg->fc_mp;
2960
2961         /* Parse a Multipath Entry */
2962         while (rtnh_ok(rtnh, remaining)) {
2963                 memcpy(&r_cfg, cfg, sizeof(*cfg));
2964                 if (rtnh->rtnh_ifindex)
2965                         r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2966
2967                 attrlen = rtnh_attrlen(rtnh);
2968                 if (attrlen > 0) {
2969                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2970
2971                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2972                         if (nla) {
2973                                 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2974                                 r_cfg.fc_flags |= RTF_GATEWAY;
2975                         }
2976                 }
2977                 err = ip6_route_del(&r_cfg);
2978                 if (err)
2979                         last_err = err;
2980
2981                 rtnh = rtnh_next(rtnh, &remaining);
2982         }
2983
2984         return last_err;
2985 }
2986
2987 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2988 {
2989         struct fib6_config cfg;
2990         int err;
2991
2992         err = rtm_to_fib6_config(skb, nlh, &cfg);
2993         if (err < 0)
2994                 return err;
2995
2996         if (cfg.fc_mp)
2997                 return ip6_route_multipath_del(&cfg);
2998         else
2999                 return ip6_route_del(&cfg);
3000 }
3001
3002 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
3003 {
3004         struct fib6_config cfg;
3005         int err;
3006
3007         err = rtm_to_fib6_config(skb, nlh, &cfg);
3008         if (err < 0)
3009                 return err;
3010
3011         if (cfg.fc_mp)
3012                 return ip6_route_multipath_add(&cfg);
3013         else
3014                 return ip6_route_add(&cfg);
3015 }
3016
/* Worst-case netlink message size for one route, used to size the skb
 * in inet6_rt_notify().  Must stay in sync with what rt6_fill_node()
 * emits; an undersized estimate surfaces there as -EMSGSIZE.
 */
static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
{
	return NLMSG_ALIGN(sizeof(struct rtmsg))
	       + nla_total_size(16) /* RTA_SRC */
	       + nla_total_size(16) /* RTA_DST */
	       + nla_total_size(16) /* RTA_GATEWAY */
	       + nla_total_size(16) /* RTA_PREFSRC */
	       + nla_total_size(4) /* RTA_TABLE */
	       + nla_total_size(4) /* RTA_IIF */
	       + nla_total_size(4) /* RTA_OIF */
	       + nla_total_size(4) /* RTA_PRIORITY */
	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
	       + nla_total_size(sizeof(struct rta_cacheinfo))
	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
	       + nla_total_size(1) /* RTA_PREF */
	       + lwtunnel_get_encap_size(rt->dst.lwtstate);
}
3034
/* Serialize one route into an rtnetlink message on @skb.
 *
 * @dst/@src: when non-NULL (getroute replies) they override the route's
 *	own prefix and force a /128 length.
 * @iif: input interface for getroute-by-iif replies.
 * @prefix: when set, only RTF_PREFIX_RT routes are emitted; others
 *	return 1 ("skipped, not an error").
 * @nowait: passed through to ip6mr_get_route() for multicast lookups.
 *
 * Returns 0 on success, 1 when the route was filtered out, or
 * -EMSGSIZE when @skb ran out of room (the partial message is
 * cancelled).
 */
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 portid, u32 seq,
			 int prefix, int nowait, unsigned int flags)
{
	u32 metrics[RTAX_MAX];
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	long expires;
	u32 table;

	if (prefix) {	/* user wants prefix routes only */
		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
			/* success since this is not a prefix route */
			return 1;
		}
	}

	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	if (rt->rt6i_table)
		table = rt->rt6i_table->tb6_id;
	else
		table = RT6_TABLE_UNSPEC;
	rtm->rtm_table = table;
	if (nla_put_u32(skb, RTA_TABLE, table))
		goto nla_put_failure;
	/* Map the route back to the user-visible RTN_* type; reject
	 * routes recover their flavor from dst.error.
	 */
	if (rt->rt6i_flags & RTF_REJECT) {
		switch (rt->dst.error) {
		case -EINVAL:
			rtm->rtm_type = RTN_BLACKHOLE;
			break;
		case -EACCES:
			rtm->rtm_type = RTN_PROHIBIT;
			break;
		case -EAGAIN:
			rtm->rtm_type = RTN_THROW;
			break;
		default:
			rtm->rtm_type = RTN_UNREACHABLE;
			break;
		}
	}
	else if (rt->rt6i_flags & RTF_LOCAL)
		rtm->rtm_type = RTN_LOCAL;
	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;
	if (!netif_carrier_ok(rt->dst.dev)) {
		rtm->rtm_flags |= RTNH_F_LINKDOWN;
		if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
			rtm->rtm_flags |= RTNH_F_DEAD;
	}
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = rt->rt6i_protocol;
	if (rt->rt6i_flags & RTF_DYNAMIC)
		rtm->rtm_protocol = RTPROT_REDIRECT;
	else if (rt->rt6i_flags & RTF_ADDRCONF) {
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
			rtm->rtm_protocol = RTPROT_RA;
		else
			rtm->rtm_protocol = RTPROT_KERNEL;
	}

	if (rt->rt6i_flags & RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	if (dst) {
		if (nla_put_in6_addr(skb, RTA_DST, dst))
			goto nla_put_failure;
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
			goto nla_put_failure;
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		if (nla_put_in6_addr(skb, RTA_SRC, src))
			goto nla_put_failure;
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len &&
		   nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
		goto nla_put_failure;
#endif
	if (iif) {
#ifdef CONFIG_IPV6_MROUTE
		/* Multicast destinations are resolved by the multicast
		 * routing code, which fills the message itself.
		 */
		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
			int err = ip6mr_get_route(net, skb, rtm, nowait);
			if (err <= 0) {
				if (!nowait) {
					if (err == 0)
						return 0;
					goto nla_put_failure;
				} else {
					if (err == -EMSGSIZE)
						goto nla_put_failure;
				}
			}
		} else
#endif
			if (nla_put_u32(skb, RTA_IIF, iif))
				goto nla_put_failure;
	} else if (dst) {
		struct in6_addr saddr_buf;
		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
			goto nla_put_failure;
	}

	if (rt->rt6i_prefsrc.plen) {
		struct in6_addr saddr_buf;
		saddr_buf = rt->rt6i_prefsrc.addr;
		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
			goto nla_put_failure;
	}

	/* Report the cached PMTU (if any) in place of the MTU metric. */
	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
	if (rt->rt6i_pmtu)
		metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
	if (rtnetlink_put_metrics(skb, metrics) < 0)
		goto nla_put_failure;

	if (rt->rt6i_flags & RTF_GATEWAY) {
		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
			goto nla_put_failure;
	}

	if (rt->dst.dev &&
	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
		goto nla_put_failure;
	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
		goto nla_put_failure;

	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
		goto nla_put_failure;

	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
		goto nla_put_failure;

	lwtunnel_fill_encap(skb, rt->dst.lwtstate);

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
3194
3195 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
3196 {
3197         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3198         int prefix;
3199
3200         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3201                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
3202                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
3203         } else
3204                 prefix = 0;
3205
3206         return rt6_fill_node(arg->net,
3207                      arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
3208                      NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
3209                      prefix, 0, NLM_F_MULTI);
3210 }
3211
/* RTM_GETROUTE handler: perform a route lookup for the flow described
 * by the request attributes (src/dst/iif/oif/mark/uid) and unicast the
 * matching route back to the requester.
 *
 * NOTE(review): the lookup result is used without checking
 * rt->dst.error, so reject routes are serialized rather than returned
 * as an error to the caller — later kernels return dst.error early
 * here; confirm whether that behavior is wanted on this branch.
 */
static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(in_skb->sk);
	struct nlattr *tb[RTA_MAX+1];
	struct rt6_info *rt;
	struct sk_buff *skb;
	struct rtmsg *rtm;
	struct flowi6 fl6;
	int err, iif = 0, oif = 0;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	memset(&fl6, 0, sizeof(fl6));

	/* Getroute requires full 128-bit addresses, unlike new/del. */
	if (tb[RTA_SRC]) {
		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
			goto errout;

		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
	}

	if (tb[RTA_DST]) {
		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
			goto errout;

		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
	}

	if (tb[RTA_IIF])
		iif = nla_get_u32(tb[RTA_IIF]);

	if (tb[RTA_OIF])
		oif = nla_get_u32(tb[RTA_OIF]);

	if (tb[RTA_MARK])
		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);

	if (tb[RTA_UID])
		fl6.flowi6_uid = make_kuid(current_user_ns(),
					   nla_get_u32(tb[RTA_UID]));
	else
		fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
	if (iif) {
		/* Simulate reception on iif: input-path lookup. */
		struct net_device *dev;
		int flags = 0;

		dev = __dev_get_by_index(net, iif);
		if (!dev) {
			err = -ENODEV;
			goto errout;
		}

		fl6.flowi6_iif = iif;

		if (!ipv6_addr_any(&fl6.saddr))
			flags |= RT6_LOOKUP_F_HAS_SADDR;

		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
							       flags);
	} else {
		/* Output-path lookup, honoring an l3mdev master oif. */
		fl6.flowi6_oif = oif;

		if (netif_index_is_l3_master(net, oif)) {
			fl6.flowi6_flags = FLOWI_FLAG_L3MDEV_SRC |
					   FLOWI_FLAG_SKIP_NH_OIF;
		}

		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
	}

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb) {
		ip6_rt_put(rt);
		err = -ENOBUFS;
		goto errout;
	}

	/* Reserve room for dummy headers, this skb can pass
	   through good chunk of routing engine.
	 */
	skb_reset_mac_header(skb);
	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));

	/* The skb now owns the route reference; kfree_skb drops it. */
	skb_dst_set(skb, &rt->dst);

	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
			    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
			    nlh->nlmsg_seq, 0, 0, 0);
	if (err < 0) {
		kfree_skb(skb);
		goto errout;
	}

	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout:
	return err;
}
3312
3313 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
3314                      unsigned int nlm_flags)
3315 {
3316         struct sk_buff *skb;
3317         struct net *net = info->nl_net;
3318         u32 seq;
3319         int err;
3320
3321         err = -ENOBUFS;
3322         seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3323
3324         skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3325         if (!skb)
3326                 goto errout;
3327
3328         err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
3329                                 event, info->portid, seq, 0, 0, nlm_flags);
3330         if (err < 0) {
3331                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3332                 WARN_ON(err == -EMSGSIZE);
3333                 kfree_skb(skb);
3334                 goto errout;
3335         }
3336         rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3337                     info->nlh, gfp_any());
3338         return;
3339 errout:
3340         if (err < 0)
3341                 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
3342 }
3343
3344 static int ip6_route_dev_notify(struct notifier_block *this,
3345                                 unsigned long event, void *ptr)
3346 {
3347         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
3348         struct net *net = dev_net(dev);
3349
3350         if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
3351                 net->ipv6.ip6_null_entry->dst.dev = dev;
3352                 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3353 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3354                 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
3355                 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
3356                 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
3357                 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
3358 #endif
3359         }
3360
3361         return NOTIFY_OK;
3362 }
3363
3364 /*
3365  *      /proc
3366  */
3367
3368 #ifdef CONFIG_PROC_FS
3369
/* /proc/net/ipv6_route: sequential dump of the routing table. */
static const struct file_operations ipv6_route_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= ipv6_route_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};
3377
/* /proc/net/rt6_stats: one line of fib6 statistics for this netns.
 * Field order: fib nodes, route nodes, rt alloc, rt entries, rt cache,
 * live dst entries, discarded routes.
 */
static int rt6_stats_seq_show(struct seq_file *seq, void *v)
{
	struct net *net = (struct net *)seq->private;
	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
		   net->ipv6.rt6_stats->fib_nodes,
		   net->ipv6.rt6_stats->fib_route_nodes,
		   net->ipv6.rt6_stats->fib_rt_alloc,
		   net->ipv6.rt6_stats->fib_rt_entries,
		   net->ipv6.rt6_stats->fib_rt_cache,
		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
		   net->ipv6.rt6_stats->fib_discarded_routes);

	return 0;
}
3392
/* open() for /proc/net/rt6_stats; binds the seq file to its netns. */
static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, rt6_stats_seq_show);
}
3397
/* File operations for the single-record /proc/net/rt6_stats file. */
static const struct file_operations rt6_stats_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt6_stats_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release_net,
};
3405 #endif  /* CONFIG_PROC_FS */
3406
3407 #ifdef CONFIG_SYSCTL
3408
3409 static
3410 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
3411                               void __user *buffer, size_t *lenp, loff_t *ppos)
3412 {
3413         struct net *net;
3414         int delay;
3415         if (!write)
3416                 return -EINVAL;
3417
3418         net = (struct net *)ctl->extra1;
3419         delay = net->ipv6.sysctl.flush_delay;
3420         proc_dointvec(ctl, write, buffer, lenp, ppos);
3421         fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
3422         return 0;
3423 }
3424
/* Template for the per-namespace net.ipv6.route.* sysctl table.  The
 * .data pointers reference init_net here and are rewritten to the
 * owning namespace in ipv6_route_sysctl_init() (indices must stay in
 * sync with that function).
 */
struct ctl_table ipv6_route_table_template[] = {
	{
		/* Write-only (0200) trigger: writing runs the FIB GC. */
		.procname	=	"flush",
		.data		=	&init_net.ipv6.sysctl.flush_delay,
		.maxlen		=	sizeof(int),
		.mode		=	0200,
		.proc_handler	=	ipv6_sysctl_rtcache_flush
	},
	{
		.procname	=	"gc_thresh",
		.data		=	&ip6_dst_ops_template.gc_thresh,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"max_size",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"gc_min_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_timeout",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_elasticity",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"mtu_expires",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"min_adv_mss",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		/* Millisecond view of gc_min_interval: same backing
		 * variable as the entry above, converted by the
		 * ms_jiffies handler.
		 */
		.procname	=	"gc_min_interval_ms",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_ms_jiffies,
	},
	{ }
};
3498
3499 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
3500 {
3501         struct ctl_table *table;
3502
3503         table = kmemdup(ipv6_route_table_template,
3504                         sizeof(ipv6_route_table_template),
3505                         GFP_KERNEL);
3506
3507         if (table) {
3508                 table[0].data = &net->ipv6.sysctl.flush_delay;
3509                 table[0].extra1 = net;
3510                 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
3511                 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3512                 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3513                 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3514                 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3515                 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3516                 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3517                 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
3518                 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3519
3520                 /* Don't export sysctls to unprivileged users */
3521                 if (net->user_ns != &init_user_ns)
3522                         table[0].procname = NULL;
3523         }
3524
3525         return table;
3526 }
3527 #endif
3528
/* Per-namespace routing setup: copy the dst_ops template, then clone the
 * immutable "special" routes (null, and with CONFIG_IPV6_MULTIPLE_TABLES
 * also prohibit and blackhole) so their dst.ops/metrics can be per-net,
 * and finally seed the GC/sysctl tunables.  Returns 0 or -ENOMEM,
 * unwinding partial allocations through the goto chain at the bottom.
 */
static int __net_init ip6_route_net_init(struct net *net)
{
	int ret = -ENOMEM;

	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
	       sizeof(net->ipv6.ip6_dst_ops));

	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
		goto out_ip6_dst_ops;

	/* Each netns gets its own writable copy of the null route. */
	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
					   sizeof(*net->ipv6.ip6_null_entry),
					   GFP_KERNEL);
	if (!net->ipv6.ip6_null_entry)
		goto out_ip6_dst_entries;
	net->ipv6.ip6_null_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_null_entry;
	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
			 ip6_template_metrics, true);

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
					       sizeof(*net->ipv6.ip6_prohibit_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_prohibit_entry)
		goto out_ip6_null_entry;
	net->ipv6.ip6_prohibit_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
			 ip6_template_metrics, true);

	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
					       sizeof(*net->ipv6.ip6_blk_hole_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_blk_hole_entry)
		goto out_ip6_prohibit_entry;
	net->ipv6.ip6_blk_hole_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
			 ip6_template_metrics, true);
#endif

	/* Defaults for the net.ipv6.route.* sysctls of this namespace. */
	net->ipv6.sysctl.flush_delay = 0;
	net->ipv6.sysctl.ip6_rt_max_size = 4096;
	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;

	net->ipv6.ip6_rt_gc_expire = 30*HZ;

	ret = 0;
out:
	return ret;

	/* Error unwinding: free in reverse order of allocation. */
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
out_ip6_prohibit_entry:
	kfree(net->ipv6.ip6_prohibit_entry);
out_ip6_null_entry:
	kfree(net->ipv6.ip6_null_entry);
#endif
out_ip6_dst_entries:
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
out_ip6_dst_ops:
	goto out;
}
3600
/* Tear down everything ip6_route_net_init() allocated for this
 * namespace: the special route copies and the dst entry counters.
 */
static void __net_exit ip6_route_net_exit(struct net *net)
{
	kfree(net->ipv6.ip6_null_entry);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	kfree(net->ipv6.ip6_prohibit_entry);
	kfree(net->ipv6.ip6_blk_hole_entry);
#endif
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
}
3610
3611 static int __net_init ip6_route_net_init_late(struct net *net)
3612 {
3613 #ifdef CONFIG_PROC_FS
3614         proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3615         proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3616 #endif
3617         return 0;
3618 }
3619
/* Late per-namespace teardown: remove the /proc entries created by
 * ip6_route_net_init_late().
 */
static void __net_exit ip6_route_net_exit_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ipv6_route", net->proc_net);
	remove_proc_entry("rt6_stats", net->proc_net);
#endif
}
3627
/* Core per-namespace route state (dst ops, special routes, sysctls). */
static struct pernet_operations ip6_route_net_ops = {
	.init = ip6_route_net_init,
	.exit = ip6_route_net_exit,
};
3632
3633 static int __net_init ipv6_inetpeer_init(struct net *net)
3634 {
3635         struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3636
3637         if (!bp)
3638                 return -ENOMEM;
3639         inet_peer_base_init(bp);
3640         net->ipv6.peers = bp;
3641         return 0;
3642 }
3643
/* Release the per-namespace inetpeer base: detach it from the netns
 * first, invalidate the peer tree, then free the base itself.
 */
static void __net_exit ipv6_inetpeer_exit(struct net *net)
{
	struct inet_peer_base *bp = net->ipv6.peers;

	net->ipv6.peers = NULL;
	inetpeer_invalidate_tree(bp);
	kfree(bp);
}
3652
/* Per-namespace inetpeer base lifecycle. */
static struct pernet_operations ipv6_inetpeer_ops = {
	.init	=	ipv6_inetpeer_init,
	.exit	=	ipv6_inetpeer_exit,
};
3657
/* Late per-namespace setup (/proc entries), registered after the core
 * route state above.
 */
static struct pernet_operations ip6_route_net_late_ops = {
	.init = ip6_route_net_init_late,
	.exit = ip6_route_net_exit_late,
};
3662
/* Netdevice event notifier; ip6_route_dev_notify is defined elsewhere
 * in this file.
 */
static struct notifier_block ip6_route_dev_notifier = {
	.notifier_call = ip6_route_dev_notify,
	.priority = 0,
};
3667
/* Boot-time initialisation of the IPv6 routing subsystem.  The order is
 * significant: dst kmem cache and blackhole dst counters first, then the
 * pernet ops (inetpeer before route state), then FIB/xfrm/policy-rule
 * setup, and only then the netlink handlers and netdevice notifier that
 * make the subsystem externally reachable.  Failures unwind in reverse
 * through the goto chain at the bottom.
 */
int __init ip6_route_init(void)
{
	int ret;
	int cpu;

	ret = -ENOMEM;
	ip6_dst_ops_template.kmem_cachep =
		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!ip6_dst_ops_template.kmem_cachep)
		goto out;

	ret = dst_entries_init(&ip6_dst_blackhole_ops);
	if (ret)
		goto out_kmem_cache;

	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
	if (ret)
		goto out_dst_entries;

	ret = register_pernet_subsys(&ip6_route_net_ops);
	if (ret)
		goto out_register_inetpeer;

	/* Blackhole dsts come from the same slab as regular rt6_info. */
	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;

	/* Registering of the loopback is done before this portion of code,
	 * the loopback reference in rt6_info will not be taken, do it
	 * manually for init_net */
	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
  #endif
	ret = fib6_init();
	if (ret)
		goto out_register_subsys;

	ret = xfrm6_init();
	if (ret)
		goto out_fib6_init;

	ret = fib6_rules_init();
	if (ret)
		goto xfrm6_init;

	ret = register_pernet_subsys(&ip6_route_net_late_ops);
	if (ret)
		goto fib6_rules_init;

	/* Any rtnl_register failure leaves the earlier handlers in place;
	 * they are torn down with the late subsys on the error path.
	 */
	ret = -ENOBUFS;
	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
		goto out_register_late_subsys;

	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
	if (ret)
		goto out_register_late_subsys;

	/* Initialise the per-cpu list heads/locks of rt6_uncached_list. */
	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);

		INIT_LIST_HEAD(&ul->head);
		spin_lock_init(&ul->lock);
	}

out:
	return ret;

	/* Unwind in reverse order of the setup above. */
out_register_late_subsys:
	unregister_pernet_subsys(&ip6_route_net_late_ops);
fib6_rules_init:
	fib6_rules_cleanup();
xfrm6_init:
	xfrm6_fini();
out_fib6_init:
	fib6_gc_cleanup();
out_register_subsys:
	unregister_pernet_subsys(&ip6_route_net_ops);
out_register_inetpeer:
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
out_dst_entries:
	dst_entries_destroy(&ip6_dst_blackhole_ops);
out_kmem_cache:
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
	goto out;
}
3759
/* Module teardown: undo ip6_route_init() in exactly the reverse order
 * of its setup steps.
 */
void ip6_route_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	unregister_pernet_subsys(&ip6_route_net_late_ops);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
	unregister_pernet_subsys(&ip6_route_net_ops);
	dst_entries_destroy(&ip6_dst_blackhole_ops);
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}