net: ipv6: autoconf routes into per-device tables
author Lorenzo Colitti <lorenzo@google.com>
Wed, 26 Mar 2014 10:35:41 +0000 (19:35 +0900)
committer JP Abgrall <jpa@google.com>
Fri, 16 May 2014 18:19:20 +0000 (18:19 +0000)
Currently, IPv6 router discovery always puts routes into
RT6_TABLE_MAIN. This causes problems for connection managers
that want to support multiple simultaneous network connections
and want control over which one is used by default (e.g., wifi
and wired).

To work around this, connection managers typically take the routes
they prefer and copy them to static routes with low metrics in
the main table. This puts the burden on the connection manager
to watch netlink to see if the routes have changed, delete the
routes when their lifetime expires, etc.

Instead, this patch adds a per-interface sysctl to have the
kernel put autoconf routes into different tables. This allows
each interface to have its own autoconf table, and choosing the
default interface (or using different interfaces at the same
time for different types of traffic) can be done using
appropriate ip rules.

The sysctl behaves as follows:

- = 0: default. Put routes into RT6_TABLE_MAIN as before.
- > 0: manual. Put routes into the specified table.
- < 0: automatic. Add the absolute value of the sysctl to the
       device's ifindex, and use that table.

The automatic mode is most useful in conjunction with
net.ipv6.conf.default.accept_ra_rt_table. A connection manager
or distribution could set it to, say, -100 on boot, and
thereafter just use IP rules.

Change-Id: I82d16e3737d9cdfa6489e649e247894d0d60cbb1
Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
include/linux/ipv6.h
include/net/addrconf.h
include/uapi/linux/ipv6.h
net/ipv6/addrconf.c
net/ipv6/route.c

index 850e95bc766c8504d4fbc2c592c1327ef5994431..867833ba6bd166dbfcc4e1259b6dfbcdd00d0650 100644 (file)
@@ -36,6 +36,7 @@ struct ipv6_devconf {
        __s32           accept_ra_rt_info_max_plen;
 #endif
 #endif
+       __s32           accept_ra_rt_table;
        __s32           proxy_ndp;
        __s32           accept_source_route;
 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
index 21f702704f2444272e1554c87112594d40acd421..96a8afe33c52d946d64d3f4b6e82a4099f1ad993 100644 (file)
@@ -183,6 +183,8 @@ static inline bool ipv6_is_mld(struct sk_buff *skb, int nexthdr, int offset)
 extern void addrconf_prefix_rcv(struct net_device *dev,
                                u8 *opt, int len, bool sllao);
 
+u32 addrconf_rt_table(const struct net_device *dev, u32 default_table);
+
 /*
  *     anycast prototypes (anycast.c)
  */
index 4bda4cf5b0f56d84651497df86bd9fa09909fa85..4214fac1bf4fbeea0c702c440f39aaa6cad94291 100644 (file)
@@ -160,6 +160,7 @@ enum {
        DEVCONF_ACCEPT_DAD,
        DEVCONF_FORCE_TLLAO,
        DEVCONF_NDISC_NOTIFY,
+       DEVCONF_ACCEPT_RA_RT_TABLE,
        DEVCONF_MAX
 };
 
index 4ab4c38958c6857afd7cfe998fc8866a0da00382..cec8cb4d292db398f3d7cdd8d420f0cb3333be37 100644 (file)
@@ -198,6 +198,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
        .accept_ra_rt_info_max_plen = 0,
 #endif
 #endif
+       .accept_ra_rt_table     = 0,
        .proxy_ndp              = 0,
        .accept_source_route    = 0,    /* we do not accept RH0 by default. */
        .disable_ipv6           = 0,
@@ -232,6 +233,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
        .accept_ra_rt_info_max_plen = 0,
 #endif
 #endif
+       .accept_ra_rt_table     = 0,
        .proxy_ndp              = 0,
        .accept_source_route    = 0,    /* we do not accept RH0 by default. */
        .disable_ipv6           = 0,
@@ -1910,6 +1912,31 @@ static void  __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmp
 }
 #endif
 
+/*
+ * addrconf_rt_table - pick the routing table for autoconf routes on @dev
+ * @dev:           device the PIO/RIO/default route was learned on
+ * @default_table: table to use when no per-device table is configured
+ *
+ * The choice is driven by the device's accept_ra_rt_table setting:
+ *
+ * - If 0, use @default_table. Callers pass one of
+ *   RT6_TABLE_{PREFIX,INFO,DFLT} depending on the type of route (note
+ *   that these three are currently all equal to RT6_TABLE_MAIN).
+ * - If > 0, use the specified table.
+ * - If < 0, put routes into table dev->ifindex + (-accept_ra_rt_table).
+ *
+ * Returns the table id. Falls back to @default_table when @dev has no
+ * inet6_dev to read the setting from.
+ */
+u32 addrconf_rt_table(const struct net_device *dev, u32 default_table)
+{
+       struct inet6_dev *idev = in6_dev_get(dev);
+       u32 table;
+       int sysctl;
+
+       /* in6_dev_get() may hand back NULL; there is then nothing to
+        * dereference and no reference to drop.
+        */
+       if (!idev)
+               return default_table;
+
+       sysctl = idev->cnf.accept_ra_rt_table;
+       if (sysctl == 0) {
+               table = default_table;
+       } else if (sysctl > 0) {
+               table = (u32) sysctl;
+       } else {
+               /* Negate in unsigned arithmetic so that INT_MIN cannot
+                * overflow; the result is the same modulo 2^32.
+                */
+               table = (u32) dev->ifindex + (0U - (u32) sysctl);
+       }
+       in6_dev_put(idev);
+       return table;
+}
+
 /*
  *     Add prefix route.
  */
@@ -1919,7 +1946,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
                      unsigned long expires, u32 flags)
 {
        struct fib6_config cfg = {
-               .fc_table = RT6_TABLE_PREFIX,
+               .fc_table = addrconf_rt_table(dev, RT6_TABLE_PREFIX),
                .fc_metric = IP6_RT_PRIO_ADDRCONF,
                .fc_ifindex = dev->ifindex,
                .fc_expires = expires,
@@ -1953,7 +1980,8 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
        struct rt6_info *rt = NULL;
        struct fib6_table *table;
 
-       table = fib6_get_table(dev_net(dev), RT6_TABLE_PREFIX);
+       table = fib6_get_table(dev_net(dev),
+                              addrconf_rt_table(dev, RT6_TABLE_PREFIX));
        if (table == NULL)
                return NULL;
 
@@ -4159,6 +4187,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
        array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen;
 #endif
 #endif
+       array[DEVCONF_ACCEPT_RA_RT_TABLE] = cnf->accept_ra_rt_table;
        array[DEVCONF_PROXY_NDP] = cnf->proxy_ndp;
        array[DEVCONF_ACCEPT_SOURCE_ROUTE] = cnf->accept_source_route;
 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
@@ -4867,6 +4896,13 @@ static struct addrconf_sysctl_table
                },
 #endif
 #endif
+               {
+                       .procname       = "accept_ra_rt_table",
+                       .data           = &ipv6_devconf.accept_ra_rt_table,
+                       .maxlen         = sizeof(int),
+                       .mode           = 0644,
+                       .proc_handler   = proc_dointvec,
+               },
                {
                        .procname       = "proxy_ndp",
                        .data           = &ipv6_devconf.proxy_ndp,
index ad0aa6b0b86ae02f80b6b2184588605a3d5d7a6c..29f389caf522d897971aa0eba90c735ded9a340e 100644 (file)
@@ -85,13 +85,12 @@ static void         rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
                                        struct sk_buff *skb);
 
 #ifdef CONFIG_IPV6_ROUTE_INFO
-static struct rt6_info *rt6_add_route_info(struct net *net,
+static struct rt6_info *rt6_add_route_info(struct net_device *dev,
                                           const struct in6_addr *prefix, int prefixlen,
-                                          const struct in6_addr *gwaddr, int ifindex,
-                                          unsigned int pref);
-static struct rt6_info *rt6_get_route_info(struct net *net,
+                                          const struct in6_addr *gwaddr, unsigned int pref);
+static struct rt6_info *rt6_get_route_info(struct net_device *dev,
                                           const struct in6_addr *prefix, int prefixlen,
-                                          const struct in6_addr *gwaddr, int ifindex);
+                                          const struct in6_addr *gwaddr);
 #endif
 
 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
@@ -643,7 +642,6 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
                  const struct in6_addr *gwaddr)
 {
-       struct net *net = dev_net(dev);
        struct route_info *rinfo = (struct route_info *) opt;
        struct in6_addr prefix_buf, *prefix;
        unsigned int pref;
@@ -685,8 +683,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
                prefix = &prefix_buf;
        }
 
-       rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
-                               dev->ifindex);
+       rt = rt6_get_route_info(dev, prefix, rinfo->prefix_len, gwaddr);
 
        if (rt && !lifetime) {
                ip6_del_rt(rt);
@@ -694,8 +691,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
        }
 
        if (!rt && lifetime)
-               rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
-                                       pref);
+               rt = rt6_add_route_info(dev, prefix, rinfo->prefix_len, gwaddr, pref);
        else if (rt)
                rt->rt6i_flags = RTF_ROUTEINFO |
                                 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
@@ -1796,15 +1792,16 @@ static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
 }
 
 #ifdef CONFIG_IPV6_ROUTE_INFO
-static struct rt6_info *rt6_get_route_info(struct net *net,
+static struct rt6_info *rt6_get_route_info(struct net_device *dev,
                                           const struct in6_addr *prefix, int prefixlen,
-                                          const struct in6_addr *gwaddr, int ifindex)
+                                          const struct in6_addr *gwaddr)
 {
        struct fib6_node *fn;
        struct rt6_info *rt = NULL;
        struct fib6_table *table;
 
-       table = fib6_get_table(net, RT6_TABLE_INFO);
+       table = fib6_get_table(dev_net(dev),
+                              addrconf_rt_table(dev, RT6_TABLE_INFO));
        if (!table)
                return NULL;
 
@@ -1814,7 +1811,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
                goto out;
 
        for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
-               if (rt->dst.dev->ifindex != ifindex)
+               if (rt->dst.dev->ifindex != dev->ifindex)
                        continue;
                if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
                        continue;
@@ -1828,21 +1825,20 @@ out:
        return rt;
 }
 
-static struct rt6_info *rt6_add_route_info(struct net *net,
+static struct rt6_info *rt6_add_route_info(struct net_device *dev,
                                           const struct in6_addr *prefix, int prefixlen,
-                                          const struct in6_addr *gwaddr, int ifindex,
-                                          unsigned int pref)
+                                          const struct in6_addr *gwaddr, unsigned int pref)
 {
        struct fib6_config cfg = {
-               .fc_table       = RT6_TABLE_INFO,
+               .fc_table       = addrconf_rt_table(dev, RT6_TABLE_INFO),
                .fc_metric      = IP6_RT_PRIO_USER,
-               .fc_ifindex     = ifindex,
+               .fc_ifindex     = dev->ifindex,
                .fc_dst_len     = prefixlen,
                .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
                                  RTF_UP | RTF_PREF(pref),
                .fc_nlinfo.portid = 0,
                .fc_nlinfo.nlh = NULL,
-               .fc_nlinfo.nl_net = net,
+               .fc_nlinfo.nl_net = dev_net(dev),
        };
 
        cfg.fc_dst = *prefix;
@@ -1854,7 +1850,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net,
 
        ip6_route_add(&cfg);
 
-       return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
+       return rt6_get_route_info(dev, prefix, prefixlen, gwaddr);
 }
 #endif
 
@@ -1863,7 +1859,8 @@ struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_dev
        struct rt6_info *rt;
        struct fib6_table *table;
 
-       table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
+       table = fib6_get_table(dev_net(dev),
+                              addrconf_rt_table(dev, RT6_TABLE_MAIN));
        if (!table)
                return NULL;
 
@@ -1885,7 +1882,7 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
                                     unsigned int pref)
 {
        struct fib6_config cfg = {
-               .fc_table       = RT6_TABLE_DFLT,
+               .fc_table       = addrconf_rt_table(dev, RT6_TABLE_DFLT),
                .fc_metric      = IP6_RT_PRIO_USER,
                .fc_ifindex     = dev->ifindex,
                .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
@@ -1902,28 +1899,17 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
        return rt6_get_dflt_router(gwaddr, dev);
 }
 
-void rt6_purge_dflt_routers(struct net *net)
-{
-       struct rt6_info *rt;
-       struct fib6_table *table;
 
-       /* NOTE: Keep consistent with rt6_get_dflt_router */
-       table = fib6_get_table(net, RT6_TABLE_DFLT);
-       if (!table)
-               return;
+/*
+ * Per-route filter passed to fib6_clean_all() by rt6_purge_dflt_routers().
+ * @rt:  route under inspection
+ * @arg: unused
+ *
+ * Returns -1 when @rt carries RTF_DEFAULT or RTF_ADDRCONF and its
+ * inet6_dev is either missing or has accept_ra != 2; returns 0 for every
+ * other route.
+ * NOTE(review): -1 is presumably the "delete this route" verdict of the
+ * fib6 walker -- confirm against fib6_clean_all().
+ */
+int rt6_addrconf_purge(struct rt6_info *rt, void *arg) {
+       /* Neither flag set: not a candidate. */
+       if (!(rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)))
+               return 0;
+
+       /* A present inet6_dev with accept_ra == 2 exempts the route. */
+       if (rt->rt6i_idev && rt->rt6i_idev->cnf.accept_ra == 2)
+               return 0;
+
+       return -1;
+}
 
-restart:
-       read_lock_bh(&table->tb6_lock);
-       for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
-               if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
-                   (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
-                       dst_hold(&rt->dst);
-                       read_unlock_bh(&table->tb6_lock);
-                       ip6_del_rt(rt);
-                       goto restart;
-               }
-       }
-       read_unlock_bh(&table->tb6_lock);
+/*
+ * rt6_purge_dflt_routers - purge autoconf/default routes in @net
+ * @net: network namespace to operate on
+ *
+ * Hands rt6_addrconf_purge() to fib6_clean_all() as the per-route
+ * callback, with 0 and NULL as the remaining arguments.
+ * NOTE(review): fib6_clean_all() presumably visits every FIB table of
+ * @net rather than a single one, which matters when autoconf routes are
+ * placed in per-device tables -- confirm.
+ */
+void rt6_purge_dflt_routers(struct net *net)
+{
+       fib6_clean_all(net, rt6_addrconf_purge, 0, NULL);
 }
 
 static void rtmsg_to_fib6_config(struct net *net,