ipv4: try to cache dst_entries which would cause a redirect
author Hannes Frederic Sowa <hannes@stressinduktion.org>
Fri, 23 Jan 2015 11:01:26 +0000 (12:01 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 27 Feb 2015 01:48:48 +0000 (17:48 -0800)
[ Upstream commit df4d92549f23e1c037e83323aff58a21b3de7fe0 ]

Not caching dst_entries which cause redirects could be exploited by hosts
on the same subnet, causing a severe DoS attack. This effect has been
aggravated since commit f88649721268999 ("ipv4: fix dst race in
sk_dst_get()").

Lookups causing redirects will be allocated with DST_NOCACHE set, which
will force dst_release to free them via RCU.  Unfortunately, waiting for
an RCU grace period just takes too long: we can end up with >1M dst_entries
waiting to be released and the system will run OOM. rcuos threads cannot
catch up under high softirq load.

Attaching the flag that requests a later redirect to the specific skb
allows us to cache those dst_entries, thus reducing the pressure on
allocation and deallocation.

This issue was discovered by Marcelo Leitner.

Cc: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Marcelo Leitner <mleitner@redhat.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
include/net/ip.h
net/ipv4/ip_forward.c
net/ipv4/route.c

index 8695359982d1c62b98662787019cc7cd2982ba6e..e47ad4c01608ed2942b88f6c1271f9c5798c6340 100644 (file)
@@ -37,11 +37,12 @@ struct inet_skb_parm {
        struct ip_options       opt;            /* Compiled IP options          */
        unsigned char           flags;
 
        struct ip_options       opt;            /* Compiled IP options          */
        unsigned char           flags;
 
-#define IPSKB_FORWARDED                1
-#define IPSKB_XFRM_TUNNEL_SIZE 2
-#define IPSKB_XFRM_TRANSFORMED 4
-#define IPSKB_FRAG_COMPLETE    8
-#define IPSKB_REROUTED         16
+#define IPSKB_FORWARDED                BIT(0)
+#define IPSKB_XFRM_TUNNEL_SIZE BIT(1)
+#define IPSKB_XFRM_TRANSFORMED BIT(2)
+#define IPSKB_FRAG_COMPLETE    BIT(3)
+#define IPSKB_REROUTED         BIT(4)
+#define IPSKB_DOREDIRECT       BIT(5)
 
        u16                     frag_max_size;
 };
 
        u16                     frag_max_size;
 };
index bd1c5baf69bef5c3511a787284cf8a7b4b1b9ef0..31ee5c6033dfe6ec87d9adfacb688025faeda2bc 100644 (file)
@@ -175,7 +175,8 @@ int ip_forward(struct sk_buff *skb)
         *      We now generate an ICMP HOST REDIRECT giving the route
         *      we calculated.
         */
         *      We now generate an ICMP HOST REDIRECT giving the route
         *      we calculated.
         */
-       if (rt->rt_flags&RTCF_DOREDIRECT && !opt->srr && !skb_sec_path(skb))
+       if (IPCB(skb)->flags & IPSKB_DOREDIRECT && !opt->srr &&
+           !skb_sec_path(skb))
                ip_rt_send_redirect(skb);
 
        skb->priority = rt_tos2priority(iph->tos);
                ip_rt_send_redirect(skb);
 
        skb->priority = rt_tos2priority(iph->tos);
index d4d162eac4df095821a20191e41904e575d6e533..e23c5f64286b6c1611b5b7ebd5126b5064e2ce94 100644 (file)
@@ -1514,11 +1514,10 @@ static int __mkroute_input(struct sk_buff *skb,
 
        do_cache = res->fi && !itag;
        if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) &&
 
        do_cache = res->fi && !itag;
        if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) &&
+           skb->protocol == htons(ETH_P_IP) &&
            (IN_DEV_SHARED_MEDIA(out_dev) ||
            (IN_DEV_SHARED_MEDIA(out_dev) ||
-            inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res)))) {
-               flags |= RTCF_DOREDIRECT;
-               do_cache = false;
-       }
+            inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
+               IPCB(skb)->flags |= IPSKB_DOREDIRECT;
 
        if (skb->protocol != htons(ETH_P_IP)) {
                /* Not IP (i.e. ARP). Do not create route, if it is
 
        if (skb->protocol != htons(ETH_P_IP)) {
                /* Not IP (i.e. ARP). Do not create route, if it is
@@ -2255,6 +2254,8 @@ static int rt_fill_info(struct net *net,  __be32 dst, __be32 src,
        r->rtm_flags    = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
        if (rt->rt_flags & RTCF_NOTIFY)
                r->rtm_flags |= RTM_F_NOTIFY;
        r->rtm_flags    = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
        if (rt->rt_flags & RTCF_NOTIFY)
                r->rtm_flags |= RTM_F_NOTIFY;
+       if (IPCB(skb)->flags & IPSKB_DOREDIRECT)
+               r->rtm_flags |= RTCF_DOREDIRECT;
 
        if (nla_put_be32(skb, RTA_DST, dst))
                goto nla_put_failure;
 
        if (nla_put_be32(skb, RTA_DST, dst))
                goto nla_put_failure;