dctcp: avoid bogus doubling of cwnd after loss
author Florian Westphal <fw@strlen.de>
Fri, 28 Oct 2016 16:43:11 +0000 (18:43 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 21 Nov 2016 09:06:39 +0000 (10:06 +0100)
[ Upstream commit ce6dd23329b1ee6a794acf5f7e40f8e89b8317ee ]

If a congestion control module doesn't provide an .undo_cwnd function,
tcp_undo_cwnd_reduction() will set cwnd to

   tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1);

... which makes sense for reno (it sets ssthresh to half the current cwnd),
but it makes no sense for dctcp, which sets ssthresh based on the current
congestion estimate.

This can cause severe growth of cwnd (eventually overflowing u32).

Fix this by saving the last cwnd on loss and restoring cwnd based on that,
similar to cubic and other algorithms.

Fixes: e3118e8359bb7c ("net: tcp: add DCTCP congestion control algorithm")
Cc: Lawrence Brakmo <brakmo@fb.com>
Cc: Andrew Shewmaker <agshew@gmail.com>
Cc: Glenn Judd <glenn.judd@morganstanley.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
net/ipv4/tcp_dctcp.c

index 7e538f71f5fbae087c3e3e4367d60e08cd609ac5..55d7da1d2ce9912ad9efc654206a402913d8b831 100644 (file)
@@ -56,6 +56,7 @@ struct dctcp {
        u32 next_seq;
        u32 ce_state;
        u32 delayed_ack_reserved;
+       u32 loss_cwnd;
 };
 
 static unsigned int dctcp_shift_g __read_mostly = 4; /* g = 1/2^4 */
@@ -96,6 +97,7 @@ static void dctcp_init(struct sock *sk)
                ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA);
 
                ca->delayed_ack_reserved = 0;
+               ca->loss_cwnd = 0;
                ca->ce_state = 0;
 
                dctcp_reset(tp, ca);
@@ -111,9 +113,10 @@ static void dctcp_init(struct sock *sk)
 
 static u32 dctcp_ssthresh(struct sock *sk)
 {
-       const struct dctcp *ca = inet_csk_ca(sk);
+       struct dctcp *ca = inet_csk_ca(sk);
        struct tcp_sock *tp = tcp_sk(sk);
 
+       ca->loss_cwnd = tp->snd_cwnd;
        return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 2U);
 }
 
@@ -308,12 +311,20 @@ static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr,
        return 0;
 }
 
+static u32 dctcp_cwnd_undo(struct sock *sk)
+{
+       const struct dctcp *ca = inet_csk_ca(sk);
+
+       return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
+}
+
 static struct tcp_congestion_ops dctcp __read_mostly = {
        .init           = dctcp_init,
        .in_ack_event   = dctcp_update_alpha,
        .cwnd_event     = dctcp_cwnd_event,
        .ssthresh       = dctcp_ssthresh,
        .cong_avoid     = tcp_reno_cong_avoid,
+       .undo_cwnd      = dctcp_cwnd_undo,
        .set_state      = dctcp_state,
        .get_info       = dctcp_get_info,
        .flags          = TCP_CONG_NEEDS_ECN,