Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

[firefly-linux-kernel-4.4.55.git] / net / ipv4 / tcp_input.c
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c

index 00a41499d52c89c0961f6229d79f58022c25ba46..88fa2d1606859de25419d0d45c3095f6d410d42b 100644 (file)
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -68,6 +68,7 @@
  #include <linux/module.h>
  #include <linux/sysctl.h>
  #include <linux/kernel.h>
+#include <linux/prefetch.h>
  #include <net/dst.h>
  #include <net/tcp.h>
  #include <net/inet_common.h>
@@ -2314,6 +2315,35 @@ static inline bool tcp_packet_delayed(const struct tcp_sock *tp)
  
  /* Undo procedures. */
  
+/* We can clear retrans_stamp when there are no retransmissions in the
+ * window. It would seem that it is trivially available for us in
+ * tp->retrans_out, however, that kind of assumption doesn't consider
+ * what will happen if errors occur when sending retransmission for the
+ * second time. ...It could be that such segment has only
+ * TCPCB_EVER_RETRANS set at the present time. It seems that checking
+ * the head skb is enough except for some reneging corner cases that
+ * are not worth the effort.
+ *
+ * Main reason for all this complexity is the fact that connection dying
+ * time now depends on the validity of the retrans_stamp, in particular,
+ * that successive retransmissions of a segment must not advance
+ * retrans_stamp under any conditions.
+ */
+static bool tcp_any_retrans_done(const struct sock *sk)
+{
+       const struct tcp_sock *tp = tcp_sk(sk);
+       struct sk_buff *skb;
+
+       if (tp->retrans_out)
+               return true;
+
+       skb = tcp_write_queue_head(sk);
+       if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS))
+               return true;
+
+       return false;
+}
+
  #if FASTRETRANS_DEBUG > 1
  static void DBGUNDO(struct sock *sk, const char *msg)
  {
@@ -2409,6 +2439,8 @@ static bool tcp_try_undo_recovery(struct sock *sk)
                  * is ACKed. For Reno it is MUST to prevent false
                  * fast retransmits (RFC2582). SACK TCP is safe. */
                 tcp_moderate_cwnd(tp);
+               if (!tcp_any_retrans_done(sk))
+                       tp->retrans_stamp = 0;
                 return true;
         }
         tcp_set_ca_state(sk, TCP_CA_Open);
@@ -2429,35 +2461,6 @@ static bool tcp_try_undo_dsack(struct sock *sk)
         return false;
  }
  
-/* We can clear retrans_stamp when there are no retransmissions in the
- * window. It would seem that it is trivially available for us in
- * tp->retrans_out, however, that kind of assumptions doesn't consider
- * what will happen if errors occur when sending retransmission for the
- * second time. ...It could the that such segment has only
- * TCPCB_EVER_RETRANS set at the present time. It seems that checking
- * the head skb is enough except for some reneging corner cases that
- * are not worth the effort.
- *
- * Main reason for all this complexity is the fact that connection dying
- * time now depends on the validity of the retrans_stamp, in particular,
- * that successive retransmissions of a segment must not advance
- * retrans_stamp under any conditions.
- */
-static bool tcp_any_retrans_done(const struct sock *sk)
-{
-       const struct tcp_sock *tp = tcp_sk(sk);
-       struct sk_buff *skb;
-
-       if (tp->retrans_out)
-               return true;
-
-       skb = tcp_write_queue_head(sk);
-       if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS))
-               return true;
-
-       return false;
-}
-
  /* Undo during loss recovery after partial ACK or using F-RTO. */
  static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
  {
@@ -3029,6 +3032,21 @@ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
         return packets_acked;
  }
  
+static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
+                          u32 prior_snd_una)
+{
+       const struct skb_shared_info *shinfo;
+
+       /* Avoid cache line misses to get skb_shinfo() and shinfo->tx_flags */
+       if (likely(!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK)))
+               return;
+
+       shinfo = skb_shinfo(skb);
+       if ((shinfo->tx_flags & SKBTX_ACK_TSTAMP) &&
+           between(shinfo->tskey, prior_snd_una, tcp_sk(sk)->snd_una - 1))
+               __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
+}
+
  /* Remove acknowledged frames from the retransmission queue. If our packet
   * is before the ack sequence we can discard it as it's confirmed to have
   * arrived at the other end.
@@ -3052,14 +3070,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
         first_ackt.v64 = 0;
  
         while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
-               struct skb_shared_info *shinfo = skb_shinfo(skb);
                 struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
                 u8 sacked = scb->sacked;
                 u32 acked_pcount;
  
-               if (unlikely(shinfo->tx_flags & SKBTX_ACK_TSTAMP) &&
-                   between(shinfo->tskey, prior_snd_una, tp->snd_una - 1))
-                       __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
+               tcp_ack_tstamp(sk, skb, prior_snd_una);
  
                 /* Determine how many packets and what bytes were acked, tso and else */
                 if (after(scb->end_seq, tp->snd_una)) {
@@ -3073,10 +3088,12 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
  
                         fully_acked = false;
                 } else {
+                       /* Speed up tcp_unlink_write_queue() and next loop */
+                       prefetchw(skb->next);
                         acked_pcount = tcp_skb_pcount(skb);
                 }
  
-               if (sacked & TCPCB_RETRANS) {
+               if (unlikely(sacked & TCPCB_RETRANS)) {
                         if (sacked & TCPCB_SACKED_RETRANS)
                                 tp->retrans_out -= acked_pcount;
                         flag |= FLAG_RETRANS_DATA_ACKED;
@@ -3107,7 +3124,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
                  * connection startup slow start one packet too
                  * quickly.  This is severely frowned upon behavior.
                  */
-               if (!(scb->tcp_flags & TCPHDR_SYN)) {
+               if (likely(!(scb->tcp_flags & TCPHDR_SYN))) {
                         flag |= FLAG_DATA_ACKED;
                 } else {
                         flag |= FLAG_SYN_ACKED;
@@ -3119,9 +3136,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
  
                 tcp_unlink_write_queue(skb, sk);
                 sk_wmem_free_skb(sk, skb);
-               if (skb == tp->retransmit_skb_hint)
+               if (unlikely(skb == tp->retransmit_skb_hint))
                         tp->retransmit_skb_hint = NULL;
-               if (skb == tp->lost_skb_hint)
+               if (unlikely(skb == tp->lost_skb_hint))
                         tp->lost_skb_hint = NULL;
         }
  
@@ -3132,7 +3149,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
                 flag |= FLAG_SACK_RENEGING;
  
         skb_mstamp_get(&now);
-       if (first_ackt.v64) {
+       if (likely(first_ackt.v64)) {
                 seq_rtt_us = skb_mstamp_us_delta(&now, &first_ackt);
                 ca_seq_rtt_us = skb_mstamp_us_delta(&now, &last_ackt);
         }
@@ -3394,6 +3411,9 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
         int acked = 0; /* Number of packets newly acked */
         long sack_rtt_us = -1L;
  
+       /* We very likely will need to access write queue head. */
+       prefetchw(sk->sk_write_queue.next);
+
         /* If the ack is older than previous acks
          * then we can probably ignore it.
          */