Merge remote-tracking branch 'lsk/v3.10/topic/juno' into linux-linaro-lsk
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index ab450c099aa49a3d4b68ca531e7f5426a8eabaf1..39bdb14b32140181193d31630a96257d42d839a7 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
 
 int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
 
+int sysctl_tcp_min_tso_segs __read_mostly = 2;
+
 struct percpu_counter tcp_orphan_count;
 EXPORT_SYMBOL_GPL(tcp_orphan_count);
 
@@ -786,14 +788,24 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
        xmit_size_goal = mss_now;
 
        if (large_allowed && sk_can_gso(sk)) {
-               xmit_size_goal = ((sk->sk_gso_max_size - 1) -
-                                 inet_csk(sk)->icsk_af_ops->net_header_len -
-                                 inet_csk(sk)->icsk_ext_hdr_len -
-                                 tp->tcp_header_len);
+               u32 gso_size, hlen;
+
+               /* Maybe we should/could use sk->sk_prot->max_header here ? */
+               hlen = inet_csk(sk)->icsk_af_ops->net_header_len +
+                      inet_csk(sk)->icsk_ext_hdr_len +
+                      tp->tcp_header_len;
+
+               /* Goal is to send at least one packet per ms,
+                * not one big TSO packet every 100 ms.
+                * This preserves ACK clocking and is consistent
+                * with tcp_tso_should_defer() heuristic.
+                */
+               gso_size = sk->sk_pacing_rate / (2 * MSEC_PER_SEC);
+               gso_size = max_t(u32, gso_size,
+                                sysctl_tcp_min_tso_segs * mss_now);
 
-               /* TSQ : try to have two TSO segments in flight */
-               xmit_size_goal = min_t(u32, xmit_size_goal,
-                                      sysctl_tcp_limit_output_bytes >> 1);
+               xmit_size_goal = min_t(u32, gso_size,
+                                      sk->sk_gso_max_size - 1 - hlen);
 
                xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal);
 
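
The arithmetic in this hunk is easiest to see with numbers. Below is a
standalone sketch of the same size-goal computation; the pacing rate and
MSS are assumed values for illustration, not read from a live socket:

    /* Sketch of the autosizing math above; values are illustrative. */
    #include <stdio.h>

    #define MSEC_PER_SEC 1000U

    int main(void)
    {
            unsigned int mss_now = 1448;          /* common Ethernet MSS */
            unsigned int min_tso_segs = 2;        /* sysctl default above */
            unsigned int pacing_rate = 12500000;  /* assumed 100 Mbit/s, in bytes/sec */

            /* Half a millisecond of payload at the current pacing rate... */
            unsigned int gso_size = pacing_rate / (2 * MSEC_PER_SEC);

            /* ...but never below tcp_min_tso_segs full-sized segments. */
            if (gso_size < min_tso_segs * mss_now)
                    gso_size = min_tso_segs * mss_now;

            printf("size goal %u bytes (~%u segments)\n",
                   gso_size, gso_size / mss_now);
            return 0;
    }

At the assumed 100 Mbit/s the goal comes out to 6250 bytes, about four
segments, so the stack emits a burst roughly every half millisecond rather
than one sk_gso_max_size blob every several milliseconds. The floor is the
new sysctl_tcp_min_tso_segs knob above, presumably exposed as
net.ipv4.tcp_min_tso_segs; its table registration lives outside this file.
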
@@ -989,7 +1001,8 @@ void tcp_free_fastopen_req(struct tcp_sock *tp)
        }
 }
 
-static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *size)
+static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
+                               int *copied, size_t size)
 {
        struct tcp_sock *tp = tcp_sk(sk);
        int err, flags;
@@ -1004,11 +1017,12 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *size)
        if (unlikely(tp->fastopen_req == NULL))
                return -ENOBUFS;
        tp->fastopen_req->data = msg;
+       tp->fastopen_req->size = size;
 
        flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0;
        err = __inet_stream_connect(sk->sk_socket, msg->msg_name,
                                    msg->msg_namelen, flags);
-       *size = tp->fastopen_req->copied;
+       *copied = tp->fastopen_req->copied;
        tcp_free_fastopen_req(tp);
        return err;
 }
@@ -1028,7 +1042,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 
        flags = msg->msg_flags;
        if (flags & MSG_FASTOPEN) {
-               err = tcp_sendmsg_fastopen(sk, msg, &copied_syn);
+               err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size);
                if (err == -EINPROGRESS && copied_syn > 0)
                        goto out;
                else if (err)
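
For context: the reworked helper now records the caller's total length in
fastopen_req->size instead of overloading the out-parameter, so the
SYN-data path can see how much the caller actually asked to send, while
copied_syn still reports back what made it into the handshake. From
userspace the whole path is driven by sendto() with MSG_FASTOPEN; a
minimal client-side sketch (destination address and port are placeholders,
error handling trimmed):

    #include <string.h>
    #include <unistd.h>
    #include <sys/socket.h>
    #include <netinet/in.h>
    #include <arpa/inet.h>

    #ifndef MSG_FASTOPEN
    # define MSG_FASTOPEN 0x20000000    /* from include/linux/socket.h */
    #endif

    int tfo_send(const char *buf, size_t len)
    {
            struct sockaddr_in dst = {
                    .sin_family = AF_INET,
                    .sin_port   = htons(80),        /* placeholder port */
            };
            int fd = socket(AF_INET, SOCK_STREAM, 0);
            ssize_t sent;

            inet_pton(AF_INET, "192.0.2.1", &dst.sin_addr);  /* placeholder */

            /* Connect and send in one call: the kernel puts at most one
             * SYN's worth of buf on the handshake (reported back through
             * fastopen_req->copied) and queues the rest, or fails with
             * EINPROGRESS and a partial count on nonblocking sockets.
             */
            sent = sendto(fd, buf, len, MSG_FASTOPEN,
                          (struct sockaddr *)&dst, sizeof(dst));
            close(fd);
            return sent < 0 ? -1 : 0;
    }
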
@@ -1117,6 +1131,13 @@ new_segment:
                                if (!skb)
                                        goto wait_for_memory;
 
+                               /*
+                                * All packets are restored as if they have
+                                * already been sent.
+                                */
+                               if (tp->repair)
+                                       TCP_SKB_CB(skb)->when = tcp_time_stamp;
+
                                /*
                                 * Check whether we can use HW checksum.
                                 */
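
The repair hunk above only makes sense in the context of socket
checkpoint/restore: with the socket in repair mode, anything written is
queued as if it had already been transmitted, and the new stamp gives
those skbs a sane ->when for later retransmit arithmetic. A hedged sketch
of the userspace side, assuming the repair constants from the kernel's
UAPI <linux/tcp.h> and a caller with CAP_NET_ADMIN:

    #include <sys/socket.h>
    #include <netinet/in.h>
    #include <linux/tcp.h>  /* TCP_REPAIR, TCP_REPAIR_QUEUE, TCP_SEND_QUEUE */

    /* Refill a restored socket's send queue; fd is assumed to be a TCP
     * socket already set up through the other repair hooks.
     */
    static int refill_send_queue(int fd, const void *data, size_t len)
    {
            int on = 1, queue = TCP_SEND_QUEUE;

            if (setsockopt(fd, IPPROTO_TCP, TCP_REPAIR, &on, sizeof(on)) < 0)
                    return -1;
            /* Select which queue subsequent send()s repopulate. */
            if (setsockopt(fd, IPPROTO_TCP, TCP_REPAIR_QUEUE,
                           &queue, sizeof(queue)) < 0)
                    return -1;

            /* Nothing hits the wire here: tcp_sendmsg() just queues the
             * skbs, which this patch additionally timestamps as "sent".
             */
            return send(fd, data, len, 0) == (ssize_t)len ? 0 : -1;
    }
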
@@ -2440,10 +2461,11 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
        case TCP_THIN_DUPACK:
                if (val < 0 || val > 1)
                        err = -EINVAL;
-               else
+               else {
                        tp->thin_dupack = val;
                        if (tp->thin_dupack)
                                tcp_disable_early_retrans(tp);
+               }
                break;
 
        case TCP_REPAIR:
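
The TCP_THIN_DUPACK change is a classic dangling-statement fix: only the
assignment was governed by the else, so tcp_disable_early_retrans() could
run even on the -EINVAL path whenever a stale thin_dupack was still set. A
reduced illustration with toy names (modern GCC flags the original shape
with -Wmisleading-indentation):

    #include <stdio.h>

    static int thin_dupack;

    static void set_thin_dupack(int val)
    {
            if (val < 0 || val > 1)
                    printf("-EINVAL\n");
            else
                    thin_dupack = val;
                    if (thin_dupack)        /* unconditional before the fix */
                            printf("early retrans disabled\n");
    }

    int main(void)
    {
            set_thin_dupack(1);  /* sets the flag, disables early retrans */
            set_thin_dupack(5);  /* -EINVAL, yet still prints "disabled" */
            return 0;
    }
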
@@ -2879,6 +2901,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
        netdev_features_t features)
 {
        struct sk_buff *segs = ERR_PTR(-EINVAL);
+       unsigned int sum_truesize = 0;
        struct tcphdr *th;
        unsigned int thlen;
        unsigned int seq;
@@ -2962,13 +2985,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
                if (copy_destructor) {
                        skb->destructor = gso_skb->destructor;
                        skb->sk = gso_skb->sk;
-                       /* {tcp|sock}_wfree() use exact truesize accounting :
-                        * sum(skb->truesize) MUST be exactly be gso_skb->truesize
-                        * So we account mss bytes of 'true size' for each segment.
-                        * The last segment will contain the remaining.
-                        */
-                       skb->truesize = mss;
-                       gso_skb->truesize -= mss;
+                       sum_truesize += skb->truesize;
                }
                skb = skb->next;
                th = tcp_hdr(skb);
@@ -2985,7 +3002,9 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
        if (copy_destructor) {
                swap(gso_skb->sk, skb->sk);
                swap(gso_skb->destructor, skb->destructor);
-               swap(gso_skb->truesize, skb->truesize);
+               sum_truesize += skb->truesize;
+               atomic_add(sum_truesize - gso_skb->truesize,
+                          &skb->sk->sk_wmem_alloc);
        }
 
        delta = htonl(oldlen + (skb->tail - skb->transport_header) +
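
Finally, the truesize rework in the two hunks above replaces per-segment
surgery (carving mss bytes off gso_skb->truesize, which mis-accounted
whenever a segment's real truesize differed from mss) with a single
settlement: each segment keeps its genuine truesize, the loop sums them,
and the difference is charged to sk_wmem_alloc once. A toy model of the
net effect, with plain unsigned arithmetic standing in for the kernel's
atomic and invented numbers:

    #include <stdio.h>

    struct toy_skb { unsigned int truesize; };

    int main(void)
    {
            struct toy_skb segs[3] = { {1600}, {1600}, {900} };
            unsigned int gso_truesize = 3000;   /* original skb's charge */
            unsigned int sk_wmem_alloc = 3000;  /* socket charged so far */
            unsigned int sum_truesize = 0;

            for (int i = 0; i < 3; i++)
                    sum_truesize += segs[i].truesize;

            /* One adjustment replaces per-segment truesize edits: the
             * socket ends up charged exactly what the segments occupy.
             */
            sk_wmem_alloc += sum_truesize - gso_truesize;

            printf("sk_wmem_alloc = %u (== sum of segment truesizes %u)\n",
                   sk_wmem_alloc, sum_truesize);
            return 0;
    }
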