Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
[firefly-linux-kernel-4.4.55.git] / net / ipv4 / tcp.c
index f1377f2a0472ec26e88b92be2346cbc3c8a69b41..7f4056785accb76eec60e22dc0bb19febc98f75f 100644 (file)
@@ -695,8 +695,9 @@ static int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
        struct tcp_splice_state *tss = rd_desc->arg.data;
        int ret;
 
-       ret = skb_splice_bits(skb, offset, tss->pipe, min(rd_desc->count, len),
-                             tss->flags);
+       ret = skb_splice_bits(skb, skb->sk, offset, tss->pipe,
+                             min(rd_desc->count, len), tss->flags,
+                             skb_socket_splice);
        if (ret > 0)
                rd_desc->count -= ret;
        return ret;
@@ -809,16 +810,28 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
 }
 EXPORT_SYMBOL(tcp_splice_read);
 
-struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp)
+struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
+                                   bool force_schedule)
 {
        struct sk_buff *skb;
 
        /* The TCP header must be at least 32-bit aligned.  */
        size = ALIGN(size, 4);
 
+       if (unlikely(tcp_under_memory_pressure(sk)))
+               sk_mem_reclaim_partial(sk);
+
        skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp);
-       if (skb) {
-               if (sk_wmem_schedule(sk, skb->truesize)) {
+       if (likely(skb)) {
+               bool mem_scheduled;
+
+               if (force_schedule) {
+                       mem_scheduled = true;
+                       sk_forced_mem_schedule(sk, skb->truesize);
+               } else {
+                       mem_scheduled = sk_wmem_schedule(sk, skb->truesize);
+               }
+               if (likely(mem_scheduled)) {
                        skb_reserve(skb, sk->sk_prot->max_header);
                        /*
                         * Make sure that we have exactly size bytes
@@ -908,7 +921,8 @@ new_segment:
                        if (!sk_stream_memory_free(sk))
                                goto wait_for_sndbuf;
 
-                       skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation);
+                       skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation,
+                                                 skb_queue_empty(&sk->sk_write_queue));
                        if (!skb)
                                goto wait_for_memory;
 
@@ -987,6 +1001,9 @@ do_error:
        if (copied)
                goto out;
 out_err:
+       /* make sure we wake any epoll edge trigger waiter */
+       if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN))
+               sk->sk_write_space(sk);
        return sk_stream_error(sk, flags, err);
 }
 
@@ -1144,7 +1161,8 @@ new_segment:
 
                        skb = sk_stream_alloc_skb(sk,
                                                  select_size(sk, sg),
-                                                 sk->sk_allocation);
+                                                 sk->sk_allocation,
+                                                 skb_queue_empty(&sk->sk_write_queue));
                        if (!skb)
                                goto wait_for_memory;
 
@@ -1275,6 +1293,9 @@ do_error:
                goto out;
 out_err:
        err = sk_stream_error(sk, flags, err);
+       /* make sure we wake any epoll edge trigger waiter */
+       if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN))
+               sk->sk_write_space(sk);
        release_sock(sk);
        return err;
 }
@@ -2483,6 +2504,13 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
                        icsk->icsk_syn_retries = val;
                break;
 
+       case TCP_SAVE_SYN:
+               if (val < 0 || val > 1)
+                       err = -EINVAL;
+               else
+                       tp->save_syn = val;
+               break;
+
        case TCP_LINGER2:
                if (val < 0)
                        tp->linger2 = -1;
@@ -2545,10 +2573,13 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 
        case TCP_FASTOPEN:
                if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE |
-                   TCPF_LISTEN)))
+                   TCPF_LISTEN))) {
+                       tcp_fastopen_init_key_once(true);
+
                        err = fastopen_init_queue(sk, val);
-               else
+               } else {
                        err = -EINVAL;
+               }
                break;
        case TCP_TIMESTAMP:
                if (!tp->repair)
@@ -2596,13 +2627,15 @@ EXPORT_SYMBOL(compat_tcp_setsockopt);
 /* Return information about state of tcp endpoint in API format. */
 void tcp_get_info(struct sock *sk, struct tcp_info *info)
 {
-       const struct tcp_sock *tp = tcp_sk(sk);
+       const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */
        const struct inet_connection_sock *icsk = inet_csk(sk);
        u32 now = tcp_time_stamp;
        unsigned int start;
        u32 rate;
 
        memset(info, 0, sizeof(*info));
+       if (sk->sk_type != SOCK_STREAM)
+               return;
 
        info->tcpi_state = sk->sk_state;
        info->tcpi_ca_state = icsk->icsk_ca_state;
@@ -2672,6 +2705,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
                info->tcpi_bytes_acked = tp->bytes_acked;
                info->tcpi_bytes_received = tp->bytes_received;
        } while (u64_stats_fetch_retry_irq(&tp->syncp, start));
+       info->tcpi_segs_out = tp->segs_out;
+       info->tcpi_segs_in = tp->segs_in;
 }
 EXPORT_SYMBOL_GPL(tcp_get_info);
 
@@ -2821,6 +2856,42 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
        case TCP_NOTSENT_LOWAT:
                val = tp->notsent_lowat;
                break;
+       case TCP_SAVE_SYN:
+               val = tp->save_syn;
+               break;
+       case TCP_SAVED_SYN: {
+               if (get_user(len, optlen))
+                       return -EFAULT;
+
+               lock_sock(sk);
+               if (tp->saved_syn) {
+                       if (len < tp->saved_syn[0]) {
+                               if (put_user(tp->saved_syn[0], optlen)) {
+                                       release_sock(sk);
+                                       return -EFAULT;
+                               }
+                               release_sock(sk);
+                               return -EINVAL;
+                       }
+                       len = tp->saved_syn[0];
+                       if (put_user(len, optlen)) {
+                               release_sock(sk);
+                               return -EFAULT;
+                       }
+                       if (copy_to_user(optval, tp->saved_syn + 1, len)) {
+                               release_sock(sk);
+                               return -EFAULT;
+                       }
+                       tcp_saved_syn_free(tp);
+                       release_sock(sk);
+               } else {
+                       release_sock(sk);
+                       len = 0;
+                       if (put_user(len, optlen))
+                               return -EFAULT;
+               }
+               return 0;
+       }
        default:
                return -ENOPROTOOPT;
        }
@@ -3025,11 +3096,12 @@ __setup("thash_entries=", set_thash_entries);
 
 static void __init tcp_init_mem(void)
 {
-       unsigned long limit = nr_free_buffer_pages() / 8;
+       unsigned long limit = nr_free_buffer_pages() / 16;
+
        limit = max(limit, 128UL);
-       sysctl_tcp_mem[0] = limit / 4 * 3;
-       sysctl_tcp_mem[1] = limit;
-       sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2;
+       sysctl_tcp_mem[0] = limit / 4 * 3;              /* 4.68 % */
+       sysctl_tcp_mem[1] = limit;                      /* 6.25 % */
+       sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2;      /* 9.37 % */
 }
 
 void __init tcp_init(void)