Merge remote-tracking branch 'stable/linux-3.0.y' into develop-3.0
[firefly-linux-kernel-4.4.55.git] / net / ipv4 / tcp.c
index 886721da4888fbacfbe77edc2c52b594cacf8be7..4a2d6f50be8592b4634b0612775a11829c132aeb 100644 (file)
 #include <net/xfrm.h>
 #include <net/ip.h>
 #include <net/ip6_route.h>
+#include <net/ipv6.h>
+#include <net/transp_v6.h>
 #include <net/netdma.h>
 #include <net/sock.h>
 
@@ -284,7 +286,7 @@ int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
 struct percpu_counter tcp_orphan_count;
 EXPORT_SYMBOL_GPL(tcp_orphan_count);
 
-int sysctl_tcp_mem[3] __read_mostly;
+long sysctl_tcp_mem[3] __read_mostly;
 int sysctl_tcp_wmem[3] __read_mostly;
 int sysctl_tcp_rmem[3] __read_mostly;
 
@@ -292,7 +294,7 @@ EXPORT_SYMBOL(sysctl_tcp_mem);
 EXPORT_SYMBOL(sysctl_tcp_rmem);
 EXPORT_SYMBOL(sysctl_tcp_wmem);
 
-atomic_t tcp_memory_allocated; /* Current allocated memory. */
+atomic_long_t tcp_memory_allocated;    /* Current allocated memory. */
 EXPORT_SYMBOL(tcp_memory_allocated);
 
 /*
@@ -507,6 +509,15 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
                else
                        answ = tp->write_seq - tp->snd_una;
                break;
+       case SIOCOUTQNSD:
+               if (sk->sk_state == TCP_LISTEN)
+                       return -EINVAL;
+
+               if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
+                       answ = 0;
+               else
+                       answ = tp->write_seq - tp->snd_nxt;
+               break;
        default:
                return -ENOIOCTLCMD;
        }
@@ -732,7 +743,9 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
                           old_size_goal + mss_now > xmit_size_goal)) {
                        xmit_size_goal = old_size_goal;
                } else {
-                       tp->xmit_size_goal_segs = xmit_size_goal / mss_now;
+                       tp->xmit_size_goal_segs =
+                               min_t(u16, xmit_size_goal / mss_now,
+                                     sk->sk_gso_max_segs);
                        xmit_size_goal = tp->xmit_size_goal_segs * mss_now;
                }
        }
@@ -843,8 +856,7 @@ new_segment:
 wait_for_sndbuf:
                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 wait_for_memory:
-               if (copied)
-                       tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
+               tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
 
                if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
                        goto do_error;
@@ -853,7 +865,7 @@ wait_for_memory:
        }
 
 out:
-       if (copied)
+       if (copied && !(flags & MSG_SENDPAGE_NOTLAST))
                tcp_push(sk, flags, mss_now, tp->nonagle);
        return copied;
 
@@ -875,9 +887,7 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset,
                                        flags);
 
        lock_sock(sk);
-       TCP_CHECK_TIMER(sk);
        res = do_tcp_sendpages(sk, &page, offset, size, flags);
-       TCP_CHECK_TIMER(sk);
        release_sock(sk);
        return res;
 }
@@ -918,7 +928,6 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
        long timeo;
 
        lock_sock(sk);
-       TCP_CHECK_TIMER(sk);
 
        flags = msg->msg_flags;
        timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
@@ -995,7 +1004,8 @@ new_segment:
                                /* We have some space in skb head. Superb! */
                                if (copy > skb_tailroom(skb))
                                        copy = skb_tailroom(skb);
-                               if ((err = skb_add_data(skb, from, copy)) != 0)
+                               err = skb_add_data_nocache(sk, skb, from, copy);
+                               if (err)
                                        goto do_fault;
                        } else {
                                int merge = 0;
@@ -1038,8 +1048,8 @@ new_segment:
 
                                /* Time to copy data. We are close to
                                 * the end! */
-                               err = skb_copy_to_page(sk, from, skb, page,
-                                                      off, copy);
+                               err = skb_copy_to_page_nocache(sk, from, skb,
+                                                              page, off, copy);
                                if (err) {
                                        /* If this page was new, give it to the
                                         * socket so it does not get leaked.
@@ -1106,7 +1116,6 @@ wait_for_memory:
 out:
        if (copied)
                tcp_push(sk, flags, mss_now, tp->nonagle);
-       TCP_CHECK_TIMER(sk);
        release_sock(sk);
 
        if (copied > 0)
@@ -1128,7 +1137,6 @@ do_error:
                goto out;
 out_err:
        err = sk_stream_error(sk, flags, err);
-       TCP_CHECK_TIMER(sk);
        release_sock(sk);
        return err;
 }
@@ -1198,7 +1206,7 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied)
        struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
 
        WARN(skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq),
-            KERN_INFO "cleanup rbuf bug: copied %X seq %X rcvnxt %X\n",
+            "cleanup rbuf bug: copied %X seq %X rcvnxt %X\n",
             tp->copied_seq, TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt);
 #endif
 
@@ -1423,8 +1431,6 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 
        lock_sock(sk);
 
-       TCP_CHECK_TIMER(sk);
-
        err = -ENOTCONN;
        if (sk->sk_state == TCP_LISTEN)
                goto out;
@@ -1485,10 +1491,9 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                         * shouldn't happen.
                         */
                        if (WARN(before(*seq, TCP_SKB_CB(skb)->seq),
-                            KERN_INFO "recvmsg bug: copied %X "
-                                      "seq %X rcvnxt %X fl %X\n", *seq,
-                                      TCP_SKB_CB(skb)->seq, tp->rcv_nxt,
-                                      flags))
+                                "recvmsg bug: copied %X seq %X rcvnxt %X fl %X\n",
+                                *seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt,
+                                flags))
                                break;
 
                        offset = *seq - TCP_SKB_CB(skb)->seq;
@@ -1498,10 +1503,9 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                                goto found_ok_skb;
                        if (tcp_hdr(skb)->fin)
                                goto found_fin_ok;
-                       WARN(!(flags & MSG_PEEK), KERN_INFO "recvmsg bug 2: "
-                                       "copied %X seq %X rcvnxt %X fl %X\n",
-                                       *seq, TCP_SKB_CB(skb)->seq,
-                                       tp->rcv_nxt, flags);
+                       WARN(!(flags & MSG_PEEK),
+                            "recvmsg bug 2: copied %X seq %X rcvnxt %X fl %X\n",
+                            *seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt, flags);
                }
 
                /* Well, if we have backlog, try to process it now yet. */
@@ -1777,7 +1781,6 @@ skip_copy:
        /* Clean up data we have read: This will do ACK frames. */
        tcp_cleanup_rbuf(sk, copied);
 
-       TCP_CHECK_TIMER(sk);
        release_sock(sk);
 
        if (copied > 0)
@@ -1785,7 +1788,6 @@ skip_copy:
        return copied;
 
 out:
-       TCP_CHECK_TIMER(sk);
        release_sock(sk);
        return err;
 
@@ -2405,7 +2407,15 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
                err = tp->af_specific->md5_parse(sk, optval, optlen);
                break;
 #endif
-
+       case TCP_USER_TIMEOUT:
+               /* Cap the max timeout in ms TCP will retry/retrans
+                * before giving up and aborting (ETIMEDOUT) a connection.
+                */
+               if (val < 0)
+                       err = -EINVAL;
+               else
+                       icsk->icsk_user_timeout = msecs_to_jiffies(val);
+               break;
        default:
                err = -ENOPROTOOPT;
                break;
@@ -2624,6 +2634,10 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
        case TCP_THIN_DUPACK:
                val = tp->thin_dupack;
                break;
+
+       case TCP_USER_TIMEOUT:
+               val = jiffies_to_msecs(icsk->icsk_user_timeout);
+               break;
        default:
                return -ENOPROTOOPT;
        }
@@ -2659,7 +2673,7 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname,
 EXPORT_SYMBOL(compat_tcp_getsockopt);
 #endif
 
-struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features)
+struct sk_buff *tcp_tso_segment(struct sk_buff *skb, u32 features)
 {
        struct sk_buff *segs = ERR_PTR(-EINVAL);
        struct tcphdr *th;
@@ -3225,8 +3239,8 @@ __setup("thash_entries=", set_thash_entries);
 void __init tcp_init(void)
 {
        struct sk_buff *skb = NULL;
-       unsigned long nr_pages, limit;
-       int i, max_share, cnt;
+       unsigned long limit;
+       int i, max_rshare, max_wshare, cnt;
        unsigned long jiffy = jiffies;
 
        BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
@@ -3282,13 +3296,7 @@ void __init tcp_init(void)
        sysctl_tcp_max_orphans = cnt / 2;
        sysctl_max_syn_backlog = max(128, cnt / 256);
 
-       /* Set the pressure threshold to be a fraction of global memory that
-        * is up to 1/2 at 256 MB, decreasing toward zero with the amount of
-        * memory, with a floor of 128 pages.
-        */
-       nr_pages = totalram_pages - totalhigh_pages;
-       limit = min(nr_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT);
-       limit = (limit * (nr_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11);
+       limit = nr_free_buffer_pages() / 8;
        limit = max(limit, 128UL);
        sysctl_tcp_mem[0] = limit / 4 * 3;
        sysctl_tcp_mem[1] = limit;
@@ -3296,15 +3304,16 @@ void __init tcp_init(void)
 
        /* Set per-socket limits to no more than 1/128 the pressure threshold */
        limit = ((unsigned long)sysctl_tcp_mem[1]) << (PAGE_SHIFT - 7);
-       max_share = min(4UL*1024*1024, limit);
+       max_wshare = min(4UL*1024*1024, limit);
+       max_rshare = min(6UL*1024*1024, limit);
 
        sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
        sysctl_tcp_wmem[1] = 16*1024;
-       sysctl_tcp_wmem[2] = max(64*1024, max_share);
+       sysctl_tcp_wmem[2] = max(64*1024, max_wshare);
 
        sysctl_tcp_rmem[0] = SK_MEM_QUANTUM;
        sysctl_tcp_rmem[1] = 87380;
-       sysctl_tcp_rmem[2] = max(87380, max_share);
+       sysctl_tcp_rmem[2] = max(87380, max_rshare);
 
        printk(KERN_INFO "TCP: Hash tables configured "
               "(established %u bind %u)\n",
@@ -3324,8 +3333,9 @@ void __init tcp_init(void)
 
 static int tcp_is_local(struct net *net, __be32 addr) {
        struct rtable *rt;
-       struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
-       if (ip_route_output_key(net, &rt, &fl) || !rt)
+       struct flowi4 fl4 = { .daddr = addr };
+       rt = ip_route_output_key(net, &fl4);
+       if (IS_ERR_OR_NULL(rt))
                return 0;
        return rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK);
 }
@@ -3371,8 +3381,16 @@ restart:
                sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[bucket].chain) {
                        struct inet_sock *inet = inet_sk(sk);
 
+                       if (sysctl_ip_dynaddr && sk->sk_state == TCP_SYN_SENT)
+                               continue;
+                       if (sock_flag(sk, SOCK_DEAD))
+                               continue;
+
                        if (family == AF_INET) {
                                __be32 s4 = inet->inet_rcv_saddr;
+                               if (s4 == LOOPBACK4_IPV6)
+                                       continue;
+
                                if (in->s_addr != s4 &&
                                    !(in->s_addr == INADDR_ANY &&
                                      !tcp_is_local(net, s4)))
@@ -3384,7 +3402,11 @@ restart:
                                struct in6_addr *s6;
                                if (!inet->pinet6)
                                        continue;
+
                                s6 = &inet->pinet6->rcv_saddr;
+                               if (ipv6_addr_type(s6) == IPV6_ADDR_MAPPED)
+                                       continue;
+
                                if (!ipv6_addr_equal(in6, s6) &&
                                    !(ipv6_addr_equal(in6, &in6addr_any) &&
                                      !tcp_is_local6(net, s6)))
@@ -3392,11 +3414,6 @@ restart:
                        }
 #endif
 
-                       if (sysctl_ip_dynaddr && sk->sk_state == TCP_SYN_SENT)
-                               continue;
-                       if (sock_flag(sk, SOCK_DEAD))
-                               continue;
-
                        sock_hold(sk);
                        spin_unlock_bh(lock);