Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/linville/wirel...
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 8e8529d3c8c92a4473a3ed9fd8db876ca83533bd..0ca87547becbe939c19e93c6f68983c40087da86 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -285,6 +285,8 @@ int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
 
 int sysctl_tcp_min_tso_segs __read_mostly = 2;
 
+int sysctl_tcp_autocorking __read_mostly = 1;
+
 struct percpu_counter tcp_orphan_count;
 EXPORT_SYMBOL_GPL(tcp_orphan_count);
 
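The new knob is visible to userspace as /proc/sys/net/ipv4/tcp_autocorking
and defaults to enabled. The matching table entry lives in
net/ipv4/sysctl_net_ipv4.c rather than in this file; a sketch of what it
looks like, assuming the zero/one bounds already defined there for other
boolean sysctls:

    /* sysctl_net_ipv4.c (sketch, not part of this hunk):
     * proc_dointvec_minmax with extra1/extra2 clamps writes to 0..1,
     * so the knob behaves as a boolean.
     */
    {
        .procname     = "tcp_autocorking",
        .data         = &sysctl_tcp_autocorking,
        .maxlen       = sizeof(int),
        .mode         = 0644,
        .proc_handler = proc_dointvec_minmax,
        .extra1       = &zero,
        .extra2       = &one,
    },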
@@ -619,19 +621,52 @@ static inline void tcp_mark_urg(struct tcp_sock *tp, int flags)
                tp->snd_up = tp->write_seq;
 }
 
-static inline void tcp_push(struct sock *sk, int flags, int mss_now,
-                           int nonagle)
+/* If a not-yet-filled skb is pushed, do not send it if
+ * we already have packets in the Qdisc or NIC queues:
+ * since TX completion will happen shortly, this gives a chance
+ * to coalesce future sendmsg() payload into this skb, without
+ * needing a timer and with no latency trade-off.
+ * As packets carrying data payload have a bigger truesize
+ * than pure (dataless) ACK packets, the last check prevents
+ * autocorking if we only have an ACK in the Qdisc/NIC queues.
+ */
+static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb,
+                               int size_goal)
 {
-       if (tcp_send_head(sk)) {
-               struct tcp_sock *tp = tcp_sk(sk);
+       return skb->len < size_goal &&
+              sysctl_tcp_autocorking &&
+              atomic_read(&sk->sk_wmem_alloc) > skb->truesize;
+}
+
+static void tcp_push(struct sock *sk, int flags, int mss_now,
+                    int nonagle, int size_goal)
+{
+       struct tcp_sock *tp = tcp_sk(sk);
+       struct sk_buff *skb;
+
+       if (!tcp_send_head(sk))
+               return;
+
+       skb = tcp_write_queue_tail(sk);
+       if (!(flags & MSG_MORE) || forced_push(tp))
+               tcp_mark_push(tp, skb);
+
+       tcp_mark_urg(tp, flags);
 
-               if (!(flags & MSG_MORE) || forced_push(tp))
-                       tcp_mark_push(tp, tcp_write_queue_tail(sk));
+       if (tcp_should_autocork(sk, skb, size_goal)) {
+
-               tcp_mark_urg(tp, flags);
-               __tcp_push_pending_frames(sk, mss_now,
-                                         (flags & MSG_MORE) ? TCP_NAGLE_CORK : nonagle);
+               /* avoid atomic op if TSQ_THROTTLED bit is already set */
+               if (!test_bit(TSQ_THROTTLED, &tp->tsq_flags)) {
+                       NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAUTOCORKING);
+                       set_bit(TSQ_THROTTLED, &tp->tsq_flags);
+               }
+               return;
        }
+
+       if (flags & MSG_MORE)
+               nonagle = TCP_NAGLE_CORK;
+
+       __tcp_push_pending_frames(sk, mss_now, nonagle);
 }
 
 static int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
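Applications keep full control over flushing: TCP_NODELAY and TCP_CORK
behave as before, and writing 0 to the sysctl disables the feature
globally. A minimal userspace sketch of explicit corking around a batch
of small writes (the send_batch() helper is hypothetical and assumes fd
is a connected TCP socket):

    #include <netinet/in.h>
    #include <netinet/tcp.h>
    #include <sys/socket.h>
    #include <unistd.h>

    /* Batch small writes into as few segments as possible,
     * independently of the tcp_autocorking heuristic.
     */
    static void send_batch(int fd, const char *hdr, size_t hlen,
                           const char *body, size_t blen)
    {
        int on = 1, off = 0;

        setsockopt(fd, IPPROTO_TCP, TCP_CORK, &on, sizeof(on));
        write(fd, hdr, hlen);
        write(fd, body, blen);
        /* uncorking pushes whatever has been queued so far */
        setsockopt(fd, IPPROTO_TCP, TCP_CORK, &off, sizeof(off));
    }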
@@ -808,12 +843,6 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
                xmit_size_goal = min_t(u32, gso_size,
                                       sk->sk_gso_max_size - 1 - hlen);
 
-               /* TSQ : try to have at least two segments in flight
-                * (one in NIC TX ring, another in Qdisc)
-                */
-               xmit_size_goal = min_t(u32, xmit_size_goal,
-                                      sysctl_tcp_limit_output_bytes >> 1);
-
                xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal);
 
                /* We try hard to avoid divides here */
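The removed clamp tied the size goal to tcp_limit_output_bytes so that at
least two chunks could be in flight (one in the NIC TX ring, one in the
Qdisc); with autocorking making that decision per push, the goal can track
the full GSO size again. A toy calculation with hypothetical numbers,
showing what the old clamp cost a host whose admin had lowered
tcp_limit_output_bytes:

    /* Hypothetical values: tcp_limit_output_bytes lowered to 16384
     * to fight bufferbloat, with a ~64KB GSO size goal.
     */
    u32 limit = 16384;                            /* assumed sysctl value */
    u32 goal  = 65535 - 52;                       /* assumed GSO size goal */
    u32 old_goal = min_t(u32, goal, limit >> 1);  /* clamped to 8192 */
    u32 new_goal = goal;                          /* clamp removed: ~64KB */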
@@ -940,7 +969,8 @@ new_segment:
 wait_for_sndbuf:
                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 wait_for_memory:
-               tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
+               tcp_push(sk, flags & ~MSG_MORE, mss_now,
+                        TCP_NAGLE_PUSH, size_goal);
 
                if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
                        goto do_error;
@@ -950,7 +980,7 @@ wait_for_memory:
 
 out:
        if (copied && !(flags & MSG_SENDPAGE_NOTLAST))
-               tcp_push(sk, flags, mss_now, tp->nonagle);
+               tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
        return copied;
 
 do_error:
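Threading size_goal into tcp_push() is cheap because every caller already
computes it alongside mss_now. A simplified sketch of the caller side in
tcp_sendmsg()/do_tcp_sendpages(), where tcp_send_mss() fills in both
values:

    int mss_now, size_goal;

    mss_now = tcp_send_mss(sk, &size_goal, flags);
    /* ... copy payload into write-queue skbs ... */
    if (copied)
        tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);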
@@ -1231,7 +1261,8 @@ wait_for_sndbuf:
                        set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 wait_for_memory:
                        if (copied)
-                               tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
+                               tcp_push(sk, flags & ~MSG_MORE, mss_now,
+                                        TCP_NAGLE_PUSH, size_goal);
 
                        if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
                                goto do_error;
@@ -1242,7 +1273,7 @@ wait_for_memory:
 
 out:
        if (copied)
-               tcp_push(sk, flags, mss_now, tp->nonagle);
+               tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
        release_sock(sk);
        return copied + copied_syn;
 
@@ -1431,7 +1462,7 @@ static void tcp_service_net_dma(struct sock *sk, bool wait)
        do {
                if (dma_async_is_tx_complete(tp->ucopy.dma_chan,
                                              last_issued, &done,
-                                             &used) == DMA_SUCCESS) {
+                                             &used) == DMA_COMPLETE) {
                        /* Safe to free early-copied skbs now */
                        __skb_queue_purge(&sk->sk_async_wait_queue);
                        break;
@@ -1439,7 +1470,7 @@ static void tcp_service_net_dma(struct sock *sk, bool wait)
                        struct sk_buff *skb;
                        while ((skb = skb_peek(&sk->sk_async_wait_queue)) &&
                               (dma_async_is_complete(skb->dma_cookie, done,
-                                                     used) == DMA_SUCCESS)) {
+                                                     used) == DMA_COMPLETE)) {
                                __skb_dequeue(&sk->sk_async_wait_queue);
                                kfree_skb(skb);
                        }
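The two DMA hunks reflect the dmaengine rename of DMA_SUCCESS to
DMA_COMPLETE; only the enumerator name changed, not the completion
semantics. For reference, the usual polling idiom around
dma_async_is_tx_complete(), with a hypothetical dma_cookie_done()
wrapper:

    #include <linux/dmaengine.h>

    /* Poll 'chan' until 'cookie' leaves the in-progress state;
     * returns true only on successful completion.
     */
    static bool dma_cookie_done(struct dma_chan *chan, dma_cookie_t cookie)
    {
        dma_cookie_t done, used;
        enum dma_status status;

        do {
            status = dma_async_is_tx_complete(chan, cookie, &done, &used);
            if (status == DMA_IN_PROGRESS)
                cpu_relax();
        } while (status == DMA_IN_PROGRESS);

        return status == DMA_COMPLETE;
    }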