*/
static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
{
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+
skb->ip_summed = CHECKSUM_PARTIAL;
skb->csum = 0;
TCP_SKB_CB(skb)->tcp_flags = flags;
TCP_SKB_CB(skb)->sacked = 0;
- skb_shinfo(skb)->gso_segs = 1;
- skb_shinfo(skb)->gso_size = 0;
- skb_shinfo(skb)->gso_type = 0;
+ shinfo->gso_segs = 1;
+ shinfo->gso_size = 0;
+ shinfo->gso_type = 0;
TCP_SKB_CB(skb)->seq = seq;
if (flags & (TCPHDR_SYN | TCPHDR_FIN))
* Beware: Something in the Internet is very sensitive to the ordering of
* TCP options, we learned this through the hard way, so be careful here.
* Luckily we can at least blame others for their non-compliance but from
- * inter-operatibility perspective it seems that we're somewhat stuck with
+ * inter-operability perspective it seems that we're somewhat stuck with
* the ordering which we have been using if we want to keep working with
* those broken things (not that it currently hurts anybody as there isn't
* particular reason why the ordering would need to be changed).
*
* Its important tcp_wfree() can be replaced by sock_wfree() in the event skb
* needs to be reallocated in a driver.
- * The invariant being skb->truesize substracted from sk->sk_wmem_alloc
+ * The invariant being skb->truesize subtracted from sk->sk_wmem_alloc
*
* Since transmit from skb destructor is forbidden, we use a tasklet
* to process all sockets that eventually need to send more skbs.
tcp_write_xmit(sk, tcp_current_mss(sk), 0, 0, GFP_ATOMIC);
}
/*
- * One tasklest per cpu tries to send more skbs.
+ * One tasklet per cpu tries to send more skbs.
* We run in tasklet context but need to disable irqs when
- * transfering tsq->head because tcp_wfree() might
+ * transferring tsq->head because tcp_wfree() might
* interrupt us (non NAPI drivers)
*/
static void tcp_tasklet_func(unsigned long data)
/*
* Write buffer destructor automatically called from kfree_skb.
- * We cant xmit new skbs from this context, as we might already
+ * We can't xmit new skbs from this context, as we might already
* hold qdisc lock.
*/
void tcp_wfree(struct sk_buff *skb)
BUG_ON(!skb || !tcp_skb_pcount(skb));
- /* If congestion control is doing timestamping, we must
- * take such a timestamp before we potentially clone/copy.
- */
- if (icsk->icsk_ca_ops->flags & TCP_CONG_RTT_STAMP)
- __net_timestamp(skb);
-
- if (likely(clone_it)) {
+ if (clone_it) {
const struct sk_buff *fclone = skb + 1;
+ /* If congestion control is doing timestamping, we must
+ * take such a timestamp before we potentially clone/copy.
+ */
+ if (icsk->icsk_ca_ops->flags & TCP_CONG_RTT_STAMP)
+ __net_timestamp(skb);
+
if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
fclone->fclone == SKB_FCLONE_CLONE))
NET_INC_STATS_BH(sock_net(sk),
static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
unsigned int mss_now)
{
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+
/* Make sure we own this skb before messing gso_size/gso_segs */
WARN_ON_ONCE(skb_cloned(skb));
/* Avoid the costly divide in the normal
* non-TSO case.
*/
- skb_shinfo(skb)->gso_segs = 1;
- skb_shinfo(skb)->gso_size = 0;
- skb_shinfo(skb)->gso_type = 0;
+ shinfo->gso_segs = 1;
+ shinfo->gso_size = 0;
+ shinfo->gso_type = 0;
} else {
- skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss_now);
- skb_shinfo(skb)->gso_size = mss_now;
- skb_shinfo(skb)->gso_type = sk->sk_gso_type;
+ shinfo->gso_segs = DIV_ROUND_UP(skb->len, mss_now);
+ shinfo->gso_size = mss_now;
+ shinfo->gso_type = sk->sk_gso_type;
}
}
*/
static void __pskb_trim_head(struct sk_buff *skb, int len)
{
+ struct skb_shared_info *shinfo;
int i, k, eat;
eat = min_t(int, len, skb_headlen(skb));
}
eat = len;
k = 0;
- for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
- int size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
+ shinfo = skb_shinfo(skb);
+ for (i = 0; i < shinfo->nr_frags; i++) {
+ int size = skb_frag_size(&shinfo->frags[i]);
if (size <= eat) {
skb_frag_unref(skb, i);
eat -= size;
} else {
- skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
+ shinfo->frags[k] = shinfo->frags[i];
if (eat) {
- skb_shinfo(skb)->frags[k].page_offset += eat;
- skb_frag_size_sub(&skb_shinfo(skb)->frags[k], eat);
+ shinfo->frags[k].page_offset += eat;
+ skb_frag_size_sub(&shinfo->frags[k], eat);
eat = 0;
}
k++;
}
}
- skb_shinfo(skb)->nr_frags = k;
+ shinfo->nr_frags = k;
skb_reset_tail_pointer(skb);
skb->data_len -= len;
* - better RTT estimation and ACK scheduling
* - faster recovery
* - high rates
+ * Alas, some drivers / subsystems require a fair amount
+ * of queued bytes to ensure line rate.
+ * One example is wifi aggregation (802.11 AMPDU)
*/
- limit = max(skb->truesize, sk->sk_pacing_rate >> 10);
+ limit = max_t(unsigned int, sysctl_tcp_limit_output_bytes,
+ sk->sk_pacing_rate >> 10);
if (atomic_read(&sk->sk_wmem_alloc) > limit) {
set_bit(TSQ_THROTTLED, &tp->tsq_flags);
tcp_retrans_try_collapse(sk, skb, cur_mss);
- /* Some Solaris stacks overoptimize and ignore the FIN on a
- * retransmit when old data is attached. So strip it off
- * since it is cheap to do so and saves bytes on the network.
- */
- if (skb->len > 0 &&
- (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) &&
- tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
- if (!pskb_trim(skb, 0)) {
- /* Reuse, even though it does some unnecessary work */
- tcp_init_nondata_skb(skb, TCP_SKB_CB(skb)->end_seq - 1,
- TCP_SKB_CB(skb)->tcp_flags);
- skb->ip_summed = CHECKSUM_NONE;
- }
- }
-
/* Make a copy, if the first transmission SKB clone we made
* is still in somebody's hands, else make a clone.
*/
th->syn = 1;
th->ack = 1;
TCP_ECN_make_synack(req, th);
- th->source = ireq->loc_port;
- th->dest = ireq->rmt_port;
+ th->source = htons(ireq->ir_num);
+ th->dest = ireq->ir_rmt_port;
/* Setting of flags are superfluous here for callers (and ECE is
* not even correctly set)
*/
{
if (sk->sk_state == TCP_ESTABLISHED) {
tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1;
- tcp_sk(sk)->snd_nxt = tcp_sk(sk)->write_seq;
tcp_xmit_probe_skb(sk, 0);
}
}