Merge branch 'upstream' of git://ftp.linux-mips.org/pub/scm/upstream-linus
[linux-drm-fsl-dcu.git] / net / ipv4 / ip_output.c
index a2ede167e045b32340e3a1346b8ccde641cb6b68..bb0bb8f07c54aa5172d5a62bcd38d3a73123aa56 100644 (file)
@@ -22,7 +22,7 @@
  *     Fixes:
  *             Alan Cox        :       Missing nonblock feature in ip_build_xmit.
  *             Mike Kilburn    :       htons() missing in ip_build_xmit.
- *             Bradford Johnson:       Fix faulty handling of some frames when 
+ *             Bradford Johnson:       Fix faulty handling of some frames when
  *                                     no route is found.
  *             Alexander Demenshin:    Missing sk/skb free in ip_queue_xmit
  *                                     (in case if packet not accepted by
@@ -33,9 +33,9 @@
  *                                     some redundant tests.
  *     Vitaly E. Lavrov        :       Transparent proxy revived after year coma.
  *             Andi Kleen      :       Replace ip_reply with ip_send_reply.
- *             Andi Kleen      :       Split fast and slow ip_build_xmit path 
- *                                     for decreased register pressure on x86 
- *                                     and more readibility. 
+ *             Andi Kleen      :       Split fast and slow ip_build_xmit path
+ *                                     for decreased register pressure on x86
+ *                                     and more readibility.
  *             Marc Boucher    :       When call_out_firewall returns FW_QUEUE,
  *                                     silently drop skb instead of failing with -EPERM.
  *             Detlev Wengorz  :       Copy protocol for fragments.
@@ -53,6 +53,7 @@
 #include <linux/mm.h>
 #include <linux/string.h>
 #include <linux/errno.h>
+#include <linux/highmem.h>
 
 #include <linux/socket.h>
 #include <linux/sockios.h>
@@ -83,7 +84,7 @@
 #include <linux/netlink.h>
 #include <linux/tcp.h>
 
-int sysctl_ip_default_ttl = IPDEFTTL;
+int sysctl_ip_default_ttl __read_mostly = IPDEFTTL;
 
 /* Generate a checksum for an outgoing IP datagram. */
 __inline__ void ip_send_check(struct iphdr *iph)
@@ -113,12 +114,12 @@ static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst)
        return ttl;
 }
 
-/* 
+/*
  *             Add an ip header to a skbuff and send it out.
  *
  */
 int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
-                         u32 saddr, u32 daddr, struct ip_options *opt)
+                         __be32 saddr, __be32 daddr, struct ip_options *opt)
 {
        struct inet_sock *inet = inet_sk(sk);
        struct rtable *rt = (struct rtable *)skb->dst;
@@ -163,7 +164,6 @@ EXPORT_SYMBOL_GPL(ip_build_and_send_pkt);
 static inline int ip_finish_output2(struct sk_buff *skb)
 {
        struct dst_entry *dst = skb->dst;
-       struct hh_cache *hh = dst->hh;
        struct net_device *dev = dst->dev;
        int hh_len = LL_RESERVED_SPACE(dev);
 
@@ -182,16 +182,9 @@ static inline int ip_finish_output2(struct sk_buff *skb)
                skb = skb2;
        }
 
-       if (hh) {
-               int hh_alen;
-
-               read_lock_bh(&hh->hh_lock);
-               hh_alen = HH_DATA_ALIGN(hh->hh_len);
-               memcpy(skb->data - hh_alen, hh->hh_data, hh_alen);
-               read_unlock_bh(&hh->hh_lock);
-               skb_push(skb, hh->hh_len);
-               return hh->hh_output(skb);
-       } else if (dst->neighbour)
+       if (dst->hh)
+               return neigh_hh_output(dst->hh, skb);
+       else if (dst->neighbour)
                return dst->neighbour->output(skb);
 
        if (net_ratelimit())
@@ -250,7 +243,7 @@ int ip_mc_output(struct sk_buff *skb)
                        struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
                        if (newskb)
                                NF_HOOK(PF_INET, NF_IP_POST_ROUTING, newskb, NULL,
-                                       newskb->dev, 
+                                       newskb->dev,
                                        ip_dev_loopback_xmit);
                }
 
@@ -284,7 +277,7 @@ int ip_output(struct sk_buff *skb)
        skb->protocol = htons(ETH_P_IP);
 
        return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, dev,
-                           ip_finish_output,
+                           ip_finish_output,
                            !(IPCB(skb)->flags & IPSKB_REROUTED));
 }
 
@@ -306,7 +299,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
        /* Make sure we can route this packet. */
        rt = (struct rtable *)__sk_dst_check(sk, 0);
        if (rt == NULL) {
-               u32 daddr;
+               __be32 daddr;
 
                /* Use correct destination address if we have options. */
                daddr = inet->daddr;
@@ -328,6 +321,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
                         * keep trying until route appears or the connection times
                         * itself out.
                         */
+                       security_sk_classify_flow(sk, &fl);
                        if (ip_route_output_flow(&rt, &fl, sk, 0))
                                goto no_route;
                }
@@ -341,7 +335,7 @@ packet_routed:
 
        /* OK, we know where to send it, allocate and build IP header. */
        iph = (struct iphdr *) skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
-       *((__u16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
+       *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
        iph->tot_len = htons(skb->len);
        if (ip_dont_fragment(sk, &rt->u.dst) && !ipfragok)
                iph->frag_off = htons(IP_DF);
@@ -385,6 +379,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
        dst_release(to->dst);
        to->dst = dst_clone(from->dst);
        to->dev = from->dev;
+       to->mark = from->mark;
 
        /* Copy the flags to each fragment. */
        IPCB(to)->flags = IPCB(from)->flags;
@@ -393,7 +388,6 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
        to->tc_index = from->tc_index;
 #endif
 #ifdef CONFIG_NETFILTER
-       to->nfmark = from->nfmark;
        /* Connection association is same as pre-frag packet */
        nf_conntrack_put(to->nfct);
        to->nfct = from->nfct;
@@ -425,7 +419,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
        int ptr;
        struct net_device *dev;
        struct sk_buff *skb2;
-       unsigned int mtu, hlen, left, len, ll_rs;
+       unsigned int mtu, hlen, left, len, ll_rs, pad;
        int offset;
        __be16 not_last_frag;
        struct rtable *rt = (struct rtable*)skb->dst;
@@ -555,14 +549,13 @@ slow_path:
        left = skb->len - hlen;         /* Space per frame */
        ptr = raw + hlen;               /* Where to start from */
 
-#ifdef CONFIG_BRIDGE_NETFILTER
        /* for bridged IP traffic encapsulated inside f.e. a vlan header,
-        * we need to make room for the encapsulating header */
-       ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, nf_bridge_pad(skb));
-       mtu -= nf_bridge_pad(skb);
-#else
-       ll_rs = LL_RESERVED_SPACE(rt->u.dst.dev);
-#endif
+        * we need to make room for the encapsulating header
+        */
+       pad = nf_bridge_pad(skb);
+       ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, pad);
+       mtu -= pad;
+
        /*
         *      Fragment the datagram.
         */
@@ -667,7 +660,7 @@ slow_path:
        return err;
 
 fail:
-       kfree_skb(skb); 
+       kfree_skb(skb);
        IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
        return err;
 }
@@ -679,11 +672,11 @@ ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk
 {
        struct iovec *iov = from;
 
-       if (skb->ip_summed == CHECKSUM_HW) {
+       if (skb->ip_summed == CHECKSUM_PARTIAL) {
                if (memcpy_fromiovecend(to, iov, offset, len) < 0)
                        return -EFAULT;
        } else {
-               unsigned int csum = 0;
+               __wsum csum = 0;
                if (csum_partial_copy_fromiovecend(to, iov, offset, len, &csum) < 0)
                        return -EFAULT;
                skb->csum = csum_block_add(skb->csum, csum, odd);
@@ -691,11 +684,11 @@ ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk
        return 0;
 }
 
-static inline unsigned int
+static inline __wsum
 csum_page(struct page *page, int offset, int copy)
 {
        char *kaddr;
-       unsigned int csum;
+       __wsum csum;
        kaddr = kmap(page);
        csum = csum_partial(kaddr + offset, copy, 0);
        kunmap(page);
@@ -735,7 +728,7 @@ static inline int ip_ufo_append_data(struct sock *sk,
                /* initialize protocol header pointer */
                skb->h.raw = skb->data + fragheaderlen;
 
-               skb->ip_summed = CHECKSUM_HW;
+               skb->ip_summed = CHECKSUM_PARTIAL;
                skb->csum = 0;
                sk->sk_sndmsg_off = 0;
        }
@@ -762,7 +755,7 @@ static inline int ip_ufo_append_data(struct sock *sk,
  *     from many pieces of data. Each pieces will be holded on the socket
  *     until ip_push_pending_frames() is called. Each piece can be a page
  *     or non-page data.
- *     
+ *
  *     Not only UDP, other transport protocols - e.g. raw sockets - can use
  *     this interface potentially.
  *
@@ -843,7 +836,7 @@ int ip_append_data(struct sock *sk,
            length + fragheaderlen <= mtu &&
            rt->u.dst.dev->features & NETIF_F_ALL_CSUM &&
            !exthdrlen)
-               csummode = CHECKSUM_HW;
+               csummode = CHECKSUM_PARTIAL;
 
        inet->cork.length += length;
        if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
@@ -895,7 +888,7 @@ alloc_new_skb:
                                datalen = maxfraglen - fragheaderlen;
                        fraglen = datalen + fragheaderlen;
 
-                       if ((flags & MSG_MORE) && 
+                       if ((flags & MSG_MORE) &&
                            !(rt->u.dst.dev->features&NETIF_F_SG))
                                alloclen = mtu;
                        else
@@ -910,14 +903,14 @@ alloc_new_skb:
                                alloclen += rt->u.dst.trailer_len;
 
                        if (transhdrlen) {
-                               skb = sock_alloc_send_skb(sk, 
+                               skb = sock_alloc_send_skb(sk,
                                                alloclen + hh_len + 15,
                                                (flags & MSG_DONTWAIT), &err);
                        } else {
                                skb = NULL;
                                if (atomic_read(&sk->sk_wmem_alloc) <=
                                    2 * sk->sk_sndbuf)
-                                       skb = sock_wmalloc(sk, 
+                                       skb = sock_wmalloc(sk,
                                                           alloclen + hh_len + 15, 1,
                                                           sk->sk_allocation);
                                if (unlikely(skb == NULL))
@@ -978,7 +971,7 @@ alloc_new_skb:
                        unsigned int off;
 
                        off = skb->len;
-                       if (getfrag(from, skb_put(skb, copy), 
+                       if (getfrag(from, skb_put(skb, copy),
                                        offset, copy, off, skb) < 0) {
                                __skb_trim(skb, off);
                                err = -EFAULT;
@@ -1000,7 +993,7 @@ alloc_new_skb:
                                                goto error;
                                        }
                                        get_page(page);
-                                       skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
+                                       skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
                                        frag = &skb_shinfo(skb)->frags[i];
                                }
                        } else if (i < MAX_SKB_FRAGS) {
@@ -1040,7 +1033,7 @@ alloc_new_skb:
 error:
        inet->cork.length -= length;
        IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
-       return err; 
+       return err;
 }
 
 ssize_t        ip_append_page(struct sock *sk, struct page *page,
@@ -1167,7 +1160,7 @@ ssize_t   ip_append_page(struct sock *sk, struct page *page,
                }
 
                if (skb->ip_summed == CHECKSUM_NONE) {
-                       unsigned int csum;
+                       __wsum csum;
                        csum = csum_page(page, offset, len);
                        skb->csum = csum_block_add(skb->csum, csum, skb->len);
                }
@@ -1264,7 +1257,7 @@ int ip_push_pending_frames(struct sock *sk)
        skb->dst = dst_clone(&rt->u.dst);
 
        /* Netfilter gets whole the not fragmented skb. */
-       err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, 
+       err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL,
                      skb->dst->dev, dst_output);
        if (err) {
                if (err > 0)
@@ -1312,21 +1305,21 @@ void ip_flush_pending_frames(struct sock *sk)
 /*
  *     Fetch data from kernel space and fill in checksum if needed.
  */
-static int ip_reply_glue_bits(void *dptr, char *to, int offset, 
+static int ip_reply_glue_bits(void *dptr, char *to, int offset,
                              int len, int odd, struct sk_buff *skb)
 {
-       unsigned int csum;
+       __wsum csum;
 
        csum = csum_partial_copy_nocheck(dptr+offset, to, len, 0);
        skb->csum = csum_block_add(skb->csum, csum, odd);
-       return 0;  
+       return 0;
 }
 
-/* 
+/*
  *     Generic function to send a packet as reply to another packet.
  *     Used to send TCP resets so far. ICMP should use this function too.
  *
- *     Should run single threaded per socket because it uses the sock 
+ *     Should run single threaded per socket because it uses the sock
  *             structure to pass arguments.
  *
  *     LATER: switch from ip_build_xmit to ip_append_*
@@ -1340,7 +1333,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
                char                    data[40];
        } replyopts;
        struct ipcm_cookie ipc;
-       u32 daddr;
+       __be32 daddr;
        struct rtable *rt = (struct rtable*)skb->dst;
 
        if (ip_options_echo(&replyopts.opt, skb))
@@ -1364,8 +1357,9 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
                                    /* Not quite clean, but right. */
                                    .uli_u = { .ports =
                                               { .sport = skb->h.th->dest,
-                                                .dport = skb->h.th->source } },
+                                                .dport = skb->h.th->source } },
                                    .proto = sk->sk_protocol };
+               security_skb_classify_flow(skb, &fl);
                if (ip_route_output_key(&rt, &fl))
                        return;
        }
@@ -1384,7 +1378,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
                       &ipc, rt, MSG_DONTWAIT);
        if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
                if (arg->csumoffset >= 0)
-                       *((u16 *)skb->h.raw + arg->csumoffset) = csum_fold(csum_add(skb->csum, arg->csum));
+                       *((__sum16 *)skb->h.raw + arg->csumoffset) = csum_fold(csum_add(skb->csum, arg->csum));
                skb->ip_summed = CHECKSUM_NONE;
                ip_push_pending_frames(sk);
        }