/*
 *      IPv6 output functions
 *      Linux INET6 implementation
 *
 *      Authors:
 *      Pedro Roque             <roque@di.fc.ul.pt>
 *
 *      Based on linux/net/ipv4/ip_output.c
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 *      Changes:
 *      A.N.Kuznetsov   :       arithmetic in fragmentation.
 *                              extension headers are implemented.
 *                              route changes now work.
 *                              ip6_forward does not confuse sniffers.
 *                              etc.
 *
 *      H. von Brand    :       Added missing #include <linux/string.h>
 *      Imran Patel     :       frag id should be in NBO
 *      Kazunori MIYAZAWA @USAGI
 *                      :       add ip6_append_data and related functions
 *                              for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>

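/* Final transmit step after the netfilter POST_ROUTING hook: loop back a
 * copy of multicast traffic to local listeners when required, drop packets
 * whose multicast scope cannot leave the node, then resolve the next-hop
 * neighbour (creating an entry if necessary) and queue the packet on it.
 */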
static int ip6_finish_output2(struct sock *sk, struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);
        struct net_device *dev = dst->dev;
        struct neighbour *neigh;
        struct in6_addr *nexthop;
        int ret;

        skb->protocol = htons(ETH_P_IPV6);
        skb->dev = dev;

        if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
                struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

                if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
                    ((mroute6_socket(dev_net(dev), skb) &&
                     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
                     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
                                         &ipv6_hdr(skb)->saddr))) {
                        struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

                        /* Do not check for IFF_ALLMULTI; multicast routing
                         * is not supported in any case.
                         */
                        if (newskb)
                                NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
                                        sk, newskb, NULL, newskb->dev,
                                        dev_loopback_xmit);

                        if (ipv6_hdr(skb)->hop_limit == 0) {
                                IP6_INC_STATS(dev_net(dev), idev,
                                              IPSTATS_MIB_OUTDISCARDS);
                                kfree_skb(skb);
                                return 0;
                        }
                }

                IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
                                skb->len);

                if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
                    IPV6_ADDR_SCOPE_NODELOCAL &&
                    !(dev->flags & IFF_LOOPBACK)) {
                        kfree_skb(skb);
                        return 0;
                }
        }

        rcu_read_lock_bh();
        nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
        neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
        if (unlikely(!neigh))
                neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
        if (!IS_ERR(neigh)) {
                ret = dst_neigh_output(dst, neigh, skb);
                rcu_read_unlock_bh();
                return ret;
        }
        rcu_read_unlock_bh();

        IP6_INC_STATS(dev_net(dst->dev),
                      ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
        kfree_skb(skb);
        return -EINVAL;
}

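/* Fragment the packet on its way out if it exceeds the path MTU (and is
 * not GSO), if the route demands fragmentation of every packet, or if
 * conntrack defrag recorded a smaller maximum fragment size.
 */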
static int ip6_finish_output(struct sock *sk, struct sk_buff *skb)
{
        if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
            dst_allfrag(skb_dst(skb)) ||
            (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
                return ip6_fragment(sk, skb, ip6_finish_output2);
        else
                return ip6_finish_output2(sk, skb);
}

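/* Output entry point after routing: discard if IPv6 is administratively
 * disabled on the device, otherwise run the netfilter POST_ROUTING hook
 * (skipped for packets flagged IP6SKB_REROUTED, which have already
 * traversed it) and hand over to ip6_finish_output().
 */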
int ip6_output(struct sock *sk, struct sk_buff *skb)
{
        struct net_device *dev = skb_dst(skb)->dev;
        struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

        if (unlikely(idev->cnf.disable_ipv6)) {
                IP6_INC_STATS(dev_net(dev), idev,
                              IPSTATS_MIB_OUTDISCARDS);
                kfree_skb(skb);
                return 0;
        }

        return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, sk, skb,
                            NULL, dev,
                            ip6_finish_output,
                            !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}

/*
 *      xmit an sk_buff (used by TCP, SCTP and DCCP)
 */

int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
             struct ipv6_txoptions *opt, int tclass)
{
        struct net *net = sock_net(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct in6_addr *first_hop = &fl6->daddr;
        struct dst_entry *dst = skb_dst(skb);
        struct ipv6hdr *hdr;
        u8  proto = fl6->flowi6_proto;
        int seg_len = skb->len;
        int hlimit = -1;
        u32 mtu;

        if (opt) {
                unsigned int head_room;

                /* First: extension headers may take lots of space
                 * (~8K for now); MAX_HEADER is not enough.
                 */
                head_room = opt->opt_nflen + opt->opt_flen;
                seg_len += head_room;
                head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

                if (skb_headroom(skb) < head_room) {
                        struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
                        if (!skb2) {
                                IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                              IPSTATS_MIB_OUTDISCARDS);
                                kfree_skb(skb);
                                return -ENOBUFS;
                        }
                        consume_skb(skb);
                        skb = skb2;
                        skb_set_owner_w(skb, sk);
                }
                if (opt->opt_flen)
                        ipv6_push_frag_opts(skb, opt, &proto);
                if (opt->opt_nflen)
                        ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
        }

        skb_push(skb, sizeof(struct ipv6hdr));
        skb_reset_network_header(skb);
        hdr = ipv6_hdr(skb);

        /*
         *      Fill in the IPv6 header
         */
        if (np)
                hlimit = np->hop_limit;
        if (hlimit < 0)
                hlimit = ip6_dst_hoplimit(dst);

        ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
                                                     np->autoflowlabel, fl6));

        hdr->payload_len = htons(seg_len);
        hdr->nexthdr = proto;
        hdr->hop_limit = hlimit;

        hdr->saddr = fl6->saddr;
        hdr->daddr = *first_hop;

        skb->protocol = htons(ETH_P_IPV6);
        skb->priority = sk->sk_priority;
        skb->mark = sk->sk_mark;

        mtu = dst_mtu(dst);
        if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
                IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                 IPSTATS_MIB_OUT, skb->len);
                return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb,
                               NULL, dst->dev, dst_output_sk);
        }

        skb->dev = dst->dev;
        ipv6_local_error(sk, EMSGSIZE, fl6, mtu);
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
        kfree_skb(skb);
        return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);

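/* Deliver a Router Alert packet to every raw socket registered for this
 * alert value, cloning the skb for all but the last listener.  Returns 1
 * if the packet was consumed by at least one socket.
 */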
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
        struct ip6_ra_chain *ra;
        struct sock *last = NULL;

        read_lock(&ip6_ra_lock);
        for (ra = ip6_ra_chain; ra; ra = ra->next) {
                struct sock *sk = ra->sk;
                if (sk && ra->sel == sel &&
                    (!sk->sk_bound_dev_if ||
                     sk->sk_bound_dev_if == skb->dev->ifindex)) {
                        if (last) {
                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                                if (skb2)
                                        rawv6_rcv(last, skb2);
                        }
                        last = sk;
                }
        }

        if (last) {
                rawv6_rcv(last, skb);
                read_unlock(&ip6_ra_lock);
                return 1;
        }
        read_unlock(&ip6_ra_lock);
        return 0;
}

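/* Decide what to do with a packet whose destination is subject to proxy
 * NDP: returns 1 to hand it to local input (unicast neighbour discovery
 * for the proxied address), -1 to reject it (link-local destination),
 * or 0 to forward it as usual.
 */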
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
        struct ipv6hdr *hdr = ipv6_hdr(skb);
        u8 nexthdr = hdr->nexthdr;
        __be16 frag_off;
        int offset;

        if (ipv6_ext_hdr(nexthdr)) {
                offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
                if (offset < 0)
                        return 0;
        } else
                offset = sizeof(struct ipv6hdr);

        if (nexthdr == IPPROTO_ICMPV6) {
                struct icmp6hdr *icmp6;

                if (!pskb_may_pull(skb, (skb_network_header(skb) +
                                         offset + 1 - skb->data)))
                        return 0;

                icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

                switch (icmp6->icmp6_type) {
                case NDISC_ROUTER_SOLICITATION:
                case NDISC_ROUTER_ADVERTISEMENT:
                case NDISC_NEIGHBOUR_SOLICITATION:
                case NDISC_NEIGHBOUR_ADVERTISEMENT:
                case NDISC_REDIRECT:
                        /* A unicast neighbour discovery message destined
                         * to the proxied address is passed to the input
                         * function.
                         */
                        return 1;
                default:
                        break;
                }
        }

        /*
         * The proxying router can't forward traffic sent to a link-local
         * address, so signal the sender and discard the packet. This
         * behavior is clarified by the MIPv6 specification.
         */
        if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
                dst_link_failure(skb);
                return -1;
        }

        return 0;
}

static inline int ip6_forward_finish(struct sock *sk, struct sk_buff *skb)
{
        skb_sender_cpu_clear(skb);
        return dst_output_sk(sk, skb);
}

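/* MTU to use when forwarding over this dst: a locked RTAX_MTU metric
 * takes precedence; otherwise use the device's IPv6 MTU, falling back
 * to IPV6_MIN_MTU when the device has no inet6 state.
 */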
static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
{
        unsigned int mtu;
        struct inet6_dev *idev;

        if (dst_metric_locked(dst, RTAX_MTU)) {
                mtu = dst_metric_raw(dst, RTAX_MTU);
                if (mtu)
                        return mtu;
        }

        mtu = IPV6_MIN_MTU;
        rcu_read_lock();
        idev = __in6_dev_get(dst->dev);
        if (idev)
                mtu = idev->cnf.mtu6;
        rcu_read_unlock();

        return mtu;
}

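/* True if the packet cannot be sent over a path with the given MTU:
 * honours the fragment size recorded by conntrack defrag, the ignore_df
 * override, and the per-segment length of GSO packets.
 */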
static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
        if (skb->len <= mtu)
                return false;

        /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
        if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
                return true;

        if (skb->ignore_df)
                return false;

        if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
                return false;

        return true;
}

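/* Forward a received packet towards its destination: validate that
 * forwarding is enabled and policy allows it, divert Router Alert and
 * proxy-NDP traffic, enforce the hop limit and path MTU, emit a redirect
 * when the packet leaves on the interface it arrived on, then decrement
 * hop_limit and pass the packet to the netfilter FORWARD hook.
 */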
int ip6_forward(struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);
        struct ipv6hdr *hdr = ipv6_hdr(skb);
        struct inet6_skb_parm *opt = IP6CB(skb);
        struct net *net = dev_net(dst->dev);
        u32 mtu;

        if (net->ipv6.devconf_all->forwarding == 0)
                goto error;

        if (skb->pkt_type != PACKET_HOST)
                goto drop;

        if (skb_warn_if_lro(skb))
                goto drop;

        if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
                IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
                                 IPSTATS_MIB_INDISCARDS);
                goto drop;
        }

        skb_forward_csum(skb);

        /*
         *      We do NOT process RA packets; we push them to user level
         *      AS IS, with no guarantee that the application will be able
         *      to interpret them.  The reason is that we cannot make
         *      anything clever here.
         *
         *      We are not an end node, so if the packet contains AH/ESP
         *      there is nothing we can do.  Defragmentation would also be
         *      a mistake: RA packets must not be fragmented, because there
         *      is no guarantee that different fragments will follow the
         *      same path. --ANK
         */
        if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
                if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
                        return 0;
        }

        /*
         *      Check and decrement the hop limit.
         */
        if (hdr->hop_limit <= 1) {
                /* Force the OUTPUT device to be used as source address */
                skb->dev = dst->dev;
                icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
                IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
                                 IPSTATS_MIB_INHDRERRORS);

                kfree_skb(skb);
                return -ETIMEDOUT;
        }

        /* XXX: idev->cnf.proxy_ndp? */
        if (net->ipv6.devconf_all->proxy_ndp &&
            pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
                int proxied = ip6_forward_proxy_check(skb);
                if (proxied > 0)
                        return ip6_input(skb);
                else if (proxied < 0) {
                        IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
                                         IPSTATS_MIB_INDISCARDS);
                        goto drop;
                }
        }

        if (!xfrm6_route_forward(skb)) {
                IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
                                 IPSTATS_MIB_INDISCARDS);
                goto drop;
        }
        dst = skb_dst(skb);

        /* The IPv6 specs say nothing about it, but it is clear that we
         * cannot send redirects for source-routed frames.  We also do not
         * send redirects for frames decapsulated from IPsec.
         */
        if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
                struct in6_addr *target = NULL;
                struct inet_peer *peer;
                struct rt6_info *rt;

                /*
                 *      The incoming and outgoing devices are the same:
                 *      send a redirect.
                 */

                rt = (struct rt6_info *) dst;
                if (rt->rt6i_flags & RTF_GATEWAY)
                        target = &rt->rt6i_gateway;
                else
                        target = &hdr->daddr;

                peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);

                /* Limit redirects both by destination (here)
                 * and by source (inside ndisc_send_redirect).
                 */
                if (inet_peer_xrlim_allow(peer, 1*HZ))
                        ndisc_send_redirect(skb, target);
                if (peer)
                        inet_putpeer(peer);
        } else {
                int addrtype = ipv6_addr_type(&hdr->saddr);

                /* This check is security critical. */
                if (addrtype == IPV6_ADDR_ANY ||
                    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
                        goto error;
                if (addrtype & IPV6_ADDR_LINKLOCAL) {
                        icmpv6_send(skb, ICMPV6_DEST_UNREACH,
                                    ICMPV6_NOT_NEIGHBOUR, 0);
                        goto error;
                }
        }

        mtu = ip6_dst_mtu_forward(dst);
        if (mtu < IPV6_MIN_MTU)
                mtu = IPV6_MIN_MTU;

        if (ip6_pkt_too_big(skb, mtu)) {
                /* Again, force the OUTPUT device to be used as source address */
                skb->dev = dst->dev;
                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
                                 IPSTATS_MIB_INTOOBIGERRORS);
                IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
                                 IPSTATS_MIB_FRAGFAILS);
                kfree_skb(skb);
                return -EMSGSIZE;
        }

        if (skb_cow(skb, dst->dev->hard_header_len)) {
                IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
                                 IPSTATS_MIB_OUTDISCARDS);
                goto drop;
        }

        hdr = ipv6_hdr(skb);

        /* Decrementing the hop count is delayed until after the skb COW. */

        hdr->hop_limit--;

        IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
        IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
        return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, NULL, skb,
                       skb->dev, dst->dev,
                       ip6_forward_finish);

error:
        IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
        kfree_skb(skb);
        return -EINVAL;
}

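/* Propagate per-packet metadata (type, priority, dst, device, mark,
 * traffic-control index, netfilter and security state) from the original
 * skb to a freshly allocated fragment.
 */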
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
        to->pkt_type = from->pkt_type;
        to->priority = from->priority;
        to->protocol = from->protocol;
        skb_dst_drop(to);
        skb_dst_set(to, dst_clone(skb_dst(from)));
        to->dev = from->dev;
        to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
        to->tc_index = from->tc_index;
#endif
        nf_copy(to, from);
        skb_copy_secmark(to, from);
}

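/* Split a packet that exceeds the MTU into a chain of fragments, each
 * carrying a copy of the unfragmentable header block plus a fragment
 * header.  A fast path re-uses an existing frag_list when its geometry
 * already matches; otherwise the slow path copies the payload into newly
 * allocated skbs.  Each fragment is handed to @output.
 */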
int ip6_fragment(struct sock *sk, struct sk_buff *skb,
                 int (*output)(struct sock *, struct sk_buff *))
{
        struct sk_buff *frag;
        struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
        struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
                                inet6_sk(skb->sk) : NULL;
        struct ipv6hdr *tmp_hdr;
        struct frag_hdr *fh;
        unsigned int mtu, hlen, left, len;
        int hroom, troom;
        __be32 frag_id;
        int ptr, offset = 0, err = 0;
        u8 *prevhdr, nexthdr = 0;
        struct net *net = dev_net(skb_dst(skb)->dev);

        hlen = ip6_find_1stfragopt(skb, &prevhdr);
        nexthdr = *prevhdr;

        mtu = ip6_skb_dst_mtu(skb);

        /* We must not fragment if the socket is set to force MTU discovery
         * or if the skb is not generated by a local socket.
         */
        if (unlikely(!skb->ignore_df && skb->len > mtu))
                goto fail_toobig;

        if (IP6CB(skb)->frag_max_size) {
                if (IP6CB(skb)->frag_max_size > mtu)
                        goto fail_toobig;

                /* don't send fragments larger than what we received */
                mtu = IP6CB(skb)->frag_max_size;
                if (mtu < IPV6_MIN_MTU)
                        mtu = IPV6_MIN_MTU;
        }

        if (np && np->frag_size < mtu) {
                if (np->frag_size)
                        mtu = np->frag_size;
        }
        mtu -= hlen + sizeof(struct frag_hdr);

        frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
                                    &ipv6_hdr(skb)->saddr);

        hroom = LL_RESERVED_SPACE(rt->dst.dev);
        if (skb_has_frag_list(skb)) {
                int first_len = skb_pagelen(skb);
                struct sk_buff *frag2;

                if (first_len - hlen > mtu ||
                    ((first_len - hlen) & 7) ||
                    skb_cloned(skb) ||
                    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
                        goto slow_path;

                skb_walk_frags(skb, frag) {
                        /* Correct geometry. */
                        if (frag->len > mtu ||
                            ((frag->len & 7) && frag->next) ||
                            skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
                                goto slow_path_clean;

                        /* Partially cloned skb? */
                        if (skb_shared(frag))
                                goto slow_path_clean;

                        BUG_ON(frag->sk);
                        if (skb->sk) {
                                frag->sk = skb->sk;
                                frag->destructor = sock_wfree;
                        }
                        skb->truesize -= frag->truesize;
                }

                err = 0;
                offset = 0;
                /* BUILD HEADER */

                *prevhdr = NEXTHDR_FRAGMENT;
                tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
                if (!tmp_hdr) {
                        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                      IPSTATS_MIB_FRAGFAILS);
                        err = -ENOMEM;
                        goto fail;
                }
                frag = skb_shinfo(skb)->frag_list;
                skb_frag_list_init(skb);

                __skb_pull(skb, hlen);
                fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
                __skb_push(skb, hlen);
                skb_reset_network_header(skb);
                memcpy(skb_network_header(skb), tmp_hdr, hlen);

                fh->nexthdr = nexthdr;
                fh->reserved = 0;
                fh->frag_off = htons(IP6_MF);
                fh->identification = frag_id;

                first_len = skb_pagelen(skb);
                skb->data_len = first_len - skb_headlen(skb);
                skb->len = first_len;
                ipv6_hdr(skb)->payload_len = htons(first_len -
                                                   sizeof(struct ipv6hdr));

                dst_hold(&rt->dst);

                for (;;) {
                        /* Prepare the header of the next frame
                         * before the previous one goes out. */
                        if (frag) {
                                frag->ip_summed = CHECKSUM_NONE;
                                skb_reset_transport_header(frag);
                                fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
                                __skb_push(frag, hlen);
                                skb_reset_network_header(frag);
                                memcpy(skb_network_header(frag), tmp_hdr,
                                       hlen);
                                offset += skb->len - hlen - sizeof(struct frag_hdr);
                                fh->nexthdr = nexthdr;
                                fh->reserved = 0;
                                fh->frag_off = htons(offset);
                                if (frag->next)
                                        fh->frag_off |= htons(IP6_MF);
                                fh->identification = frag_id;
                                ipv6_hdr(frag)->payload_len =
                                                htons(frag->len -
                                                      sizeof(struct ipv6hdr));
                                ip6_copy_metadata(frag, skb);
                        }

                        err = output(sk, skb);
                        if (!err)
                                IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                                              IPSTATS_MIB_FRAGCREATES);

                        if (err || !frag)
                                break;

                        skb = frag;
                        frag = skb->next;
                        skb->next = NULL;
                }

                kfree(tmp_hdr);

                if (err == 0) {
                        IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                                      IPSTATS_MIB_FRAGOKS);
                        ip6_rt_put(rt);
                        return 0;
                }

                kfree_skb_list(frag);

                IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                              IPSTATS_MIB_FRAGFAILS);
                ip6_rt_put(rt);
                return err;

slow_path_clean:
                skb_walk_frags(skb, frag2) {
                        if (frag2 == frag)
                                break;
                        frag2->sk = NULL;
                        frag2->destructor = NULL;
                        skb->truesize += frag2->truesize;
                }
        }

slow_path:
        if ((skb->ip_summed == CHECKSUM_PARTIAL) &&
            skb_checksum_help(skb))
                goto fail;

        left = skb->len - hlen;         /* Space per frame */
        ptr = hlen;                     /* Where to start from */

        /*
         *      Fragment the datagram.
         */

        *prevhdr = NEXTHDR_FRAGMENT;
        troom = rt->dst.dev->needed_tailroom;

        /*
         *      Keep copying data until we run out.
         */
        while (left > 0) {
                len = left;
                /* IF: it doesn't fit, use 'mtu' - the data space left */
                if (len > mtu)
                        len = mtu;
                /* IF: we are not sending up to and including the packet end
                   then align the next start on an eight byte boundary */
                if (len < left)
                        len &= ~7;

                /* Allocate buffer */
                frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
                                 hroom + troom, GFP_ATOMIC);
                if (!frag) {
                        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                      IPSTATS_MIB_FRAGFAILS);
                        err = -ENOMEM;
                        goto fail;
                }

                /*
                 *      Set up data on packet
                 */

                ip6_copy_metadata(frag, skb);
                skb_reserve(frag, hroom);
                skb_put(frag, len + hlen + sizeof(struct frag_hdr));
                skb_reset_network_header(frag);
                fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
                frag->transport_header = (frag->network_header + hlen +
                                          sizeof(struct frag_hdr));

                /*
                 *      Charge the memory for the fragment to any owner
                 *      it might possess
                 */
                if (skb->sk)
                        skb_set_owner_w(frag, skb->sk);

                /*
                 *      Copy the packet header into the new buffer.
                 */
                skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

                /*
                 *      Build fragment header.
                 */
                fh->nexthdr = nexthdr;
                fh->reserved = 0;
                fh->identification = frag_id;

                /*
                 *      Copy a block of the IP datagram.
                 */
                BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
                                     len));
                left -= len;

                fh->frag_off = htons(offset);
                if (left > 0)
                        fh->frag_off |= htons(IP6_MF);
                ipv6_hdr(frag)->payload_len = htons(frag->len -
                                                    sizeof(struct ipv6hdr));

                ptr += len;
                offset += len;

                /*
                 *      Put this fragment into the sending queue.
                 */
                err = output(sk, frag);
                if (err)
                        goto fail;

                IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                              IPSTATS_MIB_FRAGCREATES);
        }
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                      IPSTATS_MIB_FRAGOKS);
        consume_skb(skb);
        return err;

fail_toobig:
        if (skb->sk && dst_allfrag(skb_dst(skb)))
                sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

        skb->dev = skb_dst(skb)->dev;
        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
        err = -EMSGSIZE;

fail:
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                      IPSTATS_MIB_FRAGFAILS);
        kfree_skb(skb);
        return err;
}

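/* Helpers for validating a socket's cached route: ip6_rt_check() reports
 * a mismatch between a route key and a flow address, and
 * ip6_sk_dst_check() releases the cached dst when it no longer matches
 * the flow's addresses or outgoing interface.
 */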
static inline int ip6_rt_check(const struct rt6key *rt_key,
                               const struct in6_addr *fl_addr,
                               const struct in6_addr *addr_cache)
{
        return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
                (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
                                          struct dst_entry *dst,
                                          const struct flowi6 *fl6)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct rt6_info *rt;

        if (!dst)
                goto out;

        if (dst->ops->family != AF_INET6) {
                dst_release(dst);
                return NULL;
        }

        rt = (struct rt6_info *)dst;
        /* Yes, checking route validity in the unconnected case is not
         * very simple. Take into account that we do not support routing
         * by source, TOS, and MSG_DONTROUTE       --ANK (980726)
         *
         * 1. ip6_rt_check(): If the route was a host route, check that
         *    the cached destination is current. If it is a network
         *    route, we still may check its validity using the saved
         *    pointer to the last used address: daddr_cache.
         *    We do not want to save the whole address now (the main
         *    consumer of this service is TCP, which does not have this
         *    problem), so this last trick works only on connected
         *    sockets.
         * 2. oif also should be the same.
         */
        if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
            ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
            (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
                dst_release(dst);
                dst = NULL;
        }

out:
        return dst;
}

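/* Core of the dst lookup helpers below: resolve a route for @fl6,
 * choosing a source address when the flow leaves it unspecified, and
 * (with optimistic DAD) falling back to the default router's dst when
 * the next hop is not yet a valid neighbour.
 */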
static int ip6_dst_lookup_tail(struct net *net, struct sock *sk,
                               struct dst_entry **dst, struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
        struct neighbour *n;
        struct rt6_info *rt;
#endif
        int err;

        /* The correct way to handle this would be to do
         * ip6_route_get_saddr, and then ip6_route_output; however,
         * the route-specific preferred source forces the
         * ip6_route_output call _before_ ip6_route_get_saddr.
         *
         * In source specific routing (no src=any default route),
         * ip6_route_output will fail given src=any saddr, though, so
         * that's why we try it again later.
         */
        if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
                struct rt6_info *rt;
                bool had_dst = *dst != NULL;

                if (!had_dst)
                        *dst = ip6_route_output(net, sk, fl6);
                rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
                err = ip6_route_get_saddr(net, rt, &fl6->daddr,
                                          sk ? inet6_sk(sk)->srcprefs : 0,
                                          &fl6->saddr);
                if (err)
                        goto out_err_release;

                /* If we had an erroneous initial result, pretend it
                 * never existed and let the SA-enabled version take
                 * over.
                 */
                if (!had_dst && (*dst)->error) {
                        dst_release(*dst);
                        *dst = NULL;
                }
        }

        if (!*dst)
                *dst = ip6_route_output(net, sk, fl6);

        err = (*dst)->error;
        if (err)
                goto out_err_release;

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
        /*
         * If the dst entry we've looked up has a neighbour entry that
         * is in the INCOMPLETE state and the source address from the
         * flow is marked as OPTIMISTIC, we release the found dst entry
         * and replace it instead with the dst entry of the nexthop
         * router.
         */
        rt = (struct rt6_info *) *dst;
        rcu_read_lock_bh();
        n = __ipv6_neigh_lookup_noref(rt->dst.dev,
                                      rt6_nexthop(rt, &fl6->daddr));
        err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
        rcu_read_unlock_bh();

        if (err) {
                struct inet6_ifaddr *ifp;
                struct flowi6 fl_gw6;
                int redirect;

                ifp = ipv6_get_ifaddr(net, &fl6->saddr,
                                      (*dst)->dev, 1);

                redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
                if (ifp)
                        in6_ifa_put(ifp);

                if (redirect) {
                        /*
                         * We need to get the dst entry for the
                         * default router instead
                         */
                        dst_release(*dst);
                        memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
                        memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
                        *dst = ip6_route_output(net, sk, &fl_gw6);
                        err = (*dst)->error;
                        if (err)
                                goto out_err_release;
                }
        }
#endif

        return 0;

out_err_release:
        if (err == -ENETUNREACH)
                IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
        dst_release(*dst);
        *dst = NULL;
        return err;
}

/**
 *      ip6_dst_lookup - perform route lookup on flow
 *      @net: network namespace to perform the lookup in
 *      @sk: socket which provides route info
 *      @dst: pointer to dst_entry * for result
 *      @fl6: flow to lookup
 *
 *      This function performs a route lookup on the given flow.
 *
 *      It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
                   struct flowi6 *fl6)
{
        *dst = NULL;
        return ip6_dst_lookup_tail(net, sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *      ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 *      @sk: socket which provides route info
 *      @fl6: flow to lookup
 *      @final_dst: final destination address for ipsec lookup
 *
 *      This function performs a route lookup on the given flow.
 *
 *      It returns a valid dst pointer on success, or a pointer-encoded
 *      error code.
 */
struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
                                      const struct in6_addr *final_dst)
{
        struct dst_entry *dst = NULL;
        int err;

        err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
        if (err)
                return ERR_PTR(err);
        if (final_dst)
                fl6->daddr = *final_dst;
        if (!fl6->flowi6_oif)
                fl6->flowi6_oif = dst->dev->ifindex;

        return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);

/**
 *      ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 *      @sk: socket which provides the dst cache and route info
 *      @fl6: flow to lookup
 *      @final_dst: final destination address for ipsec lookup
 *
 *      This function performs a route lookup on the given flow with the
 *      possibility of using the cached route in the socket if it is valid.
 *      It will take the socket dst lock when operating on the dst cache.
 *      As a result, this function can only be used in process context.
 *
 *      It returns a valid dst pointer on success, or a pointer-encoded
 *      error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
                                         const struct in6_addr *final_dst)
{
        struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
        int err;

        dst = ip6_sk_dst_check(sk, dst, fl6);

        err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
        if (err)
                return ERR_PTR(err);
        if (final_dst)
                fl6->daddr = *final_dst;

        return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);

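/* Append data for a UDP socket when the device can do UDP fragmentation
 * offload: build (or extend) one large GSO skb and let the device
 * segment it into on-the-wire fragments.
 */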
static inline int ip6_ufo_append_data(struct sock *sk,
                        struct sk_buff_head *queue,
                        int getfrag(void *from, char *to, int offset, int len,
                        int odd, struct sk_buff *skb),
                        void *from, int length, int hh_len, int fragheaderlen,
                        int transhdrlen, int mtu, unsigned int flags,
                        const struct flowi6 *fl6)
{
        struct sk_buff *skb;
        int err;

        /* The network device supports UDP large send offload, so build
         * a single skb containing the complete UDP datagram.
         */
        skb = skb_peek_tail(queue);
        if (!skb) {
                skb = sock_alloc_send_skb(sk,
                        hh_len + fragheaderlen + transhdrlen + 20,
                        (flags & MSG_DONTWAIT), &err);
                if (!skb)
                        return err;

                /* reserve space for the hardware header */
                skb_reserve(skb, hh_len);

                /* create space for the UDP/IP header */
                skb_put(skb, fragheaderlen + transhdrlen);

                /* initialize the network header pointer */
                skb_reset_network_header(skb);

                /* initialize the protocol header pointer */
                skb->transport_header = skb->network_header + fragheaderlen;

                skb->protocol = htons(ETH_P_IPV6);
                skb->csum = 0;

                __skb_queue_tail(queue, skb);
        } else if (skb_is_gso(skb)) {
                goto append;
        }

        skb->ip_summed = CHECKSUM_PARTIAL;
        /* Specify the length of each IPv6 datagram fragment.
         * It has to be a multiple of 8.
         */
        skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
                                     sizeof(struct frag_hdr)) & ~7;
        skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
        skb_shinfo(skb)->ip6_frag_id = ipv6_select_ident(sock_net(sk),
                                                         &fl6->daddr,
                                                         &fl6->saddr);

append:
        return skb_append_datato_frags(sk, skb, getfrag, from,
                                       (length - transhdrlen));
}

static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
                                               gfp_t gfp)
{
        return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
                                                gfp_t gfp)
{
        return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

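/* Recompute the usable MTU and the maximum fragment length while
 * appending data: the first fragment must leave room for the route's
 * header_len, while later fragments may use the full original MTU.
 */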
static void ip6_append_data_mtu(unsigned int *mtu,
                                int *maxfraglen,
                                unsigned int fragheaderlen,
                                struct sk_buff *skb,
                                struct rt6_info *rt,
                                unsigned int orig_mtu)
{
        if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
                if (!skb) {
                        /* first fragment, reserve header_len */
                        *mtu = orig_mtu - rt->dst.header_len;

                } else {
                        /*
                         * this fragment is not the first; the header
                         * space is regarded as data space.
                         */
                        *mtu = orig_mtu;
                }
                *maxfraglen = ((*mtu - fragheaderlen) & ~7)
                              + fragheaderlen - sizeof(struct frag_hdr);
        }
}

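/* Initialize the cork state for ip6_append_data(): duplicate the tx
 * options so they outlive the caller, pin the route, and record the hop
 * limit, traffic class and fragment size to use for this corked sequence
 * of sends.
 */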
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
                          struct inet6_cork *v6_cork,
                          int hlimit, int tclass, struct ipv6_txoptions *opt,
                          struct rt6_info *rt, struct flowi6 *fl6)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        unsigned int mtu;

        /*
         * setup for corking
         */
        if (opt) {
                if (WARN_ON(v6_cork->opt))
                        return -EINVAL;

                v6_cork->opt = kzalloc(opt->tot_len, sk->sk_allocation);
                if (unlikely(!v6_cork->opt))
                        return -ENOBUFS;

                v6_cork->opt->tot_len = opt->tot_len;
                v6_cork->opt->opt_flen = opt->opt_flen;
                v6_cork->opt->opt_nflen = opt->opt_nflen;

                v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
                                                    sk->sk_allocation);
                if (opt->dst0opt && !v6_cork->opt->dst0opt)
                        return -ENOBUFS;

                v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
                                                    sk->sk_allocation);
                if (opt->dst1opt && !v6_cork->opt->dst1opt)
                        return -ENOBUFS;

                v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
                                                   sk->sk_allocation);
                if (opt->hopopt && !v6_cork->opt->hopopt)
                        return -ENOBUFS;

                v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
                                                    sk->sk_allocation);
                if (opt->srcrt && !v6_cork->opt->srcrt)
                        return -ENOBUFS;

                /* need source address above miyazawa */
        }
        dst_hold(&rt->dst);
        cork->base.dst = &rt->dst;
        cork->fl.u.ip6 = *fl6;
        v6_cork->hop_limit = hlimit;
        v6_cork->tclass = tclass;
        if (rt->dst.flags & DST_XFRM_TUNNEL)
                mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
                      rt->dst.dev->mtu : dst_mtu(&rt->dst);
        else
                mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
                      rt->dst.dev->mtu : dst_mtu(rt->dst.path);
        if (np->frag_size < mtu) {
                if (np->frag_size)
                        mtu = np->frag_size;
        }
        cork->base.fragsize = mtu;
        if (dst_allfrag(rt->dst.path))
                cork->base.flags |= IPCORK_ALLFRAG;
        cork->base.length = 0;

        return 0;
}

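/* Workhorse behind ip6_append_data(): append user data to the queue of
 * pending skbs for a corked socket, growing the tail skb or allocating
 * new ones sized to the MTU, and tracking checksum mode, timestamps and
 * fragment geometry along the way.
 */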
1231 static int __ip6_append_data(struct sock *sk,
1232                              struct flowi6 *fl6,
1233                              struct sk_buff_head *queue,
1234                              struct inet_cork *cork,
1235                              struct inet6_cork *v6_cork,
1236                              struct page_frag *pfrag,
1237                              int getfrag(void *from, char *to, int offset,
1238                                          int len, int odd, struct sk_buff *skb),
1239                              void *from, int length, int transhdrlen,
1240                              unsigned int flags, int dontfrag)
1241 {
1242         struct sk_buff *skb, *skb_prev = NULL;
1243         unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu;
1244         int exthdrlen = 0;
1245         int dst_exthdrlen = 0;
1246         int hh_len;
1247         int copy;
1248         int err;
1249         int offset = 0;
1250         __u8 tx_flags = 0;
1251         u32 tskey = 0;
1252         struct rt6_info *rt = (struct rt6_info *)cork->dst;
1253         struct ipv6_txoptions *opt = v6_cork->opt;
1254         int csummode = CHECKSUM_NONE;
1255
1256         skb = skb_peek_tail(queue);
1257         if (!skb) {
1258                 exthdrlen = opt ? opt->opt_flen : 0;
1259                 dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
1260         }
1261
1262         mtu = cork->fragsize;
1263         orig_mtu = mtu;
1264
1265         hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1266
1267         fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1268                         (opt ? opt->opt_nflen : 0);
1269         maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
1270                      sizeof(struct frag_hdr);
1271
1272         if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
1273                 unsigned int maxnonfragsize, headersize;
1274
1275                 headersize = sizeof(struct ipv6hdr) +
1276                              (opt ? opt->opt_flen + opt->opt_nflen : 0) +
1277                              (dst_allfrag(&rt->dst) ?
1278                               sizeof(struct frag_hdr) : 0) +
1279                              rt->rt6i_nfheader_len;
1280
1281                 if (ip6_sk_ignore_df(sk))
1282                         maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
1283                 else
1284                         maxnonfragsize = mtu;
1285
1286                 /* dontfrag active */
1287                 if ((cork->length + length > mtu - headersize) && dontfrag &&
1288                     (sk->sk_protocol == IPPROTO_UDP ||
1289                      sk->sk_protocol == IPPROTO_RAW)) {
1290                         ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
1291                                                    sizeof(struct ipv6hdr));
1292                         goto emsgsize;
1293                 }
1294
1295                 if (cork->length + length > maxnonfragsize - headersize) {
1296 emsgsize:
1297                         ipv6_local_error(sk, EMSGSIZE, fl6,
1298                                          mtu - headersize +
1299                                          sizeof(struct ipv6hdr));
1300                         return -EMSGSIZE;
1301                 }
1302         }
1303
1304         if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
1305                 sock_tx_timestamp(sk, &tx_flags);
1306                 if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
1307                     sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
1308                         tskey = sk->sk_tskey++;
1309         }
1310
1311         /* If this is the first and only packet and device
1312          * supports checksum offloading, let's use it.
1313          * Use transhdrlen, same as IPv4, because partial
1314          * sums only work when transhdrlen is set.
1315          */
1316         if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
1317             length + fragheaderlen < mtu &&
1318             rt->dst.dev->features & NETIF_F_V6_CSUM &&
1319             !exthdrlen)
1320                 csummode = CHECKSUM_PARTIAL;
1321         /*
1322          * Let's try using as much space as possible.
1323          * Use MTU if total length of the message fits into the MTU.
1324          * Otherwise, we need to reserve fragment header and
1325          * fragment alignment (= 8-15 octects, in total).
1326          *
1327          * Note that we may need to "move" the data from the tail of
1328          * of the buffer to the new fragment when we split
1329          * the message.
1330          *
1331          * FIXME: It may be fragmented into multiple chunks
1332          *        at once if non-fragmentable extension headers
1333          *        are too large.
1334          * --yoshfuji
1335          */
1336
1337         cork->length += length;
1338         if (((length > mtu) ||
1339              (skb && skb_is_gso(skb))) &&
1340             (sk->sk_protocol == IPPROTO_UDP) &&
1341             (rt->dst.dev->features & NETIF_F_UFO) &&
1342             (sk->sk_type == SOCK_DGRAM)) {
1343                 err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
1344                                           hh_len, fragheaderlen,
1345                                           transhdrlen, mtu, flags, fl6);
1346                 if (err)
1347                         goto error;
1348                 return 0;
1349         }
1350
1351         if (!skb)
1352                 goto alloc_new_skb;
1353
1354         while (length > 0) {
1355                 /* Check if the remaining data fits into current packet. */
1356                 copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1357                 if (copy < length)
1358                         copy = maxfraglen - skb->len;
1359
1360                 if (copy <= 0) {
1361                         char *data;
1362                         unsigned int datalen;
1363                         unsigned int fraglen;
1364                         unsigned int fraggap;
1365                         unsigned int alloclen;
1366 alloc_new_skb:
1367                         /* There's no room in the current skb */
1368                         if (skb)
1369                                 fraggap = skb->len - maxfraglen;
1370                         else
1371                                 fraggap = 0;
1372                         /* update mtu and maxfraglen if necessary */
1373                         if (!skb || !skb_prev)
1374                                 ip6_append_data_mtu(&mtu, &maxfraglen,
1375                                                     fragheaderlen, skb, rt,
1376                                                     orig_mtu);
1377
1378                         skb_prev = skb;
1379
1380                         /*
1381                          * If remaining data exceeds the mtu,
1382                          * we know we need more fragment(s).
1383                          */
1384                         datalen = length + fraggap;
1385
1386                         if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1387                                 datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
1388                         if ((flags & MSG_MORE) &&
1389                             !(rt->dst.dev->features&NETIF_F_SG))
1390                                 alloclen = mtu;
1391                         else
1392                                 alloclen = datalen + fragheaderlen;
1393
1394                         alloclen += dst_exthdrlen;
1395
1396                         if (datalen != length + fraggap) {
1397                                 /*
1398                                  * this is not the last fragment; the trailer
1399                                  * space is regarded as data space.
1400                                  */
1401                                 datalen += rt->dst.trailer_len;
1402                         }
1403
1404                         alloclen += rt->dst.trailer_len;
1405                         fraglen = datalen + fragheaderlen;
1406
1407                         /*
1408                          * We just reserve space for the fragment header.
1409                          * Note: this may be an over-allocation if the
1410                          * message (without MSG_MORE) fits into the MTU.
1411                          */
1412                         alloclen += sizeof(struct frag_hdr);
1413
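                        /*
                         * First fragment (transhdrlen != 0): a regular,
                         * possibly blocking, sndbuf-charged allocation.
                         * Follow-up fragments: best effort, but tolerate
                         * up to 2 * sk_sndbuf in flight so an almost-full
                         * send buffer cannot strand a half-built datagram.
                         */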
1414                         if (transhdrlen) {
1415                                 skb = sock_alloc_send_skb(sk,
1416                                                 alloclen + hh_len,
1417                                                 (flags & MSG_DONTWAIT), &err);
1418                         } else {
1419                                 skb = NULL;
1420                                 if (atomic_read(&sk->sk_wmem_alloc) <=
1421                                     2 * sk->sk_sndbuf)
1422                                         skb = sock_wmalloc(sk,
1423                                                            alloclen + hh_len, 1,
1424                                                            sk->sk_allocation);
1425                                 if (unlikely(!skb))
1426                                         err = -ENOBUFS;
1427                         }
1428                         if (!skb)
1429                                 goto error;
1430                         /*
1431                          *      Fill in the control structures
1432                          */
1433                         skb->protocol = htons(ETH_P_IPV6);
1434                         skb->ip_summed = csummode;
1435                         skb->csum = 0;
1436                         /* reserve room for the fragment and IPsec headers */
1437                         skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
1438                                     dst_exthdrlen);
1439
1440                         /* Only the initial fragment is time stamped */
1441                         skb_shinfo(skb)->tx_flags = tx_flags;
1442                         tx_flags = 0;
1443                         skb_shinfo(skb)->tskey = tskey;
1444                         tskey = 0;
1445
1446                         /*
1447                          *      Find where to start putting bytes
1448                          */
1449                         data = skb_put(skb, fraglen);
1450                         skb_set_network_header(skb, exthdrlen);
1451                         data += fragheaderlen;
1452                         skb->transport_header = (skb->network_header +
1453                                                  fragheaderlen);
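                        /*
                         * Reclaim the overflow from the previous skb:
                         * copy it (and its checksum) into the new skb,
                         * subtract it from skb_prev's checksum, then trim
                         * skb_prev back to the fragment boundary.
                         */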
1454                         if (fraggap) {
1455                                 skb->csum = skb_copy_and_csum_bits(
1456                                         skb_prev, maxfraglen,
1457                                         data + transhdrlen, fraggap, 0);
1458                                 skb_prev->csum = csum_sub(skb_prev->csum,
1459                                                           skb->csum);
1460                                 data += fraggap;
1461                                 pskb_trim_unique(skb_prev, maxfraglen);
1462                         }
1463                         copy = datalen - transhdrlen - fraggap;
1464
1465                         if (copy < 0) {
1466                                 err = -EINVAL;
1467                                 kfree_skb(skb);
1468                                 goto error;
1469                         } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1470                                 err = -EFAULT;
1471                                 kfree_skb(skb);
1472                                 goto error;
1473                         }
1474
1475                         offset += copy;
1476                         length -= datalen - fraggap;
1477                         transhdrlen = 0;
1478                         exthdrlen = 0;
1479                         dst_exthdrlen = 0;
1480
1481                         /*
1482                          * Put the packet on the pending queue
1483                          */
1484                         __skb_queue_tail(queue, skb);
1485                         continue;
1486                 }
1487
1488                 if (copy > length)
1489                         copy = length;
1490
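                /*
                 * The current skb still has room: copy into its linear
                 * area when the device cannot do scatter-gather, or else
                 * attach the bytes as page fragments and charge them to
                 * sk_wmem_alloc.
                 */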
1491                 if (!(rt->dst.dev->features&NETIF_F_SG)) {
1492                         unsigned int off;
1493
1494                         off = skb->len;
1495                         if (getfrag(from, skb_put(skb, copy),
1496                                                 offset, copy, off, skb) < 0) {
1497                                 __skb_trim(skb, off);
1498                                 err = -EFAULT;
1499                                 goto error;
1500                         }
1501                 } else {
1502                         int i = skb_shinfo(skb)->nr_frags;
1503
1504                         err = -ENOMEM;
1505                         if (!sk_page_frag_refill(sk, pfrag))
1506                                 goto error;
1507
1508                         if (!skb_can_coalesce(skb, i, pfrag->page,
1509                                               pfrag->offset)) {
1510                                 err = -EMSGSIZE;
1511                                 if (i == MAX_SKB_FRAGS)
1512                                         goto error;
1513
1514                                 __skb_fill_page_desc(skb, i, pfrag->page,
1515                                                      pfrag->offset, 0);
1516                                 skb_shinfo(skb)->nr_frags = ++i;
1517                                 get_page(pfrag->page);
1518                         }
1519                         copy = min_t(int, copy, pfrag->size - pfrag->offset);
1520                         if (getfrag(from,
1521                                     page_address(pfrag->page) + pfrag->offset,
1522                                     offset, copy, skb->len, skb) < 0)
1523                                 goto error_efault;
1524
1525                         pfrag->offset += copy;
1526                         skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1527                         skb->len += copy;
1528                         skb->data_len += copy;
1529                         skb->truesize += copy;
1530                         atomic_add(copy, &sk->sk_wmem_alloc);
1531                 }
1532                 offset += copy;
1533                 length -= copy;
1534         }
1535
1536         return 0;
1537
1538 error_efault:
1539         err = -EFAULT;
1540 error:
1541         cork->length -= length;
1542         IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1543         return err;
1544 }
1545
1546 int ip6_append_data(struct sock *sk,
1547                     int getfrag(void *from, char *to, int offset, int len,
1548                                 int odd, struct sk_buff *skb),
1549                     void *from, int length, int transhdrlen, int hlimit,
1550                     int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
1551                     struct rt6_info *rt, unsigned int flags, int dontfrag)
1552 {
1553         struct inet_sock *inet = inet_sk(sk);
1554         struct ipv6_pinfo *np = inet6_sk(sk);
1555         int exthdrlen;
1556         int err;
1557
1558         if (flags&MSG_PROBE)
1559                 return 0;
1560         if (skb_queue_empty(&sk->sk_write_queue)) {
1561                 /*
1562                  * setup for corking
1563                  */
1564                 err = ip6_setup_cork(sk, &inet->cork, &np->cork, hlimit,
1565                                      tclass, opt, rt, fl6);
1566                 if (err)
1567                         return err;
1568
1569                 exthdrlen = (opt ? opt->opt_flen : 0);
1570                 length += exthdrlen;
1571                 transhdrlen += exthdrlen;
1572         } else {
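                /*
                 * Corking already in progress: reuse the flow saved at
                 * setup time and clear transhdrlen, as the transport
                 * header was laid out by the first call.
                 */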
1573                 fl6 = &inet->cork.fl.u.ip6;
1574                 transhdrlen = 0;
1575         }
1576
1577         return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
1578                                  &np->cork, sk_page_frag(sk), getfrag,
1579                                  from, length, transhdrlen, flags, dontfrag);
1580 }
1581 EXPORT_SYMBOL_GPL(ip6_append_data);
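
/*
 * Typical call sequence (illustrative sketch only, modeled on how
 * datagram protocols such as UDPv6 drive this API; "my_getfrag" is a
 * placeholder for the caller's copy routine, not a real symbol):
 *
 *	err = ip6_append_data(sk, my_getfrag, msg, len,
 *			      sizeof(struct udphdr), hlimit, tclass,
 *			      opt, &fl6, rt, msg->msg_flags, dontfrag);
 *	if (err)
 *		ip6_flush_pending_frames(sk);
 *	else if (!(msg->msg_flags & MSG_MORE))
 *		err = ip6_push_pending_frames(sk);
 */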
1582
1583 static void ip6_cork_release(struct inet_cork_full *cork,
1584                              struct inet6_cork *v6_cork)
1585 {
1586         if (v6_cork->opt) {
1587                 kfree(v6_cork->opt->dst0opt);
1588                 kfree(v6_cork->opt->dst1opt);
1589                 kfree(v6_cork->opt->hopopt);
1590                 kfree(v6_cork->opt->srcrt);
1591                 kfree(v6_cork->opt);
1592                 v6_cork->opt = NULL;
1593         }
1594
1595         if (cork->base.dst) {
1596                 dst_release(cork->base.dst);
1597                 cork->base.dst = NULL;
1598                 cork->base.flags &= ~IPCORK_ALLFRAG;
1599         }
1600         memset(&cork->fl, 0, sizeof(cork->fl));
1601 }
1602
1603 struct sk_buff *__ip6_make_skb(struct sock *sk,
1604                                struct sk_buff_head *queue,
1605                                struct inet_cork_full *cork,
1606                                struct inet6_cork *v6_cork)
1607 {
1608         struct sk_buff *skb, *tmp_skb;
1609         struct sk_buff **tail_skb;
1610         struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1611         struct ipv6_pinfo *np = inet6_sk(sk);
1612         struct net *net = sock_net(sk);
1613         struct ipv6hdr *hdr;
1614         struct ipv6_txoptions *opt = v6_cork->opt;
1615         struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
1616         struct flowi6 *fl6 = &cork->fl.u.ip6;
1617         unsigned char proto = fl6->flowi6_proto;
1618
1619         skb = __skb_dequeue(queue);
1620         if (!skb)
1621                 goto out;
1622         tail_skb = &(skb_shinfo(skb)->frag_list);
1623
1624         /* move skb->data from the ext header back to the IP header */
1625         if (skb->data < skb_network_header(skb))
1626                 __skb_pull(skb, skb_network_offset(skb));
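        /*
         * Chain the remaining queued skbs onto the first skb's frag_list,
         * folding their length and truesize into the head so the pending
         * queue becomes one logical packet; ip6_fragment() can later split
         * it along these boundaries if it exceeds the path MTU.
         */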
1627         while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
1628                 __skb_pull(tmp_skb, skb_network_header_len(skb));
1629                 *tail_skb = tmp_skb;
1630                 tail_skb = &(tmp_skb->next);
1631                 skb->len += tmp_skb->len;
1632                 skb->data_len += tmp_skb->len;
1633                 skb->truesize += tmp_skb->truesize;
1634                 tmp_skb->destructor = NULL;
1635                 tmp_skb->sk = NULL;
1636         }
1637
1638         /* Allow local fragmentation. */
1639         skb->ignore_df = ip6_sk_ignore_df(sk);
1640
1641         *final_dst = fl6->daddr;
1642         __skb_pull(skb, skb_network_header_len(skb));
1643         if (opt && opt->opt_flen)
1644                 ipv6_push_frag_opts(skb, opt, &proto);
1645         if (opt && opt->opt_nflen)
1646                 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1647
1648         skb_push(skb, sizeof(struct ipv6hdr));
1649         skb_reset_network_header(skb);
1650         hdr = ipv6_hdr(skb);
1651
1652         ip6_flow_hdr(hdr, v6_cork->tclass,
1653                      ip6_make_flowlabel(net, skb, fl6->flowlabel,
1654                                         np->autoflowlabel, fl6));
1655         hdr->hop_limit = v6_cork->hop_limit;
1656         hdr->nexthdr = proto;
1657         hdr->saddr = fl6->saddr;
1658         hdr->daddr = *final_dst;
1659
1660         skb->priority = sk->sk_priority;
1661         skb->mark = sk->sk_mark;
1662
1663         skb_dst_set(skb, dst_clone(&rt->dst));
1664         IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1665         if (proto == IPPROTO_ICMPV6) {
1666                 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1667
1668                 ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
1669                 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
1670         }
1671
1672         ip6_cork_release(cork, v6_cork);
1673 out:
1674         return skb;
1675 }
1676
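/*
 * Hand a fully assembled skb to the local output path.  Positive codes
 * from the queueing layer are remapped by net_xmit_errno(), so pure
 * congestion notification is not reported as an error; any remaining
 * failure is counted as an output discard.
 */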
1677 int ip6_send_skb(struct sk_buff *skb)
1678 {
1679         struct net *net = sock_net(skb->sk);
1680         struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1681         int err;
1682
1683         err = ip6_local_out(skb);
1684         if (err) {
1685                 if (err > 0)
1686                         err = net_xmit_errno(err);
1687                 if (err)
1688                         IP6_INC_STATS(net, rt->rt6i_idev,
1689                                       IPSTATS_MIB_OUTDISCARDS);
1690         }
1691
1692         return err;
1693 }
1694
1695 int ip6_push_pending_frames(struct sock *sk)
1696 {
1697         struct sk_buff *skb;
1698
1699         skb = ip6_finish_skb(sk);
1700         if (!skb)
1701                 return 0;
1702
1703         return ip6_send_skb(skb);
1704 }
1705 EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1706
1707 static void __ip6_flush_pending_frames(struct sock *sk,
1708                                        struct sk_buff_head *queue,
1709                                        struct inet_cork_full *cork,
1710                                        struct inet6_cork *v6_cork)
1711 {
1712         struct sk_buff *skb;
1713
1714         while ((skb = __skb_dequeue_tail(queue)) != NULL) {
1715                 if (skb_dst(skb))
1716                         IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1717                                       IPSTATS_MIB_OUTDISCARDS);
1718                 kfree_skb(skb);
1719         }
1720
1721         ip6_cork_release(cork, v6_cork);
1722 }
1723
1724 void ip6_flush_pending_frames(struct sock *sk)
1725 {
1726         __ip6_flush_pending_frames(sk, &sk->sk_write_queue,
1727                                    &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
1728 }
1729 EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
1730
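/*
 * One-shot counterpart of ip6_append_data()/ip6_push_pending_frames():
 * the datagram is built on an on-stack queue and cork, leaving
 * sk->sk_write_queue and the socket's cork state untouched.
 */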
1731 struct sk_buff *ip6_make_skb(struct sock *sk,
1732                              int getfrag(void *from, char *to, int offset,
1733                                          int len, int odd, struct sk_buff *skb),
1734                              void *from, int length, int transhdrlen,
1735                              int hlimit, int tclass,
1736                              struct ipv6_txoptions *opt, struct flowi6 *fl6,
1737                              struct rt6_info *rt, unsigned int flags,
1738                              int dontfrag)
1739 {
1740         struct inet_cork_full cork;
1741         struct inet6_cork v6_cork;
1742         struct sk_buff_head queue;
1743         int exthdrlen = (opt ? opt->opt_flen : 0);
1744         int err;
1745
1746         if (flags & MSG_PROBE)
1747                 return NULL;
1748
1749         __skb_queue_head_init(&queue);
1750
1751         cork.base.flags = 0;
1752         cork.base.addr = 0;
1753         cork.base.opt = NULL;
1754         v6_cork.opt = NULL;
1755         err = ip6_setup_cork(sk, &cork, &v6_cork, hlimit, tclass, opt, rt, fl6);
1756         if (err)
1757                 return ERR_PTR(err);
1758
1759         if (dontfrag < 0)
1760                 dontfrag = inet6_sk(sk)->dontfrag;
1761
1762         err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork,
1763                                 &current->task_frag, getfrag, from,
1764                                 length + exthdrlen, transhdrlen + exthdrlen,
1765                                 flags, dontfrag);
1766         if (err) {
1767                 __ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork);
1768                 return ERR_PTR(err);
1769         }
1770
1771         return __ip6_make_skb(sk, &queue, &cork, &v6_cork);
1772 }