Merge branch 'for-2.6.35' of git://linux-nfs.org/~bfields/linux
[linux-drm-fsl-dcu.git] / net / netfilter / ipvs / ip_vs_xmit.c
1 /*
2  * ip_vs_xmit.c: various packet transmitters for IPVS
3  *
4  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
5  *              Julian Anastasov <ja@ssi.bg>
6  *
7  *              This program is free software; you can redistribute it and/or
8  *              modify it under the terms of the GNU General Public License
9  *              as published by the Free Software Foundation; either version
10  *              2 of the License, or (at your option) any later version.
11  *
12  * Changes:
13  *
14  */
15
16 #define KMSG_COMPONENT "IPVS"
17 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
18
19 #include <linux/kernel.h>
20 #include <linux/slab.h>
21 #include <linux/tcp.h>                  /* for tcphdr */
22 #include <net/ip.h>
23 #include <net/tcp.h>                    /* for csum_tcpudp_magic */
24 #include <net/udp.h>
25 #include <net/icmp.h>                   /* for icmp_send */
26 #include <net/route.h>                  /* for ip_route_output */
27 #include <net/ipv6.h>
28 #include <net/ip6_route.h>
29 #include <linux/icmpv6.h>
30 #include <linux/netfilter.h>
31 #include <linux/netfilter_ipv4.h>
32
33 #include <net/ip_vs.h>
34
35
36 /*
37  *      Destination cache to speed up outgoing route lookup
38  */
39 static inline void
40 __ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst)
41 {
42         struct dst_entry *old_dst;
43
44         old_dst = dest->dst_cache;
45         dest->dst_cache = dst;
46         dest->dst_rtos = rtos;
47         dst_release(old_dst);
48 }
49
50 static inline struct dst_entry *
51 __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos, u32 cookie)
52 {
53         struct dst_entry *dst = dest->dst_cache;
54
55         if (!dst)
56                 return NULL;
57         if ((dst->obsolete
58              || (dest->af == AF_INET && rtos != dest->dst_rtos)) &&
59             dst->ops->check(dst, cookie) == NULL) {
60                 dest->dst_cache = NULL;
61                 dst_release(dst);
62                 return NULL;
63         }
64         dst_hold(dst);
65         return dst;
66 }
67
68 static struct rtable *
69 __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
70 {
71         struct rtable *rt;                      /* Route to the other host */
72         struct ip_vs_dest *dest = cp->dest;
73
74         if (dest) {
75                 spin_lock(&dest->dst_lock);
76                 if (!(rt = (struct rtable *)
77                       __ip_vs_dst_check(dest, rtos, 0))) {
78                         struct flowi fl = {
79                                 .oif = 0,
80                                 .nl_u = {
81                                         .ip4_u = {
82                                                 .daddr = dest->addr.ip,
83                                                 .saddr = 0,
84                                                 .tos = rtos, } },
85                         };
86
87                         if (ip_route_output_key(&init_net, &rt, &fl)) {
88                                 spin_unlock(&dest->dst_lock);
89                                 IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
90                                              &dest->addr.ip);
91                                 return NULL;
92                         }
93                         __ip_vs_dst_set(dest, rtos, dst_clone(&rt->u.dst));
94                         IP_VS_DBG(10, "new dst %pI4, refcnt=%d, rtos=%X\n",
95                                   &dest->addr.ip,
96                                   atomic_read(&rt->u.dst.__refcnt), rtos);
97                 }
98                 spin_unlock(&dest->dst_lock);
99         } else {
100                 struct flowi fl = {
101                         .oif = 0,
102                         .nl_u = {
103                                 .ip4_u = {
104                                         .daddr = cp->daddr.ip,
105                                         .saddr = 0,
106                                         .tos = rtos, } },
107                 };
108
109                 if (ip_route_output_key(&init_net, &rt, &fl)) {
110                         IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
111                                      &cp->daddr.ip);
112                         return NULL;
113                 }
114         }
115
116         return rt;
117 }
118
#ifdef CONFIG_IP_VS_IPV6
/*
 *	Do an IPv6 output route lookup towards @daddr.
 *
 *	ip6_route_output() signals a failed lookup through dst->error on the
 *	entry it returns rather than through a NULL pointer, so the error
 *	field has to be checked; an entry carrying an error is released here
 *	and never handed out.
 *
 *	Returns the route, or NULL (after logging) when no usable route
 *	exists.
 */
static struct rt6_info *
__ip_vs_route_output_v6(const struct in6_addr *daddr)
{
	struct flowi fl = {
		.oif = 0,
		.nl_u = {
			.ip6_u = {
				.daddr = *daddr,
				.saddr = {
					.s6_addr32 = { 0, 0, 0, 0 },
				},
			},
		},
	};
	struct dst_entry *dst;

	dst = ip6_route_output(&init_net, NULL, &fl);
	if (dst && !dst->error)
		return (struct rt6_info *)dst;

	/* dst_release() is also called on NULL elsewhere in this file */
	dst_release(dst);
	IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n", daddr);
	return NULL;
}

/*
 *	Get the IPv6 route used to reach the real server of @cp.
 *
 *	Without a bound destination a plain lookup towards cp->daddr is done.
 *	Otherwise the route cached in the destination is tried first and, on
 *	a miss, a fresh lookup is done and a clone of the result is stored in
 *	the cache; all of that happens under dest->dst_lock.
 *
 *	Returns the route, or NULL when no usable route exists.  The callers
 *	in this file either attach the returned route to the skb or drop it
 *	with dst_release().
 */
static struct rt6_info *
__ip_vs_get_out_rt_v6(struct ip_vs_conn *cp)
{
	struct rt6_info *rt;			/* Route to the other host */
	struct ip_vs_dest *dest = cp->dest;

	if (!dest)
		return __ip_vs_route_output_v6(&cp->daddr.in6);

	spin_lock(&dest->dst_lock);
	rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0, 0);
	if (!rt) {
		rt = __ip_vs_route_output_v6(&dest->addr.in6);
		if (!rt) {
			spin_unlock(&dest->dst_lock);
			return NULL;
		}
		/* the cache keeps its own reference on the new route */
		__ip_vs_dst_set(dest, 0, dst_clone(&rt->u.dst));
		IP_VS_DBG(10, "new dst %pI6, refcnt=%d\n",
			  &dest->addr.in6,
			  atomic_read(&rt->u.dst.__refcnt));
	}
	spin_unlock(&dest->dst_lock);

	return rt;
}
#endif
181
182
183 /*
184  *      Release dest->dst_cache before a dest is removed
185  */
186 void
187 ip_vs_dst_reset(struct ip_vs_dest *dest)
188 {
189         struct dst_entry *old_dst;
190
191         old_dst = dest->dst_cache;
192         dest->dst_cache = NULL;
193         dst_release(old_dst);
194 }
195
/*
 *	Hand @skb to the NF_INET_LOCAL_OUT hook of protocol family @pf with
 *	the device of route @rt as output device and dst_output() as the
 *	continuation.  The skb is flagged as ipvs_property first and
 *	skb_forward_csum() is applied to it.
 */
#define IP_VS_XMIT(pf, skb, rt)				\
do {							\
	(skb)->ipvs_property = 1;			\
	skb_forward_csum(skb);				\
	NF_HOOK(pf, NF_INET_LOCAL_OUT,			\
		(skb), NULL, (rt)->u.dst.dev,		\
		dst_output);				\
} while (0)
203
204
205 /*
206  *      NULL transmitter (do nothing except return NF_ACCEPT)
207  */
208 int
209 ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
210                 struct ip_vs_protocol *pp)
211 {
212         /* we do not touch skb and do not need pskb ptr */
213         return NF_ACCEPT;
214 }
215
216
217 /*
218  *      Bypass transmitter
219  *      Let packets bypass the destination when the destination is not
220  *      available, it may be only used in transparent cache cluster.
221  */
222 int
223 ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
224                   struct ip_vs_protocol *pp)
225 {
226         struct rtable *rt;                      /* Route to the other host */
227         struct iphdr  *iph = ip_hdr(skb);
228         u8     tos = iph->tos;
229         int    mtu;
230         struct flowi fl = {
231                 .oif = 0,
232                 .nl_u = {
233                         .ip4_u = {
234                                 .daddr = iph->daddr,
235                                 .saddr = 0,
236                                 .tos = RT_TOS(tos), } },
237         };
238
239         EnterFunction(10);
240
241         if (ip_route_output_key(&init_net, &rt, &fl)) {
242                 IP_VS_DBG_RL("%s(): ip_route_output error, dest: %pI4\n",
243                              __func__, &iph->daddr);
244                 goto tx_error_icmp;
245         }
246
247         /* MTU checking */
248         mtu = dst_mtu(&rt->u.dst);
249         if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
250                 ip_rt_put(rt);
251                 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
252                 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
253                 goto tx_error;
254         }
255
256         /*
257          * Call ip_send_check because we are not sure it is called
258          * after ip_defrag. Is copy-on-write needed?
259          */
260         if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
261                 ip_rt_put(rt);
262                 return NF_STOLEN;
263         }
264         ip_send_check(ip_hdr(skb));
265
266         /* drop old route */
267         skb_dst_drop(skb);
268         skb_dst_set(skb, &rt->u.dst);
269
270         /* Another hack: avoid icmp_send in ip_fragment */
271         skb->local_df = 1;
272
273         IP_VS_XMIT(NFPROTO_IPV4, skb, rt);
274
275         LeaveFunction(10);
276         return NF_STOLEN;
277
278  tx_error_icmp:
279         dst_link_failure(skb);
280  tx_error:
281         kfree_skb(skb);
282         LeaveFunction(10);
283         return NF_STOLEN;
284 }
285
#ifdef CONFIG_IP_VS_IPV6
/*
 *	IPv6 bypass transmitter: route the packet towards its own destination
 *	address and send it out unmodified.
 *
 *	ip6_route_output() signals a failed lookup through dst.error on the
 *	entry it returns rather than through a NULL pointer, so the error
 *	field is checked and an entry carrying an error is released instead
 *	of being used for transmission.
 *
 *	NF_STOLEN is returned on every path; on errors the skb is freed here.
 */
int
ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		     struct ip_vs_protocol *pp)
{
	struct rt6_info *rt;			/* Route to the other host */
	struct ipv6hdr  *iph = ipv6_hdr(skb);
	int    mtu;
	struct flowi fl = {
		.oif = 0,
		.nl_u = {
			.ip6_u = {
				.daddr = iph->daddr,
				.saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
	};

	EnterFunction(10);

	rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
	if (!rt || rt->u.dst.error) {
		/* drop the reference on an error entry, if we got one */
		if (rt)
			dst_release(&rt->u.dst);
		IP_VS_DBG_RL("%s(): ip6_route_output error, dest: %pI6\n",
			     __func__, &iph->daddr);
		goto tx_error_icmp;
	}

	/* MTU checking */
	mtu = dst_mtu(&rt->u.dst);
	if (skb->len > mtu) {
		dst_release(&rt->u.dst);
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
		goto tx_error;
	}

	/*
	 * Get an unshared skb before its route is replaced.  There is no
	 * header checksum to refresh for IPv6.
	 */
	skb = skb_share_check(skb, GFP_ATOMIC);
	if (unlikely(skb == NULL)) {
		dst_release(&rt->u.dst);
		return NF_STOLEN;
	}

	/* drop old route */
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->u.dst);

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	IP_VS_XMIT(NFPROTO_IPV6, skb, rt);

	LeaveFunction(10);
	return NF_STOLEN;

 tx_error_icmp:
	dst_link_failure(skb);
 tx_error:
	kfree_skb(skb);
	LeaveFunction(10);
	return NF_STOLEN;
}
#endif
350
351 /*
352  *      NAT transmitter (only for outside-to-inside nat forwarding)
353  *      Not used for related ICMP
354  */
355 int
356 ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
357                struct ip_vs_protocol *pp)
358 {
359         struct rtable *rt;              /* Route to the other host */
360         int mtu;
361         struct iphdr *iph = ip_hdr(skb);
362
363         EnterFunction(10);
364
365         /* check if it is a connection of no-client-port */
366         if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
367                 __be16 _pt, *p;
368                 p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt);
369                 if (p == NULL)
370                         goto tx_error;
371                 ip_vs_conn_fill_cport(cp, *p);
372                 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
373         }
374
375         if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos))))
376                 goto tx_error_icmp;
377
378         /* MTU checking */
379         mtu = dst_mtu(&rt->u.dst);
380         if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
381                 ip_rt_put(rt);
382                 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
383                 IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit(): frag needed for");
384                 goto tx_error;
385         }
386
387         /* copy-on-write the packet before mangling it */
388         if (!skb_make_writable(skb, sizeof(struct iphdr)))
389                 goto tx_error_put;
390
391         if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
392                 goto tx_error_put;
393
394         /* drop old route */
395         skb_dst_drop(skb);
396         skb_dst_set(skb, &rt->u.dst);
397
398         /* mangle the packet */
399         if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
400                 goto tx_error;
401         ip_hdr(skb)->daddr = cp->daddr.ip;
402         ip_send_check(ip_hdr(skb));
403
404         IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
405
406         /* FIXME: when application helper enlarges the packet and the length
407            is larger than the MTU of outgoing device, there will be still
408            MTU problem. */
409
410         /* Another hack: avoid icmp_send in ip_fragment */
411         skb->local_df = 1;
412
413         IP_VS_XMIT(NFPROTO_IPV4, skb, rt);
414
415         LeaveFunction(10);
416         return NF_STOLEN;
417
418   tx_error_icmp:
419         dst_link_failure(skb);
420   tx_error:
421         LeaveFunction(10);
422         kfree_skb(skb);
423         return NF_STOLEN;
424   tx_error_put:
425         ip_rt_put(rt);
426         goto tx_error;
427 }
428
#ifdef CONFIG_IP_VS_IPV6
/*
 *	IPv6 NAT transmitter.
 *
 *	Rewrites the destination address to cp->daddr (after the protocol's
 *	dnat_handler, if any, has mangled the payload) and sends the packet
 *	over the route to the real server.  NF_STOLEN is returned on every
 *	path; on errors the skb is freed here.
 */
int
ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		  struct ip_vs_protocol *pp)
{
	struct rt6_info *rt;		/* Route to the other host */
	int mtu;

	EnterFunction(10);

	/* connection without client port: learn it from this packet */
	if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
		__be16 port_buf, *port;

		port = skb_header_pointer(skb, sizeof(struct ipv6hdr),
					  sizeof(port_buf), &port_buf);
		if (!port)
			goto drop;
		ip_vs_conn_fill_cport(cp, *port);
		IP_VS_DBG(10, "filled cport=%d\n", ntohs(*port));
	}

	rt = __ip_vs_get_out_rt_v6(cp);
	if (rt == NULL)
		goto drop_link_failure;

	/* Too big for the path: tell the sender and drop */
	mtu = dst_mtu(&rt->u.dst);
	if (skb->len > mtu) {
		dst_release(&rt->u.dst);
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP_VS_DBG_RL_PKT(0, pp, skb, 0,
				 "ip_vs_nat_xmit_v6(): frag needed for");
		goto drop;
	}

	/*
	 * copy-on-write the packet before mangling it, and make room for
	 * the link-layer header of the output device
	 */
	if (!skb_make_writable(skb, sizeof(struct ipv6hdr)) ||
	    skb_cow(skb, rt->u.dst.dev->hard_header_len)) {
		dst_release(&rt->u.dst);
		goto drop;
	}

	/* from here on the route reference is owned by the skb */
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->u.dst);

	/* mangle the packet */
	if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
		goto drop;
	ipv6_hdr(skb)->daddr = cp->daddr.in6;

	IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");

	/* FIXME: when application helper enlarges the packet and the length
	   is larger than the MTU of outgoing device, there will be still
	   MTU problem. */

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	IP_VS_XMIT(NFPROTO_IPV6, skb, rt);

	LeaveFunction(10);
	return NF_STOLEN;

drop_link_failure:
	dst_link_failure(skb);
drop:
	LeaveFunction(10);
	kfree_skb(skb);
	return NF_STOLEN;
}
#endif
505
506
507 /*
508  *   IP Tunneling transmitter
509  *
510  *   This function encapsulates the packet in a new IP packet, its
511  *   destination will be set to cp->daddr. Most code of this function
512  *   is taken from ipip.c.
513  *
514  *   It is used in VS/TUN cluster. The load balancer selects a real
515  *   server from a cluster based on a scheduling algorithm,
516  *   encapsulates the request packet and forwards it to the selected
517  *   server. For example, all real servers are configured with
518  *   "ifconfig tunl0 <Virtual IP Address> up". When the server receives
519  *   the encapsulated packet, it will decapsulate the packet, processe
520  *   the request and return the response packets directly to the client
521  *   without passing the load balancer. This can greatly increase the
522  *   scalability of virtual server.
523  *
524  *   Used for ANY protocol
525  */
526 int
527 ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
528                   struct ip_vs_protocol *pp)
529 {
530         struct rtable *rt;                      /* Route to the other host */
531         struct net_device *tdev;                /* Device to other host */
532         struct iphdr  *old_iph = ip_hdr(skb);
533         u8     tos = old_iph->tos;
534         __be16 df = old_iph->frag_off;
535         sk_buff_data_t old_transport_header = skb->transport_header;
536         struct iphdr  *iph;                     /* Our new IP header */
537         unsigned int max_headroom;              /* The extra header space needed */
538         int    mtu;
539
540         EnterFunction(10);
541
542         if (skb->protocol != htons(ETH_P_IP)) {
543                 IP_VS_DBG_RL("%s(): protocol error, "
544                              "ETH_P_IP: %d, skb protocol: %d\n",
545                              __func__, htons(ETH_P_IP), skb->protocol);
546                 goto tx_error;
547         }
548
549         if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(tos))))
550                 goto tx_error_icmp;
551
552         tdev = rt->u.dst.dev;
553
554         mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
555         if (mtu < 68) {
556                 ip_rt_put(rt);
557                 IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
558                 goto tx_error;
559         }
560         if (skb_dst(skb))
561                 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
562
563         df |= (old_iph->frag_off & htons(IP_DF));
564
565         if ((old_iph->frag_off & htons(IP_DF))
566             && mtu < ntohs(old_iph->tot_len)) {
567                 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
568                 ip_rt_put(rt);
569                 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
570                 goto tx_error;
571         }
572
573         /*
574          * Okay, now see if we can stuff it in the buffer as-is.
575          */
576         max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);
577
578         if (skb_headroom(skb) < max_headroom
579             || skb_cloned(skb) || skb_shared(skb)) {
580                 struct sk_buff *new_skb =
581                         skb_realloc_headroom(skb, max_headroom);
582                 if (!new_skb) {
583                         ip_rt_put(rt);
584                         kfree_skb(skb);
585                         IP_VS_ERR_RL("%s(): no memory\n", __func__);
586                         return NF_STOLEN;
587                 }
588                 kfree_skb(skb);
589                 skb = new_skb;
590                 old_iph = ip_hdr(skb);
591         }
592
593         skb->transport_header = old_transport_header;
594
595         /* fix old IP header checksum */
596         ip_send_check(old_iph);
597
598         skb_push(skb, sizeof(struct iphdr));
599         skb_reset_network_header(skb);
600         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
601
602         /* drop old route */
603         skb_dst_drop(skb);
604         skb_dst_set(skb, &rt->u.dst);
605
606         /*
607          *      Push down and install the IPIP header.
608          */
609         iph                     =       ip_hdr(skb);
610         iph->version            =       4;
611         iph->ihl                =       sizeof(struct iphdr)>>2;
612         iph->frag_off           =       df;
613         iph->protocol           =       IPPROTO_IPIP;
614         iph->tos                =       tos;
615         iph->daddr              =       rt->rt_dst;
616         iph->saddr              =       rt->rt_src;
617         iph->ttl                =       old_iph->ttl;
618         ip_select_ident(iph, &rt->u.dst, NULL);
619
620         /* Another hack: avoid icmp_send in ip_fragment */
621         skb->local_df = 1;
622
623         ip_local_out(skb);
624
625         LeaveFunction(10);
626
627         return NF_STOLEN;
628
629   tx_error_icmp:
630         dst_link_failure(skb);
631   tx_error:
632         kfree_skb(skb);
633         LeaveFunction(10);
634         return NF_STOLEN;
635 }
636
#ifdef CONFIG_IP_VS_IPV6
/*
 *	IPv6 tunneling transmitter: prepend a new IPv6 header addressed to
 *	the route's destination (source is cp->vaddr) in front of the
 *	original packet and send the result with ip6_local_out().
 *
 *	NF_STOLEN is returned on every path; on errors the skb is freed here.
 */
int
ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		     struct ip_vs_protocol *pp)
{
	struct ipv6hdr *inner = ipv6_hdr(skb);
	sk_buff_data_t saved_transport = skb->transport_header;
	struct ipv6hdr *outer;		/* Our new IP header */
	struct rt6_info *rt;		/* Route to the other host */
	struct net_device *tdev;	/* Device to other host */
	unsigned int max_headroom;	/* The extra header space needed */
	int mtu;

	EnterFunction(10);

	if (skb->protocol != htons(ETH_P_IPV6)) {
		IP_VS_DBG_RL("%s(): protocol error, "
			     "ETH_P_IPV6: %d, skb protocol: %d\n",
			     __func__, htons(ETH_P_IPV6), skb->protocol);
		goto drop;
	}

	rt = __ip_vs_get_out_rt_v6(cp);
	if (rt == NULL)
		goto drop_link_failure;

	tdev = rt->u.dst.dev;

	/* the outer IPv6 header eats into the MTU available to the payload */
	mtu = dst_mtu(&rt->u.dst) - sizeof(struct ipv6hdr);
	/* TODO IPv6: do we need this check in IPv6? */
	if (mtu < 1280) {
		dst_release(&rt->u.dst);
		IP_VS_DBG_RL("%s(): mtu less than 1280\n", __func__);
		goto drop;
	}
	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);

	if (mtu < ntohs(inner->payload_len) + sizeof(struct ipv6hdr)) {
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		dst_release(&rt->u.dst);
		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
		goto drop;
	}

	/*
	 * Reallocate when there is not enough headroom for the extra header
	 * or when the buffer is not exclusively ours.
	 */
	max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);

	if (skb_headroom(skb) < max_headroom ||
	    skb_cloned(skb) || skb_shared(skb)) {
		struct sk_buff *copy = skb_realloc_headroom(skb, max_headroom);

		if (copy == NULL) {
			dst_release(&rt->u.dst);
			kfree_skb(skb);
			IP_VS_ERR_RL("%s(): no memory\n", __func__);
			return NF_STOLEN;
		}
		kfree_skb(skb);
		skb = copy;
		/* the inner header now lives in the new buffer */
		inner = ipv6_hdr(skb);
	}

	skb->transport_header = saved_transport;

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

	/* from here on the route reference is owned by the skb */
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->u.dst);

	/*
	 *	Push down and install the IPIP header.
	 */
	outer = ipv6_hdr(skb);
	outer->version = 6;
	outer->nexthdr = IPPROTO_IPV6;
	/* outer payload = inner payload + the inner IPv6 header itself */
	outer->payload_len = inner->payload_len;
	be16_add_cpu(&outer->payload_len, sizeof(*inner));
	outer->priority = inner->priority;
	memset(&outer->flow_lbl, 0, sizeof(outer->flow_lbl));
	outer->daddr = rt->rt6i_dst.addr;
	outer->saddr = cp->vaddr.in6; /* rt->rt6i_src.addr; */
	outer->hop_limit = inner->hop_limit;

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	ip6_local_out(skb);

	LeaveFunction(10);

	return NF_STOLEN;

drop_link_failure:
	dst_link_failure(skb);
drop:
	kfree_skb(skb);
	LeaveFunction(10);
	return NF_STOLEN;
}
#endif
743
744
745 /*
746  *      Direct Routing transmitter
747  *      Used for ANY protocol
748  */
749 int
750 ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
751               struct ip_vs_protocol *pp)
752 {
753         struct rtable *rt;                      /* Route to the other host */
754         struct iphdr  *iph = ip_hdr(skb);
755         int    mtu;
756
757         EnterFunction(10);
758
759         if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos))))
760                 goto tx_error_icmp;
761
762         /* MTU checking */
763         mtu = dst_mtu(&rt->u.dst);
764         if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) {
765                 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
766                 ip_rt_put(rt);
767                 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
768                 goto tx_error;
769         }
770
771         /*
772          * Call ip_send_check because we are not sure it is called
773          * after ip_defrag. Is copy-on-write needed?
774          */
775         if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
776                 ip_rt_put(rt);
777                 return NF_STOLEN;
778         }
779         ip_send_check(ip_hdr(skb));
780
781         /* drop old route */
782         skb_dst_drop(skb);
783         skb_dst_set(skb, &rt->u.dst);
784
785         /* Another hack: avoid icmp_send in ip_fragment */
786         skb->local_df = 1;
787
788         IP_VS_XMIT(NFPROTO_IPV4, skb, rt);
789
790         LeaveFunction(10);
791         return NF_STOLEN;
792
793   tx_error_icmp:
794         dst_link_failure(skb);
795   tx_error:
796         kfree_skb(skb);
797         LeaveFunction(10);
798         return NF_STOLEN;
799 }
800
#ifdef CONFIG_IP_VS_IPV6
/*
 *	IPv6 direct routing transmitter: the packet is sent unmodified over
 *	the route to the real server.  NF_STOLEN is returned on every path;
 *	on errors the skb is freed here.
 */
int
ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		 struct ip_vs_protocol *pp)
{
	struct rt6_info *rt;			/* Route to the other host */
	int mtu;

	EnterFunction(10);

	rt = __ip_vs_get_out_rt_v6(cp);
	if (rt == NULL)
		goto drop_link_failure;

	/* Too big for the path: tell the sender and drop */
	mtu = dst_mtu(&rt->u.dst);
	if (skb->len > mtu) {
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		dst_release(&rt->u.dst);
		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
		goto drop;
	}

	/* get an unshared skb before its route is replaced */
	skb = skb_share_check(skb, GFP_ATOMIC);
	if (unlikely(!skb)) {
		dst_release(&rt->u.dst);
		return NF_STOLEN;
	}

	/* replace the old route with the one to the real server */
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->u.dst);

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	IP_VS_XMIT(NFPROTO_IPV6, skb, rt);

	LeaveFunction(10);
	return NF_STOLEN;

drop_link_failure:
	dst_link_failure(skb);
drop:
	kfree_skb(skb);
	LeaveFunction(10);
	return NF_STOLEN;
}
#endif
854
855
856 /*
857  *      ICMP packet transmitter
858  *      called by the ip_vs_in_icmp
859  */
860 int
861 ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
862                 struct ip_vs_protocol *pp, int offset)
863 {
864         struct rtable   *rt;    /* Route to the other host */
865         int mtu;
866         int rc;
867
868         EnterFunction(10);
869
870         /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
871            forwarded directly here, because there is no need to
872            translate address/port back */
873         if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
874                 if (cp->packet_xmit)
875                         rc = cp->packet_xmit(skb, cp, pp);
876                 else
877                         rc = NF_ACCEPT;
878                 /* do not touch skb anymore */
879                 atomic_inc(&cp->in_pkts);
880                 goto out;
881         }
882
883         /*
884          * mangle and send the packet here (only for VS/NAT)
885          */
886
887         if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(ip_hdr(skb)->tos))))
888                 goto tx_error_icmp;
889
890         /* MTU checking */
891         mtu = dst_mtu(&rt->u.dst);
892         if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) {
893                 ip_rt_put(rt);
894                 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
895                 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
896                 goto tx_error;
897         }
898
899         /* copy-on-write the packet before mangling it */
900         if (!skb_make_writable(skb, offset))
901                 goto tx_error_put;
902
903         if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
904                 goto tx_error_put;
905
906         /* drop the old route when skb is not shared */
907         skb_dst_drop(skb);
908         skb_dst_set(skb, &rt->u.dst);
909
910         ip_vs_nat_icmp(skb, pp, cp, 0);
911
912         /* Another hack: avoid icmp_send in ip_fragment */
913         skb->local_df = 1;
914
915         IP_VS_XMIT(NFPROTO_IPV4, skb, rt);
916
917         rc = NF_STOLEN;
918         goto out;
919
920   tx_error_icmp:
921         dst_link_failure(skb);
922   tx_error:
923         dev_kfree_skb(skb);
924         rc = NF_STOLEN;
925   out:
926         LeaveFunction(10);
927         return rc;
928   tx_error_put:
929         ip_rt_put(rt);
930         goto tx_error;
931 }
932
#ifdef CONFIG_IP_VS_IPV6
/*
 *	ICMPv6 packet transmitter.
 *
 *	For connections that are not forwarded with the MASQ method the
 *	packet is passed to cp->packet_xmit (or accepted when there is
 *	none) and that verdict is returned.  For MASQ connections the
 *	packet is mangled with ip_vs_nat_icmp_v6() and sent over the route
 *	to the real server; NF_STOLEN is returned on all of those paths and
 *	the skb is freed here on errors.
 */
int
ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		struct ip_vs_protocol *pp, int offset)
{
	struct rt6_info *rt;	/* Route to the other host */
	int mtu;

	EnterFunction(10);

	/* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
	   forwarded directly here, because there is no need to
	   translate address/port back */
	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
		int verdict = NF_ACCEPT;

		if (cp->packet_xmit)
			verdict = cp->packet_xmit(skb, cp, pp);
		/* do not touch skb anymore */
		atomic_inc(&cp->in_pkts);
		LeaveFunction(10);
		return verdict;
	}

	/*
	 * mangle and send the packet here (only for VS/NAT)
	 */

	rt = __ip_vs_get_out_rt_v6(cp);
	if (rt == NULL)
		goto drop_link_failure;

	/* Too big for the path: tell the sender and drop */
	mtu = dst_mtu(&rt->u.dst);
	if (skb->len > mtu) {
		dst_release(&rt->u.dst);
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
		goto drop;
	}

	/*
	 * copy-on-write the packet before mangling it, and make room for
	 * the link-layer header of the output device
	 */
	if (!skb_make_writable(skb, offset) ||
	    skb_cow(skb, rt->u.dst.dev->hard_header_len)) {
		dst_release(&rt->u.dst);
		goto drop;
	}

	/* drop the old route when skb is not shared */
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->u.dst);

	ip_vs_nat_icmp_v6(skb, pp, cp, 0);

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	IP_VS_XMIT(NFPROTO_IPV6, skb, rt);

	LeaveFunction(10);
	return NF_STOLEN;

drop_link_failure:
	dst_link_failure(skb);
drop:
	dev_kfree_skb(skb);
	LeaveFunction(10);
	return NF_STOLEN;
}
#endif