3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: datagram.c,v 1.24 2002/02/01 22:01:04 davem Exp $
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
16 #include <linux/capability.h>
17 #include <linux/errno.h>
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/sched.h>
21 #include <linux/interrupt.h>
22 #include <linux/socket.h>
23 #include <linux/sockios.h>
24 #include <linux/in6.h>
25 #include <linux/ipv6.h>
26 #include <linux/route.h>
29 #include <net/ndisc.h>
30 #include <net/addrconf.h>
31 #include <net/transp_v6.h>
32 #include <net/ip6_route.h>
33 #include <net/tcp_states.h>
35 #include <linux/errqueue.h>
36 #include <asm/uaccess.h>
/*
 * ip6_datagram_connect - set the remote peer of an IPv6 datagram socket.
 *
 * @sk:       socket being connected
 * @uaddr:    caller-supplied destination, read as a struct sockaddr_in6
 * @addr_len: length of @uaddr; compared against SIN6_LEN_RFC2133 and, for
 *            the scope id, against sizeof(struct sockaddr_in6)
 *
 * An AF_INET address, or an IPv4-mapped IPv6 destination, is passed on to
 * ip4_datagram_connect(). Otherwise the destination address, flow label and
 * port are recorded in the socket, a route is obtained through
 * ip6_dst_lookup() and xfrm_lookup(), the resulting dst is stored with
 * ip6_dst_store(), and sk->sk_state becomes TCP_ESTABLISHED.
 *
 * NOTE(review): presumably returns err (0 or a negative errno); the return
 * statements are not visible in this listing - confirm.
 */
38 int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
40 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
41 struct inet_sock *inet = inet_sk(sk);
42 struct ipv6_pinfo *np = inet6_sk(sk);
43 struct in6_addr *daddr, *final_p = NULL, final;
44 struct dst_entry *dst;
46 struct ip6_flowlabel *flowlabel = NULL;
/* AF_INET sockaddr: after the __ipv6_only_sock() test, use the IPv4 connect. */
50 if (usin->sin6_family == AF_INET) {
51 if (__ipv6_only_sock(sk))
53 err = ip4_datagram_connect(sk, uaddr, addr_len);
/* Anything else must be at least an RFC 2133 sized AF_INET6 sockaddr. */
57 if (addr_len < SIN6_LEN_RFC2133)
60 if (usin->sin6_family != AF_INET6)
63 memset(&fl, 0, sizeof(fl));
/*
 * Take the flow info from the caller. When flow label bits are set, the
 * label is looked up on this socket and the destination stored with the
 * label overwrites the address the caller passed in.
 */
65 fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
66 if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
67 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
68 if (flowlabel == NULL)
70 ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
74 addr_type = ipv6_addr_type(&usin->sin6_addr);
/* Unspecified destination: setting the last byte to 1 turns it into ::1. */
76 if (addr_type == IPV6_ADDR_ANY) {
80 usin->sin6_addr.s6_addr[15] = 0x01;
83 daddr = &usin->sin6_addr;
/*
 * IPv4-mapped destination: build a sockaddr_in from the last 32 bits of the
 * address plus the port and connect through IPv4, then write the IPv6 side
 * of the socket in mapped form (third word 0x0000ffff in network order).
 * saddr and rcv_saddr are only filled in while they are still unspecified.
 */
85 if (addr_type == IPV6_ADDR_MAPPED) {
86 struct sockaddr_in sin;
88 if (__ipv6_only_sock(sk)) {
92 sin.sin_family = AF_INET;
93 sin.sin_addr.s_addr = daddr->s6_addr32[3];
94 sin.sin_port = usin->sin6_port;
96 err = ip4_datagram_connect(sk,
97 (struct sockaddr*) &sin,
104 ipv6_addr_set(&np->daddr, 0, 0, htonl(0x0000ffff), inet->daddr);
106 if (ipv6_addr_any(&np->saddr)) {
107 ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000ffff),
111 if (ipv6_addr_any(&np->rcv_saddr)) {
112 ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000ffff),
/*
 * Link-local destination: a non-zero sin6_scope_id (only read when addr_len
 * covers the full sockaddr_in6) must agree with an interface the socket is
 * already bound to, and is then stored as sk->sk_bound_dev_if.
 *
 * NOTE(review): if the mcast_oif test below sits inside the sin6_scope_id
 * branch, sk_bound_dev_if has just been set to a non-zero value and the test
 * cannot succeed. fl.oif is also assigned again from sk->sk_bound_dev_if
 * further down, where the same multicast fallback is repeated. Confirm the
 * nesting against the full source.
 */
118 if (addr_type&IPV6_ADDR_LINKLOCAL) {
119 if (addr_len >= sizeof(struct sockaddr_in6) &&
120 usin->sin6_scope_id) {
121 if (sk->sk_bound_dev_if &&
122 sk->sk_bound_dev_if != usin->sin6_scope_id) {
126 sk->sk_bound_dev_if = usin->sin6_scope_id;
127 if (!sk->sk_bound_dev_if &&
128 (addr_type & IPV6_ADDR_MULTICAST))
129 fl.oif = np->mcast_oif;
132 /* Connect to link-local address requires an interface */
133 if (!sk->sk_bound_dev_if) {
/* Record the peer address, flow label and port in the socket. */
139 ipv6_addr_copy(&np->daddr, daddr);
140 np->flow_label = fl.fl6_flowlabel;
142 inet->dport = usin->sin6_port;
145 * Check for a route to destination an obtain the
146 * destination cache for it.
/* Build the flow key used for the route lookup from the socket state. */
149 fl.proto = sk->sk_protocol;
150 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
151 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
152 fl.oif = sk->sk_bound_dev_if;
153 fl.fl_ip_dport = inet->dport;
154 fl.fl_ip_sport = inet->sport;
/* Multicast destination without a bound interface: use the socket's mcast_oif. */
156 if (!fl.oif && (addr_type&IPV6_ADDR_MULTICAST))
157 fl.oif = np->mcast_oif;
159 security_sk_classify_flow(sk, &fl);
/*
 * Source routing: when the flow label options, or else the socket options,
 * carry a routing header, the real destination is saved in 'final' and the
 * lookup is done towards rt0->addr instead.
 *
 * NOTE(review): flowlabel is initialised to NULL and only assigned when flow
 * label bits were set, so the dereference below needs a NULL guard; none is
 * visible in this listing - confirm.
 */
162 if (flowlabel->opt && flowlabel->opt->srcrt) {
163 struct rt0_hdr *rt0 = (struct rt0_hdr *) flowlabel->opt->srcrt;
164 ipv6_addr_copy(&final, &fl.fl6_dst);
165 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
168 } else if (np->opt && np->opt->srcrt) {
169 struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
170 ipv6_addr_copy(&final, &fl.fl6_dst);
171 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
175 err = ip6_dst_lookup(sk, &dst, &fl);
/* Put the address held in final_p back into the flow before the xfrm lookup. */
179 ipv6_addr_copy(&fl.fl6_dst, final_p);
181 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
184 /* source address lookup done in ip6_dst_lookup */
/* Adopt the source address found in the flow wherever the socket has none yet. */
186 if (ipv6_addr_any(&np->saddr))
187 ipv6_addr_copy(&np->saddr, &fl.fl6_src);
189 if (ipv6_addr_any(&np->rcv_saddr)) {
190 ipv6_addr_copy(&np->rcv_saddr, &fl.fl6_src);
191 inet->rcv_saddr = LOOPBACK4_IPV6;
/* Store the route in the socket; the third argument depends on whether the
 * looked-up destination equals the socket's daddr (rest of the call is not
 * visible here). */
194 ip6_dst_store(sk, dst,
195 ipv6_addr_equal(&fl.fl6_dst, &np->daddr) ?
198 sk->sk_state = TCP_ESTABLISHED;
/* Release the flow label obtained from fl6_sock_lookup() above. */
200 fl6_sock_release(flowlabel);
/*
 * ipv6_icmp_error - queue an error derived from a received ICMPv6 message
 * on the socket's error queue.
 *
 * @sk:      socket the error is reported to
 * @skb:     buffer holding the ICMPv6 message; it is cloned, and the clone
 *           is what gets modified and queued
 * @err:     errno value stored in ee_errno
 * @port:    NOTE(review): presumably stored in serr->port; that assignment
 *           is not visible in this listing - confirm
 * @info:    value stored in ee_info
 * @payload: pointer into the buffer that becomes the start of the queued data
 */
204 void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
205 u16 port, u32 info, u8 *payload)
207 struct ipv6_pinfo *np = inet6_sk(sk);
208 struct icmp6hdr *icmph = (struct icmp6hdr *)skb->h.raw;
209 struct sock_exterr_skb *serr;
/* Work on a clone (atomic allocation). */
214 skb = skb_clone(skb, GFP_ATOMIC);
/* Fill the extended error; type and code are taken from the ICMPv6 header. */
218 serr = SKB_EXT_ERR(skb);
219 serr->ee.ee_errno = err;
220 serr->ee.ee_origin = SO_EE_ORIGIN_ICMP6;
221 serr->ee.ee_type = icmph->icmp6_type;
222 serr->ee.ee_code = icmph->icmp6_code;
224 serr->ee.ee_info = info;
225 serr->ee.ee_data = 0;
/*
 * addr_offset is the distance from skb->nh.raw to the destination address of
 * the IPv6 header that immediately follows the ICMPv6 header.
 */
226 serr->addr_offset = (u8*)&(((struct ipv6hdr*)(icmph+1))->daddr) - skb->nh.raw;
/* Make the queued data start at payload. */
229 skb->h.raw = payload;
230 __skb_pull(skb, payload - skb->data);
232 if (sock_queue_err_skb(sk, skb))
/*
 * ipv6_local_error - queue a locally generated error on the socket's error
 * queue.
 *
 * @sk:   socket the error is reported to
 * @err:  errno value stored in ee_errno
 * @fl:   flow of the failed transmission; supplies the destination address
 *        (fl6_dst) and port (fl_ip_dport) reported with the error
 * @info: value stored in ee_info
 */
236 void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info)
238 struct ipv6_pinfo *np = inet6_sk(sk);
239 struct sock_exterr_skb *serr;
/* Build a buffer holding one IPv6 header, used to carry the destination address. */
246 skb = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
250 iph = (struct ipv6hdr*)skb_put(skb, sizeof(struct ipv6hdr));
252 ipv6_addr_copy(&iph->daddr, &fl->fl6_dst);
/* Locally originated: type and code are reported as 0. */
254 serr = SKB_EXT_ERR(skb);
255 serr->ee.ee_errno = err;
256 serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL;
257 serr->ee.ee_type = 0;
258 serr->ee.ee_code = 0;
260 serr->ee.ee_info = info;
261 serr->ee.ee_data = 0;
262 serr->addr_offset = (u8*)&iph->daddr - skb->nh.raw;
263 serr->port = fl->fl_ip_dport;
/* Pull everything up to the tail, so the queued buffer carries no payload data. */
265 skb->h.raw = skb->tail;
266 __skb_pull(skb, skb->tail - skb->data);
268 if (sock_queue_err_skb(sk, skb))
273 * Handle MSG_ERRQUEUE
/*
 * ipv6_recv_error - deliver one entry of the socket error queue to the user.
 *
 * @sk:  socket whose sk_error_queue is read
 * @msg: message header receiving the data, the address in msg_name and an
 *       IPV6_RECVERR control message
 * @len: NOTE(review): presumably the size of the user buffer that bounds
 *       the copy; the comparison is not visible in this listing - confirm
 *
 * One buffer is dequeued, its data copied into msg->msg_iov, and the
 * extended error plus the offender's address are attached as ancillary data.
 * MSG_ERRQUEUE is set in msg->msg_flags. Afterwards sk->sk_err is set from
 * the next queued error, if there is one.
 */
275 int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
277 struct ipv6_pinfo *np = inet6_sk(sk);
278 struct sock_exterr_skb *serr;
279 struct sk_buff *skb, *skb2;
280 struct sockaddr_in6 *sin;
282 struct sock_extended_err ee;
283 struct sockaddr_in6 offender;
289 skb = skb_dequeue(&sk->sk_error_queue);
295 msg->msg_flags |= MSG_TRUNC;
298 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
302 sock_recv_timestamp(msg, sk, skb);
304 serr = SKB_EXT_ERR(skb);
/*
 * Address returned in msg_name: the address found at addr_offset in the
 * queued buffer, together with the recorded port.
 */
306 sin = (struct sockaddr_in6 *)msg->msg_name;
308 sin->sin6_family = AF_INET6;
309 sin->sin6_flowinfo = 0;
310 sin->sin6_port = serr->port;
311 sin->sin6_scope_id = 0;
312 if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP6) {
313 ipv6_addr_copy(&sin->sin6_addr,
314 (struct in6_addr *)(skb->nh.raw + serr->addr_offset));
/* Flow info is read 24 bytes before the address - presumably the first word
 * of the IPv6 header that contains it. */
316 sin->sin6_flowinfo = *(u32*)(skb->nh.raw + serr->addr_offset - 24) & IPV6_FLOWINFO_MASK;
/* Link-local addresses are reported with the incoming interface as scope id. */
317 if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
318 sin->sin6_scope_id = IP6CB(skb)->iif;
/* Other origins: the 32-bit value at addr_offset becomes the last address word. */
320 ipv6_addr_set(&sin->sin6_addr, 0, 0,
322 *(u32*)(skb->nh.raw + serr->addr_offset));
/*
 * Build the IPV6_RECVERR payload: the extended error followed by the
 * offender's address. The family stays AF_UNSPEC for locally generated
 * errors.
 *
 * NOTE(review): in the visible lines offender.sin6_port is never assigned
 * before put_cmsg() copies sizeof(errhdr) bytes out. If errhdr is an
 * uninitialised stack object this would expose stack contents - confirm
 * against the full source and zero the structure if so.
 */
326 memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err));
327 sin = &errhdr.offender;
328 sin->sin6_family = AF_UNSPEC;
329 if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL) {
330 sin->sin6_family = AF_INET6;
331 sin->sin6_flowinfo = 0;
332 sin->sin6_scope_id = 0;
/* ICMPv6 origin: the offender is the source address of the queued packet. */
333 if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP6) {
334 ipv6_addr_copy(&sin->sin6_addr, &skb->nh.ipv6h->saddr);
336 datagram_recv_ctl(sk, msg, skb);
337 if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
338 sin->sin6_scope_id = IP6CB(skb)->iif;
/* Remaining origins: IPv4 control messages are emitted when cmsg_flags is set. */
340 struct inet_sock *inet = inet_sk(sk);
342 ipv6_addr_set(&sin->sin6_addr, 0, 0,
345 if (inet->cmsg_flags)
346 ip_cmsg_recv(msg, skb);
350 put_cmsg(msg, SOL_IPV6, IPV6_RECVERR, sizeof(errhdr), &errhdr);
352 /* Now we could try to dump offended packet options */
354 msg->msg_flags |= MSG_ERRQUEUE;
357 /* Reset and regenerate socket error */
/*
 * With the error queue lock held, peek at the next entry. If there is one,
 * its errno becomes sk->sk_err and sk_error_report() is called after the
 * lock has been dropped.
 */
358 spin_lock_bh(&sk->sk_error_queue.lock);
360 if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) {
361 sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
362 spin_unlock_bh(&sk->sk_error_queue.lock);
363 sk->sk_error_report(sk);
365 spin_unlock_bh(&sk->sk_error_queue.lock);
/*
 * datagram_recv_ctl - attach IPv6 ancillary data for a received packet.
 *
 * @sk:  receiving socket; the bits in np->rxopt select what is reported
 * @msg: message header the control messages are appended to with put_cmsg()
 * @skb: received packet; header fields are read through skb->nh and the
 *       parsed offsets kept in IP6CB(skb)
 *
 * Each enabled receive option produces one SOL_IPV6 control message. The
 * first group uses the current option names; the second group ("old
 * style") uses the IPV6_2292* names.
 */
376 int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
378 struct ipv6_pinfo *np = inet6_sk(sk);
379 struct inet6_skb_parm *opt = IP6CB(skb);
/* Packet info: incoming interface index and the packet's destination address. */
381 if (np->rxopt.bits.rxinfo) {
382 struct in6_pktinfo src_info;
384 src_info.ipi6_ifindex = opt->iif;
385 ipv6_addr_copy(&src_info.ipi6_addr, &skb->nh.ipv6h->daddr);
386 put_cmsg(msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info);
/* Hop limit, widened to int. */
389 if (np->rxopt.bits.rxhlim) {
390 int hlim = skb->nh.ipv6h->hop_limit;
391 put_cmsg(msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim);
/* Traffic class: bits 20-27 of the first header word in host byte order. */
394 if (np->rxopt.bits.rxtclass) {
395 int tclass = (ntohl(*(u32 *)skb->nh.ipv6h) >> 20) & 0xff;
396 put_cmsg(msg, SOL_IPV6, IPV6_TCLASS, sizeof(tclass), &tclass);
/* Flow info is only reported when the masked value is non-zero. */
399 if (np->rxopt.bits.rxflow && (*(u32*)skb->nh.raw & IPV6_FLOWINFO_MASK)) {
400 u32 flowinfo = *(u32*)skb->nh.raw & IPV6_FLOWINFO_MASK;
401 put_cmsg(msg, SOL_IPV6, IPV6_FLOWINFO, sizeof(flowinfo), &flowinfo);
404 /* HbH is allowed only once */
/* Header length in bytes is (second header byte + 1) * 8. */
405 if (np->rxopt.bits.hopopts && opt->hop) {
406 u8 *ptr = skb->nh.raw + opt->hop;
407 put_cmsg(msg, SOL_IPV6, IPV6_HOPOPTS, (ptr[1]+1)<<3, ptr);
411 (np->rxopt.bits.dstopts || np->rxopt.bits.srcrt)) {
413 * Silly enough, but we need to reparse in order to
414 * report extension headers (except for HbH)
417 * Also note that IPV6_RECVRTHDRDSTOPTS is NOT
418 * (and WILL NOT be) defined because
419 * IPV6_RECVDSTOPTS is more generic. --yoshfuji
/*
 * Walk the extension headers that follow the fixed IPv6 header, up to the
 * offset recorded in opt->lastopt, reporting destination options and
 * routing headers when the matching option bit is set.
 */
421 unsigned int off = sizeof(struct ipv6hdr);
422 u8 nexthdr = skb->nh.ipv6h->nexthdr;
424 while (off <= opt->lastopt) {
426 u8 *ptr = skb->nh.raw + off;
429 case IPPROTO_DSTOPTS:
431 len = (ptr[1] + 1) << 3;
432 if (np->rxopt.bits.dstopts)
433 put_cmsg(msg, SOL_IPV6, IPV6_DSTOPTS, len, ptr);
435 case IPPROTO_ROUTING:
437 len = (ptr[1] + 1) << 3;
438 if (np->rxopt.bits.srcrt)
439 put_cmsg(msg, SOL_IPV6, IPV6_RTHDR, len, ptr);
/* NOTE(review): the case labels for the next two length computations are not
 * visible here; the first uses (ptr[1] + 2) * 4 bytes instead of the
 * (ptr[1] + 1) * 8 used elsewhere - confirm which header type it is for. */
443 len = (ptr[1] + 2) << 2;
447 len = (ptr[1] + 1) << 3;
455 /* socket options in old style */
456 if (np->rxopt.bits.rxoinfo) {
457 struct in6_pktinfo src_info;
459 src_info.ipi6_ifindex = opt->iif;
460 ipv6_addr_copy(&src_info.ipi6_addr, &skb->nh.ipv6h->daddr);
461 put_cmsg(msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info);
463 if (np->rxopt.bits.rxohlim) {
464 int hlim = skb->nh.ipv6h->hop_limit;
465 put_cmsg(msg, SOL_IPV6, IPV6_2292HOPLIMIT, sizeof(hlim), &hlim);
/* The old-style options use the offsets recorded in IP6CB(skb) rather than
 * re-walking the headers; a zero offset means the header is absent. */
467 if (np->rxopt.bits.ohopopts && opt->hop) {
468 u8 *ptr = skb->nh.raw + opt->hop;
469 put_cmsg(msg, SOL_IPV6, IPV6_2292HOPOPTS, (ptr[1]+1)<<3, ptr);
471 if (np->rxopt.bits.odstopts && opt->dst0) {
472 u8 *ptr = skb->nh.raw + opt->dst0;
473 put_cmsg(msg, SOL_IPV6, IPV6_2292DSTOPTS, (ptr[1]+1)<<3, ptr);
475 if (np->rxopt.bits.osrcrt && opt->srcrt) {
476 struct ipv6_rt_hdr *rthdr = (struct ipv6_rt_hdr *)(skb->nh.raw + opt->srcrt);
477 put_cmsg(msg, SOL_IPV6, IPV6_2292RTHDR, (rthdr->hdrlen+1) << 3, rthdr);
/* A second destination options header (dst1) is reported under the same
 * IPV6_2292DSTOPTS type as dst0. */
479 if (np->rxopt.bits.odstopts && opt->dst1) {
480 u8 *ptr = skb->nh.raw + opt->dst1;
481 put_cmsg(msg, SOL_IPV6, IPV6_2292DSTOPTS, (ptr[1]+1)<<3, ptr);
486 int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
487 struct ipv6_txoptions *opt,
488 int *hlimit, int *tclass)
490 struct in6_pktinfo *src_info;
491 struct cmsghdr *cmsg;
492 struct ipv6_rt_hdr *rthdr;
493 struct ipv6_opt_hdr *hdr;
497 for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
499 struct net_device *dev = NULL;
501 if (!CMSG_OK(msg, cmsg)) {
506 if (cmsg->cmsg_level != SOL_IPV6)
509 switch (cmsg->cmsg_type) {
511 case IPV6_2292PKTINFO:
512 if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct in6_pktinfo))) {
517 src_info = (struct in6_pktinfo *)CMSG_DATA(cmsg);
519 if (src_info->ipi6_ifindex) {
520 if (fl->oif && src_info->ipi6_ifindex != fl->oif)
522 fl->oif = src_info->ipi6_ifindex;
525 addr_type = ipv6_addr_type(&src_info->ipi6_addr);
527 if (addr_type == IPV6_ADDR_ANY)
530 if (addr_type & IPV6_ADDR_LINKLOCAL) {
531 if (!src_info->ipi6_ifindex)
534 dev = dev_get_by_index(src_info->ipi6_ifindex);
539 if (!ipv6_chk_addr(&src_info->ipi6_addr, dev, 0)) {
548 ipv6_addr_copy(&fl->fl6_src, &src_info->ipi6_addr);
552 if (cmsg->cmsg_len < CMSG_LEN(4)) {
557 if (fl->fl6_flowlabel&IPV6_FLOWINFO_MASK) {
558 if ((fl->fl6_flowlabel^*(u32 *)CMSG_DATA(cmsg))&~IPV6_FLOWINFO_MASK) {
563 fl->fl6_flowlabel = IPV6_FLOWINFO_MASK & *(u32 *)CMSG_DATA(cmsg);
566 case IPV6_2292HOPOPTS:
568 if (opt->hopopt || cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_opt_hdr))) {
573 hdr = (struct ipv6_opt_hdr *)CMSG_DATA(cmsg);
574 len = ((hdr->hdrlen + 1) << 3);
575 if (cmsg->cmsg_len < CMSG_LEN(len)) {
579 if (!capable(CAP_NET_RAW)) {
583 opt->opt_nflen += len;
587 case IPV6_2292DSTOPTS:
588 if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_opt_hdr))) {
593 hdr = (struct ipv6_opt_hdr *)CMSG_DATA(cmsg);
594 len = ((hdr->hdrlen + 1) << 3);
595 if (cmsg->cmsg_len < CMSG_LEN(len)) {
599 if (!capable(CAP_NET_RAW)) {
607 opt->opt_flen += len;
612 case IPV6_RTHDRDSTOPTS:
613 if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_opt_hdr))) {
618 hdr = (struct ipv6_opt_hdr *)CMSG_DATA(cmsg);
619 len = ((hdr->hdrlen + 1) << 3);
620 if (cmsg->cmsg_len < CMSG_LEN(len)) {
624 if (!capable(CAP_NET_RAW)) {
628 if (cmsg->cmsg_type == IPV6_DSTOPTS) {
629 opt->opt_flen += len;
632 opt->opt_nflen += len;
639 if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_rt_hdr))) {
644 rthdr = (struct ipv6_rt_hdr *)CMSG_DATA(cmsg);
654 len = ((rthdr->hdrlen + 1) << 3);
656 if (cmsg->cmsg_len < CMSG_LEN(len)) {
661 /* segments left must also match */
662 if ((rthdr->hdrlen >> 1) != rthdr->segments_left) {
667 opt->opt_nflen += len;
670 if (cmsg->cmsg_type == IPV6_2292RTHDR && opt->dst1opt) {
671 int dsthdrlen = ((opt->dst1opt->hdrlen+1)<<3);
673 opt->opt_nflen += dsthdrlen;
674 opt->dst0opt = opt->dst1opt;
676 opt->opt_flen -= dsthdrlen;
681 case IPV6_2292HOPLIMIT:
683 if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) {
688 *hlimit = *(int *)CMSG_DATA(cmsg);
696 if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) {
700 tc = *(int *)CMSG_DATA(cmsg);
701 if (tc < -1 || tc > 0xff)
710 LIMIT_NETDEBUG(KERN_DEBUG "invalid cmsg type: %d\n",