/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on:
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
26 #include <linux/bottom_half.h>
27 #include <linux/module.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/socket.h>
31 #include <linux/sockios.h>
32 #include <linux/net.h>
33 #include <linux/jiffies.h>
35 #include <linux/in6.h>
36 #include <linux/netdevice.h>
37 #include <linux/init.h>
38 #include <linux/jhash.h>
39 #include <linux/ipsec.h>
40 #include <linux/times.h>
41 #include <linux/slab.h>
43 #include <linux/ipv6.h>
44 #include <linux/icmpv6.h>
45 #include <linux/random.h>
48 #include <net/ndisc.h>
49 #include <net/inet6_hashtables.h>
50 #include <net/inet6_connection_sock.h>
52 #include <net/transp_v6.h>
53 #include <net/addrconf.h>
54 #include <net/ip6_route.h>
55 #include <net/ip6_checksum.h>
56 #include <net/inet_ecn.h>
57 #include <net/protocol.h>
60 #include <net/dsfield.h>
61 #include <net/timewait_sock.h>
62 #include <net/netdma.h>
63 #include <net/inet_common.h>
64 #include <net/secure_seq.h>
65 #include <net/tcp_memcontrol.h>
67 #include <asm/uaccess.h>
69 #include <linux/proc_fs.h>
70 #include <linux/seq_file.h>
72 #include <linux/crypto.h>
73 #include <linux/scatterlist.h>
75 static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb);
76 static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
77 struct request_sock *req);
79 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
80 static void __tcp_v6_send_check(struct sk_buff *skb,
81 const struct in6_addr *saddr,
82 const struct in6_addr *daddr);
84 static const struct inet_connection_sock_af_ops ipv6_mapped;
85 static const struct inet_connection_sock_af_ops ipv6_specific;
86 #ifdef CONFIG_TCP_MD5SIG
87 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
88 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
90 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
91 const struct in6_addr *addr)
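/* Cache the inbound route on the socket, together with the fib node's
 * serial number as a validity cookie, so the established fast path can
 * reuse it without a fresh route lookup.
 */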
97 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
99 struct dst_entry *dst = skb_dst(skb);
100 const struct rt6_info *rt = (const struct rt6_info *)dst;
104 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
106 inet6_sk(sk)->rx_dst_cookie = rt->rt6i_node->fn_sernum;
109 static void tcp_v6_hash(struct sock *sk)
111 if (sk->sk_state != TCP_CLOSE) {
112 if (inet_csk(sk)->icsk_af_ops == &ipv6_mapped) {
117 __inet6_hash(sk, NULL);
122 static __inline__ __sum16 tcp_v6_check(int len,
123 const struct in6_addr *saddr,
124 const struct in6_addr *daddr,
127 return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
130 static __u32 tcp_v6_init_sequence(const struct sk_buff *skb)
132 return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
133 ipv6_hdr(skb)->saddr.s6_addr32,
135 tcp_hdr(skb)->source);
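/* Active open: validate the destination, handle IPv4-mapped addresses by
 * switching to the mapped af_ops, look up a route, pick a source address,
 * hash the socket and finally send the SYN via tcp_connect().
 */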
138 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
141 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
142 struct inet_sock *inet = inet_sk(sk);
143 struct inet_connection_sock *icsk = inet_csk(sk);
144 struct ipv6_pinfo *np = inet6_sk(sk);
145 struct tcp_sock *tp = tcp_sk(sk);
146 struct in6_addr *saddr = NULL, *final_p, final;
149 struct dst_entry *dst;
153 if (addr_len < SIN6_LEN_RFC2133)
156 if (usin->sin6_family != AF_INET6)
157 return -EAFNOSUPPORT;
159 memset(&fl6, 0, sizeof(fl6));
		fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl6.flowlabel);
		if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) {
165 struct ip6_flowlabel *flowlabel;
166 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
			if (flowlabel == NULL)
				return -EINVAL;
			usin->sin6_addr = flowlabel->dst;
170 fl6_sock_release(flowlabel);
	/*
	 *	connect() to INADDR_ANY means loopback (BSD'ism).
	 */
	if (ipv6_addr_any(&usin->sin6_addr))
		usin->sin6_addr.s6_addr[15] = 0x1;
	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if (addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type & IPV6_ADDR_LINKLOCAL) {
187 if (addr_len >= sizeof(struct sockaddr_in6) &&
188 usin->sin6_scope_id) {
			/* If interface is set while binding, indices must coincide. */
192 if (sk->sk_bound_dev_if &&
193 sk->sk_bound_dev_if != usin->sin6_scope_id)
196 sk->sk_bound_dev_if = usin->sin6_scope_id;
		/* Connecting to a link-local address requires an interface. */
200 if (!sk->sk_bound_dev_if)
204 if (tp->rx_opt.ts_recent_stamp &&
205 !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
206 tp->rx_opt.ts_recent = 0;
207 tp->rx_opt.ts_recent_stamp = 0;
211 np->daddr = usin->sin6_addr;
212 np->flow_label = fl6.flowlabel;
218 if (addr_type == IPV6_ADDR_MAPPED) {
219 u32 exthdrlen = icsk->icsk_ext_hdr_len;
220 struct sockaddr_in sin;
222 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
224 if (__ipv6_only_sock(sk))
227 sin.sin_family = AF_INET;
228 sin.sin_port = usin->sin6_port;
229 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
231 icsk->icsk_af_ops = &ipv6_mapped;
232 sk->sk_backlog_rcv = tcp_v4_do_rcv;
233 #ifdef CONFIG_TCP_MD5SIG
234 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
237 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
240 icsk->icsk_ext_hdr_len = exthdrlen;
241 icsk->icsk_af_ops = &ipv6_specific;
242 sk->sk_backlog_rcv = tcp_v6_do_rcv;
243 #ifdef CONFIG_TCP_MD5SIG
244 tp->af_specific = &tcp_sock_ipv6_specific;
248 ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);
249 ipv6_addr_set_v4mapped(inet->inet_rcv_saddr,
256 if (!ipv6_addr_any(&np->rcv_saddr))
257 saddr = &np->rcv_saddr;
259 fl6.flowi6_proto = IPPROTO_TCP;
260 fl6.daddr = np->daddr;
261 fl6.saddr = saddr ? *saddr : np->saddr;
262 fl6.flowi6_oif = sk->sk_bound_dev_if;
263 fl6.flowi6_mark = sk->sk_mark;
264 fl6.fl6_dport = usin->sin6_port;
265 fl6.fl6_sport = inet->inet_sport;
267 final_p = fl6_update_dst(&fl6, np->opt, &final);
269 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
271 dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true);
279 np->rcv_saddr = *saddr;
282 /* set the source address */
284 inet->inet_rcv_saddr = LOOPBACK4_IPV6;
286 sk->sk_gso_type = SKB_GSO_TCPV6;
287 __ip6_dst_store(sk, dst, NULL, NULL);
289 rt = (struct rt6_info *) dst;
290 if (tcp_death_row.sysctl_tw_recycle &&
291 !tp->rx_opt.ts_recent_stamp &&
292 ipv6_addr_equal(&rt->rt6i_dst.addr, &np->daddr))
293 tcp_fetch_timewait_stamp(sk, dst);
295 icsk->icsk_ext_hdr_len = 0;
297 icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
300 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
302 inet->inet_dport = usin->sin6_port;
304 tcp_set_state(sk, TCP_SYN_SENT);
305 err = inet6_hash_connect(&tcp_death_row, sk);
310 tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
315 err = tcp_connect(sk);
322 tcp_set_state(sk, TCP_CLOSE);
325 inet->inet_dport = 0;
326 sk->sk_route_caps = 0;
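/* React to a deferred "packet too big" indication: refresh the path MTU
 * and, if the cached MSS is now too large, shrink it and retransmit.
 */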
330 static void tcp_v6_mtu_reduced(struct sock *sk)
332 struct dst_entry *dst;
334 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
337 dst = inet6_csk_update_pmtu(sk, tcp_sk(sk)->mtu_info);
341 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
342 tcp_sync_mss(sk, dst_mtu(dst));
343 tcp_simple_retransmit(sk);
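/* ICMPv6 error handler.  Locate the socket the error refers to and either
 * apply it immediately or defer it (e.g. PMTU changes) when the socket is
 * owned by user context; errors against pending request_socks drop the
 * request.
 */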
347 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
348 u8 type, u8 code, int offset, __be32 info)
350 const struct ipv6hdr *hdr = (const struct ipv6hdr*)skb->data;
351 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
352 struct ipv6_pinfo *np;
357 struct net *net = dev_net(skb->dev);
359 sk = inet6_lookup(net, &tcp_hashinfo, &hdr->daddr,
360 th->dest, &hdr->saddr, th->source, skb->dev->ifindex);
363 ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev),
368 if (sk->sk_state == TCP_TIME_WAIT) {
369 inet_twsk_put(inet_twsk(sk));
374 if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
375 NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
377 if (sk->sk_state == TCP_CLOSE)
380 if (ipv6_hdr(skb)->hop_limit < inet6_sk(sk)->min_hopcount) {
381 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
386 seq = ntohl(th->seq);
387 if (sk->sk_state != TCP_LISTEN &&
388 !between(seq, tp->snd_una, tp->snd_nxt)) {
389 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
395 if (type == NDISC_REDIRECT) {
396 struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
399 dst->ops->redirect(dst, sk, skb);
402 if (type == ICMPV6_PKT_TOOBIG) {
403 tp->mtu_info = ntohl(info);
404 if (!sock_owned_by_user(sk))
405 tcp_v6_mtu_reduced(sk);
406 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
412 icmpv6_err_convert(type, code, &err);
	/* Might be for a request_sock */
415 switch (sk->sk_state) {
416 struct request_sock *req, **prev;
418 if (sock_owned_by_user(sk))
421 req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr,
422 &hdr->saddr, inet6_iif(skb));
		/* ICMPs are not backlogged, hence we cannot get
		 * an established socket here.
		 */
429 WARN_ON(req->sk != NULL);
431 if (seq != tcp_rsk(req)->snt_isn) {
432 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
436 inet_csk_reqsk_queue_drop(sk, req, prev);
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can, if SYNs are crossed. --ANK */
442 if (!sock_owned_by_user(sk)) {
444 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
448 sk->sk_err_soft = err;
452 if (!sock_owned_by_user(sk) && np->recverr) {
454 sk->sk_error_report(sk);
456 sk->sk_err_soft = err;
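/* Build a SYN+ACK for a pending request and transmit it, grabbing a route
 * first when the caller did not pass one in.
 */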
464 static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
466 struct request_sock *req,
467 struct request_values *rvp,
470 struct inet6_request_sock *treq = inet6_rsk(req);
471 struct ipv6_pinfo *np = inet6_sk(sk);
472 struct sk_buff * skb;
475 /* First, grab a route. */
476 if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL)
479 skb = tcp_make_synack(sk, dst, req, rvp, NULL);
482 __tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr);
484 fl6->daddr = treq->rmt_addr;
485 skb_set_queue_mapping(skb, queue_mapping);
486 err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass);
487 err = net_xmit_eval(err);
494 static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req,
495 struct request_values *rvp)
499 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
500 return tcp_v6_send_synack(sk, NULL, &fl6, req, rvp, 0);
503 static void tcp_v6_reqsk_destructor(struct request_sock *req)
505 kfree_skb(inet6_rsk(req)->pktopts);
508 #ifdef CONFIG_TCP_MD5SIG
509 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
510 const struct in6_addr *addr)
512 return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);
515 static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk,
516 struct sock *addr_sk)
518 return tcp_v6_md5_do_lookup(sk, &inet6_sk(addr_sk)->daddr);
521 static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk,
522 struct request_sock *req)
524 return tcp_v6_md5_do_lookup(sk, &inet6_rsk(req)->rmt_addr);
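/* setsockopt(TCP_MD5SIG) handler: add or delete an MD5 key for a peer
 * address; v4-mapped peers are stored as AF_INET keys.
 */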
static int tcp_v6_parse_md5_keys(struct sock *sk, char __user *optval,
530 struct tcp_md5sig cmd;
531 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
533 if (optlen < sizeof(cmd))
536 if (copy_from_user(&cmd, optval, sizeof(cmd)))
539 if (sin6->sin6_family != AF_INET6)
542 if (!cmd.tcpm_keylen) {
543 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
544 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
546 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
550 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
553 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
554 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
555 AF_INET, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
557 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
558 AF_INET6, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
561 static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
562 const struct in6_addr *daddr,
563 const struct in6_addr *saddr, int nbytes)
565 struct tcp6_pseudohdr *bp;
566 struct scatterlist sg;
568 bp = &hp->md5_blk.ip6;
569 /* 1. TCP pseudo-header (RFC2460) */
572 bp->protocol = cpu_to_be32(IPPROTO_TCP);
573 bp->len = cpu_to_be32(nbytes);
575 sg_init_one(&sg, bp, sizeof(*bp));
576 return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
579 static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
580 const struct in6_addr *daddr, struct in6_addr *saddr,
581 const struct tcphdr *th)
583 struct tcp_md5sig_pool *hp;
584 struct hash_desc *desc;
586 hp = tcp_get_md5sig_pool();
588 goto clear_hash_noput;
589 desc = &hp->md5_desc;
591 if (crypto_hash_init(desc))
593 if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
595 if (tcp_md5_hash_header(hp, th))
597 if (tcp_md5_hash_key(hp, key))
599 if (crypto_hash_final(desc, md5_hash))
602 tcp_put_md5sig_pool();
606 tcp_put_md5sig_pool();
608 memset(md5_hash, 0, 16);
612 static int tcp_v6_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
613 const struct sock *sk,
614 const struct request_sock *req,
615 const struct sk_buff *skb)
617 const struct in6_addr *saddr, *daddr;
618 struct tcp_md5sig_pool *hp;
619 struct hash_desc *desc;
620 const struct tcphdr *th = tcp_hdr(skb);
623 saddr = &inet6_sk(sk)->saddr;
624 daddr = &inet6_sk(sk)->daddr;
626 saddr = &inet6_rsk(req)->loc_addr;
627 daddr = &inet6_rsk(req)->rmt_addr;
629 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
630 saddr = &ip6h->saddr;
631 daddr = &ip6h->daddr;
634 hp = tcp_get_md5sig_pool();
636 goto clear_hash_noput;
637 desc = &hp->md5_desc;
639 if (crypto_hash_init(desc))
642 if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
644 if (tcp_md5_hash_header(hp, th))
646 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
648 if (tcp_md5_hash_key(hp, key))
650 if (crypto_hash_final(desc, md5_hash))
653 tcp_put_md5sig_pool();
657 tcp_put_md5sig_pool();
659 memset(md5_hash, 0, 16);
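/* Verify the TCP MD5 signature option on an incoming segment against the
 * key configured for the source address, counting missing, unexpected and
 * mismatching signatures in the MIB.
 */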
663 static int tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
665 const __u8 *hash_location = NULL;
666 struct tcp_md5sig_key *hash_expected;
667 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
668 const struct tcphdr *th = tcp_hdr(skb);
672 hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
673 hash_location = tcp_parse_md5sig_option(th);
675 /* We've parsed the options - do we have a hash? */
676 if (!hash_expected && !hash_location)
679 if (hash_expected && !hash_location) {
680 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
684 if (!hash_expected && hash_location) {
685 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
689 /* check the signature */
690 genhash = tcp_v6_md5_hash_skb(newhash,
694 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
695 net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
696 genhash ? "failed" : "mismatch",
697 &ip6h->saddr, ntohs(th->source),
698 &ip6h->daddr, ntohs(th->dest));
705 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
707 .obj_size = sizeof(struct tcp6_request_sock),
708 .rtx_syn_ack = tcp_v6_rtx_synack,
709 .send_ack = tcp_v6_reqsk_send_ack,
710 .destructor = tcp_v6_reqsk_destructor,
711 .send_reset = tcp_v6_send_reset,
712 .syn_ack_timeout = tcp_syn_ack_timeout,
715 #ifdef CONFIG_TCP_MD5SIG
716 static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
717 .md5_lookup = tcp_v6_reqsk_md5_lookup,
718 .calc_md5_hash = tcp_v6_md5_hash_skb,
722 static void __tcp_v6_send_check(struct sk_buff *skb,
723 const struct in6_addr *saddr, const struct in6_addr *daddr)
725 struct tcphdr *th = tcp_hdr(skb);
727 if (skb->ip_summed == CHECKSUM_PARTIAL) {
728 th->check = ~tcp_v6_check(skb->len, saddr, daddr, 0);
729 skb->csum_start = skb_transport_header(skb) - skb->head;
730 skb->csum_offset = offsetof(struct tcphdr, check);
732 th->check = tcp_v6_check(skb->len, saddr, daddr,
733 csum_partial(th, th->doff << 2,
738 static void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
740 struct ipv6_pinfo *np = inet6_sk(sk);
742 __tcp_v6_send_check(skb, &np->saddr, &np->daddr);
745 static int tcp_v6_gso_send_check(struct sk_buff *skb)
747 const struct ipv6hdr *ipv6h;
750 if (!pskb_may_pull(skb, sizeof(*th)))
753 ipv6h = ipv6_hdr(skb);
757 skb->ip_summed = CHECKSUM_PARTIAL;
758 __tcp_v6_send_check(skb, &ipv6h->saddr, &ipv6h->daddr);
762 static struct sk_buff **tcp6_gro_receive(struct sk_buff **head,
765 const struct ipv6hdr *iph = skb_gro_network_header(skb);
769 switch (skb->ip_summed) {
770 case CHECKSUM_COMPLETE:
771 if (!tcp_v6_check(skb_gro_len(skb), &iph->saddr, &iph->daddr,
773 skb->ip_summed = CHECKSUM_UNNECESSARY;
777 NAPI_GRO_CB(skb)->flush = 1;
781 wsum = ~csum_unfold(csum_ipv6_magic(&iph->saddr, &iph->daddr,
784 sum = csum_fold(skb_checksum(skb,
791 skb->ip_summed = CHECKSUM_UNNECESSARY;
795 return tcp_gro_receive(head, skb);
798 static int tcp6_gro_complete(struct sk_buff *skb)
800 const struct ipv6hdr *iph = ipv6_hdr(skb);
801 struct tcphdr *th = tcp_hdr(skb);
803 th->check = ~tcp_v6_check(skb->len - skb_transport_offset(skb),
804 &iph->saddr, &iph->daddr, 0);
805 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
807 return tcp_gro_complete(skb);
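/* Common helper for stateless replies (RSTs and time-wait ACKs): build a
 * bare TCP header with optional timestamp and MD5 options on a fresh skb
 * and send it through the per-namespace control socket.
 */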
810 static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
811 u32 ts, struct tcp_md5sig_key *key, int rst, u8 tclass)
813 const struct tcphdr *th = tcp_hdr(skb);
815 struct sk_buff *buff;
817 struct net *net = dev_net(skb_dst(skb)->dev);
818 struct sock *ctl_sk = net->ipv6.tcp_sk;
819 unsigned int tot_len = sizeof(struct tcphdr);
820 struct dst_entry *dst;
824 tot_len += TCPOLEN_TSTAMP_ALIGNED;
825 #ifdef CONFIG_TCP_MD5SIG
827 tot_len += TCPOLEN_MD5SIG_ALIGNED;
830 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
835 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
837 t1 = (struct tcphdr *) skb_push(buff, tot_len);
838 skb_reset_transport_header(buff);
840 /* Swap the send and the receive. */
841 memset(t1, 0, sizeof(*t1));
842 t1->dest = th->source;
843 t1->source = th->dest;
844 t1->doff = tot_len / 4;
845 t1->seq = htonl(seq);
846 t1->ack_seq = htonl(ack);
847 t1->ack = !rst || !th->ack;
849 t1->window = htons(win);
851 topt = (__be32 *)(t1 + 1);
854 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
855 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
856 *topt++ = htonl(tcp_time_stamp);
860 #ifdef CONFIG_TCP_MD5SIG
862 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
863 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
864 tcp_v6_md5_hash_hdr((__u8 *)topt, key,
865 &ipv6_hdr(skb)->saddr,
866 &ipv6_hdr(skb)->daddr, t1);
870 memset(&fl6, 0, sizeof(fl6));
871 fl6.daddr = ipv6_hdr(skb)->saddr;
872 fl6.saddr = ipv6_hdr(skb)->daddr;
874 buff->ip_summed = CHECKSUM_PARTIAL;
877 __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
879 fl6.flowi6_proto = IPPROTO_TCP;
880 if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL)
881 fl6.flowi6_oif = inet6_iif(skb);
882 fl6.fl6_dport = t1->dest;
883 fl6.fl6_sport = t1->source;
884 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
	/* Pass a socket to ip6_dst_lookup even when it is for a RST;
	 * the underlying function will use it to retrieve the network
	 * namespace.
	 */
890 dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL, false);
892 skb_dst_set(buff, dst);
893 ip6_xmit(ctl_sk, buff, &fl6, NULL, tclass);
894 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
896 TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
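/* Send a RST in response to a bad segment.  When MD5 is in use the key may
 * have to be looked up via a listening socket, since the original
 * connection context can already be gone.
 */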
903 static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
905 const struct tcphdr *th = tcp_hdr(skb);
906 u32 seq = 0, ack_seq = 0;
907 struct tcp_md5sig_key *key = NULL;
908 #ifdef CONFIG_TCP_MD5SIG
909 const __u8 *hash_location = NULL;
910 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
911 unsigned char newhash[16];
913 struct sock *sk1 = NULL;
919 if (!ipv6_unicast_destination(skb))
922 #ifdef CONFIG_TCP_MD5SIG
923 hash_location = tcp_parse_md5sig_option(th);
924 if (!sk && hash_location) {
		/*
		 * The active side is lost. Try to find the listening socket
		 * through the source port, and then find the md5 key through
		 * the listening socket. We do not lose security here:
		 * the incoming packet is checked against the md5 hash of the
		 * found key; no RST is generated if the hash doesn't match.
		 */
932 sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
933 &tcp_hashinfo, &ipv6h->daddr,
934 ntohs(th->source), inet6_iif(skb));
939 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
943 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, NULL, skb);
944 if (genhash || memcmp(hash_location, newhash, 16) != 0)
947 key = sk ? tcp_v6_md5_do_lookup(sk, &ipv6h->saddr) : NULL;
952 seq = ntohl(th->ack_seq);
954 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
957 tcp_v6_send_response(skb, seq, ack_seq, 0, 0, key, 1, 0);
959 #ifdef CONFIG_TCP_MD5SIG
968 static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts,
969 struct tcp_md5sig_key *key, u8 tclass)
971 tcp_v6_send_response(skb, seq, ack, win, ts, key, 0, tclass);
974 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
976 struct inet_timewait_sock *tw = inet_twsk(sk);
977 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
979 tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
980 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
981 tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw),
987 static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
988 struct request_sock *req)
990 tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent,
991 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), 0);
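/* For a segment arriving on a listener, find a matching request_sock or an
 * already established/time-wait socket; fall back to SYN cookies when
 * enabled.
 */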
static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
997 struct request_sock *req, **prev;
998 const struct tcphdr *th = tcp_hdr(skb);
1001 /* Find possible connection requests. */
1002 req = inet6_csk_search_req(sk, &prev, th->source,
1003 &ipv6_hdr(skb)->saddr,
1004 &ipv6_hdr(skb)->daddr, inet6_iif(skb));
1006 return tcp_check_req(sk, skb, req, prev, false);
1008 nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo,
1009 &ipv6_hdr(skb)->saddr, th->source,
1010 &ipv6_hdr(skb)->daddr, ntohs(th->dest), inet6_iif(skb));
1013 if (nsk->sk_state != TCP_TIME_WAIT) {
1017 inet_twsk_put(inet_twsk(nsk));
1021 #ifdef CONFIG_SYN_COOKIES
1023 sk = cookie_v6_check(sk, skb);
/* FIXME: this is substantially similar to the ipv4 code.
 * Can some kind of merge be done? -- erics
 */
1031 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1033 struct tcp_extend_values tmp_ext;
1034 struct tcp_options_received tmp_opt;
1035 const u8 *hash_location;
1036 struct request_sock *req;
1037 struct inet6_request_sock *treq;
1038 struct ipv6_pinfo *np = inet6_sk(sk);
1039 struct tcp_sock *tp = tcp_sk(sk);
1040 __u32 isn = TCP_SKB_CB(skb)->when;
1041 struct dst_entry *dst = NULL;
1043 bool want_cookie = false;
1045 if (skb->protocol == htons(ETH_P_IP))
1046 return tcp_v4_conn_request(sk, skb);
1048 if (!ipv6_unicast_destination(skb))
1051 if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1052 want_cookie = tcp_syn_flood_action(sk, skb, "TCPv6");
1057 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1060 req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
1064 #ifdef CONFIG_TCP_MD5SIG
1065 tcp_rsk(req)->af_specific = &tcp_request_sock_ipv6_ops;
1068 tcp_clear_options(&tmp_opt);
1069 tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
1070 tmp_opt.user_mss = tp->rx_opt.user_mss;
1071 tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL);
1073 if (tmp_opt.cookie_plus > 0 &&
1074 tmp_opt.saw_tstamp &&
1075 !tp->rx_opt.cookie_out_never &&
1076 (sysctl_tcp_cookie_size > 0 ||
1077 (tp->cookie_values != NULL &&
1078 tp->cookie_values->cookie_desired > 0))) {
1081 u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
1082 int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;
1084 if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
1087 /* Secret recipe starts with IP addresses */
1088 d = (__force u32 *)&ipv6_hdr(skb)->daddr.s6_addr32[0];
1093 d = (__force u32 *)&ipv6_hdr(skb)->saddr.s6_addr32[0];
1099 /* plus variable length Initiator Cookie */
1102 *c++ ^= *hash_location++;
1104 want_cookie = false; /* not our kind of cookie */
1105 tmp_ext.cookie_out_never = 0; /* false */
1106 tmp_ext.cookie_plus = tmp_opt.cookie_plus;
1107 } else if (!tp->rx_opt.cookie_in_always) {
1108 /* redundant indications, but ensure initialization. */
1109 tmp_ext.cookie_out_never = 1; /* true */
1110 tmp_ext.cookie_plus = 0;
1114 tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;
1116 if (want_cookie && !tmp_opt.saw_tstamp)
1117 tcp_clear_options(&tmp_opt);
1119 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1120 tcp_openreq_init(req, &tmp_opt, skb);
1122 treq = inet6_rsk(req);
1123 treq->rmt_addr = ipv6_hdr(skb)->saddr;
1124 treq->loc_addr = ipv6_hdr(skb)->daddr;
1125 if (!want_cookie || tmp_opt.tstamp_ok)
1126 TCP_ECN_create_request(req, skb);
1128 treq->iif = sk->sk_bound_dev_if;
1130 /* So that link locals have meaning */
1131 if (!sk->sk_bound_dev_if &&
1132 ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
1133 treq->iif = inet6_iif(skb);
1136 if (ipv6_opt_accepted(sk, skb) ||
1137 np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
1138 np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
1139 atomic_inc(&skb->users);
1140 treq->pktopts = skb;
1144 isn = cookie_v6_init_sequence(sk, skb, &req->mss);
1145 req->cookie_ts = tmp_opt.tstamp_ok;
		/* VJ's idea. We save the last timestamp seen
		 * from the destination in the peer table, when entering
		 * TIME-WAIT state, and check against it before
		 * accepting a new connection request.
		 *
		 * If "isn" is not zero, this request hit an alive
		 * timewait bucket, so all the necessary checks
		 * are made in the function processing timewait state.
		 */
1158 if (tmp_opt.saw_tstamp &&
1159 tcp_death_row.sysctl_tw_recycle &&
1160 (dst = inet6_csk_route_req(sk, &fl6, req)) != NULL) {
1161 if (!tcp_peer_is_proven(req, dst, true)) {
1162 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
1163 goto drop_and_release;
1166 /* Kill the following clause, if you dislike this way. */
1167 else if (!sysctl_tcp_syncookies &&
1168 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
1169 (sysctl_max_syn_backlog >> 2)) &&
1170 !tcp_peer_is_proven(req, dst, false)) {
			/* Without syncookies the last quarter of the
			 * backlog is filled with destinations proven
			 * to be alive. It means that we continue to
			 * communicate with destinations already
			 * remembered at the moment of the synflood.
			 */
1178 LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI6/%u\n",
1179 &treq->rmt_addr, ntohs(tcp_hdr(skb)->source));
1180 goto drop_and_release;
1183 isn = tcp_v6_init_sequence(skb);
1186 tcp_rsk(req)->snt_isn = isn;
1188 if (security_inet_conn_request(sk, skb, req))
1189 goto drop_and_release;
1191 if (tcp_v6_send_synack(sk, dst, &fl6, req,
1192 (struct request_values *)&tmp_ext,
1193 skb_get_queue_mapping(skb)) ||
1197 tcp_rsk(req)->snt_synack = tcp_time_stamp;
1198 tcp_rsk(req)->listener = NULL;
1199 inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1207 return 0; /* don't send reset */
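/* Create the child socket once the three-way handshake completes.  The
 * IPv4-mapped case is delegated to tcp_v4_syn_recv_sock() and the result
 * is then patched up to use the mapped af_ops.
 */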
static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1211 struct request_sock *req,
1212 struct dst_entry *dst)
1214 struct inet6_request_sock *treq;
1215 struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
1216 struct tcp6_sock *newtcp6sk;
1217 struct inet_sock *newinet;
1218 struct tcp_sock *newtp;
1220 #ifdef CONFIG_TCP_MD5SIG
1221 struct tcp_md5sig_key *key;
1225 if (skb->protocol == htons(ETH_P_IP)) {
1230 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
1235 newtcp6sk = (struct tcp6_sock *)newsk;
1236 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1238 newinet = inet_sk(newsk);
1239 newnp = inet6_sk(newsk);
1240 newtp = tcp_sk(newsk);
1242 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1244 ipv6_addr_set_v4mapped(newinet->inet_daddr, &newnp->daddr);
1246 ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr);
1248 newnp->rcv_saddr = newnp->saddr;
1250 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1251 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1252 #ifdef CONFIG_TCP_MD5SIG
1253 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1256 newnp->ipv6_ac_list = NULL;
1257 newnp->ipv6_fl_list = NULL;
1258 newnp->pktoptions = NULL;
1260 newnp->mcast_oif = inet6_iif(skb);
1261 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1262 newnp->rcv_tclass = ipv6_tclass(ipv6_hdr(skb));
		/* No need to charge this sock to the relevant IPv6 refcnt debug socks count
		 * here, tcp_create_openreq_child now does this for us, see the comment in
		 * that function for the gory details. -acme
		 */
		/* This is a tricky place. Until this moment IPv4 tcp
		 * worked with IPv6 icsk.icsk_af_ops.
		 * Sync it now.
		 */
1274 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1279 treq = inet6_rsk(req);
1281 if (sk_acceptq_is_full(sk))
1285 dst = inet6_csk_route_req(sk, &fl6, req);
1290 newsk = tcp_create_openreq_child(sk, req, skb);
	/* No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */
1300 newsk->sk_gso_type = SKB_GSO_TCPV6;
1301 __ip6_dst_store(newsk, dst, NULL, NULL);
1302 inet6_sk_rx_dst_set(newsk, skb);
1304 newtcp6sk = (struct tcp6_sock *)newsk;
1305 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1307 newtp = tcp_sk(newsk);
1308 newinet = inet_sk(newsk);
1309 newnp = inet6_sk(newsk);
1311 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1313 newnp->daddr = treq->rmt_addr;
1314 newnp->saddr = treq->loc_addr;
1315 newnp->rcv_saddr = treq->loc_addr;
1316 newsk->sk_bound_dev_if = treq->iif;
	/* Now IPv6 options...
	 *
	 * First: no IPv4 options.
	 */
1322 newinet->inet_opt = NULL;
1323 newnp->ipv6_ac_list = NULL;
1324 newnp->ipv6_fl_list = NULL;
1327 newnp->rxopt.all = np->rxopt.all;
1329 /* Clone pktoptions received with SYN */
1330 newnp->pktoptions = NULL;
1331 if (treq->pktopts != NULL) {
1332 newnp->pktoptions = skb_clone(treq->pktopts,
1333 sk_gfp_atomic(sk, GFP_ATOMIC));
1334 consume_skb(treq->pktopts);
1335 treq->pktopts = NULL;
1336 if (newnp->pktoptions)
1337 skb_set_owner_r(newnp->pktoptions, newsk);
1340 newnp->mcast_oif = inet6_iif(skb);
1341 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1342 newnp->rcv_tclass = ipv6_tclass(ipv6_hdr(skb));
	/* Clone native IPv6 options from the listening socket (if any).
	 *
	 * Yes, keeping a reference count would be much more clever, but we do
	 * one more thing here: reattach optmem to the new socket.
	 */
1351 newnp->opt = ipv6_dup_options(newsk, np->opt);
1353 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1355 inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
1356 newnp->opt->opt_flen);
1358 tcp_mtup_init(newsk);
1359 tcp_sync_mss(newsk, dst_mtu(dst));
1360 newtp->advmss = dst_metric_advmss(dst);
1361 if (tcp_sk(sk)->rx_opt.user_mss &&
1362 tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1363 newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1365 tcp_initialize_rcv_mss(newsk);
1366 tcp_synack_rtt_meas(newsk, req);
1367 newtp->total_retrans = req->retrans;
1369 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1370 newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1372 #ifdef CONFIG_TCP_MD5SIG
1373 /* Copy over the MD5 key from the original socket */
1374 if ((key = tcp_v6_md5_do_lookup(sk, &newnp->daddr)) != NULL) {
		/* We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
1380 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newnp->daddr,
1381 AF_INET6, key->key, key->keylen,
1382 sk_gfp_atomic(sk, GFP_ATOMIC));
1386 if (__inet_inherit_port(sk, newsk) < 0) {
1390 __inet6_hash(newsk, NULL);
1395 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1399 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1403 static __sum16 tcp_v6_checksum_init(struct sk_buff *skb)
1405 if (skb->ip_summed == CHECKSUM_COMPLETE) {
1406 if (!tcp_v6_check(skb->len, &ipv6_hdr(skb)->saddr,
1407 &ipv6_hdr(skb)->daddr, skb->csum)) {
1408 skb->ip_summed = CHECKSUM_UNNECESSARY;
1413 skb->csum = ~csum_unfold(tcp_v6_check(skb->len,
1414 &ipv6_hdr(skb)->saddr,
1415 &ipv6_hdr(skb)->daddr, 0));
1417 if (skb->len <= 76) {
1418 return __skb_checksum_complete(skb);
/* The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
1431 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1433 struct ipv6_pinfo *np = inet6_sk(sk);
1434 struct tcp_sock *tp;
1435 struct sk_buff *opt_skb = NULL;
	/* Imagine: the socket is IPv6. An IPv4 packet arrives, goes
	 * to the IPv4 receive handler and is backlogged.
	 * From the backlog it always goes here. Kerboom...
	 * Fortunately, tcp_rcv_established and rcv_established
	 * handle them correctly, but that is not the case with
	 * tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
	 */
1445 if (skb->protocol == htons(ETH_P_IP))
1446 return tcp_v4_do_rcv(sk, skb);
1448 #ifdef CONFIG_TCP_MD5SIG
	if (tcp_v6_inbound_md5_hash(sk, skb))
1453 if (sk_filter(sk, skb))
	/*
	 * Socket locking is here for SMP purposes as backlog rcv
	 * is currently called with bh processing disabled.
	 */
	/* Do Stevens' IPV6_PKTOPTIONS.
	 *
	 * Yes, guys, it is the only place in our code where we
	 * may make it without affecting IPv4.
	 * The rest of the code is protocol independent,
	 * and I do not like the idea of uglifying IPv4.
	 *
	 * Actually, the whole idea behind IPV6_PKTOPTIONS
	 * does not look very well thought out. For now we latch
	 * the options received in the last packet enqueued
	 * by tcp. Feel free to propose a better solution.
	 */
1475 opt_skb = skb_clone(skb, sk_gfp_atomic(sk, GFP_ATOMIC));
1477 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1478 struct dst_entry *dst = sk->sk_rx_dst;
1480 sock_rps_save_rxhash(sk, skb);
1482 if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1483 dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
1485 sk->sk_rx_dst = NULL;
1489 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len))
1492 goto ipv6_pktoptions;
1496 if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1499 if (sk->sk_state == TCP_LISTEN) {
1500 struct sock *nsk = tcp_v6_hnd_req(sk, skb);
1505 * Queue it on the new socket if the new socket is active,
1506 * otherwise we just shortcircuit this and continue with
1510 sock_rps_save_rxhash(nsk, skb);
1511 if (tcp_child_process(sk, nsk, skb))
1514 __kfree_skb(opt_skb);
1518 sock_rps_save_rxhash(sk, skb);
1520 if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len))
1523 goto ipv6_pktoptions;
1527 tcp_v6_send_reset(sk, skb);
1530 __kfree_skb(opt_skb);
1534 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
	/* What is this, you ask?
	 *
	 * 1. skb was enqueued by tcp.
	 * 2. skb is added to the tail of the read queue, rather than out of order.
	 * 3. The socket is not in a passive state.
	 * 4. Finally, it really contains options, which the user wants to receive.
	 */
1547 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1548 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1549 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1550 np->mcast_oif = inet6_iif(opt_skb);
1551 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1552 np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1553 if (np->rxopt.bits.rxtclass)
1554 np->rcv_tclass = ipv6_tclass(ipv6_hdr(skb));
1555 if (ipv6_opt_accepted(sk, opt_skb)) {
1556 skb_set_owner_r(opt_skb, sk);
1557 opt_skb = xchg(&np->pktoptions, opt_skb);
1559 __kfree_skb(opt_skb);
1560 opt_skb = xchg(&np->pktoptions, NULL);
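/* Main receive entry point: validate the header and checksum, look up the
 * owning socket and either process the segment directly, prequeue it, or
 * push it onto the socket backlog.
 */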
1568 static int tcp_v6_rcv(struct sk_buff *skb)
1570 const struct tcphdr *th;
1571 const struct ipv6hdr *hdr;
1574 struct net *net = dev_net(skb->dev);
1576 if (skb->pkt_type != PACKET_HOST)
	/* Count it even if it's bad. */
1582 TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
1584 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1589 if (th->doff < sizeof(struct tcphdr)/4)
1591 if (!pskb_may_pull(skb, th->doff*4))
1594 if (!skb_csum_unnecessary(skb) && tcp_v6_checksum_init(skb))
1598 hdr = ipv6_hdr(skb);
1599 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1600 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1601 skb->len - th->doff*4);
1602 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1603 TCP_SKB_CB(skb)->when = 0;
1604 TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1605 TCP_SKB_CB(skb)->sacked = 0;
1607 sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1612 if (sk->sk_state == TCP_TIME_WAIT)
1615 if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) {
1616 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
1617 goto discard_and_relse;
1620 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1621 goto discard_and_relse;
1623 if (sk_filter(sk, skb))
1624 goto discard_and_relse;
1628 bh_lock_sock_nested(sk);
1630 if (!sock_owned_by_user(sk)) {
1631 #ifdef CONFIG_NET_DMA
1632 struct tcp_sock *tp = tcp_sk(sk);
1633 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1634 tp->ucopy.dma_chan = net_dma_find_channel();
1635 if (tp->ucopy.dma_chan)
1636 ret = tcp_v6_do_rcv(sk, skb);
1640 if (!tcp_prequeue(sk, skb))
1641 ret = tcp_v6_do_rcv(sk, skb);
1643 } else if (unlikely(sk_add_backlog(sk, skb,
1644 sk->sk_rcvbuf + sk->sk_sndbuf))) {
1646 NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
1647 goto discard_and_relse;
1652 return ret ? -1 : 0;
1655 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1658 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1660 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1662 tcp_v6_send_reset(NULL, skb);
1679 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1680 inet_twsk_put(inet_twsk(sk));
1684 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1685 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1686 inet_twsk_put(inet_twsk(sk));
1690 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1695 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1696 &ipv6_hdr(skb)->daddr,
1697 ntohs(th->dest), inet6_iif(skb));
1699 struct inet_timewait_sock *tw = inet_twsk(sk);
1700 inet_twsk_deschedule(tw, &tcp_death_row);
1705 /* Fall through to ACK */
1708 tcp_v6_timewait_ack(sk, skb);
1712 case TCP_TW_SUCCESS:;
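/* Early demux: look up an established socket before routing so that the
 * cached rx dst can be attached to the skb and the full lookup skipped
 * later.
 */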
1717 static void tcp_v6_early_demux(struct sk_buff *skb)
1719 const struct ipv6hdr *hdr;
1720 const struct tcphdr *th;
1723 if (skb->pkt_type != PACKET_HOST)
1726 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1729 hdr = ipv6_hdr(skb);
1732 if (th->doff < sizeof(struct tcphdr) / 4)
1735 sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1736 &hdr->saddr, th->source,
1737 &hdr->daddr, ntohs(th->dest),
1741 skb->destructor = sock_edemux;
1742 if (sk->sk_state != TCP_TIME_WAIT) {
1743 struct dst_entry *dst = sk->sk_rx_dst;
1744 struct inet_sock *icsk = inet_sk(sk);
1746 dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie);
1748 icsk->rx_dst_ifindex == skb->skb_iif)
1749 skb_dst_set_noref(skb, dst);
1754 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1755 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
1756 .twsk_unique = tcp_twsk_unique,
1757 .twsk_destructor= tcp_twsk_destructor,
1760 static const struct inet_connection_sock_af_ops ipv6_specific = {
1761 .queue_xmit = inet6_csk_xmit,
1762 .send_check = tcp_v6_send_check,
1763 .rebuild_header = inet6_sk_rebuild_header,
1764 .sk_rx_dst_set = inet6_sk_rx_dst_set,
1765 .conn_request = tcp_v6_conn_request,
1766 .syn_recv_sock = tcp_v6_syn_recv_sock,
1767 .net_header_len = sizeof(struct ipv6hdr),
1768 .net_frag_header_len = sizeof(struct frag_hdr),
1769 .setsockopt = ipv6_setsockopt,
1770 .getsockopt = ipv6_getsockopt,
1771 .addr2sockaddr = inet6_csk_addr2sockaddr,
1772 .sockaddr_len = sizeof(struct sockaddr_in6),
1773 .bind_conflict = inet6_csk_bind_conflict,
1774 #ifdef CONFIG_COMPAT
1775 .compat_setsockopt = compat_ipv6_setsockopt,
1776 .compat_getsockopt = compat_ipv6_getsockopt,
1780 #ifdef CONFIG_TCP_MD5SIG
1781 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1782 .md5_lookup = tcp_v6_md5_lookup,
1783 .calc_md5_hash = tcp_v6_md5_hash_skb,
1784 .md5_parse = tcp_v6_parse_md5_keys,
1789 * TCP over IPv4 via INET6 API
1792 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1793 .queue_xmit = ip_queue_xmit,
1794 .send_check = tcp_v4_send_check,
1795 .rebuild_header = inet_sk_rebuild_header,
1796 .sk_rx_dst_set = inet_sk_rx_dst_set,
1797 .conn_request = tcp_v6_conn_request,
1798 .syn_recv_sock = tcp_v6_syn_recv_sock,
1799 .net_header_len = sizeof(struct iphdr),
1800 .setsockopt = ipv6_setsockopt,
1801 .getsockopt = ipv6_getsockopt,
1802 .addr2sockaddr = inet6_csk_addr2sockaddr,
1803 .sockaddr_len = sizeof(struct sockaddr_in6),
1804 .bind_conflict = inet6_csk_bind_conflict,
1805 #ifdef CONFIG_COMPAT
1806 .compat_setsockopt = compat_ipv6_setsockopt,
1807 .compat_getsockopt = compat_ipv6_getsockopt,
1811 #ifdef CONFIG_TCP_MD5SIG
1812 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1813 .md5_lookup = tcp_v4_md5_lookup,
1814 .calc_md5_hash = tcp_v4_md5_hash_skb,
1815 .md5_parse = tcp_v6_parse_md5_keys,
/* NOTE: A lot of things are set to zero explicitly by the call to
 * sk_alloc(), so they need not be done here.
 */
1822 static int tcp_v6_init_sock(struct sock *sk)
1824 struct inet_connection_sock *icsk = inet_csk(sk);
1828 icsk->icsk_af_ops = &ipv6_specific;
1830 #ifdef CONFIG_TCP_MD5SIG
1831 tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1837 static void tcp_v6_destroy_sock(struct sock *sk)
1839 tcp_v4_destroy_sock(sk);
1840 inet6_destroy_sock(sk);
1843 #ifdef CONFIG_PROC_FS
1844 /* Proc filesystem TCPv6 sock list dumping. */
1845 static void get_openreq6(struct seq_file *seq,
1846 const struct sock *sk, struct request_sock *req, int i, kuid_t uid)
1848 int ttd = req->expires - jiffies;
1849 const struct in6_addr *src = &inet6_rsk(req)->loc_addr;
1850 const struct in6_addr *dest = &inet6_rsk(req)->rmt_addr;
1856 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1857 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
1859 src->s6_addr32[0], src->s6_addr32[1],
1860 src->s6_addr32[2], src->s6_addr32[3],
1861 ntohs(inet_rsk(req)->loc_port),
1862 dest->s6_addr32[0], dest->s6_addr32[1],
1863 dest->s6_addr32[2], dest->s6_addr32[3],
1864 ntohs(inet_rsk(req)->rmt_port),
		   0, 0, /* could print option size, but that is af dependent. */
1867 1, /* timers active (only the expire timer) */
1868 jiffies_to_clock_t(ttd),
1870 from_kuid_munged(seq_user_ns(seq), uid),
1871 0, /* non standard timer */
1872 0, /* open_requests have no inode */
1876 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1878 const struct in6_addr *dest, *src;
1881 unsigned long timer_expires;
1882 const struct inet_sock *inet = inet_sk(sp);
1883 const struct tcp_sock *tp = tcp_sk(sp);
1884 const struct inet_connection_sock *icsk = inet_csk(sp);
1885 const struct ipv6_pinfo *np = inet6_sk(sp);
1888 src = &np->rcv_saddr;
1889 destp = ntohs(inet->inet_dport);
1890 srcp = ntohs(inet->inet_sport);
1892 if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
1894 timer_expires = icsk->icsk_timeout;
1895 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1897 timer_expires = icsk->icsk_timeout;
1898 } else if (timer_pending(&sp->sk_timer)) {
1900 timer_expires = sp->sk_timer.expires;
1903 timer_expires = jiffies;
1907 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1908 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %lu %lu %u %u %d\n",
1910 src->s6_addr32[0], src->s6_addr32[1],
1911 src->s6_addr32[2], src->s6_addr32[3], srcp,
1912 dest->s6_addr32[0], dest->s6_addr32[1],
1913 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1915 tp->write_seq-tp->snd_una,
1916 (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq),
1918 jiffies_delta_to_clock_t(timer_expires - jiffies),
1919 icsk->icsk_retransmits,
1920 from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
1921 icsk->icsk_probes_out,
1923 atomic_read(&sp->sk_refcnt), sp,
1924 jiffies_to_clock_t(icsk->icsk_rto),
1925 jiffies_to_clock_t(icsk->icsk_ack.ato),
		   (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
1928 tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh
1932 static void get_timewait6_sock(struct seq_file *seq,
1933 struct inet_timewait_sock *tw, int i)
1935 const struct in6_addr *dest, *src;
1937 const struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw);
1938 long delta = tw->tw_ttd - jiffies;
1940 dest = &tw6->tw_v6_daddr;
1941 src = &tw6->tw_v6_rcv_saddr;
1942 destp = ntohs(tw->tw_dport);
1943 srcp = ntohs(tw->tw_sport);
1946 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1947 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
1949 src->s6_addr32[0], src->s6_addr32[1],
1950 src->s6_addr32[2], src->s6_addr32[3], srcp,
1951 dest->s6_addr32[0], dest->s6_addr32[1],
1952 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1953 tw->tw_substate, 0, 0,
1954 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
1955 atomic_read(&tw->tw_refcnt), tw);
1958 static int tcp6_seq_show(struct seq_file *seq, void *v)
1960 struct tcp_iter_state *st;
1962 if (v == SEQ_START_TOKEN) {
1967 "st tx_queue rx_queue tr tm->when retrnsmt"
1968 " uid timeout inode\n");
1973 switch (st->state) {
1974 case TCP_SEQ_STATE_LISTENING:
1975 case TCP_SEQ_STATE_ESTABLISHED:
1976 get_tcp6_sock(seq, v, st->num);
1978 case TCP_SEQ_STATE_OPENREQ:
1979 get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
1981 case TCP_SEQ_STATE_TIME_WAIT:
1982 get_timewait6_sock(seq, v, st->num);
1989 static const struct file_operations tcp6_afinfo_seq_fops = {
1990 .owner = THIS_MODULE,
1991 .open = tcp_seq_open,
1993 .llseek = seq_lseek,
1994 .release = seq_release_net
1997 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2000 .seq_fops = &tcp6_afinfo_seq_fops,
2002 .show = tcp6_seq_show,
2006 int __net_init tcp6_proc_init(struct net *net)
2008 return tcp_proc_register(net, &tcp6_seq_afinfo);
2011 void tcp6_proc_exit(struct net *net)
2013 tcp_proc_unregister(net, &tcp6_seq_afinfo);
2017 struct proto tcpv6_prot = {
2019 .owner = THIS_MODULE,
2021 .connect = tcp_v6_connect,
2022 .disconnect = tcp_disconnect,
2023 .accept = inet_csk_accept,
2025 .init = tcp_v6_init_sock,
2026 .destroy = tcp_v6_destroy_sock,
2027 .shutdown = tcp_shutdown,
2028 .setsockopt = tcp_setsockopt,
2029 .getsockopt = tcp_getsockopt,
2030 .recvmsg = tcp_recvmsg,
2031 .sendmsg = tcp_sendmsg,
2032 .sendpage = tcp_sendpage,
2033 .backlog_rcv = tcp_v6_do_rcv,
2034 .release_cb = tcp_release_cb,
2035 .mtu_reduced = tcp_v6_mtu_reduced,
2036 .hash = tcp_v6_hash,
2037 .unhash = inet_unhash,
2038 .get_port = inet_csk_get_port,
2039 .enter_memory_pressure = tcp_enter_memory_pressure,
2040 .sockets_allocated = &tcp_sockets_allocated,
2041 .memory_allocated = &tcp_memory_allocated,
2042 .memory_pressure = &tcp_memory_pressure,
2043 .orphan_count = &tcp_orphan_count,
2044 .sysctl_wmem = sysctl_tcp_wmem,
2045 .sysctl_rmem = sysctl_tcp_rmem,
2046 .max_header = MAX_TCP_HEADER,
2047 .obj_size = sizeof(struct tcp6_sock),
2048 .slab_flags = SLAB_DESTROY_BY_RCU,
2049 .twsk_prot = &tcp6_timewait_sock_ops,
2050 .rsk_prot = &tcp6_request_sock_ops,
2051 .h.hashinfo = &tcp_hashinfo,
2052 .no_autobind = true,
2053 #ifdef CONFIG_COMPAT
2054 .compat_setsockopt = compat_tcp_setsockopt,
2055 .compat_getsockopt = compat_tcp_getsockopt,
2057 #ifdef CONFIG_MEMCG_KMEM
2058 .proto_cgroup = tcp_proto_cgroup,
2062 static const struct inet6_protocol tcpv6_protocol = {
2063 .early_demux = tcp_v6_early_demux,
2064 .handler = tcp_v6_rcv,
2065 .err_handler = tcp_v6_err,
2066 .gso_send_check = tcp_v6_gso_send_check,
2067 .gso_segment = tcp_tso_segment,
2068 .gro_receive = tcp6_gro_receive,
2069 .gro_complete = tcp6_gro_complete,
2070 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2073 static struct inet_protosw tcpv6_protosw = {
2074 .type = SOCK_STREAM,
2075 .protocol = IPPROTO_TCP,
2076 .prot = &tcpv6_prot,
2077 .ops = &inet6_stream_ops,
2079 .flags = INET_PROTOSW_PERMANENT |
2083 static int __net_init tcpv6_net_init(struct net *net)
2085 return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2086 SOCK_RAW, IPPROTO_TCP, net);
2089 static void __net_exit tcpv6_net_exit(struct net *net)
2091 inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2094 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2096 inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET6);
2099 static struct pernet_operations tcpv6_net_ops = {
2100 .init = tcpv6_net_init,
2101 .exit = tcpv6_net_exit,
2102 .exit_batch = tcpv6_net_exit_batch,
2105 int __init tcpv6_init(void)
2109 ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2113 /* register inet6 protocol */
2114 ret = inet6_register_protosw(&tcpv6_protosw);
2116 goto out_tcpv6_protocol;
2118 ret = register_pernet_subsys(&tcpv6_net_ops);
2120 goto out_tcpv6_protosw;
2125 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2127 inet6_unregister_protosw(&tcpv6_protosw);
2131 void tcpv6_exit(void)
2133 unregister_pernet_subsys(&tcpv6_net_ops);
2134 inet6_unregister_protosw(&tcpv6_protosw);
2135 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);