#include <linux/module.h>
#include <linux/errno.h>
#include <linux/socket.h>
#include <linux/skbuff.h>
#include <linux/ip.h>
#include <linux/udp.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <net/genetlink.h>
#include <net/gue.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/udp.h>
#include <net/udp_tunnel.h>
#include <net/xfrm.h>
#include <uapi/linux/fou.h>
#include <uapi/linux/genetlink.h>
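
/*
 * fou.c - foo-over-UDP (FOU) and Generic UDP Encapsulation (GUE).
 * FOU carries an inner IP protocol directly over UDP; GUE adds a small
 * header (struct guehdr) that encodes the inner protocol, header length,
 * and optional private flags such as remote checksum offload.
 */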

static DEFINE_SPINLOCK(fou_lock);
static LIST_HEAD(fou_list);
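
/* Per-port state for one open FOU/GUE listener socket, and the
 * configuration parsed from a netlink request that creates it.
 */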
struct fou {
	struct socket *sock;
	u8 protocol;
	u16 port;
	struct udp_offload udp_offloads;
	struct list_head list;
};

struct fou_cfg {
	u16 type;
	u8 protocol;
	struct udp_port_cfg udp_config;
};

static inline struct fou *fou_from_sock(struct sock *sk)
{
	return sk->sk_user_data;
}

static void fou_recv_pull(struct sk_buff *skb, size_t len)
{
	struct iphdr *iph = ip_hdr(skb);

	/* Remove 'len' bytes from the packet (UDP header and
	 * FOU header if present).
	 */
	iph->tot_len = htons(ntohs(iph->tot_len) - len);
	__skb_pull(skb, len);
	skb_postpull_rcsum(skb, udp_hdr(skb), len);
	skb_reset_transport_header(skb);
}
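
/* UDP encap_rcv callback for plain FOU. Returning a negative protocol
 * number makes the IP layer resubmit the packet to that inner protocol
 * handler; returning 1 hands the packet back to normal UDP processing.
 */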
static int fou_udp_recv(struct sock *sk, struct sk_buff *skb)
{
	struct fou *fou = fou_from_sock(sk);

	if (!fou)
		return 1;

	fou_recv_pull(skb, sizeof(struct udphdr));

	return -fou->protocol;
}
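
/* Apply GUE remote checksum offload on receive: the sender left the
 * inner checksum field unfilled and recorded (start, offset) in the
 * option data; patch the checksum here from the packet's full checksum.
 */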
static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr,
				  void *data, size_t hdrlen, u8 ipproto)
{
	__be16 *pd = data;
	size_t start = ntohs(pd[0]);
	size_t offset = ntohs(pd[1]);
	size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start);
	__wsum delta;

	if (skb->remcsum_offload) {
		/* Already processed in GRO path */
		skb->remcsum_offload = 0;
		return guehdr;
	}

	if (!pskb_may_pull(skb, plen))
		return NULL;
	guehdr = (struct guehdr *)&udp_hdr(skb)[1];

	if (unlikely(skb->ip_summed != CHECKSUM_COMPLETE))
		__skb_checksum_complete(skb);

	delta = remcsum_adjust((void *)guehdr + hdrlen,
			       skb->csum, start, offset);

	/* Adjust skb->csum since we changed the packet */
	skb->csum = csum_add(skb->csum, delta);

	return guehdr;
}

static int gue_control_message(struct sk_buff *skb, struct guehdr *guehdr)
{
	/* No support for GUE control messages yet; drop the packet */
	kfree_skb(skb);
	return 0;
}
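
/* UDP encap_rcv callback for GUE. Validates the GUE header, handles the
 * private flags block (currently only remote checksum offload), strips
 * the UDP and GUE headers, and resubmits to the inner protocol.
 */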
static int gue_udp_recv(struct sock *sk, struct sk_buff *skb)
{
	struct fou *fou = fou_from_sock(sk);
	size_t len, optlen, hdrlen;
	struct guehdr *guehdr;
	void *data;
	u16 doffset = 0;

	if (!fou)
		return 1;

	len = sizeof(struct udphdr) + sizeof(struct guehdr);
	if (!pskb_may_pull(skb, len))
		goto drop;

	guehdr = (struct guehdr *)&udp_hdr(skb)[1];

	optlen = guehdr->hlen << 2;
	len += optlen;

	if (!pskb_may_pull(skb, len))
		goto drop;

	/* guehdr may change after pull */
	guehdr = (struct guehdr *)&udp_hdr(skb)[1];

	if (guehdr->version != 0 || validate_gue_flags(guehdr, optlen))
		goto drop;

	hdrlen = sizeof(struct guehdr) + optlen;

	ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(skb)->tot_len) - len);

	/* Pull csum through the guehdr now. This can be used if
	 * there is a remote checksum offload.
	 */
	skb_postpull_rcsum(skb, udp_hdr(skb), len);

	data = &guehdr[1];

	if (guehdr->flags & GUE_FLAG_PRIV) {
		__be32 flags = *(__be32 *)(data + doffset);

		doffset += GUE_LEN_PRIV;

		if (flags & GUE_PFLAG_REMCSUM) {
			guehdr = gue_remcsum(skb, guehdr, data + doffset,
					     hdrlen, guehdr->proto_ctype);
			if (!guehdr)
				goto drop;

			data = &guehdr[1];

			doffset += GUE_PLEN_REMCSUM;
		}
	}

	if (unlikely(guehdr->control))
		return gue_control_message(skb, guehdr);

	__skb_pull(skb, sizeof(struct udphdr) + hdrlen);
	skb_reset_transport_header(skb);

	return -guehdr->proto_ctype;

drop:
	kfree_skb(skb);
	return 0;
}
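
/* GRO receive for plain FOU: hand the aggregation decision straight to
 * the offload handler of the inner protocol recorded by the UDP layer.
 */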
static struct sk_buff **fou_gro_receive(struct sk_buff **head,
					struct sk_buff *skb,
					struct udp_offload *uoff)
{
	const struct net_offload *ops;
	struct sk_buff **pp = NULL;
	u8 proto = NAPI_GRO_CB(skb)->proto;
	const struct net_offload **offloads;

	rcu_read_lock();
	offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
	ops = rcu_dereference(offloads[proto]);
	if (!ops || !ops->callbacks.gro_receive)
		goto out_unlock;

	pp = ops->callbacks.gro_receive(head, skb);

out_unlock:
	rcu_read_unlock();

	return pp;
}

static int fou_gro_complete(struct sk_buff *skb, int nhoff,
			    struct udp_offload *uoff)
{
	const struct net_offload *ops;
	u8 proto = NAPI_GRO_CB(skb)->proto;
	int err = -ENOSYS;
	const struct net_offload **offloads;

	udp_tunnel_gro_complete(skb, nhoff);

	rcu_read_lock();
	offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
	ops = rcu_dereference(offloads[proto]);
	if (WARN_ON(!ops || !ops->callbacks.gro_complete))
		goto out_unlock;

	err = ops->callbacks.gro_complete(skb, nhoff);

out_unlock:
	rcu_read_unlock();

	return err;
}
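
/* GRO variant of remote checksum offload: patch the inner checksum from
 * NAPI_GRO_CB(skb)->csum and mark the skb so the non-GRO path in
 * gue_remcsum() does not apply the adjustment a second time.
 */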
static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off,
				      struct guehdr *guehdr, void *data,
				      size_t hdrlen, u8 ipproto)
{
	__be16 *pd = data;
	size_t start = ntohs(pd[0]);
	size_t offset = ntohs(pd[1]);
	size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start);
	__wsum delta;

	if (skb->remcsum_offload)
		return guehdr;

	if (!NAPI_GRO_CB(skb)->csum_valid)
		return NULL;

	/* Pull checksum that will be written */
	if (skb_gro_header_hard(skb, off + plen)) {
		guehdr = skb_gro_header_slow(skb, off + plen, off);
		if (!guehdr)
			return NULL;
	}

	delta = remcsum_adjust((void *)guehdr + hdrlen,
			       NAPI_GRO_CB(skb)->csum, start, offset);

	/* Adjust skb->csum since we changed the packet */
	skb->csum = csum_add(skb->csum, delta);
	NAPI_GRO_CB(skb)->csum = csum_add(NAPI_GRO_CB(skb)->csum, delta);

	skb->remcsum_offload = 1;

	return guehdr;
}
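
/* GRO receive for GUE: validate the header, apply remote checksum
 * offload if requested, compare the GUE header against the packets
 * already held for this flow, then defer to the inner protocol's
 * gro_receive callback.
 */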
static struct sk_buff **gue_gro_receive(struct sk_buff **head,
					struct sk_buff *skb,
					struct udp_offload *uoff)
{
	const struct net_offload **offloads;
	const struct net_offload *ops;
	struct sk_buff **pp = NULL;
	struct sk_buff *p;
	struct guehdr *guehdr;
	size_t len, optlen, hdrlen, off;
	void *data;
	u16 doffset = 0;
	int flush = 1;

	off = skb_gro_offset(skb);
	len = off + sizeof(*guehdr);

	guehdr = skb_gro_header_fast(skb, off);
	if (skb_gro_header_hard(skb, len)) {
		guehdr = skb_gro_header_slow(skb, len, off);
		if (unlikely(!guehdr))
			goto out;
	}

	optlen = guehdr->hlen << 2;
	len += optlen;

	if (skb_gro_header_hard(skb, len)) {
		guehdr = skb_gro_header_slow(skb, len, off);
		if (unlikely(!guehdr))
			goto out;
	}

	if (unlikely(guehdr->control) || guehdr->version != 0 ||
	    validate_gue_flags(guehdr, optlen))
		goto out;

	hdrlen = sizeof(*guehdr) + optlen;

	/* Adjust NAPI_GRO_CB(skb)->csum to account for guehdr;
	 * this is needed if there is a remote checksum offload.
	 */
	skb_gro_postpull_rcsum(skb, guehdr, hdrlen);

	data = &guehdr[1];

	if (guehdr->flags & GUE_FLAG_PRIV) {
		__be32 flags = *(__be32 *)(data + doffset);

		doffset += GUE_LEN_PRIV;

		if (flags & GUE_PFLAG_REMCSUM) {
			guehdr = gue_gro_remcsum(skb, off, guehdr,
						 data + doffset, hdrlen,
						 guehdr->proto_ctype);
			if (!guehdr)
				goto out;

			data = &guehdr[1];

			doffset += GUE_PLEN_REMCSUM;
		}
	}

	skb_gro_pull(skb, hdrlen);

	flush = 0;

	for (p = *head; p; p = p->next) {
		const struct guehdr *guehdr2;

		if (!NAPI_GRO_CB(p)->same_flow)
			continue;

		guehdr2 = (struct guehdr *)(p->data + off);

		/* Compare base GUE header to be equal (covers
		 * hlen, version, proto_ctype, and flags).
		 */
		if (guehdr->word != guehdr2->word) {
			NAPI_GRO_CB(p)->same_flow = 0;
			continue;
		}

		/* Check that the optional fields are the same. */
		if (guehdr->hlen && memcmp(&guehdr[1], &guehdr2[1],
					   guehdr->hlen << 2)) {
			NAPI_GRO_CB(p)->same_flow = 0;
			continue;
		}
	}

	rcu_read_lock();
	offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
	ops = rcu_dereference(offloads[guehdr->proto_ctype]);
	if (WARN_ON(!ops || !ops->callbacks.gro_receive))
		goto out_unlock;

	pp = ops->callbacks.gro_receive(head, skb);

out_unlock:
	rcu_read_unlock();
out:
	NAPI_GRO_CB(skb)->flush |= flush;

	return pp;
}

static int gue_gro_complete(struct sk_buff *skb, int nhoff,
			    struct udp_offload *uoff)
{
	const struct net_offload **offloads;
	struct guehdr *guehdr = (struct guehdr *)(skb->data + nhoff);
	const struct net_offload *ops;
	unsigned int guehlen;
	u8 proto;
	int err = -ENOENT;

	proto = guehdr->proto_ctype;

	guehlen = sizeof(*guehdr) + (guehdr->hlen << 2);

	rcu_read_lock();
	offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
	ops = rcu_dereference(offloads[proto]);
	if (WARN_ON(!ops || !ops->callbacks.gro_complete))
		goto out_unlock;

	err = ops->callbacks.gro_complete(skb, nhoff + guehlen);

out_unlock:
	rcu_read_unlock();

	return err;
}

static int fou_add_to_port_list(struct fou *fou)
{
	struct fou *fout;

	spin_lock(&fou_lock);
	list_for_each_entry(fout, &fou_list, list) {
		if (fou->port == fout->port) {
			spin_unlock(&fou_lock);
			return -EALREADY;
		}
	}

	list_add(&fou->list, &fou_list);
	spin_unlock(&fou_lock);

	return 0;
}

static void fou_release(struct fou *fou)
{
	struct socket *sock = fou->sock;
	struct sock *sk = sock->sk;

	udp_del_offload(&fou->udp_offloads);

	list_del(&fou->list);

	/* Remove hooks into tunnel socket */
	sk->sk_user_data = NULL;

	sock_release(sock);

	kfree(fou);
}
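
/* Wire up the UDP encapsulation receive hook and the GRO offload
 * callbacks on the listener socket, for direct FOU and for GUE.
 */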
static int fou_encap_init(struct sock *sk, struct fou *fou, struct fou_cfg *cfg)
{
	udp_sk(sk)->encap_rcv = fou_udp_recv;
	fou->protocol = cfg->protocol;
	fou->udp_offloads.callbacks.gro_receive = fou_gro_receive;
	fou->udp_offloads.callbacks.gro_complete = fou_gro_complete;
	fou->udp_offloads.port = cfg->udp_config.local_udp_port;
	fou->udp_offloads.ipproto = cfg->protocol;

	return 0;
}

static int gue_encap_init(struct sock *sk, struct fou *fou, struct fou_cfg *cfg)
{
	udp_sk(sk)->encap_rcv = gue_udp_recv;
	fou->udp_offloads.callbacks.gro_receive = gue_gro_receive;
	fou->udp_offloads.callbacks.gro_complete = gue_gro_complete;
	fou->udp_offloads.port = cfg->udp_config.local_udp_port;

	return 0;
}
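
/* Create a FOU port: open and configure the kernel UDP socket, hook in
 * the encapsulation callbacks, register GRO offloads, and add the port
 * to the global list.
 */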
static int fou_create(struct net *net, struct fou_cfg *cfg,
		      struct socket **sockp)
{
	struct fou *fou = NULL;
	int err;
	struct socket *sock = NULL;
	struct sock *sk;

	/* Open UDP socket */
	err = udp_sock_create(net, &cfg->udp_config, &sock);
	if (err < 0)
		goto error;

	/* Allocate FOU port structure */
	fou = kzalloc(sizeof(*fou), GFP_KERNEL);
	if (!fou) {
		err = -ENOMEM;
		goto error;
	}

	sk = sock->sk;

	fou->port = cfg->udp_config.local_udp_port;

	/* Initialize for the fou type */
	switch (cfg->type) {
	case FOU_ENCAP_DIRECT:
		err = fou_encap_init(sk, fou, cfg);
		if (err)
			goto error;
		break;
	case FOU_ENCAP_GUE:
		err = gue_encap_init(sk, fou, cfg);
		if (err)
			goto error;
		break;
	default:
		err = -EINVAL;
		goto error;
	}

	udp_sk(sk)->encap_type = 1;
	udp_encap_enable();

	sk->sk_user_data = fou;
	fou->sock = sock;

	inet_inc_convert_csum(sk);

	sk->sk_allocation = GFP_ATOMIC;

	if (cfg->udp_config.family == AF_INET) {
		err = udp_add_offload(&fou->udp_offloads);
		if (err)
			goto error;
	}

	err = fou_add_to_port_list(fou);
	if (err)
		goto error;

	if (sockp)
		*sockp = sock;

	return 0;

error:
	kfree(fou);
	if (sock)
		sock_release(sock);

	return err;
}

static int fou_destroy(struct net *net, struct fou_cfg *cfg)
{
	struct fou *fou;
	u16 port = cfg->udp_config.local_udp_port;
	int err = -EINVAL;

	spin_lock(&fou_lock);
	list_for_each_entry(fou, &fou_list, list) {
		if (fou->port == port) {
			udp_del_offload(&fou->udp_offloads);
			fou_release(fou);
			err = 0;
			break;
		}
	}
	spin_unlock(&fou_lock);

	return err;
}

static struct genl_family fou_nl_family = {
	.id		= GENL_ID_GENERATE,
	.hdrsize	= 0,
	.name		= FOU_GENL_NAME,
	.version	= FOU_GENL_VERSION,
	.maxattr	= FOU_ATTR_MAX,
	.netnsok	= true,
};

static struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = {
	[FOU_ATTR_PORT] = { .type = NLA_U16, },
	[FOU_ATTR_AF] = { .type = NLA_U8, },
	[FOU_ATTR_IPPROTO] = { .type = NLA_U8, },
	[FOU_ATTR_TYPE] = { .type = NLA_U8, },
};

static int parse_nl_config(struct genl_info *info,
			   struct fou_cfg *cfg)
{
	memset(cfg, 0, sizeof(*cfg));

	cfg->udp_config.family = AF_INET;

	if (info->attrs[FOU_ATTR_AF]) {
		u8 family = nla_get_u8(info->attrs[FOU_ATTR_AF]);

		if (family != AF_INET && family != AF_INET6)
			return -EINVAL;

		cfg->udp_config.family = family;
	}

	if (info->attrs[FOU_ATTR_PORT]) {
		u16 port = nla_get_u16(info->attrs[FOU_ATTR_PORT]);

		cfg->udp_config.local_udp_port = port;
	}

	if (info->attrs[FOU_ATTR_IPPROTO])
		cfg->protocol = nla_get_u8(info->attrs[FOU_ATTR_IPPROTO]);

	if (info->attrs[FOU_ATTR_TYPE])
		cfg->type = nla_get_u8(info->attrs[FOU_ATTR_TYPE]);

	return 0;
}
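
/* Netlink doit handlers for FOU_CMD_ADD/FOU_CMD_DEL. From userspace
 * these are typically reached through iproute2, e.g. (the port number
 * here is only an example):
 *
 *	ip fou add port 5555 ipproto 4
 *	ip fou del port 5555
 */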
static int fou_nl_cmd_add_port(struct sk_buff *skb, struct genl_info *info)
{
	struct fou_cfg cfg;
	int err;

	err = parse_nl_config(info, &cfg);
	if (err)
		return err;

	return fou_create(&init_net, &cfg, NULL);
}

static int fou_nl_cmd_rm_port(struct sk_buff *skb, struct genl_info *info)
{
	struct fou_cfg cfg;

	parse_nl_config(info, &cfg);

	return fou_destroy(&init_net, &cfg);
}

static const struct genl_ops fou_nl_ops[] = {
	{
		.cmd = FOU_CMD_ADD,
		.doit = fou_nl_cmd_add_port,
		.policy = fou_nl_policy,
		.flags = GENL_ADMIN_PERM,
	},
	{
		.cmd = FOU_CMD_DEL,
		.doit = fou_nl_cmd_rm_port,
		.policy = fou_nl_policy,
		.flags = GENL_ADMIN_PERM,
	},
};

size_t fou_encap_hlen(struct ip_tunnel_encap *e)
{
	return sizeof(struct udphdr);
}
EXPORT_SYMBOL(fou_encap_hlen);

size_t gue_encap_hlen(struct ip_tunnel_encap *e)
{
	size_t len;
	bool need_priv = false;

	len = sizeof(struct udphdr) + sizeof(struct guehdr);

	if (e->flags & TUNNEL_ENCAP_FLAG_REMCSUM) {
		len += GUE_PLEN_REMCSUM;
		need_priv = true;
	}

	len += need_priv ? GUE_LEN_PRIV : 0;

	return len;
}
EXPORT_SYMBOL(gue_encap_hlen);

static void fou_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e,
			  struct flowi4 *fl4, u8 *protocol, __be16 sport)
{
	struct udphdr *uh;

	skb_push(skb, sizeof(struct udphdr));
	skb_reset_transport_header(skb);

	uh = udp_hdr(skb);

	uh->dest = e->dport;
	uh->source = sport;
	uh->len = htons(skb->len);
	uh->check = 0;
	udp_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM), skb,
		     fl4->saddr, fl4->daddr, skb->len);

	*protocol = IPPROTO_UDP;
}

int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
		     u8 *protocol, struct flowi4 *fl4)
{
	bool csum = !!(e->flags & TUNNEL_ENCAP_FLAG_CSUM);
	int type = csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
	__be16 sport;

	skb = iptunnel_handle_offloads(skb, csum, type);

	if (IS_ERR(skb))
		return PTR_ERR(skb);

	sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev),
					       skb, 0, 0, false);
	fou_build_udp(skb, e, fl4, protocol, sport);

	return 0;
}
EXPORT_SYMBOL(fou_build_header);
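
/* Build the GUE header on transmit. When remote checksum offload is in
 * use, the private flags word and a (start, offset) pair describing the
 * inner checksum field are appended as optional data after the base
 * header.
 */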
int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
		     u8 *protocol, struct flowi4 *fl4)
{
	bool csum = !!(e->flags & TUNNEL_ENCAP_FLAG_CSUM);
	int type = csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
	struct guehdr *guehdr;
	size_t hdrlen, optlen = 0;
	__be16 sport;
	void *data;
	bool need_priv = false;

	if ((e->flags & TUNNEL_ENCAP_FLAG_REMCSUM) &&
	    skb->ip_summed == CHECKSUM_PARTIAL) {
		csum = false;
		optlen += GUE_PLEN_REMCSUM;
		type |= SKB_GSO_TUNNEL_REMCSUM;
		need_priv = true;
	}

	optlen += need_priv ? GUE_LEN_PRIV : 0;

	skb = iptunnel_handle_offloads(skb, csum, type);

	if (IS_ERR(skb))
		return PTR_ERR(skb);

	/* Get source port (based on flow hash) before skb_push */
	sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev),
					       skb, 0, 0, false);

	hdrlen = sizeof(struct guehdr) + optlen;

	skb_push(skb, hdrlen);

	guehdr = (struct guehdr *)skb->data;

	guehdr->control = 0;
	guehdr->version = 0;
	guehdr->hlen = optlen >> 2;
	guehdr->flags = 0;
	guehdr->proto_ctype = *protocol;

	data = &guehdr[1];

	if (need_priv) {
		__be32 *flags = data;

		guehdr->flags |= GUE_FLAG_PRIV;
		*flags = 0;
		data += GUE_LEN_PRIV;

		if (type & SKB_GSO_TUNNEL_REMCSUM) {
			u16 csum_start = skb_checksum_start_offset(skb);
			__be16 *pd = data;

			if (csum_start < hdrlen)
				return -EINVAL;

			csum_start -= hdrlen;
			pd[0] = htons(csum_start);
			pd[1] = htons(csum_start + skb->csum_offset);

			if (!skb_is_gso(skb)) {
				skb->ip_summed = CHECKSUM_NONE;
				skb->encapsulation = 0;
			}

			*flags |= GUE_PFLAG_REMCSUM;
			data += GUE_PLEN_REMCSUM;
		}
	}

	fou_build_udp(skb, e, fl4, protocol, sport);

	return 0;
}
EXPORT_SYMBOL(gue_build_header);

#ifdef CONFIG_NET_FOU_IP_TUNNELS

static const struct ip_tunnel_encap_ops __read_mostly fou_iptun_ops = {
	.encap_hlen = fou_encap_hlen,
	.build_header = fou_build_header,
};

static const struct ip_tunnel_encap_ops __read_mostly gue_iptun_ops = {
	.encap_hlen = gue_encap_hlen,
	.build_header = gue_build_header,
};

static int ip_tunnel_encap_add_fou_ops(void)
{
	int ret;

	ret = ip_tunnel_encap_add_ops(&fou_iptun_ops, TUNNEL_ENCAP_FOU);
	if (ret < 0) {
		pr_err("can't add fou ops\n");
		return ret;
	}

	ret = ip_tunnel_encap_add_ops(&gue_iptun_ops, TUNNEL_ENCAP_GUE);
	if (ret < 0) {
		pr_err("can't add gue ops\n");
		ip_tunnel_encap_del_ops(&fou_iptun_ops, TUNNEL_ENCAP_FOU);
		return ret;
	}

	return 0;
}

static void ip_tunnel_encap_del_fou_ops(void)
{
	ip_tunnel_encap_del_ops(&fou_iptun_ops, TUNNEL_ENCAP_FOU);
	ip_tunnel_encap_del_ops(&gue_iptun_ops, TUNNEL_ENCAP_GUE);
}

#else

static int ip_tunnel_encap_add_fou_ops(void)
{
	return 0;
}

static void ip_tunnel_encap_del_fou_ops(void)
{
}

#endif

static int __init fou_init(void)
{
	int ret;

	ret = genl_register_family_with_ops(&fou_nl_family,
					    fou_nl_ops);
	if (ret < 0)
		goto exit;

	ret = ip_tunnel_encap_add_fou_ops();
	if (ret < 0)
		genl_unregister_family(&fou_nl_family);

exit:
	return ret;
}

static void __exit fou_fini(void)
{
	struct fou *fou, *next;

	ip_tunnel_encap_del_fou_ops();

	genl_unregister_family(&fou_nl_family);

	/* Close all the FOU sockets */
	spin_lock(&fou_lock);
	list_for_each_entry_safe(fou, next, &fou_list, list)
		fou_release(fou);
	spin_unlock(&fou_lock);
}

module_init(fou_init);
module_exit(fou_fini);
MODULE_AUTHOR("Tom Herbert <therbert@google.com>");
MODULE_LICENSE("GPL");