Merge branch develop-3.10 into develop-3.10-next
[firefly-linux-kernel-4.4.55.git] / net / ipv4 / ip_tunnel.c
1 /*
2  * Copyright (c) 2013 Nicira, Inc.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of version 2 of the GNU General Public
6  * License as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16  * 02110-1301, USA
17  */
18
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21 #include <linux/capability.h>
22 #include <linux/module.h>
23 #include <linux/types.h>
24 #include <linux/kernel.h>
25 #include <linux/slab.h>
26 #include <linux/uaccess.h>
27 #include <linux/skbuff.h>
28 #include <linux/netdevice.h>
29 #include <linux/in.h>
30 #include <linux/tcp.h>
31 #include <linux/udp.h>
32 #include <linux/if_arp.h>
33 #include <linux/mroute.h>
34 #include <linux/init.h>
35 #include <linux/in6.h>
36 #include <linux/inetdevice.h>
37 #include <linux/igmp.h>
38 #include <linux/netfilter_ipv4.h>
39 #include <linux/etherdevice.h>
40 #include <linux/if_ether.h>
41 #include <linux/if_vlan.h>
42 #include <linux/rculist.h>
43
44 #include <net/sock.h>
45 #include <net/ip.h>
46 #include <net/icmp.h>
47 #include <net/protocol.h>
48 #include <net/ip_tunnels.h>
49 #include <net/arp.h>
50 #include <net/checksum.h>
51 #include <net/dsfield.h>
52 #include <net/inet_ecn.h>
53 #include <net/xfrm.h>
54 #include <net/net_namespace.h>
55 #include <net/netns/generic.h>
56 #include <net/rtnetlink.h>
57
58 #if IS_ENABLED(CONFIG_IPV6)
59 #include <net/ipv6.h>
60 #include <net/ip6_fib.h>
61 #include <net/ip6_route.h>
62 #endif
63
64 static unsigned int ip_tunnel_hash(struct ip_tunnel_net *itn,
65                                    __be32 key, __be32 remote)
66 {
67         return hash_32((__force u32)key ^ (__force u32)remote,
68                          IP_TNL_HASH_BITS);
69 }
70
71 /* Often modified stats are per cpu, other are shared (netdev->stats) */
/* Often modified stats are per cpu, other are shared (netdev->stats) */
struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
						struct rtnl_link_stats64 *tot)
{
	int i;

	/* Sum per-cpu rx/tx counters.  Each CPU's counters are sampled
	 * under its u64_stats seqcount and retried on concurrent update,
	 * so 64-bit values are never read torn on 32-bit hosts.
	 */
	for_each_possible_cpu(i) {
		const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
		u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
		unsigned int start;

		do {
			start = u64_stats_fetch_begin_bh(&tstats->syncp);
			rx_packets = tstats->rx_packets;
			tx_packets = tstats->tx_packets;
			rx_bytes = tstats->rx_bytes;
			tx_bytes = tstats->tx_bytes;
		} while (u64_stats_fetch_retry_bh(&tstats->syncp, start));

		tot->rx_packets += rx_packets;
		tot->tx_packets += tx_packets;
		tot->rx_bytes   += rx_bytes;
		tot->tx_bytes   += tx_bytes;
	}

	/* Error and misc counters are only updated from dev->stats;
	 * copy them over verbatim.
	 */
	tot->multicast = dev->stats.multicast;

	tot->rx_crc_errors = dev->stats.rx_crc_errors;
	tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
	tot->rx_length_errors = dev->stats.rx_length_errors;
	tot->rx_frame_errors = dev->stats.rx_frame_errors;
	tot->rx_errors = dev->stats.rx_errors;

	tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
	tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
	tot->tx_dropped = dev->stats.tx_dropped;
	tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
	tot->tx_errors = dev->stats.tx_errors;

	tot->collisions  = dev->stats.collisions;

	return tot;
}
EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64);
115
116 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
117                                 __be16 flags, __be32 key)
118 {
119         if (p->i_flags & TUNNEL_KEY) {
120                 if (flags & TUNNEL_KEY)
121                         return key == p->i_key;
122                 else
123                         /* key expected, none present */
124                         return false;
125         } else
126                 return !(flags & TUNNEL_KEY);
127 }
128
129 /* Fallback tunnel: no source, no destination, no key, no options
130
131    Tunnel hash table:
132    We require exact key match i.e. if a key is present in packet
133    it will match only tunnel with the same key; if it is not present,
134    it will match only keyless tunnel.
135
   All keyless packets, if not matching any configured keyless tunnel,
   will match the fallback tunnel.
138    Given src, dst and key, find appropriate for input tunnel.
139 */
/* Find the tunnel matching an incoming packet.  Called under RCU read
 * lock (the hash lists are walked with hlist_for_each_entry_rcu).
 * Match preference, per pass below: exact (saddr, daddr), daddr-only,
 * wildcard/multicast local, then key-only; within each pass an exact
 * parms.link match returns immediately while a link mismatch is kept
 * as a candidate fallback.
 */
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, __be16 flags,
				   __be32 remote, __be32 local,
				   __be32 key)
{
	unsigned int hash;
	struct ip_tunnel *t, *cand = NULL;
	struct hlist_head *head;

	hash = ip_tunnel_hash(itn, key, remote);
	head = &itn->tunnels[hash];

	/* Pass 1: both endpoints match exactly. */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else
			cand = t;
	}

	/* Pass 2: remote matches, tunnel has no local address bound. */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (remote != t->parms.iph.daddr ||
		    t->parms.iph.saddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	/* Passes 3/4 use the wildcard-remote bucket. */
	hash = ip_tunnel_hash(itn, key, 0);
	head = &itn->tunnels[hash];

	/* Pass 3: tunnel bound to our local address with no remote, or
	 * the packet was sent to a multicast group the tunnel targets.
	 */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
			continue;

		if (!(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	/* Caller explicitly forbids key-only matching (e.g. no key field
	 * present in this protocol's header).
	 */
	if (flags & TUNNEL_NO_KEY)
		goto skip_key_lookup;

	/* Pass 4: key-only match against fully wildcard tunnels. */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (t->parms.i_key != key ||
		    t->parms.iph.saddr != 0 ||
		    t->parms.iph.daddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

skip_key_lookup:
	if (cand)
		return cand;

	/* Last resort: the per-netns fallback device, if it is up. */
	if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
		return netdev_priv(itn->fb_tunnel_dev);

	return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
229
230 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
231                                     struct ip_tunnel_parm *parms)
232 {
233         unsigned int h;
234         __be32 remote;
235
236         if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
237                 remote = parms->iph.daddr;
238         else
239                 remote = 0;
240
241         h = ip_tunnel_hash(itn, parms->i_key, remote);
242         return &itn->tunnels[h];
243 }
244
/* Insert a tunnel into its hash bucket (RCU-safe for concurrent
 * readers in ip_tunnel_lookup).
 */
static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	struct hlist_head *head = ip_bucket(itn, &t->parms);

	hlist_add_head_rcu(&t->hash_node, head);
}
251
/* Unhash a tunnel; RCU readers traversing the bucket remain safe. */
static void ip_tunnel_del(struct ip_tunnel *t)
{
	hlist_del_init_rcu(&t->hash_node);
}
256
257 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
258                                         struct ip_tunnel_parm *parms,
259                                         int type)
260 {
261         __be32 remote = parms->iph.daddr;
262         __be32 local = parms->iph.saddr;
263         __be32 key = parms->i_key;
264         int link = parms->link;
265         struct ip_tunnel *t = NULL;
266         struct hlist_head *head = ip_bucket(itn, parms);
267
268         hlist_for_each_entry_rcu(t, head, hash_node) {
269                 if (local == t->parms.iph.saddr &&
270                     remote == t->parms.iph.daddr &&
271                     key == t->parms.i_key &&
272                     link == t->parms.link &&
273                     type == t->dev->type)
274                         break;
275         }
276         return t;
277 }
278
/* Allocate and register a tunnel net_device for the given rtnl_link_ops
 * and parameters.  Must be called under RTNL.  Returns the new device
 * or an ERR_PTR on failure.
 */
static struct net_device *__ip_tunnel_create(struct net *net,
					     const struct rtnl_link_ops *ops,
					     struct ip_tunnel_parm *parms)
{
	int err;
	struct ip_tunnel *tunnel;
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else {
		/* Derive "<kind>%d" so the core picks a free unit number;
		 * need room for "%d" plus the NUL terminator.
		 */
		if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
			err = -E2BIG;
			goto failed;
		}
		strlcpy(name, ops->kind, IFNAMSIZ);
		strncat(name, "%d", 2);
	}

	ASSERT_RTNL();
	dev = alloc_netdev(ops->priv_size, name, ops->setup);
	if (!dev) {
		err = -ENOMEM;
		goto failed;
	}
	dev_net_set(dev, net);

	dev->rtnl_link_ops = ops;

	/* Stash the requested parameters in the device's private area
	 * before registration so ndo_init-time code can see them.
	 */
	tunnel = netdev_priv(dev);
	tunnel->parms = *parms;

	err = register_netdevice(dev);
	if (err)
		goto failed_free;

	return dev;

failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}
323
324 static inline struct rtable *ip_route_output_tunnel(struct net *net,
325                                                     struct flowi4 *fl4,
326                                                     int proto,
327                                                     __be32 daddr, __be32 saddr,
328                                                     __be32 key, __u8 tos, int oif)
329 {
330         memset(fl4, 0, sizeof(*fl4));
331         fl4->flowi4_oif = oif;
332         fl4->daddr = daddr;
333         fl4->saddr = saddr;
334         fl4->flowi4_tos = tos;
335         fl4->flowi4_proto = proto;
336         fl4->fl4_gre_key = key;
337         return ip_route_output_key(net, fl4);
338 }
339
/* Bind the tunnel to an underlying output device and compute a suitable
 * MTU for it.  Returns the MTU (clamped to a minimum of 68, the IPv4
 * minimum); also sets dev->iflink and dev->needed_headroom.
 */
static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		rt = ip_route_output_tunnel(dev_net(dev), &fl4,
					    tunnel->parms.iph.protocol,
					    iph->daddr, iph->saddr,
					    tunnel->parms.o_key,
					    RT_TOS(iph->tos),
					    tunnel->parms.link);
		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;
	}

	/* No route yet: fall back to the explicitly configured link. */
	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = tdev->mtu;
	}
	dev->iflink = tunnel->parms.link;

	/* Reserve room for the underlay's headers plus our tunnel header. */
	dev->needed_headroom = t_hlen + hlen;
	mtu -= (dev->hard_header_len + t_hlen);

	if (mtu < 68)
		mtu = 68;

	return mtu;
}
387
388 static struct ip_tunnel *ip_tunnel_create(struct net *net,
389                                           struct ip_tunnel_net *itn,
390                                           struct ip_tunnel_parm *parms)
391 {
392         struct ip_tunnel *nt, *fbt;
393         struct net_device *dev;
394
395         BUG_ON(!itn->fb_tunnel_dev);
396         fbt = netdev_priv(itn->fb_tunnel_dev);
397         dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
398         if (IS_ERR(dev))
399                 return NULL;
400
401         dev->mtu = ip_tunnel_bind_dev(dev);
402
403         nt = netdev_priv(dev);
404         ip_tunnel_add(itn, nt);
405         return nt;
406 }
407
/* Decapsulate and deliver a packet received on @tunnel.  @tpi carries
 * the parsed tunnel protocol info, @hdr_len the tunnel header length to
 * pull.  Consumes the skb in all cases and always returns 0.
 */
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, int hdr_len, bool log_ecn_error)
{
	struct pcpu_tstats *tstats;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

	secpath_reset(skb);

	skb->protocol = tpi->proto;

	/* Strip the tunnel header and fix up the pulled checksum. */
	skb->mac_header = skb->network_header;
	__pskb_pull(skb, hdr_len);
	skb_postpull_rcsum(skb, skb_transport_header(skb), tunnel->hlen);
#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		/* Looped back packet, drop it! */
		if (rt_is_output_route(skb_rtable(skb)))
			goto drop;
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	/* The packet's checksum flag must agree with the tunnel's
	 * configured TUNNEL_CSUM policy in both directions.
	 */
	if ((!(tpi->flags&TUNNEL_CSUM) &&  (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	     ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	/* Enforce in-order delivery when sequencing is enabled: drop
	 * packets missing a sequence number or arriving out of order.
	 */
	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	/* Warning: All skb pointers will be invalidated! */
	if (tunnel->dev->type == ARPHRD_ETHER) {
		if (!pskb_may_pull(skb, ETH_HLEN)) {
			tunnel->dev->stats.rx_length_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}

		/* Re-derive iph: pskb_may_pull may have reallocated. */
		iph = ip_hdr(skb);
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	}

	skb->pkt_type = PACKET_HOST;
	__skb_tunnel_rx(skb, tunnel->dev);

	skb_reset_network_header(skb);
	/* Propagate ECN from the outer header; err > 1 means the frame
	 * must be dropped (CE set on a non-ECT inner packet).
	 */
	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					&iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
492
/* Update path MTU state for a packet about to be tunnelled over @rt.
 * Returns 0 if the packet may proceed, or -E2BIG after sending the
 * appropriate "fragmentation needed" / "packet too big" ICMP error.
 */
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			    struct rtable *rt, __be16 df)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
	int mtu;

	/* With DF set, the effective MTU is the route MTU minus all the
	 * encapsulation overhead; otherwise use the inner dst's MTU.
	 */
	if (df)
		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
					- sizeof(struct iphdr) - tunnel->hlen;
	else
		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		/* GSO packets are segmented later and checked then. */
		if (!skb_is_gso(skb) &&
		    (df & htons(IP_DF)) && mtu < pkt_size) {
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

		/* Record the reduced MTU on host routes (or tunnels with
		 * a fixed unicast endpoint) so future sends honour it.
		 */
		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
			   mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			    !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
					mtu < pkt_size) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}
539
/* Encapsulate @skb with an outer IPv4 header built from @tnl_params and
 * transmit it.  Handles NBMA (no fixed destination) tunnels, TOS/TTL
 * inheritance, PMTU enforcement, and headroom expansion.  Consumes the
 * skb on every path.
 */
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *inner_iph;
	struct iphdr *iph;
	struct flowi4 fl4;
	u8     tos, ttl;
	__be16 df;
	struct rtable *rt;		/* Route to the other host */
	struct net_device *tdev;	/* Device to other host */
	unsigned int max_headroom;	/* The extra header space needed */
	__be32 dst;

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);

	memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel: derive the outer destination from the
		 * inner packet's routing state instead.
		 */

		if (skb_dst(skb) == NULL) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (neigh == NULL)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			/* Only IPv4-compatible IPv6 addresses embed a
			 * usable IPv4 destination in their low 32 bits.
			 */
			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;
	}

	/* Low bit of the configured TOS means "inherit from inner". */
	tos = tnl_params->tos;
	if (tos & 0x1) {
		tos &= ~0x1;
		if (skb->protocol == htons(ETH_P_IP))
			tos = inner_iph->tos;
		else if (skb->protocol == htons(ETH_P_IPV6))
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
	}

	rt = ip_route_output_tunnel(dev_net(dev), &fl4,
				    tunnel->parms.iph.protocol,
				    dst, tnl_params->saddr,
				    tunnel->parms.o_key,
				    RT_TOS(tos),
				    tunnel->parms.link);
	if (IS_ERR(rt)) {
		dev->stats.tx_carrier_errors++;
		goto tx_error;
	}
	tdev = rt->dst.dev;

	/* Routing back through ourselves would recurse forever. */
	if (tdev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	/* While recent ICMP errors are pending, signal link failure to
	 * the sender instead of blindly retransmitting into a black hole.
	 */
	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	/* TTL of 0 means "inherit from the inner packet". */
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	/* Propagate the inner DF bit to the outer header for IPv4. */
	df = tnl_params->frag_off;
	if (skb->protocol == htons(ETH_P_IP))
		df |= (inner_iph->frag_off&htons(IP_DF));

	max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr)
					       + rt->dst.header_len;
	if (max_headroom > dev->needed_headroom)
		dev->needed_headroom = max_headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		dev->stats.tx_dropped++;
		dev_kfree_skb(skb);
		return;
	}

	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);

	/* Push down and install the IP header. */
	skb_push(skb, sizeof(struct iphdr));
	skb_reset_network_header(skb);

	iph = ip_hdr(skb);
	/* Re-read: skb_cow_head/skb_push may have moved the data. */
	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);

	iph->version	=	4;
	iph->ihl	=	sizeof(struct iphdr) >> 2;
	iph->frag_off	=	df;
	iph->protocol	=	tnl_params->protocol;
	iph->tos	=	ip_tunnel_ecn_encap(tos, inner_iph, skb);
	iph->daddr	=	fl4.daddr;
	iph->saddr	=	fl4.saddr;
	iph->ttl	=	ttl;
	/* Advance the IP ID by the number of segments GSO will emit. */
	__ip_select_ident(iph, skb_shinfo(skb)->gso_segs ?: 1);

	iptunnel_xmit(skb, dev);
	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	dev_kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
708
/* Apply new parameters @p to an existing tunnel @t.  The tunnel is
 * unhashed and re-added because changing addresses/keys may move it to
 * a different hash bucket.  Rebinds the underlying device if the link
 * changed, optionally updating the MTU.
 */
static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu)
{
	ip_tunnel_del(t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		/* Point-to-point tunnels expose the endpoints as the
		 * device's hardware and broadcast addresses.
		 */
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link) {
		int mtu;

		t->parms.link = p->link;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	netdev_state_change(dev);
}
740
/* Legacy SIOC{GET,ADD,CHG,DEL}TUNNEL ioctl backend shared by the IPv4
 * tunnel drivers.  @p is the user-supplied parameter block (already
 * copied into kernel space by the caller) and is updated in place for
 * SIOCGETTUNNEL.  Returns 0 or a negative errno.
 */
int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t;
	struct net *net = dev_net(dev);
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	BUG_ON(!itn->fb_tunnel_dev);
	switch (cmd) {
	case SIOCGETTUNNEL:
		/* On the fallback device, look up by the parameters the
		 * caller passed; otherwise report this device's own.
		 */
		t = NULL;
		if (dev == itn->fb_tunnel_dev)
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		/* A nonzero TTL implies PMTU discovery, so force DF. */
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		/* Ignore stale key values when keying is disabled. */
		if (!(p->i_flags&TUNNEL_KEY))
			p->i_key = 0;
		if (!(p->o_flags&TUNNEL_KEY))
			p->o_key = 0;

		t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);

		if (!t && (cmd == SIOCADDTUNNEL))
			t = ip_tunnel_create(net, itn, p);

		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				/* The new parameters collide with a
				 * different existing tunnel.
				 */
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				/* Changing this device: the new daddr
				 * must not flip its broadcast/p2p mode.
				 */
				unsigned int nflags = 0;

				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true);
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (t == NULL)
				goto done;
			/* The fallback device itself may not be deleted. */
			err = -EPERM;
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
834
835 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
836 {
837         struct ip_tunnel *tunnel = netdev_priv(dev);
838         int t_hlen = tunnel->hlen + sizeof(struct iphdr);
839
840         if (new_mtu < 68 ||
841             new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
842                 return -EINVAL;
843         dev->mtu = new_mtu;
844         return 0;
845 }
846 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
847
/* net_device destructor: release the GRO cells and per-cpu stats
 * before freeing the device itself.
 */
static void ip_tunnel_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	free_percpu(dev->tstats);
	free_netdev(dev);
}
856
857 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
858 {
859         struct net *net = dev_net(dev);
860         struct ip_tunnel *tunnel = netdev_priv(dev);
861         struct ip_tunnel_net *itn;
862
863         itn = net_generic(net, tunnel->ip_tnl_net_id);
864
865         if (itn->fb_tunnel_dev != dev) {
866                 ip_tunnel_del(netdev_priv(dev));
867                 unregister_netdevice_queue(dev, head);
868         }
869 }
870 EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
871
872 int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
873                                   struct rtnl_link_ops *ops, char *devname)
874 {
875         struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
876         struct ip_tunnel_parm parms;
877
878         itn->tunnels = kzalloc(IP_TNL_HASH_SIZE * sizeof(struct hlist_head), GFP_KERNEL);
879         if (!itn->tunnels)
880                 return -ENOMEM;
881
882         if (!ops) {
883                 itn->fb_tunnel_dev = NULL;
884                 return 0;
885         }
886         memset(&parms, 0, sizeof(parms));
887         if (devname)
888                 strlcpy(parms.name, devname, IFNAMSIZ);
889
890         rtnl_lock();
891         itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
892         rtnl_unlock();
893         if (IS_ERR(itn->fb_tunnel_dev)) {
894                 kfree(itn->tunnels);
895                 return PTR_ERR(itn->fb_tunnel_dev);
896         }
897
898         return 0;
899 }
900 EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
901
902 static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head)
903 {
904         int h;
905
906         for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
907                 struct ip_tunnel *t;
908                 struct hlist_node *n;
909                 struct hlist_head *thead = &itn->tunnels[h];
910
911                 hlist_for_each_entry_safe(t, n, thead, hash_node)
912                         unregister_netdevice_queue(t->dev, head);
913         }
914         if (itn->fb_tunnel_dev)
915                 unregister_netdevice_queue(itn->fb_tunnel_dev, head);
916 }
917
/* Netns-exit teardown for one tunnel type: under RTNL, collect every
 * tunnel device (including the fallback) onto a local list and
 * unregister them in one batch, then release the hash table allocated
 * by ip_tunnel_init_net().
 */
void ip_tunnel_delete_net(struct ip_tunnel_net *itn)
{
	LIST_HEAD(list);

	rtnl_lock();
	ip_tunnel_destroy(itn, &list);
	unregister_netdevice_many(&list);
	rtnl_unlock();
	kfree(itn->tunnels);
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
929
930 int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
931                       struct ip_tunnel_parm *p)
932 {
933         struct ip_tunnel *nt;
934         struct net *net = dev_net(dev);
935         struct ip_tunnel_net *itn;
936         int mtu;
937         int err;
938
939         nt = netdev_priv(dev);
940         itn = net_generic(net, nt->ip_tnl_net_id);
941
942         if (ip_tunnel_find(itn, p, dev->type))
943                 return -EEXIST;
944
945         nt->parms = *p;
946         err = register_netdevice(dev);
947         if (err)
948                 goto out;
949
950         if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
951                 eth_hw_addr_random(dev);
952
953         mtu = ip_tunnel_bind_dev(dev);
954         if (!tb[IFLA_MTU])
955                 dev->mtu = mtu;
956
957         ip_tunnel_add(itn, nt);
958
959 out:
960         return err;
961 }
962 EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
963
964 int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
965                          struct ip_tunnel_parm *p)
966 {
967         struct ip_tunnel *t, *nt;
968         struct net *net = dev_net(dev);
969         struct ip_tunnel *tunnel = netdev_priv(dev);
970         struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
971
972         if (dev == itn->fb_tunnel_dev)
973                 return -EINVAL;
974
975         nt = netdev_priv(dev);
976
977         t = ip_tunnel_find(itn, p, dev->type);
978
979         if (t) {
980                 if (t->dev != dev)
981                         return -EEXIST;
982         } else {
983                 t = nt;
984
985                 if (dev->type != ARPHRD_ETHER) {
986                         unsigned int nflags = 0;
987
988                         if (ipv4_is_multicast(p->iph.daddr))
989                                 nflags = IFF_BROADCAST;
990                         else if (p->iph.daddr)
991                                 nflags = IFF_POINTOPOINT;
992
993                         if ((dev->flags ^ nflags) &
994                             (IFF_POINTOPOINT | IFF_BROADCAST))
995                                 return -EINVAL;
996                 }
997         }
998
999         ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
1000         return 0;
1001 }
1002 EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1003
1004 int ip_tunnel_init(struct net_device *dev)
1005 {
1006         struct ip_tunnel *tunnel = netdev_priv(dev);
1007         struct iphdr *iph = &tunnel->parms.iph;
1008         int err;
1009
1010         dev->destructor = ip_tunnel_dev_free;
1011         dev->tstats = alloc_percpu(struct pcpu_tstats);
1012         if (!dev->tstats)
1013                 return -ENOMEM;
1014
1015         err = gro_cells_init(&tunnel->gro_cells, dev);
1016         if (err) {
1017                 free_percpu(dev->tstats);
1018                 return err;
1019         }
1020
1021         tunnel->dev = dev;
1022         strcpy(tunnel->parms.name, dev->name);
1023         iph->version            = 4;
1024         iph->ihl                = 5;
1025
1026         return 0;
1027 }
1028 EXPORT_SYMBOL_GPL(ip_tunnel_init);
1029
1030 void ip_tunnel_uninit(struct net_device *dev)
1031 {
1032         struct net *net = dev_net(dev);
1033         struct ip_tunnel *tunnel = netdev_priv(dev);
1034         struct ip_tunnel_net *itn;
1035
1036         itn = net_generic(net, tunnel->ip_tnl_net_id);
1037         /* fb_tunnel_dev will be unregisted in net-exit call. */
1038         if (itn->fb_tunnel_dev != dev)
1039                 ip_tunnel_del(netdev_priv(dev));
1040 }
1041 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1042
/* Do least required initialization, rest of init is done in tunnel_init call */
void ip_tunnel_setup(struct net_device *dev, int net_id)
{
	/* Record the pernet id so later callbacks (init/uninit/dellink) can
	 * locate this tunnel type's ip_tunnel_net via net_generic(). */
	struct ip_tunnel *tunnel = netdev_priv(dev);
	tunnel->ip_tnl_net_id = net_id;
}
EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1050
1051 MODULE_LICENSE("GPL");