Merge branch 'v3.10/topic/gator' of git://git.linaro.org/kernel/linux-linaro-stable...
[firefly-linux-kernel-4.4.55.git] / net / ipv4 / ip_tunnel.c
1 /*
2  * Copyright (c) 2013 Nicira, Inc.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of version 2 of the GNU General Public
6  * License as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16  * 02110-1301, USA
17  */
18
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21 #include <linux/capability.h>
22 #include <linux/module.h>
23 #include <linux/types.h>
24 #include <linux/kernel.h>
25 #include <linux/slab.h>
26 #include <linux/uaccess.h>
27 #include <linux/skbuff.h>
28 #include <linux/netdevice.h>
29 #include <linux/in.h>
30 #include <linux/tcp.h>
31 #include <linux/udp.h>
32 #include <linux/if_arp.h>
33 #include <linux/mroute.h>
34 #include <linux/init.h>
35 #include <linux/in6.h>
36 #include <linux/inetdevice.h>
37 #include <linux/igmp.h>
38 #include <linux/netfilter_ipv4.h>
39 #include <linux/etherdevice.h>
40 #include <linux/if_ether.h>
41 #include <linux/if_vlan.h>
42 #include <linux/rculist.h>
43
44 #include <net/sock.h>
45 #include <net/ip.h>
46 #include <net/icmp.h>
47 #include <net/protocol.h>
48 #include <net/ip_tunnels.h>
49 #include <net/arp.h>
50 #include <net/checksum.h>
51 #include <net/dsfield.h>
52 #include <net/inet_ecn.h>
53 #include <net/xfrm.h>
54 #include <net/net_namespace.h>
55 #include <net/netns/generic.h>
56 #include <net/rtnetlink.h>
57
58 #if IS_ENABLED(CONFIG_IPV6)
59 #include <net/ipv6.h>
60 #include <net/ip6_fib.h>
61 #include <net/ip6_route.h>
62 #endif
63
64 static unsigned int ip_tunnel_hash(struct ip_tunnel_net *itn,
65                                    __be32 key, __be32 remote)
66 {
67         return hash_32((__force u32)key ^ (__force u32)remote,
68                          IP_TNL_HASH_BITS);
69 }
70
71 /* Often modified stats are per cpu, other are shared (netdev->stats) */
72 struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
73                                                 struct rtnl_link_stats64 *tot)
74 {
75         int i;
76
77         for_each_possible_cpu(i) {
78                 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
79                 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
80                 unsigned int start;
81
82                 do {
83                         start = u64_stats_fetch_begin_bh(&tstats->syncp);
84                         rx_packets = tstats->rx_packets;
85                         tx_packets = tstats->tx_packets;
86                         rx_bytes = tstats->rx_bytes;
87                         tx_bytes = tstats->tx_bytes;
88                 } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
89
90                 tot->rx_packets += rx_packets;
91                 tot->tx_packets += tx_packets;
92                 tot->rx_bytes   += rx_bytes;
93                 tot->tx_bytes   += tx_bytes;
94         }
95
96         tot->multicast = dev->stats.multicast;
97
98         tot->rx_crc_errors = dev->stats.rx_crc_errors;
99         tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
100         tot->rx_length_errors = dev->stats.rx_length_errors;
101         tot->rx_frame_errors = dev->stats.rx_frame_errors;
102         tot->rx_errors = dev->stats.rx_errors;
103
104         tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
105         tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
106         tot->tx_dropped = dev->stats.tx_dropped;
107         tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
108         tot->tx_errors = dev->stats.tx_errors;
109
110         tot->collisions  = dev->stats.collisions;
111
112         return tot;
113 }
114 EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64);
115
116 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
117                                 __be16 flags, __be32 key)
118 {
119         if (p->i_flags & TUNNEL_KEY) {
120                 if (flags & TUNNEL_KEY)
121                         return key == p->i_key;
122                 else
123                         /* key expected, none present */
124                         return false;
125         } else
126                 return !(flags & TUNNEL_KEY);
127 }
128
129 /* Fallback tunnel: no source, no destination, no key, no options
130
131    Tunnel hash table:
132    We require exact key match i.e. if a key is present in packet
133    it will match only tunnel with the same key; if it is not present,
134    it will match only keyless tunnel.
135
136    All keysless packets, if not matched configured keyless tunnels
137    will match fallback tunnel.
138    Given src, dst and key, find appropriate for input tunnel.
139 */
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, __be16 flags,
				   __be32 remote, __be32 local,
				   __be32 key)
{
	unsigned int hash;
	struct ip_tunnel *t, *cand = NULL;
	struct hlist_head *head;

	hash = ip_tunnel_hash(itn, key, remote);
	head = &itn->tunnels[hash];

	/* Pass 1: exact match on both endpoints (and key presence/value).
	 * A tunnel bound to the ingress link wins outright; any other match
	 * is remembered as a candidate while we keep looking.
	 */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else
			cand = t;
	}

	/* Pass 2: same bucket, remote-only match (tunnel with wildcard
	 * local address).  Only fill cand if pass 1 left it empty, so the
	 * more specific match keeps priority.
	 */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	/* Pass 3: re-hash with wildcard remote.  Accept tunnels whose local
	 * address equals our saddr, or multicast tunnels addressed to the
	 * (multicast) destination the packet arrived on.
	 */
	hash = ip_tunnel_hash(itn, key, 0);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((local != t->parms.iph.saddr &&
		     (local != t->parms.iph.daddr ||
		      !ipv4_is_multicast(local))) ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	if (flags & TUNNEL_NO_KEY)
		goto skip_key_lookup;

	/* Pass 4: key-only match in the wildcard-remote bucket. */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (t->parms.i_key != key ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

skip_key_lookup:
	if (cand)
		return cand;

	/* Nothing matched: fall back to the per-netns fallback device. */
	if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
		return netdev_priv(itn->fb_tunnel_dev);


	return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
225
226 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
227                                     struct ip_tunnel_parm *parms)
228 {
229         unsigned int h;
230         __be32 remote;
231
232         if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
233                 remote = parms->iph.daddr;
234         else
235                 remote = 0;
236
237         h = ip_tunnel_hash(itn, parms->i_key, remote);
238         return &itn->tunnels[h];
239 }
240
241 static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
242 {
243         struct hlist_head *head = ip_bucket(itn, &t->parms);
244
245         hlist_add_head_rcu(&t->hash_node, head);
246 }
247
/* Unlink the tunnel from its hash bucket; concurrent RCU readers may
 * still observe it until a grace period elapses.
 */
static void ip_tunnel_del(struct ip_tunnel *t)
{
	hlist_del_init_rcu(&t->hash_node);
}
252
253 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
254                                         struct ip_tunnel_parm *parms,
255                                         int type)
256 {
257         __be32 remote = parms->iph.daddr;
258         __be32 local = parms->iph.saddr;
259         __be32 key = parms->i_key;
260         int link = parms->link;
261         struct ip_tunnel *t = NULL;
262         struct hlist_head *head = ip_bucket(itn, parms);
263
264         hlist_for_each_entry_rcu(t, head, hash_node) {
265                 if (local == t->parms.iph.saddr &&
266                     remote == t->parms.iph.daddr &&
267                     key == t->parms.i_key &&
268                     link == t->parms.link &&
269                     type == t->dev->type)
270                         break;
271         }
272         return t;
273 }
274
/* Allocate and register a tunnel net_device named after parms->name, or
 * after "<ops->kind>%d" when no name was given.  Must be called under
 * RTNL.  Returns the device or an ERR_PTR on failure.
 */
static struct net_device *__ip_tunnel_create(struct net *net,
					     const struct rtnl_link_ops *ops,
					     struct ip_tunnel_parm *parms)
{
	int err;
	struct ip_tunnel *tunnel;
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else {
		/* Need room in name[] for "%d" plus the terminating NUL. */
		if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
			err = -E2BIG;
			goto failed;
		}
		strlcpy(name, ops->kind, IFNAMSIZ);
		strncat(name, "%d", 2);
	}

	ASSERT_RTNL();
	dev = alloc_netdev(ops->priv_size, name, ops->setup);
	if (!dev) {
		err = -ENOMEM;
		goto failed;
	}
	dev_net_set(dev, net);

	dev->rtnl_link_ops = ops;

	/* Stash the requested parameters before registration so ndo hooks
	 * triggered by register_netdevice() see a fully-initialized tunnel.
	 */
	tunnel = netdev_priv(dev);
	tunnel->parms = *parms;

	err = register_netdevice(dev);
	if (err)
		goto failed_free;

	return dev;

failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}
319
/* Build a flow key describing the tunnel's outer header and look up a
 * route for it.  Caller must test the result with IS_ERR().
 */
static inline struct rtable *ip_route_output_tunnel(struct net *net,
						    struct flowi4 *fl4,
						    int proto,
						    __be32 daddr, __be32 saddr,
						    __be32 key, __u8 tos, int oif)
{
	memset(fl4, 0, sizeof(*fl4));
	fl4->flowi4_oif = oif;
	fl4->daddr = daddr;
	fl4->saddr = saddr;
	fl4->flowi4_tos = tos;
	fl4->flowi4_proto = proto;
	fl4->fl4_gre_key = key;
	return ip_route_output_key(net, fl4);
}
335
/* Bind the tunnel to an underlying device (via routing of the remote
 * endpoint, or parms.link) and derive needed_headroom.  Returns the MTU
 * the tunnel device should use, clamped to the IPv4 minimum of 68.
 */
static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		rt = ip_route_output_tunnel(dev_net(dev), &fl4,
					    tunnel->parms.iph.protocol,
					    iph->daddr, iph->saddr,
					    tunnel->parms.o_key,
					    RT_TOS(iph->tos),
					    tunnel->parms.link);
		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;
	}

	/* No route (or no remote): fall back to the explicitly bound link. */
	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = tdev->mtu;
	}
	dev->iflink = tunnel->parms.link;

	/* Reserve room for the outer IP + tunnel headers on xmit. */
	dev->needed_headroom = t_hlen + hlen;
	mtu -= (dev->hard_header_len + t_hlen);

	if (mtu < 68)
		mtu = 68;

	return mtu;
}
383
384 static struct ip_tunnel *ip_tunnel_create(struct net *net,
385                                           struct ip_tunnel_net *itn,
386                                           struct ip_tunnel_parm *parms)
387 {
388         struct ip_tunnel *nt, *fbt;
389         struct net_device *dev;
390
391         BUG_ON(!itn->fb_tunnel_dev);
392         fbt = netdev_priv(itn->fb_tunnel_dev);
393         dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
394         if (IS_ERR(dev))
395                 return NULL;
396
397         dev->mtu = ip_tunnel_bind_dev(dev);
398
399         nt = netdev_priv(dev);
400         ip_tunnel_add(itn, nt);
401         return nt;
402 }
403
/* Common receive path for decapsulated IP tunnel packets.  Validates the
 * packet against the tunnel's csum/seq expectations, strips the tunnel
 * header, updates stats, and hands the inner packet to GRO.  Consumes the
 * skb in all cases and always returns 0.
 */
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, int hdr_len, bool log_ecn_error)
{
	struct pcpu_tstats *tstats;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

	secpath_reset(skb);

	skb->protocol = tpi->proto;

	/* Strip the tunnel header and fix up the checksum for the pull. */
	skb->mac_header = skb->network_header;
	__pskb_pull(skb, hdr_len);
	skb_postpull_rcsum(skb, skb_transport_header(skb), tunnel->hlen);
#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		/* Looped back packet, drop it! */
		if (rt_is_output_route(skb_rtable(skb)))
			goto drop;
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	/* Checksum presence must agree with the tunnel's configuration. */
	if ((!(tpi->flags&TUNNEL_CSUM) &&  (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	     ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	/* Enforce in-order delivery when sequencing is enabled. */
	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	/* Warning: All skb pointers will be invalidated! */
	if (tunnel->dev->type == ARPHRD_ETHER) {
		if (!pskb_may_pull(skb, ETH_HLEN)) {
			tunnel->dev->stats.rx_length_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}

		/* Re-read iph: pskb_may_pull may have moved the data. */
		iph = ip_hdr(skb);
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	}

	skb->pkt_type = PACKET_HOST;
	__skb_tunnel_rx(skb, tunnel->dev);

	skb_reset_network_header(skb);
	/* err > 1 means the inner packet carried ECT while the outer was CE
	 * in an invalid combination: count it as a frame error and drop.
	 */
	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					&iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
488
/* Propagate path-MTU information for a packet about to be tunneled.
 * Updates the inner route's PMTU and, when the packet is too big and
 * cannot be fragmented, sends the appropriate ICMP(v6) error back and
 * returns -E2BIG; returns 0 when transmission may proceed.
 */
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			    struct rtable *rt, __be16 df)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
	int mtu;

	/* With DF set the usable MTU is the outer path MTU minus all
	 * encapsulation overhead; otherwise use the inner dst's MTU.
	 */
	if (df)
		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
					- sizeof(struct iphdr) - tunnel->hlen;
	else
		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		if (!skb_is_gso(skb) &&
		    (df & htons(IP_DF)) && mtu < pkt_size) {
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

		/* Record the reduced MTU on host routes (or tunnels with a
		 * fixed unicast remote) so future sends see it.
		 */
		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
			   mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			    !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
					mtu < pkt_size) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}
535
/* Common transmit path: resolve the outer destination (including NBMA
 * tunnels with no fixed remote), route the outer header, handle PMTU,
 * build the encapsulating IP header and send.  Consumes the skb.
 */
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *inner_iph;
	struct iphdr *iph;
	struct flowi4 fl4;
	u8     tos, ttl;
	__be16 df;
	struct rtable *rt;		/* Route to the other host */
	struct net_device *tdev;	/* Device to other host */
	unsigned int max_headroom;	/* The extra header space needed */
	__be32 dst;

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);

	memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel */

		if (skb_dst(skb) == NULL) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		/* Derive the outer destination from the inner packet's
		 * routing state (IPv4 next hop, or an IPv4-compatible
		 * IPv6 neighbour address).
		 */
		if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (neigh == NULL)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			/* Only v4-compatible addresses embed an IPv4
			 * destination we can tunnel to.
			 */
			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;
	}

	/* Low bit of configured TOS means "inherit from inner packet". */
	tos = tnl_params->tos;
	if (tos & 0x1) {
		tos &= ~0x1;
		if (skb->protocol == htons(ETH_P_IP))
			tos = inner_iph->tos;
		else if (skb->protocol == htons(ETH_P_IPV6))
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
	}

	rt = ip_route_output_tunnel(dev_net(dev), &fl4,
				    tunnel->parms.iph.protocol,
				    dst, tnl_params->saddr,
				    tunnel->parms.o_key,
				    RT_TOS(tos),
				    tunnel->parms.link);
	if (IS_ERR(rt)) {
		dev->stats.tx_carrier_errors++;
		goto tx_error;
	}
	tdev = rt->dst.dev;

	/* Routing back into ourselves would loop forever. */
	if (tdev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}


	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	/* While recent ICMP errors are pending, signal link failure to
	 * the sender instead of silently transmitting.
	 */
	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	/* TTL 0 means "inherit from inner packet" (or the route's default). */
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	/* Propagate the inner DF bit to the outer header. */
	df = tnl_params->frag_off;
	if (skb->protocol == htons(ETH_P_IP))
		df |= (inner_iph->frag_off&htons(IP_DF));

	max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr)
					       + rt->dst.header_len;
	if (max_headroom > dev->needed_headroom)
		dev->needed_headroom = max_headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		dev->stats.tx_dropped++;
		dev_kfree_skb(skb);
		return;
	}

	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);

	/* Push down and install the IP header. */
	skb_push(skb, sizeof(struct iphdr));
	skb_reset_network_header(skb);

	iph = ip_hdr(skb);
	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);

	iph->version	=	4;
	iph->ihl	=	sizeof(struct iphdr) >> 2;
	iph->frag_off	=	df;
	iph->protocol	=	tnl_params->protocol;
	iph->tos	=	ip_tunnel_ecn_encap(tos, inner_iph, skb);
	iph->daddr	=	fl4.daddr;
	iph->saddr	=	fl4.saddr;
	iph->ttl	=	ttl;
	__ip_select_ident(iph, &rt->dst, (skb_shinfo(skb)->gso_segs ?: 1) - 1);

	iptunnel_xmit(skb, dev);
	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	dev_kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
704
/* Apply new parameters to an existing tunnel.  The tunnel is removed
 * from and re-added to the hash table because changing addresses/key can
 * change its bucket.
 */
static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu)
{
	ip_tunnel_del(t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		/* Non-Ethernet tunnels expose the endpoints as dev/bcast
		 * addresses (4 bytes each, the IPv4 addresses).
		 */
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	/* Rebinding to a different underlying link may change the MTU. */
	if (t->parms.link != p->link) {
		int mtu;

		t->parms.link = p->link;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	netdev_state_change(dev);
}
736
/* Legacy ioctl interface: get/add/change/delete tunnels.  p has already
 * been copied in from userspace by the caller and is copied back out on
 * SIOCGETTUNNEL.  Returns 0 or a negative errno.
 */
int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t;
	struct net *net = dev_net(dev);
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	BUG_ON(!itn->fb_tunnel_dev);
	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		/* On the fallback device, look the tunnel up by parms;
		 * on a real tunnel device, report that device itself.
		 */
		if (dev == itn->fb_tunnel_dev)
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		/* A fixed TTL implies DF (PMTU discovery must work). */
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		/* Ignore stale key values when keying is disabled. */
		if (!(p->i_flags&TUNNEL_KEY))
			p->i_key = 0;
		if (!(p->o_flags&TUNNEL_KEY))
			p->o_key = 0;

		t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);

		if (!t && (cmd == SIOCADDTUNNEL))
			t = ip_tunnel_create(net, itn, p);

		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				/* parms match a different device: refuse. */
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				/* Changing this device's endpoints must not
				 * flip it between broadcast/p-t-p modes.
				 */
				unsigned int nflags = 0;

				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true);
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (t == NULL)
				goto done;
			err = -EPERM;
			/* The fallback device itself cannot be deleted. */
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
830
831 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
832 {
833         struct ip_tunnel *tunnel = netdev_priv(dev);
834         int t_hlen = tunnel->hlen + sizeof(struct iphdr);
835
836         if (new_mtu < 68 ||
837             new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
838                 return -EINVAL;
839         dev->mtu = new_mtu;
840         return 0;
841 }
842 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
843
/* net_device destructor: release tunnel-private resources before the
 * device itself.  Order matters: gro_cells and tstats are referenced by
 * the device being freed.
 */
static void ip_tunnel_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	free_percpu(dev->tstats);
	free_netdev(dev);
}
852
/* rtnl_link_ops dellink: remove the tunnel from the hash table and queue
 * the device for unregistration.  The per-netns fallback device is never
 * deleted this way.
 */
void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
{
	struct net *net = dev_net(dev);
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn;

	itn = net_generic(net, tunnel->ip_tnl_net_id);

	if (itn->fb_tunnel_dev != dev) {
		ip_tunnel_del(netdev_priv(dev));
		unregister_netdevice_queue(dev, head);
	}
}
EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
867
868 int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
869                                   struct rtnl_link_ops *ops, char *devname)
870 {
871         struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
872         struct ip_tunnel_parm parms;
873
874         itn->tunnels = kzalloc(IP_TNL_HASH_SIZE * sizeof(struct hlist_head), GFP_KERNEL);
875         if (!itn->tunnels)
876                 return -ENOMEM;
877
878         if (!ops) {
879                 itn->fb_tunnel_dev = NULL;
880                 return 0;
881         }
882         memset(&parms, 0, sizeof(parms));
883         if (devname)
884                 strlcpy(parms.name, devname, IFNAMSIZ);
885
886         rtnl_lock();
887         itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
888         rtnl_unlock();
889         if (IS_ERR(itn->fb_tunnel_dev)) {
890                 kfree(itn->tunnels);
891                 return PTR_ERR(itn->fb_tunnel_dev);
892         }
893
894         return 0;
895 }
896 EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
897
898 static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head)
899 {
900         int h;
901
902         for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
903                 struct ip_tunnel *t;
904                 struct hlist_node *n;
905                 struct hlist_head *thead = &itn->tunnels[h];
906
907                 hlist_for_each_entry_safe(t, n, thead, hash_node)
908                         unregister_netdevice_queue(t->dev, head);
909         }
910         if (itn->fb_tunnel_dev)
911                 unregister_netdevice_queue(itn->fb_tunnel_dev, head);
912 }
913
914 void ip_tunnel_delete_net(struct ip_tunnel_net *itn)
915 {
916         LIST_HEAD(list);
917
918         rtnl_lock();
919         ip_tunnel_destroy(itn, &list);
920         unregister_netdevice_many(&list);
921         rtnl_unlock();
922         kfree(itn->tunnels);
923 }
924 EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
925
926 int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
927                       struct ip_tunnel_parm *p)
928 {
929         struct ip_tunnel *nt;
930         struct net *net = dev_net(dev);
931         struct ip_tunnel_net *itn;
932         int mtu;
933         int err;
934
935         nt = netdev_priv(dev);
936         itn = net_generic(net, nt->ip_tnl_net_id);
937
938         if (ip_tunnel_find(itn, p, dev->type))
939                 return -EEXIST;
940
941         nt->parms = *p;
942         err = register_netdevice(dev);
943         if (err)
944                 goto out;
945
946         if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
947                 eth_hw_addr_random(dev);
948
949         mtu = ip_tunnel_bind_dev(dev);
950         if (!tb[IFLA_MTU])
951                 dev->mtu = mtu;
952
953         ip_tunnel_add(itn, nt);
954
955 out:
956         return err;
957 }
958 EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
959
960 int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
961                          struct ip_tunnel_parm *p)
962 {
963         struct ip_tunnel *t, *nt;
964         struct net *net = dev_net(dev);
965         struct ip_tunnel *tunnel = netdev_priv(dev);
966         struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
967
968         if (dev == itn->fb_tunnel_dev)
969                 return -EINVAL;
970
971         nt = netdev_priv(dev);
972
973         t = ip_tunnel_find(itn, p, dev->type);
974
975         if (t) {
976                 if (t->dev != dev)
977                         return -EEXIST;
978         } else {
979                 t = nt;
980
981                 if (dev->type != ARPHRD_ETHER) {
982                         unsigned int nflags = 0;
983
984                         if (ipv4_is_multicast(p->iph.daddr))
985                                 nflags = IFF_BROADCAST;
986                         else if (p->iph.daddr)
987                                 nflags = IFF_POINTOPOINT;
988
989                         if ((dev->flags ^ nflags) &
990                             (IFF_POINTOPOINT | IFF_BROADCAST))
991                                 return -EINVAL;
992                 }
993         }
994
995         ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
996         return 0;
997 }
998 EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
999
1000 int ip_tunnel_init(struct net_device *dev)
1001 {
1002         struct ip_tunnel *tunnel = netdev_priv(dev);
1003         struct iphdr *iph = &tunnel->parms.iph;
1004         int err;
1005
1006         dev->destructor = ip_tunnel_dev_free;
1007         dev->tstats = alloc_percpu(struct pcpu_tstats);
1008         if (!dev->tstats)
1009                 return -ENOMEM;
1010
1011         err = gro_cells_init(&tunnel->gro_cells, dev);
1012         if (err) {
1013                 free_percpu(dev->tstats);
1014                 return err;
1015         }
1016
1017         tunnel->dev = dev;
1018         strcpy(tunnel->parms.name, dev->name);
1019         iph->version            = 4;
1020         iph->ihl                = 5;
1021
1022         return 0;
1023 }
1024 EXPORT_SYMBOL_GPL(ip_tunnel_init);
1025
1026 void ip_tunnel_uninit(struct net_device *dev)
1027 {
1028         struct net *net = dev_net(dev);
1029         struct ip_tunnel *tunnel = netdev_priv(dev);
1030         struct ip_tunnel_net *itn;
1031
1032         itn = net_generic(net, tunnel->ip_tnl_net_id);
1033         /* fb_tunnel_dev will be unregisted in net-exit call. */
1034         if (itn->fb_tunnel_dev != dev)
1035                 ip_tunnel_del(netdev_priv(dev));
1036 }
1037 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1038
/* Do least required initialization; the rest of the init happens in
 * the tunnel_init (ndo_init) call once the device is registered. */
void ip_tunnel_setup(struct net_device *dev, int net_id)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	/* Stash the net_generic() id so the other helpers here can look
	 * up the per-netns state (struct ip_tunnel_net) for this type. */
	tunnel->ip_tnl_net_id = net_id;
}
1045 EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1046
1047 MODULE_LICENSE("GPL");