net: convert __IPTUNNEL_XMIT() to an inline function
net/ipv4/ipip.c (firefly-linux-kernel-4.4.55.git)
1 /*
2  *      Linux NET3:     IP/IP protocol decoder.
3  *
4  *      Authors:
5  *              Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
6  *
7  *      Fixes:
8  *              Alan Cox        :       Merged and made usable non-modular (it's so tiny it's silly as
9  *                                      a module taking up 2 pages).
10  *              Alan Cox        :       Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
11  *                                      to keep ip_forward happy.
12  *              Alan Cox        :       More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
13  *              Kai Schulte     :       Fixed #defines for IP_FIREWALL->FIREWALL
14  *              David Woodhouse :       Perform some basic ICMP handling.
15  *                                      IPIP Routing without decapsulation.
16  *              Carlos Picoto   :       GRE over IP support
17  *              Alexey Kuznetsov:       Reworked. Really, now it is a truncated version of ipv4/ip_gre.c.
18  *                                      I do not want to merge them together.
19  *
20  *      This program is free software; you can redistribute it and/or
21  *      modify it under the terms of the GNU General Public License
22  *      as published by the Free Software Foundation; either version
23  *      2 of the License, or (at your option) any later version.
24  *
25  */
26
27 /* tunnel.c: an IP tunnel driver
28
29         The purpose of this driver is to provide an IP tunnel through
30         which you can tunnel network traffic transparently across subnets.
31
32         This was written by looking at Nick Holloway's dummy driver.
33         Thanks for the great code!
34
35                 -Sam Lantinga   (slouken@cs.ucdavis.edu)  02/01/95
36
37         Minor tweaks:
38                 Cleaned up the code a little and added some pre-1.3.0 tweaks.
39                 dev->hard_header/hard_header_len changed to use no headers.
40                 Comments/bracketing tweaked.
41                 Made the tunnels use dev->name rather than "tunnel:" when error reporting.
42                 Added tx_dropped stat
43
44                 -Alan Cox       (alan@lxorguk.ukuu.org.uk) 21 March 95
45
46         Reworked:
47                 Changed to tunnel to the destination gateway in addition to the
48                         tunnel's pointopoint address
49                 Almost completely rewritten
50                 Note:  There is currently no firewall or ICMP handling done.
51
52                 -Sam Lantinga   (slouken@cs.ucdavis.edu) 02/13/96
53
54 */
55
56 /* Things I wish I had known when writing the tunnel driver:
57
58         When the tunnel_xmit() function is called, the skb contains the
59         packet to be sent (plus a great deal of extra info), and dev
60         contains the tunnel device that _we_ are.
61
62         When we are passed a packet, we are expected to fill in the
63         source address with our source IP address.
64
65         What is the proper way to allocate, copy and free a buffer?
66         After you allocate it, it is a "0 length" chunk of memory
67         starting at zero.  If you want to add headers to the buffer
68         later, you'll have to call "skb_reserve(skb, amount)" with
69         the amount of memory you want reserved.  Then, you call
70         "skb_put(skb, amount)" with the amount of space you want in
71         the buffer.  skb_put() returns a pointer to the top (#0) of
72         that buffer.  skb->len is set to the amount of space you have
73         "allocated" with skb_put().  You can then write up to skb->len
74         bytes to that buffer.  If you need more, you can call skb_put()
75         again with the additional amount of space you need.  You can
76         find out how much more space you can allocate by calling
77         "skb_tailroom(skb)".
78         Now, to add header space, call "skb_push(skb, header_len)".
79         This creates space at the beginning of the buffer and returns
80         a pointer to this new space.  If later you need to strip a
81         header from a buffer, call "skb_pull(skb, header_len)".
82         skb_headroom() will return how much space is left at the top
83         of the buffer (before the main data).  Remember, this headroom
84         space must be reserved before the skb_put() function is called.
85         */
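
/* A minimal sketch of the pattern described above; illustrative only and not
 * part of this driver.  header_len, payload_len and data are assumed example
 * values.  Headroom is reserved first, the payload is appended with
 * skb_put(), and the header is prepended later with skb_push():
 *
 *      struct sk_buff *skb = alloc_skb(header_len + payload_len, GFP_ATOMIC);
 *      if (skb) {
 *              skb_reserve(skb, header_len);   - keep room for the header
 *              memcpy(skb_put(skb, payload_len), data, payload_len);
 *              skb_push(skb, header_len);      - returns a pointer to the
 *                                                freshly opened header space
 *      }
 */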
86
87 /*
88    This version of net/ipv4/ipip.c is cloned from net/ipv4/ip_gre.c
89
90    For comments look at net/ipv4/ip_gre.c --ANK
91  */
92
93
94 #include <linux/capability.h>
95 #include <linux/module.h>
96 #include <linux/types.h>
97 #include <linux/kernel.h>
98 #include <linux/slab.h>
99 #include <asm/uaccess.h>
100 #include <linux/skbuff.h>
101 #include <linux/netdevice.h>
102 #include <linux/in.h>
103 #include <linux/tcp.h>
104 #include <linux/udp.h>
105 #include <linux/if_arp.h>
106 #include <linux/mroute.h>
107 #include <linux/init.h>
108 #include <linux/netfilter_ipv4.h>
109 #include <linux/if_ether.h>
110
111 #include <net/sock.h>
112 #include <net/ip.h>
113 #include <net/icmp.h>
114 #include <net/ipip.h>
115 #include <net/inet_ecn.h>
116 #include <net/xfrm.h>
117 #include <net/net_namespace.h>
118 #include <net/netns/generic.h>
119
120 #define HASH_SIZE  16
121 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
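/* The hash folds the (big-endian) address onto its low nibble by XOR-ing it
 * with itself shifted right by four bits and masking with HASH_SIZE - 1, so
 * each tunnel endpoint selects one of the 16 buckets in the keyed tables
 * below.
 */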
122
123 static bool log_ecn_error = true;
124 module_param(log_ecn_error, bool, 0644);
125 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
126
127 static int ipip_net_id __read_mostly;
128 struct ipip_net {
129         struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
130         struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
131         struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
132         struct ip_tunnel __rcu *tunnels_wc[1];
133         struct ip_tunnel __rcu **tunnels[4];
134
135         struct net_device *fb_tunnel_dev;
136 };
137
138 static int ipip_tunnel_init(struct net_device *dev);
139 static void ipip_tunnel_setup(struct net_device *dev);
140 static void ipip_dev_free(struct net_device *dev);
141 static struct rtnl_link_ops ipip_link_ops __read_mostly;
142
143 /*
144  * Locking : hash tables are protected by RCU and RTNL
145  */
146
147 #define for_each_ip_tunnel_rcu(start) \
148         for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
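/* Note: the macro expects a local "struct ip_tunnel *t" declared by the
 * caller and, per the locking rule above, must be used inside an RCU
 * read-side critical section (or with RTNL held) so that the ->next
 * pointers stay valid while walking the chain.
 */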
149
150 static struct rtnl_link_stats64 *ipip_get_stats64(struct net_device *dev,
151                                                   struct rtnl_link_stats64 *tot)
152 {
153         int i;
154
155         for_each_possible_cpu(i) {
156                 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
157                 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
158                 unsigned int start;
159
160                 do {
161                         start = u64_stats_fetch_begin_bh(&tstats->syncp);
162                         rx_packets = tstats->rx_packets;
163                         tx_packets = tstats->tx_packets;
164                         rx_bytes = tstats->rx_bytes;
165                         tx_bytes = tstats->tx_bytes;
166                 } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
167
168                 tot->rx_packets += rx_packets;
169                 tot->tx_packets += tx_packets;
170                 tot->rx_bytes   += rx_bytes;
171                 tot->tx_bytes   += tx_bytes;
172         }
173
174         tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
175         tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
176         tot->tx_dropped = dev->stats.tx_dropped;
177         tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
178         tot->tx_errors = dev->stats.tx_errors;
179         tot->collisions = dev->stats.collisions;
180
181         return tot;
182 }
183
184 static struct ip_tunnel *ipip_tunnel_lookup(struct net *net,
185                 __be32 remote, __be32 local)
186 {
187         unsigned int h0 = HASH(remote);
188         unsigned int h1 = HASH(local);
189         struct ip_tunnel *t;
190         struct ipip_net *ipn = net_generic(net, ipip_net_id);
191
192         for_each_ip_tunnel_rcu(ipn->tunnels_r_l[h0 ^ h1])
193                 if (local == t->parms.iph.saddr &&
194                     remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
195                         return t;
196
197         for_each_ip_tunnel_rcu(ipn->tunnels_r[h0])
198                 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
199                         return t;
200
201         for_each_ip_tunnel_rcu(ipn->tunnels_l[h1])
202                 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
203                         return t;
204
205         t = rcu_dereference(ipn->tunnels_wc[0]);
206         if (t && (t->dev->flags&IFF_UP))
207                 return t;
208         return NULL;
209 }
210
211 static struct ip_tunnel __rcu **__ipip_bucket(struct ipip_net *ipn,
212                 struct ip_tunnel_parm *parms)
213 {
214         __be32 remote = parms->iph.daddr;
215         __be32 local = parms->iph.saddr;
216         unsigned int h = 0;
217         int prio = 0;
218
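        /* prio encodes which endpoints key this tunnel: bit 1 for a remote
         * address, bit 0 for a local address.  With the table order set up
         * in ipip_init_net() this selects tunnels_wc (0), tunnels_l (1),
         * tunnels_r (2) or tunnels_r_l (3).
         */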
219         if (remote) {
220                 prio |= 2;
221                 h ^= HASH(remote);
222         }
223         if (local) {
224                 prio |= 1;
225                 h ^= HASH(local);
226         }
227         return &ipn->tunnels[prio][h];
228 }
229
230 static inline struct ip_tunnel __rcu **ipip_bucket(struct ipip_net *ipn,
231                 struct ip_tunnel *t)
232 {
233         return __ipip_bucket(ipn, &t->parms);
234 }
235
236 static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
237 {
238         struct ip_tunnel __rcu **tp;
239         struct ip_tunnel *iter;
240
241         for (tp = ipip_bucket(ipn, t);
242              (iter = rtnl_dereference(*tp)) != NULL;
243              tp = &iter->next) {
244                 if (t == iter) {
245                         rcu_assign_pointer(*tp, t->next);
246                         break;
247                 }
248         }
249 }
250
251 static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
252 {
253         struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t);
254
255         rcu_assign_pointer(t->next, rtnl_dereference(*tp));
256         rcu_assign_pointer(*tp, t);
257 }
258
259 static struct ip_tunnel *ipip_tunnel_locate(struct net *net,
260                 struct ip_tunnel_parm *parms, int create)
261 {
262         __be32 remote = parms->iph.daddr;
263         __be32 local = parms->iph.saddr;
264         struct ip_tunnel *t, *nt;
265         struct ip_tunnel __rcu **tp;
266         struct net_device *dev;
267         char name[IFNAMSIZ];
268         struct ipip_net *ipn = net_generic(net, ipip_net_id);
269
270         for (tp = __ipip_bucket(ipn, parms);
271                  (t = rtnl_dereference(*tp)) != NULL;
272                  tp = &t->next) {
273                 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
274                         return t;
275         }
276         if (!create)
277                 return NULL;
278
279         if (parms->name[0])
280                 strlcpy(name, parms->name, IFNAMSIZ);
281         else
282                 strcpy(name, "tunl%d");
283
284         dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
285         if (dev == NULL)
286                 return NULL;
287
288         dev_net_set(dev, net);
289
290         nt = netdev_priv(dev);
291         nt->parms = *parms;
292
293         if (ipip_tunnel_init(dev) < 0)
294                 goto failed_free;
295
296         if (register_netdevice(dev) < 0)
297                 goto failed_free;
298
299         strcpy(nt->parms.name, dev->name);
300         dev->rtnl_link_ops = &ipip_link_ops;
301
302         dev_hold(dev);
303         ipip_tunnel_link(ipn, nt);
304         return nt;
305
306 failed_free:
307         ipip_dev_free(dev);
308         return NULL;
309 }
310
311 /* called with RTNL */
312 static void ipip_tunnel_uninit(struct net_device *dev)
313 {
314         struct net *net = dev_net(dev);
315         struct ipip_net *ipn = net_generic(net, ipip_net_id);
316
317         if (dev == ipn->fb_tunnel_dev)
318                 RCU_INIT_POINTER(ipn->tunnels_wc[0], NULL);
319         else
320                 ipip_tunnel_unlink(ipn, netdev_priv(dev));
321         dev_put(dev);
322 }
323
324 static int ipip_err(struct sk_buff *skb, u32 info)
325 {
326
327 /* All the routers (except for Linux) return only
328    8 bytes of packet payload. It means that precise relaying of
329    ICMP in the real Internet is absolutely infeasible.
330  */
331         const struct iphdr *iph = (const struct iphdr *)skb->data;
332         const int type = icmp_hdr(skb)->type;
333         const int code = icmp_hdr(skb)->code;
334         struct ip_tunnel *t;
335         int err;
336
337         switch (type) {
338         default:
339         case ICMP_PARAMETERPROB:
340                 return 0;
341
342         case ICMP_DEST_UNREACH:
343                 switch (code) {
344                 case ICMP_SR_FAILED:
345                 case ICMP_PORT_UNREACH:
346                         /* Impossible event. */
347                         return 0;
348                 default:
349                         /* All others are translated to HOST_UNREACH.
350                            rfc2003 contains "deep thoughts" about NET_UNREACH,
351                            I believe they are just ether pollution. --ANK
352                          */
353                         break;
354                 }
355                 break;
356         case ICMP_TIME_EXCEEDED:
357                 if (code != ICMP_EXC_TTL)
358                         return 0;
359                 break;
360         case ICMP_REDIRECT:
361                 break;
362         }
363
364         err = -ENOENT;
365         t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
366         if (t == NULL)
367                 goto out;
368
369         if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
370                 ipv4_update_pmtu(skb, dev_net(skb->dev), info,
371                                  t->dev->ifindex, 0, IPPROTO_IPIP, 0);
372                 err = 0;
373                 goto out;
374         }
375
376         if (type == ICMP_REDIRECT) {
377                 ipv4_redirect(skb, dev_net(skb->dev), t->dev->ifindex, 0,
378                               IPPROTO_IPIP, 0);
379                 err = 0;
380                 goto out;
381         }
382
383         if (t->parms.iph.daddr == 0)
384                 goto out;
385
386         err = 0;
387         if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
388                 goto out;
389
390         if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
391                 t->err_count++;
392         else
393                 t->err_count = 1;
394         t->err_time = jiffies;
395 out:
396
397         return err;
398 }
399
400 static int ipip_rcv(struct sk_buff *skb)
401 {
402         struct ip_tunnel *tunnel;
403         const struct iphdr *iph = ip_hdr(skb);
404         int err;
405
406         tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
407         if (tunnel != NULL) {
408                 struct pcpu_tstats *tstats;
409
410                 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
411                         goto drop;
412
413                 secpath_reset(skb);
414
415                 skb->mac_header = skb->network_header;
416                 skb_reset_network_header(skb);
417                 skb->protocol = htons(ETH_P_IP);
418                 skb->pkt_type = PACKET_HOST;
419
420                 __skb_tunnel_rx(skb, tunnel->dev);
421
422                 err = IP_ECN_decapsulate(iph, skb);
423                 if (unlikely(err)) {
424                         if (log_ecn_error)
425                                 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
426                                                      &iph->saddr, iph->tos);
427                         if (err > 1) {
428                                 ++tunnel->dev->stats.rx_frame_errors;
429                                 ++tunnel->dev->stats.rx_errors;
430                                 goto drop;
431                         }
432                 }
433
434                 tstats = this_cpu_ptr(tunnel->dev->tstats);
435                 u64_stats_update_begin(&tstats->syncp);
436                 tstats->rx_packets++;
437                 tstats->rx_bytes += skb->len;
438                 u64_stats_update_end(&tstats->syncp);
439
440                 netif_rx(skb);
441                 return 0;
442         }
443
444         return -1;
445
446 drop:
447         kfree_skb(skb);
448         return 0;
449 }
450
451 /*
452  *      This function assumes it is being called from dev_queue_xmit()
453  *      and that skb is filled properly by that function.
454  */
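/*
 *      In outline: route the outer packet towards the tunnel destination,
 *      honour path MTU when DF is set, reallocate headroom if the skb cannot
 *      take another IP header, build the outer IPIP header and hand the
 *      result to iptunnel_xmit().
 */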
455
456 static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
457 {
458         struct ip_tunnel *tunnel = netdev_priv(dev);
459         const struct iphdr  *tiph = &tunnel->parms.iph;
460         u8     tos = tunnel->parms.iph.tos;
461         __be16 df = tiph->frag_off;
462         struct rtable *rt;                      /* Route to the other host */
463         struct net_device *tdev;                /* Device to other host */
464         const struct iphdr  *old_iph = ip_hdr(skb);
465         struct iphdr  *iph;                     /* Our new IP header */
466         unsigned int max_headroom;              /* The extra header space needed */
467         __be32 dst = tiph->daddr;
468         struct flowi4 fl4;
469         int    mtu;
470
471         if (skb->protocol != htons(ETH_P_IP))
472                 goto tx_error;
473
474         if (skb->ip_summed == CHECKSUM_PARTIAL &&
475             skb_checksum_help(skb))
476                 goto tx_error;
477
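        /* A configured TOS with its low bit set (the conventional "inherit"
         * value) means: take the TOS from the inner IP header instead.
         */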
478         if (tos & 1)
479                 tos = old_iph->tos;
480
481         if (!dst) {
482                 /* NBMA tunnel */
483                 if ((rt = skb_rtable(skb)) == NULL) {
484                         dev->stats.tx_fifo_errors++;
485                         goto tx_error;
486                 }
487                 dst = rt_nexthop(rt, old_iph->daddr);
488         }
489
490         rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
491                                    dst, tiph->saddr,
492                                    0, 0,
493                                    IPPROTO_IPIP, RT_TOS(tos),
494                                    tunnel->parms.link);
495         if (IS_ERR(rt)) {
496                 dev->stats.tx_carrier_errors++;
497                 goto tx_error_icmp;
498         }
499         tdev = rt->dst.dev;
500
501         if (tdev == dev) {
502                 ip_rt_put(rt);
503                 dev->stats.collisions++;
504                 goto tx_error;
505         }
506
507         df |= old_iph->frag_off & htons(IP_DF);
508
509         if (df) {
510                 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
511
512                 if (mtu < 68) {
513                         dev->stats.collisions++;
514                         ip_rt_put(rt);
515                         goto tx_error;
516                 }
517
518                 if (skb_dst(skb))
519                         skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
520
521                 if ((old_iph->frag_off & htons(IP_DF)) &&
522                     mtu < ntohs(old_iph->tot_len)) {
523                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
524                                   htonl(mtu));
525                         ip_rt_put(rt);
526                         goto tx_error;
527                 }
528         }
529
530         if (tunnel->err_count > 0) {
531                 if (time_before(jiffies,
532                                 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
533                         tunnel->err_count--;
534                         dst_link_failure(skb);
535                 } else
536                         tunnel->err_count = 0;
537         }
538
539         /*
540          * Okay, now see if we can stuff it in the buffer as-is.
541          */
542         max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
543
544         if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
545             (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
546                 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
547                 if (!new_skb) {
548                         ip_rt_put(rt);
549                         dev->stats.tx_dropped++;
550                         dev_kfree_skb(skb);
551                         return NETDEV_TX_OK;
552                 }
553                 if (skb->sk)
554                         skb_set_owner_w(new_skb, skb->sk);
555                 dev_kfree_skb(skb);
556                 skb = new_skb;
557                 old_iph = ip_hdr(skb);
558         }
559
560         skb->transport_header = skb->network_header;
561         skb_push(skb, sizeof(struct iphdr));
562         skb_reset_network_header(skb);
563         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
564         IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
565                               IPSKB_REROUTED);
566         skb_dst_drop(skb);
567         skb_dst_set(skb, &rt->dst);
568
569         /*
570          *      Push down and install the IPIP header.
571          */
572
573         iph                     =       ip_hdr(skb);
574         iph->version            =       4;
575         iph->ihl                =       sizeof(struct iphdr)>>2;
576         iph->frag_off           =       df;
577         iph->protocol           =       IPPROTO_IPIP;
578         iph->tos                =       INET_ECN_encapsulate(tos, old_iph->tos);
579         iph->daddr              =       fl4.daddr;
580         iph->saddr              =       fl4.saddr;
581
582         if ((iph->ttl = tiph->ttl) == 0)
583                 iph->ttl        =       old_iph->ttl;
584
585         iptunnel_xmit(skb, dev);
586         return NETDEV_TX_OK;
587
588 tx_error_icmp:
589         dst_link_failure(skb);
590 tx_error:
591         dev->stats.tx_errors++;
592         dev_kfree_skb(skb);
593         return NETDEV_TX_OK;
594 }
595
596 static void ipip_tunnel_bind_dev(struct net_device *dev)
597 {
598         struct net_device *tdev = NULL;
599         struct ip_tunnel *tunnel;
600         const struct iphdr *iph;
601
602         tunnel = netdev_priv(dev);
603         iph = &tunnel->parms.iph;
604
605         if (iph->daddr) {
606                 struct rtable *rt;
607                 struct flowi4 fl4;
608
609                 rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
610                                            iph->daddr, iph->saddr,
611                                            0, 0,
612                                            IPPROTO_IPIP,
613                                            RT_TOS(iph->tos),
614                                            tunnel->parms.link);
615                 if (!IS_ERR(rt)) {
616                         tdev = rt->dst.dev;
617                         ip_rt_put(rt);
618                 }
619                 dev->flags |= IFF_POINTOPOINT;
620         }
621
622         if (!tdev && tunnel->parms.link)
623                 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
624
625         if (tdev) {
626                 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
627                 dev->mtu = tdev->mtu - sizeof(struct iphdr);
628         }
629         dev->iflink = tunnel->parms.link;
630 }
631
632 static int
633 ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
634 {
635         int err = 0;
636         struct ip_tunnel_parm p;
637         struct ip_tunnel *t;
638         struct net *net = dev_net(dev);
639         struct ipip_net *ipn = net_generic(net, ipip_net_id);
640
641         switch (cmd) {
642         case SIOCGETTUNNEL:
643                 t = NULL;
644                 if (dev == ipn->fb_tunnel_dev) {
645                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
646                                 err = -EFAULT;
647                                 break;
648                         }
649                         t = ipip_tunnel_locate(net, &p, 0);
650                 }
651                 if (t == NULL)
652                         t = netdev_priv(dev);
653                 memcpy(&p, &t->parms, sizeof(p));
654                 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
655                         err = -EFAULT;
656                 break;
657
658         case SIOCADDTUNNEL:
659         case SIOCCHGTUNNEL:
660                 err = -EPERM;
661                 if (!capable(CAP_NET_ADMIN))
662                         goto done;
663
664                 err = -EFAULT;
665                 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
666                         goto done;
667
668                 err = -EINVAL;
669                 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
670                     p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
671                         goto done;
672                 if (p.iph.ttl)
673                         p.iph.frag_off |= htons(IP_DF);
674
675                 t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
676
677                 if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
678                         if (t != NULL) {
679                                 if (t->dev != dev) {
680                                         err = -EEXIST;
681                                         break;
682                                 }
683                         } else {
684                                 if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
685                                     (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
686                                         err = -EINVAL;
687                                         break;
688                                 }
689                                 t = netdev_priv(dev);
690                                 ipip_tunnel_unlink(ipn, t);
691                                 synchronize_net();
692                                 t->parms.iph.saddr = p.iph.saddr;
693                                 t->parms.iph.daddr = p.iph.daddr;
694                                 memcpy(dev->dev_addr, &p.iph.saddr, 4);
695                                 memcpy(dev->broadcast, &p.iph.daddr, 4);
696                                 ipip_tunnel_link(ipn, t);
697                                 netdev_state_change(dev);
698                         }
699                 }
700
701                 if (t) {
702                         err = 0;
703                         if (cmd == SIOCCHGTUNNEL) {
704                                 t->parms.iph.ttl = p.iph.ttl;
705                                 t->parms.iph.tos = p.iph.tos;
706                                 t->parms.iph.frag_off = p.iph.frag_off;
707                                 if (t->parms.link != p.link) {
708                                         t->parms.link = p.link;
709                                         ipip_tunnel_bind_dev(dev);
710                                         netdev_state_change(dev);
711                                 }
712                         }
713                         if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
714                                 err = -EFAULT;
715                 } else
716                         err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
717                 break;
718
719         case SIOCDELTUNNEL:
720                 err = -EPERM;
721                 if (!capable(CAP_NET_ADMIN))
722                         goto done;
723
724                 if (dev == ipn->fb_tunnel_dev) {
725                         err = -EFAULT;
726                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
727                                 goto done;
728                         err = -ENOENT;
729                         if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL)
730                                 goto done;
731                         err = -EPERM;
732                         if (t->dev == ipn->fb_tunnel_dev)
733                                 goto done;
734                         dev = t->dev;
735                 }
736                 unregister_netdevice(dev);
737                 err = 0;
738                 break;
739
740         default:
741                 err = -EINVAL;
742         }
743
744 done:
745         return err;
746 }
747
748 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
749 {
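        /* 68 is the minimum IPv4 MTU (RFC 791); the upper bound keeps the
         * datagram within the 16-bit IP total length once the outer header
         * is added (0xFFF8 is the largest 8-byte-aligned value below 64K).
         */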
750         if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
751                 return -EINVAL;
752         dev->mtu = new_mtu;
753         return 0;
754 }
755
756 static const struct net_device_ops ipip_netdev_ops = {
757         .ndo_uninit     = ipip_tunnel_uninit,
758         .ndo_start_xmit = ipip_tunnel_xmit,
759         .ndo_do_ioctl   = ipip_tunnel_ioctl,
760         .ndo_change_mtu = ipip_tunnel_change_mtu,
761         .ndo_get_stats64 = ipip_get_stats64,
762 };
763
764 static void ipip_dev_free(struct net_device *dev)
765 {
766         free_percpu(dev->tstats);
767         free_netdev(dev);
768 }
769
770 #define IPIP_FEATURES (NETIF_F_SG |             \
771                        NETIF_F_FRAGLIST |       \
772                        NETIF_F_HIGHDMA |        \
773                        NETIF_F_HW_CSUM)
774
775 static void ipip_tunnel_setup(struct net_device *dev)
776 {
777         dev->netdev_ops         = &ipip_netdev_ops;
778         dev->destructor         = ipip_dev_free;
779
780         dev->type               = ARPHRD_TUNNEL;
781         dev->hard_header_len    = LL_MAX_HEADER + sizeof(struct iphdr);
782         dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr);
783         dev->flags              = IFF_NOARP;
784         dev->iflink             = 0;
785         dev->addr_len           = 4;
786         dev->features           |= NETIF_F_NETNS_LOCAL;
787         dev->features           |= NETIF_F_LLTX;
788         dev->priv_flags         &= ~IFF_XMIT_DST_RELEASE;
789
790         dev->features           |= IPIP_FEATURES;
791         dev->hw_features        |= IPIP_FEATURES;
792 }
793
794 static int ipip_tunnel_init(struct net_device *dev)
795 {
796         struct ip_tunnel *tunnel = netdev_priv(dev);
797
798         tunnel->dev = dev;
799
800         memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
801         memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
802
803         ipip_tunnel_bind_dev(dev);
804
805         dev->tstats = alloc_percpu(struct pcpu_tstats);
806         if (!dev->tstats)
807                 return -ENOMEM;
808
809         return 0;
810 }
811
812 static int __net_init ipip_fb_tunnel_init(struct net_device *dev)
813 {
814         struct ip_tunnel *tunnel = netdev_priv(dev);
815         struct iphdr *iph = &tunnel->parms.iph;
816         struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id);
817
818         tunnel->dev = dev;
819         strcpy(tunnel->parms.name, dev->name);
820
821         iph->version            = 4;
822         iph->protocol           = IPPROTO_IPIP;
823         iph->ihl                = 5;
824
825         dev->tstats = alloc_percpu(struct pcpu_tstats);
826         if (!dev->tstats)
827                 return -ENOMEM;
828
829         dev_hold(dev);
830         rcu_assign_pointer(ipn->tunnels_wc[0], tunnel);
831         return 0;
832 }
833
834 static size_t ipip_get_size(const struct net_device *dev)
835 {
836         return
837                 /* IFLA_IPTUN_LINK */
838                 nla_total_size(4) +
839                 /* IFLA_IPTUN_LOCAL */
840                 nla_total_size(4) +
841                 /* IFLA_IPTUN_REMOTE */
842                 nla_total_size(4) +
843                 /* IFLA_IPTUN_TTL */
844                 nla_total_size(1) +
845                 /* IFLA_IPTUN_TOS */
846                 nla_total_size(1) +
847                 0;
848 }
849
850 static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev)
851 {
852         struct ip_tunnel *tunnel = netdev_priv(dev);
853         struct ip_tunnel_parm *parm = &tunnel->parms;
854
855         if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
856             nla_put_be32(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) ||
857             nla_put_be32(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) ||
858             nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) ||
859             nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos))
860                 goto nla_put_failure;
861         return 0;
862
863 nla_put_failure:
864         return -EMSGSIZE;
865 }
866
867 static struct rtnl_link_ops ipip_link_ops __read_mostly = {
868         .kind           = "ipip",
869         .maxtype        = IFLA_IPTUN_MAX,
870         .priv_size      = sizeof(struct ip_tunnel),
871         .get_size       = ipip_get_size,
872         .fill_info      = ipip_fill_info,
873 };
874
875 static struct xfrm_tunnel ipip_handler __read_mostly = {
876         .handler        =       ipip_rcv,
877         .err_handler    =       ipip_err,
878         .priority       =       1,
879 };
880
881 static const char banner[] __initconst =
882         KERN_INFO "IPv4 over IPv4 tunneling driver\n";
883
884 static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head)
885 {
886         int prio;
887
888         for (prio = 1; prio < 4; prio++) {
889                 int h;
890                 for (h = 0; h < HASH_SIZE; h++) {
891                         struct ip_tunnel *t;
892
893                         t = rtnl_dereference(ipn->tunnels[prio][h]);
894                         while (t != NULL) {
895                                 unregister_netdevice_queue(t->dev, head);
896                                 t = rtnl_dereference(t->next);
897                         }
898                 }
899         }
900 }
901
902 static int __net_init ipip_init_net(struct net *net)
903 {
904         struct ipip_net *ipn = net_generic(net, ipip_net_id);
905         struct ip_tunnel *t;
906         int err;
907
908         ipn->tunnels[0] = ipn->tunnels_wc;
909         ipn->tunnels[1] = ipn->tunnels_l;
910         ipn->tunnels[2] = ipn->tunnels_r;
911         ipn->tunnels[3] = ipn->tunnels_r_l;
912
913         ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
914                                            "tunl0",
915                                            ipip_tunnel_setup);
916         if (!ipn->fb_tunnel_dev) {
917                 err = -ENOMEM;
918                 goto err_alloc_dev;
919         }
920         dev_net_set(ipn->fb_tunnel_dev, net);
921
922         err = ipip_fb_tunnel_init(ipn->fb_tunnel_dev);
923         if (err)
924                 goto err_reg_dev;
925
926         if ((err = register_netdev(ipn->fb_tunnel_dev)))
927                 goto err_reg_dev;
928
929         t = netdev_priv(ipn->fb_tunnel_dev);
930
931         strcpy(t->parms.name, ipn->fb_tunnel_dev->name);
932         return 0;
933
934 err_reg_dev:
935         ipip_dev_free(ipn->fb_tunnel_dev);
936 err_alloc_dev:
937         /* nothing */
938         return err;
939 }
940
941 static void __net_exit ipip_exit_net(struct net *net)
942 {
943         struct ipip_net *ipn = net_generic(net, ipip_net_id);
944         LIST_HEAD(list);
945
946         rtnl_lock();
947         ipip_destroy_tunnels(ipn, &list);
948         unregister_netdevice_queue(ipn->fb_tunnel_dev, &list);
949         unregister_netdevice_many(&list);
950         rtnl_unlock();
951 }
952
953 static struct pernet_operations ipip_net_ops = {
954         .init = ipip_init_net,
955         .exit = ipip_exit_net,
956         .id   = &ipip_net_id,
957         .size = sizeof(struct ipip_net),
958 };
959
960 static int __init ipip_init(void)
961 {
962         int err;
963
964         printk(banner);
965
966         err = register_pernet_device(&ipip_net_ops);
967         if (err < 0)
968                 return err;
969         err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
970         if (err < 0) {
971                 pr_info("%s: can't register tunnel\n", __func__);
972                 goto xfrm_tunnel_failed;
973         }
974         err = rtnl_link_register(&ipip_link_ops);
975         if (err < 0)
976                 goto rtnl_link_failed;
977
978 out:
979         return err;
980
981 rtnl_link_failed:
982         xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
983 xfrm_tunnel_failed:
984         unregister_pernet_device(&ipip_net_ops);
985         goto out;
986 }
987
988 static void __exit ipip_fini(void)
989 {
990         rtnl_link_unregister(&ipip_link_ops);
991         if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
992                 pr_info("%s: can't deregister tunnel\n", __func__);
993
994         unregister_pernet_device(&ipip_net_ops);
995 }
996
997 module_init(ipip_init);
998 module_exit(ipip_fini);
999 MODULE_LICENSE("GPL");
1000 MODULE_ALIAS_NETDEV("tunl0");