Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph...
[firefly-linux-kernel-4.4.55.git] / net / ipv4 / ipip.c
1 /*
2  *      Linux NET3:     IP/IP protocol decoder.
3  *
4  *      Authors:
5  *              Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
6  *
7  *      Fixes:
8  *              Alan Cox        :       Merged and made usable non modular (its so tiny its silly as
9  *                                      a module taking up 2 pages).
10  *              Alan Cox        :       Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
11  *                                      to keep ip_forward happy.
12  *              Alan Cox        :       More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
13  *              Kai Schulte     :       Fixed #defines for IP_FIREWALL->FIREWALL
14  *              David Woodhouse :       Perform some basic ICMP handling.
15  *                                      IPIP Routing without decapsulation.
16  *              Carlos Picoto   :       GRE over IP support
17  *              Alexey Kuznetsov:       Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
18  *                                      I do not want to merge them together.
19  *
20  *      This program is free software; you can redistribute it and/or
21  *      modify it under the terms of the GNU General Public License
22  *      as published by the Free Software Foundation; either version
23  *      2 of the License, or (at your option) any later version.
24  *
25  */
26
27 /* tunnel.c: an IP tunnel driver
28
29         The purpose of this driver is to provide an IP tunnel through
30         which you can tunnel network traffic transparently across subnets.
31
32         This was written by looking at Nick Holloway's dummy driver
33         Thanks for the great code!
34
35                 -Sam Lantinga   (slouken@cs.ucdavis.edu)  02/01/95
36
37         Minor tweaks:
38                 Cleaned up the code a little and added some pre-1.3.0 tweaks.
39                 dev->hard_header/hard_header_len changed to use no headers.
40                 Comments/bracketing tweaked.
41                 Made the tunnels use dev->name not tunnel: when error reporting.
42                 Added tx_dropped stat
43
44                 -Alan Cox       (alan@lxorguk.ukuu.org.uk) 21 March 95
45
46         Reworked:
47                 Changed to tunnel to destination gateway in addition to the
48                         tunnel's pointopoint address
49                 Almost completely rewritten
50                 Note:  There is currently no firewall or ICMP handling done.
51
52                 -Sam Lantinga   (slouken@cs.ucdavis.edu) 02/13/96
53
54 */
55
56 /* Things I wish I had known when writing the tunnel driver:
57
58         When the tunnel_xmit() function is called, the skb contains the
59         packet to be sent (plus a great deal of extra info), and dev
60         contains the tunnel device that _we_ are.
61
62         When we are passed a packet, we are expected to fill in the
63         source address with our source IP address.
64
65         What is the proper way to allocate, copy and free a buffer?
66         After you allocate it, it is a "0 length" chunk of memory
67         starting at zero.  If you want to add headers to the buffer
68         later, you'll have to call "skb_reserve(skb, amount)" with
69         the amount of memory you want reserved.  Then, you call
70         "skb_put(skb, amount)" with the amount of space you want in
71         the buffer.  skb_put() returns a pointer to the top (#0) of
72         that buffer.  skb->len is set to the amount of space you have
73         "allocated" with skb_put().  You can then write up to skb->len
74         bytes to that buffer.  If you need more, you can call skb_put()
75         again with the additional amount of space you need.  You can
76         find out how much more space you can allocate by calling
77         "skb_tailroom(skb)".
78         Now, to add header space, call "skb_push(skb, header_len)".
79         This creates space at the beginning of the buffer and returns
80         a pointer to this new space.  If later you need to strip a
81         header from a buffer, call "skb_pull(skb, header_len)".
82         skb_headroom() will return how much space is left at the top
83         of the buffer (before the main data).  Remember, this headroom
84         space must be reserved before the skb_put() function is called.
85         */
86
87 /*
88    This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c
89
90    For comments look at net/ipv4/ip_gre.c --ANK
91  */
92
93
94 #include <linux/capability.h>
95 #include <linux/module.h>
96 #include <linux/types.h>
97 #include <linux/kernel.h>
98 #include <linux/slab.h>
99 #include <asm/uaccess.h>
100 #include <linux/skbuff.h>
101 #include <linux/netdevice.h>
102 #include <linux/in.h>
103 #include <linux/tcp.h>
104 #include <linux/udp.h>
105 #include <linux/if_arp.h>
106 #include <linux/mroute.h>
107 #include <linux/init.h>
108 #include <linux/netfilter_ipv4.h>
109 #include <linux/if_ether.h>
110
111 #include <net/sock.h>
112 #include <net/ip.h>
113 #include <net/icmp.h>
114 #include <net/ipip.h>
115 #include <net/inet_ecn.h>
116 #include <net/xfrm.h>
117 #include <net/net_namespace.h>
118 #include <net/netns/generic.h>
119
/* Number of buckets in each address-keyed tunnel hash table. */
#define HASH_SIZE  16
/*
 * Fold an IPv4 address into a bucket index in the range [0, HASH_SIZE).
 *
 * The argument is parenthesized at each use so that an expression argument
 * (for instance "a | b") is hashed by value; without the parentheses the
 * cast and the shift would bind to only part of the expression.
 * Note that the argument is evaluated twice, so it must be free of
 * side effects.
 */
#define HASH(addr) ((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & 0xF)
122
/* Id passed to net_generic() to fetch this module's per-namespace state. */
static int ipip_net_id __read_mostly;
/*
 * Per-network-namespace state of the IPIP driver.
 *
 * Tunnels are kept in four RCU-protected tables, selected by which of the
 * tunnel endpoints are configured (see __ipip_bucket() and ipip_init_net()).
 */
struct ipip_net {
        /* tunnels with both remote and local address; index HASH(remote) ^ HASH(local) */
        struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
        /* tunnels with only a remote address; index HASH(remote) */
        struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
        /* tunnels with only a local address; index HASH(local) */
        struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
        /* wildcard slot (neither address set); holds the fallback tunnel */
        struct ip_tunnel __rcu *tunnels_wc[1];
        /* the four tables above indexed by prio: 0=wc, 1=l, 2=r, 3=r_l */
        struct ip_tunnel __rcu **tunnels[4];

        /* the per-namespace fallback device, allocated as "tunl0" */
        struct net_device *fb_tunnel_dev;
};
133
/* Forward declarations: these are used by ipip_tunnel_locate() before their definitions. */
static int ipip_tunnel_init(struct net_device *dev);
static void ipip_tunnel_setup(struct net_device *dev);
static void ipip_dev_free(struct net_device *dev);
137
/*
 * Locking : hash tables are protected by RCU and RTNL
 */

/*
 * Walk one hash chain starting at "start" using rcu_dereference().
 * The macro uses a variable named "t" (struct ip_tunnel *) that must be
 * declared by the enclosing function; it is the loop cursor.
 */
#define for_each_ip_tunnel_rcu(start) \
        for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
144
/* often modified stats are per cpu, other are shared (netdev->stats) */
/*
 * One instance exists per CPU (allocated with alloc_percpu() into
 * dev->tstats); ipip_get_stats() sums them into dev->stats.
 * The structure is aligned to its own size (four unsigned longs).
 */
struct pcpu_tstats {
        unsigned long   rx_packets;     /* packets received through the tunnel */
        unsigned long   rx_bytes;       /* bytes received through the tunnel */
        unsigned long   tx_packets;     /* packets transmitted through the tunnel */
        unsigned long   tx_bytes;       /* bytes transmitted through the tunnel */
} __attribute__((aligned(4*sizeof(unsigned long))));
152
153 static struct net_device_stats *ipip_get_stats(struct net_device *dev)
154 {
155         struct pcpu_tstats sum = { 0 };
156         int i;
157
158         for_each_possible_cpu(i) {
159                 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
160
161                 sum.rx_packets += tstats->rx_packets;
162                 sum.rx_bytes   += tstats->rx_bytes;
163                 sum.tx_packets += tstats->tx_packets;
164                 sum.tx_bytes   += tstats->tx_bytes;
165         }
166         dev->stats.rx_packets = sum.rx_packets;
167         dev->stats.rx_bytes   = sum.rx_bytes;
168         dev->stats.tx_packets = sum.tx_packets;
169         dev->stats.tx_bytes   = sum.tx_bytes;
170         return &dev->stats;
171 }
172
173 static struct ip_tunnel * ipip_tunnel_lookup(struct net *net,
174                 __be32 remote, __be32 local)
175 {
176         unsigned int h0 = HASH(remote);
177         unsigned int h1 = HASH(local);
178         struct ip_tunnel *t;
179         struct ipip_net *ipn = net_generic(net, ipip_net_id);
180
181         for_each_ip_tunnel_rcu(ipn->tunnels_r_l[h0 ^ h1])
182                 if (local == t->parms.iph.saddr &&
183                     remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
184                         return t;
185
186         for_each_ip_tunnel_rcu(ipn->tunnels_r[h0])
187                 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
188                         return t;
189
190         for_each_ip_tunnel_rcu(ipn->tunnels_l[h1])
191                 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
192                         return t;
193
194         t = rcu_dereference(ipn->tunnels_wc[0]);
195         if (t && (t->dev->flags&IFF_UP))
196                 return t;
197         return NULL;
198 }
199
200 static struct ip_tunnel __rcu **__ipip_bucket(struct ipip_net *ipn,
201                 struct ip_tunnel_parm *parms)
202 {
203         __be32 remote = parms->iph.daddr;
204         __be32 local = parms->iph.saddr;
205         unsigned int h = 0;
206         int prio = 0;
207
208         if (remote) {
209                 prio |= 2;
210                 h ^= HASH(remote);
211         }
212         if (local) {
213                 prio |= 1;
214                 h ^= HASH(local);
215         }
216         return &ipn->tunnels[prio][h];
217 }
218
219 static inline struct ip_tunnel __rcu **ipip_bucket(struct ipip_net *ipn,
220                 struct ip_tunnel *t)
221 {
222         return __ipip_bucket(ipn, &t->parms);
223 }
224
225 static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
226 {
227         struct ip_tunnel __rcu **tp;
228         struct ip_tunnel *iter;
229
230         for (tp = ipip_bucket(ipn, t);
231              (iter = rtnl_dereference(*tp)) != NULL;
232              tp = &iter->next) {
233                 if (t == iter) {
234                         rcu_assign_pointer(*tp, t->next);
235                         break;
236                 }
237         }
238 }
239
240 static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
241 {
242         struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t);
243
244         rcu_assign_pointer(t->next, rtnl_dereference(*tp));
245         rcu_assign_pointer(*tp, t);
246 }
247
248 static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
249                 struct ip_tunnel_parm *parms, int create)
250 {
251         __be32 remote = parms->iph.daddr;
252         __be32 local = parms->iph.saddr;
253         struct ip_tunnel *t, *nt;
254         struct ip_tunnel __rcu **tp;
255         struct net_device *dev;
256         char name[IFNAMSIZ];
257         struct ipip_net *ipn = net_generic(net, ipip_net_id);
258
259         for (tp = __ipip_bucket(ipn, parms);
260                  (t = rtnl_dereference(*tp)) != NULL;
261                  tp = &t->next) {
262                 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
263                         return t;
264         }
265         if (!create)
266                 return NULL;
267
268         if (parms->name[0])
269                 strlcpy(name, parms->name, IFNAMSIZ);
270         else
271                 strcpy(name, "tunl%d");
272
273         dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
274         if (dev == NULL)
275                 return NULL;
276
277         dev_net_set(dev, net);
278
279         nt = netdev_priv(dev);
280         nt->parms = *parms;
281
282         if (ipip_tunnel_init(dev) < 0)
283                 goto failed_free;
284
285         if (register_netdevice(dev) < 0)
286                 goto failed_free;
287
288         strcpy(nt->parms.name, dev->name);
289
290         dev_hold(dev);
291         ipip_tunnel_link(ipn, nt);
292         return nt;
293
294 failed_free:
295         ipip_dev_free(dev);
296         return NULL;
297 }
298
299 /* called with RTNL */
300 static void ipip_tunnel_uninit(struct net_device *dev)
301 {
302         struct net *net = dev_net(dev);
303         struct ipip_net *ipn = net_generic(net, ipip_net_id);
304
305         if (dev == ipn->fb_tunnel_dev)
306                 RCU_INIT_POINTER(ipn->tunnels_wc[0], NULL);
307         else
308                 ipip_tunnel_unlink(ipn, netdev_priv(dev));
309         dev_put(dev);
310 }
311
312 static int ipip_err(struct sk_buff *skb, u32 info)
313 {
314
315 /* All the routers (except for Linux) return only
316    8 bytes of packet payload. It means, that precise relaying of
317    ICMP in the real Internet is absolutely infeasible.
318  */
319         const struct iphdr *iph = (const struct iphdr *)skb->data;
320         const int type = icmp_hdr(skb)->type;
321         const int code = icmp_hdr(skb)->code;
322         struct ip_tunnel *t;
323         int err;
324
325         switch (type) {
326         default:
327         case ICMP_PARAMETERPROB:
328                 return 0;
329
330         case ICMP_DEST_UNREACH:
331                 switch (code) {
332                 case ICMP_SR_FAILED:
333                 case ICMP_PORT_UNREACH:
334                         /* Impossible event. */
335                         return 0;
336                 case ICMP_FRAG_NEEDED:
337                         /* Soft state for pmtu is maintained by IP core. */
338                         return 0;
339                 default:
340                         /* All others are translated to HOST_UNREACH.
341                            rfc2003 contains "deep thoughts" about NET_UNREACH,
342                            I believe they are just ether pollution. --ANK
343                          */
344                         break;
345                 }
346                 break;
347         case ICMP_TIME_EXCEEDED:
348                 if (code != ICMP_EXC_TTL)
349                         return 0;
350                 break;
351         }
352
353         err = -ENOENT;
354
355         rcu_read_lock();
356         t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
357         if (t == NULL || t->parms.iph.daddr == 0)
358                 goto out;
359
360         err = 0;
361         if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
362                 goto out;
363
364         if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
365                 t->err_count++;
366         else
367                 t->err_count = 1;
368         t->err_time = jiffies;
369 out:
370         rcu_read_unlock();
371         return err;
372 }
373
374 static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
375                                         struct sk_buff *skb)
376 {
377         struct iphdr *inner_iph = ip_hdr(skb);
378
379         if (INET_ECN_is_ce(outer_iph->tos))
380                 IP_ECN_set_ce(inner_iph);
381 }
382
383 static int ipip_rcv(struct sk_buff *skb)
384 {
385         struct ip_tunnel *tunnel;
386         const struct iphdr *iph = ip_hdr(skb);
387
388         rcu_read_lock();
389         tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
390         if (tunnel != NULL) {
391                 struct pcpu_tstats *tstats;
392
393                 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
394                         rcu_read_unlock();
395                         kfree_skb(skb);
396                         return 0;
397                 }
398
399                 secpath_reset(skb);
400
401                 skb->mac_header = skb->network_header;
402                 skb_reset_network_header(skb);
403                 skb->protocol = htons(ETH_P_IP);
404                 skb->pkt_type = PACKET_HOST;
405
406                 tstats = this_cpu_ptr(tunnel->dev->tstats);
407                 tstats->rx_packets++;
408                 tstats->rx_bytes += skb->len;
409
410                 __skb_tunnel_rx(skb, tunnel->dev);
411
412                 ipip_ecn_decapsulate(iph, skb);
413
414                 netif_rx(skb);
415
416                 rcu_read_unlock();
417                 return 0;
418         }
419         rcu_read_unlock();
420
421         return -1;
422 }
423
/*
 *      This function assumes it is being called from dev_queue_xmit()
 *      and that skb is filled properly by that function.
 *
 *      ndo_start_xmit handler: prepends an outer IPv4 header with protocol
 *      IPPROTO_IPIP to an IPv4 packet and hands it to __IPTUNNEL_XMIT().
 *      Every path returns NETDEV_TX_OK; on the error paths the skb is
 *      freed and a counter in dev->stats is incremented.
 */

static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        struct pcpu_tstats *tstats;
        const struct iphdr  *tiph = &tunnel->parms.iph; /* configured outer header template */
        u8     tos = tunnel->parms.iph.tos;
        __be16 df = tiph->frag_off;
        struct rtable *rt;                      /* Route to the other host */
        struct net_device *tdev;                /* Device to other host */
        const struct iphdr  *old_iph = ip_hdr(skb);     /* inner (original) header */
        struct iphdr  *iph;                     /* Our new IP header */
        unsigned int max_headroom;              /* The extra header space needed */
        __be32 dst = tiph->daddr;
        struct flowi4 fl4;
        int    mtu;

        /* Only IPv4 payloads are encapsulated. */
        if (skb->protocol != htons(ETH_P_IP))
                goto tx_error;

        /* Low bit set in the configured tos: take the TOS of the inner packet. */
        if (tos & 1)
                tos = old_iph->tos;

        if (!dst) {
                /* NBMA tunnel */
                /* No fixed remote: use the gateway of the route attached to the skb. */
                if ((rt = skb_rtable(skb)) == NULL) {
                        dev->stats.tx_fifo_errors++;
                        goto tx_error;
                }
                dst = rt->rt_gateway;
        }

        /* Route the outer packet; fl4 receives the addresses used below. */
        rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
                                   dst, tiph->saddr,
                                   0, 0,
                                   IPPROTO_IPIP, RT_TOS(tos),
                                   tunnel->parms.link);
        if (IS_ERR(rt)) {
                dev->stats.tx_carrier_errors++;
                goto tx_error_icmp;
        }
        tdev = rt->dst.dev;

        /* The route leads back into this tunnel device: drop. */
        if (tdev == dev) {
                ip_rt_put(rt);
                dev->stats.collisions++;
                goto tx_error;
        }

        /* DF on the outer header: configured DF or the inner packet's DF. */
        df |= old_iph->frag_off & htons(IP_DF);

        if (df) {
                /* Payload MTU is the route MTU minus the outer header. */
                mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);

                if (mtu < 68) {
                        dev->stats.collisions++;
                        ip_rt_put(rt);
                        goto tx_error;
                }

                if (skb_dst(skb))
                        skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);

                /* Inner packet has DF and does not fit: report FRAG_NEEDED and drop. */
                if ((old_iph->frag_off & htons(IP_DF)) &&
                    mtu < ntohs(old_iph->tot_len)) {
                        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
                                  htonl(mtu));
                        ip_rt_put(rt);
                        goto tx_error;
                }
        }

        /*
         * err_count/err_time are set by ipip_err().  While the last error
         * is recent, consume one count per packet and signal a link
         * failure on the skb; otherwise reset the count.
         */
        if (tunnel->err_count > 0) {
                if (time_before(jiffies,
                                tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
                        tunnel->err_count--;
                        dst_link_failure(skb);
                } else
                        tunnel->err_count = 0;
        }

        /*
         * Okay, now see if we can stuff it in the buffer as-is.
         */
        max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));

        if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
            (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
                /* Not enough headroom or not writable: work on a private copy. */
                struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
                if (!new_skb) {
                        ip_rt_put(rt);
                        dev->stats.tx_dropped++;
                        dev_kfree_skb(skb);
                        return NETDEV_TX_OK;
                }
                if (skb->sk)
                        skb_set_owner_w(new_skb, skb->sk);
                dev_kfree_skb(skb);
                skb = new_skb;
                /* The inner header now lives in the new buffer. */
                old_iph = ip_hdr(skb);
        }

        /* The inner IP header becomes the transport header of the outer packet. */
        skb->transport_header = skb->network_header;
        skb_push(skb, sizeof(struct iphdr));
        skb_reset_network_header(skb);
        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
        IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
                              IPSKB_REROUTED);
        /* Replace the skb's route with the outer route looked up above. */
        skb_dst_drop(skb);
        skb_dst_set(skb, &rt->dst);

        /*
         *      Push down and install the IPIP header.
         */

        iph                     =       ip_hdr(skb);
        iph->version            =       4;
        iph->ihl                =       sizeof(struct iphdr)>>2;
        iph->frag_off           =       df;
        iph->protocol           =       IPPROTO_IPIP;
        iph->tos                =       INET_ECN_encapsulate(tos, old_iph->tos);
        iph->daddr              =       fl4.daddr;
        iph->saddr              =       fl4.saddr;

        /* Configured ttl of 0 means: copy the ttl of the inner packet. */
        if ((iph->ttl = tiph->ttl) == 0)
                iph->ttl        =       old_iph->ttl;

        nf_reset(skb);
        tstats = this_cpu_ptr(dev->tstats);
        /*
         * NOTE(review): __IPTUNNEL_XMIT is defined outside this file
         * (presumably <net/ipip.h>); it is assumed to send the skb and
         * update the tx counters -- confirm against its definition.
         */
        __IPTUNNEL_XMIT(tstats, &dev->stats);
        return NETDEV_TX_OK;

tx_error_icmp:
        dst_link_failure(skb);
tx_error:
        dev->stats.tx_errors++;
        dev_kfree_skb(skb);
        return NETDEV_TX_OK;
}
567
568 static void ipip_tunnel_bind_dev(struct net_device *dev)
569 {
570         struct net_device *tdev = NULL;
571         struct ip_tunnel *tunnel;
572         const struct iphdr *iph;
573
574         tunnel = netdev_priv(dev);
575         iph = &tunnel->parms.iph;
576
577         if (iph->daddr) {
578                 struct rtable *rt;
579                 struct flowi4 fl4;
580
581                 rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
582                                            iph->daddr, iph->saddr,
583                                            0, 0,
584                                            IPPROTO_IPIP,
585                                            RT_TOS(iph->tos),
586                                            tunnel->parms.link);
587                 if (!IS_ERR(rt)) {
588                         tdev = rt->dst.dev;
589                         ip_rt_put(rt);
590                 }
591                 dev->flags |= IFF_POINTOPOINT;
592         }
593
594         if (!tdev && tunnel->parms.link)
595                 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
596
597         if (tdev) {
598                 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
599                 dev->mtu = tdev->mtu - sizeof(struct iphdr);
600         }
601         dev->iflink = tunnel->parms.link;
602 }
603
/*
 * ndo_do_ioctl handler: get, add, change and delete tunnels.
 *
 * The tunnel parameters are exchanged with user space as a
 * struct ip_tunnel_parm at ifr->ifr_ifru.ifru_data.
 *
 * Returns 0 on success or a negative errno:
 *   -EFAULT   user memory could not be read or written
 *   -EPERM    caller lacks CAP_NET_ADMIN, or tried to delete the fallback
 *             tunnel
 *   -EINVAL   malformed parameters or unknown command
 *   -EEXIST   SIOCCHGTUNNEL: the new addresses belong to another tunnel
 *   -ENOBUFS  SIOCADDTUNNEL: the tunnel could not be created
 *   -ENOENT   SIOCCHGTUNNEL/SIOCDELTUNNEL: no such tunnel
 */
static int
ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
        int err = 0;
        struct ip_tunnel_parm p;
        struct ip_tunnel *t;
        struct net *net = dev_net(dev);
        struct ipip_net *ipn = net_generic(net, ipip_net_id);

        switch (cmd) {
        case SIOCGETTUNNEL:
                t = NULL;
                /*
                 * On the fallback device the user-supplied parameters
                 * select which tunnel to report; on any other device
                 * (or when nothing matches) the device's own tunnel is
                 * reported.
                 */
                if (dev == ipn->fb_tunnel_dev) {
                        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
                                err = -EFAULT;
                                break;
                        }
                        t = ipip_tunnel_locate(net, &p, 0);
                }
                if (t == NULL)
                        t = netdev_priv(dev);
                memcpy(&p, &t->parms, sizeof(p));
                if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
                        err = -EFAULT;
                break;

        case SIOCADDTUNNEL:
        case SIOCCHGTUNNEL:
                err = -EPERM;
                if (!capable(CAP_NET_ADMIN))
                        goto done;

                err = -EFAULT;
                if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
                        goto done;

                /* Header template must be plain IPv4/IPIP; only DF may be set in frag_off. */
                err = -EINVAL;
                if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
                    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
                        goto done;
                /* A fixed ttl forces DF on the outer header. */
                if (p.iph.ttl)
                        p.iph.frag_off |= htons(IP_DF);

                /* Creates the tunnel only for SIOCADDTUNNEL. */
                t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);

                if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
                        if (t != NULL) {
                                /* The requested addresses are already used by another device. */
                                if (t->dev != dev) {
                                        err = -EEXIST;
                                        break;
                                }
                        } else {
                                /* A change may not flip the device between point-to-point and not. */
                                if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
                                    (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
                                        err = -EINVAL;
                                        break;
                                }
                                /*
                                 * Re-key this device's tunnel: unlink it,
                                 * wait with synchronize_net(), update the
                                 * addresses, then link it into the chain
                                 * for the new addresses.
                                 */
                                t = netdev_priv(dev);
                                ipip_tunnel_unlink(ipn, t);
                                synchronize_net();
                                t->parms.iph.saddr = p.iph.saddr;
                                t->parms.iph.daddr = p.iph.daddr;
                                memcpy(dev->dev_addr, &p.iph.saddr, 4);
                                memcpy(dev->broadcast, &p.iph.daddr, 4);
                                ipip_tunnel_link(ipn, t);
                                netdev_state_change(dev);
                        }
                }

                if (t) {
                        err = 0;
                        if (cmd == SIOCCHGTUNNEL) {
                                t->parms.iph.ttl = p.iph.ttl;
                                t->parms.iph.tos = p.iph.tos;
                                t->parms.iph.frag_off = p.iph.frag_off;
                                /* Link changed: recompute the settings derived from the lower device. */
                                if (t->parms.link != p.link) {
                                        t->parms.link = p.link;
                                        ipip_tunnel_bind_dev(dev);
                                        netdev_state_change(dev);
                                }
                        }
                        /* Return the resulting parameters to the caller. */
                        if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
                                err = -EFAULT;
                } else
                        err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
                break;

        case SIOCDELTUNNEL:
                err = -EPERM;
                if (!capable(CAP_NET_ADMIN))
                        goto done;

                /*
                 * Issued on the fallback device: the parameters select
                 * the tunnel to delete, which must not be the fallback
                 * tunnel itself.  Issued on any other device: that
                 * device is deleted.
                 */
                if (dev == ipn->fb_tunnel_dev) {
                        err = -EFAULT;
                        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
                                goto done;
                        err = -ENOENT;
                        if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL)
                                goto done;
                        err = -EPERM;
                        if (t->dev == ipn->fb_tunnel_dev)
                                goto done;
                        dev = t->dev;
                }
                unregister_netdevice(dev);
                err = 0;
                break;

        default:
                err = -EINVAL;
        }

done:
        return err;
}
719
720 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
721 {
722         if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
723                 return -EINVAL;
724         dev->mtu = new_mtu;
725         return 0;
726 }
727
/* Device operations installed on every IPIP tunnel device by ipip_tunnel_setup(). */
static const struct net_device_ops ipip_netdev_ops = {
        .ndo_uninit     = ipip_tunnel_uninit,           /* unlink from tables, drop reference */
        .ndo_start_xmit = ipip_tunnel_xmit,             /* encapsulate and send */
        .ndo_do_ioctl   = ipip_tunnel_ioctl,            /* SIOC{GET,ADD,CHG,DEL}TUNNEL */
        .ndo_change_mtu = ipip_tunnel_change_mtu,       /* range-checked MTU change */
        .ndo_get_stats  = ipip_get_stats,               /* sum per-cpu counters */
};
735
/*
 * Release a tunnel device: free its per-cpu statistics, then the
 * net_device itself.  Installed as dev->destructor and also called
 * directly on the setup error paths in this file.
 */
static void ipip_dev_free(struct net_device *dev)
{
        /* Must come first: dev->tstats is read from the device being freed. */
        free_percpu(dev->tstats);
        free_netdev(dev);
}
741
742 static void ipip_tunnel_setup(struct net_device *dev)
743 {
744         dev->netdev_ops         = &ipip_netdev_ops;
745         dev->destructor         = ipip_dev_free;
746
747         dev->type               = ARPHRD_TUNNEL;
748         dev->hard_header_len    = LL_MAX_HEADER + sizeof(struct iphdr);
749         dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr);
750         dev->flags              = IFF_NOARP;
751         dev->iflink             = 0;
752         dev->addr_len           = 4;
753         dev->features           |= NETIF_F_NETNS_LOCAL;
754         dev->features           |= NETIF_F_LLTX;
755         dev->priv_flags         &= ~IFF_XMIT_DST_RELEASE;
756 }
757
758 static int ipip_tunnel_init(struct net_device *dev)
759 {
760         struct ip_tunnel *tunnel = netdev_priv(dev);
761
762         tunnel->dev = dev;
763
764         memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
765         memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
766
767         ipip_tunnel_bind_dev(dev);
768
769         dev->tstats = alloc_percpu(struct pcpu_tstats);
770         if (!dev->tstats)
771                 return -ENOMEM;
772
773         return 0;
774 }
775
776 static int __net_init ipip_fb_tunnel_init(struct net_device *dev)
777 {
778         struct ip_tunnel *tunnel = netdev_priv(dev);
779         struct iphdr *iph = &tunnel->parms.iph;
780         struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id);
781
782         tunnel->dev = dev;
783         strcpy(tunnel->parms.name, dev->name);
784
785         iph->version            = 4;
786         iph->protocol           = IPPROTO_IPIP;
787         iph->ihl                = 5;
788
789         dev->tstats = alloc_percpu(struct pcpu_tstats);
790         if (!dev->tstats)
791                 return -ENOMEM;
792
793         dev_hold(dev);
794         rcu_assign_pointer(ipn->tunnels_wc[0], tunnel);
795         return 0;
796 }
797
/*
 * Tunnel handler registered with xfrm4_tunnel_register() in ipip_init():
 * ipip_rcv() handles received packets, ipip_err() handles ICMP errors.
 */
static struct xfrm_tunnel ipip_handler __read_mostly = {
        .handler        =       ipip_rcv,
        .err_handler    =       ipip_err,
        .priority       =       1,
};
803
/* Message printed once by ipip_init(); carries its own KERN_INFO level. */
static const char banner[] __initconst =
        KERN_INFO "IPv4 over IPv4 tunneling driver\n";
806
807 static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head)
808 {
809         int prio;
810
811         for (prio = 1; prio < 4; prio++) {
812                 int h;
813                 for (h = 0; h < HASH_SIZE; h++) {
814                         struct ip_tunnel *t;
815
816                         t = rtnl_dereference(ipn->tunnels[prio][h]);
817                         while (t != NULL) {
818                                 unregister_netdevice_queue(t->dev, head);
819                                 t = rtnl_dereference(t->next);
820                         }
821                 }
822         }
823 }
824
825 static int __net_init ipip_init_net(struct net *net)
826 {
827         struct ipip_net *ipn = net_generic(net, ipip_net_id);
828         struct ip_tunnel *t;
829         int err;
830
831         ipn->tunnels[0] = ipn->tunnels_wc;
832         ipn->tunnels[1] = ipn->tunnels_l;
833         ipn->tunnels[2] = ipn->tunnels_r;
834         ipn->tunnels[3] = ipn->tunnels_r_l;
835
836         ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
837                                            "tunl0",
838                                            ipip_tunnel_setup);
839         if (!ipn->fb_tunnel_dev) {
840                 err = -ENOMEM;
841                 goto err_alloc_dev;
842         }
843         dev_net_set(ipn->fb_tunnel_dev, net);
844
845         err = ipip_fb_tunnel_init(ipn->fb_tunnel_dev);
846         if (err)
847                 goto err_reg_dev;
848
849         if ((err = register_netdev(ipn->fb_tunnel_dev)))
850                 goto err_reg_dev;
851
852         t = netdev_priv(ipn->fb_tunnel_dev);
853
854         strcpy(t->parms.name, ipn->fb_tunnel_dev->name);
855         return 0;
856
857 err_reg_dev:
858         ipip_dev_free(ipn->fb_tunnel_dev);
859 err_alloc_dev:
860         /* nothing */
861         return err;
862 }
863
864 static void __net_exit ipip_exit_net(struct net *net)
865 {
866         struct ipip_net *ipn = net_generic(net, ipip_net_id);
867         LIST_HEAD(list);
868
869         rtnl_lock();
870         ipip_destroy_tunnels(ipn, &list);
871         unregister_netdevice_queue(ipn->fb_tunnel_dev, &list);
872         unregister_netdevice_many(&list);
873         rtnl_unlock();
874 }
875
/*
 * Per-network-namespace hooks, registered with register_pernet_device()
 * in ipip_init().  .id and .size describe the struct ipip_net that
 * net_generic(net, ipip_net_id) returns.
 */
static struct pernet_operations ipip_net_ops = {
        .init = ipip_init_net,
        .exit = ipip_exit_net,
        .id   = &ipip_net_id,
        .size = sizeof(struct ipip_net),
};
882
883 static int __init ipip_init(void)
884 {
885         int err;
886
887         printk(banner);
888
889         err = register_pernet_device(&ipip_net_ops);
890         if (err < 0)
891                 return err;
892         err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
893         if (err < 0) {
894                 unregister_pernet_device(&ipip_net_ops);
895                 pr_info("%s: can't register tunnel\n", __func__);
896         }
897         return err;
898 }
899
900 static void __exit ipip_fini(void)
901 {
902         if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
903                 pr_info("%s: can't deregister tunnel\n", __func__);
904
905         unregister_pernet_device(&ipip_net_ops);
906 }
907
/* Module entry/exit points, license and the alias for the "tunl0" device name. */
module_init(ipip_init);
module_exit(ipip_fini);
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETDEV("tunl0");