Merge remote-tracking branch 'lsk/v3.10/topic/arm64-misc' into linux-linaro-lsk
[firefly-linux-kernel-4.4.55.git] / net / ipv4 / ip_vti.c
1 /*
2  *      Linux NET3: IP/IP protocol decoder modified to support
3  *                  virtual tunnel interface
4  *
5  *      Authors:
6  *              Saurabh Mohan (saurabh.mohan@vyatta.com) 05/07/2012
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  *
13  */
14
15 /*
16    This version of net/ipv4/ip_vti.c is cloned of net/ipv4/ipip.c
17
18    For comments look at net/ipv4/ip_gre.c --ANK
19  */
20
21
22 #include <linux/capability.h>
23 #include <linux/module.h>
24 #include <linux/types.h>
25 #include <linux/kernel.h>
26 #include <linux/uaccess.h>
27 #include <linux/skbuff.h>
28 #include <linux/netdevice.h>
29 #include <linux/in.h>
30 #include <linux/tcp.h>
31 #include <linux/udp.h>
32 #include <linux/if_arp.h>
33 #include <linux/mroute.h>
34 #include <linux/init.h>
35 #include <linux/netfilter_ipv4.h>
36 #include <linux/if_ether.h>
37
38 #include <net/sock.h>
39 #include <net/ip.h>
40 #include <net/icmp.h>
41 #include <net/ip_tunnels.h>
42 #include <net/inet_ecn.h>
43 #include <net/xfrm.h>
44 #include <net/net_namespace.h>
45 #include <net/netns/generic.h>
46
47 #define HASH_SIZE  16
48 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&(HASH_SIZE-1))
49
50 static struct rtnl_link_ops vti_link_ops __read_mostly;
51
52 static int vti_net_id __read_mostly;
/* Per-network-namespace state: one hash table per address-keying
 * combination, plus the namespace's fallback device (ip_vti0).
 * tunnels[] indexes the four tables by the 'prio' computed in
 * __vti_bucket() (0 wildcard, 1 local-only, 2 remote-only, 3 both).
 */
struct vti_net {
	/* both remote and local address configured */
	struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
	/* remote address only */
	struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
	/* local address only */
	struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
	/* wildcard slot (holds the fallback device's tunnel) */
	struct ip_tunnel __rcu *tunnels_wc[1];
	struct ip_tunnel __rcu **tunnels[4];

	struct net_device *fb_tunnel_dev;
};
62
63 static int vti_fb_tunnel_init(struct net_device *dev);
64 static int vti_tunnel_init(struct net_device *dev);
65 static void vti_tunnel_setup(struct net_device *dev);
66 static void vti_dev_free(struct net_device *dev);
67 static int vti_tunnel_bind_dev(struct net_device *dev);
68
/*
 * Transmit the in-scope 'skb' via dst_output() and account the result:
 * on success, credit tx bytes/packets to stats1 (per-cpu 64-bit stats,
 * under its syncp seqlock); on failure, bump the error counters in
 * stats2 (the device's plain net_device_stats).
 * Multi-statement macro in the canonical do { } while (0) form; both
 * arguments may be evaluated more than once, so pass side-effect-free
 * expressions.  Captures 'skb' from the caller's scope.
 */
#define VTI_XMIT(stats1, stats2) do {				\
	int err;						\
	int pkt_len = skb->len;					\
	err = dst_output(skb);					\
	if (net_xmit_eval(err) == 0) {				\
		u64_stats_update_begin(&(stats1)->syncp);	\
		(stats1)->tx_bytes += pkt_len;			\
		(stats1)->tx_packets++;				\
		u64_stats_update_end(&(stats1)->syncp);		\
	} else {						\
		(stats2)->tx_errors++;				\
		(stats2)->tx_aborted_errors++;			\
	}							\
} while (0)
83
84
85 static struct ip_tunnel *vti_tunnel_lookup(struct net *net,
86                                            __be32 remote, __be32 local)
87 {
88         unsigned h0 = HASH(remote);
89         unsigned h1 = HASH(local);
90         struct ip_tunnel *t;
91         struct vti_net *ipn = net_generic(net, vti_net_id);
92
93         for_each_ip_tunnel_rcu(t, ipn->tunnels_r_l[h0 ^ h1])
94                 if (local == t->parms.iph.saddr &&
95                     remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
96                         return t;
97         for_each_ip_tunnel_rcu(t, ipn->tunnels_r[h0])
98                 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
99                         return t;
100
101         for_each_ip_tunnel_rcu(t, ipn->tunnels_l[h1])
102                 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
103                         return t;
104
105         for_each_ip_tunnel_rcu(t, ipn->tunnels_wc[0])
106                 if (t && (t->dev->flags&IFF_UP))
107                         return t;
108         return NULL;
109 }
110
111 static struct ip_tunnel __rcu **__vti_bucket(struct vti_net *ipn,
112                                              struct ip_tunnel_parm *parms)
113 {
114         __be32 remote = parms->iph.daddr;
115         __be32 local = parms->iph.saddr;
116         unsigned h = 0;
117         int prio = 0;
118
119         if (remote) {
120                 prio |= 2;
121                 h ^= HASH(remote);
122         }
123         if (local) {
124                 prio |= 1;
125                 h ^= HASH(local);
126         }
127         return &ipn->tunnels[prio][h];
128 }
129
130 static inline struct ip_tunnel __rcu **vti_bucket(struct vti_net *ipn,
131                                                   struct ip_tunnel *t)
132 {
133         return __vti_bucket(ipn, &t->parms);
134 }
135
136 static void vti_tunnel_unlink(struct vti_net *ipn, struct ip_tunnel *t)
137 {
138         struct ip_tunnel __rcu **tp;
139         struct ip_tunnel *iter;
140
141         for (tp = vti_bucket(ipn, t);
142              (iter = rtnl_dereference(*tp)) != NULL;
143              tp = &iter->next) {
144                 if (t == iter) {
145                         rcu_assign_pointer(*tp, t->next);
146                         break;
147                 }
148         }
149 }
150
151 static void vti_tunnel_link(struct vti_net *ipn, struct ip_tunnel *t)
152 {
153         struct ip_tunnel __rcu **tp = vti_bucket(ipn, t);
154
155         rcu_assign_pointer(t->next, rtnl_dereference(*tp));
156         rcu_assign_pointer(*tp, t);
157 }
158
/*
 * Look up a tunnel with exactly the (saddr, daddr) pair of @parms.
 * If none exists and @create is set, allocate, bind and register a
 * new vti net_device for it.  Returns the tunnel, or NULL on lookup
 * miss (!create) or allocation/registration failure.
 * Caller holds RTNL (rtnl_dereference / register_netdevice).
 */
static struct ip_tunnel *vti_tunnel_locate(struct net *net,
					   struct ip_tunnel_parm *parms,
					   int create)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	struct ip_tunnel *t, *nt;
	struct ip_tunnel __rcu **tp;
	struct net_device *dev;
	char name[IFNAMSIZ];
	struct vti_net *ipn = net_generic(net, vti_net_id);

	/* Walk only the bucket that this address combination hashes to. */
	for (tp = __vti_bucket(ipn, parms);
	     (t = rtnl_dereference(*tp)) != NULL;
	     tp = &t->next) {
		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
			return t;
	}
	if (!create)
		return NULL;

	/* "vti%d" lets the netdev core pick the next free unit number. */
	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else
		strcpy(name, "vti%d");

	dev = alloc_netdev(sizeof(*t), name, vti_tunnel_setup);
	if (dev == NULL)
		return NULL;

	dev_net_set(dev, net);

	nt = netdev_priv(dev);
	nt->parms = *parms;
	dev->rtnl_link_ops = &vti_link_ops;

	vti_tunnel_bind_dev(dev);

	if (register_netdevice(dev) < 0)
		goto failed_free;

	/* Reference dropped in vti_tunnel_uninit(). */
	dev_hold(dev);
	vti_tunnel_link(ipn, nt);
	return nt;

failed_free:
	free_netdev(dev);
	return NULL;
}
208
209 static void vti_tunnel_uninit(struct net_device *dev)
210 {
211         struct net *net = dev_net(dev);
212         struct vti_net *ipn = net_generic(net, vti_net_id);
213
214         vti_tunnel_unlink(ipn, netdev_priv(dev));
215         dev_put(dev);
216 }
217
/*
 * ICMP error handler for vti.  Called with an ICMP error whose payload
 * embeds one of our tunnelled IP headers: locate the tunnel from the
 * embedded header's addresses, update PMTU on FRAG_NEEDED, and keep
 * per-tunnel error-rate state used by the xmit path.
 * Returns 0 when handled or deliberately ignored, -ENOENT when no
 * matching tunnel exists.
 */
static int vti_err(struct sk_buff *skb, u32 info)
{

	/* All the routers (except for Linux) return only
	 * 8 bytes of packet payload. It means, that precise relaying of
	 * ICMP in the real Internet is absolutely infeasible.
	 */
	struct iphdr *iph = (struct iphdr *)skb->data;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct ip_tunnel *t;
	int err;

	/* Filter to the error types we can act on. */
	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return 0;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return 0;
		default:
			/* All others are translated to HOST_UNREACH. */
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return 0;
		break;
	}

	err = -ENOENT;

	/* The embedded header was sent by us, so its daddr/saddr are
	 * our remote/local tunnel endpoints respectively. */
	t = vti_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
	if (t == NULL)
		goto out;

	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
				 t->parms.link, 0, IPPROTO_IPIP, 0);
		err = 0;
		goto out;
	}

	err = 0;
	/* ttl == 0 means "inherit from inner packet": a TTL-exceeded is
	 * then the inner flow's problem, not the tunnel's. */
	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		goto out;

	/* Count errors inside the IPTUNNEL_ERR_TIMEO window; the xmit
	 * path consumes err_count to rate-limit dst_link_failure(). */
	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
out:
	return err;
}
278
/* We dont digest the packet therefore let the packet pass.
 * Receive hook on the xfrm4 tunnel-mode input path: match the
 * decapsulated packet to a tunnel, enforce policy, account rx stats
 * and retarget the skb to the vti device.
 * NOTE(review): return value 1 appears to mean "accepted, continue
 * input on tunnel->dev" and -1 "not ours / rejected" — confirm
 * against the xfrm4 mode-tunnel input code consuming vti_handler.
 */
static int vti_rcv(struct sk_buff *skb)
{
	struct ip_tunnel *tunnel;
	const struct iphdr *iph = ip_hdr(skb);

	tunnel = vti_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
	if (tunnel != NULL) {
		struct pcpu_tstats *tstats;
		u32 oldmark = skb->mark;
		int ret;


		/* temporarily mark the skb with the tunnel o_key, to
		 * only match policies with this mark.
		 */
		skb->mark = be32_to_cpu(tunnel->parms.o_key);
		ret = xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb);
		skb->mark = oldmark;	/* restore the caller's mark */
		if (!ret)
			return -1;	/* policy said no: drop */

		/* rx accounting in the per-cpu 64-bit stats */
		tstats = this_cpu_ptr(tunnel->dev->tstats);
		u64_stats_update_begin(&tstats->syncp);
		tstats->rx_packets++;
		tstats->rx_bytes += skb->len;
		u64_stats_update_end(&tstats->syncp);

		/* Drop the sec path so the packet is not policy-checked
		 * again, and hand it to the vti device. */
		secpath_reset(skb);
		skb->dev = tunnel->dev;
		return 1;
	}

	return -1;
}
314
/* This function assumes it is being called from dev_queue_xmit()
 * and that skb is filled properly by that function.
 */

/*
 * ndo_start_xmit: route the IPv4 skb towards the tunnel endpoint,
 * require an xfrm tunnel-mode transform on the resulting route, then
 * transmit through dst_output() via the VTI_XMIT macro.
 * Always returns NETDEV_TX_OK; failures free the skb and bump error
 * counters instead of requeueing.
 */
static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct pcpu_tstats *tstats;
	struct iphdr  *tiph = &tunnel->parms.iph;
	u8     tos;
	struct rtable *rt;		/* Route to the other host */
	struct net_device *tdev;	/* Device to other host */
	struct iphdr  *old_iph = ip_hdr(skb);
	__be32 dst = tiph->daddr;
	struct flowi4 fl4;

	/* vti carries IPv4 only */
	if (skb->protocol != htons(ETH_P_IP))
		goto tx_error;

	tos = old_iph->tos;

	/* Route to the remote endpoint; o_key doubles as the flow mark
	 * so only xfrm policies carrying that mark match this lookup. */
	memset(&fl4, 0, sizeof(fl4));
	flowi4_init_output(&fl4, tunnel->parms.link,
			   be32_to_cpu(tunnel->parms.o_key), RT_TOS(tos),
			   RT_SCOPE_UNIVERSE,
			   IPPROTO_IPIP, 0,
			   dst, tiph->saddr, 0, 0);
	rt = ip_route_output_key(dev_net(dev), &fl4);
	if (IS_ERR(rt)) {
		dev->stats.tx_carrier_errors++;
		goto tx_error_icmp;
	}
	/* if there is no transform then this tunnel is not functional.
	 * Or if the xfrm is not mode tunnel.
	 */
	if (!rt->dst.xfrm ||
	    rt->dst.xfrm->props.mode != XFRM_MODE_TUNNEL) {
		dev->stats.tx_carrier_errors++;
		ip_rt_put(rt);
		goto tx_error_icmp;
	}
	tdev = rt->dst.dev;

	/* Routing back out through ourselves would recurse. */
	if (tdev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	/* Replay ICMP-reported errors (collected by vti_err()) to local
	 * senders: one dst_link_failure() per recent error. */
	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	/* Attach the xfrm route; dst_output() (inside VTI_XMIT) performs
	 * the actual transform and transmit. */
	memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);
	nf_reset(skb);
	skb->dev = skb_dst(skb)->dev;

	tstats = this_cpu_ptr(dev->tstats);
	VTI_XMIT(tstats, &dev->stats);
	return NETDEV_TX_OK;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	dev->stats.tx_errors++;
	dev_kfree_skb(skb);
	return NETDEV_TX_OK;
}
390
391 static int vti_tunnel_bind_dev(struct net_device *dev)
392 {
393         struct net_device *tdev = NULL;
394         struct ip_tunnel *tunnel;
395         struct iphdr *iph;
396
397         tunnel = netdev_priv(dev);
398         iph = &tunnel->parms.iph;
399
400         if (iph->daddr) {
401                 struct rtable *rt;
402                 struct flowi4 fl4;
403                 memset(&fl4, 0, sizeof(fl4));
404                 flowi4_init_output(&fl4, tunnel->parms.link,
405                                    be32_to_cpu(tunnel->parms.i_key),
406                                    RT_TOS(iph->tos), RT_SCOPE_UNIVERSE,
407                                    IPPROTO_IPIP, 0,
408                                    iph->daddr, iph->saddr, 0, 0);
409                 rt = ip_route_output_key(dev_net(dev), &fl4);
410                 if (!IS_ERR(rt)) {
411                         tdev = rt->dst.dev;
412                         ip_rt_put(rt);
413                 }
414                 dev->flags |= IFF_POINTOPOINT;
415         }
416
417         if (!tdev && tunnel->parms.link)
418                 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
419
420         if (tdev) {
421                 dev->hard_header_len = tdev->hard_header_len +
422                                        sizeof(struct iphdr);
423                 dev->mtu = tdev->mtu;
424         }
425         dev->iflink = tunnel->parms.link;
426         return dev->mtu;
427 }
428
/*
 * SIOC{GET,ADD,CHG,DEL}TUNNEL ioctl handler for vti devices.
 * userspace passes a struct ip_tunnel_parm through ifr_ifru.ifru_data.
 * Issued on the fallback device, GET/DEL address a tunnel by the parms
 * passed in; on a regular vti device they act on that device itself.
 * RTNL is held by the ioctl path (vti_tunnel_locate relies on it).
 */
static int
vti_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
	int err = 0;
	struct ip_tunnel_parm p;
	struct ip_tunnel *t;
	struct net *net = dev_net(dev);
	struct vti_net *ipn = net_generic(net, vti_net_id);

	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		if (dev == ipn->fb_tunnel_dev) {
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data,
					   sizeof(p))) {
				err = -EFAULT;
				break;
			}
			t = vti_tunnel_locate(net, &p, 0);
		}
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(&p, &t->parms, sizeof(p));
		/* Advertise keying and flag the interface as a vti. */
		p.i_flags |= GRE_KEY | VTI_ISVTI;
		p.o_flags |= GRE_KEY;
		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
			err = -EFAULT;
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		err = -EFAULT;
		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
			goto done;

		/* vti tunnels always look like plain IPv4-in-IP. */
		err = -EINVAL;
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
		    p.iph.ihl != 5)
			goto done;

		/* ADD may create a new device; CHG only looks up. */
		t = vti_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);

		if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				/* parms already owned by another device */
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				/* A CHG must not flip the device between
				 * point-to-point and non-point-to-point. */
				if (((dev->flags&IFF_POINTOPOINT) &&
				    !p.iph.daddr) ||
				    (!(dev->flags&IFF_POINTOPOINT) &&
				    p.iph.daddr)) {
					err = -EINVAL;
					break;
				}
				/* Unhash, rewrite addresses/keys, re-hash
				 * into the bucket for the new address pair;
				 * synchronize_net() drains RCU readers that
				 * may still walk the old chain. */
				t = netdev_priv(dev);
				vti_tunnel_unlink(ipn, t);
				synchronize_net();
				t->parms.iph.saddr = p.iph.saddr;
				t->parms.iph.daddr = p.iph.daddr;
				t->parms.i_key = p.i_key;
				t->parms.o_key = p.o_key;
				t->parms.iph.protocol = IPPROTO_IPIP;
				memcpy(dev->dev_addr, &p.iph.saddr, 4);
				memcpy(dev->broadcast, &p.iph.daddr, 4);
				vti_tunnel_link(ipn, t);
				netdev_state_change(dev);
			}
		}

		if (t) {
			err = 0;
			if (cmd == SIOCCHGTUNNEL) {
				t->parms.i_key = p.i_key;
				t->parms.o_key = p.o_key;
				if (t->parms.link != p.link) {
					t->parms.link = p.link;
					vti_tunnel_bind_dev(dev);
					netdev_state_change(dev);
				}
			}
			p.i_flags |= GRE_KEY | VTI_ISVTI;
			p.o_flags |= GRE_KEY;
			/* NOTE(review): the flag bits above are set on the
			 * local copy 'p', but it is 't->parms' that is
			 * copied back to userspace, so they are never
			 * reported here (unlike the SIOCGETTUNNEL branch)
			 * — verify whether '&p' was intended. */
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms,
					 sizeof(p)))
				err = -EFAULT;
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == ipn->fb_tunnel_dev) {
			/* Delete "through" the fallback device: resolve the
			 * target by parms; the fallback itself may not be
			 * deleted this way. */
			err = -EFAULT;
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data,
					   sizeof(p)))
				goto done;
			err = -ENOENT;

			t = vti_tunnel_locate(net, &p, 0);
			if (t == NULL)
				goto done;
			err = -EPERM;
			if (t->dev == ipn->fb_tunnel_dev)
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
555
556 static int vti_tunnel_change_mtu(struct net_device *dev, int new_mtu)
557 {
558         if (new_mtu < 68 || new_mtu > 0xFFF8)
559                 return -EINVAL;
560         dev->mtu = new_mtu;
561         return 0;
562 }
563
/* net_device callbacks shared by the fallback and user-created vti
 * devices. */
static const struct net_device_ops vti_netdev_ops = {
	.ndo_init	= vti_tunnel_init,
	.ndo_uninit	= vti_tunnel_uninit,
	.ndo_start_xmit	= vti_tunnel_xmit,
	.ndo_do_ioctl	= vti_tunnel_ioctl,
	.ndo_change_mtu	= vti_tunnel_change_mtu,
	.ndo_get_stats64 = ip_tunnel_get_stats64,
};
572
/* netdev destructor: release the per-cpu stats allocated in
 * vti_tunnel_init() before freeing the device itself. */
static void vti_dev_free(struct net_device *dev)
{
	free_percpu(dev->tstats);
	free_netdev(dev);
}
578
/* Common net_device initialisation for vti devices; also the
 * rtnl_link_ops .setup callback. */
static void vti_tunnel_setup(struct net_device *dev)
{
	dev->netdev_ops		= &vti_netdev_ops;
	dev->destructor		= vti_dev_free;

	dev->type		= ARPHRD_TUNNEL;
	/* room for the outer IPv4 header on top of the lower device's */
	dev->hard_header_len	= LL_MAX_HEADER + sizeof(struct iphdr);
	dev->mtu		= ETH_DATA_LEN;
	dev->flags		= IFF_NOARP;
	dev->iflink		= 0;
	dev->addr_len		= 4;	/* hw address is the IPv4 address */
	dev->features		|= NETIF_F_NETNS_LOCAL;
	dev->features		|= NETIF_F_LLTX;
	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;
}
594
595 static int vti_tunnel_init(struct net_device *dev)
596 {
597         struct ip_tunnel *tunnel = netdev_priv(dev);
598
599         tunnel->dev = dev;
600         strcpy(tunnel->parms.name, dev->name);
601
602         memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
603         memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
604
605         dev->tstats = alloc_percpu(struct pcpu_tstats);
606         if (!dev->tstats)
607                 return -ENOMEM;
608
609         return 0;
610 }
611
/*
 * Initialise the per-namespace fallback device ("ip_vti0") and hang
 * its tunnel in the wildcard hash slot.  Only the header template
 * fields are filled; addresses stay zero (wildcard match).
 */
static int __net_init vti_fb_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	struct vti_net *ipn = net_generic(dev_net(dev), vti_net_id);

	iph->version		= 4;
	iph->protocol		= IPPROTO_IPIP;
	iph->ihl		= 5;

	/* reference dropped in vti_tunnel_uninit() */
	dev_hold(dev);
	rcu_assign_pointer(ipn->tunnels_wc[0], tunnel);
	return 0;
}
626
/* Registered on the xfrm4 tunnel-mode input path in vti_init():
 * vti_rcv() sees decapsulated packets, vti_err() the ICMP errors. */
static struct xfrm_tunnel vti_handler __read_mostly = {
	.handler	=	vti_rcv,
	.err_handler	=	vti_err,
	.priority	=	1,
};
632
633 static void vti_destroy_tunnels(struct vti_net *ipn, struct list_head *head)
634 {
635         int prio;
636
637         for (prio = 1; prio < 4; prio++) {
638                 int h;
639                 for (h = 0; h < HASH_SIZE; h++) {
640                         struct ip_tunnel *t;
641
642                         t = rtnl_dereference(ipn->tunnels[prio][h]);
643                         while (t != NULL) {
644                                 unregister_netdevice_queue(t->dev, head);
645                                 t = rtnl_dereference(t->next);
646                         }
647                 }
648         }
649 }
650
/*
 * Per-namespace setup: wire the prio-indexed table to the four hash
 * arrays and create + register the namespace's fallback device.
 * Returns 0 or a negative errno.
 */
static int __net_init vti_init_net(struct net *net)
{
	int err;
	struct vti_net *ipn = net_generic(net, vti_net_id);

	/* Index matches the 'prio' computed in __vti_bucket():
	 * 0 wildcard, 1 local-only, 2 remote-only, 3 both. */
	ipn->tunnels[0] = ipn->tunnels_wc;
	ipn->tunnels[1] = ipn->tunnels_l;
	ipn->tunnels[2] = ipn->tunnels_r;
	ipn->tunnels[3] = ipn->tunnels_r_l;

	ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
					  "ip_vti0",
					  vti_tunnel_setup);
	if (!ipn->fb_tunnel_dev) {
		err = -ENOMEM;
		goto err_alloc_dev;
	}
	dev_net_set(ipn->fb_tunnel_dev, net);

	err = vti_fb_tunnel_init(ipn->fb_tunnel_dev);
	if (err)
		goto err_reg_dev;
	ipn->fb_tunnel_dev->rtnl_link_ops = &vti_link_ops;

	err = register_netdev(ipn->fb_tunnel_dev);
	if (err)
		goto err_reg_dev;
	return 0;

err_reg_dev:
	/* frees per-cpu stats (if any) and the netdev itself */
	vti_dev_free(ipn->fb_tunnel_dev);
err_alloc_dev:
	/* nothing */
	return err;
}
686
/* Per-namespace teardown: collect the namespace's tunnels and
 * unregister them in one batch under RTNL. */
static void __net_exit vti_exit_net(struct net *net)
{
	struct vti_net *ipn = net_generic(net, vti_net_id);
	LIST_HEAD(list);

	rtnl_lock();
	vti_destroy_tunnels(ipn, &list);
	unregister_netdevice_many(&list);
	rtnl_unlock();
}
697
/* Pernet ops; .id/.size make the core allocate a zeroed struct vti_net
 * per namespace, retrievable via net_generic(net, vti_net_id). */
static struct pernet_operations vti_net_ops = {
	.init = vti_init_net,
	.exit = vti_exit_net,
	.id   = &vti_net_id,
	.size = sizeof(struct vti_net),
};
704
/* rtnl .validate: all vti attributes are optional fixed-size words
 * (see vti_policy), so there is nothing to check beyond the policy. */
static int vti_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
{
	return 0;
}
709
710 static void vti_netlink_parms(struct nlattr *data[],
711                               struct ip_tunnel_parm *parms)
712 {
713         memset(parms, 0, sizeof(*parms));
714
715         parms->iph.protocol = IPPROTO_IPIP;
716
717         if (!data)
718                 return;
719
720         if (data[IFLA_VTI_LINK])
721                 parms->link = nla_get_u32(data[IFLA_VTI_LINK]);
722
723         if (data[IFLA_VTI_IKEY])
724                 parms->i_key = nla_get_be32(data[IFLA_VTI_IKEY]);
725
726         if (data[IFLA_VTI_OKEY])
727                 parms->o_key = nla_get_be32(data[IFLA_VTI_OKEY]);
728
729         if (data[IFLA_VTI_LOCAL])
730                 parms->iph.saddr = nla_get_be32(data[IFLA_VTI_LOCAL]);
731
732         if (data[IFLA_VTI_REMOTE])
733                 parms->iph.daddr = nla_get_be32(data[IFLA_VTI_REMOTE]);
734
735 }
736
/*
 * rtnl_link_ops .newlink: create a vti device from netlink attributes.
 * The device was allocated by the rtnetlink core; fill its parms,
 * refuse duplicates, bind to the lower device and register it.
 */
static int vti_newlink(struct net *src_net, struct net_device *dev,
		       struct nlattr *tb[], struct nlattr *data[])
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct vti_net *ipn = net_generic(net, vti_net_id);
	int mtu;
	int err;

	nt = netdev_priv(dev);
	vti_netlink_parms(data, &nt->parms);

	if (vti_tunnel_locate(net, &nt->parms, 0))
		return -EEXIST;

	/* Inherit the lower device's MTU unless userspace set one. */
	mtu = vti_tunnel_bind_dev(dev);
	if (!tb[IFLA_MTU])
		dev->mtu = mtu;

	err = register_netdevice(dev);
	if (err)
		goto out;

	/* reference dropped in vti_tunnel_uninit() */
	dev_hold(dev);
	vti_tunnel_link(ipn, nt);

out:
	return err;
}
766
/*
 * rtnl_link_ops .changelink: reconfigure an existing vti device.
 * The fallback device cannot be changed.  If the requested address
 * pair already belongs to another device, fail with -EEXIST;
 * otherwise re-address/re-key this device, re-hashing it and
 * rebinding to the lower device when the link attribute changed.
 */
static int vti_changelink(struct net_device *dev, struct nlattr *tb[],
			  struct nlattr *data[])
{
	struct ip_tunnel *t, *nt;
	struct net *net = dev_net(dev);
	struct vti_net *ipn = net_generic(net, vti_net_id);
	struct ip_tunnel_parm p;
	int mtu;

	if (dev == ipn->fb_tunnel_dev)
		return -EINVAL;

	nt = netdev_priv(dev);
	vti_netlink_parms(data, &p);

	t = vti_tunnel_locate(net, &p, 0);

	if (t) {
		/* new parms already owned by another device */
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = nt;

		/* Unhash, rewrite addresses/keys, re-hash into the
		 * bucket matching the new address combination. */
		vti_tunnel_unlink(ipn, t);
		t->parms.iph.saddr = p.iph.saddr;
		t->parms.iph.daddr = p.iph.daddr;
		t->parms.i_key = p.i_key;
		t->parms.o_key = p.o_key;
		if (dev->type != ARPHRD_ETHER) {
			memcpy(dev->dev_addr, &p.iph.saddr, 4);
			memcpy(dev->broadcast, &p.iph.daddr, 4);
		}
		vti_tunnel_link(ipn, t);
		netdev_state_change(dev);
	}

	if (t->parms.link != p.link) {
		t->parms.link = p.link;
		mtu = vti_tunnel_bind_dev(dev);
		if (!tb[IFLA_MTU])
			dev->mtu = mtu;
		netdev_state_change(dev);
	}

	return 0;
}
813
814 static size_t vti_get_size(const struct net_device *dev)
815 {
816         return
817                 /* IFLA_VTI_LINK */
818                 nla_total_size(4) +
819                 /* IFLA_VTI_IKEY */
820                 nla_total_size(4) +
821                 /* IFLA_VTI_OKEY */
822                 nla_total_size(4) +
823                 /* IFLA_VTI_LOCAL */
824                 nla_total_size(4) +
825                 /* IFLA_VTI_REMOTE */
826                 nla_total_size(4) +
827                 0;
828 }
829
830 static int vti_fill_info(struct sk_buff *skb, const struct net_device *dev)
831 {
832         struct ip_tunnel *t = netdev_priv(dev);
833         struct ip_tunnel_parm *p = &t->parms;
834
835         nla_put_u32(skb, IFLA_VTI_LINK, p->link);
836         nla_put_be32(skb, IFLA_VTI_IKEY, p->i_key);
837         nla_put_be32(skb, IFLA_VTI_OKEY, p->o_key);
838         nla_put_be32(skb, IFLA_VTI_LOCAL, p->iph.saddr);
839         nla_put_be32(skb, IFLA_VTI_REMOTE, p->iph.daddr);
840
841         return 0;
842 }
843
/* Netlink attribute policy: link/keys are 32-bit words; LOCAL/REMOTE
 * are raw 4-byte IPv4 addresses (length-checked, not typed). */
static const struct nla_policy vti_policy[IFLA_VTI_MAX + 1] = {
	[IFLA_VTI_LINK]		= { .type = NLA_U32 },
	[IFLA_VTI_IKEY]		= { .type = NLA_U32 },
	[IFLA_VTI_OKEY]		= { .type = NLA_U32 },
	[IFLA_VTI_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
	[IFLA_VTI_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
};
851
/* rtnetlink ops for "ip link add ... type vti". */
static struct rtnl_link_ops vti_link_ops __read_mostly = {
	.kind		= "vti",
	.maxtype	= IFLA_VTI_MAX,
	.policy		= vti_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= vti_tunnel_setup,
	.validate	= vti_tunnel_validate,
	.newlink	= vti_newlink,
	.changelink	= vti_changelink,
	.get_size	= vti_get_size,
	.fill_info	= vti_fill_info,
};
864
865 static int __init vti_init(void)
866 {
867         int err;
868
869         pr_info("IPv4 over IPSec tunneling driver\n");
870
871         err = register_pernet_device(&vti_net_ops);
872         if (err < 0)
873                 return err;
874         err = xfrm4_mode_tunnel_input_register(&vti_handler);
875         if (err < 0) {
876                 unregister_pernet_device(&vti_net_ops);
877                 pr_info(KERN_INFO "vti init: can't register tunnel\n");
878         }
879
880         err = rtnl_link_register(&vti_link_ops);
881         if (err < 0)
882                 goto rtnl_link_failed;
883
884         return err;
885
886 rtnl_link_failed:
887         xfrm4_mode_tunnel_input_deregister(&vti_handler);
888         unregister_pernet_device(&vti_net_ops);
889         return err;
890 }
891
/* Module exit: tear down in reverse registration order. */
static void __exit vti_fini(void)
{
	rtnl_link_unregister(&vti_link_ops);
	if (xfrm4_mode_tunnel_input_deregister(&vti_handler))
		pr_info("vti close: can't deregister tunnel\n");

	unregister_pernet_device(&vti_net_ops);
}
900
901 module_init(vti_init);
902 module_exit(vti_fini);
903 MODULE_LICENSE("GPL");
904 MODULE_ALIAS_RTNL_LINK("vti");
905 MODULE_ALIAS_NETDEV("ip_vti0");