x86/vdso: Remove x32 intermediates during 'make clean'
[firefly-linux-kernel-4.4.55.git] / net / ipv6 / ip6_output.c
1 /*
2  *      IPv6 output functions
3  *      Linux INET6 implementation
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      Based on linux/net/ipv4/ip_output.c
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  *
15  *      Changes:
16  *      A.N.Kuznetsov   :       arithmetics in fragmentation.
17  *                              extension headers are implemented.
18  *                              route changes now work.
19  *                              ip6_forward does not confuse sniffers.
20  *                              etc.
21  *
22  *      H. von Brand    :       Added missing #include <linux/string.h>
23  *      Imran Patel     :       frag id should be in NBO
24  *      Kazunori MIYAZAWA @USAGI
25  *                      :       add ip6_append_data and related functions
26  *                              for datagram xmit
27  */
28
29 #include <linux/errno.h>
30 #include <linux/kernel.h>
31 #include <linux/string.h>
32 #include <linux/socket.h>
33 #include <linux/net.h>
34 #include <linux/netdevice.h>
35 #include <linux/if_arp.h>
36 #include <linux/in6.h>
37 #include <linux/tcp.h>
38 #include <linux/route.h>
39 #include <linux/module.h>
40 #include <linux/slab.h>
41
42 #include <linux/netfilter.h>
43 #include <linux/netfilter_ipv6.h>
44
45 #include <net/sock.h>
46 #include <net/snmp.h>
47
48 #include <net/ipv6.h>
49 #include <net/ndisc.h>
50 #include <net/protocol.h>
51 #include <net/ip6_route.h>
52 #include <net/addrconf.h>
53 #include <net/rawv6.h>
54 #include <net/icmp.h>
55 #include <net/xfrm.h>
56 #include <net/checksum.h>
57 #include <linux/mroute6.h>
58
59 static int ip6_finish_output2(struct sk_buff *skb)
60 {
61         struct dst_entry *dst = skb_dst(skb);
62         struct net_device *dev = dst->dev;
63         struct neighbour *neigh;
64         struct in6_addr *nexthop;
65         int ret;
66
67         skb->protocol = htons(ETH_P_IPV6);
68         skb->dev = dev;
69
70         if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
71                 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
72
73                 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
74                     ((mroute6_socket(dev_net(dev), skb) &&
75                      !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
76                      ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
77                                          &ipv6_hdr(skb)->saddr))) {
78                         struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
79
80                         /* Do not check for IFF_ALLMULTI; multicast routing
81                            is not supported in any case.
82                          */
83                         if (newskb)
84                                 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
85                                         newskb, NULL, newskb->dev,
86                                         dev_loopback_xmit);
87
88                         if (ipv6_hdr(skb)->hop_limit == 0) {
89                                 IP6_INC_STATS(dev_net(dev), idev,
90                                               IPSTATS_MIB_OUTDISCARDS);
91                                 kfree_skb(skb);
92                                 return 0;
93                         }
94                 }
95
96                 IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
97                                 skb->len);
98
99                 if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
100                     IPV6_ADDR_SCOPE_NODELOCAL &&
101                     !(dev->flags & IFF_LOOPBACK)) {
102                         kfree_skb(skb);
103                         return 0;
104                 }
105         }
106
107         rcu_read_lock_bh();
108         nexthop = rt6_nexthop((struct rt6_info *)dst);
109         neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
110         if (unlikely(!neigh))
111                 neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
112         if (!IS_ERR(neigh)) {
113                 ret = dst_neigh_output(dst, neigh, skb);
114                 rcu_read_unlock_bh();
115                 return ret;
116         }
117         rcu_read_unlock_bh();
118
119         IP6_INC_STATS(dev_net(dst->dev),
120                       ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
121         kfree_skb(skb);
122         return -EINVAL;
123 }
124
125 static int ip6_finish_output(struct sk_buff *skb)
126 {
127         if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
128             dst_allfrag(skb_dst(skb)) ||
129             (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
130                 return ip6_fragment(skb, ip6_finish_output2);
131         else
132                 return ip6_finish_output2(skb);
133 }
134
135 int ip6_output(struct sock *sk, struct sk_buff *skb)
136 {
137         struct net_device *dev = skb_dst(skb)->dev;
138         struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
139         if (unlikely(idev->cnf.disable_ipv6)) {
140                 IP6_INC_STATS(dev_net(dev), idev,
141                               IPSTATS_MIB_OUTDISCARDS);
142                 kfree_skb(skb);
143                 return 0;
144         }
145
146         return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
147                             ip6_finish_output,
148                             !(IP6CB(skb)->flags & IP6SKB_REROUTED));
149 }
150
151 /*
152  *      xmit an sk_buff (used by TCP, SCTP and DCCP)
153  */
154
155 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
156              struct ipv6_txoptions *opt, int tclass)
157 {
158         struct net *net = sock_net(sk);
159         struct ipv6_pinfo *np = inet6_sk(sk);
160         struct in6_addr *first_hop = &fl6->daddr;
161         struct dst_entry *dst = skb_dst(skb);
162         struct ipv6hdr *hdr;
163         u8  proto = fl6->flowi6_proto;
164         int seg_len = skb->len;
165         int hlimit = -1;
166         u32 mtu;
167
168         if (opt) {
169                 unsigned int head_room;
170
171                 /* First: exthdrs may take lots of space (~8K for now)
172                    MAX_HEADER is not enough.
173                  */
174                 head_room = opt->opt_nflen + opt->opt_flen;
175                 seg_len += head_room;
176                 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
177
178                 if (skb_headroom(skb) < head_room) {
179                         struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
180                         if (skb2 == NULL) {
181                                 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
182                                               IPSTATS_MIB_OUTDISCARDS);
183                                 kfree_skb(skb);
184                                 return -ENOBUFS;
185                         }
186                         consume_skb(skb);
187                         skb = skb2;
188                         skb_set_owner_w(skb, sk);
189                 }
190                 if (opt->opt_flen)
191                         ipv6_push_frag_opts(skb, opt, &proto);
192                 if (opt->opt_nflen)
193                         ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
194         }
195
196         skb_push(skb, sizeof(struct ipv6hdr));
197         skb_reset_network_header(skb);
198         hdr = ipv6_hdr(skb);
199
200         /*
201          *      Fill in the IPv6 header
202          */
203         if (np)
204                 hlimit = np->hop_limit;
205         if (hlimit < 0)
206                 hlimit = ip6_dst_hoplimit(dst);
207
208         ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
209                                                      np->autoflowlabel));
210
211         hdr->payload_len = htons(seg_len);
212         hdr->nexthdr = proto;
213         hdr->hop_limit = hlimit;
214
215         hdr->saddr = fl6->saddr;
216         hdr->daddr = *first_hop;
217
218         skb->protocol = htons(ETH_P_IPV6);
219         skb->priority = sk->sk_priority;
220         skb->mark = sk->sk_mark;
221
222         mtu = dst_mtu(dst);
223         if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
224                 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
225                               IPSTATS_MIB_OUT, skb->len);
226                 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
227                                dst->dev, dst_output);
228         }
229
230         skb->dev = dst->dev;
231         ipv6_local_error(sk, EMSGSIZE, fl6, mtu);
232         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
233         kfree_skb(skb);
234         return -EMSGSIZE;
235 }
236 EXPORT_SYMBOL(ip6_xmit);
237
238 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
239 {
240         struct ip6_ra_chain *ra;
241         struct sock *last = NULL;
242
243         read_lock(&ip6_ra_lock);
244         for (ra = ip6_ra_chain; ra; ra = ra->next) {
245                 struct sock *sk = ra->sk;
246                 if (sk && ra->sel == sel &&
247                     (!sk->sk_bound_dev_if ||
248                      sk->sk_bound_dev_if == skb->dev->ifindex)) {
249                         if (last) {
250                                 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
251                                 if (skb2)
252                                         rawv6_rcv(last, skb2);
253                         }
254                         last = sk;
255                 }
256         }
257
258         if (last) {
259                 rawv6_rcv(last, skb);
260                 read_unlock(&ip6_ra_lock);
261                 return 1;
262         }
263         read_unlock(&ip6_ra_lock);
264         return 0;
265 }
266
267 static int ip6_forward_proxy_check(struct sk_buff *skb)
268 {
269         struct ipv6hdr *hdr = ipv6_hdr(skb);
270         u8 nexthdr = hdr->nexthdr;
271         __be16 frag_off;
272         int offset;
273
274         if (ipv6_ext_hdr(nexthdr)) {
275                 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
276                 if (offset < 0)
277                         return 0;
278         } else
279                 offset = sizeof(struct ipv6hdr);
280
281         if (nexthdr == IPPROTO_ICMPV6) {
282                 struct icmp6hdr *icmp6;
283
284                 if (!pskb_may_pull(skb, (skb_network_header(skb) +
285                                          offset + 1 - skb->data)))
286                         return 0;
287
288                 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
289
290                 switch (icmp6->icmp6_type) {
291                 case NDISC_ROUTER_SOLICITATION:
292                 case NDISC_ROUTER_ADVERTISEMENT:
293                 case NDISC_NEIGHBOUR_SOLICITATION:
294                 case NDISC_NEIGHBOUR_ADVERTISEMENT:
295                 case NDISC_REDIRECT:
296                         /* For reaction involving unicast neighbor discovery
297                          * message destined to the proxied address, pass it to
298                          * input function.
299                          */
300                         return 1;
301                 default:
302                         break;
303                 }
304         }
305
306         /*
307          * The proxying router can't forward traffic sent to a link-local
308          * address, so signal the sender and discard the packet. This
309          * behavior is clarified by the MIPv6 specification.
310          */
311         if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
312                 dst_link_failure(skb);
313                 return -1;
314         }
315
316         return 0;
317 }
318
/*
 *	Continuation run after the FORWARD hook: clear the sender-cpu
 *	record on the skb, then pass it to dst_output().
 */
static inline int ip6_forward_finish(struct sk_buff *skb)
{
	int rc;

	skb_sender_cpu_clear(skb);
	rc = dst_output(skb);

	return rc;
}
324
325 static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
326 {
327         unsigned int mtu;
328         struct inet6_dev *idev;
329
330         if (dst_metric_locked(dst, RTAX_MTU)) {
331                 mtu = dst_metric_raw(dst, RTAX_MTU);
332                 if (mtu)
333                         return mtu;
334         }
335
336         mtu = IPV6_MIN_MTU;
337         rcu_read_lock();
338         idev = __in6_dev_get(dst->dev);
339         if (idev)
340                 mtu = idev->cnf.mtu6;
341         rcu_read_unlock();
342
343         return mtu;
344 }
345
346 static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
347 {
348         if (skb->len <= mtu)
349                 return false;
350
351         /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
352         if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
353                 return true;
354
355         if (skb->ignore_df)
356                 return false;
357
358         if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
359                 return false;
360
361         return true;
362 }
363
364 int ip6_forward(struct sk_buff *skb)
365 {
366         struct dst_entry *dst = skb_dst(skb);
367         struct ipv6hdr *hdr = ipv6_hdr(skb);
368         struct inet6_skb_parm *opt = IP6CB(skb);
369         struct net *net = dev_net(dst->dev);
370         u32 mtu;
371
372         if (net->ipv6.devconf_all->forwarding == 0)
373                 goto error;
374
375         if (skb->pkt_type != PACKET_HOST)
376                 goto drop;
377
378         if (skb_warn_if_lro(skb))
379                 goto drop;
380
381         if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
382                 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
383                                  IPSTATS_MIB_INDISCARDS);
384                 goto drop;
385         }
386
387         skb_forward_csum(skb);
388
389         /*
390          *      We DO NOT make any processing on
391          *      RA packets, pushing them to user level AS IS
392          *      without ane WARRANTY that application will be able
393          *      to interpret them. The reason is that we
394          *      cannot make anything clever here.
395          *
396          *      We are not end-node, so that if packet contains
397          *      AH/ESP, we cannot make anything.
398          *      Defragmentation also would be mistake, RA packets
399          *      cannot be fragmented, because there is no warranty
400          *      that different fragments will go along one path. --ANK
401          */
402         if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
403                 if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
404                         return 0;
405         }
406
407         /*
408          *      check and decrement ttl
409          */
410         if (hdr->hop_limit <= 1) {
411                 /* Force OUTPUT device used as source address */
412                 skb->dev = dst->dev;
413                 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
414                 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
415                                  IPSTATS_MIB_INHDRERRORS);
416
417                 kfree_skb(skb);
418                 return -ETIMEDOUT;
419         }
420
421         /* XXX: idev->cnf.proxy_ndp? */
422         if (net->ipv6.devconf_all->proxy_ndp &&
423             pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
424                 int proxied = ip6_forward_proxy_check(skb);
425                 if (proxied > 0)
426                         return ip6_input(skb);
427                 else if (proxied < 0) {
428                         IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
429                                          IPSTATS_MIB_INDISCARDS);
430                         goto drop;
431                 }
432         }
433
434         if (!xfrm6_route_forward(skb)) {
435                 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
436                                  IPSTATS_MIB_INDISCARDS);
437                 goto drop;
438         }
439         dst = skb_dst(skb);
440
441         /* IPv6 specs say nothing about it, but it is clear that we cannot
442            send redirects to source routed frames.
443            We don't send redirects to frames decapsulated from IPsec.
444          */
445         if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
446                 struct in6_addr *target = NULL;
447                 struct inet_peer *peer;
448                 struct rt6_info *rt;
449
450                 /*
451                  *      incoming and outgoing devices are the same
452                  *      send a redirect.
453                  */
454
455                 rt = (struct rt6_info *) dst;
456                 if (rt->rt6i_flags & RTF_GATEWAY)
457                         target = &rt->rt6i_gateway;
458                 else
459                         target = &hdr->daddr;
460
461                 peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
462
463                 /* Limit redirects both by destination (here)
464                    and by source (inside ndisc_send_redirect)
465                  */
466                 if (inet_peer_xrlim_allow(peer, 1*HZ))
467                         ndisc_send_redirect(skb, target);
468                 if (peer)
469                         inet_putpeer(peer);
470         } else {
471                 int addrtype = ipv6_addr_type(&hdr->saddr);
472
473                 /* This check is security critical. */
474                 if (addrtype == IPV6_ADDR_ANY ||
475                     addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
476                         goto error;
477                 if (addrtype & IPV6_ADDR_LINKLOCAL) {
478                         icmpv6_send(skb, ICMPV6_DEST_UNREACH,
479                                     ICMPV6_NOT_NEIGHBOUR, 0);
480                         goto error;
481                 }
482         }
483
484         mtu = ip6_dst_mtu_forward(dst);
485         if (mtu < IPV6_MIN_MTU)
486                 mtu = IPV6_MIN_MTU;
487
488         if (ip6_pkt_too_big(skb, mtu)) {
489                 /* Again, force OUTPUT device used as source address */
490                 skb->dev = dst->dev;
491                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
492                 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
493                                  IPSTATS_MIB_INTOOBIGERRORS);
494                 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
495                                  IPSTATS_MIB_FRAGFAILS);
496                 kfree_skb(skb);
497                 return -EMSGSIZE;
498         }
499
500         if (skb_cow(skb, dst->dev->hard_header_len)) {
501                 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
502                                  IPSTATS_MIB_OUTDISCARDS);
503                 goto drop;
504         }
505
506         hdr = ipv6_hdr(skb);
507
508         /* Mangling hops number delayed to point after skb COW */
509
510         hdr->hop_limit--;
511
512         IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
513         IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
514         return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
515                        ip6_forward_finish);
516
517 error:
518         IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
519 drop:
520         kfree_skb(skb);
521         return -EINVAL;
522 }
523
524 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
525 {
526         to->pkt_type = from->pkt_type;
527         to->priority = from->priority;
528         to->protocol = from->protocol;
529         skb_dst_drop(to);
530         skb_dst_set(to, dst_clone(skb_dst(from)));
531         to->dev = from->dev;
532         to->mark = from->mark;
533
534 #ifdef CONFIG_NET_SCHED
535         to->tc_index = from->tc_index;
536 #endif
537         nf_copy(to, from);
538         skb_copy_secmark(to, from);
539 }
540
541 int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
542 {
543         struct sk_buff *frag;
544         struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
545         struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
546         struct ipv6hdr *tmp_hdr;
547         struct frag_hdr *fh;
548         unsigned int mtu, hlen, left, len;
549         int hroom, troom;
550         __be32 frag_id = 0;
551         int ptr, offset = 0, err = 0;
552         u8 *prevhdr, nexthdr = 0;
553         struct net *net = dev_net(skb_dst(skb)->dev);
554
555         hlen = ip6_find_1stfragopt(skb, &prevhdr);
556         nexthdr = *prevhdr;
557
558         mtu = ip6_skb_dst_mtu(skb);
559
560         /* We must not fragment if the socket is set to force MTU discovery
561          * or if the skb it not generated by a local socket.
562          */
563         if (unlikely(!skb->ignore_df && skb->len > mtu) ||
564                      (IP6CB(skb)->frag_max_size &&
565                       IP6CB(skb)->frag_max_size > mtu)) {
566                 if (skb->sk && dst_allfrag(skb_dst(skb)))
567                         sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
568
569                 skb->dev = skb_dst(skb)->dev;
570                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
571                 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
572                               IPSTATS_MIB_FRAGFAILS);
573                 kfree_skb(skb);
574                 return -EMSGSIZE;
575         }
576
577         if (np && np->frag_size < mtu) {
578                 if (np->frag_size)
579                         mtu = np->frag_size;
580         }
581         mtu -= hlen + sizeof(struct frag_hdr);
582
583         if (skb_has_frag_list(skb)) {
584                 int first_len = skb_pagelen(skb);
585                 struct sk_buff *frag2;
586
587                 if (first_len - hlen > mtu ||
588                     ((first_len - hlen) & 7) ||
589                     skb_cloned(skb))
590                         goto slow_path;
591
592                 skb_walk_frags(skb, frag) {
593                         /* Correct geometry. */
594                         if (frag->len > mtu ||
595                             ((frag->len & 7) && frag->next) ||
596                             skb_headroom(frag) < hlen)
597                                 goto slow_path_clean;
598
599                         /* Partially cloned skb? */
600                         if (skb_shared(frag))
601                                 goto slow_path_clean;
602
603                         BUG_ON(frag->sk);
604                         if (skb->sk) {
605                                 frag->sk = skb->sk;
606                                 frag->destructor = sock_wfree;
607                         }
608                         skb->truesize -= frag->truesize;
609                 }
610
611                 err = 0;
612                 offset = 0;
613                 frag = skb_shinfo(skb)->frag_list;
614                 skb_frag_list_init(skb);
615                 /* BUILD HEADER */
616
617                 *prevhdr = NEXTHDR_FRAGMENT;
618                 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
619                 if (!tmp_hdr) {
620                         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
621                                       IPSTATS_MIB_FRAGFAILS);
622                         return -ENOMEM;
623                 }
624
625                 __skb_pull(skb, hlen);
626                 fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
627                 __skb_push(skb, hlen);
628                 skb_reset_network_header(skb);
629                 memcpy(skb_network_header(skb), tmp_hdr, hlen);
630
631                 ipv6_select_ident(fh, rt);
632                 fh->nexthdr = nexthdr;
633                 fh->reserved = 0;
634                 fh->frag_off = htons(IP6_MF);
635                 frag_id = fh->identification;
636
637                 first_len = skb_pagelen(skb);
638                 skb->data_len = first_len - skb_headlen(skb);
639                 skb->len = first_len;
640                 ipv6_hdr(skb)->payload_len = htons(first_len -
641                                                    sizeof(struct ipv6hdr));
642
643                 dst_hold(&rt->dst);
644
645                 for (;;) {
646                         /* Prepare header of the next frame,
647                          * before previous one went down. */
648                         if (frag) {
649                                 frag->ip_summed = CHECKSUM_NONE;
650                                 skb_reset_transport_header(frag);
651                                 fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
652                                 __skb_push(frag, hlen);
653                                 skb_reset_network_header(frag);
654                                 memcpy(skb_network_header(frag), tmp_hdr,
655                                        hlen);
656                                 offset += skb->len - hlen - sizeof(struct frag_hdr);
657                                 fh->nexthdr = nexthdr;
658                                 fh->reserved = 0;
659                                 fh->frag_off = htons(offset);
660                                 if (frag->next != NULL)
661                                         fh->frag_off |= htons(IP6_MF);
662                                 fh->identification = frag_id;
663                                 ipv6_hdr(frag)->payload_len =
664                                                 htons(frag->len -
665                                                       sizeof(struct ipv6hdr));
666                                 ip6_copy_metadata(frag, skb);
667                         }
668
669                         err = output(skb);
670                         if (!err)
671                                 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
672                                               IPSTATS_MIB_FRAGCREATES);
673
674                         if (err || !frag)
675                                 break;
676
677                         skb = frag;
678                         frag = skb->next;
679                         skb->next = NULL;
680                 }
681
682                 kfree(tmp_hdr);
683
684                 if (err == 0) {
685                         IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
686                                       IPSTATS_MIB_FRAGOKS);
687                         ip6_rt_put(rt);
688                         return 0;
689                 }
690
691                 kfree_skb_list(frag);
692
693                 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
694                               IPSTATS_MIB_FRAGFAILS);
695                 ip6_rt_put(rt);
696                 return err;
697
698 slow_path_clean:
699                 skb_walk_frags(skb, frag2) {
700                         if (frag2 == frag)
701                                 break;
702                         frag2->sk = NULL;
703                         frag2->destructor = NULL;
704                         skb->truesize += frag2->truesize;
705                 }
706         }
707
708 slow_path:
709         if ((skb->ip_summed == CHECKSUM_PARTIAL) &&
710             skb_checksum_help(skb))
711                 goto fail;
712
713         left = skb->len - hlen;         /* Space per frame */
714         ptr = hlen;                     /* Where to start from */
715
716         /*
717          *      Fragment the datagram.
718          */
719
720         *prevhdr = NEXTHDR_FRAGMENT;
721         hroom = LL_RESERVED_SPACE(rt->dst.dev);
722         troom = rt->dst.dev->needed_tailroom;
723
724         /*
725          *      Keep copying data until we run out.
726          */
727         while (left > 0)        {
728                 len = left;
729                 /* IF: it doesn't fit, use 'mtu' - the data space left */
730                 if (len > mtu)
731                         len = mtu;
732                 /* IF: we are not sending up to and including the packet end
733                    then align the next start on an eight byte boundary */
734                 if (len < left) {
735                         len &= ~7;
736                 }
737
738                 /* Allocate buffer */
739                 frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
740                                  hroom + troom, GFP_ATOMIC);
741                 if (!frag) {
742                         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
743                                       IPSTATS_MIB_FRAGFAILS);
744                         err = -ENOMEM;
745                         goto fail;
746                 }
747
748                 /*
749                  *      Set up data on packet
750                  */
751
752                 ip6_copy_metadata(frag, skb);
753                 skb_reserve(frag, hroom);
754                 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
755                 skb_reset_network_header(frag);
756                 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
757                 frag->transport_header = (frag->network_header + hlen +
758                                           sizeof(struct frag_hdr));
759
760                 /*
761                  *      Charge the memory for the fragment to any owner
762                  *      it might possess
763                  */
764                 if (skb->sk)
765                         skb_set_owner_w(frag, skb->sk);
766
767                 /*
768                  *      Copy the packet header into the new buffer.
769                  */
770                 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
771
772                 /*
773                  *      Build fragment header.
774                  */
775                 fh->nexthdr = nexthdr;
776                 fh->reserved = 0;
777                 if (!frag_id) {
778                         ipv6_select_ident(fh, rt);
779                         frag_id = fh->identification;
780                 } else
781                         fh->identification = frag_id;
782
783                 /*
784                  *      Copy a block of the IP datagram.
785                  */
786                 BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
787                                      len));
788                 left -= len;
789
790                 fh->frag_off = htons(offset);
791                 if (left > 0)
792                         fh->frag_off |= htons(IP6_MF);
793                 ipv6_hdr(frag)->payload_len = htons(frag->len -
794                                                     sizeof(struct ipv6hdr));
795
796                 ptr += len;
797                 offset += len;
798
799                 /*
800                  *      Put this fragment into the sending queue.
801                  */
802                 err = output(frag);
803                 if (err)
804                         goto fail;
805
806                 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
807                               IPSTATS_MIB_FRAGCREATES);
808         }
809         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
810                       IPSTATS_MIB_FRAGOKS);
811         consume_skb(skb);
812         return err;
813
814 fail:
815         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
816                       IPSTATS_MIB_FRAGFAILS);
817         kfree_skb(skb);
818         return err;
819 }
820
821 static inline int ip6_rt_check(const struct rt6key *rt_key,
822                                const struct in6_addr *fl_addr,
823                                const struct in6_addr *addr_cache)
824 {
825         return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
826                 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
827 }
828
829 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
830                                           struct dst_entry *dst,
831                                           const struct flowi6 *fl6)
832 {
833         struct ipv6_pinfo *np = inet6_sk(sk);
834         struct rt6_info *rt;
835
836         if (!dst)
837                 goto out;
838
839         if (dst->ops->family != AF_INET6) {
840                 dst_release(dst);
841                 return NULL;
842         }
843
844         rt = (struct rt6_info *)dst;
845         /* Yes, checking route validity in not connected
846          * case is not very simple. Take into account,
847          * that we do not support routing by source, TOS,
848          * and MSG_DONTROUTE            --ANK (980726)
849          *
850          * 1. ip6_rt_check(): If route was host route,
851          *    check that cached destination is current.
852          *    If it is network route, we still may
853          *    check its validity using saved pointer
854          *    to the last used address: daddr_cache.
855          *    We do not want to save whole address now,
856          *    (because main consumer of this service
857          *    is tcp, which has not this problem),
858          *    so that the last trick works only on connected
859          *    sockets.
860          * 2. oif also should be the same.
861          */
862         if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
863 #ifdef CONFIG_IPV6_SUBTREES
864             ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
865 #endif
866             (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
867                 dst_release(dst);
868                 dst = NULL;
869         }
870
871 out:
872         return dst;
873 }
874
875 static int ip6_dst_lookup_tail(struct sock *sk,
876                                struct dst_entry **dst, struct flowi6 *fl6)
877 {
878         struct net *net = sock_net(sk);
879 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
880         struct neighbour *n;
881         struct rt6_info *rt;
882 #endif
883         int err;
884
885         if (*dst == NULL)
886                 *dst = ip6_route_output(net, sk, fl6);
887
888         err = (*dst)->error;
889         if (err)
890                 goto out_err_release;
891
892         if (ipv6_addr_any(&fl6->saddr)) {
893                 struct rt6_info *rt = (struct rt6_info *) *dst;
894                 err = ip6_route_get_saddr(net, rt, &fl6->daddr,
895                                           sk ? inet6_sk(sk)->srcprefs : 0,
896                                           &fl6->saddr);
897                 if (err)
898                         goto out_err_release;
899         }
900
901 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
902         /*
903          * Here if the dst entry we've looked up
904          * has a neighbour entry that is in the INCOMPLETE
905          * state and the src address from the flow is
906          * marked as OPTIMISTIC, we release the found
907          * dst entry and replace it instead with the
908          * dst entry of the nexthop router
909          */
910         rt = (struct rt6_info *) *dst;
911         rcu_read_lock_bh();
912         n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt));
913         err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
914         rcu_read_unlock_bh();
915
916         if (err) {
917                 struct inet6_ifaddr *ifp;
918                 struct flowi6 fl_gw6;
919                 int redirect;
920
921                 ifp = ipv6_get_ifaddr(net, &fl6->saddr,
922                                       (*dst)->dev, 1);
923
924                 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
925                 if (ifp)
926                         in6_ifa_put(ifp);
927
928                 if (redirect) {
929                         /*
930                          * We need to get the dst entry for the
931                          * default router instead
932                          */
933                         dst_release(*dst);
934                         memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
935                         memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
936                         *dst = ip6_route_output(net, sk, &fl_gw6);
937                         err = (*dst)->error;
938                         if (err)
939                                 goto out_err_release;
940                 }
941         }
942 #endif
943
944         return 0;
945
946 out_err_release:
947         if (err == -ENETUNREACH)
948                 IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
949         dst_release(*dst);
950         *dst = NULL;
951         return err;
952 }
953
954 /**
955  *      ip6_dst_lookup - perform route lookup on flow
956  *      @sk: socket which provides route info
957  *      @dst: pointer to dst_entry * for result
958  *      @fl6: flow to lookup
959  *
960  *      This function performs a route lookup on the given flow.
961  *
962  *      It returns zero on success, or a standard errno code on error.
963  */
964 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
965 {
966         *dst = NULL;
967         return ip6_dst_lookup_tail(sk, dst, fl6);
968 }
969 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
970
971 /**
972  *      ip6_dst_lookup_flow - perform route lookup on flow with ipsec
973  *      @sk: socket which provides route info
974  *      @fl6: flow to lookup
975  *      @final_dst: final destination address for ipsec lookup
976  *
977  *      This function performs a route lookup on the given flow.
978  *
979  *      It returns a valid dst pointer on success, or a pointer encoded
980  *      error code.
981  */
982 struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
983                                       const struct in6_addr *final_dst)
984 {
985         struct dst_entry *dst = NULL;
986         int err;
987
988         err = ip6_dst_lookup_tail(sk, &dst, fl6);
989         if (err)
990                 return ERR_PTR(err);
991         if (final_dst)
992                 fl6->daddr = *final_dst;
993
994         return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
995 }
996 EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
997
998 /**
999  *      ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
1000  *      @sk: socket which provides the dst cache and route info
1001  *      @fl6: flow to lookup
1002  *      @final_dst: final destination address for ipsec lookup
1003  *
1004  *      This function performs a route lookup on the given flow with the
1005  *      possibility of using the cached route in the socket if it is valid.
1006  *      It will take the socket dst lock when operating on the dst cache.
1007  *      As a result, this function can only be used in process context.
1008  *
1009  *      It returns a valid dst pointer on success, or a pointer encoded
1010  *      error code.
1011  */
1012 struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1013                                          const struct in6_addr *final_dst)
1014 {
1015         struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1016         int err;
1017
1018         dst = ip6_sk_dst_check(sk, dst, fl6);
1019
1020         err = ip6_dst_lookup_tail(sk, &dst, fl6);
1021         if (err)
1022                 return ERR_PTR(err);
1023         if (final_dst)
1024                 fl6->daddr = *final_dst;
1025
1026         return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
1027 }
1028 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1029
1030 static inline int ip6_ufo_append_data(struct sock *sk,
1031                         struct sk_buff_head *queue,
1032                         int getfrag(void *from, char *to, int offset, int len,
1033                         int odd, struct sk_buff *skb),
1034                         void *from, int length, int hh_len, int fragheaderlen,
1035                         int transhdrlen, int mtu, unsigned int flags,
1036                         struct rt6_info *rt)
1037
1038 {
1039         struct sk_buff *skb;
1040         struct frag_hdr fhdr;
1041         int err;
1042
1043         /* There is support for UDP large send offload by network
1044          * device, so create one single skb packet containing complete
1045          * udp datagram
1046          */
1047         skb = skb_peek_tail(queue);
1048         if (skb == NULL) {
1049                 skb = sock_alloc_send_skb(sk,
1050                         hh_len + fragheaderlen + transhdrlen + 20,
1051                         (flags & MSG_DONTWAIT), &err);
1052                 if (skb == NULL)
1053                         return err;
1054
1055                 /* reserve space for Hardware header */
1056                 skb_reserve(skb, hh_len);
1057
1058                 /* create space for UDP/IP header */
1059                 skb_put(skb, fragheaderlen + transhdrlen);
1060
1061                 /* initialize network header pointer */
1062                 skb_reset_network_header(skb);
1063
1064                 /* initialize protocol header pointer */
1065                 skb->transport_header = skb->network_header + fragheaderlen;
1066
1067                 skb->protocol = htons(ETH_P_IPV6);
1068                 skb->csum = 0;
1069
1070                 __skb_queue_tail(queue, skb);
1071         } else if (skb_is_gso(skb)) {
1072                 goto append;
1073         }
1074
1075         skb->ip_summed = CHECKSUM_PARTIAL;
1076         /* Specify the length of each IPv6 datagram fragment.
1077          * It has to be a multiple of 8.
1078          */
1079         skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
1080                                      sizeof(struct frag_hdr)) & ~7;
1081         skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1082         ipv6_select_ident(&fhdr, rt);
1083         skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1084
1085 append:
1086         return skb_append_datato_frags(sk, skb, getfrag, from,
1087                                        (length - transhdrlen));
1088 }
1089
1090 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1091                                                gfp_t gfp)
1092 {
1093         return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1094 }
1095
1096 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1097                                                 gfp_t gfp)
1098 {
1099         return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1100 }
1101
1102 static void ip6_append_data_mtu(unsigned int *mtu,
1103                                 int *maxfraglen,
1104                                 unsigned int fragheaderlen,
1105                                 struct sk_buff *skb,
1106                                 struct rt6_info *rt,
1107                                 unsigned int orig_mtu)
1108 {
1109         if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
1110                 if (skb == NULL) {
1111                         /* first fragment, reserve header_len */
1112                         *mtu = orig_mtu - rt->dst.header_len;
1113
1114                 } else {
1115                         /*
1116                          * this fragment is not first, the headers
1117                          * space is regarded as data space.
1118                          */
1119                         *mtu = orig_mtu;
1120                 }
1121                 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1122                               + fragheaderlen - sizeof(struct frag_hdr);
1123         }
1124 }
1125
1126 static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
1127                           struct inet6_cork *v6_cork,
1128                           int hlimit, int tclass, struct ipv6_txoptions *opt,
1129                           struct rt6_info *rt, struct flowi6 *fl6)
1130 {
1131         struct ipv6_pinfo *np = inet6_sk(sk);
1132         unsigned int mtu;
1133
1134         /*
1135          * setup for corking
1136          */
1137         if (opt) {
1138                 if (WARN_ON(v6_cork->opt))
1139                         return -EINVAL;
1140
1141                 v6_cork->opt = kzalloc(opt->tot_len, sk->sk_allocation);
1142                 if (unlikely(v6_cork->opt == NULL))
1143                         return -ENOBUFS;
1144
1145                 v6_cork->opt->tot_len = opt->tot_len;
1146                 v6_cork->opt->opt_flen = opt->opt_flen;
1147                 v6_cork->opt->opt_nflen = opt->opt_nflen;
1148
1149                 v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1150                                                     sk->sk_allocation);
1151                 if (opt->dst0opt && !v6_cork->opt->dst0opt)
1152                         return -ENOBUFS;
1153
1154                 v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1155                                                     sk->sk_allocation);
1156                 if (opt->dst1opt && !v6_cork->opt->dst1opt)
1157                         return -ENOBUFS;
1158
1159                 v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
1160                                                    sk->sk_allocation);
1161                 if (opt->hopopt && !v6_cork->opt->hopopt)
1162                         return -ENOBUFS;
1163
1164                 v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1165                                                     sk->sk_allocation);
1166                 if (opt->srcrt && !v6_cork->opt->srcrt)
1167                         return -ENOBUFS;
1168
1169                 /* need source address above miyazawa*/
1170         }
1171         dst_hold(&rt->dst);
1172         cork->base.dst = &rt->dst;
1173         cork->fl.u.ip6 = *fl6;
1174         v6_cork->hop_limit = hlimit;
1175         v6_cork->tclass = tclass;
1176         if (rt->dst.flags & DST_XFRM_TUNNEL)
1177                 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1178                       rt->dst.dev->mtu : dst_mtu(&rt->dst);
1179         else
1180                 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1181                       rt->dst.dev->mtu : dst_mtu(rt->dst.path);
1182         if (np->frag_size < mtu) {
1183                 if (np->frag_size)
1184                         mtu = np->frag_size;
1185         }
1186         cork->base.fragsize = mtu;
1187         if (dst_allfrag(rt->dst.path))
1188                 cork->base.flags |= IPCORK_ALLFRAG;
1189         cork->base.length = 0;
1190
1191         return 0;
1192 }
1193
1194 static int __ip6_append_data(struct sock *sk,
1195                              struct flowi6 *fl6,
1196                              struct sk_buff_head *queue,
1197                              struct inet_cork *cork,
1198                              struct inet6_cork *v6_cork,
1199                              struct page_frag *pfrag,
1200                              int getfrag(void *from, char *to, int offset,
1201                                          int len, int odd, struct sk_buff *skb),
1202                              void *from, int length, int transhdrlen,
1203                              unsigned int flags, int dontfrag)
1204 {
1205         struct sk_buff *skb, *skb_prev = NULL;
1206         unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu;
1207         int exthdrlen = 0;
1208         int dst_exthdrlen = 0;
1209         int hh_len;
1210         int copy;
1211         int err;
1212         int offset = 0;
1213         __u8 tx_flags = 0;
1214         u32 tskey = 0;
1215         struct rt6_info *rt = (struct rt6_info *)cork->dst;
1216         struct ipv6_txoptions *opt = v6_cork->opt;
1217         int csummode = CHECKSUM_NONE;
1218
1219         skb = skb_peek_tail(queue);
1220         if (!skb) {
1221                 exthdrlen = opt ? opt->opt_flen : 0;
1222                 dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
1223         }
1224
1225         mtu = cork->fragsize;
1226         orig_mtu = mtu;
1227
1228         hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1229
1230         fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1231                         (opt ? opt->opt_nflen : 0);
1232         maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
1233                      sizeof(struct frag_hdr);
1234
1235         if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
1236                 unsigned int maxnonfragsize, headersize;
1237
1238                 headersize = sizeof(struct ipv6hdr) +
1239                              (opt ? opt->opt_flen + opt->opt_nflen : 0) +
1240                              (dst_allfrag(&rt->dst) ?
1241                               sizeof(struct frag_hdr) : 0) +
1242                              rt->rt6i_nfheader_len;
1243
1244                 if (ip6_sk_ignore_df(sk))
1245                         maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
1246                 else
1247                         maxnonfragsize = mtu;
1248
1249                 /* dontfrag active */
1250                 if ((cork->length + length > mtu - headersize) && dontfrag &&
1251                     (sk->sk_protocol == IPPROTO_UDP ||
1252                      sk->sk_protocol == IPPROTO_RAW)) {
1253                         ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
1254                                                    sizeof(struct ipv6hdr));
1255                         goto emsgsize;
1256                 }
1257
1258                 if (cork->length + length > maxnonfragsize - headersize) {
1259 emsgsize:
1260                         ipv6_local_error(sk, EMSGSIZE, fl6,
1261                                          mtu - headersize +
1262                                          sizeof(struct ipv6hdr));
1263                         return -EMSGSIZE;
1264                 }
1265         }
1266
1267         if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
1268                 sock_tx_timestamp(sk, &tx_flags);
1269                 if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
1270                     sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
1271                         tskey = sk->sk_tskey++;
1272         }
1273
1274         /* If this is the first and only packet and device
1275          * supports checksum offloading, let's use it.
1276          */
1277         if (!skb && sk->sk_protocol == IPPROTO_UDP &&
1278             length + fragheaderlen < mtu &&
1279             rt->dst.dev->features & NETIF_F_V6_CSUM &&
1280             !exthdrlen)
1281                 csummode = CHECKSUM_PARTIAL;
1282         /*
1283          * Let's try using as much space as possible.
1284          * Use MTU if total length of the message fits into the MTU.
1285          * Otherwise, we need to reserve fragment header and
1286          * fragment alignment (= 8-15 octects, in total).
1287          *
1288          * Note that we may need to "move" the data from the tail of
1289          * of the buffer to the new fragment when we split
1290          * the message.
1291          *
1292          * FIXME: It may be fragmented into multiple chunks
1293          *        at once if non-fragmentable extension headers
1294          *        are too large.
1295          * --yoshfuji
1296          */
1297
1298         cork->length += length;
1299         if (((length > mtu) ||
1300              (skb && skb_is_gso(skb))) &&
1301             (sk->sk_protocol == IPPROTO_UDP) &&
1302             (rt->dst.dev->features & NETIF_F_UFO) &&
1303             (sk->sk_type == SOCK_DGRAM)) {
1304                 err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
1305                                           hh_len, fragheaderlen,
1306                                           transhdrlen, mtu, flags, rt);
1307                 if (err)
1308                         goto error;
1309                 return 0;
1310         }
1311
1312         if (!skb)
1313                 goto alloc_new_skb;
1314
1315         while (length > 0) {
1316                 /* Check if the remaining data fits into current packet. */
1317                 copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1318                 if (copy < length)
1319                         copy = maxfraglen - skb->len;
1320
1321                 if (copy <= 0) {
1322                         char *data;
1323                         unsigned int datalen;
1324                         unsigned int fraglen;
1325                         unsigned int fraggap;
1326                         unsigned int alloclen;
1327 alloc_new_skb:
1328                         /* There's no room in the current skb */
1329                         if (skb)
1330                                 fraggap = skb->len - maxfraglen;
1331                         else
1332                                 fraggap = 0;
1333                         /* update mtu and maxfraglen if necessary */
1334                         if (skb == NULL || skb_prev == NULL)
1335                                 ip6_append_data_mtu(&mtu, &maxfraglen,
1336                                                     fragheaderlen, skb, rt,
1337                                                     orig_mtu);
1338
1339                         skb_prev = skb;
1340
1341                         /*
1342                          * If remaining data exceeds the mtu,
1343                          * we know we need more fragment(s).
1344                          */
1345                         datalen = length + fraggap;
1346
1347                         if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1348                                 datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
1349                         if ((flags & MSG_MORE) &&
1350                             !(rt->dst.dev->features&NETIF_F_SG))
1351                                 alloclen = mtu;
1352                         else
1353                                 alloclen = datalen + fragheaderlen;
1354
1355                         alloclen += dst_exthdrlen;
1356
1357                         if (datalen != length + fraggap) {
1358                                 /*
1359                                  * this is not the last fragment, the trailer
1360                                  * space is regarded as data space.
1361                                  */
1362                                 datalen += rt->dst.trailer_len;
1363                         }
1364
1365                         alloclen += rt->dst.trailer_len;
1366                         fraglen = datalen + fragheaderlen;
1367
1368                         /*
1369                          * We just reserve space for fragment header.
1370                          * Note: this may be overallocation if the message
1371                          * (without MSG_MORE) fits into the MTU.
1372                          */
1373                         alloclen += sizeof(struct frag_hdr);
1374
1375                         if (transhdrlen) {
1376                                 skb = sock_alloc_send_skb(sk,
1377                                                 alloclen + hh_len,
1378                                                 (flags & MSG_DONTWAIT), &err);
1379                         } else {
1380                                 skb = NULL;
1381                                 if (atomic_read(&sk->sk_wmem_alloc) <=
1382                                     2 * sk->sk_sndbuf)
1383                                         skb = sock_wmalloc(sk,
1384                                                            alloclen + hh_len, 1,
1385                                                            sk->sk_allocation);
1386                                 if (unlikely(skb == NULL))
1387                                         err = -ENOBUFS;
1388                         }
1389                         if (skb == NULL)
1390                                 goto error;
1391                         /*
1392                          *      Fill in the control structures
1393                          */
1394                         skb->protocol = htons(ETH_P_IPV6);
1395                         skb->ip_summed = csummode;
1396                         skb->csum = 0;
1397                         /* reserve for fragmentation and ipsec header */
1398                         skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
1399                                     dst_exthdrlen);
1400
1401                         /* Only the initial fragment is time stamped */
1402                         skb_shinfo(skb)->tx_flags = tx_flags;
1403                         tx_flags = 0;
1404                         skb_shinfo(skb)->tskey = tskey;
1405                         tskey = 0;
1406
1407                         /*
1408                          *      Find where to start putting bytes
1409                          */
1410                         data = skb_put(skb, fraglen);
1411                         skb_set_network_header(skb, exthdrlen);
1412                         data += fragheaderlen;
1413                         skb->transport_header = (skb->network_header +
1414                                                  fragheaderlen);
1415                         if (fraggap) {
1416                                 skb->csum = skb_copy_and_csum_bits(
1417                                         skb_prev, maxfraglen,
1418                                         data + transhdrlen, fraggap, 0);
1419                                 skb_prev->csum = csum_sub(skb_prev->csum,
1420                                                           skb->csum);
1421                                 data += fraggap;
1422                                 pskb_trim_unique(skb_prev, maxfraglen);
1423                         }
1424                         copy = datalen - transhdrlen - fraggap;
1425
1426                         if (copy < 0) {
1427                                 err = -EINVAL;
1428                                 kfree_skb(skb);
1429                                 goto error;
1430                         } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1431                                 err = -EFAULT;
1432                                 kfree_skb(skb);
1433                                 goto error;
1434                         }
1435
1436                         offset += copy;
1437                         length -= datalen - fraggap;
1438                         transhdrlen = 0;
1439                         exthdrlen = 0;
1440                         dst_exthdrlen = 0;
1441
1442                         /*
1443                          * Put the packet on the pending queue
1444                          */
1445                         __skb_queue_tail(queue, skb);
1446                         continue;
1447                 }
1448
1449                 if (copy > length)
1450                         copy = length;
1451
1452                 if (!(rt->dst.dev->features&NETIF_F_SG)) {
1453                         unsigned int off;
1454
1455                         off = skb->len;
1456                         if (getfrag(from, skb_put(skb, copy),
1457                                                 offset, copy, off, skb) < 0) {
1458                                 __skb_trim(skb, off);
1459                                 err = -EFAULT;
1460                                 goto error;
1461                         }
1462                 } else {
1463                         int i = skb_shinfo(skb)->nr_frags;
1464
1465                         err = -ENOMEM;
1466                         if (!sk_page_frag_refill(sk, pfrag))
1467                                 goto error;
1468
1469                         if (!skb_can_coalesce(skb, i, pfrag->page,
1470                                               pfrag->offset)) {
1471                                 err = -EMSGSIZE;
1472                                 if (i == MAX_SKB_FRAGS)
1473                                         goto error;
1474
1475                                 __skb_fill_page_desc(skb, i, pfrag->page,
1476                                                      pfrag->offset, 0);
1477                                 skb_shinfo(skb)->nr_frags = ++i;
1478                                 get_page(pfrag->page);
1479                         }
1480                         copy = min_t(int, copy, pfrag->size - pfrag->offset);
1481                         if (getfrag(from,
1482                                     page_address(pfrag->page) + pfrag->offset,
1483                                     offset, copy, skb->len, skb) < 0)
1484                                 goto error_efault;
1485
1486                         pfrag->offset += copy;
1487                         skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1488                         skb->len += copy;
1489                         skb->data_len += copy;
1490                         skb->truesize += copy;
1491                         atomic_add(copy, &sk->sk_wmem_alloc);
1492                 }
1493                 offset += copy;
1494                 length -= copy;
1495         }
1496
1497         return 0;
1498
1499 error_efault:
1500         err = -EFAULT;
1501 error:
1502         cork->length -= length;
1503         IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1504         return err;
1505 }
1506
1507 int ip6_append_data(struct sock *sk,
1508                     int getfrag(void *from, char *to, int offset, int len,
1509                                 int odd, struct sk_buff *skb),
1510                     void *from, int length, int transhdrlen, int hlimit,
1511                     int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
1512                     struct rt6_info *rt, unsigned int flags, int dontfrag)
1513 {
1514         struct inet_sock *inet = inet_sk(sk);
1515         struct ipv6_pinfo *np = inet6_sk(sk);
1516         int exthdrlen;
1517         int err;
1518
1519         if (flags&MSG_PROBE)
1520                 return 0;
1521         if (skb_queue_empty(&sk->sk_write_queue)) {
1522                 /*
1523                  * setup for corking
1524                  */
1525                 err = ip6_setup_cork(sk, &inet->cork, &np->cork, hlimit,
1526                                      tclass, opt, rt, fl6);
1527                 if (err)
1528                         return err;
1529
1530                 exthdrlen = (opt ? opt->opt_flen : 0);
1531                 length += exthdrlen;
1532                 transhdrlen += exthdrlen;
1533         } else {
1534                 fl6 = &inet->cork.fl.u.ip6;
1535                 transhdrlen = 0;
1536         }
1537
1538         return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
1539                                  &np->cork, sk_page_frag(sk), getfrag,
1540                                  from, length, transhdrlen, flags, dontfrag);
1541 }
1542 EXPORT_SYMBOL_GPL(ip6_append_data);
1543
1544 static void ip6_cork_release(struct inet_cork_full *cork,
1545                              struct inet6_cork *v6_cork)
1546 {
1547         if (v6_cork->opt) {
1548                 kfree(v6_cork->opt->dst0opt);
1549                 kfree(v6_cork->opt->dst1opt);
1550                 kfree(v6_cork->opt->hopopt);
1551                 kfree(v6_cork->opt->srcrt);
1552                 kfree(v6_cork->opt);
1553                 v6_cork->opt = NULL;
1554         }
1555
1556         if (cork->base.dst) {
1557                 dst_release(cork->base.dst);
1558                 cork->base.dst = NULL;
1559                 cork->base.flags &= ~IPCORK_ALLFRAG;
1560         }
1561         memset(&cork->fl, 0, sizeof(cork->fl));
1562 }
1563
1564 struct sk_buff *__ip6_make_skb(struct sock *sk,
1565                                struct sk_buff_head *queue,
1566                                struct inet_cork_full *cork,
1567                                struct inet6_cork *v6_cork)
1568 {
1569         struct sk_buff *skb, *tmp_skb;
1570         struct sk_buff **tail_skb;
1571         struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1572         struct ipv6_pinfo *np = inet6_sk(sk);
1573         struct net *net = sock_net(sk);
1574         struct ipv6hdr *hdr;
1575         struct ipv6_txoptions *opt = v6_cork->opt;
1576         struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
1577         struct flowi6 *fl6 = &cork->fl.u.ip6;
1578         unsigned char proto = fl6->flowi6_proto;
1579
1580         skb = __skb_dequeue(queue);
1581         if (skb == NULL)
1582                 goto out;
1583         tail_skb = &(skb_shinfo(skb)->frag_list);
1584
1585         /* move skb->data to ip header from ext header */
1586         if (skb->data < skb_network_header(skb))
1587                 __skb_pull(skb, skb_network_offset(skb));
1588         while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
1589                 __skb_pull(tmp_skb, skb_network_header_len(skb));
1590                 *tail_skb = tmp_skb;
1591                 tail_skb = &(tmp_skb->next);
1592                 skb->len += tmp_skb->len;
1593                 skb->data_len += tmp_skb->len;
1594                 skb->truesize += tmp_skb->truesize;
1595                 tmp_skb->destructor = NULL;
1596                 tmp_skb->sk = NULL;
1597         }
1598
1599         /* Allow local fragmentation. */
1600         skb->ignore_df = ip6_sk_ignore_df(sk);
1601
1602         *final_dst = fl6->daddr;
1603         __skb_pull(skb, skb_network_header_len(skb));
1604         if (opt && opt->opt_flen)
1605                 ipv6_push_frag_opts(skb, opt, &proto);
1606         if (opt && opt->opt_nflen)
1607                 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1608
1609         skb_push(skb, sizeof(struct ipv6hdr));
1610         skb_reset_network_header(skb);
1611         hdr = ipv6_hdr(skb);
1612
1613         ip6_flow_hdr(hdr, v6_cork->tclass,
1614                      ip6_make_flowlabel(net, skb, fl6->flowlabel,
1615                                         np->autoflowlabel));
1616         hdr->hop_limit = v6_cork->hop_limit;
1617         hdr->nexthdr = proto;
1618         hdr->saddr = fl6->saddr;
1619         hdr->daddr = *final_dst;
1620
1621         skb->priority = sk->sk_priority;
1622         skb->mark = sk->sk_mark;
1623
1624         skb_dst_set(skb, dst_clone(&rt->dst));
1625         IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1626         if (proto == IPPROTO_ICMPV6) {
1627                 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1628
1629                 ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
1630                 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
1631         }
1632
1633         ip6_cork_release(cork, v6_cork);
1634 out:
1635         return skb;
1636 }
1637
1638 int ip6_send_skb(struct sk_buff *skb)
1639 {
1640         struct net *net = sock_net(skb->sk);
1641         struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1642         int err;
1643
1644         err = ip6_local_out(skb);
1645         if (err) {
1646                 if (err > 0)
1647                         err = net_xmit_errno(err);
1648                 if (err)
1649                         IP6_INC_STATS(net, rt->rt6i_idev,
1650                                       IPSTATS_MIB_OUTDISCARDS);
1651         }
1652
1653         return err;
1654 }
1655
/*
 *      ip6_push_pending_frames - finish the socket's pending datagram via
 *      ip6_finish_skb() and send it.
 *
 *      Returns 0 when ip6_finish_skb() yields no skb, otherwise the
 *      result of ip6_send_skb().
 */
int ip6_push_pending_frames(struct sock *sk)
{
        struct sk_buff *skb = ip6_finish_skb(sk);

        return skb ? ip6_send_skb(skb) : 0;
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1667
1668 static void __ip6_flush_pending_frames(struct sock *sk,
1669                                        struct sk_buff_head *queue,
1670                                        struct inet_cork_full *cork,
1671                                        struct inet6_cork *v6_cork)
1672 {
1673         struct sk_buff *skb;
1674
1675         while ((skb = __skb_dequeue_tail(queue)) != NULL) {
1676                 if (skb_dst(skb))
1677                         IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1678                                       IPSTATS_MIB_OUTDISCARDS);
1679                 kfree_skb(skb);
1680         }
1681
1682         ip6_cork_release(cork, v6_cork);
1683 }
1684
1685 void ip6_flush_pending_frames(struct sock *sk)
1686 {
1687         __ip6_flush_pending_frames(sk, &sk->sk_write_queue,
1688                                    &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
1689 }
1690 EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
1691
1692 struct sk_buff *ip6_make_skb(struct sock *sk,
1693                              int getfrag(void *from, char *to, int offset,
1694                                          int len, int odd, struct sk_buff *skb),
1695                              void *from, int length, int transhdrlen,
1696                              int hlimit, int tclass,
1697                              struct ipv6_txoptions *opt, struct flowi6 *fl6,
1698                              struct rt6_info *rt, unsigned int flags,
1699                              int dontfrag)
1700 {
1701         struct inet_cork_full cork;
1702         struct inet6_cork v6_cork;
1703         struct sk_buff_head queue;
1704         int exthdrlen = (opt ? opt->opt_flen : 0);
1705         int err;
1706
1707         if (flags & MSG_PROBE)
1708                 return NULL;
1709
1710         __skb_queue_head_init(&queue);
1711
1712         cork.base.flags = 0;
1713         cork.base.addr = 0;
1714         cork.base.opt = NULL;
1715         v6_cork.opt = NULL;
1716         err = ip6_setup_cork(sk, &cork, &v6_cork, hlimit, tclass, opt, rt, fl6);
1717         if (err)
1718                 return ERR_PTR(err);
1719
1720         if (dontfrag < 0)
1721                 dontfrag = inet6_sk(sk)->dontfrag;
1722
1723         err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork,
1724                                 &current->task_frag, getfrag, from,
1725                                 length + exthdrlen, transhdrlen + exthdrlen,
1726                                 flags, dontfrag);
1727         if (err) {
1728                 __ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork);
1729                 return ERR_PTR(err);
1730         }
1731
1732         return __ip6_make_skb(sk, &queue, &cork, &v6_cork);
1733 }