Showing error 1891

User: Jiri Slaby
Error type: Invalid Pointer Dereference
Error type description: A pointer which is invalid is being dereferenced
File location: net/ipv6/ip6_output.c
Line in file: 272
Project: Linux Kernel
Project version: 2.6.28
Tools: Smatch (1.59)
Entered: 2013-09-11 08:47:26 UTC


Source:

   1/*
   2 *        IPv6 output functions
   3 *        Linux INET6 implementation
   4 *
   5 *        Authors:
   6 *        Pedro Roque                <roque@di.fc.ul.pt>
   7 *
   8 *        Based on linux/net/ipv4/ip_output.c
   9 *
  10 *        This program is free software; you can redistribute it and/or
  11 *      modify it under the terms of the GNU General Public License
  12 *      as published by the Free Software Foundation; either version
  13 *      2 of the License, or (at your option) any later version.
  14 *
  15 *        Changes:
  16 *        A.N.Kuznetsov        :        airthmetics in fragmentation.
  17 *                                extension headers are implemented.
  18 *                                route changes now work.
  19 *                                ip6_forward does not confuse sniffers.
  20 *                                etc.
  21 *
  22 *      H. von Brand    :       Added missing #include <linux/string.h>
  23 *        Imran Patel        :         frag id should be in NBO
  24 *      Kazunori MIYAZAWA @USAGI
  25 *                        :       add ip6_append_data and related functions
  26 *                                for datagram xmit
  27 */
  28
  29#include <linux/errno.h>
  30#include <linux/kernel.h>
  31#include <linux/string.h>
  32#include <linux/socket.h>
  33#include <linux/net.h>
  34#include <linux/netdevice.h>
  35#include <linux/if_arp.h>
  36#include <linux/in6.h>
  37#include <linux/tcp.h>
  38#include <linux/route.h>
  39#include <linux/module.h>
  40
  41#include <linux/netfilter.h>
  42#include <linux/netfilter_ipv6.h>
  43
  44#include <net/sock.h>
  45#include <net/snmp.h>
  46
  47#include <net/ipv6.h>
  48#include <net/ndisc.h>
  49#include <net/protocol.h>
  50#include <net/ip6_route.h>
  51#include <net/addrconf.h>
  52#include <net/rawv6.h>
  53#include <net/icmp.h>
  54#include <net/xfrm.h>
  55#include <net/checksum.h>
  56#include <linux/mroute6.h>
  57
  58static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
  59
  60static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
  61{
  62        static u32 ipv6_fragmentation_id = 1;
  63        static DEFINE_SPINLOCK(ip6_id_lock);
  64
  65        spin_lock_bh(&ip6_id_lock);
  66        fhdr->identification = htonl(ipv6_fragmentation_id);
  67        if (++ipv6_fragmentation_id == 0)
  68                ipv6_fragmentation_id = 1;
  69        spin_unlock_bh(&ip6_id_lock);
  70}
  71
  72int __ip6_local_out(struct sk_buff *skb)
  73{
  74        int len;
  75
  76        len = skb->len - sizeof(struct ipv6hdr);
  77        if (len > IPV6_MAXPLEN)
  78                len = 0;
  79        ipv6_hdr(skb)->payload_len = htons(len);
  80
  81        return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dst->dev,
  82                       dst_output);
  83}
  84
  85int ip6_local_out(struct sk_buff *skb)
  86{
  87        int err;
  88
  89        err = __ip6_local_out(skb);
  90        if (likely(err == 1))
  91                err = dst_output(skb);
  92
  93        return err;
  94}
  95EXPORT_SYMBOL_GPL(ip6_local_out);
  96
  97static int ip6_output_finish(struct sk_buff *skb)
  98{
  99        struct dst_entry *dst = skb->dst;
 100
 101        if (dst->hh)
 102                return neigh_hh_output(dst->hh, skb);
 103        else if (dst->neighbour)
 104                return dst->neighbour->output(skb);
 105
 106        IP6_INC_STATS_BH(dev_net(dst->dev),
 107                         ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
 108        kfree_skb(skb);
 109        return -EINVAL;
 110
 111}
 112
 113/* dev_loopback_xmit for use with netfilter. */
 114static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
 115{
 116        skb_reset_mac_header(newskb);
 117        __skb_pull(newskb, skb_network_offset(newskb));
 118        newskb->pkt_type = PACKET_LOOPBACK;
 119        newskb->ip_summed = CHECKSUM_UNNECESSARY;
 120        WARN_ON(!newskb->dst);
 121
 122        netif_rx(newskb);
 123        return 0;
 124}
 125
 126
 127static int ip6_output2(struct sk_buff *skb)
 128{
 129        struct dst_entry *dst = skb->dst;
 130        struct net_device *dev = dst->dev;
 131
 132        skb->protocol = htons(ETH_P_IPV6);
 133        skb->dev = dev;
 134
 135        if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
 136                struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
 137                struct inet6_dev *idev = ip6_dst_idev(skb->dst);
 138
 139                if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
 140                    ((mroute6_socket && !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
 141                     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
 142                                         &ipv6_hdr(skb)->saddr))) {
 143                        struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
 144
 145                        /* Do not check for IFF_ALLMULTI; multicast routing
 146                           is not supported in any case.
 147                         */
 148                        if (newskb)
 149                                NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb,
 150                                        NULL, newskb->dev,
 151                                        ip6_dev_loopback_xmit);
 152
 153                        if (ipv6_hdr(skb)->hop_limit == 0) {
 154                                IP6_INC_STATS(dev_net(dev), idev,
 155                                              IPSTATS_MIB_OUTDISCARDS);
 156                                kfree_skb(skb);
 157                                return 0;
 158                        }
 159                }
 160
 161                IP6_INC_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCASTPKTS);
 162        }
 163
 164        return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
 165                       ip6_output_finish);
 166}
 167
 168static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
 169{
 170        struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
 171
 172        return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
 173               skb->dst->dev->mtu : dst_mtu(skb->dst);
 174}
 175
 176int ip6_output(struct sk_buff *skb)
 177{
 178        struct inet6_dev *idev = ip6_dst_idev(skb->dst);
 179        if (unlikely(idev->cnf.disable_ipv6)) {
 180                IP6_INC_STATS(dev_net(skb->dst->dev), idev,
 181                              IPSTATS_MIB_OUTDISCARDS);
 182                kfree_skb(skb);
 183                return 0;
 184        }
 185
 186        if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
 187                                dst_allfrag(skb->dst))
 188                return ip6_fragment(skb, ip6_output2);
 189        else
 190                return ip6_output2(skb);
 191}
 192
 193/*
 194 *        xmit an sk_buff (used by TCP)
 195 */
 196
 197int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
 198             struct ipv6_txoptions *opt, int ipfragok)
 199{
 200        struct net *net = sock_net(sk);
 201        struct ipv6_pinfo *np = inet6_sk(sk);
 202        struct in6_addr *first_hop = &fl->fl6_dst;
 203        struct dst_entry *dst = skb->dst;
 204        struct ipv6hdr *hdr;
 205        u8  proto = fl->proto;
 206        int seg_len = skb->len;
 207        int hlimit, tclass;
 208        u32 mtu;
 209
 210        if (opt) {
 211                unsigned int head_room;
 212
 213                /* First: exthdrs may take lots of space (~8K for now)
 214                   MAX_HEADER is not enough.
 215                 */
 216                head_room = opt->opt_nflen + opt->opt_flen;
 217                seg_len += head_room;
 218                head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
 219
 220                if (skb_headroom(skb) < head_room) {
 221                        struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
 222                        if (skb2 == NULL) {
 223                                IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
 224                                              IPSTATS_MIB_OUTDISCARDS);
 225                                kfree_skb(skb);
 226                                return -ENOBUFS;
 227                        }
 228                        kfree_skb(skb);
 229                        skb = skb2;
 230                        if (sk)
 231                                skb_set_owner_w(skb, sk);
 232                }
 233                if (opt->opt_flen)
 234                        ipv6_push_frag_opts(skb, opt, &proto);
 235                if (opt->opt_nflen)
 236                        ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
 237        }
 238
 239        skb_push(skb, sizeof(struct ipv6hdr));
 240        skb_reset_network_header(skb);
 241        hdr = ipv6_hdr(skb);
 242
 243        /* Allow local fragmentation. */
 244        if (ipfragok)
 245                skb->local_df = 1;
 246
 247        /*
 248         *        Fill in the IPv6 header
 249         */
 250
 251        hlimit = -1;
 252        if (np)
 253                hlimit = np->hop_limit;
 254        if (hlimit < 0)
 255                hlimit = ip6_dst_hoplimit(dst);
 256
 257        tclass = -1;
 258        if (np)
 259                tclass = np->tclass;
 260        if (tclass < 0)
 261                tclass = 0;
 262
 263        *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;
 264
 265        hdr->payload_len = htons(seg_len);
 266        hdr->nexthdr = proto;
 267        hdr->hop_limit = hlimit;
 268
 269        ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
 270        ipv6_addr_copy(&hdr->daddr, first_hop);
 271
 272        skb->priority = sk->sk_priority;
 273        skb->mark = sk->sk_mark;
 274
 275        mtu = dst_mtu(dst);
 276        if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
 277                IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
 278                              IPSTATS_MIB_OUTREQUESTS);
 279                return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
 280                                dst_output);
 281        }
 282
 283        if (net_ratelimit())
 284                printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
 285        skb->dev = dst->dev;
 286        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
 287        IP6_INC_STATS(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
 288        kfree_skb(skb);
 289        return -EMSGSIZE;
 290}
 291
 292EXPORT_SYMBOL(ip6_xmit);
 293
 294/*
 295 *        To avoid extra problems ND packets are send through this
 296 *        routine. It's code duplication but I really want to avoid
 297 *        extra checks since ipv6_build_header is used by TCP (which
 298 *        is for us performance critical)
 299 */
 300
 301int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
 302               const struct in6_addr *saddr, const struct in6_addr *daddr,
 303               int proto, int len)
 304{
 305        struct ipv6_pinfo *np = inet6_sk(sk);
 306        struct ipv6hdr *hdr;
 307        int totlen;
 308
 309        skb->protocol = htons(ETH_P_IPV6);
 310        skb->dev = dev;
 311
 312        totlen = len + sizeof(struct ipv6hdr);
 313
 314        skb_reset_network_header(skb);
 315        skb_put(skb, sizeof(struct ipv6hdr));
 316        hdr = ipv6_hdr(skb);
 317
 318        *(__be32*)hdr = htonl(0x60000000);
 319
 320        hdr->payload_len = htons(len);
 321        hdr->nexthdr = proto;
 322        hdr->hop_limit = np->hop_limit;
 323
 324        ipv6_addr_copy(&hdr->saddr, saddr);
 325        ipv6_addr_copy(&hdr->daddr, daddr);
 326
 327        return 0;
 328}
 329
/*
 * Deliver a Router Alert packet to every raw socket registered on
 * ip6_ra_chain whose selector matches @sel (and whose bound device, if
 * any, matches the receiving device).
 *
 * Returns 1 when the skb was consumed by at least one socket (the
 * caller must not touch it any more), 0 when no socket matched and the
 * caller keeps ownership.
 */
 330static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
 331{
 332        struct ip6_ra_chain *ra;
 333        struct sock *last = NULL;
 334
 335        read_lock(&ip6_ra_lock);
 336        for (ra = ip6_ra_chain; ra; ra = ra->next) {
 337                struct sock *sk = ra->sk;
 338                if (sk && ra->sel == sel &&
 339                    (!sk->sk_bound_dev_if ||
 340                     sk->sk_bound_dev_if == skb->dev->ifindex)) {
                        /* Every match except the last gets its own clone;
                         * the original skb is reserved for the final match
                         * so no copy is wasted. */
 341                        if (last) {
 342                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 343                                if (skb2)
 344                                        rawv6_rcv(last, skb2);
 345                        }
 346                        last = sk;
 347                }
 348        }
 349
 350        if (last) {
 351                rawv6_rcv(last, skb);
 352                read_unlock(&ip6_ra_lock);
 353                return 1;
 354        }
 355        read_unlock(&ip6_ra_lock);
 356        return 0;
 357}
 358
/*
 * Decide how to treat a to-be-forwarded packet whose destination is an
 * address we proxy (caller found a pneigh entry).
 *
 * Returns 1 to hand the packet to local input (unicast neighbour
 * discovery ICMPv6 aimed at the proxied address), -1 to drop it
 * (link-local destination; the sender is signalled via
 * dst_link_failure()), 0 to forward it normally.
 */
 359static int ip6_forward_proxy_check(struct sk_buff *skb)
 360{
 361        struct ipv6hdr *hdr = ipv6_hdr(skb);
 362        u8 nexthdr = hdr->nexthdr;
 363        int offset;
 364
        /* Walk past extension headers to find the upper-layer protocol. */
 365        if (ipv6_ext_hdr(nexthdr)) {
 366                offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
 367                if (offset < 0)
 368                        return 0;
 369        } else
 370                offset = sizeof(struct ipv6hdr);
 371
 372        if (nexthdr == IPPROTO_ICMPV6) {
 373                struct icmp6hdr *icmp6;
 374
                /* Make sure at least the ICMPv6 type byte is in the
                 * linear area before reading it. */
 375                if (!pskb_may_pull(skb, (skb_network_header(skb) +
 376                                         offset + 1 - skb->data)))
 377                        return 0;
 378
 379                icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
 380
 381                switch (icmp6->icmp6_type) {
 382                case NDISC_ROUTER_SOLICITATION:
 383                case NDISC_ROUTER_ADVERTISEMENT:
 384                case NDISC_NEIGHBOUR_SOLICITATION:
 385                case NDISC_NEIGHBOUR_ADVERTISEMENT:
 386                case NDISC_REDIRECT:
 387                        /* For reaction involving unicast neighbor discovery
 388                         * message destined to the proxied address, pass it to
 389                         * input function.
 390                         */
 391                        return 1;
 392                default:
 393                        break;
 394                }
 395        }
 396
 397        /*
 398         * The proxying router can't forward traffic sent to a link-local
 399         * address, so signal the sender and discard the packet. This
 400         * behavior is clarified by the MIPv6 specification.
 401         */
 402        if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
 403                dst_link_failure(skb);
 404                return -1;
 405        }
 406
 407        return 0;
 408}
 409
/* NF_HOOK okfn for the forward path: pass the skb to dst_output(). */
 410static inline int ip6_forward_finish(struct sk_buff *skb)
 411{
 412        return dst_output(skb);
 413}
 414
/*
 * Forward an IPv6 packet received on one interface toward another.
 *
 * Drops — bumping the appropriate SNMP counter and, where the protocol
 * requires it, sending an ICMPv6 error back to the source — when
 * forwarding is disabled, XFRM policy rejects the packet, the hop limit
 * is exhausted, the source address is unusable, or the packet exceeds
 * the outgoing MTU.  Router Alert packets are handed to registered raw
 * sockets untouched.  On success the packet goes through the
 * NF_INET_FORWARD netfilter hook to ip6_forward_finish().
 */
 415int ip6_forward(struct sk_buff *skb)
 416{
 417        struct dst_entry *dst = skb->dst;
 418        struct ipv6hdr *hdr = ipv6_hdr(skb);
 419        struct inet6_skb_parm *opt = IP6CB(skb);
 420        struct net *net = dev_net(dst->dev);
 421
 422        if (net->ipv6.devconf_all->forwarding == 0)
 423                goto error;
 424
 425        if (skb_warn_if_lro(skb))
 426                goto drop;
 427
 428        if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
 429                IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
 430                goto drop;
 431        }
 432
 433        skb_forward_csum(skb);
 434
 435        /*
 436         *        We DO NOT make any processing on
 437         *        RA packets, pushing them to user level AS IS
 438         *        without ane WARRANTY that application will be able
 439         *        to interpret them. The reason is that we
 440         *        cannot make anything clever here.
 441         *
 442         *        We are not end-node, so that if packet contains
 443         *        AH/ESP, we cannot make anything.
 444         *        Defragmentation also would be mistake, RA packets
 445         *        cannot be fragmented, because there is no warranty
 446         *        that different fragments will go along one path. --ANK
 447         */
 448        if (opt->ra) {
                /* ptr[2..3] is the 16-bit Router Alert value stored at the
                 * offset recorded by the hop-by-hop option parser. */
 449                u8 *ptr = skb_network_header(skb) + opt->ra;
 450                if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
 451                        return 0;
 452        }
 453
 454        /*
 455         *        check and decrement ttl
 456         */
 457        if (hdr->hop_limit <= 1) {
 458                /* Force OUTPUT device used as source address */
 459                skb->dev = dst->dev;
 460                icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
 461                            0, skb->dev);
 462                IP6_INC_STATS_BH(net,
 463                                 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
 464
 465                kfree_skb(skb);
 466                return -ETIMEDOUT;
 467        }
 468
 469        /* XXX: idev->cnf.proxy_ndp? */
 470        if (net->ipv6.devconf_all->proxy_ndp &&
 471            pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
 472                int proxied = ip6_forward_proxy_check(skb);
 473                if (proxied > 0)
 474                        return ip6_input(skb);
 475                else if (proxied < 0) {
 476                        IP6_INC_STATS(net, ip6_dst_idev(dst),
 477                                      IPSTATS_MIB_INDISCARDS);
 478                        goto drop;
 479                }
 480        }
 481
 482        if (!xfrm6_route_forward(skb)) {
 483                IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
 484                goto drop;
 485        }
        /* xfrm6_route_forward() may have switched skb->dst; re-read it. */
 486        dst = skb->dst;
 487
 488        /* IPv6 specs say nothing about it, but it is clear that we cannot
 489           send redirects to source routed frames.
 490           We don't send redirects to frames decapsulated from IPsec.
 491         */
 492        if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
 493            !skb->sp) {
 494                struct in6_addr *target = NULL;
 495                struct rt6_info *rt;
 496                struct neighbour *n = dst->neighbour;
 497
 498                /*
 499                 *        incoming and outgoing devices are the same
 500                 *        send a redirect.
 501                 */
 502
 503                rt = (struct rt6_info *) dst;
 504                if ((rt->rt6i_flags & RTF_GATEWAY))
 505                        target = (struct in6_addr*)&n->primary_key;
 506                else
 507                        target = &hdr->daddr;
 508
 509                /* Limit redirects both by destination (here)
 510                   and by source (inside ndisc_send_redirect)
 511                 */
 512                if (xrlim_allow(dst, 1*HZ))
 513                        ndisc_send_redirect(skb, n, target);
 514        } else {
 515                int addrtype = ipv6_addr_type(&hdr->saddr);
 516
 517                /* This check is security critical. */
 518                if (addrtype == IPV6_ADDR_ANY ||
 519                    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
 520                        goto error;
 521                if (addrtype & IPV6_ADDR_LINKLOCAL) {
 522                        icmpv6_send(skb, ICMPV6_DEST_UNREACH,
 523                                ICMPV6_NOT_NEIGHBOUR, 0, skb->dev);
 524                        goto error;
 525                }
 526        }
 527
 528        if (skb->len > dst_mtu(dst)) {
 529                /* Again, force OUTPUT device used as source address */
 530                skb->dev = dst->dev;
 531                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
 532                IP6_INC_STATS_BH(net,
 533                                 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
 534                IP6_INC_STATS_BH(net,
 535                                 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
 536                kfree_skb(skb);
 537                return -EMSGSIZE;
 538        }
 539
 540        if (skb_cow(skb, dst->dev->hard_header_len)) {
 541                IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
 542                goto drop;
 543        }
 544
        /* skb_cow() may have reallocated the header; re-fetch it. */
 545        hdr = ipv6_hdr(skb);
 546
 547        /* Mangling hops number delayed to point after skb COW */
 548
 549        hdr->hop_limit--;
 550
 551        IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
 552        return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
 553                       ip6_forward_finish);
 554
 555error:
 556        IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
 557drop:
 558        kfree_skb(skb);
 559        return -EINVAL;
 560}
 561
/*
 * Copy per-packet metadata (packet type, priority, protocol, dst
 * reference, device, mark, scheduling class, netfilter state, secmark)
 * from @from onto a freshly built fragment @to.  Any dst reference @to
 * already held is released before a new one is cloned from @from.
 */
 562static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 563{
 564        to->pkt_type = from->pkt_type;
 565        to->priority = from->priority;
 566        to->protocol = from->protocol;
 567        dst_release(to->dst);
 568        to->dst = dst_clone(from->dst);
 569        to->dev = from->dev;
 570        to->mark = from->mark;
 571
 572#ifdef CONFIG_NET_SCHED
 573        to->tc_index = from->tc_index;
 574#endif
 575        nf_copy(to, from);
 576#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
 577    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
 578        to->nf_trace = from->nf_trace;
 579#endif
 580        skb_copy_secmark(to, from);
 581}
 582
/*
 * Compute the offset (from the network header) where a Fragment header
 * must be inserted: just past the unfragmentable part, i.e. Hop-by-Hop,
 * Routing, and any Destination Options header that precedes a Routing
 * header.  On return *nexthdr points at the "next header" byte that
 * callers patch to NEXTHDR_FRAGMENT (see ip6_fragment).
 */
 583int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
 584{
 585        u16 offset = sizeof(struct ipv6hdr);
 586        struct ipv6_opt_hdr *exthdr =
 587                                (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
 588        unsigned int packet_len = skb->tail - skb->network_header;
 589        int found_rhdr = 0;
 590        *nexthdr = &ipv6_hdr(skb)->nexthdr;
 591
 592        while (offset + 1 <= packet_len) {
 593
 594                switch (**nexthdr) {
 595
 596                case NEXTHDR_HOP:
 597                        break;
 598                case NEXTHDR_ROUTING:
 599                        found_rhdr = 1;
 600                        break;
 601                case NEXTHDR_DEST:
 602#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
                        /* With MIP6, a Destination Options header carrying a
                         * Home Address option is kept in the unfragmentable
                         * part: keep walking past it. */
 603                        if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
 604                                break;
 605#endif
                        /* A Dest Opts header after a Routing header starts
                         * the fragmentable part. */
 606                        if (found_rhdr)
 607                                return offset;
 608                        break;
 609                default :
 610                        return offset;
 611                }
 612
                /* Advance to the next extension header. */
 613                offset += ipv6_optlen(exthdr);
 614                *nexthdr = &exthdr->nexthdr;
 615                exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
 616                                                 offset);
 617        }
 618
 619        return offset;
 620}
 621
 622static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 623{
 624        struct sk_buff *frag;
 625        struct rt6_info *rt = (struct rt6_info*)skb->dst;
 626        struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
 627        struct ipv6hdr *tmp_hdr;
 628        struct frag_hdr *fh;
 629        unsigned int mtu, hlen, left, len;
 630        __be32 frag_id = 0;
 631        int ptr, offset = 0, err=0;
 632        u8 *prevhdr, nexthdr = 0;
 633        struct net *net = dev_net(skb->dst->dev);
 634
 635        hlen = ip6_find_1stfragopt(skb, &prevhdr);
 636        nexthdr = *prevhdr;
 637
 638        mtu = ip6_skb_dst_mtu(skb);
 639
 640        /* We must not fragment if the socket is set to force MTU discovery
 641         * or if the skb it not generated by a local socket.  (This last
 642         * check should be redundant, but it's free.)
 643         */
 644        if (!skb->local_df) {
 645                skb->dev = skb->dst->dev;
 646                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
 647                IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
 648                              IPSTATS_MIB_FRAGFAILS);
 649                kfree_skb(skb);
 650                return -EMSGSIZE;
 651        }
 652
 653        if (np && np->frag_size < mtu) {
 654                if (np->frag_size)
 655                        mtu = np->frag_size;
 656        }
 657        mtu -= hlen + sizeof(struct frag_hdr);
 658
 659        if (skb_shinfo(skb)->frag_list) {
 660                int first_len = skb_pagelen(skb);
 661                int truesizes = 0;
 662
 663                if (first_len - hlen > mtu ||
 664                    ((first_len - hlen) & 7) ||
 665                    skb_cloned(skb))
 666                        goto slow_path;
 667
 668                for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
 669                        /* Correct geometry. */
 670                        if (frag->len > mtu ||
 671                            ((frag->len & 7) && frag->next) ||
 672                            skb_headroom(frag) < hlen)
 673                            goto slow_path;
 674
 675                        /* Partially cloned skb? */
 676                        if (skb_shared(frag))
 677                                goto slow_path;
 678
 679                        BUG_ON(frag->sk);
 680                        if (skb->sk) {
 681                                sock_hold(skb->sk);
 682                                frag->sk = skb->sk;
 683                                frag->destructor = sock_wfree;
 684                                truesizes += frag->truesize;
 685                        }
 686                }
 687
 688                err = 0;
 689                offset = 0;
 690                frag = skb_shinfo(skb)->frag_list;
 691                skb_shinfo(skb)->frag_list = NULL;
 692                /* BUILD HEADER */
 693
 694                *prevhdr = NEXTHDR_FRAGMENT;
 695                tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
 696                if (!tmp_hdr) {
 697                        IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
 698                                      IPSTATS_MIB_FRAGFAILS);
 699                        return -ENOMEM;
 700                }
 701
 702                __skb_pull(skb, hlen);
 703                fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
 704                __skb_push(skb, hlen);
 705                skb_reset_network_header(skb);
 706                memcpy(skb_network_header(skb), tmp_hdr, hlen);
 707
 708                ipv6_select_ident(skb, fh);
 709                fh->nexthdr = nexthdr;
 710                fh->reserved = 0;
 711                fh->frag_off = htons(IP6_MF);
 712                frag_id = fh->identification;
 713
 714                first_len = skb_pagelen(skb);
 715                skb->data_len = first_len - skb_headlen(skb);
 716                skb->truesize -= truesizes;
 717                skb->len = first_len;
 718                ipv6_hdr(skb)->payload_len = htons(first_len -
 719                                                   sizeof(struct ipv6hdr));
 720
 721                dst_hold(&rt->u.dst);
 722
 723                for (;;) {
 724                        /* Prepare header of the next frame,
 725                         * before previous one went down. */
 726                        if (frag) {
 727                                frag->ip_summed = CHECKSUM_NONE;
 728                                skb_reset_transport_header(frag);
 729                                fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
 730                                __skb_push(frag, hlen);
 731                                skb_reset_network_header(frag);
 732                                memcpy(skb_network_header(frag), tmp_hdr,
 733                                       hlen);
 734                                offset += skb->len - hlen - sizeof(struct frag_hdr);
 735                                fh->nexthdr = nexthdr;
 736                                fh->reserved = 0;
 737                                fh->frag_off = htons(offset);
 738                                if (frag->next != NULL)
 739                                        fh->frag_off |= htons(IP6_MF);
 740                                fh->identification = frag_id;
 741                                ipv6_hdr(frag)->payload_len =
 742                                                htons(frag->len -
 743                                                      sizeof(struct ipv6hdr));
 744                                ip6_copy_metadata(frag, skb);
 745                        }
 746
 747                        err = output(skb);
 748                        if(!err)
 749                                IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
 750                                              IPSTATS_MIB_FRAGCREATES);
 751
 752                        if (err || !frag)
 753                                break;
 754
 755                        skb = frag;
 756                        frag = skb->next;
 757                        skb->next = NULL;
 758                }
 759
 760                kfree(tmp_hdr);
 761
 762                if (err == 0) {
 763                        IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
 764                                      IPSTATS_MIB_FRAGOKS);
 765                        dst_release(&rt->u.dst);
 766                        return 0;
 767                }
 768
 769                while (frag) {
 770                        skb = frag->next;
 771                        kfree_skb(frag);
 772                        frag = skb;
 773                }
 774
 775                IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
 776                              IPSTATS_MIB_FRAGFAILS);
 777                dst_release(&rt->u.dst);
 778                return err;
 779        }
 780
 781slow_path:
 782        left = skb->len - hlen;                /* Space per frame */
 783        ptr = hlen;                        /* Where to start from */
 784
 785        /*
 786         *        Fragment the datagram.
 787         */
 788
 789        *prevhdr = NEXTHDR_FRAGMENT;
 790
 791        /*
 792         *        Keep copying data until we run out.
 793         */
 794        while(left > 0)        {
 795                len = left;
 796                /* IF: it doesn't fit, use 'mtu' - the data space left */
 797                if (len > mtu)
 798                        len = mtu;
 799                /* IF: we are not sending upto and including the packet end
 800                   then align the next start on an eight byte boundary */
 801                if (len < left)        {
 802                        len &= ~7;
 803                }
 804                /*
 805                 *        Allocate buffer.
 806                 */
 807
 808                if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
 809                        NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
 810                        IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
 811                                      IPSTATS_MIB_FRAGFAILS);
 812                        err = -ENOMEM;
 813                        goto fail;
 814                }
 815
 816                /*
 817                 *        Set up data on packet
 818                 */
 819
 820                ip6_copy_metadata(frag, skb);
 821                skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
 822                skb_put(frag, len + hlen + sizeof(struct frag_hdr));
 823                skb_reset_network_header(frag);
 824                fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
 825                frag->transport_header = (frag->network_header + hlen +
 826                                          sizeof(struct frag_hdr));
 827
 828                /*
 829                 *        Charge the memory for the fragment to any owner
 830                 *        it might possess
 831                 */
 832                if (skb->sk)
 833                        skb_set_owner_w(frag, skb->sk);
 834
 835                /*
 836                 *        Copy the packet header into the new buffer.
 837                 */
 838                skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
 839
 840                /*
 841                 *        Build fragment header.
 842                 */
 843                fh->nexthdr = nexthdr;
 844                fh->reserved = 0;
 845                if (!frag_id) {
 846                        ipv6_select_ident(skb, fh);
 847                        frag_id = fh->identification;
 848                } else
 849                        fh->identification = frag_id;
 850
 851                /*
 852                 *        Copy a block of the IP datagram.
 853                 */
 854                if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
 855                        BUG();
 856                left -= len;
 857
 858                fh->frag_off = htons(offset);
 859                if (left > 0)
 860                        fh->frag_off |= htons(IP6_MF);
 861                ipv6_hdr(frag)->payload_len = htons(frag->len -
 862                                                    sizeof(struct ipv6hdr));
 863
 864                ptr += len;
 865                offset += len;
 866
 867                /*
 868                 *        Put this fragment into the sending queue.
 869                 */
 870                err = output(frag);
 871                if (err)
 872                        goto fail;
 873
 874                IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
 875                              IPSTATS_MIB_FRAGCREATES);
 876        }
 877        IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
 878                      IPSTATS_MIB_FRAGOKS);
 879        kfree_skb(skb);
 880        return err;
 881
 882fail:
 883        IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
 884                      IPSTATS_MIB_FRAGFAILS);
 885        kfree_skb(skb);
 886        return err;
 887}
 888
 889static inline int ip6_rt_check(struct rt6key *rt_key,
 890                               struct in6_addr *fl_addr,
 891                               struct in6_addr *addr_cache)
 892{
 893        return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
 894                (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
 895}
 896
 897static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
 898                                          struct dst_entry *dst,
 899                                          struct flowi *fl)
 900{
 901        struct ipv6_pinfo *np = inet6_sk(sk);
 902        struct rt6_info *rt = (struct rt6_info *)dst;
 903
 904        if (!dst)
 905                goto out;
 906
 907        /* Yes, checking route validity in not connected
 908         * case is not very simple. Take into account,
 909         * that we do not support routing by source, TOS,
 910         * and MSG_DONTROUTE                 --ANK (980726)
 911         *
 912         * 1. ip6_rt_check(): If route was host route,
 913         *    check that cached destination is current.
 914         *    If it is network route, we still may
 915         *    check its validity using saved pointer
 916         *    to the last used address: daddr_cache.
 917         *    We do not want to save whole address now,
 918         *    (because main consumer of this service
 919         *    is tcp, which has not this problem),
 920         *    so that the last trick works only on connected
 921         *    sockets.
 922         * 2. oif also should be the same.
 923         */
 924        if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
 925#ifdef CONFIG_IPV6_SUBTREES
 926            ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
 927#endif
 928            (fl->oif && fl->oif != dst->dev->ifindex)) {
 929                dst_release(dst);
 930                dst = NULL;
 931        }
 932
 933out:
 934        return dst;
 935}
 936
/*
 * ip6_dst_lookup_tail - finish a route lookup for flow @fl.
 *
 * If *dst is NULL a fresh route is looked up.  A missing flow source
 * address is filled in from the chosen output device.  On success,
 * returns 0 with *dst holding a referenced route; on failure, releases
 * *dst, sets it to NULL and returns a negative errno.
 */
static int ip6_dst_lookup_tail(struct sock *sk,
                               struct dst_entry **dst, struct flowi *fl)
{
        int err;
        struct net *net = sock_net(sk);

        if (*dst == NULL)
                *dst = ip6_route_output(net, sk, fl);

        /* NOTE(review): *dst is dereferenced without a NULL check, so
         * ip6_route_output() is assumed to always return a dst and to
         * report failure via dst->error — confirm against its definition. */
        if ((err = (*dst)->error))
                goto out_err_release;

        if (ipv6_addr_any(&fl->fl6_src)) {
                /* No source address supplied: derive one from the egress
                 * device, honouring the socket's source-address prefs. */
                err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev,
                                         &fl->fl6_dst,
                                         sk ? inet6_sk(sk)->srcprefs : 0,
                                         &fl->fl6_src);
                if (err)
                        goto out_err_release;
        }

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
        /*
         * Here if the dst entry we've looked up
         * has a neighbour entry that is in the INCOMPLETE
         * state and the src address from the flow is
         * marked as OPTIMISTIC, we release the found
         * dst entry and replace it instead with the
         * dst entry of the nexthop router
         */
        if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) {
                struct inet6_ifaddr *ifp;
                struct flowi fl_gw;
                int redirect;

                ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
                                      (*dst)->dev, 1);

                redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
                if (ifp)
                        in6_ifa_put(ifp);

                if (redirect) {
                        /*
                         * We need to get the dst entry for the
                         * default router instead
                         */
                        dst_release(*dst);
                        memcpy(&fl_gw, fl, sizeof(struct flowi));
                        memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
                        *dst = ip6_route_output(net, sk, &fl_gw);
                        if ((err = (*dst)->error))
                                goto out_err_release;
                }
        }
#endif

        return 0;

out_err_release:
        if (err == -ENETUNREACH)
                IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
        dst_release(*dst);
        *dst = NULL;
        return err;
}
1003
1004/**
1005 *        ip6_dst_lookup - perform route lookup on flow
1006 *        @sk: socket which provides route info
1007 *        @dst: pointer to dst_entry * for result
1008 *        @fl: flow to lookup
1009 *
1010 *        This function performs a route lookup on the given flow.
1011 *
1012 *        It returns zero on success, or a standard errno code on error.
1013 */
1014int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
1015{
1016        *dst = NULL;
1017        return ip6_dst_lookup_tail(sk, dst, fl);
1018}
1019EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1020
1021/**
1022 *        ip6_sk_dst_lookup - perform socket cached route lookup on flow
1023 *        @sk: socket which provides the dst cache and route info
1024 *        @dst: pointer to dst_entry * for result
1025 *        @fl: flow to lookup
1026 *
1027 *        This function performs a route lookup on the given flow with the
1028 *        possibility of using the cached route in the socket if it is valid.
1029 *        It will take the socket dst lock when operating on the dst cache.
1030 *        As a result, this function can only be used in process context.
1031 *
1032 *        It returns zero on success, or a standard errno code on error.
1033 */
1034int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
1035{
1036        *dst = NULL;
1037        if (sk) {
1038                *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1039                *dst = ip6_sk_dst_check(sk, *dst, fl);
1040        }
1041
1042        return ip6_dst_lookup_tail(sk, dst, fl);
1043}
1044EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);
1045
1046static inline int ip6_ufo_append_data(struct sock *sk,
1047                        int getfrag(void *from, char *to, int offset, int len,
1048                        int odd, struct sk_buff *skb),
1049                        void *from, int length, int hh_len, int fragheaderlen,
1050                        int transhdrlen, int mtu,unsigned int flags)
1051
1052{
1053        struct sk_buff *skb;
1054        int err;
1055
1056        /* There is support for UDP large send offload by network
1057         * device, so create one single skb packet containing complete
1058         * udp datagram
1059         */
1060        if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1061                skb = sock_alloc_send_skb(sk,
1062                        hh_len + fragheaderlen + transhdrlen + 20,
1063                        (flags & MSG_DONTWAIT), &err);
1064                if (skb == NULL)
1065                        return -ENOMEM;
1066
1067                /* reserve space for Hardware header */
1068                skb_reserve(skb, hh_len);
1069
1070                /* create space for UDP/IP header */
1071                skb_put(skb,fragheaderlen + transhdrlen);
1072
1073                /* initialize network header pointer */
1074                skb_reset_network_header(skb);
1075
1076                /* initialize protocol header pointer */
1077                skb->transport_header = skb->network_header + fragheaderlen;
1078
1079                skb->ip_summed = CHECKSUM_PARTIAL;
1080                skb->csum = 0;
1081                sk->sk_sndmsg_off = 0;
1082        }
1083
1084        err = skb_append_datato_frags(sk,skb, getfrag, from,
1085                                      (length - transhdrlen));
1086        if (!err) {
1087                struct frag_hdr fhdr;
1088
1089                /* specify the length of each IP datagram fragment*/
1090                skb_shinfo(skb)->gso_size = mtu - fragheaderlen -
1091                                            sizeof(struct frag_hdr);
1092                skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1093                ipv6_select_ident(skb, &fhdr);
1094                skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1095                __skb_queue_tail(&sk->sk_write_queue, skb);
1096
1097                return 0;
1098        }
1099        /* There is not enough support do UPD LSO,
1100         * so follow normal path
1101         */
1102        kfree_skb(skb);
1103
1104        return err;
1105}
1106
/*
 * ip6_append_data - buffer user data on the socket's pending (corked) queue.
 *
 * Data is accumulated on sk->sk_write_queue until ip6_push_pending_frames()
 * builds and sends the final IPv6 packet(s).  The first call of a cork
 * cycle (queue empty) captures route, flow, options, hop limit, traffic
 * class and MTU into the cork state; subsequent calls reuse that state and
 * ignore the caller-supplied rt/fl/opt/hlimit/tclass.
 *
 * getfrag() copies user data into packet memory and may fail (e.g. -EFAULT).
 * Returns 0 on success or a negative errno; on error only cork.length is
 * adjusted — already-queued data is not unwound here.
 */
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
        int offset, int len, int odd, struct sk_buff *skb),
        void *from, int length, int transhdrlen,
        int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
        struct rt6_info *rt, unsigned int flags)
{
        struct inet_sock *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct sk_buff *skb;
        unsigned int maxfraglen, fragheaderlen;
        int exthdrlen;
        int hh_len;
        int mtu;
        int copy;
        int err;
        int offset = 0;
        int csummode = CHECKSUM_NONE;

        /* MSG_PROBE: nothing is queued or sent. */
        if (flags&MSG_PROBE)
                return 0;
        if (skb_queue_empty(&sk->sk_write_queue)) {
                /*
                 * setup for corking
                 */
                if (opt) {
                        if (np->cork.opt == NULL) {
                                np->cork.opt = kmalloc(opt->tot_len,
                                                       sk->sk_allocation);
                                if (unlikely(np->cork.opt == NULL))
                                        return -ENOBUFS;
                        } else if (np->cork.opt->tot_len < opt->tot_len) {
                                printk(KERN_DEBUG "ip6_append_data: invalid option length\n");
                                return -EINVAL;
                        }
                        memcpy(np->cork.opt, opt, opt->tot_len);
                        inet->cork.flags |= IPCORK_OPT;
                        /* need source address above miyazawa*/
                }
                /* Pin the route for the lifetime of the cork cycle;
                 * released by ip6_cork_release(). */
                dst_hold(&rt->u.dst);
                inet->cork.dst = &rt->u.dst;
                inet->cork.fl = *fl;
                np->cork.hop_limit = hlimit;
                np->cork.tclass = tclass;
                mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
                      rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
                /* A socket-level frag_size may lower (never raise) the MTU. */
                if (np->frag_size < mtu) {
                        if (np->frag_size)
                                mtu = np->frag_size;
                }
                inet->cork.fragsize = mtu;
                if (dst_allfrag(rt->u.dst.path))
                        inet->cork.flags |= IPCORK_ALLFRAG;
                inet->cork.length = 0;
                sk->sk_sndmsg_page = NULL;
                sk->sk_sndmsg_off = 0;
                exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
                            rt->rt6i_nfheader_len;
                length += exthdrlen;
                transhdrlen += exthdrlen;
        } else {
                /* Continue an existing cork cycle: use saved state. */
                rt = (struct rt6_info *)inet->cork.dst;
                fl = &inet->cork.fl;
                if (inet->cork.flags & IPCORK_OPT)
                        opt = np->cork.opt;
                transhdrlen = 0;
                exthdrlen = 0;
                mtu = inet->cork.fragsize;
        }

        /* Link-layer headroom every allocation below must reserve. */
        hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);

        fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
                        (opt ? opt->opt_nflen : 0);
        /* Largest per-fragment total, 8-byte aligned payload, with room
         * reserved for a fragment extension header. */
        maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);

        /* The total message must fit the 16-bit payload length field. */
        if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
                if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
                        ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
                        return -EMSGSIZE;
                }
        }

        /*
         * Let's try using as much space as possible.
         * Use MTU if total length of the message fits into the MTU.
         * Otherwise, we need to reserve fragment header and
         * fragment alignment (= 8-15 octects, in total).
         *
         * Note that we may need to "move" the data from the tail of
         * of the buffer to the new fragment when we split
         * the message.
         *
         * FIXME: It may be fragmented into multiple chunks
         *        at once if non-fragmentable extension headers
         *        are too large.
         * --yoshfuji
         */

        inet->cork.length += length;
        /* Large UDP sends on UFO-capable devices take the offload path:
         * one big skb instead of software fragmentation. */
        if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
            (rt->u.dst.dev->features & NETIF_F_UFO)) {

                err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
                                          fragheaderlen, transhdrlen, mtu,
                                          flags);
                if (err)
                        goto error;
                return 0;
        }

        if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
                goto alloc_new_skb;

        while (length > 0) {
                /* Check if the remaining data fits into current packet. */
                copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
                if (copy < length)
                        copy = maxfraglen - skb->len;

                if (copy <= 0) {
                        char *data;
                        unsigned int datalen;
                        unsigned int fraglen;
                        unsigned int fraggap;
                        unsigned int alloclen;
                        struct sk_buff *skb_prev;
alloc_new_skb:
                        skb_prev = skb;

                        /* There's no room in the current skb.  fraggap is
                         * the tail of skb_prev past the fragment boundary;
                         * it is moved into the new skb below to keep
                         * fragment payloads 8-byte aligned. */
                        if (skb_prev)
                                fraggap = skb_prev->len - maxfraglen;
                        else
                                fraggap = 0;

                        /*
                         * If remaining data exceeds the mtu,
                         * we know we need more fragment(s).
                         */
                        datalen = length + fraggap;
                        if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
                                datalen = maxfraglen - fragheaderlen;

                        fraglen = datalen + fragheaderlen;
                        if ((flags & MSG_MORE) &&
                            !(rt->u.dst.dev->features&NETIF_F_SG))
                                alloclen = mtu;
                        else
                                alloclen = datalen + fragheaderlen;

                        /*
                         * The last fragment gets additional space at tail.
                         * Note: we overallocate on fragments with MSG_MORE
                         * because we have no idea if we're the last one.
                         */
                        if (datalen == length + fraggap)
                                alloclen += rt->u.dst.trailer_len;

                        /*
                         * We just reserve space for fragment header.
                         * Note: this may be overallocation if the message
                         * (without MSG_MORE) fits into the MTU.
                         */
                        alloclen += sizeof(struct frag_hdr);

                        if (transhdrlen) {
                                /* First skb of the message: may block. */
                                skb = sock_alloc_send_skb(sk,
                                                alloclen + hh_len,
                                                (flags & MSG_DONTWAIT), &err);
                        } else {
                                /* Follow-up skb: only allocate while within
                                 * twice the send buffer limit. */
                                skb = NULL;
                                if (atomic_read(&sk->sk_wmem_alloc) <=
                                    2 * sk->sk_sndbuf)
                                        skb = sock_wmalloc(sk,
                                                           alloclen + hh_len, 1,
                                                           sk->sk_allocation);
                                if (unlikely(skb == NULL))
                                        err = -ENOBUFS;
                        }
                        if (skb == NULL)
                                goto error;
                        /*
                         *        Fill in the control structures
                         */
                        skb->ip_summed = csummode;
                        skb->csum = 0;
                        /* reserve for fragmentation */
                        skb_reserve(skb, hh_len+sizeof(struct frag_hdr));

                        /*
                         *        Find where to start putting bytes
                         */
                        data = skb_put(skb, fraglen);
                        skb_set_network_header(skb, exthdrlen);
                        data += fragheaderlen;
                        skb->transport_header = (skb->network_header +
                                                 fragheaderlen);
                        if (fraggap) {
                                /* Move the unaligned tail of the previous
                                 * skb into this one and fix its checksum. */
                                skb->csum = skb_copy_and_csum_bits(
                                        skb_prev, maxfraglen,
                                        data + transhdrlen, fraggap, 0);
                                skb_prev->csum = csum_sub(skb_prev->csum,
                                                          skb->csum);
                                data += fraggap;
                                pskb_trim_unique(skb_prev, maxfraglen);
                        }
                        copy = datalen - transhdrlen - fraggap;
                        if (copy < 0) {
                                err = -EINVAL;
                                kfree_skb(skb);
                                goto error;
                        } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
                                err = -EFAULT;
                                kfree_skb(skb);
                                goto error;
                        }

                        offset += copy;
                        length -= datalen - fraggap;
                        transhdrlen = 0;
                        exthdrlen = 0;
                        csummode = CHECKSUM_NONE;

                        /*
                         * Put the packet on the pending queue
                         */
                        __skb_queue_tail(&sk->sk_write_queue, skb);
                        continue;
                }

                if (copy > length)
                        copy = length;

                if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
                        /* No scatter-gather: copy into linear skb data. */
                        unsigned int off;

                        off = skb->len;
                        if (getfrag(from, skb_put(skb, copy),
                                                offset, copy, off, skb) < 0) {
                                __skb_trim(skb, off);
                                err = -EFAULT;
                                goto error;
                        }
                } else {
                        /* Scatter-gather: append to the current page frag,
                         * allocating a new page when the old one is full. */
                        int i = skb_shinfo(skb)->nr_frags;
                        skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
                        struct page *page = sk->sk_sndmsg_page;
                        int off = sk->sk_sndmsg_off;
                        unsigned int left;

                        if (page && (left = PAGE_SIZE - off) > 0) {
                                if (copy >= left)
                                        copy = left;
                                if (page != frag->page) {
                                        if (i == MAX_SKB_FRAGS) {
                                                err = -EMSGSIZE;
                                                goto error;
                                        }
                                        get_page(page);
                                        skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
                                        frag = &skb_shinfo(skb)->frags[i];
                                }
                        } else if(i < MAX_SKB_FRAGS) {
                                if (copy > PAGE_SIZE)
                                        copy = PAGE_SIZE;
                                page = alloc_pages(sk->sk_allocation, 0);
                                if (page == NULL) {
                                        err = -ENOMEM;
                                        goto error;
                                }
                                sk->sk_sndmsg_page = page;
                                sk->sk_sndmsg_off = 0;

                                skb_fill_page_desc(skb, i, page, 0, 0);
                                frag = &skb_shinfo(skb)->frags[i];
                        } else {
                                err = -EMSGSIZE;
                                goto error;
                        }
                        if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
                                err = -EFAULT;
                                goto error;
                        }
                        sk->sk_sndmsg_off += copy;
                        frag->size += copy;
                        skb->len += copy;
                        skb->data_len += copy;
                        skb->truesize += copy;
                        atomic_add(copy, &sk->sk_wmem_alloc);
                }
                offset += copy;
                length -= copy;
        }
        return 0;
error:
        inet->cork.length -= length;
        IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
        return err;
}
1406
1407static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1408{
1409        inet->cork.flags &= ~IPCORK_OPT;
1410        kfree(np->cork.opt);
1411        np->cork.opt = NULL;
1412        if (inet->cork.dst) {
1413                dst_release(inet->cork.dst);
1414                inet->cork.dst = NULL;
1415                inet->cork.flags &= ~IPCORK_ALLFRAG;
1416        }
1417        memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1418}
1419
/*
 * ip6_push_pending_frames - build and transmit the corked packet.
 *
 * Dequeues every skb appended by ip6_append_data(), chains the follow-up
 * skbs onto the first one's frag_list, prepends extension headers and
 * the IPv6 header from the saved cork state, and hands the result to
 * ip6_local_out().  Always releases the cork state before returning.
 * Returns 0 on success or a negative errno.
 */
int ip6_push_pending_frames(struct sock *sk)
{
        struct sk_buff *skb, *tmp_skb;
        struct sk_buff **tail_skb;
        struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
        struct inet_sock *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct net *net = sock_net(sk);
        struct ipv6hdr *hdr;
        struct ipv6_txoptions *opt = np->cork.opt;
        struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
        struct flowi *fl = &inet->cork.fl;
        unsigned char proto = fl->proto;
        int err = 0;

        if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
                goto out;
        tail_skb = &(skb_shinfo(skb)->frag_list);

        /* move skb->data to ip header from ext header */
        if (skb->data < skb_network_header(skb))
                __skb_pull(skb, skb_network_offset(skb));
        /* Fold every remaining queued skb into the head skb's frag_list,
         * transferring length/truesize accounting and dropping each
         * fragment's socket ownership (the head skb keeps its own). */
        while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
                __skb_pull(tmp_skb, skb_network_header_len(skb));
                *tail_skb = tmp_skb;
                tail_skb = &(tmp_skb->next);
                skb->len += tmp_skb->len;
                skb->data_len += tmp_skb->len;
                skb->truesize += tmp_skb->truesize;
                __sock_put(tmp_skb->sk);
                tmp_skb->destructor = NULL;
                tmp_skb->sk = NULL;
        }

        /* Allow local fragmentation. */
        if (np->pmtudisc < IPV6_PMTUDISC_DO)
                skb->local_df = 1;

        /* final_dst is passed by reference to ipv6_push_nfrag_opts() —
         * presumably so a routing header can substitute the next hop as
         * the header's destination; verify against that helper. */
        ipv6_addr_copy(final_dst, &fl->fl6_dst);
        __skb_pull(skb, skb_network_header_len(skb));
        if (opt && opt->opt_flen)
                ipv6_push_frag_opts(skb, opt, &proto);
        if (opt && opt->opt_nflen)
                ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

        skb_push(skb, sizeof(struct ipv6hdr));
        skb_reset_network_header(skb);
        hdr = ipv6_hdr(skb);

        /* One 32-bit store writes version (6), traffic class and flow
         * label together. */
        *(__be32*)hdr = fl->fl6_flowlabel |
                     htonl(0x60000000 | ((int)np->cork.tclass << 20));

        hdr->hop_limit = np->cork.hop_limit;
        hdr->nexthdr = proto;
        ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
        ipv6_addr_copy(&hdr->daddr, final_dst);

        skb->priority = sk->sk_priority;
        skb->mark = sk->sk_mark;

        skb->dst = dst_clone(&rt->u.dst);
        IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
        if (proto == IPPROTO_ICMPV6) {
                struct inet6_dev *idev = ip6_dst_idev(skb->dst);

                ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
                ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
        }

        err = ip6_local_out(skb);
        if (err) {
                /* Positive return = congestion notification; only report
                 * it to the caller when the socket asked for errors. */
                if (err > 0)
                        err = np->recverr ? net_xmit_errno(err) : 0;
                if (err)
                        goto error;
        }

out:
        ip6_cork_release(inet, np);
        return err;
error:
        goto out;
}
1503
1504void ip6_flush_pending_frames(struct sock *sk)
1505{
1506        struct sk_buff *skb;
1507
1508        while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1509                if (skb->dst)
1510                        IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb->dst),
1511                                      IPSTATS_MIB_OUTDISCARDS);
1512                kfree_skb(skb);
1513        }
1514
1515        ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1516}