Merge remote-tracking branch 'stable/linux-3.0.y' into android-3.0
author Todd Poynor <toddpoynor@google.com>
Thu, 1 Nov 2012 20:36:34 +0000 (13:36 -0700)
committer Todd Poynor <toddpoynor@google.com>
Thu, 1 Nov 2012 20:36:34 +0000 (13:36 -0700)
Change-Id: I9685feb9277b450da10d78a455b3c0674d6cfe18
Signed-off-by: Todd Poynor <toddpoynor@google.com>
37 files changed:
MAINTAINERS
arch/arm/Kconfig
arch/arm/boot/compressed/head.S
arch/arm/kernel/smp.c
block/genhd.c
drivers/base/power/runtime.c
drivers/mmc/core/sd.c
drivers/mmc/core/sdio.c
drivers/mmc/host/sdhci.c
drivers/net/Kconfig
drivers/net/tun.c
drivers/tty/serial/serial_core.c
drivers/usb/gadget/u_ether.c
drivers/usb/host/ehci-q.c
fs/fuse/dev.c
include/linux/cpu.h
include/linux/mmc/host.h
include/linux/sched.h
kernel/cgroup.c
kernel/fork.c
kernel/futex.c
kernel/power/suspend.c
kernel/sched.c
kernel/time/timekeeping.c
mm/page_alloc.c
mm/shmem.c
net/bluetooth/hci_sock.c
net/bluetooth/l2cap_sock.c
net/bluetooth/rfcomm/sock.c
net/ipv4/tcp.c
net/ipv6/addrconf.c
net/ipv6/route.c
net/wireless/core.c
net/wireless/core.h
net/wireless/reg.c
net/xfrm/xfrm_policy.c
scripts/Kbuild.include

diff --combined MAINTAINERS
index b0209fbe1ab0be5dcb6d5da71aba1bf8ebc6928f,c8c087443817e0b921aec9c231091a15344d0ecf..74c8e4b73ae42a7998d6f835da90a55d3d3f9712
@@@ -5247,7 -5247,7 +5247,7 @@@ F:      Documentation/blockdev/ramdisk.tx
  F:    drivers/block/brd.c
  
  RANDOM NUMBER DRIVER
- M:    Matt Mackall <mpm@selenic.com>
+ M:    "Theodore Ts'o" <tytso@mit.edu>
  S:    Maintained
  F:    drivers/char/random.c
  
@@@ -6358,13 -6358,6 +6358,13 @@@ S:    Maintaine
  F:    Documentation/filesystems/ufs.txt
  F:    fs/ufs/
  
 +UHID USERSPACE HID IO DRIVER:
 +M:    David Herrmann <dh.herrmann@googlemail.com>
 +L:    linux-input@vger.kernel.org
 +S:    Maintained
 +F:    drivers/hid/uhid.c
 +F:    include/linux/uhid.h
 +
  ULTRA-WIDEBAND (UWB) SUBSYSTEM:
  L:    linux-usb@vger.kernel.org
  S:    Orphan
diff --combined arch/arm/Kconfig
index d8e2570c79713059fb35efac43dbbe64afcfc311,17d179cb0744f17693c62fa59ee48cbe51014c0b..b9d60775cb8b48b1d43c5c1650e0805c0ef2067d
@@@ -1234,6 -1234,42 +1234,42 @@@ config ARM_ERRATA_75432
          This workaround defines cpu_relax() as smp_mb(), preventing correctly
          written polling loops from denying visibility of updates to memory.
  
+ config ARM_ERRATA_764369
+       bool "ARM errata: Data cache line maintenance operation by MVA may not succeed"
+       depends on CPU_V7 && SMP
+       help
+         This option enables the workaround for erratum 764369
+         affecting Cortex-A9 MPCore with two or more processors (all
+         current revisions). Under certain timing circumstances, a data
+         cache line maintenance operation by MVA targeting an Inner
+         Shareable memory region may fail to proceed up to either the
+         Point of Coherency or to the Point of Unification of the
+         system. This workaround adds a DSB instruction before the
+         relevant cache maintenance functions and sets a specific bit
+         in the diagnostic control register of the SCU.
+ 
+ config PL310_ERRATA_769419
+       bool "PL310 errata: no automatic Store Buffer drain"
+       depends on CACHE_L2X0
+       help
+         On revisions of the PL310 prior to r3p2, the Store Buffer does
+         not automatically drain. This can cause normal, non-cacheable
+         writes to be retained when the memory system is idle, leading
+         to suboptimal I/O performance for drivers using coherent DMA.
+         This option adds a write barrier to the cpu_idle loop so that,
+         on systems with an outer cache, the store buffer is drained
+         explicitly.
+ 
+ config ARM_ERRATA_775420
+        bool "ARM errata: A data cache maintenance operation which aborts, might lead to deadlock"
+        depends on CPU_V7
+        help
+        This option enables the workaround for the 775420 Cortex-A9 (r2p2,
+        r2p6,r2p8,r2p10,r3p0) erratum. In case a data cache maintenance
+        operation aborts with MMU exception, it might cause the processor
+        to deadlock. This workaround puts DSB before executing ISB if
+        an abort may occur on cache maintenance.
  endmenu
  
  source "arch/arm/common/Kconfig"
@@@ -1298,32 -1334,6 +1334,6 @@@ source "drivers/pci/Kconfig
  
  source "drivers/pcmcia/Kconfig"
  
- config ARM_ERRATA_764369
-       bool "ARM errata: Data cache line maintenance operation by MVA may not succeed"
-       depends on CPU_V7 && SMP
-       help
-         This option enables the workaround for erratum 764369
-         affecting Cortex-A9 MPCore with two or more processors (all
-         current revisions). Under certain timing circumstances, a data
-         cache line maintenance operation by MVA targeting an Inner
-         Shareable memory region may fail to proceed up to either the
-         Point of Coherency or to the Point of Unification of the
-         system. This workaround adds a DSB instruction before the
-         relevant cache maintenance functions and sets a specific bit
-         in the diagnostic control register of the SCU.
- config PL310_ERRATA_769419
-       bool "PL310 errata: no automatic Store Buffer drain"
-       depends on CACHE_L2X0
-       help
-         On revisions of the PL310 prior to r3p2, the Store Buffer does
-         not automatically drain. This can cause normal, non-cacheable
-         writes to be retained when the memory system is idle, leading
-         to suboptimal I/O performance for drivers using coherent DMA.
-         This option adds a write barrier to the cpu_idle loop so that,
-         on systems with an outer cache, the store buffer is drained
-         explicitly.
  endmenu
  
  menu "Kernel Features"
@@@ -1697,15 -1707,6 +1707,15 @@@ config DEPRECATED_PARAM_STRUC
          This was deprecated in 2001 and announced to live on for 5 years.
          Some old boot loaders still use this way.
  
 +config ARM_FLUSH_CONSOLE_ON_RESTART
 +      bool "Force flush the console on restart"
 +      help
 +        If the console is locked while the system is rebooted, the messages
 +        in the temporary logbuffer would not have propagated to all the
 +        console drivers. This option forces the console lock to be
 +        released if it failed to be acquired, which will cause all the
 +        pending messages to be flushed.
 +
  endmenu
  
  menu "Boot options"
@@@ -1884,6 -1885,7 +1894,7 @@@ source "drivers/cpufreq/Kconfig
  config CPU_FREQ_IMX
        tristate "CPUfreq driver for i.MX CPUs"
        depends on ARCH_MXC && CPU_FREQ
+       select CPU_FREQ_TABLE
        help
          This enables the CPUfreq driver for i.MX CPUs.
  
diff --combined arch/arm/boot/compressed/head.S
index e58603b00060032c625bcc4ce11613ca96344732,4d1f07d5fa941aafde7d45d9fee17e3c0791f0b7..caddb9d35b7353e8d06ab09e12f126759221cf22
@@@ -539,6 -539,7 +539,7 @@@ __armv7_mmu_cache_on
                mcrne   p15, 0, r0, c8, c7, 0   @ flush I,D TLBs
  #endif
                mrc     p15, 0, r0, c1, c0, 0   @ read control reg
+               bic     r0, r0, #1 << 28        @ clear SCTLR.TRE
                orr     r0, r0, #0x5000         @ I-cache enable, RR cache replacement
                orr     r0, r0, #0x003c         @ write buffer
  #ifdef CONFIG_MMU
@@@ -656,8 -657,6 +657,8 @@@ proc_types
  @             b       __arm6_mmu_cache_off
  @             b       __armv3_mmu_cache_flush
  
 +#if !defined(CONFIG_CPU_V7)
 +              /* This collides with some V7 IDs, preventing correct detection */
                .word   0x00000000              @ old ARM ID
                .word   0x0000f000
                mov     pc, lr
   THUMB(               nop                             )
                mov     pc, lr
   THUMB(               nop                             )
 +#endif
  
                .word   0x41007000              @ ARM7/710
                .word   0xfff8fe00
diff --combined arch/arm/kernel/smp.c
index 9739bb8a2d2863bc09c715c1b9ce6316a7780540,511eb0397c1ac31bf12a81ffd0df68c816c9f7f2..e895f97ab008844fb97192f67b33d4ea692b03ea
@@@ -53,7 -53,6 +53,7 @@@ enum ipi_msg_type 
        IPI_CALL_FUNC,
        IPI_CALL_FUNC_SINGLE,
        IPI_CPU_STOP,
 +      IPI_CPU_BACKTRACE,
  };
  
  int __cpuinit __cpu_up(unsigned int cpu)
@@@ -278,20 -277,26 +278,26 @@@ static void __cpuinit smp_store_cpu_inf
  asmlinkage void __cpuinit secondary_start_kernel(void)
  {
        struct mm_struct *mm = &init_mm;
-       unsigned int cpu = smp_processor_id();
+       unsigned int cpu;
  
-       printk("CPU%u: Booted secondary processor\n", cpu);
+       /*
+        * The identity mapping is uncached (strongly ordered), so
+        * switch away from it before attempting any exclusive accesses.
+        */
+       cpu_switch_mm(mm->pgd, mm);
+       enter_lazy_tlb(mm, current);
+       local_flush_tlb_all();
  
        /*
         * All kernel threads share the same mm context; grab a
         * reference and switch to it.
         */
+       cpu = smp_processor_id();
        atomic_inc(&mm->mm_count);
        current->active_mm = mm;
        cpumask_set_cpu(cpu, mm_cpumask(mm));
-       cpu_switch_mm(mm->pgd, mm);
-       enter_lazy_tlb(mm, current);
-       local_flush_tlb_all();
+       printk("CPU%u: Booted secondary processor\n", cpu);
  
        cpu_init();
        preempt_disable();
         */
        platform_secondary_init(cpu);
  
 -      /*
 -       * Enable local interrupts.
 -       */
        notify_cpu_starting(cpu);
 -      local_irq_enable();
 -      local_fiq_enable();
 -
 -      /*
 -       * Setup the percpu timer for this CPU.
 -       */
 -      percpu_timer_setup();
  
        calibrate_delay();
  
         * before we continue.
         */
        set_cpu_online(cpu, true);
 +
 +      /*
 +       * Setup the percpu timer for this CPU.
 +       */
 +      percpu_timer_setup();
 +
        while (!cpu_active(cpu))
                cpu_relax();
  
 +      /*
 +       * cpu_active bit is set, so it's safe to enable interrupts
 +       * now.
 +       */
 +      local_irq_enable();
 +      local_fiq_enable();
 +
        /*
         * OK, it's off to the idle thread for us
         */
@@@ -409,7 -411,6 +415,7 @@@ static const char *ipi_types[NR_IPI] = 
        S(IPI_CALL_FUNC, "Function call interrupts"),
        S(IPI_CALL_FUNC_SINGLE, "Single function call interrupts"),
        S(IPI_CPU_STOP, "CPU stop interrupts"),
 +      S(IPI_CPU_BACKTRACE, "CPU backtrace"),
  };
  
  void show_ipi_list(struct seq_file *p, int prec)
@@@ -450,9 -451,7 +456,7 @@@ static DEFINE_PER_CPU(struct clock_even
  static void ipi_timer(void)
  {
        struct clock_event_device *evt = &__get_cpu_var(percpu_clockevent);
-       irq_enter();
        evt->event_handler(evt);
-       irq_exit();
  }
  
  #ifdef CONFIG_LOCAL_TIMERS
@@@ -463,7 -462,9 +467,9 @@@ asmlinkage void __exception_irq_entry d
  
        if (local_timer_ack()) {
                __inc_irq_stat(cpu, local_timer_irqs);
+               irq_enter();
                ipi_timer();
+               irq_exit();
        }
  
        set_irq_regs(old_regs);
@@@ -560,58 -561,6 +566,58 @@@ static void ipi_cpu_stop(unsigned int c
                cpu_relax();
  }
  
 +static cpumask_t backtrace_mask;
 +static DEFINE_RAW_SPINLOCK(backtrace_lock);
 +
 +/* "in progress" flag of arch_trigger_all_cpu_backtrace */
 +static unsigned long backtrace_flag;
 +
 +void smp_send_all_cpu_backtrace(void)
 +{
 +      unsigned int this_cpu = smp_processor_id();
 +      int i;
 +
 +      if (test_and_set_bit(0, &backtrace_flag))
 +              /*
 +               * If there is already a trigger_all_cpu_backtrace() in progress
 +               * (backtrace_flag == 1), don't output double cpu dump infos.
 +               */
 +              return;
 +
 +      cpumask_copy(&backtrace_mask, cpu_online_mask);
 +      cpu_clear(this_cpu, backtrace_mask);
 +
 +      pr_info("Backtrace for cpu %d (current):\n", this_cpu);
 +      dump_stack();
 +
 +      pr_info("\nsending IPI to all other CPUs:\n");
 +      smp_cross_call(&backtrace_mask, IPI_CPU_BACKTRACE);
 +
 +      /* Wait for up to 10 seconds for all other CPUs to do the backtrace */
 +      for (i = 0; i < 10 * 1000; i++) {
 +              if (cpumask_empty(&backtrace_mask))
 +                      break;
 +              mdelay(1);
 +      }
 +
 +      clear_bit(0, &backtrace_flag);
 +      smp_mb__after_clear_bit();
 +}
 +
 +/*
 + * ipi_cpu_backtrace - handle IPI from smp_send_all_cpu_backtrace()
 + */
 +static void ipi_cpu_backtrace(unsigned int cpu, struct pt_regs *regs)
 +{
 +      if (cpu_isset(cpu, backtrace_mask)) {
 +              raw_spin_lock(&backtrace_lock);
 +              pr_warning("IPI backtrace for cpu %d\n", cpu);
 +              show_regs(regs);
 +              raw_spin_unlock(&backtrace_lock);
 +              cpu_clear(cpu, backtrace_mask);
 +      }
 +}
 +
  /*
   * Main handler for inter-processor interrupts
   */
@@@ -625,7 -574,9 +631,9 @@@ asmlinkage void __exception_irq_entry d
  
        switch (ipinr) {
        case IPI_TIMER:
+               irq_enter();
                ipi_timer();
+               irq_exit();
                break;
  
        case IPI_RESCHEDULE:
                break;
  
        case IPI_CALL_FUNC:
+               irq_enter();
                generic_smp_call_function_interrupt();
+               irq_exit();
                break;
  
        case IPI_CALL_FUNC_SINGLE:
+               irq_enter();
                generic_smp_call_function_single_interrupt();
+               irq_exit();
                break;
  
        case IPI_CPU_STOP:
+               irq_enter();
                ipi_cpu_stop(cpu);
+               irq_exit();
                break;
  
 +      case IPI_CPU_BACKTRACE:
 +              ipi_cpu_backtrace(cpu, regs);
 +              break;
 +
        default:
                printk(KERN_CRIT "CPU%u: Unknown IPI message 0x%x\n",
                       cpu, ipinr);
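The IPI_CPU_BACKTRACE plumbing above gives ARM an all-CPU dump facility: smp_send_all_cpu_backtrace() prints the caller's own stack, fans the new IPI out to every other online CPU, and then waits up to ten seconds while each target prints its registers under backtrace_lock. A minimal sketch of a caller, assuming a hypothetical stall-reporting hook, is:

/* Hypothetical debug hook: dump every online CPU when a stall is suspected.
 * The function and its name are illustrative; only smp_send_all_cpu_backtrace()
 * is provided by the change above. */
static void example_report_stall(const char *reason)
{
	pr_err("stall suspected (%s); dumping all CPUs\n", reason);
	smp_send_all_cpu_backtrace();	/* returns once all CPUs respond or ~10s elapse */
}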
diff --combined block/genhd.c
index 026d0700adc5e4443f5382aa2ddccadbead31573,d7f7d4e3da372a80d1c13b453b517276eda367f1..a1b0b9012cd457acbc78dc67b7f9a1602102096c
@@@ -744,7 -744,7 +744,7 @@@ void __init printk_all_partitions(void
                struct hd_struct *part;
                char name_buf[BDEVNAME_SIZE];
                char devt_buf[BDEVT_SIZE];
-               u8 uuid[PARTITION_META_INFO_UUIDLTH * 2 + 1];
+               char uuid_buf[PARTITION_META_INFO_UUIDLTH * 2 + 5];
  
                /*
                 * Don't show empty devices or things that have been
                while ((part = disk_part_iter_next(&piter))) {
                        bool is_part0 = part == &disk->part0;
  
-                       uuid[0] = 0;
+                       uuid_buf[0] = '\0';
                        if (part->info)
-                               part_unpack_uuid(part->info->uuid, uuid);
+                               snprintf(uuid_buf, sizeof(uuid_buf), "%pU",
+                                        part->info->uuid);
  
                        printk("%s%s %10llu %s %s", is_part0 ? "" : "  ",
                               bdevt_str(part_devt(part), devt_buf),
                               (unsigned long long)part->nr_sects >> 1,
-                              disk_name(disk, part->partno, name_buf), uuid);
+                              disk_name(disk, part->partno, name_buf),
+                              uuid_buf);
                        if (is_part0) {
                                if (disk->driverfs_dev != NULL &&
                                    disk->driverfs_dev->driver != NULL)
@@@ -1116,22 -1118,6 +1118,22 @@@ static void disk_release(struct device 
                blk_put_queue(disk->queue);
        kfree(disk);
  }
 +
 +static int disk_uevent(struct device *dev, struct kobj_uevent_env *env)
 +{
 +      struct gendisk *disk = dev_to_disk(dev);
 +      struct disk_part_iter piter;
 +      struct hd_struct *part;
 +      int cnt = 0;
 +
 +      disk_part_iter_init(&piter, disk, 0);
 +      while((part = disk_part_iter_next(&piter)))
 +              cnt++;
 +      disk_part_iter_exit(&piter);
 +      add_uevent_var(env, "NPARTS=%u", cnt);
 +      return 0;
 +}
 +
  struct class block_class = {
        .name           = "block",
  };
@@@ -1150,7 -1136,6 +1152,7 @@@ static struct device_type disk_type = 
        .groups         = disk_attr_groups,
        .release        = disk_release,
        .devnode        = block_devnode,
 +      .uevent         = disk_uevent,
  };
  
  #ifdef CONFIG_PROC_FS
diff --combined drivers/base/power/runtime.c
index 184cf54fa01cfbde621a31047866e8c0f1246ade,13f7db6bcde7372cf37ef88da6d81df1e1b261f8..da39fa50e5630f618fb55268fdc2753c81391a42
@@@ -360,7 -360,6 +360,6 @@@ static int rpm_suspend(struct device *d
                goto repeat;
        }
  
-       dev->power.deferred_resume = false;
        if (dev->power.no_callbacks)
                goto no_callback;       /* Assume success. */
  
        wake_up_all(&dev->power.wait_queue);
  
        if (dev->power.deferred_resume) {
+               dev->power.deferred_resume = false;
                rpm_resume(dev, 0);
                retval = -EAGAIN;
                goto out;
@@@ -533,6 -533,7 +533,7 @@@ static int rpm_resume(struct device *de
                    || dev->parent->power.runtime_status == RPM_ACTIVE) {
                        atomic_inc(&dev->parent->power.child_count);
                        spin_unlock(&dev->parent->power.lock);
+                       retval = 1;
                        goto no_callback;       /* Assume success. */
                }
                spin_unlock(&dev->parent->power.lock);
        }
        wake_up_all(&dev->power.wait_queue);
  
-       if (!retval)
+       if (retval >= 0)
                rpm_idle(dev, RPM_ASYNC);
  
   out:
@@@ -746,8 -747,6 +747,8 @@@ int __pm_runtime_idle(struct device *de
        unsigned long flags;
        int retval;
  
 +      might_sleep_if(!(rpmflags & RPM_ASYNC));
 +
        if (rpmflags & RPM_GET_PUT) {
                if (!atomic_dec_and_test(&dev->power.usage_count))
                        return 0;
@@@ -777,8 -776,6 +778,8 @@@ int __pm_runtime_suspend(struct device 
        unsigned long flags;
        int retval;
  
 +      might_sleep_if(!(rpmflags & RPM_ASYNC) && !dev->power.irq_safe);
 +
        if (rpmflags & RPM_GET_PUT) {
                if (!atomic_dec_and_test(&dev->power.usage_count))
                        return 0;
@@@ -807,8 -804,6 +808,8 @@@ int __pm_runtime_resume(struct device *
        unsigned long flags;
        int retval;
  
 +      might_sleep_if(!(rpmflags & RPM_ASYNC) && !dev->power.irq_safe);
 +
        if (rpmflags & RPM_GET_PUT)
                atomic_inc(&dev->power.usage_count);
  
@@@ -998,7 -993,6 +999,7 @@@ EXPORT_SYMBOL_GPL(pm_runtime_barrier)
   */
  void __pm_runtime_disable(struct device *dev, bool check_resume)
  {
 +      might_sleep();
        spin_lock_irq(&dev->power.lock);
  
        if (dev->power.disable_depth > 0) {
@@@ -1205,8 -1199,6 +1206,8 @@@ void __pm_runtime_use_autosuspend(struc
  {
        int old_delay, old_use;
  
 +      might_sleep();
 +
        spin_lock_irq(&dev->power.lock);
        old_delay = dev->power.autosuspend_delay;
        old_use = dev->power.use_autosuspend;
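The might_sleep_if() annotations added above make the sleeping rules explicit: the synchronous runtime-PM entry points may block unless RPM_ASYNC is requested or the device has been marked IRQ-safe. A brief sketch of the two common call patterns, with the driver functions hypothetical, is:

#include <linux/interrupt.h>
#include <linux/pm_runtime.h>

/* Process context: the synchronous helpers are allowed to sleep. */
static int example_do_io(struct device *dev)
{
	int ret = pm_runtime_get_sync(dev);	/* may sleep while resuming */

	if (ret < 0) {
		pm_runtime_put_noidle(dev);
		return ret;
	}
	/* ... talk to the hardware ... */
	pm_runtime_put(dev);			/* asynchronous put, never sleeps */
	return 0;
}

/* Interrupt context: only asynchronous requests are safe here unless
 * pm_runtime_irq_safe(dev) was called during probe. */
static irqreturn_t example_irq(int irq, void *dev_id)
{
	struct device *dev = dev_id;

	pm_runtime_get(dev);			/* RPM_GET_PUT | RPM_ASYNC */
	return IRQ_HANDLED;
}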
diff --combined drivers/mmc/core/sd.c
index 0635da51a970a0589ad3f662d1edee86ec32c595,08b59b855b05a741e99dabd8df295888d3cd2e0e..e6629b986f059c4c92c8c743657bcf1857effd8d
@@@ -306,6 -306,9 +306,9 @@@ static int mmc_read_switch(struct mmc_c
                goto out;
        }
  
+       if (status[13] & UHS_SDR50_BUS_SPEED)
+               card->sw_caps.hs_max_dtr = 50000000;
        if (card->scr.sda_spec3) {
                card->sw_caps.sd3_bus_mode = status[13];
  
                }
  
                card->sw_caps.sd3_curr_limit = status[7];
-       } else {
-               if (status[13] & 0x02)
-                       card->sw_caps.hs_max_dtr = 50000000;
        }
  
  out:
@@@ -764,9 -764,6 +764,9 @@@ int mmc_sd_setup_card(struct mmc_host *
        bool reinit)
  {
        int err;
 +#ifdef CONFIG_MMC_PARANOID_SD_INIT
 +      int retries;
 +#endif
  
        if (!reinit) {
                /*
                /*
                 * Fetch switch information from card.
                 */
 +#ifdef CONFIG_MMC_PARANOID_SD_INIT
 +              for (retries = 1; retries <= 3; retries++) {
 +                      err = mmc_read_switch(card);
 +                      if (!err) {
 +                              if (retries > 1) {
 +                                      printk(KERN_WARNING
 +                                             "%s: recovered\n", 
 +                                             mmc_hostname(host));
 +                              }
 +                              break;
 +                      } else {
 +                              printk(KERN_WARNING
 +                                     "%s: read switch failed (attempt %d)\n",
 +                                     mmc_hostname(host), retries);
 +                      }
 +              }
 +#else
                err = mmc_read_switch(card);
 +#endif
 +
                if (err)
                        return err;
        }
@@@ -1011,36 -989,18 +1011,36 @@@ static void mmc_sd_remove(struct mmc_ho
   */
  static void mmc_sd_detect(struct mmc_host *host)
  {
 -      int err;
 +      int err = 0;
 +#ifdef CONFIG_MMC_PARANOID_SD_INIT
 +        int retries = 5;
 +#endif
  
        BUG_ON(!host);
        BUG_ON(!host->card);
 -
 +       
        mmc_claim_host(host);
  
        /*
         * Just check if our card has been removed.
         */
 +#ifdef CONFIG_MMC_PARANOID_SD_INIT
 +      while(retries) {
 +              err = mmc_send_status(host->card, NULL);
 +              if (err) {
 +                      retries--;
 +                      udelay(5);
 +                      continue;
 +              }
 +              break;
 +      }
 +      if (!retries) {
 +              printk(KERN_ERR "%s(%s): Unable to re-detect card (%d)\n",
 +                     __func__, mmc_hostname(host), err);
 +      }
 +#else
        err = mmc_send_status(host->card, NULL);
 -
 +#endif
        mmc_release_host(host);
  
        if (err) {
@@@ -1079,31 -1039,12 +1079,31 @@@ static int mmc_sd_suspend(struct mmc_ho
  static int mmc_sd_resume(struct mmc_host *host)
  {
        int err;
 +#ifdef CONFIG_MMC_PARANOID_SD_INIT
 +      int retries;
 +#endif
  
        BUG_ON(!host);
        BUG_ON(!host->card);
  
        mmc_claim_host(host);
 +#ifdef CONFIG_MMC_PARANOID_SD_INIT
 +      retries = 5;
 +      while (retries) {
 +              err = mmc_sd_init_card(host, host->ocr, host->card);
 +
 +              if (err) {
 +                      printk(KERN_ERR "%s: Re-init card rc = %d (retries = %d)\n",
 +                             mmc_hostname(host), err, retries);
 +                      mdelay(5);
 +                      retries--;
 +                      continue;
 +              }
 +              break;
 +      }
 +#else
        err = mmc_sd_init_card(host, host->ocr, host->card);
 +#endif
        mmc_release_host(host);
  
        return err;
@@@ -1155,9 -1096,6 +1155,9 @@@ int mmc_attach_sd(struct mmc_host *host
  {
        int err;
        u32 ocr;
 +#ifdef CONFIG_MMC_PARANOID_SD_INIT
 +      int retries;
 +#endif
  
        BUG_ON(!host);
        WARN_ON(!host->claimed);
        /*
         * Detect and init the card.
         */
 +#ifdef CONFIG_MMC_PARANOID_SD_INIT
 +      retries = 5;
 +      while (retries) {
 +              err = mmc_sd_init_card(host, host->ocr, NULL);
 +              if (err) {
 +                      retries--;
 +                      continue;
 +              }
 +              break;
 +      }
 +
 +      if (!retries) {
 +              printk(KERN_ERR "%s: mmc_sd_init_card() failure (err = %d)\n",
 +                     mmc_hostname(host), err);
 +              goto err;
 +      }
 +#else
        err = mmc_sd_init_card(host, host->ocr, NULL);
        if (err)
                goto err;
 +#endif
  
        mmc_release_host(host);
        err = mmc_add_card(host->card);
diff --combined drivers/mmc/core/sdio.c
index a2c1c4d83718a2b931f2eff2cef98be5ffe481c9,9b18b5416599dc81b5e9a711a6a6050155058a63..5d1719932664bca3f1b9144e52e17c47bcfaf1ab
  #include "sdio_ops.h"
  #include "sdio_cis.h"
  
 +#ifdef CONFIG_MMC_EMBEDDED_SDIO
 +#include <linux/mmc/sdio_ids.h>
 +#endif
 +
  static int sdio_read_fbr(struct sdio_func *func)
  {
        int ret;
@@@ -453,35 -449,19 +453,35 @@@ static int mmc_sdio_init_card(struct mm
                goto finish;
        }
  
 -      /*
 -       * Read the common registers.
 -       */
 -      err = sdio_read_cccr(card);
 -      if (err)
 -              goto remove;
 +#ifdef CONFIG_MMC_EMBEDDED_SDIO
 +      if (host->embedded_sdio_data.cccr)
 +              memcpy(&card->cccr, host->embedded_sdio_data.cccr, sizeof(struct sdio_cccr));
 +      else {
 +#endif
 +              /*
 +               * Read the common registers.
 +               */
 +              err = sdio_read_cccr(card);
 +              if (err)
 +                      goto remove;
 +#ifdef CONFIG_MMC_EMBEDDED_SDIO
 +      }
 +#endif
  
 -      /*
 -       * Read the common CIS tuples.
 -       */
 -      err = sdio_read_common_cis(card);
 -      if (err)
 -              goto remove;
 +#ifdef CONFIG_MMC_EMBEDDED_SDIO
 +      if (host->embedded_sdio_data.cis)
 +              memcpy(&card->cis, host->embedded_sdio_data.cis, sizeof(struct sdio_cis));
 +      else {
 +#endif
 +              /*
 +               * Read the common CIS tuples.
 +               */
 +              err = sdio_read_common_cis(card);
 +              if (err)
 +                      goto remove;
 +#ifdef CONFIG_MMC_EMBEDDED_SDIO
 +      }
 +#endif
  
        if (oldcard) {
                int same = (card->cis.vendor == oldcard->cis.vendor &&
@@@ -685,7 -665,7 +685,7 @@@ static int mmc_sdio_resume(struct mmc_h
        }
  
        if (!err && host->sdio_irqs)
-               mmc_signal_sdio_irq(host);
+               wake_up_process(host->sdio_irq_thread);
        mmc_release_host(host);
  
        /*
@@@ -847,36 -827,14 +847,36 @@@ int mmc_attach_sdio(struct mmc_host *ho
        funcs = (ocr & 0x70000000) >> 28;
        card->sdio_funcs = 0;
  
 +#ifdef CONFIG_MMC_EMBEDDED_SDIO
 +      if (host->embedded_sdio_data.funcs)
 +              card->sdio_funcs = funcs = host->embedded_sdio_data.num_funcs;
 +#endif
 +
        /*
         * Initialize (but don't add) all present functions.
         */
        for (i = 0; i < funcs; i++, card->sdio_funcs++) {
 -              err = sdio_init_func(host->card, i + 1);
 -              if (err)
 -                      goto remove;
 -
 +#ifdef CONFIG_MMC_EMBEDDED_SDIO
 +              if (host->embedded_sdio_data.funcs) {
 +                      struct sdio_func *tmp;
 +
 +                      tmp = sdio_alloc_func(host->card);
 +                      if (IS_ERR(tmp))
 +                              goto remove;
 +                      tmp->num = (i + 1);
 +                      card->sdio_func[i] = tmp;
 +                      tmp->class = host->embedded_sdio_data.funcs[i].f_class;
 +                      tmp->max_blksize = host->embedded_sdio_data.funcs[i].f_maxblksize;
 +                      tmp->vendor = card->cis.vendor;
 +                      tmp->device = card->cis.device;
 +              } else {
 +#endif
 +                      err = sdio_init_func(host->card, i + 1);
 +                      if (err)
 +                              goto remove;
 +#ifdef CONFIG_MMC_EMBEDDED_SDIO
 +              }
 +#endif
                /*
                 * Enable Runtime PM for this func (if supported)
                 */
@@@ -924,77 -882,3 +924,77 @@@ err
        return err;
  }
  
 +int sdio_reset_comm(struct mmc_card *card)
 +{
 +      struct mmc_host *host = card->host;
 +      u32 ocr;
 +      int err;
 +
 +      printk("%s():\n", __func__);
 +      mmc_claim_host(host);
 +
 +      mmc_go_idle(host);
 +
 +      mmc_set_clock(host, host->f_min);
 +
 +      err = mmc_send_io_op_cond(host, 0, &ocr);
 +      if (err)
 +              goto err;
 +
 +      host->ocr = mmc_select_voltage(host, ocr);
 +      if (!host->ocr) {
 +              err = -EINVAL;
 +              goto err;
 +      }
 +
 +      err = mmc_send_io_op_cond(host, host->ocr, &ocr);
 +      if (err)
 +              goto err;
 +
 +      if (mmc_host_is_spi(host)) {
 +              err = mmc_spi_set_crc(host, use_spi_crc);
 +              if (err)
 +                      goto err;
 +      }
 +
 +      if (!mmc_host_is_spi(host)) {
 +              err = mmc_send_relative_addr(host, &card->rca);
 +              if (err)
 +                      goto err;
 +              mmc_set_bus_mode(host, MMC_BUSMODE_PUSHPULL);
 +      }
 +      if (!mmc_host_is_spi(host)) {
 +              err = mmc_select_card(card);
 +              if (err)
 +                      goto err;
 +      }
 +
 +      /*
 +       * Switch to high-speed (if supported).
 +       */
 +      err = sdio_enable_hs(card);
 +      if (err > 0)
 +              mmc_sd_go_highspeed(card);
 +      else if (err)
 +              goto err;
 +
 +      /*
 +       * Change to the card's maximum speed.
 +       */
 +      mmc_set_clock(host, mmc_sdio_get_max_clock(card));
 +
 +      err = sdio_enable_4bit_bus(card);
 +      if (err > 0)
 +              mmc_set_bus_width(host, MMC_BUS_WIDTH_4);
 +      else if (err)
 +              goto err;
 +
 +      mmc_release_host(host);
 +      return 0;
 +err:
 +      printk("%s: Error resetting SDIO communications (%d)\n",
 +             mmc_hostname(host), err);
 +      mmc_release_host(host);
 +      return err;
 +}
 +EXPORT_SYMBOL(sdio_reset_comm);
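sdio_reset_comm(), exported above, walks a card back through idle state, voltage negotiation, relative-address assignment, card select, the high-speed switch and 4-bit bus setup, so an SDIO function driver can recover the link after power-cycling its chip without a full remove/rescan. A minimal sketch of a caller, with the WLAN driver function hypothetical, is:

#include <linux/mmc/card.h>
#include <linux/mmc/sdio_func.h>

/* Hypothetical WLAN driver fragment: re-establish SDIO communication after
 * the chip firmware has been reset, then re-enable the function. */
static int example_wlan_reattach(struct sdio_func *func)
{
	int err;

	err = sdio_reset_comm(func->card);	/* helper added by this patch */
	if (err)
		return err;

	sdio_claim_host(func);
	err = sdio_enable_func(func);
	sdio_release_host(func);
	return err;
}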
diff --combined drivers/mmc/host/sdhci.c
index d517a216eaa95bb83442db98bc37306035afe71e,8bcd5e98d994ee1c8997ac7c8a1ca5beb70cee1c..6103edad46f00f2f2828e10e85e80876cead505e
@@@ -1044,7 -1044,7 +1044,7 @@@ static void sdhci_set_clock(struct sdhc
        u16 clk = 0;
        unsigned long timeout;
  
 -      if (clock == host->clock)
 +      if (clock && clock == host->clock)
                return;
  
        if (host->ops->set_clock) {
@@@ -2515,8 -2515,9 +2515,9 @@@ int sdhci_add_host(struct sdhci_host *h
            mmc_card_is_removable(mmc))
                mmc->caps |= MMC_CAP_NEEDS_POLL;
  
-       /* UHS-I mode(s) supported by the host controller. */
-       if (host->version >= SDHCI_SPEC_300)
+       /* Any UHS-I mode in caps implies SDR12 and SDR25 support. */
+       if (caps[1] & (SDHCI_SUPPORT_SDR104 | SDHCI_SUPPORT_SDR50 |
+                      SDHCI_SUPPORT_DDR50))
                mmc->caps |= MMC_CAP_UHS_SDR12 | MMC_CAP_UHS_SDR25;
  
        /* SDR104 supports also implies SDR50 support */
diff --combined drivers/net/Kconfig
index 906ef8fa00603ede3fee9134358626fd2988beb5,0c3f234afc067c46a7505e1ab4ec077fd9a67ced..5a92c48ffe5927a45863c6604065a8f0b4476183
@@@ -2543,7 -2543,7 +2543,7 @@@ config S6GMA
  source "drivers/net/stmmac/Kconfig"
  
  config PCH_GBE
-       tristate "Intel EG20T PCH / OKI SEMICONDUCTOR ML7223 IOH GbE"
+       tristate "Intel EG20T PCH/OKI SEMICONDUCTOR IOH(ML7223/ML7831) GbE"
        depends on PCI
        select MII
        ---help---
          This driver enables Gigabit Ethernet function.
  
          This driver also can be used for OKI SEMICONDUCTOR IOH(Input/
-         Output Hub), ML7223.
-         ML7223 IOH is for MP(Media Phone) use.
-         ML7223 is companion chip for Intel Atom E6xx series.
-         ML7223 is completely compatible for Intel EG20T PCH.
+         Output Hub), ML7223/ML7831.
+         ML7223 IOH is for MP(Media Phone) use. ML7831 IOH is for general
+         purpose use.
+         ML7223/ML7831 is companion chip for Intel Atom E6xx series.
+         ML7223/ML7831 is completely compatible for Intel EG20T PCH.
  
  endif # NETDEV_1000
  
@@@ -3323,23 -3324,6 +3324,23 @@@ config PPPOL2T
          used by ISPs and enterprises to tunnel PPP traffic over UDP
          tunnels. L2TP is replacing PPTP for VPN uses.
  
 +config PPPOLAC
 +      tristate "PPP on L2TP Access Concentrator"
 +      depends on PPP && INET
 +      help
 +        L2TP (RFC 2661) is a tunneling protocol widely used in virtual private
 +        networks. This driver handles L2TP data packets between a UDP socket
 +        and a PPP channel, but only permits one session per socket. Thus it is
 +        fairly simple and suited for clients.
 +
 +config PPPOPNS
 +      tristate "PPP on PPTP Network Server"
 +      depends on PPP && INET
 +      help
 +        PPTP (RFC 2637) is a tunneling protocol widely used in virtual private
 +        networks. This driver handles PPTP data packets between a RAW socket
 +        and a PPP channel. It is fairly simple and easy to use.
 +
  config SLIP
        tristate "SLIP (serial line) support"
        ---help---
diff --combined drivers/net/tun.c
index 76b865065679c756003bdbc3dbb7d491d0b4fd97,a631bf71fee576d9e4be5de023f50a0f0a774471..3cc22b9eda534fc5f4d053601a60b3dfbda06f0c
@@@ -1239,16 -1239,12 +1239,18 @@@ static long __tun_chr_ioctl(struct fil
        int vnet_hdr_sz;
        int ret;
  
-       if (cmd == TUNSETIFF || _IOC_TYPE(cmd) == 0x89)
 +#ifdef CONFIG_ANDROID_PARANOID_NETWORK
 +      if (cmd != TUNGETIFF && !capable(CAP_NET_ADMIN)) {
 +              return -EPERM;
 +      }
 +#endif
 +
+       if (cmd == TUNSETIFF || _IOC_TYPE(cmd) == 0x89) {
                if (copy_from_user(&ifr, argp, ifreq_len))
                        return -EFAULT;
+       } else {
+               memset(&ifr, 0, sizeof(ifr));
+       }
        if (cmd == TUNGETFEATURES) {
                /* Currently this just means: "what IFF flags are valid?".
                 * This is needed because we never checked for invalid flags on
diff --combined drivers/tty/serial/serial_core.c
index 5668b3eb96d8f4ed05d74f5134564f54654136b8,de5e33fc129021be7bb7ff2ed8bde4b0f4de5552..2a106a94cdd009246462c4f254311dd05a38bdaf
@@@ -91,9 -91,6 +91,9 @@@ static void __uart_start(struct tty_str
        struct uart_state *state = tty->driver_data;
        struct uart_port *port = state->uart_port;
  
 +      if (port->ops->wake_peer)
 +              port->ops->wake_peer(port);
 +
        if (!uart_circ_empty(&state->xmit) && state->xmit.buf &&
            !tty->stopped && !tty->hw_stopped)
                port->ops->start_tx(port);
@@@ -2328,6 -2325,7 +2328,7 @@@ void uart_unregister_driver(struct uart
        tty_unregister_driver(p);
        put_tty_driver(p);
        kfree(drv->state);
+       drv->state = NULL;
        drv->tty_driver = NULL;
  }
  
diff --combined drivers/usb/gadget/u_ether.c
index b5a30fee014b1aeba278485b4d560e5ba81d3d58,a52404a1aef5df2a2fbfddbbd2bd39fb90638ad7..51d572eae9ad8c09c50cb826a22b7a865f1a4e4d
@@@ -764,26 -764,6 +764,26 @@@ static struct device_type gadget_type 
   * Returns negative errno, or zero on success
   */
  int gether_setup(struct usb_gadget *g, u8 ethaddr[ETH_ALEN])
 +{
 +      return gether_setup_name(g, ethaddr, "usb");
 +}
 +
 +/**
 + * gether_setup_name - initialize one ethernet-over-usb link
 + * @g: gadget to associated with these links
 + * @ethaddr: NULL, or a buffer in which the ethernet address of the
 + *    host side of the link is recorded
 + * @netname: name for network device (for example, "usb")
 + * Context: may sleep
 + *
 + * This sets up the single network link that may be exported by a
 + * gadget driver using this framework.  The link layer addresses are
 + * set up using module parameters.
 + *
 + * Returns negative errno, or zero on success
 + */
 +int gether_setup_name(struct usb_gadget *g, u8 ethaddr[ETH_ALEN],
 +              const char *netname)
  {
        struct eth_dev          *dev;
        struct net_device       *net;
  
        /* network device setup */
        dev->net = net;
 -      strcpy(net->name, "usb%d");
 +      snprintf(net->name, sizeof(net->name), "%s%%d", netname);
  
        if (get_ether_addr(dev_addr, net->dev_addr))
                dev_warn(&g->dev,
  
        SET_ETHTOOL_OPS(net, &ops);
  
-       /* two kinds of host-initiated state changes:
-        *  - iff DATA transfer is active, carrier is "on"
-        *  - tx queueing enabled if open *and* carrier is "on"
-        */
-       netif_carrier_off(net);
        dev->gadget = g;
        SET_NETDEV_DEV(net, &g->dev);
        SET_NETDEV_DEVTYPE(net, &gadget_type);
                INFO(dev, "HOST MAC %pM\n", dev->host_mac);
  
                the_dev = dev;
+               /* two kinds of host-initiated state changes:
+                *  - iff DATA transfer is active, carrier is "on"
+                *  - tx queueing enabled if open *and* carrier is "on"
+                */
+               netif_carrier_off(net);
        }
  
        return status;
@@@ -963,6 -943,7 +963,6 @@@ void gether_disconnect(struct gether *l
        struct eth_dev          *dev = link->ioport;
        struct usb_request      *req;
  
 -      WARN_ON(!dev);
        if (!dev)
                return;
  
diff --combined drivers/usb/host/ehci-q.c
index e4dd26a8b2b14f6529f5f6e8c39908ef7059bab2,923153c473b9e7855f01908475c0264475652748..08fdcfa9cc42e684da1fba71839fec51eb9a54a6
@@@ -130,9 -130,17 +130,17 @@@ qh_refresh (struct ehci_hcd *ehci, stru
        else {
                qtd = list_entry (qh->qtd_list.next,
                                struct ehci_qtd, qtd_list);
-               /* first qtd may already be partially processed */
-               if (cpu_to_hc32(ehci, qtd->qtd_dma) == qh->hw->hw_current)
+               /*
+                * first qtd may already be partially processed.
+                * If we come here during unlink, the QH overlay region
+                * might have reference to the just unlinked qtd. The
+                * qtd is updated in qh_completions(). Update the QH
+                * overlay here.
+                */
+               if (cpu_to_hc32(ehci, qtd->qtd_dma) == qh->hw->hw_current) {
+                       qh->hw->hw_qtd_next = qtd->hw_next;
                        qtd = NULL;
+               }
        }
  
        if (qtd)
@@@ -995,12 -1003,6 +1003,12 @@@ static void qh_link_async (struct ehci_
        head->qh_next.qh = qh;
        head->hw->hw_next = dma;
  
 +      /*
 +       * flush qh descriptor into memory immediately,
 +       * see comments in qh_append_tds.
 +       * */
 +      ehci_sync_mem();
 +
        qh_get(qh);
        qh->xacterrs = 0;
        qh->qh_state = QH_STATE_LINKED;
@@@ -1088,18 -1090,6 +1096,18 @@@ static struct ehci_qh *qh_append_tds 
                        wmb ();
                        dummy->hw_token = token;
  
 +                      /*
 +                       * Writing to dma coherent buffer on ARM may
 +                       * be delayed to reach memory, so HC may not see
 +                       * hw_token of dummy qtd in time, which can cause
 +                       * the qtd transaction to be executed very late,
 +                       * and degrade performance a lot. ehci_sync_mem
 +                       * is added to flush 'token' immediately into
 +                       * memory, so that ehci can execute the transaction
 +                       * ASAP.
 +                       * */
 +                      ehci_sync_mem();
 +
                        urb->hcpriv = qh_get (qh);
                }
        }
diff --combined fs/fuse/dev.c
index c858b5c83209bbe2ca16f56e93be195db46f6da7,5c029fb3e087aeb959b1d04c03b3e8a279f8b22d..947b8225134ece46ec30d5f800894369118267a2
@@@ -19,7 -19,6 +19,7 @@@
  #include <linux/pipe_fs_i.h>
  #include <linux/swap.h>
  #include <linux/splice.h>
 +#include <linux/freezer.h>
  
  MODULE_ALIAS_MISCDEV(FUSE_MINOR);
  MODULE_ALIAS("devname:fuse");
@@@ -388,10 -387,7 +388,10 @@@ __acquires(fc->lock
         * Wait it out.
         */
        spin_unlock(&fc->lock);
 -      wait_event(req->waitq, req->state == FUSE_REQ_FINISHED);
 +
 +      while (req->state != FUSE_REQ_FINISHED)
 +              wait_event_freezable(req->waitq,
 +                                   req->state == FUSE_REQ_FINISHED);
        spin_lock(&fc->lock);
  
        if (!req->aborted)
@@@ -1528,6 -1524,7 +1528,7 @@@ static int fuse_retrieve(struct fuse_co
                req->pages[req->num_pages] = page;
                req->num_pages++;
  
+               offset = 0;
                num -= this_num;
                total_len += this_num;
                index++;
diff --combined include/linux/cpu.h
index 97f1ca76b4aa4662389ff382299b234ac3bc3cd5,42af2eae8805b783cd6232f6a62f224a68d35252..111797a091945a49934ee34b6168002e7e798eff
@@@ -66,8 -66,9 +66,9 @@@ enum 
        /* migration should happen before other stuff but after perf */
        CPU_PRI_PERF            = 20,
        CPU_PRI_MIGRATION       = 10,
-       /* prepare workqueues for other notifiers */
-       CPU_PRI_WORKQUEUE       = 5,
+       /* bring up workqueues before normal notifiers and down after */
+       CPU_PRI_WORKQUEUE_UP    = 5,
+       CPU_PRI_WORKQUEUE_DOWN  = -5,
  };
  
  #ifdef CONFIG_SMP
@@@ -174,11 -175,4 +175,11 @@@ static inline int disable_nonboot_cpus(
  static inline void enable_nonboot_cpus(void) {}
  #endif /* !CONFIG_PM_SLEEP_SMP */
  
 +#define IDLE_START 1
 +#define IDLE_END 2
 +
 +void idle_notifier_register(struct notifier_block *n);
 +void idle_notifier_unregister(struct notifier_block *n);
 +void idle_notifier_call_chain(unsigned long val);
 +
  #endif /* _LINUX_CPU_H_ */
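The idle_notifier_* declarations and the IDLE_START/IDLE_END values above expose an idle notifier chain; the naming suggests the arch idle loop calls idle_notifier_call_chain() on entry to and exit from idle, and drivers subscribe with idle_notifier_register(). A minimal sketch of a subscriber, with the handler body illustrative only, is:

#include <linux/cpu.h>
#include <linux/init.h>
#include <linux/notifier.h>

/* Illustrative idle-notifier client: the empty cases stand in for real work,
 * such as dropping a clock or bus vote while the CPU idles. */
static int example_idle_notify(struct notifier_block *nb,
			       unsigned long val, void *data)
{
	switch (val) {
	case IDLE_START:
		/* CPU is about to enter its idle loop */
		break;
	case IDLE_END:
		/* CPU has left the idle loop */
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block example_idle_nb = {
	.notifier_call = example_idle_notify,
};

static int __init example_idle_init(void)
{
	idle_notifier_register(&example_idle_nb);
	return 0;
}
late_initcall(example_idle_init);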
diff --combined include/linux/mmc/host.h
index 2cfa8d02e719291ec40ec3807593ce2f09b90784,f8d1e741d80c4adc058192e961074057d516bb38..2e46d530086a63cc5b33257db27ab96eb8bbac50
@@@ -12,7 -12,6 +12,7 @@@
  
  #include <linux/leds.h>
  #include <linux/sched.h>
 +#include <linux/wakelock.h>
  
  #include <linux/mmc/core.h>
  #include <linux/mmc/pm.h>
@@@ -262,17 -261,13 +262,18 @@@ struct mmc_host 
        int                     claim_cnt;      /* "claim" nesting count */
  
        struct delayed_work     detect;
 +      struct wake_lock        detect_wake_lock;
  
        const struct mmc_bus_ops *bus_ops;      /* current bus driver */
        unsigned int            bus_refs;       /* reference counter */
  
 +      unsigned int            bus_resume_flags;
 +#define MMC_BUSRESUME_MANUAL_RESUME   (1 << 0)
 +#define MMC_BUSRESUME_NEEDS_RESUME    (1 << 1)
 +
        unsigned int            sdio_irqs;
        struct task_struct      *sdio_irq_thread;
+       bool                    sdio_irq_pending;
        atomic_t                sdio_irq_thread_abort;
  
        mmc_pm_flag_t           pm_flags;       /* requested pm features */
  
        struct dentry           *debugfs_root;
  
 +#ifdef CONFIG_MMC_EMBEDDED_SDIO
 +      struct {
 +              struct sdio_cis                 *cis;
 +              struct sdio_cccr                *cccr;
 +              struct sdio_embedded_func       *funcs;
 +              int                             num_funcs;
 +      } embedded_sdio_data;
 +#endif
 +
        unsigned long           private[0] ____cacheline_aligned;
  };
  
@@@ -304,14 -290,6 +305,14 @@@ extern int mmc_add_host(struct mmc_hos
  extern void mmc_remove_host(struct mmc_host *);
  extern void mmc_free_host(struct mmc_host *);
  
 +#ifdef CONFIG_MMC_EMBEDDED_SDIO
 +extern void mmc_set_embedded_sdio_data(struct mmc_host *host,
 +                                     struct sdio_cis *cis,
 +                                     struct sdio_cccr *cccr,
 +                                     struct sdio_embedded_func *funcs,
 +                                     int num_funcs);
 +#endif
 +
  static inline void *mmc_priv(struct mmc_host *host)
  {
        return (void *)host->private;
  #define mmc_dev(x)    ((x)->parent)
  #define mmc_classdev(x)       (&(x)->class_dev)
  #define mmc_hostname(x)       (dev_name(&(x)->class_dev))
 +#define mmc_bus_needs_resume(host) ((host)->bus_resume_flags & MMC_BUSRESUME_NEEDS_RESUME)
 +#define mmc_bus_manual_resume(host) ((host)->bus_resume_flags & MMC_BUSRESUME_MANUAL_RESUME)
 +
 +static inline void mmc_set_bus_resume_policy(struct mmc_host *host, int manual)
 +{
 +      if (manual)
 +              host->bus_resume_flags |= MMC_BUSRESUME_MANUAL_RESUME;
 +      else
 +              host->bus_resume_flags &= ~MMC_BUSRESUME_MANUAL_RESUME;
 +}
 +
 +extern int mmc_resume_bus(struct mmc_host *host);
  
  extern int mmc_suspend_host(struct mmc_host *);
  extern int mmc_resume_host(struct mmc_host *);
@@@ -347,6 -313,7 +348,7 @@@ extern void mmc_request_done(struct mmc
  static inline void mmc_signal_sdio_irq(struct mmc_host *host)
  {
        host->ops->enable_sdio_irq(host, 0);
+       host->sdio_irq_pending = true;
        wake_up_process(host->sdio_irq_thread);
  }
  
diff --combined include/linux/sched.h
index 35895e3ff675ef712898fe63d788dea18d468e6a,0dae42e70295551fe9d43ffafb28151128f1fe51..5039e0af0cdfcd31d1774fbcb72844cf6d102537
@@@ -1235,6 -1235,9 +1235,9 @@@ struct task_struct 
        const struct sched_class *sched_class;
        struct sched_entity se;
        struct sched_rt_entity rt;
+ #ifdef CONFIG_CGROUP_SCHED
+       struct task_group *sched_task_group;
+ #endif
  
  #ifdef CONFIG_PREEMPT_NOTIFIERS
        /* list of struct preempt_notifier: */
  #endif
  #ifdef CONFIG_CPUSETS
        nodemask_t mems_allowed;        /* Protected by alloc_lock */
-       int mems_allowed_change_disable;
+       seqcount_t mems_allowed_seq;    /* Seqence no to catch updates */
        int cpuset_mem_spread_rotor;
        int cpuset_slab_spread_rotor;
  #endif
@@@ -1754,9 -1757,6 +1757,9 @@@ static inline void put_task_struct(stru
  extern void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st);
  extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st);
  
 +extern int task_free_register(struct notifier_block *n);
 +extern int task_free_unregister(struct notifier_block *n);
 +
  /*
   * Per process flags
   */
@@@ -2616,7 -2616,7 +2619,7 @@@ extern int sched_group_set_rt_period(st
  extern long sched_group_rt_period(struct task_group *tg);
  extern int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk);
  #endif
- #endif
+ #endif /* CONFIG_CGROUP_SCHED */
  
  extern int task_can_switch_user(struct user_struct *up,
                                        struct task_struct *tsk);
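task_free_register()/task_free_unregister(), declared above, let a driver observe task_struct teardown; the kernel/fork.c hunk later in this diff adds the atomic notifier chain and calls it from __put_task_struct() with the dying task as the data argument. A minimal sketch of a subscriber, with the handler hypothetical, is:

#include <linux/notifier.h>
#include <linux/sched.h>

/* Hypothetical subscriber: invoked from __put_task_struct() via an atomic
 * notifier chain, so it must not sleep. 'data' is the task being freed. */
static int example_task_free(struct notifier_block *nb, unsigned long action,
			     void *data)
{
	struct task_struct *tsk = data;

	/* e.g. drop per-task bookkeeping keyed on tsk here, without sleeping */
	(void)tsk;
	return NOTIFY_OK;
}

static struct notifier_block example_task_free_nb = {
	.notifier_call = example_task_free,
};

/* Registered from module init with: task_free_register(&example_task_free_nb); */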
diff --combined kernel/cgroup.c
index 5083a09a9b6d3370a3d47558718ec2ab63e81611,69158d5b02eb6e300b3df043db6b2145e3ec388f..739553e69c3991d90c1e720254b90dc6450f40c3
@@@ -268,33 -268,6 +268,33 @@@ static void cgroup_release_agent(struc
  static DECLARE_WORK(release_agent_work, cgroup_release_agent);
  static void check_for_release(struct cgroup *cgrp);
  
 +/*
 + * A queue for waiters to do rmdir() cgroup. A tasks will sleep when
 + * cgroup->count == 0 && list_empty(&cgroup->children) && subsys has some
 + * reference to css->refcnt. In general, this refcnt is expected to goes down
 + * to zero, soon.
 + *
 + * CGRP_WAIT_ON_RMDIR flag is set under cgroup's inode->i_mutex;
 + */
 +DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq);
 +
 +static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp)
 +{
 +      if (unlikely(test_and_clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags)))
 +              wake_up_all(&cgroup_rmdir_waitq);
 +}
 +
 +void cgroup_exclude_rmdir(struct cgroup_subsys_state *css)
 +{
 +      css_get(css);
 +}
 +
 +void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css)
 +{
 +      cgroup_wakeup_rmdir_waiter(css->cgroup);
 +      css_put(css);
 +}
 +
  /* Link structure for associating css_set objects with cgroups */
  struct cg_cgroup_link {
        /*
@@@ -354,43 -327,52 +354,43 @@@ static struct hlist_head *css_set_hash(
        return &css_set_table[index];
  }
  
 -/* We don't maintain the lists running through each css_set to its
 - * task until after the first call to cgroup_iter_start(). This
 - * reduces the fork()/exit() overhead for people who have cgroups
 - * compiled into their kernel but not actually in use */
 -static int use_task_css_set_links __read_mostly;
 -
 -static void __put_css_set(struct css_set *cg, int taskexit)
 +static void free_css_set_work(struct work_struct *work)
  {
 +      struct css_set *cg = container_of(work, struct css_set, work);
        struct cg_cgroup_link *link;
        struct cg_cgroup_link *saved_link;
 -      /*
 -       * Ensure that the refcount doesn't hit zero while any readers
 -       * can see it. Similar to atomic_dec_and_lock(), but for an
 -       * rwlock
 -       */
 -      if (atomic_add_unless(&cg->refcount, -1, 1))
 -              return;
 -      write_lock(&css_set_lock);
 -      if (!atomic_dec_and_test(&cg->refcount)) {
 -              write_unlock(&css_set_lock);
 -              return;
 -      }
 -
 -      /* This css_set is dead. unlink it and release cgroup refcounts */
 -      hlist_del(&cg->hlist);
 -      css_set_count--;
  
 +      write_lock(&css_set_lock);
        list_for_each_entry_safe(link, saved_link, &cg->cg_links,
                                 cg_link_list) {
                struct cgroup *cgrp = link->cgrp;
                list_del(&link->cg_link_list);
                list_del(&link->cgrp_link_list);
 -              if (atomic_dec_and_test(&cgrp->count) &&
 -                  notify_on_release(cgrp)) {
 -                      if (taskexit)
 -                              set_bit(CGRP_RELEASABLE, &cgrp->flags);
 +              if (atomic_dec_and_test(&cgrp->count)) {
                        check_for_release(cgrp);
 +                      cgroup_wakeup_rmdir_waiter(cgrp);
                }
 -
                kfree(link);
        }
 -
        write_unlock(&css_set_lock);
 -      kfree_rcu(cg, rcu_head);
 +
 +      kfree(cg);
  }
  
 +static void free_css_set_rcu(struct rcu_head *obj)
 +{
 +      struct css_set *cg = container_of(obj, struct css_set, rcu_head);
 +
 +      INIT_WORK(&cg->work, free_css_set_work);
 +      schedule_work(&cg->work);
 +}
 +
 +/* We don't maintain the lists running through each css_set to its
 + * task until after the first call to cgroup_iter_start(). This
 + * reduces the fork()/exit() overhead for people who have cgroups
 + * compiled into their kernel but not actually in use */
 +static int use_task_css_set_links __read_mostly;
 +
  /*
   * refcounted get/put for css_set objects
   */
@@@ -399,26 -381,14 +399,26 @@@ static inline void get_css_set(struct c
        atomic_inc(&cg->refcount);
  }
  
 -static inline void put_css_set(struct css_set *cg)
 +static void put_css_set(struct css_set *cg)
  {
 -      __put_css_set(cg, 0);
 -}
 +      /*
 +       * Ensure that the refcount doesn't hit zero while any readers
 +       * can see it. Similar to atomic_dec_and_lock(), but for an
 +       * rwlock
 +       */
 +      if (atomic_add_unless(&cg->refcount, -1, 1))
 +              return;
 +      write_lock(&css_set_lock);
 +      if (!atomic_dec_and_test(&cg->refcount)) {
 +              write_unlock(&css_set_lock);
 +              return;
 +      }
  
 -static inline void put_css_set_taskexit(struct css_set *cg)
 -{
 -      __put_css_set(cg, 1);
 +      hlist_del(&cg->hlist);
 +      css_set_count--;
 +
 +      write_unlock(&css_set_lock);
 +      call_rcu(&cg->rcu_head, free_css_set_rcu);
  }
  
  /*
@@@ -750,9 -720,9 +750,9 @@@ static struct cgroup *task_cgroup_from_
   * cgroup_attach_task(), which overwrites one tasks cgroup pointer with
   * another.  It does so using cgroup_mutex, however there are
   * several performance critical places that need to reference
 - * task->cgroup without the expense of grabbing a system global
 + * task->cgroups without the expense of grabbing a system global
   * mutex.  Therefore except as noted below, when dereferencing or, as
 - * in cgroup_attach_task(), modifying a task'ss cgroup pointer we use
 + * in cgroup_attach_task(), modifying a task's cgroups pointer we use
   * task_lock(), which acts on a spinlock (task->alloc_lock) already in
   * the task_struct routinely used for such matters.
   *
@@@ -941,6 -911,33 +941,6 @@@ static void cgroup_d_remove_dir(struct 
        remove_dir(dentry);
  }
  
 -/*
 - * A queue for waiters to do rmdir() cgroup. A tasks will sleep when
 - * cgroup->count == 0 && list_empty(&cgroup->children) && subsys has some
 - * reference to css->refcnt. In general, this refcnt is expected to goes down
 - * to zero, soon.
 - *
 - * CGRP_WAIT_ON_RMDIR flag is set under cgroup's inode->i_mutex;
 - */
 -DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq);
 -
 -static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp)
 -{
 -      if (unlikely(test_and_clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags)))
 -              wake_up_all(&cgroup_rmdir_waitq);
 -}
 -
 -void cgroup_exclude_rmdir(struct cgroup_subsys_state *css)
 -{
 -      css_get(css);
 -}
 -
 -void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css)
 -{
 -      cgroup_wakeup_rmdir_waiter(css->cgroup);
 -      css_put(css);
 -}
 -
  /*
   * Call with cgroup_mutex held. Drops reference counts on modules, including
   * any duplicate ones that parse_cgroupfs_options took. If this function
@@@ -1803,9 -1800,8 +1803,8 @@@ static int cgroup_task_migrate(struct c
         * trading it for newcg is protected by cgroup_mutex, we're safe to drop
         * it here; it will be freed under RCU.
         */
-       put_css_set(oldcg);
        set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
+       put_css_set(oldcg);
        return 0;
  }
  
@@@ -1823,7 -1819,6 +1822,7 @@@ int cgroup_attach_task(struct cgroup *c
        struct cgroup_subsys *ss, *failed_ss = NULL;
        struct cgroup *oldcgrp;
        struct cgroupfs_root *root = cgrp->root;
 +      struct css_set *cg;
  
        /* Nothing to do if the task is already in that cgroup */
        oldcgrp = task_cgroup_from_root(tsk, root);
                }
        }
  
 +      task_lock(tsk);
 +      cg = tsk->cgroups;
 +      get_css_set(cg);
 +      task_unlock(tsk);
 +
        retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, false);
        if (retval)
                goto out;
                if (ss->attach)
                        ss->attach(ss, cgrp, oldcgrp, tsk);
        }
 -
 -      synchronize_rcu();
 +      set_bit(CGRP_RELEASABLE, &cgrp->flags);
 +      /* put_css_set will not destroy cg until after an RCU grace period */
 +      put_css_set(cg);
  
        /*
         * wake up rmdir() waiter. the rmdir should fail since the cgroup
@@@ -2194,24 -2183,6 +2193,24 @@@ out_free_group_list
        return retval;
  }
  
 +static int cgroup_allow_attach(struct cgroup *cgrp, struct task_struct *tsk)
 +{
 +      struct cgroup_subsys *ss;
 +      int ret;
 +
 +      for_each_subsys(cgrp->root, ss) {
 +              if (ss->allow_attach) {
 +                      ret = ss->allow_attach(cgrp, tsk);
 +                      if (ret)
 +                              return ret;
 +              } else {
 +                      return -EACCES;
 +              }
 +      }
 +
 +      return 0;
 +}
 +
  /*
   * Find the task_struct of the task to attach by vpid and pass it along to the
   * function to attach either it or all tasks in its threadgroup. Will take
@@@ -2257,16 -2228,9 +2256,16 @@@ static int attach_task_by_pid(struct cg
                if (cred->euid &&
                    cred->euid != tcred->uid &&
                    cred->euid != tcred->suid) {
 -                      rcu_read_unlock();
 -                      cgroup_unlock();
 -                      return -EACCES;
 +                      /*
 +                       * if the default permission check fails, give each
 +                       * cgroup a chance to extend the permission check
 +                       */
 +                      ret = cgroup_allow_attach(cgrp, tsk);
 +                      if (ret) {
 +                              rcu_read_unlock();
 +                              cgroup_unlock();
 +                              return ret;
 +                      }
                }
                get_task_struct(tsk);
                rcu_read_unlock();
@@@ -3840,8 -3804,6 +3839,8 @@@ static long cgroup_create(struct cgrou
        if (err < 0)
                goto err_remove;
  
 +      set_bit(CGRP_RELEASABLE, &parent->flags);
 +
        /* The cgroup directory was pre-locked for us */
        BUG_ON(!mutex_is_locked(&cgrp->dentry->d_inode->i_mutex));
  
@@@ -3973,21 -3935,6 +3972,21 @@@ static int cgroup_clear_css_refs(struc
        return !failed;
  }
  
 +/* checks if all of the css_sets attached to a cgroup have a refcount of 0.
 + * Must be called with css_set_lock held */
 +static int cgroup_css_sets_empty(struct cgroup *cgrp)
 +{
 +      struct cg_cgroup_link *link;
 +
 +      list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) {
 +              struct css_set *cg = link->cg;
 +              if (atomic_read(&cg->refcount) > 0)
 +                      return 0;
 +      }
 +
 +      return 1;
 +}
 +
  static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
  {
        struct cgroup *cgrp = dentry->d_fsdata;
        /* the vfs holds both inode->i_mutex already */
  again:
        mutex_lock(&cgroup_mutex);
 -      if (atomic_read(&cgrp->count) != 0) {
 +      if (!cgroup_css_sets_empty(cgrp)) {
                mutex_unlock(&cgroup_mutex);
                return -EBUSY;
        }
  
        mutex_lock(&cgroup_mutex);
        parent = cgrp->parent;
 -      if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children)) {
 +      if (!cgroup_css_sets_empty(cgrp) || !list_empty(&cgrp->children)) {
                clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
                mutex_unlock(&cgroup_mutex);
                return -EBUSY;
        cgroup_d_remove_dir(d);
        dput(d);
  
 -      set_bit(CGRP_RELEASABLE, &parent->flags);
        check_for_release(parent);
  
        /*
@@@ -4672,7 -4620,7 +4671,7 @@@ void cgroup_exit(struct task_struct *ts
        task_unlock(tsk);
  
        if (cg)
 -              put_css_set_taskexit(cg);
 +              put_css_set(cg);
  }
  
  /**
@@@ -4725,14 -4673,6 +4724,14 @@@ static void check_for_release(struct cg
        }
  }
  
 +/* Caller must verify that the css is not for root cgroup */
 +void __css_get(struct cgroup_subsys_state *css, int count)
 +{
 +      atomic_add(count, &css->refcnt);
 +      set_bit(CGRP_RELEASABLE, &css->cgroup->flags);
 +}
 +EXPORT_SYMBOL_GPL(__css_get);
 +
  /* Caller must verify that the css is not for root cgroup */
  void __css_put(struct cgroup_subsys_state *css, int count)
  {
        rcu_read_lock();
        val = atomic_sub_return(count, &css->refcnt);
        if (val == 1) {
 -              if (notify_on_release(cgrp)) {
 -                      set_bit(CGRP_RELEASABLE, &cgrp->flags);
 -                      check_for_release(cgrp);
 -              }
 +              check_for_release(cgrp);
                cgroup_wakeup_rmdir_waiter(cgrp);
        }
        rcu_read_unlock();
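
The cgroup.c changes above add an allow_attach hook so a controller can extend the default euid-based attach check. Below is a minimal controller-side sketch; it mirrors the policy the cpu controller registers later in this merge, and the example_* names are purely illustrative, not part of the patch.

#include <linux/capability.h>
#include <linux/cgroup.h>
#include <linux/cred.h>
#include <linux/sched.h>

/*
 * Hypothetical controller hook: permit the attach when the writer owns
 * the target task or has CAP_SYS_NICE, matching cpu_cgroup_allow_attach()
 * further down in this merge.
 */
static int example_allow_attach(struct cgroup *cgrp, struct task_struct *tsk)
{
        const struct cred *cred = current_cred(), *tcred = __task_cred(tsk);

        if (current != tsk && !capable(CAP_SYS_NICE) &&
            cred->euid != tcred->uid && cred->euid != tcred->suid)
                return -EACCES;

        return 0;
}

/* Wiring: the hook is just another cgroup_subsys callback. */
struct cgroup_subsys example_subsys = {
        .name           = "example",
        .allow_attach   = example_allow_attach,
        /* .create, .destroy and the other callbacks are omitted in this sketch */
};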
diff --combined kernel/fork.c
index 06909a9da0424b89c7b711f417cb5f9a2277375c,3d42aa3dad3834d6a6373d949f38b6409f25832e..158ca4f026c5b418239267bdbb6f5eb70ebdba50
@@@ -48,6 -48,7 +48,7 @@@
  #include <linux/audit.h>
  #include <linux/memcontrol.h>
  #include <linux/ftrace.h>
+ #include <linux/proc_fs.h>
  #include <linux/profile.h>
  #include <linux/rmap.h>
  #include <linux/ksm.h>
@@@ -155,9 -156,6 +156,9 @@@ struct kmem_cache *vm_area_cachep
  /* SLAB cache for mm_struct structures (tsk->mm) */
  static struct kmem_cache *mm_cachep;
  
 +/* Notifier list called when a task struct is freed */
 +static ATOMIC_NOTIFIER_HEAD(task_free_notifier);
 +
  static void account_kernel_stack(struct thread_info *ti, int account)
  {
        struct zone *zone = page_zone(virt_to_page(ti));
@@@ -189,18 -187,6 +190,18 @@@ static inline void put_signal_struct(st
                free_signal_struct(sig);
  }
  
 +int task_free_register(struct notifier_block *n)
 +{
 +      return atomic_notifier_chain_register(&task_free_notifier, n);
 +}
 +EXPORT_SYMBOL(task_free_register);
 +
 +int task_free_unregister(struct notifier_block *n)
 +{
 +      return atomic_notifier_chain_unregister(&task_free_notifier, n);
 +}
 +EXPORT_SYMBOL(task_free_unregister);
 +
  void __put_task_struct(struct task_struct *tsk)
  {
        WARN_ON(!tsk->exit_state);
        delayacct_tsk_free(tsk);
        put_signal_struct(tsk->signal);
  
 +      atomic_notifier_call_chain(&task_free_notifier, 0, tsk);
        if (!profile_handoff_task(tsk))
                free_task(tsk);
  }
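
For reference, a minimal sketch of a consumer of the task-free notifier introduced above. It assumes the task_free_register()/task_free_unregister() prototypes are exposed through linux/sched.h by this series; the module and function names are illustrative only.

#include <linux/module.h>
#include <linux/notifier.h>
#include <linux/sched.h>

/*
 * Illustrative consumer: log whenever a task_struct is about to be freed.
 * The callback runs from __put_task_struct(), i.e. after the task has
 * exited and the last reference is dropped, so only fields that are still
 * valid at that point (pid, comm, ...) should be touched.
 */
static int example_task_free(struct notifier_block *nb, unsigned long action,
                             void *data)
{
        struct task_struct *tsk = data;

        pr_debug("freeing task_struct of pid %d (%s)\n", tsk->pid, tsk->comm);
        return NOTIFY_OK;
}

static struct notifier_block example_task_free_nb = {
        .notifier_call = example_task_free,
};

static int __init example_init(void)
{
        return task_free_register(&example_task_free_nb);
}

static void __exit example_exit(void)
{
        task_free_unregister(&example_task_free_nb);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");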
@@@ -1000,6 -985,9 +1001,9 @@@ static int copy_signal(unsigned long cl
  #ifdef CONFIG_CGROUPS
        init_rwsem(&sig->threadgroup_fork_lock);
  #endif
+ #ifdef CONFIG_CPUSETS
+       seqcount_init(&tsk->mems_allowed_seq);
+ #endif
  
        sig->oom_adj = current->signal->oom_adj;
        sig->oom_score_adj = current->signal->oom_score_adj;
@@@ -1032,7 -1020,7 +1036,7 @@@ static void rt_mutex_init_task(struct t
  {
        raw_spin_lock_init(&p->pi_lock);
  #ifdef CONFIG_RT_MUTEXES
 -      plist_head_init_raw(&p->pi_waiters, &p->pi_lock);
 +      plist_head_init(&p->pi_waiters);
        p->pi_blocked_on = NULL;
  #endif
  }
@@@ -1394,6 -1382,8 +1398,8 @@@ bad_fork_cleanup_io
        if (p->io_context)
                exit_io_context(p);
  bad_fork_cleanup_namespaces:
+       if (unlikely(clone_flags & CLONE_NEWPID))
+               pid_ns_release_proc(p->nsproxy->pid_ns);
        exit_task_namespaces(p);
  bad_fork_cleanup_mm:
        if (p->mm) {
diff --combined kernel/futex.c
index b2d51a7ede3b30c8f4f5bfbcb544ba1085d32534,24bc59c8867f8110dc65a96414d4d905eb4f1d48..e45efe79dcb4d651018ef6755eb9446a0d05ad66
@@@ -2231,11 -2231,11 +2231,11 @@@ int handle_early_requeue_pi_wakeup(stru
   * @uaddr2:   the pi futex we will take prior to returning to user-space
   *
   * The caller will wait on uaddr and will be requeued by futex_requeue() to
-  * uaddr2 which must be PI aware.  Normal wakeup will wake on uaddr2 and
-  * complete the acquisition of the rt_mutex prior to returning to userspace.
-  * This ensures the rt_mutex maintains an owner when it has waiters; without
-  * one, the pi logic wouldn't know which task to boost/deboost, if there was a
-  * need to.
+  * uaddr2 which must be PI aware and distinct from uaddr.  Normal wakeup will wake
+  * on uaddr2 and complete the acquisition of the rt_mutex prior to returning to
+  * userspace.  This ensures the rt_mutex maintains an owner when it has waiters;
+  * without one, the pi logic would not know which task to boost/deboost, if
+  * there was a need to.
   *
   * We call schedule in futex_wait_queue_me() when we enqueue and return there
   * via the following:
@@@ -2272,6 -2272,9 +2272,9 @@@ static int futex_wait_requeue_pi(u32 __
        struct futex_q q = futex_q_init;
        int res, ret;
  
+       if (uaddr == uaddr2)
+               return -EINVAL;
        if (!bitset)
                return -EINVAL;
  
                 * signal.  futex_unlock_pi() will not destroy the lock_ptr nor
                 * the pi_state.
                 */
-               WARN_ON(!&q.pi_state);
+               WARN_ON(!q.pi_state);
                pi_mutex = &q.pi_state->pi_mutex;
                ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter, 1);
                debug_rt_mutex_free_waiter(&rt_waiter);
         * fault, unlock the rt_mutex and return the fault to userspace.
         */
        if (ret == -EFAULT) {
-               if (rt_mutex_owner(pi_mutex) == current)
+               if (pi_mutex && rt_mutex_owner(pi_mutex) == current)
                        rt_mutex_unlock(pi_mutex);
        } else if (ret == -EINTR) {
                /*
@@@ -2736,7 -2739,7 +2739,7 @@@ static int __init futex_init(void
                futex_cmpxchg_enabled = 1;
  
        for (i = 0; i < ARRAY_SIZE(futex_queues); i++) {
 -              plist_head_init(&futex_queues[i].chain, &futex_queues[i].lock);
 +              plist_head_init(&futex_queues[i].chain);
                spin_lock_init(&futex_queues[i].lock);
        }
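
The new uaddr == uaddr2 rejection above is visible from user space: FUTEX_WAIT_REQUEUE_PI now fails cleanly with EINVAL when both futex words are the same instead of misbehaving. A rough user-space probe (not part of the patch) is sketched below.

#include <errno.h>
#include <linux/futex.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
        uint32_t futex_word = 0;
        long ret;

        /* Requeue-PI onto the very same futex word: rejected after this fix. */
        ret = syscall(SYS_futex, &futex_word, FUTEX_WAIT_REQUEUE_PI,
                      0 /* expected value */, NULL /* no timeout */,
                      &futex_word /* uaddr2 == uaddr */, 0);

        if (ret == -1 && errno == EINVAL)
                printf("same-futex requeue-PI rejected with EINVAL, as expected\n");
        else
                printf("unexpected result: ret=%ld errno=%d\n", ret, errno);

        return 0;
}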
  
diff --combined kernel/power/suspend.c
index 61e63472816ac715b7b64165bc1080572d5cbf5b,e40d20595b156a52bf821432225f3769f9c6906f..f5adb6e8d0f125642ca6cbbc54dec3c2888ade96
  #include <linux/slab.h>
  #include <linux/suspend.h>
  #include <linux/syscore_ops.h>
+ #include <linux/ftrace.h>
  #include <trace/events/power.h>
  
  #include "power.h"
  
  const char *const pm_states[PM_SUSPEND_MAX] = {
 +#ifdef CONFIG_EARLYSUSPEND
 +      [PM_SUSPEND_ON]         = "on",
 +#endif
        [PM_SUSPEND_STANDBY]    = "standby",
        [PM_SUSPEND_MEM]        = "mem",
  };
@@@ -213,6 -211,7 +214,7 @@@ int suspend_devices_and_enter(suspend_s
                        goto Close;
        }
        suspend_console();
+       ftrace_stop();
        suspend_test_start();
        error = dpm_suspend_start(PMSG_SUSPEND);
        if (error) {
        suspend_test_start();
        dpm_resume_end(PMSG_RESUME);
        suspend_test_finish("resume devices");
+       ftrace_start();
        resume_console();
   Close:
        if (suspend_ops->end)
diff --combined kernel/sched.c
index d48888061b3e553e0d2e0a768929f7bf24f89d22,aacd55f8d4ea2ba17154ac43ae359498081653af..e788b663b7977ee2922927a910346d48214c925a
@@@ -71,7 -71,6 +71,7 @@@
  #include <linux/ctype.h>
  #include <linux/ftrace.h>
  #include <linux/slab.h>
 +#include <linux/cpuacct.h>
  
  #include <asm/tlb.h>
  #include <asm/irq_regs.h>
@@@ -606,22 -605,19 +606,19 @@@ static inline int cpu_of(struct rq *rq
  /*
   * Return the group to which this task belongs.
   *
-  * We use task_subsys_state_check() and extend the RCU verification with
-  * pi->lock and rq->lock because cpu_cgroup_attach() holds those locks for each
-  * task it moves into the cgroup. Therefore by holding either of those locks,
-  * we pin the task to the current cgroup.
+  * We cannot use task_subsys_state() and friends because the cgroup
+  * subsystem changes that value before the cgroup_subsys::attach() method
+  * is called, therefore we cannot pin it and might observe the wrong value.
+  *
+  * The same is true for autogroup's p->signal->autogroup->tg, the autogroup
+  * core changes this before calling sched_move_task().
+  *
+  * Instead we use a 'copy' which is updated from sched_move_task() while
+  * holding both task_struct::pi_lock and rq::lock.
   */
  static inline struct task_group *task_group(struct task_struct *p)
  {
-       struct task_group *tg;
-       struct cgroup_subsys_state *css;
-       css = task_subsys_state_check(p, cpu_cgroup_subsys_id,
-                       lockdep_is_held(&p->pi_lock) ||
-                       lockdep_is_held(&task_rq(p)->lock));
-       tg = container_of(css, struct task_group, css);
-       return autogroup_task_group(p, tg);
+       return p->sched_task_group;
  }
  
  /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
@@@ -2207,7 -2203,7 +2204,7 @@@ void set_task_cpu(struct task_struct *p
         * a task's CPU. ->pi_lock for waking tasks, rq->lock for runnable tasks.
         *
         * sched_move_task() holds both and thus holding either pins the cgroup,
-        * see set_task_rq().
+        * see task_group().
         *
         * Furthermore, all task_rq users should acquire both locks, see
         * task_rq_lock().
@@@ -7221,11 -7217,8 +7218,8 @@@ int sched_domain_level_max
  
  static int __init setup_relax_domain_level(char *str)
  {
-       unsigned long val;
-       val = simple_strtoul(str, NULL, 0);
-       if (val < sched_domain_level_max)
-               default_relax_domain_level = val;
+       if (kstrtoint(str, 0, &default_relax_domain_level))
+               pr_warn("Unable to set relax_domain_level\n");
  
        return 1;
  }
@@@ -7418,7 -7411,6 +7412,6 @@@ struct sched_domain *build_sched_domain
        if (!sd)
                return child;
  
-       set_domain_attribute(sd, attr);
        cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu));
        if (child) {
                sd->level = child->level + 1;
                child->parent = sd;
        }
        sd->child = child;
+       set_domain_attribute(sd, attr);
  
        return sd;
  }
@@@ -7784,34 -7777,66 +7778,66 @@@ int __init sched_create_sysfs_power_sav
  }
  #endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
  
+ static int num_cpus_frozen;   /* used to mark begin/end of suspend/resume */
  /*
   * Update cpusets according to cpu_active mask.  If cpusets are
   * disabled, cpuset_update_active_cpus() becomes a simple wrapper
   * around partition_sched_domains().
+  *
+  * If we come here as part of a suspend/resume, don't touch cpusets because we
+  * want to restore them to their original state upon resume anyway.
   */
  static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action,
                             void *hcpu)
  {
-       switch (action & ~CPU_TASKS_FROZEN) {
+       switch (action) {
+       case CPU_ONLINE_FROZEN:
+       case CPU_DOWN_FAILED_FROZEN:
+               /*
+                * num_cpus_frozen tracks how many CPUs are involved in the
+                * suspend/resume sequence. As long as this is not the last online
+                * operation in the resume sequence, just build a single sched
+                * domain, ignoring cpusets.
+                */
+               num_cpus_frozen--;
+               if (likely(num_cpus_frozen)) {
+                       partition_sched_domains(1, NULL, NULL);
+                       break;
+               }
+               /*
+                * This is the last CPU online operation. So fall through and
+                * restore the original sched domains by considering the
+                * cpuset configurations.
+                */
        case CPU_ONLINE:
        case CPU_DOWN_FAILED:
                cpuset_update_active_cpus();
-               return NOTIFY_OK;
+               break;
        default:
                return NOTIFY_DONE;
        }
+       return NOTIFY_OK;
  }
  
  static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action,
                               void *hcpu)
  {
-       switch (action & ~CPU_TASKS_FROZEN) {
+       switch (action) {
        case CPU_DOWN_PREPARE:
                cpuset_update_active_cpus();
-               return NOTIFY_OK;
+               break;
+       case CPU_DOWN_PREPARE_FROZEN:
+               num_cpus_frozen++;
+               partition_sched_domains(1, NULL, NULL);
+               break;
        default:
                return NOTIFY_DONE;
        }
+       return NOTIFY_OK;
  }
  
  static int update_runtime(struct notifier_block *nfb,
@@@ -7924,7 -7949,7 +7950,7 @@@ static void init_rt_rq(struct rt_rq *rt
  #ifdef CONFIG_SMP
        rt_rq->rt_nr_migratory = 0;
        rt_rq->overloaded = 0;
 -      plist_head_init_raw(&rt_rq->pushable_tasks, &rq->lock);
 +      plist_head_init(&rt_rq->pushable_tasks);
  #endif
  
        rt_rq->rt_time = 0;
@@@ -8129,7 -8154,7 +8155,7 @@@ void __init sched_init(void
  #endif
  
  #ifdef CONFIG_RT_MUTEXES
 -      plist_head_init_raw(&init_task.pi_waiters, &init_task.pi_lock);
 +      plist_head_init(&init_task.pi_waiters);
  #endif
  
        /*
@@@ -8180,24 -8205,13 +8206,24 @@@ static inline int preempt_count_equals(
        return (nested == preempt_offset);
  }
  
 +static int __might_sleep_init_called;
 +int __init __might_sleep_init(void)
 +{
 +      __might_sleep_init_called = 1;
 +      return 0;
 +}
 +early_initcall(__might_sleep_init);
 +
  void __might_sleep(const char *file, int line, int preempt_offset)
  {
  #ifdef in_atomic
        static unsigned long prev_jiffy;        /* ratelimiting */
  
        if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) ||
 -          system_state != SYSTEM_RUNNING || oops_in_progress)
 +          oops_in_progress)
 +              return;
 +      if (system_state != SYSTEM_RUNNING &&
 +          (!__might_sleep_init_called || system_state != SYSTEM_BOOTING))
                return;
        if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
                return;
@@@ -8560,6 -8574,7 +8586,7 @@@ void sched_destroy_group(struct task_gr
   */
  void sched_move_task(struct task_struct *tsk)
  {
+       struct task_group *tg;
        int on_rq, running;
        unsigned long flags;
        struct rq *rq;
        if (unlikely(running))
                tsk->sched_class->put_prev_task(rq, tsk);
  
+       tg = container_of(task_subsys_state_check(tsk, cpu_cgroup_subsys_id,
+                               lockdep_is_held(&tsk->sighand->siglock)),
+                         struct task_group, css);
+       tg = autogroup_task_group(tsk, tg);
+       tsk->sched_task_group = tg;
  #ifdef CONFIG_FAIR_GROUP_SCHED
        if (tsk->sched_class->task_move_group)
                tsk->sched_class->task_move_group(tsk, on_rq);
@@@ -8940,20 -8961,6 +8973,20 @@@ cpu_cgroup_destroy(struct cgroup_subsy
        sched_destroy_group(tg);
  }
  
 +static int
 +cpu_cgroup_allow_attach(struct cgroup *cgrp, struct task_struct *tsk)
 +{
 +      const struct cred *cred = current_cred(), *tcred;
 +
 +      tcred = __task_cred(tsk);
 +
 +      if ((current != tsk) && !capable(CAP_SYS_NICE) &&
 +          cred->euid != tcred->uid && cred->euid != tcred->suid)
 +              return -EACCES;
 +
 +      return 0;
 +}
 +
  static int
  cpu_cgroup_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
  {
@@@ -9059,7 -9066,6 +9092,7 @@@ struct cgroup_subsys cpu_cgroup_subsys 
        .name           = "cpu",
        .create         = cpu_cgroup_create,
        .destroy        = cpu_cgroup_destroy,
 +      .allow_attach   = cpu_cgroup_allow_attach,
        .can_attach_task = cpu_cgroup_can_attach_task,
        .attach_task    = cpu_cgroup_attach_task,
        .exit           = cpu_cgroup_exit,
@@@ -9086,30 -9092,8 +9119,30 @@@ struct cpuacct 
        u64 __percpu *cpuusage;
        struct percpu_counter cpustat[CPUACCT_STAT_NSTATS];
        struct cpuacct *parent;
 +      struct cpuacct_charge_calls *cpufreq_fn;
 +      void *cpuacct_data;
  };
  
 +static struct cpuacct *cpuacct_root;
 +
 +/* Default calls for cpufreq accounting */
 +static struct cpuacct_charge_calls *cpuacct_cpufreq;
 +int cpuacct_register_cpufreq(struct cpuacct_charge_calls *fn)
 +{
 +      cpuacct_cpufreq = fn;
 +
 +      /*
 +       * Root node is created before the platform can register callbacks;
 +       * initialize it here.
 +       */
 +      if (cpuacct_root && fn) {
 +              cpuacct_root->cpufreq_fn = fn;
 +              if (fn->init)
 +                      fn->init(&cpuacct_root->cpuacct_data);
 +      }
 +      return 0;
 +}
 +
  struct cgroup_subsys cpuacct_subsys;
  
  /* return cpu accounting group corresponding to this container */
@@@ -9144,16 -9128,8 +9177,16 @@@ static struct cgroup_subsys_state *cpua
                if (percpu_counter_init(&ca->cpustat[i], 0))
                        goto out_free_counters;
  
 +      ca->cpufreq_fn = cpuacct_cpufreq;
 +
 +      /* If available, have platform code initialize cpu frequency table */
 +      if (ca->cpufreq_fn && ca->cpufreq_fn->init)
 +              ca->cpufreq_fn->init(&ca->cpuacct_data);
 +
        if (cgrp->parent)
                ca->parent = cgroup_ca(cgrp->parent);
 +      else
 +              cpuacct_root = ca;
  
        return &ca->css;
  
@@@ -9281,32 -9257,6 +9314,32 @@@ static int cpuacct_stats_show(struct cg
        return 0;
  }
  
 +static int cpuacct_cpufreq_show(struct cgroup *cgrp, struct cftype *cft,
 +              struct cgroup_map_cb *cb)
 +{
 +      struct cpuacct *ca = cgroup_ca(cgrp);
 +      if (ca->cpufreq_fn && ca->cpufreq_fn->cpufreq_show)
 +              ca->cpufreq_fn->cpufreq_show(ca->cpuacct_data, cb);
 +
 +      return 0;
 +}
 +
 +/* return total cpu power usage (milliWatt second) of a group */
 +static u64 cpuacct_powerusage_read(struct cgroup *cgrp, struct cftype *cft)
 +{
 +      int i;
 +      struct cpuacct *ca = cgroup_ca(cgrp);
 +      u64 totalpower = 0;
 +
 +      if (ca->cpufreq_fn && ca->cpufreq_fn->power_usage)
 +              for_each_present_cpu(i) {
 +                      totalpower += ca->cpufreq_fn->power_usage(
 +                                      ca->cpuacct_data);
 +              }
 +
 +      return totalpower;
 +}
 +
  static struct cftype files[] = {
        {
                .name = "usage",
                .name = "stat",
                .read_map = cpuacct_stats_show,
        },
 +      {
 +              .name =  "cpufreq",
 +              .read_map = cpuacct_cpufreq_show,
 +      },
 +      {
 +              .name = "power",
 +              .read_u64 = cpuacct_powerusage_read
 +      },
  };
  
  static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
@@@ -9358,10 -9300,6 +9391,10 @@@ static void cpuacct_charge(struct task_
        for (; ca; ca = ca->parent) {
                u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
                *cpuusage += cputime;
 +
 +              /* Call back into platform code to account for CPU speeds */
 +              if (ca->cpufreq_fn && ca->cpufreq_fn->charge)
 +                      ca->cpufreq_fn->charge(ca->cpuacct_data, cputime, cpu);
        }
  
        rcu_read_unlock();
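
The cpuacct changes above let platform code account CPU time per frequency and report power. The sketch below shows how a platform might register such hooks; the struct cpuacct_charge_calls field signatures are inferred from the call sites in this hunk (init/charge/cpufreq_show/power_usage) and the header it lives in (linux/cpuacct.h, added elsewhere in this series) is assumed, so treat this as illustrative rather than the exact interface.

#include <linux/cpuacct.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/types.h>

/* Per-cgroup accounting blob handed back through the 'policy' pointer. */
struct example_acct {
        u64 charged_ns;
};

static void example_acct_init(void **policy)
{
        *policy = kzalloc(sizeof(struct example_acct), GFP_KERNEL);
}

/* Called from cpuacct_charge() for every charged slice, per CPU. */
static void example_acct_charge(void *policy, u64 cputime, unsigned int cpu)
{
        struct example_acct *acct = policy;

        if (acct)
                acct->charged_ns += cputime;
}

/* Reported through the new "power" cgroup file; a real platform would
 * convert time spent at each frequency into milliwatt-seconds here. */
static u64 example_acct_power(void *policy)
{
        struct example_acct *acct = policy;

        return acct ? acct->charged_ns : 0;
}

static struct cpuacct_charge_calls example_acct_calls = {
        .init           = example_acct_init,
        .charge         = example_acct_charge,
        .power_usage    = example_acct_power,
        /* .cpufreq_show is optional; the callers above check for NULL */
};

static int __init example_acct_setup(void)
{
        return cpuacct_register_cpufreq(&example_acct_calls);
}
late_initcall(example_acct_setup);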
diff --combined kernel/time/timekeeping.c
index 06a5f310fb8e84ccff933b788896144bdce23c15,c3cbd8c34b49cd9a7878a0c17a0702ca61c645a6..5928f9559daba4960681b9ad56457b367042728a
@@@ -161,23 -161,43 +161,43 @@@ static struct timespec xtime __attribut
  static struct timespec wall_to_monotonic __attribute__ ((aligned (16)));
  static struct timespec total_sleep_time;
  
+ /* Offset clock monotonic -> clock realtime */
+ static ktime_t offs_real;
+ /* Offset clock monotonic -> clock boottime */
+ static ktime_t offs_boot;
  /*
   * The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock.
   */
  static struct timespec raw_time;
  
- /* flag for if timekeeping is suspended */
- int __read_mostly timekeeping_suspended;
+ /* must hold write on xtime_lock */
+ static void update_rt_offset(void)
+ {
+       struct timespec tmp, *wtm = &wall_to_monotonic;
+       set_normalized_timespec(&tmp, -wtm->tv_sec, -wtm->tv_nsec);
+       offs_real = timespec_to_ktime(tmp);
+ }
  
- /* must hold xtime_lock */
- void timekeeping_leap_insert(int leapsecond)
+ /* must hold write on xtime_lock */
+ static void timekeeping_update(bool clearntp)
  {
-       xtime.tv_sec += leapsecond;
-       wall_to_monotonic.tv_sec -= leapsecond;
-       update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock,
-                       timekeeper.mult);
+       if (clearntp) {
+               timekeeper.ntp_error = 0;
+               ntp_clear();
+       }
+       update_rt_offset();
+       update_vsyscall(&xtime, &wall_to_monotonic,
+                        timekeeper.clock, timekeeper.mult);
  }
  
+ /* flag for if timekeeping is suspended */
+ int __read_mostly timekeeping_suspended;
  /**
   * timekeeping_forward_now - update clock to the current time
   *
@@@ -362,7 -382,7 +382,7 @@@ int do_settimeofday(const struct timesp
        struct timespec ts_delta;
        unsigned long flags;
  
-       if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
+       if (!timespec_valid_strict(tv))
                return -EINVAL;
  
        write_seqlock_irqsave(&xtime_lock, flags);
  
        xtime = *tv;
  
-       timekeeper.ntp_error = 0;
-       ntp_clear();
-       update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock,
-                               timekeeper.mult);
+       timekeeping_update(true);
  
        write_sequnlock_irqrestore(&xtime_lock, flags);
  
@@@ -401,6 -417,8 +417,8 @@@ EXPORT_SYMBOL(do_settimeofday)
  int timekeeping_inject_offset(struct timespec *ts)
  {
        unsigned long flags;
+       struct timespec tmp;
+       int ret = 0;
  
        if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC)
                return -EINVAL;
  
        timekeeping_forward_now();
  
+       tmp = timespec_add(xtime,  *ts);
+       if (!timespec_valid_strict(&tmp)) {
+               ret = -EINVAL;
+               goto error;
+       }
        xtime = timespec_add(xtime, *ts);
        wall_to_monotonic = timespec_sub(wall_to_monotonic, *ts);
  
-       timekeeper.ntp_error = 0;
-       ntp_clear();
-       update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock,
-                               timekeeper.mult);
+ error: /* even if we error out, we forwarded the time, so call update */
+       timekeeping_update(true);
  
        write_sequnlock_irqrestore(&xtime_lock, flags);
  
        /* signal hrtimers about time change */
        clock_was_set();
  
-       return 0;
+       return ret;
  }
  EXPORT_SYMBOL(timekeeping_inject_offset);
  
@@@ -570,7 -591,20 +591,20 @@@ void __init timekeeping_init(void
        struct timespec now, boot;
  
        read_persistent_clock(&now);
+       if (!timespec_valid_strict(&now)) {
+               pr_warn("WARNING: Persistent clock returned invalid value!\n"
+                       "         Check your CMOS/BIOS settings.\n");
+               now.tv_sec = 0;
+               now.tv_nsec = 0;
+       }
        read_boot_clock(&boot);
+       if (!timespec_valid_strict(&boot)) {
+               pr_warn("WARNING: Boot clock returned invalid value!\n"
+                       "         Check your CMOS/BIOS settings.\n");
+               boot.tv_sec = 0;
+               boot.tv_nsec = 0;
+       }
  
        write_seqlock_irqsave(&xtime_lock, flags);
  
        }
        set_normalized_timespec(&wall_to_monotonic,
                                -boot.tv_sec, -boot.tv_nsec);
+       update_rt_offset();
        total_sleep_time.tv_sec = 0;
        total_sleep_time.tv_nsec = 0;
        write_sequnlock_irqrestore(&xtime_lock, flags);
  /* time in seconds when suspend began */
  static struct timespec timekeeping_suspend_time;
  
+ static void update_sleep_time(struct timespec t)
+ {
+       total_sleep_time = t;
+       offs_boot = timespec_to_ktime(t);
+ }
  /**
   * __timekeeping_inject_sleeptime - Internal function to add sleep interval
   * @delta: pointer to a timespec delta value
   */
  static void __timekeeping_inject_sleeptime(struct timespec *delta)
  {
-       if (!timespec_valid(delta)) {
+       if (!timespec_valid_strict(delta)) {
                printk(KERN_WARNING "__timekeeping_inject_sleeptime: Invalid "
                                        "sleep delta value!\n");
                return;
  
        xtime = timespec_add(xtime, *delta);
        wall_to_monotonic = timespec_sub(wall_to_monotonic, *delta);
-       total_sleep_time = timespec_add(total_sleep_time, *delta);
+       update_sleep_time(timespec_add(total_sleep_time, *delta));
  }
  
  
@@@ -645,10 -686,7 +686,7 @@@ void timekeeping_inject_sleeptime(struc
  
        __timekeeping_inject_sleeptime(delta);
  
-       timekeeper.ntp_error = 0;
-       ntp_clear();
-       update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock,
-                               timekeeper.mult);
+       timekeeping_update(true);
  
        write_sequnlock_irqrestore(&xtime_lock, flags);
  
@@@ -683,6 -721,7 +721,7 @@@ static void timekeeping_resume(void
        timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock);
        timekeeper.ntp_error = 0;
        timekeeping_suspended = 0;
+       timekeeping_update(false);
        write_sequnlock_irqrestore(&xtime_lock, flags);
  
        touch_softlockup_watchdog();
@@@ -834,9 -873,14 +873,14 @@@ static cycle_t logarithmic_accumulation
  
        timekeeper.xtime_nsec += timekeeper.xtime_interval << shift;
        while (timekeeper.xtime_nsec >= nsecps) {
+               int leap;
                timekeeper.xtime_nsec -= nsecps;
                xtime.tv_sec++;
-               second_overflow();
+               leap = second_overflow(xtime.tv_sec);
+               xtime.tv_sec += leap;
+               wall_to_monotonic.tv_sec -= leap;
+               if (leap)
+                       clock_was_set_delayed();
        }
  
        /* Accumulate raw time */
@@@ -881,6 -925,10 +925,10 @@@ static void update_wall_time(void
  #else
        offset = (clock->read(clock) - clock->cycle_last) & clock->mask;
  #endif
+       /* Check if there's really nothing to do */
+       if (offset < timekeeper.cycle_interval)
+               return;
        timekeeper.xtime_nsec = (s64)xtime.tv_nsec << timekeeper.shift;
  
        /*
         * xtime.tv_nsec isn't larger than NSEC_PER_SEC
         */
        if (unlikely(xtime.tv_nsec >= NSEC_PER_SEC)) {
+               int leap;
                xtime.tv_nsec -= NSEC_PER_SEC;
                xtime.tv_sec++;
-               second_overflow();
+               leap = second_overflow(xtime.tv_sec);
+               xtime.tv_sec += leap;
+               wall_to_monotonic.tv_sec -= leap;
+               if (leap)
+                       clock_was_set_delayed();
        }
  
-       /* check to see if there is a new clocksource to use */
-       update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock,
-                               timekeeper.mult);
+       timekeeping_update(false);
  }
  
  /**
@@@ -1002,7 -1053,7 +1053,7 @@@ void get_monotonic_boottime(struct time
        } while (read_seqretry(&xtime_lock, seq));
  
        set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec + sleep.tv_sec,
 -                      ts->tv_nsec + tomono.tv_nsec + sleep.tv_nsec + nsecs);
 +              (s64)ts->tv_nsec + tomono.tv_nsec + sleep.tv_nsec + nsecs);
  }
  EXPORT_SYMBOL_GPL(get_monotonic_boottime);
  
@@@ -1108,6 -1159,40 +1159,40 @@@ void get_xtime_and_monotonic_and_sleep_
        } while (read_seqretry(&xtime_lock, seq));
  }
  
+ #ifdef CONFIG_HIGH_RES_TIMERS
+ /**
+  * ktime_get_update_offsets - hrtimer helper
+  * @real:     pointer to storage for monotonic -> realtime offset
+  * @boot:     pointer to storage for monotonic -> boottime offset
+  *
+  * Returns current monotonic time and updates the offsets
+  * Called from hrtimer_interrupt() or retrigger_next_event()
+  */
+ ktime_t ktime_get_update_offsets(ktime_t *real, ktime_t *boot)
+ {
+       ktime_t now;
+       unsigned int seq;
+       u64 secs, nsecs;
+       do {
+               seq = read_seqbegin(&xtime_lock);
+               secs = xtime.tv_sec;
+               nsecs = xtime.tv_nsec;
+               nsecs += timekeeping_get_ns();
+               /* If arch requires, add in gettimeoffset() */
+               nsecs += arch_gettimeoffset();
+               *real = offs_real;
+               *boot = offs_boot;
+       } while (read_seqretry(&xtime_lock, seq));
+       now = ktime_add_ns(ktime_set(secs, 0), nsecs);
+       now = ktime_sub(now, *real);
+       return now;
+ }
+ #endif
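
ktime_get_update_offsets() lets the hrtimer interrupt path fetch the monotonic time together with the realtime and boottime offsets in a single seqlock-consistent read. A rough sketch of the intended caller, loosely modeled on hrtimer_interrupt()/retrigger_next_event() and not taken from this patch, is:

#include <linux/hrtimer.h>
#include <linux/ktime.h>

/* Refresh the per-clock offsets of an hrtimer base from one consistent
 * snapshot instead of reading xtime/wall_to_monotonic piecemeal. */
static ktime_t example_update_base_offsets(struct hrtimer_cpu_base *cpu_base)
{
        ktime_t now, offs_realtime, offs_boottime;

        now = ktime_get_update_offsets(&offs_realtime, &offs_boottime);

        /* CLOCK_MONOTONIC keeps a zero offset; the other bases are derived. */
        cpu_base->clock_base[HRTIMER_BASE_REALTIME].offset = offs_realtime;
        cpu_base->clock_base[HRTIMER_BASE_BOOTTIME].offset = offs_boottime;

        return now;     /* current CLOCK_MONOTONIC time */
}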
  /**
   * ktime_get_monotonic_offset() - get wall_to_monotonic in ktime_t format
   */
diff --combined mm/page_alloc.c
index e2f474da7ee2bcae724d8cd7b66bc594c4ed602f,eb6b3fd3ba82e182ea84536ef27ef762374b27d9..bfe789472b4a1639d7885db9c6029485d25a4605
@@@ -127,20 -127,6 +127,20 @@@ void pm_restrict_gfp_mask(void
        saved_gfp_mask = gfp_allowed_mask;
        gfp_allowed_mask &= ~GFP_IOFS;
  }
 +
 +static bool pm_suspending(void)
 +{
 +      if ((gfp_allowed_mask & GFP_IOFS) == GFP_IOFS)
 +              return false;
 +      return true;
 +}
 +
 +#else
 +
 +static bool pm_suspending(void)
 +{
 +      return false;
 +}
  #endif /* CONFIG_PM_SLEEP */
  
  #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
@@@ -190,7 -176,6 +190,7 @@@ static char * const zone_names[MAX_NR_Z
  };
  
  int min_free_kbytes = 1024;
 +int min_free_order_shift = 1;
  
  static unsigned long __meminitdata nr_kernel_pages;
  static unsigned long __meminitdata nr_all_pages;
@@@ -555,7 -540,7 +555,7 @@@ static inline void __free_one_page(stru
                combined_idx = buddy_idx & page_idx;
                higher_page = page + (combined_idx - page_idx);
                buddy_idx = __find_buddy_index(combined_idx, order + 1);
-               higher_buddy = page + (buddy_idx - combined_idx);
+               higher_buddy = higher_page + (buddy_idx - combined_idx);
                if (page_is_buddy(higher_page, higher_buddy, order + 1)) {
                        list_add_tail(&page->lru,
                                &zone->free_area[order].free_list[migratetype]);
@@@ -1502,7 -1487,7 +1502,7 @@@ static bool __zone_watermark_ok(struct 
                free_pages -= z->free_area[o].nr_free << o;
  
                /* Require fewer higher order pages to be free */
 -              min >>= 1;
 +              min >>= min_free_order_shift;
  
                if (free_pages <= min)
                        return false;
@@@ -1912,14 -1897,20 +1912,20 @@@ static struct page 
  __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
        struct zonelist *zonelist, enum zone_type high_zoneidx,
        nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
-       int migratetype, unsigned long *did_some_progress,
-       bool sync_migration)
+       int migratetype, bool sync_migration,
+       bool *deferred_compaction,
+       unsigned long *did_some_progress)
  {
        struct page *page;
  
-       if (!order || compaction_deferred(preferred_zone))
+       if (!order)
                return NULL;
  
+       if (compaction_deferred(preferred_zone)) {
+               *deferred_compaction = true;
+               return NULL;
+       }
        current->flags |= PF_MEMALLOC;
        *did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask,
                                                nodemask, sync_migration);
                 * but not enough to satisfy watermarks.
                 */
                count_vm_event(COMPACTFAIL);
-               defer_compaction(preferred_zone);
+               /*
+                * As async compaction considers a subset of pageblocks, only
+                * defer if the failure was a sync compaction failure.
+                */
+               if (sync_migration)
+                       defer_compaction(preferred_zone);
  
                cond_resched();
        }
@@@ -1959,8 -1956,9 +1971,9 @@@ static inline struct page 
  __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
        struct zonelist *zonelist, enum zone_type high_zoneidx,
        nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
-       int migratetype, unsigned long *did_some_progress,
-       bool sync_migration)
+       int migratetype, bool sync_migration,
+       bool *deferred_compaction,
+       unsigned long *did_some_progress)
  {
        return NULL;
  }
@@@ -2110,6 -2108,7 +2123,7 @@@ __alloc_pages_slowpath(gfp_t gfp_mask, 
        unsigned long pages_reclaimed = 0;
        unsigned long did_some_progress;
        bool sync_migration = false;
+       bool deferred_compaction = false;
  
        /*
         * In the slowpath, we sanity check order to avoid ever trying to
@@@ -2190,12 -2189,22 +2204,22 @@@ rebalance
                                        zonelist, high_zoneidx,
                                        nodemask,
                                        alloc_flags, preferred_zone,
-                                       migratetype, &did_some_progress,
-                                       sync_migration);
+                                       migratetype, sync_migration,
+                                       &deferred_compaction,
+                                       &did_some_progress);
        if (page)
                goto got_pg;
        sync_migration = true;
  
+       /*
+        * If compaction is deferred for high-order allocations, it is because
+        * sync compaction recently failed. If this is the case and the caller
+        * has requested the system not be heavily disrupted, fail the
+        * allocation now instead of entering direct reclaim
+        */
+       if (deferred_compaction && (gfp_mask & __GFP_NO_KSWAPD))
+               goto nopage;
        /* Try direct reclaim and then allocating */
        page = __alloc_pages_direct_reclaim(gfp_mask, order,
                                        zonelist, high_zoneidx,
  
                        goto restart;
                }
 +
 +              /*
 +               * Suspend converts GFP_KERNEL to __GFP_WAIT which can
 +               * prevent reclaim making forward progress without
 +               * invoking OOM. Bail if we are suspending
 +               */
 +              if (pm_suspending())
 +                      goto nopage;
        }
  
        /* Check if we should retry the allocation */
                                        zonelist, high_zoneidx,
                                        nodemask,
                                        alloc_flags, preferred_zone,
-                                       migratetype, &did_some_progress,
-                                       sync_migration);
+                                       migratetype, sync_migration,
+                                       &deferred_compaction,
+                                       &did_some_progress);
                if (page)
                        goto got_pg;
        }
@@@ -2291,8 -2293,9 +2316,9 @@@ __alloc_pages_nodemask(gfp_t gfp_mask, 
  {
        enum zone_type high_zoneidx = gfp_zone(gfp_mask);
        struct zone *preferred_zone;
-       struct page *page;
+       struct page *page = NULL;
        int migratetype = allocflags_to_migratetype(gfp_mask);
+       unsigned int cpuset_mems_cookie;
  
        gfp_mask &= gfp_allowed_mask;
  
        if (unlikely(!zonelist->_zonerefs->zone))
                return NULL;
  
-       get_mems_allowed();
+ retry_cpuset:
+       cpuset_mems_cookie = get_mems_allowed();
        /* The preferred zone is used for statistics later */
        first_zones_zonelist(zonelist, high_zoneidx,
                                nodemask ? : &cpuset_current_mems_allowed,
                                &preferred_zone);
-       if (!preferred_zone) {
-               put_mems_allowed();
-               return NULL;
-       }
+       if (!preferred_zone)
+               goto out;
  
        /* First allocation attempt */
        page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
                page = __alloc_pages_slowpath(gfp_mask, order,
                                zonelist, high_zoneidx, nodemask,
                                preferred_zone, migratetype);
-       put_mems_allowed();
  
        trace_mm_page_alloc(page, order, gfp_mask, migratetype);
+ out:
+       /*
+        * When updating a task's mems_allowed, it is possible to race with
+        * parallel threads in such a way that an allocation can fail while
+        * the mask is being updated. If a page allocation is about to fail,
+        * check if the cpuset changed during allocation and if so, retry.
+        */
+       if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
+               goto retry_cpuset;
        return page;
  }
  EXPORT_SYMBOL(__alloc_pages_nodemask);
@@@ -2555,13 -2568,15 +2591,15 @@@ void si_meminfo_node(struct sysinfo *va
  bool skip_free_areas_node(unsigned int flags, int nid)
  {
        bool ret = false;
+       unsigned int cpuset_mems_cookie;
  
        if (!(flags & SHOW_MEM_FILTER_NODES))
                goto out;
  
-       get_mems_allowed();
-       ret = !node_isset(nid, cpuset_current_mems_allowed);
-       put_mems_allowed();
+       do {
+               cpuset_mems_cookie = get_mems_allowed();
+               ret = !node_isset(nid, cpuset_current_mems_allowed);
+       } while (!put_mems_allowed(cpuset_mems_cookie));
  out:
        return ret;
  }
@@@ -3441,25 -3456,33 +3479,33 @@@ static void setup_zone_migrate_reserve(
                if (page_to_nid(page) != zone_to_nid(zone))
                        continue;
  
-               /* Blocks with reserved pages will never free, skip them. */
-               block_end_pfn = min(pfn + pageblock_nr_pages, end_pfn);
-               if (pageblock_is_reserved(pfn, block_end_pfn))
-                       continue;
                block_migratetype = get_pageblock_migratetype(page);
  
-               /* If this block is reserved, account for it */
-               if (reserve > 0 && block_migratetype == MIGRATE_RESERVE) {
-                       reserve--;
-                       continue;
-               }
+               /* Only test what is necessary when the reserves are not met */
+               if (reserve > 0) {
+                       /*
+                        * Blocks with reserved pages will never free, skip
+                        * them.
+                        */
+                       block_end_pfn = min(pfn + pageblock_nr_pages, end_pfn);
+                       if (pageblock_is_reserved(pfn, block_end_pfn))
+                               continue;
  
-               /* Suitable for reserving if this block is movable */
-               if (reserve > 0 && block_migratetype == MIGRATE_MOVABLE) {
-                       set_pageblock_migratetype(page, MIGRATE_RESERVE);
-                       move_freepages_block(zone, page, MIGRATE_RESERVE);
-                       reserve--;
-                       continue;
+                       /* If this block is reserved, account for it */
+                       if (block_migratetype == MIGRATE_RESERVE) {
+                               reserve--;
+                               continue;
+                       }
+                       /* Suitable for reserving if this block is movable */
+                       if (block_migratetype == MIGRATE_MOVABLE) {
+                               set_pageblock_migratetype(page,
+                                                       MIGRATE_RESERVE);
+                               move_freepages_block(zone, page,
+                                                       MIGRATE_RESERVE);
+                               reserve--;
+                               continue;
+                       }
                }
  
                /*
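
The cpuset_mems_cookie retry loops above bracket the allocation with get_mems_allowed()/put_mems_allowed() so a concurrent mems_allowed update is detected and retried rather than turned into a spurious allocation failure. The cpuset.h side of this series is not shown here, but based on the mems_allowed_seq seqcount initialized in kernel/fork.c above, the cookie helpers are expected to look roughly like this (an assumption, not quoted from the patch):

#include <linux/sched.h>
#include <linux/seqlock.h>

/* Capture the current mems_allowed generation of this task. */
static inline unsigned int example_get_mems_allowed(void)
{
        return read_seqcount_begin(&current->mems_allowed_seq);
}

/* Returns true when mems_allowed stayed stable across the critical
 * section; callers retry the allocation when this returns false and
 * nothing was allocated, exactly as __alloc_pages_nodemask() does above. */
static inline bool example_put_mems_allowed(unsigned int seq)
{
        return !read_seqcount_retry(&current->mems_allowed_seq, seq);
}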
diff --combined mm/shmem.c
index 883e98f78ca97a714175be2ad807d08da7ce9d0f,769941f7209581f2fbe4e2d1842cb9fbea1c55c7..df31a4432938383e397c8566792092097a7c15d7
@@@ -2348,12 -2348,14 +2348,14 @@@ static struct dentry *shmem_fh_to_dentr
  {
        struct inode *inode;
        struct dentry *dentry = NULL;
-       u64 inum = fid->raw[2];
-       inum = (inum << 32) | fid->raw[1];
+       u64 inum;
  
        if (fh_len < 3)
                return NULL;
  
+       inum = fid->raw[2];
+       inum = (inum << 32) | fid->raw[1];
        inode = ilookup5(sb, (unsigned long)(inum + fid->raw[0]),
                        shmem_match, fid->raw);
        if (inode) {
@@@ -3015,15 -3017,6 +3017,15 @@@ put_memory
  }
  EXPORT_SYMBOL_GPL(shmem_file_setup);
  
 +void shmem_set_file(struct vm_area_struct *vma, struct file *file)
 +{
 +      if (vma->vm_file)
 +              fput(vma->vm_file);
 +      vma->vm_file = file;
 +      vma->vm_ops = &shmem_vm_ops;
 +      vma->vm_flags |= VM_CAN_NONLINEAR;
 +}
 +
  /**
   * shmem_zero_setup - setup a shared anonymous mapping
   * @vma: the vma to be mmapped is prepared by do_mmap_pgoff
@@@ -3037,7 -3030,11 +3039,7 @@@ int shmem_zero_setup(struct vm_area_str
        if (IS_ERR(file))
                return PTR_ERR(file);
  
 -      if (vma->vm_file)
 -              fput(vma->vm_file);
 -      vma->vm_file = file;
 -      vma->vm_ops = &shmem_vm_ops;
 -      vma->vm_flags |= VM_CAN_NONLINEAR;
 +      shmem_set_file(vma, file);
        return 0;
  }
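
shmem_set_file() factors out the vma takeover that shmem_zero_setup() performs, so out-of-tree drivers (Android's ashmem is the usual consumer) can back an mmap with their own shmem file. A hedged sketch of such an mmap handler, with purely illustrative names and assuming the shmem_set_file() declaration is exported via linux/mm.h, is:

#include <linux/err.h>
#include <linux/fs.h>
#include <linux/mm.h>

/* Illustrative driver mmap: create a shmem file sized to the vma and let
 * shmem_set_file() install it (drops any previous vm_file and switches
 * vm_ops to shmem's, as shmem_zero_setup() now does). */
static int example_mmap(struct file *file, struct vm_area_struct *vma)
{
        struct file *backing;

        backing = shmem_file_setup("example/backing",
                                   vma->vm_end - vma->vm_start, vma->vm_flags);
        if (IS_ERR(backing))
                return PTR_ERR(backing);

        shmem_set_file(vma, backing);
        return 0;
}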
  
diff --combined net/bluetooth/hci_sock.c
index ff02cf5e77ccdd576d2f42f78f2c4715e3a47d5e,eb5cb6f256f279f2d1551d4f36a113acf4d4855f..ce1424a330b9aef29ecb3c7050e308dc4d67c17e
@@@ -180,24 -180,82 +180,24 @@@ static int hci_sock_release(struct sock
        return 0;
  }
  
 -struct bdaddr_list *hci_blacklist_lookup(struct hci_dev *hdev, bdaddr_t *bdaddr)
 -{
 -      struct list_head *p;
 -
 -      list_for_each(p, &hdev->blacklist) {
 -              struct bdaddr_list *b;
 -
 -              b = list_entry(p, struct bdaddr_list, list);
 -
 -              if (bacmp(bdaddr, &b->bdaddr) == 0)
 -                      return b;
 -      }
 -
 -      return NULL;
 -}
 -
 -static int hci_blacklist_add(struct hci_dev *hdev, void __user *arg)
 +static int hci_sock_blacklist_add(struct hci_dev *hdev, void __user *arg)
  {
        bdaddr_t bdaddr;
 -      struct bdaddr_list *entry;
  
        if (copy_from_user(&bdaddr, arg, sizeof(bdaddr)))
                return -EFAULT;
  
 -      if (bacmp(&bdaddr, BDADDR_ANY) == 0)
 -              return -EBADF;
 -
 -      if (hci_blacklist_lookup(hdev, &bdaddr))
 -              return -EEXIST;
 -
 -      entry = kzalloc(sizeof(struct bdaddr_list), GFP_KERNEL);
 -      if (!entry)
 -              return -ENOMEM;
 -
 -      bacpy(&entry->bdaddr, &bdaddr);
 -
 -      list_add(&entry->list, &hdev->blacklist);
 -
 -      return 0;
 +      return hci_blacklist_add(hdev, &bdaddr);
  }
  
 -int hci_blacklist_clear(struct hci_dev *hdev)
 -{
 -      struct list_head *p, *n;
 -
 -      list_for_each_safe(p, n, &hdev->blacklist) {
 -              struct bdaddr_list *b;
 -
 -              b = list_entry(p, struct bdaddr_list, list);
 -
 -              list_del(p);
 -              kfree(b);
 -      }
 -
 -      return 0;
 -}
 -
 -static int hci_blacklist_del(struct hci_dev *hdev, void __user *arg)
 +static int hci_sock_blacklist_del(struct hci_dev *hdev, void __user *arg)
  {
        bdaddr_t bdaddr;
 -      struct bdaddr_list *entry;
  
        if (copy_from_user(&bdaddr, arg, sizeof(bdaddr)))
                return -EFAULT;
  
 -      if (bacmp(&bdaddr, BDADDR_ANY) == 0)
 -              return hci_blacklist_clear(hdev);
 -
 -      entry = hci_blacklist_lookup(hdev, &bdaddr);
 -      if (!entry)
 -              return -ENOENT;
 -
 -      list_del(&entry->list);
 -      kfree(entry);
 -
 -      return 0;
 +      return hci_blacklist_del(hdev, &bdaddr);
  }
  
  /* Ioctls that require bound socket */
@@@ -232,12 -290,12 +232,12 @@@ static inline int hci_sock_bound_ioctl(
        case HCIBLOCKADDR:
                if (!capable(CAP_NET_ADMIN))
                        return -EACCES;
 -              return hci_blacklist_add(hdev, (void __user *) arg);
 +              return hci_sock_blacklist_add(hdev, (void __user *) arg);
  
        case HCIUNBLOCKADDR:
                if (!capable(CAP_NET_ADMIN))
                        return -EACCES;
 -              return hci_blacklist_del(hdev, (void __user *) arg);
 +              return hci_sock_blacklist_del(hdev, (void __user *) arg);
  
        default:
                if (hdev->ioctl)
@@@ -374,6 -432,7 +374,7 @@@ static int hci_sock_getname(struct sock
        *addr_len = sizeof(*haddr);
        haddr->hci_family = AF_BLUETOOTH;
        haddr->hci_dev    = hdev->id;
+       haddr->hci_channel = 0;
  
        release_sock(sk);
        return 0;
@@@ -586,6 -645,7 +587,7 @@@ static int hci_sock_setsockopt(struct s
                {
                        struct hci_filter *f = &hci_pi(sk)->filter;
  
+                       memset(&uf, 0, sizeof(uf));
                        uf.type_mask = f->type_mask;
                        uf.opcode    = f->opcode;
                        uf.event_mask[0] = *((u32 *) f->event_mask + 0);
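
The hci_channel and filter-copy initializations in this file follow the standard rule for copying structures to user space: fully initialize (or memset) the kernel buffer first so padding bytes and untouched members cannot leak stack contents. A generic illustration, unrelated to any specific Bluetooth structure, is:

#include <linux/string.h>
#include <linux/types.h>
#include <linux/uaccess.h>

struct example_reply {
        __u16 opcode;
        __u8  status;
        /* one byte of implicit padding sits here */
        __u32 value;
};

static int example_fill_reply(void __user *arg, u32 value)
{
        struct example_reply rep;

        memset(&rep, 0, sizeof(rep));   /* clears padding and unset fields */
        rep.opcode = 0x1234;            /* arbitrary illustrative opcode */
        rep.value  = value;

        if (copy_to_user(arg, &rep, sizeof(rep)))
                return -EFAULT;
        return 0;
}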
diff --combined net/bluetooth/l2cap_sock.c
index 61f1f623091dbcd89992a64a635fc8de62963795,9810d4545d3f3cb86738dd0a583082f46fea5a9a..785e84f7dee5e1ef548dbeb0ecf375043acb3add
  #include <net/bluetooth/bluetooth.h>
  #include <net/bluetooth/hci_core.h>
  #include <net/bluetooth/l2cap.h>
 +#include <net/bluetooth/smp.h>
  
  static const struct proto_ops l2cap_sock_ops;
 -
 -/* ---- L2CAP timers ---- */
 -static void l2cap_sock_timeout(unsigned long arg)
 -{
 -      struct sock *sk = (struct sock *) arg;
 -      int reason;
 -
 -      BT_DBG("sock %p state %d", sk, sk->sk_state);
 -
 -      bh_lock_sock(sk);
 -
 -      if (sock_owned_by_user(sk)) {
 -              /* sk is owned by user. Try again later */
 -              l2cap_sock_set_timer(sk, HZ / 5);
 -              bh_unlock_sock(sk);
 -              sock_put(sk);
 -              return;
 -      }
 -
 -      if (sk->sk_state == BT_CONNECTED || sk->sk_state == BT_CONFIG)
 -              reason = ECONNREFUSED;
 -      else if (sk->sk_state == BT_CONNECT &&
 -                      l2cap_pi(sk)->chan->sec_level != BT_SECURITY_SDP)
 -              reason = ECONNREFUSED;
 -      else
 -              reason = ETIMEDOUT;
 -
 -      __l2cap_sock_close(sk, reason);
 -
 -      bh_unlock_sock(sk);
 -
 -      l2cap_sock_kill(sk);
 -      sock_put(sk);
 -}
 -
 -void l2cap_sock_set_timer(struct sock *sk, long timeout)
 -{
 -      BT_DBG("sk %p state %d timeout %ld", sk, sk->sk_state, timeout);
 -      sk_reset_timer(sk, &sk->sk_timer, jiffies + timeout);
 -}
 -
 -void l2cap_sock_clear_timer(struct sock *sk)
 -{
 -      BT_DBG("sock %p state %d", sk, sk->sk_state);
 -      sk_stop_timer(sk, &sk->sk_timer);
 -}
 +static void l2cap_sock_init(struct sock *sk, struct sock *parent);
 +static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio);
  
  static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
  {
                chan->sec_level = BT_SECURITY_SDP;
  
        bacpy(&bt_sk(sk)->src, &la.l2_bdaddr);
 +
 +      chan->state = BT_BOUND;
        sk->sk_state = BT_BOUND;
  
  done:
@@@ -121,7 -162,7 +121,7 @@@ static int l2cap_sock_connect(struct so
  
        lock_sock(sk);
  
 -      if ((sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM)
 +      if (chan->chan_type == L2CAP_CHAN_CONN_ORIENTED
                        && !(la.l2_psm || la.l2_cid)) {
                err = -EINVAL;
                goto done;
        }
  
        /* PSM must be odd and lsb of upper byte must be 0 */
 -      if ((__le16_to_cpu(la.l2_psm) & 0x0101) != 0x0001 &&
 -                              sk->sk_type != SOCK_RAW && !la.l2_cid) {
 +      if ((__le16_to_cpu(la.l2_psm) & 0x0101) != 0x0001 && !la.l2_cid &&
 +                                      chan->chan_type != L2CAP_CHAN_RAW) {
                err = -EINVAL;
                goto done;
        }
@@@ -217,8 -258,6 +217,8 @@@ static int l2cap_sock_listen(struct soc
  
        sk->sk_max_ack_backlog = backlog;
        sk->sk_ack_backlog = 0;
 +
 +      chan->state = BT_LISTEN;
        sk->sk_state = BT_LISTEN;
  
  done:
@@@ -235,26 -274,30 +235,26 @@@ static int l2cap_sock_accept(struct soc
  
        lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
  
 -      if (sk->sk_state != BT_LISTEN) {
 -              err = -EBADFD;
 -              goto done;
 -      }
 -
        timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
  
        BT_DBG("sk %p timeo %ld", sk, timeo);
  
        /* Wait for an incoming connection. (wake-one). */
        add_wait_queue_exclusive(sk_sleep(sk), &wait);
 -      while (!(nsk = bt_accept_dequeue(sk, newsock))) {
 +      while (1) {
                set_current_state(TASK_INTERRUPTIBLE);
 -              if (!timeo) {
 -                      err = -EAGAIN;
 +
 +              if (sk->sk_state != BT_LISTEN) {
 +                      err = -EBADFD;
                        break;
                }
  
 -              release_sock(sk);
 -              timeo = schedule_timeout(timeo);
 -              lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
 +              nsk = bt_accept_dequeue(sk, newsock);
 +              if (nsk)
 +                      break;
  
 -              if (sk->sk_state != BT_LISTEN) {
 -                      err = -EBADFD;
 +              if (!timeo) {
 +                      err = -EAGAIN;
                        break;
                }
  
                        err = sock_intr_errno(timeo);
                        break;
                }
 +
 +              release_sock(sk);
 +              timeo = schedule_timeout(timeo);
 +              lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
        }
 -      set_current_state(TASK_RUNNING);
 +      __set_current_state(TASK_RUNNING);
        remove_wait_queue(sk_sleep(sk), &wait);
  
        if (err)
@@@ -290,6 -329,7 +290,7 @@@ static int l2cap_sock_getname(struct so
  
        BT_DBG("sock %p, sk %p", sock, sk);
  
+       memset(la, 0, sizeof(struct sockaddr_l2));
        addr->sa_family = AF_BLUETOOTH;
        *len = sizeof(struct sockaddr_l2);
  
@@@ -398,7 -438,6 +399,7 @@@ static int l2cap_sock_getsockopt(struc
        struct sock *sk = sock->sk;
        struct l2cap_chan *chan = l2cap_pi(sk)->chan;
        struct bt_security sec;
 +      struct bt_power pwr;
        int len, err = 0;
  
        BT_DBG("sk %p", sk);
  
        switch (optname) {
        case BT_SECURITY:
 -              if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_STREAM
 -                              && sk->sk_type != SOCK_RAW) {
 +              if (chan->chan_type != L2CAP_CHAN_CONN_ORIENTED &&
 +                                      chan->chan_type != L2CAP_CHAN_RAW) {
                        err = -EINVAL;
                        break;
                }
  
 +              memset(&sec, 0, sizeof(sec));
                sec.level = chan->sec_level;
  
 +              if (sk->sk_state == BT_CONNECTED)
 +                      sec.key_size = chan->conn->hcon->enc_key_size;
 +
                len = min_t(unsigned int, len, sizeof(sec));
                if (copy_to_user(optval, (char *) &sec, len))
                        err = -EFAULT;
  
                break;
  
 +      case BT_POWER:
 +              if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_STREAM
 +                              && sk->sk_type != SOCK_RAW) {
 +                      err = -EINVAL;
 +                      break;
 +              }
 +
 +              pwr.force_active = chan->force_active;
 +
 +              len = min_t(unsigned int, len, sizeof(pwr));
 +              if (copy_to_user(optval, (char *) &pwr, len))
 +                      err = -EFAULT;
 +
 +              break;
 +
        default:
                err = -ENOPROTOOPT;
                break;
@@@ -516,7 -536,7 +517,7 @@@ static int l2cap_sock_setsockopt_old(st
                chan->mode = opts.mode;
                switch (chan->mode) {
                case L2CAP_MODE_BASIC:
 -                      chan->conf_state &= ~L2CAP_CONF_STATE2_DEVICE;
 +                      clear_bit(CONF_STATE2_DEVICE, &chan->conf_state);
                        break;
                case L2CAP_MODE_ERTM:
                case L2CAP_MODE_STREAMING:
@@@ -566,8 -586,6 +567,8 @@@ static int l2cap_sock_setsockopt(struc
        struct sock *sk = sock->sk;
        struct l2cap_chan *chan = l2cap_pi(sk)->chan;
        struct bt_security sec;
 +      struct bt_power pwr;
 +      struct l2cap_conn *conn;
        int len, err = 0;
        u32 opt;
  
  
        switch (optname) {
        case BT_SECURITY:
 -              if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_STREAM
 -                              && sk->sk_type != SOCK_RAW) {
 +              if (chan->chan_type != L2CAP_CHAN_CONN_ORIENTED &&
 +                                      chan->chan_type != L2CAP_CHAN_RAW) {
                        err = -EINVAL;
                        break;
                }
                }
  
                chan->sec_level = sec.level;
 +
 +              conn = chan->conn;
 +              if (conn && chan->scid == L2CAP_CID_LE_DATA) {
 +                      if (!conn->hcon->out) {
 +                              err = -EINVAL;
 +                              break;
 +                      }
 +
 +                      if (smp_conn_security(conn, sec.level))
 +                              break;
 +
 +                      err = 0;
 +                      sk->sk_state = BT_CONFIG;
 +              }
                break;
  
        case BT_DEFER_SETUP:
                chan->flushable = opt;
                break;
  
 +      case BT_POWER:
 +              if (chan->chan_type != L2CAP_CHAN_CONN_ORIENTED &&
 +                                      chan->chan_type != L2CAP_CHAN_RAW) {
 +                      err = -EINVAL;
 +                      break;
 +              }
 +
 +              pwr.force_active = BT_POWER_FORCE_ACTIVE_ON;
 +
 +              len = min_t(unsigned int, sizeof(pwr), optlen);
 +              if (copy_from_user((char *) &pwr, optval, len)) {
 +                      err = -EFAULT;
 +                      break;
 +              }
 +              chan->force_active = pwr.force_active;
 +              break;
 +
        default:
                err = -ENOPROTOOPT;
                break;
@@@ -688,6 -675,8 +689,6 @@@ static int l2cap_sock_sendmsg(struct ki
  {
        struct sock *sk = sock->sk;
        struct l2cap_chan *chan = l2cap_pi(sk)->chan;
 -      struct sk_buff *skb;
 -      u16 control;
        int err;
  
        BT_DBG("sock %p, sk %p", sock, sk);
        lock_sock(sk);
  
        if (sk->sk_state != BT_CONNECTED) {
 -              err = -ENOTCONN;
 -              goto done;
 -      }
 -
 -      /* Connectionless channel */
 -      if (sk->sk_type == SOCK_DGRAM) {
 -              skb = l2cap_create_connless_pdu(chan, msg, len);
 -              if (IS_ERR(skb)) {
 -                      err = PTR_ERR(skb);
 -              } else {
 -                      l2cap_do_send(chan, skb);
 -                      err = len;
 -              }
 -              goto done;
 +              release_sock(sk);
 +              return -ENOTCONN;
        }
  
 -      switch (chan->mode) {
 -      case L2CAP_MODE_BASIC:
 -              /* Check outgoing MTU */
 -              if (len > chan->omtu) {
 -                      err = -EMSGSIZE;
 -                      goto done;
 -              }
 -
 -              /* Create a basic PDU */
 -              skb = l2cap_create_basic_pdu(chan, msg, len);
 -              if (IS_ERR(skb)) {
 -                      err = PTR_ERR(skb);
 -                      goto done;
 -              }
 -
 -              l2cap_do_send(chan, skb);
 -              err = len;
 -              break;
 -
 -      case L2CAP_MODE_ERTM:
 -      case L2CAP_MODE_STREAMING:
 -              /* Entire SDU fits into one PDU */
 -              if (len <= chan->remote_mps) {
 -                      control = L2CAP_SDU_UNSEGMENTED;
 -                      skb = l2cap_create_iframe_pdu(chan, msg, len, control,
 -                                                                      0);
 -                      if (IS_ERR(skb)) {
 -                              err = PTR_ERR(skb);
 -                              goto done;
 -                      }
 -                      __skb_queue_tail(&chan->tx_q, skb);
 -
 -                      if (chan->tx_send_head == NULL)
 -                              chan->tx_send_head = skb;
 -
 -              } else {
 -              /* Segment SDU into multiples PDUs */
 -                      err = l2cap_sar_segment_sdu(chan, msg, len);
 -                      if (err < 0)
 -                              goto done;
 -              }
 -
 -              if (chan->mode == L2CAP_MODE_STREAMING) {
 -                      l2cap_streaming_send(chan);
 -                      err = len;
 -                      break;
 -              }
 -
 -              if ((chan->conn_state & L2CAP_CONN_REMOTE_BUSY) &&
 -                              (chan->conn_state & L2CAP_CONN_WAIT_F)) {
 -                      err = len;
 -                      break;
 -              }
 -              err = l2cap_ertm_send(chan);
 -
 -              if (err >= 0)
 -                      err = len;
 -              break;
 -
 -      default:
 -              BT_DBG("bad state %1.1x", chan->mode);
 -              err = -EBADFD;
 -      }
 +      err = l2cap_chan_send(chan, msg, len);
  
 -done:
        release_sock(sk);
        return err;
  }
  static int l2cap_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len, int flags)
  {
        struct sock *sk = sock->sk;
 +      struct l2cap_pinfo *pi = l2cap_pi(sk);
 +      int err;
  
        lock_sock(sk);
  
        if (sk->sk_state == BT_CONNECT2 && bt_sk(sk)->defer_setup) {
                sk->sk_state = BT_CONFIG;
  
 -              __l2cap_connect_rsp_defer(l2cap_pi(sk)->chan);
 +              __l2cap_connect_rsp_defer(pi->chan);
                release_sock(sk);
                return 0;
        }
        release_sock(sk);
  
        if (sock->type == SOCK_STREAM)
 -              return bt_sock_stream_recvmsg(iocb, sock, msg, len, flags);
 +              err = bt_sock_stream_recvmsg(iocb, sock, msg, len, flags);
 +      else
 +              err = bt_sock_recvmsg(iocb, sock, msg, len, flags);
 +
 +      if (pi->chan->mode != L2CAP_MODE_ERTM)
 +              return err;
 +
 +      /* Attempt to put pending rx data in the socket buffer */
 +
 +      lock_sock(sk);
 +
 +      if (!test_bit(CONN_LOCAL_BUSY, &pi->chan->conn_state))
 +              goto done;
 +
 +      if (pi->rx_busy_skb) {
 +              if (!sock_queue_rcv_skb(sk, pi->rx_busy_skb))
 +                      pi->rx_busy_skb = NULL;
 +              else
 +                      goto done;
 +      }
  
 -      return bt_sock_recvmsg(iocb, sock, msg, len, flags);
 +      /* Restore data flow when half of the receive buffer is
 +       * available.  This avoids resending large numbers of
 +       * frames.
 +       */
 +      if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf >> 1)
 +              l2cap_chan_busy(pi->chan, 0);
 +
 +done:
 +      release_sock(sk);
 +      return err;
  }
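For a feel of the resume threshold used above (numbers are illustrative, not from the patch):

	/*
	 * Example: with sk_rcvbuf = 163840 bytes, CONN_LOCAL_BUSY is only
	 * cleared once sk_rmem_alloc has drained to 81920 bytes
	 * (sk_rcvbuf >> 1), rather than after every recvmsg() call, so the
	 * remote side is not poked back into transmission for each frame.
	 */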
  
  /* Kill socket (only if zapped and orphan)
   * Must be called on unlocked socket.
   */
 -void l2cap_sock_kill(struct sock *sk)
 +static void l2cap_sock_kill(struct sock *sk)
  {
        if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket)
                return;
        sock_put(sk);
  }
  
 -/* Must be called on unlocked socket. */
 -static void l2cap_sock_close(struct sock *sk)
 -{
 -      l2cap_sock_clear_timer(sk);
 -      lock_sock(sk);
 -      __l2cap_sock_close(sk, ECONNRESET);
 -      release_sock(sk);
 -      l2cap_sock_kill(sk);
 -}
 -
 -static void l2cap_sock_cleanup_listen(struct sock *parent)
 -{
 -      struct sock *sk;
 -
 -      BT_DBG("parent %p", parent);
 -
 -      /* Close not yet accepted channels */
 -      while ((sk = bt_accept_dequeue(parent, NULL)))
 -              l2cap_sock_close(sk);
 -
 -      parent->sk_state = BT_CLOSED;
 -      sock_set_flag(parent, SOCK_ZAPPED);
 -}
 -
 -void __l2cap_sock_close(struct sock *sk, int reason)
 -{
 -      struct l2cap_chan *chan = l2cap_pi(sk)->chan;
 -      struct l2cap_conn *conn = chan->conn;
 -
 -      BT_DBG("sk %p state %d socket %p", sk, sk->sk_state, sk->sk_socket);
 -
 -      switch (sk->sk_state) {
 -      case BT_LISTEN:
 -              l2cap_sock_cleanup_listen(sk);
 -              break;
 -
 -      case BT_CONNECTED:
 -      case BT_CONFIG:
 -              if ((sk->sk_type == SOCK_SEQPACKET ||
 -                                      sk->sk_type == SOCK_STREAM) &&
 -                                      conn->hcon->type == ACL_LINK) {
 -                      l2cap_sock_set_timer(sk, sk->sk_sndtimeo);
 -                      l2cap_send_disconn_req(conn, chan, reason);
 -              } else
 -                      l2cap_chan_del(chan, reason);
 -              break;
 -
 -      case BT_CONNECT2:
 -              if ((sk->sk_type == SOCK_SEQPACKET ||
 -                                      sk->sk_type == SOCK_STREAM) &&
 -                                      conn->hcon->type == ACL_LINK) {
 -                      struct l2cap_conn_rsp rsp;
 -                      __u16 result;
 -
 -                      if (bt_sk(sk)->defer_setup)
 -                              result = L2CAP_CR_SEC_BLOCK;
 -                      else
 -                              result = L2CAP_CR_BAD_PSM;
 -
 -                      rsp.scid   = cpu_to_le16(chan->dcid);
 -                      rsp.dcid   = cpu_to_le16(chan->scid);
 -                      rsp.result = cpu_to_le16(result);
 -                      rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO);
 -                      l2cap_send_cmd(conn, chan->ident, L2CAP_CONN_RSP,
 -                                                      sizeof(rsp), &rsp);
 -              }
 -
 -              l2cap_chan_del(chan, reason);
 -              break;
 -
 -      case BT_CONNECT:
 -      case BT_DISCONN:
 -              l2cap_chan_del(chan, reason);
 -              break;
 -
 -      default:
 -              sock_set_flag(sk, SOCK_ZAPPED);
 -              break;
 -      }
 -}
 -
  static int l2cap_sock_shutdown(struct socket *sock, int how)
  {
        struct sock *sk = sock->sk;
                        err = __l2cap_wait_ack(sk);
  
                sk->sk_shutdown = SHUTDOWN_MASK;
 -              l2cap_sock_clear_timer(sk);
 -              __l2cap_sock_close(sk, 0);
 +              l2cap_chan_close(chan, 0);
  
                if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime)
                        err = bt_sock_wait_state(sk, BT_CLOSED,
@@@ -829,85 -945,15 +830,85 @@@ static int l2cap_sock_release(struct so
        return err;
  }
  
 +static struct l2cap_chan *l2cap_sock_new_connection_cb(void *data)
 +{
 +      struct sock *sk, *parent = data;
 +
 +      sk = l2cap_sock_alloc(sock_net(parent), NULL, BTPROTO_L2CAP,
 +                                                              GFP_ATOMIC);
 +      if (!sk)
 +              return NULL;
 +
 +      l2cap_sock_init(sk, parent);
 +
 +      return l2cap_pi(sk)->chan;
 +}
 +
 +static int l2cap_sock_recv_cb(void *data, struct sk_buff *skb)
 +{
 +      int err;
 +      struct sock *sk = data;
 +      struct l2cap_pinfo *pi = l2cap_pi(sk);
 +
 +      if (pi->rx_busy_skb)
 +              return -ENOMEM;
 +
 +      err = sock_queue_rcv_skb(sk, skb);
 +
 +      /* For ERTM, handle one skb that doesn't fit into the recv
 +       * buffer.  This is important to do because the data frames
 +       * have already been acked, so the skb cannot be discarded.
 +       *
 +       * Notify the l2cap core that the buffer is full, so the
 +       * LOCAL_BUSY state is entered and no more frames are
 +       * acked and reassembled until there is buffer space
 +       * available.
 +       */
 +      if (err < 0 && pi->chan->mode == L2CAP_MODE_ERTM) {
 +              pi->rx_busy_skb = skb;
 +              l2cap_chan_busy(pi->chan, 1);
 +              err = 0;
 +      }
 +
 +      return err;
 +}
 +
 +static void l2cap_sock_close_cb(void *data)
 +{
 +      struct sock *sk = data;
 +
 +      l2cap_sock_kill(sk);
 +}
 +
 +static void l2cap_sock_state_change_cb(void *data, int state)
 +{
 +      struct sock *sk = data;
 +
 +      sk->sk_state = state;
 +}
 +
 +static struct l2cap_ops l2cap_chan_ops = {
 +      .name           = "L2CAP Socket Interface",
 +      .new_connection = l2cap_sock_new_connection_cb,
 +      .recv           = l2cap_sock_recv_cb,
 +      .close          = l2cap_sock_close_cb,
 +      .state_change   = l2cap_sock_state_change_cb,
 +};
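The ops table above is the socket-facing half of the new channel abstraction. A minimal sketch of the calling convention assumed on the core side (the helper name is hypothetical, for illustration only):

	/* The L2CAP core reaches its owner only through chan->ops plus the
	 * opaque chan->data cookie, which l2cap_sock_init() below points at
	 * the owning struct sock.
	 */
	static void l2cap_chan_set_state(struct l2cap_chan *chan, int state)
	{
		chan->ops->state_change(chan->data, state);
	}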
 +
  static void l2cap_sock_destruct(struct sock *sk)
  {
        BT_DBG("sk %p", sk);
  
 +      if (l2cap_pi(sk)->rx_busy_skb) {
 +              kfree_skb(l2cap_pi(sk)->rx_busy_skb);
 +              l2cap_pi(sk)->rx_busy_skb = NULL;
 +      }
 +
        skb_queue_purge(&sk->sk_receive_queue);
        skb_queue_purge(&sk->sk_write_queue);
  }
  
 -void l2cap_sock_init(struct sock *sk, struct sock *parent)
 +static void l2cap_sock_init(struct sock *sk, struct sock *parent)
  {
        struct l2cap_pinfo *pi = l2cap_pi(sk);
        struct l2cap_chan *chan = pi->chan;
                sk->sk_type = parent->sk_type;
                bt_sk(sk)->defer_setup = bt_sk(parent)->defer_setup;
  
 +              chan->chan_type = pchan->chan_type;
                chan->imtu = pchan->imtu;
                chan->omtu = pchan->omtu;
                chan->conf_state = pchan->conf_state;
                chan->role_switch = pchan->role_switch;
                chan->force_reliable = pchan->force_reliable;
                chan->flushable = pchan->flushable;
 +              chan->force_active = pchan->force_active;
        } else {
 +
 +              switch (sk->sk_type) {
 +              case SOCK_RAW:
 +                      chan->chan_type = L2CAP_CHAN_RAW;
 +                      break;
 +              case SOCK_DGRAM:
 +                      chan->chan_type = L2CAP_CHAN_CONN_LESS;
 +                      break;
 +              case SOCK_SEQPACKET:
 +              case SOCK_STREAM:
 +                      chan->chan_type = L2CAP_CHAN_CONN_ORIENTED;
 +                      break;
 +              }
 +
                chan->imtu = L2CAP_DEFAULT_MTU;
                chan->omtu = 0;
                if (!disable_ertm && sk->sk_type == SOCK_STREAM) {
                        chan->mode = L2CAP_MODE_ERTM;
 -                      chan->conf_state |= L2CAP_CONF_STATE2_DEVICE;
 +                      set_bit(CONF_STATE2_DEVICE, &chan->conf_state);
                } else {
                        chan->mode = L2CAP_MODE_BASIC;
                }
                chan->role_switch = 0;
                chan->force_reliable = 0;
                chan->flushable = BT_FLUSHABLE_OFF;
 +              chan->force_active = BT_POWER_FORCE_ACTIVE_ON;
 +
        }
  
        /* Default config options */
        chan->flush_to = L2CAP_DEFAULT_FLUSH_TO;
 +
 +      chan->data = sk;
 +      chan->ops = &l2cap_chan_ops;
  }
  
  static struct proto l2cap_proto = {
        .obj_size       = sizeof(struct l2cap_pinfo)
  };
  
 -struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio)
 +static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio)
  {
        struct sock *sk;
 +      struct l2cap_chan *chan;
  
        sk = sk_alloc(net, PF_BLUETOOTH, prio, &l2cap_proto);
        if (!sk)
        INIT_LIST_HEAD(&bt_sk(sk)->accept_q);
  
        sk->sk_destruct = l2cap_sock_destruct;
 -      sk->sk_sndtimeo = msecs_to_jiffies(L2CAP_CONN_TIMEOUT);
 +      sk->sk_sndtimeo = L2CAP_CONN_TIMEOUT;
  
        sock_reset_flag(sk, SOCK_ZAPPED);
  
        sk->sk_protocol = proto;
        sk->sk_state = BT_OPEN;
  
 -      setup_timer(&sk->sk_timer, l2cap_sock_timeout, (unsigned long) sk);
 +      chan = l2cap_chan_create(sk);
 +      if (!chan) {
 +              l2cap_sock_kill(sk);
 +              return NULL;
 +      }
 +
 +      l2cap_pi(sk)->chan = chan;
  
        return sk;
  }
@@@ -1015,6 -1033,7 +1016,6 @@@ static int l2cap_sock_create(struct ne
                             int kern)
  {
        struct sock *sk;
 -      struct l2cap_chan *chan;
  
        BT_DBG("sock %p", sock);
  
        if (!sk)
                return -ENOMEM;
  
 -      chan = l2cap_chan_create(sk);
 -      if (!chan) {
 -              l2cap_sock_kill(sk);
 -              return -ENOMEM;
 -      }
 -
 -      l2cap_pi(sk)->chan = chan;
 -
        l2cap_sock_init(sk, NULL);
        return 0;
  }
diff --combined net/bluetooth/rfcomm/sock.c
index b02f0d47ab8057eabc3b6e10cd2ca2693b429ff9,9dfe702ce11e77120d08c5b807190be1aeac9472..927f418a0307cfdece3bfbce6265e80695a8aeea
@@@ -485,6 -485,11 +485,6 @@@ static int rfcomm_sock_accept(struct so
  
        lock_sock(sk);
  
 -      if (sk->sk_state != BT_LISTEN) {
 -              err = -EBADFD;
 -              goto done;
 -      }
 -
        if (sk->sk_type != SOCK_STREAM) {
                err = -EINVAL;
                goto done;
  
        /* Wait for an incoming connection. (wake-one). */
        add_wait_queue_exclusive(sk_sleep(sk), &wait);
 -      while (!(nsk = bt_accept_dequeue(sk, newsock))) {
 +      while (1) {
                set_current_state(TASK_INTERRUPTIBLE);
 -              if (!timeo) {
 -                      err = -EAGAIN;
 +
 +              if (sk->sk_state != BT_LISTEN) {
 +                      err = -EBADFD;
                        break;
                }
  
 -              release_sock(sk);
 -              timeo = schedule_timeout(timeo);
 -              lock_sock(sk);
 +              nsk = bt_accept_dequeue(sk, newsock);
 +              if (nsk)
 +                      break;
  
 -              if (sk->sk_state != BT_LISTEN) {
 -                      err = -EBADFD;
 +              if (!timeo) {
 +                      err = -EAGAIN;
                        break;
                }
  
                        err = sock_intr_errno(timeo);
                        break;
                }
 +
 +              release_sock(sk);
 +              timeo = schedule_timeout(timeo);
 +              lock_sock(sk);
        }
 -      set_current_state(TASK_RUNNING);
 +      __set_current_state(TASK_RUNNING);
        remove_wait_queue(sk_sleep(sk), &wait);
  
        if (err)
@@@ -544,6 -544,7 +544,7 @@@ static int rfcomm_sock_getname(struct s
  
        BT_DBG("sock %p, sk %p", sock, sk);
  
+       memset(sa, 0, sizeof(*sa));
        sa->rc_family  = AF_BLUETOOTH;
        sa->rc_channel = rfcomm_pi(sk)->channel;
        if (peer)
@@@ -679,8 -680,7 +680,8 @@@ static int rfcomm_sock_setsockopt(struc
  {
        struct sock *sk = sock->sk;
        struct bt_security sec;
 -      int len, err = 0;
 +      int err = 0;
 +      size_t len;
        u32 opt;
  
        BT_DBG("sk %p", sk);
  static int rfcomm_sock_getsockopt_old(struct socket *sock, int optname, char __user *optval, int __user *optlen)
  {
        struct sock *sk = sock->sk;
 -      struct sock *l2cap_sk;
        struct rfcomm_conninfo cinfo;
        struct l2cap_conn *conn = l2cap_pi(sk)->chan->conn;
        int len, err = 0;
                        break;
                }
  
 -              l2cap_sk = rfcomm_pi(sk)->dlc->session->sock->sk;
  
                memset(&cinfo, 0, sizeof(cinfo));
                cinfo.hci_handle = conn->hcon->handle;
diff --combined net/ipv4/tcp.c
index b0e5330f1c61bcccdfbc378835d2aabd98b17355,dd3af6c6ee0abf7cf9a91331ec6be6bbb32bafdb..08d22449f563c19a0b93ace1af27acbaf032294f
  #include <linux/crypto.h>
  #include <linux/time.h>
  #include <linux/slab.h>
 +#include <linux/uid_stat.h>
  
  #include <net/icmp.h>
  #include <net/tcp.h>
  #include <net/xfrm.h>
  #include <net/ip.h>
 +#include <net/ip6_route.h>
 +#include <net/ipv6.h>
 +#include <net/transp_v6.h>
  #include <net/netdma.h>
  #include <net/sock.h>
  
@@@ -743,7 -739,9 +743,9 @@@ static unsigned int tcp_xmit_size_goal(
                           old_size_goal + mss_now > xmit_size_goal)) {
                        xmit_size_goal = old_size_goal;
                } else {
-                       tp->xmit_size_goal_segs = xmit_size_goal / mss_now;
+                       tp->xmit_size_goal_segs =
+                               min_t(u16, xmit_size_goal / mss_now,
+                                     sk->sk_gso_max_segs);
                        xmit_size_goal = tp->xmit_size_goal_segs * mss_now;
                }
        }
@@@ -854,8 -852,7 +856,7 @@@ new_segment
  wait_for_sndbuf:
                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
  wait_for_memory:
-               if (copied)
-                       tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
+               tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
  
                if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
                        goto do_error;
@@@ -1116,9 -1113,6 +1117,9 @@@ out
        if (copied)
                tcp_push(sk, flags, mss_now, tp->nonagle);
        release_sock(sk);
 +
 +      if (copied > 0)
 +              uid_stat_tcp_snd(current_uid(), copied);
        return copied;
  
  do_fault:
@@@ -1395,11 -1389,8 +1396,11 @@@ int tcp_read_sock(struct sock *sk, read
        tcp_rcv_space_adjust(sk);
  
        /* Clean up data we have read: This will do ACK frames. */
 -      if (copied > 0)
 +      if (copied > 0) {
                tcp_cleanup_rbuf(sk, copied);
 +              uid_stat_tcp_rcv(current_uid(), copied);
 +      }
 +
        return copied;
  }
  EXPORT_SYMBOL(tcp_read_sock);
@@@ -1601,8 -1592,14 +1602,14 @@@ int tcp_recvmsg(struct kiocb *iocb, str
                }
  
  #ifdef CONFIG_NET_DMA
-               if (tp->ucopy.dma_chan)
-                       dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);
+               if (tp->ucopy.dma_chan) {
+                       if (tp->rcv_wnd == 0 &&
+                           !skb_queue_empty(&sk->sk_async_wait_queue)) {
+                               tcp_service_net_dma(sk, true);
+                               tcp_cleanup_rbuf(sk, copied);
+                       } else
+                               dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);
+               }
  #endif
                if (copied >= target) {
                        /* Do not sleep, just process backlog. */
@@@ -1781,9 -1778,6 +1788,9 @@@ skip_copy
        tcp_cleanup_rbuf(sk, copied);
  
        release_sock(sk);
 +
 +      if (copied > 0)
 +              uid_stat_tcp_rcv(current_uid(), copied);
        return copied;
  
  out:
  
  recv_urg:
        err = tcp_recv_urg(sk, msg, len, flags);
 +      if (err > 0)
 +              uid_stat_tcp_rcv(current_uid(), err);
        goto out;
  }
  EXPORT_SYMBOL(tcp_recvmsg);
@@@ -2410,7 -2402,10 +2417,10 @@@ static int do_tcp_setsockopt(struct soc
                /* Cap the max timeout in ms TCP will retry/retrans
                 * before giving up and aborting (ETIMEDOUT) a connection.
                 */
-               icsk->icsk_user_timeout = msecs_to_jiffies(val);
+               if (val < 0)
+                       err = -EINVAL;
+               else
+                       icsk->icsk_user_timeout = msecs_to_jiffies(val);
                break;
        default:
                err = -ENOPROTOOPT;
@@@ -3236,7 -3231,7 +3246,7 @@@ void __init tcp_init(void
  {
        struct sk_buff *skb = NULL;
        unsigned long limit;
-       int i, max_share, cnt;
+       int i, max_rshare, max_wshare, cnt;
        unsigned long jiffy = jiffies;
  
        BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
  
        /* Set per-socket limits to no more than 1/128 the pressure threshold */
        limit = ((unsigned long)sysctl_tcp_mem[1]) << (PAGE_SHIFT - 7);
-       max_share = min(4UL*1024*1024, limit);
+       max_wshare = min(4UL*1024*1024, limit);
+       max_rshare = min(6UL*1024*1024, limit);
  
        sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
        sysctl_tcp_wmem[1] = 16*1024;
-       sysctl_tcp_wmem[2] = max(64*1024, max_share);
+       sysctl_tcp_wmem[2] = max(64*1024, max_wshare);
  
        sysctl_tcp_rmem[0] = SK_MEM_QUANTUM;
        sysctl_tcp_rmem[1] = 87380;
-       sysctl_tcp_rmem[2] = max(87380, max_share);
+       sysctl_tcp_rmem[2] = max(87380, max_rshare);
  
        printk(KERN_INFO "TCP: Hash tables configured "
               "(established %u bind %u)\n",
        tcp_secret_retiring = &tcp_secret_two;
        tcp_secret_secondary = &tcp_secret_two;
  }
 +
 +static int tcp_is_local(struct net *net, __be32 addr) {
 +      struct rtable *rt;
 +      struct flowi4 fl4 = { .daddr = addr };
 +      rt = ip_route_output_key(net, &fl4);
 +      if (IS_ERR_OR_NULL(rt))
 +              return 0;
 +      return rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK);
 +}
 +
 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 +static int tcp_is_local6(struct net *net, struct in6_addr *addr) {
 +      struct rt6_info *rt6 = rt6_lookup(net, addr, addr, 0, 0);
 +      return rt6 && rt6->rt6i_dev && (rt6->rt6i_dev->flags & IFF_LOOPBACK);
 +}
 +#endif
 +
 +/*
 + * tcp_nuke_addr - destroy all sockets on the given local address
 + * if local address is the unspecified address (0.0.0.0 or ::), destroy all
 + * sockets with local addresses that are not configured.
 + */
 +int tcp_nuke_addr(struct net *net, struct sockaddr *addr)
 +{
 +      int family = addr->sa_family;
 +      unsigned int bucket;
 +
 +      struct in_addr *in;
 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 +      struct in6_addr *in6;
 +#endif
 +      if (family == AF_INET) {
 +              in = &((struct sockaddr_in *)addr)->sin_addr;
 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 +      } else if (family == AF_INET6) {
 +              in6 = &((struct sockaddr_in6 *)addr)->sin6_addr;
 +#endif
 +      } else {
 +              return -EAFNOSUPPORT;
 +      }
 +
 +      for (bucket = 0; bucket < tcp_hashinfo.ehash_mask; bucket++) {
 +              struct hlist_nulls_node *node;
 +              struct sock *sk;
 +              spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, bucket);
 +
 +restart:
 +              spin_lock_bh(lock);
 +              sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[bucket].chain) {
 +                      struct inet_sock *inet = inet_sk(sk);
 +
 +                      if (sysctl_ip_dynaddr && sk->sk_state == TCP_SYN_SENT)
 +                              continue;
 +                      if (sock_flag(sk, SOCK_DEAD))
 +                              continue;
 +
 +                      if (family == AF_INET) {
 +                              __be32 s4 = inet->inet_rcv_saddr;
 +                              if (s4 == LOOPBACK4_IPV6)
 +                                      continue;
 +
 +                              if (in->s_addr != s4 &&
 +                                  !(in->s_addr == INADDR_ANY &&
 +                                    !tcp_is_local(net, s4)))
 +                                      continue;
 +                      }
 +
 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 +                      if (family == AF_INET6) {
 +                              struct in6_addr *s6;
 +                              if (!inet->pinet6)
 +                                      continue;
 +
 +                              s6 = &inet->pinet6->rcv_saddr;
 +                              if (ipv6_addr_type(s6) == IPV6_ADDR_MAPPED)
 +                                      continue;
 +
 +                              if (!ipv6_addr_equal(in6, s6) &&
 +                                  !(ipv6_addr_equal(in6, &in6addr_any) &&
 +                                    !tcp_is_local6(net, s6)))
 +                                      continue;
 +                      }
 +#endif
 +
 +                      sock_hold(sk);
 +                      spin_unlock_bh(lock);
 +
 +                      local_bh_disable();
 +                      bh_lock_sock(sk);
 +                      sk->sk_err = ETIMEDOUT;
 +                      sk->sk_error_report(sk);
 +
 +                      tcp_done(sk);
 +                      bh_unlock_sock(sk);
 +                      local_bh_enable();
 +                      sock_put(sk);
 +
 +                      goto restart;
 +              }
 +              spin_unlock_bh(lock);
 +      }
 +
 +      return 0;
 +}
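A minimal usage sketch for tcp_nuke_addr() (hypothetical caller, not part of this hunk), assuming the caller already knows which IPv4 address has gone away:

	static void kill_tcp_on_lost_addr(struct net *net, __be32 lost_addr)
	{
		struct sockaddr_in sin = {
			.sin_family = AF_INET,
			.sin_addr   = { .s_addr = lost_addr },
		};

		/* Matching established sockets are reset with ETIMEDOUT, as above. */
		tcp_nuke_addr(net, (struct sockaddr *)&sin);
	}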
diff --combined net/ipv6/addrconf.c
index 8a4bf719c253a52590f08c8e358c5db4dbd76348,70d6a7fff10838bf7c47395b3f0ebfa4f0e4cbca..036bcee48930fa9cb95e459bc8178a0c13ac20b9
@@@ -492,8 -492,7 +492,7 @@@ static void addrconf_forward_change(str
        struct net_device *dev;
        struct inet6_dev *idev;
  
-       rcu_read_lock();
-       for_each_netdev_rcu(net, dev) {
+       for_each_netdev(net, dev) {
                idev = __in6_dev_get(dev);
                if (idev) {
                        int changed = (!idev->cnf.forwarding) ^ (!newf);
                                dev_forward_change(idev);
                }
        }
-       rcu_read_unlock();
  }
  
  static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old)
@@@ -828,13 -826,12 +826,13 @@@ static int ipv6_create_tempaddr(struct 
  {
        struct inet6_dev *idev = ifp->idev;
        struct in6_addr addr, *tmpaddr;
 -      unsigned long tmp_prefered_lft, tmp_valid_lft, tmp_cstamp, tmp_tstamp, age;
 +      unsigned long tmp_prefered_lft, tmp_valid_lft, tmp_tstamp, age;
        unsigned long regen_advance;
        int tmp_plen;
        int ret = 0;
        int max_addresses;
        u32 addr_flags;
 +      unsigned long now = jiffies;
  
        write_lock(&idev->lock);
        if (ift) {
@@@ -879,7 -876,7 +877,7 @@@ retry
                goto out;
        }
        memcpy(&addr.s6_addr[8], idev->rndid, 8);
 -      age = (jiffies - ifp->tstamp) / HZ;
 +      age = (now - ifp->tstamp) / HZ;
        tmp_valid_lft = min_t(__u32,
                              ifp->valid_lft,
                              idev->cnf.temp_valid_lft + age);
                                 idev->cnf.max_desync_factor);
        tmp_plen = ifp->prefix_len;
        max_addresses = idev->cnf.max_addresses;
 -      tmp_cstamp = ifp->cstamp;
        tmp_tstamp = ifp->tstamp;
        spin_unlock_bh(&ifp->lock);
  
        ift->ifpub = ifp;
        ift->valid_lft = tmp_valid_lft;
        ift->prefered_lft = tmp_prefered_lft;
 -      ift->cstamp = tmp_cstamp;
 +      ift->cstamp = now;
        ift->tstamp = tmp_tstamp;
        spin_unlock_bh(&ift->lock);
  
  #ifdef CONFIG_IPV6_PRIVACY
                        read_lock_bh(&in6_dev->lock);
                        /* update all temporary addresses in the list */
 -                      list_for_each_entry(ift, &in6_dev->tempaddr_list, tmp_list) {
 -                              /*
 -                               * When adjusting the lifetimes of an existing
 -                               * temporary address, only lower the lifetimes.
 -                               * Implementations must not increase the
 -                               * lifetimes of an existing temporary address
 -                               * when processing a Prefix Information Option.
 -                               */
 +                      list_for_each_entry(ift, &in6_dev->tempaddr_list,
 +                                          tmp_list) {
 +                              int age, max_valid, max_prefered;
 +
                                if (ifp != ift->ifpub)
                                        continue;
  
 +                              /*
 +                               * RFC 4941 section 3.3:
 +                               * If a received option will extend the lifetime
 +                               * of a public address, the lifetimes of
 +                               * temporary addresses should be extended,
 +                               * subject to the overall constraint that no
 +                               * temporary addresses should ever remain
 +                               * "valid" or "preferred" for a time longer than
 +                               * (TEMP_VALID_LIFETIME) or
 +                               * (TEMP_PREFERRED_LIFETIME - DESYNC_FACTOR),
 +                               * respectively.
 +                               */
 +                              age = (now - ift->cstamp) / HZ;
 +                              max_valid = in6_dev->cnf.temp_valid_lft - age;
 +                              if (max_valid < 0)
 +                                      max_valid = 0;
 +
 +                              max_prefered = in6_dev->cnf.temp_prefered_lft -
 +                                             in6_dev->cnf.max_desync_factor -
 +                                             age;
 +                              if (max_prefered < 0)
 +                                      max_prefered = 0;
 +
 +                              if (valid_lft > max_valid)
 +                                      valid_lft = max_valid;
 +
 +                              if (prefered_lft > max_prefered)
 +                                      prefered_lft = max_prefered;
 +
                                spin_lock(&ift->lock);
                                flags = ift->flags;
 -                              if (ift->valid_lft > valid_lft &&
 -                                  ift->valid_lft - valid_lft > (jiffies - ift->tstamp) / HZ)
 -                                      ift->valid_lft = valid_lft + (jiffies - ift->tstamp) / HZ;
 -                              if (ift->prefered_lft > prefered_lft &&
 -                                  ift->prefered_lft - prefered_lft > (jiffies - ift->tstamp) / HZ)
 -                                      ift->prefered_lft = prefered_lft + (jiffies - ift->tstamp) / HZ;
 +                              ift->valid_lft = valid_lft;
 +                              ift->prefered_lft = prefered_lft;
 +                              ift->tstamp = now;
 +                              if (prefered_lft > 0)
 +                                      ift->flags &= ~IFA_F_DEPRECATED;
 +
                                spin_unlock(&ift->lock);
                                if (!(flags&IFA_F_TENTATIVE))
                                        ipv6_ifa_notify(0, ift);
  
                        if ((create || list_empty(&in6_dev->tempaddr_list)) && in6_dev->cnf.use_tempaddr > 0) {
                                /*
 -                               * When a new public address is created as described in [ADDRCONF],
 -                               * also create a new temporary address. Also create a temporary
 -                               * address if it's enabled but no temporary address currently exists.
 +                               * When a new public address is created as
 +                               * described in [ADDRCONF], also create a new
 +                               * temporary address. Also create a temporary
 +                               * address if it's enabled but no temporary
 +                               * address currently exists.
                                 */
                                read_unlock_bh(&in6_dev->lock);
                                ipv6_create_tempaddr(ifp, NULL);
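A quick worked example of the RFC 4941 clamping introduced above (all values made up for illustration):

	/*
	 * Assume temp_valid_lft = 604800s, temp_prefered_lft = 86400s,
	 * max_desync_factor = 600s and a temporary address created 3600s ago:
	 *
	 *   max_valid    = 604800 - 3600       = 601200s
	 *   max_prefered = 86400 - 600 - 3600  = 82200s
	 *
	 * A Prefix Information Option advertising valid_lft = 2592000s and
	 * prefered_lft = 604800s is therefore clamped to 601200s / 82200s
	 * for this temporary address.
	 */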
diff --combined net/ipv6/route.c
index 7ef5d08201859227451a857a1d480a69fcc34d27,917256826f84e9f4f31d01451c80bfc918517a12..6a96cad4b0b072170e6a993f65749ab49c469217
@@@ -233,9 -233,7 +233,9 @@@ static inline struct rt6_info *ip6_dst_
  {
        struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
  
 -      memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
 +      if (rt != NULL)
 +              memset(&rt->rt6i_table, 0,
 +                      sizeof(*rt) - sizeof(struct dst_entry));
  
        return rt;
  }
@@@ -1401,17 -1399,18 +1401,18 @@@ static int __ip6_del_rt(struct rt6_inf
        struct fib6_table *table;
        struct net *net = dev_net(rt->rt6i_dev);
  
-       if (rt == net->ipv6.ip6_null_entry)
-               return -ENOENT;
+       if (rt == net->ipv6.ip6_null_entry) {
+               err = -ENOENT;
+               goto out;
+       }
  
        table = rt->rt6i_table;
        write_lock_bh(&table->tb6_lock);
        err = fib6_del(rt, info);
-       dst_release(&rt->dst);
        write_unlock_bh(&table->tb6_lock);
  
+ out:
+       dst_release(&rt->dst);
        return err;
  }
  
@@@ -2848,10 -2847,6 +2849,6 @@@ static int __net_init ip6_route_net_ini
        net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
        net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
  
- #ifdef CONFIG_PROC_FS
-       proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
-       proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
- #endif
        net->ipv6.ip6_rt_gc_expire = 30*HZ;
  
        ret = 0;
@@@ -2872,10 -2867,6 +2869,6 @@@ out_ip6_dst_ops
  
  static void __net_exit ip6_route_net_exit(struct net *net)
  {
- #ifdef CONFIG_PROC_FS
-       proc_net_remove(net, "ipv6_route");
-       proc_net_remove(net, "rt6_stats");
- #endif
        kfree(net->ipv6.ip6_null_entry);
  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
        kfree(net->ipv6.ip6_prohibit_entry);
        dst_entries_destroy(&net->ipv6.ip6_dst_ops);
  }
  
+ static int __net_init ip6_route_net_init_late(struct net *net)
+ {
+ #ifdef CONFIG_PROC_FS
+       proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
+       proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
+ #endif
+       return 0;
+ }
+ 
+ static void __net_exit ip6_route_net_exit_late(struct net *net)
+ {
+ #ifdef CONFIG_PROC_FS
+       proc_net_remove(net, "ipv6_route");
+       proc_net_remove(net, "rt6_stats");
+ #endif
+ }
+ 
  static struct pernet_operations ip6_route_net_ops = {
        .init = ip6_route_net_init,
        .exit = ip6_route_net_exit,
  };
  
+ static struct pernet_operations ip6_route_net_late_ops = {
+       .init = ip6_route_net_init_late,
+       .exit = ip6_route_net_exit_late,
+ };
+ 
  static struct notifier_block ip6_route_dev_notifier = {
        .notifier_call = ip6_route_dev_notify,
        .priority = 0,
@@@ -2938,19 -2951,25 +2953,25 @@@ int __init ip6_route_init(void
        if (ret)
                goto xfrm6_init;
  
+       ret = register_pernet_subsys(&ip6_route_net_late_ops);
+       if (ret)
+               goto fib6_rules_init;
+ 
        ret = -ENOBUFS;
        if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
            __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
            __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
-               goto fib6_rules_init;
+               goto out_register_late_subsys;
  
        ret = register_netdevice_notifier(&ip6_route_dev_notifier);
        if (ret)
-               goto fib6_rules_init;
+               goto out_register_late_subsys;
  
  out:
        return ret;
  
+ out_register_late_subsys:
+       unregister_pernet_subsys(&ip6_route_net_late_ops);
  fib6_rules_init:
        fib6_rules_cleanup();
  xfrm6_init:
@@@ -2969,6 -2988,7 +2990,7 @@@ out_kmem_cache
  void ip6_route_cleanup(void)
  {
        unregister_netdevice_notifier(&ip6_route_dev_notifier);
+       unregister_pernet_subsys(&ip6_route_net_late_ops);
        fib6_rules_cleanup();
        xfrm6_fini();
        fib6_gc_cleanup();
diff --combined net/wireless/core.c
index 8ba1553195289509420642be523e17ce11ab30f2,498c760a1d2aea75900f56392948980aaeb9a1d9..b6d765234ab0909a91239d1dde9fd20a6abefbb3
@@@ -488,14 -488,6 +488,14 @@@ int wiphy_register(struct wiphy *wiphy
        int i;
        u16 ifmodes = wiphy->interface_modes;
  
 +      if (WARN_ON((wiphy->wowlan.flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) &&
 +                  !(wiphy->wowlan.flags & WIPHY_WOWLAN_SUPPORTS_GTK_REKEY)))
 +              return -EINVAL;
 +
 +      if (WARN_ON(wiphy->ap_sme_capa &&
 +                  !(wiphy->flags & WIPHY_FLAG_HAVE_AP_SME)))
 +              return -EINVAL;
 +
        if (WARN_ON(wiphy->addresses && !wiphy->n_addresses))
                return -EINVAL;
  
@@@ -967,6 -959,11 +967,11 @@@ static int cfg80211_netdev_notifier_cal
                 */
                synchronize_rcu();
                INIT_LIST_HEAD(&wdev->list);
+               /*
+                * Ensure that all events have been processed and
+                * freed.
+                */
+               cfg80211_process_wdev_events(wdev);
                break;
        case NETDEV_PRE_UP:
                if (!(wdev->wiphy->interface_modes & BIT(wdev->iftype)))
diff --combined net/wireless/core.h
index a020d38ea03922e29c82f804588073269c9d9c0e,83516455a1d156541707df127d014d2ceb2aa0e8..4430e6772fd65619b4092c1ee236ead40e8a4878
@@@ -54,8 -54,6 +54,8 @@@ struct cfg80211_registered_device 
        int opencount; /* also protected by devlist_mtx */
        wait_queue_head_t dev_wait;
  
 +      u32 ap_beacons_nlpid;
 +
        /* BSSes/scanning */
        spinlock_t bss_lock;
        struct list_head bss_list;
@@@ -428,6 -426,7 +428,7 @@@ int cfg80211_change_iface(struct cfg802
                          struct net_device *dev, enum nl80211_iftype ntype,
                          u32 *flags, struct vif_params *params);
  void cfg80211_process_rdev_events(struct cfg80211_registered_device *rdev);
+ void cfg80211_process_wdev_events(struct wireless_dev *wdev);
  
  int cfg80211_can_change_interface(struct cfg80211_registered_device *rdev,
                                  struct wireless_dev *wdev,
diff --combined net/wireless/reg.c
index 72f7feee254ae14a41ef713576319b9b94c353a1,d85a14970db9cc8ca987690f4b3c49fb4ca97c54..bcd96244e56c6c7f9245e3b2f2e982bcf6823fb7
@@@ -331,6 -331,9 +331,9 @@@ static void reg_regdb_search(struct wor
        struct reg_regdb_search_request *request;
        const struct ieee80211_regdomain *curdom, *regdom;
        int i, r;
+       bool set_reg = false;
+       mutex_lock(&cfg80211_mutex);
  
        mutex_lock(&reg_regdb_search_mutex);
        while (!list_empty(&reg_regdb_search_list)) {
                                r = reg_copy_regd(&regdom, curdom);
                                if (r)
                                        break;
-                               mutex_lock(&cfg80211_mutex);
-                               set_regdom(regdom);
-                               mutex_unlock(&cfg80211_mutex);
+                               set_reg = true;
                                break;
                        }
                }
                kfree(request);
        }
        mutex_unlock(&reg_regdb_search_mutex);
+       if (set_reg)
+               set_regdom(regdom);
+       mutex_unlock(&cfg80211_mutex);
  }
  
  static DECLARE_WORK(reg_regdb_work, reg_regdb_search);
@@@ -379,7 -385,15 +385,15 @@@ static void reg_regdb_query(const char 
  
        schedule_work(&reg_regdb_work);
  }
+ 
+ /* Feel free to add any other sanity checks here */
+ static void reg_regdb_size_check(void)
+ {
+       /* We should ideally BUILD_BUG_ON() but then random builds would fail */
+       WARN_ONCE(!reg_regdb_size, "db.txt is empty, you should update it...");
+ }
  #else
+ static inline void reg_regdb_size_check(void) {}
  static inline void reg_regdb_query(const char *alpha2) {}
  #endif /* CONFIG_CFG80211_INTERNAL_REGDB */
  
@@@ -1350,7 -1364,7 +1364,7 @@@ static void reg_set_request_processed(v
        spin_unlock(&reg_requests_lock);
  
        if (last_request->initiator == NL80211_REGDOM_SET_BY_USER)
-               cancel_delayed_work_sync(&reg_timeout);
+               cancel_delayed_work(&reg_timeout);
  
        if (need_more_processing)
                schedule_work(&reg_work);
@@@ -1773,7 -1787,6 +1787,7 @@@ static void restore_alpha2(char *alpha2
  static void restore_regulatory_settings(bool reset_user)
  {
        char alpha2[2];
 +      char world_alpha2[2];
        struct reg_beacon *reg_beacon, *btmp;
        struct regulatory_request *reg_request, *tmp;
        LIST_HEAD(tmp_reg_req_list);
  
        /* First restore to the basic regulatory settings */
        cfg80211_regdomain = cfg80211_world_regdom;
 +      world_alpha2[0] = cfg80211_regdomain->alpha2[0];
 +      world_alpha2[1] = cfg80211_regdomain->alpha2[1];
  
        mutex_unlock(&reg_mutex);
        mutex_unlock(&cfg80211_mutex);
  
 -      regulatory_hint_core(cfg80211_regdomain->alpha2);
 +      regulatory_hint_core(world_alpha2);
  
        /*
         * This restores the ieee80211_regdom module parameter
@@@ -2228,6 -2239,8 +2242,8 @@@ int __init regulatory_init(void
        spin_lock_init(&reg_requests_lock);
        spin_lock_init(&reg_pending_beacons_lock);
  
+       reg_regdb_size_check();
        cfg80211_regdomain = cfg80211_world_regdom;
  
        user_alpha2[0] = '9';
diff --combined net/xfrm/xfrm_policy.c
index 1b906c38ca366370d5f523f5b89069a6d31b2acd,7c8e0cb1e15328764919dcd24411cc5835640709..6b9e4e10c84914472dee5e6e5ea840667e5800a9
@@@ -1349,16 -1349,14 +1349,16 @@@ static inline struct xfrm_dst *xfrm_all
                BUG();
        }
        xdst = dst_alloc(dst_ops, NULL, 0, 0, 0);
 -      memset(&xdst->u.rt6.rt6i_table, 0, sizeof(*xdst) - sizeof(struct dst_entry));
 -      xfrm_policy_put_afinfo(afinfo);
  
 -      if (likely(xdst))
 +      if (likely(xdst)) {
 +              memset(&xdst->u.rt6.rt6i_table, 0,
 +                      sizeof(*xdst) - sizeof(struct dst_entry));
                xdst->flo.ops = &xfrm_bundle_fc_ops;
 -      else
 +      else
                xdst = ERR_PTR(-ENOBUFS);
  
 +      xfrm_policy_put_afinfo(afinfo);
 +
        return xdst;
  }
  
@@@ -1761,7 -1759,7 +1761,7 @@@ static struct dst_entry *make_blackhole
  
        if (!afinfo) {
                dst_release(dst_orig);
-               ret = ERR_PTR(-EINVAL);
+               return ERR_PTR(-EINVAL);
        } else {
                ret = afinfo->blackhole_route(net, dst_orig);
        }
@@@ -1919,6 -1917,9 +1919,9 @@@ no_transform
        }
  ok:
        xfrm_pols_put(pols, drop_pols);
+       if (dst && dst->xfrm &&
+           dst->xfrm->props.mode == XFRM_MODE_TUNNEL)
+               dst->flags |= DST_XFRM_TUNNEL;
        return dst;
  
  nopol:
diff --combined scripts/Kbuild.include
index ce8844f619cf15e39ddac8ed3304821848c68e29,2046b5cf5bf2a462e94c4678a201ea1729c7260f..2c18eddc0eb1cabc27acea81f7a8a4bd1989c463
@@@ -100,7 -100,7 +100,7 @@@ as-option = $(call try-run,
  # Usage: cflags-y += $(call as-instr,instr,option1,option2)
  
  as-instr = $(call try-run,\
 -      /bin/echo -e "$(1)" | $(CC) $(KBUILD_AFLAGS) -c -xassembler -o "$$TMP" -,$(2),$(3))
 +      printf "%b\n" "$(1)" | $(CC) $(KBUILD_AFLAGS) -c -xassembler -o "$$TMP" -,$(2),$(3))
  
  # cc-option
  # Usage: cflags-y += $(call cc-option,-march=winchip-c6,-march=i586)
@@@ -205,7 -205,7 +205,7 @@@ endi
  # >$< substitution to preserve $ when reloading .cmd file
  # note: when using inline perl scripts [perl -e '...$$t=1;...']
  # in $(cmd_xxx) double $$ your perl vars
- make-cmd = $(subst \#,\\\#,$(subst $$,$$$$,$(call escsq,$(cmd_$(1)))))
+ make-cmd = $(subst \\,\\\\,$(subst \#,\\\#,$(subst $$,$$$$,$(call escsq,$(cmd_$(1))))))
  
  # Find any prerequisites that is newer than target or that does not exist.
  # PHONY targets skipped in both cases.