Merge commit 'linus/master' into merge-linus

author Arjan van de Ven <arjan@linux.intel.com>

Fri, 17 Oct 2008 16:20:26 +0000 (09:20 -0700)

committer Arjan van de Ven <arjan@linux.intel.com>

Fri, 17 Oct 2008 16:20:26 +0000 (09:20 -0700)
author Arjan van de Ven <arjan@linux.intel.com>
Fri, 17 Oct 2008 16:20:26 +0000 (09:20 -0700)
committer Arjan van de Ven <arjan@linux.intel.com>
Fri, 17 Oct 2008 16:20:26 +0000 (09:20 -0700)
diff --combined arch/ia64/kvm/kvm-ia64.c

index cf8eae1855e627bee0bf5d3a2ee123c318fdf26e,c0699f0e35a926936113e42d925b4fd08318c1c3..a312c9e9b9efa2ecd194aa7f88a98a233749dbf6
--- 1/arch/ia64/kvm/kvm-ia64.c
--- 2/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@@ -31,6 -31,7 +31,7 @@@
   #include <linux/bitops.h>
   #include <linux/hrtimer.h>
   #include <linux/uaccess.h>
+ #include <linux/intel-iommu.h>
   
   #include <asm/pgtable.h>
   #include <asm/gcc_intrin.h>
@@@ -38,12 -39,14 +39,14 @@@
   #include <asm/cacheflush.h>
   #include <asm/div64.h>
   #include <asm/tlb.h>
+ #include <asm/elf.h>
   
   #include "misc.h"
   #include "vti.h"
   #include "iodev.h"
   #include "ioapic.h"
   #include "lapic.h"
+ #include "irq.h"
   
   static unsigned long kvm_vmm_base;
   static unsigned long kvm_vsa_base;
@@@ -61,12 -64,6 +64,6 @@@ struct kvm_stats_debugfs_item debugfs_e
         { NULL }
   };
   
- 
- struct fdesc{
-     unsigned long ip;
-     unsigned long gp;
- };
- 
   static void kvm_flush_icache(unsigned long start, unsigned long len)
   {
         int l;
@@@ -184,12 -181,16 +181,16 @@@ int kvm_dev_ioctl_check_extension(long 
         switch (ext) {
         case KVM_CAP_IRQCHIP:
         case KVM_CAP_USER_MEMORY:
+       case KVM_CAP_MP_STATE:
   
                 r = 1;
                 break;
         case KVM_CAP_COALESCED_MMIO:
                 r = KVM_COALESCED_MMIO_PAGE_OFFSET;
                 break;
+       case KVM_CAP_IOMMU:
+               r = intel_iommu_found();
+               break;
         default:
                 r = 0;
         }
@@@ -776,6 -777,7 +777,7 @@@ static void kvm_init_vm(struct kvm *kvm
          */
         kvm_build_io_pmt(kvm);
   
+       INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
   }
   
   struct  kvm *kvm_arch_create_vm(void)
@@@ -1112,7 -1114,7 +1114,7 @@@ static void kvm_migrate_hlt_timer(struc
         struct hrtimer *p_ht = &vcpu->arch.hlt_timer;
   
         if (hrtimer_cancel(p_ht))
- -              hrtimer_start(p_ht, p_ht->expires, HRTIMER_MODE_ABS);
+ +              hrtimer_start_expires(p_ht, HRTIMER_MODE_ABS);
   }
   
   static enum hrtimer_restart hlt_timer_fn(struct hrtimer *data)
@@@ -1339,6 -1341,10 +1341,10 @@@ static void kvm_release_vm_pages(struc
   
   void kvm_arch_destroy_vm(struct kvm *kvm)
   {
+       kvm_iommu_unmap_guest(kvm);
+ #ifdef  KVM_CAP_DEVICE_ASSIGNMENT
+       kvm_free_all_assigned_devices(kvm);
+ #endif
         kfree(kvm->arch.vioapic);
         kvm_release_vm_pages(kvm);
         kvm_free_physmem(kvm);
@@@ -1440,17 -1446,24 +1446,24 @@@ int kvm_arch_set_memory_region(struct k
                 int user_alloc)
   {
         unsigned long i;
-       struct page *page;
+       unsigned long pfn;
         int npages = mem->memory_size >> PAGE_SHIFT;
         struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot];
         unsigned long base_gfn = memslot->base_gfn;
   
         for (i = 0; i < npages; i++) {
-               page = gfn_to_page(kvm, base_gfn + i);
-               kvm_set_pmt_entry(kvm, base_gfn + i,
-                               page_to_pfn(page) << PAGE_SHIFT,
-                               _PAGE_AR_RWX|_PAGE_MA_WB);
-               memslot->rmap[i] = (unsigned long)page;
+               pfn = gfn_to_pfn(kvm, base_gfn + i);
+               if (!kvm_is_mmio_pfn(pfn)) {
+                       kvm_set_pmt_entry(kvm, base_gfn + i,
+                                       pfn << PAGE_SHIFT,
+                               _PAGE_AR_RWX | _PAGE_MA_WB);
+                       memslot->rmap[i] = (unsigned long)pfn_to_page(pfn);
+               } else {
+                       kvm_set_pmt_entry(kvm, base_gfn + i,
+                                       GPFN_PHYS_MMIO | (pfn << PAGE_SHIFT),
+                                       _PAGE_MA_UC);
+                       memslot->rmap[i] = 0;
+                       }
         }
   
         return 0;
@@@ -1794,11 -1807,43 +1807,43 @@@ int kvm_arch_vcpu_runnable(struct kvm_v
   int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
                                     struct kvm_mp_state *mp_state)
   {
-       return -EINVAL;
+       vcpu_load(vcpu);
+       mp_state->mp_state = vcpu->arch.mp_state;
+       vcpu_put(vcpu);
+       return 0;
+ }
+ 
+ static int vcpu_reset(struct kvm_vcpu *vcpu)
+ {
+       int r;
+       long psr;
+       local_irq_save(psr);
+       r = kvm_insert_vmm_mapping(vcpu);
+       if (r)
+               goto fail;
+ 
+       vcpu->arch.launched = 0;
+       kvm_arch_vcpu_uninit(vcpu);
+       r = kvm_arch_vcpu_init(vcpu);
+       if (r)
+               goto fail;
+ 
+       kvm_purge_vmm_mapping(vcpu);
+       r = 0;
+ fail:
+       local_irq_restore(psr);
+       return r;
   }
   
   int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
                                     struct kvm_mp_state *mp_state)
   {
-       return -EINVAL;
+       int r = 0;
+ 
+       vcpu_load(vcpu);
+       vcpu->arch.mp_state = mp_state->mp_state;
+       if (vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)
+               r = vcpu_reset(vcpu);
+       vcpu_put(vcpu);
+       return r;
   }
diff --combined arch/x86/kvm/i8254.c

index 1bf8f57a30411d69afc00438456555f9310bd635,634132a9a512391d324def8826390a709d257c80..11c6725fb798b6967d60f07ea920fc594d292994
--- 1/arch/x86/kvm/i8254.c
--- 2/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@@ -200,13 -200,14 +200,14 @@@ static int __pit_timer_fn(struct kvm_kp
   
         if (!atomic_inc_and_test(&pt->pending))
                 set_bit(KVM_REQ_PENDING_TIMER, &vcpu0->requests);
-       if (vcpu0 && waitqueue_active(&vcpu0->wq)) {
-               vcpu0->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+ 
+       if (vcpu0 && waitqueue_active(&vcpu0->wq))
                 wake_up_interruptible(&vcpu0->wq);
-       }
   
- -      pt->timer.expires = ktime_add_ns(pt->timer.expires, pt->period);
- -      pt->scheduled = ktime_to_ns(pt->timer.expires);
+ +      hrtimer_add_expires_ns(&pt->timer, pt->period);
-       pt->scheduled = ktime_to_ns(hrtimer_get_expires(&pt->timer));
++      pt->scheduled = hrtimer_get_expires_ns(&pt->timer);
+       if (pt->period)
- -              ps->channels[0].count_load_time = pt->timer.expires;
++              ps->channels[0].count_load_time = hrtimer_get_expires(&pt->timer);
   
         return (pt->period == 0 ? 0 : 1);
   }
@@@ -215,12 -216,22 +216,22 @@@ int pit_has_pending_timer(struct kvm_vc
   {
         struct kvm_pit *pit = vcpu->kvm->arch.vpit;
   
-       if (pit && vcpu->vcpu_id == 0 && pit->pit_state.inject_pending)
+       if (pit && vcpu->vcpu_id == 0 && pit->pit_state.irq_ack)
                 return atomic_read(&pit->pit_state.pit_timer.pending);
- 
         return 0;
   }
   
+ static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian)
+ {
+       struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state,
+                                                irq_ack_notifier);
+       spin_lock(&ps->inject_lock);
+       if (atomic_dec_return(&ps->pit_timer.pending) < 0)
+               atomic_inc(&ps->pit_timer.pending);
+       ps->irq_ack = 1;
+       spin_unlock(&ps->inject_lock);
+ }
+ 
   static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
   {
         struct kvm_kpit_state *ps;
@@@ -246,7 -257,7 +257,7 @@@ void __kvm_migrate_pit_timer(struct kvm
   
         timer = &pit->pit_state.pit_timer.timer;
         if (hrtimer_cancel(timer))
- -              hrtimer_start(timer, timer->expires, HRTIMER_MODE_ABS);
+ +              hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
   }
   
   static void destroy_pit_timer(struct kvm_kpit_timer *pt)
@@@ -255,8 -266,9 +266,9 @@@
         hrtimer_cancel(&pt->timer);
   }
   
- static void create_pit_timer(struct kvm_kpit_timer *pt, u32 val, int is_period)
+ static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period)
   {
+       struct kvm_kpit_timer *pt = &ps->pit_timer;
         s64 interval;
   
         interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ);
@@@ -268,6 -280,7 +280,7 @@@
         pt->period = (is_period == 0) ? 0 : interval;
         pt->timer.function = pit_timer_fn;
         atomic_set(&pt->pending, 0);
+       ps->irq_ack = 1;
   
         hrtimer_start(&pt->timer, ktime_add_ns(ktime_get(), interval),
                       HRTIMER_MODE_ABS);
@@@ -302,11 -315,11 +315,11 @@@ static void pit_load_count(struct kvm *
         case 1:
           /* FIXME: enhance mode 4 precision */
         case 4:
-               create_pit_timer(&ps->pit_timer, val, 0);
+               create_pit_timer(ps, val, 0);
                 break;
         case 2:
         case 3:
-               create_pit_timer(&ps->pit_timer, val, 1);
+               create_pit_timer(ps, val, 1);
                 break;
         default:
                 destroy_pit_timer(&ps->pit_timer);
@@@ -520,7 -533,7 +533,7 @@@ void kvm_pit_reset(struct kvm_pit *pit
         mutex_unlock(&pit->pit_state.lock);
   
         atomic_set(&pit->pit_state.pit_timer.pending, 0);
-       pit->pit_state.inject_pending = 1;
+       pit->pit_state.irq_ack = 1;
   }
   
   struct kvm_pit *kvm_create_pit(struct kvm *kvm)
@@@ -534,6 -547,7 +547,7 @@@
   
         mutex_init(&pit->pit_state.lock);
         mutex_lock(&pit->pit_state.lock);
+       spin_lock_init(&pit->pit_state.inject_lock);
   
         /* Initialize PIO device */
         pit->dev.read = pit_ioport_read;
@@@ -555,6 -569,9 +569,9 @@@
         pit_state->pit = pit;
         hrtimer_init(&pit_state->pit_timer.timer,
                      CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+       pit_state->irq_ack_notifier.gsi = 0;
+       pit_state->irq_ack_notifier.irq_acked = kvm_pit_ack_irq;
+       kvm_register_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier);
         mutex_unlock(&pit->pit_state.lock);
   
         kvm_pit_reset(pit);
@@@ -578,10 -595,8 +595,8 @@@ void kvm_free_pit(struct kvm *kvm
   static void __inject_pit_timer_intr(struct kvm *kvm)
   {
         mutex_lock(&kvm->lock);
-       kvm_ioapic_set_irq(kvm->arch.vioapic, 0, 1);
-       kvm_ioapic_set_irq(kvm->arch.vioapic, 0, 0);
-       kvm_pic_set_irq(pic_irqchip(kvm), 0, 1);
-       kvm_pic_set_irq(pic_irqchip(kvm), 0, 0);
+       kvm_set_irq(kvm, 0, 1);
+       kvm_set_irq(kvm, 0, 0);
         mutex_unlock(&kvm->lock);
   }
   
@@@ -592,37 -607,19 +607,19 @@@ void kvm_inject_pit_timer_irqs(struct k
         struct kvm_kpit_state *ps;
   
         if (vcpu && pit) {
+               int inject = 0;
                 ps = &pit->pit_state;
   
-               /* Try to inject pending interrupts when:
-                * 1. Pending exists
-                * 2. Last interrupt was accepted or waited for too long time*/
-               if (atomic_read(&ps->pit_timer.pending) &&
-                   (ps->inject_pending ||
-                   (jiffies - ps->last_injected_time
-                               >= KVM_MAX_PIT_INTR_INTERVAL))) {
-                       ps->inject_pending = 0;
-                       __inject_pit_timer_intr(kvm);
-                       ps->last_injected_time = jiffies;
-               }
-       }
- }
- 
- void kvm_pit_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
- {
-       struct kvm_arch *arch = &vcpu->kvm->arch;
-       struct kvm_kpit_state *ps;
- 
-       if (vcpu && arch->vpit) {
-               ps = &arch->vpit->pit_state;
-               if (atomic_read(&ps->pit_timer.pending) &&
-               (((arch->vpic->pics[0].imr & 1) == 0 &&
-                 arch->vpic->pics[0].irq_base == vec) ||
-                 (arch->vioapic->redirtbl[0].fields.vector == vec &&
-                 arch->vioapic->redirtbl[0].fields.mask != 1))) {
-                       ps->inject_pending = 1;
-                       atomic_dec(&ps->pit_timer.pending);
-                       ps->channels[0].count_load_time = ktime_get();
+               /* Try to inject pending interrupts when
+                * last one has been acked.
+                */
+               spin_lock(&ps->inject_lock);
+               if (atomic_read(&ps->pit_timer.pending) && ps->irq_ack) {
+                       ps->irq_ack = 0;
+                       inject = 1;
                 }
+               spin_unlock(&ps->inject_lock);
+               if (inject)
+                       __inject_pit_timer_intr(kvm);
         }
   }
diff --combined arch/x86/kvm/lapic.c

index a5b61de6adf1c7cb4e8b43cef1fe0dfaca0b4e20,6571926bfd339b498c2ca06835b71d8ead494787..0fc3cab48943da8a3c513c7c753a4792c6743198
--- 1/arch/x86/kvm/lapic.c
--- 2/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@@ -32,6 -32,7 +32,7 @@@
   #include <asm/current.h>
   #include <asm/apicdef.h>
   #include <asm/atomic.h>
+ #include "kvm_cache_regs.h"
   #include "irq.h"
   
   #define PRId64 "d"
@@@ -338,13 -339,7 +339,7 @@@ static int __apic_accept_irq(struct kvm
                 } else
                         apic_clear_vector(vector, apic->regs + APIC_TMR);
   
-               if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
-                       kvm_vcpu_kick(vcpu);
-               else if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) {
-                       vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
-                       if (waitqueue_active(&vcpu->wq))
-                               wake_up_interruptible(&vcpu->wq);
-               }
+               kvm_vcpu_kick(vcpu);
   
                 result = (orig_irr == 0);
                 break;
@@@ -370,21 -365,18 +365,18 @@@
                         vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
                         kvm_vcpu_kick(vcpu);
                 } else {
-                       printk(KERN_DEBUG
-                              "Ignoring de-assert INIT to vcpu %d\n",
-                              vcpu->vcpu_id);
+                       apic_debug("Ignoring de-assert INIT to vcpu %d\n",
+                                  vcpu->vcpu_id);
                 }
- 
                 break;
   
         case APIC_DM_STARTUP:
-               printk(KERN_DEBUG "SIPI to vcpu %d vector 0x%02x\n",
-                      vcpu->vcpu_id, vector);
+               apic_debug("SIPI to vcpu %d vector 0x%02x\n",
+                          vcpu->vcpu_id, vector);
                 if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
                         vcpu->arch.sipi_vector = vector;
                         vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED;
-                       if (waitqueue_active(&vcpu->wq))
-                               wake_up_interruptible(&vcpu->wq);
+                       kvm_vcpu_kick(vcpu);
                 }
                 break;
   
@@@ -438,7 -430,7 +430,7 @@@ struct kvm_vcpu *kvm_get_lowest_prio_vc
   static void apic_set_eoi(struct kvm_lapic *apic)
   {
         int vector = apic_find_highest_isr(apic);
- 
+       int trigger_mode;
         /*
          * Not every write EOI will has corresponding ISR,
          * one example is when Kernel check timer on setup_IO_APIC
@@@ -450,7 -442,10 +442,10 @@@
         apic_update_ppr(apic);
   
         if (apic_test_and_clear_vector(vector, apic->regs + APIC_TMR))
-               kvm_ioapic_update_eoi(apic->vcpu->kvm, vector);
+               trigger_mode = IOAPIC_LEVEL_TRIG;
+       else
+               trigger_mode = IOAPIC_EDGE_TRIG;
+       kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
   }
   
   static void apic_send_ipi(struct kvm_lapic *apic)
@@@ -558,8 -553,7 +553,7 @@@ static void __report_tpr_access(struct 
         struct kvm_run *run = vcpu->run;
   
         set_bit(KVM_REQ_REPORT_TPR_ACCESS, &vcpu->requests);
-       kvm_x86_ops->cache_regs(vcpu);
-       run->tpr_access.rip = vcpu->arch.rip;
+       run->tpr_access.rip = kvm_rip_read(vcpu);
         run->tpr_access.is_write = write;
   }
   
@@@ -683,9 -677,9 +677,9 @@@ static void apic_mmio_write(struct kvm_
          * Refer SDM 8.4.1
          */
         if (len != 4 || alignment) {
-               if (printk_ratelimit())
-                       printk(KERN_ERR "apic write: bad size=%d %lx\n",
-                              len, (long)address);
+               /* Don't shout loud, $infamous_os would cause only noise. */
+               apic_debug("apic write: bad size=%d %lx\n",
+                          len, (long)address);
                 return;
         }
   
@@@ -947,13 -941,14 +941,12 @@@ static int __apic_timer_fn(struct kvm_l
   
         if(!atomic_inc_and_test(&apic->timer.pending))
                 set_bit(KVM_REQ_PENDING_TIMER, &apic->vcpu->requests);
-       if (waitqueue_active(q)) {
-               apic->vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+       if (waitqueue_active(q))
                 wake_up_interruptible(q);
-       }
+ 
         if (apic_lvtt_period(apic)) {
                 result = 1;
- -              apic->timer.dev.expires = ktime_add_ns(
- -                                      apic->timer.dev.expires,
- -                                      apic->timer.period);
+ +              hrtimer_add_expires_ns(&apic->timer.dev, apic->timer.period);
         }
         return result;
   }
@@@ -1122,7 -1117,7 +1115,7 @@@ void __kvm_migrate_apic_timer(struct kv
   
         timer = &apic->timer.dev;
         if (hrtimer_cancel(timer))
- -              hrtimer_start(timer, timer->expires, HRTIMER_MODE_ABS);
+ +              hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
   }
   
   void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
diff --combined drivers/s390/crypto/ap_bus.c

index 6f02f1e674d4ca9438315d07e4fb110b329c965b,326db1e827c4dbe41f70419a9af53b1a997a7e7a..e3fe6838293ad4f238cd9bb81df92a59ec61d72b
--- 1/drivers/s390/crypto/ap_bus.c
--- 2/drivers/s390/crypto/ap_bus.c
+++ b/drivers/s390/crypto/ap_bus.c
@@@ -659,9 -659,9 +659,9 @@@ static ssize_t poll_timeout_store(struc
         hr_time = ktime_set(0, poll_timeout);
   
         if (!hrtimer_is_queued(&ap_poll_timer) ||
- -          !hrtimer_forward(&ap_poll_timer, ap_poll_timer.expires, hr_time)) {
- -              ap_poll_timer.expires = hr_time;
- -              hrtimer_start(&ap_poll_timer, hr_time, HRTIMER_MODE_ABS);
+ +          !hrtimer_forward(&ap_poll_timer, hrtimer_get_expires(&ap_poll_timer), hr_time)) {
+ +              hrtimer_set_expires(&ap_poll_timer, hr_time);
+ +              hrtimer_start_expires(&ap_poll_timer, HRTIMER_MODE_ABS);
         }
         return count;
   }
@@@ -892,8 -892,8 +892,8 @@@ static void ap_scan_bus(struct work_str
   
                 ap_dev->device.bus = &ap_bus_type;
                 ap_dev->device.parent = ap_root_device;
-               snprintf(ap_dev->device.bus_id, BUS_ID_SIZE, "card%02x",
-                        AP_QID_DEVICE(ap_dev->qid));
+               dev_set_name(&ap_dev->device, "card%02x",
+                            AP_QID_DEVICE(ap_dev->qid));
                 ap_dev->device.release = ap_device_release;
                 rc = device_register(&ap_dev->device);
                 if (rc) {
diff --combined fs/compat.c

index 133ed7f5d681ef111922953fb5a1b2ca99cb5bba,5f9ec449c799854e19a9190b489ccf4a23fc0b03..3b58c32be526301998fe7d0698bbdf98ac84d3a2
--- 1/fs/compat.c
--- 2/fs/compat.c
+++ b/fs/compat.c
@@@ -137,6 -137,45 +137,45 @@@ asmlinkage long compat_sys_utimes(char 
         return compat_sys_futimesat(AT_FDCWD, filename, t);
   }
   
+ static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf)
+ {
+       compat_ino_t ino = stat->ino;
+       typeof(ubuf->st_uid) uid = 0;
+       typeof(ubuf->st_gid) gid = 0;
+       int err;
+ 
+       SET_UID(uid, stat->uid);
+       SET_GID(gid, stat->gid);
+ 
+       if ((u64) stat->size > MAX_NON_LFS ||
+           !old_valid_dev(stat->dev) ||
+           !old_valid_dev(stat->rdev))
+               return -EOVERFLOW;
+       if (sizeof(ino) < sizeof(stat->ino) && ino != stat->ino)
+               return -EOVERFLOW;
+ 
+       if (clear_user(ubuf, sizeof(*ubuf)))
+               return -EFAULT;
+ 
+       err  = __put_user(old_encode_dev(stat->dev), &ubuf->st_dev);
+       err |= __put_user(ino, &ubuf->st_ino);
+       err |= __put_user(stat->mode, &ubuf->st_mode);
+       err |= __put_user(stat->nlink, &ubuf->st_nlink);
+       err |= __put_user(uid, &ubuf->st_uid);
+       err |= __put_user(gid, &ubuf->st_gid);
+       err |= __put_user(old_encode_dev(stat->rdev), &ubuf->st_rdev);
+       err |= __put_user(stat->size, &ubuf->st_size);
+       err |= __put_user(stat->atime.tv_sec, &ubuf->st_atime);
+       err |= __put_user(stat->atime.tv_nsec, &ubuf->st_atime_nsec);
+       err |= __put_user(stat->mtime.tv_sec, &ubuf->st_mtime);
+       err |= __put_user(stat->mtime.tv_nsec, &ubuf->st_mtime_nsec);
+       err |= __put_user(stat->ctime.tv_sec, &ubuf->st_ctime);
+       err |= __put_user(stat->ctime.tv_nsec, &ubuf->st_ctime_nsec);
+       err |= __put_user(stat->blksize, &ubuf->st_blksize);
+       err |= __put_user(stat->blocks, &ubuf->st_blocks);
+       return err;
+ }
+ 
   asmlinkage long compat_sys_newstat(char __user * filename,
                 struct compat_stat __user *statbuf)
   {
@@@ -1239,7 -1278,7 +1278,7 @@@ static int compat_count(compat_uptr_t _
                         if (!p)
                                 break;
                         argv++;
-                       if(++i > max)
+                       if (i++ >= max)
                                 return -E2BIG;
                 }
         }
@@@ -1436,57 -1475,6 +1475,57 @@@ out_ret
   
   #define __COMPAT_NFDBITS       (8 * sizeof(compat_ulong_t))
   
+ +static int poll_select_copy_remaining(struct timespec *end_time, void __user *p,
+ +                                    int timeval, int ret)
+ +{
+ +      struct timespec ts;
+ +
+ +      if (!p)
+ +              return ret;
+ +
+ +      if (current->personality & STICKY_TIMEOUTS)
+ +              goto sticky;
+ +
+ +      /* No update for zero timeout */
+ +      if (!end_time->tv_sec && !end_time->tv_nsec)
+ +              return ret;
+ +
+ +      ktime_get_ts(&ts);
+ +      ts = timespec_sub(*end_time, ts);
+ +      if (ts.tv_sec < 0)
+ +              ts.tv_sec = ts.tv_nsec = 0;
+ +
+ +      if (timeval) {
+ +              struct compat_timeval rtv;
+ +
+ +              rtv.tv_sec = ts.tv_sec;
+ +              rtv.tv_usec = ts.tv_nsec / NSEC_PER_USEC;
+ +
+ +              if (!copy_to_user(p, &rtv, sizeof(rtv)))
+ +                      return ret;
+ +      } else {
+ +              struct compat_timespec rts;
+ +
+ +              rts.tv_sec = ts.tv_sec;
+ +              rts.tv_nsec = ts.tv_nsec;
+ +
+ +              if (!copy_to_user(p, &rts, sizeof(rts)))
+ +                      return ret;
+ +      }
+ +      /*
+ +       * If an application puts its timeval in read-only memory, we
+ +       * don't want the Linux-specific update to the timeval to
+ +       * cause a fault after the select has completed
+ +       * successfully. However, because we're not updating the
+ +       * timeval, we can't restart the system call.
+ +       */
+ +
+ +sticky:
+ +      if (ret == -ERESTARTNOHAND)
+ +              ret = -EINTR;
+ +      return ret;
+ +}
+ +
   /*
    * Ooo, nasty.  We need here to frob 32-bit unsigned longs to
    * 64-bit unsigned longs.
@@@ -1568,8 -1556,7 +1607,8 @@@ int compat_set_fd_set(unsigned long nr
         ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)
   
   int compat_core_sys_select(int n, compat_ulong_t __user *inp,
- -      compat_ulong_t __user *outp, compat_ulong_t __user *exp, s64 *timeout)
+ +      compat_ulong_t __user *outp, compat_ulong_t __user *exp,
+ +      struct timespec *end_time)
   {
         fd_set_bits fds;
         void *bits;
@@@ -1616,7 -1603,7 +1655,7 @@@
         zero_fd_set(n, fds.res_out);
         zero_fd_set(n, fds.res_ex);
   
- -      ret = do_select(n, &fds, timeout);
+ +      ret = do_select(n, &fds, end_time);
   
         if (ret < 0)
                 goto out;
@@@ -1642,7 -1629,7 +1681,7 @@@ asmlinkage long compat_sys_select(int n
         compat_ulong_t __user *outp, compat_ulong_t __user *exp,
         struct compat_timeval __user *tvp)
   {
- -      s64 timeout = -1;
+ +      struct timespec end_time, *to = NULL;
         struct compat_timeval tv;
         int ret;
   
@@@ -1650,14 -1637,43 +1689,14 @@@
                 if (copy_from_user(&tv, tvp, sizeof(tv)))
                         return -EFAULT;
   
- -              if (tv.tv_sec < 0 || tv.tv_usec < 0)
+ +              to = &end_time;
+ +              if (poll_select_set_timeout(to, tv.tv_sec,
+ +                                          tv.tv_usec * NSEC_PER_USEC))
                         return -EINVAL;
- -
- -              /* Cast to u64 to make GCC stop complaining */
- -              if ((u64)tv.tv_sec >= (u64)MAX_INT64_SECONDS)
- -                      timeout = -1;   /* infinite */
- -              else {
- -                      timeout = DIV_ROUND_UP(tv.tv_usec, 1000000/HZ);
- -                      timeout += tv.tv_sec * HZ;
- -              }
         }
   
- -      ret = compat_core_sys_select(n, inp, outp, exp, &timeout);
- -
- -      if (tvp) {
- -              struct compat_timeval rtv;
- -
- -              if (current->personality & STICKY_TIMEOUTS)
- -                      goto sticky;
- -              rtv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ));
- -              rtv.tv_sec = timeout;
- -              if (compat_timeval_compare(&rtv, &tv) >= 0)
- -                      rtv = tv;
- -              if (copy_to_user(tvp, &rtv, sizeof(rtv))) {
- -sticky:
- -                      /*
- -                       * If an application puts its timeval in read-only
- -                       * memory, we don't want the Linux-specific update to
- -                       * the timeval to cause a fault after the select has
- -                       * completed successfully. However, because we're not
- -                       * updating the timeval, we can't restart the system
- -                       * call.
- -                       */
- -                      if (ret == -ERESTARTNOHAND)
- -                              ret = -EINTR;
- -              }
- -      }
+ +      ret = compat_core_sys_select(n, inp, outp, exp, to);
+ +      ret = poll_select_copy_remaining(&end_time, tvp, 1, ret);
   
         return ret;
   }
@@@ -1670,16 -1686,15 +1709,16 @@@ asmlinkage long compat_sys_pselect7(in
   {
         compat_sigset_t ss32;
         sigset_t ksigmask, sigsaved;
- -      s64 timeout = MAX_SCHEDULE_TIMEOUT;
         struct compat_timespec ts;
+ +      struct timespec end_time, *to = NULL;
         int ret;
   
         if (tsp) {
                 if (copy_from_user(&ts, tsp, sizeof(ts)))
                         return -EFAULT;
   
- -              if (ts.tv_sec < 0 || ts.tv_nsec < 0)
+ +              to = &end_time;
+ +              if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
                         return -EINVAL;
         }
   
@@@ -1694,8 -1709,51 +1733,8 @@@
                 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
         }
   
- -      do {
- -              if (tsp) {
- -                      if ((unsigned long)ts.tv_sec < MAX_SELECT_SECONDS) {
- -                              timeout = DIV_ROUND_UP(ts.tv_nsec, 1000000000/HZ);
- -                              timeout += ts.tv_sec * (unsigned long)HZ;
- -                              ts.tv_sec = 0;
- -                              ts.tv_nsec = 0;
- -                      } else {
- -                              ts.tv_sec -= MAX_SELECT_SECONDS;
- -                              timeout = MAX_SELECT_SECONDS * HZ;
- -                      }
- -              }
- -
- -              ret = compat_core_sys_select(n, inp, outp, exp, &timeout);
- -
- -      } while (!ret && !timeout && tsp && (ts.tv_sec || ts.tv_nsec));
- -
- -      if (tsp) {
- -              struct compat_timespec rts;
- -
- -              if (current->personality & STICKY_TIMEOUTS)
- -                      goto sticky;
- -
- -              rts.tv_sec = timeout / HZ;
- -              rts.tv_nsec = (timeout % HZ) * (NSEC_PER_SEC/HZ);
- -              if (rts.tv_nsec >= NSEC_PER_SEC) {
- -                      rts.tv_sec++;
- -                      rts.tv_nsec -= NSEC_PER_SEC;
- -              }
- -              if (compat_timespec_compare(&rts, &ts) >= 0)
- -                      rts = ts;
- -              if (copy_to_user(tsp, &rts, sizeof(rts))) {
- -sticky:
- -                      /*
- -                       * If an application puts its timeval in read-only
- -                       * memory, we don't want the Linux-specific update to
- -                       * the timeval to cause a fault after the select has
- -                       * completed successfully. However, because we're not
- -                       * updating the timeval, we can't restart the system
- -                       * call.
- -                       */
- -                      if (ret == -ERESTARTNOHAND)
- -                              ret = -EINTR;
- -              }
- -      }
+ +      ret = compat_core_sys_select(n, inp, outp, exp, to);
+ +      ret = poll_select_copy_remaining(&end_time, tsp, 0, ret);
   
         if (ret == -ERESTARTNOHAND) {
                 /*
@@@ -1740,16 -1798,18 +1779,16 @@@ asmlinkage long compat_sys_ppoll(struc
         compat_sigset_t ss32;
         sigset_t ksigmask, sigsaved;
         struct compat_timespec ts;
- -      s64 timeout = -1;
+ +      struct timespec end_time, *to = NULL;
         int ret;
   
         if (tsp) {
                 if (copy_from_user(&ts, tsp, sizeof(ts)))
                         return -EFAULT;
   
- -              /* We assume that ts.tv_sec is always lower than
- -                 the number of seconds that can be expressed in
- -                 an s64. Otherwise the compiler bitches at us */
- -              timeout = DIV_ROUND_UP(ts.tv_nsec, 1000000000/HZ);
- -              timeout += ts.tv_sec * HZ;
+ +              to = &end_time;
+ +              if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
+ +                      return -EINVAL;
         }
   
         if (sigmask) {
@@@ -1763,7 -1823,7 +1802,7 @@@
                 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
         }
   
- -      ret = do_sys_poll(ufds, nfds, &timeout);
+ +      ret = do_sys_poll(ufds, nfds, to);
   
         /* We can restart this syscall, usually */
         if (ret == -EINTR) {
@@@ -1781,7 -1841,31 +1820,7 @@@
         } else if (sigmask)
                 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
   
- -      if (tsp && timeout >= 0) {
- -              struct compat_timespec rts;
- -
- -              if (current->personality & STICKY_TIMEOUTS)
- -                      goto sticky;
- -              /* Yes, we know it's actually an s64, but it's also positive. */
- -              rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) *
- -                                      1000;
- -              rts.tv_sec = timeout;
- -              if (compat_timespec_compare(&rts, &ts) >= 0)
- -                      rts = ts;
- -              if (copy_to_user(tsp, &rts, sizeof(rts))) {
- -sticky:
- -                      /*
- -                       * If an application puts its timeval in read-only
- -                       * memory, we don't want the Linux-specific update to
- -                       * the timeval to cause a fault after the select has
- -                       * completed successfully. However, because we're not
- -                       * updating the timeval, we can't restart the system
- -                       * call.
- -                       */
- -                      if (ret == -ERESTARTNOHAND && timeout >= 0)
- -                              ret = -EINTR;
- -              }
- -      }
+ +      ret = poll_select_copy_remaining(&end_time, tsp, 0, ret);
   
         return ret;
   }
diff --combined include/linux/hrtimer.h

index 1e6f731381d9ce5faad45fe30aef6de1eebfc116,2f245fe63bda5611ad909c1452aa8a79c4f29eb4..cb25c1cc2352fc2ef5a743a183d1cc90645aff9a
--- 1/include/linux/hrtimer.h
--- 2/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@@ -20,8 -20,6 +20,8 @@@
   #include <linux/init.h>
   #include <linux/list.h>
   #include <linux/wait.h>
+ +#include <linux/percpu.h>
+ +
   
   struct hrtimer_clock_base;
   struct hrtimer_cpu_base;
@@@ -49,14 -47,22 +49,22 @@@ enum hrtimer_restart 
    *    HRTIMER_CB_IRQSAFE:             Callback may run in hardirq context
    *    HRTIMER_CB_IRQSAFE_NO_RESTART:  Callback may run in hardirq context and
    *                                    does not restart the timer
-  *    HRTIMER_CB_IRQSAFE_NO_SOFTIRQ:  Callback must run in hardirq context
-  *                                    Special mode for tick emultation
+  *    HRTIMER_CB_IRQSAFE_PERCPU:      Callback must run in hardirq context
+  *                                    Special mode for tick emulation and
+  *                                    scheduler timer. Such timers are per
+  *                                    cpu and not allowed to be migrated on
+  *                                    cpu unplug.
+  *    HRTIMER_CB_IRQSAFE_UNLOCKED:    Callback should run in hardirq context
+  *                                    with timer->base lock unlocked
+  *                                    used for timers which call wakeup to
+  *                                    avoid lock order problems with rq->lock
    */
   enum hrtimer_cb_mode {
         HRTIMER_CB_SOFTIRQ,
         HRTIMER_CB_IRQSAFE,
         HRTIMER_CB_IRQSAFE_NO_RESTART,
-       HRTIMER_CB_IRQSAFE_NO_SOFTIRQ,
+       HRTIMER_CB_IRQSAFE_PERCPU,
+       HRTIMER_CB_IRQSAFE_UNLOCKED,
   };
   
   /*
@@@ -69,9 -75,10 +77,10 @@@
    * 0x02               callback function running
    * 0x04               callback pending (high resolution mode)
    *
-  * Special case:
+  * Special cases:
    * 0x03               callback function running and enqueued
    *            (was requeued on another CPU)
+  * 0x09               timer was migrated on CPU hotunplug
    * The "callback function running and enqueued" status is only possible on
    * SMP. It happens for example when a posix timer expired and the callback
    * queued a signal. Between dropping the lock which protects the posix timer
@@@ -89,6 -96,7 +98,7 @@@
   #define HRTIMER_STATE_ENQUEUED        0x01
   #define HRTIMER_STATE_CALLBACK        0x02
   #define HRTIMER_STATE_PENDING 0x04
+ #define HRTIMER_STATE_MIGRATE 0x08
   
   /**
    * struct hrtimer - the basic hrtimer structure
@@@ -113,8 -121,7 +123,8 @@@
    */
   struct hrtimer {
         struct rb_node                  node;
- -      ktime_t                         expires;
+ +      ktime_t                         _expires;
+ +      ktime_t                         _softexpires;
         enum hrtimer_restart            (*function)(struct hrtimer *);
         struct hrtimer_clock_base       *base;
         unsigned long                   state;
@@@ -200,71 -207,6 +210,71 @@@ struct hrtimer_cpu_base 
   #endif
   };
   
+ +static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time)
+ +{
+ +      timer->_expires = time;
+ +      timer->_softexpires = time;
+ +}
+ +
+ +static inline void hrtimer_set_expires_range(struct hrtimer *timer, ktime_t time, ktime_t delta)
+ +{
+ +      timer->_softexpires = time;
+ +      timer->_expires = ktime_add_safe(time, delta);
+ +}
+ +
+ +static inline void hrtimer_set_expires_range_ns(struct hrtimer *timer, ktime_t time, unsigned long delta)
+ +{
+ +      timer->_softexpires = time;
+ +      timer->_expires = ktime_add_safe(time, ns_to_ktime(delta));
+ +}
+ +
+ +static inline void hrtimer_set_expires_tv64(struct hrtimer *timer, s64 tv64)
+ +{
+ +      timer->_expires.tv64 = tv64;
+ +      timer->_softexpires.tv64 = tv64;
+ +}
+ +
+ +static inline void hrtimer_add_expires(struct hrtimer *timer, ktime_t time)
+ +{
+ +      timer->_expires = ktime_add_safe(timer->_expires, time);
+ +      timer->_softexpires = ktime_add_safe(timer->_softexpires, time);
+ +}
+ +
+ +static inline void hrtimer_add_expires_ns(struct hrtimer *timer, unsigned long ns)
+ +{
+ +      timer->_expires = ktime_add_ns(timer->_expires, ns);
+ +      timer->_softexpires = ktime_add_ns(timer->_softexpires, ns);
+ +}
+ +
+ +static inline ktime_t hrtimer_get_expires(const struct hrtimer *timer)
+ +{
+ +      return timer->_expires;
+ +}
+ +
+ +static inline ktime_t hrtimer_get_softexpires(const struct hrtimer *timer)
+ +{
+ +      return timer->_softexpires;
+ +}
+ +
+ +static inline s64 hrtimer_get_expires_tv64(const struct hrtimer *timer)
+ +{
+ +      return timer->_expires.tv64;
+ +}
+ +static inline s64 hrtimer_get_softexpires_tv64(const struct hrtimer *timer)
+ +{
+ +      return timer->_softexpires.tv64;
+ +}
+ +
+ +static inline s64 hrtimer_get_expires_ns(const struct hrtimer *timer)
+ +{
+ +      return ktime_to_ns(timer->_expires);
+ +}
+ +
+ +static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer)
+ +{
+ +    return ktime_sub(timer->_expires, timer->base->get_time());
+ +}
+ +
   #ifdef CONFIG_HIGH_RES_TIMERS
   struct clock_event_device;
   
@@@ -285,8 -227,6 +295,8 @@@ static inline int hrtimer_is_hres_activ
         return timer->base->cpu_base->hres_active;
   }
   
+ +extern void hrtimer_peek_ahead_timers(void);
+ +
   /*
    * The resolution of the clocks. The resolution value is returned in
    * the clock_getres() system call to give application programmers an
@@@ -309,7 -249,6 +319,7 @@@
    * is expired in the next softirq when the clock was advanced.
    */
   static inline void clock_was_set(void) { }
+ +static inline void hrtimer_peek_ahead_timers(void) { }
   
   static inline void hres_timers_resume(void) { }
   
@@@ -331,10 -270,6 +341,10 @@@ static inline int hrtimer_is_hres_activ
   extern ktime_t ktime_get(void);
   extern ktime_t ktime_get_real(void);
   
+ +
+ +DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
+ +
+ +
   /* Exported timer functions: */
   
   /* Initialize timers: */
@@@ -359,25 -294,12 +369,25 @@@ static inline void destroy_hrtimer_on_s
   /* Basic timer operations: */
   extern int hrtimer_start(struct hrtimer *timer, ktime_t tim,
                          const enum hrtimer_mode mode);
+ +extern int hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
+ +                      unsigned long range_ns, const enum hrtimer_mode mode);
   extern int hrtimer_cancel(struct hrtimer *timer);
   extern int hrtimer_try_to_cancel(struct hrtimer *timer);
   
+ +static inline int hrtimer_start_expires(struct hrtimer *timer,
+ +                                              enum hrtimer_mode mode)
+ +{
+ +      unsigned long delta;
+ +      ktime_t soft, hard;
+ +      soft = hrtimer_get_softexpires(timer);
+ +      hard = hrtimer_get_expires(timer);
+ +      delta = ktime_to_ns(ktime_sub(hard, soft));
+ +      return hrtimer_start_range_ns(timer, soft, delta, mode);
+ +}
+ +
   static inline int hrtimer_restart(struct hrtimer *timer)
   {
- -      return hrtimer_start(timer, timer->expires, HRTIMER_MODE_ABS);
+ +      return hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
   }
   
   /* Query timers: */
@@@ -434,10 -356,6 +444,10 @@@ extern long hrtimer_nanosleep_restart(s
   extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
                                  struct task_struct *tsk);
   
+ +extern int schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
+ +                                              const enum hrtimer_mode mode);
+ +extern int schedule_hrtimeout(ktime_t *expires, const enum hrtimer_mode mode);
+ +
   /* Soft interrupt function to run the hrtimer queues: */
   extern void hrtimer_run_queues(void);
   extern void hrtimer_run_pending(void);
diff --combined include/linux/sched.h

index dcc03fd5a7f3df86a005ac88b1dc2843c5c178b4,c226c7b82946ce1d830853c4fd3b9bad3d92fa0d..de53c109fd04f1de9eb9fd973af5ae3e7f790922
--- 1/include/linux/sched.h
--- 2/include/linux/sched.h
+++ b/include/linux/sched.h
@@@ -352,7 -352,7 +352,7 @@@ arch_get_unmapped_area_topdown(struct f
   extern void arch_unmap_area(struct mm_struct *, unsigned long);
   extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
   
- #if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
+ #if USE_SPLIT_PTLOCKS
   /*
    * The mm counters are not protected by its page_table_lock,
    * so must be incremented atomically.
@@@ -363,7 -363,7 +363,7 @@@
   #define inc_mm_counter(mm, member) atomic_long_inc(&(mm)->_##member)
   #define dec_mm_counter(mm, member) atomic_long_dec(&(mm)->_##member)
   
- #else  /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
+ #else  /* !USE_SPLIT_PTLOCKS */
   /*
    * The mm counters are protected by its page_table_lock,
    * so can be incremented directly.
@@@ -374,7 -374,7 +374,7 @@@
   #define inc_mm_counter(mm, member) (mm)->_##member++
   #define dec_mm_counter(mm, member) (mm)->_##member--
   
- #endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
+ #endif /* !USE_SPLIT_PTLOCKS */
   
   #define get_mm_rss(mm)                                        \
         (get_mm_counter(mm, file_rss) + get_mm_counter(mm, anon_rss))
@@@ -451,8 -451,8 +451,8 @@@ struct signal_struct 
          * - everyone except group_exit_task is stopped during signal delivery
          *   of fatal signals, group_exit_task processes the signal.
          */
-       struct task_struct      *group_exit_task;
         int                     notify_count;
+       struct task_struct      *group_exit_task;
   
         /* thread group stop support, overloads group_exit_code too */
         int                     group_stop_count;
@@@ -824,6 -824,9 +824,9 @@@ struct sched_domain 
         unsigned int ttwu_move_affine;
         unsigned int ttwu_move_balance;
   #endif
+ #ifdef CONFIG_SCHED_DEBUG
+       char *name;
+ #endif
   };
   
   extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
@@@ -897,7 -900,7 +900,7 @@@ struct sched_class 
         void (*yield_task) (struct rq *rq);
         int  (*select_task_rq)(struct task_struct *p, int sync);
   
-       void (*check_preempt_curr) (struct rq *rq, struct task_struct *p);
+       void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int sync);
   
         struct task_struct * (*pick_next_task) (struct rq *rq);
         void (*put_prev_task) (struct rq *rq, struct task_struct *p);
@@@ -1010,8 -1013,8 +1013,8 @@@ struct sched_entity 
   
   struct sched_rt_entity {
         struct list_head run_list;
-       unsigned int time_slice;
         unsigned long timeout;
+       unsigned int time_slice;
         int nr_cpus_allowed;
   
         struct sched_rt_entity *back;
@@@ -1301,12 -1304,6 +1304,12 @@@ struct task_struct 
         int latency_record_count;
         struct latency_record latency_record[LT_SAVECOUNT];
   #endif
+ +      /*
+ +       * time slack values; these are used to round up poll() and
+ +       * select() etc timeout values. These are in nanoseconds.
+ +       */
+ +      unsigned long timer_slack_ns;
+ +      unsigned long default_timer_slack_ns;
   };
   
   /*
diff --combined include/linux/time.h

index 726976478480a644c13a75ebf8f048e44a38ef98,51e883df0fa51fe598832747477533fc4303e30a..c911ef69ea87e2dbb2c7cd8988dce850d13b79be
--- 1/include/linux/time.h
--- 2/include/linux/time.h
+++ b/include/linux/time.h
@@@ -29,6 -29,8 +29,8 @@@ struct timezone 
   
   #ifdef __KERNEL__
   
+ extern struct timezone sys_tz;
+ 
   /* Parameters used to convert the timespec values: */
   #define MSEC_PER_SEC  1000L
   #define USEC_PER_MSEC 1000L
@@@ -38,8 -40,6 +40,8 @@@
   #define NSEC_PER_SEC  1000000000L
   #define FSEC_PER_SEC  1000000000000000L
   
+ +#define TIME_T_MAX    (time_t)((1UL << ((sizeof(time_t) << 3) - 1)) - 1)
+ +
   static inline int timespec_equal(const struct timespec *a,
                                    const struct timespec *b)
   {
@@@ -74,8 -74,6 +76,8 @@@ extern unsigned long mktime(const unsig
                             const unsigned int min, const unsigned int sec);
   
   extern void set_normalized_timespec(struct timespec *ts, time_t sec, long nsec);
+ +extern struct timespec timespec_add_safe(const struct timespec lhs,
+ +                                       const struct timespec rhs);
   
   /*
    * sub = lhs - rhs, in normalized form
diff --combined kernel/fork.c

index 4308d75f0fa5bc67c36748d31bb0e2518ab58990,30de644a40c4d4d9617d650589f4c90da1e977a2..37b3e150ae3956759b684c6806bbb6dc5ad12376
--- 1/kernel/fork.c
--- 2/kernel/fork.c
+++ b/kernel/fork.c
@@@ -802,6 -802,7 +802,7 @@@ static int copy_signal(unsigned long cl
   
         sig->leader = 0;        /* session leadership doesn't inherit */
         sig->tty_old_pgrp = NULL;
+       sig->tty = NULL;
   
         sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
         sig->gtime = cputime_zero;
@@@ -838,6 -839,7 +839,7 @@@
   void __cleanup_signal(struct signal_struct *sig)
   {
         exit_thread_group_keys(sig);
+       tty_kref_put(sig->tty);
         kmem_cache_free(signal_cachep, sig);
   }
   
@@@ -987,8 -989,6 +989,8 @@@ static struct task_struct *copy_process
         p->prev_utime = cputime_zero;
         p->prev_stime = cputime_zero;
   
+ +      p->default_timer_slack_ns = current->timer_slack_ns;
+ +
   #ifdef CONFIG_DETECT_SOFTLOCKUP
         p->last_switch_count = 0;
         p->last_switch_timestamp = 0;
@@@ -1229,7 -1229,8 +1231,8 @@@
                                 p->nsproxy->pid_ns->child_reaper = p;
   
                         p->signal->leader_pid = pid;
-                       p->signal->tty = current->signal->tty;
+                       tty_kref_put(p->signal->tty);
+                       p->signal->tty = tty_kref_get(current->signal->tty);
                         set_task_pgrp(p, task_pgrp_nr(current));
                         set_task_session(p, task_session_nr(current));
                         attach_pid(p, PIDTYPE_PGID, task_pgrp(current));
diff --combined kernel/hrtimer.c

index 2bd230be1cb5ae3a778546d2ed0e651e153ba46e,cdec83e722fa1b80ee0af0f828d8e47532431a20..51ee90bca2dedcc8b882f96130c3486fe3111ce5
--- 1/kernel/hrtimer.c
--- 2/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@@ -517,7 -517,7 +517,7 @@@ static void hrtimer_force_reprogram(str
                 if (!base->first)
                         continue;
                 timer = rb_entry(base->first, struct hrtimer, node);
- -              expires = ktime_sub(timer->expires, base->offset);
+ +              expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
                 if (expires.tv64 < cpu_base->expires_next.tv64)
                         cpu_base->expires_next = expires;
         }
@@@ -539,10 -539,10 +539,10 @@@ static int hrtimer_reprogram(struct hrt
                              struct hrtimer_clock_base *base)
   {
         ktime_t *expires_next = &__get_cpu_var(hrtimer_bases).expires_next;
- -      ktime_t expires = ktime_sub(timer->expires, base->offset);
+ +      ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
         int res;
   
- -      WARN_ON_ONCE(timer->expires.tv64 < 0);
+ +      WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0);
   
         /*
          * When the callback is running, we do not reprogram the clock event
@@@ -672,13 -672,14 +672,14 @@@ static inline int hrtimer_enqueue_repro
                          */
                         BUG_ON(timer->function(timer) != HRTIMER_NORESTART);
                         return 1;
-               case HRTIMER_CB_IRQSAFE_NO_SOFTIRQ:
+               case HRTIMER_CB_IRQSAFE_PERCPU:
+               case HRTIMER_CB_IRQSAFE_UNLOCKED:
                         /*
                          * This is solely for the sched tick emulation with
                          * dynamic tick support to ensure that we do not
                          * restart the tick right on the edge and end up with
                          * the tick timer in the softirq ! The calling site
-                        * takes care of this.
+                        * takes care of this. Also used for hrtimer sleeper !
                          */
                         debug_hrtimer_deactivate(timer);
                         return 1;
@@@ -794,7 -795,7 +795,7 @@@ u64 hrtimer_forward(struct hrtimer *tim
         u64 orun = 1;
         ktime_t delta;
   
- -      delta = ktime_sub(now, timer->expires);
+ +      delta = ktime_sub(now, hrtimer_get_expires(timer));
   
         if (delta.tv64 < 0)
                 return 0;
@@@ -806,8 -807,8 +807,8 @@@
                 s64 incr = ktime_to_ns(interval);
   
                 orun = ktime_divns(delta, incr);
- -              timer->expires = ktime_add_ns(timer->expires, incr * orun);
- -              if (timer->expires.tv64 > now.tv64)
+ +              hrtimer_add_expires_ns(timer, incr * orun);
+ +              if (hrtimer_get_expires_tv64(timer) > now.tv64)
                         return orun;
                 /*
                  * This (and the ktime_add() below) is the
@@@ -815,7 -816,7 +816,7 @@@
                  */
                 orun++;
         }
- -      timer->expires = ktime_add_safe(timer->expires, interval);
+ +      hrtimer_add_expires(timer, interval);
   
         return orun;
   }
@@@ -847,8 -848,7 +848,8 @@@ static void enqueue_hrtimer(struct hrti
                  * We dont care about collisions. Nodes with
                  * the same expiry time stay together.
                  */
- -              if (timer->expires.tv64 < entry->expires.tv64) {
+ +              if (hrtimer_get_expires_tv64(timer) <
+ +                              hrtimer_get_expires_tv64(entry)) {
                         link = &(*link)->rb_left;
                 } else {
                         link = &(*link)->rb_right;
@@@ -945,10 -945,9 +946,10 @@@ remove_hrtimer(struct hrtimer *timer, s
   }
   
   /**
- - * hrtimer_start - (re)start an relative timer on the current CPU
+ + * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU
    * @timer:    the timer to be added
    * @tim:      expiry time
+ + * @delta_ns: "slack" range for the timer
    * @mode:     expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL)
    *
    * Returns:
@@@ -956,8 -955,7 +957,8 @@@
    *  1 when the timer was active
    */
   int
- -hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
+ +hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, unsigned long delta_ns,
+ +                      const enum hrtimer_mode mode)
   {
         struct hrtimer_clock_base *base, *new_base;
         unsigned long flags;
@@@ -985,7 -983,7 +986,7 @@@
   #endif
         }
   
- -      timer->expires = tim;
+ +      hrtimer_set_expires_range_ns(timer, tim, delta_ns);
   
         timer_stats_hrtimer_set_start_info(timer);
   
@@@ -1018,26 -1016,8 +1019,26 @@@
   
         return ret;
   }
+ +EXPORT_SYMBOL_GPL(hrtimer_start_range_ns);
+ +
+ +/**
+ + * hrtimer_start - (re)start an hrtimer on the current CPU
+ + * @timer:    the timer to be added
+ + * @tim:      expiry time
+ + * @mode:     expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL)
+ + *
+ + * Returns:
+ + *  0 on success
+ + *  1 when the timer was active
+ + */
+ +int
+ +hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
+ +{
+ +      return hrtimer_start_range_ns(timer, tim, 0, mode);
+ +}
   EXPORT_SYMBOL_GPL(hrtimer_start);
   
+ +
   /**
    * hrtimer_try_to_cancel - try to deactivate a timer
    * @timer:    hrtimer to stop
@@@ -1097,7 -1077,7 +1098,7 @@@ ktime_t hrtimer_get_remaining(const str
         ktime_t rem;
   
         base = lock_hrtimer_base(timer, &flags);
- -      rem = ktime_sub(timer->expires, base->get_time());
+ +      rem = hrtimer_expires_remaining(timer);
         unlock_hrtimer_base(timer, &flags);
   
         return rem;
@@@ -1129,7 -1109,7 +1130,7 @@@ ktime_t hrtimer_get_next_event(void
                                 continue;
   
                         timer = rb_entry(base->first, struct hrtimer, node);
- -                      delta.tv64 = timer->expires.tv64;
+ +                      delta.tv64 = hrtimer_get_expires_tv64(timer);
                         delta = ktime_sub(delta, base->get_time());
                         if (delta.tv64 < mindelta.tv64)
                                 mindelta.tv64 = delta.tv64;
@@@ -1266,7 -1246,8 +1267,8 @@@ static void __run_hrtimer(struct hrtime
         timer_stats_account_hrtimer(timer);
   
         fn = timer->function;
-       if (timer->cb_mode == HRTIMER_CB_IRQSAFE_NO_SOFTIRQ) {
+       if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU ||
+           timer->cb_mode == HRTIMER_CB_IRQSAFE_UNLOCKED) {
                 /*
                  * Used for scheduler timers, avoid lock inversion with
                  * rq->lock and tasklist_lock.
@@@ -1329,23 -1310,10 +1331,23 @@@ void hrtimer_interrupt(struct clock_eve
   
                         timer = rb_entry(node, struct hrtimer, node);
   
- -                      if (basenow.tv64 < timer->expires.tv64) {
+ +                      /*
+ +                       * The immediate goal for using the softexpires is
+ +                       * minimizing wakeups, not running timers at the
+ +                       * earliest interrupt after their soft expiration.
+ +                       * This allows us to avoid using a Priority Search
+ +                       * Tree, which can answer a stabbing query for
+ +                       * overlapping intervals and instead use the simple
+ +                       * BST we already have.
+ +                       * We don't add extra wakeups by delaying timers that
+ +                       * are right-of a not yet expired timer, because that
+ +                       * timer will have to trigger a wakeup anyway.
+ +                       */
+ +
+ +                      if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer)) {
                                 ktime_t expires;
   
- -                              expires = ktime_sub(timer->expires,
+ +                              expires = ktime_sub(hrtimer_get_expires(timer),
                                                     base->offset);
                                 if (expires.tv64 < expires_next.tv64)
                                         expires_next = expires;
@@@ -1381,36 -1349,6 +1383,36 @@@
                 raise_softirq(HRTIMER_SOFTIRQ);
   }
   
+ +/**
+ + * hrtimer_peek_ahead_timers -- run soft-expired timers now
+ + *
+ + * hrtimer_peek_ahead_timers will peek at the timer queue of
+ + * the current cpu and check if there are any timers for which
+ + * the soft expires time has passed. If any such timers exist,
+ + * they are run immediately and then removed from the timer queue.
+ + *
+ + */
+ +void hrtimer_peek_ahead_timers(void)
+ +{
+ +      unsigned long flags;
+ +      struct tick_device *td;
+ +      struct clock_event_device *dev;
+ +
+ +      if (!hrtimer_hres_active())
+ +              return;
+ +
+ +      local_irq_save(flags);
+ +      td = &__get_cpu_var(tick_cpu_device);
+ +      if (!td)
+ +              goto out;
+ +      dev = td->evtdev;
+ +      if (!dev)
+ +              goto out;
+ +      hrtimer_interrupt(dev);
+ +out:
+ +      local_irq_restore(flags);
+ +}
+ +
   static void run_hrtimer_softirq(struct softirq_action *h)
   {
         run_hrtimer_pending(&__get_cpu_var(hrtimer_bases));
@@@ -1478,8 -1416,7 +1480,8 @@@ void hrtimer_run_queues(void
                         struct hrtimer *timer;
   
                         timer = rb_entry(node, struct hrtimer, node);
- -                      if (base->softirq_time.tv64 <= timer->expires.tv64)
+ +                      if (base->softirq_time.tv64 <=
+ +                                      hrtimer_get_expires_tv64(timer))
                                 break;
   
                         if (timer->cb_mode == HRTIMER_CB_SOFTIRQ) {
@@@ -1517,7 -1454,7 +1519,7 @@@ void hrtimer_init_sleeper(struct hrtime
         sl->timer.function = hrtimer_wakeup;
         sl->task = task;
   #ifdef CONFIG_HIGH_RES_TIMERS
-       sl->timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
+       sl->timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED;
   #endif
   }
   
@@@ -1527,7 -1464,7 +1529,7 @@@ static int __sched do_nanosleep(struct 
   
         do {
                 set_current_state(TASK_INTERRUPTIBLE);
- -              hrtimer_start(&t->timer, t->timer.expires, mode);
+ +              hrtimer_start_expires(&t->timer, mode);
                 if (!hrtimer_active(&t->timer))
                         t->task = NULL;
   
@@@ -1549,7 -1486,7 +1551,7 @@@ static int update_rmtp(struct hrtimer *
         struct timespec rmt;
         ktime_t rem;
   
- -      rem = ktime_sub(timer->expires, timer->base->get_time());
+ +      rem = hrtimer_expires_remaining(timer);
         if (rem.tv64 <= 0)
                 return 0;
         rmt = ktime_to_timespec(rem);
@@@ -1568,7 -1505,7 +1570,7 @@@ long __sched hrtimer_nanosleep_restart(
   
         hrtimer_init_on_stack(&t.timer, restart->nanosleep.index,
                                 HRTIMER_MODE_ABS);
- -      t.timer.expires.tv64 = restart->nanosleep.expires;
+ +      hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires);
   
         if (do_nanosleep(&t, HRTIMER_MODE_ABS))
                 goto out;
@@@ -1593,14 -1530,9 +1595,14 @@@ long hrtimer_nanosleep(struct timespec 
         struct restart_block *restart;
         struct hrtimer_sleeper t;
         int ret = 0;
+ +      unsigned long slack;
+ +
+ +      slack = current->timer_slack_ns;
+ +      if (rt_task(current))
+ +              slack = 0;
   
         hrtimer_init_on_stack(&t.timer, clockid, mode);
- -      t.timer.expires = timespec_to_ktime(*rqtp);
+ +      hrtimer_set_expires_range_ns(&t.timer, timespec_to_ktime(*rqtp), slack);
         if (do_nanosleep(&t, mode))
                 goto out;
   
@@@ -1620,7 -1552,7 +1622,7 @@@
         restart->fn = hrtimer_nanosleep_restart;
         restart->nanosleep.index = t.timer.base->index;
         restart->nanosleep.rmtp = rmtp;
- -      restart->nanosleep.expires = t.timer.expires.tv64;
+ +      restart->nanosleep.expires = hrtimer_get_expires_tv64(&t.timer);
   
         ret = -ERESTART_RESTARTBLOCK;
   out:
@@@ -1661,29 -1593,95 +1663,95 @@@ static void __cpuinit init_hrtimers_cpu
   
   #ifdef CONFIG_HOTPLUG_CPU
   
- static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
-                               struct hrtimer_clock_base *new_base)
+ static int migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
+                               struct hrtimer_clock_base *new_base, int dcpu)
   {
         struct hrtimer *timer;
         struct rb_node *node;
+       int raise = 0;
   
         while ((node = rb_first(&old_base->active))) {
                 timer = rb_entry(node, struct hrtimer, node);
                 BUG_ON(hrtimer_callback_running(timer));
                 debug_hrtimer_deactivate(timer);
-               __remove_hrtimer(timer, old_base, HRTIMER_STATE_INACTIVE, 0);
+ 
+               /*
+                * Should not happen. Per CPU timers should be
+                * canceled _before_ the migration code is called
+                */
+               if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU) {
+                       __remove_hrtimer(timer, old_base,
+                                        HRTIMER_STATE_INACTIVE, 0);
+                       WARN(1, "hrtimer (%p %p)active but cpu %d dead\n",
+                            timer, timer->function, dcpu);
+                       continue;
+               }
+ 
+               /*
+                * Mark it as STATE_MIGRATE not INACTIVE otherwise the
+                * timer could be seen as !active and just vanish away
+                * under us on another CPU
+                */
+               __remove_hrtimer(timer, old_base, HRTIMER_STATE_MIGRATE, 0);
                 timer->base = new_base;
                 /*
                  * Enqueue the timer. Allow reprogramming of the event device
                  */
                 enqueue_hrtimer(timer, new_base, 1);
+ 
+ #ifdef CONFIG_HIGH_RES_TIMERS
+               /*
+                * Happens with high res enabled when the timer was
+                * already expired and the callback mode is
+                * HRTIMER_CB_IRQSAFE_UNLOCKED (hrtimer_sleeper). The
+                * enqueue code does not move them to the soft irq
+                * pending list for performance/latency reasons, but
+                * in the migration state, we need to do that
+                * otherwise we end up with a stale timer.
+                */
+               if (timer->state == HRTIMER_STATE_MIGRATE) {
+                       timer->state = HRTIMER_STATE_PENDING;
+                       list_add_tail(&timer->cb_entry,
+                                     &new_base->cpu_base->cb_pending);
+                       raise = 1;
+               }
+ #endif
+               /* Clear the migration state bit */
+               timer->state &= ~HRTIMER_STATE_MIGRATE;
+       }
+       return raise;
+ }
+ 
+ #ifdef CONFIG_HIGH_RES_TIMERS
+ static int migrate_hrtimer_pending(struct hrtimer_cpu_base *old_base,
+                                  struct hrtimer_cpu_base *new_base)
+ {
+       struct hrtimer *timer;
+       int raise = 0;
+ 
+       while (!list_empty(&old_base->cb_pending)) {
+               timer = list_entry(old_base->cb_pending.next,
+                                  struct hrtimer, cb_entry);
+ 
+               __remove_hrtimer(timer, timer->base, HRTIMER_STATE_PENDING, 0);
+               timer->base = &new_base->clock_base[timer->base->index];
+               list_add_tail(&timer->cb_entry, &new_base->cb_pending);
+               raise = 1;
         }
+       return raise;
+ }
+ #else
+ static int migrate_hrtimer_pending(struct hrtimer_cpu_base *old_base,
+                                  struct hrtimer_cpu_base *new_base)
+ {
+       return 0;
   }
+ #endif
   
   static void migrate_hrtimers(int cpu)
   {
         struct hrtimer_cpu_base *old_base, *new_base;
-       int i;
+       int i, raise = 0;
   
         BUG_ON(cpu_online(cpu));
         old_base = &per_cpu(hrtimer_bases, cpu);
@@@ -1696,14 -1694,21 +1764,21 @@@
         spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
   
         for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
-               migrate_hrtimer_list(&old_base->clock_base[i],
-                                    &new_base->clock_base[i]);
+               if (migrate_hrtimer_list(&old_base->clock_base[i],
+                                        &new_base->clock_base[i], cpu))
+                       raise = 1;
         }
   
+       if (migrate_hrtimer_pending(old_base, new_base))
+               raise = 1;
+ 
         spin_unlock(&old_base->lock);
         spin_unlock(&new_base->lock);
         local_irq_enable();
         put_cpu_var(hrtimer_bases);
+ 
+       if (raise)
+               hrtimer_raise_softirq();
   }
   #endif /* CONFIG_HOTPLUG_CPU */
   
@@@ -1748,103 -1753,3 +1823,103 @@@ void __init hrtimers_init(void
   #endif
   }
   
+ +/**
+ + * schedule_hrtimeout_range - sleep until timeout
+ + * @expires:  timeout value (ktime_t)
+ + * @delta:    slack in expires timeout, in nanoseconds (unsigned long)
+ + * @mode:     timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
+ + *
+ + * Make the current task sleep until the given expiry time has
+ + * elapsed. The routine will return immediately unless
+ + * the current task state has been set (see set_current_state()).
+ + *
+ + * The @delta argument gives the kernel the freedom to schedule the
+ + * actual wakeup to a time that is both power and performance friendly.
+ + * The kernel gives the normal best-effort behavior for "@expires+@delta",
+ + * and may decide to fire the timer earlier, but no earlier than @expires.
+ + *
+ + * You can set the task state as follows -
+ + *
+ + * %TASK_UNINTERRUPTIBLE - at least @expires time is guaranteed to
+ + * pass before the routine returns.
+ + *
+ + * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
+ + * delivered to the current task.
+ + *
+ + * The current task state is guaranteed to be TASK_RUNNING when this
+ + * routine returns.
+ + *
+ + * Returns 0 when the timer has expired, otherwise -EINTR
+ + */
+ +int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
+ +                             const enum hrtimer_mode mode)
+ +{
+ +      struct hrtimer_sleeper t;
+ +
+ +      /*
+ +       * Optimize when a zero timeout value is given. It does not
+ +       * matter whether this is an absolute or a relative time.
+ +       */
+ +      if (expires && !expires->tv64) {
+ +              __set_current_state(TASK_RUNNING);
+ +              return 0;
+ +      }
+ +
+ +      /*
+ +       * A NULL parameter means "infinite"
+ +       */
+ +      if (!expires) {
+ +              schedule();
+ +              __set_current_state(TASK_RUNNING);
+ +              return -EINTR;
+ +      }
+ +
+ +      hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, mode);
+ +      hrtimer_set_expires_range_ns(&t.timer, *expires, delta);
+ +
+ +      hrtimer_init_sleeper(&t, current);
+ +
+ +      hrtimer_start_expires(&t.timer, mode);
+ +      if (!hrtimer_active(&t.timer))
+ +              t.task = NULL;
+ +
+ +      if (likely(t.task))
+ +              schedule();
+ +
+ +      hrtimer_cancel(&t.timer);
+ +      destroy_hrtimer_on_stack(&t.timer);
+ +
+ +      __set_current_state(TASK_RUNNING);
+ +
+ +      return !t.task ? 0 : -EINTR;
+ +}
+ +EXPORT_SYMBOL_GPL(schedule_hrtimeout_range);
+ +
+ +/**
+ + * schedule_hrtimeout - sleep until timeout
+ + * @expires:  timeout value (ktime_t)
+ + * @mode:     timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
+ + *
+ + * Make the current task sleep until the given expiry time has
+ + * elapsed. The routine will return immediately unless
+ + * the current task state has been set (see set_current_state()).
+ + *
+ + * You can set the task state as follows -
+ + *
+ + * %TASK_UNINTERRUPTIBLE - at least @expires time is guaranteed to
+ + * pass before the routine returns.
+ + *
+ + * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
+ + * delivered to the current task.
+ + *
+ + * The current task state is guaranteed to be TASK_RUNNING when this
+ + * routine returns.
+ + *
+ + * Returns 0 when the timer has expired, otherwise -EINTR
+ + */
+ +int __sched schedule_hrtimeout(ktime_t *expires,
+ +                             const enum hrtimer_mode mode)
+ +{
+ +      return schedule_hrtimeout_range(expires, 0, mode);
+ +}
+ +EXPORT_SYMBOL_GPL(schedule_hrtimeout);
diff --combined kernel/posix-timers.c

index f85efcdcab2d0e68037ee1ad03dc0cf2981c9635,5131e5471169226ef8db42f20792c8ffdac6d12b..ee204586149a0c8ab6144808c0610320545f7b8f
--- 1/kernel/posix-timers.c
--- 2/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@@ -441,7 -441,7 +441,7 @@@ static struct k_itimer * alloc_posix_ti
                 return tmr;
         if (unlikely(!(tmr->sigq = sigqueue_alloc()))) {
                 kmem_cache_free(posix_timers_cache, tmr);
-               tmr = NULL;
+               return NULL;
         }
         memset(&tmr->sigq->info, 0, sizeof(siginfo_t));
         return tmr;
@@@ -668,7 -668,7 +668,7 @@@ common_timer_get(struct k_itimer *timr
             (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE))
                 timr->it_overrun += (unsigned int) hrtimer_forward(timer, now, iv);
   
- -      remaining = ktime_sub(timer->expires, now);
+ +      remaining = ktime_sub(hrtimer_get_expires(timer), now);
         /* Return 0 only, when the timer is expired and not pending */
         if (remaining.tv64 <= 0) {
                 /*
@@@ -762,7 -762,7 +762,7 @@@ common_timer_set(struct k_itimer *timr
         hrtimer_init(&timr->it.real.timer, timr->it_clock, mode);
         timr->it.real.timer.function = posix_timer_fn;
   
- -      timer->expires = timespec_to_ktime(new_setting->it_value);
+ +      hrtimer_set_expires(timer, timespec_to_ktime(new_setting->it_value));
   
         /* Convert interval */
         timr->it.real.interval = timespec_to_ktime(new_setting->it_interval);
@@@ -771,12 -771,14 +771,12 @@@
         if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) {
                 /* Setup correct expiry time for relative timers */
                 if (mode == HRTIMER_MODE_REL) {
- -                      timer->expires =
- -                              ktime_add_safe(timer->expires,
- -                                             timer->base->get_time());
+ +                      hrtimer_add_expires(timer, timer->base->get_time());
                 }
                 return 0;
         }
   
- -      hrtimer_start(timer, timer->expires, mode);
+ +      hrtimer_start_expires(timer, mode);
         return 0;
   }
   
diff --combined kernel/sched.c

index e46b5afa200d6cc63730ddb388c70c856610ea46,6f230596bd0c1d21a2c68ffbff8207e93dcd65b5..eb3c72953615c06d19b08be2480a3144b259c55d
--- 1/kernel/sched.c
--- 2/kernel/sched.c
+++ b/kernel/sched.c
@@@ -201,14 -201,19 +201,19 @@@ void init_rt_bandwidth(struct rt_bandwi
         hrtimer_init(&rt_b->rt_period_timer,
                         CLOCK_MONOTONIC, HRTIMER_MODE_REL);
         rt_b->rt_period_timer.function = sched_rt_period_timer;
-       rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
+       rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED;
+ }
+ 
+ static inline int rt_bandwidth_enabled(void)
+ {
+       return sysctl_sched_rt_runtime >= 0;
   }
   
   static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
   {
         ktime_t now;
   
-       if (rt_b->rt_runtime == RUNTIME_INF)
+       if (rt_bandwidth_enabled() && rt_b->rt_runtime == RUNTIME_INF)
                 return;
   
         if (hrtimer_active(&rt_b->rt_period_timer))
@@@ -221,8 -226,9 +226,8 @@@
   
                 now = hrtimer_cb_get_time(&rt_b->rt_period_timer);
                 hrtimer_forward(&rt_b->rt_period_timer, now, rt_b->rt_period);
- -              hrtimer_start(&rt_b->rt_period_timer,
- -                            rt_b->rt_period_timer.expires,
- -                            HRTIMER_MODE_ABS);
+ +              hrtimer_start_expires(&rt_b->rt_period_timer,
+ +                              HRTIMER_MODE_ABS);
         }
         spin_unlock(&rt_b->rt_runtime_lock);
   }
@@@ -297,9 -303,9 +302,9 @@@ static DEFINE_PER_CPU(struct cfs_rq, in
   static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity);
   static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp;
   #endif /* CONFIG_RT_GROUP_SCHED */
- #else /* !CONFIG_FAIR_GROUP_SCHED */
+ #else /* !CONFIG_USER_SCHED */
   #define root_task_group init_task_group
- #endif /* CONFIG_FAIR_GROUP_SCHED */
+ #endif /* CONFIG_USER_SCHED */
   
   /* task_group_lock serializes add/remove of task groups and also changes to
    * a task group's cpu shares.
@@@ -603,9 -609,9 +608,9 @@@ struct rq 
   
   static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
   
- static inline void check_preempt_curr(struct rq *rq, struct task_struct *p)
+ static inline void check_preempt_curr(struct rq *rq, struct task_struct *p, int sync)
   {
-       rq->curr->sched_class->check_preempt_curr(rq, p);
+       rq->curr->sched_class->check_preempt_curr(rq, p, sync);
   }
   
   static inline int cpu_of(struct rq *rq)
@@@ -1057,7 -1063,7 +1062,7 @@@ static void hrtick_start(struct rq *rq
         struct hrtimer *timer = &rq->hrtick_timer;
         ktime_t time = ktime_add_ns(timer->base->get_time(), delay);
   
- -      timer->expires = time;
+ +      hrtimer_set_expires(timer, time);
   
         if (rq == this_rq()) {
                 hrtimer_restart(timer);
@@@ -1086,7 -1092,7 +1091,7 @@@ hotplug_hrtick(struct notifier_block *n
         return NOTIFY_DONE;
   }
   
- static void init_hrtick(void)
+ static __init void init_hrtick(void)
   {
         hotcpu_notifier(hotplug_hrtick, 0);
   }
@@@ -1101,7 -1107,7 +1106,7 @@@ static void hrtick_start(struct rq *rq
         hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay), HRTIMER_MODE_REL);
   }
   
- static void init_hrtick(void)
+ static inline void init_hrtick(void)
   {
   }
   #endif /* CONFIG_SMP */
@@@ -1118,9 -1124,9 +1123,9 @@@ static void init_rq_hrtick(struct rq *r
   
         hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
         rq->hrtick_timer.function = hrtick;
-       rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
+       rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU;
   }
- #else
+ #else /* CONFIG_SCHED_HRTICK */
   static inline void hrtick_clear(struct rq *rq)
   {
   }
@@@ -1132,7 -1138,7 +1137,7 @@@ static inline void init_rq_hrtick(struc
   static inline void init_hrtick(void)
   {
   }
- #endif
+ #endif        /* CONFIG_SCHED_HRTICK */
   
   /*
    * resched_task - mark a task 'to be rescheduled now'.
@@@ -1379,38 -1385,24 +1384,24 @@@ static inline void dec_cpu_load(struct 
         update_load_sub(&rq->load, load);
   }
   
- #ifdef CONFIG_SMP
- static unsigned long source_load(int cpu, int type);
- static unsigned long target_load(int cpu, int type);
- static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);
- 
- static unsigned long cpu_avg_load_per_task(int cpu)
- {
-       struct rq *rq = cpu_rq(cpu);
- 
-       if (rq->nr_running)
-               rq->avg_load_per_task = rq->load.weight / rq->nr_running;
- 
-       return rq->avg_load_per_task;
- }
- 
- #ifdef CONFIG_FAIR_GROUP_SCHED
- 
- typedef void (*tg_visitor)(struct task_group *, int, struct sched_domain *);
+ #if (defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED)) || defined(CONFIG_RT_GROUP_SCHED)
+ typedef int (*tg_visitor)(struct task_group *, void *);
   
   /*
    * Iterate the full tree, calling @down when first entering a node and @up when
    * leaving it for the final time.
    */
- static void
- walk_tg_tree(tg_visitor down, tg_visitor up, int cpu, struct sched_domain *sd)
+ static int walk_tg_tree(tg_visitor down, tg_visitor up, void *data)
   {
         struct task_group *parent, *child;
+       int ret;
   
         rcu_read_lock();
         parent = &root_task_group;
   down:
-       (*down)(parent, cpu, sd);
+       ret = (*down)(parent, data);
+       if (ret)
+               goto out_unlock;
         list_for_each_entry_rcu(child, &parent->children, siblings) {
                 parent = child;
                 goto down;
@@@ -1418,15 -1410,43 +1409,43 @@@
   up:
                 continue;
         }
-       (*up)(parent, cpu, sd);
+       ret = (*up)(parent, data);
+       if (ret)
+               goto out_unlock;
   
         child = parent;
         parent = parent->parent;
         if (parent)
                 goto up;
+ out_unlock:
         rcu_read_unlock();
+ 
+       return ret;
   }
   
+ static int tg_nop(struct task_group *tg, void *data)
+ {
+       return 0;
+ }
+ #endif
+ 
+ #ifdef CONFIG_SMP
+ static unsigned long source_load(int cpu, int type);
+ static unsigned long target_load(int cpu, int type);
+ static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);
+ 
+ static unsigned long cpu_avg_load_per_task(int cpu)
+ {
+       struct rq *rq = cpu_rq(cpu);
+ 
+       if (rq->nr_running)
+               rq->avg_load_per_task = rq->load.weight / rq->nr_running;
+ 
+       return rq->avg_load_per_task;
+ }
+ 
+ #ifdef CONFIG_FAIR_GROUP_SCHED
+ 
   static void __set_se_shares(struct sched_entity *se, unsigned long shares);
   
   /*
@@@ -1485,11 -1505,11 +1504,11 @@@ __update_group_shares_cpu(struct task_g
    * This needs to be done in a bottom-up fashion because the rq weight of a
    * parent group depends on the shares of its child groups.
    */
- static void
- tg_shares_up(struct task_group *tg, int cpu, struct sched_domain *sd)
+ static int tg_shares_up(struct task_group *tg, void *data)
   {
         unsigned long rq_weight = 0;
         unsigned long shares = 0;
+       struct sched_domain *sd = data;
         int i;
   
         for_each_cpu_mask(i, sd->span) {
@@@ -1514,6 -1534,8 +1533,8 @@@
                 __update_group_shares_cpu(tg, i, shares, rq_weight);
                 spin_unlock_irqrestore(&rq->lock, flags);
         }
+ 
+       return 0;
   }
   
   /*
@@@ -1521,10 -1543,10 +1542,10 @@@
    * This needs to be done in a top-down fashion because the load of a child
    * group is a fraction of its parents load.
    */
- static void
- tg_load_down(struct task_group *tg, int cpu, struct sched_domain *sd)
+ static int tg_load_down(struct task_group *tg, void *data)
   {
         unsigned long load;
+       long cpu = (long)data;
   
         if (!tg->parent) {
                 load = cpu_rq(cpu)->load.weight;
@@@ -1535,11 -1557,8 +1556,8 @@@
         }
   
         tg->cfs_rq[cpu]->h_load = load;
- }
   
- static void
- tg_nop(struct task_group *tg, int cpu, struct sched_domain *sd)
- {
+       return 0;
   }
   
   static void update_shares(struct sched_domain *sd)
@@@ -1549,7 -1568,7 +1567,7 @@@
   
         if (elapsed >= (s64)(u64)sysctl_sched_shares_ratelimit) {
                 sd->last_update = now;
-               walk_tg_tree(tg_nop, tg_shares_up, 0, sd);
+               walk_tg_tree(tg_nop, tg_shares_up, sd);
         }
   }
   
@@@ -1560,9 -1579,9 +1578,9 @@@ static void update_shares_locked(struc
         spin_lock(&rq->lock);
   }
   
- static void update_h_load(int cpu)
+ static void update_h_load(long cpu)
   {
-       walk_tg_tree(tg_load_down, tg_nop, cpu, NULL);
+       walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
   }
   
   #else
@@@ -1920,11 -1939,8 +1938,8 @@@ unsigned long wait_task_inactive(struc
                 running = task_running(rq, p);
                 on_rq = p->se.on_rq;
                 ncsw = 0;
-               if (!match_state || p->state == match_state) {
-                       ncsw = p->nivcsw + p->nvcsw;
-                       if (unlikely(!ncsw))
-                               ncsw = 1;
-               }
+               if (!match_state || p->state == match_state)
+                       ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
                 task_rq_unlock(rq, &flags);
   
                 /*
@@@ -2284,7 -2300,7 +2299,7 @@@ out_running
         trace_mark(kernel_sched_wakeup,
                 "pid %d state %ld ## rq %p task %p rq->curr %p",
                 p->pid, p->state, rq, p, rq->curr);
-       check_preempt_curr(rq, p);
+       check_preempt_curr(rq, p, sync);
   
         p->state = TASK_RUNNING;
   #ifdef CONFIG_SMP
@@@ -2419,7 -2435,7 +2434,7 @@@ void wake_up_new_task(struct task_struc
         trace_mark(kernel_sched_wakeup_new,
                 "pid %d state %ld ## rq %p task %p rq->curr %p",
                 p->pid, p->state, rq, p, rq->curr);
-       check_preempt_curr(rq, p);
+       check_preempt_curr(rq, p, 0);
   #ifdef CONFIG_SMP
         if (p->sched_class->task_wake_up)
                 p->sched_class->task_wake_up(rq, p);
@@@ -2879,7 -2895,7 +2894,7 @@@ static void pull_task(struct rq *src_rq
          * Note that idle threads have a prio of MAX_PRIO, for this test
          * to be always true for them.
          */
-       check_preempt_curr(this_rq, p);
+       check_preempt_curr(this_rq, p, 0);
   }
   
   /*
@@@ -4626,6 -4642,15 +4641,15 @@@ __wake_up_sync(wait_queue_head_t *q, un
   }
   EXPORT_SYMBOL_GPL(__wake_up_sync);    /* For internal use only */
   
+ /**
+  * complete: - signals a single thread waiting on this completion
+  * @x:  holds the state of this particular completion
+  *
+  * This will wake up a single thread waiting on this completion. Threads will be
+  * awakened in the same order in which they were queued.
+  *
+  * See also complete_all(), wait_for_completion() and related routines.
+  */
   void complete(struct completion *x)
   {
         unsigned long flags;
@@@ -4637,6 -4662,12 +4661,12 @@@
   }
   EXPORT_SYMBOL(complete);
   
+ /**
+  * complete_all: - signals all threads waiting on this completion
+  * @x:  holds the state of this particular completion
+  *
+  * This will wake up all threads waiting on this particular completion event.
+  */
   void complete_all(struct completion *x)
   {
         unsigned long flags;
@@@ -4657,10 -4688,7 +4687,7 @@@ do_wait_for_common(struct completion *x
                 wait.flags |= WQ_FLAG_EXCLUSIVE;
                 __add_wait_queue_tail(&x->wait, &wait);
                 do {
-                       if ((state == TASK_INTERRUPTIBLE &&
-                            signal_pending(current)) ||
-                           (state == TASK_KILLABLE &&
-                            fatal_signal_pending(current))) {
+                       if (signal_pending_state(state, current)) {
                                 timeout = -ERESTARTSYS;
                                 break;
                         }
@@@ -4688,12 -4716,31 +4715,31 @@@ wait_for_common(struct completion *x, l
         return timeout;
   }
   
+ /**
+  * wait_for_completion: - waits for completion of a task
+  * @x:  holds the state of this particular completion
+  *
+  * This waits to be signaled for completion of a specific task. It is NOT
+  * interruptible and there is no timeout.
+  *
+  * See also similar routines (i.e. wait_for_completion_timeout()) with timeout
+  * and interrupt capability. Also see complete().
+  */
   void __sched wait_for_completion(struct completion *x)
   {
         wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE);
   }
   EXPORT_SYMBOL(wait_for_completion);
   
+ /**
+  * wait_for_completion_timeout: - waits for completion of a task (w/timeout)
+  * @x:  holds the state of this particular completion
+  * @timeout:  timeout value in jiffies
+  *
+  * This waits for either a completion of a specific task to be signaled or for a
+  * specified timeout to expire. The timeout is in jiffies. It is not
+  * interruptible.
+  */
   unsigned long __sched
   wait_for_completion_timeout(struct completion *x, unsigned long timeout)
   {
@@@ -4701,6 -4748,13 +4747,13 @@@
   }
   EXPORT_SYMBOL(wait_for_completion_timeout);
   
+ /**
+  * wait_for_completion_interruptible: - waits for completion of a task (w/intr)
+  * @x:  holds the state of this particular completion
+  *
+  * This waits for completion of a specific task to be signaled. It is
+  * interruptible.
+  */
   int __sched wait_for_completion_interruptible(struct completion *x)
   {
         long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_INTERRUPTIBLE);
@@@ -4710,6 -4764,14 +4763,14 @@@
   }
   EXPORT_SYMBOL(wait_for_completion_interruptible);
   
+ /**
+  * wait_for_completion_interruptible_timeout: - waits for completion (w/(to,intr))
+  * @x:  holds the state of this particular completion
+  * @timeout:  timeout value in jiffies
+  *
+  * This waits for either a completion of a specific task to be signaled or for a
+  * specified timeout to expire. It is interruptible. The timeout is in jiffies.
+  */
   unsigned long __sched
   wait_for_completion_interruptible_timeout(struct completion *x,
                                           unsigned long timeout)
@@@ -4718,6 -4780,13 +4779,13 @@@
   }
   EXPORT_SYMBOL(wait_for_completion_interruptible_timeout);
   
+ /**
+  * wait_for_completion_killable: - waits for completion of a task (killable)
+  * @x:  holds the state of this particular completion
+  *
+  * This waits to be signaled for completion of a specific task. It can be
+  * interrupted by a kill signal.
+  */
   int __sched wait_for_completion_killable(struct completion *x)
   {
         long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_KILLABLE);
@@@ -5120,7 -5189,8 +5188,8 @@@ recheck
                  * Do not allow realtime tasks into groups that have no runtime
                  * assigned.
                  */
-               if (rt_policy(policy) && task_group(p)->rt_bandwidth.rt_runtime == 0)
+               if (rt_bandwidth_enabled() && rt_policy(policy) &&
+                               task_group(p)->rt_bandwidth.rt_runtime == 0)
                         return -EPERM;
   #endif
   
@@@ -5956,7 -6026,7 +6025,7 @@@ static int __migrate_task(struct task_s
         set_task_cpu(p, dest_cpu);
         if (on_rq) {
                 activate_task(rq_dest, p, 0);
-               check_preempt_curr(rq_dest, p);
+               check_preempt_curr(rq_dest, p, 0);
         }
   done:
         ret = 1;
@@@ -6281,7 -6351,7 +6350,7 @@@ set_table_entry(struct ctl_table *entry
   static struct ctl_table *
   sd_alloc_ctl_domain_table(struct sched_domain *sd)
   {
-       struct ctl_table *table = sd_alloc_ctl_entry(12);
+       struct ctl_table *table = sd_alloc_ctl_entry(13);
   
         if (table == NULL)
                 return NULL;
@@@ -6309,7 -6379,9 +6378,9 @@@
                 sizeof(int), 0644, proc_dointvec_minmax);
         set_table_entry(&table[10], "flags", &sd->flags,
                 sizeof(int), 0644, proc_dointvec_minmax);
-       /* &table[11] is terminator */
+       set_table_entry(&table[11], "name", sd->name,
+               CORENAME_MAX_SIZE, 0444, proc_dostring);
+       /* &table[12] is terminator */
   
         return table;
   }
@@@ -7193,13 -7265,21 +7264,21 @@@ static void init_sched_groups_power(in
    * Non-inlined to reduce accumulated stack pressure in build_sched_domains()
    */
   
+ #ifdef CONFIG_SCHED_DEBUG
+ # define SD_INIT_NAME(sd, type)               sd->name = #type
+ #else
+ # define SD_INIT_NAME(sd, type)               do { } while (0)
+ #endif
+ 
   #define       SD_INIT(sd, type)       sd_init_##type(sd)
+ 
   #define SD_INIT_FUNC(type)    \
   static noinline void sd_init_##type(struct sched_domain *sd)  \
   {                                                             \
         memset(sd, 0, sizeof(*sd));                             \
         *sd = SD_##type##_INIT;                                 \
         sd->level = SD_LV_##type;                               \
+       SD_INIT_NAME(sd, type);                                 \
   }
   
   SD_INIT_FUNC(CPU)
@@@ -7695,24 -7775,27 +7774,27 @@@ static int dattrs_equal(struct sched_do
    * and partition_sched_domains() will fallback to the single partition
    * 'fallback_doms', it also forces the domains to be rebuilt.
    *
+  * If doms_new==NULL it will be replaced with cpu_online_map.
+  * ndoms_new==0 is a special case for destroying existing domains.
+  * It will not create the default domain.
+  *
    * Call with hotplug lock held
    */
   void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
                              struct sched_domain_attr *dattr_new)
   {
-       int i, j;
+       int i, j, n;
   
         mutex_lock(&sched_domains_mutex);
   
         /* always unregister in case we don't destroy any domains */
         unregister_sched_domain_sysctl();
   
-       if (doms_new == NULL)
-               ndoms_new = 0;
+       n = doms_new ? ndoms_new : 0;
   
         /* Destroy deleted domains */
         for (i = 0; i < ndoms_cur; i++) {
-               for (j = 0; j < ndoms_new; j++) {
+               for (j = 0; j < n; j++) {
                         if (cpus_equal(doms_cur[i], doms_new[j])
                             && dattrs_equal(dattr_cur, i, dattr_new, j))
                                 goto match1;
@@@ -7725,7 -7808,6 +7807,6 @@@ match1
   
         if (doms_new == NULL) {
                 ndoms_cur = 0;
-               ndoms_new = 1;
                 doms_new = &fallback_doms;
                 cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map);
                 dattr_new = NULL;
@@@ -7762,8 -7844,13 +7843,13 @@@ match2
   int arch_reinit_sched_domains(void)
   {
         get_online_cpus();
+ 
+       /* Destroy domains first to force the rebuild */
+       partition_sched_domains(0, NULL, NULL);
+ 
         rebuild_sched_domains();
         put_online_cpus();
+ 
         return 0;
   }
   
@@@ -7847,7 -7934,7 +7933,7 @@@ static int update_sched_domains(struct 
         case CPU_ONLINE_FROZEN:
         case CPU_DEAD:
         case CPU_DEAD_FROZEN:
-               partition_sched_domains(0, NULL, NULL);
+               partition_sched_domains(1, NULL, NULL);
                 return NOTIFY_OK;
   
         default:
@@@ -8234,20 -8321,25 +8320,25 @@@ void __might_sleep(char *file, int line
   #ifdef in_atomic
         static unsigned long prev_jiffy;        /* ratelimiting */
   
-       if ((in_atomic() || irqs_disabled()) &&
-           system_state == SYSTEM_RUNNING && !oops_in_progress) {
-               if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
-                       return;
-               prev_jiffy = jiffies;
-               printk(KERN_ERR "BUG: sleeping function called from invalid"
-                               " context at %s:%d\n", file, line);
-               printk("in_atomic():%d, irqs_disabled():%d\n",
-                       in_atomic(), irqs_disabled());
-               debug_show_held_locks(current);
-               if (irqs_disabled())
-                       print_irqtrace_events(current);
-               dump_stack();
-       }
+       if ((!in_atomic() && !irqs_disabled()) ||
+                   system_state != SYSTEM_RUNNING || oops_in_progress)
+               return;
+       if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
+               return;
+       prev_jiffy = jiffies;
+ 
+       printk(KERN_ERR
+               "BUG: sleeping function called from invalid context at %s:%d\n",
+                       file, line);
+       printk(KERN_ERR
+               "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n",
+                       in_atomic(), irqs_disabled(),
+                       current->pid, current->comm);
+ 
+       debug_show_held_locks(current);
+       if (irqs_disabled())
+               print_irqtrace_events(current);
+       dump_stack();
   #endif
   }
   EXPORT_SYMBOL(__might_sleep);
@@@ -8745,73 -8837,95 +8836,95 @@@ static DEFINE_MUTEX(rt_constraints_mute
   static unsigned long to_ratio(u64 period, u64 runtime)
   {
         if (runtime == RUNTIME_INF)
-               return 1ULL << 16;
+               return 1ULL << 20;
   
-       return div64_u64(runtime << 16, period);
+       return div64_u64(runtime << 20, period);
   }
   
- #ifdef CONFIG_CGROUP_SCHED
- static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
+ /* Must be called with tasklist_lock held */
+ static inline int tg_has_rt_tasks(struct task_group *tg)
   {
-       struct task_group *tgi, *parent = tg->parent;
-       unsigned long total = 0;
+       struct task_struct *g, *p;
   
-       if (!parent) {
-               if (global_rt_period() < period)
-                       return 0;
+       do_each_thread(g, p) {
+               if (rt_task(p) && rt_rq_of_se(&p->rt)->tg == tg)
+                       return 1;
+       } while_each_thread(g, p);
   
-               return to_ratio(period, runtime) <
-                       to_ratio(global_rt_period(), global_rt_runtime());
-       }
+       return 0;
+ }
   
-       if (ktime_to_ns(parent->rt_bandwidth.rt_period) < period)
-               return 0;
+ struct rt_schedulable_data {
+       struct task_group *tg;
+       u64 rt_period;
+       u64 rt_runtime;
+ };
   
-       rcu_read_lock();
-       list_for_each_entry_rcu(tgi, &parent->children, siblings) {
-               if (tgi == tg)
-                       continue;
+ static int tg_schedulable(struct task_group *tg, void *data)
+ {
+       struct rt_schedulable_data *d = data;
+       struct task_group *child;
+       unsigned long total, sum = 0;
+       u64 period, runtime;
+ 
+       period = ktime_to_ns(tg->rt_bandwidth.rt_period);
+       runtime = tg->rt_bandwidth.rt_runtime;
   
-               total += to_ratio(ktime_to_ns(tgi->rt_bandwidth.rt_period),
-                               tgi->rt_bandwidth.rt_runtime);
+       if (tg == d->tg) {
+               period = d->rt_period;
+               runtime = d->rt_runtime;
         }
-       rcu_read_unlock();
   
-       return total + to_ratio(period, runtime) <=
-               to_ratio(ktime_to_ns(parent->rt_bandwidth.rt_period),
-                               parent->rt_bandwidth.rt_runtime);
- }
- #elif defined CONFIG_USER_SCHED
- static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
- {
-       struct task_group *tgi;
-       unsigned long total = 0;
-       unsigned long global_ratio =
-               to_ratio(global_rt_period(), global_rt_runtime());
+       /*
+        * Cannot have more runtime than the period.
+        */
+       if (runtime > period && runtime != RUNTIME_INF)
+               return -EINVAL;
   
-       rcu_read_lock();
-       list_for_each_entry_rcu(tgi, &task_groups, list) {
-               if (tgi == tg)
-                       continue;
+       /*
+        * Ensure we don't starve existing RT tasks.
+        */
+       if (rt_bandwidth_enabled() && !runtime && tg_has_rt_tasks(tg))
+               return -EBUSY;
+ 
+       total = to_ratio(period, runtime);
+ 
+       /*
+        * Nobody can have more than the global setting allows.
+        */
+       if (total > to_ratio(global_rt_period(), global_rt_runtime()))
+               return -EINVAL;
   
-               total += to_ratio(ktime_to_ns(tgi->rt_bandwidth.rt_period),
-                               tgi->rt_bandwidth.rt_runtime);
+       /*
+        * The sum of our children's runtime should not exceed our own.
+        */
+       list_for_each_entry_rcu(child, &tg->children, siblings) {
+               period = ktime_to_ns(child->rt_bandwidth.rt_period);
+               runtime = child->rt_bandwidth.rt_runtime;
+ 
+               if (child == d->tg) {
+                       period = d->rt_period;
+                       runtime = d->rt_runtime;
+               }
+ 
+               sum += to_ratio(period, runtime);
         }
-       rcu_read_unlock();
   
-       return total + to_ratio(period, runtime) < global_ratio;
+       if (sum > total)
+               return -EINVAL;
+ 
+       return 0;
   }
- #endif
   
- /* Must be called with tasklist_lock held */
- static inline int tg_has_rt_tasks(struct task_group *tg)
+ static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
   {
-       struct task_struct *g, *p;
-       do_each_thread(g, p) {
-               if (rt_task(p) && rt_rq_of_se(&p->rt)->tg == tg)
-                       return 1;
-       } while_each_thread(g, p);
-       return 0;
+       struct rt_schedulable_data data = {
+               .tg = tg,
+               .rt_period = period,
+               .rt_runtime = runtime,
+       };
+ 
+       return walk_tg_tree(tg_schedulable, tg_nop, &data);
   }
   
   static int tg_set_bandwidth(struct task_group *tg,
@@@ -8821,14 -8935,9 +8934,9 @@@
   
         mutex_lock(&rt_constraints_mutex);
         read_lock(&tasklist_lock);
-       if (rt_runtime == 0 && tg_has_rt_tasks(tg)) {
-               err = -EBUSY;
-               goto unlock;
-       }
-       if (!__rt_schedulable(tg, rt_period, rt_runtime)) {
-               err = -EINVAL;
+       err = __rt_schedulable(tg, rt_period, rt_runtime);
+       if (err)
                 goto unlock;
-       }
   
         spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock);
         tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period);
@@@ -8897,16 -9006,25 +9005,25 @@@ long sched_group_rt_period(struct task_
   
   static int sched_rt_global_constraints(void)
   {
-       struct task_group *tg = &root_task_group;
-       u64 rt_runtime, rt_period;
+       u64 runtime, period;
         int ret = 0;
   
-       rt_period = ktime_to_ns(tg->rt_bandwidth.rt_period);
-       rt_runtime = tg->rt_bandwidth.rt_runtime;
+       if (sysctl_sched_rt_period <= 0)
+               return -EINVAL;
+ 
+       runtime = global_rt_runtime();
+       period = global_rt_period();
+ 
+       /*
+        * Sanity check on the sysctl variables.
+        */
+       if (runtime > period && runtime != RUNTIME_INF)
+               return -EINVAL;
   
         mutex_lock(&rt_constraints_mutex);
-       if (!__rt_schedulable(tg, rt_period, rt_runtime))
-               ret = -EINVAL;
+       read_lock(&tasklist_lock);
+       ret = __rt_schedulable(NULL, 0, 0);
+       read_unlock(&tasklist_lock);
         mutex_unlock(&rt_constraints_mutex);
   
         return ret;
@@@ -8917,6 -9035,9 +9034,9 @@@ static int sched_rt_global_constraints(
         unsigned long flags;
         int i;
   
+       if (sysctl_sched_rt_period <= 0)
+               return -EINVAL;
+ 
         spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
         for_each_possible_cpu(i) {
                 struct rt_rq *rt_rq = &cpu_rq(i)->rt;
@@@ -8977,7 -9098,6 +9097,6 @@@ cpu_cgroup_create(struct cgroup_subsys 
   
         if (!cgrp->parent) {
                 /* This is early initialization for the top cgroup */
-               init_task_group.css.cgroup = cgrp;
                 return &init_task_group.css;
         }
   
@@@ -8986,9 -9106,6 +9105,6 @@@
         if (IS_ERR(tg))
                 return ERR_PTR(-ENOMEM);
   
-       /* Bind the cgroup to task_group object we just created */
-       tg->css.cgroup = cgrp;
- 
         return &tg->css;
   }
   
diff --combined kernel/sys.c

index 1b96401a0576ab8b3d5b09641eabc3c53080b0e4,0bc8fa3c2288110b49fad4e9eaab2326f52c69f7..fc71f99fb469a22da88b08263e2609409566173b
--- 1/kernel/sys.c
--- 2/kernel/sys.c
+++ b/kernel/sys.c
@@@ -1060,9 -1060,7 +1060,7 @@@ asmlinkage long sys_setsid(void
         group_leader->signal->leader = 1;
         __set_special_pids(sid);
   
-       spin_lock(&group_leader->sighand->siglock);
-       group_leader->signal->tty = NULL;
-       spin_unlock(&group_leader->sighand->siglock);
+       proc_clear_tty(group_leader);
   
         err = session;
   out:
@@@ -1351,8 -1349,10 +1349,10 @@@ asmlinkage long sys_sethostname(char __
         down_write(&uts_sem);
         errno = -EFAULT;
         if (!copy_from_user(tmp, name, len)) {
-               memcpy(utsname()->nodename, tmp, len);
-               utsname()->nodename[len] = 0;
+               struct new_utsname *u = utsname();
+ 
+               memcpy(u->nodename, tmp, len);
+               memset(u->nodename + len, 0, sizeof(u->nodename) - len);
                 errno = 0;
         }
         up_write(&uts_sem);
@@@ -1364,15 -1364,17 +1364,17 @@@
   asmlinkage long sys_gethostname(char __user *name, int len)
   {
         int i, errno;
+       struct new_utsname *u;
   
         if (len < 0)
                 return -EINVAL;
         down_read(&uts_sem);
-       i = 1 + strlen(utsname()->nodename);
+       u = utsname();
+       i = 1 + strlen(u->nodename);
         if (i > len)
                 i = len;
         errno = 0;
-       if (copy_to_user(name, utsname()->nodename, i))
+       if (copy_to_user(name, u->nodename, i))
                 errno = -EFAULT;
         up_read(&uts_sem);
         return errno;
@@@ -1397,8 -1399,10 +1399,10 @@@ asmlinkage long sys_setdomainname(char 
         down_write(&uts_sem);
         errno = -EFAULT;
         if (!copy_from_user(tmp, name, len)) {
-               memcpy(utsname()->domainname, tmp, len);
-               utsname()->domainname[len] = 0;
+               struct new_utsname *u = utsname();
+ 
+               memcpy(u->domainname, tmp, len);
+               memset(u->domainname + len, 0, sizeof(u->domainname) - len);
                 errno = 0;
         }
         up_write(&uts_sem);
@@@ -1452,14 -1456,22 +1456,22 @@@ asmlinkage long sys_setrlimit(unsigned 
                 return -EINVAL;
         if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
                 return -EFAULT;
-       if (new_rlim.rlim_cur > new_rlim.rlim_max)
-               return -EINVAL;
         old_rlim = current->signal->rlim + resource;
         if ((new_rlim.rlim_max > old_rlim->rlim_max) &&
             !capable(CAP_SYS_RESOURCE))
                 return -EPERM;
-       if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > sysctl_nr_open)
-               return -EPERM;
+ 
+       if (resource == RLIMIT_NOFILE) {
+               if (new_rlim.rlim_max == RLIM_INFINITY)
+                       new_rlim.rlim_max = sysctl_nr_open;
+               if (new_rlim.rlim_cur == RLIM_INFINITY)
+                       new_rlim.rlim_cur = sysctl_nr_open;
+               if (new_rlim.rlim_max > sysctl_nr_open)
+                       return -EPERM;
+       }
+ 
+       if (new_rlim.rlim_cur > new_rlim.rlim_max)
+               return -EINVAL;
   
         retval = security_task_setrlimit(resource, &new_rlim);
         if (retval)
@@@ -1727,16 -1739,6 +1739,16 @@@ asmlinkage long sys_prctl(int option, u
                 case PR_SET_TSC:
                         error = SET_TSC_CTL(arg2);
                         break;
+ +              case PR_GET_TIMERSLACK:
+ +                      error = current->timer_slack_ns;
+ +                      break;
+ +              case PR_SET_TIMERSLACK:
+ +                      if (arg2 <= 0)
+ +                              current->timer_slack_ns =
+ +                                      current->default_timer_slack_ns;
+ +                      else
+ +                              current->timer_slack_ns = arg2;
+ +                      break;
                 default:
                         error = -EINVAL;
                         break;
diff --combined kernel/time/ntp.c

index 4c8d85421d24b84429be27e096611ad86a3e686d,1ad46f3df6e76cd8994403b1c1ca72c14ec3553b..9c114b726ab3353e75fa222e8597e52db9d34a82
--- 1/kernel/time/ntp.c
--- 2/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@@ -142,7 -142,8 +142,7 @@@ static enum hrtimer_restart ntp_leap_se
                 time_state = TIME_OOP;
                 printk(KERN_NOTICE "Clock: "
                        "inserting leap second 23:59:60 UTC\n");
- -              leap_timer.expires = ktime_add_ns(leap_timer.expires,
- -                                                NSEC_PER_SEC);
+ +              hrtimer_add_expires_ns(&leap_timer, NSEC_PER_SEC);
                 res = HRTIMER_RESTART;
                 break;
         case TIME_DEL:
@@@ -244,7 -245,7 +244,7 @@@ static void sync_cmos_clock(unsigned lo
         if (abs(now.tv_nsec - (NSEC_PER_SEC / 2)) <= tick_nsec / 2)
                 fail = update_persistent_clock(now);
   
-       next.tv_nsec = (NSEC_PER_SEC / 2) - now.tv_nsec;
+       next.tv_nsec = (NSEC_PER_SEC / 2) - now.tv_nsec - (TICK_NSEC / 2);
         if (next.tv_nsec <= 0)
                 next.tv_nsec += NSEC_PER_SEC;
   
diff --combined kernel/time/tick-sched.c

index b33be61c0f6bec1ece99784ba42f508f1bcaedc5,b711ffcb106c906be19eab01140ec1e8cafd0063..a547be11cf976c9abba879b4fcd1c025c78ca78d
--- 1/kernel/time/tick-sched.c
--- 2/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@@ -20,6 -20,7 +20,7 @@@
   #include <linux/profile.h>
   #include <linux/sched.h>
   #include <linux/tick.h>
+ #include <linux/module.h>
   
   #include <asm/irq_regs.h>
   
@@@ -75,6 -76,9 +76,9 @@@ static void tick_do_update_jiffies64(kt
                                                            incr * ticks);
                 }
                 do_timer(++ticks);
+ 
+               /* Keep the tick_next_period variable up to date */
+               tick_next_period = ktime_add(last_jiffies_update, tick_period);
         }
         write_sequnlock(&xtime_lock);
   }
@@@ -187,9 -191,17 +191,17 @@@ u64 get_cpu_idle_time_us(int cpu, u64 *
   {
         struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
   
-       *last_update_time = ktime_to_us(ts->idle_lastupdate);
+       if (!tick_nohz_enabled)
+               return -1;
+ 
+       if (ts->idle_active)
+               *last_update_time = ktime_to_us(ts->idle_lastupdate);
+       else
+               *last_update_time = ktime_to_us(ktime_get());
+ 
         return ktime_to_us(ts->idle_sleeptime);
   }
+ EXPORT_SYMBOL_GPL(get_cpu_idle_time_us);
   
   /**
    * tick_nohz_stop_sched_tick - stop the idle tick from the idle task
@@@ -221,7 -233,7 +233,7 @@@ void tick_nohz_stop_sched_tick(int inid
          */
         if (unlikely(!cpu_online(cpu))) {
                 if (cpu == tick_do_timer_cpu)
-                       tick_do_timer_cpu = -1;
+                       tick_do_timer_cpu = TICK_DO_TIMER_NONE;
         }
   
         if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
@@@ -258,7 -270,7 +270,7 @@@
         next_jiffies = get_next_timer_interrupt(last_jiffies);
         delta_jiffies = next_jiffies - last_jiffies;
   
-       if (rcu_needs_cpu(cpu))
+       if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu))
                 delta_jiffies = 1;
         /*
          * Do not stop the tick, if we are only one off
@@@ -288,7 -300,7 +300,7 @@@
                                 goto out;
                         }
   
- -                      ts->idle_tick = ts->sched_timer.expires;
+ +                      ts->idle_tick = hrtimer_get_expires(&ts->sched_timer);
                         ts->tick_stopped = 1;
                         ts->idle_jiffies = last_jiffies;
                         rcu_enter_nohz();
@@@ -303,7 -315,7 +315,7 @@@
                  * invoked.
                  */
                 if (cpu == tick_do_timer_cpu)
-                       tick_do_timer_cpu = -1;
+                       tick_do_timer_cpu = TICK_DO_TIMER_NONE;
   
                 ts->idle_sleeps++;
   
@@@ -419,21 -431,21 +431,21 @@@ void tick_nohz_restart_sched_tick(void
         ts->tick_stopped  = 0;
         ts->idle_exittime = now;
         hrtimer_cancel(&ts->sched_timer);
- -      ts->sched_timer.expires = ts->idle_tick;
+ +      hrtimer_set_expires(&ts->sched_timer, ts->idle_tick);
   
         while (1) {
                 /* Forward the time to expire in the future */
                 hrtimer_forward(&ts->sched_timer, now, tick_period);
   
                 if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
- -                      hrtimer_start(&ts->sched_timer,
- -                                    ts->sched_timer.expires,
+ +                      hrtimer_start_expires(&ts->sched_timer,
                                       HRTIMER_MODE_ABS);
                         /* Check, if the timer was already in the past */
                         if (hrtimer_active(&ts->sched_timer))
                                 break;
                 } else {
- -                      if (!tick_program_event(ts->sched_timer.expires, 0))
+ +                      if (!tick_program_event(
+ +                              hrtimer_get_expires(&ts->sched_timer), 0))
                                 break;
                 }
                 /* Update jiffies and reread time */
@@@ -446,7 -458,7 +458,7 @@@
   static int tick_nohz_reprogram(struct tick_sched *ts, ktime_t now)
   {
         hrtimer_forward(&ts->sched_timer, now, tick_period);
- -      return tick_program_event(ts->sched_timer.expires, 0);
+ +      return tick_program_event(hrtimer_get_expires(&ts->sched_timer), 0);
   }
   
   /*
@@@ -468,7 -480,7 +480,7 @@@ static void tick_nohz_handler(struct cl
          * this duty, then the jiffies update is still serialized by
          * xtime_lock.
          */
-       if (unlikely(tick_do_timer_cpu == -1))
+       if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
                 tick_do_timer_cpu = cpu;
   
         /* Check, if the jiffies need an update */
@@@ -529,7 -541,7 +541,7 @@@ static void tick_nohz_switch_to_nohz(vo
         next = tick_init_jiffy_update();
   
         for (;;) {
- -              ts->sched_timer.expires = next;
+ +              hrtimer_set_expires(&ts->sched_timer, next);
                 if (!tick_program_event(next, 0))
                         break;
                 next = ktime_add(next, tick_period);
@@@ -570,7 -582,7 +582,7 @@@ static enum hrtimer_restart tick_sched_
          * this duty, then the jiffies update is still serialized by
          * xtime_lock.
          */
-       if (unlikely(tick_do_timer_cpu == -1))
+       if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
                 tick_do_timer_cpu = cpu;
   #endif
   
@@@ -622,18 -634,19 +634,18 @@@ void tick_setup_sched_timer(void
          */
         hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
         ts->sched_timer.function = tick_sched_timer;
-       ts->sched_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
+       ts->sched_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU;
   
         /* Get the next period (per cpu) */
- -      ts->sched_timer.expires = tick_init_jiffy_update();
+ +      hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update());
         offset = ktime_to_ns(tick_period) >> 1;
         do_div(offset, num_possible_cpus());
         offset *= smp_processor_id();
- -      ts->sched_timer.expires = ktime_add_ns(ts->sched_timer.expires, offset);
+ +      hrtimer_add_expires_ns(&ts->sched_timer, offset);
   
         for (;;) {
                 hrtimer_forward(&ts->sched_timer, now, tick_period);
- -              hrtimer_start(&ts->sched_timer, ts->sched_timer.expires,
- -                            HRTIMER_MODE_ABS);
+ +              hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS);
                 /* Check, if the timer was already in the past */
                 if (hrtimer_active(&ts->sched_timer))
                         break;
author	Arjan van de Ven <arjan@linux.intel.com>
	Fri, 17 Oct 2008 16:20:26 +0000 (09:20 -0700)
committer	Arjan van de Ven <arjan@linux.intel.com>
	Fri, 17 Oct 2008 16:20:26 +0000 (09:20 -0700)
		1	2
arch/ia64/kvm/kvm-ia64.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kvm/i8254.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kvm/lapic.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/s390/crypto/ap_bus.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/compat.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/hrtimer.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/sched.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/time.h	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/fork.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/hrtimer.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/posix-timers.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/sched.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/sys.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/time/ntp.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/time/tick-sched.c	patch \|	diff1 \|	diff2 \|	blob \| history