Merge branch 'linus' into timers/nohz
author Ingo Molnar <mingo@elte.hu>
Fri, 18 Jul 2008 17:53:16 +0000 (19:53 +0200)
committer Ingo Molnar <mingo@elte.hu>
Fri, 18 Jul 2008 17:53:16 +0000 (19:53 +0200)
arch/arm/kernel/process.c
arch/mips/kernel/process.c
arch/x86/kernel/process_32.c
arch/x86/kernel/process_64.c
kernel/softirq.c
kernel/time/tick-sched.c

index 84f5a4c778fb13815303de4a8057a2ce5c779e0f,199b3680118bc4c5f09c4ee300a41627830d587a..89bfded70a1f480ced698568a9d5fd685c48a17b
@@@ -133,10 -133,8 +133,8 @@@ static void default_idle(void
                cpu_relax();
        else {
                local_irq_disable();
-               if (!need_resched()) {
-                       timer_dyn_reprogram();
+               if (!need_resched())
                        arch_idle();
-               }
                local_irq_enable();
        }
  }
@@@ -164,7 -162,7 +162,7 @@@ void cpu_idle(void
                if (!idle)
                        idle = default_idle;
                leds_event(led_idle_start);
 -              tick_nohz_stop_sched_tick();
 +              tick_nohz_stop_sched_tick(1);
                while (!need_resched())
                        idle();
                leds_event(led_idle_end);
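The hunks in this merge apply the same change to every architecture's idle loop: the tick is stopped with the new inidle argument set to 1 before the inner need_resched() loop, and restarted once there is work to do. A minimal sketch of that shared shape, assuming only the functions visible in these hunks (arch_idle_body() is a hypothetical stand-in for the per-architecture idle routine):

    /* Common post-merge idle-loop shape; a sketch, not any one arch's code. */
    void cpu_idle_sketch(void)
    {
            while (1) {
                    tick_nohz_stop_sched_tick(1);   /* 1: called by the idle task itself */
                    while (!need_resched())
                            arch_idle_body();       /* hypothetical per-arch idle body */
                    tick_nohz_restart_sched_tick();
                    preempt_enable_no_resched();
                    schedule();
                    preempt_disable();
            }
    }
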
index bdead3aad2537902cb35e75675ffe66fd08d7236,c06f5b5d764cdb6779ad93aa453732c8218397b4..b16facd9ea8ecae7b27116952090beca25450c78
@@@ -53,7 -53,7 +53,7 @@@ void __noreturn cpu_idle(void
  {
        /* endless idle loop with no priority at all */
        while (1) {
 -              tick_nohz_stop_sched_tick();
 +              tick_nohz_stop_sched_tick(1);
                while (!need_resched()) {
  #ifdef CONFIG_SMTC_IDLE_HOOK_DEBUG
                        extern void smtc_idle_loop_hook(void);
@@@ -125,13 -125,6 +125,6 @@@ int copy_thread(int nr, unsigned long c
        *childregs = *regs;
        childregs->regs[7] = 0; /* Clear error flag */
  
- #if defined(CONFIG_BINFMT_IRIX)
-       if (current->personality != PER_LINUX) {
-               /* Under IRIX things are a little different. */
-               childregs->regs[3] = 1;
-               regs->regs[3] = 0;
-       }
- #endif
        childregs->regs[2] = 0; /* Child gets zero as return value */
        regs->regs[2] = p->pid;
  
index 1f5fa1cf16ddff5a1957e649847ef7e010187fb0,0c3927accb0054b71c7de9eb828a93559232737e..53bc653ed5ca52bab902dde3d9efbc0e1630eb6c
  
  asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
  
- static int hlt_counter;
- unsigned long boot_option_idle_override = 0;
- EXPORT_SYMBOL(boot_option_idle_override);
  DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
  EXPORT_PER_CPU_SYMBOL(current_task);
  
@@@ -77,57 -72,24 +72,24 @@@ unsigned long thread_saved_pc(struct ta
        return ((unsigned long *)tsk->thread.sp)[3];
  }
  
- /*
-  * Powermanagement idle function, if any..
-  */
- void (*pm_idle)(void);
- EXPORT_SYMBOL(pm_idle);
+ #ifdef CONFIG_HOTPLUG_CPU
+ #include <asm/nmi.h>
  
- void disable_hlt(void)
+ static void cpu_exit_clear(void)
  {
-       hlt_counter++;
- }
+       int cpu = raw_smp_processor_id();
  
- EXPORT_SYMBOL(disable_hlt);
+       idle_task_exit();
  
- void enable_hlt(void)
- {
-       hlt_counter--;
- }
+       cpu_uninit();
+       irq_ctx_exit(cpu);
  
- EXPORT_SYMBOL(enable_hlt);
+       cpu_clear(cpu, cpu_callout_map);
+       cpu_clear(cpu, cpu_callin_map);
  
- /*
-  * We use this if we don't have any better
-  * idle routine..
-  */
- void default_idle(void)
- {
-       if (!hlt_counter && boot_cpu_data.hlt_works_ok) {
-               current_thread_info()->status &= ~TS_POLLING;
-               /*
-                * TS_POLLING-cleared state must be visible before we
-                * test NEED_RESCHED:
-                */
-               smp_mb();
-               if (!need_resched())
-                       safe_halt();    /* enables interrupts racelessly */
-               else
-                       local_irq_enable();
-               current_thread_info()->status |= TS_POLLING;
-       } else {
-               local_irq_enable();
-               /* loop is done by the caller */
-               cpu_relax();
-       }
+       numa_remove_cpu(cpu);
  }
- #ifdef CONFIG_APM_MODULE
- EXPORT_SYMBOL(default_idle);
- #endif
  
- #ifdef CONFIG_HOTPLUG_CPU
- #include <asm/nmi.h>
  /* We don't actually take CPU down, just spin without interrupts. */
  static inline void play_dead(void)
  {
@@@ -166,26 -128,24 +128,24 @@@ void cpu_idle(void
  
        /* endless idle loop with no priority at all */
        while (1) {
 -              tick_nohz_stop_sched_tick();
 +              tick_nohz_stop_sched_tick(1);
                while (!need_resched()) {
-                       void (*idle)(void);
  
                        check_pgt_cache();
                        rmb();
-                       idle = pm_idle;
  
                        if (rcu_pending(cpu))
                                rcu_check_callbacks(cpu, 0);
  
-                       if (!idle)
-                               idle = default_idle;
                        if (cpu_is_offline(cpu))
                                play_dead();
  
                        local_irq_disable();
                        __get_cpu_var(irq_stat).idle_timestamp = jiffies;
-                       idle();
+                       /* Don't trace irqs off for idle */
+                       stop_critical_timings();
+                       pm_idle();
+                       start_critical_timings();
                }
                tick_nohz_restart_sched_tick();
                preempt_enable_no_resched();
@@@ -333,6 -293,7 +293,7 @@@ void flush_thread(void
        /*
         * Forget coprocessor state..
         */
+       tsk->fpu_counter = 0;
        clear_fpu(tsk);
        clear_used_math();
  }
@@@ -649,8 -610,11 +610,11 @@@ struct task_struct * __switch_to(struc
        /* If the task has used fpu the last 5 timeslices, just do a full
         * restore of the math state immediately to avoid the trap; the
         * chances of needing FPU soon are obviously high now
+        *
+        * tsk_used_math() checks prevent calling math_state_restore(),
+        * which can sleep in the case of !tsk_used_math()
         */
-       if (next_p->fpu_counter > 5)
+       if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
                math_state_restore();
  
        /*
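One non-obvious bit in the process_32.c hunk above is the stop_critical_timings()/start_critical_timings() pair around pm_idle(): with irqs-off latency tracing active, the halt in idle would otherwise be recorded as one huge interrupts-off critical section. Bracketing the idle call, per the "Don't trace irqs off for idle" comment, tells the tracer to ignore that window. A sketch of the pattern, simplified from the hunk:

        local_irq_disable();
        /* idle time is not an irqs-off latency bug; suspend tracing around it */
        stop_critical_timings();
        pm_idle();                      /* e.g. a HLT-based idle that re-enables irqs */
        start_critical_timings();
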
index c0a5c2a687e659fa20e23f47dcd171ed76b6a2f4,a8e53626ac9aaf5fc8290908aaf42552556a1b11..9a10c1897921e84ae6851a141d375eef16037543
@@@ -56,15 -56,6 +56,6 @@@ asmlinkage extern void ret_from_fork(vo
  
  unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
  
- unsigned long boot_option_idle_override = 0;
- EXPORT_SYMBOL(boot_option_idle_override);
- /*
-  * Powermanagement idle function, if any..
-  */
- void (*pm_idle)(void);
- EXPORT_SYMBOL(pm_idle);
  static ATOMIC_NOTIFIER_HEAD(idle_notifier);
  
  void idle_notifier_register(struct notifier_block *n)
@@@ -94,25 -85,6 +85,6 @@@ void exit_idle(void
        __exit_idle();
  }
  
- /*
-  * We use this if we don't have any better
-  * idle routine..
-  */
- void default_idle(void)
- {
-       current_thread_info()->status &= ~TS_POLLING;
-       /*
-        * TS_POLLING-cleared state must be visible before we
-        * test NEED_RESCHED:
-        */
-       smp_mb();
-       if (!need_resched())
-               safe_halt();    /* enables interrupts racelessly */
-       else
-               local_irq_enable();
-       current_thread_info()->status |= TS_POLLING;
- }
  #ifdef CONFIG_HOTPLUG_CPU
  DECLARE_PER_CPU(int, cpu_state);
  
@@@ -148,14 -120,11 +120,11 @@@ void cpu_idle(void
        current_thread_info()->status |= TS_POLLING;
        /* endless idle loop with no priority at all */
        while (1) {
 -              tick_nohz_stop_sched_tick();
 +              tick_nohz_stop_sched_tick(1);
                while (!need_resched()) {
-                       void (*idle)(void);
  
                        rmb();
-                       idle = pm_idle;
-                       if (!idle)
-                               idle = default_idle;
                        if (cpu_is_offline(smp_processor_id()))
                                play_dead();
                        /*
                         */
                        local_irq_disable();
                        enter_idle();
-                       idle();
+                       /* Don't trace irqs off for idle */
+                       stop_critical_timings();
+                       pm_idle();
+                       start_critical_timings();
                        /* In many cases the interrupt that ended idle
                           has already called exit_idle. But some idle
                           loops can be woken up without interrupt. */
@@@ -294,6 -266,7 +266,7 @@@ void flush_thread(void
        /*
         * Forget coprocessor state..
         */
+       tsk->fpu_counter = 0;
        clear_fpu(tsk);
        clear_used_math();
  }
@@@ -365,10 -338,10 +338,10 @@@ int copy_thread(int nr, unsigned long c
        p->thread.fs = me->thread.fs;
        p->thread.gs = me->thread.gs;
  
-       asm("mov %%gs,%0" : "=m" (p->thread.gsindex));
-       asm("mov %%fs,%0" : "=m" (p->thread.fsindex));
-       asm("mov %%es,%0" : "=m" (p->thread.es));
-       asm("mov %%ds,%0" : "=m" (p->thread.ds));
+       savesegment(gs, p->thread.gsindex);
+       savesegment(fs, p->thread.fsindex);
+       savesegment(es, p->thread.es);
+       savesegment(ds, p->thread.ds);
  
        if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
                p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
@@@ -407,7 -380,9 +380,9 @@@ out
  void
  start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
  {
-       asm volatile("movl %0, %%fs; movl %0, %%es; movl %0, %%ds" :: "r"(0));
+       loadsegment(fs, 0);
+       loadsegment(es, 0);
+       loadsegment(ds, 0);
        load_gs_index(0);
        regs->ip                = new_ip;
        regs->sp                = new_sp;
@@@ -566,6 -541,7 +541,7 @@@ __switch_to(struct task_struct *prev_p
                                 *next = &next_p->thread;
        int cpu = smp_processor_id();
        struct tss_struct *tss = &per_cpu(init_tss, cpu);
+       unsigned fsindex, gsindex;
  
        /* we're going to use this soon, after a few expensive things */
        if (next_p->fpu_counter>5)
         * Switch DS and ES.
         * This won't pick up thread selector changes, but I guess that is ok.
         */
-       asm volatile("mov %%es,%0" : "=m" (prev->es));
+       savesegment(es, prev->es);
        if (unlikely(next->es | prev->es))
                loadsegment(es, next->es); 
-       
-       asm volatile ("mov %%ds,%0" : "=m" (prev->ds));
+       savesegment(ds, prev->ds);
        if (unlikely(next->ds | prev->ds))
                loadsegment(ds, next->ds);
  
+       /* We must save %fs and %gs before load_TLS() because
+        * %fs and %gs may be cleared by load_TLS().
+        *
+        * (e.g. xen_load_tls())
+        */
+       savesegment(fs, fsindex);
+       savesegment(gs, gsindex);
        load_TLS(next, cpu);
  
+       /*
+        * Leave lazy mode, flushing any hypercalls made here.
+        * This must be done before restoring TLS segments so
+        * the GDT and LDT are properly updated, and must be
+        * done before math_state_restore, so the TS bit is up
+        * to date.
+        */
+       arch_leave_lazy_cpu_mode();
        /* 
         * Switch FS and GS.
         */
        { 
-               unsigned fsindex;
-               asm volatile("movl %%fs,%0" : "=r" (fsindex)); 
                /* segment register != 0 always requires a reload. 
                   also reload when it has changed. 
                   when prev process used 64bit base always reload
                if (next->fs) 
                        wrmsrl(MSR_FS_BASE, next->fs); 
                prev->fsindex = fsindex;
-       }
-       { 
-               unsigned gsindex;
-               asm volatile("movl %%gs,%0" : "=r" (gsindex)); 
                if (unlikely(gsindex | next->gsindex | prev->gs)) {
                        load_gs_index(next->gsindex);
                        if (gsindex)
        /* If the task has used fpu the last 5 timeslices, just do a full
         * restore of the math state immediately to avoid the trap; the
         * chances of needing FPU soon are obviously high now
+        *
+        * tsk_used_math() checks prevent calling math_state_restore(),
+        * which can sleep in the case of !tsk_used_math()
         */
-       if (next_p->fpu_counter>5)
+       if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
                math_state_restore();
        return prev_p;
  }
@@@ -794,7 -786,7 +786,7 @@@ long do_arch_prctl(struct task_struct *
                        set_32bit_tls(task, FS_TLS, addr);
                        if (doit) {
                                load_TLS(&task->thread, cpu);
-                               asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
+                               loadsegment(fs, FS_TLS_SEL);
                        }
                        task->thread.fsindex = FS_TLS_SEL;
                        task->thread.fs = 0;
                        if (doit) {
                                /* set the selector to 0 to not confuse
                                   __switch_to */
-                               asm volatile("movl %0,%%fs" :: "r" (0));
+                               loadsegment(fs, 0);
                                ret = checking_wrmsrl(MSR_FS_BASE, addr);
                        }
                }
                if (task->thread.gsindex == GS_TLS_SEL)
                        base = read_32bit_tls(task, GS_TLS);
                else if (doit) {
-                       asm("movl %%gs,%0" : "=r" (gsindex));
+                       savesegment(gs, gsindex);
                        if (gsindex)
                                rdmsrl(MSR_KERNEL_GS_BASE, base);
                        else
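Throughout the process_64.c hunk, open-coded segment moves are replaced by the savesegment()/loadsegment() helpers. Their definitions live in the x86 headers of this era rather than in this diff; as an assumed sketch, reading a segment register into a variable looks roughly like this (the _sketch name is mine, not the kernel's):

    /* Assumed approximation of the helper used above; the real macro lives
     * in the x86 system headers, outside this diff. */
    #define savesegment_sketch(seg, value) \
            asm("mov %%" #seg ",%0" : "=r" (value) : : "memory")

loadsegment() is the more involved of the two, since writing a segment register can fault on a bad selector and the helper carries the fixup for that, which is a good reason to keep both behind shared macros instead of repeating raw asm at every call site.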
diff --combined kernel/softirq.c
index 05f248039d77184527c4912ce8ab527a5689e164,81e2fe0f983a04d1407a77e214b8f0612feedf9a..f6b03d56c2bf130d81a4cda68b193267ef6b7466
@@@ -131,23 -131,17 +131,17 @@@ void _local_bh_enable(void
  
  EXPORT_SYMBOL(_local_bh_enable);
  
- void local_bh_enable(void)
+ static inline void _local_bh_enable_ip(unsigned long ip)
  {
+       WARN_ON_ONCE(in_irq() || irqs_disabled());
  #ifdef CONFIG_TRACE_IRQFLAGS
-       unsigned long flags;
-       WARN_ON_ONCE(in_irq());
- #endif
-       WARN_ON_ONCE(irqs_disabled());
- #ifdef CONFIG_TRACE_IRQFLAGS
-       local_irq_save(flags);
+       local_irq_disable();
  #endif
        /*
         * Are softirqs going to be turned on now:
         */
        if (softirq_count() == SOFTIRQ_OFFSET)
-               trace_softirqs_on((unsigned long)__builtin_return_address(0));
+               trace_softirqs_on(ip);
        /*
         * Keep preemption disabled until we are done with
         * softirq processing:
  
        dec_preempt_count();
  #ifdef CONFIG_TRACE_IRQFLAGS
-       local_irq_restore(flags);
+       local_irq_enable();
  #endif
        preempt_check_resched();
  }
+ void local_bh_enable(void)
+ {
+       _local_bh_enable_ip((unsigned long)__builtin_return_address(0));
+ }
  EXPORT_SYMBOL(local_bh_enable);
  
  void local_bh_enable_ip(unsigned long ip)
  {
- #ifdef CONFIG_TRACE_IRQFLAGS
-       unsigned long flags;
-       WARN_ON_ONCE(in_irq());
-       local_irq_save(flags);
- #endif
-       /*
-        * Are softirqs going to be turned on now:
-        */
-       if (softirq_count() == SOFTIRQ_OFFSET)
-               trace_softirqs_on(ip);
-       /*
-        * Keep preemption disabled until we are done with
-        * softirq processing:
-        */
-       sub_preempt_count(SOFTIRQ_OFFSET - 1);
-       if (unlikely(!in_interrupt() && local_softirq_pending()))
-               do_softirq();
-       dec_preempt_count();
- #ifdef CONFIG_TRACE_IRQFLAGS
-       local_irq_restore(flags);
- #endif
-       preempt_check_resched();
+       _local_bh_enable_ip(ip);
  }
  EXPORT_SYMBOL(local_bh_enable_ip);
  
@@@ -312,7 -286,7 +286,7 @@@ void irq_exit(void
  #ifdef CONFIG_NO_HZ
        /* Make sure that timer wheel updates are propagated */
        if (!in_interrupt() && idle_cpu(smp_processor_id()) && !need_resched())
 -              tick_nohz_stop_sched_tick();
 +              tick_nohz_stop_sched_tick(0);
        rcu_irq_exit();
  #endif
        preempt_enable_no_resched();
@@@ -347,9 -321,8 +321,8 @@@ void raise_softirq(unsigned int nr
        local_irq_restore(flags);
  }
  
- void open_softirq(int nr, void (*action)(struct softirq_action*), void *data)
+ void open_softirq(int nr, void (*action)(struct softirq_action *))
  {
-       softirq_vec[nr].data = data;
        softirq_vec[nr].action = action;
  }
  
@@@ -360,10 -333,8 +333,8 @@@ struct tasklet_hea
        struct tasklet_struct **tail;
  };
  
- /* Some compilers disobey section attribute on statics when not
-    initialized -- RR */
- static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec) = { NULL };
- static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec) = { NULL };
+ static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
+ static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
  
  void __tasklet_schedule(struct tasklet_struct *t)
  {
@@@ -503,8 -474,8 +474,8 @@@ void __init softirq_init(void
                        &per_cpu(tasklet_hi_vec, cpu).head;
        }
  
-       open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL);
-       open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL);
+       open_softirq(TASKLET_SOFTIRQ, tasklet_action);
+       open_softirq(HI_SOFTIRQ, tasklet_hi_action);
  }
  
  static int ksoftirqd(void * __bind_cpu)
@@@ -645,7 -616,7 +616,7 @@@ static int __cpuinit cpu_callback(struc
  
                p = per_cpu(ksoftirqd, hotcpu);
                per_cpu(ksoftirqd, hotcpu) = NULL;
-               sched_setscheduler(p, SCHED_FIFO, &param);
+               sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
                kthread_stop(p);
                takeover_tasklets(hotcpu);
                break;
@@@ -674,12 -645,12 +645,12 @@@ __init int spawn_ksoftirqd(void
  /*
   * Call a function on all processors
   */
- int on_each_cpu(void (*func) (void *info), void *info, int retry, int wait)
+ int on_each_cpu(void (*func) (void *info), void *info, int wait)
  {
        int ret = 0;
  
        preempt_disable();
-       ret = smp_call_function(func, info, retry, wait);
+       ret = smp_call_function(func, info, wait);
        local_irq_disable();
        func(info);
        local_irq_enable();
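The open_softirq() change in this file drops the per-vector data cookie, so every handler now takes only the struct softirq_action pointer and finds its state elsewhere (per-CPU data, as tasklet_action already does). A short usage sketch under the new two-argument form, with hypothetical names (my_action and MY_SOFTIRQ are illustrations, not part of this diff):

    /* Hypothetical registration under the new two-argument open_softirq(). */
    static void my_action(struct softirq_action *h)
    {
            /* handler state comes from per-CPU or global data,
             * not from a registration-time cookie */
    }

    static int __init my_softirq_init(void)
    {
            open_softirq(MY_SOFTIRQ, my_action);    /* MY_SOFTIRQ: hypothetical vector */
            return 0;
    }
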
diff --combined kernel/time/tick-sched.c
index ee962d11107b67c46ce097a96c576823b8190a62,beef7ccdf842f3cea9994c19b879baa06f3e94f5..a5c26d2b132307bd7199e0f56e3a07b3578c2dcb
@@@ -195,7 -195,7 +195,7 @@@ u64 get_cpu_idle_time_us(int cpu, u64 *
   * Called either from the idle loop or from irq_exit() when an idle period was
   * just interrupted by an interrupt which did not cause a reschedule.
   */
 -void tick_nohz_stop_sched_tick(void)
 +void tick_nohz_stop_sched_tick(int inidle)
  {
        unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags;
        struct tick_sched *ts;
        if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
                goto end;
  
 +      if (!inidle && !ts->inidle)
 +              goto end;
 +
 +      ts->inidle = 1;
 +
        if (need_resched())
                goto end;
  
                        ts->tick_stopped = 1;
                        ts->idle_jiffies = last_jiffies;
                        rcu_enter_nohz();
+                       sched_clock_tick_stop(cpu);
                }
  
                /*
@@@ -377,20 -373,18 +378,21 @@@ void tick_nohz_restart_sched_tick(void
        local_irq_disable();
        tick_nohz_stop_idle(cpu);
  
 -      if (!ts->tick_stopped) {
 +      if (!ts->inidle || !ts->tick_stopped) {
 +              ts->inidle = 0;
                local_irq_enable();
                return;
        }
  
 +      ts->inidle = 0;
 +
        rcu_exit_nohz();
  
        /* Update jiffies first */
        select_nohz_load_balancer(0);
        now = ktime_get();
        tick_do_update_jiffies64(now);
+       sched_clock_tick_start(cpu);
        cpu_clear(cpu, nohz_cpu_mask);
  
        /*
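Taken together, the tick-sched.c hunks give tick_nohz_stop_sched_tick() a simple ownership rule: only the idle task (inidle == 1) may mark the CPU as idle, while the irq_exit() path (inidle == 0, see the softirq.c hunk above) can only re-stop the tick during an idle period the idle loop already entered. A condensed sketch of the two call sites and the guard, assembled from the hunks above:

    /* from cpu_idle() on each architecture */
    tick_nohz_stop_sched_tick(1);
    /* from irq_exit(), CONFIG_NO_HZ path   */
    tick_nohz_stop_sched_tick(0);

    /* inside tick_nohz_stop_sched_tick(), condensed from the hunk above */
    if (!inidle && !ts->inidle)
            goto end;       /* interrupt exit, but the idle loop never entered */
    ts->inidle = 1;         /* from here on, irq_exit() may stop the tick too */

tick_nohz_restart_sched_tick() clears ts->inidle again, so a stray irq_exit() after the idle loop has finished cannot stop the tick behind its back.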