nohz: prevent tick stop outside of the idle loop
author Thomas Gleixner <tglx@linutronix.de>
Fri, 18 Jul 2008 15:27:28 +0000 (17:27 +0200)
committer Thomas Gleixner <tglx@linutronix.de>
Fri, 18 Jul 2008 16:10:28 +0000 (18:10 +0200)
Jack Ren and Eric Miao tracked down the following long standing
problem in the NOHZ code:

scheduler switch to idle task
enable interrupts

Window starts here

----> interrupt happens (does not set NEED_RESCHED)
       irq_exit() stops the tick

----> interrupt happens (does set NEED_RESCHED)

return from schedule()

cpu_idle(): preempt_disable();

Window ends here

The interrupts can happen at any point inside the race window. The
first interrupt stops the tick, the second one causes the scheduler to
rerun and switch away from idle again and we end up with the tick
disabled.

The fact that it needs two interrupts where the first one does not set
NEED_RESCHED and the second one does made the bug obscure and extremely
hard to reproduce and analyse. Kudos to Jack and Eric.

Solution: Limit the NOHZ functionality to the idle loop to make sure
that we can not run into such a situation ever again.

cpu_idle()
{
preempt_disable();

while(1) {
 tick_nohz_stop_sched_tick(1); <- tell NOHZ code that we
            are in the idle loop

 while (!need_resched())
       halt();

 tick_nohz_restart_sched_tick(); <- disables NOHZ mode
 preempt_enable_no_resched();
 schedule();
 preempt_disable();
}
}

In hindsight we should have done it this way from the start, but ...

/me grabs a large brown paperbag.

Debugged-by: Jack Ren <jack.ren@marvell.com>
Debugged-by: eric miao <eric.y.miao@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
14 files changed:
arch/arm/kernel/process.c
arch/avr32/kernel/process.c
arch/blackfin/kernel/process.c
arch/mips/kernel/process.c
arch/powerpc/kernel/idle.c
arch/powerpc/platforms/iseries/setup.c
arch/sh/kernel/process_32.c
arch/sparc64/kernel/process.c
arch/um/kernel/process.c
arch/x86/kernel/process_32.c
arch/x86/kernel/process_64.c
include/linux/tick.h
kernel/softirq.c
kernel/time/tick-sched.c

index 46bf2ede612840b84eb3f7994c4cc85ae0d4c076..84f5a4c778fb13815303de4a8057a2ce5c779e0f 100644 (file)
@@ -164,7 +164,7 @@ void cpu_idle(void)
                if (!idle)
                        idle = default_idle;
                leds_event(led_idle_start);
-               tick_nohz_stop_sched_tick();
+               tick_nohz_stop_sched_tick(1);
                while (!need_resched())
                        idle();
                leds_event(led_idle_end);
index 6cf9df1762743a29813db51b5d82d5ba8f6acedf..ff820a9e743a01c3d5fdd7bd61649fec474bdad3 100644 (file)
@@ -31,7 +31,7 @@ void cpu_idle(void)
 {
        /* endless idle loop with no priority at all */
        while (1) {
-               tick_nohz_stop_sched_tick();
+               tick_nohz_stop_sched_tick(1);
                while (!need_resched())
                        cpu_idle_sleep();
                tick_nohz_restart_sched_tick();
index 53c2cd255441ff5a23a55ce9cc8e5702ef1ec966..77800dd83e578d49844edaceaec7fc4f3bcfe728 100644 (file)
@@ -105,7 +105,7 @@ void cpu_idle(void)
 #endif
                if (!idle)
                        idle = default_idle;
-               tick_nohz_stop_sched_tick();
+               tick_nohz_stop_sched_tick(1);
                while (!need_resched())
                        idle();
                tick_nohz_restart_sched_tick();
index 2c09a442e5e5939160843570e60e829a5995fe32..bdead3aad2537902cb35e75675ffe66fd08d7236 100644 (file)
@@ -53,7 +53,7 @@ void __noreturn cpu_idle(void)
 {
        /* endless idle loop with no priority at all */
        while (1) {
-               tick_nohz_stop_sched_tick();
+               tick_nohz_stop_sched_tick(1);
                while (!need_resched()) {
 #ifdef CONFIG_SMTC_IDLE_HOOK_DEBUG
                        extern void smtc_idle_loop_hook(void);
index c3cf0e8f3ac173332a01148946ace8c43f45a5a5..d308a9f70f1b81b970ef0f13df10454f0deee3d9 100644 (file)
@@ -60,7 +60,7 @@ void cpu_idle(void)
 
        set_thread_flag(TIF_POLLING_NRFLAG);
        while (1) {
-               tick_nohz_stop_sched_tick();
+               tick_nohz_stop_sched_tick(1);
                while (!need_resched() && !cpu_should_die()) {
                        ppc64_runlatch_off();
 
index b72120751bbed16460b9f3c87c3e849b59228c2c..70b688c1aefb9f7512c74d0d20d1583454328c70 100644 (file)
@@ -561,7 +561,7 @@ static void yield_shared_processor(void)
 static void iseries_shared_idle(void)
 {
        while (1) {
-               tick_nohz_stop_sched_tick();
+               tick_nohz_stop_sched_tick(1);
                while (!need_resched() && !hvlpevent_is_pending()) {
                        local_irq_disable();
                        ppc64_runlatch_off();
@@ -591,7 +591,7 @@ static void iseries_dedicated_idle(void)
        set_thread_flag(TIF_POLLING_NRFLAG);
 
        while (1) {
-               tick_nohz_stop_sched_tick();
+               tick_nohz_stop_sched_tick(1);
                if (!need_resched()) {
                        while (!need_resched()) {
                                ppc64_runlatch_off();
index b98e37a1f54c05229c18eb4d865ff0e6e0e26437..921892c351dac9884482516331b20dd1a5a068bf 100644 (file)
@@ -86,7 +86,7 @@ void cpu_idle(void)
                if (!idle)
                        idle = default_idle;
 
-               tick_nohz_stop_sched_tick();
+               tick_nohz_stop_sched_tick(1);
                while (!need_resched())
                        idle();
                tick_nohz_restart_sched_tick();
index 2084f81a76e1d49319af047470f9348977d9968e..0798928ba3612cdeca93cfe35c29de4b99b9ca3b 100644 (file)
@@ -97,7 +97,7 @@ void cpu_idle(void)
        set_thread_flag(TIF_POLLING_NRFLAG);
 
        while(1) {
-               tick_nohz_stop_sched_tick();
+               tick_nohz_stop_sched_tick(1);
 
                while (!need_resched() && !cpu_is_offline(cpu))
                        sparc64_yield(cpu);
index 83603cfbde819ce736a034d414361ea71c24cca7..a1c6d07cac3e46b2f3460e04729cf6a2ca203571 100644 (file)
@@ -243,7 +243,7 @@ void default_idle(void)
                if (need_resched())
                        schedule();
 
-               tick_nohz_stop_sched_tick();
+               tick_nohz_stop_sched_tick(1);
                nsecs = disable_timer();
                idle_sleep(nsecs);
                tick_nohz_restart_sched_tick();
index f8476dfbb60d9818f5da08d3a5b95a223930964f..1f5fa1cf16ddff5a1957e649847ef7e010187fb0 100644 (file)
@@ -166,7 +166,7 @@ void cpu_idle(void)
 
        /* endless idle loop with no priority at all */
        while (1) {
-               tick_nohz_stop_sched_tick();
+               tick_nohz_stop_sched_tick(1);
                while (!need_resched()) {
                        void (*idle)(void);
 
index e2319f39988b042f364a73174a0d2ac254a5b60d..c0a5c2a687e659fa20e23f47dcd171ed76b6a2f4 100644 (file)
@@ -148,7 +148,7 @@ void cpu_idle(void)
        current_thread_info()->status |= TS_POLLING;
        /* endless idle loop with no priority at all */
        while (1) {
-               tick_nohz_stop_sched_tick();
+               tick_nohz_stop_sched_tick(1);
                while (!need_resched()) {
                        void (*idle)(void);
 
index a881c652f7e925cb8e3d2f296c492800c3f163eb..d3c02695dc5d7e0826a7fa6ac4eb27bfbd937374 100644 (file)
@@ -49,6 +49,7 @@ struct tick_sched {
        unsigned long                   check_clocks;
        enum tick_nohz_mode             nohz_mode;
        ktime_t                         idle_tick;
+       int                             inidle;
        int                             tick_stopped;
        unsigned long                   idle_jiffies;
        unsigned long                   idle_calls;
@@ -105,14 +106,14 @@ static inline int tick_check_oneshot_change(int allow_nohz) { return 0; }
 #endif /* !CONFIG_GENERIC_CLOCKEVENTS */
 
 # ifdef CONFIG_NO_HZ
-extern void tick_nohz_stop_sched_tick(void);
+extern void tick_nohz_stop_sched_tick(int inidle);
 extern void tick_nohz_restart_sched_tick(void);
 extern void tick_nohz_update_jiffies(void);
 extern ktime_t tick_nohz_get_sleep_length(void);
 extern void tick_nohz_stop_idle(int cpu);
 extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
 # else
-static inline void tick_nohz_stop_sched_tick(void) { }
+static inline void tick_nohz_stop_sched_tick(int inidle) { }
 static inline void tick_nohz_restart_sched_tick(void) { }
 static inline void tick_nohz_update_jiffies(void) { }
 static inline ktime_t tick_nohz_get_sleep_length(void)
index 36e0617400470f398700376c7fa3359d768e7e58..05f248039d77184527c4912ce8ab527a5689e164 100644 (file)
@@ -312,7 +312,7 @@ void irq_exit(void)
 #ifdef CONFIG_NO_HZ
        /* Make sure that timer wheel updates are propagated */
        if (!in_interrupt() && idle_cpu(smp_processor_id()) && !need_resched())
-               tick_nohz_stop_sched_tick();
+               tick_nohz_stop_sched_tick(0);
        rcu_irq_exit();
 #endif
        preempt_enable_no_resched();
index 86baa4f0dfe4ba157403a614393cc5fbcc4d983f..ee962d11107b67c46ce097a96c576823b8190a62 100644 (file)
@@ -195,7 +195,7 @@ u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
  * Called either from the idle loop or from irq_exit() when an idle period was
  * just interrupted by an interrupt which did not cause a reschedule.
  */
-void tick_nohz_stop_sched_tick(void)
+void tick_nohz_stop_sched_tick(int inidle)
 {
        unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags;
        struct tick_sched *ts;
@@ -224,6 +224,11 @@ void tick_nohz_stop_sched_tick(void)
        if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
                goto end;
 
+       if (!inidle && !ts->inidle)
+               goto end;
+
+       ts->inidle = 1;
+
        if (need_resched())
                goto end;
 
@@ -372,11 +377,14 @@ void tick_nohz_restart_sched_tick(void)
        local_irq_disable();
        tick_nohz_stop_idle(cpu);
 
-       if (!ts->tick_stopped) {
+       if (!ts->inidle || !ts->tick_stopped) {
+               ts->inidle = 0;
                local_irq_enable();
                return;
        }
 
+       ts->inidle = 0;
+
        rcu_exit_nohz();
 
        /* Update jiffies first */