Merge branch 'linus' into cpumask-for-linus

author Ingo Molnar <mingo@elte.hu>

Mon, 30 Mar 2009 21:53:32 +0000 (23:53 +0200)

committer Ingo Molnar <mingo@elte.hu>

Mon, 30 Mar 2009 21:53:32 +0000 (23:53 +0200)
author Ingo Molnar <mingo@elte.hu>
Mon, 30 Mar 2009 21:53:32 +0000 (23:53 +0200)
committer Ingo Molnar <mingo@elte.hu>
Mon, 30 Mar 2009 21:53:32 +0000 (23:53 +0200)
diff --combined arch/x86/include/asm/processor.h

index 9874dd98a29f41f1f0a53cf7f7af56f225f81c8b,ae85a8d66a30601a1a22c3b6b84f3df6d3056477..34c52370f2fe848a2e7c9dcb73fdc4a3048c087f
--- 1/arch/x86/include/asm/processor.h
--- 2/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@@ -75,9 -75,9 +75,9 @@@ struct cpuinfo_x86 
   #else
         /* Number of 4K pages in DTLB/ITLB combined(in pages): */
         int                     x86_tlbsize;
+ #endif
         __u8                    x86_virt_bits;
         __u8                    x86_phys_bits;
- #endif
         /* CPUID returned core id bits: */
         __u8                    x86_coreid_bits;
         /* Max extended CPUID function supported: */
@@@ -94,7 -94,7 +94,7 @@@
         unsigned long           loops_per_jiffy;
   #ifdef CONFIG_SMP
         /* cpus sharing the last level cache: */
- -      cpumask_t               llc_shared_map;
+ +      cpumask_var_t           llc_shared_map;
   #endif
         /* cpuid returned max cores value: */
         u16                      x86_max_cores;
@@@ -391,6 -391,9 +391,9 @@@ DECLARE_PER_CPU(union irq_stack_union, 
   DECLARE_INIT_PER_CPU(irq_stack_union);
   
   DECLARE_PER_CPU(char *, irq_stack_ptr);
+ DECLARE_PER_CPU(unsigned int, irq_count);
+ extern unsigned long kernel_eflags;
+ extern asmlinkage void ignore_sysret(void);
   #else /* X86_64 */
   #ifdef CONFIG_CC_STACKPROTECTOR
   DECLARE_PER_CPU(unsigned long, stack_canary);
@@@ -733,7 -736,6 +736,7 @@@ static inline void __sti_mwait(unsigne
   extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx);
   
   extern void select_idle_routine(const struct cpuinfo_x86 *c);
+ +extern void init_c1e_mask(void);
   
   extern unsigned long          boot_option_idle_override;
   extern unsigned long          idle_halt;
diff --combined arch/x86/kernel/cpu/common.c

index d7dd3c294e2a30669ae3cdf6faf3563d89a55c42,e2962cc1e27b742965f6af45a8cfdcf9b4c1a0b8..c4f667896c28f1a3eb7e6ae33341f5ba98f74c98
--- 1/arch/x86/kernel/cpu/common.c
--- 2/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@@ -1,50 -1,52 +1,50 @@@
- #include <linux/init.h>
- #include <linux/kernel.h>
- #include <linux/sched.h>
- #include <linux/string.h>
   #include <linux/bootmem.h>
+ #include <linux/linkage.h>
   #include <linux/bitops.h>
+ #include <linux/kernel.h>
   #include <linux/module.h>
- #include <linux/kgdb.h>
- #include <linux/topology.h>
+ #include <linux/percpu.h>
+ #include <linux/string.h>
   #include <linux/delay.h>
+ #include <linux/sched.h>
+ #include <linux/init.h>
+ #include <linux/kgdb.h>
   #include <linux/smp.h>
- #include <linux/percpu.h>
- #include <asm/i387.h>
- #include <asm/msr.h>
- #include <asm/io.h>
- #include <asm/linkage.h>
+ #include <linux/io.h>
+ 
+ #include <asm/stackprotector.h>
   #include <asm/mmu_context.h>
+ #include <asm/hypervisor.h>
+ #include <asm/processor.h>
+ #include <asm/sections.h>
+ #include <asm/topology.h>
+ #include <asm/cpumask.h>
+ #include <asm/pgtable.h>
+ #include <asm/atomic.h>
+ #include <asm/proto.h>
+ #include <asm/setup.h>
+ #include <asm/apic.h>
+ #include <asm/desc.h>
+ #include <asm/i387.h>
   #include <asm/mtrr.h>
+ #include <asm/numa.h>
+ #include <asm/asm.h>
+ #include <asm/cpu.h>
   #include <asm/mce.h>
+ #include <asm/msr.h>
   #include <asm/pat.h>
- #include <asm/asm.h>
- #include <asm/numa.h>
   #include <asm/smp.h>
- #include <asm/cpu.h>
- #include <asm/cpumask.h>
- #include <asm/apic.h>
   
   #ifdef CONFIG_X86_LOCAL_APIC
   #include <asm/uv/uv.h>
   #endif
   
- #include <asm/pgtable.h>
- #include <asm/processor.h>
- #include <asm/desc.h>
- #include <asm/atomic.h>
- #include <asm/proto.h>
- #include <asm/sections.h>
- #include <asm/setup.h>
- #include <asm/hypervisor.h>
- #include <asm/stackprotector.h>
- 
   #include "cpu.h"
   
- -#ifdef CONFIG_X86_64
- -
   /* all of these masks are initialized in setup_cpu_local_masks() */
- cpumask_var_t cpu_callin_mask;
- cpumask_var_t cpu_callout_mask;
   cpumask_var_t cpu_initialized_mask;
+ cpumask_var_t cpu_callout_mask;
+ cpumask_var_t cpu_callin_mask;
   
   /* representing cpus for which sibling maps can be computed */
   cpumask_var_t cpu_sibling_setup_mask;
@@@ -58,7 -60,17 +58,7 @@@ void __init setup_cpu_local_masks(void
         alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
   }
   
- static struct cpu_dev *this_cpu __cpuinitdata;
- -#else /* CONFIG_X86_32 */
- -
- -cpumask_t cpu_sibling_setup_map;
- -cpumask_t cpu_callout_map;
- -cpumask_t cpu_initialized;
- -cpumask_t cpu_callin_map;
- -
- -#endif /* CONFIG_X86_32 */
- -
- -
+ static const struct cpu_dev *this_cpu __cpuinitdata;
   
   DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
   #ifdef CONFIG_X86_64
@@@ -67,48 -79,48 +67,48 @@@
          * IRET will check the segment types  kkeil 2000/10/28
          * Also sysret mandates a special GDT layout
          *
-        * The TLS descriptors are currently at a different place compared to i386.
+        * TLS descriptors are currently at a different place compared to i386.
          * Hopefully nobody expects them at a fixed place (Wine?)
          */
-       [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } },
-       [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } },
-       [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } },
-       [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } },
-       [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } },
-       [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } },
+       [GDT_ENTRY_KERNEL32_CS]         = { { { 0x0000ffff, 0x00cf9b00 } } },
+       [GDT_ENTRY_KERNEL_CS]           = { { { 0x0000ffff, 0x00af9b00 } } },
+       [GDT_ENTRY_KERNEL_DS]           = { { { 0x0000ffff, 0x00cf9300 } } },
+       [GDT_ENTRY_DEFAULT_USER32_CS]   = { { { 0x0000ffff, 0x00cffb00 } } },
+       [GDT_ENTRY_DEFAULT_USER_DS]     = { { { 0x0000ffff, 0x00cff300 } } },
+       [GDT_ENTRY_DEFAULT_USER_CS]     = { { { 0x0000ffff, 0x00affb00 } } },
   #else
-       [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } },
-       [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } },
-       [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } },
-       [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff200 } } },
+       [GDT_ENTRY_KERNEL_CS]           = { { { 0x0000ffff, 0x00cf9a00 } } },
+       [GDT_ENTRY_KERNEL_DS]           = { { { 0x0000ffff, 0x00cf9200 } } },
+       [GDT_ENTRY_DEFAULT_USER_CS]     = { { { 0x0000ffff, 0x00cffa00 } } },
+       [GDT_ENTRY_DEFAULT_USER_DS]     = { { { 0x0000ffff, 0x00cff200 } } },
         /*
          * Segments used for calling PnP BIOS have byte granularity.
          * The code segments and data segments have fixed 64k limits,
          * the transfer segment sizes are set at run time.
          */
         /* 32-bit code */
-       [GDT_ENTRY_PNPBIOS_CS32] = { { { 0x0000ffff, 0x00409a00 } } },
+       [GDT_ENTRY_PNPBIOS_CS32]        = { { { 0x0000ffff, 0x00409a00 } } },
         /* 16-bit code */
-       [GDT_ENTRY_PNPBIOS_CS16] = { { { 0x0000ffff, 0x00009a00 } } },
+       [GDT_ENTRY_PNPBIOS_CS16]        = { { { 0x0000ffff, 0x00009a00 } } },
         /* 16-bit data */
-       [GDT_ENTRY_PNPBIOS_DS] = { { { 0x0000ffff, 0x00009200 } } },
+       [GDT_ENTRY_PNPBIOS_DS]          = { { { 0x0000ffff, 0x00009200 } } },
         /* 16-bit data */
-       [GDT_ENTRY_PNPBIOS_TS1] = { { { 0x00000000, 0x00009200 } } },
+       [GDT_ENTRY_PNPBIOS_TS1]         = { { { 0x00000000, 0x00009200 } } },
         /* 16-bit data */
-       [GDT_ENTRY_PNPBIOS_TS2] = { { { 0x00000000, 0x00009200 } } },
+       [GDT_ENTRY_PNPBIOS_TS2]         = { { { 0x00000000, 0x00009200 } } },
         /*
          * The APM segments have byte granularity and their bases
          * are set at run time.  All have 64k limits.
          */
         /* 32-bit code */
-       [GDT_ENTRY_APMBIOS_BASE] = { { { 0x0000ffff, 0x00409a00 } } },
+       [GDT_ENTRY_APMBIOS_BASE]        = { { { 0x0000ffff, 0x00409a00 } } },
         /* 16-bit code */
-       [GDT_ENTRY_APMBIOS_BASE+1] = { { { 0x0000ffff, 0x00009a00 } } },
+       [GDT_ENTRY_APMBIOS_BASE+1]      = { { { 0x0000ffff, 0x00009a00 } } },
         /* data */
-       [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } },
+       [GDT_ENTRY_APMBIOS_BASE+2]      = { { { 0x0000ffff, 0x00409200 } } },
   
-       [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } },
-       [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } },
+       [GDT_ENTRY_ESPFIX_SS]           = { { { 0x00000000, 0x00c09200 } } },
+       [GDT_ENTRY_PERCPU]              = { { { 0x0000ffff, 0x00cf9200 } } },
         GDT_STACK_CANARY_INIT
   #endif
   } };
@@@ -152,16 -164,17 +152,17 @@@ static inline int flag_is_changeable_p(
          * the CPUID. Add "volatile" to not allow gcc to
          * optimize the subsequent calls to this function.
          */
-       asm volatile ("pushfl\n\t"
-                     "pushfl\n\t"
-                     "popl %0\n\t"
-                     "movl %0,%1\n\t"
-                     "xorl %2,%0\n\t"
-                     "pushl %0\n\t"
-                     "popfl\n\t"
-                     "pushfl\n\t"
-                     "popl %0\n\t"
-                     "popfl\n\t"
+       asm volatile ("pushfl           \n\t"
+                     "pushfl           \n\t"
+                     "popl %0          \n\t"
+                     "movl %0, %1      \n\t"
+                     "xorl %2, %0      \n\t"
+                     "pushl %0         \n\t"
+                     "popfl            \n\t"
+                     "pushfl           \n\t"
+                     "popl %0          \n\t"
+                     "popfl            \n\t"
+ 
                       : "=&r" (f1), "=&r" (f2)
                       : "ir" (flag));
   
@@@ -176,18 -189,22 +177,22 @@@ static int __cpuinit have_cpuid_p(void
   
   static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
   {
-       if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) {
-               /* Disable processor serial number */
-               unsigned long lo, hi;
-               rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
-               lo |= 0x200000;
-               wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
-               printk(KERN_NOTICE "CPU serial number disabled.\n");
-               clear_cpu_cap(c, X86_FEATURE_PN);
- 
-               /* Disabling the serial number may affect the cpuid level */
-               c->cpuid_level = cpuid_eax(0);
-       }
+       unsigned long lo, hi;
+ 
+       if (!cpu_has(c, X86_FEATURE_PN) || !disable_x86_serial_nr)
+               return;
+ 
+       /* Disable processor serial number: */
+ 
+       rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
+       lo |= 0x200000;
+       wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
+ 
+       printk(KERN_NOTICE "CPU serial number disabled.\n");
+       clear_cpu_cap(c, X86_FEATURE_PN);
+ 
+       /* Disabling the serial number may affect the cpuid level */
+       c->cpuid_level = cpuid_eax(0);
   }
   
   static int __init x86_serial_nr_setup(char *s)
@@@ -220,6 -237,7 +225,7 @@@ struct cpuid_dependent_feature 
         u32 feature;
         u32 level;
   };
+ 
   static const struct cpuid_dependent_feature __cpuinitconst
   cpuid_dependent_features[] = {
         { X86_FEATURE_MWAIT,            0x00000005 },
@@@ -231,7 -249,11 +237,11 @@@
   static void __cpuinit filter_cpuid_features(struct cpuinfo_x86 *c, bool warn)
   {
         const struct cpuid_dependent_feature *df;
+ 
         for (df = cpuid_dependent_features; df->feature; df++) {
+ 
+               if (!cpu_has(c, df->feature))
+                       continue;
                 /*
                  * Note: cpuid_level is set to -1 if unavailable, but
                  * extended_cpuid_level is set to 0 if unavailable
@@@ -239,32 -261,32 +249,32 @@@
                  * when signed; hence the weird messing around with
                  * signs here...
                  */
-               if (cpu_has(c, df->feature) &&
-                   ((s32)df->level < 0 ?
+               if (!((s32)df->level < 0 ?
                      (u32)df->level > (u32)c->extended_cpuid_level :
-                    (s32)df->level > (s32)c->cpuid_level)) {
-                       clear_cpu_cap(c, df->feature);
-                       if (warn)
-                               printk(KERN_WARNING
-                                      "CPU: CPU feature %s disabled "
-                                      "due to lack of CPUID level 0x%x\n",
-                                      x86_cap_flags[df->feature],
-                                      df->level);
-               }
+                    (s32)df->level > (s32)c->cpuid_level))
+                       continue;
+ 
+               clear_cpu_cap(c, df->feature);
+               if (!warn)
+                       continue;
+ 
+               printk(KERN_WARNING
+                      "CPU: CPU feature %s disabled, no CPUID level 0x%x\n",
+                               x86_cap_flags[df->feature], df->level);
         }
   }
   
   /*
    * Naming convention should be: <Name> [(<Codename>)]
    * This table is only used if init_<vendor>() below doesn't set it;
-  * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used
-  *
+  * in particular, if CPUID levels 0x80000002..4 are supported, this
+  * isn't used
    */
   
   /* Look up CPU names by table lookup. */
- static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c)
+ static const char *__cpuinit table_lookup_model(struct cpuinfo_x86 *c)
   {
-       struct cpu_model_info *info;
+       const struct cpu_model_info *info;
   
         if (c->x86_model >= 16)
                 return NULL;    /* Range check */
@@@ -295,8 -317,10 +305,10 @@@ void load_percpu_segment(int cpu
         load_stack_canary_segment();
   }
   
- /* Current gdt points %fs at the "master" per-cpu area: after this,
-  * it's on the real one. */
+ /*
+  * Current gdt points %fs at the "master" per-cpu area: after this,
+  * it's on the real one.
+  */
   void switch_to_new_gdt(int cpu)
   {
         struct desc_ptr gdt_descr;
@@@ -309,7 -333,7 +321,7 @@@
         load_percpu_segment(cpu);
   }
   
- static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {};
+ static const struct cpu_dev *__cpuinitdata cpu_devs[X86_VENDOR_NUM] = {};
   
   static void __cpuinit default_init(struct cpuinfo_x86 *c)
   {
@@@ -328,7 -352,7 +340,7 @@@
   #endif
   }
   
- static struct cpu_dev __cpuinitdata default_cpu = {
+ static const struct cpu_dev __cpuinitconst default_cpu = {
         .c_init = default_init,
         .c_vendor = "Unknown",
         .c_x86_vendor = X86_VENDOR_UNKNOWN,
@@@ -342,22 -366,24 +354,24 @@@ static void __cpuinit get_model_name(st
         if (c->extended_cpuid_level < 0x80000004)
                 return;
   
-       v = (unsigned int *) c->x86_model_id;
+       v = (unsigned int *)c->x86_model_id;
         cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
         cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
         cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
         c->x86_model_id[48] = 0;
   
-       /* Intel chips right-justify this string for some dumb reason;
-          undo that brain damage */
+       /*
+        * Intel chips right-justify this string for some dumb reason;
+        * undo that brain damage:
+        */
         p = q = &c->x86_model_id[0];
         while (*p == ' ')
-            p++;
+               p++;
         if (p != q) {
-            while (*p)
-                 *q++ = *p++;
-            while (q <= &c->x86_model_id[48])
-                 *q++ = '\0';  /* Zero-pad the rest */
+               while (*p)
+                       *q++ = *p++;
+               while (q <= &c->x86_model_id[48])
+                       *q++ = '\0';    /* Zero-pad the rest */
         }
   }
   
@@@ -426,27 -452,30 +440,30 @@@ void __cpuinit detect_ht(struct cpuinfo
   
         if (smp_num_siblings == 1) {
                 printk(KERN_INFO  "CPU: Hyper-Threading is disabled\n");
-       } else if (smp_num_siblings > 1) {
+               goto out;
+       }
   
-               if (smp_num_siblings > nr_cpu_ids) {
-                       printk(KERN_WARNING "CPU: Unsupported number of siblings %d",
-                                       smp_num_siblings);
-                       smp_num_siblings = 1;
-                       return;
-               }
+       if (smp_num_siblings <= 1)
+               goto out;
   
-               index_msb = get_count_order(smp_num_siblings);
-               c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, index_msb);
+       if (smp_num_siblings > nr_cpu_ids) {
+               pr_warning("CPU: Unsupported number of siblings %d",
+                          smp_num_siblings);
+               smp_num_siblings = 1;
+               return;
+       }
   
-               smp_num_siblings = smp_num_siblings / c->x86_max_cores;
+       index_msb = get_count_order(smp_num_siblings);
+       c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, index_msb);
   
-               index_msb = get_count_order(smp_num_siblings);
+       smp_num_siblings = smp_num_siblings / c->x86_max_cores;
   
-               core_bits = get_count_order(c->x86_max_cores);
+       index_msb = get_count_order(smp_num_siblings);
   
-               c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, index_msb) &
-                                              ((1 << core_bits) - 1);
-       }
+       core_bits = get_count_order(c->x86_max_cores);
+ 
+       c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, index_msb) &
+                                      ((1 << core_bits) - 1);
   
   out:
         if ((c->x86_max_cores * smp_num_siblings) > 1) {
@@@ -461,8 -490,8 +478,8 @@@
   static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
   {
         char *v = c->x86_vendor_id;
-       int i;
         static int printed;
+       int i;
   
         for (i = 0; i < X86_VENDOR_NUM; i++) {
                 if (!cpu_devs[i])
@@@ -471,6 -500,7 +488,7 @@@
                 if (!strcmp(v, cpu_devs[i]->c_ident[0]) ||
                     (cpu_devs[i]->c_ident[1] &&
                      !strcmp(v, cpu_devs[i]->c_ident[1]))) {
+ 
                         this_cpu = cpu_devs[i];
                         c->x86_vendor = this_cpu->c_x86_vendor;
                         return;
@@@ -479,7 -509,9 +497,9 @@@
   
         if (!printed) {
                 printed++;
-               printk(KERN_ERR "CPU: vendor_id '%s' unknown, using generic init.\n", v);
+               printk(KERN_ERR
+                   "CPU: vendor_id '%s' unknown, using generic init.\n", v);
+ 
                 printk(KERN_ERR "CPU: Your system may be unstable.\n");
         }
   
@@@ -499,14 -531,17 +519,17 @@@ void __cpuinit cpu_detect(struct cpuinf
         /* Intel-defined flags: level 0x00000001 */
         if (c->cpuid_level >= 0x00000001) {
                 u32 junk, tfms, cap0, misc;
+ 
                 cpuid(0x00000001, &tfms, &misc, &junk, &cap0);
                 c->x86 = (tfms >> 8) & 0xf;
                 c->x86_model = (tfms >> 4) & 0xf;
                 c->x86_mask = tfms & 0xf;
+ 
                 if (c->x86 == 0xf)
                         c->x86 += (tfms >> 20) & 0xff;
                 if (c->x86 >= 0x6)
                         c->x86_model += ((tfms >> 16) & 0xf) << 4;
+ 
                 if (cap0 & (1<<19)) {
                         c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
                         c->x86_cache_alignment = c->x86_clflush_size;
@@@ -522,6 -557,7 +545,7 @@@ static void __cpuinit get_cpu_cap(struc
         /* Intel-defined flags: level 0x00000001 */
         if (c->cpuid_level >= 0x00000001) {
                 u32 capability, excap;
+ 
                 cpuid(0x00000001, &tfms, &ebx, &excap, &capability);
                 c->x86_capability[0] = capability;
                 c->x86_capability[4] = excap;
@@@ -530,6 -566,7 +554,7 @@@
         /* AMD-defined flags: level 0x80000001 */
         xlvl = cpuid_eax(0x80000000);
         c->extended_cpuid_level = xlvl;
+ 
         if ((xlvl & 0xffff0000) == 0x80000000) {
                 if (xlvl >= 0x80000001) {
                         c->x86_capability[1] = cpuid_edx(0x80000001);
@@@ -537,13 -574,15 +562,15 @@@
                 }
         }
   
- #ifdef CONFIG_X86_64
         if (c->extended_cpuid_level >= 0x80000008) {
                 u32 eax = cpuid_eax(0x80000008);
   
                 c->x86_virt_bits = (eax >> 8) & 0xff;
                 c->x86_phys_bits = eax & 0xff;
         }
+ #ifdef CONFIG_X86_32
+       else if (cpu_has(c, X86_FEATURE_PAE) || cpu_has(c, X86_FEATURE_PSE36))
+               c->x86_phys_bits = 36;
   #endif
   
         if (c->extended_cpuid_level >= 0x80000007)
@@@ -590,8 -629,12 +617,12 @@@ static void __init early_identify_cpu(s
   {
   #ifdef CONFIG_X86_64
         c->x86_clflush_size = 64;
+       c->x86_phys_bits = 36;
+       c->x86_virt_bits = 48;
   #else
         c->x86_clflush_size = 32;
+       c->x86_phys_bits = 32;
+       c->x86_virt_bits = 32;
   #endif
         c->x86_cache_alignment = c->x86_clflush_size;
   
@@@ -622,12 -665,12 +653,12 @@@
   
   void __init early_cpu_init(void)
   {
-       struct cpu_dev **cdev;
+       const struct cpu_dev *const *cdev;
         int count = 0;
   
-       printk("KERNEL supported cpus:\n");
+       printk(KERN_INFO "KERNEL supported cpus:\n");
         for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) {
-               struct cpu_dev *cpudev = *cdev;
+               const struct cpu_dev *cpudev = *cdev;
                 unsigned int j;
   
                 if (count >= X86_VENDOR_NUM)
@@@ -638,7 -681,7 +669,7 @@@
                 for (j = 0; j < 2; j++) {
                         if (!cpudev->c_ident[j])
                                 continue;
-                       printk("  %s %s\n", cpudev->c_vendor,
+                       printk(KERN_INFO "  %s %s\n", cpudev->c_vendor,
                                 cpudev->c_ident[j]);
                 }
         }
@@@ -714,9 -757,13 +745,13 @@@ static void __cpuinit identify_cpu(stru
         c->x86_coreid_bits = 0;
   #ifdef CONFIG_X86_64
         c->x86_clflush_size = 64;
+       c->x86_phys_bits = 36;
+       c->x86_virt_bits = 48;
   #else
         c->cpuid_level = -1;    /* CPUID not detected */
         c->x86_clflush_size = 32;
+       c->x86_phys_bits = 32;
+       c->x86_virt_bits = 32;
   #endif
         c->x86_cache_alignment = c->x86_clflush_size;
         memset(&c->x86_capability, 0, sizeof c->x86_capability);
@@@ -747,8 -794,8 +782,8 @@@
         squash_the_stupid_serial_number(c);
   
         /*
-        * The vendor-specific functions might have changed features.  Now
-        * we do "generic changes."
+        * The vendor-specific functions might have changed features.
+        * Now we do "generic changes."
          */
   
         /* Filter out anything that depends on CPUID levels we don't have */
@@@ -756,7 -803,7 +791,7 @@@
   
         /* If the model name is still unset, do table lookup. */
         if (!c->x86_model_id[0]) {
-               char *p;
+               const char *p;
                 p = table_lookup_model(c);
                 if (p)
                         strcpy(c->x86_model_id, p);
@@@ -812,7 -859,6 +847,7 @@@ static void vgetcpu_set_mode(void
   void __init identify_boot_cpu(void)
   {
         identify_cpu(&boot_cpu_data);
+ +      init_c1e_mask();
   #ifdef CONFIG_X86_32
         sysenter_setup();
         enable_sep_cpu();
@@@ -832,11 -878,11 +867,11 @@@ void __cpuinit identify_secondary_cpu(s
   }
   
   struct msr_range {
-       unsigned min;
-       unsigned max;
+       unsigned        min;
+       unsigned        max;
   };
   
- static struct msr_range msr_range_array[] __cpuinitdata = {
+ static const struct msr_range msr_range_array[] __cpuinitconst = {
         { 0x00000000, 0x00000418},
         { 0xc0000000, 0xc000040b},
         { 0xc0010000, 0xc0010142},
@@@ -845,14 -891,15 +880,15 @@@
   
   static void __cpuinit print_cpu_msr(void)
   {
+       unsigned index_min, index_max;
         unsigned index;
         u64 val;
         int i;
-       unsigned index_min, index_max;
   
         for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) {
                 index_min = msr_range_array[i].min;
                 index_max = msr_range_array[i].max;
+ 
                 for (index = index_min; index < index_max; index++) {
                         if (rdmsrl_amd_safe(index, &val))
                                 continue;
@@@ -862,6 -909,7 +898,7 @@@
   }
   
   static int show_msr __cpuinitdata;
+ 
   static __init int setup_show_msr(char *arg)
   {
         int num;
@@@ -883,12 -931,14 +920,14 @@@ __setup("noclflush", setup_noclflush)
   
   void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
   {
-       char *vendor = NULL;
+       const char *vendor = NULL;
   
-       if (c->x86_vendor < X86_VENDOR_NUM)
+       if (c->x86_vendor < X86_VENDOR_NUM) {
                 vendor = this_cpu->c_vendor;
-       else if (c->cpuid_level >= 0)
-               vendor = c->x86_vendor_id;
+       } else {
+               if (c->cpuid_level >= 0)
+                       vendor = c->x86_vendor_id;
+       }
   
         if (vendor && !strstr(c->x86_model_id, vendor))
                 printk(KERN_CONT "%s ", vendor);
@@@ -915,10 -965,12 +954,12 @@@
   static __init int setup_disablecpuid(char *arg)
   {
         int bit;
+ 
         if (get_option(&arg, &bit) && bit < NCAPINTS*32)
                 setup_clear_cpu_cap(bit);
         else
                 return 0;
+ 
         return 1;
   }
   __setup("clearcpuid=", setup_disablecpuid);
@@@ -928,6 -980,7 +969,7 @@@ struct desc_ptr idt_descr = { 256 * 16 
   
   DEFINE_PER_CPU_FIRST(union irq_stack_union,
                      irq_stack_union) __aligned(PAGE_SIZE);
+ 
   DEFINE_PER_CPU(char *, irq_stack_ptr) =
         init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
   
@@@ -937,12 -990,21 +979,21 @@@ EXPORT_PER_CPU_SYMBOL(kernel_stack)
   
   DEFINE_PER_CPU(unsigned int, irq_count) = -1;
   
+ /*
+  * Special IST stacks which the CPU switches to when it calls
+  * an IST-marked descriptor entry. Up to 7 stacks (hardware
+  * limit), all of them are 4K, except the debug stack which
+  * is 8K.
+  */
+ static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
+         [0 ... N_EXCEPTION_STACKS - 1]        = EXCEPTION_STKSZ,
+         [DEBUG_STACK - 1]                     = DEBUG_STKSZ
+ };
+ 
   static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
         [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ])
         __aligned(PAGE_SIZE);
   
- extern asmlinkage void ignore_sysret(void);
- 
   /* May not be marked __init: used by software suspend */
   void syscall_init(void)
   {
@@@ -972,7 -1034,7 +1023,7 @@@ unsigned long kernel_eflags
    */
   DEFINE_PER_CPU(struct orig_ist, orig_ist);
   
- #else /* x86_64 */
+ #else /* CONFIG_X86_64 */
   
   #ifdef CONFIG_CC_STACKPROTECTOR
   DEFINE_PER_CPU(unsigned long, stack_canary);
@@@ -984,9 -1046,26 +1035,26 @@@ struct pt_regs * __cpuinit idle_regs(st
         memset(regs, 0, sizeof(struct pt_regs));
         regs->fs = __KERNEL_PERCPU;
         regs->gs = __KERNEL_STACK_CANARY;
+ 
         return regs;
   }
- #endif        /* x86_64 */
+ #endif        /* CONFIG_X86_64 */
+ 
+ /*
+  * Clear all 6 debug registers:
+  */
+ static void clear_all_debug_regs(void)
+ {
+       int i;
+ 
+       for (i = 0; i < 8; i++) {
+               /* Ignore db4, db5 */
+               if ((i == 4) || (i == 5))
+                       continue;
+ 
+               set_debugreg(0, i);
+       }
+ }
   
   /*
    * cpu_init() initializes state that is per-CPU. Some data is already
@@@ -996,15 -1075,20 +1064,20 @@@
    * A lot of state is already set up in PDA init for 64 bit
    */
   #ifdef CONFIG_X86_64
+ 
   void __cpuinit cpu_init(void)
   {
-       int cpu = stack_smp_processor_id();
-       struct tss_struct *t = &per_cpu(init_tss, cpu);
-       struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu);
-       unsigned long v;
+       struct orig_ist *orig_ist;
         struct task_struct *me;
+       struct tss_struct *t;
+       unsigned long v;
+       int cpu;
         int i;
   
+       cpu = stack_smp_processor_id();
+       t = &per_cpu(init_tss, cpu);
+       orig_ist = &per_cpu(orig_ist, cpu);
+ 
   #ifdef CONFIG_NUMA
         if (cpu != 0 && percpu_read(node_number) == 0 &&
             cpu_to_node(cpu) != NUMA_NO_NODE)
@@@ -1045,19 -1129,17 +1118,17 @@@
          * set up and load the per-CPU TSS
          */
         if (!orig_ist->ist[0]) {
-               static const unsigned int sizes[N_EXCEPTION_STACKS] = {
-                 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
-                 [DEBUG_STACK - 1] = DEBUG_STKSZ
-               };
                 char *estacks = per_cpu(exception_stacks, cpu);
+ 
                 for (v = 0; v < N_EXCEPTION_STACKS; v++) {
-                       estacks += sizes[v];
+                       estacks += exception_stack_sizes[v];
                         orig_ist->ist[v] = t->x86_tss.ist[v] =
                                         (unsigned long)estacks;
                 }
         }
   
         t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
+ 
         /*
          * <= is required because the CPU will access up to
          * 8 bits beyond the end of the IO permission bitmap.
@@@ -1067,8 -1149,7 +1138,7 @@@
   
         atomic_inc(&init_mm.mm_count);
         me->active_mm = &init_mm;
-       if (me->mm)
-               BUG();
+       BUG_ON(me->mm);
         enter_lazy_tlb(&init_mm, me);
   
         load_sp0(t, &current->thread);
@@@ -1087,17 -1168,7 +1157,7 @@@
                 arch_kgdb_ops.correct_hw_break();
         else
   #endif
-       {
-               /*
-                * Clear all 6 debug registers:
-                */
-               set_debugreg(0UL, 0);
-               set_debugreg(0UL, 1);
-               set_debugreg(0UL, 2);
-               set_debugreg(0UL, 3);
-               set_debugreg(0UL, 6);
-               set_debugreg(0UL, 7);
-       }
+               clear_all_debug_regs();
   
         fpu_init();
   
@@@ -1118,7 -1189,8 +1178,8 @@@ void __cpuinit cpu_init(void
   
         if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) {
                 printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
-               for (;;) local_irq_enable();
+               for (;;)
+                       local_irq_enable();
         }
   
         printk(KERN_INFO "Initializing CPU#%d\n", cpu);
@@@ -1134,8 -1206,7 +1195,7 @@@
          */
         atomic_inc(&init_mm.mm_count);
         curr->active_mm = &init_mm;
-       if (curr->mm)
-               BUG();
+       BUG_ON(curr->mm);
         enter_lazy_tlb(&init_mm, curr);
   
         load_sp0(t, thread);
@@@ -1148,13 -1219,7 +1208,7 @@@
         __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
   #endif
   
-       /* Clear all 6 debug registers: */
-       set_debugreg(0, 0);
-       set_debugreg(0, 1);
-       set_debugreg(0, 2);
-       set_debugreg(0, 3);
-       set_debugreg(0, 6);
-       set_debugreg(0, 7);
+       clear_all_debug_regs();
   
         /*
          * Force FPU initialization:
@@@ -1174,6 -1239,4 +1228,4 @@@
   
         xsave_init();
   }
- 
- 
   #endif
diff --combined arch/x86/kernel/cpu/cpufreq/p4-clockmod.c

index d8341d17c1890e239c470f211efc9806d358d7b1,41ed94915f974ce0310510d9864b54d8e96f37df..6ac55bd341ae8802abf29c6180533d08ab68930c
--- 1/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
--- 2/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@@ -27,15 -27,17 +27,17 @@@
   #include <linux/cpufreq.h>
   #include <linux/slab.h>
   #include <linux/cpumask.h>
+ #include <linux/timex.h>
   
   #include <asm/processor.h>
   #include <asm/msr.h>
- #include <asm/timex.h>
+ #include <asm/timer.h>
   
   #include "speedstep-lib.h"
   
   #define PFX   "p4-clockmod: "
- #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "p4-clockmod", msg)
+ #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
+               "p4-clockmod", msg)
   
   /*
    * Duty Cycle (3bits), note DC_DISABLE is not specified in
@@@ -58,7 -60,8 +60,8 @@@ static int cpufreq_p4_setdc(unsigned in
   {
         u32 l, h;
   
-       if (!cpu_online(cpu) || (newstate > DC_DISABLE) || (newstate == DC_RESV))
+       if (!cpu_online(cpu) ||
+           (newstate > DC_DISABLE) || (newstate == DC_RESV))
                 return -EINVAL;
   
         rdmsr_on_cpu(cpu, MSR_IA32_THERM_STATUS, &l, &h);
@@@ -66,7 -69,8 +69,8 @@@
         if (l & 0x01)
                 dprintk("CPU#%d currently thermal throttled\n", cpu);
   
-       if (has_N44_O17_errata[cpu] && (newstate == DC_25PT || newstate == DC_DFLT))
+       if (has_N44_O17_errata[cpu] &&
+           (newstate == DC_25PT || newstate == DC_DFLT))
                 newstate = DC_38PT;
   
         rdmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, &l, &h);
@@@ -112,7 -116,8 +116,8 @@@ static int cpufreq_p4_target(struct cpu
         struct cpufreq_freqs freqs;
         int i;
   
-       if (cpufreq_frequency_table_target(policy, &p4clockmod_table[0], target_freq, relation, &newstate))
+       if (cpufreq_frequency_table_target(policy, &p4clockmod_table[0],
+                               target_freq, relation, &newstate))
                 return -EINVAL;
   
         freqs.old = cpufreq_p4_get(policy->cpu);
@@@ -127,7 -132,8 +132,8 @@@
                 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
         }
   
-       /* run on each logical CPU, see section 13.15.3 of IA32 Intel Architecture Software
+       /* run on each logical CPU,
+        * see section 13.15.3 of IA32 Intel Architecture Software
          * Developer's Manual, Volume 3
          */
         for_each_cpu(i, policy->cpus)
@@@ -153,28 -159,30 +159,30 @@@ static unsigned int cpufreq_p4_get_freq
   {
         if (c->x86 == 0x06) {
                 if (cpu_has(c, X86_FEATURE_EST))
-                       printk(KERN_WARNING PFX "Warning: EST-capable CPU detected. "
-                              "The acpi-cpufreq module offers voltage scaling"
-                              " in addition of frequency scaling. You should use "
-                              "that instead of p4-clockmod, if possible.\n");
+                       printk(KERN_WARNING PFX "Warning: EST-capable CPU "
+                              "detected. The acpi-cpufreq module offers "
+                              "voltage scaling in addition of frequency "
+                              "scaling. You should use that instead of "
+                              "p4-clockmod, if possible.\n");
                 switch (c->x86_model) {
                 case 0x0E: /* Core */
                 case 0x0F: /* Core Duo */
                 case 0x16: /* Celeron Core */
                         p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS;
-                       return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PCORE);
+                       return speedstep_get_frequency(SPEEDSTEP_CPU_PCORE);
                 case 0x0D: /* Pentium M (Dothan) */
                         p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS;
                         /* fall through */
                 case 0x09: /* Pentium M (Banias) */
-                       return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PM);
+                       return speedstep_get_frequency(SPEEDSTEP_CPU_PM);
                 }
         }
   
         if (c->x86 != 0xF) {
                 if (!cpu_has(c, X86_FEATURE_EST))
-                       printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. "
-                               "Please send an e-mail to <cpufreq@vger.kernel.org>\n");
+                       printk(KERN_WARNING PFX "Unknown CPU. "
+                               "Please send an e-mail to "
+                               "<cpufreq@vger.kernel.org>\n");
                 return 0;
         }
   
@@@ -182,16 -190,16 +190,16 @@@
          * throttling is active or not. */
         p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS;
   
-       if (speedstep_detect_processor() == SPEEDSTEP_PROCESSOR_P4M) {
+       if (speedstep_detect_processor() == SPEEDSTEP_CPU_P4M) {
                 printk(KERN_WARNING PFX "Warning: Pentium 4-M detected. "
                        "The speedstep-ich or acpi cpufreq modules offer "
                        "voltage scaling in addition of frequency scaling. "
                        "You should use either one instead of p4-clockmod, "
                        "if possible.\n");
-               return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_P4M);
+               return speedstep_get_frequency(SPEEDSTEP_CPU_P4M);
         }
   
-       return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_P4D);
+       return speedstep_get_frequency(SPEEDSTEP_CPU_P4D);
   }
   
   
@@@ -203,7 -211,7 +211,7 @@@ static int cpufreq_p4_cpu_init(struct c
         unsigned int i;
   
   #ifdef CONFIG_SMP
- -      cpumask_copy(policy->cpus, &per_cpu(cpu_sibling_map, policy->cpu));
+ +      cpumask_copy(policy->cpus, cpu_sibling_mask(policy->cpu));
   #endif
   
         /* Errata workaround */
@@@ -217,14 -225,20 +225,20 @@@
                 dprintk("has errata -- disabling low frequencies\n");
         }
   
+       if (speedstep_detect_processor() == SPEEDSTEP_CPU_P4D &&
+           c->x86_model < 2) {
+               /* switch to maximum frequency and measure result */
+               cpufreq_p4_setdc(policy->cpu, DC_DISABLE);
+               recalibrate_cpu_khz();
+       }
         /* get max frequency */
         stock_freq = cpufreq_p4_get_frequency(c);
         if (!stock_freq)
                 return -EINVAL;
   
         /* table init */
-       for (i=1; (p4clockmod_table[i].frequency != CPUFREQ_TABLE_END); i++) {
-               if ((i<2) && (has_N44_O17_errata[policy->cpu]))
+       for (i = 1; (p4clockmod_table[i].frequency != CPUFREQ_TABLE_END); i++) {
+               if ((i < 2) && (has_N44_O17_errata[policy->cpu]))
                         p4clockmod_table[i].frequency = CPUFREQ_ENTRY_INVALID;
                 else
                         p4clockmod_table[i].frequency = (stock_freq * i)/8;
@@@ -232,7 -246,10 +246,10 @@@
         cpufreq_frequency_table_get_attr(p4clockmod_table, policy->cpu);
   
         /* cpuinfo and default policy values */
-       policy->cpuinfo.transition_latency = 1000000; /* assumed */
+ 
+       /* the transition latency is set to be 1 higher than the maximum
+        * transition latency of the ondemand governor */
+       policy->cpuinfo.transition_latency = 10000001;
         policy->cur = stock_freq;
   
         return cpufreq_frequency_table_cpuinfo(policy, &p4clockmod_table[0]);
@@@ -258,12 -275,12 +275,12 @@@ static unsigned int cpufreq_p4_get(unsi
                 l = DC_DISABLE;
   
         if (l != DC_DISABLE)
-               return (stock_freq * l / 8);
+               return stock_freq * l / 8;
   
         return stock_freq;
   }
   
- static struct freq_attr* p4clockmod_attr[] = {
+ static struct freq_attr *p4clockmod_attr[] = {
         &cpufreq_freq_attr_scaling_available_freqs,
         NULL,
   };
@@@ -298,9 -315,10 +315,10 @@@ static int __init cpufreq_p4_init(void
   
         ret = cpufreq_register_driver(&p4clockmod_driver);
         if (!ret)
-               printk(KERN_INFO PFX "P4/Xeon(TM) CPU On-Demand Clock Modulation available\n");
+               printk(KERN_INFO PFX "P4/Xeon(TM) CPU On-Demand Clock "
+                               "Modulation available\n");
   
-       return (ret);
+       return ret;
   }
   
   
@@@ -310,9 -328,9 +328,9 @@@ static void __exit cpufreq_p4_exit(void
   }
   
   
- MODULE_AUTHOR ("Zwane Mwaikambo <zwane@commfireservices.com>");
- MODULE_DESCRIPTION ("cpufreq driver for Pentium(TM) 4/Xeon(TM)");
- MODULE_LICENSE ("GPL");
+ MODULE_AUTHOR("Zwane Mwaikambo <zwane@commfireservices.com>");
+ MODULE_DESCRIPTION("cpufreq driver for Pentium(TM) 4/Xeon(TM)");
+ MODULE_LICENSE("GPL");
   
   late_initcall(cpufreq_p4_init);
   module_exit(cpufreq_p4_exit);
diff --combined arch/x86/kernel/cpu/cpufreq/powernow-k8.c

index e8fd76f9888321e2843ba80214810f24f1fc02cc,a15ac94e0b9b8c2ad1ee540fc7bad1b5415aff36..4709ead2db526bfc59b803ea06791a0717d539c8
--- 1/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
--- 2/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@@ -33,16 -33,14 +33,14 @@@
   #include <linux/string.h>
   #include <linux/cpumask.h>
   #include <linux/sched.h>      /* for current / set_cpus_allowed() */
+ #include <linux/io.h>
+ #include <linux/delay.h>
   
   #include <asm/msr.h>
- #include <asm/io.h>
- #include <asm/delay.h>
   
- #ifdef CONFIG_X86_POWERNOW_K8_ACPI
   #include <linux/acpi.h>
   #include <linux/mutex.h>
   #include <acpi/processor.h>
- #endif
   
   #define PFX "powernow-k8: "
   #define VERSION "version 2.20.00"
@@@ -56,10 -54,7 +54,10 @@@ static DEFINE_PER_CPU(struct powernow_k
   static int cpu_family = CPU_OPTERON;
   
   #ifndef CONFIG_SMP
- -DEFINE_PER_CPU(cpumask_t, cpu_core_map);
+ +static inline const struct cpumask *cpu_core_mask(int cpu)
+ +{
+ +      return cpumask_of(0);
+ +}
   #endif
   
   /* Return a frequency in MHz, given an input fid */
@@@ -74,7 -69,8 +72,8 @@@ static u32 find_khz_freq_from_fid(u32 f
         return 1000 * find_freq_from_fid(fid);
   }
   
- static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data, u32 pstate)
+ static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data,
+               u32 pstate)
   {
         return data[pstate].frequency;
   }
@@@ -189,7 -185,9 +188,9 @@@ static int write_new_fid(struct powerno
                 return 1;
         }
   
-       lo = fid | (data->currvid << MSR_C_LO_VID_SHIFT) | MSR_C_LO_INIT_FID_VID;
+       lo = fid;
+       lo |= (data->currvid << MSR_C_LO_VID_SHIFT);
+       lo |= MSR_C_LO_INIT_FID_VID;
   
         dprintk("writing fid 0x%x, lo 0x%x, hi 0x%x\n",
                 fid, lo, data->plllock * PLL_LOCK_CONVERSION);
@@@ -197,7 -195,9 +198,9 @@@
         do {
                 wrmsr(MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION);
                 if (i++ > 100) {
-                       printk(KERN_ERR PFX "Hardware error - pending bit very stuck - no further pstate changes possible\n");
+                       printk(KERN_ERR PFX
+                               "Hardware error - pending bit very stuck - "
+                               "no further pstate changes possible\n");
                         return 1;
                 }
         } while (query_current_values_with_pending_wait(data));
@@@ -205,14 -205,16 +208,16 @@@
         count_off_irt(data);
   
         if (savevid != data->currvid) {
-               printk(KERN_ERR PFX "vid change on fid trans, old 0x%x, new 0x%x\n",
-                      savevid, data->currvid);
+               printk(KERN_ERR PFX
+                       "vid change on fid trans, old 0x%x, new 0x%x\n",
+                       savevid, data->currvid);
                 return 1;
         }
   
         if (fid != data->currfid) {
-               printk(KERN_ERR PFX "fid trans failed, fid 0x%x, curr 0x%x\n", fid,
-                       data->currfid);
+               printk(KERN_ERR PFX
+                       "fid trans failed, fid 0x%x, curr 0x%x\n", fid,
+                       data->currfid);
                 return 1;
         }
   
@@@ -231,7 -233,9 +236,9 @@@ static int write_new_vid(struct powerno
                 return 1;
         }
   
-       lo = data->currfid | (vid << MSR_C_LO_VID_SHIFT) | MSR_C_LO_INIT_FID_VID;
+       lo = data->currfid;
+       lo |= (vid << MSR_C_LO_VID_SHIFT);
+       lo |= MSR_C_LO_INIT_FID_VID;
   
         dprintk("writing vid 0x%x, lo 0x%x, hi 0x%x\n",
                 vid, lo, STOP_GRANT_5NS);
@@@ -239,20 -243,24 +246,24 @@@
         do {
                 wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS);
                 if (i++ > 100) {
-                       printk(KERN_ERR PFX "internal error - pending bit very stuck - no further pstate changes possible\n");
+                       printk(KERN_ERR PFX "internal error - pending bit "
+                                       "very stuck - no further pstate "
+                                       "changes possible\n");
                         return 1;
                 }
         } while (query_current_values_with_pending_wait(data));
   
         if (savefid != data->currfid) {
-               printk(KERN_ERR PFX "fid changed on vid trans, old 0x%x new 0x%x\n",
+               printk(KERN_ERR PFX "fid changed on vid trans, old "
+                       "0x%x new 0x%x\n",
                        savefid, data->currfid);
                 return 1;
         }
   
         if (vid != data->currvid) {
-               printk(KERN_ERR PFX "vid trans failed, vid 0x%x, curr 0x%x\n", vid,
-                               data->currvid);
+               printk(KERN_ERR PFX "vid trans failed, vid 0x%x, "
+                               "curr 0x%x\n",
+                               vid, data->currvid);
                 return 1;
         }
   
@@@ -264,7 -272,8 +275,8 @@@
    * Decreasing vid codes represent increasing voltages:
    * vid of 0 is 1.550V, vid of 0x1e is 0.800V, vid of VID_OFF is off.
    */
- static int decrease_vid_code_by_step(struct powernow_k8_data *data, u32 reqvid, u32 step)
+ static int decrease_vid_code_by_step(struct powernow_k8_data *data,
+               u32 reqvid, u32 step)
   {
         if ((data->currvid - reqvid) > step)
                 reqvid = data->currvid - step;
@@@ -286,7 -295,8 +298,8 @@@ static int transition_pstate(struct pow
   }
   
   /* Change Opteron/Athlon64 fid and vid, by the 3 phases. */
- static int transition_fid_vid(struct powernow_k8_data *data, u32 reqfid, u32 reqvid)
+ static int transition_fid_vid(struct powernow_k8_data *data,
+               u32 reqfid, u32 reqvid)
   {
         if (core_voltage_pre_transition(data, reqvid))
                 return 1;
@@@ -301,7 -311,8 +314,8 @@@
                 return 1;
   
         if ((reqfid != data->currfid) || (reqvid != data->currvid)) {
-               printk(KERN_ERR PFX "failed (cpu%d): req 0x%x 0x%x, curr 0x%x 0x%x\n",
+               printk(KERN_ERR PFX "failed (cpu%d): req 0x%x 0x%x, "
+                               "curr 0x%x 0x%x\n",
                                 smp_processor_id(),
                                 reqfid, reqvid, data->currfid, data->currvid);
                 return 1;
@@@ -314,13 -325,15 +328,15 @@@
   }
   
   /* Phase 1 - core voltage transition ... setup voltage */
- static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid)
+ static int core_voltage_pre_transition(struct powernow_k8_data *data,
+               u32 reqvid)
   {
         u32 rvosteps = data->rvo;
         u32 savefid = data->currfid;
         u32 maxvid, lo;
   
-       dprintk("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, reqvid 0x%x, rvo 0x%x\n",
+       dprintk("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, "
+               "reqvid 0x%x, rvo 0x%x\n",
                 smp_processor_id(),
                 data->currfid, data->currvid, reqvid, data->rvo);
   
@@@ -343,7 -356,7 +359,7 @@@
                 } else {
                         dprintk("ph1: changing vid for rvo, req 0x%x\n",
                                 data->currvid - 1);
-                       if (decrease_vid_code_by_step(data, data->currvid - 1, 1))
+                       if (decrease_vid_code_by_step(data, data->currvid-1, 1))
                                 return 1;
                         rvosteps--;
                 }
@@@ -353,7 -366,8 +369,8 @@@
                 return 1;
   
         if (savefid != data->currfid) {
-               printk(KERN_ERR PFX "ph1 err, currfid changed 0x%x\n", data->currfid);
+               printk(KERN_ERR PFX "ph1 err, currfid changed 0x%x\n",
+                               data->currfid);
                 return 1;
         }
   
@@@ -366,20 -380,24 +383,24 @@@
   /* Phase 2 - core frequency transition */
   static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid)
   {
-       u32 vcoreqfid, vcocurrfid, vcofiddiff, fid_interval, savevid = data->currvid;
+       u32 vcoreqfid, vcocurrfid, vcofiddiff;
+       u32 fid_interval, savevid = data->currvid;
   
-       if ((reqfid < HI_FID_TABLE_BOTTOM) && (data->currfid < HI_FID_TABLE_BOTTOM)) {
-               printk(KERN_ERR PFX "ph2: illegal lo-lo transition 0x%x 0x%x\n",
-                       reqfid, data->currfid);
+       if ((reqfid < HI_FID_TABLE_BOTTOM) &&
+           (data->currfid < HI_FID_TABLE_BOTTOM)) {
+               printk(KERN_ERR PFX "ph2: illegal lo-lo transition "
+                               "0x%x 0x%x\n", reqfid, data->currfid);
                 return 1;
         }
   
         if (data->currfid == reqfid) {
-               printk(KERN_ERR PFX "ph2 null fid transition 0x%x\n", data->currfid);
+               printk(KERN_ERR PFX "ph2 null fid transition 0x%x\n",
+                               data->currfid);
                 return 0;
         }
   
-       dprintk("ph2 (cpu%d): starting, currfid 0x%x, currvid 0x%x, reqfid 0x%x\n",
+       dprintk("ph2 (cpu%d): starting, currfid 0x%x, currvid 0x%x, "
+               "reqfid 0x%x\n",
                 smp_processor_id(),
                 data->currfid, data->currvid, reqfid);
   
@@@ -393,14 -411,14 +414,14 @@@
   
                 if (reqfid > data->currfid) {
                         if (data->currfid > LO_FID_TABLE_TOP) {
-                               if (write_new_fid(data, data->currfid + fid_interval)) {
+                               if (write_new_fid(data,
+                                               data->currfid + fid_interval))
                                         return 1;
-                               }
                         } else {
                                 if (write_new_fid
-                                   (data, 2 + convert_fid_to_vco_fid(data->currfid))) {
+                                   (data,
+                                    2 + convert_fid_to_vco_fid(data->currfid)))
                                         return 1;
-                               }
                         }
                 } else {
                         if (write_new_fid(data, data->currfid - fid_interval))
@@@ -420,7 -438,8 +441,8 @@@
   
         if (data->currfid != reqfid) {
                 printk(KERN_ERR PFX
-                       "ph2: mismatch, failed fid transition, curr 0x%x, req 0x%x\n",
+                       "ph2: mismatch, failed fid transition, "
+                       "curr 0x%x, req 0x%x\n",
                         data->currfid, reqfid);
                 return 1;
         }
@@@ -438,7 -457,8 +460,8 @@@
   }
   
   /* Phase 3 - core voltage transition flow ... jump to the final vid. */
- static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvid)
+ static int core_voltage_post_transition(struct powernow_k8_data *data,
+               u32 reqvid)
   {
         u32 savefid = data->currfid;
         u32 savereqvid = reqvid;
@@@ -460,7 -480,8 +483,8 @@@
   
                 if (data->currvid != reqvid) {
                         printk(KERN_ERR PFX
-                              "ph3: failed vid transition\n, req 0x%x, curr 0x%x",
+                              "ph3: failed vid transition\n, "
+                              "req 0x%x, curr 0x%x",
                                reqvid, data->currvid);
                         return 1;
                 }
@@@ -511,7 -532,8 +535,8 @@@ static int check_supported_cpu(unsigne
         if ((eax & CPUID_XFAM) == CPUID_XFAM_K8) {
                 if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) ||
                     ((eax & CPUID_XMOD) > CPUID_XMOD_REV_MASK)) {
-                       printk(KERN_INFO PFX "Processor cpuid %x not supported\n", eax);
+                       printk(KERN_INFO PFX
+                               "Processor cpuid %x not supported\n", eax);
                         goto out;
                 }
   
@@@ -523,8 -545,10 +548,10 @@@
                 }
   
                 cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
-               if ((edx & P_STATE_TRANSITION_CAPABLE) != P_STATE_TRANSITION_CAPABLE) {
-                       printk(KERN_INFO PFX "Power state transitions not supported\n");
+               if ((edx & P_STATE_TRANSITION_CAPABLE)
+                       != P_STATE_TRANSITION_CAPABLE) {
+                       printk(KERN_INFO PFX
+                               "Power state transitions not supported\n");
                         goto out;
                 }
         } else { /* must be a HW Pstate capable processor */
@@@ -542,7 -566,8 +569,8 @@@ out
         return rc;
   }
   
- static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, u8 maxvid)
+ static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst,
+               u8 maxvid)
   {
         unsigned int j;
         u8 lastfid = 0xff;
@@@ -553,12 -578,14 +581,14 @@@
                                j, pst[j].vid);
                         return -EINVAL;
                 }
-               if (pst[j].vid < data->rvo) {   /* vid + rvo >= 0 */
+               if (pst[j].vid < data->rvo) {
+                       /* vid + rvo >= 0 */
                         printk(KERN_ERR FW_BUG PFX "0 vid exceeded with pstate"
                                " %d\n", j);
                         return -ENODEV;
                 }
-               if (pst[j].vid < maxvid + data->rvo) {  /* vid + rvo >= maxvid */
+               if (pst[j].vid < maxvid + data->rvo) {
+                       /* vid + rvo >= maxvid */
                         printk(KERN_ERR FW_BUG PFX "maxvid exceeded with pstate"
                                " %d\n", j);
                         return -ENODEV;
@@@ -582,23 -609,31 +612,31 @@@
                 return -EINVAL;
         }
         if (lastfid > LO_FID_TABLE_TOP)
-               printk(KERN_INFO FW_BUG PFX  "first fid not from lo freq table\n");
+               printk(KERN_INFO FW_BUG PFX
+                       "first fid not from lo freq table\n");
   
         return 0;
   }
   
+ static void invalidate_entry(struct powernow_k8_data *data, unsigned int entry)
+ {
+       data->powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID;
+ }
+ 
   static void print_basics(struct powernow_k8_data *data)
   {
         int j;
         for (j = 0; j < data->numps; j++) {
-               if (data->powernow_table[j].frequency != CPUFREQ_ENTRY_INVALID) {
+               if (data->powernow_table[j].frequency !=
+                               CPUFREQ_ENTRY_INVALID) {
                         if (cpu_family == CPU_HW_PSTATE) {
-                               printk(KERN_INFO PFX "   %d : pstate %d (%d MHz)\n",
-                                       j,
+                               printk(KERN_INFO PFX
+                                       "   %d : pstate %d (%d MHz)\n", j,
                                         data->powernow_table[j].index,
                                         data->powernow_table[j].frequency/1000);
                         } else {
-                               printk(KERN_INFO PFX "   %d : fid 0x%x (%d MHz), vid 0x%x\n",
+                               printk(KERN_INFO PFX
+                                       "   %d : fid 0x%x (%d MHz), vid 0x%x\n",
                                         j,
                                         data->powernow_table[j].index & 0xff,
                                         data->powernow_table[j].frequency/1000,
@@@ -607,20 -642,25 +645,25 @@@
                 }
         }
         if (data->batps)
-               printk(KERN_INFO PFX "Only %d pstates on battery\n", data->batps);
+               printk(KERN_INFO PFX "Only %d pstates on battery\n",
+                               data->batps);
   }
   
- static int fill_powernow_table(struct powernow_k8_data *data, struct pst_s *pst, u8 maxvid)
+ static int fill_powernow_table(struct powernow_k8_data *data,
+               struct pst_s *pst, u8 maxvid)
   {
         struct cpufreq_frequency_table *powernow_table;
         unsigned int j;
   
-       if (data->batps) {    /* use ACPI support to get full speed on mains power */
-               printk(KERN_WARNING PFX "Only %d pstates usable (use ACPI driver for full range\n", data->batps);
+       if (data->batps) {
+               /* use ACPI support to get full speed on mains power */
+               printk(KERN_WARNING PFX
+                       "Only %d pstates usable (use ACPI driver for full "
+                       "range\n", data->batps);
                 data->numps = data->batps;
         }
   
-       for ( j=1; j<data->numps; j++ ) {
+       for (j = 1; j < data->numps; j++) {
                 if (pst[j-1].fid >= pst[j].fid) {
                         printk(KERN_ERR PFX "PST out of sequence\n");
                         return -EINVAL;
@@@ -643,9 -683,11 +686,11 @@@
         }
   
         for (j = 0; j < data->numps; j++) {
+               int freq;
                 powernow_table[j].index = pst[j].fid; /* lower 8 bits */
                 powernow_table[j].index |= (pst[j].vid << 8); /* upper 8 bits */
-               powernow_table[j].frequency = find_khz_freq_from_fid(pst[j].fid);
+               freq = find_khz_freq_from_fid(pst[j].fid);
+               powernow_table[j].frequency = freq;
         }
         powernow_table[data->numps].frequency = CPUFREQ_TABLE_END;
         powernow_table[data->numps].index = 0;
@@@ -657,11 -699,12 +702,12 @@@
   
         dprintk("cfid 0x%x, cvid 0x%x\n", data->currfid, data->currvid);
         data->powernow_table = powernow_table;
- -      if (first_cpu(per_cpu(cpu_core_map, data->cpu)) == data->cpu)
+ +      if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu)
                 print_basics(data);
   
         for (j = 0; j < data->numps; j++)
-               if ((pst[j].fid==data->currfid) && (pst[j].vid==data->currvid))
+               if ((pst[j].fid == data->currfid) &&
+                   (pst[j].vid == data->currvid))
                         return 0;
   
         dprintk("currfid/vid do not match PST, ignoring\n");
@@@ -701,7 -744,8 +747,8 @@@ static int find_psb_table(struct powern
                 }
   
                 data->vstable = psb->vstable;
-               dprintk("voltage stabilization time: %d(*20us)\n", data->vstable);
+               dprintk("voltage stabilization time: %d(*20us)\n",
+                               data->vstable);
   
                 dprintk("flags2: 0x%x\n", psb->flags2);
                 data->rvo = psb->flags2 & 3;
@@@ -716,11 -760,12 +763,12 @@@
   
                 dprintk("numpst: 0x%x\n", psb->num_tables);
                 cpst = psb->num_tables;
-               if ((psb->cpuid == 0x00000fc0) || (psb->cpuid == 0x00000fe0) ){
+               if ((psb->cpuid == 0x00000fc0) ||
+                   (psb->cpuid == 0x00000fe0)) {
                         thiscpuid = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
-                       if ((thiscpuid == 0x00000fc0) || (thiscpuid == 0x00000fe0) ) {
+                       if ((thiscpuid == 0x00000fc0) ||
+                           (thiscpuid == 0x00000fe0))
                                 cpst = 1;
-                       }
                 }
                 if (cpst != 1) {
                         printk(KERN_ERR FW_BUG PFX "numpst must be 1\n");
@@@ -735,7 -780,8 +783,8 @@@
   
                 data->numps = psb->numps;
                 dprintk("numpstates: 0x%x\n", data->numps);
-               return fill_powernow_table(data, (struct pst_s *)(psb+1), maxvid);
+               return fill_powernow_table(data,
+                               (struct pst_s *)(psb+1), maxvid);
         }
         /*
          * If you see this message, complain to BIOS manufacturer. If
@@@ -748,28 -794,31 +797,31 @@@
          * BIOS and Kernel Developer's Guide, which is available on
          * www.amd.com
          */
-       printk(KERN_ERR PFX "BIOS error - no PSB or ACPI _PSS objects\n");
+       printk(KERN_ERR FW_BUG PFX "No PSB or ACPI _PSS objects\n");
         return -ENODEV;
   }
   
- #ifdef CONFIG_X86_POWERNOW_K8_ACPI
- static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index)
+ static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data,
+               unsigned int index)
   {
+       acpi_integer control;
+ 
         if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE))
                 return;
   
-       data->irt = (data->acpi_data.states[index].control >> IRT_SHIFT) & IRT_MASK;
-       data->rvo = (data->acpi_data.states[index].control >> RVO_SHIFT) & RVO_MASK;
-       data->exttype = (data->acpi_data.states[index].control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK;
-       data->plllock = (data->acpi_data.states[index].control >> PLL_L_SHIFT) & PLL_L_MASK;
-       data->vidmvs = 1 << ((data->acpi_data.states[index].control >> MVS_SHIFT) & MVS_MASK);
-       data->vstable = (data->acpi_data.states[index].control >> VST_SHIFT) & VST_MASK;
- }
+       /* decode the bit fields packed into this state's ACPI control value */
+       control = data->acpi_data.states[index].control;
+       data->irt = (control >> IRT_SHIFT) & IRT_MASK;
+       data->rvo = (control >> RVO_SHIFT) & RVO_MASK;
+       data->exttype = (control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK;
+       data->plllock = (control >> PLL_L_SHIFT) & PLL_L_MASK;
+       data->vidmvs = 1 << ((control >> MVS_SHIFT) & MVS_MASK);
+       data->vstable = (control >> VST_SHIFT) & VST_MASK;
+ }
   
   static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
   {
         struct cpufreq_frequency_table *powernow_table;
         int ret_val = -ENODEV;
+       acpi_integer space_id;
   
         if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) {
                 dprintk("register performance failed: bad ACPI data\n");
@@@ -782,11 -831,12 +834,12 @@@
                 goto err_out;
         }
   
-       if ((data->acpi_data.control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
-               (data->acpi_data.status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
+       /* both the control and the status register must be fixed hardware */
+       space_id = data->acpi_data.status_register.space_id;
+       if ((data->acpi_data.control_register.space_id !=
+                       ACPI_ADR_SPACE_FIXED_HARDWARE) ||
+               (space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
                 dprintk("Invalid control/status registers (%x - %x)\n",
                         data->acpi_data.control_register.space_id,
-                       data->acpi_data.status_register.space_id);
+                       space_id);
                 goto err_out;
         }
   
@@@ -805,13 -855,14 +858,14 @@@
         if (ret_val)
                 goto err_out_mem;
   
-       powernow_table[data->acpi_data.state_count].frequency = CPUFREQ_TABLE_END;
+       powernow_table[data->acpi_data.state_count].frequency =
+               CPUFREQ_TABLE_END;
         powernow_table[data->acpi_data.state_count].index = 0;
         data->powernow_table = powernow_table;
   
         /* fill in data */
         data->numps = data->acpi_data.state_count;
- -      if (first_cpu(per_cpu(cpu_core_map, data->cpu)) == data->cpu)
+ +      if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu)
                 print_basics(data);
         powernow_k8_acpi_pst_values(data, 0);
   
@@@ -833,13 -884,15 +887,15 @@@ err_out_mem
   err_out:
         acpi_processor_unregister_performance(&data->acpi_data, data->cpu);
   
-       /* data->acpi_data.state_count informs us at ->exit() whether ACPI was used */
+       /* data->acpi_data.state_count informs us at ->exit()
+        * whether ACPI was used */
         data->acpi_data.state_count = 0;
   
         return ret_val;
   }
   
- static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table)
+ static int fill_powernow_table_pstate(struct powernow_k8_data *data,
+               struct cpufreq_frequency_table *powernow_table)
   {
         int i;
         u32 hi = 0, lo = 0;
@@@ -851,84 -904,101 +907,101 @@@
   
                 index = data->acpi_data.states[i].control & HW_PSTATE_MASK;
                 if (index > data->max_hw_pstate) {
-                       printk(KERN_ERR PFX "invalid pstate %d - bad value %d.\n", i, index);
-                       printk(KERN_ERR PFX "Please report to BIOS manufacturer\n");
-                       powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
+                       printk(KERN_ERR PFX "invalid pstate %d - "
+                                       "bad value %d.\n", i, index);
+                       printk(KERN_ERR PFX "Please report to BIOS "
+                                       "manufacturer\n");
+                       invalidate_entry(data, i);
                         continue;
                 }
                 rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi);
                 if (!(hi & HW_PSTATE_VALID_MASK)) {
                         dprintk("invalid pstate %d, ignoring\n", index);
-                       powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
+                       invalidate_entry(data, i);
                         continue;
                 }
   
                 powernow_table[i].index = index;
   
-               powernow_table[i].frequency = data->acpi_data.states[i].core_frequency * 1000;
+               powernow_table[i].frequency =
+                       data->acpi_data.states[i].core_frequency * 1000;
         }
         return 0;
   }
   
- static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table)
+ static int fill_powernow_table_fidvid(struct powernow_k8_data *data,
+               struct cpufreq_frequency_table *powernow_table)
   {
         int i;
         int cntlofreq = 0;
+ 
         for (i = 0; i < data->acpi_data.state_count; i++) {
                 u32 fid;
                 u32 vid;
+               u32 freq, index;
+               acpi_integer status, control;
   
                 if (data->exttype) {
-                       fid = data->acpi_data.states[i].status & EXT_FID_MASK;
-                       vid = (data->acpi_data.states[i].status >> VID_SHIFT) & EXT_VID_MASK;
+                       status =  data->acpi_data.states[i].status;
+                       fid = status & EXT_FID_MASK;
+                       vid = (status >> VID_SHIFT) & EXT_VID_MASK;
                 } else {
-                       fid = data->acpi_data.states[i].control & FID_MASK;
-                       vid = (data->acpi_data.states[i].control >> VID_SHIFT) & VID_MASK;
+                       control =  data->acpi_data.states[i].control;
+                       fid = control & FID_MASK;
+                       vid = (control >> VID_SHIFT) & VID_MASK;
                 }
   
                 dprintk("   %d : fid 0x%x, vid 0x%x\n", i, fid, vid);
   
-               powernow_table[i].index = fid; /* lower 8 bits */
-               powernow_table[i].index |= (vid << 8); /* upper 8 bits */
-               powernow_table[i].frequency = find_khz_freq_from_fid(fid);
+               index = fid | (vid<<8);
+               powernow_table[i].index = index;
+ 
+               freq = find_khz_freq_from_fid(fid);
+               powernow_table[i].frequency = freq;
   
                 /* verify frequency is OK */
-               if ((powernow_table[i].frequency > (MAX_FREQ * 1000)) ||
-                       (powernow_table[i].frequency < (MIN_FREQ * 1000))) {
-                       dprintk("invalid freq %u kHz, ignoring\n", powernow_table[i].frequency);
-                       powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
+               if ((freq > (MAX_FREQ * 1000)) || (freq < (MIN_FREQ * 1000))) {
+                       dprintk("invalid freq %u kHz, ignoring\n", freq);
+                       invalidate_entry(data, i);
                         continue;
                 }
   
-               /* verify voltage is OK - BIOSs are using "off" to indicate invalid */
+               /* verify voltage is OK -
+                * BIOSs are using "off" to indicate invalid */
                 if (vid == VID_OFF) {
                         dprintk("invalid vid %u, ignoring\n", vid);
-                       powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
+                       invalidate_entry(data, i);
                         continue;
                 }
   
                 /* verify only 1 entry from the lo frequency table */
                 if (fid < HI_FID_TABLE_BOTTOM) {
                         if (cntlofreq) {
-                               /* if both entries are the same, ignore this one ... */
-                               if ((powernow_table[i].frequency != powernow_table[cntlofreq].frequency) ||
-                                   (powernow_table[i].index != powernow_table[cntlofreq].index)) {
-                                       printk(KERN_ERR PFX "Too many lo freq table entries\n");
+                               /* if both entries are the same,
+                                * ignore this one ... */
+                               if ((freq != powernow_table[cntlofreq].frequency) ||
+                                   (index != powernow_table[cntlofreq].index)) {
+                                       printk(KERN_ERR PFX
+                                               "Too many lo freq table "
+                                               "entries\n");
                                         return 1;
                                 }
   
-                               dprintk("double low frequency table entry, ignoring it.\n");
-                               powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
+                               dprintk("double low frequency table entry, "
+                                               "ignoring it.\n");
+                               invalidate_entry(data, i);
                                 continue;
                         } else
                                 cntlofreq = i;
                 }
   
-               if (powernow_table[i].frequency != (data->acpi_data.states[i].core_frequency * 1000)) {
-                       printk(KERN_INFO PFX "invalid freq entries %u kHz vs. %u kHz\n",
-                               powernow_table[i].frequency,
-                               (unsigned int) (data->acpi_data.states[i].core_frequency * 1000));
-                       powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
+               if (freq != (data->acpi_data.states[i].core_frequency * 1000)) {
+                       printk(KERN_INFO PFX "invalid freq entries "
+                               "%u kHz vs. %u kHz\n", freq,
+                               (unsigned int)
+                               (data->acpi_data.states[i].core_frequency
+                                * 1000));
+                       invalidate_entry(data, i);
                         continue;
                 }
         }
@@@ -938,7 -1008,8 +1011,8 @@@
   static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data)
   {
         if (data->acpi_data.state_count)
-               acpi_processor_unregister_performance(&data->acpi_data, data->cpu);
+               acpi_processor_unregister_performance(&data->acpi_data,
+                               data->cpu);
         free_cpumask_var(data->acpi_data.shared_cpu_map);
   }
   
@@@ -956,15 -1027,9 +1030,9 @@@ static int get_transition_latency(struc
         return 1000 * max_latency;
   }
   
- #else
- static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { return -ENODEV; }
- static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { return; }
- static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) { return; }
- static int get_transition_latency(struct powernow_k8_data *data) { return 0; }
- #endif /* CONFIG_X86_POWERNOW_K8_ACPI */
- 
   /* Take a frequency, and issue the fid/vid transition command */
- static int transition_frequency_fidvid(struct powernow_k8_data *data, unsigned int index)
+ static int transition_frequency_fidvid(struct powernow_k8_data *data,
+               unsigned int index)
   {
         u32 fid = 0;
         u32 vid = 0;
@@@ -992,7 -1057,8 +1060,8 @@@
                 return 0;
         }
   
-       if ((fid < HI_FID_TABLE_BOTTOM) && (data->currfid < HI_FID_TABLE_BOTTOM)) {
+       if ((fid < HI_FID_TABLE_BOTTOM) &&
+           (data->currfid < HI_FID_TABLE_BOTTOM)) {
                 printk(KERN_ERR PFX
                        "ignoring illegal change in lo freq table-%x to 0x%x\n",
                        data->currfid, fid);
@@@ -1020,7 -1086,8 +1089,8 @@@
   }
   
   /* Take a frequency, and issue the hardware pstate transition command */
- static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned int index)
+ static int transition_frequency_pstate(struct powernow_k8_data *data,
+               unsigned int index)
   {
         u32 pstate = 0;
         int res, i;
@@@ -1032,7 -1099,8 +1102,8 @@@
         pstate = index & HW_PSTATE_MASK;
         if (pstate > data->max_hw_pstate)
                 return 0;
-       freqs.old = find_khz_freq_from_pstate(data->powernow_table, data->currpstate);
+       freqs.old = find_khz_freq_from_pstate(data->powernow_table,
+                       data->currpstate);
         freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate);
   
         for_each_cpu_mask_nr(i, *(data->available_cores)) {
@@@ -1051,7 -1119,8 +1122,8 @@@
   }
   
   /* Driver entry point to switch to the target frequency */
- static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation)
+ static int powernowk8_target(struct cpufreq_policy *pol,
+               unsigned targfreq, unsigned relation)
   {
         cpumask_t oldmask;
         struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
@@@ -1090,14 -1159,18 +1162,18 @@@
                 dprintk("targ: curr fid 0x%x, vid 0x%x\n",
                 data->currfid, data->currvid);
   
-               if ((checkvid != data->currvid) || (checkfid != data->currfid)) {
+               if ((checkvid != data->currvid) ||
+                   (checkfid != data->currfid)) {
                         printk(KERN_INFO PFX
-                               "error - out of sync, fix 0x%x 0x%x, vid 0x%x 0x%x\n",
-                               checkfid, data->currfid, checkvid, data->currvid);
+                               "error - out of sync, fid 0x%x 0x%x, "
+                               "vid 0x%x 0x%x\n",
+                               checkfid, data->currfid,
+                               checkvid, data->currvid);
                 }
         }
   
-       if (cpufreq_frequency_table_target(pol, data->powernow_table, targfreq, relation, &newstate))
+       if (cpufreq_frequency_table_target(pol, data->powernow_table,
+                               targfreq, relation, &newstate))
                 goto err_out;
   
         mutex_lock(&fidvid_mutex);
@@@ -1117,7 -1190,8 +1193,8 @@@
         mutex_unlock(&fidvid_mutex);
   
         if (cpu_family == CPU_HW_PSTATE)
-               pol->cur = find_khz_freq_from_pstate(data->powernow_table, newstate);
+               pol->cur = find_khz_freq_from_pstate(data->powernow_table,
+                               newstate);
         else
                 pol->cur = find_khz_freq_from_fid(data->currfid);
         ret = 0;
@@@ -1144,6 -1218,7 +1221,7 @@@ static int __cpuinit powernowk8_cpu_ini
         struct powernow_k8_data *data;
         cpumask_t oldmask;
         int rc;
+       static int print_once;
   
         if (!cpu_online(pol->cpu))
                 return -ENODEV;
@@@ -1166,33 -1241,31 +1244,31 @@@
                  * an UP version, and is deprecated by AMD.
                  */
                 if (num_online_cpus() != 1) {
- #ifndef CONFIG_ACPI_PROCESSOR
-                       printk(KERN_ERR PFX "ACPI Processor support is required "
-                              "for SMP systems but is absent. Please load the "
-                              "ACPI Processor module before starting this "
-                              "driver.\n");
- #else
-                       printk(KERN_ERR FW_BUG PFX "Your BIOS does not provide"
-                              " ACPI _PSS objects in a way that Linux "
-                              "understands. Please report this to the Linux "
-                              "ACPI maintainers and complain to your BIOS "
-                              "vendor.\n");
- #endif
-                       kfree(data);
-                       return -ENODEV;
+                       /*
+                        * Replace this one with print_once as soon as such a
+                        * thing gets introduced
+                        */
+                       if (!print_once) {
+                               WARN_ONCE(1, KERN_ERR FW_BUG PFX "Your BIOS "
+                                       "does not provide ACPI _PSS objects "
+                                       "in a way that Linux understands. "
+                                       "Please report this to the Linux ACPI"
+                                       " maintainers and complain to your "
+                                       "BIOS vendor.\n");
+                               print_once++;
+                       }
+                       goto err_out;
                 }
                 if (pol->cpu != 0) {
                         printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for "
                                "CPU other than CPU0. Complain to your BIOS "
                                "vendor.\n");
-                       kfree(data);
-                       return -ENODEV;
+                       goto err_out;
                 }
                 rc = find_psb_table(data);
-               if (rc) {
-                       kfree(data);
-                       return -ENODEV;
-               }
+               if (rc)
+                       goto err_out;
+ 
                 /* Take a crude guess here.
                  * That guess was in microseconds, so multiply with 1000 */
                 pol->cpuinfo.transition_latency = (
@@@ -1207,16 -1280,16 +1283,16 @@@
   
         if (smp_processor_id() != pol->cpu) {
                 printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu);
-               goto err_out;
+               goto err_out_unmask;
         }
   
         if (pending_bit_stuck()) {
                 printk(KERN_ERR PFX "failing init, change pending bit set\n");
-               goto err_out;
+               goto err_out_unmask;
         }
   
         if (query_current_values_with_pending_wait(data))
-               goto err_out;
+               goto err_out_unmask;
   
         if (cpu_family == CPU_OPTERON)
                 fidvid_msr_init();
@@@ -1227,11 -1300,12 +1303,12 @@@
         if (cpu_family == CPU_HW_PSTATE)
                 cpumask_copy(pol->cpus, cpumask_of(pol->cpu));
         else
- -              cpumask_copy(pol->cpus, &per_cpu(cpu_core_map, pol->cpu));
+ +              cpumask_copy(pol->cpus, cpu_core_mask(pol->cpu));
         data->available_cores = pol->cpus;
   
         if (cpu_family == CPU_HW_PSTATE)
-               pol->cur = find_khz_freq_from_pstate(data->powernow_table, data->currpstate);
+               pol->cur = find_khz_freq_from_pstate(data->powernow_table,
+                               data->currpstate);
         else
                 pol->cur = find_khz_freq_from_fid(data->currfid);
         dprintk("policy current frequency %d kHz\n", pol->cur);
@@@ -1248,7 -1322,8 +1325,8 @@@
         cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu);
   
         if (cpu_family == CPU_HW_PSTATE)
-               dprintk("cpu_init done, current pstate 0x%x\n", data->currpstate);
+               dprintk("cpu_init done, current pstate 0x%x\n",
+                               data->currpstate);
         else
                 dprintk("cpu_init done, current fid 0x%x, vid 0x%x\n",
                         data->currfid, data->currvid);
@@@ -1257,15 -1332,16 +1335,16 @@@
   
         return 0;
   
- err_out:
+ err_out_unmask:
         set_cpus_allowed_ptr(current, &oldmask);
         powernow_k8_cpu_exit_acpi(data);
   
+ err_out:
         kfree(data);
         return -ENODEV;
   }
   
- static int __devexit powernowk8_cpu_exit (struct cpufreq_policy *pol)
+ static int __devexit powernowk8_cpu_exit(struct cpufreq_policy *pol)
   {
         struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
   
@@@ -1282,14 -1358,14 +1361,14 @@@
         return 0;
   }
   
- static unsigned int powernowk8_get (unsigned int cpu)
+ static unsigned int powernowk8_get(unsigned int cpu)
   {
         struct powernow_k8_data *data;
         cpumask_t oldmask = current->cpus_allowed;
         unsigned int khz = 0;
         unsigned int first;
   
- -      first = first_cpu(per_cpu(cpu_core_map, cpu));
+ +      first = cpumask_first(cpu_core_mask(cpu));
         data = per_cpu(powernow_data, first);
   
         if (!data)
@@@ -1318,7 -1394,7 +1397,7 @@@ out
         return khz;
   }
   
- static struct freq_attr* powernow_k8_attr[] = {
+ static struct freq_attr *powernow_k8_attr[] = {
         &cpufreq_freq_attr_scaling_available_freqs,
         NULL,
   };
@@@ -1363,7 -1439,8 +1442,8 @@@ static void __exit powernowk8_exit(void
         cpufreq_unregister_driver(&cpufreq_amd64_driver);
   }
   
- MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com> and Mark Langsdorf <mark.langsdorf@amd.com>");
+ MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com> and "
+               "Mark Langsdorf <mark.langsdorf@amd.com>");
   MODULE_DESCRIPTION("AMD Athlon 64 and Opteron processor frequency driver.");
   MODULE_LICENSE("GPL");
   
diff --combined arch/x86/kernel/cpu/cpufreq/speedstep-ich.c

index 1f0ec83d343b76e62db2de80bcb7b306a786b038,8bbb11adb3157d1e0d436910dd6b2423cec025ef..016c1a4fa3fc5a88d8ab9950da8d4a92591894e1
--- 1/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
--- 2/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
@@@ -39,7 -39,7 +39,7 @@@ static struct pci_dev *speedstep_chipse
   
   /* speedstep_processor
    */
- static unsigned int speedstep_processor = 0;
+ static unsigned int speedstep_processor;
   
   static u32 pmbase;
   
@@@ -54,7 -54,8 +54,8 @@@ static struct cpufreq_frequency_table s
   };
   
   
- #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-ich", msg)
+ #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
+               "speedstep-ich", msg)
   
   
   /**
@@@ -62,7 -63,7 +63,7 @@@
    *
    * Returns: -ENODEV if no register could be found
    */
- static int speedstep_find_register (void)
+ static int speedstep_find_register(void)
   {
         if (!speedstep_chipset_dev)
                 return -ENODEV;
@@@ -90,7 -91,7 +91,7 @@@
    *
    *   Tries to change the SpeedStep state.
    */
- static void speedstep_set_state (unsigned int state)
+ static void speedstep_set_state(unsigned int state)
   {
         u8 pm2_blk;
         u8 value;
@@@ -133,11 -134,11 +134,11 @@@
   
         dprintk("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value);
   
-       if (state == (value & 0x1)) {
-               dprintk("change to %u MHz succeeded\n", (speedstep_get_processor_frequency(speedstep_processor) / 1000));
-       } else {
-               printk (KERN_ERR "cpufreq: change failed - I/O error\n");
-       }
+       if (state == (value & 0x1))
+               dprintk("change to %u MHz succeeded\n",
+                       speedstep_get_frequency(speedstep_processor) / 1000);
+       else
+               printk(KERN_ERR "cpufreq: change failed - I/O error\n");
   
         return;
   }
@@@ -149,7 -150,7 +150,7 @@@
    *   Tries to activate the SpeedStep status and control registers.
    * Returns -EINVAL on an unsupported chipset, and zero on success.
    */
- static int speedstep_activate (void)
+ static int speedstep_activate(void)
   {
         u16 value = 0;
   
@@@ -175,20 -176,18 +176,18 @@@
    * functions. Returns the SPEEDSTEP_CHIPSET_-number for the detected
    * chipset, or zero on failure.
    */
- static unsigned int speedstep_detect_chipset (void)
+ static unsigned int speedstep_detect_chipset(void)
   {
         speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL,
                               PCI_DEVICE_ID_INTEL_82801DB_12,
-                             PCI_ANY_ID,
-                             PCI_ANY_ID,
+                             PCI_ANY_ID, PCI_ANY_ID,
                               NULL);
         if (speedstep_chipset_dev)
                 return 4; /* 4-M */
   
         speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL,
                               PCI_DEVICE_ID_INTEL_82801CA_12,
-                             PCI_ANY_ID,
-                             PCI_ANY_ID,
+                             PCI_ANY_ID, PCI_ANY_ID,
                               NULL);
         if (speedstep_chipset_dev)
                 return 3; /* 3-M */
@@@ -196,8 -195,7 +195,7 @@@
   
         speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL,
                               PCI_DEVICE_ID_INTEL_82801BA_10,
-                             PCI_ANY_ID,
-                             PCI_ANY_ID,
+                             PCI_ANY_ID, PCI_ANY_ID,
                               NULL);
         if (speedstep_chipset_dev) {
                 /* speedstep.c causes lockups on Dell Inspirons 8000 and
@@@ -208,8 -206,7 +206,7 @@@
   
                 hostbridge  = pci_get_subsys(PCI_VENDOR_ID_INTEL,
                               PCI_DEVICE_ID_INTEL_82815_MC,
-                             PCI_ANY_ID,
-                             PCI_ANY_ID,
+                             PCI_ANY_ID, PCI_ANY_ID,
                               NULL);
   
                 if (!hostbridge)
@@@ -236,7 -233,7 +233,7 @@@ static unsigned int _speedstep_get(cons
   
         cpus_allowed = current->cpus_allowed;
         set_cpus_allowed_ptr(current, cpus);
-       speed = speedstep_get_processor_frequency(speedstep_processor);
+       speed = speedstep_get_frequency(speedstep_processor);
         set_cpus_allowed_ptr(current, &cpus_allowed);
         dprintk("detected %u kHz as current frequency\n", speed);
         return speed;
@@@ -251,11 -248,12 +248,12 @@@ static unsigned int speedstep_get(unsig
    * speedstep_target - set a new CPUFreq policy
    * @policy: new policy
    * @target_freq: the target frequency
-  * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
+  * @relation: how that frequency relates to achieved frequency
+  *    (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
    *
    * Sets a new CPUFreq policy.
    */
- static int speedstep_target (struct cpufreq_policy *policy,
+ static int speedstep_target(struct cpufreq_policy *policy,
                              unsigned int target_freq,
                              unsigned int relation)
   {
@@@ -264,7 -262,8 +262,8 @@@
         cpumask_t cpus_allowed;
         int i;
   
-       if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], target_freq, relation, &newstate))
+       if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0],
+                               target_freq, relation, &newstate))
                 return -EINVAL;
   
         freqs.old = _speedstep_get(policy->cpus);
@@@ -308,7 -307,7 +307,7 @@@
    * Limit must be within speedstep_low_freq and speedstep_high_freq, with
    * at least one border included.
    */
- static int speedstep_verify (struct cpufreq_policy *policy)
+ static int speedstep_verify(struct cpufreq_policy *policy)
   {
         return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]);
   }
@@@ -322,7 -321,7 +321,7 @@@ static int speedstep_cpu_init(struct cp
   
         /* only run on CPU to be set, or on its sibling */
   #ifdef CONFIG_SMP
- -      cpumask_copy(policy->cpus, &per_cpu(cpu_sibling_map, policy->cpu));
+ +      cpumask_copy(policy->cpus, cpu_sibling_mask(policy->cpu));
   #endif
   
         cpus_allowed = current->cpus_allowed;
@@@ -344,7 -343,8 +343,8 @@@
                 return -EIO;
   
         dprintk("currently at %s speed setting - %i MHz\n",
-               (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency) ? "low" : "high",
+               (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency)
+               ? "low" : "high",
                 (speed / 1000));
   
         /* cpuinfo and default policy values */
@@@ -352,9 -352,9 +352,9 @@@
   
         result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs);
         if (result)
-               return (result);
+               return result;
   
-         cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu);
+       cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu);
   
         return 0;
   }
@@@ -366,7 -366,7 +366,7 @@@ static int speedstep_cpu_exit(struct cp
         return 0;
   }
   
- static struct freq_attr* speedstep_attr[] = {
+ static struct freq_attr *speedstep_attr[] = {
         &cpufreq_freq_attr_scaling_available_freqs,
         NULL,
   };
@@@ -396,13 -396,15 +396,15 @@@ static int __init speedstep_init(void
         /* detect processor */
         speedstep_processor = speedstep_detect_processor();
         if (!speedstep_processor) {
-               dprintk("Intel(R) SpeedStep(TM) capable processor not found\n");
+               dprintk("Intel(R) SpeedStep(TM) capable processor "
+                               "not found\n");
                 return -ENODEV;
         }
   
         /* detect chipset */
         if (!speedstep_detect_chipset()) {
-               dprintk("Intel(R) SpeedStep(TM) for this chipset not (yet) available.\n");
+               dprintk("Intel(R) SpeedStep(TM) for this chipset not "
+                               "(yet) available.\n");
                 return -ENODEV;
         }
   
@@@ -431,9 -433,11 +433,11 @@@ static void __exit speedstep_exit(void
   }
   
   
- MODULE_AUTHOR ("Dave Jones <davej@redhat.com>, Dominik Brodowski <linux@brodo.de>");
- MODULE_DESCRIPTION ("Speedstep driver for Intel mobile processors on chipsets with ICH-M southbridges.");
- MODULE_LICENSE ("GPL");
+ MODULE_AUTHOR("Dave Jones <davej@redhat.com>, "
+               "Dominik Brodowski <linux@brodo.de>");
+ MODULE_DESCRIPTION("Speedstep driver for Intel mobile processors on chipsets "
+               "with ICH-M southbridges.");
+ MODULE_LICENSE("GPL");
   
   module_init(speedstep_init);
   module_exit(speedstep_exit);
diff --combined arch/x86/kernel/cpu/intel_cacheinfo.c

index 8e6ce2c146d65559a11bbf4b43bd210c5c909cac,c471eb1a389cc02c4f788d5a828f3f4bfe56c93d..483eda96e102062b23f3e29820d911a9c7d6ab59
--- 1/arch/x86/kernel/cpu/intel_cacheinfo.c
--- 2/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@@ -32,7 -32,7 +32,7 @@@ struct _cache_tabl
   };
   
   /* all the cache descriptor types we care about (no TLB or trace cache entries) */
- static struct _cache_table cache_table[] __cpuinitdata =
+ static const struct _cache_table __cpuinitconst cache_table[] =
   {
         { 0x06, LVL_1_INST, 8 },        /* 4-way set assoc, 32 byte line size */
         { 0x08, LVL_1_INST, 16 },       /* 4-way set assoc, 32 byte line size */
@@@ -159,7 -159,7 +159,7 @@@ struct _cpuid4_info_regs 
         unsigned long can_disable;
   };
   
- -#ifdef CONFIG_PCI
+ +#if defined(CONFIG_PCI) && defined(CONFIG_SYSFS)
   static struct pci_device_id k8_nb_id[] = {
         { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) },
         { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) },
@@@ -206,15 -206,15 +206,15 @@@ union l3_cache 
         unsigned val;
   };
   
- static unsigned short assocs[] __cpuinitdata = {
+ static const unsigned short __cpuinitconst assocs[] = {
         [1] = 1, [2] = 2, [4] = 4, [6] = 8,
         [8] = 16, [0xa] = 32, [0xb] = 48,
         [0xc] = 64,
         [0xf] = 0xffff // ??
   };
   
- static unsigned char levels[] __cpuinitdata = { 1, 1, 2, 3 };
- static unsigned char types[] __cpuinitdata = { 1, 2, 3, 3 };
+ static const unsigned char __cpuinitconst levels[] = { 1, 1, 2, 3 };
+ static const unsigned char __cpuinitconst types[] = { 1, 2, 3, 3 };
   
   static void __cpuinit
   amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
@@@ -324,6 -324,15 +324,6 @@@ __cpuinit cpuid4_cache_lookup_regs(int 
         return 0;
   }
   
- -static int
- -__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
- -{
- -      struct _cpuid4_info_regs *leaf_regs =
- -              (struct _cpuid4_info_regs *)this_leaf;
- -
- -      return cpuid4_cache_lookup_regs(index, leaf_regs);
- -}
- -
   static int __cpuinit find_num_cache_leaves(void)
   {
         unsigned int            eax, ebx, ecx, edx;
@@@ -499,8 -508,6 +499,8 @@@ unsigned int __cpuinit init_intel_cache
         return l2;
   }
   
+ +#ifdef CONFIG_SYSFS
+ +
   /* pointer to _cpuid4_info array (for each cache leaf) */
   static DEFINE_PER_CPU(struct _cpuid4_info *, cpuid4_info);
   #define CPUID4_INFO_IDX(x, y) (&((per_cpu(cpuid4_info, x))[y]))
@@@ -564,15 -571,6 +564,15 @@@ static void __cpuinit free_cache_attrib
         per_cpu(cpuid4_info, cpu) = NULL;
   }
   
+ +static int
+ +__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
+ +{
+ +      struct _cpuid4_info_regs *leaf_regs =
+ +              (struct _cpuid4_info_regs *)this_leaf;
+ +
+ +      return cpuid4_cache_lookup_regs(index, leaf_regs);
+ +}
+ +
   static void __cpuinit get_cpu_leaves(void *_retval)
   {
         int j, *retval = _retval, cpu = smp_processor_id();
@@@ -614,6 -612,8 +614,6 @@@ static int __cpuinit detect_cache_attri
         return retval;
   }
   
- -#ifdef CONFIG_SYSFS
- -
   #include <linux/kobject.h>
   #include <linux/sysfs.h>
   
diff --combined arch/x86/kernel/cpu/mcheck/mce_amd_64.c

index 1f429ee3477d7c9c10617c8c33c03e6e1604c185,7d01be868870d1a7922c7ec18fc7a8b666ddb25b..56dde9c4bc96b8fe3202a2051a500efe52853b0f
--- 1/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
--- 2/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@@ -92,7 -92,8 +92,8 @@@ struct thresh_restart 
   };
   
   /* must be called with correct cpu affinity */
- static long threshold_restart_bank(void *_tr)
+ /* Called via smp_call_function_single() */
+ static void threshold_restart_bank(void *_tr)
   {
         struct thresh_restart *tr = _tr;
         u32 mci_misc_hi, mci_misc_lo;
@@@ -119,7 -120,6 +120,6 @@@
   
         mci_misc_hi |= MASK_COUNT_EN_HI;
         wrmsr(tr->b->address, mci_misc_lo, mci_misc_hi);
-       return 0;
   }
   
   /* cpu init entry point, called from mce.c with preempt off */
@@@ -279,7 -279,7 +279,7 @@@ static ssize_t store_interrupt_enable(s
         tr.b = b;
         tr.reset = 0;
         tr.old_limit = 0;
-       work_on_cpu(b->cpu, threshold_restart_bank, &tr);
+       smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
   
         return end - buf;
   }
@@@ -301,23 -301,32 +301,32 @@@ static ssize_t store_threshold_limit(st
         tr.b = b;
         tr.reset = 0;
   
-       work_on_cpu(b->cpu, threshold_restart_bank, &tr);
+       smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
   
         return end - buf;
   }
   
- static long local_error_count(void *_b)
+ struct threshold_block_cross_cpu {
+       struct threshold_block *tb;
+       long retval;
+ };
+ 
+ static void local_error_count_handler(void *_tbcc)
   {
-       struct threshold_block *b = _b;
+       struct threshold_block_cross_cpu *tbcc = _tbcc;
+       struct threshold_block *b = tbcc->tb;
         u32 low, high;
   
         rdmsr(b->address, low, high);
-       return (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit);
+       tbcc->retval = (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit);
   }
   
   static ssize_t show_error_count(struct threshold_block *b, char *buf)
   {
-       return sprintf(buf, "%lx\n", work_on_cpu(b->cpu, local_error_count, b));
+       struct threshold_block_cross_cpu tbcc = { .tb = b, };
+ 
+       smp_call_function_single(b->cpu, local_error_count_handler, &tbcc, 1);
+       return sprintf(buf, "%lx\n", tbcc.retval);
   }
   
   static ssize_t store_error_count(struct threshold_block *b,
@@@ -325,7 -334,7 +334,7 @@@
   {
         struct thresh_restart tr = { .b = b, .reset = 1, .old_limit = 0 };
   
-       work_on_cpu(b->cpu, threshold_restart_bank, &tr);
+       smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
         return 1;
   }
   
@@@ -394,7 -403,7 +403,7 @@@ static __cpuinit int allocate_threshold
         if ((bank >= NR_BANKS) || (block >= NR_BLOCKS))
                 return 0;
   
-       if (rdmsr_safe(address, &low, &high))
+       if (rdmsr_safe_on_cpu(cpu, address, &low, &high))
                 return 0;
   
         if (!(high & MASK_VALID_HI)) {
@@@ -458,12 -467,11 +467,11 @@@ out_free
         return err;
   }
   
- static __cpuinit long local_allocate_threshold_blocks(void *_bank)
+ static __cpuinit long
+ local_allocate_threshold_blocks(int cpu, unsigned int bank)
   {
-       unsigned int *bank = _bank;
- 
-       return allocate_threshold_blocks(smp_processor_id(), *bank, 0,
-                                        MSR_IA32_MC0_MISC + *bank * 4);
+       return allocate_threshold_blocks(cpu, bank, 0,
+                                        MSR_IA32_MC0_MISC + bank * 4);
   }
   
   /* symlinks sibling shared banks to first core.  first core owns dir/files. */
@@@ -477,7 -485,7 +485,7 @@@ static __cpuinit int threshold_create_b
   
   #ifdef CONFIG_SMP
         if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) {   /* symlink */
- -              i = cpumask_first(&per_cpu(cpu_core_map, cpu));
+ +              i = cpumask_first(cpu_core_mask(cpu));
   
                 /* first core not up yet */
                 if (cpu_data(i).cpu_core_id)
@@@ -497,7 -505,7 +505,7 @@@
                 if (err)
                         goto out;
   
- -              cpumask_copy(b->cpus, &per_cpu(cpu_core_map, cpu));
+ +              cpumask_copy(b->cpus, cpu_core_mask(cpu));
                 per_cpu(threshold_banks, cpu)[bank] = b;
                 goto out;
         }
@@@ -521,12 -529,12 +529,12 @@@
   #ifndef CONFIG_SMP
         cpumask_setall(b->cpus);
   #else
- -      cpumask_copy(b->cpus, &per_cpu(cpu_core_map, cpu));
+ +      cpumask_copy(b->cpus, cpu_core_mask(cpu));
   #endif
   
         per_cpu(threshold_banks, cpu)[bank] = b;
   
-       err = work_on_cpu(cpu, local_allocate_threshold_blocks, &bank);
+       err = local_allocate_threshold_blocks(cpu, bank);
         if (err)
                 goto out_free;
   
diff --combined arch/x86/kernel/cpu/mcheck/mce_intel_64.c

index 96b2a85545aa8d6940775d54b191edce0aacc7dc,57df3d383470bb158a7d64bccb7948d45ec9e4ac..d6b72df89d697cb6b809cc1e3c68540d28cfe1f3
--- 1/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
--- 2/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
@@@ -249,7 -249,7 +249,7 @@@ void cmci_rediscover(int dying
         for_each_online_cpu (cpu) {
                 if (cpu == dying)
                         continue;
- -              if (set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)))
+ +              if (set_cpus_allowed_ptr(current, cpumask_of(cpu)))
                         continue;
                 /* Recheck banks in case CPUs don't all have the same */
                 if (cmci_supported(&banks))
@@@ -270,7 -270,7 +270,7 @@@ void cmci_reenable(void
                 cmci_discover(banks, 0);
   }
   
- static __cpuinit void intel_init_cmci(void)
+ static void intel_init_cmci(void)
   {
         int banks;
   
diff --combined arch/x86/kernel/process.c

index 78533a519d8f48e38d00d81bef94a87313c34249,156f87582c6cd5e0fcb677b7c685d92c68443d72..25e28087a3ee628e31569206868de49e183360d3
--- 1/arch/x86/kernel/process.c
--- 2/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@@ -65,11 -65,11 +65,11 @@@ void exit_thread(void
   {
         struct task_struct *me = current;
         struct thread_struct *t = &me->thread;
+       unsigned long *bp = t->io_bitmap_ptr;
   
-       if (me->thread.io_bitmap_ptr) {
+       if (bp) {
                 struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
   
-               kfree(t->io_bitmap_ptr);
                 t->io_bitmap_ptr = NULL;
                 clear_thread_flag(TIF_IO_BITMAP);
                 /*
@@@ -78,6 -78,7 +78,7 @@@
                 memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
                 t->io_bitmap_max = 0;
                 put_cpu();
+               kfree(bp);
         }
   
         ds_exit_thread(current);
@@@ -324,7 -325,7 +325,7 @@@ void stop_this_cpu(void *dummy
         /*
          * Remove this CPU:
          */
- -      cpu_clear(smp_processor_id(), cpu_online_map);
+ +      set_cpu_online(smp_processor_id(), false);
         disable_local_APIC();
   
         for (;;) {
@@@ -474,13 -475,12 +475,13 @@@ static int __cpuinit check_c1e_idle(con
         return 1;
   }
   
- -static cpumask_t c1e_mask = CPU_MASK_NONE;
+ +static cpumask_var_t c1e_mask;
   static int c1e_detected;
   
   void c1e_remove_cpu(int cpu)
   {
- -      cpu_clear(cpu, c1e_mask);
+ +      if (c1e_mask != NULL)
+ +              cpumask_clear_cpu(cpu, c1e_mask);
   }
   
   /*
@@@ -509,8 -509,8 +510,8 @@@ static void c1e_idle(void
         if (c1e_detected) {
                 int cpu = smp_processor_id();
   
- -              if (!cpu_isset(cpu, c1e_mask)) {
- -                      cpu_set(cpu, c1e_mask);
+ +              if (!cpumask_test_cpu(cpu, c1e_mask)) {
+ +                      cpumask_set_cpu(cpu, c1e_mask);
                         /*
                          * Force broadcast so ACPI can not interfere. Needs
                          * to run with interrupts enabled as it uses
@@@ -562,15 -562,6 +563,15 @@@ void __cpuinit select_idle_routine(cons
                 pm_idle = default_idle;
   }
   
+ +void __init init_c1e_mask(void)
+ +{
+ +      /* If we're using c1e_idle, we need to allocate c1e_mask. */
+ +      if (pm_idle == c1e_idle) {
+ +              alloc_cpumask_var(&c1e_mask, GFP_KERNEL);
+ +              cpumask_clear(c1e_mask);
+ +      }
+ +}
+ +
   static int __init idle_setup(char *str)
   {
         if (!str)
diff --combined arch/x86/kernel/tlb_uv.c

index 8afb69180c9bb2bf49b87bfad9dcb3b599b31656,79c073247284cbdb334cf2a03b4e7ffaae8d0bcd..deb5ebb32c3b71a8a698df7b68ae5447e12f0cf8
--- 1/arch/x86/kernel/tlb_uv.c
--- 2/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@@ -275,8 -275,6 +275,8 @@@ const struct cpumask *uv_flush_send_and
         return NULL;
   }
   
+ +static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask);
+ +
   /**
    * uv_flush_tlb_others - globally purge translation cache of a virtual
    * address or all TLB's
@@@ -306,7 -304,8 +306,7 @@@ const struct cpumask *uv_flush_tlb_othe
                                           struct mm_struct *mm,
                                           unsigned long va, unsigned int cpu)
   {
- -      static DEFINE_PER_CPU(cpumask_t, flush_tlb_mask);
- -      struct cpumask *flush_mask = &__get_cpu_var(flush_tlb_mask);
+ +      struct cpumask *flush_mask = __get_cpu_var(uv_flush_tlb_mask);
         int i;
         int bit;
         int blade;
@@@ -751,20 -750,17 +751,21 @@@ static int __init uv_bau_init(void
         int node;
         int nblades;
         int last_blade;
-       int cur_cpu = 0;
+       int cur_cpu;
   
         if (!is_uv_system())
                 return 0;
   
+ +      for_each_possible_cpu(cur_cpu)
+ +              alloc_cpumask_var_node(&per_cpu(uv_flush_tlb_mask, cur_cpu),
+ +                                     GFP_KERNEL, cpu_to_node(cur_cpu));
+ +
         uv_bau_retry_limit = 1;
         uv_nshift = uv_hub_info->n_val;
         uv_mmask = (1UL << uv_hub_info->n_val) - 1;
         nblades = 0;
         last_blade = -1;
+       cur_cpu = 0;
         for_each_online_node(node) {
                 blade = uv_node_to_blade_id(node);
                 if (blade == last_blade)
diff --combined kernel/sched.c

index 11dd52780adb556a5d825174620bac18a095fca3,5757e03cfac0bdf7cd50f3625a318645c562b973..0ad7c28b7236e1f9e9ea4226849fb9a94863bf76
--- 1/kernel/sched.c
--- 2/kernel/sched.c
+++ b/kernel/sched.c
@@@ -331,6 -331,13 +331,13 @@@ static DEFINE_PER_CPU(struct rt_rq, ini
    */
   static DEFINE_SPINLOCK(task_group_lock);
   
+ #ifdef CONFIG_SMP
+ static int root_task_group_empty(void)
+ {
+       return list_empty(&root_task_group.children);
+ }
+ #endif
+ 
   #ifdef CONFIG_FAIR_GROUP_SCHED
   #ifdef CONFIG_USER_SCHED
   # define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD)
@@@ -391,6 -398,13 +398,13 @@@ static inline void set_task_rq(struct t
   
   #else
   
+ #ifdef CONFIG_SMP
+ static int root_task_group_empty(void)
+ {
+       return 1;
+ }
+ #endif
+ 
   static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
   static inline struct task_group *task_group(struct task_struct *p)
   {
@@@ -467,11 -481,17 +481,17 @@@ struct rt_rq 
         struct rt_prio_array active;
         unsigned long rt_nr_running;
   #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
-       int highest_prio; /* highest queued rt task prio */
+       struct {
+               int curr; /* highest queued rt task prio */
+ #ifdef CONFIG_SMP
+               int next; /* next highest */
+ #endif
+       } highest_prio;
   #endif
   #ifdef CONFIG_SMP
         unsigned long rt_nr_migratory;
         int overloaded;
+       struct plist_head pushable_tasks;
   #endif
         int rt_throttled;
         u64 rt_time;
@@@ -549,7 -569,6 +569,6 @@@ struct rq 
         unsigned long nr_running;
         #define CPU_LOAD_IDX_MAX 5
         unsigned long cpu_load[CPU_LOAD_IDX_MAX];
-       unsigned char idle_at_tick;
   #ifdef CONFIG_NO_HZ
         unsigned long last_tick_seen;
         unsigned char in_nohz_recently;
@@@ -590,6 -609,7 +609,7 @@@
         struct root_domain *rd;
         struct sched_domain *sd;
   
+       unsigned char idle_at_tick;
         /* For active balancing */
         int active_balance;
         int push_cpu;
@@@ -618,9 -638,6 +638,6 @@@
         /* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */
   
         /* sys_sched_yield() stats */
-       unsigned int yld_exp_empty;
-       unsigned int yld_act_empty;
-       unsigned int yld_both_empty;
         unsigned int yld_count;
   
         /* schedule() stats */
@@@ -1183,10 -1200,10 +1200,10 @@@ static void resched_task(struct task_st
   
         assert_spin_locked(&task_rq(p)->lock);
   
-       if (unlikely(test_tsk_thread_flag(p, TIF_NEED_RESCHED)))
+       if (test_tsk_need_resched(p))
                 return;
   
-       set_tsk_thread_flag(p, TIF_NEED_RESCHED);
+       set_tsk_need_resched(p);
   
         cpu = task_cpu(p);
         if (cpu == smp_processor_id())
@@@ -1242,7 -1259,7 +1259,7 @@@ void wake_up_idle_cpu(int cpu
          * lockless. The worst case is that the other CPU runs the
          * idle task through an additional NOOP schedule()
          */
-       set_tsk_thread_flag(rq->idle, TIF_NEED_RESCHED);
+       set_tsk_need_resched(rq->idle);
   
         /* NEED_RESCHED must be visible before we test polling */
         smp_mb();
@@@ -1610,21 -1627,42 +1627,42 @@@ static inline void update_shares_locked
   
   #endif
   
+ #ifdef CONFIG_PREEMPT
+ 
   /*
-  * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
+  * fair double_lock_balance: Safely acquires both rq->locks in a fair
+  * way at the expense of forcing extra atomic operations in all
+  * invocations.  This assures that the double_lock is acquired using the
+  * same underlying policy as the spinlock_t on this architecture, which
+  * reduces latency compared to the unfair variant below.  However, it
+  * also adds more overhead and therefore may reduce throughput.
    */
- static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
+ static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
+       __releases(this_rq->lock)
+       __acquires(busiest->lock)
+       __acquires(this_rq->lock)
+ {
+       spin_unlock(&this_rq->lock);
+       double_rq_lock(this_rq, busiest);
+ 
+       return 1;
+ }
+ 
+ #else
+ /*
+  * Unfair double_lock_balance: Optimizes throughput at the expense of
+  * latency by eliminating extra atomic operations when the locks are
+  * already in proper order on entry.  This favors lower cpu-ids and will
+  * grant the double lock to lower cpus over higher ids under contention,
+  * regardless of entry order into the function.
+  */
+ static int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
         __releases(this_rq->lock)
         __acquires(busiest->lock)
         __acquires(this_rq->lock)
   {
         int ret = 0;
   
-       if (unlikely(!irqs_disabled())) {
-               /* printk() doesn't work good under rq->lock */
-               spin_unlock(&this_rq->lock);
-               BUG_ON(1);
-       }
         if (unlikely(!spin_trylock(&busiest->lock))) {
                 if (busiest < this_rq) {
                         spin_unlock(&this_rq->lock);
@@@ -1637,6 -1675,22 +1675,22 @@@
         return ret;
   }
   
+ #endif /* CONFIG_PREEMPT */
+ 
+ /*
+  * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
+  */
+ static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
+ {
+       if (unlikely(!irqs_disabled())) {
+               /* printk() doesn't work well under rq->lock */
+               spin_unlock(&this_rq->lock);
+               BUG_ON(1);
+       }
+ 
+       return _double_lock_balance(this_rq, busiest);
+ }
+ 
   static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
         __releases(busiest->lock)
   {
@@@ -1705,6 -1759,9 +1759,9 @@@ static void update_avg(u64 *avg, u64 sa
   
   static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup)
   {
+       if (wakeup)
+               p->se.start_runtime = p->se.sum_exec_runtime;
+ 
         sched_info_queued(p);
         p->sched_class->enqueue_task(rq, p, wakeup);
         p->se.on_rq = 1;
@@@ -1712,10 -1769,15 +1769,15 @@@
   
   static void dequeue_task(struct rq *rq, struct task_struct *p, int sleep)
   {
-       if (sleep && p->se.last_wakeup) {
-               update_avg(&p->se.avg_overlap,
-                          p->se.sum_exec_runtime - p->se.last_wakeup);
-               p->se.last_wakeup = 0;
+       if (sleep) {
+               if (p->se.last_wakeup) {
+                       update_avg(&p->se.avg_overlap,
+                               p->se.sum_exec_runtime - p->se.last_wakeup);
+                       p->se.last_wakeup = 0;
+               } else {
+                       update_avg(&p->se.avg_wakeup,
+                               sysctl_sched_wakeup_granularity);
+               }
         }
   
         sched_info_dequeued(p);
@@@ -2017,7 -2079,7 +2079,7 @@@ unsigned long wait_task_inactive(struc
                  * it must be off the runqueue _entirely_, and not
                  * preempted!
                  *
-                * So if it wa still runnable (but just not actively
+                * So if it was still runnable (but just not actively
                  * running right now), it's preempted, and we should
                  * yield - it could be a while.
                  */
@@@ -2267,7 -2329,7 +2329,7 @@@ static int try_to_wake_up(struct task_s
                 sync = 0;
   
   #ifdef CONFIG_SMP
-       if (sched_feat(LB_WAKEUP_UPDATE)) {
+       if (sched_feat(LB_WAKEUP_UPDATE) && !root_task_group_empty()) {
                 struct sched_domain *sd;
   
                 this_cpu = raw_smp_processor_id();
@@@ -2345,6 -2407,22 +2407,22 @@@ out_activate
         activate_task(rq, p, 1);
         success = 1;
   
+       /*
+        * Only attribute actual wakeups done by this task.
+        */
+       if (!in_interrupt()) {
+               struct sched_entity *se = &current->se;
+               u64 sample = se->sum_exec_runtime;
+ 
+               if (se->last_wakeup)
+                       sample -= se->last_wakeup;
+               else
+                       sample -= se->start_runtime;
+               update_avg(&se->avg_wakeup, sample);
+ 
+               se->last_wakeup = se->sum_exec_runtime;
+       }
+ 
   out_running:
         trace_sched_wakeup(rq, p, success);
         check_preempt_curr(rq, p, sync);
@@@ -2355,8 -2433,6 +2433,6 @@@
                 p->sched_class->task_wake_up(rq, p);
   #endif
   out:
-       current->se.last_wakeup = current->se.sum_exec_runtime;
- 
         task_rq_unlock(rq, &flags);
   
         return success;
@@@ -2386,6 -2462,8 +2462,8 @@@ static void __sched_fork(struct task_st
         p->se.prev_sum_exec_runtime     = 0;
         p->se.last_wakeup               = 0;
         p->se.avg_overlap               = 0;
+       p->se.start_runtime             = 0;
+       p->se.avg_wakeup                = sysctl_sched_wakeup_granularity;
   
   #ifdef CONFIG_SCHEDSTATS
         p->se.wait_start                = 0;
@@@ -2448,6 -2526,8 +2526,8 @@@ void sched_fork(struct task_struct *p, 
         /* Want to start with kernel preemption disabled. */
         task_thread_info(p)->preempt_count = 1;
   #endif
+       plist_node_init(&p->pushable_tasks, MAX_PRIO);
+ 
         put_cpu();
   }
   
@@@ -2491,7 -2571,7 +2571,7 @@@ void wake_up_new_task(struct task_struc
   #ifdef CONFIG_PREEMPT_NOTIFIERS
   
   /**
-  * preempt_notifier_register - tell me when current is being being preempted & rescheduled
+  * preempt_notifier_register - tell me when current is being preempted & rescheduled
    * @notifier: notifier struct to register
    */
   void preempt_notifier_register(struct preempt_notifier *notifier)
@@@ -2588,6 -2668,12 +2668,12 @@@ static void finish_task_switch(struct r
   {
         struct mm_struct *mm = rq->prev_mm;
         long prev_state;
+ #ifdef CONFIG_SMP
+       int post_schedule = 0;
+ 
+       if (current->sched_class->needs_post_schedule)
+               post_schedule = current->sched_class->needs_post_schedule(rq);
+ #endif
   
         rq->prev_mm = NULL;
   
@@@ -2606,7 -2692,7 +2692,7 @@@
         finish_arch_switch(prev);
         finish_lock_switch(rq, prev);
   #ifdef CONFIG_SMP
-       if (current->sched_class->post_schedule)
+       if (post_schedule)
                 current->sched_class->post_schedule(rq);
   #endif
   
@@@ -2913,6 -2999,7 +2999,7 @@@ int can_migrate_task(struct task_struc
                      struct sched_domain *sd, enum cpu_idle_type idle,
                      int *all_pinned)
   {
+       int tsk_cache_hot = 0;
         /*
          * We do not migrate tasks that are:
          * 1) running (obviously), or
@@@ -2936,10 -3023,11 +3023,11 @@@
          * 2) too many balance attempts have failed.
          */
   
-       if (!task_hot(p, rq->clock, sd) ||
-                       sd->nr_balance_failed > sd->cache_nice_tries) {
+       tsk_cache_hot = task_hot(p, rq->clock, sd);
+       if (!tsk_cache_hot ||
+               sd->nr_balance_failed > sd->cache_nice_tries) {
   #ifdef CONFIG_SCHEDSTATS
-               if (task_hot(p, rq->clock, sd)) {
+               if (tsk_cache_hot) {
                         schedstat_inc(sd, lb_hot_gained[idle]);
                         schedstat_inc(p, se.nr_forced_migrations);
                 }
@@@ -2947,7 -3035,7 +3035,7 @@@
                 return 1;
         }
   
-       if (task_hot(p, rq->clock, sd)) {
+       if (tsk_cache_hot) {
                 schedstat_inc(p, se.nr_failed_migrations_hot);
                 return 0;
         }
@@@ -2987,6 -3075,16 +3075,16 @@@ next
         pulled++;
         rem_load_move -= p->se.load.weight;
   
+ #ifdef CONFIG_PREEMPT
+       /*
+        * NEWIDLE balancing is a source of latency, so preemptible kernels
+        * will stop after the first task is pulled to minimize the critical
+        * section.
+        */
+       if (idle == CPU_NEWLY_IDLE)
+               goto out;
+ #endif
+ 
         /*
          * We only want to steal up to the prescribed amount of weighted load.
          */
@@@ -3033,9 -3131,15 +3131,15 @@@ static int move_tasks(struct rq *this_r
                                 sd, idle, all_pinned, &this_best_prio);
                 class = class->next;
   
+ #ifdef CONFIG_PREEMPT
+               /*
+                * NEWIDLE balancing is a source of latency, so preemptible
+                * kernels will stop after the first task is pulled to minimize
+                * the critical section.
+                */
                 if (idle == CPU_NEWLY_IDLE && this_rq->nr_running)
                         break;
- 
+ #endif
         } while (class && max_load_move > total_load_moved);
   
         return total_load_moved > 0;
@@@ -3085,246 -3189,480 +3189,480 @@@ static int move_one_task(struct rq *thi
   
         return 0;
   }
- 
+ /********** Helpers for find_busiest_group ************************/
   /*
-  * find_busiest_group finds and returns the busiest CPU group within the
-  * domain. It calculates and returns the amount of weighted load which
-  * should be moved to restore balance via the imbalance parameter.
+  * sd_lb_stats - Structure to store the statistics of a sched_domain
+  *            during load balancing.
    */
- static struct sched_group *
- find_busiest_group(struct sched_domain *sd, int this_cpu,
-                  unsigned long *imbalance, enum cpu_idle_type idle,
-                  int *sd_idle, const struct cpumask *cpus, int *balance)
- {
-       struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups;
-       unsigned long max_load, avg_load, total_load, this_load, total_pwr;
-       unsigned long max_pull;
-       unsigned long busiest_load_per_task, busiest_nr_running;
-       unsigned long this_load_per_task, this_nr_running;
-       int load_idx, group_imb = 0;
+ struct sd_lb_stats {
+       struct sched_group *busiest; /* Busiest group in this sd */
+       struct sched_group *this;  /* Local group in this sd */
+       unsigned long total_load;  /* Total load of all groups in sd */
+       unsigned long total_pwr;   /*   Total power of all groups in sd */
+       unsigned long avg_load;    /* Average load across all groups in sd */
+ 
+       /** Statistics of this group */
+       unsigned long this_load;
+       unsigned long this_load_per_task;
+       unsigned long this_nr_running;
+ 
+       /* Statistics of the busiest group */
+       unsigned long max_load;
+       unsigned long busiest_load_per_task;
+       unsigned long busiest_nr_running;
+ 
+       int group_imb; /* Is there imbalance in this sd */
   #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
-       int power_savings_balance = 1;
-       unsigned long leader_nr_running = 0, min_load_per_task = 0;
-       unsigned long min_nr_running = ULONG_MAX;
-       struct sched_group *group_min = NULL, *group_leader = NULL;
+       int power_savings_balance; /* Is powersave balance needed for this sd */
+       struct sched_group *group_min; /* Least loaded group in sd */
+       struct sched_group *group_leader; /* Group which relieves group_min */
+       unsigned long min_load_per_task; /* load_per_task in group_min */
+       unsigned long leader_nr_running; /* Nr running of group_leader */
+       unsigned long min_nr_running; /* Nr running of group_min */
   #endif
+ };
   
-       max_load = this_load = total_load = total_pwr = 0;
-       busiest_load_per_task = busiest_nr_running = 0;
-       this_load_per_task = this_nr_running = 0;
+ /*
+  * sg_lb_stats - stats of a sched_group required for load_balancing
+  */
+ struct sg_lb_stats {
+       unsigned long avg_load; /*Avg load across the CPUs of the group */
+       unsigned long group_load; /* Total load over the CPUs of the group */
+       unsigned long sum_nr_running; /* Nr tasks running in the group */
+       unsigned long sum_weighted_load; /* Weighted load of group's tasks */
+       unsigned long group_capacity;
+       int group_imb; /* Is there an imbalance in the group ? */
+ };
   
-       if (idle == CPU_NOT_IDLE)
+ /**
+  * group_first_cpu - Returns the first cpu in the cpumask of a sched_group.
+  * @group: The group whose first cpu is to be returned.
+  */
+ static inline unsigned int group_first_cpu(struct sched_group *group)
+ {
+       return cpumask_first(sched_group_cpus(group));
+ }
+ 
+ /**
+  * get_sd_load_idx - Obtain the load index for a given sched domain.
+  * @sd: The sched_domain whose load_idx is to be obtained.
+  * @idle: The Idle status of the CPU for whose sd load_idx is obtained.
+  */
+ static inline int get_sd_load_idx(struct sched_domain *sd,
+                                       enum cpu_idle_type idle)
+ {
+       int load_idx;
+ 
+       switch (idle) {
+       case CPU_NOT_IDLE:
                 load_idx = sd->busy_idx;
-       else if (idle == CPU_NEWLY_IDLE)
+               break;
+ 
+       case CPU_NEWLY_IDLE:
                 load_idx = sd->newidle_idx;
-       else
+               break;
+       default:
                 load_idx = sd->idle_idx;
+               break;
+       }
   
-       do {
-               unsigned long load, group_capacity, max_cpu_load, min_cpu_load;
-               int local_group;
-               int i;
-               int __group_imb = 0;
-               unsigned int balance_cpu = -1, first_idle_cpu = 0;
-               unsigned long sum_nr_running, sum_weighted_load;
-               unsigned long sum_avg_load_per_task;
-               unsigned long avg_load_per_task;
+       return load_idx;
+ }
   
-               local_group = cpumask_test_cpu(this_cpu,
-                                              sched_group_cpus(group));
   
-               if (local_group)
-                       balance_cpu = cpumask_first(sched_group_cpus(group));
+ #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
+ /**
+  * init_sd_power_savings_stats - Initialize power savings statistics for
+  * the given sched_domain, during load balancing.
+  *
+  * @sd: Sched domain whose power-savings statistics are to be initialized.
+  * @sds: Variable containing the statistics for sd.
+  * @idle: Idle status of the CPU at which we're performing load-balancing.
+  */
+ static inline void init_sd_power_savings_stats(struct sched_domain *sd,
+       struct sd_lb_stats *sds, enum cpu_idle_type idle)
+ {
+       /*
+        * Busy processors will not participate in power savings
+        * balance.
+        */
+       if (idle == CPU_NOT_IDLE || !(sd->flags & SD_POWERSAVINGS_BALANCE))
+               sds->power_savings_balance = 0;
+       else {
+               sds->power_savings_balance = 1;
+               sds->min_nr_running = ULONG_MAX;
+               sds->leader_nr_running = 0;
+       }
+ }
   
-               /* Tally up the load of all CPUs in the group */
-               sum_weighted_load = sum_nr_running = avg_load = 0;
-               sum_avg_load_per_task = avg_load_per_task = 0;
+ /**
+  * update_sd_power_savings_stats - Update the power saving stats for a
+  * sched_domain while performing load balancing.
+  *
+  * @group: sched_group belonging to the sched_domain under consideration.
+  * @sds: Variable containing the statistics of the sched_domain
+  * @local_group: Does group contain the CPU for which we're performing
+  *            load balancing ?
+  * @sgs: Variable containing the statistics of the group.
+  */
+ static inline void update_sd_power_savings_stats(struct sched_group *group,
+       struct sd_lb_stats *sds, int local_group, struct sg_lb_stats *sgs)
+ {
   
-               max_cpu_load = 0;
-               min_cpu_load = ~0UL;
+       if (!sds->power_savings_balance)
+               return;
   
-               for_each_cpu_and(i, sched_group_cpus(group), cpus) {
-                       struct rq *rq = cpu_rq(i);
+       /*
+        * If the local group is idle or completely loaded
+        * no need to do power savings balance at this domain
+        */
+       if (local_group && (sds->this_nr_running >= sgs->group_capacity ||
+                               !sds->this_nr_running))
+               sds->power_savings_balance = 0;
   
-                       if (*sd_idle && rq->nr_running)
-                               *sd_idle = 0;
+       /*
+        * If a group is already running at full capacity or idle,
+        * don't include that group in power savings calculations
+        */
+       if (!sds->power_savings_balance ||
+               sgs->sum_nr_running >= sgs->group_capacity ||
+               !sgs->sum_nr_running)
+               return;
   
-                       /* Bias balancing toward cpus of our domain */
-                       if (local_group) {
-                               if (idle_cpu(i) && !first_idle_cpu) {
-                                       first_idle_cpu = 1;
-                                       balance_cpu = i;
-                               }
+       /*
+        * Calculate the group which has the least non-idle load.
+        * This is the group from where we need to pick up the load
+        * for saving power
+        */
+       if ((sgs->sum_nr_running < sds->min_nr_running) ||
+           (sgs->sum_nr_running == sds->min_nr_running &&
+            group_first_cpu(group) > group_first_cpu(sds->group_min))) {
+               sds->group_min = group;
+               sds->min_nr_running = sgs->sum_nr_running;
+               sds->min_load_per_task = sgs->sum_weighted_load /
+                                               sgs->sum_nr_running;
+       }
   
-                               load = target_load(i, load_idx);
-                       } else {
-                               load = source_load(i, load_idx);
-                               if (load > max_cpu_load)
-                                       max_cpu_load = load;
-                               if (min_cpu_load > load)
-                                       min_cpu_load = load;
-                       }
+       /*
+        * Calculate the group which is almost near its
+        * capacity but still has some space to pick up some load
+        * from other group and save more power
+        */
+       if (sgs->sum_nr_running > sgs->group_capacity - 1)
+               return;
   
-                       avg_load += load;
-                       sum_nr_running += rq->nr_running;
-                       sum_weighted_load += weighted_cpuload(i);
+       if (sgs->sum_nr_running > sds->leader_nr_running ||
+           (sgs->sum_nr_running == sds->leader_nr_running &&
+            group_first_cpu(group) < group_first_cpu(sds->group_leader))) {
+               sds->group_leader = group;
+               sds->leader_nr_running = sgs->sum_nr_running;
+       }
+ }
   
-                       sum_avg_load_per_task += cpu_avg_load_per_task(i);
-               }
+ /**
+  * check_power_save_busiest_group - see if there is potential for some power-savings balance
+  * @sds: Variable containing the statistics of the sched_domain
+  *    under consideration.
+  * @this_cpu: Cpu at which we're currently performing load-balancing.
+  * @imbalance: Variable to store the imbalance.
+  *
+  * Description:
+  * Check if we have potential to perform some power-savings balance.
+  * If yes, set the busiest group to be the least loaded group in the
+  * sched_domain, so that its CPUs can be put to idle.
+  *
+  * Returns 1 if there is potential to perform power-savings balance.
+  * Else returns 0.
+  */
+ static inline int check_power_save_busiest_group(struct sd_lb_stats *sds,
+                                       int this_cpu, unsigned long *imbalance)
+ {
+       if (!sds->power_savings_balance)
+               return 0;
   
-               /*
-                * First idle cpu or the first cpu(busiest) in this sched group
-                * is eligible for doing load balancing at this and above
-                * domains. In the newly idle case, we will allow all the cpu's
-                * to do the newly idle load balance.
-                */
-               if (idle != CPU_NEWLY_IDLE && local_group &&
-                   balance_cpu != this_cpu && balance) {
-                       *balance = 0;
-                       goto ret;
-               }
+       if (sds->this != sds->group_leader ||
+                       sds->group_leader == sds->group_min)
+               return 0;
   
-               total_load += avg_load;
-               total_pwr += group->__cpu_power;
+       *imbalance = sds->min_load_per_task;
+       sds->busiest = sds->group_min;
   
-               /* Adjust by relative CPU power of the group */
-               avg_load = sg_div_cpu_power(group,
-                               avg_load * SCHED_LOAD_SCALE);
+       if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP) {
+               cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu =
+                       group_first_cpu(sds->group_leader);
+       }
   
+       return 1;
   
-               /*
-                * Consider the group unbalanced when the imbalance is larger
-                * than the average weight of two tasks.
-                *
-                * APZ: with cgroup the avg task weight can vary wildly and
-                *      might not be a suitable number - should we keep a
-                *      normalized nr_running number somewhere that negates
-                *      the hierarchy?
-                */
-               avg_load_per_task = sg_div_cpu_power(group,
-                               sum_avg_load_per_task * SCHED_LOAD_SCALE);
+ }
+ #else /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
+ static inline void init_sd_power_savings_stats(struct sched_domain *sd,
+       struct sd_lb_stats *sds, enum cpu_idle_type idle)
+ {
+       return;
+ }
   
-               if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task)
-                       __group_imb = 1;
+ static inline void update_sd_power_savings_stats(struct sched_group *group,
+       struct sd_lb_stats *sds, int local_group, struct sg_lb_stats *sgs)
+ {
+       return;
+ }
+ 
+ static inline int check_power_save_busiest_group(struct sd_lb_stats *sds,
+                                       int this_cpu, unsigned long *imbalance)
+ {
+       return 0;
+ }
+ #endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
   
-               group_capacity = group->__cpu_power / SCHED_LOAD_SCALE;
   
+ /**
+  * update_sg_lb_stats - Update sched_group's statistics for load balancing.
+  * @group: sched_group whose statistics are to be updated.
+  * @this_cpu: Cpu for which load balance is currently performed.
+  * @idle: Idle status of this_cpu
+  * @load_idx: Load index of sched_domain of this_cpu for load calc.
+  * @sd_idle: Idle status of the sched_domain containing group.
+  * @local_group: Does group contain this_cpu.
+  * @cpus: Set of cpus considered for load balancing.
+  * @balance: Should we balance.
+  * @sgs: variable to hold the statistics for this group.
+  */
+ static inline void update_sg_lb_stats(struct sched_group *group, int this_cpu,
+                       enum cpu_idle_type idle, int load_idx, int *sd_idle,
+                       int local_group, const struct cpumask *cpus,
+                       int *balance, struct sg_lb_stats *sgs)
+ {
+       unsigned long load, max_cpu_load, min_cpu_load;
+       int i;
+       unsigned int balance_cpu = -1, first_idle_cpu = 0;
+       unsigned long sum_avg_load_per_task;
+       unsigned long avg_load_per_task;
+ 
+       if (local_group)
+               balance_cpu = group_first_cpu(group);
+ 
+       /* Tally up the load of all CPUs in the group */
+       sum_avg_load_per_task = avg_load_per_task = 0;
+       max_cpu_load = 0;
+       min_cpu_load = ~0UL;
+ 
+       for_each_cpu_and(i, sched_group_cpus(group), cpus) {
+               struct rq *rq = cpu_rq(i);
+ 
+               if (*sd_idle && rq->nr_running)
+                       *sd_idle = 0;
+ 
+               /* Bias balancing toward cpus of our domain */
                 if (local_group) {
-                       this_load = avg_load;
-                       this = group;
-                       this_nr_running = sum_nr_running;
-                       this_load_per_task = sum_weighted_load;
-               } else if (avg_load > max_load &&
-                          (sum_nr_running > group_capacity || __group_imb)) {
-                       max_load = avg_load;
-                       busiest = group;
-                       busiest_nr_running = sum_nr_running;
-                       busiest_load_per_task = sum_weighted_load;
-                       group_imb = __group_imb;
+                       if (idle_cpu(i) && !first_idle_cpu) {
+                               first_idle_cpu = 1;
+                               balance_cpu = i;
+                       }
+ 
+                       load = target_load(i, load_idx);
+               } else {
+                       load = source_load(i, load_idx);
+                       if (load > max_cpu_load)
+                               max_cpu_load = load;
+                       if (min_cpu_load > load)
+                               min_cpu_load = load;
                 }
   
- #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
-               /*
-                * Busy processors will not participate in power savings
-                * balance.
-                */
-               if (idle == CPU_NOT_IDLE ||
-                               !(sd->flags & SD_POWERSAVINGS_BALANCE))
-                       goto group_next;
+               sgs->group_load += load;
+               sgs->sum_nr_running += rq->nr_running;
+               sgs->sum_weighted_load += weighted_cpuload(i);
   
-               /*
-                * If the local group is idle or completely loaded
-                * no need to do power savings balance at this domain
-                */
-               if (local_group && (this_nr_running >= group_capacity ||
-                                   !this_nr_running))
-                       power_savings_balance = 0;
+               sum_avg_load_per_task += cpu_avg_load_per_task(i);
+       }
   
-               /*
-                * If a group is already running at full capacity or idle,
-                * don't include that group in power savings calculations
-                */
-               if (!power_savings_balance || sum_nr_running >= group_capacity
-                   || !sum_nr_running)
-                       goto group_next;
+       /*
+        * First idle cpu or the first cpu(busiest) in this sched group
+        * is eligible for doing load balancing at this and above
+        * domains. In the newly idle case, we will allow all the cpu's
+        * to do the newly idle load balance.
+        */
+       if (idle != CPU_NEWLY_IDLE && local_group &&
+           balance_cpu != this_cpu && balance) {
+               *balance = 0;
+               return;
+       }
   
-               /*
-                * Calculate the group which has the least non-idle load.
-                * This is the group from where we need to pick up the load
-                * for saving power
-                */
-               if ((sum_nr_running < min_nr_running) ||
-                   (sum_nr_running == min_nr_running &&
-                    cpumask_first(sched_group_cpus(group)) >
-                    cpumask_first(sched_group_cpus(group_min)))) {
-                       group_min = group;
-                       min_nr_running = sum_nr_running;
-                       min_load_per_task = sum_weighted_load /
-                                               sum_nr_running;
-               }
+       /* Adjust by relative CPU power of the group */
+       sgs->avg_load = sg_div_cpu_power(group,
+                       sgs->group_load * SCHED_LOAD_SCALE);
   
-               /*
-                * Calculate the group which is almost near its
-                * capacity but still has some space to pick up some load
-                * from other group and save more power
-                */
-               if (sum_nr_running <= group_capacity - 1) {
-                       if (sum_nr_running > leader_nr_running ||
-                           (sum_nr_running == leader_nr_running &&
-                            cpumask_first(sched_group_cpus(group)) <
-                            cpumask_first(sched_group_cpus(group_leader)))) {
-                               group_leader = group;
-                               leader_nr_running = sum_nr_running;
-                       }
+ 
+       /*
+        * Consider the group unbalanced when the imbalance is larger
+        * than the average weight of two tasks.
+        *
+        * APZ: with cgroup the avg task weight can vary wildly and
+        *      might not be a suitable number - should we keep a
+        *      normalized nr_running number somewhere that negates
+        *      the hierarchy?
+        */
+       avg_load_per_task = sg_div_cpu_power(group,
+                       sum_avg_load_per_task * SCHED_LOAD_SCALE);
+ 
+       if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task)
+               sgs->group_imb = 1;
+ 
+       sgs->group_capacity = group->__cpu_power / SCHED_LOAD_SCALE;
+ 
+ }
+ 
+ /**
+  * update_sd_lb_stats - Update sched_domain's statistics for load balancing.
+  * @sd: sched_domain whose statistics are to be updated.
+  * @this_cpu: Cpu for which load balance is currently performed.
+  * @idle: Idle status of this_cpu
+  * @sd_idle: Idle status of the sched_domain containing group.
+  * @cpus: Set of cpus considered for load balancing.
+  * @balance: Should we balance.
+  * @sds: variable to hold the statistics for this sched_domain.
+  */
+ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
+                       enum cpu_idle_type idle, int *sd_idle,
+                       const struct cpumask *cpus, int *balance,
+                       struct sd_lb_stats *sds)
+ {
+       struct sched_group *group = sd->groups;
+       struct sg_lb_stats sgs;
+       int load_idx;
+ 
+       init_sd_power_savings_stats(sd, sds, idle);
+       load_idx = get_sd_load_idx(sd, idle);
+ 
+       do {
+               int local_group;
+ 
+               local_group = cpumask_test_cpu(this_cpu,
+                                              sched_group_cpus(group));
+               memset(&sgs, 0, sizeof(sgs));
+               update_sg_lb_stats(group, this_cpu, idle, load_idx, sd_idle,
+                               local_group, cpus, balance, &sgs);
+ 
+               if (local_group && balance && !(*balance))
+                       return;
+ 
+               sds->total_load += sgs.group_load;
+               sds->total_pwr += group->__cpu_power;
+ 
+               if (local_group) {
+                       sds->this_load = sgs.avg_load;
+                       sds->this = group;
+                       sds->this_nr_running = sgs.sum_nr_running;
+                       sds->this_load_per_task = sgs.sum_weighted_load;
+               } else if (sgs.avg_load > sds->max_load &&
+                          (sgs.sum_nr_running > sgs.group_capacity ||
+                               sgs.group_imb)) {
+                       sds->max_load = sgs.avg_load;
+                       sds->busiest = group;
+                       sds->busiest_nr_running = sgs.sum_nr_running;
+                       sds->busiest_load_per_task = sgs.sum_weighted_load;
+                       sds->group_imb = sgs.group_imb;
                 }
- group_next:
- #endif
+ 
+               update_sd_power_savings_stats(group, sds, local_group, &sgs);
                 group = group->next;
         } while (group != sd->groups);
   
-       if (!busiest || this_load >= max_load || busiest_nr_running == 0)
-               goto out_balanced;
- 
-       avg_load = (SCHED_LOAD_SCALE * total_load) / total_pwr;
+ }
   
-       if (this_load >= avg_load ||
-                       100*max_load <= sd->imbalance_pct*this_load)
-               goto out_balanced;
+ /**
+  * fix_small_imbalance - Calculate the minor imbalance that exists
+  *                    amongst the groups of a sched_domain, during
+  *                    load balancing.
+  * @sds: Statistics of the sched_domain whose imbalance is to be calculated.
+  * @this_cpu: The cpu at whose sched_domain we're performing load-balance.
+  * @imbalance: Variable to store the imbalance.
+  */
+ static inline void fix_small_imbalance(struct sd_lb_stats *sds,
+                               int this_cpu, unsigned long *imbalance)
+ {
+       unsigned long tmp, pwr_now = 0, pwr_move = 0;
+       unsigned int imbn = 2;
+ 
+       if (sds->this_nr_running) {
+               sds->this_load_per_task /= sds->this_nr_running;
+               if (sds->busiest_load_per_task >
+                               sds->this_load_per_task)
+                       imbn = 1;
+       } else
+               sds->this_load_per_task =
+                       cpu_avg_load_per_task(this_cpu);
   
-       busiest_load_per_task /= busiest_nr_running;
-       if (group_imb)
-               busiest_load_per_task = min(busiest_load_per_task, avg_load);
+       if (sds->max_load - sds->this_load + sds->busiest_load_per_task >=
+                       sds->busiest_load_per_task * imbn) {
+               *imbalance = sds->busiest_load_per_task;
+               return;
+       }
   
         /*
-        * We're trying to get all the cpus to the average_load, so we don't
-        * want to push ourselves above the average load, nor do we wish to
-        * reduce the max loaded cpu below the average load, as either of these
-        * actions would just result in more rebalancing later, and ping-pong
-        * tasks around. Thus we look for the minimum possible imbalance.
-        * Negative imbalances (*we* are more loaded than anyone else) will
-        * be counted as no imbalance for these purposes -- we can't fix that
-        * by pulling tasks to us. Be careful of negative numbers as they'll
-        * appear as very large values with unsigned longs.
+        * OK, we don't have enough imbalance to justify moving tasks,
+        * however we may be able to increase total CPU power used by
+        * moving them.
          */
-       if (max_load <= busiest_load_per_task)
-               goto out_balanced;
   
+       pwr_now += sds->busiest->__cpu_power *
+                       min(sds->busiest_load_per_task, sds->max_load);
+       pwr_now += sds->this->__cpu_power *
+                       min(sds->this_load_per_task, sds->this_load);
+       pwr_now /= SCHED_LOAD_SCALE;
+ 
+       /* Amount of load we'd subtract */
+       tmp = sg_div_cpu_power(sds->busiest,
+                       sds->busiest_load_per_task * SCHED_LOAD_SCALE);
+       if (sds->max_load > tmp)
+               pwr_move += sds->busiest->__cpu_power *
+                       min(sds->busiest_load_per_task, sds->max_load - tmp);
+ 
+       /* Amount of load we'd add */
+       if (sds->max_load * sds->busiest->__cpu_power <
+               sds->busiest_load_per_task * SCHED_LOAD_SCALE)
+               tmp = sg_div_cpu_power(sds->this,
+                       sds->max_load * sds->busiest->__cpu_power);
+       else
+               tmp = sg_div_cpu_power(sds->this,
+                       sds->busiest_load_per_task * SCHED_LOAD_SCALE);
+       pwr_move += sds->this->__cpu_power *
+                       min(sds->this_load_per_task, sds->this_load + tmp);
+       pwr_move /= SCHED_LOAD_SCALE;
+ 
+       /* Move if we gain throughput */
+       if (pwr_move > pwr_now)
+               *imbalance = sds->busiest_load_per_task;
+ }
+ 
+ /**
+  * calculate_imbalance - Calculate the amount of imbalance present within the
+  *                     groups of a given sched_domain during load balance.
+  * @sds: statistics of the sched_domain whose imbalance is to be calculated.
+  * @this_cpu: Cpu for which load balance is currently being performed.
+  * @imbalance: The variable to store the imbalance.
+  */
+ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu,
+               unsigned long *imbalance)
+ {
+       unsigned long max_pull;
         /*
          * In the presence of smp nice balancing, certain scenarios can have
          * max load less than avg load(as we skip the groups at or below
          * its cpu_power, while calculating max_load..)
          */
-       if (max_load < avg_load) {
+       if (sds->max_load < sds->avg_load) {
                 *imbalance = 0;
-               goto small_imbalance;
+               return fix_small_imbalance(sds, this_cpu, imbalance);
         }
   
         /* Don't want to pull so many tasks that a group would go idle */
-       max_pull = min(max_load - avg_load, max_load - busiest_load_per_task);
+       max_pull = min(sds->max_load - sds->avg_load,
+                       sds->max_load - sds->busiest_load_per_task);
   
         /* How much load to actually move to equalise the imbalance */
-       *imbalance = min(max_pull * busiest->__cpu_power,
-                               (avg_load - this_load) * this->__cpu_power)
+       *imbalance = min(max_pull * sds->busiest->__cpu_power,
+               (sds->avg_load - sds->this_load) * sds->this->__cpu_power)
                         / SCHED_LOAD_SCALE;
   
         /*
@@@ -3333,78 -3671,110 +3671,110 @@@
          * a think about bumping its value to force at least one task to be
          * moved
          */
-       if (*imbalance < busiest_load_per_task) {
-               unsigned long tmp, pwr_now, pwr_move;
-               unsigned int imbn;
- 
- small_imbalance:
-               pwr_move = pwr_now = 0;
-               imbn = 2;
-               if (this_nr_running) {
-                       this_load_per_task /= this_nr_running;
-                       if (busiest_load_per_task > this_load_per_task)
-                               imbn = 1;
-               } else
-                       this_load_per_task = cpu_avg_load_per_task(this_cpu);
+       if (*imbalance < sds->busiest_load_per_task)
+               return fix_small_imbalance(sds, this_cpu, imbalance);
   
-               if (max_load - this_load + busiest_load_per_task >=
-                                       busiest_load_per_task * imbn) {
-                       *imbalance = busiest_load_per_task;
-                       return busiest;
-               }
+ }
+ /******* find_busiest_group() helpers end here *********************/
   
-               /*
-                * OK, we don't have enough imbalance to justify moving tasks,
-                * however we may be able to increase total CPU power used by
-                * moving them.
-                */
+ /**
+  * find_busiest_group - Returns the busiest group within the sched_domain
+  * if there is an imbalance. If there isn't an imbalance, and
+  * the user has opted for power-savings, it returns a group whose
+  * CPUs can be put to idle by rebalancing those tasks elsewhere, if
+  * such a group exists.
+  *
+  * Also calculates the amount of weighted load which should be moved
+  * to restore balance.
+  *
+  * @sd: The sched_domain whose busiest group is to be returned.
+  * @this_cpu: The cpu for which load balancing is currently being performed.
+  * @imbalance: Variable which stores amount of weighted load which should
+  *            be moved to restore balance/put a group to idle.
+  * @idle: The idle status of this_cpu.
+  * @sd_idle: The idleness of sd
+  * @cpus: The set of CPUs under consideration for load-balancing.
+  * @balance: Pointer to a variable indicating if this_cpu
+  *    is the appropriate cpu to perform load balancing at this_level.
+  *
+  * Returns:   - the busiest group if imbalance exists.
+  *            - If no imbalance and user has opted for power-savings balance,
+  *               return the least loaded group whose CPUs can be
+  *               put to idle by rebalancing its tasks onto our group.
+  */
+ static struct sched_group *
+ find_busiest_group(struct sched_domain *sd, int this_cpu,
+                  unsigned long *imbalance, enum cpu_idle_type idle,
+                  int *sd_idle, const struct cpumask *cpus, int *balance)
+ {
+       struct sd_lb_stats sds;
   
-               pwr_now += busiest->__cpu_power *
-                               min(busiest_load_per_task, max_load);
-               pwr_now += this->__cpu_power *
-                               min(this_load_per_task, this_load);
-               pwr_now /= SCHED_LOAD_SCALE;
- 
-               /* Amount of load we'd subtract */
-               tmp = sg_div_cpu_power(busiest,
-                               busiest_load_per_task * SCHED_LOAD_SCALE);
-               if (max_load > tmp)
-                       pwr_move += busiest->__cpu_power *
-                               min(busiest_load_per_task, max_load - tmp);
- 
-               /* Amount of load we'd add */
-               if (max_load * busiest->__cpu_power <
-                               busiest_load_per_task * SCHED_LOAD_SCALE)
-                       tmp = sg_div_cpu_power(this,
-                                       max_load * busiest->__cpu_power);
-               else
-                       tmp = sg_div_cpu_power(this,
-                               busiest_load_per_task * SCHED_LOAD_SCALE);
-               pwr_move += this->__cpu_power *
-                               min(this_load_per_task, this_load + tmp);
-               pwr_move /= SCHED_LOAD_SCALE;
+       memset(&sds, 0, sizeof(sds));
   
-               /* Move if we gain throughput */
-               if (pwr_move > pwr_now)
-                       *imbalance = busiest_load_per_task;
-       }
+       /*
+        * Compute the various statistics relevant for load balancing at
+        * this level.
+        */
+       update_sd_lb_stats(sd, this_cpu, idle, sd_idle, cpus,
+                                       balance, &sds);
+ 
+       /* Cases where imbalance does not exist from POV of this_cpu */
+       /* 1) this_cpu is not the appropriate cpu to perform load balancing
+        *    at this level.
+        * 2) There is no busy sibling group to pull from.
+        * 3) This group is the busiest group.
+        * 4) This group is more busy than the avg busyness at this
+        *    sched_domain.
+        * 5) The imbalance is within the specified limit.
+        * 6) Any rebalance would lead to ping-pong
+        */
+       if (balance && !(*balance))
+               goto ret;
   
-       return busiest;
+       if (!sds.busiest || sds.busiest_nr_running == 0)
+               goto out_balanced;
   
- out_balanced:
- #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
-       if (idle == CPU_NOT_IDLE || !(sd->flags & SD_POWERSAVINGS_BALANCE))
-               goto ret;
+       if (sds.this_load >= sds.max_load)
+               goto out_balanced;
   
-       if (this == group_leader && group_leader != group_min) {
-               *imbalance = min_load_per_task;
-               if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP) {
-                       cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu =
-                               cpumask_first(sched_group_cpus(group_leader));
-               }
-               return group_min;
-       }
- #endif
+       sds.avg_load = (SCHED_LOAD_SCALE * sds.total_load) / sds.total_pwr;
+ 
+       if (sds.this_load >= sds.avg_load)
+               goto out_balanced;
+ 
+       if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load)
+               goto out_balanced;
+ 
+       sds.busiest_load_per_task /= sds.busiest_nr_running;
+       if (sds.group_imb)
+               sds.busiest_load_per_task =
+                       min(sds.busiest_load_per_task, sds.avg_load);
+ 
+       /*
+        * We're trying to get all the cpus to the average_load, so we don't
+        * want to push ourselves above the average load, nor do we wish to
+        * reduce the max loaded cpu below the average load, as either of these
+        * actions would just result in more rebalancing later, and ping-pong
+        * tasks around. Thus we look for the minimum possible imbalance.
+        * Negative imbalances (*we* are more loaded than anyone else) will
+        * be counted as no imbalance for these purposes -- we can't fix that
+        * by pulling tasks to us. Be careful of negative numbers as they'll
+        * appear as very large values with unsigned longs.
+        */
+       if (sds.max_load <= sds.busiest_load_per_task)
+               goto out_balanced;
+ 
+       /* Looks like there is an imbalance. Compute it */
+       calculate_imbalance(&sds, this_cpu, imbalance);
+       return sds.busiest;
+ 
+ out_balanced:
+       /*
+        * There is no obvious imbalance. But check if we can do some balancing
+        * to save power.
+        */
+       if (check_power_save_busiest_group(&sds, this_cpu, imbalance))
+               return sds.busiest;
   ret:
         *imbalance = 0;
         return NULL;
@@@ -3448,23 -3818,19 +3818,23 @@@ find_busiest_queue(struct sched_group *
    */
   #define MAX_PINNED_INTERVAL   512
   
+ +/* Working cpumask for load_balance and load_balance_newidle. */
+ +static DEFINE_PER_CPU(cpumask_var_t, load_balance_tmpmask);
+ +
   /*
    * Check this_cpu to ensure it is balanced within domain. Attempt to move
    * tasks if there is an imbalance.
    */
   static int load_balance(int this_cpu, struct rq *this_rq,
                         struct sched_domain *sd, enum cpu_idle_type idle,
- -                      int *balance, struct cpumask *cpus)
+ +                      int *balance)
   {
         int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
         struct sched_group *group;
         unsigned long imbalance;
         struct rq *busiest;
         unsigned long flags;
+ +      struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
   
         cpumask_setall(cpus);
   
@@@ -3619,7 -3985,8 +3989,7 @@@ out
    * this_rq is locked.
    */
   static int
- -load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd,
- -                      struct cpumask *cpus)
+ +load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
   {
         struct sched_group *group;
         struct rq *busiest = NULL;
@@@ -3627,7 -3994,6 +3997,7 @@@
         int ld_moved = 0;
         int sd_idle = 0;
         int all_pinned = 0;
+ +      struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
   
         cpumask_setall(cpus);
   
@@@ -3768,6 -4134,10 +4138,6 @@@ static void idle_balance(int this_cpu, 
         struct sched_domain *sd;
         int pulled_task = 0;
         unsigned long next_balance = jiffies + HZ;
- -      cpumask_var_t tmpmask;
- -
- -      if (!alloc_cpumask_var(&tmpmask, GFP_ATOMIC))
- -              return;
   
         for_each_domain(this_cpu, sd) {
                 unsigned long interval;
@@@ -3778,7 -4148,7 +4148,7 @@@
                 if (sd->flags & SD_BALANCE_NEWIDLE)
                         /* If we've pulled tasks over stop searching: */
                         pulled_task = load_balance_newidle(this_cpu, this_rq,
- -                                                         sd, tmpmask);
+ +                                                         sd);
   
                 interval = msecs_to_jiffies(sd->balance_interval);
                 if (time_after(next_balance, sd->last_balance + interval))
@@@ -3793,6 -4163,7 +4163,6 @@@
                  */
                 this_rq->next_balance = next_balance;
         }
- -      free_cpumask_var(tmpmask);
   }
   
   /*
@@@ -3942,6 -4313,11 +4312,6 @@@ static void rebalance_domains(int cpu, 
         unsigned long next_balance = jiffies + 60*HZ;
         int update_next_balance = 0;
         int need_serialize;
- -      cpumask_var_t tmp;
- -
- -      /* Fails alloc?  Rebalancing probably not a priority right now. */
- -      if (!alloc_cpumask_var(&tmp, GFP_ATOMIC))
- -              return;
   
         for_each_domain(cpu, sd) {
                 if (!(sd->flags & SD_LOAD_BALANCE))
@@@ -3966,7 -4342,7 +4336,7 @@@
                 }
   
                 if (time_after_eq(jiffies, sd->last_balance + interval)) {
- -                      if (load_balance(cpu, rq, sd, idle, &balance, tmp)) {
+ +                      if (load_balance(cpu, rq, sd, idle, &balance)) {
                                 /*
                                  * We've pulled tasks over so either we're no
                                  * longer idle, or one of our SMT siblings is
@@@ -4000,6 -4376,8 +4370,6 @@@ out
          */
         if (likely(update_next_balance))
                 rq->next_balance = next_balance;
- -
- -      free_cpumask_var(tmp);
   }
   
   /*
@@@ -4049,6 -4427,11 +4419,11 @@@ static void run_rebalance_domains(struc
   #endif
   }
   
+ static inline int on_null_domain(int cpu)
+ {
+       return !rcu_dereference(cpu_rq(cpu)->sd);
+ }
+ 
   /*
    * Trigger the SCHED_SOFTIRQ if it is time to do periodic load balancing.
    *
@@@ -4106,7 -4489,9 +4481,9 @@@ static inline void trigger_load_balance
             cpumask_test_cpu(cpu, nohz.cpu_mask))
                 return;
   #endif
-       if (time_after_eq(jiffies, rq->next_balance))
+       /* Don't need to rebalance while attached to NULL domain */
+       if (time_after_eq(jiffies, rq->next_balance) &&
+           likely(!on_null_domain(cpu)))
                 raise_softirq(SCHED_SOFTIRQ);
   }
   
@@@ -4500,11 -4885,33 +4877,33 @@@ static inline void schedule_debug(struc
   #endif
   }
   
+ static void put_prev_task(struct rq *rq, struct task_struct *prev)
+ {
+       if (prev->state == TASK_RUNNING) {
+               u64 runtime = prev->se.sum_exec_runtime;
+ 
+               runtime -= prev->se.prev_sum_exec_runtime;
+               runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost);
+ 
+               /*
+                * In order to avoid avg_overlap growing stale when we are
+                * indeed overlapping and hence not getting put to sleep, grow
+                * the avg_overlap on preemption.
+                *
+                * We use the average preemption runtime because that
+                * correlates to the amount of cache footprint a task can
+                * build up.
+                */
+               update_avg(&prev->se.avg_overlap, runtime);
+       }
+       prev->sched_class->put_prev_task(rq, prev);
+ }
+ 
   /*
    * Pick up the highest-prio task:
    */
   static inline struct task_struct *
- pick_next_task(struct rq *rq, struct task_struct *prev)
+ pick_next_task(struct rq *rq)
   {
         const struct sched_class *class;
         struct task_struct *p;
@@@ -4578,8 -4985,8 +4977,8 @@@ need_resched_nonpreemptible
         if (unlikely(!rq->nr_running))
                 idle_balance(cpu, rq);
   
-       prev->sched_class->put_prev_task(rq, prev);
-       next = pick_next_task(rq, prev);
+       put_prev_task(rq, prev);
+       next = pick_next_task(rq);
   
         if (likely(prev != next)) {
                 sched_info_switch(prev, next);
@@@ -4634,7 -5041,7 +5033,7 @@@ asmlinkage void __sched preempt_schedul
                  * between schedule and now.
                  */
                 barrier();
-       } while (unlikely(test_thread_flag(TIF_NEED_RESCHED)));
+       } while (need_resched());
   }
   EXPORT_SYMBOL(preempt_schedule);
   
@@@ -4663,7 -5070,7 +5062,7 @@@ asmlinkage void __sched preempt_schedul
                  * between schedule and now.
                  */
                 barrier();
-       } while (unlikely(test_thread_flag(TIF_NEED_RESCHED)));
+       } while (need_resched());
   }
   
   #endif /* CONFIG_PREEMPT */
@@@ -5137,7 -5544,7 +5536,7 @@@ SYSCALL_DEFINE1(nice, int, increment
         if (increment > 40)
                 increment = 40;
   
-       nice = PRIO_TO_NICE(current->static_prio) + increment;
+       nice = TASK_NICE(current) + increment;
         if (nice < -20)
                 nice = -20;
         if (nice > 19)
@@@ -6410,7 -6817,7 +6809,7 @@@ static void migrate_dead_tasks(unsigne
                 if (!rq->nr_running)
                         break;
                 update_rq_clock(rq);
-               next = pick_next_task(rq, rq->curr);
+               next = pick_next_task(rq);
                 if (!next)
                         break;
                 next->sched_class->put_prev_task(rq, next);
@@@ -7241,7 -7648,7 +7640,7 @@@ cpu_to_core_group(int cpu, const struc
   {
         int group;
   
- -      cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map);
+ +      cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map);
         group = cpumask_first(mask);
         if (sg)
                 *sg = &per_cpu(sched_group_core, group).sg;
@@@ -7270,7 -7677,7 +7669,7 @@@ cpu_to_phys_group(int cpu, const struc
         cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map);
         group = cpumask_first(mask);
   #elif defined(CONFIG_SCHED_SMT)
- -      cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map);
+ +      cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map);
         group = cpumask_first(mask);
   #else
         group = cpu;
@@@ -7613,7 -8020,7 +8012,7 @@@ static int __build_sched_domains(const 
                 SD_INIT(sd, SIBLING);
                 set_domain_attribute(sd, attr);
                 cpumask_and(sched_domain_span(sd),
- -                          &per_cpu(cpu_sibling_map, i), cpu_map);
+ +                          topology_thread_cpumask(i), cpu_map);
                 sd->parent = p;
                 p->child = sd;
                 cpu_to_cpu_group(i, cpu_map, &sd->groups, tmpmask);
@@@ -7624,7 -8031,7 +8023,7 @@@
         /* Set up CPU (sibling) groups */
         for_each_cpu(i, cpu_map) {
                 cpumask_and(this_sibling_map,
- -                          &per_cpu(cpu_sibling_map, i), cpu_map);
+ +                          topology_thread_cpumask(i), cpu_map);
                 if (i != cpumask_first(this_sibling_map))
                         continue;
   
@@@ -8205,11 -8612,15 +8604,15 @@@ static void init_rt_rq(struct rt_rq *rt
         __set_bit(MAX_RT_PRIO, array->bitmap);
   
   #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
-       rt_rq->highest_prio = MAX_RT_PRIO;
+       rt_rq->highest_prio.curr = MAX_RT_PRIO;
+ #ifdef CONFIG_SMP
+       rt_rq->highest_prio.next = MAX_RT_PRIO;
+ #endif
   #endif
   #ifdef CONFIG_SMP
         rt_rq->rt_nr_migratory = 0;
         rt_rq->overloaded = 0;
+       plist_head_init(&rq->rt.pushable_tasks, &rq->lock);
   #endif
   
         rt_rq->rt_time = 0;
@@@ -8295,9 -8706,6 +8698,9 @@@ void __init sched_init(void
   #endif
   #ifdef CONFIG_USER_SCHED
         alloc_size *= 2;
+ +#endif
+ +#ifdef CONFIG_CPUMASK_OFFSTACK
+ +      alloc_size += num_possible_cpus() * cpumask_size();
   #endif
         /*
          * As sched_init() is called before page_alloc is setup,
@@@ -8336,12 -8744,6 +8739,12 @@@
                 ptr += nr_cpu_ids * sizeof(void **);
   #endif /* CONFIG_USER_SCHED */
   #endif /* CONFIG_RT_GROUP_SCHED */
+ +#ifdef CONFIG_CPUMASK_OFFSTACK
+ +              for_each_possible_cpu(i) {
+ +                      per_cpu(load_balance_tmpmask, i) = (void *)ptr;
+ +                      ptr += cpumask_size();
+ +              }
+ +#endif /* CONFIG_CPUMASK_OFFSTACK */
         }
   
   #ifdef CONFIG_SMP
@@@ -9594,7 -9996,7 +9997,7 @@@ static void cpuacct_charge(struct task_
         struct cpuacct *ca;
         int cpu;
   
-       if (!cpuacct_subsys.active)
+       if (unlikely(!cpuacct_subsys.active))
                 return;
   
         cpu = task_cpu(tsk);
diff --combined mm/vmscan.c

index 592bb9619f75ace42bb83aea694185000d499966,56ddf41149eb77a55158ced628c02f817b990618..1cdbf0b057278e8f157da93bb6eda0dc80644387
--- 1/mm/vmscan.c
--- 2/mm/vmscan.c
+++ b/mm/vmscan.c
@@@ -1262,7 -1262,6 +1262,6 @@@ static void shrink_active_list(unsigne
          * Move the pages to the [file or anon] inactive list.
          */
         pagevec_init(&pvec, 1);
-       pgmoved = 0;
         lru = LRU_BASE + file * LRU_FILE;
   
         spin_lock_irq(&zone->lru_lock);
@@@ -1274,6 -1273,7 +1273,7 @@@
          */
         reclaim_stat->recent_rotated[!!file] += pgmoved;
   
+       pgmoved = 0;
         while (!list_empty(&l_inactive)) {
                 page = lru_to_page(&l_inactive);
                 prefetchw_prev_lru_page(page, &l_inactive, flags);
@@@ -1963,7 -1963,7 +1963,7 @@@ static int kswapd(void *p
         struct reclaim_state reclaim_state = {
                 .reclaimed_slab = 0,
         };
- -      node_to_cpumask_ptr(cpumask, pgdat->node_id);
+ +      const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
   
         if (!cpumask_empty(cpumask))
                 set_cpus_allowed_ptr(tsk, cpumask);
@@@ -2198,9 -2198,7 +2198,9 @@@ static int __devinit cpu_callback(struc
         if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) {
                 for_each_node_state(nid, N_HIGH_MEMORY) {
                         pg_data_t *pgdat = NODE_DATA(nid);
- -                      node_to_cpumask_ptr(mask, pgdat->node_id);
+ +                      const struct cpumask *mask;
+ +
+ +                      mask = cpumask_of_node(pgdat->node_id);
   
                         if (cpumask_any_and(cpu_online_mask, mask) < nr_cpu_ids)
                                 /* One of our CPUs online: restore mask */
author	Ingo Molnar <mingo@elte.hu>
	Mon, 30 Mar 2009 21:53:32 +0000 (23:53 +0200)
committer	Ingo Molnar <mingo@elte.hu>
	Mon, 30 Mar 2009 21:53:32 +0000 (23:53 +0200)
		1	2
arch/x86/include/asm/processor.h	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/cpu/common.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/cpu/cpufreq/p4-clockmod.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/cpu/cpufreq/powernow-k8.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/cpu/cpufreq/speedstep-ich.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/cpu/intel_cacheinfo.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/cpu/mcheck/mce_amd_64.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/cpu/mcheck/mce_intel_64.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/process.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/tlb_uv.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/sched.c	patch \|	diff1 \|	diff2 \|	blob \| history
mm/vmscan.c	patch \|	diff1 \|	diff2 \|	blob \| history