Merge branch 'generic-ipi' into generic-ipi-for-linus

author Ingo Molnar <mingo@elte.hu>

Tue, 15 Jul 2008 19:55:59 +0000 (21:55 +0200)

committer Ingo Molnar <mingo@elte.hu>

Tue, 15 Jul 2008 19:55:59 +0000 (21:55 +0200)
author Ingo Molnar <mingo@elte.hu>
Tue, 15 Jul 2008 19:55:59 +0000 (21:55 +0200)
committer Ingo Molnar <mingo@elte.hu>
Tue, 15 Jul 2008 19:55:59 +0000 (21:55 +0200)
diff --combined arch/arm/Kconfig

index 258f1369fb0cee850749492f74fc308a94aaf7e4,c72dae633f6068a6cdc1b3fc0d6bfc6ca007be2b..c7ad324ddf2cf407c89839f75487f8e27f9fa0cc
--- 1/arch/arm/Kconfig
--- 2/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@@ -14,8 -14,6 +14,8 @@@ config AR
         select HAVE_OPROFILE
         select HAVE_KPROBES if (!XIP_KERNEL)
         select HAVE_KRETPROBES if (HAVE_KPROBES)
+ +      select HAVE_FTRACE if (!XIP_KERNEL)
+ +      select HAVE_DYNAMIC_FTRACE if (HAVE_FTRACE)
         help
           The ARM series is a line of low-power-consumption RISC chip designs
           licensed by ARM Ltd and targeted at embedded applications and
@@@ -24,9 -22,6 +24,9 @@@
           Europe.  There is an ARM Linux project with a web page at
           <http://www.arm.linux.org.uk/>.
   
+ +config HAVE_PWM
+ +      bool
+ +
   config SYS_SUPPORTS_APM_EMULATION
         bool
   
@@@ -89,11 -84,6 +89,11 @@@ config STACKTRACE_SUPPOR
         bool
         default y
   
+ +config HAVE_LATENCYTOP_SUPPORT
+ +      bool
+ +      depends on !SMP
+ +      default y
+ +
   config LOCKDEP_SUPPORT
         bool
         default y
@@@ -157,10 -147,6 +157,10 @@@ config FI
   config ARCH_MTD_XIP
         bool
   
+ +config GENERIC_HARDIRQS_NO__DO_IRQ
+ +      bool
+ +      def_bool y
+ +
   if OPROFILE
   
   config OPROFILE_ARMV6
@@@ -246,6 -232,13 +246,6 @@@ config ARCH_CLPS711
         help
           Support for Cirrus Logic 711x/721x based boards.
   
- -config ARCH_CO285
- -      bool "Co-EBSA285"
- -      select FOOTBRIDGE
- -      select FOOTBRIDGE_ADDIN
- -      help
- -        Support for Intel's EBSA285 companion chip.
- -
   config ARCH_EBSA110
         bool "EBSA-110"
         select ISA
@@@ -306,8 -299,6 +306,8 @@@ config ARCH_IOP32
         depends on MMU
         select PLAT_IOP
         select PCI
+ +      select GENERIC_GPIO
+ +      select HAVE_GPIO_LIB
         help
           Support for Intel's 80219 and IOP32X (XScale) family of
           processors.
@@@ -317,8 -308,6 +317,8 @@@ config ARCH_IOP33
         depends on MMU
         select PLAT_IOP
         select PCI
+ +      select GENERIC_GPIO
+ +      select HAVE_GPIO_LIB
         help
           Support for Intel's IOP33X (XScale) family of processors.
   
@@@ -358,16 -347,6 +358,16 @@@ config ARCH_L720
           If you have any questions or comments about the Linux kernel port
           to this board, send e-mail to <sjhill@cotw.com>.
   
+ +config ARCH_KIRKWOOD
+ +      bool "Marvell Kirkwood"
+ +      select PCI
+ +      select GENERIC_TIME
+ +      select GENERIC_CLOCKEVENTS
+ +      select PLAT_ORION
+ +      help
+ +        Support for the following Marvell Kirkwood series SoCs:
+ +        88F6180, 88F6192 and 88F6281.
+ +
   config ARCH_KS8695
         bool "Micrel/Kendin KS8695"
         select GENERIC_GPIO
@@@ -386,31 -365,9 +386,31 @@@ config ARCH_NS9XX
   
           <http://www.digi.com/products/microprocessors/index.jsp>
   
+ +config ARCH_LOKI
+ +      bool "Marvell Loki (88RC8480)"
+ +      select GENERIC_TIME
+ +      select GENERIC_CLOCKEVENTS
+ +      select PLAT_ORION
+ +      help
+ +        Support for the Marvell Loki (88RC8480) SoC.
+ +
+ +config ARCH_MV78XX0
+ +      bool "Marvell MV78xx0"
+ +      select PCI
+ +      select GENERIC_TIME
+ +      select GENERIC_CLOCKEVENTS
+ +      select PLAT_ORION
+ +      help
+ +        Support for the following Marvell MV78xx0 series SoCs:
+ +        MV781x0, MV782x0.
+ +
   config ARCH_MXC
         bool "Freescale MXC/iMX-based"
+ +      select GENERIC_TIME
+ +      select GENERIC_CLOCKEVENTS
         select ARCH_MTD_XIP
+ +      select GENERIC_GPIO
+ +      select HAVE_GPIO_LIB
         help
           Support for Freescale MXC/iMX-based family of processors
   
@@@ -424,8 -381,7 +424,8 @@@ config ARCH_ORION5
         select PLAT_ORION
         help
           Support for the following Marvell Orion 5x series SoCs:
- -        Orion-1 (5181), Orion-NAS (5182), Orion-2 (5281.)
+ +        Orion-1 (5181), Orion-VoIP (5181L), Orion-NAS (5182),
+ +        Orion-2 (5281).
   
   config ARCH_PNX4008
         bool "Philips Nexperia PNX4008 Mobile"
@@@ -450,7 -406,6 +450,7 @@@ config ARCH_RP
         select FIQ
         select TIMER_ACORN
         select ARCH_MAY_HAVE_PC_FDC
+ +      select HAVE_PATA_PLATFORM
         select ISA_DMA_API
         select NO_IOPORT
         help
@@@ -547,10 -502,6 +547,10 @@@ source "arch/arm/mach-ixp2000/Kconfig
   
   source "arch/arm/mach-ixp23xx/Kconfig"
   
+ +source "arch/arm/mach-loki/Kconfig"
+ +
+ +source "arch/arm/mach-mv78xx0/Kconfig"
+ +
   source "arch/arm/mach-pxa/Kconfig"
   
   source "arch/arm/mach-sa1100/Kconfig"
@@@ -563,8 -514,6 +563,8 @@@ source "arch/arm/mach-omap2/Kconfig
   
   source "arch/arm/mach-orion5x/Kconfig"
   
+ +source "arch/arm/mach-kirkwood/Kconfig"
+ +
   source "arch/arm/plat-s3c24xx/Kconfig"
   source "arch/arm/plat-s3c/Kconfig"
   
@@@ -701,6 -650,7 +701,7 @@@ source "kernel/time/Kconfig
   config SMP
         bool "Symmetric Multi-Processing (EXPERIMENTAL)"
         depends on EXPERIMENTAL && (REALVIEW_EB_ARM11MP || MACH_REALVIEW_PB11MP)
+       select USE_GENERIC_SMP_HELPERS
         help
           This enables support for systems with more than one CPU. If you have
           a system with only one CPU, like most personal computers, say N. If
@@@ -754,6 -704,27 +755,6 @@@ config PREEMP
           Say Y here if you are building a kernel for a desktop, embedded
           or real-time system.  Say N if you are unsure.
   
- -config NO_IDLE_HZ
- -      bool "Dynamic tick timer"
- -      depends on !GENERIC_CLOCKEVENTS
- -      help
- -        Select this option if you want to disable continuous timer ticks
- -        and have them programmed to occur as required. This option saves
- -        power as the system can remain in idle state for longer.
- -
- -        By default dynamic tick is disabled during the boot, and can be
- -        manually enabled with:
- -
- -          echo 1 > /sys/devices/system/timer/timer0/dyn_tick
- -
- -        Alternatively, if you want dynamic tick automatically enabled
- -        during boot, pass "dyntick=enable" via the kernel command string.
- -
- -        Please note that dynamic tick may affect the accuracy of
- -        timekeeping on some platforms depending on the implementation.
- -        Currently at least OMAP, PXA2xx and SA11x0 platforms are known
- -        to have accurate timekeeping with dynamic tick.
- -
   config HZ
         int
         default 128 if ARCH_L7200
@@@ -819,7 -790,7 +820,7 @@@ source "mm/Kconfig
   
   config LEDS
         bool "Timer and CPU usage LEDs"
- -      depends on ARCH_CDB89712 || ARCH_CO285 || ARCH_EBSA110 || \
+ +      depends on ARCH_CDB89712 || ARCH_EBSA110 || \
                    ARCH_EBSA285 || ARCH_IMX || ARCH_INTEGRATOR || \
                    ARCH_LUBBOCK || MACH_MAINSTONE || ARCH_NETWINDER || \
                    ARCH_OMAP || ARCH_P720T || ARCH_PXA_IDP || \
diff --combined arch/mips/Kconfig

index 24c5dee9176872d5358eb228a6228071fe28da25,ea70d5a225ca62642cf66ab8bd1c574fce0b657d..d2be3ffca2802206b1ef4955b16bf3063a08dbea
--- 1/arch/mips/Kconfig
--- 2/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@@ -1006,7 -1006,7 +1006,7 @@@ config BOOT_ELF3
   config MIPS_L1_CACHE_SHIFT
         int
         default "4" if MACH_DECSTATION
- -      default "7" if SGI_IP27 || SGI_IP28 || SNI_RM
+ +      default "7" if SGI_IP22 || SGI_IP27 || SGI_IP28 || SNI_RM
         default "4" if PMC_MSP4200_EVAL
         default "5"
   
@@@ -1763,6 -1763,7 +1763,7 @@@ config SM
         bool "Multi-Processing support"
         depends on SYS_SUPPORTS_SMP
         select IRQ_PER_CPU
+       select USE_GENERIC_SMP_HELPERS
         help
           This enables support for systems with more than one CPU. If you have
           a system with only one CPU, like most personal computers, say N. If
diff --combined arch/powerpc/Kconfig

index a5e9912e2d3773fdab23a4bc52ec5e147f47e1cf,852d40c29637ac199e5e539b843e60f9668b932d..20eacf2a842474ab0c4d5c3c39aadd6a85ad5ae7
--- 1/arch/powerpc/Kconfig
--- 2/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@@ -105,13 -105,12 +105,14 @@@ config ARCH_NO_VIRT_TO_BU
   config PPC
         bool
         default y
+ +      select HAVE_DYNAMIC_FTRACE
+ +      select HAVE_FTRACE
         select HAVE_IDE
- -      select HAVE_OPROFILE
         select HAVE_KPROBES
         select HAVE_KRETPROBES
         select HAVE_LMB
+       select USE_GENERIC_SMP_HELPERS if SMP
+ +      select HAVE_OPROFILE
   
   config EARLY_PRINTK
         bool
diff --combined arch/s390/appldata/appldata_base.c

index 9cb3d92447a35651c5c67d5f495a5d1fa5981b2f,837a3b3e77599938d4467573b2ea32a1fc8f9d32..a7f8979fb92584bcf45ef94001557b2fe02d9674
--- 1/arch/s390/appldata/appldata_base.c
--- 2/arch/s390/appldata/appldata_base.c
+++ b/arch/s390/appldata/appldata_base.c
@@@ -5,7 -5,7 +5,7 @@@
    * Exports appldata_register_ops() and appldata_unregister_ops() for the
    * data gathering modules.
    *
- - * Copyright (C) 2003,2006 IBM Corporation, IBM Deutschland Entwicklung GmbH.
+ + * Copyright IBM Corp. 2003, 2008
    *
    * Author: Gerald Schaefer <gerald.schaefer@de.ibm.com>
    */
@@@ -108,6 -108,9 +108,6 @@@ static LIST_HEAD(appldata_ops_list)
    */
   static void appldata_timer_function(unsigned long data)
   {
- -      P_DEBUG("   -= Timer =-\n");
- -      P_DEBUG("CPU: %i, expire_count: %i\n", smp_processor_id(),
- -              atomic_read(&appldata_expire_count));
         if (atomic_dec_and_test(&appldata_expire_count)) {
                 atomic_set(&appldata_expire_count, num_online_cpus());
                 queue_work(appldata_wq, (struct work_struct *) data);
@@@ -125,11 -128,14 +125,11 @@@ static void appldata_work_fn(struct wor
         struct appldata_ops *ops;
         int i;
   
- -      P_DEBUG("  -= Work Queue =-\n");
         i = 0;
         get_online_cpus();
         spin_lock(&appldata_ops_lock);
         list_for_each(lh, &appldata_ops_list) {
                 ops = list_entry(lh, struct appldata_ops, list);
- -              P_DEBUG("list_for_each loop: %i) active = %u, name = %s\n",
- -                      ++i, ops->active, ops->name);
                 if (ops->active == 1) {
                         ops->callback(ops->data);
                 }
@@@ -203,9 -209,10 +203,9 @@@ __appldata_vtimer_setup(int cmd
                         per_cpu(appldata_timer, i).expires = per_cpu_interval;
                         smp_call_function_single(i, add_virt_timer_periodic,
                                                  &per_cpu(appldata_timer, i),
-                                                0, 1);
+                                                1);
                 }
                 appldata_timer_active = 1;
- -              P_INFO("Monitoring timer started.\n");
                 break;
         case APPLDATA_DEL_TIMER:
                 for_each_online_cpu(i)
@@@ -214,6 -221,7 +214,6 @@@
                         break;
                 appldata_timer_active = 0;
                 atomic_set(&appldata_expire_count, num_online_cpus());
- -              P_INFO("Monitoring timer stopped.\n");
                 break;
         case APPLDATA_MOD_TIMER:
                 per_cpu_interval = (u64) (appldata_interval*1000 /
@@@ -228,7 -236,7 +228,7 @@@
                         args.timer = &per_cpu(appldata_timer, i);
                         args.expires = per_cpu_interval;
                         smp_call_function_single(i, __appldata_mod_vtimer_wrap,
-                                                &args, 0, 1);
+                                                &args, 1);
                 }
         }
   }
@@@ -305,8 -313,10 +305,8 @@@ appldata_interval_handler(ctl_table *ct
         }
         interval = 0;
         sscanf(buf, "%i", &interval);
- -      if (interval <= 0) {
- -              P_ERROR("Timer CPU interval has to be > 0!\n");
+ +      if (interval <= 0)
                 return -EINVAL;
- -      }
   
         get_online_cpus();
         spin_lock(&appldata_timer_lock);
@@@ -314,6 -324,9 +314,6 @@@
         __appldata_vtimer_setup(APPLDATA_MOD_TIMER);
         spin_unlock(&appldata_timer_lock);
         put_online_cpus();
- -
- -      P_INFO("Monitoring CPU interval set to %u milliseconds.\n",
- -               interval);
   out:
         *lenp = len;
         *ppos += len;
@@@ -393,16 -406,23 +393,16 @@@ appldata_generic_handler(ctl_table *ctl
                         P_ERROR("START DIAG 0xDC for %s failed, "
                                 "return code: %d\n", ops->name, rc);
                         module_put(ops->owner);
- -              } else {
- -                      P_INFO("Monitoring %s data enabled, "
- -                              "DIAG 0xDC started.\n", ops->name);
+ +              } else
                         ops->active = 1;
- -              }
         } else if ((buf[0] == '0') && (ops->active == 1)) {
                 ops->active = 0;
                 rc = appldata_diag(ops->record_nr, APPLDATA_STOP_REC,
                                 (unsigned long) ops->data, ops->size,
                                 ops->mod_lvl);
- -              if (rc != 0) {
+ +              if (rc != 0)
                         P_ERROR("STOP DIAG 0xDC for %s failed, "
                                 "return code: %d\n", ops->name, rc);
- -              } else {
- -                      P_INFO("Monitoring %s data disabled, "
- -                              "DIAG 0xDC stopped.\n", ops->name);
- -              }
                 module_put(ops->owner);
         }
         spin_unlock(&appldata_ops_lock);
@@@ -448,6 -468,7 +448,6 @@@ int appldata_register_ops(struct applda
         ops->sysctl_header = register_sysctl_table(ops->ctl_table);
         if (!ops->sysctl_header)
                 goto out;
- -      P_INFO("%s-ops registered!\n", ops->name);
         return 0;
   out:
         spin_lock(&appldata_ops_lock);
@@@ -469,6 -490,7 +469,6 @@@ void appldata_unregister_ops(struct app
         spin_unlock(&appldata_ops_lock);
         unregister_sysctl_table(ops->sysctl_header);
         kfree(ops->ctl_table);
- -      P_INFO("%s-ops unregistered!\n", ops->name);
   }
   /********************** module-ops management <END> **************************/
   
@@@ -531,9 -553,14 +531,9 @@@ static int __init appldata_init(void
   {
         int i;
   
- -      P_DEBUG("sizeof(parameter_list) = %lu\n",
- -              sizeof(struct appldata_parameter_list));
- -
         appldata_wq = create_singlethread_workqueue("appldata");
- -      if (!appldata_wq) {
- -              P_ERROR("Could not create work queue\n");
+ +      if (!appldata_wq)
                 return -ENOMEM;
- -      }
   
         get_online_cpus();
         for_each_online_cpu(i)
@@@ -544,6 -571,8 +544,6 @@@
         register_hotcpu_notifier(&appldata_nb);
   
         appldata_sysctl_header = register_sysctl_table(appldata_dir_table);
- -
- -      P_DEBUG("Base interface initialized.\n");
         return 0;
   }
   
@@@ -555,9 -584,7 +555,9 @@@ EXPORT_SYMBOL_GPL(appldata_register_ops
   EXPORT_SYMBOL_GPL(appldata_unregister_ops);
   EXPORT_SYMBOL_GPL(appldata_diag);
   
+ +#ifdef CONFIG_SWAP
   EXPORT_SYMBOL_GPL(si_swapinfo);
+ +#endif
   EXPORT_SYMBOL_GPL(nr_threads);
   EXPORT_SYMBOL_GPL(nr_running);
   EXPORT_SYMBOL_GPL(nr_iowait);
diff --combined arch/s390/kernel/time.c

index 7418bebb547fad641cb79a8b209e8d12d8b842c7,6037ed2b747121b6de869d3e7287e58e39a8d306..8051e9326dfcd0b8ba58b6fa44bf356637e7d2d8
--- 1/arch/s390/kernel/time.c
--- 2/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@@ -3,7 -3,7 +3,7 @@@
    *    Time of day based timer functions.
    *
    *  S390 version
- - *    Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation
+ + *    Copyright IBM Corp. 1999, 2008
    *    Author(s): Hartmut Penner (hp@de.ibm.com),
    *               Martin Schwidefsky (schwidefsky@de.ibm.com),
    *               Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
@@@ -31,7 -31,6 +31,7 @@@
   #include <linux/notifier.h>
   #include <linux/clocksource.h>
   #include <linux/clockchips.h>
+ +#include <linux/bootmem.h>
   #include <asm/uaccess.h>
   #include <asm/delay.h>
   #include <asm/s390_ext.h>
@@@ -163,7 -162,7 +163,7 @@@ void init_cpu_timer(void
         /* Enable clock comparator timer interrupt. */
         __ctl_set_bit(0,11);
   
- -      /* Always allow ETR external interrupts, even without an ETR. */
+ +      /* Always allow the timing alert external interrupt. */
         __ctl_set_bit(0, 4);
   }
   
@@@ -171,21 -170,8 +171,21 @@@ static void clock_comparator_interrupt(
   {
   }
   
+ +static void etr_timing_alert(struct etr_irq_parm *);
+ +static void stp_timing_alert(struct stp_irq_parm *);
+ +
+ +static void timing_alert_interrupt(__u16 code)
+ +{
+ +      if (S390_lowcore.ext_params & 0x00c40000)
+ +              etr_timing_alert((struct etr_irq_parm *)
+ +                               &S390_lowcore.ext_params);
+ +      if (S390_lowcore.ext_params & 0x00038000)
+ +              stp_timing_alert((struct stp_irq_parm *)
+ +                               &S390_lowcore.ext_params);
+ +}
+ +
   static void etr_reset(void);
- -static void etr_ext_handler(__u16);
+ +static void stp_reset(void);
   
   /*
    * Get the TOD clock running.
@@@ -195,7 -181,6 +195,7 @@@ static u64 __init reset_tod_clock(void
         u64 time;
   
         etr_reset();
+ +      stp_reset();
         if (store_clock(&time) == 0)
                 return time;
         /* TOD clock not running. Set the clock to Unix Epoch. */
@@@ -246,9 -231,8 +246,9 @@@ void __init time_init(void
         if (clocksource_register(&clocksource_tod) != 0)
                 panic("Could not register TOD clock source");
   
- -      /* request the etr external interrupt */
- -      if (register_early_external_interrupt(0x1406, etr_ext_handler,
+ +      /* request the timing alert external interrupt */
+ +      if (register_early_external_interrupt(0x1406,
+ +                                            timing_alert_interrupt,
                                               &ext_int_etr_cc) != 0)
                 panic("Couldn't request external interrupt 0x1406");
   
@@@ -260,113 -244,11 +260,113 @@@
   #endif
   }
   
+ +/*
+ + * The time is "clock". old is what we think the time is.
+ + * Adjust the value by a multiple of jiffies and add the delta to ntp.
+ + * "delay" is an approximation how long the synchronization took. If
+ + * the time correction is positive, then "delay" is subtracted from
+ + * the time difference and only the remaining part is passed to ntp.
+ + */
+ +static unsigned long long adjust_time(unsigned long long old,
+ +                                    unsigned long long clock,
+ +                                    unsigned long long delay)
+ +{
+ +      unsigned long long delta, ticks;
+ +      struct timex adjust;
+ +
+ +      if (clock > old) {
+ +              /* It is later than we thought. */
+ +              delta = ticks = clock - old;
+ +              delta = ticks = (delta < delay) ? 0 : delta - delay;
+ +              delta -= do_div(ticks, CLK_TICKS_PER_JIFFY);
+ +              adjust.offset = ticks * (1000000 / HZ);
+ +      } else {
+ +              /* It is earlier than we thought. */
+ +              delta = ticks = old - clock;
+ +              delta -= do_div(ticks, CLK_TICKS_PER_JIFFY);
+ +              delta = -delta;
+ +              adjust.offset = -ticks * (1000000 / HZ);
+ +      }
+ +      jiffies_timer_cc += delta;
+ +      if (adjust.offset != 0) {
+ +              printk(KERN_NOTICE "etr: time adjusted by %li micro-seconds\n",
+ +                     adjust.offset);
+ +              adjust.modes = ADJ_OFFSET_SINGLESHOT;
+ +              do_adjtimex(&adjust);
+ +      }
+ +      return delta;
+ +}
+ +
+ +static DEFINE_PER_CPU(atomic_t, clock_sync_word);
+ +static unsigned long clock_sync_flags;
+ +
+ +#define CLOCK_SYNC_HAS_ETR    0
+ +#define CLOCK_SYNC_HAS_STP    1
+ +#define CLOCK_SYNC_ETR                2
+ +#define CLOCK_SYNC_STP                3
+ +
+ +/*
+ + * The synchronous get_clock function. It will write the current clock
+ + * value to the clock pointer and return 0 if the clock is in sync with
+ + * the external time source. If the clock mode is local it will return
+ + * -ENOSYS and -EAGAIN if the clock is not in sync with the external
+ + * reference.
+ + */
+ +int get_sync_clock(unsigned long long *clock)
+ +{
+ +      atomic_t *sw_ptr;
+ +      unsigned int sw0, sw1;
+ +
+ +      sw_ptr = &get_cpu_var(clock_sync_word);
+ +      sw0 = atomic_read(sw_ptr);
+ +      *clock = get_clock();
+ +      sw1 = atomic_read(sw_ptr);
+ +      put_cpu_var(clock_sync_sync);
+ +      if (sw0 == sw1 && (sw0 & 0x80000000U))
+ +              /* Success: time is in sync. */
+ +              return 0;
+ +      if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags) &&
+ +          !test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags))
+ +              return -ENOSYS;
+ +      if (!test_bit(CLOCK_SYNC_ETR, &clock_sync_flags) &&
+ +          !test_bit(CLOCK_SYNC_STP, &clock_sync_flags))
+ +              return -EACCES;
+ +      return -EAGAIN;
+ +}
+ +EXPORT_SYMBOL(get_sync_clock);
+ +
+ +/*
+ + * Make get_sync_clock return -EAGAIN.
+ + */
+ +static void disable_sync_clock(void *dummy)
+ +{
+ +      atomic_t *sw_ptr = &__get_cpu_var(clock_sync_word);
+ +      /*
+ +       * Clear the in-sync bit 2^31. All get_sync_clock calls will
+ +       * fail until the sync bit is turned back on. In addition
+ +       * increase the "sequence" counter to avoid the race of an
+ +       * etr event and the complete recovery against get_sync_clock.
+ +       */
+ +      atomic_clear_mask(0x80000000, sw_ptr);
+ +      atomic_inc(sw_ptr);
+ +}
+ +
+ +/*
+ + * Make get_sync_clock return 0 again.
+ + * Needs to be called from a context disabled for preemption.
+ + */
+ +static void enable_sync_clock(void)
+ +{
+ +      atomic_t *sw_ptr = &__get_cpu_var(clock_sync_word);
+ +      atomic_set_mask(0x80000000, sw_ptr);
+ +}
+ +
   /*
    * External Time Reference (ETR) code.
    */
   static int etr_port0_online;
   static int etr_port1_online;
+ +static int etr_steai_available;
   
   static int __init early_parse_etr(char *p)
   {
@@@ -391,6 -273,12 +391,6 @@@ enum etr_event 
         ETR_EVENT_UPDATE,
   };
   
- -enum etr_flags {
- -      ETR_FLAG_ENOSYS,
- -      ETR_FLAG_EACCES,
- -      ETR_FLAG_STEAI,
- -};
- -
   /*
    * Valid bit combinations of the eacr register are (x = don't care):
    * e0 e1 dp p0 p1 ea es sl
@@@ -417,17 -305,73 +417,17 @@@
    */
   static struct etr_eacr etr_eacr;
   static u64 etr_tolec;                 /* time of last eacr update */
- -static unsigned long etr_flags;
   static struct etr_aib etr_port0;
   static int etr_port0_uptodate;
   static struct etr_aib etr_port1;
   static int etr_port1_uptodate;
   static unsigned long etr_events;
   static struct timer_list etr_timer;
- -static DEFINE_PER_CPU(atomic_t, etr_sync_word);
   
   static void etr_timeout(unsigned long dummy);
   static void etr_work_fn(struct work_struct *work);
   static DECLARE_WORK(etr_work, etr_work_fn);
   
- -/*
- - * The etr get_clock function. It will write the current clock value
- - * to the clock pointer and return 0 if the clock is in sync with the
- - * external time source. If the clock mode is local it will return
- - * -ENOSYS and -EAGAIN if the clock is not in sync with the external
- - * reference. This function is what ETR is all about..
- - */
- -int get_sync_clock(unsigned long long *clock)
- -{
- -      atomic_t *sw_ptr;
- -      unsigned int sw0, sw1;
- -
- -      sw_ptr = &get_cpu_var(etr_sync_word);
- -      sw0 = atomic_read(sw_ptr);
- -      *clock = get_clock();
- -      sw1 = atomic_read(sw_ptr);
- -      put_cpu_var(etr_sync_sync);
- -      if (sw0 == sw1 && (sw0 & 0x80000000U))
- -              /* Success: time is in sync. */
- -              return 0;
- -      if (test_bit(ETR_FLAG_ENOSYS, &etr_flags))
- -              return -ENOSYS;
- -      if (test_bit(ETR_FLAG_EACCES, &etr_flags))
- -              return -EACCES;
- -      return -EAGAIN;
- -}
- -EXPORT_SYMBOL(get_sync_clock);
- -
- -/*
- - * Make get_sync_clock return -EAGAIN.
- - */
- -static void etr_disable_sync_clock(void *dummy)
- -{
- -      atomic_t *sw_ptr = &__get_cpu_var(etr_sync_word);
- -      /*
- -       * Clear the in-sync bit 2^31. All get_sync_clock calls will
- -       * fail until the sync bit is turned back on. In addition
- -       * increase the "sequence" counter to avoid the race of an
- -       * etr event and the complete recovery against get_sync_clock.
- -       */
- -      atomic_clear_mask(0x80000000, sw_ptr);
- -      atomic_inc(sw_ptr);
- -}
- -
- -/*
- - * Make get_sync_clock return 0 again.
- - * Needs to be called from a context disabled for preemption.
- - */
- -static void etr_enable_sync_clock(void)
- -{
- -      atomic_t *sw_ptr = &__get_cpu_var(etr_sync_word);
- -      atomic_set_mask(0x80000000, sw_ptr);
- -}
- -
   /*
    * Reset ETR attachment.
    */
@@@ -437,13 -381,15 +437,13 @@@ static void etr_reset(void
                 .e0 = 0, .e1 = 0, ._pad0 = 4, .dp = 0,
                 .p0 = 0, .p1 = 0, ._pad1 = 0, .ea = 0,
                 .es = 0, .sl = 0 };
- -      if (etr_setr(&etr_eacr) == 0)
+ +      if (etr_setr(&etr_eacr) == 0) {
                 etr_tolec = get_clock();
- -      else {
- -              set_bit(ETR_FLAG_ENOSYS, &etr_flags);
- -              if (etr_port0_online || etr_port1_online) {
- -                      printk(KERN_WARNING "Running on non ETR capable "
- -                             "machine, only local mode available.\n");
- -                      etr_port0_online = etr_port1_online = 0;
- -              }
+ +              set_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags);
+ +      } else if (etr_port0_online || etr_port1_online) {
+ +              printk(KERN_WARNING "Running on non ETR capable "
+ +                     "machine, only local mode available.\n");
+ +              etr_port0_online = etr_port1_online = 0;
         }
   }
   
@@@ -451,12 -397,14 +451,12 @@@ static int __init etr_init(void
   {
         struct etr_aib aib;
   
- -      if (test_bit(ETR_FLAG_ENOSYS, &etr_flags))
+ +      if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags))
                 return 0;
         /* Check if this machine has the steai instruction. */
         if (etr_steai(&aib, ETR_STEAI_STEPPING_PORT) == 0)
- -              set_bit(ETR_FLAG_STEAI, &etr_flags);
+ +              etr_steai_available = 1;
         setup_timer(&etr_timer, etr_timeout, 0UL);
- -      if (!etr_port0_online && !etr_port1_online)
- -              set_bit(ETR_FLAG_EACCES, &etr_flags);
         if (etr_port0_online) {
                 set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events);
                 schedule_work(&etr_work);
@@@ -487,8 -435,7 +487,8 @@@ void etr_switch_to_local(void
   {
         if (!etr_eacr.sl)
                 return;
- -      etr_disable_sync_clock(NULL);
+ +      if (test_bit(CLOCK_SYNC_ETR, &clock_sync_flags))
+ +              disable_sync_clock(NULL);
         set_bit(ETR_EVENT_SWITCH_LOCAL, &etr_events);
         schedule_work(&etr_work);
   }
@@@ -503,21 -450,23 +503,21 @@@ void etr_sync_check(void
   {
         if (!etr_eacr.es)
                 return;
- -      etr_disable_sync_clock(NULL);
+ +      if (test_bit(CLOCK_SYNC_ETR, &clock_sync_flags))
+ +              disable_sync_clock(NULL);
         set_bit(ETR_EVENT_SYNC_CHECK, &etr_events);
         schedule_work(&etr_work);
   }
   
   /*
- - * ETR external interrupt. There are two causes:
+ + * ETR timing alert. There are two causes:
    * 1) port state change, check the usability of the port
    * 2) port alert, one of the ETR-data-validity bits (v1-v2 bits of the
    *    sldr-status word) or ETR-data word 1 (edf1) or ETR-data word 3 (edf3)
    *    or ETR-data word 4 (edf4) has changed.
    */
- -static void etr_ext_handler(__u16 code)
+ +static void etr_timing_alert(struct etr_irq_parm *intparm)
   {
- -      struct etr_interruption_parameter *intparm =
- -              (struct etr_interruption_parameter *) &S390_lowcore.ext_params;
- -
         if (intparm->pc0)
                 /* ETR port 0 state change. */
                 set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events);
@@@ -642,23 -591,58 +642,23 @@@ static int etr_aib_follows(struct etr_a
         return 1;
   }
   
- -/*
- - * The time is "clock". old is what we think the time is.
- - * Adjust the value by a multiple of jiffies and add the delta to ntp.
- - * "delay" is an approximation how long the synchronization took. If
- - * the time correction is positive, then "delay" is subtracted from
- - * the time difference and only the remaining part is passed to ntp.
- - */
- -static unsigned long long etr_adjust_time(unsigned long long old,
- -                                        unsigned long long clock,
- -                                        unsigned long long delay)
- -{
- -      unsigned long long delta, ticks;
- -      struct timex adjust;
- -
- -      if (clock > old) {
- -              /* It is later than we thought. */
- -              delta = ticks = clock - old;
- -              delta = ticks = (delta < delay) ? 0 : delta - delay;
- -              delta -= do_div(ticks, CLK_TICKS_PER_JIFFY);
- -              adjust.offset = ticks * (1000000 / HZ);
- -      } else {
- -              /* It is earlier than we thought. */
- -              delta = ticks = old - clock;
- -              delta -= do_div(ticks, CLK_TICKS_PER_JIFFY);
- -              delta = -delta;
- -              adjust.offset = -ticks * (1000000 / HZ);
- -      }
- -      jiffies_timer_cc += delta;
- -      if (adjust.offset != 0) {
- -              printk(KERN_NOTICE "etr: time adjusted by %li micro-seconds\n",
- -                     adjust.offset);
- -              adjust.modes = ADJ_OFFSET_SINGLESHOT;
- -              do_adjtimex(&adjust);
- -      }
- -      return delta;
- -}
- -
- -static struct {
+ +struct clock_sync_data {
         int in_sync;
         unsigned long long fixup_cc;
- -} etr_sync;
+ +};
   
- -static void etr_sync_cpu_start(void *dummy)
+ +static void clock_sync_cpu_start(void *dummy)
   {
- -      etr_enable_sync_clock();
+ +      struct clock_sync_data *sync = dummy;
+ +
+ +      enable_sync_clock();
         /*
          * This looks like a busy wait loop but it isn't. etr_sync_cpus
          * is called on all other cpus while the TOD clocks is stopped.
          * __udelay will stop the cpu on an enabled wait psw until the
          * TOD is running again.
          */
- -      while (etr_sync.in_sync == 0) {
+ +      while (sync->in_sync == 0) {
                 __udelay(1);
                 /*
                  * A different cpu changes *in_sync. Therefore use
@@@ -666,17 -650,17 +666,17 @@@
                  */
                 barrier();
         }
- -      if (etr_sync.in_sync != 1)
+ +      if (sync->in_sync != 1)
                 /* Didn't work. Clear per-cpu in sync bit again. */
- -              etr_disable_sync_clock(NULL);
+ +              disable_sync_clock(NULL);
         /*
          * This round of TOD syncing is done. Set the clock comparator
          * to the next tick and let the processor continue.
          */
- -      fixup_clock_comparator(etr_sync.fixup_cc);
+ +      fixup_clock_comparator(sync->fixup_cc);
   }
   
- -static void etr_sync_cpu_end(void *dummy)
+ +static void clock_sync_cpu_end(void *dummy)
   {
   }
   
@@@ -688,7 -672,6 +688,7 @@@
   static int etr_sync_clock(struct etr_aib *aib, int port)
   {
         struct etr_aib *sync_port;
+ +      struct clock_sync_data etr_sync;
         unsigned long long clock, old_clock, delay, delta;
         int follows;
         int rc;
@@@ -707,9 -690,9 +707,9 @@@
          */
         memset(&etr_sync, 0, sizeof(etr_sync));
         preempt_disable();
-       smp_call_function(clock_sync_cpu_start, &etr_sync, 0, 0);
- -      smp_call_function(etr_sync_cpu_start, NULL, 0);
++      smp_call_function(clock_sync_cpu_start, &etr_sync, 0);
         local_irq_disable();
- -      etr_enable_sync_clock();
+ +      enable_sync_clock();
   
         /* Set clock to next OTE. */
         __ctl_set_bit(14, 21);
@@@ -724,13 -707,13 +724,13 @@@
                 /* Adjust Linux timing variables. */
                 delay = (unsigned long long)
                         (aib->edf2.etv - sync_port->edf2.etv) << 32;
- -              delta = etr_adjust_time(old_clock, clock, delay);
+ +              delta = adjust_time(old_clock, clock, delay);
                 etr_sync.fixup_cc = delta;
                 fixup_clock_comparator(delta);
                 /* Verify that the clock is properly set. */
                 if (!etr_aib_follows(sync_port, aib, port)) {
                         /* Didn't work. */
- -                      etr_disable_sync_clock(NULL);
+ +                      disable_sync_clock(NULL);
                         etr_sync.in_sync = -EAGAIN;
                         rc = -EAGAIN;
                 } else {
@@@ -741,12 -724,12 +741,12 @@@
                 /* Could not set the clock ?!? */
                 __ctl_clear_bit(0, 29);
                 __ctl_clear_bit(14, 21);
- -              etr_disable_sync_clock(NULL);
+ +              disable_sync_clock(NULL);
                 etr_sync.in_sync = -EAGAIN;
                 rc = -EAGAIN;
         }
         local_irq_enable();
-       smp_call_function(clock_sync_cpu_end, NULL, 0, 0);
- -      smp_call_function(etr_sync_cpu_end,NULL,0);
++      smp_call_function(clock_sync_cpu_end, NULL, 0);
         preempt_enable();
         return rc;
   }
@@@ -849,7 -832,7 +849,7 @@@ static struct etr_eacr etr_handle_updat
          * Do not try to get the alternate port aib if the clock
          * is not in sync yet.
          */
- -      if (!eacr.es)
+ +      if (!test_bit(CLOCK_SYNC_STP, &clock_sync_flags) && !eacr.es)
                 return eacr;
   
         /*
@@@ -857,7 -840,7 +857,7 @@@
          * the other port immediately. If only stetr is available the
          * data-port bit toggle has to be used.
          */
- -      if (test_bit(ETR_FLAG_STEAI, &etr_flags)) {
+ +      if (etr_steai_available) {
                 if (eacr.p0 && !etr_port0_uptodate) {
                         etr_steai_cv(&etr_port0, ETR_STEAI_PORT_0);
                         etr_port0_uptodate = 1;
@@@ -926,10 -909,10 +926,10 @@@ static void etr_work_fn(struct work_str
         if (!eacr.ea) {
                 /* Both ports offline. Reset everything. */
                 eacr.dp = eacr.es = eacr.sl = 0;
-               on_each_cpu(disable_sync_clock, NULL, 0, 1);
- -              on_each_cpu(etr_disable_sync_clock, NULL, 1);
++              on_each_cpu(disable_sync_clock, NULL, 1);
                 del_timer_sync(&etr_timer);
                 etr_update_eacr(eacr);
- -              set_bit(ETR_FLAG_EACCES, &etr_flags);
+ +              clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
                 return;
         }
   
@@@ -970,6 -953,7 +970,6 @@@
                         eacr.e1 = 1;
                 sync_port = (etr_port0_uptodate &&
                              etr_port_valid(&etr_port0, 0)) ? 0 : -1;
- -              clear_bit(ETR_FLAG_EACCES, &etr_flags);
         } else if (eacr.p1 && aib.esw.psc1 == etr_lpsc_pps_mode) {
                 eacr.sl = 0;
                 eacr.e0 = 0;
@@@ -978,6 -962,7 +978,6 @@@
                         eacr.es = 0;
                 sync_port = (etr_port1_uptodate &&
                              etr_port_valid(&etr_port1, 1)) ? 1 : -1;
- -              clear_bit(ETR_FLAG_EACCES, &etr_flags);
         } else if (eacr.p0 && aib.esw.psc0 == etr_lpsc_operational_step) {
                 eacr.sl = 1;
                 eacr.e0 = 1;
@@@ -991,6 -976,7 +991,6 @@@
                         eacr.e1 = 1;
                 sync_port = (etr_port0_uptodate &&
                              etr_port_valid(&etr_port0, 0)) ? 0 : -1;
- -              clear_bit(ETR_FLAG_EACCES, &etr_flags);
         } else if (eacr.p1 && aib.esw.psc1 == etr_lpsc_operational_step) {
                 eacr.sl = 1;
                 eacr.e0 = 0;
@@@ -999,22 -985,19 +999,22 @@@
                         eacr.es = 0;
                 sync_port = (etr_port1_uptodate &&
                              etr_port_valid(&etr_port1, 1)) ? 1 : -1;
- -              clear_bit(ETR_FLAG_EACCES, &etr_flags);
         } else {
                 /* Both ports not usable. */
                 eacr.es = eacr.sl = 0;
                 sync_port = -1;
- -              set_bit(ETR_FLAG_EACCES, &etr_flags);
+ +              clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
         }
   
+ +      if (!test_bit(CLOCK_SYNC_ETR, &clock_sync_flags))
+ +              eacr.es = 0;
+ +
         /*
          * If the clock is in sync just update the eacr and return.
          * If there is no valid sync port wait for a port update.
          */
- -      if (eacr.es || sync_port < 0) {
+ +      if (test_bit(CLOCK_SYNC_STP, &clock_sync_flags) ||
+ +          eacr.es || sync_port < 0) {
                 etr_update_eacr(eacr);
                 etr_set_tolec_timeout(now);
                 return;
@@@ -1035,13 -1018,11 +1035,13 @@@
          * and set up a timer to try again after 0.5 seconds
          */
         etr_update_eacr(eacr);
+ +      set_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
         if (now < etr_tolec + (1600000 << 12) ||
             etr_sync_clock(&aib, sync_port) != 0) {
                 /* Sync failed. Try again in 1/2 second. */
                 eacr.es = 0;
                 etr_update_eacr(eacr);
+ +              clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
                 etr_set_sync_timeout();
         } else
                 etr_set_tolec_timeout(now);
@@@ -1116,8 -1097,8 +1116,8 @@@ static ssize_t etr_online_store(struct 
         value = simple_strtoul(buf, NULL, 0);
         if (value != 0 && value != 1)
                 return -EINVAL;
- -      if (test_bit(ETR_FLAG_ENOSYS, &etr_flags))
- -              return -ENOSYS;
+ +      if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags))
+ +              return -EOPNOTSUPP;
         if (dev == &etr_port0_dev) {
                 if (etr_port0_online == value)
                         return count;   /* Nothing to do. */
@@@ -1311,318 -1292,3 +1311,318 @@@ out
   }
   
   device_initcall(etr_init_sysfs);
+ +
+ +/*
+ + * Server Time Protocol (STP) code.
+ + */
+ +static int stp_online;
+ +static struct stp_sstpi stp_info;
+ +static void *stp_page;
+ +
+ +static void stp_work_fn(struct work_struct *work);
+ +static DECLARE_WORK(stp_work, stp_work_fn);
+ +
+ +static int __init early_parse_stp(char *p)
+ +{
+ +      if (strncmp(p, "off", 3) == 0)
+ +              stp_online = 0;
+ +      else if (strncmp(p, "on", 2) == 0)
+ +              stp_online = 1;
+ +      return 0;
+ +}
+ +early_param("stp", early_parse_stp);
+ +
+ +/*
+ + * Reset STP attachment.
+ + */
+ +static void stp_reset(void)
+ +{
+ +      int rc;
+ +
+ +      stp_page = alloc_bootmem_pages(PAGE_SIZE);
+ +      rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000);
+ +      if (rc == 1)
+ +              set_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags);
+ +      else if (stp_online) {
+ +              printk(KERN_WARNING "Running on non STP capable machine.\n");
+ +              free_bootmem((unsigned long) stp_page, PAGE_SIZE);
+ +              stp_page = NULL;
+ +              stp_online = 0;
+ +      }
+ +}
+ +
+ +static int __init stp_init(void)
+ +{
+ +      if (test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags) && stp_online)
+ +              schedule_work(&stp_work);
+ +      return 0;
+ +}
+ +
+ +arch_initcall(stp_init);
+ +
+ +/*
+ + * STP timing alert. There are three causes:
+ + * 1) timing status change
+ + * 2) link availability change
+ + * 3) time control parameter change
+ + * In all three cases we are only interested in the clock source state.
+ + * If a STP clock source is now available use it.
+ + */
+ +static void stp_timing_alert(struct stp_irq_parm *intparm)
+ +{
+ +      if (intparm->tsc || intparm->lac || intparm->tcpc)
+ +              schedule_work(&stp_work);
+ +}
+ +
+ +/*
+ + * STP sync check machine check. This is called when the timing state
+ + * changes from the synchronized state to the unsynchronized state.
+ + * After a STP sync check the clock is not in sync. The machine check
+ + * is broadcast to all cpus at the same time.
+ + */
+ +void stp_sync_check(void)
+ +{
+ +      if (!test_bit(CLOCK_SYNC_STP, &clock_sync_flags))
+ +              return;
+ +      disable_sync_clock(NULL);
+ +      schedule_work(&stp_work);
+ +}
+ +
+ +/*
+ + * STP island condition machine check. This is called when an attached
+ + * server attempts to communicate over an STP link and the servers
+ + * have matching CTN ids and have a valid stratum-1 configuration
+ + * but the configurations do not match.
+ + */
+ +void stp_island_check(void)
+ +{
+ +      if (!test_bit(CLOCK_SYNC_STP, &clock_sync_flags))
+ +              return;
+ +      disable_sync_clock(NULL);
+ +      schedule_work(&stp_work);
+ +}
+ +
+ +/*
+ + * STP work function. Check for the STP state and take over the clock
+ + * synchronization if the STP clock source is usable.
+ + */
+ +static void stp_work_fn(struct work_struct *work)
+ +{
+ +      struct clock_sync_data stp_sync;
+ +      unsigned long long old_clock, delta;
+ +      int rc;
+ +
+ +      if (!stp_online) {
+ +              chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000);
+ +              return;
+ +      }
+ +
+ +      rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0xb0e0);
+ +      if (rc)
+ +              return;
+ +
+ +      rc = chsc_sstpi(stp_page, &stp_info, sizeof(struct stp_sstpi));
+ +      if (rc || stp_info.c == 0)
+ +              return;
+ +
+ +      /*
+ +       * Catch all other cpus and make them wait until we have
+ +       * successfully synced the clock. smp_call_function will
+ +       * return after all other cpus are in clock_sync_cpu_start.
+ +       */
+ +      memset(&stp_sync, 0, sizeof(stp_sync));
+ +      preempt_disable();
+ +      smp_call_function(clock_sync_cpu_start, &stp_sync, 0, 0);
+ +      local_irq_disable();
+ +      enable_sync_clock();
+ +
+ +      set_bit(CLOCK_SYNC_STP, &clock_sync_flags);
+ +      if (test_and_clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags))
+ +              schedule_work(&etr_work);
+ +
+ +      rc = 0;
+ +      if (stp_info.todoff[0] || stp_info.todoff[1] ||
+ +          stp_info.todoff[2] || stp_info.todoff[3] ||
+ +          stp_info.tmd != 2) {
+ +              old_clock = get_clock();
+ +              rc = chsc_sstpc(stp_page, STP_OP_SYNC, 0);
+ +              if (rc == 0) {
+ +                      delta = adjust_time(old_clock, get_clock(), 0);
+ +                      fixup_clock_comparator(delta);
+ +                      rc = chsc_sstpi(stp_page, &stp_info,
+ +                                      sizeof(struct stp_sstpi));
+ +                      if (rc == 0 && stp_info.tmd != 2)
+ +                              rc = -EAGAIN;
+ +              }
+ +      }
+ +      if (rc) {
+ +              disable_sync_clock(NULL);
+ +              stp_sync.in_sync = -EAGAIN;
+ +              clear_bit(CLOCK_SYNC_STP, &clock_sync_flags);
+ +              if (etr_port0_online || etr_port1_online)
+ +                      schedule_work(&etr_work);
+ +      } else
+ +              stp_sync.in_sync = 1;
+ +
+ +      local_irq_enable();
+ +      smp_call_function(clock_sync_cpu_end, NULL, 0, 0);
+ +      preempt_enable();
+ +}
+ +
+ +/*
+ + * STP class sysfs interface functions
+ + */
+ +static struct sysdev_class stp_sysclass = {
+ +      .name   = "stp",
+ +};
+ +
+ +static ssize_t stp_ctn_id_show(struct sysdev_class *class, char *buf)
+ +{
+ +      if (!stp_online)
+ +              return -ENODATA;
+ +      return sprintf(buf, "%016llx\n",
+ +                     *(unsigned long long *) stp_info.ctnid);
+ +}
+ +
+ +static SYSDEV_CLASS_ATTR(ctn_id, 0400, stp_ctn_id_show, NULL);
+ +
+ +static ssize_t stp_ctn_type_show(struct sysdev_class *class, char *buf)
+ +{
+ +      if (!stp_online)
+ +              return -ENODATA;
+ +      return sprintf(buf, "%i\n", stp_info.ctn);
+ +}
+ +
+ +static SYSDEV_CLASS_ATTR(ctn_type, 0400, stp_ctn_type_show, NULL);
+ +
+ +static ssize_t stp_dst_offset_show(struct sysdev_class *class, char *buf)
+ +{
+ +      if (!stp_online || !(stp_info.vbits & 0x2000))
+ +              return -ENODATA;
+ +      return sprintf(buf, "%i\n", (int)(s16) stp_info.dsto);
+ +}
+ +
+ +static SYSDEV_CLASS_ATTR(dst_offset, 0400, stp_dst_offset_show, NULL);
+ +
+ +static ssize_t stp_leap_seconds_show(struct sysdev_class *class, char *buf)
+ +{
+ +      if (!stp_online || !(stp_info.vbits & 0x8000))
+ +              return -ENODATA;
+ +      return sprintf(buf, "%i\n", (int)(s16) stp_info.leaps);
+ +}
+ +
+ +static SYSDEV_CLASS_ATTR(leap_seconds, 0400, stp_leap_seconds_show, NULL);
+ +
+ +static ssize_t stp_stratum_show(struct sysdev_class *class, char *buf)
+ +{
+ +      if (!stp_online)
+ +              return -ENODATA;
+ +      return sprintf(buf, "%i\n", (int)(s16) stp_info.stratum);
+ +}
+ +
+ +static SYSDEV_CLASS_ATTR(stratum, 0400, stp_stratum_show, NULL);
+ +
+ +static ssize_t stp_time_offset_show(struct sysdev_class *class, char *buf)
+ +{
+ +      if (!stp_online || !(stp_info.vbits & 0x0800))
+ +              return -ENODATA;
+ +      return sprintf(buf, "%i\n", (int) stp_info.tto);
+ +}
+ +
+ +static SYSDEV_CLASS_ATTR(time_offset, 0400, stp_time_offset_show, NULL);
+ +
+ +static ssize_t stp_time_zone_offset_show(struct sysdev_class *class, char *buf)
+ +{
+ +      if (!stp_online || !(stp_info.vbits & 0x4000))
+ +              return -ENODATA;
+ +      return sprintf(buf, "%i\n", (int)(s16) stp_info.tzo);
+ +}
+ +
+ +static SYSDEV_CLASS_ATTR(time_zone_offset, 0400,
+ +                       stp_time_zone_offset_show, NULL);
+ +
+ +static ssize_t stp_timing_mode_show(struct sysdev_class *class, char *buf)
+ +{
+ +      if (!stp_online)
+ +              return -ENODATA;
+ +      return sprintf(buf, "%i\n", stp_info.tmd);
+ +}
+ +
+ +static SYSDEV_CLASS_ATTR(timing_mode, 0400, stp_timing_mode_show, NULL);
+ +
+ +static ssize_t stp_timing_state_show(struct sysdev_class *class, char *buf)
+ +{
+ +      if (!stp_online)
+ +              return -ENODATA;
+ +      return sprintf(buf, "%i\n", stp_info.tst);
+ +}
+ +
+ +static SYSDEV_CLASS_ATTR(timing_state, 0400, stp_timing_state_show, NULL);
+ +
+ +static ssize_t stp_online_show(struct sysdev_class *class, char *buf)
+ +{
+ +      return sprintf(buf, "%i\n", stp_online);
+ +}
+ +
+ +static ssize_t stp_online_store(struct sysdev_class *class,
+ +                              const char *buf, size_t count)
+ +{
+ +      unsigned int value;
+ +
+ +      value = simple_strtoul(buf, NULL, 0);
+ +      if (value != 0 && value != 1)
+ +              return -EINVAL;
+ +      if (!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags))
+ +              return -EOPNOTSUPP;
+ +      stp_online = value;
+ +      schedule_work(&stp_work);
+ +      return count;
+ +}
+ +
+ +/*
+ + * Can't use SYSDEV_CLASS_ATTR because the attribute should be named
+ + * stp/online but attr_online already exists in this file ..
+ + */
+ +static struct sysdev_class_attribute attr_stp_online = {
+ +      .attr = { .name = "online", .mode = 0600 },
+ +      .show   = stp_online_show,
+ +      .store  = stp_online_store,
+ +};
+ +
+ +static struct sysdev_class_attribute *stp_attributes[] = {
+ +      &attr_ctn_id,
+ +      &attr_ctn_type,
+ +      &attr_dst_offset,
+ +      &attr_leap_seconds,
+ +      &attr_stp_online,
+ +      &attr_stratum,
+ +      &attr_time_offset,
+ +      &attr_time_zone_offset,
+ +      &attr_timing_mode,
+ +      &attr_timing_state,
+ +      NULL
+ +};
+ +
+ +static int __init stp_init_sysfs(void)
+ +{
+ +      struct sysdev_class_attribute **attr;
+ +      int rc;
+ +
+ +      rc = sysdev_class_register(&stp_sysclass);
+ +      if (rc)
+ +              goto out;
+ +      for (attr = stp_attributes; *attr; attr++) {
+ +              rc = sysdev_class_create_file(&stp_sysclass, *attr);
+ +              if (rc)
+ +                      goto out_unreg;
+ +      }
+ +      return 0;
+ +out_unreg:
+ +      for (; attr >= stp_attributes; attr--)
+ +              sysdev_class_remove_file(&stp_sysclass, *attr);
+ +      sysdev_class_unregister(&stp_sysclass);
+ +out:
+ +      return rc;
+ +}
+ +
+ +device_initcall(stp_init_sysfs);
diff --combined arch/x86/Kconfig

index 2642b4bf41b9798ecc8663e2e8770c35ab55f8bc,2f3fbebf51d8e3d7e13904d87c99bbd5c2dd78eb..96e0c2ebc3885713a5d6290f5e8eb959d0d0d36e
--- 1/arch/x86/Kconfig
--- 2/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@@ -23,8 -23,6 +23,8 @@@ config X8
         select HAVE_OPROFILE
         select HAVE_KPROBES
         select HAVE_KRETPROBES
+ +      select HAVE_DYNAMIC_FTRACE
+ +      select HAVE_FTRACE
         select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
         select HAVE_ARCH_KGDB if !X86_VOYAGER
   
@@@ -123,7 -121,7 +123,7 @@@ config ARCH_HAS_CACHE_LINE_SIZ
         def_bool y
   
   config HAVE_SETUP_PER_CPU_AREA
- -      def_bool X86_64 || (X86_SMP && !X86_VOYAGER)
+ +      def_bool X86_64_SMP || (X86_SMP && !X86_VOYAGER)
   
   config HAVE_CPUMASK_OF_CPU_MAP
         def_bool X86_64_SMP
@@@ -170,6 -168,7 +170,7 @@@ config GENERIC_PENDING_IR
   config X86_SMP
         bool
         depends on SMP && ((X86_32 && !X86_VOYAGER) || X86_64)
+       select USE_GENERIC_SMP_HELPERS
         default y
   
   config X86_32_SMP
@@@ -183,12 -182,12 +184,12 @@@ config X86_64_SM
   config X86_HT
         bool
         depends on SMP
- -      depends on (X86_32 && !(X86_VISWS || X86_VOYAGER)) || X86_64
+ +      depends on (X86_32 && !X86_VOYAGER) || X86_64
         default y
   
   config X86_BIOS_REBOOT
         bool
- -      depends on !X86_VISWS && !X86_VOYAGER
+ +      depends on !X86_VOYAGER
         default y
   
   config X86_TRAMPOLINE
@@@ -232,26 -231,6 +233,26 @@@ config SM
   
           If you don't know what to do here, say N.
   
+ +config X86_FIND_SMP_CONFIG
+ +      def_bool y
+ +      depends on X86_MPPARSE || X86_VOYAGER
+ +
+ +if ACPI
+ +config X86_MPPARSE
+ +      def_bool y
+ +      bool "Enable MPS table"
+ +      depends on X86_LOCAL_APIC
+ +      help
+ +        For old SMP systems that do not have proper ACPI support. On newer systems
+ +        (esp. with 64-bit CPUs) with ACPI support, the MADT and DSDT will override it.
+ +endif
+ +
+ +if !ACPI
+ +config X86_MPPARSE
+ +      def_bool y
+ +      depends on X86_LOCAL_APIC
+ +endif
+ +
   choice
         prompt "Subarchitecture Type"
         default X86_PC
@@@ -273,7 -252,7 +274,7 @@@ config X86_ELA
   
   config X86_VOYAGER
         bool "Voyager (NCR)"
- -      depends on X86_32 && (SMP || BROKEN)
+ +      depends on X86_32 && (SMP || BROKEN) && !PCI
         help
           Voyager is an MCA-based 32-way capable SMP architecture proprietary
           to NCR Corp.  Machine classes 345x/35xx/4100/51xx are Voyager-based.
@@@ -283,27 -262,16 +284,27 @@@
           If you do not specifically know you have a Voyager based machine,
           say N here, otherwise the kernel you build will not be bootable.
   
+ +config X86_GENERICARCH
+ +       bool "Generic architecture"
+ +      depends on X86_32
+ +       help
+ +          This option compiles in the NUMAQ, Summit, bigsmp, ES7000, default
+ +        subarchitectures.  It is intended for a generic binary kernel.
+ +        If you select them all, the kernel will probe them one by one and will
+ +        fall back to the default.
+ +
+ +if X86_GENERICARCH
+ +
   config X86_NUMAQ
         bool "NUMAQ (IBM/Sequent)"
- -      depends on SMP && X86_32
+ +      depends on SMP && X86_32 && PCI && X86_MPPARSE
         select NUMA
         help
- -        This option is used for getting Linux to run on a (IBM/Sequent) NUMA
- -        multiquad box. This changes the way that processors are bootstrapped,
- -        and uses Clustered Logical APIC addressing mode instead of Flat Logical.
- -        You will need a new lynxer.elf file to flash your firmware with - send
- -        email to <Martin.Bligh@us.ibm.com>.
+ +        This option is used for getting Linux to run on a NUMAQ (IBM/Sequent)
+ +        NUMA multiquad box. This changes the way that processors are
+ +        bootstrapped, and uses Clustered Logical APIC addressing mode instead
+ +        of Flat Logical.  You will need a new lynxer.elf file to flash your
+ +        firmware with - send email to <Martin.Bligh@us.ibm.com>.
   
   config X86_SUMMIT
         bool "Summit/EXA (IBM x440)"
@@@ -312,21 -280,46 +313,21 @@@
           This option is needed for IBM systems that use the Summit/EXA chipset.
           In particular, it is needed for the x440.
   
- -        If you don't have one of these computers, you should say N here.
- -        If you want to build a NUMA kernel, you must select ACPI.
+ +config X86_ES7000
+ +      bool "Support for Unisys ES7000 IA32 series"
+ +      depends on X86_32 && SMP
+ +      help
+ +        Support for Unisys ES7000 systems.  Say 'Y' here if this kernel is
+ +        supposed to run on an IA32-based Unisys ES7000 system.
   
   config X86_BIGSMP
- -      bool "Support for other sub-arch SMP systems with more than 8 CPUs"
+ +      bool "Support for big SMP systems with more than 8 CPUs"
         depends on X86_32 && SMP
         help
           This option is needed for the systems that have more than 8 CPUs
           and if the system is not of any sub-arch type above.
   
- -        If you don't have such a system, you should say N here.
- -
- -config X86_VISWS
- -      bool "SGI 320/540 (Visual Workstation)"
- -      depends on X86_32
- -      help
- -        The SGI Visual Workstation series is an IA32-based workstation
- -        based on SGI systems chips with some legacy PC hardware attached.
- -
- -        Say Y here to create a kernel to run on the SGI 320 or 540.
- -
- -        A kernel compiled for the Visual Workstation will not run on PCs
- -        and vice versa. See <file:Documentation/sgi-visws.txt> for details.
- -
- -config X86_GENERICARCH
- -       bool "Generic architecture (Summit, bigsmp, ES7000, default)"
- -      depends on X86_32
- -       help
- -          This option compiles in the Summit, bigsmp, ES7000, default subarchitectures.
- -        It is intended for a generic binary kernel.
- -        If you want a NUMA kernel, select ACPI.   We need SRAT for NUMA.
- -
- -config X86_ES7000
- -      bool "Support for Unisys ES7000 IA32 series"
- -      depends on X86_32 && SMP
- -      help
- -        Support for Unisys ES7000 systems.  Say 'Y' here if this kernel is
- -        supposed to run on an IA32-based Unisys ES7000 system.
- -        Only choose this option if you have such a system, otherwise you
- -        should say N here.
+ +endif
   
   config X86_RDC321X
         bool "RDC R-321x SoC"
@@@ -345,7 -338,7 +346,7 @@@
   config X86_VSMP
         bool "Support for ScaleMP vSMP"
         select PARAVIRT
- -      depends on X86_64
+ +      depends on X86_64 && PCI
         help
           Support for ScaleMP vSMP systems.  Say 'Y' here if this kernel is
           supposed to run on these EM64T-based machines.  Only choose this option
@@@ -353,18 -346,6 +354,18 @@@
   
   endchoice
   
+ +config X86_VISWS
+ +      bool "SGI 320/540 (Visual Workstation)"
+ +      depends on X86_32 && PCI && !X86_VOYAGER && X86_MPPARSE && PCI_GODIRECT
+ +      help
+ +        The SGI Visual Workstation series is an IA32-based workstation
+ +        based on SGI systems chips with some legacy PC hardware attached.
+ +
+ +        Say Y here to create a kernel to run on the SGI 320 or 540.
+ +
+ +        A kernel compiled for the Visual Workstation will run on general
+ +        PCs as well. See <file:Documentation/sgi-visws.txt> for details.
+ +
   config SCHED_NO_NO_OMIT_FRAME_POINTER
         def_bool y
         prompt "Single-depth WCHAN output"
@@@ -393,7 -374,7 +394,7 @@@ config VM
         bool "VMI Guest support"
         select PARAVIRT
         depends on X86_32
- -      depends on !(X86_VISWS || X86_VOYAGER)
+ +      depends on !X86_VOYAGER
         help
           VMI provides a paravirtualized interface to the VMware ESX server
           (it could be used by other hypervisors in theory too, but is not
@@@ -404,7 -385,7 +405,7 @@@ config KVM_CLOC
         bool "KVM paravirtualized clock"
         select PARAVIRT
         select PARAVIRT_CLOCK
- -      depends on !(X86_VISWS || X86_VOYAGER)
+ +      depends on !X86_VOYAGER
         help
           Turning on this option will allow you to run a paravirtualized clock
           when running over the KVM hypervisor. Instead of relying on a PIT
@@@ -415,7 -396,7 +416,7 @@@
   config KVM_GUEST
         bool "KVM Guest support"
         select PARAVIRT
- -      depends on !(X86_VISWS || X86_VOYAGER)
+ +      depends on !X86_VOYAGER
         help
          This option enables various optimizations for running under the KVM
          hypervisor.
@@@ -424,7 -405,7 +425,7 @@@ source "arch/x86/lguest/Kconfig
   
   config PARAVIRT
         bool "Enable paravirtualization code"
- -      depends on !(X86_VISWS || X86_VOYAGER)
+ +      depends on !X86_VOYAGER
         help
           This changes the kernel so it can modify itself when it is run
           under a hypervisor, potentially improving performance significantly
@@@ -437,32 -418,51 +438,32 @@@ config PARAVIRT_CLOC
   
   endif
   
- -config MEMTEST_BOOTPARAM
- -      bool "Memtest boot parameter"
+ +config PARAVIRT_DEBUG
+ +       bool "paravirt-ops debugging"
+ +       depends on PARAVIRT && DEBUG_KERNEL
+ +       help
+ +         Enable to debug paravirt_ops internals.  Specifically, BUG if
+ +       a paravirt_op is missing when it is called.
+ +
+ +config MEMTEST
+ +      bool "Memtest"
         depends on X86_64
- -      default y
         help
           This option adds a kernel parameter 'memtest', which allows memtest
- -        to be disabled at boot.  If this option is selected, memtest
- -        functionality can be disabled with memtest=0 on the kernel
- -        command line.  The purpose of this option is to allow a single
- -        kernel image to be distributed with memtest built in, but not
- -        necessarily enabled.
- -
- -        If you are unsure how to answer this question, answer Y.
- -
- -config MEMTEST_BOOTPARAM_VALUE
- -      int "Memtest boot parameter default value (0-4)"
- -      depends on MEMTEST_BOOTPARAM
- -      range 0 4
- -      default 0
- -      help
- -        This option sets the default value for the kernel parameter
- -        'memtest', which allows memtest to be disabled at boot.  If this
- -        option is set to 0 (zero), the memtest kernel parameter will
- -        default to 0, disabling memtest at bootup.  If this option is
- -        set to 4, the memtest kernel parameter will default to 4,
- -        enabling memtest at bootup, and use that as pattern number.
- -
- -        If you are unsure how to answer this question, answer 0.
- -
- -config ACPI_SRAT
- -      def_bool y
- -      depends on X86_32 && ACPI && NUMA && (X86_SUMMIT || X86_GENERICARCH)
- -      select ACPI_NUMA
- -
- -config HAVE_ARCH_PARSE_SRAT
- -      def_bool y
- -      depends on ACPI_SRAT
+ +        to be set.
+ +              memtest=0 means disabled (the default);
+ +              memtest=1 means run 1 test pattern;
+ +              ...
+ +              memtest=4 means run 4 test patterns.
+ +        If you are unsure how to answer this question, answer N.
   
   config X86_SUMMIT_NUMA
         def_bool y
- -      depends on X86_32 && NUMA && (X86_SUMMIT || X86_GENERICARCH)
+ +      depends on X86_32 && NUMA && X86_GENERICARCH
   
   config X86_CYCLONE_TIMER
         def_bool y
- -      depends on X86_32 && X86_SUMMIT || X86_GENERICARCH
+ +      depends on X86_GENERICARCH
   
   config ES7000_CLUSTERED_APIC
         def_bool y
@@@ -550,21 -550,6 +551,21 @@@ config CALGARY_IOMMU_ENABLED_BY_DEFAUL
           Calgary anyway, pass 'iommu=calgary' on the kernel command line.
           If unsure, say Y.
   
+ +config AMD_IOMMU
+ +      bool "AMD IOMMU support"
+ +      select SWIOTLB
+ +      depends on X86_64 && PCI && ACPI
+ +      help
+ +        With this option you can enable support for AMD IOMMU hardware in
+ +        your system. An IOMMU is a hardware component which provides
+ +        remapping of DMA memory accesses from devices. With an AMD IOMMU you
+ +        can isolate the DMA memory of different devices and protect the
+ +        system from misbehaving device drivers or hardware.
+ +
+ +        You can find out if your system has an AMD IOMMU if you look into
+ +        your BIOS for an option to enable it or if you have an IVRS ACPI
+ +        table.
+ +
   # need this always selected by IOMMU for the VIA workaround
   config SWIOTLB
         bool
@@@ -576,36 -561,21 +577,36 @@@
           3 GB of memory. If unsure, say Y.
   
   config IOMMU_HELPER
- -      def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB)
+ +      def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU)
+ +config MAXSMP
+ +      bool "Configure Maximum number of SMP Processors and NUMA Nodes"
+ +      depends on X86_64 && SMP
+ +      default n
+ +      help
+ +        Configure maximum number of CPUS and NUMA Nodes for this architecture.
+ +        If unsure, say N.
   
+ +if MAXSMP
   config NR_CPUS
- -      int "Maximum number of CPUs (2-255)"
- -      range 2 255
+ +      int
+ +      default "4096"
+ +endif
+ +
+ +if !MAXSMP
+ +config NR_CPUS
+ +      int "Maximum number of CPUs (2-4096)"
+ +      range 2 4096
         depends on SMP
         default "32" if X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000
         default "8"
         help
           This allows you to specify the maximum number of CPUs which this
- -        kernel will support.  The maximum supported value is 255 and the
+ +        kernel will support.  The maximum supported value is 4096 and the
           minimum value which makes sense is 2.
   
           This is purely to save memory - each supported CPU adds
           approximately eight kilobytes to the kernel image.
+ +endif
   
   config SCHED_SMT
         bool "SMT (Hyperthreading) scheduler support"
@@@ -629,7 -599,7 +630,7 @@@ source "kernel/Kconfig.preempt
   
   config X86_UP_APIC
         bool "Local APIC support on uniprocessors"
- -      depends on X86_32 && !SMP && !(X86_VISWS || X86_VOYAGER || X86_GENERICARCH)
+ +      depends on X86_32 && !SMP && !(X86_VOYAGER || X86_GENERICARCH)
         help
           A local APIC (Advanced Programmable Interrupt Controller) is an
           integrated interrupt controller in the CPU. If you have a single-CPU
@@@ -654,11 -624,11 +655,11 @@@ config X86_UP_IOAPI
   
   config X86_LOCAL_APIC
         def_bool y
- -      depends on X86_64 || (X86_32 && (X86_UP_APIC || ((X86_VISWS || SMP) && !X86_VOYAGER) || X86_GENERICARCH))
+ +      depends on X86_64 || (X86_32 && (X86_UP_APIC || (SMP && !X86_VOYAGER) || X86_GENERICARCH))
   
   config X86_IO_APIC
         def_bool y
- -      depends on X86_64 || (X86_32 && (X86_UP_IOAPIC || (SMP && !(X86_VISWS || X86_VOYAGER)) || X86_GENERICARCH))
+ +      depends on X86_64 || (X86_32 && (X86_UP_IOAPIC || (SMP && !X86_VOYAGER) || X86_GENERICARCH))
   
   config X86_VISWS_APIC
         def_bool y
@@@ -712,7 -682,7 +713,7 @@@ config X86_MCE_NONFATA
   
   config X86_MCE_P4THERMAL
         bool "check for P4 thermal throttling interrupt."
- -      depends on X86_32 && X86_MCE && (X86_UP_APIC || SMP) && !X86_VISWS
+ +      depends on X86_32 && X86_MCE && (X86_UP_APIC || SMP)
         help
           Enabling this feature will cause a message to be printed when the P4
           enters thermal throttling.
@@@ -942,9 -912,9 +943,9 @@@ config X86_PA
   config NUMA
         bool "Numa Memory Allocation and Scheduler Support (EXPERIMENTAL)"
         depends on SMP
- -      depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || (X86_SUMMIT || X86_GENERICARCH) && ACPI) && EXPERIMENTAL)
+ +      depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI) && EXPERIMENTAL)
         default n if X86_PC
- -      default y if (X86_NUMAQ || X86_SUMMIT)
+ +      default y if (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP)
         help
           Enable NUMA (Non Uniform Memory Access) support.
           The kernel will try to allocate memory used by a CPU on the
@@@ -996,25 -966,13 +997,25 @@@ config NUMA_EM
           into virtual nodes when booted with "numa=fake=N", where N is the
           number of nodes. This is only useful for debugging.
   
+ +if MAXSMP
+ +
   config NODES_SHIFT
- -      int "Max num nodes shift(1-15)"
- -      range 1 15  if X86_64
+ +      int
+ +      default "9"
+ +endif
+ +
+ +if !MAXSMP
+ +config NODES_SHIFT
+ +      int "Maximum NUMA Nodes (as a power of 2)"
+ +      range 1 9   if X86_64
         default "6" if X86_64
         default "4" if X86_NUMAQ
         default "3"
         depends on NEED_MULTIPLE_NODES
+ +      help
+ +        Specify the maximum number of NUMA Nodes available on the target
+ +        system.  Increases memory reserved to accommodate various tables.
+ +endif
   
   config HAVE_ARCH_BOOTMEM_NODE
         def_bool y
@@@ -1133,37 -1091,6 +1134,37 @@@ config MTR
   
           See <file:Documentation/mtrr.txt> for more information.
   
+ +config MTRR_SANITIZER
+ +      bool
+ +      prompt "MTRR cleanup support"
+ +      depends on MTRR
+ +      help
+ +        Convert MTRR layout from continuous to discrete, so X drivers can
+ +        add writeback entries.
+ +
+ +        Can be disabled with disable_mtrr_cleanup on the kernel command line.
+ +        The largest mtrr entry size for a continuous block can be set with
+ +        mtrr_chunk_size.
+ +
+ +        If unsure, say N.
+ +
+ +config MTRR_SANITIZER_ENABLE_DEFAULT
+ +      int "MTRR cleanup enable value (0-1)"
+ +      range 0 1
+ +      default "0"
+ +      depends on MTRR_SANITIZER
+ +      help
+ +        Default value for MTRR cleanup: 1 enables it, 0 disables it.
+ +
+ +config MTRR_SANITIZER_SPARE_REG_NR_DEFAULT
+ +      int "MTRR cleanup spare reg num (0-7)"
+ +      range 0 7
+ +      default "1"
+ +      depends on MTRR_SANITIZER
+ +      help
+ +        mtrr cleanup spare entries default, it can be changed via
+ +        mtrr_spare_reg_nr=N on the kernel command line.
+ +
   config X86_PAT
         bool
         prompt "x86 PAT support"
@@@ -1264,6 -1191,7 +1265,6 @@@ config KEXE
   
   config CRASH_DUMP
         bool "kernel crash dumps (EXPERIMENTAL)"
- -      depends on EXPERIMENTAL
         depends on X86_64 || (X86_32 && HIGHMEM)
         help
           Generate crash dump after being started by kexec.
@@@ -1412,7 -1340,7 +1413,7 @@@ config X86_APM_BOO
   
   menuconfig APM
         tristate "APM (Advanced Power Management) BIOS support"
- -      depends on X86_32 && PM_SLEEP && !X86_VISWS
+ +      depends on X86_32 && PM_SLEEP
         ---help---
           APM is a BIOS specification for saving power using several different
           techniques. This is mostly useful for battery powered laptops with
@@@ -1548,7 -1476,8 +1549,7 @@@ endmen
   menu "Bus options (PCI etc.)"
   
   config PCI
- -      bool "PCI support" if !X86_VISWS && !X86_VSMP
- -      depends on !X86_VOYAGER
+ +      bool "PCI support"
         default y
         select ARCH_SUPPORTS_MSI if (X86_LOCAL_APIC && X86_IO_APIC)
         help
@@@ -1559,7 -1488,7 +1560,7 @@@
   
   choice
         prompt "PCI access mode"
- -      depends on X86_32 && PCI && !X86_VISWS
+ +      depends on X86_32 && PCI
         default PCI_GOANY
         ---help---
           On PCI systems, the BIOS can be used to detect the PCI devices and
@@@ -1596,12 -1525,12 +1597,12 @@@ endchoic
   
   config PCI_BIOS
         def_bool y
- -      depends on X86_32 && !X86_VISWS && PCI && (PCI_GOBIOS || PCI_GOANY)
+ +      depends on X86_32 && PCI && (PCI_GOBIOS || PCI_GOANY)
   
   # x86-64 doesn't support PCI BIOS access from long mode so always go direct.
   config PCI_DIRECT
         def_bool y
- -      depends on PCI && (X86_64 || (PCI_GODIRECT || PCI_GOANY || PCI_GOOLPC) || X86_VISWS)
+ +      depends on PCI && (X86_64 || (PCI_GODIRECT || PCI_GOANY || PCI_GOOLPC))
   
   config PCI_MMCONFIG
         def_bool y
@@@ -1661,7 -1590,7 +1662,7 @@@ if X86_3
   
   config ISA
         bool "ISA support"
- -      depends on !(X86_VOYAGER || X86_VISWS)
+ +      depends on !X86_VOYAGER
         help
           Find out whether you have ISA slots on your motherboard.  ISA is the
           name of a bus system, i.e. the way the CPU talks to the other stuff
@@@ -1688,7 -1617,7 +1689,7 @@@ config EIS
   source "drivers/eisa/Kconfig"
   
   config MCA
- -      bool "MCA support" if !(X86_VISWS || X86_VOYAGER)
+ +      bool "MCA support" if !X86_VOYAGER
         default y if X86_VOYAGER
         help
           MicroChannel Architecture is found in some IBM PS/2 machines and
diff --combined arch/x86/kernel/apic_32.c

index 3e58b676d23b8fd96a87823e30fcc4566d60aad6,71017f71f4bc547f2cb92fae69cb0b3f1b765a57..a437d027f20b6d8d7ba3dc88400220e796afe41e
--- 1/arch/x86/kernel/apic_32.c
--- 2/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@@ -52,41 -52,30 +52,41 @@@
   
   unsigned long mp_lapic_addr;
   
- -DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID;
- -EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
- -
   /*
    * Knob to control our willingness to enable the local APIC.
    *
- - * -1=force-disable, +1=force-enable
+ + * +1=force-enable
    */
- -static int enable_local_apic __initdata;
+ +static int force_enable_local_apic;
+ +int disable_apic;
   
   /* Local APIC timer verification ok */
   static int local_apic_timer_verify_ok;
- -/* Disable local APIC timer from the kernel commandline or via dmi quirk
- -   or using CPU MSR check */
- -int local_apic_timer_disabled;
+ +/* Disable local APIC timer from the kernel commandline or via dmi quirk */
+ +static int local_apic_timer_disabled;
   /* Local APIC timer works in C2 */
   int local_apic_timer_c2_ok;
   EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
   
+ +int first_system_vector = 0xfe;
+ +
+ +char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
+ +
   /*
    * Debug level, exported for io_apic.c
    */
   int apic_verbosity;
   
+ +int pic_mode;
+ +
+ +/* Have we found an MP table */
+ +int smp_found_config;
+ +
+ +static struct resource lapic_resource = {
+ +      .name = "Local APIC",
+ +      .flags = IORESOURCE_MEM | IORESOURCE_BUSY,
+ +};
+ +
   static unsigned int calibration_result;
   
   static int lapic_next_event(unsigned long delta,
@@@ -556,7 -545,7 +556,7 @@@ void __init setup_boot_APIC_clock(void
                         lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
                 else
                         printk(KERN_WARNING "APIC timer registered as dummy,"
- -                             " due to nmi_watchdog=1!\n");
+ +                              " due to nmi_watchdog=%d!\n", nmi_watchdog);
         }
   
         /* Setup the lapic or request the broadcast */
@@@ -974,7 -963,7 +974,7 @@@ void __cpuinit setup_local_APIC(void
          * Double-check whether this APIC is really registered.
          */
         if (!apic_id_registered())
- -              BUG();
+ +              WARN_ON_ONCE(1);
   
         /*
          * Intel recommends to set DFR, LDR and TPR before enabling
@@@ -1105,7 -1094,7 +1105,7 @@@ static int __init detect_init_APIC(void
         u32 h, l, features;
   
         /* Disabled by kernel option? */
- -      if (enable_local_apic < 0)
+ +      if (disable_apic)
                 return -1;
   
         switch (boot_cpu_data.x86_vendor) {
@@@ -1128,7 -1117,7 +1128,7 @@@
                  * Over-ride BIOS and try to enable the local APIC only if
                  * "lapic" specified.
                  */
- -              if (enable_local_apic <= 0) {
+ +              if (!force_enable_local_apic) {
                         printk(KERN_INFO "Local APIC disabled by BIOS -- "
                                "you can enable it with \"lapic\"\n");
                         return -1;
@@@ -1165,6 -1154,9 +1165,6 @@@
         if (l & MSR_IA32_APICBASE_ENABLE)
                 mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
   
- -      if (nmi_watchdog != NMI_NONE && nmi_watchdog != NMI_DISABLED)
- -              nmi_watchdog = NMI_LOCAL_APIC;
- -
         printk(KERN_INFO "Found and enabled local APIC!\n");
   
         apic_pm_activate();
@@@ -1203,6 -1195,36 +1203,6 @@@ void __init init_apic_mappings(void
         if (boot_cpu_physical_apicid == -1U)
                 boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
   
- -#ifdef CONFIG_X86_IO_APIC
- -      {
- -              unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
- -              int i;
- -
- -              for (i = 0; i < nr_ioapics; i++) {
- -                      if (smp_found_config) {
- -                              ioapic_phys = mp_ioapics[i].mpc_apicaddr;
- -                              if (!ioapic_phys) {
- -                                      printk(KERN_ERR
- -                                             "WARNING: bogus zero IO-APIC "
- -                                             "address found in MPTABLE, "
- -                                             "disabling IO/APIC support!\n");
- -                                      smp_found_config = 0;
- -                                      skip_ioapic_setup = 1;
- -                                      goto fake_ioapic_page;
- -                              }
- -                      } else {
- -fake_ioapic_page:
- -                              ioapic_phys = (unsigned long)
- -                                            alloc_bootmem_pages(PAGE_SIZE);
- -                              ioapic_phys = __pa(ioapic_phys);
- -                      }
- -                      set_fixmap_nocache(idx, ioapic_phys);
- -                      printk(KERN_DEBUG "mapped IOAPIC to %08lx (%08lx)\n",
- -                             __fix_to_virt(idx), ioapic_phys);
- -                      idx++;
- -              }
- -      }
- -#endif
   }
   
   /*
@@@ -1214,7 -1236,7 +1214,7 @@@ int apic_version[MAX_APICS]
   
   int __init APIC_init_uniprocessor(void)
   {
- -      if (enable_local_apic < 0)
+ +      if (disable_apic)
                 clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
   
         if (!smp_found_config && !cpu_has_apic)
@@@ -1243,14 -1265,10 +1243,14 @@@
   #ifdef CONFIG_CRASH_DUMP
         boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
   #endif
- -      phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid);
+ +      physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
   
         setup_local_APIC();
   
+ +#ifdef CONFIG_X86_IO_APIC
+ +      if (!smp_found_config || skip_ioapic_setup || !nr_ioapics)
+ +#endif
+ +              localise_nmi_watchdog();
         end_local_APIC_setup();
   #ifdef CONFIG_X86_IO_APIC
         if (smp_found_config)
@@@ -1333,13 -1351,17 +1333,17 @@@ void __init smp_intr_init(void
          * The reschedule interrupt is a CPU-to-CPU reschedule-helper
          * IPI, driven by wakeup.
          */
- -      set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
+ +      alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
   
         /* IPI for invalidation */
- -      set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
+ +      alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
   
         /* IPI for generic function call */
- -      set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
+ +      alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
+ 
+       /* IPI for single call function */
+       set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
+                               call_function_single_interrupt);
   }
   #endif
   
@@@ -1352,15 -1374,15 +1356,15 @@@ void __init apic_intr_init(void
         smp_intr_init();
   #endif
         /* self generated IPI for local APIC timer */
- -      set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
+ +      alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
   
         /* IPI vectors for APIC spurious and error interrupts */
- -      set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
- -      set_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
+ +      alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
+ +      alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
   
         /* thermal monitor LVT interrupt */
   #ifdef CONFIG_X86_MCE_P4THERMAL
- -      set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
+ +      alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
   #endif
   }
   
@@@ -1495,9 -1517,6 +1499,9 @@@ void __cpuinit generic_processor_info(i
                  */
                 cpu = 0;
   
+ +      if (apicid > max_physical_apicid)
+ +              max_physical_apicid = apicid;
+ +
         /*
          * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y
          * but we need to work other dependencies like SMP_SUSPEND etc
@@@ -1505,7 -1524,7 +1509,7 @@@
          * if (CPU_HOTPLUG_ENABLED || num_processors > 8)
          *       - Ashok Raj <ashok.raj@intel.com>
          */
- -      if (num_processors > 8) {
+ +      if (max_physical_apicid >= 8) {
                 switch (boot_cpu_data.x86_vendor) {
                 case X86_VENDOR_INTEL:
                         if (!APIC_XAPIC(version)) {
@@@ -1519,9 -1538,9 +1523,9 @@@
         }
   #ifdef CONFIG_SMP
         /* are we being called early in kernel startup? */
- -      if (x86_cpu_to_apicid_early_ptr) {
- -              u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr;
- -              u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr;
+ +      if (early_per_cpu_ptr(x86_cpu_to_apicid)) {
+ +              u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
+ +              u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
   
                 cpu_to_apicid[cpu] = apicid;
                 bios_cpu_apicid[cpu] = apicid;
@@@ -1688,14 -1707,14 +1692,14 @@@ static void apic_pm_activate(void) { 
    */
   static int __init parse_lapic(char *arg)
   {
- -      enable_local_apic = 1;
+ +      force_enable_local_apic = 1;
         return 0;
   }
   early_param("lapic", parse_lapic);
   
   static int __init parse_nolapic(char *arg)
   {
- -      enable_local_apic = -1;
+ +      disable_apic = 1;
         clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
         return 0;
   }
@@@ -1725,21 -1744,3 +1729,21 @@@ static int __init apic_set_verbosity(ch
   }
   __setup("apic=", apic_set_verbosity);
   
+ +static int __init lapic_insert_resource(void)
+ +{
+ +      if (!apic_phys)
+ +              return -1;
+ +
+ +      /* Put local APIC into the resource map. */
+ +      lapic_resource.start = apic_phys;
+ +      lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1;
+ +      insert_resource(&iomem_resource, &lapic_resource);
+ +
+ +      return 0;
+ +}
+ +
+ +/*
+ + * need to call insert_resource() after e820_reserve_resources(),
+ + * which uses request_resource()
+ + */
+ +late_initcall(lapic_insert_resource);
diff --combined arch/x86/kernel/cpu/mcheck/mce_64.c

index 9874107451829614e0f57d122a4d1729498a6532,43b7cb594912a92016c943fd75521f775ed82b6f..c4a7ec31394c1b9e4abb9971d7d3cfb62228c1bc
--- 1/arch/x86/kernel/cpu/mcheck/mce_64.c
--- 2/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@@ -9,7 -9,6 +9,7 @@@
   #include <linux/types.h>
   #include <linux/kernel.h>
   #include <linux/sched.h>
+ +#include <linux/smp_lock.h>
   #include <linux/string.h>
   #include <linux/rcupdate.h>
   #include <linux/kallsyms.h>
@@@ -32,7 -31,7 +32,7 @@@
   #include <asm/idle.h>
   
   #define MISC_MCELOG_MINOR 227
- -#define NR_BANKS 6
+ +#define NR_SYSFS_BANKS 6
   
   atomic_t mce_entry;
   
@@@ -47,7 -46,7 +47,7 @@@ static int mce_dont_init
    */
   static int tolerant = 1;
   static int banks;
- -static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL };
+ +static unsigned long bank[NR_SYSFS_BANKS] = { [0 ... NR_SYSFS_BANKS-1] = ~0UL };
   static unsigned long notify_user;
   static int rip_msr;
   static int mce_bootlog = -1;
@@@ -210,7 -209,7 +210,7 @@@ void do_machine_check(struct pt_regs * 
         barrier();
   
         for (i = 0; i < banks; i++) {
- -              if (!bank[i])
+ +              if (i < NR_SYSFS_BANKS && !bank[i])
                         continue;
   
                 m.misc = 0;
@@@ -364,7 -363,7 +364,7 @@@ static void mcheck_check_cpu(void *info
   
   static void mcheck_timer(struct work_struct *work)
   {
-       on_each_cpu(mcheck_check_cpu, NULL, 1, 1);
+       on_each_cpu(mcheck_check_cpu, NULL, 1);
   
         /*
          * Alert userspace if needed.  If we logged an MCE, reduce the
@@@ -445,10 -444,9 +445,10 @@@ static void mce_init(void *dummy
   
         rdmsrl(MSR_IA32_MCG_CAP, cap);
         banks = cap & 0xff;
- -      if (banks > NR_BANKS) {
- -              printk(KERN_INFO "MCE: warning: using only %d banks\n", banks);
- -              banks = NR_BANKS;
+ +      if (banks > MCE_EXTENDED_BANK) {
+ +              banks = MCE_EXTENDED_BANK;
+ +              printk(KERN_INFO "MCE: warning: using only %d banks\n",
+ +                     MCE_EXTENDED_BANK);
         }
         /* Use accurate RIP reporting if available. */
         if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9)
@@@ -464,11 -462,7 +464,11 @@@
                 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
   
         for (i = 0; i < banks; i++) {
- -              wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]);
+ +              if (i < NR_SYSFS_BANKS)
+ +                      wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]);
+ +              else
+ +                      wrmsrl(MSR_IA32_MC0_CTL+4*i, ~0UL);
+ +
                 wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
         }
   }
@@@ -533,12 -527,10 +533,12 @@@ static int open_exclu;  /* already open 
   
   static int mce_open(struct inode *inode, struct file *file)
   {
+ +      lock_kernel();
         spin_lock(&mce_state_lock);
   
         if (open_exclu || (open_count && (file->f_flags & O_EXCL))) {
                 spin_unlock(&mce_state_lock);
+ +              unlock_kernel();
                 return -EBUSY;
         }
   
@@@ -547,7 -539,6 +547,7 @@@
         open_count++;
   
         spin_unlock(&mce_state_lock);
+ +      unlock_kernel();
   
         return nonseekable_open(inode, file);
   }
@@@ -621,7 -612,7 +621,7 @@@ static ssize_t mce_read(struct file *fi
          * Collect entries that were still getting written before the
          * synchronize.
          */
-       on_each_cpu(collect_tscs, cpu_tsc, 1, 1);
+       on_each_cpu(collect_tscs, cpu_tsc, 1);
         for (i = next; i < MCE_LOG_LEN; i++) {
                 if (mcelog.entry[i].finished &&
                     mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) {
@@@ -746,7 -737,7 +746,7 @@@ static void mce_restart(void
         if (next_interval)
                 cancel_delayed_work(&mcheck_work);
         /* Timer race is harmless here */
-       on_each_cpu(mce_init, NULL, 1, 1);
+       on_each_cpu(mce_init, NULL, 1);
         next_interval = check_interval * HZ;
         if (next_interval)
                 schedule_delayed_work(&mcheck_work,
@@@ -775,10 -766,7 +775,10 @@@ DEFINE_PER_CPU(struct sys_device, devic
         }                                                               \
         static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name);
   
- -/* TBD should generate these dynamically based on number of available banks */
+ +/*
+ + * TBD should generate these dynamically based on number of available banks.
+ + * Have only 6 control banks in /sysfs until then.
+ + */
   ACCESSOR(bank0ctl,bank[0],mce_restart())
   ACCESSOR(bank1ctl,bank[1],mce_restart())
   ACCESSOR(bank2ctl,bank[2],mce_restart())
diff --combined arch/x86/kernel/cpu/mtrr/main.c

index 105afe12beb0730f1c5fbb02443adae516794a08,290652cefddb89529943b73def7ac491a14ff6bf..6f23969c8fafe4f1dd2b4304a910c11483833f2d
--- 1/arch/x86/kernel/cpu/mtrr/main.c
--- 2/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@@ -37,7 -37,6 +37,7 @@@
   #include <linux/smp.h>
   #include <linux/cpu.h>
   #include <linux/mutex.h>
+ +#include <linux/sort.h>
   
   #include <asm/e820.h>
   #include <asm/mtrr.h>
@@@ -223,7 -222,7 +223,7 @@@ static void set_mtrr(unsigned int reg, 
         atomic_set(&data.gate,0);
   
         /*  Start the ball rolling on other CPUs  */
-       if (smp_call_function(ipi_handler, &data, 1, 0) != 0)
+       if (smp_call_function(ipi_handler, &data, 0) != 0)
                 panic("mtrr: timed out waiting for other CPUs\n");
   
         local_irq_save(flags);
@@@ -610,787 -609,6 +610,787 @@@ static struct sysdev_driver mtrr_sysdev
         .resume         = mtrr_restore,
   };
   
+ +/* should be related to MTRR_VAR_RANGES nums */
+ +#define RANGE_NUM 256
+ +
+ +struct res_range {
+ +      unsigned long start;
+ +      unsigned long end;
+ +};
+ +
+ +static int __init
+ +add_range(struct res_range *range, int nr_range, unsigned long start,
+ +                            unsigned long end)
+ +{
+ +      /* out of slots */
+ +      if (nr_range >= RANGE_NUM)
+ +              return nr_range;
+ +
+ +      range[nr_range].start = start;
+ +      range[nr_range].end = end;
+ +
+ +      nr_range++;
+ +
+ +      return nr_range;
+ +}
+ +
+ +static int __init
+ +add_range_with_merge(struct res_range *range, int nr_range, unsigned long start,
+ +                            unsigned long end)
+ +{
+ +      int i;
+ +
+ +      /* try to merge it with old one */
+ +      for (i = 0; i < nr_range; i++) {
+ +              unsigned long final_start, final_end;
+ +              unsigned long common_start, common_end;
+ +
+ +              if (!range[i].end)
+ +                      continue;
+ +
+ +              common_start = max(range[i].start, start);
+ +              common_end = min(range[i].end, end);
+ +              if (common_start > common_end + 1)
+ +                      continue;
+ +
+ +              final_start = min(range[i].start, start);
+ +              final_end = max(range[i].end, end);
+ +
+ +              range[i].start = final_start;
+ +              range[i].end =  final_end;
+ +              return nr_range;
+ +      }
+ +
+ +      /* need to add that */
+ +      return add_range(range, nr_range, start, end);
+ +}
+ +
+ +static void __init
+ +subtract_range(struct res_range *range, unsigned long start, unsigned long end)
+ +{
+ +      int i, j;
+ +
+ +      for (j = 0; j < RANGE_NUM; j++) {
+ +              if (!range[j].end)
+ +                      continue;
+ +
+ +              if (start <= range[j].start && end >= range[j].end) {
+ +                      range[j].start = 0;
+ +                      range[j].end = 0;
+ +                      continue;
+ +              }
+ +
+ +              if (start <= range[j].start && end < range[j].end &&
+ +                  range[j].start < end + 1) {
+ +                      range[j].start = end + 1;
+ +                      continue;
+ +              }
+ +
+ +
+ +              if (start > range[j].start && end >= range[j].end &&
+ +                  range[j].end > start - 1) {
+ +                      range[j].end = start - 1;
+ +                      continue;
+ +              }
+ +
+ +              if (start > range[j].start && end < range[j].end) {
+ +                      /* find the new spare */
+ +                      for (i = 0; i < RANGE_NUM; i++) {
+ +                              if (range[i].end == 0)
+ +                                      break;
+ +                      }
+ +                      if (i < RANGE_NUM) {
+ +                              range[i].end = range[j].end;
+ +                              range[i].start = end + 1;
+ +                      } else {
+ +                              printk(KERN_ERR "run of slot in ranges\n");
+ +                      }
+ +                      range[j].end = start - 1;
+ +                      continue;
+ +              }
+ +      }
+ +}
+ +
+ +static int __init cmp_range(const void *x1, const void *x2)
+ +{
+ +      const struct res_range *r1 = x1;
+ +      const struct res_range *r2 = x2;
+ +      long start1, start2;
+ +
+ +      start1 = r1->start;
+ +      start2 = r2->start;
+ +
+ +      return start1 - start2;
+ +}
+ +
+ +struct var_mtrr_range_state {
+ +      unsigned long base_pfn;
+ +      unsigned long size_pfn;
+ +      mtrr_type type;
+ +};
+ +
+ +struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
+ +static int __initdata debug_print;
+ +
+ +static int __init
+ +x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
+ +                     unsigned long extra_remove_base,
+ +                     unsigned long extra_remove_size)
+ +{
+ +      unsigned long i, base, size;
+ +      mtrr_type type;
+ +
+ +      for (i = 0; i < num_var_ranges; i++) {
+ +              type = range_state[i].type;
+ +              if (type != MTRR_TYPE_WRBACK)
+ +                      continue;
+ +              base = range_state[i].base_pfn;
+ +              size = range_state[i].size_pfn;
+ +              nr_range = add_range_with_merge(range, nr_range, base,
+ +                                              base + size - 1);
+ +      }
+ +      if (debug_print) {
+ +              printk(KERN_DEBUG "After WB checking\n");
+ +              for (i = 0; i < nr_range; i++)
+ +                      printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
+ +                               range[i].start, range[i].end + 1);
+ +      }
+ +
+ +      /* take out UC ranges */
+ +      for (i = 0; i < num_var_ranges; i++) {
+ +              type = range_state[i].type;
+ +              if (type != MTRR_TYPE_UNCACHABLE)
+ +                      continue;
+ +              size = range_state[i].size_pfn;
+ +              if (!size)
+ +                      continue;
+ +              base = range_state[i].base_pfn;
+ +              subtract_range(range, base, base + size - 1);
+ +      }
+ +      if (extra_remove_size)
+ +              subtract_range(range, extra_remove_base,
+ +                               extra_remove_base + extra_remove_size  - 1);
+ +
+ +      /* get new range num */
+ +      nr_range = 0;
+ +      for (i = 0; i < RANGE_NUM; i++) {
+ +              if (!range[i].end)
+ +                      continue;
+ +              nr_range++;
+ +      }
+ +      if  (debug_print) {
+ +              printk(KERN_DEBUG "After UC checking\n");
+ +              for (i = 0; i < nr_range; i++)
+ +                      printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
+ +                               range[i].start, range[i].end + 1);
+ +      }
+ +
+ +      /* sort the ranges */
+ +      sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
+ +      if  (debug_print) {
+ +              printk(KERN_DEBUG "After sorting\n");
+ +              for (i = 0; i < nr_range; i++)
+ +                      printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
+ +                               range[i].start, range[i].end + 1);
+ +      }
+ +
+ +      /* clear those that are not used */
+ +      for (i = nr_range; i < RANGE_NUM; i++)
+ +              memset(&range[i], 0, sizeof(range[i]));
+ +
+ +      return nr_range;
+ +}
+ +
+ +static struct res_range __initdata range[RANGE_NUM];
+ +
+ +#ifdef CONFIG_MTRR_SANITIZER
+ +
+ +static unsigned long __init sum_ranges(struct res_range *range, int nr_range)
+ +{
+ +      unsigned long sum;
+ +      int i;
+ +
+ +      sum = 0;
+ +      for (i = 0; i < nr_range; i++)
+ +              sum += range[i].end + 1 - range[i].start;
+ +
+ +      return sum;
+ +}
+ +
+ +static int enable_mtrr_cleanup __initdata =
+ +      CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT;
+ +
+ +static int __init disable_mtrr_cleanup_setup(char *str)
+ +{
+ +      if (enable_mtrr_cleanup != -1)
+ +              enable_mtrr_cleanup = 0;
+ +      return 0;
+ +}
+ +early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
+ +
+ +static int __init enable_mtrr_cleanup_setup(char *str)
+ +{
+ +      if (enable_mtrr_cleanup != -1)
+ +              enable_mtrr_cleanup = 1;
+ +      return 0;
+ +}
+ +early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup);
+ +
+ +struct var_mtrr_state {
+ +      unsigned long   range_startk;
+ +      unsigned long   range_sizek;
+ +      unsigned long   chunk_sizek;
+ +      unsigned long   gran_sizek;
+ +      unsigned int    reg;
+ +};
+ +
+ +static void __init
+ +set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
+ +              unsigned char type, unsigned int address_bits)
+ +{
+ +      u32 base_lo, base_hi, mask_lo, mask_hi;
+ +      u64 base, mask;
+ +
+ +      if (!sizek) {
+ +              fill_mtrr_var_range(reg, 0, 0, 0, 0);
+ +              return;
+ +      }
+ +
+ +      mask = (1ULL << address_bits) - 1;
+ +      mask &= ~((((u64)sizek) << 10) - 1);
+ +
+ +      base  = ((u64)basek) << 10;
+ +
+ +      base |= type;
+ +      mask |= 0x800;
+ +
+ +      base_lo = base & ((1ULL<<32) - 1);
+ +      base_hi = base >> 32;
+ +
+ +      mask_lo = mask & ((1ULL<<32) - 1);
+ +      mask_hi = mask >> 32;
+ +
+ +      fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi);
+ +}
+ +
+ +static void __init
+ +save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
+ +              unsigned char type)
+ +{
+ +      range_state[reg].base_pfn = basek >> (PAGE_SHIFT - 10);
+ +      range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10);
+ +      range_state[reg].type = type;
+ +}
+ +
+ +static void __init
+ +set_var_mtrr_all(unsigned int address_bits)
+ +{
+ +      unsigned long basek, sizek;
+ +      unsigned char type;
+ +      unsigned int reg;
+ +
+ +      for (reg = 0; reg < num_var_ranges; reg++) {
+ +              basek = range_state[reg].base_pfn << (PAGE_SHIFT - 10);
+ +              sizek = range_state[reg].size_pfn << (PAGE_SHIFT - 10);
+ +              type = range_state[reg].type;
+ +
+ +              set_var_mtrr(reg, basek, sizek, type, address_bits);
+ +      }
+ +}
+ +
+ +/*
+ + * Split the range starting at @range_startk and spanning @range_sizek
+ + * (both in KiB) into power-of-two sized pieces, each no larger than the
+ + * alignment of its start, and save every piece in range_state[] with the
+ + * given @type, beginning at entry @reg.
+ + *
+ + * Stops early once all num_var_ranges entries have been used.
+ + * Returns the index of the next unused entry.
+ + */
+ +static unsigned int __init
+ +range_to_mtrr(unsigned int reg, unsigned long range_startk,
+ +            unsigned long range_sizek, unsigned char type)
+ +{
+ +      if (!range_sizek || (reg >= num_var_ranges))
+ +              return reg;
+ +
+ +      while (range_sizek) {
+ +              unsigned long max_align, align;
+ +              unsigned long sizek;
+ +
+ +              /* Compute the maximum size I can make a range */
+ +              if (range_startk)
+ +                      max_align = ffs(range_startk) - 1;
+ +              else
+ +                      max_align = 32;
+ +              align = fls(range_sizek) - 1;
+ +              if (align > max_align)
+ +                      align = max_align;
+ +
+ +              /*
+ +               * align can reach 31, so shift an unsigned long: shifting
+ +               * the int constant 1 would overflow a signed int before
+ +               * the result is widened to sizek.
+ +               */
+ +              sizek = 1UL << align;
+ +              if (debug_print)
+ +                      printk(KERN_DEBUG "Setting variable MTRR %d, "
+ +                              "base: %ldMB, range: %ldMB, type %s\n",
+ +                              reg, range_startk >> 10, sizek >> 10,
+ +                              (type == MTRR_TYPE_UNCACHABLE)?"UC":
+ +                                  ((type == MTRR_TYPE_WRBACK)?"WB":"Other")
+ +                              );
+ +              save_var_mtrr(reg++, range_startk, sizek, type);
+ +              range_startk += sizek;
+ +              range_sizek -= sizek;
+ +              /* out of variable ranges: the rest stays unmapped */
+ +              if (reg >= num_var_ranges)
+ +                      break;
+ +      }
+ +      return reg;
+ +}
+ +
+ +/*
+ + * Save the pending range described by @state (range_startk/range_sizek,
+ + * in KiB) into range_state[] as write-back entries, optionally rounding
+ + * it up to a chunk_sizek boundary and punching the excess out again with
+ + * an uncachable "hole" entry.
+ + *
+ + * @basek/@sizek describe the next range that will follow (both 0 when the
+ + * pending range is the last one).  When the rounded-up chunk reaches into
+ + * that next range, the overlapping part is covered here as well.
+ + *
+ + * Returns the number of KiB at the start of the next range that have
+ + * already been covered (second_sizek), or 0 if none.
+ + *
+ + * NOTE(review): the return type is plain unsigned while second_sizek is
+ + * unsigned long - confirm the value can never exceed 32 bits.
+ + */
+ +static unsigned __init
+ +range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
+ +                      unsigned long sizek)
+ +{
+ +      unsigned long hole_basek, hole_sizek;
+ +      unsigned long second_basek, second_sizek;
+ +      unsigned long range0_basek, range0_sizek;
+ +      unsigned long range_basek, range_sizek;
+ +      unsigned long chunk_sizek;
+ +      unsigned long gran_sizek;
+ +
+ +      hole_basek = 0;
+ +      hole_sizek = 0;
+ +      second_basek = 0;
+ +      second_sizek = 0;
+ +      chunk_sizek = state->chunk_sizek;
+ +      gran_sizek = state->gran_sizek;
+ +
+ +      /* align with gran size, prevent small block used up MTRRs */
+ +      range_basek = ALIGN(state->range_startk, gran_sizek);
+ +      /* aligned start already lies beyond the next range: nothing to save */
+ +      if ((range_basek > basek) && basek)
+ +              return second_sizek;
+ +      state->range_sizek -= (range_basek - state->range_startk);
+ +      range_sizek = ALIGN(state->range_sizek, gran_sizek);
+ +
+ +      /* round the size down to a multiple of gran_sizek */
+ +      while (range_sizek > state->range_sizek) {
+ +              range_sizek -= gran_sizek;
+ +              if (!range_sizek)
+ +                      return 0;
+ +      }
+ +      state->range_sizek = range_sizek;
+ +
+ +      /* try to append some small hole */
+ +      /*
+ +       * NOTE(review): range0_basek uses state->range_startk rather than
+ +       * the gran-aligned range_basek computed above - confirm intended.
+ +       */
+ +      range0_basek = state->range_startk;
+ +      range0_sizek = ALIGN(state->range_sizek, chunk_sizek);
+ +      /* size is already a multiple of chunk_sizek: no hole needed */
+ +      if (range0_sizek == state->range_sizek) {
+ +              if (debug_print)
+ +                      printk(KERN_DEBUG "rangeX: %016lx - %016lx\n",
+ +                              range0_basek<<10,
+ +                              (range0_basek + state->range_sizek)<<10);
+ +              state->reg = range_to_mtrr(state->reg, range0_basek,
+ +                              state->range_sizek, MTRR_TYPE_WRBACK);
+ +              return 0;
+ +      }
+ +
+ +      /* range0 is the leading part made of whole chunks */
+ +      range0_sizek -= chunk_sizek;
+ +      if (range0_sizek && sizek) {
+ +          while (range0_basek + range0_sizek > (basek + sizek)) {
+ +              range0_sizek -= chunk_sizek;
+ +              if (!range0_sizek)
+ +                      break;
+ +          }
+ +      }
+ +
+ +      if (range0_sizek) {
+ +              if (debug_print)
+ +                      printk(KERN_DEBUG "range0: %016lx - %016lx\n",
+ +                              range0_basek<<10,
+ +                              (range0_basek + range0_sizek)<<10);
+ +              state->reg = range_to_mtrr(state->reg, range0_basek,
+ +                              range0_sizek, MTRR_TYPE_WRBACK);
+ +
+ +      }
+ +
+ +      /* the remainder is first tried as one full chunk */
+ +      range_basek = range0_basek + range0_sizek;
+ +      range_sizek = chunk_sizek;
+ +
+ +      /* the chunk ends inside the next range: cover its head here too */
+ +      if (range_basek + range_sizek > basek &&
+ +          range_basek + range_sizek <= (basek + sizek)) {
+ +              /* one hole */
+ +              second_basek = basek;
+ +              second_sizek = range_basek + range_sizek - basek;
+ +      }
+ +
+ +      /* if last piece, only could one hole near end */
+ +      if ((second_basek || !basek) &&
+ +          range_sizek - (state->range_sizek - range0_sizek) - second_sizek <
+ +          (chunk_sizek >> 1)) {
+ +              /*
+ +               * one hole in middle (second_sizek is non-zero) or at end
+ +               * (second_sizek is 0); only taken when the hole is smaller
+ +               * than half a chunk
+ +               */
+ +              hole_sizek = range_sizek - (state->range_sizek - range0_sizek)
+ +                               - second_sizek;
+ +              hole_basek = range_basek + range_sizek - hole_sizek
+ +                               - second_sizek;
+ +      } else {
+ +              /* fallback for big hole, or several holes */
+ +              range_sizek = state->range_sizek - range0_sizek;
+ +              second_basek = 0;
+ +              second_sizek = 0;
+ +      }
+ +
+ +      if (debug_print)
+ +              printk(KERN_DEBUG "range: %016lx - %016lx\n", range_basek<<10,
+ +                       (range_basek + range_sizek)<<10);
+ +      state->reg = range_to_mtrr(state->reg, range_basek, range_sizek,
+ +                                       MTRR_TYPE_WRBACK);
+ +      if (hole_sizek) {
+ +              if (debug_print)
+ +                      printk(KERN_DEBUG "hole: %016lx - %016lx\n",
+ +                               hole_basek<<10, (hole_basek + hole_sizek)<<10);
+ +              state->reg = range_to_mtrr(state->reg, hole_basek, hole_sizek,
+ +                                               MTRR_TYPE_UNCACHABLE);
+ +
+ +      }
+ +
+ +      return second_sizek;
+ +}
+ +
+ +/*
+ + * Feed one more range (given in page frames) into @state.  A range that
+ + * starts at or below 1 MiB, or directly after the pending range, simply
+ + * extends the pending range.  Otherwise the pending range is saved and
+ + * the new range becomes the pending one.
+ + */
+ +static void __init
+ +set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn,
+ +                 unsigned long size_pfn)
+ +{
+ +      unsigned long basek = base_pfn << (PAGE_SHIFT - 10);
+ +      unsigned long sizek = size_pfn << (PAGE_SHIFT - 10);
+ +      unsigned long second_sizek;
+ +
+ +      /* every variable range is already in use */
+ +      if (state->reg >= num_var_ranges)
+ +              return;
+ +
+ +      /* Extend the pending range when the new one can be merged into it */
+ +      if (basek <= 1024 ||
+ +          state->range_startk + state->range_sizek == basek) {
+ +              state->range_sizek = basek + sizek - state->range_startk;
+ +              return;
+ +      }
+ +
+ +      /* Save the pending range; part of the new one may get covered too */
+ +      second_sizek = state->range_sizek ?
+ +                      range_to_mtrr_with_hole(state, basek, sizek) : 0;
+ +
+ +      /* Whatever is left of the new range becomes the pending range */
+ +      state->range_sizek  = sizek - second_sizek;
+ +      state->range_startk = basek + second_sizek;
+ +}
+ +
+ +/* minimum size of an mtrr block that can take a hole */
+ +static u64 mtrr_chunk_size __initdata = (256ULL<<20);
+ +
+ +/* "mtrr_chunk_size" early parameter: the value is parsed by memparse() */
+ +static int __init parse_mtrr_chunk_size_opt(char *p)
+ +{
+ +      char *rest = p;
+ +
+ +      if (p == NULL)
+ +              return -EINVAL;
+ +
+ +      mtrr_chunk_size = memparse(p, &rest);
+ +      return 0;
+ +}
+ +early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt);
+ +
+ +/* granularity of mtrr blocks */
+ +static u64 mtrr_gran_size __initdata;
+ +
+ +/* "mtrr_gran_size" early parameter: the value is parsed by memparse() */
+ +static int __init parse_mtrr_gran_size_opt(char *p)
+ +{
+ +      char *rest = p;
+ +
+ +      if (p == NULL)
+ +              return -EINVAL;
+ +
+ +      mtrr_gran_size = memparse(p, &rest);
+ +      return 0;
+ +}
+ +early_param("mtrr_gran_size", parse_mtrr_gran_size_opt);
+ +
+ +static int nr_mtrr_spare_reg __initdata =
+ +                               CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT;
+ +
+ +/* "mtrr_spare_reg_nr" early parameter; without a value the default stays */
+ +static int __init parse_mtrr_spare_reg(char *arg)
+ +{
+ +      if (!arg)
+ +              return 0;
+ +
+ +      nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0);
+ +      return 0;
+ +}
+ +
+ +early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg);
+ +
+ +/*
+ + * Rebuild range_state[] from the nr_range entries of @range, using
+ + * @chunk_size and @gran_size (both in bytes).  Entries that end up unused
+ + * are zeroed.  Returns the number of entries that were used.
+ + */
+ +static int __init
+ +x86_setup_var_mtrrs(struct res_range *range, int nr_range,
+ +                  u64 chunk_size, u64 gran_size)
+ +{
+ +      struct var_mtrr_state var_state;
+ +      unsigned int reg;
+ +      int num_reg;
+ +      int i;
+ +
+ +      var_state.gran_sizek    = gran_size >> 10;
+ +      var_state.chunk_sizek   = chunk_size >> 10;
+ +      var_state.reg           = 0;
+ +      var_state.range_sizek   = 0;
+ +      var_state.range_startk  = 0;
+ +
+ +      memset(range_state, 0, sizeof(range_state));
+ +
+ +      /* Feed each range in; start and inclusive end become base and size */
+ +      i = 0;
+ +      while (i < nr_range) {
+ +              set_var_mtrr_range(&var_state, range[i].start,
+ +                                 range[i].end - range[i].start + 1);
+ +              i++;
+ +      }
+ +
+ +      /* Flush the range that is still pending */
+ +      if (var_state.range_sizek != 0)
+ +              range_to_mtrr_with_hole(&var_state, 0, 0);
+ +
+ +      num_reg = var_state.reg;
+ +
+ +      /* Zero the entries that were not needed */
+ +      for (reg = var_state.reg; reg < num_var_ranges; reg++)
+ +              save_var_mtrr(reg, 0, 0, 0);
+ +
+ +      return num_reg;
+ +}
+ +
+ +/* Outcome of one (gran_size, chunk_size) trial made by mtrr_cleanup() */
+ +struct mtrr_cleanup_result {
+ +      /* granularity that was tried, in KiB */
+ +      unsigned long gran_sizek;
+ +      /* chunk size that was tried, in KiB */
+ +      unsigned long chunk_sizek;
+ +      /* absolute difference in covered RAM versus the original, in KiB */
+ +      unsigned long lose_cover_sizek;
+ +      /* number of variable ranges the trial layout used */
+ +      unsigned int num_reg;
+ +      /* non-zero when the trial layout must not be used */
+ +      int bad;
+ +};
+ +
+ +/*
+ + * gran_size: 1M, 2M, ..., 2G
+ + * chunk size: gran_size, ..., 4G
+ + * so we need (2+13)*6
+ + *
+ + * That is 12 granularities with 13, 12, ..., 2 chunk sizes each:
+ + * 13 + 12 + ... + 2 = (2+13)*12/2 = 90 combinations.
+ + */
+ +#define NUM_RESULT    90
+ +/* shift that converts between page frame counts and KiB */
+ +#define PSHIFT                (PAGE_SHIFT - 10)
+ +
+ +static struct mtrr_cleanup_result __initdata result[NUM_RESULT];
+ +static struct res_range __initdata range_new[RANGE_NUM];
+ +/* smallest coverage loss seen so far, indexed by number of ranges used */
+ +static unsigned long __initdata min_loss_pfn[RANGE_NUM];
+ +
+ +/*
+ + * Try to replace the current variable MTRR layout with an equivalent one
+ + * built from write-back ranges plus small uncachable holes.
+ + *
+ + * Nothing is done unless is_cpu(INTEL) holds, enable_mtrr_cleanup is at
+ + * least 1, the default type is uncachable, there is at least one UC
+ + * variable entry and all non-empty entries are either WB or UC.
+ + *
+ + * If both mtrr_chunk_size and mtrr_gran_size are set, that combination is
+ + * tried first.  If it is unusable (or was not given), every gran/chunk
+ + * combination is evaluated and one that loses no coverage while leaving
+ + * nr_mtrr_spare_reg entries free is applied.
+ + *
+ + * @address_bits is passed through to set_var_mtrr_all().
+ + *
+ + * Returns 1 if a new layout was handed to set_var_mtrr_all(), else 0.
+ + */
+ +static int __init mtrr_cleanup(unsigned address_bits)
+ +{
+ +      unsigned long extra_remove_base, extra_remove_size;
+ +      unsigned long i, base, size, def, dummy;
+ +      mtrr_type type;
+ +      int nr_range, nr_range_new;
+ +      u64 chunk_size, gran_size;
+ +      unsigned long range_sums, range_sums_new;
+ +      int index_good;
+ +      int num_reg_good;
+ +
+ +      /* extra one for all 0 */
+ +      int num[MTRR_NUM_TYPES + 1];
+ +
+ +      if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1)
+ +              return 0;
+ +      rdmsr(MTRRdefType_MSR, def, dummy);
+ +      def &= 0xff;
+ +      if (def != MTRR_TYPE_UNCACHABLE)
+ +              return 0;
+ +
+ +      /* get it and store it aside */
+ +      memset(range_state, 0, sizeof(range_state));
+ +      for (i = 0; i < num_var_ranges; i++) {
+ +              mtrr_if->get(i, &base, &size, &type);
+ +              range_state[i].base_pfn = base;
+ +              range_state[i].size_pfn = size;
+ +              range_state[i].type = type;
+ +      }
+ +
+ +      /* check entries number */
+ +      memset(num, 0, sizeof(num));
+ +      for (i = 0; i < num_var_ranges; i++) {
+ +              type = range_state[i].type;
+ +              size = range_state[i].size_pfn;
+ +              if (type >= MTRR_NUM_TYPES)
+ +                      continue;
+ +              /* empty entries are counted in the extra slot */
+ +              if (!size)
+ +                      type = MTRR_NUM_TYPES;
+ +              num[type]++;
+ +      }
+ +
+ +      /* check if we got UC entries */
+ +      if (!num[MTRR_TYPE_UNCACHABLE])
+ +              return 0;
+ +
+ +      /* check if we only had WB and UC */
+ +      if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
+ +              num_var_ranges - num[MTRR_NUM_TYPES])
+ +              return 0;
+ +
+ +      memset(range, 0, sizeof(range));
+ +      /*
+ +       * Always give extra_remove_base a defined value: it is passed to
+ +       * x86_get_mtrr_mem_range() below even when mtrr_tom2 is not set.
+ +       */
+ +      extra_remove_base = 1 << (32 - PAGE_SHIFT);
+ +      extra_remove_size = 0;
+ +      if (mtrr_tom2)
+ +              extra_remove_size =
+ +                      (mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base;
+ +      nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base,
+ +                                        extra_remove_size);
+ +      range_sums = sum_ranges(range, nr_range);
+ +      printk(KERN_INFO "total RAM coverred: %ldM\n",
+ +             range_sums >> (20 - PAGE_SHIFT));
+ +
+ +      /* first try the combination given on the command line, if any */
+ +      if (mtrr_chunk_size && mtrr_gran_size) {
+ +              int num_reg;
+ +
+ +              debug_print = 1;
+ +              /* convert ranges to var ranges state */
+ +              num_reg = x86_setup_var_mtrrs(range, nr_range, mtrr_chunk_size,
+ +                                            mtrr_gran_size);
+ +
+ +              /* we got new setting in range_state, check it */
+ +              memset(range_new, 0, sizeof(range_new));
+ +              nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
+ +                                                    extra_remove_base,
+ +                                                    extra_remove_size);
+ +              range_sums_new = sum_ranges(range_new, nr_range_new);
+ +
+ +              i = 0;
+ +              result[i].chunk_sizek = mtrr_chunk_size >> 10;
+ +              result[i].gran_sizek = mtrr_gran_size >> 10;
+ +              result[i].num_reg = num_reg;
+ +              /* covering more than the original layout did is not allowed */
+ +              if (range_sums < range_sums_new) {
+ +                      result[i].lose_cover_sizek =
+ +                              (range_sums_new - range_sums) << PSHIFT;
+ +                      result[i].bad = 1;
+ +              } else
+ +                      result[i].lose_cover_sizek =
+ +                              (range_sums - range_sums_new) << PSHIFT;
+ +
+ +              printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t",
+ +                       result[i].bad?"*BAD*":" ", result[i].gran_sizek >> 10,
+ +                       result[i].chunk_sizek >> 10);
+ +              printk(KERN_CONT "num_reg: %d  \tlose cover RAM: %s%ldM \n",
+ +                       result[i].num_reg, result[i].bad?"-":"",
+ +                       result[i].lose_cover_sizek >> 10);
+ +              if (!result[i].bad) {
+ +                      set_var_mtrr_all(address_bits);
+ +                      return 1;
+ +              }
+ +              printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, "
+ +                     "will find optimal one\n");
+ +              debug_print = 0;
+ +              memset(result, 0, sizeof(result[0]));
+ +      }
+ +
+ +      /* evaluate every gran_size/chunk_size combination */
+ +      i = 0;
+ +      memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn));
+ +      memset(result, 0, sizeof(result));
+ +      for (gran_size = (1ULL<<20); gran_size < (1ULL<<32); gran_size <<= 1) {
+ +              for (chunk_size = gran_size; chunk_size < (1ULL<<33);
+ +                   chunk_size <<= 1) {
+ +                      int num_reg;
+ +
+ +                      if (debug_print)
+ +                              printk(KERN_INFO
+ +                             "\ngran_size: %lldM   chunk_size_size: %lldM\n",
+ +                                     gran_size >> 20, chunk_size >> 20);
+ +                      if (i >= NUM_RESULT)
+ +                              continue;
+ +
+ +                      /* convert ranges to var ranges state */
+ +                      num_reg = x86_setup_var_mtrrs(range, nr_range,
+ +                                                       chunk_size, gran_size);
+ +
+ +                      /* we got new setting in range_state, check it */
+ +                      memset(range_new, 0, sizeof(range_new));
+ +                      nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
+ +                                       extra_remove_base, extra_remove_size);
+ +                      range_sums_new = sum_ranges(range_new, nr_range_new);
+ +
+ +                      result[i].chunk_sizek = chunk_size >> 10;
+ +                      result[i].gran_sizek = gran_size >> 10;
+ +                      result[i].num_reg = num_reg;
+ +                      if (range_sums < range_sums_new) {
+ +                              result[i].lose_cover_sizek =
+ +                                      (range_sums_new - range_sums) << PSHIFT;
+ +                              result[i].bad = 1;
+ +                      } else
+ +                              result[i].lose_cover_sizek =
+ +                                      (range_sums - range_sums_new) << PSHIFT;
+ +
+ +                      /* double check it */
+ +                      if (!result[i].bad && !result[i].lose_cover_sizek) {
+ +                              if (nr_range_new != nr_range ||
+ +                                      memcmp(range, range_new, sizeof(range)))
+ +                                              result[i].bad = 1;
+ +                      }
+ +
+ +                      /* remember the smallest loss per register count */
+ +                      if (!result[i].bad && (range_sums - range_sums_new <
+ +                                             min_loss_pfn[num_reg])) {
+ +                              min_loss_pfn[num_reg] =
+ +                                      range_sums - range_sums_new;
+ +                      }
+ +                      i++;
+ +              }
+ +      }
+ +
+ +      /* print out all */
+ +      for (i = 0; i < NUM_RESULT; i++) {
+ +              printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t",
+ +                     result[i].bad?"*BAD* ":" ", result[i].gran_sizek >> 10,
+ +                     result[i].chunk_sizek >> 10);
+ +              printk(KERN_CONT "num_reg: %d \tlose RAM: %s%ldM\n",
+ +                     result[i].num_reg, result[i].bad?"-":"",
+ +                     result[i].lose_cover_sizek >> 10);
+ +      }
+ +
+ +      /* try to find the optimal index */
+ +      if (nr_mtrr_spare_reg >= num_var_ranges)
+ +              nr_mtrr_spare_reg = num_var_ranges - 1;
+ +      num_reg_good = -1;
+ +      /* largest register count within budget that loses no coverage */
+ +      for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) {
+ +              if (!min_loss_pfn[i]) {
+ +                      num_reg_good = i;
+ +                      break;
+ +              }
+ +      }
+ +
+ +      index_good = -1;
+ +      if (num_reg_good != -1) {
+ +              for (i = 0; i < NUM_RESULT; i++) {
+ +                      if (!result[i].bad &&
+ +                          result[i].num_reg == num_reg_good &&
+ +                          !result[i].lose_cover_sizek) {
+ +                              index_good = i;
+ +                              break;
+ +                      }
+ +              }
+ +      }
+ +
+ +      if (index_good != -1) {
+ +              printk(KERN_INFO "Found optimal setting for mtrr clean up\n");
+ +              i = index_good;
+ +              printk(KERN_INFO "gran_size: %ldM \tchunk_size: %ldM \t",
+ +                              result[i].gran_sizek >> 10,
+ +                              result[i].chunk_sizek >> 10);
+ +              printk(KERN_CONT "num_reg: %d \tlose RAM: %ldM\n",
+ +                              result[i].num_reg,
+ +                              result[i].lose_cover_sizek >> 10);
+ +              /* convert ranges to var ranges state */
+ +              chunk_size = result[i].chunk_sizek;
+ +              chunk_size <<= 10;
+ +              gran_size = result[i].gran_sizek;
+ +              gran_size <<= 10;
+ +              debug_print = 1;
+ +              x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
+ +              set_var_mtrr_all(address_bits);
+ +              return 1;
+ +      }
+ +
+ +      printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n");
+ +      printk(KERN_INFO "please specify mtrr_gran_size/mtrr_chunk_size\n");
+ +
+ +      return 0;
+ +}
+ +#else
+ +/* Stub used in the #else branch: never changes anything, so returns 0 */
+ +static int __init mtrr_cleanup(unsigned address_bits)
+ +{
+ +      return 0;
+ +}
+ +#endif
+ +
+ +static int __initdata changed_by_mtrr_cleanup;
+ +
   static int disable_mtrr_trim;
   
   static int __init disable_mtrr_trim_setup(char *str)
@@@ -1430,19 -648,6 +1430,19 @@@ int __init amd_special_default_mtrr(voi
         return 0;
   }
   
+ +/*
+ + * Ask e820_update_range() to turn the page frames from @start_pfn up to
+ + * (not including) @limit_pfn from E820_RAM into E820_RESERVED, and
+ + * return its result.
+ + */
+ +static u64 __init real_trim_memory(unsigned long start_pfn,
+ +                                 unsigned long limit_pfn)
+ +{
+ +      u64 trim_start = (u64)start_pfn << PAGE_SHIFT;
+ +      u64 trim_end = (u64)limit_pfn << PAGE_SHIFT;
+ +
+ +      return e820_update_range(trim_start, trim_end - trim_start,
+ +                              E820_RAM, E820_RESERVED);
+ +}
   /**
    * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs
    * @end_pfn: ending page frame number
@@@ -1458,11 -663,8 +1458,11 @@@ int __init mtrr_trim_uncached_memory(un
   {
         unsigned long i, base, size, highest_pfn = 0, def, dummy;
         mtrr_type type;
- -      u64 trim_start, trim_size;
+ +      int nr_range;
+ +      u64 total_trim_size;
   
+ +      /* extra one for all 0 */
+ +      int num[MTRR_NUM_TYPES + 1];
         /*
          * Make sure we only trim uncachable memory on machines that
          * support the Intel MTRR architecture:
@@@ -1474,22 -676,14 +1474,22 @@@
         if (def != MTRR_TYPE_UNCACHABLE)
                 return 0;
   
- -      if (amd_special_default_mtrr())
- -              return 0;
+ +      /* get it and store it aside */
+ +      memset(range_state, 0, sizeof(range_state));
+ +      for (i = 0; i < num_var_ranges; i++) {
+ +              mtrr_if->get(i, &base, &size, &type);
+ +              range_state[i].base_pfn = base;
+ +              range_state[i].size_pfn = size;
+ +              range_state[i].type = type;
+ +      }
   
         /* Find highest cached pfn */
         for (i = 0; i < num_var_ranges; i++) {
- -              mtrr_if->get(i, &base, &size, &type);
+ +              type = range_state[i].type;
                 if (type != MTRR_TYPE_WRBACK)
                         continue;
+ +              base = range_state[i].base_pfn;
+ +              size = range_state[i].size_pfn;
                 if (highest_pfn < base + size)
                         highest_pfn = base + size;
         }
@@@ -1504,65 -698,22 +1504,65 @@@
                 return 0;
         }
   
- -      if (highest_pfn < end_pfn) {
+ +      /* check entries number */
+ +      memset(num, 0, sizeof(num));
+ +      for (i = 0; i < num_var_ranges; i++) {
+ +              type = range_state[i].type;
+ +              if (type >= MTRR_NUM_TYPES)
+ +                      continue;
+ +              size = range_state[i].size_pfn;
+ +              if (!size)
+ +                      type = MTRR_NUM_TYPES;
+ +              num[type]++;
+ +      }
+ +
+ +      /* no entry for WB? */
+ +      if (!num[MTRR_TYPE_WRBACK])
+ +              return 0;
+ +
+ +      /* check if we only had WB and UC */
+ +      if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
+ +              num_var_ranges - num[MTRR_NUM_TYPES])
+ +              return 0;
+ +
+ +      memset(range, 0, sizeof(range));
+ +      nr_range = 0;
+ +      if (mtrr_tom2) {
+ +              range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT));
+ +              range[nr_range].end = (mtrr_tom2 >> PAGE_SHIFT) - 1;
+ +              if (highest_pfn < range[nr_range].end + 1)
+ +                      highest_pfn = range[nr_range].end + 1;
+ +              nr_range++;
+ +      }
+ +      nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0);
+ +
+ +      total_trim_size = 0;
+ +      /* check the head */
+ +      if (range[0].start)
+ +              total_trim_size += real_trim_memory(0, range[0].start);
+ +      /* check the holes */
+ +      for (i = 0; i < nr_range - 1; i++) {
+ +              if (range[i].end + 1 < range[i+1].start)
+ +                      total_trim_size += real_trim_memory(range[i].end + 1,
+ +                                                          range[i+1].start);
+ +      }
+ +      /* check the top */
+ +      i = nr_range - 1;
+ +      if (range[i].end + 1 < end_pfn)
+ +              total_trim_size += real_trim_memory(range[i].end + 1,
+ +                                                       end_pfn);
+ +
+ +      if (total_trim_size) {
                 printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover"
- -                      " all of memory, losing %luMB of RAM.\n",
- -                      (end_pfn - highest_pfn) >> (20 - PAGE_SHIFT));
+ +                      " all of memory, losing %lluMB of RAM.\n",
+ +                      total_trim_size >> 20);
   
- -              WARN_ON(1);
+ +              if (!changed_by_mtrr_cleanup)
+ +                      WARN_ON(1);
   
                 printk(KERN_INFO "update e820 for mtrr\n");
- -              trim_start = highest_pfn;
- -              trim_start <<= PAGE_SHIFT;
- -              trim_size = end_pfn;
- -              trim_size <<= PAGE_SHIFT;
- -              trim_size -= trim_start;
- -              update_memory_range(trim_start, trim_size, E820_RAM,
- -                                      E820_RESERVED);
                 update_e820();
+ +
                 return 1;
         }
   
@@@ -1578,21 -729,18 +1578,21 @@@
    */
   void __init mtrr_bp_init(void)
   {
+ +      u32 phys_addr;
         init_ifs();
   
+ +      phys_addr = 32;
+ +
         if (cpu_has_mtrr) {
                 mtrr_if = &generic_mtrr_ops;
                 size_or_mask = 0xff000000;      /* 36 bits */
                 size_and_mask = 0x00f00000;
+ +              phys_addr = 36;
   
                 /* This is an AMD specific MSR, but we assume(hope?) that
                    Intel will implement it to when they extend the address
                    bus of the Xeon. */
                 if (cpuid_eax(0x80000000) >= 0x80000008) {
- -                      u32 phys_addr;
                         phys_addr = cpuid_eax(0x80000008) & 0xff;
                         /* CPUID workaround for Intel 0F33/0F34 CPU */
                         if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
@@@ -1610,7 -758,6 +1610,7 @@@
                            don't support PAE */
                         size_or_mask = 0xfff00000;      /* 32 bits */
                         size_and_mask = 0;
+ +                      phys_addr = 32;
                 }
         } else {
                 switch (boot_cpu_data.x86_vendor) {
@@@ -1644,15 -791,8 +1644,15 @@@
         if (mtrr_if) {
                 set_num_var_ranges();
                 init_table();
- -              if (use_intel())
+ +              if (use_intel()) {
                         get_mtrr_state();
+ +
+ +                      if (mtrr_cleanup(phys_addr)) {
+ +                              changed_by_mtrr_cleanup = 1;
+ +                              mtrr_if->set_all();
+ +                      }
+ +
+ +              }
         }
   }
   
@@@ -1682,17 -822,16 +1682,17 @@@ void mtrr_ap_init(void
    */
   void mtrr_save_state(void)
   {
-       smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1, 1);
+       smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1);
   }
   
   static int __init mtrr_init_finialize(void)
   {
         if (!mtrr_if)
                 return 0;
- -      if (use_intel())
- -              mtrr_state_warn();
- -      else {
+ +      if (use_intel()) {
+ +              if (!changed_by_mtrr_cleanup)
+ +                      mtrr_state_warn();
+ +      } else {
                 /* The CPUs haven't MTRR and seem to not support SMP. They have
                  * specific drivers, we use a tricky method to support
                  * suspend/resume for them.
diff --combined arch/x86/kernel/cpu/perfctr-watchdog.c

index 2e9bef6e3aa3d8ee0f778164725f024b6b7c388f,58043f06d7e22ed2f4ad727c7b6a9e9b5fb902fb..6d4bdc02388a2abf86eed64b2d596138f96225c4
--- 1/arch/x86/kernel/cpu/perfctr-watchdog.c
--- 2/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@@ -1,15 -1,11 +1,15 @@@
- -/* local apic based NMI watchdog for various CPUs.
- -   This file also handles reservation of performance counters for coordination
- -   with other users (like oprofile).
- -
- -   Note that these events normally don't tick when the CPU idles. This means
- -   the frequency varies with CPU load.
- -
- -   Original code for K7/P6 written by Keith Owens */
+ +/*
+ + * local apic based NMI watchdog for various CPUs.
+ + *
+ + * This file also handles reservation of performance counters for coordination
+ + * with other users (like oprofile).
+ + *
+ + * Note that these events normally don't tick when the CPU idles. This means
+ + * the frequency varies with CPU load.
+ + *
+ + * Original code for K7/P6 written by Keith Owens
+ + *
+ + */
   
   #include <linux/percpu.h>
   #include <linux/module.h>
@@@ -40,16 -36,12 +40,16 @@@ struct wd_ops 
   
   static const struct wd_ops *wd_ops;
   
- -/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
- - * offset from MSR_P4_BSU_ESCR0.  It will be the max for all platforms (for now)
+ +/*
+ + * this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and its
+ + * offset from MSR_P4_BSU_ESCR0.
+ + *
+ + * It will be the max for all platforms (for now)
    */
   #define NMI_MAX_COUNTER_BITS 66
   
- -/* perfctr_nmi_owner tracks the ownership of the perfctr registers:
+ +/*
+ + * perfctr_nmi_owner tracks the ownership of the perfctr registers:
    * evtsel_nmi_owner tracks the ownership of the event selection
    * - different performance counters/ event selection may be reserved for
    *   different subsystems this reservation system just tries to coordinate
@@@ -81,10 -73,8 +81,10 @@@ static inline unsigned int nmi_perfctr_
         return 0;
   }
   
- -/* converts an msr to an appropriate reservation bit */
- -/* returns the bit offset of the event selection register */
+ +/*
+ + * converts an msr to an appropriate reservation bit
+ + * returns the bit offset of the event selection register
+ + */
   static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
   {
         /* returns the bit offset of the event selection register */
@@@ -124,7 -114,6 +124,7 @@@ int avail_to_resrv_perfctr_nmi(unsigne
   
         return (!test_bit(counter, perfctr_nmi_owner));
   }
+ +EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
   
   int reserve_perfctr_nmi(unsigned int msr)
   {
@@@ -139,7 -128,6 +139,7 @@@
                 return 1;
         return 0;
   }
+ +EXPORT_SYMBOL(reserve_perfctr_nmi);
   
   void release_perfctr_nmi(unsigned int msr)
   {
@@@ -152,7 -140,6 +152,7 @@@
   
         clear_bit(counter, perfctr_nmi_owner);
   }
+ +EXPORT_SYMBOL(release_perfctr_nmi);
   
   int reserve_evntsel_nmi(unsigned int msr)
   {
@@@ -167,7 -154,6 +167,7 @@@
                 return 1;
         return 0;
   }
+ +EXPORT_SYMBOL(reserve_evntsel_nmi);
   
   void release_evntsel_nmi(unsigned int msr)
   {
@@@ -180,6 -166,11 +180,6 @@@
   
         clear_bit(counter, evntsel_nmi_owner);
   }
- -
- -EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
- -EXPORT_SYMBOL(reserve_perfctr_nmi);
- -EXPORT_SYMBOL(release_perfctr_nmi);
- -EXPORT_SYMBOL(reserve_evntsel_nmi);
   EXPORT_SYMBOL(release_evntsel_nmi);
   
   void disable_lapic_nmi_watchdog(void)
@@@ -189,10 -180,8 +189,10 @@@
         if (atomic_read(&nmi_active) <= 0)
                 return;
   
-       on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
+       on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
- -      wd_ops->unreserve();
+ +
+ +      if (wd_ops)
+ +              wd_ops->unreserve();
   
         BUG_ON(atomic_read(&nmi_active) != 0);
   }
@@@ -213,7 -202,7 +213,7 @@@ void enable_lapic_nmi_watchdog(void
                 return;
         }
   
-       on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
+       on_each_cpu(setup_apic_nmi_watchdog, NULL, 1);
         touch_nmi_watchdog();
   }
   
@@@ -243,8 -232,8 +243,8 @@@ static unsigned int adjust_for_32bit_ct
         return retval;
   }
   
- -static void
- -write_watchdog_counter(unsigned int perfctr_msr, const char *descr, unsigned nmi_hz)
+ +static void write_watchdog_counter(unsigned int perfctr_msr,
+ +                              const char *descr, unsigned nmi_hz)
   {
         u64 count = (u64)cpu_khz * 1000;
   
@@@ -255,7 -244,7 +255,7 @@@
   }
   
   static void write_watchdog_counter32(unsigned int perfctr_msr,
- -              const char *descr, unsigned nmi_hz)
+ +                              const char *descr, unsigned nmi_hz)
   {
         u64 count = (u64)cpu_khz * 1000;
   
@@@ -265,10 -254,9 +265,10 @@@
         wrmsr(perfctr_msr, (u32)(-count), 0);
   }
   
- -/* AMD K7/K8/Family10h/Family11h support. AMD keeps this interface
- -   nicely stable so there is not much variety */
- -
+ +/*
+ + * AMD K7/K8/Family10h/Family11h support.
+ + * AMD keeps this interface nicely stable so there is not much variety
+ + */
   #define K7_EVNTSEL_ENABLE     (1 << 22)
   #define K7_EVNTSEL_INT                (1 << 20)
   #define K7_EVNTSEL_OS         (1 << 17)
@@@ -301,7 -289,7 +301,7 @@@ static int setup_k7_watchdog(unsigned n
   
         wd->perfctr_msr = perfctr_msr;
         wd->evntsel_msr = evntsel_msr;
- -      wd->cccr_msr = 0;  //unused
+ +      wd->cccr_msr = 0;  /* unused */
         return 1;
   }
   
@@@ -337,19 -325,18 +337,19 @@@ static void single_msr_rearm(struct nmi
   }
   
   static const struct wd_ops k7_wd_ops = {
- -      .reserve = single_msr_reserve,
- -      .unreserve = single_msr_unreserve,
- -      .setup = setup_k7_watchdog,
- -      .rearm = single_msr_rearm,
- -      .stop = single_msr_stop_watchdog,
- -      .perfctr = MSR_K7_PERFCTR0,
- -      .evntsel = MSR_K7_EVNTSEL0,
- -      .checkbit = 1ULL<<47,
+ +      .reserve        = single_msr_reserve,
+ +      .unreserve      = single_msr_unreserve,
+ +      .setup          = setup_k7_watchdog,
+ +      .rearm          = single_msr_rearm,
+ +      .stop           = single_msr_stop_watchdog,
+ +      .perfctr        = MSR_K7_PERFCTR0,
+ +      .evntsel        = MSR_K7_EVNTSEL0,
+ +      .checkbit       = 1ULL << 47,
   };
   
- -/* Intel Model 6 (PPro+,P2,P3,P-M,Core1) */
- -
+ +/*
+ + * Intel Model 6 (PPro+,P2,P3,P-M,Core1)
+ + */
   #define P6_EVNTSEL0_ENABLE    (1 << 22)
   #define P6_EVNTSEL_INT                (1 << 20)
   #define P6_EVNTSEL_OS         (1 << 17)
@@@ -385,58 -372,52 +385,58 @@@ static int setup_p6_watchdog(unsigned n
   
         wd->perfctr_msr = perfctr_msr;
         wd->evntsel_msr = evntsel_msr;
- -      wd->cccr_msr = 0;  //unused
+ +      wd->cccr_msr = 0;  /* unused */
         return 1;
   }
   
   static void p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
   {
- -      /* P6 based Pentium M need to re-unmask
+ +      /*
+ +       * P6 based Pentium M need to re-unmask
          * the apic vector but it doesn't hurt
          * other P6 variant.
- -       * ArchPerfom/Core Duo also needs this */
+ +       * ArchPerfom/Core Duo also needs this
+ +       */
         apic_write(APIC_LVTPC, APIC_DM_NMI);
+ +
         /* P6/ARCH_PERFMON has 32 bit counter write */
         write_watchdog_counter32(wd->perfctr_msr, NULL,nmi_hz);
   }
   
   static const struct wd_ops p6_wd_ops = {
- -      .reserve = single_msr_reserve,
- -      .unreserve = single_msr_unreserve,
- -      .setup = setup_p6_watchdog,
- -      .rearm = p6_rearm,
- -      .stop = single_msr_stop_watchdog,
- -      .perfctr = MSR_P6_PERFCTR0,
- -      .evntsel = MSR_P6_EVNTSEL0,
- -      .checkbit = 1ULL<<39,
+ +      .reserve        = single_msr_reserve,
+ +      .unreserve      = single_msr_unreserve,
+ +      .setup          = setup_p6_watchdog,
+ +      .rearm          = p6_rearm,
+ +      .stop           = single_msr_stop_watchdog,
+ +      .perfctr        = MSR_P6_PERFCTR0,
+ +      .evntsel        = MSR_P6_EVNTSEL0,
+ +      .checkbit       = 1ULL << 39,
   };
   
- -/* Intel P4 performance counters. By far the most complicated of all. */
- -
- -#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7)
- -#define P4_ESCR_EVENT_SELECT(N)       ((N)<<25)
- -#define P4_ESCR_OS            (1<<3)
- -#define P4_ESCR_USR           (1<<2)
- -#define P4_CCCR_OVF_PMI0      (1<<26)
- -#define P4_CCCR_OVF_PMI1      (1<<27)
- -#define P4_CCCR_THRESHOLD(N)  ((N)<<20)
- -#define P4_CCCR_COMPLEMENT    (1<<19)
- -#define P4_CCCR_COMPARE               (1<<18)
- -#define P4_CCCR_REQUIRED      (3<<16)
- -#define P4_CCCR_ESCR_SELECT(N)        ((N)<<13)
- -#define P4_CCCR_ENABLE                (1<<12)
- -#define P4_CCCR_OVF           (1<<31)
- -
- -/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
- -   CRU_ESCR0 (with any non-null event selector) through a complemented
- -   max threshold. [IA32-Vol3, Section 14.9.9] */
+ +/*
+ + * Intel P4 performance counters.
+ + * By far the most complicated of all.
+ + */
+ +#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1 << 7)
+ +#define P4_ESCR_EVENT_SELECT(N)       ((N) << 25)
+ +#define P4_ESCR_OS            (1 << 3)
+ +#define P4_ESCR_USR           (1 << 2)
+ +#define P4_CCCR_OVF_PMI0      (1 << 26)
+ +#define P4_CCCR_OVF_PMI1      (1 << 27)
+ +#define P4_CCCR_THRESHOLD(N)  ((N) << 20)
+ +#define P4_CCCR_COMPLEMENT    (1 << 19)
+ +#define P4_CCCR_COMPARE               (1 << 18)
+ +#define P4_CCCR_REQUIRED      (3 << 16)
+ +#define P4_CCCR_ESCR_SELECT(N)        ((N) << 13)
+ +#define P4_CCCR_ENABLE                (1 << 12)
+ +#define P4_CCCR_OVF           (1 << 31)
   
+ +/*
+ + * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
+ + * CRU_ESCR0 (with any non-null event selector) through a complemented
+ + * max threshold. [IA32-Vol3, Section 14.9.9]
+ + */
   static int setup_p4_watchdog(unsigned nmi_hz)
   {
         unsigned int perfctr_msr, evntsel_msr, cccr_msr;
@@@ -461,8 -442,7 +461,8 @@@
   #endif
                 ht_num = 0;
   
- -      /* performance counters are shared resources
+ +      /*
+ +       * performance counters are shared resources
          * assign each hyperthread its own set
          * (re-use the ESCR0 register, seems safe
          * and keeps the cccr_val the same)
@@@ -560,21 -540,20 +560,21 @@@ static void p4_rearm(struct nmi_watchdo
   }
   
   static const struct wd_ops p4_wd_ops = {
- -      .reserve = p4_reserve,
- -      .unreserve = p4_unreserve,
- -      .setup = setup_p4_watchdog,
- -      .rearm = p4_rearm,
- -      .stop = stop_p4_watchdog,
+ +      .reserve        = p4_reserve,
+ +      .unreserve      = p4_unreserve,
+ +      .setup          = setup_p4_watchdog,
+ +      .rearm          = p4_rearm,
+ +      .stop           = stop_p4_watchdog,
         /* RED-PEN this is wrong for the other sibling */
- -      .perfctr = MSR_P4_BPU_PERFCTR0,
- -      .evntsel = MSR_P4_BSU_ESCR0,
- -      .checkbit = 1ULL<<39,
+ +      .perfctr        = MSR_P4_BPU_PERFCTR0,
+ +      .evntsel        = MSR_P4_BSU_ESCR0,
+ +      .checkbit       = 1ULL << 39,
   };
   
- -/* Watchdog using the Intel architected PerfMon. Used for Core2 and hopefully
- -   all future Intel CPUs. */
- -
+ +/*
+ + * Watchdog using the Intel architected PerfMon.
+ + * Used for Core2 and hopefully all future Intel CPUs.
+ + */
   #define ARCH_PERFMON_NMI_EVENT_SEL    ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
   #define ARCH_PERFMON_NMI_EVENT_UMASK  ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
   
@@@ -620,19 -599,19 +620,19 @@@ static int setup_intel_arch_watchdog(un
   
         wd->perfctr_msr = perfctr_msr;
         wd->evntsel_msr = evntsel_msr;
- -      wd->cccr_msr = 0;  //unused
+ +      wd->cccr_msr = 0;  /* unused */
         intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1);
         return 1;
   }
   
   static struct wd_ops intel_arch_wd_ops __read_mostly = {
- -      .reserve = single_msr_reserve,
- -      .unreserve = single_msr_unreserve,
- -      .setup = setup_intel_arch_watchdog,
- -      .rearm = p6_rearm,
- -      .stop = single_msr_stop_watchdog,
- -      .perfctr = MSR_ARCH_PERFMON_PERFCTR1,
- -      .evntsel = MSR_ARCH_PERFMON_EVENTSEL1,
+ +      .reserve        = single_msr_reserve,
+ +      .unreserve      = single_msr_unreserve,
+ +      .setup          = setup_intel_arch_watchdog,
+ +      .rearm          = p6_rearm,
+ +      .stop           = single_msr_stop_watchdog,
+ +      .perfctr        = MSR_ARCH_PERFMON_PERFCTR1,
+ +      .evntsel        = MSR_ARCH_PERFMON_EVENTSEL1,
   };
   
   static void probe_nmi_watchdog(void)
@@@ -645,10 -624,8 +645,10 @@@
                 wd_ops = &k7_wd_ops;
                 break;
         case X86_VENDOR_INTEL:
- -              /* Work around Core Duo (Yonah) errata AE49 where perfctr1
- -                 doesn't have a working enable bit. */
+ +              /*
+ +               * Work around Core Duo (Yonah) errata AE49 where perfctr1
+ +               * doesn't have a working enable bit.
+ +               */
                 if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) {
                         intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0;
                         intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0;
@@@ -659,7 -636,7 +659,7 @@@
                 }
                 switch (boot_cpu_data.x86) {
                 case 6:
- -                      if (boot_cpu_data.x86_model > 0xd)
+ +                      if (boot_cpu_data.x86_model > 13)
                                 return;
   
                         wd_ops = &p6_wd_ops;
@@@ -720,11 -697,10 +720,11 @@@ int lapic_wd_event(unsigned nmi_hz
   {
         struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
         u64 ctr;
+ +
         rdmsrl(wd->perfctr_msr, ctr);
- -      if (ctr & wd_ops->checkbit) { /* perfctr still running? */
+ +      if (ctr & wd_ops->checkbit) /* perfctr still running? */
                 return 0;
- -      }
+ +
         wd_ops->rearm(wd, nmi_hz);
         return 1;
   }
diff --combined arch/x86/kernel/cpuid.c

index 71f1c2654bec27096042a66d2e5f1dfe7729a4b5,336dd43c915811943f4cc879d7e8990f9e7f16d3..2de5fa2bbf77061c31d686035e0d48ca266c49cf
--- 1/arch/x86/kernel/cpuid.c
--- 2/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@@ -33,7 -33,6 +33,7 @@@
   #include <linux/init.h>
   #include <linux/poll.h>
   #include <linux/smp.h>
+ +#include <linux/smp_lock.h>
   #include <linux/major.h>
   #include <linux/fs.h>
   #include <linux/smp_lock.h>
@@@ -96,7 -95,7 +96,7 @@@ static ssize_t cpuid_read(struct file *
         for (; count; count -= 16) {
                 cmd.eax = pos;
                 cmd.ecx = pos >> 32;
-               smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1, 1);
+               smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1);
                 if (copy_to_user(tmp, &cmd, 16))
                         return -EFAULT;
                 tmp += 16;
@@@ -108,23 -107,15 +108,23 @@@
   
   static int cpuid_open(struct inode *inode, struct file *file)
   {
- -      unsigned int cpu = iminor(file->f_path.dentry->d_inode);
- -      struct cpuinfo_x86 *c = &cpu_data(cpu);
- -
- -      if (cpu >= NR_CPUS || !cpu_online(cpu))
- -              return -ENXIO;  /* No such CPU */
+ +      unsigned int cpu;
+ +      struct cpuinfo_x86 *c;
+ +      int ret = 0;
+ +      
+ +      lock_kernel();
+ +
+ +      cpu = iminor(file->f_path.dentry->d_inode);
+ +      if (cpu >= NR_CPUS || !cpu_online(cpu)) {
+ +              ret = -ENXIO;   /* No such CPU */
+ +              goto out;
+ +      }
+ +      c = &cpu_data(cpu);
         if (c->cpuid_level < 0)
- -              return -EIO;    /* CPUID not supported */
- -
- -      return 0;
+ +              ret = -EIO;     /* CPUID not supported */
+ +out:
+ +      unlock_kernel();
+ +      return ret;
   }
   
   /*
diff --combined arch/x86/kernel/entry_64.S

index ba41bf42748d7657548b8e39191d9496f86a9480,6d1fe270a96dd8656a77a4c9697a611d9d0c49f5..ae63e584c340cbafd342af95e18bce946b474525
--- 1/arch/x86/kernel/entry_64.S
--- 2/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@@ -51,121 -51,16 +51,121 @@@
   #include <asm/page.h>
   #include <asm/irqflags.h>
   #include <asm/paravirt.h>
+ +#include <asm/ftrace.h>
   
         .code64
   
+ +#ifdef CONFIG_FTRACE
+ +#ifdef CONFIG_DYNAMIC_FTRACE
+ +ENTRY(mcount)
+ +
+ +      subq $0x38, %rsp
+ +      movq %rax, (%rsp)
+ +      movq %rcx, 8(%rsp)
+ +      movq %rdx, 16(%rsp)
+ +      movq %rsi, 24(%rsp)
+ +      movq %rdi, 32(%rsp)
+ +      movq %r8, 40(%rsp)
+ +      movq %r9, 48(%rsp)
+ +
+ +      movq 0x38(%rsp), %rdi
+ +      subq $MCOUNT_INSN_SIZE, %rdi
+ +
+ +.globl mcount_call
+ +mcount_call:
+ +      call ftrace_stub
+ +
+ +      movq 48(%rsp), %r9
+ +      movq 40(%rsp), %r8
+ +      movq 32(%rsp), %rdi
+ +      movq 24(%rsp), %rsi
+ +      movq 16(%rsp), %rdx
+ +      movq 8(%rsp), %rcx
+ +      movq (%rsp), %rax
+ +      addq $0x38, %rsp
+ +
+ +      retq
+ +END(mcount)
+ +
+ +ENTRY(ftrace_caller)
+ +
+ +      /* taken from glibc */
+ +      subq $0x38, %rsp
+ +      movq %rax, (%rsp)
+ +      movq %rcx, 8(%rsp)
+ +      movq %rdx, 16(%rsp)
+ +      movq %rsi, 24(%rsp)
+ +      movq %rdi, 32(%rsp)
+ +      movq %r8, 40(%rsp)
+ +      movq %r9, 48(%rsp)
+ +
+ +      movq 0x38(%rsp), %rdi
+ +      movq 8(%rbp), %rsi
+ +      subq $MCOUNT_INSN_SIZE, %rdi
+ +
+ +.globl ftrace_call
+ +ftrace_call:
+ +      call ftrace_stub
+ +
+ +      movq 48(%rsp), %r9
+ +      movq 40(%rsp), %r8
+ +      movq 32(%rsp), %rdi
+ +      movq 24(%rsp), %rsi
+ +      movq 16(%rsp), %rdx
+ +      movq 8(%rsp), %rcx
+ +      movq (%rsp), %rax
+ +      addq $0x38, %rsp
+ +
+ +.globl ftrace_stub
+ +ftrace_stub:
+ +      retq
+ +END(ftrace_caller)
+ +
+ +#else /* ! CONFIG_DYNAMIC_FTRACE */
+ +ENTRY(mcount)
+ +      cmpq $ftrace_stub, ftrace_trace_function
+ +      jnz trace
+ +.globl ftrace_stub
+ +ftrace_stub:
+ +      retq
+ +
+ +trace:
+ +      /* taken from glibc */
+ +      subq $0x38, %rsp
+ +      movq %rax, (%rsp)
+ +      movq %rcx, 8(%rsp)
+ +      movq %rdx, 16(%rsp)
+ +      movq %rsi, 24(%rsp)
+ +      movq %rdi, 32(%rsp)
+ +      movq %r8, 40(%rsp)
+ +      movq %r9, 48(%rsp)
+ +
+ +      movq 0x38(%rsp), %rdi
+ +      movq 8(%rbp), %rsi
+ +      subq $MCOUNT_INSN_SIZE, %rdi
+ +
+ +      call   *ftrace_trace_function
+ +
+ +      movq 48(%rsp), %r9
+ +      movq 40(%rsp), %r8
+ +      movq 32(%rsp), %rdi
+ +      movq 24(%rsp), %rsi
+ +      movq 16(%rsp), %rdx
+ +      movq 8(%rsp), %rcx
+ +      movq (%rsp), %rax
+ +      addq $0x38, %rsp
+ +
+ +      jmp ftrace_stub
+ +END(mcount)
+ +#endif /* CONFIG_DYNAMIC_FTRACE */
+ +#endif /* CONFIG_FTRACE */
+ +
   #ifndef CONFIG_PREEMPT
   #define retint_kernel retint_restore_args
   #endif        
   
   #ifdef CONFIG_PARAVIRT
- -ENTRY(native_irq_enable_syscall_ret)
- -      movq    %gs:pda_oldrsp,%rsp
+ +ENTRY(native_usergs_sysret64)
         swapgs
         sysretq
   #endif /* CONFIG_PARAVIRT */
@@@ -209,7 -104,7 +209,7 @@@
         .macro FAKE_STACK_FRAME child_rip
         /* push in order ss, rsp, eflags, cs, rip */
         xorl %eax, %eax
- -      pushq %rax /* ss */
+ +      pushq $__KERNEL_DS /* ss */
         CFI_ADJUST_CFA_OFFSET   8
         /*CFI_REL_OFFSET        ss,0*/
         pushq %rax /* rsp */
@@@ -274,13 -169,13 +274,13 @@@ ENTRY(ret_from_fork
         CFI_ADJUST_CFA_OFFSET -4
         call schedule_tail
         GET_THREAD_INFO(%rcx)
- -      testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
+ +      testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
         jnz rff_trace
   rff_action:   
         RESTORE_REST
         testl $3,CS-ARGOFFSET(%rsp)     # from kernel_thread?
         je   int_ret_from_sys_call
- -      testl $_TIF_IA32,threadinfo_flags(%rcx)
+ +      testl $_TIF_IA32,TI_flags(%rcx)
         jnz  int_ret_from_sys_call
         RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
         jmp ret_from_sys_call
@@@ -349,8 -244,7 +349,8 @@@ ENTRY(system_call_after_swapgs
         movq  %rcx,RIP-ARGOFFSET(%rsp)
         CFI_REL_OFFSET rip,RIP-ARGOFFSET
         GET_THREAD_INFO(%rcx)
- -      testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
+ +      testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \
+ +              TI_flags(%rcx)
         jnz tracesys
         cmpq $__NR_syscall_max,%rax
         ja badsys
@@@ -369,7 -263,7 +369,7 @@@ sysret_check
         GET_THREAD_INFO(%rcx)
         DISABLE_INTERRUPTS(CLBR_NONE)
         TRACE_IRQS_OFF
- -      movl threadinfo_flags(%rcx),%edx
+ +      movl TI_flags(%rcx),%edx
         andl %edi,%edx
         jnz  sysret_careful 
         CFI_REMEMBER_STATE
@@@ -381,8 -275,7 +381,8 @@@
         CFI_REGISTER    rip,rcx
         RESTORE_ARGS 0,-ARG_SKIP,1
         /*CFI_REGISTER  rflags,r11*/
- -      ENABLE_INTERRUPTS_SYSCALL_RET
+ +      movq    %gs:pda_oldrsp, %rsp
+ +      USERGS_SYSRET64
   
         CFI_RESTORE_STATE
         /* Handle reschedules */
@@@ -412,7 -305,7 +412,7 @@@ sysret_signal
         leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
         xorl %esi,%esi # oldset -> arg2
         call ptregscall_common
- -1:    movl $_TIF_NEED_RESCHED,%edi
+ +1:    movl $_TIF_WORK_MASK,%edi
         /* Use IRET because user could have changed frame. This
            works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
         DISABLE_INTERRUPTS(CLBR_NONE)
@@@ -454,10 -347,10 +454,10 @@@ int_ret_from_sys_call
   int_with_check:
         LOCKDEP_SYS_EXIT_IRQ
         GET_THREAD_INFO(%rcx)
- -      movl threadinfo_flags(%rcx),%edx
+ +      movl TI_flags(%rcx),%edx
         andl %edi,%edx
         jnz   int_careful
- -      andl    $~TS_COMPAT,threadinfo_status(%rcx)
+ +      andl    $~TS_COMPAT,TI_status(%rcx)
         jmp   retint_swapgs
   
         /* Either reschedule or signal or syscall exit tracking needed. */
@@@ -500,7 -393,7 +500,7 @@@ int_signal
         movq %rsp,%rdi          # &ptregs -> arg1
         xorl %esi,%esi          # oldset -> arg2
         call do_notify_resume
- -1:    movl $_TIF_NEED_RESCHED,%edi    
+ +1:    movl $_TIF_WORK_MASK,%edi
   int_restore_rest:
         RESTORE_REST
         DISABLE_INTERRUPTS(CLBR_NONE)
@@@ -527,6 -420,7 +527,6 @@@ END(\label
         PTREGSCALL stub_clone, sys_clone, %r8
         PTREGSCALL stub_fork, sys_fork, %rdi
         PTREGSCALL stub_vfork, sys_vfork, %rdi
- -      PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
         PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
         PTREGSCALL stub_iopl, sys_iopl, %rsi
   
@@@ -665,7 -559,7 +665,7 @@@ retint_with_reschedule
         movl $_TIF_WORK_MASK,%edi
   retint_check:
         LOCKDEP_SYS_EXIT_IRQ
- -      movl threadinfo_flags(%rcx),%edx
+ +      movl TI_flags(%rcx),%edx
         andl %edi,%edx
         CFI_REMEMBER_STATE
         jnz  retint_careful
@@@ -753,16 -647,17 +753,16 @@@ retint_signal
         RESTORE_REST
         DISABLE_INTERRUPTS(CLBR_NONE)
         TRACE_IRQS_OFF
- -      movl $_TIF_NEED_RESCHED,%edi
         GET_THREAD_INFO(%rcx)
- -      jmp retint_check
+ +      jmp retint_with_reschedule
   
   #ifdef CONFIG_PREEMPT
         /* Returning to kernel space. Check if we need preemption */
         /* rcx:  threadinfo. interrupts off. */
   ENTRY(retint_kernel)
- -      cmpl $0,threadinfo_preempt_count(%rcx)
+ +      cmpl $0,TI_preempt_count(%rcx)
         jnz  retint_restore_args
- -      bt  $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
+ +      bt  $TIF_NEED_RESCHED,TI_flags(%rcx)
         jnc  retint_restore_args
         bt   $9,EFLAGS-ARGOFFSET(%rsp)  /* interrupts off? */
         jnc  retint_restore_args
@@@ -816,6 -711,9 +816,9 @@@ END(invalidate_interrupt\num
   ENTRY(call_function_interrupt)
         apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
   END(call_function_interrupt)
+ ENTRY(call_function_single_interrupt)
+       apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt
+ END(call_function_single_interrupt)
   ENTRY(irq_move_cleanup_interrupt)
         apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
   END(irq_move_cleanup_interrupt)
@@@ -825,10 -723,6 +828,10 @@@ ENTRY(apic_timer_interrupt
         apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
   END(apic_timer_interrupt)
   
+ +ENTRY(uv_bau_message_intr1)
+ +      apicinterrupt 220,uv_bau_message_interrupt
+ +END(uv_bau_message_intr1)
+ +
   ENTRY(error_interrupt)
         apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
   END(error_interrupt)
@@@ -842,7 -736,6 +845,7 @@@ END(spurious_interrupt
    */           
         .macro zeroentry sym
         INTR_FRAME
+ +      PARAVIRT_ADJUST_EXCEPTION_FRAME
         pushq $0        /* push error code/oldrax */ 
         CFI_ADJUST_CFA_OFFSET 8
         pushq %rax      /* push real oldrax to the rdi slot */ 
@@@ -855,7 -748,6 +858,7 @@@
   
         .macro errorentry sym
         XCPT_FRAME
+ +      PARAVIRT_ADJUST_EXCEPTION_FRAME
         pushq %rax
         CFI_ADJUST_CFA_OFFSET 8
         CFI_REL_OFFSET rax,0
@@@ -925,7 -817,7 +928,7 @@@ paranoid_restore\trace
         jmp irq_return
   paranoid_userspace\trace:
         GET_THREAD_INFO(%rcx)
- -      movl threadinfo_flags(%rcx),%ebx
+ +      movl TI_flags(%rcx),%ebx
         andl $_TIF_WORK_MASK,%ebx
         jz paranoid_swapgs\trace
         movq %rsp,%rdi                  /* &pt_regs */
@@@ -1023,7 -915,7 +1026,7 @@@ error_exit
         testl %eax,%eax
         jne  retint_kernel
         LOCKDEP_SYS_EXIT_IRQ
- -      movl  threadinfo_flags(%rcx),%edx
+ +      movl  TI_flags(%rcx),%edx
         movl  $_TIF_WORK_MASK,%edi
         andl  %edi,%edx
         jnz  retint_careful
@@@ -1037,11 -929,11 +1040,11 @@@ error_kernelspace
            iret run with kernel gs again, so don't set the user space flag.
            B stepping K8s sometimes report an truncated RIP for IRET 
            exceptions returning to compat mode. Check for these here too. */
- -      leaq irq_return(%rip),%rbp
- -      cmpq %rbp,RIP(%rsp) 
+ +      leaq irq_return(%rip),%rcx
+ +      cmpq %rcx,RIP(%rsp)
         je   error_swapgs
- -      movl %ebp,%ebp  /* zero extend */
- -      cmpq %rbp,RIP(%rsp) 
+ +      movl %ecx,%ecx  /* zero extend */
+ +      cmpq %rcx,RIP(%rsp)
         je   error_swapgs
         cmpq $gs_change,RIP(%rsp)
           je   error_swapgs
@@@ -1050,7 -942,7 +1053,7 @@@ KPROBE_END(error_entry
         
          /* Reload gs selector with exception handling */
          /* edi:  new selector */ 
- -ENTRY(load_gs_index)
+ +ENTRY(native_load_gs_index)
         CFI_STARTPROC
         pushf
         CFI_ADJUST_CFA_OFFSET 8
@@@ -1064,7 -956,7 +1067,7 @@@ gs_change
         CFI_ADJUST_CFA_OFFSET -8
           ret
         CFI_ENDPROC
- -ENDPROC(load_gs_index)
+ +ENDPROC(native_load_gs_index)
          
           .section __ex_table,"a"
           .align 8
@@@ -1231,6 -1123,10 +1234,6 @@@ ENTRY(coprocessor_segment_overrun
         zeroentry do_coprocessor_segment_overrun
   END(coprocessor_segment_overrun)
   
- -ENTRY(reserved)
- -      zeroentry do_reserved
- -END(reserved)
- -
         /* runs on exception stack */
   ENTRY(double_fault)
         XCPT_FRAME
diff --combined arch/x86/kernel/io_apic_32.c

index 603261a5885cf2a1e1f55be2845197641e1a2d61,720640ff36ca0676614b659aa9176ea67fc6676f..558abf4c796afa0d7dd7ad2622e3bd42f28e8d39
--- 1/arch/x86/kernel/io_apic_32.c
--- 2/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@@ -25,7 -25,6 +25,7 @@@
   #include <linux/init.h>
   #include <linux/delay.h>
   #include <linux/sched.h>
+ +#include <linux/bootmem.h>
   #include <linux/mc146818rtc.h>
   #include <linux/compiler.h>
   #include <linux/acpi.h>
@@@ -59,7 -58,7 +59,7 @@@ static struct { int pin, apic; } ioapic
   static DEFINE_SPINLOCK(ioapic_lock);
   static DEFINE_SPINLOCK(vector_lock);
   
- -int timer_over_8254 __initdata = 1;
+ +int timer_through_8259 __initdata;
   
   /*
    *    Is the SiS APIC rmw bug present ?
@@@ -73,21 -72,15 +73,21 @@@ int sis_apic_bug = -1
   int nr_ioapic_registers[MAX_IO_APICS];
   
   /* I/O APIC entries */
- -struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
+ +struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
   int nr_ioapics;
   
   /* MP IRQ source entries */
- -struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
+ +struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
   
   /* # of MP IRQ source entries */
   int mp_irq_entries;
   
+ +#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
+ +int mp_bus_id_to_type[MAX_MP_BUSSES];
+ +#endif
+ +
+ +DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
+ +
   static int disable_timer_pin_1 __initdata;
   
   /*
@@@ -117,7 -110,7 +117,7 @@@ struct io_apic 
   static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
   {
         return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
- -              + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK);
+ +              + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK);
   }
   
   static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
@@@ -246,7 -239,7 +246,7 @@@ static void __init replace_pin_at_irq(u
         }
   }
   
- -static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable, unsigned long disable)
+ +static void __modify_IO_APIC_irq(unsigned int irq, unsigned long enable, unsigned long disable)
   {
         struct irq_pin_list *entry = irq_2_pin + irq;
         unsigned int pin, reg;
@@@ -266,32 -259,30 +266,32 @@@
   }
   
   /* mask = 1 */
- -static void __mask_IO_APIC_irq (unsigned int irq)
+ +static void __mask_IO_APIC_irq(unsigned int irq)
   {
- -      __modify_IO_APIC_irq(irq, 0x00010000, 0);
+ +      __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED, 0);
   }
   
   /* mask = 0 */
- -static void __unmask_IO_APIC_irq (unsigned int irq)
+ +static void __unmask_IO_APIC_irq(unsigned int irq)
   {
- -      __modify_IO_APIC_irq(irq, 0, 0x00010000);
+ +      __modify_IO_APIC_irq(irq, 0, IO_APIC_REDIR_MASKED);
   }
   
   /* mask = 1, trigger = 0 */
- -static void __mask_and_edge_IO_APIC_irq (unsigned int irq)
+ +static void __mask_and_edge_IO_APIC_irq(unsigned int irq)
   {
- -      __modify_IO_APIC_irq(irq, 0x00010000, 0x00008000);
+ +      __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED,
+ +                              IO_APIC_REDIR_LEVEL_TRIGGER);
   }
   
   /* mask = 0, trigger = 1 */
- -static void __unmask_and_level_IO_APIC_irq (unsigned int irq)
+ +static void __unmask_and_level_IO_APIC_irq(unsigned int irq)
   {
- -      __modify_IO_APIC_irq(irq, 0x00008000, 0x00010000);
+ +      __modify_IO_APIC_irq(irq, IO_APIC_REDIR_LEVEL_TRIGGER,
+ +                              IO_APIC_REDIR_MASKED);
   }
   
- -static void mask_IO_APIC_irq (unsigned int irq)
+ +static void mask_IO_APIC_irq(unsigned int irq)
   {
         unsigned long flags;
   
@@@ -300,7 -291,7 +300,7 @@@
         spin_unlock_irqrestore(&ioapic_lock, flags);
   }
   
- -static void unmask_IO_APIC_irq (unsigned int irq)
+ +static void unmask_IO_APIC_irq(unsigned int irq)
   {
         unsigned long flags;
   
@@@ -312,7 -303,7 +312,7 @@@
   static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
   {
         struct IO_APIC_route_entry entry;
- -      
+ +
         /* Check delivery_mode to be sure we're not clearing an SMI pin */
         entry = ioapic_read_entry(apic, pin);
         if (entry.delivery_mode == dest_SMI)
@@@ -324,7 -315,7 +324,7 @@@
         ioapic_mask_entry(apic, pin);
   }
   
- -static void clear_IO_APIC (void)
+ +static void clear_IO_APIC(void)
   {
         int apic, pin;
   
@@@ -341,7 -332,7 +341,7 @@@ static void set_ioapic_affinity_irq(uns
         struct irq_pin_list *entry = irq_2_pin + irq;
         unsigned int apicid_value;
         cpumask_t tmp;
- -      
+ +
         cpus_and(tmp, cpumask, cpu_online_map);
         if (cpus_empty(tmp))
                 tmp = TARGET_CPUS;
@@@ -370,7 -361,7 +370,7 @@@
   # include <linux/kernel_stat.h>       /* kstat */
   # include <linux/slab.h>              /* kmalloc() */
   # include <linux/timer.h>
- - 
+ +
   #define IRQBALANCE_CHECK_ARCH -999
   #define MAX_BALANCED_IRQ_INTERVAL     (5*HZ)
   #define MIN_BALANCED_IRQ_INTERVAL     (HZ/2)
@@@ -382,14 -373,14 +382,14 @@@ static int physical_balance __read_most
   static long balanced_irq_interval __read_mostly = MAX_BALANCED_IRQ_INTERVAL;
   
   static struct irq_cpu_info {
- -      unsigned long * last_irq;
- -      unsigned long * irq_delta;
+ +      unsigned long *last_irq;
+ +      unsigned long *irq_delta;
         unsigned long irq;
   } irq_cpu_data[NR_CPUS];
   
   #define CPU_IRQ(cpu)          (irq_cpu_data[cpu].irq)
- -#define LAST_CPU_IRQ(cpu,irq)   (irq_cpu_data[cpu].last_irq[irq])
- -#define IRQ_DELTA(cpu,irq)    (irq_cpu_data[cpu].irq_delta[irq])
+ +#define LAST_CPU_IRQ(cpu, irq)   (irq_cpu_data[cpu].last_irq[irq])
+ +#define IRQ_DELTA(cpu, irq)   (irq_cpu_data[cpu].irq_delta[irq])
   
   #define IDLE_ENOUGH(cpu,now) \
         (idle_cpu(cpu) && ((now) - per_cpu(irq_stat, (cpu)).idle_timestamp > 1))
@@@ -428,8 -419,8 +428,8 @@@ inside
                         if (cpu == -1)
                                 cpu = NR_CPUS-1;
                 }
- -      } while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu,allowed_mask) ||
- -                      (search_idle && !IDLE_ENOUGH(cpu,now)));
+ +      } while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu, allowed_mask) ||
+ +                      (search_idle && !IDLE_ENOUGH(cpu, now)));
   
         return cpu;
   }
@@@ -439,14 -430,15 +439,14 @@@ static inline void balance_irq(int cpu
         unsigned long now = jiffies;
         cpumask_t allowed_mask;
         unsigned int new_cpu;
- -              
+ +
         if (irqbalance_disabled)
- -              return; 
+ +              return;
   
         cpus_and(allowed_mask, cpu_online_map, balance_irq_affinity[irq]);
         new_cpu = move(cpu, allowed_mask, now, 1);
- -      if (cpu != new_cpu) {
+ +      if (cpu != new_cpu)
                 set_pending_irq(irq, cpumask_of_cpu(new_cpu));
- -      }
   }
   
   static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
@@@ -458,14 -450,14 +458,14 @@@
                         if (!irq_desc[j].action)
                                 continue;
                         /* Is it a significant load ?  */
- -                      if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i),j) <
+ +                      if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i), j) <
                                                 useful_load_threshold)
                                 continue;
                         balance_irq(i, j);
                 }
         }
         balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
- -              balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);       
+ +              balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
         return;
   }
   
@@@ -494,22 -486,22 +494,22 @@@ static void do_irq_balance(void
                         /* Is this an active IRQ or balancing disabled ? */
                         if (!irq_desc[j].action || irq_balancing_disabled(j))
                                 continue;
- -                      if ( package_index == i )
- -                              IRQ_DELTA(package_index,j) = 0;
+ +                      if (package_index == i)
+ +                              IRQ_DELTA(package_index, j) = 0;
                         /* Determine the total count per processor per IRQ */
                         value_now = (unsigned long) kstat_cpu(i).irqs[j];
   
                         /* Determine the activity per processor per IRQ */
- -                      delta = value_now - LAST_CPU_IRQ(i,j);
+ +                      delta = value_now - LAST_CPU_IRQ(i, j);
   
                         /* Update last_cpu_irq[][] for the next time */
- -                      LAST_CPU_IRQ(i,j) = value_now;
+ +                      LAST_CPU_IRQ(i, j) = value_now;
   
                         /* Ignore IRQs whose rate is less than the clock */
                         if (delta < useful_load_threshold)
                                 continue;
                         /* update the load for the processor or package total */
- -                      IRQ_DELTA(package_index,j) += delta;
+ +                      IRQ_DELTA(package_index, j) += delta;
   
                         /* Keep track of the higher numbered sibling as well */
                         if (i != package_index)
@@@ -535,8 -527,7 +535,8 @@@
         max_cpu_irq = ULONG_MAX;
   
   tryanothercpu:
- -      /* Look for heaviest loaded processor.
+ +      /*
+ +       * Look for heaviest loaded processor.
          * We may come back to get the next heaviest loaded processor.
          * Skip processors with trivial loads.
          */
@@@ -545,7 -536,7 +545,7 @@@
         for_each_online_cpu(i) {
                 if (i != CPU_TO_PACKAGEINDEX(i))
                         continue;
- -              if (max_cpu_irq <= CPU_IRQ(i)) 
+ +              if (max_cpu_irq <= CPU_IRQ(i))
                         continue;
                 if (tmp_cpu_irq < CPU_IRQ(i)) {
                         tmp_cpu_irq = CPU_IRQ(i);
@@@ -554,9 -545,8 +554,9 @@@
         }
   
         if (tmp_loaded == -1) {
- -       /* In the case of small number of heavy interrupt sources, 
- -        * loading some of the cpus too much. We use Ingo's original 
+ +       /*
+ +        * In the case of small number of heavy interrupt sources,
+ +        * loading some of the cpus too much. We use Ingo's original
           * approach to rotate them around.
           */
                 if (!first_attempt && imbalance >= useful_load_threshold) {
@@@ -565,14 -555,13 +565,14 @@@
                 }
                 goto not_worth_the_effort;
         }
- -      
+ +
         first_attempt = 0;              /* heaviest search */
         max_cpu_irq = tmp_cpu_irq;      /* load */
         max_loaded = tmp_loaded;        /* processor */
         imbalance = (max_cpu_irq - min_cpu_irq) / 2;
- -      
- -      /* if imbalance is less than approx 10% of max load, then
+ +
+ +      /*
+ +       * if imbalance is less than approx 10% of max load, then
          * observe diminishing returns action. - quit
          */
         if (imbalance < (max_cpu_irq >> 3))
@@@ -588,25 -577,26 +588,25 @@@ tryanotherirq
                 /* Is this an active IRQ? */
                 if (!irq_desc[j].action)
                         continue;
- -              if (imbalance <= IRQ_DELTA(max_loaded,j))
+ +              if (imbalance <= IRQ_DELTA(max_loaded, j))
                         continue;
                 /* Try to find the IRQ that is closest to the imbalance
                  * without going over.
                  */
- -              if (move_this_load < IRQ_DELTA(max_loaded,j)) {
- -                      move_this_load = IRQ_DELTA(max_loaded,j);
+ +              if (move_this_load < IRQ_DELTA(max_loaded, j)) {
+ +                      move_this_load = IRQ_DELTA(max_loaded, j);
                         selected_irq = j;
                 }
         }
- -      if (selected_irq == -1) {
+ +      if (selected_irq == -1)
                 goto tryanothercpu;
- -      }
   
         imbalance = move_this_load;
- -      
+ +
         /* For physical_balance case, we accumulated both load
          * values in the one of the siblings cpu_irq[],
          * to use the same code for physical and logical processors
- -       * as much as possible. 
+ +       * as much as possible.
          *
          * NOTE: the cpu_irq[] array holds the sum of the load for
          * sibling A and sibling B in the slot for the lowest numbered
@@@ -635,11 -625,11 +635,11 @@@
                 /* mark for change destination */
                 set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded));
   
- -              /* Since we made a change, come back sooner to 
+ +              /* Since we made a change, come back sooner to
                  * check for more variation.
                  */
                 balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
- -                      balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);       
+ +                      balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
                 return;
         }
         goto tryanotherirq;
@@@ -650,7 -640,7 +650,7 @@@ not_worth_the_effort
          * upward
          */
         balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL,
- -              balanced_irq_interval + BALANCED_IRQ_MORE_DELTA);       
+ +              balanced_irq_interval + BALANCED_IRQ_MORE_DELTA);
         return;
   }
   
@@@ -689,13 -679,13 +689,13 @@@ static int __init balanced_irq_init(voi
         cpumask_t tmp;
   
         cpus_shift_right(tmp, cpu_online_map, 2);
- -        c = &boot_cpu_data;
+ +      c = &boot_cpu_data;
         /* When not overwritten by the command line ask subarchitecture. */
         if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH)
                 irqbalance_disabled = NO_BALANCE_IRQ;
         if (irqbalance_disabled)
                 return 0;
- -      
+ +
          /* disable irqbalance completely if there is only one processor online */
         if (num_online_cpus() < 2) {
                 irqbalance_disabled = 1;
@@@ -709,14 -699,16 +709,14 @@@
                 physical_balance = 1;
   
         for_each_online_cpu(i) {
- -              irq_cpu_data[i].irq_delta = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
- -              irq_cpu_data[i].last_irq = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
+ +              irq_cpu_data[i].irq_delta = kzalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
+ +              irq_cpu_data[i].last_irq = kzalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
                 if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) {
                         printk(KERN_ERR "balanced_irq_init: out of memory");
                         goto failed;
                 }
- -              memset(irq_cpu_data[i].irq_delta,0,sizeof(unsigned long) * NR_IRQS);
- -              memset(irq_cpu_data[i].last_irq,0,sizeof(unsigned long) * NR_IRQS);
         }
- -      
+ +
         printk(KERN_INFO "Starting balanced_irq\n");
         if (!IS_ERR(kthread_run(balanced_irq, NULL, "kirqd")))
                 return 0;
@@@ -809,10 -801,10 +809,10 @@@ static int find_irq_entry(int apic, in
         int i;
   
         for (i = 0; i < mp_irq_entries; i++)
- -              if (mp_irqs[i].mpc_irqtype == type &&
- -                  (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid ||
- -                   mp_irqs[i].mpc_dstapic == MP_APIC_ALL) &&
- -                  mp_irqs[i].mpc_dstirq == pin)
+ +              if (mp_irqs[i].mp_irqtype == type &&
+ +                  (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid ||
+ +                   mp_irqs[i].mp_dstapic == MP_APIC_ALL) &&
+ +                  mp_irqs[i].mp_dstirq == pin)
                         return i;
   
         return -1;
@@@ -826,13 -818,13 +826,13 @@@ static int __init find_isa_irq_pin(int 
         int i;
   
         for (i = 0; i < mp_irq_entries; i++) {
- -              int lbus = mp_irqs[i].mpc_srcbus;
+ +              int lbus = mp_irqs[i].mp_srcbus;
   
                 if (test_bit(lbus, mp_bus_not_pci) &&
- -                  (mp_irqs[i].mpc_irqtype == type) &&
- -                  (mp_irqs[i].mpc_srcbusirq == irq))
+ +                  (mp_irqs[i].mp_irqtype == type) &&
+ +                  (mp_irqs[i].mp_srcbusirq == irq))
   
- -                      return mp_irqs[i].mpc_dstirq;
+ +                      return mp_irqs[i].mp_dstirq;
         }
         return -1;
   }
@@@ -842,17 -834,17 +842,17 @@@ static int __init find_isa_irq_apic(in
         int i;
   
         for (i = 0; i < mp_irq_entries; i++) {
- -              int lbus = mp_irqs[i].mpc_srcbus;
+ +              int lbus = mp_irqs[i].mp_srcbus;
   
                 if (test_bit(lbus, mp_bus_not_pci) &&
- -                  (mp_irqs[i].mpc_irqtype == type) &&
- -                  (mp_irqs[i].mpc_srcbusirq == irq))
+ +                  (mp_irqs[i].mp_irqtype == type) &&
+ +                  (mp_irqs[i].mp_srcbusirq == irq))
                         break;
         }
         if (i < mp_irq_entries) {
                 int apic;
- -              for(apic = 0; apic < nr_ioapics; apic++) {
- -                      if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic)
+ +              for (apic = 0; apic < nr_ioapics; apic++) {
+ +                      if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic)
                                 return apic;
                 }
         }
@@@ -872,28 -864,28 +872,28 @@@ int IO_APIC_get_PCI_irq_vector(int bus
   
         apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, "
                 "slot:%d, pin:%d.\n", bus, slot, pin);
- -      if (mp_bus_id_to_pci_bus[bus] == -1) {
+ +      if (test_bit(bus, mp_bus_not_pci)) {
                 printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
                 return -1;
         }
         for (i = 0; i < mp_irq_entries; i++) {
- -              int lbus = mp_irqs[i].mpc_srcbus;
+ +              int lbus = mp_irqs[i].mp_srcbus;
   
                 for (apic = 0; apic < nr_ioapics; apic++)
- -                      if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic ||
- -                          mp_irqs[i].mpc_dstapic == MP_APIC_ALL)
+ +                      if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic ||
+ +                          mp_irqs[i].mp_dstapic == MP_APIC_ALL)
                                 break;
   
                 if (!test_bit(lbus, mp_bus_not_pci) &&
- -                  !mp_irqs[i].mpc_irqtype &&
+ +                  !mp_irqs[i].mp_irqtype &&
                     (bus == lbus) &&
- -                  (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) {
- -                      int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq);
+ +                  (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) {
+ +                      int irq = pin_2_irq(i, apic, mp_irqs[i].mp_dstirq);
   
                         if (!(apic || IO_APIC_IRQ(irq)))
                                 continue;
   
- -                      if (pin == (mp_irqs[i].mpc_srcbusirq & 3))
+ +                      if (pin == (mp_irqs[i].mp_srcbusirq & 3))
                                 return irq;
                         /*
                          * Use the first all-but-pin matching entry as a
@@@ -908,7 -900,7 +908,7 @@@
   EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
   
   /*
- - * This function currently is only a helper for the i386 smp boot process where 
+ + * This function currently is only a helper for the i386 smp boot process where
    * we need to reprogram the ioredtbls to cater for the cpus which have come online
    * so mask in all cases should simply be TARGET_CPUS
    */
@@@ -960,7 -952,7 +960,7 @@@ static int EISA_ELCR(unsigned int irq
    * EISA conforming in the MP table, that means its trigger type must
    * be read in from the ELCR */
   
- -#define default_EISA_trigger(idx)     (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq))
+ +#define default_EISA_trigger(idx)     (EISA_ELCR(mp_irqs[idx].mp_srcbusirq))
   #define default_EISA_polarity(idx)    default_ISA_polarity(idx)
   
   /* PCI interrupts are always polarity one level triggered,
@@@ -977,115 -969,118 +977,115 @@@
   
   static int MPBIOS_polarity(int idx)
   {
- -      int bus = mp_irqs[idx].mpc_srcbus;
+ +      int bus = mp_irqs[idx].mp_srcbus;
         int polarity;
   
         /*
          * Determine IRQ line polarity (high active or low active):
          */
- -      switch (mp_irqs[idx].mpc_irqflag & 3)
+ +      switch (mp_irqs[idx].mp_irqflag & 3) {
+ +      case 0: /* conforms, ie. bus-type dependent polarity */
         {
- -              case 0: /* conforms, ie. bus-type dependent polarity */
- -              {
- -                      polarity = test_bit(bus, mp_bus_not_pci)?
- -                              default_ISA_polarity(idx):
- -                              default_PCI_polarity(idx);
- -                      break;
- -              }
- -              case 1: /* high active */
- -              {
- -                      polarity = 0;
- -                      break;
- -              }
- -              case 2: /* reserved */
- -              {
- -                      printk(KERN_WARNING "broken BIOS!!\n");
- -                      polarity = 1;
- -                      break;
- -              }
- -              case 3: /* low active */
- -              {
- -                      polarity = 1;
- -                      break;
- -              }
- -              default: /* invalid */
- -              {
- -                      printk(KERN_WARNING "broken BIOS!!\n");
- -                      polarity = 1;
- -                      break;
- -              }
+ +              polarity = test_bit(bus, mp_bus_not_pci)?
+ +                      default_ISA_polarity(idx):
+ +                      default_PCI_polarity(idx);
+ +              break;
+ +      }
+ +      case 1: /* high active */
+ +      {
+ +              polarity = 0;
+ +              break;
+ +      }
+ +      case 2: /* reserved */
+ +      {
+ +              printk(KERN_WARNING "broken BIOS!!\n");
+ +              polarity = 1;
+ +              break;
+ +      }
+ +      case 3: /* low active */
+ +      {
+ +              polarity = 1;
+ +              break;
+ +      }
+ +      default: /* invalid */
+ +      {
+ +              printk(KERN_WARNING "broken BIOS!!\n");
+ +              polarity = 1;
+ +              break;
+ +      }
         }
         return polarity;
   }
   
   static int MPBIOS_trigger(int idx)
   {
- -      int bus = mp_irqs[idx].mpc_srcbus;
+ +      int bus = mp_irqs[idx].mp_srcbus;
         int trigger;
   
         /*
          * Determine IRQ trigger mode (edge or level sensitive):
          */
- -      switch ((mp_irqs[idx].mpc_irqflag>>2) & 3)
+ +      switch ((mp_irqs[idx].mp_irqflag>>2) & 3) {
+ +      case 0: /* conforms, ie. bus-type dependent */
         {
- -              case 0: /* conforms, ie. bus-type dependent */
- -              {
- -                      trigger = test_bit(bus, mp_bus_not_pci)?
- -                                      default_ISA_trigger(idx):
- -                                      default_PCI_trigger(idx);
+ +              trigger = test_bit(bus, mp_bus_not_pci)?
+ +                              default_ISA_trigger(idx):
+ +                              default_PCI_trigger(idx);
   #if defined(CONFIG_EISA) || defined(CONFIG_MCA)
- -                      switch (mp_bus_id_to_type[bus])
- -                      {
- -                              case MP_BUS_ISA: /* ISA pin */
- -                              {
- -                                      /* set before the switch */
- -                                      break;
- -                              }
- -                              case MP_BUS_EISA: /* EISA pin */
- -                              {
- -                                      trigger = default_EISA_trigger(idx);
- -                                      break;
- -                              }
- -                              case MP_BUS_PCI: /* PCI pin */
- -                              {
- -                                      /* set before the switch */
- -                                      break;
- -                              }
- -                              case MP_BUS_MCA: /* MCA pin */
- -                              {
- -                                      trigger = default_MCA_trigger(idx);
- -                                      break;
- -                              }
- -                              default:
- -                              {
- -                                      printk(KERN_WARNING "broken BIOS!!\n");
- -                                      trigger = 1;
- -                                      break;
- -                              }
- -                      }
- -#endif
+ +              switch (mp_bus_id_to_type[bus]) {
+ +              case MP_BUS_ISA: /* ISA pin */
+ +              {
+ +                      /* set before the switch */
                         break;
                 }
- -              case 1: /* edge */
+ +              case MP_BUS_EISA: /* EISA pin */
                 {
- -                      trigger = 0;
+ +                      trigger = default_EISA_trigger(idx);
                         break;
                 }
- -              case 2: /* reserved */
+ +              case MP_BUS_PCI: /* PCI pin */
                 {
- -                      printk(KERN_WARNING "broken BIOS!!\n");
- -                      trigger = 1;
+ +                      /* set before the switch */
                         break;
                 }
- -              case 3: /* level */
+ +              case MP_BUS_MCA: /* MCA pin */
                 {
- -                      trigger = 1;
+ +                      trigger = default_MCA_trigger(idx);
                         break;
                 }
- -              default: /* invalid */
+ +              default:
                 {
                         printk(KERN_WARNING "broken BIOS!!\n");
- -                      trigger = 0;
+ +                      trigger = 1;
                         break;
                 }
         }
+ +#endif
+ +              break;
+ +      }
+ +      case 1: /* edge */
+ +      {
+ +              trigger = 0;
+ +              break;
+ +      }
+ +      case 2: /* reserved */
+ +      {
+ +              printk(KERN_WARNING "broken BIOS!!\n");
+ +              trigger = 1;
+ +              break;
+ +      }
+ +      case 3: /* level */
+ +      {
+ +              trigger = 1;
+ +              break;
+ +      }
+ +      default: /* invalid */
+ +      {
+ +              printk(KERN_WARNING "broken BIOS!!\n");
+ +              trigger = 0;
+ +              break;
+ +      }
+ +      }
         return trigger;
   }
   
@@@ -1102,16 -1097,16 +1102,16 @@@ static inline int irq_trigger(int idx
   static int pin_2_irq(int idx, int apic, int pin)
   {
         int irq, i;
- -      int bus = mp_irqs[idx].mpc_srcbus;
+ +      int bus = mp_irqs[idx].mp_srcbus;
   
         /*
          * Debugging check, we are in big trouble if this message pops up!
          */
- -      if (mp_irqs[idx].mpc_dstirq != pin)
+ +      if (mp_irqs[idx].mp_dstirq != pin)
                 printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
   
         if (test_bit(bus, mp_bus_not_pci))
- -              irq = mp_irqs[idx].mpc_srcbusirq;
+ +              irq = mp_irqs[idx].mp_srcbusirq;
         else {
                 /*
                  * PCI IRQs are mapped in order
@@@ -1153,8 -1148,8 +1153,8 @@@ static inline int IO_APIC_irq_trigger(i
   
         for (apic = 0; apic < nr_ioapics; apic++) {
                 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
- -                      idx = find_irq_entry(apic,pin,mp_INT);
- -                      if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin)))
+ +                      idx = find_irq_entry(apic, pin, mp_INT);
+ +                      if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin)))
                                 return irq_trigger(idx);
                 }
         }
@@@ -1169,7 -1164,7 +1169,7 @@@ static u8 irq_vector[NR_IRQ_VECTORS] __
   
   static int __assign_irq_vector(int irq)
   {
- -      static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
+ +      static int current_vector = FIRST_DEVICE_VECTOR, current_offset;
         int vector, offset;
   
         BUG_ON((unsigned)irq >= NR_IRQ_VECTORS);
@@@ -1181,7 -1176,7 +1181,7 @@@
         offset = current_offset;
   next:
         vector += 8;
- -      if (vector >= FIRST_SYSTEM_VECTOR) {
+ +      if (vector >= first_system_vector) {
                 offset = (offset + 1) % 8;
                 vector = FIRST_DEVICE_VECTOR + offset;
         }
@@@ -1208,11 -1203,6 +1208,11 @@@ static int assign_irq_vector(int irq
   
         return vector;
   }
+ +
+ +void setup_vector_irq(int cpu)
+ +{
+ +}
+ +
   static struct irq_chip ioapic_chip;
   
   #define IOAPIC_AUTO   -1
@@@ -1247,25 -1237,25 +1247,25 @@@ static void __init setup_IO_APIC_irqs(v
                 /*
                  * add it to the IO-APIC irq-routing table:
                  */
- -              memset(&entry,0,sizeof(entry));
+ +              memset(&entry, 0, sizeof(entry));
   
                 entry.delivery_mode = INT_DELIVERY_MODE;
                 entry.dest_mode = INT_DEST_MODE;
                 entry.mask = 0;                         /* enable IRQ */
- -              entry.dest.logical.logical_dest = 
+ +              entry.dest.logical.logical_dest =
                                         cpu_mask_to_apicid(TARGET_CPUS);
   
- -              idx = find_irq_entry(apic,pin,mp_INT);
+ +              idx = find_irq_entry(apic, pin, mp_INT);
                 if (idx == -1) {
                         if (first_notcon) {
                                 apic_printk(APIC_VERBOSE, KERN_DEBUG
                                                 " IO-APIC (apicid-pin) %d-%d",
- -                                              mp_ioapics[apic].mpc_apicid,
+ +                                              mp_ioapics[apic].mp_apicid,
                                                 pin);
                                 first_notcon = 0;
                         } else
                                 apic_printk(APIC_VERBOSE, ", %d-%d",
- -                                      mp_ioapics[apic].mpc_apicid, pin);
+ +                                      mp_ioapics[apic].mp_apicid, pin);
                         continue;
                 }
   
@@@ -1299,7 -1289,7 +1299,7 @@@
                         vector = assign_irq_vector(irq);
                         entry.vector = vector;
                         ioapic_register_intr(irq, vector, IOAPIC_AUTO);
- -              
+ +
                         if (!apic && (irq < 16))
                                 disable_8259A_irq(irq);
                 }
@@@ -1312,21 -1302,25 +1312,21 @@@
   }
   
   /*
- - * Set up the 8259A-master output pin:
+ + * Set up the timer pin, possibly with the 8259A-master behind.
    */
- -static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector)
+ +static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
+ +                                      int vector)
   {
         struct IO_APIC_route_entry entry;
   
- -      memset(&entry,0,sizeof(entry));
- -
- -      disable_8259A_irq(0);
- -
- -      /* mask LVT0 */
- -      apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
+ +      memset(&entry, 0, sizeof(entry));
   
         /*
          * We use logical delivery to get the timer IRQ
          * to the first CPU.
          */
         entry.dest_mode = INT_DEST_MODE;
- -      entry.mask = 0;                                 /* unmask IRQ now */
+ +      entry.mask = 1;                                 /* mask IRQ now */
         entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
         entry.delivery_mode = INT_DELIVERY_MODE;
         entry.polarity = 0;
@@@ -1335,14 -1329,17 +1335,14 @@@
   
         /*
          * The timer IRQ doesn't have to know that behind the
- -       * scene we have a 8259A-master in AEOI mode ...
+ +       * scene we may have a 8259A-master in AEOI mode ...
          */
- -      irq_desc[0].chip = &ioapic_chip;
- -      set_irq_handler(0, handle_edge_irq);
+ +      ioapic_register_intr(0, vector, IOAPIC_EDGE);
   
         /*
          * Add it to the IO-APIC irq-routing table:
          */
         ioapic_write_entry(apic, pin, entry);
- -
- -      enable_8259A_irq(0);
   }
   
   void __init print_IO_APIC(void)
@@@ -1357,10 -1354,10 +1357,10 @@@
         if (apic_verbosity == APIC_QUIET)
                 return;
   
- -      printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
+ +      printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
         for (i = 0; i < nr_ioapics; i++)
                 printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
- -                     mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]);
+ +                     mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]);
   
         /*
          * We are a bit conservative about what we expect.  We have to
@@@ -1379,7 -1376,7 +1379,7 @@@
                 reg_03.raw = io_apic_read(apic, 3);
         spin_unlock_irqrestore(&ioapic_lock, flags);
   
- -      printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid);
+ +      printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
         printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
         printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.bits.ID);
         printk(KERN_DEBUG ".......    : Delivery Type: %X\n", reg_00.bits.delivery_type);
@@@ -1462,7 -1459,7 +1462,7 @@@
   
   #if 0
   
- -static void print_APIC_bitfield (int base)
+ +static void print_APIC_bitfield(int base)
   {
         unsigned int v;
         int i, j;
@@@ -1483,7 -1480,7 +1483,7 @@@
         }
   }
   
- -void /*__init*/ print_local_APIC(void * dummy)
+ +void /*__init*/ print_local_APIC(void *dummy)
   {
         unsigned int v, ver, maxlvt;
   
@@@ -1492,7 -1489,6 +1492,7 @@@
   
         printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
                 smp_processor_id(), hard_smp_processor_id());
+ +      v = apic_read(APIC_ID);
         printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v,
                         GET_APIC_ID(read_apic_id()));
         v = apic_read(APIC_LVR);
@@@ -1567,9 -1563,9 +1567,9 @@@
         printk("\n");
   }
   
- -void print_all_local_APICs (void)
+ +void print_all_local_APICs(void)
   {
-       on_each_cpu(print_local_APIC, NULL, 1, 1);
+       on_each_cpu(print_local_APIC, NULL, 1);
   }
   
   void /*__init*/ print_PIC(void)
@@@ -1590,11 -1586,11 +1590,11 @@@
         v = inb(0xa0) << 8 | inb(0x20);
         printk(KERN_DEBUG "... PIC  IRR: %04x\n", v);
   
- -      outb(0x0b,0xa0);
- -      outb(0x0b,0x20);
+ +      outb(0x0b, 0xa0);
+ +      outb(0x0b, 0x20);
         v = inb(0xa0) << 8 | inb(0x20);
- -      outb(0x0a,0xa0);
- -      outb(0x0a,0x20);
+ +      outb(0x0a, 0xa0);
+ +      outb(0x0a, 0x20);
   
         spin_unlock_irqrestore(&i8259A_lock, flags);
   
@@@ -1630,7 -1626,7 +1630,7 @@@ static void __init enable_IO_APIC(void
                 spin_unlock_irqrestore(&ioapic_lock, flags);
                 nr_ioapic_registers[apic] = reg_01.bits.entries+1;
         }
- -      for(apic = 0; apic < nr_ioapics; apic++) {
+ +      for (apic = 0; apic < nr_ioapics; apic++) {
                 int pin;
                 /* See if any of the pins is in ExtINT mode */
                 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
@@@ -1720,6 -1716,7 +1720,6 @@@ void disable_IO_APIC(void
    * by Matt Domsch <Matt_Domsch@dell.com>  Tue Dec 21 12:25:05 CST 1999
    */
   
- -#ifndef CONFIG_X86_NUMAQ
   static void __init setup_ioapic_ids_from_mpc(void)
   {
         union IO_APIC_reg_00 reg_00;
@@@ -1729,11 -1726,6 +1729,11 @@@
         unsigned char old_id;
         unsigned long flags;
   
+ +#ifdef CONFIG_X86_NUMAQ
+ +      if (found_numaq)
+ +              return;
+ +#endif
+ +
         /*
          * Don't check I/O APIC IDs for xAPIC systems.  They have
          * no meaning without the serial APIC bus.
@@@ -1756,15 -1748,15 +1756,15 @@@
                 spin_lock_irqsave(&ioapic_lock, flags);
                 reg_00.raw = io_apic_read(apic, 0);
                 spin_unlock_irqrestore(&ioapic_lock, flags);
- -              
- -              old_id = mp_ioapics[apic].mpc_apicid;
   
- -              if (mp_ioapics[apic].mpc_apicid >= get_physical_broadcast()) {
+ +              old_id = mp_ioapics[apic].mp_apicid;
+ +
+ +              if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) {
                         printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
- -                              apic, mp_ioapics[apic].mpc_apicid);
+ +                              apic, mp_ioapics[apic].mp_apicid);
                         printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
                                 reg_00.bits.ID);
- -                      mp_ioapics[apic].mpc_apicid = reg_00.bits.ID;
+ +                      mp_ioapics[apic].mp_apicid = reg_00.bits.ID;
                 }
   
                 /*
@@@ -1773,9 -1765,9 +1773,9 @@@
                  * 'stuck on smp_invalidate_needed IPI wait' messages.
                  */
                 if (check_apicid_used(phys_id_present_map,
- -                                      mp_ioapics[apic].mpc_apicid)) {
+ +                                      mp_ioapics[apic].mp_apicid)) {
                         printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
- -                              apic, mp_ioapics[apic].mpc_apicid);
+ +                              apic, mp_ioapics[apic].mp_apicid);
                         for (i = 0; i < get_physical_broadcast(); i++)
                                 if (!physid_isset(i, phys_id_present_map))
                                         break;
@@@ -1784,13 -1776,13 +1784,13 @@@
                         printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
                                 i);
                         physid_set(i, phys_id_present_map);
- -                      mp_ioapics[apic].mpc_apicid = i;
+ +                      mp_ioapics[apic].mp_apicid = i;
                 } else {
                         physid_mask_t tmp;
- -                      tmp = apicid_to_cpu_present(mp_ioapics[apic].mpc_apicid);
+ +                      tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid);
                         apic_printk(APIC_VERBOSE, "Setting %d in the "
                                         "phys_id_present_map\n",
- -                                      mp_ioapics[apic].mpc_apicid);
+ +                                      mp_ioapics[apic].mp_apicid);
                         physids_or(phys_id_present_map, phys_id_present_map, tmp);
                 }
   
@@@ -1799,21 -1791,21 +1799,21 @@@
                  * We need to adjust the IRQ routing table
                  * if the ID changed.
                  */
- -              if (old_id != mp_ioapics[apic].mpc_apicid)
+ +              if (old_id != mp_ioapics[apic].mp_apicid)
                         for (i = 0; i < mp_irq_entries; i++)
- -                              if (mp_irqs[i].mpc_dstapic == old_id)
- -                                      mp_irqs[i].mpc_dstapic
- -                                              = mp_ioapics[apic].mpc_apicid;
+ +                              if (mp_irqs[i].mp_dstapic == old_id)
+ +                                      mp_irqs[i].mp_dstapic
+ +                                              = mp_ioapics[apic].mp_apicid;
   
                 /*
                  * Read the right value from the MPC table and
                  * write it into the ID register.
- -               */
+ +               */
                 apic_printk(APIC_VERBOSE, KERN_INFO
                         "...changing IO-APIC physical APIC ID to %d ...",
- -                      mp_ioapics[apic].mpc_apicid);
+ +                      mp_ioapics[apic].mp_apicid);
   
- -              reg_00.bits.ID = mp_ioapics[apic].mpc_apicid;
+ +              reg_00.bits.ID = mp_ioapics[apic].mp_apicid;
                 spin_lock_irqsave(&ioapic_lock, flags);
                 io_apic_write(apic, 0, reg_00.raw);
                 spin_unlock_irqrestore(&ioapic_lock, flags);
@@@ -1824,12 -1816,15 +1824,12 @@@
                 spin_lock_irqsave(&ioapic_lock, flags);
                 reg_00.raw = io_apic_read(apic, 0);
                 spin_unlock_irqrestore(&ioapic_lock, flags);
- -              if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid)
+ +              if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid)
                         printk("could not set ID!\n");
                 else
                         apic_printk(APIC_VERBOSE, " ok.\n");
         }
   }
- -#else
- -static void __init setup_ioapic_ids_from_mpc(void) { }
- -#endif
   
   int no_timer_check __initdata;
   
@@@ -2020,12 -2015,12 +2020,12 @@@ static inline void init_IO_APIC_traps(v
    * The local APIC irq-chip implementation:
    */
   
- -static void ack_apic(unsigned int irq)
+ +static void ack_lapic_irq(unsigned int irq)
   {
         ack_APIC_irq();
   }
   
- -static void mask_lapic_irq (unsigned int irq)
+ +static void mask_lapic_irq(unsigned int irq)
   {
         unsigned long v;
   
@@@ -2033,7 -2028,7 +2033,7 @@@
         apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
   }
   
- -static void unmask_lapic_irq (unsigned int irq)
+ +static void unmask_lapic_irq(unsigned int irq)
   {
         unsigned long v;
   
@@@ -2042,31 -2037,23 +2042,31 @@@
   }
   
   static struct irq_chip lapic_chip __read_mostly = {
- -      .name           = "local-APIC-edge",
+ +      .name           = "local-APIC",
         .mask           = mask_lapic_irq,
         .unmask         = unmask_lapic_irq,
- -      .eoi            = ack_apic,
+ +      .ack            = ack_lapic_irq,
   };
   
+ +static void lapic_register_intr(int irq, int vector)
+ +{
+ +      irq_desc[irq].status &= ~IRQ_LEVEL;
+ +      set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
+ +                                    "edge");
+ +      set_intr_gate(vector, interrupt[irq]);
+ +}
+ +
   static void __init setup_nmi(void)
   {
         /*
- -       * Dirty trick to enable the NMI watchdog ...
+ +       * Dirty trick to enable the NMI watchdog ...
          * We put the 8259A master into AEOI mode and
          * unmask on all local APICs LVT0 as NMI.
          *
          * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
          * is from Maciej W. Rozycki - so we do not have to EOI from
          * the NMI handler or the timer interrupt.
- -       */ 
+ +       */
         apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
   
         enable_NMI_through_LVT0();
@@@ -2142,16 -2129,11 +2142,16 @@@ static inline void __init unlock_ExtINT
   static inline void __init check_timer(void)
   {
         int apic1, pin1, apic2, pin2;
+ +      int no_pin1 = 0;
         int vector;
+ +      unsigned int ver;
         unsigned long flags;
   
         local_irq_save(flags);
   
+ +      ver = apic_read(APIC_LVR);
+ +      ver = GET_APIC_VERSION(ver);
+ +
         /*
          * get/set the timer IRQ vector:
          */
@@@ -2160,17 -2142,17 +2160,17 @@@
         set_intr_gate(vector, interrupt[0]);
   
         /*
- -       * Subtle, code in do_timer_interrupt() expects an AEOI
- -       * mode for the 8259A whenever interrupts are routed
- -       * through I/O APICs.  Also IRQ0 has to be enabled in
- -       * the 8259A which implies the virtual wire has to be
- -       * disabled in the local APIC.
+ +       * As IRQ0 is to be enabled in the 8259A, the virtual
+ +       * wire has to be disabled in the local APIC.  Also
+ +       * timer interrupts need to be acknowledged manually in
+ +       * the 8259A for the i82489DX when using the NMI
+ +       * watchdog as that APIC treats NMIs as level-triggered.
+ +       * The AEOI mode will finish them in the 8259A
+ +       * automatically.
          */
         apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
         init_8259A(1);
- -      timer_ack = 1;
- -      if (timer_over_8254 > 0)
- -              enable_8259A_irq(0);
+ +      timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
   
         pin1  = find_isa_irq_pin(0, mp_INT);
         apic1 = find_isa_irq_apic(0, mp_INT);
@@@ -2180,33 -2162,14 +2180,33 @@@
         printk(KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
                 vector, apic1, pin1, apic2, pin2);
   
+ +      /*
+ +       * Some BIOS writers are clueless and report the ExtINTA
+ +       * I/O APIC input from the cascaded 8259A as the timer
+ +       * interrupt input.  So just in case, if only one pin
+ +       * was found above, try it both directly and through the
+ +       * 8259A.
+ +       */
+ +      if (pin1 == -1) {
+ +              pin1 = pin2;
+ +              apic1 = apic2;
+ +              no_pin1 = 1;
+ +      } else if (pin2 == -1) {
+ +              pin2 = pin1;
+ +              apic2 = apic1;
+ +      }
+ +
         if (pin1 != -1) {
                 /*
                  * Ok, does IRQ0 through the IOAPIC work?
                  */
+ +              if (no_pin1) {
+ +                      add_pin_to_irq(0, apic1, pin1);
+ +                      setup_timer_IRQ0_pin(apic1, pin1, vector);
+ +              }
                 unmask_IO_APIC_irq(0);
                 if (timer_irq_works()) {
                         if (nmi_watchdog == NMI_IO_APIC) {
- -                              disable_8259A_irq(0);
                                 setup_nmi();
                                 enable_8259A_irq(0);
                         }
@@@ -2215,47 -2178,45 +2215,47 @@@
                         goto out;
                 }
                 clear_IO_APIC_pin(apic1, pin1);
- -              printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to "
- -                              "IO-APIC\n");
- -      }
+ +              if (!no_pin1)
+ +                      printk(KERN_ERR "..MP-BIOS bug: "
+ +                             "8254 timer not connected to IO-APIC\n");
   
- -      printk(KERN_INFO "...trying to set up timer (IRQ0) through the 8259A ... ");
- -      if (pin2 != -1) {
+ +              printk(KERN_INFO "...trying to set up timer (IRQ0) "
+ +                     "through the 8259A ... ");
                 printk("\n..... (found pin %d) ...", pin2);
                 /*
                  * legacy devices should be connected to IO APIC #0
                  */
- -              setup_ExtINT_IRQ0_pin(apic2, pin2, vector);
+ +              replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
+ +              setup_timer_IRQ0_pin(apic2, pin2, vector);
+ +              unmask_IO_APIC_irq(0);
+ +              enable_8259A_irq(0);
                 if (timer_irq_works()) {
                         printk("works.\n");
- -                      if (pin1 != -1)
- -                              replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
- -                      else
- -                              add_pin_to_irq(0, apic2, pin2);
+ +                      timer_through_8259 = 1;
                         if (nmi_watchdog == NMI_IO_APIC) {
+ +                              disable_8259A_irq(0);
                                 setup_nmi();
+ +                              enable_8259A_irq(0);
                         }
                         goto out;
                 }
                 /*
                  * Cleanup, just in case ...
                  */
+ +              disable_8259A_irq(0);
                 clear_IO_APIC_pin(apic2, pin2);
+ +              printk(" failed.\n");
         }
- -      printk(" failed.\n");
   
         if (nmi_watchdog == NMI_IO_APIC) {
                 printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n");
- -              nmi_watchdog = 0;
+ +              nmi_watchdog = NMI_NONE;
         }
+ +      timer_ack = 0;
   
         printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
   
- -      disable_8259A_irq(0);
- -      set_irq_chip_and_handler_name(0, &lapic_chip, handle_fasteoi_irq,
- -                                    "fasteoi");
+ +      lapic_register_intr(0, vector);
         apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector);   /* Fixed mode */
         enable_8259A_irq(0);
   
@@@ -2263,12 -2224,12 +2263,12 @@@
                 printk(" works.\n");
                 goto out;
         }
+ +      disable_8259A_irq(0);
         apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
         printk(" failed.\n");
   
         printk(KERN_INFO "...trying to set up timer as ExtINT IRQ...");
   
- -      timer_ack = 0;
         init_8259A(0);
         make_8259A_irq(0);
         apic_write_around(APIC_LVT0, APIC_DM_EXTINT);
@@@ -2287,21 -2248,11 +2287,21 @@@ out
   }
   
   /*
- - *
- - * IRQ's that are handled by the PIC in the MPS IOAPIC case.
- - * - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ.
- - *   Linux doesn't really care, as it's not actually used
- - *   for any interrupt handling anyway.
+ + * Traditionally ISA IRQ2 is the cascade IRQ, and is not available
+ + * to devices.  However there may be an I/O APIC pin available for
+ + * this interrupt regardless.  The pin may be left unconnected, but
+ + * typically it will be reused as an ExtINT cascade interrupt for
+ + * the master 8259A.  In the MPS case such a pin will normally be
+ + * reported as an ExtINT interrupt in the MP table.  With ACPI
+ + * there is no provision for ExtINT interrupts, and in the absence
+ + * of an override it would be treated as an ordinary ISA I/O APIC
+ + * interrupt, that is edge-triggered and unmasked by default.  We
+ + * used to do this, but it caused problems on some systems because
+ + * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using
+ + * the same ExtINT cascade interrupt to drive the local APIC of the
+ + * bootstrap processor.  Therefore we refrain from routing IRQ2 to
+ + * the I/O APIC in all cases now.  No actual device should request
+ + * it anyway.  --macro
    */
   #define PIC_IRQS      (1 << PIC_CASCADE_IR)
   
@@@ -2310,12 -2261,15 +2310,12 @@@ void __init setup_IO_APIC(void
         int i;
   
         /* Reserve all the system vectors. */
- -      for (i = FIRST_SYSTEM_VECTOR; i < NR_VECTORS; i++)
+ +      for (i = first_system_vector; i < NR_VECTORS; i++)
                 set_bit(i, used_vectors);
   
         enable_IO_APIC();
   
- -      if (acpi_ioapic)
- -              io_apic_irqs = ~0;      /* all IRQs go through IOAPIC */
- -      else
- -              io_apic_irqs = ~PIC_IRQS;
+ +      io_apic_irqs = ~PIC_IRQS;
   
         printk("ENABLING IO-APIC IRQs\n");
   
@@@ -2332,14 -2286,28 +2332,14 @@@
                 print_IO_APIC();
   }
   
- -static int __init setup_disable_8254_timer(char *s)
- -{
- -      timer_over_8254 = -1;
- -      return 1;
- -}
- -static int __init setup_enable_8254_timer(char *s)
- -{
- -      timer_over_8254 = 2;
- -      return 1;
- -}
- -
- -__setup("disable_8254_timer", setup_disable_8254_timer);
- -__setup("enable_8254_timer", setup_enable_8254_timer);
- -
   /*
    *    Called after all the initialization is done. If we didnt find any
    *    APIC bugs then we can allow the modify fast path
    */
- - 
+ +
   static int __init io_apic_bug_finalize(void)
   {
- -      if(sis_apic_bug == -1)
+ +      if (sis_apic_bug == -1)
                 sis_apic_bug = 0;
         return 0;
   }
@@@ -2350,17 -2318,17 +2350,17 @@@ struct sysfs_ioapic_data 
         struct sys_device dev;
         struct IO_APIC_route_entry entry[0];
   };
- -static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS];
+ +static struct sysfs_ioapic_data *mp_ioapic_data[MAX_IO_APICS];
   
   static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
   {
         struct IO_APIC_route_entry *entry;
         struct sysfs_ioapic_data *data;
         int i;
- -      
+ +
         data = container_of(dev, struct sysfs_ioapic_data, dev);
         entry = data->entry;
- -      for (i = 0; i < nr_ioapic_registers[dev->id]; i ++)
+ +      for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
                 entry[i] = ioapic_read_entry(dev->id, i);
   
         return 0;
@@@ -2373,18 -2341,18 +2373,18 @@@ static int ioapic_resume(struct sys_dev
         unsigned long flags;
         union IO_APIC_reg_00 reg_00;
         int i;
- -      
+ +
         data = container_of(dev, struct sysfs_ioapic_data, dev);
         entry = data->entry;
   
         spin_lock_irqsave(&ioapic_lock, flags);
         reg_00.raw = io_apic_read(dev->id, 0);
- -      if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) {
- -              reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
+ +      if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) {
+ +              reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid;
                 io_apic_write(dev->id, 0, reg_00.raw);
         }
         spin_unlock_irqrestore(&ioapic_lock, flags);
- -      for (i = 0; i < nr_ioapic_registers[dev->id]; i ++)
+ +      for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
                 ioapic_write_entry(dev->id, i, entry[i]);
   
         return 0;
@@@ -2398,23 -2366,24 +2398,23 @@@ static struct sysdev_class ioapic_sysde
   
   static int __init ioapic_init_sysfs(void)
   {
- -      struct sys_device * dev;
+ +      struct sys_device *dev;
         int i, size, error = 0;
   
         error = sysdev_class_register(&ioapic_sysdev_class);
         if (error)
                 return error;
   
- -      for (i = 0; i < nr_ioapics; i++ ) {
- -              size = sizeof(struct sys_device) + nr_ioapic_registers[i] 
+ +      for (i = 0; i < nr_ioapics; i++) {
+ +              size = sizeof(struct sys_device) + nr_ioapic_registers[i]
                         * sizeof(struct IO_APIC_route_entry);
- -              mp_ioapic_data[i] = kmalloc(size, GFP_KERNEL);
+ +              mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL);
                 if (!mp_ioapic_data[i]) {
                         printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
                         continue;
                 }
- -              memset(mp_ioapic_data[i], 0, size);
                 dev = &mp_ioapic_data[i]->dev;
- -              dev->id = i; 
+ +              dev->id = i;
                 dev->cls = &ioapic_sysdev_class;
                 error = sysdev_register(dev);
                 if (error) {
@@@ -2489,7 -2458,7 +2489,7 @@@ static int msi_compose_msg(struct pci_d
                 msg->address_lo =
                         MSI_ADDR_BASE_LO |
                         ((INT_DEST_MODE == 0) ?
- -                              MSI_ADDR_DEST_MODE_PHYSICAL:
+ +MSI_ADDR_DEST_MODE_PHYSICAL:
                                 MSI_ADDR_DEST_MODE_LOGICAL) |
                         ((INT_DELIVERY_MODE != dest_LowestPrio) ?
                                 MSI_ADDR_REDIRECTION_CPU:
@@@ -2500,7 -2469,7 +2500,7 @@@
                         MSI_DATA_TRIGGER_EDGE |
                         MSI_DATA_LEVEL_ASSERT |
                         ((INT_DELIVERY_MODE != dest_LowestPrio) ?
- -                              MSI_DATA_DELIVERY_FIXED:
+ +MSI_DATA_DELIVERY_FIXED:
                                 MSI_DATA_DELIVERY_LOWPRI) |
                         MSI_DATA_VECTOR(vector);
         }
@@@ -2671,12 -2640,12 +2671,12 @@@ int arch_setup_ht_irq(unsigned int irq
   #endif /* CONFIG_HT_IRQ */
   
   /* --------------------------------------------------------------------------
- -                          ACPI-based IOAPIC Configuration
+ +                      ACPI-based IOAPIC Configuration
      -------------------------------------------------------------------------- */
   
   #ifdef CONFIG_ACPI
   
- -int __init io_apic_get_unique_id (int ioapic, int apic_id)
+ +int __init io_apic_get_unique_id(int ioapic, int apic_id)
   {
         union IO_APIC_reg_00 reg_00;
         static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
@@@ -2685,10 -2654,10 +2685,10 @@@
         int i = 0;
   
         /*
- -       * The P4 platform supports up to 256 APIC IDs on two separate APIC 
- -       * buses (one for LAPICs, one for IOAPICs), where predecessors only 
+ +       * The P4 platform supports up to 256 APIC IDs on two separate APIC
+ +       * buses (one for LAPICs, one for IOAPICs), where predecessors only
          * supports up to 16 on one shared APIC bus.
- -       * 
+ +       *
          * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
          *      advantage of new APIC bus architecture.
          */
@@@ -2707,7 -2676,7 +2707,7 @@@
         }
   
         /*
- -       * Every APIC in a system must have a unique ID or we get lots of nice 
+ +       * Every APIC in a system must have a unique ID or we get lots of nice
          * 'stuck on smp_invalidate_needed IPI wait' messages.
          */
         if (check_apicid_used(apic_id_map, apic_id)) {
@@@ -2724,7 -2693,7 +2724,7 @@@
                         "trying %d\n", ioapic, apic_id, i);
   
                 apic_id = i;
- -      } 
+ +      }
   
         tmp = apicid_to_cpu_present(apic_id);
         physids_or(apic_id_map, apic_id_map, tmp);
@@@ -2751,7 -2720,7 +2751,7 @@@
   }
   
   
- -int __init io_apic_get_version (int ioapic)
+ +int __init io_apic_get_version(int ioapic)
   {
         union IO_APIC_reg_01    reg_01;
         unsigned long flags;
@@@ -2764,7 -2733,7 +2764,7 @@@
   }
   
   
- -int __init io_apic_get_redir_entries (int ioapic)
+ +int __init io_apic_get_redir_entries(int ioapic)
   {
         union IO_APIC_reg_01    reg_01;
         unsigned long flags;
@@@ -2777,7 -2746,7 +2777,7 @@@
   }
   
   
- -int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low)
+ +int io_apic_set_pci_routing(int ioapic, int pin, int irq, int edge_level, int active_high_low)
   {
         struct IO_APIC_route_entry entry;
   
@@@ -2793,7 -2762,7 +2793,7 @@@
          * corresponding device driver registers for this IRQ.
          */
   
- -      memset(&entry,0,sizeof(entry));
+ +      memset(&entry, 0, sizeof(entry));
   
         entry.delivery_mode = INT_DELIVERY_MODE;
         entry.dest_mode = INT_DEST_MODE;
@@@ -2812,7 -2781,7 +2812,7 @@@
   
         apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry "
                 "(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic,
- -              mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq,
+ +              mp_ioapics[ioapic].mp_apicid, pin, entry.vector, irq,
                 edge_level, active_high_low);
   
         ioapic_register_intr(irq, entry.vector, edge_level);
@@@ -2833,8 -2802,8 +2833,8 @@@ int acpi_get_override_irq(int bus_irq, 
                 return -1;
   
         for (i = 0; i < mp_irq_entries; i++)
- -              if (mp_irqs[i].mpc_irqtype == mp_INT &&
- -                  mp_irqs[i].mpc_srcbusirq == bus_irq)
+ +              if (mp_irqs[i].mp_irqtype == mp_INT &&
+ +                  mp_irqs[i].mp_srcbusirq == bus_irq)
                         break;
         if (i >= mp_irq_entries)
                 return -1;
@@@ -2867,34 -2836,3 +2867,34 @@@ static int __init parse_noapic(char *ar
         return 0;
   }
   early_param("noapic", parse_noapic);
+ +
+ +void __init ioapic_init_mappings(void)
+ +{
+ +      unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
+ +      int i;
+ +
+ +      for (i = 0; i < nr_ioapics; i++) {
+ +              if (smp_found_config) {
+ +                      ioapic_phys = mp_ioapics[i].mp_apicaddr;
+ +                      if (!ioapic_phys) {
+ +                              printk(KERN_ERR
+ +                                     "WARNING: bogus zero IO-APIC "
+ +                                     "address found in MPTABLE, "
+ +                                     "disabling IO/APIC support!\n");
+ +                              smp_found_config = 0;
+ +                              skip_ioapic_setup = 1;
+ +                              goto fake_ioapic_page;
+ +                      }
+ +              } else {
+ +fake_ioapic_page:
+ +                      ioapic_phys = (unsigned long)
+ +                                    alloc_bootmem_pages(PAGE_SIZE);
+ +                      ioapic_phys = __pa(ioapic_phys);
+ +              }
+ +              set_fixmap_nocache(idx, ioapic_phys);
+ +              printk(KERN_DEBUG "mapped IOAPIC to %08lx (%08lx)\n",
+ +                     __fix_to_virt(idx), ioapic_phys);
+ +              idx++;
+ +      }
+ +}
+ +
diff --combined arch/x86/kernel/io_apic_64.c

index b16ef029cf88185334c1ec93241138245d240e8b,4504c7f500128be5feda9551c6e1756ec431b207..6510cde36b3549149eabefa4aaf724c72e5a0959
--- 1/arch/x86/kernel/io_apic_64.c
--- 2/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@@ -61,7 -61,7 +61,7 @@@ struct irq_cfg 
   };
   
   /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
- -struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = {
+ +static struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = {
         [0]  = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR,  },
         [1]  = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR,  },
         [2]  = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR,  },
@@@ -82,10 -82,6 +82,10 @@@
   
   static int assign_irq_vector(int irq, cpumask_t mask);
   
+ +int first_system_vector = 0xfe;
+ +
+ +char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
+ +
   #define __apicdebuginit  __init
   
   int sis_apic_bug; /* not actually supported, dummy for compile */
@@@ -94,7 -90,7 +94,7 @@@ static int no_timer_check
   
   static int disable_timer_pin_1 __initdata;
   
- -int timer_over_8254 __initdata = 1;
+ +int timer_through_8259 __initdata;
   
   /* Where if anywhere is the i8259 connect in external int mode */
   static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
@@@ -108,17 -104,15 +108,17 @@@ DEFINE_SPINLOCK(vector_lock)
   int nr_ioapic_registers[MAX_IO_APICS];
   
   /* I/O APIC entries */
- -struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
+ +struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
   int nr_ioapics;
   
   /* MP IRQ source entries */
- -struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
+ +struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
   
   /* # of MP IRQ source entries */
   int mp_irq_entries;
   
+ +DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
+ +
   /*
    * Rough estimation of how many shared IRQs there are, can
    * be changed anytime.
@@@ -146,7 -140,7 +146,7 @@@ struct io_apic 
   static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
   {
         return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
- -              + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK);
+ +              + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK);
   }
   
   static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
@@@ -189,7 -183,7 +189,7 @@@ static bool io_apic_level_ack_pending(u
                         break;
                 reg = io_apic_read(entry->apic, 0x10 + pin*2);
                 /* Is the remote IRR bit set? */
- -              if ((reg >> 14) & 1) {
+ +              if (reg & IO_APIC_REDIR_REMOTE_IRR) {
                         spin_unlock_irqrestore(&ioapic_lock, flags);
                         return true;
                 }
@@@ -304,7 -298,7 +304,7 @@@ static void __target_IO_APIC_irq(unsign
                         break;
                 io_apic_write(apic, 0x11 + pin*2, dest);
                 reg = io_apic_read(apic, 0x10 + pin*2);
- -              reg &= ~0x000000ff;
+ +              reg &= ~IO_APIC_REDIR_VECTOR_MASK;
                 reg |= vector;
                 io_apic_modify(apic, reg);
                 if (!entry->next)
@@@ -366,37 -360,16 +366,37 @@@ static void add_pin_to_irq(unsigned in
         entry->pin = pin;
   }
   
+ +/*
+ + * Reroute an IRQ to a different pin.
+ + */
+ +static void __init replace_pin_at_irq(unsigned int irq,
+ +                                    int oldapic, int oldpin,
+ +                                    int newapic, int newpin)
+ +{
+ +      struct irq_pin_list *entry = irq_2_pin + irq;
+ +
+ +      while (1) {
+ +              if (entry->apic == oldapic && entry->pin == oldpin) {
+ +                      entry->apic = newapic;
+ +                      entry->pin = newpin;
+ +              }
+ +              if (!entry->next)
+ +                      break;
+ +              entry = irq_2_pin + entry->next;
+ +      }
+ +}
+ +
   
   #define DO_ACTION(name,R,ACTION, FINAL)                                       \
                                                                         \
         static void name##_IO_APIC_irq (unsigned int irq)               \
         __DO_ACTION(R, ACTION, FINAL)
   
- -DO_ACTION( __mask,             0, |= 0x00010000, io_apic_sync(entry->apic) )
- -                                              /* mask = 1 */
- -DO_ACTION( __unmask,           0, &= 0xfffeffff, )
- -                                              /* mask = 0 */
+ +/* mask = 1 */
+ +DO_ACTION(__mask,     0, |= IO_APIC_REDIR_MASKED, io_apic_sync(entry->apic))
+ +
+ +/* mask = 0 */
+ +DO_ACTION(__unmask,   0, &= ~IO_APIC_REDIR_MASKED, )
   
   static void mask_IO_APIC_irq (unsigned int irq)
   {
@@@ -457,6 -430,20 +457,6 @@@ static int __init disable_timer_pin_set
   }
   __setup("disable_timer_pin_1", disable_timer_pin_setup);
   
- -static int __init setup_disable_8254_timer(char *s)
- -{
- -      timer_over_8254 = -1;
- -      return 1;
- -}
- -static int __init setup_enable_8254_timer(char *s)
- -{
- -      timer_over_8254 = 2;
- -      return 1;
- -}
- -
- -__setup("disable_8254_timer", setup_disable_8254_timer);
- -__setup("enable_8254_timer", setup_enable_8254_timer);
- -
   
   /*
    * Find the IRQ entry number of a certain pin.
@@@ -466,10 -453,10 +466,10 @@@ static int find_irq_entry(int apic, in
         int i;
   
         for (i = 0; i < mp_irq_entries; i++)
- -              if (mp_irqs[i].mpc_irqtype == type &&
- -                  (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid ||
- -                   mp_irqs[i].mpc_dstapic == MP_APIC_ALL) &&
- -                  mp_irqs[i].mpc_dstirq == pin)
+ +              if (mp_irqs[i].mp_irqtype == type &&
+ +                  (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid ||
+ +                   mp_irqs[i].mp_dstapic == MP_APIC_ALL) &&
+ +                  mp_irqs[i].mp_dstirq == pin)
                         return i;
   
         return -1;
@@@ -483,13 -470,13 +483,13 @@@ static int __init find_isa_irq_pin(int 
         int i;
   
         for (i = 0; i < mp_irq_entries; i++) {
- -              int lbus = mp_irqs[i].mpc_srcbus;
+ +              int lbus = mp_irqs[i].mp_srcbus;
   
                 if (test_bit(lbus, mp_bus_not_pci) &&
- -                  (mp_irqs[i].mpc_irqtype == type) &&
- -                  (mp_irqs[i].mpc_srcbusirq == irq))
+ +                  (mp_irqs[i].mp_irqtype == type) &&
+ +                  (mp_irqs[i].mp_srcbusirq == irq))
   
- -                      return mp_irqs[i].mpc_dstirq;
+ +                      return mp_irqs[i].mp_dstirq;
         }
         return -1;
   }
@@@ -499,17 -486,17 +499,17 @@@ static int __init find_isa_irq_apic(in
         int i;
   
         for (i = 0; i < mp_irq_entries; i++) {
- -              int lbus = mp_irqs[i].mpc_srcbus;
+ +              int lbus = mp_irqs[i].mp_srcbus;
   
                 if (test_bit(lbus, mp_bus_not_pci) &&
- -                  (mp_irqs[i].mpc_irqtype == type) &&
- -                  (mp_irqs[i].mpc_srcbusirq == irq))
+ +                  (mp_irqs[i].mp_irqtype == type) &&
+ +                  (mp_irqs[i].mp_srcbusirq == irq))
                         break;
         }
         if (i < mp_irq_entries) {
                 int apic;
                 for(apic = 0; apic < nr_ioapics; apic++) {
- -                      if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic)
+ +                      if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic)
                                 return apic;
                 }
         }
@@@ -529,28 -516,28 +529,28 @@@ int IO_APIC_get_PCI_irq_vector(int bus
   
         apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
                 bus, slot, pin);
- -      if (mp_bus_id_to_pci_bus[bus] == -1) {
+ +      if (test_bit(bus, mp_bus_not_pci)) {
                 apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
                 return -1;
         }
         for (i = 0; i < mp_irq_entries; i++) {
- -              int lbus = mp_irqs[i].mpc_srcbus;
+ +              int lbus = mp_irqs[i].mp_srcbus;
   
                 for (apic = 0; apic < nr_ioapics; apic++)
- -                      if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic ||
- -                          mp_irqs[i].mpc_dstapic == MP_APIC_ALL)
+ +                      if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic ||
+ +                          mp_irqs[i].mp_dstapic == MP_APIC_ALL)
                                 break;
   
                 if (!test_bit(lbus, mp_bus_not_pci) &&
- -                  !mp_irqs[i].mpc_irqtype &&
+ +                  !mp_irqs[i].mp_irqtype &&
                     (bus == lbus) &&
- -                  (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) {
- -                      int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq);
+ +                  (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) {
+ +                      int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq);
   
                         if (!(apic || IO_APIC_IRQ(irq)))
                                 continue;
   
- -                      if (pin == (mp_irqs[i].mpc_srcbusirq & 3))
+ +                      if (pin == (mp_irqs[i].mp_srcbusirq & 3))
                                 return irq;
                         /*
                          * Use the first all-but-pin matching entry as a
@@@ -578,13 -565,13 +578,13 @@@
   
   static int MPBIOS_polarity(int idx)
   {
- -      int bus = mp_irqs[idx].mpc_srcbus;
+ +      int bus = mp_irqs[idx].mp_srcbus;
         int polarity;
   
         /*
          * Determine IRQ line polarity (high active or low active):
          */
- -      switch (mp_irqs[idx].mpc_irqflag & 3)
+ +      switch (mp_irqs[idx].mp_irqflag & 3)
         {
                 case 0: /* conforms, ie. bus-type dependent polarity */
                         if (test_bit(bus, mp_bus_not_pci))
@@@ -620,13 -607,13 +620,13 @@@
   
   static int MPBIOS_trigger(int idx)
   {
- -      int bus = mp_irqs[idx].mpc_srcbus;
+ +      int bus = mp_irqs[idx].mp_srcbus;
         int trigger;
   
         /*
          * Determine IRQ trigger mode (edge or level sensitive):
          */
- -      switch ((mp_irqs[idx].mpc_irqflag>>2) & 3)
+ +      switch ((mp_irqs[idx].mp_irqflag>>2) & 3)
         {
                 case 0: /* conforms, ie. bus-type dependent */
                         if (test_bit(bus, mp_bus_not_pci))
@@@ -673,16 -660,16 +673,16 @@@ static inline int irq_trigger(int idx
   static int pin_2_irq(int idx, int apic, int pin)
   {
         int irq, i;
- -      int bus = mp_irqs[idx].mpc_srcbus;
+ +      int bus = mp_irqs[idx].mp_srcbus;
   
         /*
          * Debugging check, we are in big trouble if this message pops up!
          */
- -      if (mp_irqs[idx].mpc_dstirq != pin)
+ +      if (mp_irqs[idx].mp_dstirq != pin)
                 printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
   
         if (test_bit(bus, mp_bus_not_pci)) {
- -              irq = mp_irqs[idx].mpc_srcbusirq;
+ +              irq = mp_irqs[idx].mp_srcbusirq;
         } else {
                 /*
                  * PCI IRQs are mapped in order
@@@ -743,7 -730,7 +743,7 @@@ static int __assign_irq_vector(int irq
                 offset = current_offset;
   next:
                 vector += 8;
- -              if (vector >= FIRST_SYSTEM_VECTOR) {
+ +              if (vector >= first_system_vector) {
                         /* If we run out of vectors on large boxen, must share them. */
                         offset = (offset + 1) % 8;
                         vector = FIRST_DEVICE_VECTOR + offset;
@@@ -801,7 -788,7 +801,7 @@@ static void __clear_irq_vector(int irq
         cpus_clear(cfg->domain);
   }
   
- -void __setup_vector_irq(int cpu)
+ +static void __setup_vector_irq(int cpu)
   {
         /* Initialize vector_irq on a new cpu */
         /* This function must be called with vector_lock held */
@@@ -824,13 -811,6 +824,13 @@@
         }
   }
   
+ +void setup_vector_irq(int cpu)
+ +{
+ +      spin_lock(&vector_lock);
+ +      __setup_vector_irq(smp_processor_id());
+ +      spin_unlock(&vector_lock);
+ +}
+ +
   
   static struct irq_chip ioapic_chip;
   
@@@ -866,7 -846,7 +866,7 @@@ static void setup_IO_APIC_irq(int apic
         apic_printk(APIC_VERBOSE,KERN_DEBUG
                     "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
                     "IRQ %d Mode:%i Active:%i)\n",
- -                  apic, mp_ioapics[apic].mpc_apicid, pin, cfg->vector,
+ +                  apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector,
                     irq, trigger, polarity);
   
         /*
@@@ -907,10 -887,10 +907,10 @@@ static void __init setup_IO_APIC_irqs(v
                 idx = find_irq_entry(apic,pin,mp_INT);
                 if (idx == -1) {
                         if (first_notcon) {
- -                              apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mpc_apicid, pin);
+ +                              apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mp_apicid, pin);
                                 first_notcon = 0;
                         } else
- -                              apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mpc_apicid, pin);
+ +                              apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mp_apicid, pin);
                         continue;
                 }
                 if (!first_notcon) {
@@@ -931,21 -911,26 +931,21 @@@
   }
   
   /*
- - * Set up the 8259A-master output pin as broadcast to all
- - * CPUs.
+ + * Set up the timer pin, possibly with the 8259A-master behind.
    */
- -static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector)
+ +static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
+ +                                      int vector)
   {
         struct IO_APIC_route_entry entry;
   
         memset(&entry, 0, sizeof(entry));
   
- -      disable_8259A_irq(0);
- -
- -      /* mask LVT0 */
- -      apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
- -
         /*
          * We use logical delivery to get the timer IRQ
          * to the first CPU.
          */
         entry.dest_mode = INT_DEST_MODE;
- -      entry.mask = 0;                                 /* unmask IRQ now */
+ +      entry.mask = 1;                                 /* mask IRQ now */
         entry.dest = cpu_mask_to_apicid(TARGET_CPUS);
         entry.delivery_mode = INT_DELIVERY_MODE;
         entry.polarity = 0;
@@@ -954,7 -939,7 +954,7 @@@
   
         /*
          * The timer IRQ doesn't have to know that behind the
- -       * scene we have a 8259A-master in AEOI mode ...
+ +       * scene we may have a 8259A-master in AEOI mode ...
          */
         set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
   
@@@ -962,6 -947,8 +962,6 @@@
          * Add it to the IO-APIC irq-routing table:
          */
         ioapic_write_entry(apic, pin, entry);
- -
- -      enable_8259A_irq(0);
   }
   
   void __apicdebuginit print_IO_APIC(void)
@@@ -978,7 -965,7 +978,7 @@@
         printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
         for (i = 0; i < nr_ioapics; i++)
                 printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
- -                     mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]);
+ +                     mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]);
   
         /*
          * We are a bit conservative about what we expect.  We have to
@@@ -996,7 -983,7 +996,7 @@@
         spin_unlock_irqrestore(&ioapic_lock, flags);
   
         printk("\n");
- -      printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid);
+ +      printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
         printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
         printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.bits.ID);
   
@@@ -1090,7 -1077,6 +1090,7 @@@ void __apicdebuginit print_local_APIC(v
   
         printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
                 smp_processor_id(), hard_smp_processor_id());
+ +      v = apic_read(APIC_ID);
         printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, GET_APIC_ID(read_apic_id()));
         v = apic_read(APIC_LVR);
         printk(KERN_INFO "... APIC VERSION: %08x\n", v);
@@@ -1160,7 -1146,7 +1160,7 @@@
   
   void print_all_local_APICs (void)
   {
-       on_each_cpu(print_local_APIC, NULL, 1, 1);
+       on_each_cpu(print_local_APIC, NULL, 1);
   }
   
   void __apicdebuginit print_PIC(void)
@@@ -1554,7 -1540,7 +1554,7 @@@ static inline void init_IO_APIC_traps(v
         }
   }
   
- -static void enable_lapic_irq (unsigned int irq)
+ +static void unmask_lapic_irq(unsigned int irq)
   {
         unsigned long v;
   
@@@ -1562,7 -1548,7 +1562,7 @@@
         apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
   }
   
- -static void disable_lapic_irq (unsigned int irq)
+ +static void mask_lapic_irq(unsigned int irq)
   {
         unsigned long v;
   
@@@ -1575,20 -1561,19 +1575,20 @@@ static void ack_lapic_irq (unsigned in
         ack_APIC_irq();
   }
   
- -static void end_lapic_irq (unsigned int i) { /* nothing */ }
- -
- -static struct hw_interrupt_type lapic_irq_type __read_mostly = {
- -      .name = "local-APIC",
- -      .typename = "local-APIC-edge",
- -      .startup = NULL, /* startup_irq() not used for IRQ0 */
- -      .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */
- -      .enable = enable_lapic_irq,
- -      .disable = disable_lapic_irq,
- -      .ack = ack_lapic_irq,
- -      .end = end_lapic_irq,
+ +static struct irq_chip lapic_chip __read_mostly = {
+ +      .name           = "local-APIC",
+ +      .mask           = mask_lapic_irq,
+ +      .unmask         = unmask_lapic_irq,
+ +      .ack            = ack_lapic_irq,
   };
   
+ +static void lapic_register_intr(int irq)
+ +{
+ +      irq_desc[irq].status &= ~IRQ_LEVEL;
+ +      set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
+ +                                    "edge");
+ +}
+ +
   static void __init setup_nmi(void)
   {
         /*
@@@ -1674,7 -1659,6 +1674,7 @@@ static inline void __init check_timer(v
         struct irq_cfg *cfg = irq_cfg + 0;
         int apic1, pin1, apic2, pin2;
         unsigned long flags;
+ +      int no_pin1 = 0;
   
         local_irq_save(flags);
   
@@@ -1685,11 -1669,16 +1685,11 @@@
         assign_irq_vector(0, TARGET_CPUS);
   
         /*
- -       * Subtle, code in do_timer_interrupt() expects an AEOI
- -       * mode for the 8259A whenever interrupts are routed
- -       * through I/O APICs.  Also IRQ0 has to be enabled in
- -       * the 8259A which implies the virtual wire has to be
- -       * disabled in the local APIC.
+ +       * As IRQ0 is to be enabled in the 8259A, the virtual
+ +       * wire has to be disabled in the local APIC.
          */
         apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
         init_8259A(1);
- -      if (timer_over_8254 > 0)
- -              enable_8259A_irq(0);
   
         pin1  = find_isa_irq_pin(0, mp_INT);
         apic1 = find_isa_irq_apic(0, mp_INT);
@@@ -1699,33 -1688,15 +1699,33 @@@
         apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
                 cfg->vector, apic1, pin1, apic2, pin2);
   
+ +      /*
+ +       * Some BIOS writers are clueless and report the ExtINTA
+ +       * I/O APIC input from the cascaded 8259A as the timer
+ +       * interrupt input.  So just in case, if only one pin
+ +       * was found above, try it both directly and through the
+ +       * 8259A.
+ +       */
+ +      if (pin1 == -1) {
+ +              pin1 = pin2;
+ +              apic1 = apic2;
+ +              no_pin1 = 1;
+ +      } else if (pin2 == -1) {
+ +              pin2 = pin1;
+ +              apic2 = apic1;
+ +      }
+ +
         if (pin1 != -1) {
                 /*
                  * Ok, does IRQ0 through the IOAPIC work?
                  */
+ +              if (no_pin1) {
+ +                      add_pin_to_irq(0, apic1, pin1);
+ +                      setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
+ +              }
                 unmask_IO_APIC_irq(0);
                 if (!no_timer_check && timer_irq_works()) {
- -                      nmi_watchdog_default();
                         if (nmi_watchdog == NMI_IO_APIC) {
- -                              disable_8259A_irq(0);
                                 setup_nmi();
                                 enable_8259A_irq(0);
                         }
@@@ -1734,48 -1705,43 +1734,48 @@@
                         goto out;
                 }
                 clear_IO_APIC_pin(apic1, pin1);
- -              apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: 8254 timer not "
- -                              "connected to IO-APIC\n");
- -      }
+ +              if (!no_pin1)
+ +                      apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: "
+ +                                  "8254 timer not connected to IO-APIC\n");
   
- -      apic_printk(APIC_VERBOSE,KERN_INFO "...trying to set up timer (IRQ0) "
- -                              "through the 8259A ... ");
- -      if (pin2 != -1) {
+ +              apic_printk(APIC_VERBOSE,KERN_INFO
+ +                      "...trying to set up timer (IRQ0) "
+ +                      "through the 8259A ... ");
                 apic_printk(APIC_VERBOSE,"\n..... (found apic %d pin %d) ...",
                         apic2, pin2);
                 /*
                  * legacy devices should be connected to IO APIC #0
                  */
- -              setup_ExtINT_IRQ0_pin(apic2, pin2, cfg->vector);
+ +              replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
+ +              setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
+ +              unmask_IO_APIC_irq(0);
+ +              enable_8259A_irq(0);
                 if (timer_irq_works()) {
                         apic_printk(APIC_VERBOSE," works.\n");
- -                      nmi_watchdog_default();
+ +                      timer_through_8259 = 1;
                         if (nmi_watchdog == NMI_IO_APIC) {
+ +                              disable_8259A_irq(0);
                                 setup_nmi();
+ +                              enable_8259A_irq(0);
                         }
                         goto out;
                 }
                 /*
                  * Cleanup, just in case ...
                  */
+ +              disable_8259A_irq(0);
                 clear_IO_APIC_pin(apic2, pin2);
+ +              apic_printk(APIC_VERBOSE," failed.\n");
         }
- -      apic_printk(APIC_VERBOSE," failed.\n");
   
         if (nmi_watchdog == NMI_IO_APIC) {
                 printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n");
- -              nmi_watchdog = 0;
+ +              nmi_watchdog = NMI_NONE;
         }
   
         apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
   
- -      disable_8259A_irq(0);
- -      irq_desc[0].chip = &lapic_irq_type;
+ +      lapic_register_intr(0);
         apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector);     /* Fixed mode */
         enable_8259A_irq(0);
   
@@@ -1783,7 -1749,6 +1783,7 @@@
                 apic_printk(APIC_VERBOSE," works.\n");
                 goto out;
         }
+ +      disable_8259A_irq(0);
         apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
         apic_printk(APIC_VERBOSE," failed.\n");
   
@@@ -1813,21 -1778,11 +1813,21 @@@ static int __init notimercheck(char *s
   __setup("no_timer_check", notimercheck);
   
   /*
- - *
- - * IRQs that are handled by the PIC in the MPS IOAPIC case.
- - * - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ.
- - *   Linux doesn't really care, as it's not actually used
- - *   for any interrupt handling anyway.
+ + * Traditionally ISA IRQ2 is the cascade IRQ, and is not available
+ + * to devices.  However there may be an I/O APIC pin available for
+ + * this interrupt regardless.  The pin may be left unconnected, but
+ + * typically it will be reused as an ExtINT cascade interrupt for
+ + * the master 8259A.  In the MPS case such a pin will normally be
+ + * reported as an ExtINT interrupt in the MP table.  With ACPI
+ + * there is no provision for ExtINT interrupts, and in the absence
+ + * of an override it would be treated as an ordinary ISA I/O APIC
+ + * interrupt, that is edge-triggered and unmasked by default.  We
+ + * used to do this, but it caused problems on some systems because
+ + * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using
+ + * the same ExtINT cascade interrupt to drive the local APIC of the
+ + * bootstrap processor.  Therefore we refrain from routing IRQ2 to
+ + * the I/O APIC in all cases now.  No actual device should request
+ + * it anyway.  --macro
    */
   #define PIC_IRQS      (1<<2)
   
@@@ -1838,7 -1793,10 +1838,7 @@@ void __init setup_IO_APIC(void
          * calling enable_IO_APIC() is moved to setup_local_APIC for BP
          */
   
- -      if (acpi_ioapic)
- -              io_apic_irqs = ~0;      /* all IRQs go through IOAPIC */
- -      else
- -              io_apic_irqs = ~PIC_IRQS;
+ +      io_apic_irqs = ~PIC_IRQS;
   
         apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
   
@@@ -1883,8 -1841,8 +1883,8 @@@ static int ioapic_resume(struct sys_dev
   
         spin_lock_irqsave(&ioapic_lock, flags);
         reg_00.raw = io_apic_read(dev->id, 0);
- -      if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) {
- -              reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
+ +      if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) {
+ +              reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid;
                 io_apic_write(dev->id, 0, reg_00.raw);
         }
         spin_unlock_irqrestore(&ioapic_lock, flags);
@@@ -2284,8 -2242,8 +2284,8 @@@ int acpi_get_override_irq(int bus_irq, 
                 return -1;
   
         for (i = 0; i < mp_irq_entries; i++)
- -              if (mp_irqs[i].mpc_irqtype == mp_INT &&
- -                  mp_irqs[i].mpc_srcbusirq == bus_irq)
+ +              if (mp_irqs[i].mp_irqtype == mp_INT &&
+ +                  mp_irqs[i].mp_srcbusirq == bus_irq)
                         break;
         if (i >= mp_irq_entries)
                 return -1;
@@@ -2378,7 -2336,7 +2378,7 @@@ void __init ioapic_init_mappings(void
         ioapic_res = ioapic_setup_resources();
         for (i = 0; i < nr_ioapics; i++) {
                 if (smp_found_config) {
- -                      ioapic_phys = mp_ioapics[i].mpc_apicaddr;
+ +                      ioapic_phys = mp_ioapics[i].mp_apicaddr;
                 } else {
                         ioapic_phys = (unsigned long)
                                 alloc_bootmem_pages(PAGE_SIZE);
diff --combined arch/x86/kernel/irqinit_64.c

index 31f49e8f46a7997aa2fbf8280f545ce318868c9f,0000000000000000000000000000000000000000..0373e88de95ab5be0760b9adb34ace988d253fcc

mode 100644,000000..100644
--- 1/arch/x86/kernel/irqinit_64.c
--- /dev/null
+++ b/arch/x86/kernel/irqinit_64.c
@@@ -1,217 -1,0 +1,221 @@@
+ +#include <linux/linkage.h>
+ +#include <linux/errno.h>
+ +#include <linux/signal.h>
+ +#include <linux/sched.h>
+ +#include <linux/ioport.h>
+ +#include <linux/interrupt.h>
+ +#include <linux/timex.h>
+ +#include <linux/slab.h>
+ +#include <linux/random.h>
+ +#include <linux/init.h>
+ +#include <linux/kernel_stat.h>
+ +#include <linux/sysdev.h>
+ +#include <linux/bitops.h>
+ +
+ +#include <asm/acpi.h>
+ +#include <asm/atomic.h>
+ +#include <asm/system.h>
+ +#include <asm/io.h>
+ +#include <asm/hw_irq.h>
+ +#include <asm/pgtable.h>
+ +#include <asm/delay.h>
+ +#include <asm/desc.h>
+ +#include <asm/apic.h>
+ +#include <asm/i8259.h>
+ +
+ +/*
+ + * Common place to define all x86 IRQ vectors
+ + *
+ + * This builds up the IRQ handler stubs using some ugly macros in irq.h
+ + *
+ + * These macros create the low-level assembly IRQ routines that save
+ + * register context and call do_IRQ(). do_IRQ() then does all the
+ + * operations that are needed to keep the AT (or SMP IOAPIC)
+ + * interrupt-controller happy.
+ + */
+ +
+ +#define IRQ_NAME2(nr) nr##_interrupt(void)
+ +#define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr)
+ +
+ +/*
+ + *    SMP has a few special interrupts for IPI messages
+ + */
+ +
+ +#define BUILD_IRQ(nr)                         \
+ +      asmlinkage void IRQ_NAME(nr);           \
+ +      asm("\n.p2align\n"                      \
+ +          "IRQ" #nr "_interrupt:\n\t"         \
+ +          "push $~(" #nr ") ; "               \
+ +          "jmp common_interrupt");
+ +
+ +#define BI(x,y) \
+ +      BUILD_IRQ(x##y)
+ +
+ +#define BUILD_16_IRQS(x) \
+ +      BI(x,0) BI(x,1) BI(x,2) BI(x,3) \
+ +      BI(x,4) BI(x,5) BI(x,6) BI(x,7) \
+ +      BI(x,8) BI(x,9) BI(x,a) BI(x,b) \
+ +      BI(x,c) BI(x,d) BI(x,e) BI(x,f)
+ +
+ +/*
+ + * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
+ + * (these are usually mapped to vectors 0x30-0x3f)
+ + */
+ +
+ +/*
+ + * The IO-APIC gives us many more interrupt sources. Most of these
+ + * are unused but an SMP system is supposed to have enough memory ...
+ + * sometimes (mostly wrt. hw bugs) we get corrupted vectors all
+ + * across the spectrum, so we really want to be prepared to get all
+ + * of these. Plus, more powerful systems might have more than 64
+ + * IO-APIC registers.
+ + *
+ + * (these are usually mapped into the 0x30-0xff vector range)
+ + */
+ +                                    BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3)
+ +BUILD_16_IRQS(0x4) BUILD_16_IRQS(0x5) BUILD_16_IRQS(0x6) BUILD_16_IRQS(0x7)
+ +BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9) BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb)
+ +BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) BUILD_16_IRQS(0xe) BUILD_16_IRQS(0xf)
+ +
+ +#undef BUILD_16_IRQS
+ +#undef BI
+ +
+ +
+ +#define IRQ(x,y) \
+ +      IRQ##x##y##_interrupt
+ +
+ +#define IRQLIST_16(x) \
+ +      IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \
+ +      IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \
+ +      IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
+ +      IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f)
+ +
+ +/* for the irq vectors */
+ +static void (*__initdata interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) = {
+ +                                        IRQLIST_16(0x2), IRQLIST_16(0x3),
+ +      IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7),
+ +      IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb),
+ +      IRQLIST_16(0xc), IRQLIST_16(0xd), IRQLIST_16(0xe), IRQLIST_16(0xf)
+ +};
+ +
+ +#undef IRQ
+ +#undef IRQLIST_16
+ +
+ +
+ +
+ +
+ +/*
+ + * IRQ2 is cascade interrupt to second interrupt controller
+ + */
+ +
+ +static struct irqaction irq2 = {
+ +      .handler = no_action,
+ +      .mask = CPU_MASK_NONE,
+ +      .name = "cascade",
+ +};
+ +DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
+ +      [0 ... IRQ0_VECTOR - 1] = -1,
+ +      [IRQ0_VECTOR] = 0,
+ +      [IRQ1_VECTOR] = 1,
+ +      [IRQ2_VECTOR] = 2,
+ +      [IRQ3_VECTOR] = 3,
+ +      [IRQ4_VECTOR] = 4,
+ +      [IRQ5_VECTOR] = 5,
+ +      [IRQ6_VECTOR] = 6,
+ +      [IRQ7_VECTOR] = 7,
+ +      [IRQ8_VECTOR] = 8,
+ +      [IRQ9_VECTOR] = 9,
+ +      [IRQ10_VECTOR] = 10,
+ +      [IRQ11_VECTOR] = 11,
+ +      [IRQ12_VECTOR] = 12,
+ +      [IRQ13_VECTOR] = 13,
+ +      [IRQ14_VECTOR] = 14,
+ +      [IRQ15_VECTOR] = 15,
+ +      [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1
+ +};
+ +
+ +static void __init init_ISA_irqs (void)
+ +{
+ +      int i;
+ +
+ +      init_bsp_APIC();
+ +      init_8259A(0);
+ +
+ +      for (i = 0; i < NR_IRQS; i++) {
+ +              irq_desc[i].status = IRQ_DISABLED;
+ +              irq_desc[i].action = NULL;
+ +              irq_desc[i].depth = 1;
+ +
+ +              if (i < 16) {
+ +                      /*
+ +                       * 16 old-style INTA-cycle interrupts:
+ +                       */
+ +                      set_irq_chip_and_handler_name(i, &i8259A_chip,
+ +                                                    handle_level_irq, "XT");
+ +              } else {
+ +                      /*
+ +                       * 'high' PCI IRQs filled in on demand
+ +                       */
+ +                      irq_desc[i].chip = &no_irq_chip;
+ +              }
+ +      }
+ +}
+ +
+ +void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
+ +
+ +void __init native_init_IRQ(void)
+ +{
+ +      int i;
+ +
+ +      init_ISA_irqs();
+ +      /*
+ +       * Cover the whole vector space, no vector can escape
+ +       * us. (some of these will be overridden and become
+ +       * 'special' SMP interrupts)
+ +       */
+ +      for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) {
+ +              int vector = FIRST_EXTERNAL_VECTOR + i;
+ +              if (vector != IA32_SYSCALL_VECTOR)
+ +                      set_intr_gate(vector, interrupt[i]);
+ +      }
+ +
+ +#ifdef CONFIG_SMP
+ +      /*
+ +       * The reschedule interrupt is a CPU-to-CPU reschedule-helper
+ +       * IPI, driven by wakeup.
+ +       */
+ +      alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
+ +
+ +      /* IPIs for invalidation */
+ +      alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0);
+ +      alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1);
+ +      alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2);
+ +      alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3);
+ +      alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4);
+ +      alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5);
+ +      alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6);
+ +      alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7);
+ +
+ +      /* IPI for generic function call */
+ +      alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
+ +
++      /* IPI for generic single function call */
++      alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
++                      call_function_single_interrupt);
++
+ +      /* Low priority IPI to cleanup after moving an irq */
+ +      set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
+ +#endif
+ +      alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
+ +      alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
+ +
+ +      /* self generated IPI for local APIC timer */
+ +      alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
+ +
+ +      /* IPI vectors for APIC spurious and error interrupts */
+ +      alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
+ +      alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
+ +
+ +      if (!acpi_ioapic)
+ +              setup_irq(2, &irq2);
+ +}
diff --combined arch/x86/kernel/ldt.c

index 21f2bae98c157730eff158c1f921ce3aa23882b3,cb0a6398c64baa5678160d1e818ce9951b13d77c..a8449571858ae9dae444076d7c6b28daff795ee9
--- 1/arch/x86/kernel/ldt.c
--- 2/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@@ -20,9 -20,9 +20,9 @@@
   #include <asm/mmu_context.h>
   
   #ifdef CONFIG_SMP
- -static void flush_ldt(void *null)
+ +static void flush_ldt(void *current_mm)
   {
- -      if (current->active_mm)
+ +      if (current->active_mm == current_mm)
                 load_LDT(&current->active_mm->context);
   }
   #endif
@@@ -68,7 -68,7 +68,7 @@@ static int alloc_ldt(mm_context_t *pc, 
                 load_LDT(pc);
                 mask = cpumask_of_cpu(smp_processor_id());
                 if (!cpus_equal(current->mm->cpu_vm_mask, mask))
-                       smp_call_function(flush_ldt, current->mm, 1, 1);
- -                      smp_call_function(flush_ldt, NULL, 1);
++                      smp_call_function(flush_ldt, current->mm, 1);
                 preempt_enable();
   #else
                 load_LDT(pc);
diff --combined arch/x86/kernel/nmi.c

index 716b89284be02841cf74e81a73ad0d0a203cbfca,0000000000000000000000000000000000000000..ec024b3baad0764821c036d0aa2552397f76f017

mode 100644,000000..100644
--- 1/arch/x86/kernel/nmi.c
--- /dev/null
+++ b/arch/x86/kernel/nmi.c
@@@ -1,516 -1,0 +1,516 @@@
-               smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);
+ +/*
+ + *  NMI watchdog support on APIC systems
+ + *
+ + *  Started by Ingo Molnar <mingo@redhat.com>
+ + *
+ + *  Fixes:
+ + *  Mikael Pettersson : AMD K7 support for local APIC NMI watchdog.
+ + *  Mikael Pettersson : Power Management for local APIC NMI watchdog.
+ + *  Mikael Pettersson : Pentium 4 support for local APIC NMI watchdog.
+ + *  Pavel Machek and
+ + *  Mikael Pettersson : PM converted to driver model. Disable/enable API.
+ + */
+ +
+ +#include <asm/apic.h>
+ +
+ +#include <linux/nmi.h>
+ +#include <linux/mm.h>
+ +#include <linux/delay.h>
+ +#include <linux/interrupt.h>
+ +#include <linux/module.h>
+ +#include <linux/sysdev.h>
+ +#include <linux/sysctl.h>
+ +#include <linux/percpu.h>
+ +#include <linux/kprobes.h>
+ +#include <linux/cpumask.h>
+ +#include <linux/kernel_stat.h>
+ +#include <linux/kdebug.h>
+ +#include <linux/smp.h>
+ +
+ +#include <asm/i8259.h>
+ +#include <asm/io_apic.h>
+ +#include <asm/smp.h>
+ +#include <asm/nmi.h>
+ +#include <asm/proto.h>
+ +#include <asm/timer.h>
+ +
+ +#include <asm/mce.h>
+ +
+ +#include <mach_traps.h>
+ +
+ +int unknown_nmi_panic;
+ +int nmi_watchdog_enabled;
+ +
+ +static cpumask_t backtrace_mask = CPU_MASK_NONE;
+ +
+ +/* nmi_active:
+ + * >0: the lapic NMI watchdog is active, but can be disabled
+ + * <0: the lapic NMI watchdog has not been set up, and cannot
+ + *     be enabled
+ + *  0: the lapic NMI watchdog is disabled, but can be enabled
+ + */
+ +atomic_t nmi_active = ATOMIC_INIT(0);         /* oprofile uses this */
+ +EXPORT_SYMBOL(nmi_active);
+ +
+ +unsigned int nmi_watchdog = NMI_NONE;
+ +EXPORT_SYMBOL(nmi_watchdog);
+ +
+ +static int panic_on_timeout;
+ +
+ +static unsigned int nmi_hz = HZ;
+ +static DEFINE_PER_CPU(short, wd_enabled);
+ +static int endflag __initdata;
+ +
+ +static inline unsigned int get_nmi_count(int cpu)
+ +{
+ +#ifdef CONFIG_X86_64
+ +      return cpu_pda(cpu)->__nmi_count;
+ +#else
+ +      return nmi_count(cpu);
+ +#endif
+ +}
+ +
+ +static inline int mce_in_progress(void)
+ +{
+ +#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
+ +      return atomic_read(&mce_entry) > 0;
+ +#endif
+ +      return 0;
+ +}
+ +
+ +/*
+ + * Take the local apic timer and PIT/HPET into account. We don't
+ + * know which one is active when highres/dyntick is on.
+ + */
+ +static inline unsigned int get_timer_irqs(int cpu)
+ +{
+ +#ifdef CONFIG_X86_64
+ +      return read_pda(apic_timer_irqs) + read_pda(irq0_irqs);
+ +#else
+ +      return per_cpu(irq_stat, cpu).apic_timer_irqs +
+ +              per_cpu(irq_stat, cpu).irq0_irqs;
+ +#endif
+ +}
+ +
+ +#ifdef CONFIG_SMP
+ +/*
+ + * The performance counters used by NMI_LOCAL_APIC don't trigger when
+ + * the CPU is idle. To make sure the NMI watchdog really ticks on all
+ + * CPUs during the test make them busy.
+ + */
+ +static __init void nmi_cpu_busy(void *data)
+ +{
+ +      local_irq_enable_in_hardirq();
+ +      /*
+ +       * Intentionally don't use cpu_relax here. This is
+ +       * to make sure that the performance counter really ticks,
+ +       * even if there is a simulator or similar that catches the
+ +       * pause instruction. On a real HT machine this is fine because
+ +       * all other CPUs are busy with "useless" delay loops and don't
+ +       * care if they get somewhat fewer cycles.
+ +       */
+ +      while (endflag == 0)
+ +              mb();
+ +}
+ +#endif
+ +
+ +int __init check_nmi_watchdog(void)
+ +{
+ +      unsigned int *prev_nmi_count;
+ +      int cpu;
+ +
+ +      if (!nmi_watchdog_active() || !atomic_read(&nmi_active))
+ +              return 0;
+ +
+ +      prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);
+ +      if (!prev_nmi_count)
+ +              goto error;
+ +
+ +      printk(KERN_INFO "Testing NMI watchdog ... ");
+ +
+ +#ifdef CONFIG_SMP
+ +      if (nmi_watchdog == NMI_LOCAL_APIC)
-               on_each_cpu(__acpi_nmi_enable, NULL, 0, 1);
++              smp_call_function(nmi_cpu_busy, (void *)&endflag, 0);
+ +#endif
+ +
+ +      for_each_possible_cpu(cpu)
+ +              prev_nmi_count[cpu] = get_nmi_count(cpu);
+ +      local_irq_enable();
+ +      mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */
+ +
+ +      for_each_online_cpu(cpu) {
+ +              if (!per_cpu(wd_enabled, cpu))
+ +                      continue;
+ +              if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
+ +                      printk(KERN_WARNING "WARNING: CPU#%d: NMI "
+ +                              "appears to be stuck (%d->%d)!\n",
+ +                              cpu,
+ +                              prev_nmi_count[cpu],
+ +                              get_nmi_count(cpu));
+ +                      per_cpu(wd_enabled, cpu) = 0;
+ +                      atomic_dec(&nmi_active);
+ +              }
+ +      }
+ +      endflag = 1;
+ +      if (!atomic_read(&nmi_active)) {
+ +              kfree(prev_nmi_count);
+ +              atomic_set(&nmi_active, -1);
+ +              goto error;
+ +      }
+ +      printk("OK.\n");
+ +
+ +      /*
+ +       * now that we know it works we can reduce NMI frequency to
+ +       * something more reasonable; makes a difference in some configs
+ +       */
+ +      if (nmi_watchdog == NMI_LOCAL_APIC)
+ +              nmi_hz = lapic_adjust_nmi_hz(1);
+ +
+ +      kfree(prev_nmi_count);
+ +      return 0;
+ +error:
+ +      if (nmi_watchdog == NMI_IO_APIC && !timer_through_8259)
+ +              disable_8259A_irq(0);
+ +#ifdef CONFIG_X86_32
+ +      timer_ack = 0;
+ +#endif
+ +      return -1;
+ +}
+ +
+ +static int __init setup_nmi_watchdog(char *str)
+ +{
+ +      unsigned int nmi;
+ +
+ +      if (!strncmp(str, "panic", 5)) {
+ +              panic_on_timeout = 1;
+ +              str = strchr(str, ',');
+ +              if (!str)
+ +                      return 1;
+ +              ++str;
+ +      }
+ +
+ +      get_option(&str, &nmi);
+ +
+ +      if (nmi >= NMI_INVALID)
+ +              return 0;
+ +
+ +      nmi_watchdog = nmi;
+ +      return 1;
+ +}
+ +__setup("nmi_watchdog=", setup_nmi_watchdog);
+ +
+ +/*
+ + * Suspend/resume support
+ + */
+ +#ifdef CONFIG_PM
+ +
+ +static int nmi_pm_active; /* nmi_active before suspend */
+ +
+ +static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
+ +{
+ +      /* only CPU0 goes here, other CPUs should be offline */
+ +      nmi_pm_active = atomic_read(&nmi_active);
+ +      stop_apic_nmi_watchdog(NULL);
+ +      BUG_ON(atomic_read(&nmi_active) != 0);
+ +      return 0;
+ +}
+ +
+ +static int lapic_nmi_resume(struct sys_device *dev)
+ +{
+ +      /* only CPU0 goes here, other CPUs should be offline */
+ +      if (nmi_pm_active > 0) {
+ +              setup_apic_nmi_watchdog(NULL);
+ +              touch_nmi_watchdog();
+ +      }
+ +      return 0;
+ +}
+ +
+ +static struct sysdev_class nmi_sysclass = {
+ +      .name           = "lapic_nmi",
+ +      .resume         = lapic_nmi_resume,
+ +      .suspend        = lapic_nmi_suspend,
+ +};
+ +
+ +static struct sys_device device_lapic_nmi = {
+ +      .id     = 0,
+ +      .cls    = &nmi_sysclass,
+ +};
+ +
+ +static int __init init_lapic_nmi_sysfs(void)
+ +{
+ +      int error;
+ +
+ +      /*
+ +       * should really be a BUG_ON, but because this is an
+ +       * init call, it just doesn't work.  -dcz
+ +       */
+ +      if (nmi_watchdog != NMI_LOCAL_APIC)
+ +              return 0;
+ +
+ +      if (atomic_read(&nmi_active) < 0)
+ +              return 0;
+ +
+ +      error = sysdev_class_register(&nmi_sysclass);
+ +      if (!error)
+ +              error = sysdev_register(&device_lapic_nmi);
+ +      return error;
+ +}
+ +
+ +/* must come after the local APIC's device_initcall() */
+ +late_initcall(init_lapic_nmi_sysfs);
+ +
+ +#endif        /* CONFIG_PM */
+ +
+ +static void __acpi_nmi_enable(void *__unused)
+ +{
+ +      apic_write_around(APIC_LVT0, APIC_DM_NMI);
+ +}
+ +
+ +/*
+ + * Enable timer based NMIs on all CPUs:
+ + */
+ +void acpi_nmi_enable(void)
+ +{
+ +      if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
-               on_each_cpu(__acpi_nmi_disable, NULL, 0, 1);
++              on_each_cpu(__acpi_nmi_enable, NULL, 1);
+ +}
+ +
+ +static void __acpi_nmi_disable(void *__unused)
+ +{
+ +      apic_write_around(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
+ +}
+ +
+ +/*
+ + * Disable timer based NMIs on all CPUs:
+ + */
+ +void acpi_nmi_disable(void)
+ +{
+ +      if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
++              on_each_cpu(__acpi_nmi_disable, NULL, 1);
+ +}
+ +
+ +void setup_apic_nmi_watchdog(void *unused)
+ +{
+ +      if (__get_cpu_var(wd_enabled))
+ +              return;
+ +
+ +      /* cheap hack to support suspend/resume */
+ +      /* if cpu0 is not active neither should the other cpus */
+ +      if (smp_processor_id() != 0 && atomic_read(&nmi_active) <= 0)
+ +              return;
+ +
+ +      switch (nmi_watchdog) {
+ +      case NMI_LOCAL_APIC:
+ +               /* enable it before to avoid race with handler */
+ +              __get_cpu_var(wd_enabled) = 1;
+ +              if (lapic_watchdog_init(nmi_hz) < 0) {
+ +                      __get_cpu_var(wd_enabled) = 0;
+ +                      return;
+ +              }
+ +              /* FALL THROUGH */
+ +      case NMI_IO_APIC:
+ +              __get_cpu_var(wd_enabled) = 1;
+ +              atomic_inc(&nmi_active);
+ +      }
+ +}
+ +
+ +void stop_apic_nmi_watchdog(void *unused)
+ +{
+ +      /* only support LOCAL and IO APICs for now */
+ +      if (!nmi_watchdog_active())
+ +              return;
+ +      if (__get_cpu_var(wd_enabled) == 0)
+ +              return;
+ +      if (nmi_watchdog == NMI_LOCAL_APIC)
+ +              lapic_watchdog_stop();
+ +      __get_cpu_var(wd_enabled) = 0;
+ +      atomic_dec(&nmi_active);
+ +}
+ +
+ +/*
+ + * the best way to detect whether a CPU has a 'hard lockup' problem
+ + * is to check its local APIC timer IRQ counts. If they are not
+ + * changing then that CPU has some problem.
+ + *
+ + * as these watchdog NMI IRQs are generated on every CPU, we only
+ + * have to check the current processor.
+ + *
+ + * since NMIs don't listen to _any_ locks, we have to be extremely
+ + * careful not to rely on unsafe variables. The printk might lock
+ + * up though, so we have to break up any console locks first ...
+ + * [when there will be more tty-related locks, break them up here too!]
+ + */
+ +
+ +static DEFINE_PER_CPU(unsigned, last_irq_sum);
+ +static DEFINE_PER_CPU(local_t, alert_counter);
+ +static DEFINE_PER_CPU(int, nmi_touch);
+ +
+ +void touch_nmi_watchdog(void)
+ +{
+ +      if (nmi_watchdog_active()) {
+ +              unsigned cpu;
+ +
+ +              /*
+ +               * Tell other CPUs to reset their alert counters. We cannot
+ +               * do it ourselves because the alert count increase is not
+ +               * atomic.
+ +               */
+ +              for_each_present_cpu(cpu) {
+ +                      if (per_cpu(nmi_touch, cpu) != 1)
+ +                              per_cpu(nmi_touch, cpu) = 1;
+ +              }
+ +      }
+ +
+ +      /*
+ +       * Tickle the softlockup detector too:
+ +       */
+ +      touch_softlockup_watchdog();
+ +}
+ +EXPORT_SYMBOL(touch_nmi_watchdog);
+ +
+ +notrace __kprobes int
+ +nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
+ +{
+ +      /*
+ +       * Since current_thread_info()-> is always on the stack, and we
+ +       * always switch the stack NMI-atomically, it's safe to use
+ +       * smp_processor_id().
+ +       */
+ +      unsigned int sum;
+ +      int touched = 0;
+ +      int cpu = smp_processor_id();
+ +      int rc = 0;
+ +
+ +      /* check for other users first */
+ +      if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
+ +                      == NOTIFY_STOP) {
+ +              rc = 1;
+ +              touched = 1;
+ +      }
+ +
+ +      sum = get_timer_irqs(cpu);
+ +
+ +      if (__get_cpu_var(nmi_touch)) {
+ +              __get_cpu_var(nmi_touch) = 0;
+ +              touched = 1;
+ +      }
+ +
+ +      if (cpu_isset(cpu, backtrace_mask)) {
+ +              static DEFINE_SPINLOCK(lock);   /* Serialise the printks */
+ +
+ +              spin_lock(&lock);
+ +              printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
+ +              dump_stack();
+ +              spin_unlock(&lock);
+ +              cpu_clear(cpu, backtrace_mask);
+ +      }
+ +
+ +      /* Could check oops_in_progress here too, but it's safer not to */
+ +      if (mce_in_progress())
+ +              touched = 1;
+ +
+ +      /* if none of the timers is firing, this cpu isn't doing much */
+ +      if (!touched && __get_cpu_var(last_irq_sum) == sum) {
+ +              /*
+ +               * Ayiee, looks like this CPU is stuck ...
+ +               * wait a few IRQs (5 seconds) before doing the oops ...
+ +               */
+ +              local_inc(&__get_cpu_var(alert_counter));
+ +              if (local_read(&__get_cpu_var(alert_counter)) == 5 * nmi_hz)
+ +                      /*
+ +                       * die_nmi will return ONLY if NOTIFY_STOP happens..
+ +                       */
+ +                      die_nmi("BUG: NMI Watchdog detected LOCKUP",
+ +                              regs, panic_on_timeout);
+ +      } else {
+ +              __get_cpu_var(last_irq_sum) = sum;
+ +              local_set(&__get_cpu_var(alert_counter), 0);
+ +      }
+ +
+ +      /* see if the nmi watchdog went off */
+ +      if (!__get_cpu_var(wd_enabled))
+ +              return rc;
+ +      switch (nmi_watchdog) {
+ +      case NMI_LOCAL_APIC:
+ +              rc |= lapic_wd_event(nmi_hz);
+ +              break;
+ +      case NMI_IO_APIC:
+ +              /*
+ +               * don't know how to accurately check for this.
+ +               * just assume it was a watchdog timer interrupt
+ +               * This matches the old behaviour.
+ +               */
+ +              rc = 1;
+ +              break;
+ +      }
+ +      return rc;
+ +}
+ +
+ +#ifdef CONFIG_SYSCTL
+ +
+ +static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
+ +{
+ +      unsigned char reason = get_nmi_reason();
+ +      char buf[64];
+ +
+ +      sprintf(buf, "NMI received for unknown reason %02x\n", reason);
+ +      die_nmi(buf, regs, 1); /* Always panic here */
+ +      return 0;
+ +}
+ +
+ +/*
+ + * proc handler for /proc/sys/kernel/nmi
+ + */
+ +int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
+ +                      void __user *buffer, size_t *length, loff_t *ppos)
+ +{
+ +      int old_state;
+ +
+ +      nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
+ +      old_state = nmi_watchdog_enabled;
+ +      proc_dointvec(table, write, file, buffer, length, ppos);
+ +      if (!!old_state == !!nmi_watchdog_enabled)
+ +              return 0;
+ +
+ +      if (atomic_read(&nmi_active) < 0 || !nmi_watchdog_active()) {
+ +              printk(KERN_WARNING
+ +                      "NMI watchdog is permanently disabled\n");
+ +              return -EIO;
+ +      }
+ +
+ +      if (nmi_watchdog == NMI_LOCAL_APIC) {
+ +              if (nmi_watchdog_enabled)
+ +                      enable_lapic_nmi_watchdog();
+ +              else
+ +                      disable_lapic_nmi_watchdog();
+ +      } else {
+ +              printk(KERN_WARNING
+ +                      "NMI watchdog doesn't know what hardware to touch\n");
+ +              return -EIO;
+ +      }
+ +      return 0;
+ +}
+ +
+ +#endif /* CONFIG_SYSCTL */
+ +
+ +int do_nmi_callback(struct pt_regs *regs, int cpu)
+ +{
+ +#ifdef CONFIG_SYSCTL
+ +      if (unknown_nmi_panic)
+ +              return unknown_nmi_panic_callback(regs, cpu);
+ +#endif
+ +      return 0;
+ +}
+ +
+ +void __trigger_all_cpu_backtrace(void)
+ +{
+ +      int i;
+ +
+ +      backtrace_mask = cpu_online_map;
+ +      /* Wait for up to 10 seconds for all CPUs to do the backtrace */
+ +      for (i = 0; i < 10 * 1000; i++) {
+ +              if (cpus_empty(backtrace_mask))
+ +                      break;
+ +              mdelay(1);
+ +      }
+ +}
diff --combined arch/x86/kernel/process.c

index 4061d63aabe74bb12f32f5d162c564240ae8d8a5,2dad8fef391cd338d2aaa0e58ec97d362258be04..7dceea947232f5739b7844c10ba9c2b22f336e95
--- 1/arch/x86/kernel/process.c
--- 2/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@@ -6,7 -6,6 +6,7 @@@
   #include <linux/sched.h>
   #include <linux/module.h>
   #include <linux/pm.h>
+ +#include <linux/clockchips.h>
   
   struct kmem_cache *task_xstate_cachep;
   
@@@ -46,76 -45,6 +46,76 @@@ void arch_task_cache_init(void
                                   SLAB_PANIC, NULL);
   }
   
+ +/*
+ + * Idle related variables and functions
+ + */
+ +unsigned long boot_option_idle_override = 0;
+ +EXPORT_SYMBOL(boot_option_idle_override);
+ +
+ +/*
+ + * Powermanagement idle function, if any..
+ + */
+ +void (*pm_idle)(void);
+ +EXPORT_SYMBOL(pm_idle);
+ +
+ +#ifdef CONFIG_X86_32
+ +/*
+ + * This halt magic was a workaround for ancient floppy DMA
+ + * wreckage. It should be safe to remove.
+ + */
+ +static int hlt_counter;
+ +void disable_hlt(void)
+ +{
+ +      hlt_counter++;
+ +}
+ +EXPORT_SYMBOL(disable_hlt);
+ +
+ +void enable_hlt(void)
+ +{
+ +      hlt_counter--;
+ +}
+ +EXPORT_SYMBOL(enable_hlt);
+ +
+ +static inline int hlt_use_halt(void)
+ +{
+ +      return (!hlt_counter && boot_cpu_data.hlt_works_ok);
+ +}
+ +#else
+ +static inline int hlt_use_halt(void)
+ +{
+ +      return 1;
+ +}
+ +#endif
+ +
+ +/*
+ + * We use this if we don't have any better
+ + * idle routine..
+ + */
+ +void default_idle(void)
+ +{
+ +      if (hlt_use_halt()) {
+ +              current_thread_info()->status &= ~TS_POLLING;
+ +              /*
+ +               * TS_POLLING-cleared state must be visible before we
+ +               * test NEED_RESCHED:
+ +               */
+ +              smp_mb();
+ +
+ +              if (!need_resched())
+ +                      safe_halt();    /* enables interrupts racelessly */
+ +              else
+ +                      local_irq_enable();
+ +              current_thread_info()->status |= TS_POLLING;
+ +      } else {
+ +              local_irq_enable();
+ +              /* loop is done by the caller */
+ +              cpu_relax();
+ +      }
+ +}
+ +#ifdef CONFIG_APM_MODULE
+ +EXPORT_SYMBOL(default_idle);
+ +#endif
+ +
   static void do_nothing(void *unused)
   {
   }
@@@ -132,7 -61,7 +132,7 @@@ void cpu_idle_wait(void
   {
         smp_mb();
         /* kick all the CPUs so that they exit out of pm_idle */
-       smp_call_function(do_nothing, NULL, 0, 1);
+       smp_call_function(do_nothing, NULL, 1);
   }
   EXPORT_SYMBOL_GPL(cpu_idle_wait);
   
@@@ -193,129 -122,44 +193,129 @@@ static void poll_idle(void
    *
    * idle=mwait overrides this decision and forces the usage of mwait.
    */
+ +
+ +#define MWAIT_INFO                    0x05
+ +#define MWAIT_ECX_EXTENDED_INFO               0x01
+ +#define MWAIT_EDX_C1                  0xf0
+ +
   static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
   {
+ +      u32 eax, ebx, ecx, edx;
+ +
         if (force_mwait)
                 return 1;
   
- -      if (c->x86_vendor == X86_VENDOR_AMD) {
- -              switch(c->x86) {
- -              case 0x10:
- -              case 0x11:
- -                      return 0;
- -              }
- -      }
+ +      if (c->cpuid_level < MWAIT_INFO)
+ +              return 0;
+ +
+ +      cpuid(MWAIT_INFO, &eax, &ebx, &ecx, &edx);
+ +      /* Check, whether EDX has extended info about MWAIT */
+ +      if (!(ecx & MWAIT_ECX_EXTENDED_INFO))
+ +              return 1;
+ +
+ +      /*
+ +       * edx enumerates MONITOR/MWAIT extensions. Check whether
+ +       * C1 supports MWAIT
+ +       */
+ +      return (edx & MWAIT_EDX_C1);
+ +}
+ +
+ +/*
+ + * Check for AMD CPUs, which have potentially C1E support
+ + */
+ +static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c)
+ +{
+ +      if (c->x86_vendor != X86_VENDOR_AMD)
+ +              return 0;
+ +
+ +      if (c->x86 < 0x0F)
+ +              return 0;
+ +
+ +      /* Family 0x0f models < rev F do not have C1E */
+ +      if (c->x86 == 0x0f && c->x86_model < 0x40)
+ +              return 0;
+ +
         return 1;
   }
   
- -void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
+ +/*
+ + * C1E aware idle routine. We check for C1E active in the interrupt
+ + * pending message MSR. If we detect C1E, then we handle it the same
+ + * way as C3 power states (local apic timer and TSC stop)
+ + */
+ +static void c1e_idle(void)
   {
- -      static int selected;
+ +      static cpumask_t c1e_mask = CPU_MASK_NONE;
+ +      static int c1e_detected;
   
- -      if (selected)
+ +      if (need_resched())
                 return;
+ +
+ +      if (!c1e_detected) {
+ +              u32 lo, hi;
+ +
+ +              rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi);
+ +              if (lo & K8_INTP_C1E_ACTIVE_MASK) {
+ +                      c1e_detected = 1;
+ +                      mark_tsc_unstable("TSC halt in C1E");
+ +                      printk(KERN_INFO "System has C1E enabled\n");
+ +              }
+ +      }
+ +
+ +      if (c1e_detected) {
+ +              int cpu = smp_processor_id();
+ +
+ +              if (!cpu_isset(cpu, c1e_mask)) {
+ +                      cpu_set(cpu, c1e_mask);
+ +                      /*
+ +                       * Force broadcast so ACPI can not interfere. Needs
+ +                       * to run with interrupts enabled as it uses
+ +                       * smp_call_function.
+ +                       */
+ +                      local_irq_enable();
+ +                      clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE,
+ +                                         &cpu);
+ +                      printk(KERN_INFO "Switch to broadcast mode on CPU%d\n",
+ +                             cpu);
+ +                      local_irq_disable();
+ +              }
+ +              clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
+ +
+ +              default_idle();
+ +
+ +              /*
+ +               * The switch back from broadcast mode needs to be
+ +               * called with interrupts disabled.
+ +               */
+ +               local_irq_disable();
+ +               clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
+ +               local_irq_enable();
+ +      } else
+ +              default_idle();
+ +}
+ +
+ +void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
+ +{
   #ifdef CONFIG_X86_SMP
         if (pm_idle == poll_idle && smp_num_siblings > 1) {
                 printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
                         " performance may degrade.\n");
         }
   #endif
+ +      if (pm_idle)
+ +              return;
+ +
         if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
                 /*
- -               * Skip, if setup has overridden idle.
                  * One CPU supports mwait => All CPUs supports mwait
                  */
- -              if (!pm_idle) {
- -                      printk(KERN_INFO "using mwait in idle threads.\n");
- -                      pm_idle = mwait_idle;
- -              }
- -      }
- -      selected = 1;
+ +              printk(KERN_INFO "using mwait in idle threads.\n");
+ +              pm_idle = mwait_idle;
+ +      } else if (check_c1e_idle(c)) {
+ +              printk(KERN_INFO "using C1E aware idle routine\n");
+ +              pm_idle = c1e_idle;
+ +      } else
+ +              pm_idle = default_idle;
   }
   
   static int __init idle_setup(char *str)
diff --combined arch/x86/kernel/smpboot.c

index f35c2d8016ac412c1c0433bf4382829c1d2cef31,89647898f54606686171e33cccfe53f2e948edf7..687376ab07e82ece4ab1eeb609d738d76245d226
--- 1/arch/x86/kernel/smpboot.c
--- 2/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@@ -59,6 -59,7 +59,6 @@@
   #include <asm/pgtable.h>
   #include <asm/tlbflush.h>
   #include <asm/mtrr.h>
- -#include <asm/nmi.h>
   #include <asm/vmi.h>
   #include <asm/genapic.h>
   #include <linux/mc146818rtc.h>
@@@ -67,6 -68,22 +67,6 @@@
   #include <mach_wakecpu.h>
   #include <smpboot_hooks.h>
   
- -/*
- - * FIXME: For x86_64, those are defined in other files. But moving them here,
- - * would make the setup areas dependent on smp, which is a loss. When we
- - * integrate apic between arches, we can probably do a better job, but
- - * right now, they'll stay here -- glommer
- - */
- -
- -/* which logical CPU number maps to which CPU (physical APIC ID) */
- -u16 x86_cpu_to_apicid_init[NR_CPUS] __initdata =
- -                      { [0 ... NR_CPUS-1] = BAD_APICID };
- -void *x86_cpu_to_apicid_early_ptr;
- -
- -u16 x86_bios_cpu_apicid_init[NR_CPUS] __initdata
- -                              = { [0 ... NR_CPUS-1] = BAD_APICID };
- -void *x86_bios_cpu_apicid_early_ptr;
- -
   #ifdef CONFIG_X86_32
   u8 apicid_2_node[MAX_APICID];
   static int low_mappings;
@@@ -181,12 -198,13 +181,12 @@@ static void map_cpu_to_logical_apicid(v
         map_cpu_to_node(cpu, node);
   }
   
- -static void unmap_cpu_to_logical_apicid(int cpu)
+ +void numa_remove_cpu(int cpu)
   {
         cpu_2_logical_apicid[cpu] = BAD_APICID;
         unmap_cpu_to_node(cpu);
   }
   #else
- -#define unmap_cpu_to_logical_apicid(cpu) do {} while (0)
   #define map_cpu_to_logical_apicid()  do {} while (0)
   #endif
   
@@@ -327,12 -345,19 +327,12 @@@ static void __cpuinit start_secondary(v
          * lock helps us to not include this cpu in a currently in progress
          * smp_call_function().
          */
-       lock_ipi_call_lock();
+       ipi_call_lock_irq();
- -#ifdef CONFIG_X86_64
- -      spin_lock(&vector_lock);
- -
- -      /* Setup the per cpu irq handling data structures */
- -      __setup_vector_irq(smp_processor_id());
- -      /*
- -       * Allow the master to continue.
- -       */
- -      spin_unlock(&vector_lock);
+ +#ifdef CONFIG_X86_IO_APIC
+ +      setup_vector_irq(smp_processor_id());
   #endif
         cpu_set(smp_processor_id(), cpu_online_map);
-       unlock_ipi_call_lock();
+       ipi_call_unlock_irq();
         per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
   
         setup_secondary_clock();
@@@ -341,8 -366,31 +341,8 @@@
         cpu_idle();
   }
   
- -#ifdef CONFIG_X86_32
- -/*
- - * Everything has been set up for the secondary
- - * CPUs - they just need to reload everything
- - * from the task structure
- - * This function must not return.
- - */
- -void __devinit initialize_secondary(void)
- -{
- -      /*
- -       * We don't actually need to load the full TSS,
- -       * basically just the stack pointer and the ip.
- -       */
- -
- -      asm volatile(
- -              "movl %0,%%esp\n\t"
- -              "jmp *%1"
- -              :
- -              :"m" (current->thread.sp), "m" (current->thread.ip));
- -}
- -#endif
- -
   static void __cpuinit smp_apply_quirks(struct cpuinfo_x86 *c)
   {
- -#ifdef CONFIG_X86_32
         /*
          * Mask B, Pentium, but not Pentium MMX
          */
@@@ -392,6 -440,7 +392,6 @@@
   
   valid_k7:
         ;
- -#endif
   }
   
   static void __cpuinit smp_checks(void)
@@@ -506,6 -555,23 +506,6 @@@ cpumask_t cpu_coregroup_map(int cpu
                 return c->llc_shared_map;
   }
   
- -#ifdef CONFIG_X86_32
- -/*
- - * We are called very early to get the low memory for the
- - * SMP bootup trampoline page.
- - */
- -void __init smp_alloc_memory(void)
- -{
- -      trampoline_base = alloc_bootmem_low_pages(PAGE_SIZE);
- -      /*
- -       * Has to be in very low memory so we can execute
- -       * real-mode AP code.
- -       */
- -      if (__pa(trampoline_base) >= 0x9F000)
- -              BUG();
- -}
- -#endif
- -
   static void impress_friends(void)
   {
         int cpu;
@@@ -682,7 -748,11 +682,7 @@@ wakeup_secondary_cpu(int phys_apicid, u
          * target processor state.
          */
         startup_ipi_hook(phys_apicid, (unsigned long) start_secondary,
- -#ifdef CONFIG_X86_64
- -                       (unsigned long)init_rsp);
- -#else
                          (unsigned long)stack_start.sp);
- -#endif
   
         /*
          * Run STARTUP IPI loop.
@@@ -762,45 -832,6 +762,45 @@@ static void __cpuinit do_fork_idle(stru
         complete(&c_idle->done);
   }
   
+ +#ifdef CONFIG_X86_64
+ +/*
+ + * Allocate node local memory for the AP pda.
+ + *
+ + * Must be called after the _cpu_pda pointer table is initialized.
+ + */
+ +static int __cpuinit get_local_pda(int cpu)
+ +{
+ +      struct x8664_pda *oldpda, *newpda;
+ +      unsigned long size = sizeof(struct x8664_pda);
+ +      int node = cpu_to_node(cpu);
+ +
+ +      if (cpu_pda(cpu) && !cpu_pda(cpu)->in_bootmem)
+ +              return 0;
+ +
+ +      oldpda = cpu_pda(cpu);
+ +      newpda = kmalloc_node(size, GFP_ATOMIC, node);
+ +      if (!newpda) {
+ +              printk(KERN_ERR "Could not allocate node local PDA "
+ +                      "for CPU %d on node %d\n", cpu, node);
+ +
+ +              if (oldpda)
+ +                      return 0;       /* have a usable pda */
+ +              else
+ +                      return -1;
+ +      }
+ +
+ +      if (oldpda) {
+ +              memcpy(newpda, oldpda, size);
+ +              if (!after_bootmem)
+ +                      free_bootmem((unsigned long)oldpda, size);
+ +      }
+ +
+ +      newpda->in_bootmem = 0;
+ +      cpu_pda(cpu) = newpda;
+ +      return 0;
+ +}
+ +#endif /* CONFIG_X86_64 */
+ +
   static int __cpuinit do_boot_cpu(int apicid, int cpu)
   /*
    * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
@@@ -817,14 -848,28 +817,14 @@@
                 .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
         };
         INIT_WORK(&c_idle.work, do_fork_idle);
- -#ifdef CONFIG_X86_64
- -      /* allocate memory for gdts of secondary cpus. Hotplug is considered */
- -      if (!cpu_gdt_descr[cpu].address &&
- -              !(cpu_gdt_descr[cpu].address = get_zeroed_page(GFP_KERNEL))) {
- -              printk(KERN_ERR "Failed to allocate GDT for CPU %d\n", cpu);
- -              return -1;
- -      }
   
+ +#ifdef CONFIG_X86_64
         /* Allocate node local memory for AP pdas */
- -      if (cpu_pda(cpu) == &boot_cpu_pda[cpu]) {
- -              struct x8664_pda *newpda, *pda;
- -              int node = cpu_to_node(cpu);
- -              pda = cpu_pda(cpu);
- -              newpda = kmalloc_node(sizeof(struct x8664_pda), GFP_ATOMIC,
- -                                    node);
- -              if (newpda) {
- -                      memcpy(newpda, pda, sizeof(struct x8664_pda));
- -                      cpu_pda(cpu) = newpda;
- -              } else
- -                      printk(KERN_ERR
- -              "Could not allocate node local PDA for CPU %d on node %d\n",
- -                              cpu, node);
+ +      if (cpu > 0) {
+ +              boot_error = get_local_pda(cpu);
+ +              if (boot_error)
+ +                      goto restore_state;
+ +                      /* if can't get pda memory, can't start cpu */
         }
   #endif
   
@@@ -860,15 -905,18 +860,15 @@@ do_rest
   #ifdef CONFIG_X86_32
         per_cpu(current_task, cpu) = c_idle.idle;
         init_gdt(cpu);
- -      early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
- -      c_idle.idle->thread.ip = (unsigned long) start_secondary;
         /* Stack for startup_32 can be just as for start_secondary onwards */
- -      stack_start.sp = (void *) c_idle.idle->thread.sp;
         irq_ctx_init(cpu);
   #else
         cpu_pda(cpu)->pcurrent = c_idle.idle;
- -      init_rsp = c_idle.idle->thread.sp;
- -      load_sp0(&per_cpu(init_tss, cpu), &c_idle.idle->thread);
- -      initial_code = (unsigned long)start_secondary;
         clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
   #endif
+ +      early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
+ +      initial_code = (unsigned long)start_secondary;
+ +      stack_start.sp = (void *) c_idle.idle->thread.sp;
   
         /* start_ip had better be page-aligned! */
         start_ip = setup_trampoline();
@@@ -939,14 -987,16 +939,14 @@@
                                 inquire_remote_apic(apicid);
                 }
         }
- -
- -      if (boot_error) {
- -              /* Try to put things back the way they were before ... */
- -              unmap_cpu_to_logical_apicid(cpu);
   #ifdef CONFIG_X86_64
- -              clear_node_cpumask(cpu); /* was set by numa_add_cpu */
+ +restore_state:
   #endif
+ +      if (boot_error) {
+ +              /* Try to put things back the way they were before ... */
+ +              numa_remove_cpu(cpu); /* was set by numa_add_cpu */
                 cpu_clear(cpu, cpu_callout_map); /* was set by do_boot_cpu() */
                 cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
- -              cpu_clear(cpu, cpu_possible_map);
                 cpu_clear(cpu, cpu_present_map);
                 per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID;
         }
@@@ -1038,12 -1088,14 +1038,12 @@@ static __init void disable_smp(void
   {
         cpu_present_map = cpumask_of_cpu(0);
         cpu_possible_map = cpumask_of_cpu(0);
- -#ifdef CONFIG_X86_32
         smpboot_clear_io_apic_irqs();
- -#endif
+ +
         if (smp_found_config)
- -              phys_cpu_present_map =
- -                              physid_mask_of_physid(boot_cpu_physical_apicid);
+ +              physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
         else
- -              phys_cpu_present_map = physid_mask_of_physid(0);
+ +              physid_set_mask_of_physid(0, &phys_cpu_present_map);
         map_cpu_to_logical_apicid();
         cpu_set(0, per_cpu(cpu_sibling_map, 0));
         cpu_set(0, per_cpu(cpu_core_map, 0));
@@@ -1106,12 -1158,12 +1106,12 @@@ static int __init smp_sanity_check(unsi
          * If SMP should be disabled, then really disable it!
          */
         if (!max_cpus) {
- -              printk(KERN_INFO "SMP mode deactivated,"
- -                               "forcing use of dummy APIC emulation.\n");
+ +              printk(KERN_INFO "SMP mode deactivated.\n");
                 smpboot_clear_io_apic();
- -#ifdef CONFIG_X86_32
+ +
+ +              localise_nmi_watchdog();
+ +
                 connect_bsp_APIC();
- -#endif
                 setup_local_APIC();
                 end_local_APIC_setup();
                 return -1;
@@@ -1139,6 -1191,7 +1139,6 @@@ static void __init smp_cpu_index_defaul
   void __init native_smp_prepare_cpus(unsigned int max_cpus)
   {
         preempt_disable();
- -      nmi_watchdog_default();
         smp_cpu_index_default();
         current_cpu_data = boot_cpu_data;
         cpu_callin_map = cpumask_of_cpu(0);
@@@ -1165,8 -1218,9 +1165,8 @@@
         }
         preempt_enable();
   
- -#ifdef CONFIG_X86_32
         connect_bsp_APIC();
- -#endif
+ +
         /*
          * Switch from PIC to APIC mode.
          */
@@@ -1204,8 -1258,8 +1204,8 @@@ void __init native_smp_prepare_boot_cpu
         int me = smp_processor_id();
   #ifdef CONFIG_X86_32
         init_gdt(me);
- -      switch_to_new_gdt();
   #endif
+ +      switch_to_new_gdt();
         /* already set me in cpu_online_map in boot_cpu_init() */
         cpu_set(me, cpu_callout_map);
         per_cpu(cpu_state, me) = CPU_ONLINE;
@@@ -1225,6 -1279,23 +1225,6 @@@ void __init native_smp_cpus_done(unsign
   
   #ifdef CONFIG_HOTPLUG_CPU
   
- -#  ifdef CONFIG_X86_32
- -void cpu_exit_clear(void)
- -{
- -      int cpu = raw_smp_processor_id();
- -
- -      idle_task_exit();
- -
- -      cpu_uninit();
- -      irq_ctx_exit(cpu);
- -
- -      cpu_clear(cpu, cpu_callout_map);
- -      cpu_clear(cpu, cpu_callin_map);
- -
- -      unmap_cpu_to_logical_apicid(cpu);
- -}
- -#  endif /* CONFIG_X86_32 */
- -
   static void remove_siblinginfo(int cpu)
   {
         int sibling;
@@@ -1278,20 -1349,12 +1278,20 @@@ __init void prefill_possible_map(void
         int i;
         int possible;
   
+ +      /* no processor from mptable or madt */
+ +      if (!num_processors)
+ +              num_processors = 1;
+ +
+ +#ifdef CONFIG_HOTPLUG_CPU
         if (additional_cpus == -1) {
                 if (disabled_cpus > 0)
                         additional_cpus = disabled_cpus;
                 else
                         additional_cpus = 0;
         }
+ +#else
+ +      additional_cpus = 0;
+ +#endif
         possible = num_processors + additional_cpus;
         if (possible > NR_CPUS)
                 possible = NR_CPUS;
@@@ -1301,18 -1364,18 +1301,18 @@@
   
         for (i = 0; i < possible; i++)
                 cpu_set(i, cpu_possible_map);
+ +
+ +      nr_cpu_ids = possible;
   }
   
   static void __ref remove_cpu_from_maps(int cpu)
   {
         cpu_clear(cpu, cpu_online_map);
- -#ifdef CONFIG_X86_64
         cpu_clear(cpu, cpu_callout_map);
         cpu_clear(cpu, cpu_callin_map);
         /* was set by cpu_init() */
         clear_bit(cpu, (unsigned long *)&cpu_initialized);
- -      clear_node_cpumask(cpu);
- -#endif
+ +      numa_remove_cpu(cpu);
   }
   
   int __cpu_disable(void)
diff --combined arch/x86/kernel/tlb_64.c

index 5039d0f097a2f3cd1a4962418d09a39d5d8eaaff,184a367516d3655cd43fa8e5dfeb17c0e4cac556..dcbf7a1159eaf25713b83f6eaf83b1d9379a1fe7
--- 1/arch/x86/kernel/tlb_64.c
--- 2/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/kernel/tlb_64.c
@@@ -15,8 -15,6 +15,8 @@@
   #include <asm/proto.h>
   #include <asm/apicdef.h>
   #include <asm/idle.h>
+ +#include <asm/uv/uv_hub.h>
+ +#include <asm/uv/uv_bau.h>
   
   #include <mach_ipi.h>
   /*
@@@ -164,9 -162,6 +164,9 @@@ void native_flush_tlb_others(const cpum
         union smp_flush_state *f;
         cpumask_t cpumask = *cpumaskp;
   
+ +      if (is_uv_system() && uv_flush_tlb_others(&cpumask, mm, va))
+ +              return;
+ +
         /* Caller has disabled preemption */
         sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
         f = &per_cpu(flush_state, sender);
@@@ -275,5 -270,5 +275,5 @@@ static void do_flush_tlb_all(void *info
   
   void flush_tlb_all(void)
   {
-       on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
+       on_each_cpu(do_flush_tlb_all, NULL, 1);
   }
diff --combined arch/x86/kernel/vsyscall_64.c

index e50740d32314e2608e13da780fa2c704f2172df4,0dcae19ed6270638d8f953cd6c2b444e3cad5c57..0b8b6690a86d184959703177b9cb6011bfd605ca
--- 1/arch/x86/kernel/vsyscall_64.c
--- 2/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@@ -42,8 -42,7 +42,8 @@@
   #include <asm/topology.h>
   #include <asm/vgtod.h>
   
- -#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
+ +#define __vsyscall(nr) \
+ +              __attribute__ ((unused, __section__(".vsyscall_" #nr))) notrace
   #define __syscall_clobber "r11","cx","memory"
   
   /*
@@@ -250,7 -249,7 +250,7 @@@ static ctl_table kernel_root_table2[] 
      doesn't violate that. We'll find out if it does. */
   static void __cpuinit vsyscall_set_cpu(int cpu)
   {
- -      unsigned long *d;
+ +      unsigned long d;
         unsigned long node = 0;
   #ifdef CONFIG_NUMA
         node = cpu_to_node(cpu);
@@@ -261,11 -260,11 +261,11 @@@
         /* Store cpu number in limit so that it can be loaded quickly
            in user space in vgetcpu.
            12 bits for the CPU and 8 bits for the node. */
- -      d = (unsigned long *)(get_cpu_gdt_table(cpu) + GDT_ENTRY_PER_CPU);
- -      *d = 0x0f40000000000ULL;
- -      *d |= cpu;
- -      *d |= (node & 0xf) << 12;
- -      *d |= (node >> 4) << 48;
+ +      d = 0x0f40000000000ULL;
+ +      d |= cpu;
+ +      d |= (node & 0xf) << 12;
+ +      d |= (node >> 4) << 48;
+ +      write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
   }
   
   static void __cpuinit cpu_vsyscall_init(void *arg)
@@@ -279,7 -278,7 +279,7 @@@ cpu_vsyscall_notifier(struct notifier_b
   {
         long cpu = (long)arg;
         if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
-               smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1);
+               smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 1);
         return NOTIFY_DONE;
   }
   
@@@ -302,7 -301,7 +302,7 @@@ static int __init vsyscall_init(void
   #ifdef CONFIG_SYSCTL
         register_sysctl_table(kernel_root_table2);
   #endif
-       on_each_cpu(cpu_vsyscall_init, NULL, 0, 1);
+       on_each_cpu(cpu_vsyscall_init, NULL, 1);
         hotcpu_notifier(cpu_vsyscall_notifier, 0);
         return 0;
   }
diff --combined arch/x86/mach-voyager/voyager_smp.c

index 8dedd01e909fe4efeb2ff1501518856b23b6e967,abea08459a73b0984f66ae656c546668315c17ca..ee0fba0921572ba89ad56e45e5757e9eb9351385
--- 1/arch/x86/mach-voyager/voyager_smp.c
--- 2/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@@ -59,6 -59,11 +59,6 @@@ __u32 voyager_quad_processors = 0
    * activity count.  Finally exported by i386_ksyms.c */
   static int voyager_extended_cpus = 1;
   
- -/* Have we found an SMP box - used by time.c to do the profiling
- -   interrupt for timeslicing; do not set to 1 until the per CPU timer
- -   interrupt is active */
- -int smp_found_config = 0;
- -
   /* Used for the invalidate map that's also checked in the spinlock */
   static volatile unsigned long smp_invalidate_needed;
   
@@@ -950,94 -955,24 +950,24 @@@ static void smp_stop_cpu_function(void 
                 halt();
   }
   
- static DEFINE_SPINLOCK(call_lock);
- 
- struct call_data_struct {
-       void (*func) (void *info);
-       void *info;
-       volatile unsigned long started;
-       volatile unsigned long finished;
-       int wait;
- };
- 
- static struct call_data_struct *call_data;
- 
   /* execute a thread on a new CPU.  The function to be called must be
    * previously set up.  This is used to schedule a function for
    * execution on all CPUs - set up the function then broadcast a
    * function_interrupt CPI to come here on each CPU */
   static void smp_call_function_interrupt(void)
   {
-       void (*func) (void *info) = call_data->func;
-       void *info = call_data->info;
-       /* must take copy of wait because call_data may be replaced
-        * unless the function is waiting for us to finish */
-       int wait = call_data->wait;
-       __u8 cpu = smp_processor_id();
- 
-       /*
-        * Notify initiating CPU that I've grabbed the data and am
-        * about to execute the function
-        */
-       mb();
-       if (!test_and_clear_bit(cpu, &call_data->started)) {
-               /* If the bit wasn't set, this could be a replay */
-               printk(KERN_WARNING "VOYAGER SMP: CPU %d received call funtion"
-                      " with no call pending\n", cpu);
-               return;
-       }
-       /*
-        * At this point the info structure may be out of scope unless wait==1
-        */
         irq_enter();
-       (*func) (info);
+       generic_smp_call_function_interrupt();
         __get_cpu_var(irq_stat).irq_call_count++;
         irq_exit();
-       if (wait) {
-               mb();
-               clear_bit(cpu, &call_data->finished);
-       }
   }
   
- static int
- voyager_smp_call_function_mask(cpumask_t cpumask,
-                              void (*func) (void *info), void *info, int wait)
+ static void smp_call_function_single_interrupt(void)
   {
-       struct call_data_struct data;
-       u32 mask = cpus_addr(cpumask)[0];
- 
-       mask &= ~(1 << smp_processor_id());
- 
-       if (!mask)
-               return 0;
- 
-       /* Can deadlock when called with interrupts disabled */
-       WARN_ON(irqs_disabled());
- 
-       data.func = func;
-       data.info = info;
-       data.started = mask;
-       data.wait = wait;
-       if (wait)
-               data.finished = mask;
- 
-       spin_lock(&call_lock);
-       call_data = &data;
-       wmb();
-       /* Send a message to all other CPUs and wait for them to respond */
-       send_CPI(mask, VIC_CALL_FUNCTION_CPI);
- 
-       /* Wait for response */
-       while (data.started)
-               barrier();
- 
-       if (wait)
-               while (data.finished)
-                       barrier();
- 
-       spin_unlock(&call_lock);
- 
-       return 0;
+       irq_enter();
+       generic_smp_call_function_single_interrupt();
+       __get_cpu_var(irq_stat).irq_call_count++;
+       irq_exit();
   }
   
   /* Sorry about the name.  In an APIC based system, the APICs
@@@ -1094,6 -1029,12 +1024,12 @@@ void smp_qic_call_function_interrupt(st
         smp_call_function_interrupt();
   }
   
+ void smp_qic_call_function_single_interrupt(struct pt_regs *regs)
+ {
+       ack_QIC_CPI(QIC_CALL_FUNCTION_SINGLE_CPI);
+       smp_call_function_single_interrupt();
+ }
+ 
   void smp_vic_cpi_interrupt(struct pt_regs *regs)
   {
         struct pt_regs *old_regs = set_irq_regs(regs);
@@@ -1114,6 -1055,8 +1050,8 @@@
                 smp_enable_irq_interrupt();
         if (test_and_clear_bit(VIC_CALL_FUNCTION_CPI, &vic_cpi_mailbox[cpu]))
                 smp_call_function_interrupt();
+       if (test_and_clear_bit(VIC_CALL_FUNCTION_SINGLE_CPI, &vic_cpi_mailbox[cpu]))
+               smp_call_function_single_interrupt();
         set_irq_regs(old_regs);
   }
   
@@@ -1129,9 -1072,18 +1067,9 @@@ static void do_flush_tlb_all(void *info
   /* flush the TLB of every active CPU in the system */
   void flush_tlb_all(void)
   {
-       on_each_cpu(do_flush_tlb_all, 0, 1, 1);
+       on_each_cpu(do_flush_tlb_all, 0, 1);
   }
   
- -/* used to set up the trampoline for other CPUs when the memory manager
- - * is sorted out */
- -void __init smp_alloc_memory(void)
- -{
- -      trampoline_base = alloc_bootmem_low_pages(PAGE_SIZE);
- -      if (__pa(trampoline_base) >= 0x93000)
- -              BUG();
- -}
- -
   /* send a reschedule CPI to one CPU by physical CPU number*/
   static void voyager_smp_send_reschedule(int cpu)
   {
@@@ -1161,7 -1113,7 +1099,7 @@@ int safe_smp_processor_id(void
   /* broadcast a halt to all other CPUs */
   static void voyager_smp_send_stop(void)
   {
-       smp_call_function(smp_stop_cpu_function, NULL, 1, 1);
+       smp_call_function(smp_stop_cpu_function, NULL, 1);
   }
   
   /* this function is triggered in time.c when a clock tick fires
@@@ -1848,5 -1800,7 +1786,7 @@@ struct smp_ops smp_ops = 
   
         .smp_send_stop = voyager_smp_send_stop,
         .smp_send_reschedule = voyager_smp_send_reschedule,
-       .smp_call_function_mask = voyager_smp_call_function_mask,
+ 
+       .send_call_func_ipi = native_send_call_func_ipi,
+       .send_call_func_single_ipi = native_send_call_func_single_ipi,
   };
diff --combined arch/x86/mm/pageattr.c

index 47f4e2e4a0968ca848c2d16358d6ae7e43a6f565,9b836ba9deddcccdcf3728fc147f18c6ed0a0553..65c6e46bf059ae10bfa86b83c087c29ecb2f0070
--- 1/arch/x86/mm/pageattr.c
--- 2/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@@ -34,41 -34,6 +34,41 @@@ struct cpa_data 
         unsigned        force_split : 1;
   };
   
+ +#ifdef CONFIG_PROC_FS
+ +static unsigned long direct_pages_count[PG_LEVEL_NUM];
+ +
+ +void update_page_count(int level, unsigned long pages)
+ +{
+ +      unsigned long flags;
+ +
+ +      /* Protect against CPA */
+ +      spin_lock_irqsave(&pgd_lock, flags);
+ +      direct_pages_count[level] += pages;
+ +      spin_unlock_irqrestore(&pgd_lock, flags);
+ +}
+ +
+ +static void split_page_count(int level)
+ +{
+ +      direct_pages_count[level]--;
+ +      direct_pages_count[level - 1] += PTRS_PER_PTE;
+ +}
+ +
+ +int arch_report_meminfo(char *page)
+ +{
+ +      int n = sprintf(page, "DirectMap4k:  %8lu\n"
+ +                      "DirectMap2M:  %8lu\n",
+ +                      direct_pages_count[PG_LEVEL_4K],
+ +                      direct_pages_count[PG_LEVEL_2M]);
+ +#ifdef CONFIG_X86_64
+ +      n += sprintf(page + n, "DirectMap1G:  %8lu\n",
+ +                   direct_pages_count[PG_LEVEL_1G]);
+ +#endif
+ +      return n;
+ +}
+ +#else
+ +static inline void split_page_count(int level) { }
+ +#endif
+ +
   #ifdef CONFIG_X86_64
   
   static inline unsigned long highmap_start_pfn(void)
@@@ -141,7 -106,7 +141,7 @@@ static void cpa_flush_all(unsigned lon
   {
         BUG_ON(irqs_disabled());
   
-       on_each_cpu(__cpa_flush_all, (void *) cache, 1, 1);
+       on_each_cpu(__cpa_flush_all, (void *) cache, 1);
   }
   
   static void __cpa_flush_range(void *arg)
@@@ -162,7 -127,7 +162,7 @@@ static void cpa_flush_range(unsigned lo
         BUG_ON(irqs_disabled());
         WARN_ON(PAGE_ALIGN(start) != start);
   
-       on_each_cpu(__cpa_flush_range, NULL, 1, 1);
+       on_each_cpu(__cpa_flush_range, NULL, 1);
   
         if (!cache)
                 return;
@@@ -262,7 -227,6 +262,7 @@@ pte_t *lookup_address(unsigned long add
   
         return pte_offset_kernel(pmd, address);
   }
+ +EXPORT_SYMBOL_GPL(lookup_address);
   
   /*
    * Set the new pmd in all the pgds we know about:
@@@ -536,16 -500,6 +536,16 @@@ static int split_large_page(pte_t *kpte
         for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc)
                 set_pte(&pbase[i], pfn_pte(pfn, ref_prot));
   
+ +      if (address >= (unsigned long)__va(0) &&
+ +              address < (unsigned long)__va(max_low_pfn_mapped << PAGE_SHIFT))
+ +              split_page_count(level);
+ +
+ +#ifdef CONFIG_X86_64
+ +      if (address >= (unsigned long)__va(1UL<<32) &&
+ +              address < (unsigned long)__va(max_pfn_mapped << PAGE_SHIFT))
+ +              split_page_count(level);
+ +#endif
+ +
         /*
          * Install the new, split up pagetable. Important details here:
          *
@@@ -659,24 -613,15 +659,24 @@@ static int cpa_process_alias(struct cpa
         struct cpa_data alias_cpa;
         int ret = 0;
   
- -      if (cpa->pfn > max_pfn_mapped)
+ +      if (cpa->pfn >= max_pfn_mapped)
                 return 0;
   
+ +#ifdef CONFIG_X86_64
+ +      if (cpa->pfn >= max_low_pfn_mapped && cpa->pfn < (1UL<<(32-PAGE_SHIFT)))
+ +              return 0;
+ +#endif
         /*
          * No need to redo, when the primary call touched the direct
          * mapping already:
          */
- -      if (!within(cpa->vaddr, PAGE_OFFSET,
- -                  PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))) {
+ +      if (!(within(cpa->vaddr, PAGE_OFFSET,
+ +                  PAGE_OFFSET + (max_low_pfn_mapped << PAGE_SHIFT))
+ +#ifdef CONFIG_X86_64
+ +              || within(cpa->vaddr, PAGE_OFFSET + (1UL<<32),
+ +                  PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))
+ +#endif
+ +      )) {
   
                 alias_cpa = *cpa;
                 alias_cpa.vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT);
@@@ -860,7 -805,7 +860,7 @@@ int _set_memory_wc(unsigned long addr, 
   
   int set_memory_wc(unsigned long addr, int numpages)
   {
- -      if (!pat_wc_enabled)
+ +      if (!pat_enabled)
                 return set_memory_uc(addr, numpages);
   
         if (reserve_memtype(addr, addr + numpages * PAGE_SIZE,
diff --combined arch/x86/oprofile/nmi_int.c

index 2b6ad5b9f9d53f0012629f5c255f3d12e6d87f82,3238ad32ffd8a1f2c7c75c53696a2fde8b65f321..7f3329b55d2e3b76eb60886844d7ee846b7a0c60
--- 1/arch/x86/oprofile/nmi_int.c
--- 2/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@@ -218,8 -218,8 +218,8 @@@ static int nmi_setup(void
                 }
   
         }
-       on_each_cpu(nmi_save_registers, NULL, 0, 1);
-       on_each_cpu(nmi_cpu_setup, NULL, 0, 1);
+       on_each_cpu(nmi_save_registers, NULL, 1);
+       on_each_cpu(nmi_cpu_setup, NULL, 1);
         nmi_enabled = 1;
         return 0;
   }
@@@ -269,13 -269,12 +269,13 @@@ static void nmi_cpu_shutdown(void *dumm
   
   static void nmi_shutdown(void)
   {
- -      struct op_msrs *msrs = &__get_cpu_var(cpu_msrs);
+ +      struct op_msrs *msrs = &get_cpu_var(cpu_msrs);
         nmi_enabled = 0;
-       on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1);
+       on_each_cpu(nmi_cpu_shutdown, NULL, 1);
         unregister_die_notifier(&profile_exceptions_nb);
         model->shutdown(msrs);
         free_msrs();
+ +      put_cpu_var(cpu_msrs);
   }
   
   static void nmi_cpu_start(void *dummy)
@@@ -286,7 -285,7 +286,7 @@@
   
   static int nmi_start(void)
   {
-       on_each_cpu(nmi_cpu_start, NULL, 0, 1);
+       on_each_cpu(nmi_cpu_start, NULL, 1);
         return 0;
   }
   
@@@ -298,7 -297,7 +298,7 @@@ static void nmi_cpu_stop(void *dummy
   
   static void nmi_stop(void)
   {
-       on_each_cpu(nmi_cpu_stop, NULL, 0, 1);
+       on_each_cpu(nmi_cpu_stop, NULL, 1);
   }
   
   struct op_counter_config counter_config[OP_MAX_COUNTER];
diff --combined arch/x86/xen/enlighten.c

index dcd4e51f2f16829e2e9519615606bc93e4a4cd1e,8e317782fe377c1823f9578ced2a419f0d9b90bb..bb508456ef523e1fa50f77a2993bf481b06f03f0
--- 1/arch/x86/xen/enlighten.c
--- 2/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@@ -45,7 -45,6 +45,7 @@@
   #include <asm/pgtable.h>
   #include <asm/tlbflush.h>
   #include <asm/reboot.h>
+ +#include <asm/pgalloc.h>
   
   #include "xen-ops.h"
   #include "mmu.h"
@@@ -76,13 -75,13 +76,13 @@@ DEFINE_PER_CPU(unsigned long, xen_curre
   struct start_info *xen_start_info;
   EXPORT_SYMBOL_GPL(xen_start_info);
   
- -static /* __initdata */ struct shared_info dummy_shared_info;
+ +struct shared_info xen_dummy_shared_info;
   
   /*
    * Point at some empty memory to start with. We map the real shared_info
    * page as soon as fixmap is up and running.
    */
- -struct shared_info *HYPERVISOR_shared_info = (void *)&dummy_shared_info;
+ +struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info;
   
   /*
    * Flag to determine whether vcpu info placement is available on all
@@@ -99,13 -98,13 +99,13 @@@
    */
   static int have_vcpu_info_placement = 1;
   
- -static void __init xen_vcpu_setup(int cpu)
+ +static void xen_vcpu_setup(int cpu)
   {
         struct vcpu_register_vcpu_info info;
         int err;
         struct vcpu_info *vcpup;
   
- -      BUG_ON(HYPERVISOR_shared_info == &dummy_shared_info);
+ +      BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
         per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
   
         if (!have_vcpu_info_placement)
@@@ -137,41 -136,11 +137,41 @@@
         }
   }
   
+ +/*
+ + * On restore, set the vcpu placement up again.
+ + * If it fails, then we're in a bad state, since
+ + * we can't back out from using it...
+ + */
+ +void xen_vcpu_restore(void)
+ +{
+ +      if (have_vcpu_info_placement) {
+ +              int cpu;
+ +
+ +              for_each_online_cpu(cpu) {
+ +                      bool other_cpu = (cpu != smp_processor_id());
+ +
+ +                      if (other_cpu &&
+ +                          HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL))
+ +                              BUG();
+ +
+ +                      xen_vcpu_setup(cpu);
+ +
+ +                      if (other_cpu &&
+ +                          HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL))
+ +                              BUG();
+ +              }
+ +
+ +              BUG_ON(!have_vcpu_info_placement);
+ +      }
+ +}
+ +
   static void __init xen_banner(void)
   {
         printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
                pv_info.name);
- -      printk(KERN_INFO "Hypervisor signature: %s\n", xen_start_info->magic);
+ +      printk(KERN_INFO "Hypervisor signature: %s%s\n",
+ +             xen_start_info->magic,
+ +             xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
   }
   
   static void xen_cpuid(unsigned int *ax, unsigned int *bx,
@@@ -266,13 -235,13 +266,13 @@@ static void xen_irq_enable(void
   {
         struct vcpu_info *vcpu;
   
- -      /* There's a one instruction preempt window here.  We need to
- -         make sure we're don't switch CPUs between getting the vcpu
- -         pointer and updating the mask. */
- -      preempt_disable();
+ +      /* We don't need to worry about being preempted here, since
+ +         either a) interrupts are disabled, so no preemption, or b)
+ +         the caller is confused and is trying to re-enable interrupts
+ +         on an indeterminate processor. */
+ +
         vcpu = x86_read_percpu(xen_vcpu);
         vcpu->evtchn_upcall_mask = 0;
- -      preempt_enable_no_resched();
   
         /* Doesn't matter if we get preempted here, because any
            pending event will get dealt with anyway. */
@@@ -285,7 -254,7 +285,7 @@@
   static void xen_safe_halt(void)
   {
         /* Blocking includes an implicit local_irq_enable(). */
- -      if (HYPERVISOR_sched_op(SCHEDOP_block, 0) != 0)
+ +      if (HYPERVISOR_sched_op(SCHEDOP_block, NULL) != 0)
                 BUG();
   }
   
@@@ -638,30 -607,6 +638,30 @@@ static void xen_flush_tlb_others(const 
         xen_mc_issue(PARAVIRT_LAZY_MMU);
   }
   
+ +static void xen_clts(void)
+ +{
+ +      struct multicall_space mcs;
+ +
+ +      mcs = xen_mc_entry(0);
+ +
+ +      MULTI_fpu_taskswitch(mcs.mc, 0);
+ +
+ +      xen_mc_issue(PARAVIRT_LAZY_CPU);
+ +}
+ +
+ +static void xen_write_cr0(unsigned long cr0)
+ +{
+ +      struct multicall_space mcs;
+ +
+ +      /* Only pay attention to cr0.TS; everything else is
+ +         ignored. */
+ +      mcs = xen_mc_entry(0);
+ +
+ +      MULTI_fpu_taskswitch(mcs.mc, (cr0 & X86_CR0_TS) != 0);
+ +
+ +      xen_mc_issue(PARAVIRT_LAZY_CPU);
+ +}
+ +
   static void xen_write_cr2(unsigned long cr2)
   {
         x86_read_percpu(xen_vcpu)->arch.cr2 = cr2;
@@@ -679,10 -624,8 +679,10 @@@ static unsigned long xen_read_cr2_direc
   
   static void xen_write_cr4(unsigned long cr4)
   {
- -      /* Just ignore cr4 changes; Xen doesn't allow us to do
- -         anything anyway. */
+ +      cr4 &= ~X86_CR4_PGE;
+ +      cr4 &= ~X86_CR4_PSE;
+ +
+ +      native_write_cr4(cr4);
   }
   
   static unsigned long xen_read_cr3(void)
@@@ -888,7 -831,7 +888,7 @@@ static __init void xen_pagetable_setup_
                           PFN_DOWN(__pa(xen_start_info->pt_base)));
   }
   
- -static __init void setup_shared_info(void)
+ +void xen_setup_shared_info(void)
   {
         if (!xen_feature(XENFEAT_auto_translated_physmap)) {
                 unsigned long addr = fix_to_virt(FIX_PARAVIRT_BOOTMAP);
@@@ -911,8 -854,6 +911,8 @@@
         /* In UP this is as good a place as any to set up shared info */
         xen_setup_vcpu_info_placement();
   #endif
+ +
+ +      xen_setup_mfn_list_list();
   }
   
   static __init void xen_pagetable_setup_done(pgd_t *base)
@@@ -925,23 -866,15 +925,23 @@@
         pv_mmu_ops.release_pmd = xen_release_pmd;
         pv_mmu_ops.set_pte = xen_set_pte;
   
- -      setup_shared_info();
+ +      xen_setup_shared_info();
   
         /* Actually pin the pagetable down, but we can't set PG_pinned
            yet because the page structures don't exist yet. */
         pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(base)));
   }
   
+ +static __init void xen_post_allocator_init(void)
+ +{
+ +      pv_mmu_ops.set_pmd = xen_set_pmd;
+ +      pv_mmu_ops.set_pud = xen_set_pud;
+ +
+ +      xen_mark_init_mm_pinned();
+ +}
+ +
   /* This is called once we have the cpu_possible_map */
- -void __init xen_setup_vcpu_info_placement(void)
+ +void xen_setup_vcpu_info_placement(void)
   {
         int cpu;
   
@@@ -1014,33 -947,6 +1014,33 @@@ static unsigned xen_patch(u8 type, u16 
         return ret;
   }
   
+ +static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot)
+ +{
+ +      pte_t pte;
+ +
+ +      phys >>= PAGE_SHIFT;
+ +
+ +      switch (idx) {
+ +      case FIX_BTMAP_END ... FIX_BTMAP_BEGIN:
+ +#ifdef CONFIG_X86_F00F_BUG
+ +      case FIX_F00F_IDT:
+ +#endif
+ +      case FIX_WP_TEST:
+ +      case FIX_VDSO:
+ +#ifdef CONFIG_X86_LOCAL_APIC
+ +      case FIX_APIC_BASE:     /* maps dummy local APIC */
+ +#endif
+ +              pte = pfn_pte(phys, prot);
+ +              break;
+ +
+ +      default:
+ +              pte = mfn_pte(phys, prot);
+ +              break;
+ +      }
+ +
+ +      __native_set_fixmap(idx, pte);
+ +}
+ +
   static const struct pv_info xen_info __initdata = {
         .paravirt_enabled = 1,
         .shared_kernel_pmd = 0,
@@@ -1054,7 -960,7 +1054,7 @@@ static const struct pv_init_ops xen_ini
         .banner = xen_banner,
         .memory_setup = xen_memory_setup,
         .arch_setup = xen_arch_setup,
- -      .post_allocator_init = xen_mark_init_mm_pinned,
+ +      .post_allocator_init = xen_post_allocator_init,
   };
   
   static const struct pv_time_ops xen_time_ops __initdata = {
@@@ -1062,7 -968,7 +1062,7 @@@
   
         .set_wallclock = xen_set_wallclock,
         .get_wallclock = xen_get_wallclock,
- -      .get_cpu_khz = xen_cpu_khz,
+ +      .get_tsc_khz = xen_tsc_khz,
         .sched_clock = xen_sched_clock,
   };
   
@@@ -1072,10 -978,10 +1072,10 @@@ static const struct pv_cpu_ops xen_cpu_
         .set_debugreg = xen_set_debugreg,
         .get_debugreg = xen_get_debugreg,
   
- -      .clts = native_clts,
+ +      .clts = xen_clts,
   
         .read_cr0 = native_read_cr0,
- -      .write_cr0 = native_write_cr0,
+ +      .write_cr0 = xen_write_cr0,
   
         .read_cr4 = native_read_cr4,
         .read_cr4_safe = native_read_cr4_safe,
@@@ -1089,7 -995,7 +1089,7 @@@
         .read_pmc = native_read_pmc,
   
         .iret = xen_iret,
- -      .irq_enable_syscall_ret = xen_sysexit,
+ +      .irq_enable_sysexit = xen_sysexit,
   
         .load_tr_desc = paravirt_nop,
         .set_ldt = xen_set_ldt,
@@@ -1123,9 -1029,6 +1123,9 @@@ static const struct pv_irq_ops xen_irq_
         .irq_enable = xen_irq_enable,
         .safe_halt = xen_safe_halt,
         .halt = xen_halt,
+ +#ifdef CONFIG_X86_64
+ +      .adjust_exception_frame = paravirt_nop,
+ +#endif
   };
   
   static const struct pv_apic_ops xen_apic_ops __initdata = {
@@@ -1157,9 -1060,6 +1157,9 @@@ static const struct pv_mmu_ops xen_mmu_
         .pte_update = paravirt_nop,
         .pte_update_defer = paravirt_nop,
   
+ +      .pgd_alloc = __paravirt_pgd_alloc,
+ +      .pgd_free = paravirt_nop,
+ +
         .alloc_pte = xen_alloc_pte_init,
         .release_pte = xen_release_pte_init,
         .alloc_pmd = xen_alloc_pte_init,
@@@ -1172,13 -1072,9 +1172,13 @@@
   
         .set_pte = NULL,        /* see xen_pagetable_setup_* */
         .set_pte_at = xen_set_pte_at,
- -      .set_pmd = xen_set_pmd,
+ +      .set_pmd = xen_set_pmd_hyper,
+ +
+ +      .ptep_modify_prot_start = __ptep_modify_prot_start,
+ +      .ptep_modify_prot_commit = __ptep_modify_prot_commit,
   
         .pte_val = xen_pte_val,
+ +      .pte_flags = native_pte_val,
         .pgd_val = xen_pgd_val,
   
         .make_pte = xen_make_pte,
@@@ -1186,7 -1082,7 +1186,7 @@@
   
         .set_pte_atomic = xen_set_pte_atomic,
         .set_pte_present = xen_set_pte_at,
- -      .set_pud = xen_set_pud,
+ +      .set_pud = xen_set_pud_hyper,
         .pte_clear = xen_pte_clear,
         .pmd_clear = xen_pmd_clear,
   
@@@ -1201,8 -1097,6 +1201,8 @@@
                 .enter = paravirt_enter_lazy_mmu,
                 .leave = xen_leave_lazy,
         },
+ +
+ +      .set_fixmap = xen_set_fixmap,
   };
   
   #ifdef CONFIG_SMP
@@@ -1214,19 -1108,19 +1214,21 @@@ static const struct smp_ops xen_smp_op
   
         .smp_send_stop = xen_smp_send_stop,
         .smp_send_reschedule = xen_smp_send_reschedule,
-       .smp_call_function_mask = xen_smp_call_function_mask,
+ 
+       .send_call_func_ipi = xen_smp_send_call_function_ipi,
+       .send_call_func_single_ipi = xen_smp_send_call_function_single_ipi,
   };
   #endif        /* CONFIG_SMP */
   
   static void xen_reboot(int reason)
   {
+ +      struct sched_shutdown r = { .reason = reason };
+ +
   #ifdef CONFIG_SMP
         smp_send_stop();
   #endif
   
- -      if (HYPERVISOR_sched_op(SCHEDOP_shutdown, reason))
+ +      if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r))
                 BUG();
   }
   
@@@ -1281,8 -1175,6 +1283,8 @@@ asmlinkage void __init xen_start_kernel
   
         BUG_ON(memcmp(xen_start_info->magic, "xen-3", 5) != 0);
   
+ +      xen_setup_features();
+ +
         /* Install Xen paravirt ops */
         pv_info = xen_info;
         pv_init_ops = xen_init_ops;
@@@ -1292,26 -1184,21 +1294,26 @@@
         pv_apic_ops = xen_apic_ops;
         pv_mmu_ops = xen_mmu_ops;
   
+ +      if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) {
+ +              pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start;
+ +              pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit;
+ +      }
+ +
         machine_ops = xen_machine_ops;
   
   #ifdef CONFIG_SMP
         smp_ops = xen_smp_ops;
   #endif
   
- -      xen_setup_features();
- -
         /* Get mfn list */
         if (!xen_feature(XENFEAT_auto_translated_physmap))
- -              phys_to_machine_mapping = (unsigned long *)xen_start_info->mfn_list;
+ +              xen_build_dynamic_phys_to_machine();
   
         pgd = (pgd_t *)xen_start_info->pt_base;
   
+ +      init_pg_tables_start = __pa(pgd);
         init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE;
+ +      max_pfn_mapped = (init_pg_tables_end + 512*1024) >> PAGE_SHIFT;
   
         init_mm.pgd = pgd; /* use the Xen pagetables to start */
   
@@@ -1347,12 -1234,9 +1349,12 @@@
                 ? __pa(xen_start_info->mod_start) : 0;
         boot_params.hdr.ramdisk_size = xen_start_info->mod_len;
   
- -      if (!is_initial_xendomain())
+ +      if (!is_initial_xendomain()) {
+ +              add_preferred_console("xenboot", 0, NULL);
+ +              add_preferred_console("tty", 0, NULL);
                 add_preferred_console("hvc", 0, NULL);
+ +      }
   
         /* Start the world */
- -      start_kernel();
+ +      i386_start_kernel();
   }
diff --combined arch/x86/xen/mmu.c

index 42b3b9ed641d6b4cf81f38a1ca86bccc76c415da,5c01590380bcd0fa4afd7f387f9a0e0db25dda14..ff0aa74afaa1ae01ce1a3bd45a907af1cd04e3ea
--- 1/arch/x86/xen/mmu.c
--- 2/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@@ -56,131 -56,6 +56,131 @@@
   #include "multicalls.h"
   #include "mmu.h"
   
+ +#define P2M_ENTRIES_PER_PAGE  (PAGE_SIZE / sizeof(unsigned long))
+ +#define TOP_ENTRIES           (MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE)
+ +
+ +/* Placeholder for holes in the address space */
+ +static unsigned long p2m_missing[P2M_ENTRIES_PER_PAGE]
+ +      __attribute__((section(".data.page_aligned"))) =
+ +              { [ 0 ... P2M_ENTRIES_PER_PAGE-1 ] = ~0UL };
+ +
+ + /* Array of pointers to pages containing p2m entries */
+ +static unsigned long *p2m_top[TOP_ENTRIES]
+ +      __attribute__((section(".data.page_aligned"))) =
+ +              { [ 0 ... TOP_ENTRIES - 1] = &p2m_missing[0] };
+ +
+ +/* Arrays of p2m arrays expressed in mfns used for save/restore */
+ +static unsigned long p2m_top_mfn[TOP_ENTRIES]
+ +      __attribute__((section(".bss.page_aligned")));
+ +
+ +static unsigned long p2m_top_mfn_list[
+ +                      PAGE_ALIGN(TOP_ENTRIES / P2M_ENTRIES_PER_PAGE)]
+ +      __attribute__((section(".bss.page_aligned")));
+ +
+ +static inline unsigned p2m_top_index(unsigned long pfn)
+ +{
+ +      BUG_ON(pfn >= MAX_DOMAIN_PAGES);
+ +      return pfn / P2M_ENTRIES_PER_PAGE;
+ +}
+ +
+ +static inline unsigned p2m_index(unsigned long pfn)
+ +{
+ +      return pfn % P2M_ENTRIES_PER_PAGE;
+ +}
+ +
+ +/* Build the parallel p2m_top_mfn structures */
+ +void xen_setup_mfn_list_list(void)
+ +{
+ +      unsigned pfn, idx;
+ +
+ +      for(pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_ENTRIES_PER_PAGE) {
+ +              unsigned topidx = p2m_top_index(pfn);
+ +
+ +              p2m_top_mfn[topidx] = virt_to_mfn(p2m_top[topidx]);
+ +      }
+ +
+ +      for(idx = 0; idx < ARRAY_SIZE(p2m_top_mfn_list); idx++) {
+ +              unsigned topidx = idx * P2M_ENTRIES_PER_PAGE;
+ +              p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]);
+ +      }
+ +
+ +      BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
+ +
+ +      HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
+ +              virt_to_mfn(p2m_top_mfn_list);
+ +      HYPERVISOR_shared_info->arch.max_pfn = xen_start_info->nr_pages;
+ +}
+ +
+ +/* Set up p2m_top to point to the domain-builder provided p2m pages */
+ +void __init xen_build_dynamic_phys_to_machine(void)
+ +{
+ +      unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list;
+ +      unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages);
+ +      unsigned pfn;
+ +
+ +      for(pfn = 0; pfn < max_pfn; pfn += P2M_ENTRIES_PER_PAGE) {
+ +              unsigned topidx = p2m_top_index(pfn);
+ +
+ +              p2m_top[topidx] = &mfn_list[pfn];
+ +      }
+ +}
+ +
+ +unsigned long get_phys_to_machine(unsigned long pfn)
+ +{
+ +      unsigned topidx, idx;
+ +
+ +      if (unlikely(pfn >= MAX_DOMAIN_PAGES))
+ +              return INVALID_P2M_ENTRY;
+ +
+ +      topidx = p2m_top_index(pfn);
+ +      idx = p2m_index(pfn);
+ +      return p2m_top[topidx][idx];
+ +}
+ +EXPORT_SYMBOL_GPL(get_phys_to_machine);
+ +
+ +static void alloc_p2m(unsigned long **pp, unsigned long *mfnp)
+ +{
+ +      unsigned long *p;
+ +      unsigned i;
+ +
+ +      p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL);
+ +      BUG_ON(p == NULL);
+ +
+ +      for(i = 0; i < P2M_ENTRIES_PER_PAGE; i++)
+ +              p[i] = INVALID_P2M_ENTRY;
+ +
+ +      if (cmpxchg(pp, p2m_missing, p) != p2m_missing)
+ +              free_page((unsigned long)p);
+ +      else
+ +              *mfnp = virt_to_mfn(p);
+ +}
+ +
+ +void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
+ +{
+ +      unsigned topidx, idx;
+ +
+ +      if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
+ +              BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
+ +              return;
+ +      }
+ +
+ +      if (unlikely(pfn >= MAX_DOMAIN_PAGES)) {
+ +              BUG_ON(mfn != INVALID_P2M_ENTRY);
+ +              return;
+ +      }
+ +
+ +      topidx = p2m_top_index(pfn);
+ +      if (p2m_top[topidx] == p2m_missing) {
+ +              /* no need to allocate a page to store an invalid entry */
+ +              if (mfn == INVALID_P2M_ENTRY)
+ +                      return;
+ +              alloc_p2m(&p2m_top[topidx], &p2m_top_mfn[topidx]);
+ +      }
+ +
+ +      idx = p2m_index(pfn);
+ +      p2m_top[topidx][idx] = mfn;
+ +}
+ +
   xmaddr_t arbitrary_virt_to_machine(unsigned long address)
   {
         unsigned int level;
@@@ -223,60 -98,24 +223,60 @@@ void make_lowmem_page_readwrite(void *v
   }
   
   
- -void xen_set_pmd(pmd_t *ptr, pmd_t val)
+ +static bool page_pinned(void *ptr)
+ +{
+ +      struct page *page = virt_to_page(ptr);
+ +
+ +      return PagePinned(page);
+ +}
+ +
+ +static void extend_mmu_update(const struct mmu_update *update)
   {
         struct multicall_space mcs;
         struct mmu_update *u;
   
- -      preempt_disable();
+ +      mcs = xen_mc_extend_args(__HYPERVISOR_mmu_update, sizeof(*u));
+ +
+ +      if (mcs.mc != NULL)
+ +              mcs.mc->args[1]++;
+ +      else {
+ +              mcs = __xen_mc_entry(sizeof(*u));
+ +              MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_SELF);
+ +      }
   
- -      mcs = xen_mc_entry(sizeof(*u));
         u = mcs.args;
- -      u->ptr = virt_to_machine(ptr).maddr;
- -      u->val = pmd_val_ma(val);
- -      MULTI_mmu_update(mcs.mc, u, 1, NULL, DOMID_SELF);
+ +      *u = *update;
+ +}
+ +
+ +void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val)
+ +{
+ +      struct mmu_update u;
+ +
+ +      preempt_disable();
+ +
+ +      xen_mc_batch();
+ +
+ +      u.ptr = virt_to_machine(ptr).maddr;
+ +      u.val = pmd_val_ma(val);
+ +      extend_mmu_update(&u);
   
         xen_mc_issue(PARAVIRT_LAZY_MMU);
   
         preempt_enable();
   }
   
+ +void xen_set_pmd(pmd_t *ptr, pmd_t val)
+ +{
+ +      /* If page is not pinned, we can just update the entry
+ +         directly */
+ +      if (!page_pinned(ptr)) {
+ +              *ptr = val;
+ +              return;
+ +      }
+ +
+ +      xen_set_pmd_hyper(ptr, val);
+ +}
+ +
   /*
    * Associate a virtual page frame with a given physical page frame
    * and protection flags for that frame.
@@@ -340,33 -179,13 +340,33 @@@ out
                 preempt_enable();
   }
   
+ +pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+ +{
+ +      /* Just return the pte as-is.  We preserve the bits on commit */
+ +      return *ptep;
+ +}
+ +
+ +void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
+ +                               pte_t *ptep, pte_t pte)
+ +{
+ +      struct mmu_update u;
+ +
+ +      xen_mc_batch();
+ +
+ +      u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD;
+ +      u.val = pte_val_ma(pte);
+ +      extend_mmu_update(&u);
+ +
+ +      xen_mc_issue(PARAVIRT_LAZY_MMU);
+ +}
+ +
   /* Assume pteval_t is equivalent to all the other *val_t types. */
   static pteval_t pte_mfn_to_pfn(pteval_t val)
   {
         if (val & _PAGE_PRESENT) {
                 unsigned long mfn = (val & PTE_MASK) >> PAGE_SHIFT;
                 pteval_t flags = val & ~PTE_MASK;
- -              val = (mfn_to_pfn(mfn) << PAGE_SHIFT) | flags;
+ +              val = ((pteval_t)mfn_to_pfn(mfn) << PAGE_SHIFT) | flags;
         }
   
         return val;
@@@ -377,7 -196,7 +377,7 @@@ static pteval_t pte_pfn_to_mfn(pteval_
         if (val & _PAGE_PRESENT) {
                 unsigned long pfn = (val & PTE_MASK) >> PAGE_SHIFT;
                 pteval_t flags = val & ~PTE_MASK;
- -              val = (pfn_to_mfn(pfn) << PAGE_SHIFT) | flags;
+ +              val = ((pteval_t)pfn_to_mfn(pfn) << PAGE_SHIFT) | flags;
         }
   
         return val;
@@@ -410,35 -229,24 +410,35 @@@ pmdval_t xen_pmd_val(pmd_t pmd
         return pte_mfn_to_pfn(pmd.pmd);
   }
   
- -void xen_set_pud(pud_t *ptr, pud_t val)
+ +void xen_set_pud_hyper(pud_t *ptr, pud_t val)
   {
- -      struct multicall_space mcs;
- -      struct mmu_update *u;
+ +      struct mmu_update u;
   
         preempt_disable();
   
- -      mcs = xen_mc_entry(sizeof(*u));
- -      u = mcs.args;
- -      u->ptr = virt_to_machine(ptr).maddr;
- -      u->val = pud_val_ma(val);
- -      MULTI_mmu_update(mcs.mc, u, 1, NULL, DOMID_SELF);
+ +      xen_mc_batch();
+ +
+ +      u.ptr = virt_to_machine(ptr).maddr;
+ +      u.val = pud_val_ma(val);
+ +      extend_mmu_update(&u);
   
         xen_mc_issue(PARAVIRT_LAZY_MMU);
   
         preempt_enable();
   }
   
+ +void xen_set_pud(pud_t *ptr, pud_t val)
+ +{
+ +      /* If page is not pinned, we can just update the entry
+ +         directly */
+ +      if (!page_pinned(ptr)) {
+ +              *ptr = val;
+ +              return;
+ +      }
+ +
+ +      xen_set_pud_hyper(ptr, val);
+ +}
+ +
   void xen_set_pte(pte_t *ptep, pte_t pte)
   {
         ptep->pte_high = pte.pte_high;
@@@ -460,7 -268,7 +460,7 @@@ void xen_pte_clear(struct mm_struct *mm
   
   void xen_pmd_clear(pmd_t *pmdp)
   {
- -      xen_set_pmd(pmdp, __pmd(0));
+ +      set_pmd(pmdp, __pmd(0));
   }
   
   pmd_t xen_make_pmd(pmdval_t pmd)
@@@ -633,29 -441,6 +633,29 @@@ void xen_pgd_pin(pgd_t *pgd
         xen_mc_issue(0);
   }
   
+ +/*
+ + * On save, we need to pin all pagetables to make sure they get their
+ + * mfns turned into pfns.  Search the list for any unpinned pgds and pin
+ + * them (unpinned pgds are not currently in use, probably because the
+ + * process is under construction or destruction).
+ + */
+ +void xen_mm_pin_all(void)
+ +{
+ +      unsigned long flags;
+ +      struct page *page;
+ +
+ +      spin_lock_irqsave(&pgd_lock, flags);
+ +
+ +      list_for_each_entry(page, &pgd_list, lru) {
+ +              if (!PagePinned(page)) {
+ +                      xen_pgd_pin((pgd_t *)page_address(page));
+ +                      SetPageSavePinned(page);
+ +              }
+ +      }
+ +
+ +      spin_unlock_irqrestore(&pgd_lock, flags);
+ +}
+ +
   /* The init_mm pagetable is really pinned as soon as it's created, but
      that's before we have page structures to store the bits.  So do all
      the book-keeping now. */
@@@ -713,29 -498,6 +713,29 @@@ static void xen_pgd_unpin(pgd_t *pgd
         xen_mc_issue(0);
   }
   
+ +/*
+ + * On resume, undo any pinning done at save, so that the rest of the
+ + * kernel doesn't see any unexpected pinned pagetables.
+ + */
+ +void xen_mm_unpin_all(void)
+ +{
+ +      unsigned long flags;
+ +      struct page *page;
+ +
+ +      spin_lock_irqsave(&pgd_lock, flags);
+ +
+ +      list_for_each_entry(page, &pgd_list, lru) {
+ +              if (PageSavePinned(page)) {
+ +                      BUG_ON(!PagePinned(page));
+ +                      printk("unpinning pinned %p\n", page_address(page));
+ +                      xen_pgd_unpin((pgd_t *)page_address(page));
+ +                      ClearPageSavePinned(page);
+ +              }
+ +      }
+ +
+ +      spin_unlock_irqrestore(&pgd_lock, flags);
+ +}
+ +
   void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next)
   {
         spin_lock(&next->page_table_lock);
@@@ -796,7 -558,7 +796,7 @@@ static void drop_mm_ref(struct mm_struc
         }
   
         if (!cpus_empty(mask))
-               xen_smp_call_function_mask(mask, drop_other_mm_ref, mm, 1);
+               smp_call_function_mask(mask, drop_other_mm_ref, mm, 1);
   }
   #else
   static void drop_mm_ref(struct mm_struct *mm)
@@@ -829,7 -591,7 +829,7 @@@ void xen_exit_mmap(struct mm_struct *mm
         spin_lock(&mm->page_table_lock);
   
         /* pgd may not be pinned in the error exit path of execve */
- -      if (PagePinned(virt_to_page(mm->pgd)))
+ +      if (page_pinned(mm->pgd))
                 xen_pgd_unpin(mm->pgd);
   
         spin_unlock(&mm->page_table_lock);
diff --combined arch/x86/xen/smp.c

index d2e3c20127d7608fd0bb5735256994bb56fd01b2,a1651d029ea83fdda9ac8a716c541d13d8de5475..233156f39b7f39f605fdb2c0cbd266da60783e28
--- 1/arch/x86/xen/smp.c
--- 2/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@@ -35,28 -35,15 +35,15 @@@
   #include "xen-ops.h"
   #include "mmu.h"
   
- -static cpumask_t xen_cpu_initialized_map;
+ +cpumask_t xen_cpu_initialized_map;
- static DEFINE_PER_CPU(int, resched_irq) = -1;
- static DEFINE_PER_CPU(int, callfunc_irq) = -1;
- static DEFINE_PER_CPU(int, debug_irq) = -1;
- 
- /*
-  * Structure and data for smp_call_function(). This is designed to minimise
-  * static memory requirements. It also looks cleaner.
-  */
- static DEFINE_SPINLOCK(call_lock);
   
- struct call_data_struct {
-       void (*func) (void *info);
-       void *info;
-       atomic_t started;
-       atomic_t finished;
-       int wait;
- };
+ static DEFINE_PER_CPU(int, resched_irq);
+ static DEFINE_PER_CPU(int, callfunc_irq);
+ static DEFINE_PER_CPU(int, callfuncsingle_irq);
+ static DEFINE_PER_CPU(int, debug_irq) = -1;
   
   static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);
- 
- static struct call_data_struct *call_data;
+ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id);
   
   /*
    * Reschedule call back. Nothing to do,
@@@ -65,12 -52,6 +52,12 @@@
    */
   static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
   {
+ +#ifdef CONFIG_X86_32
+ +      __get_cpu_var(irq_stat).irq_resched_count++;
+ +#else
+ +      add_pda(irq_resched_count, 1);
+ +#endif
+ +
         return IRQ_HANDLED;
   }
   
@@@ -128,6 -109,17 +115,17 @@@ static int xen_smp_intr_init(unsigned i
                 goto fail;
         per_cpu(debug_irq, cpu) = rc;
   
+       callfunc_name = kasprintf(GFP_KERNEL, "callfuncsingle%d", cpu);
+       rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_SINGLE_VECTOR,
+                                   cpu,
+                                   xen_call_function_single_interrupt,
+                                   IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
+                                   callfunc_name,
+                                   NULL);
+       if (rc < 0)
+               goto fail;
+       per_cpu(callfuncsingle_irq, cpu) = rc;
+ 
         return 0;
   
    fail:
@@@ -137,6 -129,9 +135,9 @@@
                 unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
         if (per_cpu(debug_irq, cpu) >= 0)
                 unbind_from_irqhandler(per_cpu(debug_irq, cpu), NULL);
+       if (per_cpu(callfuncsingle_irq, cpu) >= 0)
+               unbind_from_irqhandler(per_cpu(callfuncsingle_irq, cpu), NULL);
+ 
         return rc;
   }
   
@@@ -336,7 -331,7 +337,7 @@@ static void stop_self(void *v
   
   void xen_smp_send_stop(void)
   {
-       smp_call_function(stop_self, NULL, 0, 0);
+       smp_call_function(stop_self, NULL, 0);
   }
   
   void xen_smp_send_reschedule(int cpu)
@@@ -344,7 -339,6 +345,6 @@@
         xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR);
   }
   
- 
   static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector)
   {
         unsigned cpu;
@@@ -355,83 -349,42 +355,42 @@@
                 xen_send_IPI_one(cpu, vector);
   }
   
+ void xen_smp_send_call_function_ipi(cpumask_t mask)
+ {
+       int cpu;
+ 
+       xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
+ 
+       /* Make sure other vcpus get a chance to run if they need to. */
+       for_each_cpu_mask(cpu, mask) {
+               if (xen_vcpu_stolen(cpu)) {
+                       HYPERVISOR_sched_op(SCHEDOP_yield, 0);
+                       break;
+               }
+       }
+ }
+ 
+ void xen_smp_send_call_function_single_ipi(int cpu)
+ {
+       xen_send_IPI_mask(cpumask_of_cpu(cpu), XEN_CALL_FUNCTION_SINGLE_VECTOR);
+ }
+ 
   static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
   {
-       void (*func) (void *info) = call_data->func;
-       void *info = call_data->info;
-       int wait = call_data->wait;
- 
-       /*
-        * Notify initiating CPU that I've grabbed the data and am
-        * about to execute the function
-        */
-       mb();
-       atomic_inc(&call_data->started);
-       /*
-        * At this point the info structure may be out of scope unless wait==1
-        */
         irq_enter();
-       (*func)(info);
+       generic_smp_call_function_interrupt();
         __get_cpu_var(irq_stat).irq_call_count++;
         irq_exit();
   
-       if (wait) {
-               mb();           /* commit everything before setting finished */
-               atomic_inc(&call_data->finished);
-       }
- 
         return IRQ_HANDLED;
   }
   
- int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *),
-                              void *info, int wait)
+ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id)
   {
-       struct call_data_struct data;
-       int cpus, cpu;
-       bool yield;
- 
-       /* Holding any lock stops cpus from going down. */
-       spin_lock(&call_lock);
- 
-       cpu_clear(smp_processor_id(), mask);
- 
-       cpus = cpus_weight(mask);
-       if (!cpus) {
-               spin_unlock(&call_lock);
-               return 0;
-       }
- 
-       /* Can deadlock when called with interrupts disabled */
-       WARN_ON(irqs_disabled());
- 
-       data.func = func;
-       data.info = info;
-       atomic_set(&data.started, 0);
-       data.wait = wait;
-       if (wait)
-               atomic_set(&data.finished, 0);
- 
-       call_data = &data;
-       mb();                   /* write everything before IPI */
- 
-       /* Send a message to other CPUs and wait for them to respond */
-       xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
- 
-       /* Make sure other vcpus get a chance to run if they need to. */
-       yield = false;
-       for_each_cpu_mask(cpu, mask)
-               if (xen_vcpu_stolen(cpu))
-                       yield = true;
- 
-       if (yield)
-               HYPERVISOR_sched_op(SCHEDOP_yield, 0);
- 
-       /* Wait for response */
-       while (atomic_read(&data.started) != cpus ||
-              (wait && atomic_read(&data.finished) != cpus))
-               cpu_relax();
- 
-       spin_unlock(&call_lock);
+       irq_enter();
+       generic_smp_call_function_single_interrupt();
+       __get_cpu_var(irq_stat).irq_call_count++;
+       irq_exit();
   
-       return 0;
+       return IRQ_HANDLED;
   }
diff --combined arch/x86/xen/xen-ops.h

index d852ddbb3448bf61245f4c16ecd88b8f14a2c5aa,a636ab5e13411835e0de75fbcb43e1bdf96451e3..6f4b1045c1c20768015d1fc7987f9ee3a73b4475
--- 1/arch/x86/xen/xen-ops.h
--- 2/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@@ -9,35 -9,26 +9,35 @@@
   extern const char xen_hypervisor_callback[];
   extern const char xen_failsafe_callback[];
   
+ +struct trap_info;
   void xen_copy_trap_info(struct trap_info *traps);
   
   DECLARE_PER_CPU(unsigned long, xen_cr3);
   DECLARE_PER_CPU(unsigned long, xen_current_cr3);
   
   extern struct start_info *xen_start_info;
+ +extern struct shared_info xen_dummy_shared_info;
   extern struct shared_info *HYPERVISOR_shared_info;
   
+ +void xen_setup_mfn_list_list(void);
+ +void xen_setup_shared_info(void);
+ +
   char * __init xen_memory_setup(void);
   void __init xen_arch_setup(void);
   void __init xen_init_IRQ(void);
   void xen_enable_sysenter(void);
+ +void xen_vcpu_restore(void);
+ +
+ +void __init xen_build_dynamic_phys_to_machine(void);
   
   void xen_setup_timer(int cpu);
   void xen_setup_cpu_clockevents(void);
- -unsigned long xen_cpu_khz(void);
+ +unsigned long xen_tsc_khz(void);
   void __init xen_time_init(void);
   unsigned long xen_get_wallclock(void);
   int xen_set_wallclock(unsigned long time);
   unsigned long long xen_sched_clock(void);
+ +void xen_timer_resume(void);
   
   irqreturn_t xen_debug_interrupt(int irq, void *dev_id);
   
@@@ -55,16 -46,9 +55,11 @@@ void xen_smp_cpus_done(unsigned int max
   
   void xen_smp_send_stop(void);
   void xen_smp_send_reschedule(int cpu);
- int xen_smp_call_function (void (*func) (void *info), void *info, int nonatomic,
-                          int wait);
- int xen_smp_call_function_single(int cpu, void (*func) (void *info), void *info,
-                                int nonatomic, int wait);
- 
- int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *),
-                              void *info, int wait);
+ void xen_smp_send_call_function_ipi(cpumask_t mask);
+ void xen_smp_send_call_function_single_ipi(int cpu);
   
+ +extern cpumask_t xen_cpu_initialized_map;
+ +
   
   /* Declare an asm function, along with symbols needed to make it
      inlineable */
diff --combined fs/buffer.c

index 5fa1512cd9a210932426e109b5095bbd9529e704,5c23ef560d0189ea55f3975c044f89881c646505..d48caee12e2a4adaae056451f7dcef71205ccfa5
--- 1/fs/buffer.c
--- 2/fs/buffer.c
+++ b/fs/buffer.c
@@@ -821,7 -821,7 +821,7 @@@ static int fsync_buffers_list(spinlock_
                                  * contents - it is a noop if I/O is still in
                                  * flight on potentially older contents.
                                  */
- -                              ll_rw_block(SWRITE, 1, &bh);
+ +                              ll_rw_block(SWRITE_SYNC, 1, &bh);
                                 brelse(bh);
                                 spin_lock(lock);
                         }
@@@ -1464,7 -1464,7 +1464,7 @@@ static void invalidate_bh_lru(void *arg
         
   void invalidate_bh_lrus(void)
   {
-       on_each_cpu(invalidate_bh_lru, NULL, 1, 1);
+       on_each_cpu(invalidate_bh_lru, NULL, 1);
   }
   EXPORT_SYMBOL_GPL(invalidate_bh_lrus);
   
@@@ -1691,13 -1691,11 +1691,13 @@@ static int __block_write_full_page(stru
                          */
                         clear_buffer_dirty(bh);
                         set_buffer_uptodate(bh);
- -              } else if (!buffer_mapped(bh) && buffer_dirty(bh)) {
+ +              } else if ((!buffer_mapped(bh) || buffer_delay(bh)) &&
+ +                         buffer_dirty(bh)) {
                         WARN_ON(bh->b_size != blocksize);
                         err = get_block(inode, block, bh, 1);
                         if (err)
                                 goto recover;
+ +                      clear_buffer_delay(bh);
                         if (buffer_new(bh)) {
                                 /* blockdev mappings never come here */
                                 clear_buffer_new(bh);
@@@ -1776,8 -1774,7 +1776,8 @@@ recover
         bh = head;
         /* Recovery: lock and submit the mapped buffers */
         do {
- -              if (buffer_mapped(bh) && buffer_dirty(bh)) {
+ +              if (buffer_mapped(bh) && buffer_dirty(bh) &&
+ +                  !buffer_delay(bh)) {
                         lock_buffer(bh);
                         mark_buffer_async_write(bh);
                 } else {
@@@ -2064,7 -2061,6 +2064,7 @@@ int generic_write_end(struct file *file
                         struct page *page, void *fsdata)
   {
         struct inode *inode = mapping->host;
+ +      int i_size_changed = 0;
   
         copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
   
@@@ -2077,21 -2073,12 +2077,21 @@@
          */
         if (pos+copied > inode->i_size) {
                 i_size_write(inode, pos+copied);
- -              mark_inode_dirty(inode);
+ +              i_size_changed = 1;
         }
   
         unlock_page(page);
         page_cache_release(page);
   
+ +      /*
+ +       * Don't mark the inode dirty under page lock. First, it unnecessarily
+ +       * makes the holding time of page lock longer. Second, it forces lock
+ +       * ordering of page lock and transaction start for journaling
+ +       * filesystems.
+ +       */
+ +      if (i_size_changed)
+ +              mark_inode_dirty(inode);
+ +
         return copied;
   }
   EXPORT_SYMBOL(generic_write_end);
@@@ -2953,19 -2940,16 +2953,19 @@@ void ll_rw_block(int rw, int nr, struc
         for (i = 0; i < nr; i++) {
                 struct buffer_head *bh = bhs[i];
   
- -              if (rw == SWRITE)
+ +              if (rw == SWRITE || rw == SWRITE_SYNC)
                         lock_buffer(bh);
                 else if (test_set_buffer_locked(bh))
                         continue;
   
- -              if (rw == WRITE || rw == SWRITE) {
+ +              if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC) {
                         if (test_clear_buffer_dirty(bh)) {
                                 bh->b_end_io = end_buffer_write_sync;
                                 get_bh(bh);
- -                              submit_bh(WRITE, bh);
+ +                              if (rw == SWRITE_SYNC)
+ +                                      submit_bh(WRITE_SYNC, bh);
+ +                              else
+ +                                      submit_bh(WRITE, bh);
                                 continue;
                         }
                 } else {
@@@ -2994,7 -2978,7 +2994,7 @@@ int sync_dirty_buffer(struct buffer_hea
         if (test_clear_buffer_dirty(bh)) {
                 get_bh(bh);
                 bh->b_end_io = end_buffer_write_sync;
- -              ret = submit_bh(WRITE, bh);
+ +              ret = submit_bh(WRITE_SYNC, bh);
                 wait_on_buffer(bh);
                 if (buffer_eopnotsupp(bh)) {
                         clear_buffer_eopnotsupp(bh);
diff --combined include/asm-x86/hw_irq.h

index 18f067c310f7880be0f8629e45a91add4b380abb,bf025399d9392ed7910d34c8c0b7309c125f92e9..77ba51df56680fcd9e28b4529eb647c0eea07292
--- 1/include/asm-x86/hw_irq.h
--- 2/include/asm-x86/hw_irq.h
+++ b/include/asm-x86/hw_irq.h
@@@ -1,106 -1,5 +1,107 @@@
+ +#ifndef _ASM_HW_IRQ_H
+ +#define _ASM_HW_IRQ_H
+ +
+ +/*
+ + * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar
+ + *
+ + * moved some of the old arch/i386/kernel/irq.h to here. VY
+ + *
+ + * IRQ/IPI changes taken from work by Thomas Radke
+ + * <tomsoft@informatik.tu-chemnitz.de>
+ + *
+ + * hacked by Andi Kleen for x86-64.
+ + * unified by tglx
+ + */
+ +
+ +#include <asm/irq_vectors.h>
+ +
+ +#ifndef __ASSEMBLY__
+ +
+ +#include <linux/percpu.h>
+ +#include <linux/profile.h>
+ +#include <linux/smp.h>
+ +
+ +#include <asm/atomic.h>
+ +#include <asm/irq.h>
+ +#include <asm/sections.h>
+ +
+ +#define platform_legacy_irq(irq)      ((irq) < 16)
+ +
+ +/* Interrupt handlers registered during init_IRQ */
+ +extern void apic_timer_interrupt(void);
+ +extern void error_interrupt(void);
+ +extern void spurious_interrupt(void);
+ +extern void thermal_interrupt(void);
+ +extern void reschedule_interrupt(void);
+ +
+ +extern void invalidate_interrupt(void);
+ +extern void invalidate_interrupt0(void);
+ +extern void invalidate_interrupt1(void);
+ +extern void invalidate_interrupt2(void);
+ +extern void invalidate_interrupt3(void);
+ +extern void invalidate_interrupt4(void);
+ +extern void invalidate_interrupt5(void);
+ +extern void invalidate_interrupt6(void);
+ +extern void invalidate_interrupt7(void);
+ +
+ +extern void irq_move_cleanup_interrupt(void);
+ +extern void threshold_interrupt(void);
+ +
+ +extern void call_function_interrupt(void);
++extern void call_function_single_interrupt(void);
+ +
+ +/* PIC specific functions */
+ +extern void disable_8259A_irq(unsigned int irq);
+ +extern void enable_8259A_irq(unsigned int irq);
+ +extern int i8259A_irq_pending(unsigned int irq);
+ +extern void make_8259A_irq(unsigned int irq);
+ +extern void init_8259A(int aeoi);
+ +
+ +/* IOAPIC */
+ +#define IO_APIC_IRQ(x) (((x) >= 16) || ((1<<(x)) & io_apic_irqs))
+ +extern unsigned long io_apic_irqs;
+ +
+ +extern void init_VISWS_APIC_irqs(void);
+ +extern void setup_IO_APIC(void);
+ +extern void disable_IO_APIC(void);
+ +extern void print_IO_APIC(void);
+ +extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn);
+ +extern void setup_ioapic_dest(void);
+ +
+ +#ifdef CONFIG_X86_64
+ +extern void enable_IO_APIC(void);
+ +#endif
+ +
+ +/* IPI functions */
+ +extern void send_IPI_self(int vector);
+ +extern void send_IPI(int dest, int vector);
+ +
+ +/* Statistics */
+ +extern atomic_t irq_err_count;
+ +extern atomic_t irq_mis_count;
+ +
+ +/* EISA */
+ +extern void eisa_set_level_irq(unsigned int irq);
+ +
+ +/* Voyager functions */
+ +extern asmlinkage void vic_cpi_interrupt(void);
+ +extern asmlinkage void vic_sys_interrupt(void);
+ +extern asmlinkage void vic_cmn_interrupt(void);
+ +extern asmlinkage void qic_timer_interrupt(void);
+ +extern asmlinkage void qic_invalidate_interrupt(void);
+ +extern asmlinkage void qic_reschedule_interrupt(void);
+ +extern asmlinkage void qic_enable_irq_interrupt(void);
+ +extern asmlinkage void qic_call_function_interrupt(void);
+ +
   #ifdef CONFIG_X86_32
- -# include "hw_irq_32.h"
+ +extern void (*const interrupt[NR_IRQS])(void);
   #else
- -# include "hw_irq_64.h"
+ +typedef int vector_irq_t[NR_VECTORS];
+ +DECLARE_PER_CPU(vector_irq_t, vector_irq);
+ +extern spinlock_t vector_lock;
+ +#endif
+ +extern void setup_vector_irq(int cpu);
+ +
+ +#endif /* !__ASSEMBLY__ */
+ +
   #endif
diff --combined include/asm-x86/irq_vectors.h

index 0ac864ef3cd4f112dd03c9e2b6b9600dbb094277,0000000000000000000000000000000000000000..90b1d1f12f08d652d39fef01e241ed321f38c415

mode 100644,000000..100644
--- 1/include/asm-x86/irq_vectors.h
--- /dev/null
+++ b/include/asm-x86/irq_vectors.h
@@@ -1,169 -1,0 +1,173 @@@
- #define VIC_END_FAKE_CPI              VIC_CALL_FUNCTION_CPI
+ +#ifndef _ASM_IRQ_VECTORS_H
+ +#define _ASM_IRQ_VECTORS_H
+ +
+ +#include <linux/threads.h>
+ +
+ +#define NMI_VECTOR            0x02
+ +
+ +/*
+ + * IDT vectors usable for external interrupt sources start
+ + * at 0x20:
+ + */
+ +#define FIRST_EXTERNAL_VECTOR 0x20
+ +
+ +#ifdef CONFIG_X86_32
+ +# define SYSCALL_VECTOR               0x80
+ +#else
+ +# define IA32_SYSCALL_VECTOR  0x80
+ +#endif
+ +
+ +/*
+ + * Reserve the lowest usable priority level 0x20 - 0x2f for triggering
+ + * cleanup after irq migration on 64 bit.
+ + */
+ +#define IRQ_MOVE_CLEANUP_VECTOR       FIRST_EXTERNAL_VECTOR
+ +
+ +/*
+ + * Vectors 0x20-0x2f are used for ISA interrupts on 32 bit.
+ + * Vectors 0x30-0x3f are used for ISA interrupts on 64 bit.
+ + */
+ +#ifdef CONFIG_X86_32
+ +#define IRQ0_VECTOR           (FIRST_EXTERNAL_VECTOR)
+ +#else
+ +#define IRQ0_VECTOR           (FIRST_EXTERNAL_VECTOR + 0x10)
+ +#endif
+ +#define IRQ1_VECTOR           (IRQ0_VECTOR + 1)
+ +#define IRQ2_VECTOR           (IRQ0_VECTOR + 2)
+ +#define IRQ3_VECTOR           (IRQ0_VECTOR + 3)
+ +#define IRQ4_VECTOR           (IRQ0_VECTOR + 4)
+ +#define IRQ5_VECTOR           (IRQ0_VECTOR + 5)
+ +#define IRQ6_VECTOR           (IRQ0_VECTOR + 6)
+ +#define IRQ7_VECTOR           (IRQ0_VECTOR + 7)
+ +#define IRQ8_VECTOR           (IRQ0_VECTOR + 8)
+ +#define IRQ9_VECTOR           (IRQ0_VECTOR + 9)
+ +#define IRQ10_VECTOR          (IRQ0_VECTOR + 10)
+ +#define IRQ11_VECTOR          (IRQ0_VECTOR + 11)
+ +#define IRQ12_VECTOR          (IRQ0_VECTOR + 12)
+ +#define IRQ13_VECTOR          (IRQ0_VECTOR + 13)
+ +#define IRQ14_VECTOR          (IRQ0_VECTOR + 14)
+ +#define IRQ15_VECTOR          (IRQ0_VECTOR + 15)
+ +
+ +/*
+ + * Special IRQ vectors used by the SMP architecture, 0xf0-0xff
+ + *
+ + *  some of the following vectors are 'rare', they are merged
+ + *  into a single vector (CALL_FUNCTION_VECTOR) to save vector space.
+ + *  TLB, reschedule and local APIC vectors are performance-critical.
+ + *
+ + *  Vectors 0xf1-0xf9 (32 bit) / 0xf8 (64 bit) are free (reserved for future Linux use).
+ + */
+ +#ifdef CONFIG_X86_32
+ +
+ +# define SPURIOUS_APIC_VECTOR         0xff
+ +# define ERROR_APIC_VECTOR            0xfe
+ +# define INVALIDATE_TLB_VECTOR                0xfd
+ +# define RESCHEDULE_VECTOR            0xfc
+ +# define CALL_FUNCTION_VECTOR         0xfb
++# define CALL_FUNCTION_SINGLE_VECTOR  0xfa
+ +# define THERMAL_APIC_VECTOR          0xf0
+ +
+ +#else
+ +
+ +#define SPURIOUS_APIC_VECTOR          0xff
+ +#define ERROR_APIC_VECTOR             0xfe
+ +#define RESCHEDULE_VECTOR             0xfd
+ +#define CALL_FUNCTION_VECTOR          0xfc
++#define CALL_FUNCTION_SINGLE_VECTOR   0xfb
+ +#define THERMAL_APIC_VECTOR           0xfa
+ +#define THRESHOLD_APIC_VECTOR         0xf9
+ +#define INVALIDATE_TLB_VECTOR_END     0xf7
+ +#define INVALIDATE_TLB_VECTOR_START   0xf0    /* f0-f7 used for TLB flush */
+ +
+ +#define NUM_INVALIDATE_TLB_VECTORS    8
+ +
+ +#endif
+ +
+ +/*
+ + * Local APIC timer IRQ vector is on a different priority level,
+ + * to work around the 'lost local interrupt if more than 2 IRQ
+ + * sources per level' errata.
+ + */
+ +#define LOCAL_TIMER_VECTOR    0xef
+ +
+ +/*
+ + * First APIC vector available to drivers: (vectors 0x30-0xee) we
+ + * start at 0x31(0x41) to spread out vectors evenly between priority
+ + * levels. (0x80 is the syscall vector)
+ + */
+ +#ifdef CONFIG_X86_32
+ +# define FIRST_DEVICE_VECTOR  0x31
+ +#else
+ +# define FIRST_DEVICE_VECTOR  (IRQ15_VECTOR + 2)
+ +#endif
+ +
+ +#define NR_VECTORS            256
+ +
+ +#define FPU_IRQ                       13
+ +
+ +#define       FIRST_VM86_IRQ          3
+ +#define LAST_VM86_IRQ         15
+ +#define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15)
+ +
+ +#if !defined(CONFIG_X86_VOYAGER)
+ +
+ +# if defined(CONFIG_X86_IO_APIC) || defined(CONFIG_PARAVIRT) || defined(CONFIG_X86_VISWS)
+ +
+ +#  define NR_IRQS             224
+ +
+ +#  if (224 >= 32 * NR_CPUS)
+ +#   define NR_IRQ_VECTORS     NR_IRQS
+ +#  else
+ +#   define NR_IRQ_VECTORS     (32 * NR_CPUS)
+ +#  endif
+ +
+ +# else /* IO_APIC || PARAVIRT || VISWS */
+ +
+ +#  define NR_IRQS             16
+ +#  define NR_IRQ_VECTORS      NR_IRQS
+ +
+ +# endif
+ +
+ +#else /* !VOYAGER */
+ +
+ +# define NR_IRQS              224
+ +# define NR_IRQ_VECTORS               NR_IRQS
+ +
+ +#endif /* !VOYAGER */
+ +
+ +/* Voyager specific defines */
+ +/* These define the CPIs we use in linux */
+ +#define VIC_CPI_LEVEL0                        0
+ +#define VIC_CPI_LEVEL1                        1
+ +/* now the fake CPIs */
+ +#define VIC_TIMER_CPI                 2
+ +#define VIC_INVALIDATE_CPI            3
+ +#define VIC_RESCHEDULE_CPI            4
+ +#define VIC_ENABLE_IRQ_CPI            5
+ +#define VIC_CALL_FUNCTION_CPI         6
++#define VIC_CALL_FUNCTION_SINGLE_CPI  7
+ +
+ +/* Now the QIC CPIs:  Since we don't need the two initial levels,
+ + * these are 2 less than the VIC CPIs */
+ +#define QIC_CPI_OFFSET                        1
+ +#define QIC_TIMER_CPI                 (VIC_TIMER_CPI - QIC_CPI_OFFSET)
+ +#define QIC_INVALIDATE_CPI            (VIC_INVALIDATE_CPI - QIC_CPI_OFFSET)
+ +#define QIC_RESCHEDULE_CPI            (VIC_RESCHEDULE_CPI - QIC_CPI_OFFSET)
+ +#define QIC_ENABLE_IRQ_CPI            (VIC_ENABLE_IRQ_CPI - QIC_CPI_OFFSET)
+ +#define QIC_CALL_FUNCTION_CPI         (VIC_CALL_FUNCTION_CPI - QIC_CPI_OFFSET)
++#define QIC_CALL_FUNCTION_SINGLE_CPI  (VIC_CALL_FUNCTION_SINGLE_CPI - QIC_CPI_OFFSET)
+ +
+ +#define VIC_START_FAKE_CPI            VIC_TIMER_CPI
++#define VIC_END_FAKE_CPI              VIC_CALL_FUNCTION_SINGLE_CPI
+ +
+ +/* this is the SYS_INT CPI. */
+ +#define VIC_SYS_INT                   8
+ +#define VIC_CMN_INT                   15
+ +
+ +/* This is the boot CPI for alternate processors.  It gets overwritten
+ + * by the above once the system has activated all available processors */
+ +#define VIC_CPU_BOOT_CPI              VIC_CPI_LEVEL0
+ +#define VIC_CPU_BOOT_ERRATA_CPI               (VIC_CPI_LEVEL0 + 8)
+ +
+ +
+ +#endif /* _ASM_IRQ_VECTORS_H */
diff --combined include/asm-x86/smp.h

index 2e221f1ce0b21a4ed5cc8c715332455496f34b6a,e3c24807b59b114928b110373a88e952eb031e2b..c2784b3e0b77e23269a1c61407aaeb2db39cdc9d
--- 1/include/asm-x86/smp.h
--- 2/include/asm-x86/smp.h
+++ b/include/asm-x86/smp.h
@@@ -29,12 -29,21 +29,12 @@@ extern int smp_num_siblings
   extern unsigned int num_processors;
   extern cpumask_t cpu_initialized;
   
- -#ifdef CONFIG_SMP
- -extern u16 x86_cpu_to_apicid_init[];
- -extern u16 x86_bios_cpu_apicid_init[];
- -extern void *x86_cpu_to_apicid_early_ptr;
- -extern void *x86_bios_cpu_apicid_early_ptr;
- -#else
- -#define x86_cpu_to_apicid_early_ptr NULL
- -#define x86_bios_cpu_apicid_early_ptr NULL
- -#endif
- -
   DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
   DECLARE_PER_CPU(cpumask_t, cpu_core_map);
   DECLARE_PER_CPU(u16, cpu_llc_id);
- -DECLARE_PER_CPU(u16, x86_cpu_to_apicid);
- -DECLARE_PER_CPU(u16, x86_bios_cpu_apicid);
+ +
+ +DECLARE_EARLY_PER_CPU(u16, x86_cpu_to_apicid);
+ +DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid);
   
   /* Static state in head.S used to set up a CPU */
   extern struct {
@@@ -50,9 -59,9 +50,9 @@@ struct smp_ops 
   
         void (*smp_send_stop)(void);
         void (*smp_send_reschedule)(int cpu);
-       int (*smp_call_function_mask)(cpumask_t mask,
-                                     void (*func)(void *info), void *info,
-                                     int wait);
+ 
+       void (*send_call_func_ipi)(cpumask_t mask);
+       void (*send_call_func_single_ipi)(int cpu);
   };
   
   /* Globals due to paravirt */
@@@ -94,21 -103,28 +94,26 @@@ static inline void smp_send_reschedule(
         smp_ops.smp_send_reschedule(cpu);
   }
   
- static inline int smp_call_function_mask(cpumask_t mask,
-                                        void (*func) (void *info), void *info,
-                                        int wait)
+ static inline void arch_send_call_function_single_ipi(int cpu)
+ {
+       smp_ops.send_call_func_single_ipi(cpu);
+ }
+ 
+ static inline void arch_send_call_function_ipi(cpumask_t mask)
   {
-       return smp_ops.smp_call_function_mask(mask, func, info, wait);
+       smp_ops.send_call_func_ipi(mask);
   }
   
   void native_smp_prepare_boot_cpu(void);
   void native_smp_prepare_cpus(unsigned int max_cpus);
   void native_smp_cpus_done(unsigned int max_cpus);
   int native_cpu_up(unsigned int cpunum);
+ void native_send_call_func_ipi(cpumask_t mask);
+ void native_send_call_func_single_ipi(int cpu);
   
   extern int __cpu_disable(void);
   extern void __cpu_die(unsigned int cpu);
   
- -extern void prefill_possible_map(void);
- -
   void smp_store_cpu_info(int id);
   #define cpu_physical_id(cpu)  per_cpu(x86_cpu_to_apicid, cpu)
   
@@@ -119,14 -135,6 +124,14 @@@ static inline int num_booting_cpus(void
   }
   #endif /* CONFIG_SMP */
   
+ +#if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_CPU)
+ +extern void prefill_possible_map(void);
+ +#else
+ +static inline void prefill_possible_map(void)
+ +{
+ +}
+ +#endif
+ +
   extern unsigned disabled_cpus __cpuinitdata;
   
   #ifdef CONFIG_X86_32_SMP
@@@ -194,10 -202,10 +199,8 @@@ static inline int hard_smp_processor_id
   #endif /* CONFIG_X86_LOCAL_APIC */
   
   #ifdef CONFIG_HOTPLUG_CPU
- -extern void cpu_exit_clear(void);
   extern void cpu_uninit(void);
   #endif
   
- extern void lock_ipi_call_lock(void);
- extern void unlock_ipi_call_lock(void);
- -extern void smp_alloc_memory(void);
   #endif /* __ASSEMBLY__ */
   #endif
diff --combined kernel/Makefile

index f6328e16dfdde5749b05279e503f2172a2831544,9fa57976f252fe5604047f39981b0dbf90be357a..0a7ed838984b034a0fcb843dc765f6941c769e72
--- 1/kernel/Makefile
--- 2/kernel/Makefile
+++ b/kernel/Makefile
@@@ -3,7 -3,7 +3,7 @@@
   #
   
   obj-y     = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
- -          exit.o itimer.o time.o softirq.o resource.o \
+ +          cpu.o exit.o itimer.o time.o softirq.o resource.o \
             sysctl.o capability.o ptrace.o timer.o user.o \
             signal.o sys.o kmod.o workqueue.o pid.o \
             rcupdate.o extable.o params.o posix-timers.o \
@@@ -11,18 -11,6 +11,18 @@@
             hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
             notifier.o ksysfs.o pm_qos_params.o sched_clock.o
   
+ +CFLAGS_REMOVE_sched.o = -mno-spe
+ +
+ +ifdef CONFIG_FTRACE
+ +# Do not trace debug files and internal ftrace files
+ +CFLAGS_REMOVE_lockdep.o = -pg
+ +CFLAGS_REMOVE_lockdep_proc.o = -pg
+ +CFLAGS_REMOVE_mutex-debug.o = -pg
+ +CFLAGS_REMOVE_rtmutex-debug.o = -pg
+ +CFLAGS_REMOVE_cgroup-debug.o = -pg
+ +CFLAGS_REMOVE_sched_clock.o = -pg
+ +endif
+ +
   obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o
   obj-$(CONFIG_STACKTRACE) += stacktrace.o
   obj-y += time/
@@@ -39,7 -27,8 +39,8 @@@ obj-$(CONFIG_RT_MUTEXES) += rtmutex.
   obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
   obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
   obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
- -obj-$(CONFIG_SMP) += cpu.o spinlock.o
+ obj-$(CONFIG_USE_GENERIC_SMP_HELPERS) += smp.o
+ +obj-$(CONFIG_SMP) += spinlock.o
   obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
   obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
   obj-$(CONFIG_UID16) += uid16.o
@@@ -81,9 -70,6 +82,9 @@@ obj-$(CONFIG_TASK_DELAY_ACCT) += delaya
   obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
   obj-$(CONFIG_MARKERS) += marker.o
   obj-$(CONFIG_LATENCYTOP) += latencytop.o
+ +obj-$(CONFIG_FTRACE) += trace/
+ +obj-$(CONFIG_TRACING) += trace/
+ +obj-$(CONFIG_SMP) += sched_cpupri.o
   
   ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
   # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff --combined kernel/hrtimer.c

index 2913a8bff612e8d571c618d88c4fd77a222006c3,50e8616d7955cdfd476bf611f6be80a30967e3ae..b8e4dce80a748dc06c94ed6cb43225903c7f99c1
--- 1/kernel/hrtimer.c
--- 2/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@@ -300,10 -300,11 +300,10 @@@ EXPORT_SYMBOL_GPL(ktime_sub_ns)
    */
   u64 ktime_divns(const ktime_t kt, s64 div)
   {
- -      u64 dclc, inc, dns;
+ +      u64 dclc;
         int sft = 0;
   
- -      dclc = dns = ktime_to_ns(kt);
- -      inc = div;
+ +      dclc = ktime_to_ns(kt);
         /* Make sure the divisor is less than 2^32: */
         while (div >> 32) {
                 sft++;
@@@ -622,7 -623,7 +622,7 @@@ static void retrigger_next_event(void *
   void clock_was_set(void)
   {
         /* Retrigger the CPU local events everywhere */
-       on_each_cpu(retrigger_next_event, NULL, 0, 1);
+       on_each_cpu(retrigger_next_event, NULL, 1);
   }
   
   /*
@@@ -631,6 -632,8 +631,6 @@@
    */
   void hres_timers_resume(void)
   {
- -      WARN_ON_ONCE(num_online_cpus() > 1);
- -
         /* Retrigger the CPU local events: */
         retrigger_next_event(NULL);
   }
@@@ -1000,18 -1003,10 +1000,18 @@@ hrtimer_start(struct hrtimer *timer, kt
          */
         raise = timer->state == HRTIMER_STATE_PENDING;
   
+ +      /*
+ +       * We use preempt_disable to prevent this task from migrating after
+ +       * setting up the softirq and raising it. Otherwise, if we migrate
+ +       * we will raise the softirq on the wrong CPU.
+ +       */
+ +      preempt_disable();
+ +
         unlock_hrtimer_base(timer, &flags);
   
         if (raise)
                 hrtimer_raise_softirq();
+ +      preempt_enable();
   
         return ret;
   }
@@@ -1083,7 -1078,7 +1083,7 @@@ ktime_t hrtimer_get_remaining(const str
   }
   EXPORT_SYMBOL_GPL(hrtimer_get_remaining);
   
- -#if defined(CONFIG_NO_IDLE_HZ) || defined(CONFIG_NO_HZ)
+ +#ifdef CONFIG_NO_HZ
   /**
    * hrtimer_get_next_event - get the time until next expiry event
    *
@@@ -1674,7 -1669,7 +1674,7 @@@ void __init hrtimers_init(void
                           (void *)(long)smp_processor_id());
         register_cpu_notifier(&hrtimers_nb);
   #ifdef CONFIG_HIGH_RES_TIMERS
- -      open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq, NULL);
+ +      open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq);
   #endif
   }
   
diff --combined kernel/softirq.c

index 3e9e896fdc5baf28c448245118a6ce9cf9de02cb,c159fd094772d7e67c1482835de520052f8946fe..81e2fe0f983a04d1407a77e214b8f0612feedf9a
--- 1/kernel/softirq.c
--- 2/kernel/softirq.c
+++ b/kernel/softirq.c
@@@ -131,17 -131,23 +131,17 @@@ void _local_bh_enable(void
   
   EXPORT_SYMBOL(_local_bh_enable);
   
- -void local_bh_enable(void)
+ +static inline void _local_bh_enable_ip(unsigned long ip)
   {
+ +      WARN_ON_ONCE(in_irq() || irqs_disabled());
   #ifdef CONFIG_TRACE_IRQFLAGS
- -      unsigned long flags;
- -
- -      WARN_ON_ONCE(in_irq());
- -#endif
- -      WARN_ON_ONCE(irqs_disabled());
- -
- -#ifdef CONFIG_TRACE_IRQFLAGS
- -      local_irq_save(flags);
+ +      local_irq_disable();
   #endif
         /*
          * Are softirqs going to be turned on now:
          */
         if (softirq_count() == SOFTIRQ_OFFSET)
- -              trace_softirqs_on((unsigned long)__builtin_return_address(0));
+ +              trace_softirqs_on(ip);
         /*
          * Keep preemption disabled until we are done with
          * softirq processing:
@@@ -153,20 -159,40 +153,20 @@@
   
         dec_preempt_count();
   #ifdef CONFIG_TRACE_IRQFLAGS
- -      local_irq_restore(flags);
+ +      local_irq_enable();
   #endif
         preempt_check_resched();
   }
+ +
+ +void local_bh_enable(void)
+ +{
+ +      _local_bh_enable_ip((unsigned long)__builtin_return_address(0));
+ +}
   EXPORT_SYMBOL(local_bh_enable);
   
   void local_bh_enable_ip(unsigned long ip)
   {
- -#ifdef CONFIG_TRACE_IRQFLAGS
- -      unsigned long flags;
- -
- -      WARN_ON_ONCE(in_irq());
- -
- -      local_irq_save(flags);
- -#endif
- -      /*
- -       * Are softirqs going to be turned on now:
- -       */
- -      if (softirq_count() == SOFTIRQ_OFFSET)
- -              trace_softirqs_on(ip);
- -      /*
- -       * Keep preemption disabled until we are done with
- -       * softirq processing:
- -       */
- -      sub_preempt_count(SOFTIRQ_OFFSET - 1);
- -
- -      if (unlikely(!in_interrupt() && local_softirq_pending()))
- -              do_softirq();
- -
- -      dec_preempt_count();
- -#ifdef CONFIG_TRACE_IRQFLAGS
- -      local_irq_restore(flags);
- -#endif
- -      preempt_check_resched();
+ +      _local_bh_enable_ip(ip);
   }
   EXPORT_SYMBOL(local_bh_enable_ip);
   
@@@ -321,8 -347,9 +321,8 @@@ void raise_softirq(unsigned int nr
         local_irq_restore(flags);
   }
   
- -void open_softirq(int nr, void (*action)(struct softirq_action*), void *data)
+ +void open_softirq(int nr, void (*action)(struct softirq_action *))
   {
- -      softirq_vec[nr].data = data;
         softirq_vec[nr].action = action;
   }
   
@@@ -333,8 -360,10 +333,8 @@@ struct tasklet_hea
         struct tasklet_struct **tail;
   };
   
- -/* Some compilers disobey section attribute on statics when not
- -   initialized -- RR */
- -static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec) = { NULL };
- -static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec) = { NULL };
+ +static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
+ +static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
   
   void __tasklet_schedule(struct tasklet_struct *t)
   {
@@@ -474,8 -503,8 +474,8 @@@ void __init softirq_init(void
                         &per_cpu(tasklet_hi_vec, cpu).head;
         }
   
- -      open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL);
- -      open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL);
+ +      open_softirq(TASKLET_SOFTIRQ, tasklet_action);
+ +      open_softirq(HI_SOFTIRQ, tasklet_hi_action);
   }
   
   static int ksoftirqd(void * __bind_cpu)
@@@ -616,7 -645,7 +616,7 @@@ static int __cpuinit cpu_callback(struc
   
                 p = per_cpu(ksoftirqd, hotcpu);
                 per_cpu(ksoftirqd, hotcpu) = NULL;
- -              sched_setscheduler(p, SCHED_FIFO, &param);
+ +              sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
                 kthread_stop(p);
                 takeover_tasklets(hotcpu);
                 break;
@@@ -645,12 -674,12 +645,12 @@@ __init int spawn_ksoftirqd(void
   /*
    * Call a function on all processors
    */
- int on_each_cpu(void (*func) (void *info), void *info, int retry, int wait)
+ int on_each_cpu(void (*func) (void *info), void *info, int wait)
   {
         int ret = 0;
   
         preempt_disable();
-       ret = smp_call_function(func, info, retry, wait);
+       ret = smp_call_function(func, info, wait);
         local_irq_disable();
         func(info);
         local_irq_enable();
diff --combined kernel/time/tick-broadcast.c

index 67f80c2617096454a1d0b1f520825034f7c87dfc,75e718539dcb062eaa811d3f7eb33d1d7d415321..f48d0f09d32f9c8763190a766f2f23a9fef9e923
--- 1/kernel/time/tick-broadcast.c
--- 2/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@@ -30,7 -30,6 +30,7 @@@
   struct tick_device tick_broadcast_device;
   static cpumask_t tick_broadcast_mask;
   static DEFINE_SPINLOCK(tick_broadcast_lock);
+ +static int tick_broadcast_force;
   
   #ifdef CONFIG_TICK_ONESHOT
   static void tick_broadcast_clear_oneshot(int cpu);
@@@ -233,11 -232,10 +233,11 @@@ static void tick_do_broadcast_on_off(vo
                                                      CLOCK_EVT_MODE_SHUTDOWN);
                 }
                 if (*reason == CLOCK_EVT_NOTIFY_BROADCAST_FORCE)
- -                      dev->features |= CLOCK_EVT_FEAT_DUMMY;
+ +                      tick_broadcast_force = 1;
                 break;
         case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
- -              if (cpu_isset(cpu, tick_broadcast_mask)) {
+ +              if (!tick_broadcast_force &&
+ +                  cpu_isset(cpu, tick_broadcast_mask)) {
                         cpu_clear(cpu, tick_broadcast_mask);
                         if (td->mode == TICKDEV_MODE_PERIODIC)
                                 tick_setup_periodic(dev, 0);
@@@ -268,7 -266,7 +268,7 @@@ void tick_broadcast_on_off(unsigned lon
                        "offline CPU #%d\n", *oncpu);
         else
                 smp_call_function_single(*oncpu, tick_do_broadcast_on_off,
-                                        &reason, 1, 1);
+                                        &reason, 1);
   }
   
   /*
diff --combined mm/page_alloc.c

index f024b9b3a2a6d1ea9d455a1e0e019391e57a606d,53242344a774f1c4e2db14a7bc12477523a9d67c..79ac4afc908cff9ed4b7220e4fd9851955d73783
--- 1/mm/page_alloc.c
--- 2/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@@ -918,7 -918,7 +918,7 @@@ void drain_local_pages(void *arg
    */
   void drain_all_pages(void)
   {
-       on_each_cpu(drain_local_pages, NULL, 0, 1);
+       on_each_cpu(drain_local_pages, NULL, 1);
   }
   
   #ifdef CONFIG_HIBERNATION
@@@ -2328,6 -2328,7 +2328,6 @@@ static void build_zonelists(pg_data_t *
   static void build_zonelist_cache(pg_data_t *pgdat)
   {
         pgdat->node_zonelists[0].zlcache_ptr = NULL;
- -      pgdat->node_zonelists[1].zlcache_ptr = NULL;
   }
   
   #endif        /* CONFIG_NUMA */
@@@ -2929,18 -2930,6 +2929,18 @@@ void __init free_bootmem_with_active_re
         }
   }
   
+ +void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data)
+ +{
+ +      int i;
+ +      int ret;
+ +
+ +      for_each_active_range_index_in_nid(i, nid) {
+ +              ret = work_fn(early_node_map[i].start_pfn,
+ +                            early_node_map[i].end_pfn, data);
+ +              if (ret)
+ +                      break;
+ +      }
+ +}
   /**
    * sparse_memory_present_with_active_regions - Call memory_present for each active range
    * @nid: The node to call memory_present for. If MAX_NUMNODES, all nodes will be used.
@@@ -3473,11 -3462,6 +3473,11 @@@ void __paginginit free_area_init_node(i
         calculate_node_totalpages(pgdat, zones_size, zholes_size);
   
         alloc_node_mem_map(pgdat);
+ +#ifdef CONFIG_FLAT_NODE_MEM_MAP
+ +      printk(KERN_DEBUG "free_area_init_node: node %d, pgdat %08lx, node_mem_map %08lx\n",
+ +              nid, (unsigned long)pgdat,
+ +              (unsigned long)pgdat->node_mem_map);
+ +#endif
   
         free_area_init_core(pgdat, zones_size, zholes_size);
   }
@@@ -3520,7 -3504,7 +3520,7 @@@ void __init add_active_range(unsigned i
   {
         int i;
   
- -      printk(KERN_DEBUG "Entering add_active_range(%d, %lu, %lu) "
+ +      printk(KERN_DEBUG "Entering add_active_range(%d, %#lx, %#lx) "
                           "%d entries of %d used\n",
                           nid, start_pfn, end_pfn,
                           nr_nodemap_entries, MAX_ACTIVE_REGIONS);
@@@ -3564,68 -3548,27 +3564,68 @@@
   }
   
   /**
- - * shrink_active_range - Shrink an existing registered range of PFNs
+ + * remove_active_range - Shrink an existing registered range of PFNs
    * @nid: The node id the range is on that should be shrunk
- - * @old_end_pfn: The old end PFN of the range
- - * @new_end_pfn: The new PFN of the range
+ + * @start_pfn: The first PFN of the range to remove
+ + * @end_pfn: The end PFN (exclusive) of the range to remove
    *
    * i386 with NUMA use alloc_remap() to store a node_mem_map on a local node.
- - * The map is kept at the end physical page range that has already been
- - * registered with add_active_range(). This function allows an arch to shrink
- - * an existing registered range.
+ + * The map is kept near the end physical page range that has already been
+ + * registered. This function allows an arch to shrink an existing registered
+ + * range.
    */
- -void __init shrink_active_range(unsigned int nid, unsigned long old_end_pfn,
- -                                              unsigned long new_end_pfn)
+ +void __init remove_active_range(unsigned int nid, unsigned long start_pfn,
+ +                              unsigned long end_pfn)
   {
- -      int i;
+ +      int i, j;
+ +      int removed = 0;
+ +
+ +      printk(KERN_DEBUG "remove_active_range (%d, %lu, %lu)\n",
+ +                        nid, start_pfn, end_pfn);
   
         /* Find the old active region end and shrink */
- -      for_each_active_range_index_in_nid(i, nid)
- -              if (early_node_map[i].end_pfn == old_end_pfn) {
- -                      early_node_map[i].end_pfn = new_end_pfn;
- -                      break;
+ +      for_each_active_range_index_in_nid(i, nid) {
+ +              if (early_node_map[i].start_pfn >= start_pfn &&
+ +                  early_node_map[i].end_pfn <= end_pfn) {
+ +                      /* clear it */
+ +                      early_node_map[i].start_pfn = 0;
+ +                      early_node_map[i].end_pfn = 0;
+ +                      removed = 1;
+ +                      continue;
+ +              }
+ +              if (early_node_map[i].start_pfn < start_pfn &&
+ +                  early_node_map[i].end_pfn > start_pfn) {
+ +                      unsigned long temp_end_pfn = early_node_map[i].end_pfn;
+ +                      early_node_map[i].end_pfn = start_pfn;
+ +                      if (temp_end_pfn > end_pfn)
+ +                              add_active_range(nid, end_pfn, temp_end_pfn);
+ +                      continue;
                 }
+ +              if (early_node_map[i].start_pfn >= start_pfn &&
+ +                  early_node_map[i].end_pfn > end_pfn &&
+ +                  early_node_map[i].start_pfn < end_pfn) {
+ +                      early_node_map[i].start_pfn = end_pfn;
+ +                      continue;
+ +              }
+ +      }
+ +
+ +      if (!removed)
+ +              return;
+ +
+ +      /* remove the blank ones */
+ +      for (i = nr_nodemap_entries - 1; i > 0; i--) {
+ +              if (early_node_map[i].nid != nid)
+ +                      continue;
+ +              if (early_node_map[i].end_pfn)
+ +                      continue;
+ +              /* we found it, get rid of it */
+ +              for (j = i; j < nr_nodemap_entries - 1; j++)
+ +                      memcpy(&early_node_map[j], &early_node_map[j+1],
+ +                              sizeof(early_node_map[j]));
+ +              j = nr_nodemap_entries - 1;
+ +              memset(&early_node_map[j], 0, sizeof(early_node_map[j]));
+ +              nr_nodemap_entries--;
+ +      }
   }
   
   /**
@@@ -3669,7 -3612,7 +3669,7 @@@ static void __init sort_node_map(void
   }
   
   /* Find the lowest pfn for a node */
- -unsigned long __init find_min_pfn_for_node(unsigned long nid)
+ +unsigned long __init find_min_pfn_for_node(int nid)
   {
         int i;
         unsigned long min_pfn = ULONG_MAX;
@@@ -3680,7 -3623,7 +3680,7 @@@
   
         if (min_pfn == ULONG_MAX) {
                 printk(KERN_WARNING
- -                      "Could not find start_pfn for node %lu\n", nid);
+ +                      "Could not find start_pfn for node %d\n", nid);
                 return 0;
         }
   
@@@ -3936,7 -3879,7 +3936,7 @@@ void __init free_area_init_nodes(unsign
         for (i = 0; i < MAX_NR_ZONES; i++) {
                 if (i == ZONE_MOVABLE)
                         continue;
- -              printk("  %-8s %8lu -> %8lu\n",
+ +              printk("  %-8s %0#10lx -> %0#10lx\n",
                                 zone_names[i],
                                 arch_zone_lowest_possible_pfn[i],
                                 arch_zone_highest_possible_pfn[i]);
@@@ -3952,7 -3895,7 +3952,7 @@@
         /* Print out the early_node_map[] */
         printk("early_node_map[%d] active PFN ranges\n", nr_nodemap_entries);
         for (i = 0; i < nr_nodemap_entries; i++)
- -              printk("  %3d: %8lu -> %8lu\n", early_node_map[i].nid,
+ +              printk("  %3d: %0#10lx -> %0#10lx\n", early_node_map[i].nid,
                                                 early_node_map[i].start_pfn,
                                                 early_node_map[i].end_pfn);
   
diff --combined mm/slab.c

index b4aa4c88250ea8db315fccc099ee5b99321b90b4,0772abb412b904a383517723e1006e50e047d033..052e7d64537eb61e71c780ff4a6ca653dff0750e
--- 1/mm/slab.c
--- 2/mm/slab.c
+++ b/mm/slab.c
@@@ -1901,7 -1901,15 +1901,7 @@@ static void check_poison_obj(struct kme
   #endif
   
   #if DEBUG
- -/**
- - * slab_destroy_objs - destroy a slab and its objects
- - * @cachep: cache pointer being destroyed
- - * @slabp: slab pointer being destroyed
- - *
- - * Call the registered destructor for each object in a slab that is being
- - * destroyed.
- - */
- -static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp)
+ +static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slabp)
   {
         int i;
         for (i = 0; i < cachep->num; i++) {
@@@ -1930,7 -1938,7 +1930,7 @@@
         }
   }
   #else
- -static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp)
+ +static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slabp)
   {
   }
   #endif
@@@ -1948,7 -1956,7 +1948,7 @@@ static void slab_destroy(struct kmem_ca
   {
         void *addr = slabp->s_mem - slabp->colouroff;
   
- -      slab_destroy_objs(cachep, slabp);
+ +      slab_destroy_debugcheck(cachep, slabp);
         if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) {
                 struct slab_rcu *slab_rcu;
   
@@@ -2446,7 -2454,7 +2446,7 @@@ static void drain_cpu_caches(struct kme
         struct kmem_list3 *l3;
         int node;
   
-       on_each_cpu(do_drain, cachep, 1, 1);
+       on_each_cpu(do_drain, cachep, 1);
         check_irq_on();
         for_each_online_node(node) {
                 l3 = cachep->nodelists[node];
@@@ -3931,7 -3939,7 +3931,7 @@@ static int do_tune_cpucache(struct kmem
         }
         new->cachep = cachep;
   
-       on_each_cpu(do_ccupdate_local, (void *)new, 1, 1);
+       on_each_cpu(do_ccupdate_local, (void *)new, 1);
   
         check_irq_on();
         cachep->batchcount = batchcount;
diff --combined mm/slub.c

index 488400d107007bfc60b63050cf8b2969d4e9f952,44715eb70c06a540694793313c6947bd172eaf5f..35ab38a94b46279dc018c14841452f533313702b
--- 1/mm/slub.c
--- 2/mm/slub.c
+++ b/mm/slub.c
@@@ -5,7 -5,7 +5,7 @@@
    * The allocator synchronizes using per slab locks and only
    * uses a centralized lock to manage a pool of partial slabs.
    *
- - * (C) 2007 SGI, Christoph Lameter <clameter@sgi.com>
+ + * (C) 2007 SGI, Christoph Lameter
    */
   
   #include <linux/mm.h>
@@@ -411,7 -411,7 +411,7 @@@ static void set_track(struct kmem_cach
         if (addr) {
                 p->addr = addr;
                 p->cpu = smp_processor_id();
- -              p->pid = current ? current->pid : -1;
+ +              p->pid = current->pid;
                 p->when = jiffies;
         } else
                 memset(p, 0, sizeof(struct track));
@@@ -431,8 -431,9 +431,8 @@@ static void print_track(const char *s, 
         if (!t->addr)
                 return;
   
- -      printk(KERN_ERR "INFO: %s in ", s);
- -      __print_symbol("%s", (unsigned long)t->addr);
- -      printk(" age=%lu cpu=%u pid=%d\n", jiffies - t->when, t->cpu, t->pid);
+ +      printk(KERN_ERR "INFO: %s in %pS age=%lu cpu=%u pid=%d\n",
+ +              s, t->addr, jiffies - t->when, t->cpu, t->pid);
   }
   
   static void print_tracking(struct kmem_cache *s, void *object)
@@@ -1496,7 -1497,7 +1496,7 @@@ static void flush_cpu_slab(void *d
   static void flush_all(struct kmem_cache *s)
   {
   #ifdef CONFIG_SMP
-       on_each_cpu(flush_cpu_slab, s, 1, 1);
+       on_each_cpu(flush_cpu_slab, s, 1);
   #else
         unsigned long flags;
   
@@@ -1627,11 -1628,9 +1627,11 @@@ static __always_inline void *slab_alloc
         void **object;
         struct kmem_cache_cpu *c;
         unsigned long flags;
+ +      unsigned int objsize;
   
         local_irq_save(flags);
         c = get_cpu_slab(s, smp_processor_id());
+ +      objsize = c->objsize;
         if (unlikely(!c->freelist || !node_match(c, node)))
   
                 object = __slab_alloc(s, gfpflags, node, addr, c);
@@@ -1644,7 -1643,7 +1644,7 @@@
         local_irq_restore(flags);
   
         if (unlikely((gfpflags & __GFP_ZERO) && object))
- -              memset(object, 0, c->objsize);
+ +              memset(object, 0, objsize);
   
         return object;
   }
@@@ -2766,7 -2765,6 +2766,7 @@@ void kfree(const void *x
   
         page = virt_to_head_page(x);
         if (unlikely(!PageSlab(page))) {
+ +              BUG_ON(!PageCompound(page));
                 put_page(page);
                 return;
         }
@@@ -2997,6 -2995,8 +2997,6 @@@ void __init kmem_cache_init(void
                 create_kmalloc_cache(&kmalloc_caches[1],
                                 "kmalloc-96", 96, GFP_KERNEL);
                 caches++;
- -      }
- -      if (KMALLOC_MIN_SIZE <= 128) {
                 create_kmalloc_cache(&kmalloc_caches[2],
                                 "kmalloc-192", 192, GFP_KERNEL);
                 caches++;
@@@ -3026,16 -3026,6 +3026,16 @@@
         for (i = 8; i < KMALLOC_MIN_SIZE; i += 8)
                 size_index[(i - 1) / 8] = KMALLOC_SHIFT_LOW;
   
+ +      if (KMALLOC_MIN_SIZE == 128) {
+ +              /*
+ +               * The 192 byte sized cache is not used if the alignment
+ +               * is 128 byte. Redirect kmalloc to use the 256 byte cache
+ +               * instead.
+ +               */
+ +              for (i = 128 + 8; i <= 192; i += 8)
+ +                      size_index[(i - 1) / 8] = 8;
+ +      }
+ +
         slab_state = UP;
   
         /* Provide the correct kmalloc names now that the caches are up */
diff --combined net/iucv/iucv.c

index 7f82b7616212eeaa6ab04fab033510ba9b39fb8f,a178e27e7b1ae5fffaf85c330eb93a6d4e3e34e9..cc34ac769a3c9d3eb19bf6c1f52828d6e961725d
--- 1/net/iucv/iucv.c
--- 2/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@@ -480,7 -480,7 +480,7 @@@ static void iucv_setmask_mp(void
                 if (cpu_isset(cpu, iucv_buffer_cpumask) &&
                     !cpu_isset(cpu, iucv_irq_cpumask))
                         smp_call_function_single(cpu, iucv_allow_cpu,
-                                                NULL, 0, 1);
+                                                NULL, 1);
         preempt_enable();
   }
   
@@@ -498,7 -498,7 +498,7 @@@ static void iucv_setmask_up(void
         cpumask = iucv_irq_cpumask;
         cpu_clear(first_cpu(iucv_irq_cpumask), cpumask);
         for_each_cpu_mask(cpu, cpumask)
-               smp_call_function_single(cpu, iucv_block_cpu, NULL, 0, 1);
+               smp_call_function_single(cpu, iucv_block_cpu, NULL, 1);
   }
   
   /**
@@@ -523,7 -523,7 +523,7 @@@ static int iucv_enable(void
         rc = -EIO;
         preempt_disable();
         for_each_online_cpu(cpu)
-               smp_call_function_single(cpu, iucv_declare_cpu, NULL, 0, 1);
+               smp_call_function_single(cpu, iucv_declare_cpu, NULL, 1);
         preempt_enable();
         if (cpus_empty(iucv_buffer_cpumask))
                 /* No cpu could declare an iucv buffer. */
@@@ -545,7 -545,7 +545,7 @@@ out
    */
   static void iucv_disable(void)
   {
-       on_each_cpu(iucv_retrieve_cpu, NULL, 0, 1);
+       on_each_cpu(iucv_retrieve_cpu, NULL, 1);
         kfree(iucv_path_table);
   }
   
@@@ -580,7 -580,7 +580,7 @@@ static int __cpuinit iucv_cpu_notify(st
         case CPU_ONLINE_FROZEN:
         case CPU_DOWN_FAILED:
         case CPU_DOWN_FAILED_FROZEN:
-               smp_call_function_single(cpu, iucv_declare_cpu, NULL, 0, 1);
+               smp_call_function_single(cpu, iucv_declare_cpu, NULL, 1);
                 break;
         case CPU_DOWN_PREPARE:
         case CPU_DOWN_PREPARE_FROZEN:
@@@ -589,10 -589,10 +589,10 @@@
                 if (cpus_empty(cpumask))
                         /* Can't offline last IUCV enabled cpu. */
                         return NOTIFY_BAD;
-               smp_call_function_single(cpu, iucv_retrieve_cpu, NULL, 0, 1);
+               smp_call_function_single(cpu, iucv_retrieve_cpu, NULL, 1);
                 if (cpus_empty(iucv_irq_cpumask))
                         smp_call_function_single(first_cpu(iucv_buffer_cpumask),
-                                                iucv_allow_cpu, NULL, 0, 1);
+                                                iucv_allow_cpu, NULL, 1);
                 break;
         }
         return NOTIFY_OK;
@@@ -652,7 -652,7 +652,7 @@@ static void iucv_cleanup_queue(void
          * pending interrupts force them to the work queue by calling
          * an empty function on all cpus.
          */
-       smp_call_function(__iucv_cleanup_queue, NULL, 0, 1);
+       smp_call_function(__iucv_cleanup_queue, NULL, 1);
         spin_lock_irq(&iucv_queue_lock);
         list_for_each_entry_safe(p, n, &iucv_task_queue, list) {
                 /* Remove stale work items from the task queue. */
@@@ -1559,11 -1559,16 +1559,11 @@@ static void iucv_external_interrupt(u1
   
         p = iucv_irq_data[smp_processor_id()];
         if (p->ippathid >= iucv_max_pathid) {
- -              printk(KERN_WARNING "iucv_do_int: Got interrupt with "
- -                     "pathid %d > max_connections (%ld)\n",
- -                     p->ippathid, iucv_max_pathid - 1);
+ +              WARN_ON(p->ippathid >= iucv_max_pathid);
                 iucv_sever_pathid(p->ippathid, iucv_error_no_listener);
                 return;
         }
- -      if (p->iptype  < 0x01 || p->iptype > 0x09) {
- -              printk(KERN_ERR "iucv_do_int: unknown iucv interrupt\n");
- -              return;
- -      }
+ +      BUG_ON(p->iptype  < 0x01 || p->iptype > 0x09);
         work = kmalloc(sizeof(struct iucv_irq_list), GFP_ATOMIC);
         if (!work) {
                 printk(KERN_WARNING "iucv_external_interrupt: out of memory\n");
author	Ingo Molnar <mingo@elte.hu>
	Tue, 15 Jul 2008 19:55:59 +0000 (21:55 +0200)
committer	Ingo Molnar <mingo@elte.hu>
	Tue, 15 Jul 2008 19:55:59 +0000 (21:55 +0200)
		1	2
arch/arm/Kconfig	patch \|	diff1 \|	diff2 \|	blob \| history
arch/mips/Kconfig	patch \|	diff1 \|	diff2 \|	blob \| history
arch/powerpc/Kconfig	patch \|	diff1 \|	diff2 \|	blob \| history
arch/s390/appldata/appldata_base.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/s390/kernel/time.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/Kconfig	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/apic_32.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/cpu/mcheck/mce_64.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/cpu/mtrr/main.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/cpu/perfctr-watchdog.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/cpuid.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/entry_64.S	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/io_apic_32.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/io_apic_64.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/irqinit_64.c	patch \|	diff1 \|	\|	blob \| history
arch/x86/kernel/ldt.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/nmi.c	patch \|	diff1 \|	\|	blob \| history
arch/x86/kernel/process.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/smpboot.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/tlb_64.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/vsyscall_64.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/mach-voyager/voyager_smp.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/mm/pageattr.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/oprofile/nmi_int.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/xen/enlighten.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/xen/mmu.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/xen/smp.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/xen/xen-ops.h	patch \|	diff1 \|	diff2 \|	blob \| history
fs/buffer.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/asm-x86/hw_irq.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/asm-x86/irq_vectors.h	patch \|	diff1 \|	\|	blob \| history
include/asm-x86/smp.h	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/Makefile	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/hrtimer.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/softirq.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/time/tick-broadcast.c	patch \|	diff1 \|	diff2 \|	blob \| history
mm/page_alloc.c	patch \|	diff1 \|	diff2 \|	blob \| history
mm/slab.c	patch \|	diff1 \|	diff2 \|	blob \| history
mm/slub.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/iucv/iucv.c	patch \|	diff1 \|	diff2 \|	blob \| history