Merge branch 'core/percpu' into perfcounters/core
author Ingo Molnar <mingo@elte.hu>
Sun, 18 Jan 2009 17:15:49 +0000 (18:15 +0100)
committer Ingo Molnar <mingo@elte.hu>
Sun, 18 Jan 2009 17:15:49 +0000 (18:15 +0100)
Conflicts:
arch/x86/include/asm/pda.h

We merge tip/core/percpu into tip/perfcounters/core because of a
semantic and contextual conflict: the former eliminates the PDA,
while the latter extends it with the apic_perf_irqs field.

Resolve the conflict by moving the new field to the irq_cpustat
structure on 64-bit too.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
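
The relevant hunks are in hardirq_64.h and irq.c below. For quick orientation,
here is a condensed sketch of the resolved 64-bit state, pieced together from
those hunks (field list abbreviated); it illustrates the resolution rather than
reproducing either file verbatim:

    /*
     * After the merge, the perf counter IRQ count lives in the per-cpu
     * irq_cpustat_t on 64-bit as well, instead of in the (now removed) PDA,
     * and is updated through the new percpu accessors.
     */
    typedef struct {
            unsigned int __softirq_pending;
            unsigned int __nmi_count;       /* arch dependent */
            unsigned int apic_timer_irqs;   /* arch dependent */
            unsigned int apic_perf_irqs;    /* arch dependent */
            /* ... remaining irq_*_count fields ... */
    } ____cacheline_aligned irq_cpustat_t;

    DECLARE_PER_CPU(irq_cpustat_t, irq_stat);

    /* 64-bit now bumps the counters via the per-cpu accessors: */
    #define inc_irq_stat(member)    percpu_add(irq_stat.member, 1)

With irq_stat being an irq_cpustat_t on both bitnesses, arch/x86/kernel/irq.c
can use a single irq_stats() definition and sum apic_perf_irqs into the
/proc/interrupts output (see the irq.c hunk below).
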
12 files changed:
arch/powerpc/kernel/irq.c
arch/x86/ia32/ia32entry.S
arch/x86/include/asm/hardirq_32.h
arch/x86/include/asm/hardirq_64.h
arch/x86/include/asm/irq_vectors.h
arch/x86/include/asm/thread_info.h
arch/x86/kernel/apic.c
arch/x86/kernel/cpu/common.c
arch/x86/kernel/entry_64.S
arch/x86/kernel/irq.c
kernel/Makefile
kernel/sched.c

diff --combined arch/powerpc/kernel/irq.c
index f5ae4878ccef0169bd910b889600a7faa6807258,ad1e5ac721d86f557bac20079b407c8929459273..7f8e6a92c5a1b3470c234c069c1291bd13e5e731
@@@ -104,13 -104,6 +104,13 @@@ static inline notrace void set_soft_ena
        : : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled)));
  }
  
 +#ifdef CONFIG_PERF_COUNTERS
 +notrace void __weak perf_counter_do_pending(void)
 +{
 +      set_perf_counter_pending(0);
 +}
 +#endif
 +
  notrace void raw_local_irq_restore(unsigned long en)
  {
        /*
                        iseries_handle_interrupts();
        }
  
 +      if (get_perf_counter_pending())
 +              perf_counter_do_pending();
 +
        /*
         * if (get_paca()->hard_enabled) return;
         * But again we need to take care that gcc gets hard_enabled directly
@@@ -241,7 -231,7 +241,7 @@@ void fixup_irqs(cpumask_t map
                if (irq_desc[irq].status & IRQ_PER_CPU)
                        continue;
  
-               cpus_and(mask, irq_desc[irq].affinity, map);
+               cpumask_and(&mask, irq_desc[irq].affinity, &map);
                if (any_online_cpu(mask) == NR_CPUS) {
                        printk("Breaking affinity for irq %i\n", irq);
                        mask = map;
diff --combined arch/x86/ia32/ia32entry.S
index 3c14ed07dc4e75f20dfc9fc86677a62551bf82c1,9c79b247700801e6467b5629e7ee757f09a44b7e..01e7c4c5c7fe3b8d9405543cb981d4df2c0d26f6
@@@ -112,8 -112,8 +112,8 @@@ ENTRY(ia32_sysenter_target
        CFI_DEF_CFA     rsp,0
        CFI_REGISTER    rsp,rbp
        SWAPGS_UNSAFE_STACK
-       movq    %gs:pda_kernelstack, %rsp
-       addq    $(PDA_STACKOFFSET),%rsp 
+       movq    PER_CPU_VAR(kernel_stack), %rsp
+       addq    $(KERNEL_STACK_OFFSET),%rsp
        /*
         * No need to follow this irqs on/off section: the syscall
         * disabled irqs, here we enable it straight after entry:
@@@ -273,13 -273,13 +273,13 @@@ ENDPROC(ia32_sysenter_target
  ENTRY(ia32_cstar_target)
        CFI_STARTPROC32 simple
        CFI_SIGNAL_FRAME
-       CFI_DEF_CFA     rsp,PDA_STACKOFFSET
+       CFI_DEF_CFA     rsp,KERNEL_STACK_OFFSET
        CFI_REGISTER    rip,rcx
        /*CFI_REGISTER  rflags,r11*/
        SWAPGS_UNSAFE_STACK
        movl    %esp,%r8d
        CFI_REGISTER    rsp,r8
-       movq    %gs:pda_kernelstack,%rsp
+       movq    PER_CPU_VAR(kernel_stack),%rsp
        /*
         * No need to follow this irqs on/off section: the syscall
         * disabled irqs and here we enable it straight after entry:
@@@ -823,8 -823,7 +823,8 @@@ ia32_sys_call_table
        .quad compat_sys_signalfd4
        .quad sys_eventfd2
        .quad sys_epoll_create1
 -      .quad sys_dup3                  /* 330 */
 +      .quad sys_dup3                          /* 330 */
        .quad sys_pipe2
        .quad sys_inotify_init1
 +      .quad sys_perf_counter_open
  ia32_syscall_end:
diff --combined arch/x86/include/asm/hardirq_32.h
index 7a07897a78887f59c6bbccbbdbe1f82669f37bba,d4b5d731073fb194f75de05f3878ad157b048b21..7838276bfe512f1d2d8a937d93f9fa05e6fca25b
@@@ -9,7 -9,6 +9,7 @@@ typedef struct 
        unsigned long idle_timestamp;
        unsigned int __nmi_count;       /* arch dependent */
        unsigned int apic_timer_irqs;   /* arch dependent */
 +      unsigned int apic_perf_irqs;    /* arch dependent */
        unsigned int irq0_irqs;
        unsigned int irq_resched_count;
        unsigned int irq_call_count;
@@@ -20,6 -19,9 +20,9 @@@
  
  DECLARE_PER_CPU(irq_cpustat_t, irq_stat);
  
+ /* We can have at most NR_VECTORS irqs routed to a cpu at a time */
+ #define MAX_HARDIRQS_PER_CPU NR_VECTORS
  #define __ARCH_IRQ_STAT
  #define __IRQ_STAT(cpu, member) (per_cpu(irq_stat, cpu).member)
  
diff --combined arch/x86/include/asm/hardirq_64.h
index b5a6b5d56704c89330d2f88f515c5d25281e105f,a65bab20f6ce5cbe737dd365407ddaff518e559c..42930b2792155014162ad82768bc9789d410dab6
@@@ -3,22 -3,36 +3,37 @@@
  
  #include <linux/threads.h>
  #include <linux/irq.h>
- #include <asm/pda.h>
  #include <asm/apic.h>
  
+ typedef struct {
+       unsigned int __softirq_pending;
+       unsigned int __nmi_count;       /* arch dependent */
+       unsigned int apic_timer_irqs;   /* arch dependent */
++      unsigned int apic_perf_irqs;    /* arch dependent */
+       unsigned int irq0_irqs;
+       unsigned int irq_resched_count;
+       unsigned int irq_call_count;
+       unsigned int irq_tlb_count;
+       unsigned int irq_thermal_count;
+       unsigned int irq_spurious_count;
+       unsigned int irq_threshold_count;
+ } ____cacheline_aligned irq_cpustat_t;
+ DECLARE_PER_CPU(irq_cpustat_t, irq_stat);
  /* We can have at most NR_VECTORS irqs routed to a cpu at a time */
  #define MAX_HARDIRQS_PER_CPU NR_VECTORS
  
  #define __ARCH_IRQ_STAT 1
  
- #define inc_irq_stat(member)  add_pda(member, 1)
+ #define inc_irq_stat(member)  percpu_add(irq_stat.member, 1)
  
- #define local_softirq_pending() read_pda(__softirq_pending)
+ #define local_softirq_pending() percpu_read(irq_stat.__softirq_pending)
  
  #define __ARCH_SET_SOFTIRQ_PENDING 1
  
- #define set_softirq_pending(x) write_pda(__softirq_pending, (x))
- #define or_softirq_pending(x)  or_pda(__softirq_pending, (x))
+ #define set_softirq_pending(x) percpu_write(irq_stat.__softirq_pending, (x))
+ #define or_softirq_pending(x)  percpu_or(irq_stat.__softirq_pending, (x))
  
  extern void ack_bad_irq(unsigned int irq);
  
diff --combined arch/x86/include/asm/irq_vectors.h
index 21a0b92027f5850455b45f467300aad42b887a2f,a16a2ab2b42998a964d20c990ba88365d07bdfe4..1554d0236e03c77cb4004f66dfdf92e5c0b76191
   */
  #define LOCAL_TIMER_VECTOR    0xef
  
 +/*
 + * Performance monitoring interrupt vector:
 + */
 +#define LOCAL_PERF_VECTOR     0xee
 +
  /*
   * First APIC vector available to drivers: (vectors 0x30-0xee) we
   * start at 0x31(0x41) to spread out vectors evenly between priority
  
  #if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_X86_VOYAGER)
  
+ #include <asm/apicnum.h>      /* need MAX_IO_APICS */
  #ifndef CONFIG_SPARSE_IRQ
  # if NR_CPUS < MAX_IO_APICS
  #  define NR_IRQS (NR_VECTORS + (32 * NR_CPUS))
  #  define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS))
  # endif
  #else
- # if (8 * NR_CPUS) > (32 * MAX_IO_APICS)
- #  define NR_IRQS (NR_VECTORS + (8 * NR_CPUS))
- # else
- #  define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS))
- # endif
+ # define NR_IRQS                                      \
+       ((8 * NR_CPUS) > (32 * MAX_IO_APICS) ?          \
+               (NR_VECTORS + (8 * NR_CPUS)) :          \
+       (NR_VECTORS + (32 * MAX_IO_APICS)))
  #endif
  
  #elif defined(CONFIG_X86_VOYAGER)
diff --combined arch/x86/include/asm/thread_info.h
index efdf93820aedda3abe54b82f697740f363fd5ffc,b46f8ca007b5754ce1282809d6a7e8669104ed81..f38488989db7c2f473b531bc05b1fc46922ce51f
@@@ -82,7 -82,6 +82,7 @@@ struct thread_info 
  #define TIF_SYSCALL_AUDIT     7       /* syscall auditing active */
  #define TIF_SECCOMP           8       /* secure computing */
  #define TIF_MCE_NOTIFY                10      /* notify userspace of an MCE */
 +#define TIF_PERF_COUNTERS     11      /* notify perf counter work */
  #define TIF_NOTSC             16      /* TSC is not accessible in userland */
  #define TIF_IA32              17      /* 32bit process */
  #define TIF_FORK              18      /* ret_from_fork */
  #define _TIF_SYSCALL_AUDIT    (1 << TIF_SYSCALL_AUDIT)
  #define _TIF_SECCOMP          (1 << TIF_SECCOMP)
  #define _TIF_MCE_NOTIFY               (1 << TIF_MCE_NOTIFY)
 +#define _TIF_PERF_COUNTERS    (1 << TIF_PERF_COUNTERS)
  #define _TIF_NOTSC            (1 << TIF_NOTSC)
  #define _TIF_IA32             (1 << TIF_IA32)
  #define _TIF_FORK             (1 << TIF_FORK)
  
  /* Only used for 64 bit */
  #define _TIF_DO_NOTIFY_MASK                                           \
 -      (_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_NOTIFY_RESUME)
 +      (_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_PERF_COUNTERS|_TIF_NOTIFY_RESUME)
  
  /* flags to check in __switch_to() */
  #define _TIF_WORK_CTXSW                                                       \
@@@ -196,25 -194,21 +196,21 @@@ static inline struct thread_info *curre
  
  #else /* X86_32 */
  
- #include <asm/pda.h>
+ #include <asm/percpu.h>
+ #define KERNEL_STACK_OFFSET (5*8)
  
  /*
   * macros/functions for gaining access to the thread information structure
   * preempt_count needs to be 1 initially, until the scheduler is functional.
   */
  #ifndef __ASSEMBLY__
- static inline struct thread_info *current_thread_info(void)
- {
-       struct thread_info *ti;
-       ti = (void *)(read_pda(kernelstack) + PDA_STACKOFFSET - THREAD_SIZE);
-       return ti;
- }
+ DECLARE_PER_CPU(unsigned long, kernel_stack);
  
- /* do not use in interrupt context */
- static inline struct thread_info *stack_thread_info(void)
+ static inline struct thread_info *current_thread_info(void)
  {
        struct thread_info *ti;
-       asm("andq %%rsp,%0; " : "=r" (ti) : "0" (~(THREAD_SIZE - 1)));
+       ti = (void *)(percpu_read(kernel_stack) +
+                     KERNEL_STACK_OFFSET - THREAD_SIZE);
        return ti;
  }
  
  
  /* how to get the thread information struct from ASM */
  #define GET_THREAD_INFO(reg) \
-       movq %gs:pda_kernelstack,reg ; \
-       subq $(THREAD_SIZE-PDA_STACKOFFSET),reg
+       movq PER_CPU_VAR(kernel_stack),reg ; \
+       subq $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg
  
  #endif
  
diff --combined arch/x86/kernel/apic.c
index d2d17b8d10f863f78defdef3f42aeb7f073fa4f8,4857879558346af6a31e519a07a88a0e97abd511..e9af14f748ea95a9ae2f7d374370ade92fd22b96
@@@ -35,7 -35,6 +35,7 @@@
  #include <linux/nmi.h>
  #include <linux/timex.h>
  
 +#include <asm/perf_counter.h>
  #include <asm/atomic.h>
  #include <asm/mtrr.h>
  #include <asm/mpspec.h>
@@@ -48,6 -47,7 +48,7 @@@
  #include <asm/proto.h>
  #include <asm/apic.h>
  #include <asm/i8259.h>
+ #include <asm/smp.h>
  
  #include <mach_apic.h>
  #include <mach_apicdef.h>
@@@ -895,6 -895,10 +896,10 @@@ void disable_local_APIC(void
  {
        unsigned int value;
  
+       /* APIC hasn't been mapped yet */
+       if (!apic_phys)
+               return;
        clear_local_APIC();
  
        /*
@@@ -1126,6 -1130,11 +1131,11 @@@ void __cpuinit setup_local_APIC(void
        unsigned int value;
        int i, j;
  
+       if (disable_apic) {
+               disable_ioapic_setup();
+               return;
+       }
  #ifdef CONFIG_X86_32
        /* Pound the ESR really hard over the head with a big hammer - mbligh */
        if (lapic_is_integrated() && esr_disable) {
                apic_write(APIC_ESR, 0);
        }
  #endif
 +      perf_counters_lapic_init(0);
  
        preempt_disable();
  
@@@ -1567,11 -1575,11 +1577,11 @@@ int apic_version[MAX_APICS]
  
  int __init APIC_init_uniprocessor(void)
  {
- #ifdef CONFIG_X86_64
        if (disable_apic) {
                pr_info("Apic disabled\n");
                return -1;
        }
+ #ifdef CONFIG_X86_64
        if (!cpu_has_apic) {
                disable_apic = 1;
                pr_info("Apic disabled by BIOS\n");
@@@ -1869,17 -1877,8 +1879,8 @@@ void __cpuinit generic_processor_info(i
  #endif
  
  #if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64)
-       /* are we being called early in kernel startup? */
-       if (early_per_cpu_ptr(x86_cpu_to_apicid)) {
-               u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
-               u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
-               cpu_to_apicid[cpu] = apicid;
-               bios_cpu_apicid[cpu] = apicid;
-       } else {
-               per_cpu(x86_cpu_to_apicid, cpu) = apicid;
-               per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
-       }
+       early_per_cpu(x86_cpu_to_apicid, cpu) = apicid;
+       early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
  #endif
  
        set_cpu_possible(cpu, true);
diff --combined arch/x86/kernel/cpu/common.c
index 667e5d561ed77f39fadbc547421bef1f2832910b,7976a6a0f65c1c626c4fb983e7c45f7f6d84198d..95eb30e1e677d467c7185d7992789870469d2196
  #include <asm/mmu_context.h>
  #include <asm/mtrr.h>
  #include <asm/mce.h>
 +#include <asm/perf_counter.h>
  #include <asm/pat.h>
  #include <asm/asm.h>
  #include <asm/numa.h>
  #include <asm/smp.h>
+ #include <asm/cpu.h>
+ #include <asm/cpumask.h>
  #ifdef CONFIG_X86_LOCAL_APIC
  #include <asm/mpspec.h>
  #include <asm/apic.h>
@@@ -773,7 -774,6 +775,7 @@@ void __init identify_boot_cpu(void
  #else
        vgetcpu_set_mode();
  #endif
 +      init_hw_perf_counters();
  }
  
  void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
@@@ -879,54 -879,34 +881,34 @@@ static __init int setup_disablecpuid(ch
  __setup("clearcpuid=", setup_disablecpuid);
  
  #ifdef CONFIG_X86_64
- struct x8664_pda **_cpu_pda __read_mostly;
- EXPORT_SYMBOL(_cpu_pda);
  struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
  
- static char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss;
+ DEFINE_PER_CPU_PAGE_ALIGNED(char[IRQ_STACK_SIZE], irq_stack);
+ #ifdef CONFIG_SMP
+ DEFINE_PER_CPU(char *, irq_stack_ptr);        /* will be set during per cpu init */
+ #else
+ DEFINE_PER_CPU(char *, irq_stack_ptr) =
+       per_cpu_var(irq_stack) + IRQ_STACK_SIZE - 64;
+ #endif
+ DEFINE_PER_CPU(unsigned long, kernel_stack) =
+       (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
+ EXPORT_PER_CPU_SYMBOL(kernel_stack);
+ DEFINE_PER_CPU(unsigned int, irq_count) = -1;
  
  void __cpuinit pda_init(int cpu)
  {
-       struct x8664_pda *pda = cpu_pda(cpu);
        /* Setup up data that may be needed in __get_free_pages early */
        loadsegment(fs, 0);
        loadsegment(gs, 0);
-       /* Memory clobbers used to order PDA accessed */
-       mb();
-       wrmsrl(MSR_GS_BASE, pda);
-       mb();
-       pda->cpunumber = cpu;
-       pda->irqcount = -1;
-       pda->kernelstack = (unsigned long)stack_thread_info() -
-                                PDA_STACKOFFSET + THREAD_SIZE;
-       pda->active_mm = &init_mm;
-       pda->mmu_state = 0;
-       if (cpu == 0) {
-               /* others are initialized in smpboot.c */
-               pda->pcurrent = &init_task;
-               pda->irqstackptr = boot_cpu_stack;
-               pda->irqstackptr += IRQSTACKSIZE - 64;
-       } else {
-               if (!pda->irqstackptr) {
-                       pda->irqstackptr = (char *)
-                               __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
-                       if (!pda->irqstackptr)
-                               panic("cannot allocate irqstack for cpu %d",
-                                     cpu);
-                       pda->irqstackptr += IRQSTACKSIZE - 64;
-               }
  
-               if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)
-                       pda->nodenumber = cpu_to_node(cpu);
-       }
+       load_pda_offset(cpu);
  }
  
- static char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
-                                 DEBUG_STKSZ] __page_aligned_bss;
+ static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
+       [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ])
+       __aligned(PAGE_SIZE);
  
  extern asmlinkage void ignore_sysret(void);
  
@@@ -984,15 -964,18 +966,18 @@@ void __cpuinit cpu_init(void
        struct tss_struct *t = &per_cpu(init_tss, cpu);
        struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu);
        unsigned long v;
-       char *estacks = NULL;
        struct task_struct *me;
        int i;
  
        /* CPU 0 is initialised in head64.c */
        if (cpu != 0)
                pda_init(cpu);
-       else
-               estacks = boot_exception_stacks;
+ #ifdef CONFIG_NUMA
+       if (cpu != 0 && percpu_read(node_number) == 0 &&
+           cpu_to_node(cpu) != NUMA_NO_NODE)
+               percpu_write(node_number, cpu_to_node(cpu));
+ #endif
  
        me = current;
  
         * set up and load the per-CPU TSS
         */
        if (!orig_ist->ist[0]) {
-               static const unsigned int order[N_EXCEPTION_STACKS] = {
-                 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
-                 [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
+               static const unsigned int sizes[N_EXCEPTION_STACKS] = {
+                 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
+                 [DEBUG_STACK - 1] = DEBUG_STKSZ
                };
+               char *estacks = per_cpu(exception_stacks, cpu);
                for (v = 0; v < N_EXCEPTION_STACKS; v++) {
-                       if (cpu) {
-                               estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
-                               if (!estacks)
-                                       panic("Cannot allocate exception "
-                                             "stack %ld %d\n", v, cpu);
-                       }
-                       estacks += PAGE_SIZE << order[v];
+                       estacks += sizes[v];
                        orig_ist->ist[v] = t->x86_tss.ist[v] =
                                        (unsigned long)estacks;
                }
diff --combined arch/x86/kernel/entry_64.S
index 1954a96622036f3409d5381c4679d5260a111eaa,c52b60919163af41af58a7068fcba266fb0c657e..c092e7d2686d3015d655cf08beb06796e928ab01
@@@ -52,6 -52,7 +52,7 @@@
  #include <asm/irqflags.h>
  #include <asm/paravirt.h>
  #include <asm/ftrace.h>
+ #include <asm/percpu.h>
  
  /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
  #include <linux/elf-em.h>
@@@ -209,7 -210,7 +210,7 @@@ ENTRY(native_usergs_sysret64
  
        /* %rsp:at FRAMEEND */
        .macro FIXUP_TOP_OF_STACK tmp offset=0
-       movq %gs:pda_oldrsp,\tmp
+       movq PER_CPU_VAR(old_rsp),\tmp
        movq \tmp,RSP+\offset(%rsp)
        movq $__USER_DS,SS+\offset(%rsp)
        movq $__USER_CS,CS+\offset(%rsp)
  
        .macro RESTORE_TOP_OF_STACK tmp offset=0
        movq RSP+\offset(%rsp),\tmp
-       movq \tmp,%gs:pda_oldrsp
+       movq \tmp,PER_CPU_VAR(old_rsp)
        movq EFLAGS+\offset(%rsp),\tmp
        movq \tmp,R11+\offset(%rsp)
        .endm
@@@ -336,15 -337,15 +337,15 @@@ ENTRY(save_args
        je 1f
        SWAPGS
        /*
-        * irqcount is used to check if a CPU is already on an interrupt stack
+        * irq_count is used to check if a CPU is already on an interrupt stack
         * or not. While this is essentially redundant with preempt_count it is
         * a little cheaper to use a separate counter in the PDA (short of
         * moving irq_enter into assembly, which would be too much work)
         */
- 1:    incl %gs:pda_irqcount
+ 1:    incl PER_CPU_VAR(irq_count)
        jne 2f
        popq_cfi %rax                   /* move return address... */
-       mov %gs:pda_irqstackptr,%rsp
+       mov PER_CPU_VAR(irq_stack_ptr),%rsp
        EMPTY_FRAME 0
        pushq_cfi %rax                  /* ... to the new stack */
        /*
@@@ -467,7 -468,7 +468,7 @@@ END(ret_from_fork
  ENTRY(system_call)
        CFI_STARTPROC   simple
        CFI_SIGNAL_FRAME
-       CFI_DEF_CFA     rsp,PDA_STACKOFFSET
+       CFI_DEF_CFA     rsp,KERNEL_STACK_OFFSET
        CFI_REGISTER    rip,rcx
        /*CFI_REGISTER  rflags,r11*/
        SWAPGS_UNSAFE_STACK
         */
  ENTRY(system_call_after_swapgs)
  
-       movq    %rsp,%gs:pda_oldrsp
-       movq    %gs:pda_kernelstack,%rsp
+       movq    %rsp,PER_CPU_VAR(old_rsp)
+       movq    PER_CPU_VAR(kernel_stack),%rsp
        /*
         * No need to follow this irqs off/on section - it's straight
         * and short:
@@@ -522,7 -523,7 +523,7 @@@ sysret_check
        CFI_REGISTER    rip,rcx
        RESTORE_ARGS 0,-ARG_SKIP,1
        /*CFI_REGISTER  rflags,r11*/
-       movq    %gs:pda_oldrsp, %rsp
+       movq    PER_CPU_VAR(old_rsp), %rsp
        USERGS_SYSRET64
  
        CFI_RESTORE_STATE
@@@ -832,11 -833,11 +833,11 @@@ common_interrupt
        XCPT_FRAME
        addq $-0x80,(%rsp)              /* Adjust vector to [-256,-1] range */
        interrupt do_IRQ
-       /* 0(%rsp): oldrsp-ARGOFFSET */
+       /* 0(%rsp): old_rsp-ARGOFFSET */
  ret_from_intr:
        DISABLE_INTERRUPTS(CLBR_NONE)
        TRACE_IRQS_OFF
-       decl %gs:pda_irqcount
+       decl PER_CPU_VAR(irq_count)
        leaveq
        CFI_DEF_CFA_REGISTER    rsp
        CFI_ADJUST_CFA_OFFSET   -8
@@@ -1024,11 -1025,6 +1025,11 @@@ apicinterrupt ERROR_APIC_VECTOR 
  apicinterrupt SPURIOUS_APIC_VECTOR \
        spurious_interrupt smp_spurious_interrupt
  
 +#ifdef CONFIG_PERF_COUNTERS
 +apicinterrupt LOCAL_PERF_VECTOR \
 +      perf_counter_interrupt smp_perf_counter_interrupt
 +#endif
 +
  /*
   * Exception entry points.
   */
@@@ -1077,10 -1073,10 +1078,10 @@@ ENTRY(\sym
        TRACE_IRQS_OFF
        movq %rsp,%rdi          /* pt_regs pointer */
        xorl %esi,%esi          /* no error code */
-       movq %gs:pda_data_offset, %rbp
-       subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
+       PER_CPU(init_tss, %rbp)
+       subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp)
        call \do_sym
-       addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
+       addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp)
        jmp paranoid_exit       /* %ebx: no swapgs flag */
        CFI_ENDPROC
  END(\sym)
@@@ -1264,14 -1260,14 +1265,14 @@@ ENTRY(call_softirq
        CFI_REL_OFFSET rbp,0
        mov  %rsp,%rbp
        CFI_DEF_CFA_REGISTER rbp
-       incl %gs:pda_irqcount
-       cmove %gs:pda_irqstackptr,%rsp
+       incl PER_CPU_VAR(irq_count)
+       cmove PER_CPU_VAR(irq_stack_ptr),%rsp
        push  %rbp                      # backlink for old unwinder
        call __do_softirq
        leaveq
        CFI_DEF_CFA_REGISTER    rsp
        CFI_ADJUST_CFA_OFFSET   -8
-       decl %gs:pda_irqcount
+       decl PER_CPU_VAR(irq_count)
        ret
        CFI_ENDPROC
  END(call_softirq)
@@@ -1301,15 -1297,15 +1302,15 @@@ ENTRY(xen_do_hypervisor_callback)   # d
        movq %rdi, %rsp            # we don't return, adjust the stack frame
        CFI_ENDPROC
        DEFAULT_FRAME
- 11:   incl %gs:pda_irqcount
+ 11:   incl PER_CPU_VAR(irq_count)
        movq %rsp,%rbp
        CFI_DEF_CFA_REGISTER rbp
-       cmovzq %gs:pda_irqstackptr,%rsp
+       cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
        pushq %rbp                      # backlink for old unwinder
        call xen_evtchn_do_upcall
        popq %rsp
        CFI_DEF_CFA_REGISTER rsp
-       decl %gs:pda_irqcount
+       decl PER_CPU_VAR(irq_count)
        jmp  error_exit
        CFI_ENDPROC
  END(do_hypervisor_callback)
diff --combined arch/x86/kernel/irq.c
index 22f650db917fc2615f34741b11035bb74a6d5656,8b30d0c2512cefa30ac7492039d3b014a8817699..a6bca1d33a8aca4e701dc8b9ad7e4d6ffda1d2ef
@@@ -36,11 -36,7 +36,7 @@@ void ack_bad_irq(unsigned int irq
  #endif
  }
  
- #ifdef CONFIG_X86_32
- # define irq_stats(x)         (&per_cpu(irq_stat, x))
- #else
- # define irq_stats(x)         cpu_pda(x)
- #endif
+ #define irq_stats(x)          (&per_cpu(irq_stat, x))
  /*
   * /proc/interrupts printing:
   */
@@@ -57,10 -53,6 +53,10 @@@ static int show_other_interrupts(struc
        for_each_online_cpu(j)
                seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs);
        seq_printf(p, "  Local timer interrupts\n");
 +      seq_printf(p, "CNT: ");
 +      for_each_online_cpu(j)
 +              seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs);
 +      seq_printf(p, "  Performance counter interrupts\n");
  #endif
  #ifdef CONFIG_SMP
        seq_printf(p, "RES: ");
@@@ -168,7 -160,6 +164,7 @@@ u64 arch_irq_stat_cpu(unsigned int cpu
  
  #ifdef CONFIG_X86_LOCAL_APIC
        sum += irq_stats(cpu)->apic_timer_irqs;
 +      sum += irq_stats(cpu)->apic_perf_irqs;
  #endif
  #ifdef CONFIG_SMP
        sum += irq_stats(cpu)->irq_resched_count;
diff --combined kernel/Makefile
index 8b2628c7914b039a90be2647b1b8926751d2dc7a,2aebc4cd787810a5e71ca4ea4aa35301220a6355..e4115926c536af5548e2e9bc6a6c30f945776b71
@@@ -40,7 -40,11 +40,11 @@@ obj-$(CONFIG_RT_MUTEXES) += rtmutex.
  obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
  obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
  obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
- obj-$(CONFIG_USE_GENERIC_SMP_HELPERS) += smp.o
+ ifeq ($(CONFIG_USE_GENERIC_SMP_HELPERS),y)
+ obj-y += smp.o
+ else
+ obj-y += up.o
+ endif
  obj-$(CONFIG_SMP) += spinlock.o
  obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
  obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
@@@ -89,7 -93,6 +93,7 @@@ obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT
  obj-$(CONFIG_FUNCTION_TRACER) += trace/
  obj-$(CONFIG_TRACING) += trace/
  obj-$(CONFIG_SMP) += sched_cpupri.o
 +obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o
  
  ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
  # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff --combined kernel/sched.c
index 43fd21233b93bb1c350673621f64c1508217fb20,8be2c13b50d018cc69280829e14e31612d4deddd..ce9fecab5f0201c054ed3a6b477fc0745025f1db
@@@ -125,6 -125,9 +125,9 @@@ DEFINE_TRACE(sched_switch)
  DEFINE_TRACE(sched_migrate_task);
  
  #ifdef CONFIG_SMP
+ static void double_rq_lock(struct rq *rq1, struct rq *rq2);
  /*
   * Divide a load by a sched group cpu_power : (load / sg->__cpu_power)
   * Since cpu_power is a 'constant', we can use a reciprocal divide.
@@@ -665,7 -668,7 +668,7 @@@ static inline int cpu_of(struct rq *rq
  #define task_rq(p)            cpu_rq(task_cpu(p))
  #define cpu_curr(cpu)         (cpu_rq(cpu)->curr)
  
 -static inline void update_rq_clock(struct rq *rq)
 +inline void update_rq_clock(struct rq *rq)
  {
        rq->clock = sched_clock_cpu(cpu_of(rq));
  }
@@@ -976,26 -979,6 +979,26 @@@ static struct rq *task_rq_lock(struct t
        }
  }
  
 +void curr_rq_lock_irq_save(unsigned long *flags)
 +      __acquires(rq->lock)
 +{
 +      struct rq *rq;
 +
 +      local_irq_save(*flags);
 +      rq = cpu_rq(smp_processor_id());
 +      spin_lock(&rq->lock);
 +}
 +
 +void curr_rq_unlock_irq_restore(unsigned long *flags)
 +      __releases(rq->lock)
 +{
 +      struct rq *rq;
 +
 +      rq = cpu_rq(smp_processor_id());
 +      spin_unlock(&rq->lock);
 +      local_irq_restore(*flags);
 +}
 +
  void task_rq_unlock_wait(struct task_struct *p)
  {
        struct rq *rq = task_rq(p);
@@@ -1902,14 -1885,12 +1905,14 @@@ void set_task_cpu(struct task_struct *p
                p->se.sleep_start -= clock_offset;
        if (p->se.block_start)
                p->se.block_start -= clock_offset;
 +#endif
        if (old_cpu != new_cpu) {
 -              schedstat_inc(p, se.nr_migrations);
 +              p->se.nr_migrations++;
 +#ifdef CONFIG_SCHEDSTATS
                if (task_hot(p, old_rq->clock, NULL))
                        schedstat_inc(p, se.nr_forced2_migrations);
 -      }
  #endif
 +      }
        p->se.vruntime -= old_cfsrq->min_vruntime -
                                         new_cfsrq->min_vruntime;
  
@@@ -2261,27 -2242,6 +2264,27 @@@ static int sched_balance_self(int cpu, 
  
  #endif /* CONFIG_SMP */
  
 +/**
 + * task_oncpu_function_call - call a function on the cpu on which a task runs
 + * @p:                the task to evaluate
 + * @func:     the function to be called
 + * @info:     the function call argument
 + *
 + * Calls the function @func when the task is currently running. This might
 + * be on the current CPU, which just calls the function directly
 + */
 +void task_oncpu_function_call(struct task_struct *p,
 +                            void (*func) (void *info), void *info)
 +{
 +      int cpu;
 +
 +      preempt_disable();
 +      cpu = task_cpu(p);
 +      if (task_curr(p))
 +              smp_call_function_single(cpu, func, info, 1);
 +      preempt_enable();
 +}
 +
  /***
   * try_to_wake_up - wake up a thread
   * @p: the to-be-woken-up thread
@@@ -2424,7 -2384,6 +2427,7 @@@ static void __sched_fork(struct task_st
        p->se.exec_start                = 0;
        p->se.sum_exec_runtime          = 0;
        p->se.prev_sum_exec_runtime     = 0;
 +      p->se.nr_migrations             = 0;
        p->se.last_wakeup               = 0;
        p->se.avg_overlap               = 0;
  
@@@ -2645,7 -2604,6 +2648,7 @@@ static void finish_task_switch(struct r
         */
        prev_state = prev->state;
        finish_arch_switch(prev);
 +      perf_counter_task_sched_in(current, cpu_of(rq));
        finish_lock_switch(rq, prev);
  #ifdef CONFIG_SMP
        if (current->sched_class->post_schedule)
@@@ -4170,29 -4128,6 +4173,29 @@@ DEFINE_PER_CPU(struct kernel_stat, ksta
  
  EXPORT_PER_CPU_SYMBOL(kstat);
  
 +/*
 + * Return any ns on the sched_clock that have not yet been banked in
 + * @p in case that task is currently running.
 + */
 +unsigned long long __task_delta_exec(struct task_struct *p, int update)
 +{
 +      s64 delta_exec;
 +      struct rq *rq;
 +
 +      rq = task_rq(p);
 +      WARN_ON_ONCE(!runqueue_is_locked());
 +      WARN_ON_ONCE(!task_current(rq, p));
 +
 +      if (update)
 +              update_rq_clock(rq);
 +
 +      delta_exec = rq->clock - p->se.exec_start;
 +
 +      WARN_ON_ONCE(delta_exec < 0);
 +
 +      return delta_exec;
 +}
 +
  /*
   * Return any ns on the sched_clock that have not yet been banked in
   * @p in case that task is currently running.
@@@ -4456,7 -4391,6 +4459,7 @@@ void scheduler_tick(void
        update_rq_clock(rq);
        update_cpu_load(rq);
        curr->sched_class->task_tick(rq, curr, 0);
 +      perf_counter_task_tick(curr, cpu);
        spin_unlock(&rq->lock);
  
  #ifdef CONFIG_SMP
@@@ -4652,7 -4586,6 +4655,7 @@@ need_resched_nonpreemptible
  
        if (likely(prev != next)) {
                sched_info_switch(prev, next);
 +              perf_counter_task_sched_out(prev, cpu);
  
                rq->nr_switches++;
                rq->curr = next;
@@@ -7352,10 -7285,10 +7355,10 @@@ cpu_to_phys_group(int cpu, const struc
   * groups, so roll our own. Now each node has its own list of groups which
   * gets dynamically allocated.
   */
- static DEFINE_PER_CPU(struct sched_domain, node_domains);
+ static DEFINE_PER_CPU(struct static_sched_domain, node_domains);
  static struct sched_group ***sched_group_nodes_bycpu;
  
- static DEFINE_PER_CPU(struct sched_domain, allnodes_domains);
+ static DEFINE_PER_CPU(struct static_sched_domain, allnodes_domains);
  static DEFINE_PER_CPU(struct static_sched_group, sched_group_allnodes);
  
  static int cpu_to_allnodes_group(int cpu, const struct cpumask *cpu_map,
@@@ -7630,7 -7563,7 +7633,7 @@@ static int __build_sched_domains(const 
  #ifdef CONFIG_NUMA
                if (cpumask_weight(cpu_map) >
                                SD_NODES_PER_DOMAIN*cpumask_weight(nodemask)) {
-                       sd = &per_cpu(allnodes_domains, i);
+                       sd = &per_cpu(allnodes_domains, i).sd;
                        SD_INIT(sd, ALLNODES);
                        set_domain_attribute(sd, attr);
                        cpumask_copy(sched_domain_span(sd), cpu_map);
                } else
                        p = NULL;
  
-               sd = &per_cpu(node_domains, i);
+               sd = &per_cpu(node_domains, i).sd;
                SD_INIT(sd, NODE);
                set_domain_attribute(sd, attr);
                sched_domain_node_span(cpu_to_node(i), sched_domain_span(sd));
                for_each_cpu(j, nodemask) {
                        struct sched_domain *sd;
  
-                       sd = &per_cpu(node_domains, j);
+                       sd = &per_cpu(node_domains, j).sd;
                        sd->groups = sg;
                }
                sg->__cpu_power = 0;