cputime: Generic on-demand virtual cputime accounting
author    Frederic Weisbecker <fweisbec@gmail.com>
          Wed, 25 Jul 2012 05:56:04 +0000 (07:56 +0200)
committer Frederic Weisbecker <fweisbec@gmail.com>
          Sun, 27 Jan 2013 18:23:27 +0000 (19:23 +0100)
If we want to stop the tick beyond the idle case, we need to be
able to account cputime without relying on the tick.

Virtual cputime accounting solves that problem by hooking into
the kernel/user boundaries.

However, implementing CONFIG_VIRT_CPU_ACCOUNTING requires low-level
arch hooks and involves more overhead. But we already have a generic
context tracking subsystem that archs planning to shut down the tick
outside idle must implement anyway for RCU.

This patch implements generic virtual cputime accounting that
relies on these generic kernel/user hooks.

There are some upsides to doing this:

- No arch code is needed to implement CONFIG_VIRT_CPU_ACCOUNTING if
context tracking is already built (it is already necessary for RCU in
full tickless mode).

- We can rely on the generic context tracking subsystem to dynamically
(de)activate the hooks, so we can switch at any time between virtual
and tick-based accounting. This way we avoid the overhead of virtual
accounting while the tick is running periodically.

And one downside:

- There is probably more overhead than with a native virtual cputime
accounting. But it relies on hooks that are already set anyway.
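
To make that concrete, here is a user-space simulation of the scheme
(an illustration only, not kernel code): a timestamp snapshot is taken
at every kernel/user transition and the elapsed delta is flushed to
the bucket matching the context being left, mirroring what
vtime_user_enter()/vtime_user_exit() and get_vtime_delta() do in the
hunks below. sched_clock() is modeled with clock_gettime().

    #include <stdio.h>
    #include <stdint.h>
    #include <time.h>
    #include <unistd.h>

    static uint64_t cputime_snap;        /* the per-cpu snapshot in the kernel */
    static uint64_t utime_ns, stime_ns;  /* accounted user/system time */

    /* Stand-in for the kernel's sched_clock() */
    static uint64_t sched_clock(void)
    {
            struct timespec ts;

            clock_gettime(CLOCK_MONOTONIC, &ts);
            return (uint64_t)ts.tv_sec * 1000000000ULL + ts.tv_nsec;
    }

    /* Elapsed time since the last snapshot; advances the snapshot */
    static uint64_t get_vtime_delta(void)
    {
            uint64_t now = sched_clock();
            uint64_t delta = now - cputime_snap;

            cputime_snap = now;
            return delta;
    }

    static void user_enter(void)  /* kernel -> user boundary */
    {
            stime_ns += get_vtime_delta();  /* pending delta was system time */
    }

    static void user_exit(void)   /* user -> kernel boundary */
    {
            utime_ns += get_vtime_delta();  /* pending delta was user time */
    }

    int main(void)
    {
            cputime_snap = sched_clock();

            user_enter();   /* "return to userspace" */
            usleep(2000);   /* ... user code runs ... */
            user_exit();    /* "syscall entry" */
            usleep(1000);   /* ... kernel code runs ... */
            user_enter();

            printf("user: %llu ns, system: %llu ns\n",
                   (unsigned long long)utime_ns,
                   (unsigned long long)stime_ns);
            return 0;
    }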

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Li Zhong <zhong@linux.vnet.ibm.com>
Cc: Namhyung Kim <namhyung.kim@lge.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
27 files changed:
arch/ia64/include/asm/cputime.h
arch/ia64/include/asm/thread_info.h
arch/ia64/include/asm/xen/minstate.h
arch/ia64/kernel/asm-offsets.c
arch/ia64/kernel/entry.S
arch/ia64/kernel/fsys.S
arch/ia64/kernel/head.S
arch/ia64/kernel/ivt.S
arch/ia64/kernel/minstate.h
arch/ia64/kernel/time.c
arch/powerpc/configs/chroma_defconfig
arch/powerpc/configs/corenet64_smp_defconfig
arch/powerpc/configs/pasemi_defconfig
arch/powerpc/include/asm/cputime.h
arch/powerpc/include/asm/lppaca.h
arch/powerpc/include/asm/ppc_asm.h
arch/powerpc/kernel/entry_64.S
arch/powerpc/kernel/time.c
arch/powerpc/platforms/pseries/dtl.c
arch/powerpc/platforms/pseries/setup.c
include/asm-generic/cputime.h
include/asm-generic/cputime_nsecs.h
include/linux/kernel_stat.h
include/linux/vtime.h
init/Kconfig
kernel/context_tracking.c
kernel/sched/cputime.c

diff --git a/arch/ia64/include/asm/cputime.h b/arch/ia64/include/asm/cputime.h
index 040b3163800123763abd1e0c1bb529518bbb83cb..e2d3f5baf265408b49a201e8fa07e3b3b24fe939 100644
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  *
- * If we have CONFIG_VIRT_CPU_ACCOUNTING, we measure cpu time in nsec.
+ * If we have CONFIG_VIRT_CPU_ACCOUNTING_NATIVE, we measure cpu time in nsec.
  * Otherwise we measure cpu time in jiffies using the generic definitions.
  */
 
 #ifndef __IA64_CPUTIME_H
 #define __IA64_CPUTIME_H
 
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 # include <asm-generic/cputime.h>
 #else
 # include <asm/processor.h>
 # include <asm-generic/cputime_nsecs.h>
 extern void arch_vtime_task_switch(struct task_struct *tsk);
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 #endif /* __IA64_CPUTIME_H */
diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h
index ff2ae41365840a616b4a7b6211aed1b2678c6e0a..020d655ed082bf9aabbf31a9091468e9045acb0d 100644
@@ -31,7 +31,7 @@ struct thread_info {
        mm_segment_t addr_limit;        /* user-level address space limit */
        int preempt_count;              /* 0=premptable, <0=BUG; will also serve as bh-counter */
        struct restart_block restart_block;
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
        __u64 ac_stamp;
        __u64 ac_leave;
        __u64 ac_stime;
@@ -69,7 +69,7 @@ struct thread_info {
 #define task_stack_page(tsk)   ((void *)(tsk))
 
 #define __HAVE_THREAD_FUNCTIONS
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 #define setup_thread_stack(p, org)                     \
        *task_thread_info(p) = *task_thread_info(org);  \
        task_thread_info(p)->ac_stime = 0;              \
diff --git a/arch/ia64/include/asm/xen/minstate.h b/arch/ia64/include/asm/xen/minstate.h
index c57fa910f2c937e7473a9ea63e5ac9a7be74c86c..00cf03e0cb8295c8f489107cc025bf6fd377b108 100644
@@ -1,5 +1,5 @@
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 /* read ar.itc in advance, and use it before leaving bank 0 */
 #define XEN_ACCOUNT_GET_STAMP          \
        MOV_FROM_ITC(pUStk, p6, r20, r2);
diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c
index a48bd9a9927bb3b42c7b2076bb0c4b359d83a2e5..46c9e3007315dedc0ca9c7aa38f8a1e522b3ca58 100644
@@ -41,7 +41,7 @@ void foo(void)
        DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
        DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
        DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count));
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
        DEFINE(TI_AC_STAMP, offsetof(struct thread_info, ac_stamp));
        DEFINE(TI_AC_LEAVE, offsetof(struct thread_info, ac_leave));
        DEFINE(TI_AC_STIME, offsetof(struct thread_info, ac_stime));
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 6bfd8429ee0f4a6aedd72aa051030928f72085fa..7a53530f22c219eb877d81d2392a85722e9503dc 100644
@@ -724,7 +724,7 @@ GLOBAL_ENTRY(__paravirt_leave_syscall)
 #endif
 .global __paravirt_work_processed_syscall;
 __paravirt_work_processed_syscall:
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
        adds r2=PT(LOADRS)+16,r12
        MOV_FROM_ITC(pUStk, p9, r22, r19)       // fetch time at leave
        adds r18=TI_FLAGS+IA64_TASK_SIZE,r13
@@ -762,7 +762,7 @@ __paravirt_work_processed_syscall:
 
        ld8 r29=[r2],16         // M0|1 load cr.ipsr
        ld8 r28=[r3],16         // M0|1 load cr.iip
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 (pUStk) add r14=TI_AC_LEAVE+IA64_TASK_SIZE,r13
        ;;
        ld8 r30=[r2],16         // M0|1 load cr.ifs
@@ -793,7 +793,7 @@ __paravirt_work_processed_syscall:
        ld8.fill r1=[r3],16                     // M0|1 load r1
 (pUStk) mov r17=1                              // A
        ;;
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 (pUStk) st1 [r15]=r17                          // M2|3
 #else
 (pUStk) st1 [r14]=r17                          // M2|3
@@ -813,7 +813,7 @@ __paravirt_work_processed_syscall:
        shr.u r18=r19,16                // I0|1 get byte size of existing "dirty" partition
        COVER                           // B    add current frame into dirty partition & set cr.ifs
        ;;
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
        mov r19=ar.bsp                  // M2   get new backing store pointer
        st8 [r14]=r22                   // M    save time at leave
        mov f10=f0                      // F    clear f10
@@ -948,7 +948,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel)
        adds r16=PT(CR_IPSR)+16,r12
        adds r17=PT(CR_IIP)+16,r12
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
        .pred.rel.mutex pUStk,pKStk
        MOV_FROM_PSR(pKStk, r22, r29)   // M2 read PSR now that interrupts are disabled
        MOV_FROM_ITC(pUStk, p9, r22, r29)       // M  fetch time at leave
@@ -981,7 +981,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel)
        ;;
        ld8.fill r12=[r16],16
        ld8.fill r13=[r17],16
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 (pUStk)        adds r3=TI_AC_LEAVE+IA64_TASK_SIZE,r18
 #else
 (pUStk)        adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18
@@ -989,7 +989,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel)
        ;;
        ld8 r20=[r16],16        // ar.fpsr
        ld8.fill r15=[r17],16
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 (pUStk)        adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18  // deferred
 #endif
        ;;
@@ -997,7 +997,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel)
        ld8.fill r2=[r17]
 (pUStk)        mov r17=1
        ;;
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
        //  mmi_ :  ld8 st1 shr;;         mmi_ : st8 st1 shr;;
        //  mib  :  mov add br        ->  mib  : ld8 add br
        //  bbb_ :  br  nop cover;;       mbb_ : mov br  cover;;
diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S
index e662f178b990ab660526154dfa505c5452d57bf6..c4cd45d97749bf610f55b8be24a04ba5b7cec107 100644
@@ -529,7 +529,7 @@ GLOBAL_ENTRY(paravirt_fsys_bubble_down)
        nop.i 0
        ;;
        mov ar.rsc=0                            // M2   set enforced lazy mode, pl 0, LE, loadrs=0
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
        MOV_FROM_ITC(p0, p6, r30, r23)          // M    get cycle for accounting
 #else
        nop.m 0
@@ -555,7 +555,7 @@ GLOBAL_ENTRY(paravirt_fsys_bubble_down)
        cmp.ne pKStk,pUStk=r0,r0                // A    set pKStk <- 0, pUStk <- 1
        br.call.sptk.many b7=ia64_syscall_setup // B
        ;;
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
        // mov.m r30=ar.itc is called in advance
        add r16=TI_AC_STAMP+IA64_TASK_SIZE,r2
        add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r2
diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S
index 4738ff7bd66a28e35b260ea3706058fbf56adbda..9be4e497f3d3c253aa36c87459cdb4830a9e4fa7 100644
@@ -1073,7 +1073,7 @@ END(ia64_native_sched_clock)
 sched_clock = ia64_native_sched_clock
 #endif
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 GLOBAL_ENTRY(cycle_to_cputime)
        alloc r16=ar.pfs,1,0,0,0
        addl r8=THIS_CPU(ia64_cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0
@@ -1091,7 +1091,7 @@ GLOBAL_ENTRY(cycle_to_cputime)
        shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT
        br.ret.sptk.many rp
 END(cycle_to_cputime)
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 #ifdef CONFIG_IA64_BRL_EMU
 
diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S
index fa25689fc453b36e1cb455782af442bdbf8017ee..689ffcaa284e4ddbdbe1503b12d0713c7cb5c017 100644
@@ -784,7 +784,7 @@ ENTRY(break_fault)
 
 (p8)   adds r28=16,r28                         // A    switch cr.iip to next bundle
 (p9)   adds r8=1,r8                            // A    increment ei to next slot
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
        ;;
        mov b6=r30                              // I0   setup syscall handler branch reg early
 #else
@@ -801,7 +801,7 @@ ENTRY(break_fault)
        //
 ///////////////////////////////////////////////////////////////////////
        st1 [r16]=r0                            // M2|3 clear current->thread.on_ustack flag
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
        MOV_FROM_ITC(p0, p14, r30, r18)         // M    get cycle for accounting
 #else
        mov b6=r30                              // I0   setup syscall handler branch reg early
@@ -817,7 +817,7 @@ ENTRY(break_fault)
        cmp.eq p14,p0=r9,r0                     // A    are syscalls being traced/audited?
        br.call.sptk.many b7=ia64_syscall_setup // B
 1:
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
        // mov.m r30=ar.itc is called in advance, and r13 is current
        add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13  // A
        add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13  // A
@@ -1043,7 +1043,7 @@ END(ia64_syscall_setup)
        DBG_FAULT(16)
        FAULT(16)
 
-#if defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(__IA64_ASM_PARAVIRTUALIZED_NATIVE)
+#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(__IA64_ASM_PARAVIRTUALIZED_NATIVE)
        /*
         * There is no particular reason for this code to be here, other than
         * that there happens to be space here that would go unused otherwise.
diff --git a/arch/ia64/kernel/minstate.h b/arch/ia64/kernel/minstate.h
index d56753a11636b723b402a32df60fc762af3c5e38..cc82a7d744c985ce18b0a70512cd54234d71aefa 100644
@@ -4,7 +4,7 @@
 #include "entry.h"
 #include "paravirt_inst.h"
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 /* read ar.itc in advance, and use it before leaving bank 0 */
 #define ACCOUNT_GET_STAMP                              \
 (pUStk) mov.m r20=ar.itc;
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 88a794536bc01b9e55ec0b41aa00b37c55653eea..a3a3f5a1cb3a0c6f796b08838d305084d31b4f34 100644
@@ -77,7 +77,7 @@ static struct clocksource clocksource_itc = {
 };
 static struct clocksource *itc_clocksource;
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 
 #include <linux/kernel_stat.h>
 
@@ -142,7 +142,7 @@ void vtime_account_idle(struct task_struct *tsk)
        account_idle_time(vtime_delta(tsk));
 }
 
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 static irqreturn_t
 timer_interrupt (int irq, void *dev_id)
diff --git a/arch/powerpc/configs/chroma_defconfig b/arch/powerpc/configs/chroma_defconfig
index 29bb11ec6c640677a73c3e16058897789f1fba2f..4f35fc4623856888f441489e3a43abc6750ccd50 100644
@@ -1,6 +1,6 @@
 CONFIG_PPC64=y
 CONFIG_PPC_BOOK3E_64=y
-# CONFIG_VIRT_CPU_ACCOUNTING is not set
+# CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is not set
 CONFIG_SMP=y
 CONFIG_NR_CPUS=256
 CONFIG_EXPERIMENTAL=y
diff --git a/arch/powerpc/configs/corenet64_smp_defconfig b/arch/powerpc/configs/corenet64_smp_defconfig
index 88fa5c46f66f5481e2d62dfc0e4990fff4f782f3..f7df8362911fc80644b03badd8c639538ff59afd 100644
@@ -1,6 +1,6 @@
 CONFIG_PPC64=y
 CONFIG_PPC_BOOK3E_64=y
-# CONFIG_VIRT_CPU_ACCOUNTING is not set
+# CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is not set
 CONFIG_SMP=y
 CONFIG_NR_CPUS=2
 CONFIG_EXPERIMENTAL=y
diff --git a/arch/powerpc/configs/pasemi_defconfig b/arch/powerpc/configs/pasemi_defconfig
index 840a2c2d043085434b715dfffa3d4399e70f383b..bcedeea0df8934424b94fc30d7c567f0a0b611ca 100644
@@ -1,6 +1,6 @@
 CONFIG_PPC64=y
 CONFIG_ALTIVEC=y
-# CONFIG_VIRT_CPU_ACCOUNTING is not set
+# CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is not set
 CONFIG_SMP=y
 CONFIG_NR_CPUS=2
 CONFIG_EXPERIMENTAL=y
diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h
index 483733bd06d4e9bda0689c4fba66849e2471b7c2..607559ab271ff98b45de1f11964416bebaf32841 100644
@@ -8,7 +8,7 @@
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  *
- * If we have CONFIG_VIRT_CPU_ACCOUNTING, we measure cpu time in
+ * If we have CONFIG_VIRT_CPU_ACCOUNTING_NATIVE, we measure cpu time in
  * the same units as the timebase.  Otherwise we measure cpu time
  * in jiffies using the generic definitions.
  */
@@ -16,7 +16,7 @@
 #ifndef __POWERPC_CPUTIME_H
 #define __POWERPC_CPUTIME_H
 
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 #include <asm-generic/cputime.h>
 #ifdef __KERNEL__
 static inline void setup_cputime_one_jiffy(void) { }
@@ -231,5 +231,5 @@ static inline cputime_t clock_t_to_cputime(const unsigned long clk)
 static inline void arch_vtime_task_switch(struct task_struct *tsk) { }
 
 #endif /* __KERNEL__ */
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 #endif /* __POWERPC_CPUTIME_H */
diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h
index 531fe0c3108f8e157b8a255cb6a2ae8fce142059..b1e7f2af1016c82e6584731a0d0112b80a5eb277 100644
@@ -145,7 +145,7 @@ struct dtl_entry {
 extern struct kmem_cache *dtl_cache;
 
 /*
- * When CONFIG_VIRT_CPU_ACCOUNTING = y, the cpu accounting code controls
+ * When CONFIG_VIRT_CPU_ACCOUNTING_NATIVE = y, the cpu accounting code controls
  * reading from the dispatch trace log.  If other code wants to consume
  * DTL entries, it can set this pointer to a function that will get
  * called once for each DTL entry that gets processed.
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index ea2a86e8ff95bf731a399850cf2c8934b3fa8a19..2d0e1f5d83394a60544cc3a3fa003973c868596b 100644
@@ -24,7 +24,7 @@
  * user_time and system_time fields in the paca.
  */
 
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 #define ACCOUNT_CPU_USER_ENTRY(ra, rb)
 #define ACCOUNT_CPU_USER_EXIT(ra, rb)
 #define ACCOUNT_STOLEN_TIME
@@ -70,7 +70,7 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
 
 #endif /* CONFIG_PPC_SPLPAR */
 
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 /*
  * Macros for storing registers into and loading registers from
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index b310a0573625dec8a672c267ab6158f8e90b37a6..a0ca42fb1541f49c61965a2cfdaf3f73b9849eb1 100644
@@ -94,7 +94,7 @@ system_call_common:
        addi    r9,r1,STACK_FRAME_OVERHEAD
        ld      r11,exception_marker@toc(r2)
        std     r11,-16(r9)             /* "regshere" marker */
-#if defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(CONFIG_PPC_SPLPAR)
+#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC_SPLPAR)
 BEGIN_FW_FTR_SECTION
        beq     33f
        /* if from user, see if there are any DTL entries to process */
@@ -110,7 +110,7 @@ BEGIN_FW_FTR_SECTION
        addi    r9,r1,STACK_FRAME_OVERHEAD
 33:
 END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING && CONFIG_PPC_SPLPAR */
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE && CONFIG_PPC_SPLPAR */
 
        /*
         * A syscall should always be called with interrupts enabled
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 6f6b1cccc91662037115e69dc072ac1f7a376be5..22c9b67f9983d4ef103bd6fb29ef83498ee4403a 100644
@@ -143,7 +143,7 @@ EXPORT_SYMBOL_GPL(ppc_proc_freq);
 unsigned long ppc_tb_freq;
 EXPORT_SYMBOL_GPL(ppc_tb_freq);
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 /*
  * Factors for converting from cputime_t (timebase ticks) to
  * jiffies, microseconds, seconds, and clock_t (1/USER_HZ seconds).
@@ -377,7 +377,7 @@ void vtime_account_user(struct task_struct *tsk)
        account_user_time(tsk, utime, utimescaled);
 }
 
-#else /* ! CONFIG_VIRT_CPU_ACCOUNTING */
+#else /* ! CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 #define calc_cputime_factors()
 #endif
 
diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c
index a7648543c59e05435eac37c5099eeda84db9584d..0cc0ac07a55dc661e0e0ad3acceaa55d33818800 100644
@@ -57,7 +57,7 @@ static u8 dtl_event_mask = 0x7;
  */
 static int dtl_buf_entries = N_DISPATCH_LOG;
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 struct dtl_ring {
        u64     write_index;
        struct dtl_entry *write_ptr;
@@ -142,7 +142,7 @@ static u64 dtl_current_index(struct dtl *dtl)
        return per_cpu(dtl_rings, dtl->cpu).write_index;
 }
 
-#else /* CONFIG_VIRT_CPU_ACCOUNTING */
+#else /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 static int dtl_start(struct dtl *dtl)
 {
@@ -188,7 +188,7 @@ static u64 dtl_current_index(struct dtl *dtl)
 {
        return lppaca_of(dtl->cpu).dtl_idx;
 }
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 static int dtl_enable(struct dtl *dtl)
 {
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index ca55882465d6b0d027ef785f941c64a342985f99..527e12c9573be178757b8eb7920b94654224e589 100644
@@ -281,7 +281,7 @@ static struct notifier_block pci_dn_reconfig_nb = {
 
 struct kmem_cache *dtl_cache;
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 /*
  * Allocate space for the dispatch trace log for all possible cpus
  * and register the buffers with the hypervisor.  This is used for
@@ -332,12 +332,12 @@ static int alloc_dispatch_logs(void)
 
        return 0;
 }
-#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
+#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 static inline int alloc_dispatch_logs(void)
 {
        return 0;
 }
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 static int alloc_dispatch_log_kmem_cache(void)
 {
diff --git a/include/asm-generic/cputime.h b/include/asm-generic/cputime.h
index c6eddf50eaf9f8948857b49a61c2c0d9d2372a1c..51969436b8b83b21730d25a5cef434ac6db3ef06 100644
@@ -4,6 +4,12 @@
 #include <linux/time.h>
 #include <linux/jiffies.h>
 
-#include <asm-generic/cputime_jiffies.h>
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+# include <asm-generic/cputime_jiffies.h>
+#endif
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+# include <asm-generic/cputime_nsecs.h>
+#endif
 
 #endif
diff --git a/include/asm-generic/cputime_nsecs.h b/include/asm-generic/cputime_nsecs.h
index c73d182f4751c585396b2844d2273a2ef4e0afce..b6485cafb7bdccef586116e800216c75dd7b529b 100644
@@ -26,6 +26,7 @@ typedef u64 __nocast cputime64_t;
  */
 #define cputime_to_jiffies(__ct)       \
        ((__force u64)(__ct) / (NSEC_PER_SEC / HZ))
+#define cputime_to_scaled(__ct)                (__ct)
 #define jiffies_to_cputime(__jif)      \
        (__force cputime_t)((__jif) * (NSEC_PER_SEC / HZ))
 #define cputime64_to_jiffies64(__ct)   \
@@ -33,6 +34,13 @@ typedef u64 __nocast cputime64_t;
 #define jiffies64_to_cputime64(__jif)  \
        (__force cputime64_t)((__jif) * (NSEC_PER_SEC / HZ))
 
+
+/*
+ * Convert cputime <-> nanoseconds
+ */
+#define nsecs_to_cputime(__nsecs)      ((__force u64)(__nsecs))
+
+
 /*
  * Convert cputime <-> microseconds
  */
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 66b70780e910dfb846241bc3b70ce1a97aeeee47..ed5f6ed6eb772797ea1c7eb0e98e46f55dbcf027 100644
@@ -127,7 +127,7 @@ extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t)
 extern void account_steal_time(cputime_t);
 extern void account_idle_time(cputime_t);
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 static inline void account_process_tick(struct task_struct *tsk, int user)
 {
        vtime_account_user(tsk);
diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index ae30ab58431ab7fd30971fbee3f655a3c8aeb2b1..21ef703d1b25c1856712c4d02996741a117b2185 100644
@@ -14,9 +14,25 @@ extern void vtime_account(struct task_struct *tsk);
 static inline void vtime_task_switch(struct task_struct *prev) { }
 static inline void vtime_account_system(struct task_struct *tsk) { }
 static inline void vtime_account_system_irqsafe(struct task_struct *tsk) { }
+static inline void vtime_account_user(struct task_struct *tsk) { }
 static inline void vtime_account(struct task_struct *tsk) { }
 #endif
 
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+static inline void arch_vtime_task_switch(struct task_struct *tsk) { }
+static inline void vtime_user_enter(struct task_struct *tsk)
+{
+       vtime_account_system(tsk);
+}
+static inline void vtime_user_exit(struct task_struct *tsk)
+{
+       vtime_account_user(tsk);
+}
+#else
+static inline void vtime_user_enter(struct task_struct *tsk) { }
+static inline void vtime_user_exit(struct task_struct *tsk) { }
+#endif
+
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
 extern void irqtime_account_irq(struct task_struct *tsk);
 #else
diff --git a/init/Kconfig b/init/Kconfig
index be8b7f55312d1f0ea3cbb64c342eb5b456ea34a1..a05f843e7e5261eb957d0cc3caefad69c2b5258f 100644
@@ -326,6 +326,9 @@ source "kernel/time/Kconfig"
 
 menu "CPU/Task time and stats accounting"
 
+config VIRT_CPU_ACCOUNTING
+       bool
+
 choice
        prompt "Cputime accounting"
        default TICK_CPU_ACCOUNTING if !PPC64
@@ -342,9 +345,10 @@ config TICK_CPU_ACCOUNTING
 
          If unsure, say Y.
 
-config VIRT_CPU_ACCOUNTING
+config VIRT_CPU_ACCOUNTING_NATIVE
        bool "Deterministic task and CPU time accounting"
        depends on HAVE_VIRT_CPU_ACCOUNTING
+       select VIRT_CPU_ACCOUNTING
        help
          Select this option to enable more accurate task and CPU time
          accounting.  This is done by reading a CPU counter on each
@@ -354,6 +358,23 @@ config VIRT_CPU_ACCOUNTING
          this also enables accounting of stolen time on logically-partitioned
          systems.
 
+config VIRT_CPU_ACCOUNTING_GEN
+       bool "Full dynticks CPU time accounting"
+       depends on HAVE_CONTEXT_TRACKING && 64BIT
+       select VIRT_CPU_ACCOUNTING
+       select CONTEXT_TRACKING
+       help
+         Select this option to enable task and CPU time accounting on full
+         dynticks systems. This accounting is implemented by watching every
+         kernel/user boundary using the context tracking subsystem.
+         The accounting thus comes at the expense of some significant
+         overhead.
+
+         For now this is only useful if you are working on the development
+         of the full dynticks subsystem.
+
+         If unsure, say N.
+
 config IRQ_TIME_ACCOUNTING
        bool "Fine granularity task level IRQ time accounting"
        depends on HAVE_IRQ_TIME_ACCOUNTING
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index 54f471e536dca2ed3e682c900cba3fdca555cab8..9002e92e6372c830fdbc9f801318bba0f873990d 100644
@@ -30,8 +30,9 @@ void user_enter(void)
        local_irq_save(flags);
        if (__this_cpu_read(context_tracking.active) &&
            __this_cpu_read(context_tracking.state) != IN_USER) {
-               __this_cpu_write(context_tracking.state, IN_USER);
+               vtime_user_enter(current);
                rcu_user_enter();
+               __this_cpu_write(context_tracking.state, IN_USER);
        }
        local_irq_restore(flags);
 }
@@ -53,8 +54,9 @@ void user_exit(void)
 
        local_irq_save(flags);
        if (__this_cpu_read(context_tracking.state) == IN_USER) {
-               __this_cpu_write(context_tracking.state, IN_KERNEL);
                rcu_user_exit();
+               vtime_user_exit(current);
+               __this_cpu_write(context_tracking.state, IN_KERNEL);
        }
        local_irq_restore(flags);
 }
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 5849448b981e743a28cdc8c5c69c8dab432be585..1c964eced92cbd29085bf1f1a00a3b016e45de90 100644
@@ -3,6 +3,7 @@
 #include <linux/tsacct_kern.h>
 #include <linux/kernel_stat.h>
 #include <linux/static_key.h>
+#include <linux/context_tracking.h>
 #include "sched.h"
 
 
@@ -479,7 +480,9 @@ void vtime_task_switch(struct task_struct *prev)
        else
                vtime_account_system(prev);
 
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
        vtime_account_user(prev);
+#endif
        arch_vtime_task_switch(prev);
 }
 #endif
@@ -495,10 +498,24 @@ void vtime_task_switch(struct task_struct *prev)
 #ifndef __ARCH_HAS_VTIME_ACCOUNT
 void vtime_account(struct task_struct *tsk)
 {
-       if (in_interrupt() || !is_idle_task(tsk))
-               vtime_account_system(tsk);
-       else
-               vtime_account_idle(tsk);
+       if (!in_interrupt()) {
+               /*
+                * If we interrupted user, context_tracking_in_user()
+                * is 1 because the context tracking don't hook
+                * on irq entry/exit. This way we know if
+                * we need to flush user time on kernel entry.
+                */
+               if (context_tracking_in_user()) {
+                       vtime_account_user(tsk);
+                       return;
+               }
+
+               if (is_idle_task(tsk)) {
+                       vtime_account_idle(tsk);
+                       return;
+               }
+       }
+       vtime_account_system(tsk);
 }
 EXPORT_SYMBOL_GPL(vtime_account);
 #endif /* __ARCH_HAS_VTIME_ACCOUNT */
@@ -583,3 +600,39 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime
        cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st);
 }
 #endif
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+static DEFINE_PER_CPU(unsigned long long, cputime_snap);
+
+static cputime_t get_vtime_delta(void)
+{
+       unsigned long long delta;
+
+       delta = sched_clock() - __this_cpu_read(cputime_snap);
+       __this_cpu_add(cputime_snap, delta);
+
+       /* CHECKME: always safe to convert nsecs to cputime? */
+       return nsecs_to_cputime(delta);
+}
+
+void vtime_account_system(struct task_struct *tsk)
+{
+       cputime_t delta_cpu = get_vtime_delta();
+
+       account_system_time(tsk, irq_count(), delta_cpu, cputime_to_scaled(delta_cpu));
+}
+
+void vtime_account_user(struct task_struct *tsk)
+{
+       cputime_t delta_cpu = get_vtime_delta();
+
+       account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
+}
+
+void vtime_account_idle(struct task_struct *tsk)
+{
+       cputime_t delta_cpu = get_vtime_delta();
+
+       account_idle_time(delta_cpu);
+}
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
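
For illustration, the dispatch performed by the new vtime_account()
above can be condensed into the following user-space sketch, assuming
the kernel predicates in_interrupt(), context_tracking_in_user() and
is_idle_task() may be treated as plain booleans:

    #include <stdbool.h>
    #include <stdio.h>

    enum bucket { BUCKET_USER, BUCKET_SYSTEM, BUCKET_IDLE };

    /*
     * Same branch order as vtime_account(): outside of nested interrupt
     * context, a pending delta belongs to user time if context tracking
     * says we interrupted userspace, to idle time if the current task is
     * the idle task, and to system time in every remaining case.
     */
    static enum bucket vtime_bucket(bool in_interrupt, bool in_user,
                                    bool idle_task)
    {
            if (!in_interrupt) {
                    if (in_user)
                            return BUCKET_USER;
                    if (idle_task)
                            return BUCKET_IDLE;
            }
            return BUCKET_SYSTEM;
    }

    int main(void)
    {
            /* Interrupted userspace: flush the delta as user time */
            printf("%d\n", vtime_bucket(false, true, false) == BUCKET_USER);
            /* Interrupted the idle loop: flush the delta as idle time */
            printf("%d\n", vtime_bucket(false, false, true) == BUCKET_IDLE);
            /* Nested in interrupt context: always system time */
            printf("%d\n", vtime_bucket(true, true, false) == BUCKET_SYSTEM);
            return 0;
    }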