F: drivers/idle/intel_idle.c
INTEL PSTATE DRIVER
-M: Kristen Carlson Accardi <kristen@linux.intel.com>
+M: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+M: Len Brown <lenb@kernel.org>
L: linux-pm@vger.kernel.org
S: Supported
F: drivers/cpufreq/intel_pstate.c
select CPU_PM if (SUSPEND || CPU_IDLE)
select DCACHE_WORD_ACCESS
select EDAC_SUPPORT
+ select FRAME_POINTER
select GENERIC_ALLOCATOR
select GENERIC_CLOCKEVENTS
select GENERIC_CLOCKEVENTS_BROADCAST
source "lib/Kconfig.debug"
-config FRAME_POINTER
- bool
- default y
-
config ARM64_PTDUMP
bool "Export kernel pagetable layout to userspace via debugfs"
depends on DEBUG_KERNEL
CONFIG_CRYPTO_AES_ARM64_CE_CCM=y
CONFIG_CRYPTO_AES_ARM64_CE_BLK=y
CONFIG_CRYPTO_AES_ARM64_NEON_BLK=y
+CONFIG_CRYPTO_CRC32_ARM64=y
#undef __CMPXCHG_CASE
#define __CMPXCHG_DBL(name, mb, rel, cl) \
-__LL_SC_INLINE int \
+__LL_SC_INLINE long \
__LL_SC_PREFIX(__cmpxchg_double##name(unsigned long old1, \
unsigned long old2, \
unsigned long new1, \
#define __LL_SC_CMPXCHG_DBL(op) __LL_SC_CALL(__cmpxchg_double##op)
#define __CMPXCHG_DBL(name, mb, cl...) \
-static inline int __cmpxchg_double##name(unsigned long old1, \
+static inline long __cmpxchg_double##name(unsigned long old1, \
unsigned long old2, \
unsigned long new1, \
unsigned long new2, \
#define _PAGE_DEFAULT (PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL))
#define PAGE_KERNEL __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE)
+#define PAGE_KERNEL_RO __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
#define PAGE_KERNEL_EXEC __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE)
#define PAGE_KERNEL_EXEC_CONT __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT)
#ifndef __ASSEMBLY__
#include <linux/psci.h>
-#include <asm/types.h>
+#include <linux/types.h>
#include <asm/ptrace.h>
#define __KVM_HAVE_GUEST_DEBUG
}
/* Check if we have a particular HWCAP enabled */
-static bool cpus_have_hwcap(const struct arm64_cpu_capabilities *cap)
+static bool __maybe_unused cpus_have_hwcap(const struct arm64_cpu_capabilities *cap)
{
bool rc;
* cpu logical map array containing MPIDR values related to logical
* cpus. Assumes that cpu_logical_map(0) has already been initialized.
*/
-void __init of_parse_and_init_cpus(void)
+static void __init of_parse_and_init_cpus(void)
{
struct device_node *dn = NULL;
* time the notifier runs debug exceptions might have been enabled already,
* with the HW breakpoint registers' content still in an unknown state.
*/
-void (*hw_breakpoint_restore)(void *);
+static void (*hw_breakpoint_restore)(void *);
void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *))
{
/* Prevent multiple restore hook initializations */
ccflags-y += -nostdlib -Wl,-soname=linux-vdso.so.1 \
$(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+# Disable gcov profiling for VDSO code
+GCOV_PROFILE := n
+
# Workaround for bare-metal (ELF) toolchains that neglect to pass -shared
# down to collect2, resulting in silent corruption of the vDSO image.
ccflags-y += -Wl,-shared
if (((addr | next | phys) & ~CONT_MASK) == 0) {
/* a block of CONT_PTES */
__populate_init_pte(pte, addr, next, phys,
- prot | __pgprot(PTE_CONT));
+ __pgprot(pgprot_val(prot) | PTE_CONT));
} else {
/*
* If the range being split is already inside of a
} while (addr != end);
}
-void split_pud(pud_t *old_pud, pmd_t *pmd)
+static void split_pud(pud_t *old_pud, pmd_t *pmd)
{
unsigned long addr = pud_pfn(*old_pud) << PAGE_SHIFT;
pgprot_t prot = __pgprot(pud_val(*old_pud) ^ addr);
memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
}
-void __init fixup_executable(void)
+static void __init fixup_executable(void)
{
#ifdef CONFIG_DEBUG_RODATA
/* now that we are actually fully mapped, make the start/end more fine grained */
void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
{
const u64 dt_virt_base = __fix_to_virt(FIX_FDT);
- pgprot_t prot = PAGE_KERNEL | PTE_RDONLY;
+ pgprot_t prot = PAGE_KERNEL_RO;
int size, offset;
void *dt_virt;
/*
* BPF JIT compiler for ARM64
*
- * Copyright (C) 2014 Zi Shen Lim <zlim.lnx@gmail.com>
+ * Copyright (C) 2014-2015 Zi Shen Lim <zlim.lnx@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
aarch64_insn_gen_comp_branch_imm(0, offset, Rt, A64_VARIANT(sf), \
AARCH64_INSN_BRANCH_COMP_##type)
#define A64_CBZ(sf, Rt, imm19) A64_COMP_BRANCH(sf, Rt, (imm19) << 2, ZERO)
+#define A64_CBNZ(sf, Rt, imm19) A64_COMP_BRANCH(sf, Rt, (imm19) << 2, NONZERO)
/* Conditional branch (immediate) */
#define A64_COND_BRANCH(cond, offset) \
/*
* BPF JIT compiler for ARM64
*
- * Copyright (C) 2014 Zi Shen Lim <zlim.lnx@gmail.com>
+ * Copyright (C) 2014-2015 Zi Shen Lim <zlim.lnx@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
u8 jmp_cond;
s32 jmp_offset;
+#define check_imm(bits, imm) do { \
+ if ((((imm) > 0) && ((imm) >> (bits))) || \
+ (((imm) < 0) && (~(imm) >> (bits)))) { \
+ pr_info("[%2d] imm=%d(0x%x) out of range\n", \
+ i, imm, imm); \
+ return -EINVAL; \
+ } \
+} while (0)
+#define check_imm19(imm) check_imm(19, imm)
+#define check_imm26(imm) check_imm(26, imm)
+
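/*
 * A rough illustration of the range check above (the values below are
 * examples, not from the patch): check_imm19() accepts any offset whose
 * significant bits fit below bit 19, in either direction:
 *
 *	check_imm19(3);            passes: 3 >> 19 == 0
 *	check_imm19(-(1 << 19));   passes: ~(-(1 << 19)) >> 19 == 0
 *	check_imm19(1 << 19);      fails:  (1 << 19) >> 19 == 1, -EINVAL
 */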
switch (code) {
/* dst = src */
case BPF_ALU | BPF_MOV | BPF_X:
break;
case BPF_ALU | BPF_DIV | BPF_X:
case BPF_ALU64 | BPF_DIV | BPF_X:
- emit(A64_UDIV(is64, dst, dst, src), ctx);
- break;
case BPF_ALU | BPF_MOD | BPF_X:
case BPF_ALU64 | BPF_MOD | BPF_X:
- ctx->tmp_used = 1;
- emit(A64_UDIV(is64, tmp, dst, src), ctx);
- emit(A64_MUL(is64, tmp, tmp, src), ctx);
- emit(A64_SUB(is64, dst, dst, tmp), ctx);
+ {
+ const u8 r0 = bpf2a64[BPF_REG_0];
+
+ /* if (src == 0) return 0 */
+ jmp_offset = 3; /* skip ahead to else path */
+ check_imm19(jmp_offset);
+ emit(A64_CBNZ(is64, src, jmp_offset), ctx);
+ emit(A64_MOVZ(1, r0, 0, 0), ctx);
+ jmp_offset = epilogue_offset(ctx);
+ check_imm26(jmp_offset);
+ emit(A64_B(jmp_offset), ctx);
+ /* else */
+ switch (BPF_OP(code)) {
+ case BPF_DIV:
+ emit(A64_UDIV(is64, dst, dst, src), ctx);
+ break;
+ case BPF_MOD:
+ ctx->tmp_used = 1;
+ emit(A64_UDIV(is64, tmp, dst, src), ctx);
+ emit(A64_MUL(is64, tmp, tmp, src), ctx);
+ emit(A64_SUB(is64, dst, dst, tmp), ctx);
+ break;
+ }
break;
+ }
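/*
 * For reference, the guard above makes the JIT emit roughly the
 * following sequence for BPF_DIV (a sketch: the exact encodings come
 * from the A64_* helpers, and BPF_MOD emits udiv+mul+sub instead):
 *
 *	cbnz	src, 1f		// divisor != 0: run the real division
 *	movz	r0, #0		// divisor == 0: return 0 ...
 *	b	<epilogue>	// ... through the common epilogue
 * 1:	udiv	dst, dst, src
 */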
case BPF_ALU | BPF_LSH | BPF_X:
case BPF_ALU64 | BPF_LSH | BPF_X:
emit(A64_LSLV(is64, dst, dst, src), ctx);
emit(A64_ASR(is64, dst, dst, imm), ctx);
break;
-#define check_imm(bits, imm) do { \
- if ((((imm) > 0) && ((imm) >> (bits))) || \
- (((imm) < 0) && (~(imm) >> (bits)))) { \
- pr_info("[%2d] imm=%d(0x%x) out of range\n", \
- i, imm, imm); \
- return -EINVAL; \
- } \
-} while (0)
-#define check_imm19(imm) check_imm(19, imm)
-#define check_imm26(imm) check_imm(26, imm)
-
/* JUMP off */
case BPF_JMP | BPF_JA:
jmp_offset = bpf2a64_offset(i + off, i, ctx);
select OF_EARLY_FLATTREE
select HAVE_MEMBLOCK
select HAVE_DMA_ATTRS
+ select CLKSRC_OF
config RWSEM_GENERIC_SPINLOCK
def_bool y
KBUILD_AFLAGS += $(aflags-y)
LDFLAGS += $(ldflags-y)
+ifeq ($(CROSS_COMPILE),)
CROSS_COMPILE := h8300-unknown-linux-
+endif
core-y += arch/$(ARCH)/kernel/ arch/$(ARCH)/mm/
ifneq '$(CONFIG_H8300_BUILTIN_DTB)' '""'
# in order to suppress error message.
#
CONFIG_MEMORY_START ?= 0x00400000
-CONFIG_BOOT_LINK_OFFSET ?= 0x00140000
+CONFIG_BOOT_LINK_OFFSET ?= 0x00280000
IMAGE_OFFSET := $(shell printf "0x%08x" $$(($(CONFIG_MEMORY_START)+$(CONFIG_BOOT_LINK_OFFSET))))
LIBGCC := $(shell $(CROSS_COMPILE)$(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name)
-LDFLAGS_vmlinux := -Ttext $(IMAGE_OFFSET) -estartup $(obj)/vmlinux.lds
+LDFLAGS_vmlinux := -Ttext $(IMAGE_OFFSET) -estartup -T $(obj)/vmlinux.lds \
+ --defsym output=$(CONFIG_MEMORY_START)
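# With the defaults above, the zImage link address works out to
# IMAGE_OFFSET = 0x00400000 + 0x00280000 = 0x00680000.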
$(obj)/vmlinux: $(OBJECTS) $(obj)/piggy.o $(LIBGCC) FORCE
$(call if_changed,ld)
.section .text..startup,"ax"
.global startup
startup:
+ mov.l #startup, sp
mov.l er0, er4
- mov.l er0, sp
mov.l #__sbss, er0
mov.l #__ebss, er1
sub.l er0, er1
bne 1b
jsr @decompress_kernel
mov.l er4, er0
- jmp @0x400000
+ jmp @output
.align 9
fake_headers_as_bzImage:
extern char input_data[];
extern int input_len;
-static unsigned char *output;
+extern char output[];
#define HEAP_SIZE 0x10000
static void error(char *x)
{
-
while (1)
; /* Halt */
}
-#define STACK_SIZE (4096)
-long user_stack[STACK_SIZE];
-long *stack_start = &user_stack[STACK_SIZE];
-
void decompress_kernel(void)
{
free_mem_ptr = (unsigned long)&_end;
*(.bss*)
. = ALIGN(0x4) ;
__ebss = . ;
- __end = . ;
}
+ _end = . ;
}
chosen {
bootargs = "console=ttySC2,38400";
- stdout-path = <&sci2>;
+ stdout-path = &sci2;
};
aliases {
serial0 = &sci0;
compatible = "renesas,h8s2678-pll-clock";
clocks = <&xclk>;
#clock-cells = <0>;
- reg = <0xfee03b 2>, <0xfee045 2>;
+ reg = <0xffff3b 1>, <0xffff45 1>;
};
core_clk: core_clk {
compatible = "renesas,h8300-div-clock";
clocks = <&pllclk>;
#clock-cells = <0>;
- reg = <0xfee03b 2>;
+ reg = <0xffff3b 1>;
renesas,width = <3>;
};
fclk: fclk {
*(volatile unsigned long *)addr = b;
}
-static inline void ctrl_bclr(int b, unsigned long addr)
+static inline void ctrl_bclr(int b, unsigned char *addr)
{
if (__builtin_constant_p(b))
- __asm__("bclr %1,%0" : : "WU"(addr), "i"(b));
+ __asm__("bclr %1,%0" : "+WU"(*addr): "i"(b));
else
- __asm__("bclr %w1,%0" : : "WU"(addr), "r"(b));
+ __asm__("bclr %w1,%0" : "+WU"(*addr): "r"(b));
}
-static inline void ctrl_bset(int b, unsigned long addr)
+static inline void ctrl_bset(int b, unsigned char *addr)
{
if (__builtin_constant_p(b))
- __asm__("bset %1,%0" : : "WU"(addr), "i"(b));
+ __asm__("bset %1,%0" : "+WU"(*addr): "i"(b));
else
- __asm__("bset %w1,%0" : : "WU"(addr), "r"(b));
+ __asm__("bset %w1,%0" : "+WU"(*addr): "r"(b));
}
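/*
 * The constraint change above is the substance of this fix: the old
 * code passed the register address as a plain input operand, so the
 * compiler never saw that the byte at *addr is written. With "+WU"(*addr)
 * the byte is a read/write memory operand. A call would look like this
 * (the address is only an example, taken from the h8s2678 dts below):
 *
 *	ctrl_bset(3, (unsigned char *)0xffff3b);
 */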
#endif /* __KERNEL__ */
#ifdef __KERNEL__
+/*
+ * Size of kernel stack for each process. This must be a power of 2...
+ */
+#define THREAD_SIZE_ORDER 1
+#define THREAD_SIZE 8192 /* 2 pages */
+
#ifndef __ASSEMBLY__
/*
#define init_thread_info (init_thread_union.thread_info)
#define init_stack (init_thread_union.stack)
-
-/*
- * Size of kernel stack for each process. This must be a power of 2...
- */
-#define THREAD_SIZE_ORDER 1
-#define THREAD_SIZE 8192 /* 2 pages */
-
-
/* how to get the thread information struct from C */
static inline struct thread_info *current_thread_info(void)
{
#include <linux/clk-provider.h>
#include <linux/memblock.h>
#include <linux/screen_info.h>
+#include <linux/clocksource.h>
#include <asm/setup.h>
#include <asm/irq.h>
void __init time_init(void)
{
of_clk_init(NULL);
+ clocksource_probe();
}
#include <asm-generic/vmlinux.lds.h>
#include <asm/page.h>
+#include <asm/thread_info.h>
#define ROMTOP 0x000000
#define RAMTOP 0x400000
. = RAMTOP;
_ramstart = .;
#define ADDR(x) ROMEND
-#else
#endif
_sdata = . ;
__data_start = . ;
- RW_DATA_SECTION(0,0,0)
+ RW_DATA_SECTION(0, PAGE_SIZE, THREAD_SIZE)
#if defined(CONFIG_ROMKERNEL)
#undef ADDR
#endif
return false;
n_subcores += (cip->subcore_threads[sub] - 1) >> 1;
}
- if (n_subcores > 3 || large_sub < 0)
+ if (large_sub < 0 || !subcore_config_ok(n_subcores + 1, 2))
return false;
/*
beq 3f
clrrdi r0, r4, 28
PPC_SLBFEE_DOT(R5, R0) /* if so, look up SLB */
- bne 1f /* if no SLB entry found */
+ li r0, BOOK3S_INTERRUPT_DATA_SEGMENT
+ bne 7f /* if no SLB entry found */
4: std r4, VCPU_FAULT_DAR(r9)
stw r6, VCPU_FAULT_DSISR(r9)
cmpdi r3, -2 /* MMIO emulation; need instr word */
beq 2f
- /* Synthesize a DSI for the guest */
+ /* Synthesize a DSI (or DSegI) for the guest */
ld r4, VCPU_FAULT_DAR(r9)
mr r6, r3
-1: mtspr SPRN_DAR, r4
+1: li r0, BOOK3S_INTERRUPT_DATA_STORAGE
mtspr SPRN_DSISR, r6
+7: mtspr SPRN_DAR, r4
mtspr SPRN_SRR0, r10
mtspr SPRN_SRR1, r11
- li r10, BOOK3S_INTERRUPT_DATA_STORAGE
+ mr r10, r0
bl kvmppc_msr_interrupt
fast_interrupt_c_return:
6: ld r7, VCPU_CTR(r9)
beq 3f
clrrdi r0, r10, 28
PPC_SLBFEE_DOT(R5, R0) /* if so, look up SLB */
- bne 1f /* if no SLB entry found */
+ li r0, BOOK3S_INTERRUPT_INST_SEGMENT
+ bne 7f /* if no SLB entry found */
4:
/* Search the hash table. */
mr r3, r9 /* vcpu pointer */
cmpdi r3, -1 /* handle in kernel mode */
beq guest_exit_cont
- /* Synthesize an ISI for the guest */
+ /* Synthesize an ISI (or ISegI) for the guest */
mr r11, r3
-1: mtspr SPRN_SRR0, r10
+1: li r0, BOOK3S_INTERRUPT_INST_STORAGE
+7: mtspr SPRN_SRR0, r10
mtspr SPRN_SRR1, r11
- li r10, BOOK3S_INTERRUPT_INST_STORAGE
+ mr r10, r0
bl kvmppc_msr_interrupt
b fast_interrupt_c_return
u32 virtual_tsc_mult;
u32 virtual_tsc_khz;
s64 ia32_tsc_adjust_msr;
+ u64 tsc_scaling_ratio;
atomic_t nmi_queued; /* unprocessed asynchronous NMIs */
unsigned nmi_pending; /* NMI queued after currently running handler */
void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
void (*vcpu_put)(struct kvm_vcpu *vcpu);
- void (*update_db_bp_intercept)(struct kvm_vcpu *vcpu);
+ void (*update_bp_intercept)(struct kvm_vcpu *vcpu);
int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg);
int (*get_lpage_level)(void);
bool (*rdtscp_supported)(void);
bool (*invpcid_supported)(void);
- void (*adjust_tsc_offset)(struct kvm_vcpu *vcpu, s64 adjustment, bool host);
+ void (*adjust_tsc_offset_guest)(struct kvm_vcpu *vcpu, s64 adjustment);
void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
bool (*has_wbinvd_exit)(void);
- void (*set_tsc_khz)(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale);
u64 (*read_tsc_offset)(struct kvm_vcpu *vcpu);
void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
- u64 (*compute_tsc_offset)(struct kvm_vcpu *vcpu, u64 target_tsc);
u64 (*read_l1_tsc)(struct kvm_vcpu *vcpu, u64 host_tsc);
void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2);
extern struct kvm_x86_ops *kvm_x86_ops;
-static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
- s64 adjustment)
-{
- kvm_x86_ops->adjust_tsc_offset(vcpu, adjustment, false);
-}
-
-static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment)
-{
- kvm_x86_ops->adjust_tsc_offset(vcpu, adjustment, true);
-}
-
int kvm_mmu_module_init(void);
void kvm_mmu_module_exit(void);
/* control of guest tsc rate supported? */
extern bool kvm_has_tsc_control;
-/* minimum supported tsc_khz for guests */
-extern u32 kvm_min_guest_tsc_khz;
/* maximum supported tsc_khz for guests */
extern u32 kvm_max_guest_tsc_khz;
+/* number of bits of the fractional part of the TSC scaling ratio */
+extern u8 kvm_tsc_scaling_ratio_frac_bits;
+/* maximum allowed value of TSC scaling ratio */
+extern u64 kvm_max_tsc_scaling_ratio;
enum emulation_result {
EMULATE_DONE, /* no further processing */
void kvm_define_shared_msr(unsigned index, u32 msr);
int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
+u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc);
+u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc);
+
unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu);
bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip);
#define SECONDARY_EXEC_ENABLE_PML 0x00020000
#define SECONDARY_EXEC_XSAVES 0x00100000
#define SECONDARY_EXEC_PCOMMIT 0x00200000
+#define SECONDARY_EXEC_TSC_SCALING 0x02000000
#define PIN_BASED_EXT_INTR_MASK 0x00000001
#define PIN_BASED_NMI_EXITING 0x00000008
VMWRITE_BITMAP = 0x00002028,
XSS_EXIT_BITMAP = 0x0000202C,
XSS_EXIT_BITMAP_HIGH = 0x0000202D,
+ TSC_MULTIPLIER = 0x00002032,
+ TSC_MULTIPLIER_HIGH = 0x00002033,
GUEST_PHYSICAL_ADDRESS = 0x00002400,
GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401,
VMCS_LINK_POINTER = 0x00002800,
{ SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, \
{ SVM_EXIT_EXCP_BASE + PF_VECTOR, "PF excp" }, \
{ SVM_EXIT_EXCP_BASE + NM_VECTOR, "NM excp" }, \
+ { SVM_EXIT_EXCP_BASE + AC_VECTOR, "AC excp" }, \
{ SVM_EXIT_EXCP_BASE + MC_VECTOR, "MC excp" }, \
{ SVM_EXIT_INTR, "interrupt" }, \
{ SVM_EXIT_NMI, "nmi" }, \
tsc_deadline = apic->lapic_timer.expired_tscdeadline;
apic->lapic_timer.expired_tscdeadline = 0;
- guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, rdtsc());
+ guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline);
/* __delay is delay_tsc whenever the hardware has TSC, thus always. */
local_irq_save(flags);
now = apic->lapic_timer.timer.base->get_time();
- guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, rdtsc());
+ guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
if (likely(tscdeadline > guest_tsc)) {
ns = (tscdeadline - guest_tsc) * 1000000ULL;
do_div(ns, this_tsc_khz);
return reserved;
}
-int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct)
+int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct)
{
u64 spte;
bool reserved;
return RET_MMIO_PF_EMULATE;
reserved = walk_shadow_page_get_mmio_spte(vcpu, addr, &spte);
- if (unlikely(reserved))
+ if (WARN_ON(reserved))
return RET_MMIO_PF_BUG;
if (is_mmio_spte(spte)) {
*/
return RET_MMIO_PF_RETRY;
}
-EXPORT_SYMBOL_GPL(handle_mmio_page_fault_common);
-
-static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr,
- u32 error_code, bool direct)
-{
- int ret;
-
- ret = handle_mmio_page_fault_common(vcpu, addr, direct);
- WARN_ON(ret == RET_MMIO_PF_BUG);
- return ret;
-}
+EXPORT_SYMBOL_GPL(handle_mmio_page_fault);
static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
u32 error_code, bool prefault)
pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code);
if (unlikely(error_code & PFERR_RSVD_MASK)) {
- r = handle_mmio_page_fault(vcpu, gva, error_code, true);
+ r = handle_mmio_page_fault(vcpu, gva, true);
if (likely(r != RET_MMIO_PF_INVALID))
return r;
MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
if (unlikely(error_code & PFERR_RSVD_MASK)) {
- r = handle_mmio_page_fault(vcpu, gpa, error_code, true);
+ r = handle_mmio_page_fault(vcpu, gpa, true);
if (likely(r != RET_MMIO_PF_INVALID))
return r;
reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
/*
- * Return values of handle_mmio_page_fault_common:
+ * Return values of handle_mmio_page_fault:
* RET_MMIO_PF_EMULATE: it is a real mmio page fault, emulate the instruction
* directly.
* RET_MMIO_PF_INVALID: invalid spte is detected then let the real page
* fault path update the mmio spte.
* RET_MMIO_PF_RETRY: let CPU fault again on the address.
- * RET_MMIO_PF_BUG: bug is detected.
+ * RET_MMIO_PF_BUG: a bug was detected (and a WARN was printed).
*/
enum {
RET_MMIO_PF_EMULATE = 1,
RET_MMIO_PF_BUG = -1
};
-int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct);
+int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct);
void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu);
void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly);
pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
if (unlikely(error_code & PFERR_RSVD_MASK)) {
- r = handle_mmio_page_fault(vcpu, addr, error_code,
- mmu_is_nested(vcpu));
+ r = handle_mmio_page_fault(vcpu, addr, mmu_is_nested(vcpu));
if (likely(r != RET_MMIO_PF_INVALID))
return r;
unsigned long int3_rip;
u32 apf_reason;
- u64 tsc_ratio;
-
/* cached guest cpuid flags for faster access */
bool nrips_enabled : 1;
};
static int nested_svm_vmexit(struct vcpu_svm *svm);
static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
bool has_error_code, u32 error_code);
-static u64 __scale_tsc(u64 ratio, u64 tsc);
enum {
VMCB_INTERCEPTS, /* Intercept vectors, TSC offset,
kvm_enable_efer_bits(EFER_FFXSR);
if (boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
- u64 max;
-
kvm_has_tsc_control = true;
-
- /*
- * Make sure the user can only configure tsc_khz values that
- * fit into a signed integer.
- * A min value is not calculated needed because it will always
- * be 1 on all machines and a value of 0 is used to disable
- * tsc-scaling for the vcpu.
- */
- max = min(0x7fffffffULL, __scale_tsc(tsc_khz, TSC_RATIO_MAX));
-
- kvm_max_guest_tsc_khz = max;
+ kvm_max_tsc_scaling_ratio = TSC_RATIO_MAX;
+ kvm_tsc_scaling_ratio_frac_bits = 32;
}
if (nested) {
seg->base = 0;
}
-static u64 __scale_tsc(u64 ratio, u64 tsc)
-{
- u64 mult, frac, _tsc;
-
- mult = ratio >> 32;
- frac = ratio & ((1ULL << 32) - 1);
-
- _tsc = tsc;
- _tsc *= mult;
- _tsc += (tsc >> 32) * frac;
- _tsc += ((tsc & ((1ULL << 32) - 1)) * frac) >> 32;
-
- return _tsc;
-}
-
-static u64 svm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc)
-{
- struct vcpu_svm *svm = to_svm(vcpu);
- u64 _tsc = tsc;
-
- if (svm->tsc_ratio != TSC_RATIO_DEFAULT)
- _tsc = __scale_tsc(svm->tsc_ratio, tsc);
-
- return _tsc;
-}
-
-static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
-{
- struct vcpu_svm *svm = to_svm(vcpu);
- u64 ratio;
- u64 khz;
-
- /* Guest TSC same frequency as host TSC? */
- if (!scale) {
- svm->tsc_ratio = TSC_RATIO_DEFAULT;
- return;
- }
-
- /* TSC scaling supported? */
- if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
- if (user_tsc_khz > tsc_khz) {
- vcpu->arch.tsc_catchup = 1;
- vcpu->arch.tsc_always_catchup = 1;
- } else
- WARN(1, "user requested TSC rate below hardware speed\n");
- return;
- }
-
- khz = user_tsc_khz;
-
- /* TSC scaling required - calculate ratio */
- ratio = khz << 32;
- do_div(ratio, tsc_khz);
-
- if (ratio == 0 || ratio & TSC_RATIO_RSVD) {
- WARN_ONCE(1, "Invalid TSC ratio - virtual-tsc-khz=%u\n",
- user_tsc_khz);
- return;
- }
- svm->tsc_ratio = ratio;
-}
-
static u64 svm_read_tsc_offset(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
}
-static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool host)
+static void svm_adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, s64 adjustment)
{
struct vcpu_svm *svm = to_svm(vcpu);
- if (host) {
- if (svm->tsc_ratio != TSC_RATIO_DEFAULT)
- WARN_ON(adjustment < 0);
- adjustment = svm_scale_tsc(vcpu, (u64)adjustment);
- }
-
svm->vmcb->control.tsc_offset += adjustment;
if (is_guest_mode(vcpu))
svm->nested.hsave->control.tsc_offset += adjustment;
mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
}
-static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
-{
- u64 tsc;
-
- tsc = svm_scale_tsc(vcpu, rdtsc());
-
- return target_tsc - tsc;
-}
-
static void init_vmcb(struct vcpu_svm *svm)
{
struct vmcb_control_area *control = &svm->vmcb->control;
set_exception_intercept(svm, PF_VECTOR);
set_exception_intercept(svm, UD_VECTOR);
set_exception_intercept(svm, MC_VECTOR);
+ set_exception_intercept(svm, AC_VECTOR);
+ set_exception_intercept(svm, DB_VECTOR);
set_intercept(svm, INTERCEPT_INTR);
set_intercept(svm, INTERCEPT_NMI);
goto out;
}
- svm->tsc_ratio = TSC_RATIO_DEFAULT;
-
err = kvm_vcpu_init(&svm->vcpu, kvm, id);
if (err)
goto free_svm;
for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
- if (static_cpu_has(X86_FEATURE_TSCRATEMSR) &&
- svm->tsc_ratio != __this_cpu_read(current_tsc_ratio)) {
- __this_cpu_write(current_tsc_ratio, svm->tsc_ratio);
- wrmsrl(MSR_AMD64_TSC_RATIO, svm->tsc_ratio);
+ if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
+ u64 tsc_ratio = vcpu->arch.tsc_scaling_ratio;
+ if (tsc_ratio != __this_cpu_read(current_tsc_ratio)) {
+ __this_cpu_write(current_tsc_ratio, tsc_ratio);
+ wrmsrl(MSR_AMD64_TSC_RATIO, tsc_ratio);
+ }
}
}
mark_dirty(svm->vmcb, VMCB_SEG);
}
-static void update_db_bp_intercept(struct kvm_vcpu *vcpu)
+static void update_bp_intercept(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
- clr_exception_intercept(svm, DB_VECTOR);
clr_exception_intercept(svm, BP_VECTOR);
- if (svm->nmi_singlestep)
- set_exception_intercept(svm, DB_VECTOR);
-
if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
- if (vcpu->guest_debug &
- (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
- set_exception_intercept(svm, DB_VECTOR);
if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
set_exception_intercept(svm, BP_VECTOR);
} else
if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP))
svm->vmcb->save.rflags &=
~(X86_EFLAGS_TF | X86_EFLAGS_RF);
- update_db_bp_intercept(&svm->vcpu);
}
if (svm->vcpu.guest_debug &
return 1;
}
+static int ac_interception(struct vcpu_svm *svm)
+{
+ kvm_queue_exception_e(&svm->vcpu, AC_VECTOR, 0);
+ return 1;
+}
+
static void svm_fpu_activate(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
static u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
{
struct vmcb *vmcb = get_host_vmcb(to_svm(vcpu));
- return vmcb->control.tsc_offset +
- svm_scale_tsc(vcpu, host_tsc);
+ return vmcb->control.tsc_offset + host_tsc;
}
static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
switch (msr_info->index) {
case MSR_IA32_TSC: {
msr_info->data = svm->vmcb->control.tsc_offset +
- svm_scale_tsc(vcpu, rdtsc());
+ kvm_scale_tsc(vcpu, rdtsc());
break;
}
[SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception,
[SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception,
[SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception,
+ [SVM_EXIT_EXCP_BASE + AC_VECTOR] = ac_interception,
[SVM_EXIT_INTR] = intr_interception,
[SVM_EXIT_NMI] = nmi_interception,
[SVM_EXIT_SMI] = nop_on_interception,
*/
svm->nmi_singlestep = true;
svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
- update_db_bp_intercept(vcpu);
}
static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
.vcpu_load = svm_vcpu_load,
.vcpu_put = svm_vcpu_put,
- .update_db_bp_intercept = update_db_bp_intercept,
+ .update_bp_intercept = update_bp_intercept,
.get_msr = svm_get_msr,
.set_msr = svm_set_msr,
.get_segment_base = svm_get_segment_base,
.has_wbinvd_exit = svm_has_wbinvd_exit,
- .set_tsc_khz = svm_set_tsc_khz,
.read_tsc_offset = svm_read_tsc_offset,
.write_tsc_offset = svm_write_tsc_offset,
- .adjust_tsc_offset = svm_adjust_tsc_offset,
- .compute_tsc_offset = svm_compute_tsc_offset,
+ .adjust_tsc_offset_guest = svm_adjust_tsc_offset_guest,
.read_l1_tsc = svm_read_l1_tsc,
.set_tdp_cr3 = set_tdp_cr3,
static bool __read_mostly enable_pml = 1;
module_param_named(pml, enable_pml, bool, S_IRUGO);
+#define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL
+
#define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD)
#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE)
#define KVM_VM_CR0_ALWAYS_ON \
return vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_ENABLE_PML;
}
+static inline bool cpu_has_vmx_tsc_scaling(void)
+{
+ return vmcs_config.cpu_based_2nd_exec_ctrl &
+ SECONDARY_EXEC_TSC_SCALING;
+}
+
static inline bool report_flexpriority(void)
{
return flexpriority_enabled;
u32 eb;
eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) |
- (1u << NM_VECTOR) | (1u << DB_VECTOR);
+ (1u << NM_VECTOR) | (1u << DB_VECTOR) | (1u << AC_VECTOR);
if ((vcpu->guest_debug &
(KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) ==
(KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP))
rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
+
+ /* Setup TSC multiplier */
+ if (cpu_has_vmx_tsc_scaling())
+ vmcs_write64(TSC_MULTIPLIER,
+ vcpu->arch.tsc_scaling_ratio);
+
vmx->loaded_vmcs->cpu = cpu;
}
/*
* reads and returns guest's timestamp counter "register"
- * guest_tsc = host_tsc + tsc_offset -- 21.3
+ * guest_tsc = (host_tsc * tsc multiplier) >> 48 + tsc_offset
+ * -- Intel TSC Scaling for Virtualization White Paper, sec 1.3
*/
-static u64 guest_read_tsc(void)
+static u64 guest_read_tsc(struct kvm_vcpu *vcpu)
{
u64 host_tsc, tsc_offset;
host_tsc = rdtsc();
tsc_offset = vmcs_read64(TSC_OFFSET);
- return host_tsc + tsc_offset;
+ return kvm_scale_tsc(vcpu, host_tsc) + tsc_offset;
}
/*
return host_tsc + tsc_offset;
}
-/*
- * Engage any workarounds for mis-matched TSC rates. Currently limited to
- * software catchup for faster rates on slower CPUs.
- */
-static void vmx_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
-{
- if (!scale)
- return;
-
- if (user_tsc_khz > tsc_khz) {
- vcpu->arch.tsc_catchup = 1;
- vcpu->arch.tsc_always_catchup = 1;
- } else
- WARN(1, "user requested TSC rate below hardware speed\n");
-}
-
static u64 vmx_read_tsc_offset(struct kvm_vcpu *vcpu)
{
return vmcs_read64(TSC_OFFSET);
}
}
-static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool host)
+static void vmx_adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, s64 adjustment)
{
u64 offset = vmcs_read64(TSC_OFFSET);
offset + adjustment);
}
-static u64 vmx_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
-{
- return target_tsc - rdtsc();
-}
-
static bool guest_cpuid_has_vmx(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best = kvm_find_cpuid_entry(vcpu, 1, 0);
case MSR_EFER:
return kvm_get_msr_common(vcpu, msr_info);
case MSR_IA32_TSC:
- msr_info->data = guest_read_tsc();
+ msr_info->data = guest_read_tsc(vcpu);
break;
case MSR_IA32_SYSENTER_CS:
msr_info->data = vmcs_read32(GUEST_SYSENTER_CS);
SECONDARY_EXEC_SHADOW_VMCS |
SECONDARY_EXEC_XSAVES |
SECONDARY_EXEC_ENABLE_PML |
- SECONDARY_EXEC_PCOMMIT;
+ SECONDARY_EXEC_PCOMMIT |
+ SECONDARY_EXEC_TSC_SCALING;
if (adjust_vmx_controls(min2, opt2,
MSR_IA32_VMX_PROCBASED_CTLS2,
&_cpu_based_2nd_exec_control) < 0)
return handle_rmode_exception(vcpu, ex_no, error_code);
switch (ex_no) {
+ case AC_VECTOR:
+ kvm_queue_exception_e(vcpu, AC_VECTOR, error_code);
+ return 1;
case DB_VECTOR:
dr6 = vmcs_readl(EXIT_QUALIFICATION);
if (!(vcpu->guest_debug &
return 1;
}
- ret = handle_mmio_page_fault_common(vcpu, gpa, true);
+ ret = handle_mmio_page_fault(vcpu, gpa, true);
if (likely(ret == RET_MMIO_PF_EMULATE))
return x86_emulate_instruction(vcpu, gpa, 0, NULL, 0) ==
EMULATE_DONE;
if (!cpu_has_vmx_apicv())
enable_apicv = 0;
+ if (cpu_has_vmx_tsc_scaling()) {
+ kvm_has_tsc_control = true;
+ kvm_max_tsc_scaling_ratio = KVM_VMX_TSC_MULTIPLIER_MAX;
+ kvm_tsc_scaling_ratio_frac_bits = 48;
+ }
+
if (enable_apicv)
kvm_x86_ops->update_cr8_intercept = NULL;
else {
vmcs_read32(IDT_VECTORING_INFO_FIELD),
vmcs_read32(IDT_VECTORING_ERROR_CODE));
pr_err("TSC Offset = 0x%016lx\n", vmcs_readl(TSC_OFFSET));
+ if (secondary_exec_control & SECONDARY_EXEC_TSC_SCALING)
+ pr_err("TSC Multiplier = 0x%016lx\n",
+ vmcs_readl(TSC_MULTIPLIER));
if (cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW)
pr_err("TPR Threshold = 0x%02x\n", vmcs_read32(TPR_THRESHOLD));
if (pin_based_exec_ctrl & PIN_BASED_POSTED_INTR)
.vcpu_load = vmx_vcpu_load,
.vcpu_put = vmx_vcpu_put,
- .update_db_bp_intercept = update_exception_bitmap,
+ .update_bp_intercept = update_exception_bitmap,
.get_msr = vmx_get_msr,
.set_msr = vmx_set_msr,
.get_segment_base = vmx_get_segment_base,
.has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
- .set_tsc_khz = vmx_set_tsc_khz,
.read_tsc_offset = vmx_read_tsc_offset,
.write_tsc_offset = vmx_write_tsc_offset,
- .adjust_tsc_offset = vmx_adjust_tsc_offset,
- .compute_tsc_offset = vmx_compute_tsc_offset,
+ .adjust_tsc_offset_guest = vmx_adjust_tsc_offset_guest,
.read_l1_tsc = vmx_read_l1_tsc,
.set_tdp_cr3 = vmx_set_cr3,
static void process_nmi(struct kvm_vcpu *vcpu);
static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
-struct kvm_x86_ops *kvm_x86_ops;
+struct kvm_x86_ops *kvm_x86_ops __read_mostly;
EXPORT_SYMBOL_GPL(kvm_x86_ops);
-static bool ignore_msrs = 0;
+static bool __read_mostly ignore_msrs = 0;
module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR);
unsigned int min_timer_period_us = 500;
static bool __read_mostly kvmclock_periodic_sync = true;
module_param(kvmclock_periodic_sync, bool, S_IRUGO);
-bool kvm_has_tsc_control;
+bool __read_mostly kvm_has_tsc_control;
EXPORT_SYMBOL_GPL(kvm_has_tsc_control);
-u32 kvm_max_guest_tsc_khz;
+u32 __read_mostly kvm_max_guest_tsc_khz;
EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz);
+u8 __read_mostly kvm_tsc_scaling_ratio_frac_bits;
+EXPORT_SYMBOL_GPL(kvm_tsc_scaling_ratio_frac_bits);
+u64 __read_mostly kvm_max_tsc_scaling_ratio;
+EXPORT_SYMBOL_GPL(kvm_max_tsc_scaling_ratio);
+static u64 __read_mostly kvm_default_tsc_scaling_ratio;
/* tsc tolerance in parts per million - default to 1/2 of the NTP threshold */
-static u32 tsc_tolerance_ppm = 250;
+static u32 __read_mostly tsc_tolerance_ppm = 250;
module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
/* lapic timer advance (tscdeadline mode only) in nanoseconds */
-unsigned int lapic_timer_advance_ns = 0;
+unsigned int __read_mostly lapic_timer_advance_ns = 0;
module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR);
-static bool backwards_tsc_observed = false;
+static bool __read_mostly backwards_tsc_observed = false;
#define KVM_NR_SHARED_MSRS 16
return v;
}
-static void kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz)
+static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
+{
+ u64 ratio;
+
+ /* Guest TSC same frequency as host TSC? */
+ if (!scale) {
+ vcpu->arch.tsc_scaling_ratio = kvm_default_tsc_scaling_ratio;
+ return 0;
+ }
+
+ /* TSC scaling supported? */
+ if (!kvm_has_tsc_control) {
+ if (user_tsc_khz > tsc_khz) {
+ vcpu->arch.tsc_catchup = 1;
+ vcpu->arch.tsc_always_catchup = 1;
+ return 0;
+ } else {
+ WARN(1, "user requested TSC rate below hardware speed\n");
+ return -1;
+ }
+ }
+
+ /* TSC scaling required - calculate ratio */
+ ratio = mul_u64_u32_div(1ULL << kvm_tsc_scaling_ratio_frac_bits,
+ user_tsc_khz, tsc_khz);
+
+ if (ratio == 0 || ratio >= kvm_max_tsc_scaling_ratio) {
+ WARN_ONCE(1, "Invalid TSC scaling ratio - virtual-tsc-khz=%u\n",
+ user_tsc_khz);
+ return -1;
+ }
+
+ vcpu->arch.tsc_scaling_ratio = ratio;
+ return 0;
+}
+
+static int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz)
{
u32 thresh_lo, thresh_hi;
int use_scaling = 0;
/* tsc_khz can be zero if TSC calibration fails */
- if (this_tsc_khz == 0)
- return;
+ if (this_tsc_khz == 0) {
+ /* set tsc_scaling_ratio to a safe value */
+ vcpu->arch.tsc_scaling_ratio = kvm_default_tsc_scaling_ratio;
+ return -1;
+ }
/* Compute a scale to convert nanoseconds in TSC cycles */
kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000,
pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", this_tsc_khz, thresh_lo, thresh_hi);
use_scaling = 1;
}
- kvm_x86_ops->set_tsc_khz(vcpu, this_tsc_khz, use_scaling);
+ return set_tsc_khz(vcpu, this_tsc_khz, use_scaling);
}
static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
vcpu->arch.ia32_tsc_adjust_msr += offset - curr_offset;
}
+/*
+ * Multiply tsc by a fixed point number represented by ratio.
+ *
+ * The most significant 64-N bits (mult) of ratio represent the
+ * integral part of the fixed point number; the remaining N bits
+ * (frac) represent the fractional part, i.e. ratio represents a fixed
+ * point number (mult + frac * 2^(-N)).
+ *
+ * N equals kvm_tsc_scaling_ratio_frac_bits.
+ */
+static inline u64 __scale_tsc(u64 ratio, u64 tsc)
+{
+ return mul_u64_u64_shr(tsc, ratio, kvm_tsc_scaling_ratio_frac_bits);
+}
+
+u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc)
+{
+ u64 _tsc = tsc;
+ u64 ratio = vcpu->arch.tsc_scaling_ratio;
+
+ if (ratio != kvm_default_tsc_scaling_ratio)
+ _tsc = __scale_tsc(ratio, tsc);
+
+ return _tsc;
+}
+EXPORT_SYMBOL_GPL(kvm_scale_tsc);
+
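/*
 * A worked example of the fixed point math above (the numbers are
 * illustrative): with kvm_tsc_scaling_ratio_frac_bits == 48, as VMX
 * sets it, a 1500000 kHz guest on a 1000000 kHz host gets, from
 * set_tsc_khz(),
 *
 *	ratio = (1ULL << 48) * 1500000 / 1000000	(i.e. 1.5 * 2^48)
 *
 * so __scale_tsc() returns (tsc * ratio) >> 48 == 1.5 * tsc.
 */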
+static u64 kvm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
+{
+ u64 tsc;
+
+ tsc = kvm_scale_tsc(vcpu, rdtsc());
+
+ return target_tsc - tsc;
+}
+
+u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
+{
+ return kvm_x86_ops->read_l1_tsc(vcpu, kvm_scale_tsc(vcpu, host_tsc));
+}
+EXPORT_SYMBOL_GPL(kvm_read_l1_tsc);
+
void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
{
struct kvm *kvm = vcpu->kvm;
u64 data = msr->data;
raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
- offset = kvm_x86_ops->compute_tsc_offset(vcpu, data);
+ offset = kvm_compute_tsc_offset(vcpu, data);
ns = get_kernel_ns();
elapsed = ns - kvm->arch.last_tsc_nsec;
} else {
u64 delta = nsec_to_cycles(vcpu, elapsed);
data += delta;
- offset = kvm_x86_ops->compute_tsc_offset(vcpu, data);
+ offset = kvm_compute_tsc_offset(vcpu, data);
pr_debug("kvm: adjusted tsc offset by %llu\n", delta);
}
matched = true;
EXPORT_SYMBOL_GPL(kvm_write_tsc);
+static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
+ s64 adjustment)
+{
+ kvm_x86_ops->adjust_tsc_offset_guest(vcpu, adjustment);
+}
+
+static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment)
+{
+ if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio)
+ WARN_ON(adjustment < 0);
+ adjustment = kvm_scale_tsc(vcpu, (u64) adjustment);
+ kvm_x86_ops->adjust_tsc_offset_guest(vcpu, adjustment);
+}
+
#ifdef CONFIG_X86_64
static cycle_t read_tsc(void)
static int kvm_guest_time_update(struct kvm_vcpu *v)
{
- unsigned long flags, this_tsc_khz;
+ unsigned long flags, this_tsc_khz, tgt_tsc_khz;
struct kvm_vcpu_arch *vcpu = &v->arch;
struct kvm_arch *ka = &v->kvm->arch;
s64 kernel_ns;
kernel_ns = get_kernel_ns();
}
- tsc_timestamp = kvm_x86_ops->read_l1_tsc(v, host_tsc);
+ tsc_timestamp = kvm_read_l1_tsc(v, host_tsc);
/*
* We may have to catch up the TSC to match elapsed wall clock
return 0;
if (unlikely(vcpu->hw_tsc_khz != this_tsc_khz)) {
- kvm_get_time_scale(NSEC_PER_SEC / 1000, this_tsc_khz,
+ tgt_tsc_khz = kvm_has_tsc_control ?
+ vcpu->virtual_tsc_khz : this_tsc_khz;
+ kvm_get_time_scale(NSEC_PER_SEC / 1000, tgt_tsc_khz,
&vcpu->hv_clock.tsc_shift,
&vcpu->hv_clock.tsc_to_system_mul);
vcpu->hw_tsc_khz = this_tsc_khz;
if (tsc_delta < 0)
mark_tsc_unstable("KVM discovered backwards TSC");
if (check_tsc_unstable()) {
- u64 offset = kvm_x86_ops->compute_tsc_offset(vcpu,
+ u64 offset = kvm_compute_tsc_offset(vcpu,
vcpu->arch.last_guest_tsc);
kvm_x86_ops->write_tsc_offset(vcpu, offset);
vcpu->arch.tsc_catchup = 1;
if (user_tsc_khz == 0)
user_tsc_khz = tsc_khz;
- kvm_set_tsc_khz(vcpu, user_tsc_khz);
+ if (!kvm_set_tsc_khz(vcpu, user_tsc_khz))
+ r = 0;
- r = 0;
goto out;
}
case KVM_GET_TSC_KHZ: {
if (hw_breakpoint_active())
hw_breakpoint_restore();
- vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu,
- rdtsc());
+ vcpu->arch.last_guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
vcpu->mode = OUTSIDE_GUEST_MODE;
smp_wmb();
*/
kvm_set_rflags(vcpu, rflags);
- kvm_x86_ops->update_db_bp_intercept(vcpu);
+ kvm_x86_ops->update_bp_intercept(vcpu);
r = 0;
if (r != 0)
return r;
+ if (kvm_has_tsc_control) {
+ /*
+ * Make sure the user can only configure tsc_khz values that
+ * fit into a signed integer.
+	 * A min value need not be calculated because it will always
+ * be 1 on all machines.
+ */
+ u64 max = min(0x7fffffffULL,
+ __scale_tsc(kvm_max_tsc_scaling_ratio, tsc_khz));
+ kvm_max_guest_tsc_khz = max;
+
+ kvm_default_tsc_scaling_ratio = 1ULL << kvm_tsc_scaling_ratio_frac_bits;
+ }
+
kvm_init_msr_list();
return 0;
}
pdevinfo.res = resources;
pdevinfo.num_res = count;
pdevinfo.fwnode = acpi_fwnode_handle(adev);
- pdevinfo.dma_mask = acpi_check_dma(adev, NULL) ? DMA_BIT_MASK(32) : 0;
+
+ if (acpi_dma_supported(adev))
+ pdevinfo.dma_mask = DMA_BIT_MASK(32);
+ else
+ pdevinfo.dma_mask = 0;
+
pdev = platform_device_register_full(&pdevinfo);
if (IS_ERR(pdev))
dev_err(&adev->dev, "platform device creation failed: %ld\n",
static int disable_backlight_sysfs_if = -1;
module_param(disable_backlight_sysfs_if, int, 0444);
+static bool device_id_scheme = false;
+module_param(device_id_scheme, bool, 0444);
+
+static bool only_lcd = false;
+module_param(only_lcd, bool, 0444);
+
static int register_count;
static DEFINE_MUTEX(register_count_mutex);
static struct mutex video_list_lock;
return 0;
}
+static int video_set_device_id_scheme(const struct dmi_system_id *d)
+{
+ device_id_scheme = true;
+ return 0;
+}
+
+static int video_enable_only_lcd(const struct dmi_system_id *d)
+{
+ only_lcd = true;
+ return 0;
+}
+
static struct dmi_system_id video_dmi_table[] = {
/*
* Broken _BQC workaround http://bugzilla.kernel.org/show_bug.cgi?id=13121
DMI_MATCH(DMI_PRODUCT_NAME, "PORTEGE R830"),
},
},
+ /*
+	 * Some machines' _DOD IDs don't have bit 31 (Device ID Scheme) set
+ * but the IDs actually follow the Device ID Scheme.
+ */
+ {
+ /* https://bugzilla.kernel.org/show_bug.cgi?id=104121 */
+ .callback = video_set_device_id_scheme,
+ .ident = "ESPRIMO Mobile M9410",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "ESPRIMO Mobile M9410"),
+ },
+ },
+ /*
+	 * Some machines have multiple video output devices, but only the
+	 * LCD one can do the backlight control, so we should not register
+	 * a backlight interface for the other video output devices.
+ */
+ {
+ /* https://bugzilla.kernel.org/show_bug.cgi?id=104121 */
+ .callback = video_enable_only_lcd,
+ .ident = "ESPRIMO Mobile M9410",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "ESPRIMO Mobile M9410"),
+ },
+ },
{}
};
attribute = acpi_video_get_device_attr(video, device_id);
- if (attribute && attribute->device_id_scheme) {
+ if (attribute && (attribute->device_id_scheme || device_id_scheme)) {
switch (attribute->display_type) {
case ACPI_VIDEO_DISPLAY_CRT:
data->flags.crt = 1;
static int count;
char *name;
- /*
- * Do not create backlight device for video output
- * device that is not in the enumerated list.
- */
- if (!acpi_video_device_in_dod(device)) {
- dev_dbg(&device->dev->dev, "not in _DOD list, ignore\n");
- return;
- }
-
result = acpi_video_init_brightness(device);
if (result)
return;
mutex_unlock(&video->device_list_lock);
}
+static bool acpi_video_should_register_backlight(struct acpi_video_device *dev)
+{
+ /*
+ * Do not create backlight device for video output
+ * device that is not in the enumerated list.
+ */
+ if (!acpi_video_device_in_dod(dev)) {
+ dev_dbg(&dev->dev->dev, "not in _DOD list, ignore\n");
+ return false;
+ }
+
+ if (only_lcd)
+ return dev->flags.lcd;
+ return true;
+}
+
static int acpi_video_bus_register_backlight(struct acpi_video_bus *video)
{
struct acpi_video_device *dev;
return 0;
mutex_lock(&video->device_list_lock);
- list_for_each_entry(dev, &video->video_device_list, entry)
- acpi_video_dev_register_backlight(dev);
+ list_for_each_entry(dev, &video->video_device_list, entry) {
+ if (acpi_video_should_register_backlight(dev))
+ acpi_video_dev_register_backlight(dev);
+ }
mutex_unlock(&video->device_list_lock);
video->backlight_registered = true;
struct list_head *physnode_list;
unsigned int node_id;
int retval = -EINVAL;
- bool coherent;
+ enum dev_dma_attr attr;
if (has_acpi_companion(dev)) {
if (acpi_dev) {
if (!has_acpi_companion(dev))
ACPI_COMPANION_SET(dev, acpi_dev);
- if (acpi_check_dma(acpi_dev, &coherent))
- arch_setup_dma_ops(dev, 0, 0, NULL, coherent);
+ attr = acpi_get_dma_attr(acpi_dev);
+ if (attr != DEV_DMA_NOT_SUPPORTED)
+ arch_setup_dma_ops(dev, 0, 0, NULL,
+ attr == DEV_DMA_COHERENT);
acpi_physnode_link_name(physical_node_name, node_id);
retval = sysfs_create_link(&acpi_dev->dev.kobj, &dev->kobj,
kfree(pnp->unique_id);
}
+/**
+ * acpi_dma_supported - Check DMA support for the specified device.
+ * @adev: The pointer to acpi device
+ *
+ * Return false if DMA is not supported. Otherwise, return true.
+ */
+bool acpi_dma_supported(struct acpi_device *adev)
+{
+ if (!adev)
+ return false;
+
+ if (adev->flags.cca_seen)
+ return true;
+
+ /*
+ * Per ACPI 6.0 sec 6.2.17, assume devices can do cache-coherent
+ * DMA on "Intel platforms". Presumably that includes all x86 and
+ * ia64, and other arches will set CONFIG_ACPI_CCA_REQUIRED=y.
+ */
+ if (!IS_ENABLED(CONFIG_ACPI_CCA_REQUIRED))
+ return true;
+
+ return false;
+}
+
+/**
+ * acpi_get_dma_attr - Check the supported DMA attr for the specified device.
+ * @adev: The pointer to acpi device
+ *
+ * Return enum dev_dma_attr.
+ */
+enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev)
+{
+ if (!acpi_dma_supported(adev))
+ return DEV_DMA_NOT_SUPPORTED;
+
+ if (adev->flags.coherent_dma)
+ return DEV_DMA_COHERENT;
+ else
+ return DEV_DMA_NON_COHERENT;
+}
+
static void acpi_init_coherency(struct acpi_device *adev)
{
unsigned long long cca = 0;
"900X3C/900X3D/900X3E/900X4C/900X4D"),
},
},
+ {
+ /* https://bugzilla.redhat.com/show_bug.cgi?id=1272633 */
+ .callback = video_detect_force_video,
+ .ident = "Dell XPS14 L421X",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "XPS L421X"),
+ },
+ },
{
/* https://bugzilla.redhat.com/show_bug.cgi?id=1163574 */
.callback = video_detect_force_video,
if (stat > PM_QOS_FLAGS_NONE)
return -EBUSY;
- if (pdd->dev->driver && (!pm_runtime_suspended(pdd->dev)
- || pdd->dev->power.irq_safe))
+ if (!pm_runtime_suspended(pdd->dev) || pdd->dev->power.irq_safe)
not_suspended++;
}
int pm_genpd_add_subdomain(struct generic_pm_domain *genpd,
struct generic_pm_domain *subdomain)
{
- struct gpd_link *link;
+ struct gpd_link *link, *itr;
int ret = 0;
if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(subdomain)
|| genpd == subdomain)
return -EINVAL;
+ link = kzalloc(sizeof(*link), GFP_KERNEL);
+ if (!link)
+ return -ENOMEM;
+
mutex_lock(&genpd->lock);
mutex_lock_nested(&subdomain->lock, SINGLE_DEPTH_NESTING);
goto out;
}
- list_for_each_entry(link, &genpd->master_links, master_node) {
- if (link->slave == subdomain && link->master == genpd) {
+ list_for_each_entry(itr, &genpd->master_links, master_node) {
+ if (itr->slave == subdomain && itr->master == genpd) {
ret = -EINVAL;
goto out;
}
}
- link = kzalloc(sizeof(*link), GFP_KERNEL);
- if (!link) {
- ret = -ENOMEM;
- goto out;
- }
link->master = genpd;
list_add_tail(&link->master_node, &genpd->master_links);
link->slave = subdomain;
out:
mutex_unlock(&subdomain->lock);
mutex_unlock(&genpd->lock);
-
+ if (ret)
+ kfree(link);
return ret;
}
EXPORT_SYMBOL_GPL(pm_genpd_add_subdomain);
* published by the Free Software Foundation.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/slab.h>
*/
static LIST_HEAD(dev_opp_list);
/* Lock to allow exclusive modification to the device and opp lists */
-static DEFINE_MUTEX(dev_opp_list_lock);
+DEFINE_MUTEX(dev_opp_list_lock);
#define opp_rcu_lockdep_assert() \
do { \
* Return: pointer to 'struct device_opp' if found, otherwise -ENODEV or
* -EINVAL based on type of error.
*
- * Locking: This function must be called under rcu_read_lock(). device_opp
- * is a RCU protected pointer. This means that device_opp is valid as long
- * as we are under RCU lock.
+ * Locking: For readers, this function must be called under rcu_read_lock().
+ * device_opp is an RCU-protected pointer, which means that device_opp is valid
+ * as long as we are under RCU lock.
+ *
+ * For writers, this function must be called with dev_opp_list_lock held.
*/
struct device_opp *_find_device_opp(struct device *dev)
{
struct device_opp *dev_opp;
+ opp_rcu_lockdep_assert();
+
if (IS_ERR_OR_NULL(dev)) {
pr_err("%s: Invalid parameters\n", __func__);
return ERR_PTR(-EINVAL);
}
/**
- * _opp_add_dynamic() - Allocate a dynamic OPP.
+ * _opp_add_v1() - Allocate an OPP based on v1 bindings.
* @dev: device for which we do this operation
* @freq: Frequency in Hz for this OPP
* @u_volt: Voltage in uVolts for this OPP
* Duplicate OPPs (both freq and volt are same) and !opp->available
* -ENOMEM Memory allocation failure
*/
-static int _opp_add_dynamic(struct device *dev, unsigned long freq,
- long u_volt, bool dynamic)
+static int _opp_add_v1(struct device *dev, unsigned long freq, long u_volt,
+ bool dynamic)
{
struct device_opp *dev_opp;
struct dev_pm_opp *new_opp;
}
/* TODO: Support multiple regulators */
-static int opp_get_microvolt(struct dev_pm_opp *opp, struct device *dev)
+static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev)
{
u32 microvolt[3] = {0};
+ u32 val;
int count, ret;
/* Missing property isn't a problem, but an invalid entry is */
opp->u_volt_min = microvolt[1];
opp->u_volt_max = microvolt[2];
+ if (!of_property_read_u32(opp->np, "opp-microamp", &val))
+ opp->u_amp = val;
+
return 0;
}
if (!of_property_read_u32(np, "clock-latency-ns", &val))
new_opp->clock_latency_ns = val;
- ret = opp_get_microvolt(new_opp, dev);
+ ret = opp_parse_supplies(new_opp, dev);
if (ret)
goto free_opp;
- if (!of_property_read_u32(new_opp->np, "opp-microamp", &val))
- new_opp->u_amp = val;
-
ret = _opp_add(dev, new_opp, dev_opp);
if (ret)
goto free_opp;
*/
int dev_pm_opp_add(struct device *dev, unsigned long freq, unsigned long u_volt)
{
- return _opp_add_dynamic(dev, freq, u_volt, true);
+ return _opp_add_v1(dev, freq, u_volt, true);
}
EXPORT_SYMBOL_GPL(dev_pm_opp_add);
struct device_opp *dev_opp;
int ret = 0, count = 0;
+ mutex_lock(&dev_opp_list_lock);
+
dev_opp = _managed_opp(opp_np);
if (dev_opp) {
/* OPPs are already managed */
if (!_add_list_dev(dev, dev_opp))
ret = -ENOMEM;
+ mutex_unlock(&dev_opp_list_lock);
return ret;
}
+ mutex_unlock(&dev_opp_list_lock);
/* We have opp-list node now, iterate over it and add OPPs */
for_each_available_child_of_node(opp_np, np) {
if (WARN_ON(!count))
return -ENOENT;
+ mutex_lock(&dev_opp_list_lock);
+
dev_opp = _find_device_opp(dev);
if (WARN_ON(IS_ERR(dev_opp))) {
ret = PTR_ERR(dev_opp);
+ mutex_unlock(&dev_opp_list_lock);
goto free_table;
}
dev_opp->np = opp_np;
dev_opp->shared_opp = of_property_read_bool(opp_np, "opp-shared");
+ mutex_unlock(&dev_opp_list_lock);
+
return 0;
free_table:
unsigned long freq = be32_to_cpup(val++) * 1000;
unsigned long volt = be32_to_cpup(val++);
- if (_opp_add_dynamic(dev, freq, volt, false))
+ if (_opp_add_v1(dev, freq, volt, false))
dev_warn(dev, "%s: Failed to add OPP %ld\n",
__func__, freq);
nr -= 2;
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/err.h>
struct device *dev;
int cpu, ret = 0;
- rcu_read_lock();
+ mutex_lock(&dev_opp_list_lock);
dev_opp = _find_device_opp(cpu_dev);
if (IS_ERR(dev_opp)) {
ret = -EINVAL;
- goto out_rcu_read_unlock;
+ goto unlock;
}
for_each_cpu(cpu, cpumask) {
continue;
}
}
-out_rcu_read_unlock:
- rcu_read_unlock();
+unlock:
+ mutex_unlock(&dev_opp_list_lock);
- return 0;
+ return ret;
}
EXPORT_SYMBOL_GPL(dev_pm_opp_set_sharing_cpus);
#include <linux/rculist.h>
#include <linux/rcupdate.h>
+/* Lock to allow exclusive modification to the device and opp lists */
+extern struct mutex dev_opp_list_lock;
+
/*
* Internal data structure organization with the OPP layer library is as
* follows:
}
EXPORT_SYMBOL_GPL(device_get_child_node_count);
-bool device_dma_is_coherent(struct device *dev)
+bool device_dma_supported(struct device *dev)
{
- bool coherent = false;
-
+ /* For DT, this is always supported.
+ * For ACPI, this depends on CCA, which
+	 * is determined by acpi_dma_supported().
+ */
if (IS_ENABLED(CONFIG_OF) && dev->of_node)
- coherent = of_dma_is_coherent(dev->of_node);
- else
- acpi_check_dma(ACPI_COMPANION(dev), &coherent);
+ return true;
+
+ return acpi_dma_supported(ACPI_COMPANION(dev));
+}
+EXPORT_SYMBOL_GPL(device_dma_supported);
- return coherent;
+enum dev_dma_attr device_get_dma_attr(struct device *dev)
+{
+ enum dev_dma_attr attr = DEV_DMA_NOT_SUPPORTED;
+
+ if (IS_ENABLED(CONFIG_OF) && dev->of_node) {
+ if (of_dma_is_coherent(dev->of_node))
+ attr = DEV_DMA_COHERENT;
+ else
+ attr = DEV_DMA_NON_COHERENT;
+ } else
+ attr = acpi_get_dma_attr(ACPI_COMPANION(dev));
+
+ return attr;
}
-EXPORT_SYMBOL_GPL(device_dma_is_coherent);
+EXPORT_SYMBOL_GPL(device_get_dma_attr);
/**
* device_get_phy_mode - Get phy mode for given device
const char *parent_name;
void __iomem *divcr = NULL;
int width;
+ int offset;
num_parents = of_clk_get_parent_count(node);
if (num_parents < 1) {
pr_err("%s: failed to map divide register", clk_name);
goto error;
}
+ offset = (unsigned long)divcr & 3;
+ offset = (3 - offset) * 8;
+ divcr = (void *)((unsigned long)divcr & ~3);
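	/*
	 * E.g. for the DIVCR at 0xffff3b in the dts above: the low two bits
	 * of the mapped address are 3, so offset becomes (3 - 3) * 8 = 0 and
	 * divcr is rounded down to the containing 32-bit word.
	 */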
parent_name = of_clk_get_parent_name(node, 0);
of_property_read_u32(node, "renesas,width", &width);
clk = clk_register_divider(NULL, clk_name, parent_name,
- CLK_SET_RATE_GATE, divcr, 0, width,
+ CLK_SET_RATE_GATE, divcr, offset, width,
CLK_DIVIDER_POWER_OF_TWO, &clklock);
if (!IS_ERR(clk)) {
of_clk_add_provider(node, of_clk_src_simple_get, clk);
__func__, cpu, old_cluster, new_cluster, new_rate);
ret = clk_set_rate(clk[new_cluster], new_rate * 1000);
+ if (!ret) {
+ /*
+		 * FIXME: clk_set_rate hasn't returned an error here; however, it
+ * may be that clk_change_rate failed due to hardware or
+ * firmware issues and wasn't able to report that due to the
+ * current design of the clk core layer. To work around this
+ * problem we will read back the clock rate and check it is
+ * correct. This needs to be removed once clk core is fixed.
+ */
+ if (clk_get_rate(clk[new_cluster]) != new_rate * 1000)
+ ret = -EIO;
+ }
+
if (WARN_ON(ret)) {
pr_err("clk_set_rate failed: %d, new cluster: %d\n", ret,
new_cluster);
mutex_unlock(&cluster_lock[old_cluster]);
}
- /*
- * FIXME: clk_set_rate has to handle the case where clk_change_rate
- * can fail due to hardware or firmware issues. Until the clk core
- * layer is fixed, we can check here. In most of the cases we will
- * be reading only the cached value anyway. This needs to be removed
- * once clk core is fixed.
- */
- if (bL_cpufreq_get_rate(cpu) != new_rate)
- return -EIO;
return 0;
}
out:
for_each_possible_cpu(i)
- if (all_cpu_data[i])
- kfree(all_cpu_data[i]);
+ kfree(all_cpu_data[i]);
kfree(all_cpu_data);
return -ENODEV;
{
int i;
- mutex_lock(&cpufreq_governor_lock);
- if (!policy->governor_enabled)
- goto out_unlock;
-
if (!all_cpus) {
/*
* Use raw_smp_processor_id() to avoid preemptible warnings.
for_each_cpu(i, policy->cpus)
__gov_queue_work(i, dbs_data, delay);
}
-
-out_unlock:
- mutex_unlock(&cpufreq_governor_lock);
}
EXPORT_SYMBOL_GPL(gov_queue_work);
struct cpu_dbs_info *cdbs = container_of(work, struct cpu_dbs_info,
dwork.work);
struct cpu_common_dbs_info *shared = cdbs->shared;
- struct cpufreq_policy *policy = shared->policy;
- struct dbs_data *dbs_data = policy->governor_data;
+ struct cpufreq_policy *policy;
+ struct dbs_data *dbs_data;
unsigned int sampling_rate, delay;
bool modify_all = true;
mutex_lock(&shared->timer_mutex);
+ policy = shared->policy;
+
+ /*
+ * Governor might already be disabled and there is no point continuing
+ * with the work-handler.
+ */
+ if (!policy)
+ goto unlock;
+
+ dbs_data = policy->governor_data;
+
if (dbs_data->cdata->governor == GOV_CONSERVATIVE) {
struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
delay = dbs_data->cdata->gov_dbs_timer(cdbs, dbs_data, modify_all);
gov_queue_work(dbs_data, policy, delay, modify_all);
+unlock:
mutex_unlock(&shared->timer_mutex);
}
if (!shared || !shared->policy)
return -EBUSY;
+ /*
+ * Work-handler must see this updated, as it should not proceed any
+	 * further after the governor is disabled, so timer_mutex is taken while
+ * updating this value.
+ */
+ mutex_lock(&shared->timer_mutex);
+ shared->policy = NULL;
+ mutex_unlock(&shared->timer_mutex);
+
gov_cancel_work(dbs_data, policy);
- shared->policy = NULL;
mutex_destroy(&shared->timer_mutex);
return 0;
}
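/*
 * The NULL assignment above pairs with the work-handler hunk earlier:
 * once shared->policy is cleared under timer_mutex, the handler sees
 * policy == NULL and bails out before touching governor data, so
 * gov_cancel_work() can no longer race with an already-queued handler.
 */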
static void intel_pstate_hwp_enable(struct cpudata *cpudata)
{
- pr_info("intel_pstate: HWP enabled\n");
-
wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1);
}
if (!all_cpu_data)
return -ENOMEM;
- if (static_cpu_has_safe(X86_FEATURE_HWP) && !no_hwp)
+ if (static_cpu_has_safe(X86_FEATURE_HWP) && !no_hwp) {
+ pr_info("intel_pstate: HWP enabled\n");
hwp_active++;
+ }
if (!hwp_active && hwp_only)
goto out;
if (!strcmp(str, "disable"))
no_load = 1;
- if (!strcmp(str, "no_hwp"))
+ if (!strcmp(str, "no_hwp")) {
+ pr_info("intel_pstate: HWP disabled\n");
no_hwp = 1;
+ }
if (!strcmp(str, "force"))
force_load = 1;
if (!strcmp(str, "hwp_only"))
/* Find current DRAM frequency */
tmp = s5pv210_dram_conf[ch].freq;
- do_div(tmp, freq);
+ tmp /= freq;
tmp1 = s5pv210_dram_conf[ch].refresh;
- do_div(tmp1, tmp);
+ tmp1 /= tmp;
__raw_writel(tmp1, reg);
}
struct ccp_device *ccp;
struct ccp_platform *ccp_platform;
struct device *dev = &pdev->dev;
+ enum dev_dma_attr attr;
struct resource *ior;
int ret;
}
ccp->io_regs = ccp->io_map;
- ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48));
- if (ret) {
- dev_err(dev, "dma_set_mask_and_coherent failed (%d)\n", ret);
+ attr = device_get_dma_attr(dev);
+ if (attr == DEV_DMA_NOT_SUPPORTED) {
+		dev_err(dev, "DMA is not supported\n");
goto e_err;
}
- ccp_platform->coherent = device_dma_is_coherent(ccp->dev);
+ ccp_platform->coherent = (attr == DEV_DMA_COHERENT);
if (ccp_platform->coherent)
ccp->axcache = CACHE_WB_NO_ALLOC;
else
ccp->axcache = CACHE_NONE;
+ ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48));
+ if (ret) {
+ dev_err(dev, "dma_set_mask_and_coherent failed (%d)\n", ret);
+ goto e_err;
+ }
+
dev_set_drvdata(dev, ccp);
ret = ccp_init(ccp);
struct resource *res;
const char *phy_mode;
unsigned int i, phy_memnum, phy_irqnum;
+ enum dev_dma_attr attr;
int ret;
DBGPR("--> xgbe_probe\n");
goto err_io;
/* Set the DMA coherency values */
- pdata->coherent = device_dma_is_coherent(pdata->dev);
+ attr = device_get_dma_attr(dev);
+ if (attr == DEV_DMA_NOT_SUPPORTED) {
+		dev_err(dev, "DMA is not supported\n");
+ goto err_io;
+ }
+ pdata->coherent = (attr == DEV_DMA_COHERENT);
if (pdata->coherent) {
pdata->axdomain = XGBE_DMA_OS_AXDOMAIN;
pdata->arcache = XGBE_DMA_OS_ARCACHE;
}
EXPORT_SYMBOL_GPL(of_pci_check_probe_only);
-/**
- * of_pci_dma_configure - Setup DMA configuration
- * @dev: ptr to pci_dev struct of the PCI device
- *
- * Function to update PCI devices's DMA configuration using the same
- * info from the OF node of host bridge's parent (if any).
- */
-void of_pci_dma_configure(struct pci_dev *pci_dev)
-{
- struct device *dev = &pci_dev->dev;
- struct device *bridge = pci_get_host_bridge_device(pci_dev);
-
- if (!bridge->parent)
- return;
-
- of_dma_configure(dev, bridge->parent->of_node);
- pci_put_host_bridge_device(bridge);
-}
-EXPORT_SYMBOL_GPL(of_pci_dma_configure);
-
#if defined(CONFIG_OF_ADDRESS)
/**
* of_pci_get_host_bridge_resources - Parse PCI host bridge resources from DT
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/pci.h>
+#include <linux/of_device.h>
#include <linux/of_pci.h>
#include <linux/pci_hotplug.h>
#include <linux/slab.h>
#include <linux/cpumask.h>
#include <linux/pci-aspm.h>
#include <linux/aer.h>
+#include <linux/acpi.h>
#include <asm-generic/pci-bridge.h>
#include "pci.h"
dev_set_msi_domain(&dev->dev, d);
}
+/**
+ * pci_dma_configure - Setup DMA configuration
+ * @dev: ptr to pci_dev struct of the PCI device
+ *
+ * Function to update a PCI device's DMA configuration using the same
+ * info from the OF node or ACPI node of the host bridge's parent (if any).
+ */
+static void pci_dma_configure(struct pci_dev *dev)
+{
+ struct device *bridge = pci_get_host_bridge_device(dev);
+
+ if (IS_ENABLED(CONFIG_OF) && dev->dev.of_node) {
+ if (bridge->parent)
+ of_dma_configure(&dev->dev, bridge->parent->of_node);
+ } else if (has_acpi_companion(bridge)) {
+ struct acpi_device *adev = to_acpi_device_node(bridge->fwnode);
+ enum dev_dma_attr attr = acpi_get_dma_attr(adev);
+
+ if (attr == DEV_DMA_NOT_SUPPORTED)
+ dev_warn(&dev->dev, "DMA not supported.\n");
+ else
+ arch_setup_dma_ops(&dev->dev, 0, 0, NULL,
+ attr == DEV_DMA_COHERENT);
+ }
+
+ pci_put_host_bridge_device(bridge);
+}
+
void pci_device_add(struct pci_dev *dev, struct pci_bus *bus)
{
int ret;
dev->dev.dma_mask = &dev->dma_mask;
dev->dev.dma_parms = &dev->dma_parms;
dev->dev.coherent_dma_mask = 0xffffffffull;
- of_pci_dma_configure(dev);
+ pci_dma_configure(dev);
pci_set_dma_max_seg_size(dev, 65536);
pci_set_dma_seg_boundary(dev, 0xffffffff);
struct completion kobj_done;
};
-static inline bool acpi_check_dma(struct acpi_device *adev, bool *coherent)
-{
- bool ret = false;
-
- if (!adev)
- return ret;
-
- /**
- * Currently, we only support _CCA=1 (i.e. coherent_dma=1)
- * This should be equivalent to specifyig dma-coherent for
- * a device in OF.
- *
- * For the case when _CCA=0 (i.e. coherent_dma=0 && cca_seen=1),
- * There are two cases:
- * case 1. Do not support and disable DMA.
- * case 2. Support but rely on arch-specific cache maintenance for
- * non-coherence DMA operations.
- * Currently, we implement case 1 above.
- *
- * For the case when _CCA is missing (i.e. cca_seen=0) and
- * platform specifies ACPI_CCA_REQUIRED, we do not support DMA,
- * and fallback to arch-specific default handling.
- *
- * See acpi_init_coherency() for more info.
- */
- if (adev->flags.coherent_dma) {
- ret = true;
- if (coherent)
- *coherent = adev->flags.coherent_dma;
- }
- return ret;
-}
-
static inline bool is_acpi_node(struct fwnode_handle *fwnode)
{
return fwnode && (fwnode->type == FWNODE_ACPI
/* helper */
+bool acpi_dma_supported(struct acpi_device *adev);
+enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev);
+
struct acpi_device *acpi_find_child_device(struct acpi_device *parent,
u64 address, bool check_children);
int acpi_is_root_bridge(acpi_handle);
#define put_user(x, ptr) \
({ \
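+ /* evaluate ptr only once; it may have side effects */ \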
+ void *__p = (ptr); \
might_fault(); \
- access_ok(VERIFY_WRITE, ptr, sizeof(*ptr)) ? \
- __put_user(x, ptr) : \
+ access_ok(VERIFY_WRITE, __p, sizeof(*ptr)) ? \
+ __put_user((x), ((__typeof__(*(ptr)) *)__p)) : \
-EFAULT; \
})
#define get_user(x, ptr) \
({ \
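+ /* as in put_user(): evaluate ptr only once */ \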
+ const void *__p = (ptr); \
might_fault(); \
- access_ok(VERIFY_READ, ptr, sizeof(*ptr)) ? \
- __get_user(x, ptr) : \
+ access_ok(VERIFY_READ, __p, sizeof(*ptr)) ? \
+ __get_user((x), (__typeof__(*(ptr)) *)__p) : \
-EFAULT; \
})
return -ENODEV;
}
-static inline bool acpi_check_dma(struct acpi_device *adev, bool *coherent)
+static inline bool acpi_dma_supported(struct acpi_device *adev)
{
return false;
}
+static inline enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev)
+{
+ return DEV_DMA_NOT_SUPPORTED;
+}
+
#define ACPI_PTR(_ptr) (NULL)
#endif /* !CONFIG_ACPI */
#ifdef CONFIG_CONTEXT_TRACKING
extern void context_tracking_cpu_set(int cpu);
+/* Called with interrupts disabled. */
+extern void __context_tracking_enter(enum ctx_state state);
+extern void __context_tracking_exit(enum ctx_state state);
+
extern void context_tracking_enter(enum ctx_state state);
extern void context_tracking_exit(enum ctx_state state);
extern void context_tracking_user_enter(void);
static inline void user_enter(void)
{
if (context_tracking_is_enabled())
- context_tracking_user_enter();
+ context_tracking_enter(CONTEXT_USER);
}
static inline void user_exit(void)
{
if (context_tracking_is_enabled())
- context_tracking_user_exit();
+ context_tracking_exit(CONTEXT_USER);
}
static inline enum ctx_state exception_enter(void)
current->flags |= PF_VCPU;
if (context_tracking_is_enabled())
- context_tracking_enter(CONTEXT_GUEST);
+ __context_tracking_enter(CONTEXT_GUEST);
}
static inline void guest_exit(void)
{
if (context_tracking_is_enabled())
- context_tracking_exit(CONTEXT_GUEST);
+ __context_tracking_exit(CONTEXT_GUEST);
if (vtime_accounting_enabled())
vtime_guest_exit(current);
int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
uint32_t guest_irq, bool set);
#endif /* CONFIG_HAVE_KVM_IRQ_BYPASS */
+
#endif
}
#endif /* mul_u64_u32_shr */
+#ifndef mul_u64_u64_shr
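+/* With a native 128-bit type, let the compiler emit the widening multiply. */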
+static inline u64 mul_u64_u64_shr(u64 a, u64 mul, unsigned int shift)
+{
+ return (u64)(((unsigned __int128)a * mul) >> shift);
+}
+#endif /* mul_u64_u64_shr */
+
#else
#ifndef mul_u64_u32_shr
}
#endif /* mul_u64_u32_shr */
+#ifndef mul_u64_u64_shr
+static inline u64 mul_u64_u64_shr(u64 a, u64 b, unsigned int shift)
+{
+ union {
+ u64 ll;
+ struct {
+#ifdef __BIG_ENDIAN
+ u32 high, low;
+#else
+ u32 low, high;
+#endif
+ } l;
+ } rl, rm, rn, rh, a0, b0;
+ u64 c;
+
+ a0.ll = a;
+ b0.ll = b;
+
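+ /*
+ * Schoolbook multiplication on 32-bit halves:
+ *   a * b = (rh << 64) + ((rm + rn) << 32) + rl
+ * where rl = a_lo*b_lo, rm = a_lo*b_hi, rn = a_hi*b_lo and
+ * rh = a_hi*b_hi; each partial product fits in 64 bits.
+ */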
+ rl.ll = (u64)a0.l.low * b0.l.low;
+ rm.ll = (u64)a0.l.low * b0.l.high;
+ rn.ll = (u64)a0.l.high * b0.l.low;
+ rh.ll = (u64)a0.l.high * b0.l.high;
+
+ /*
+ * Each of these lines computes a 64-bit intermediate result into "c",
+ * starting at bits 32-95. The low 32 bits go into the result of the
+ * multiplication, the high 32 bits are carried into the next step.
+ */
+ rl.l.high = c = (u64)rl.l.high + rm.l.low + rn.l.low;
+ rh.l.low = c = (c >> 32) + rm.l.high + rn.l.high + rh.l.low;
+ rh.l.high = (c >> 32) + rh.l.high;
+
+ /*
+ * The 128-bit result of the multiplication is in rl.ll and rh.ll,
+ * shift it right and throw away the high part of the result.
+ */
+ if (shift == 0)
+ return rl.ll;
+ if (shift < 64)
+ return (rl.ll >> shift) | (rh.ll << (64 - shift));
+ return rh.ll >> (shift & 63);
+}
+#endif /* mul_u64_u64_shr */
+
#endif
+#ifndef mul_u64_u32_div
+static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 divisor)
+{
+ union {
+ u64 ll;
+ struct {
+#ifdef __BIG_ENDIAN
+ u32 high, low;
+#else
+ u32 low, high;
+#endif
+ } l;
+ } u, rl, rh;
+
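+ /*
+ * Compute (a * mul) / divisor without a 128-bit type: multiply each
+ * 32-bit half of a by mul, then do a two-step base-2^32 long division
+ * by divisor, carrying the remainder of the high step into the low
+ * one. The quotient is assumed to fit in 64 bits.
+ */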
+ u.ll = a;
+ rl.ll = (u64)u.l.low * mul;
+ rh.ll = (u64)u.l.high * mul + rl.l.high;
+
+ /* Bits 32-63 of the result will be in rh.l.low. */
+ rl.l.high = do_div(rh.ll, divisor);
+
+ /* Bits 0-31 of the result will be in rl.l.low. */
+ do_div(rl.ll, divisor);
+
+ rl.l.high = rh.l.low;
+ return rl.ll;
+}
+#endif /* mul_u64_u32_div */
+
#endif /* _LINUX_MATH64_H */
int of_irq_parse_and_map_pci(const struct pci_dev *dev, u8 slot, u8 pin);
int of_pci_parse_bus_range(struct device_node *node, struct resource *res);
int of_get_pci_domain_nr(struct device_node *node);
-void of_pci_dma_configure(struct pci_dev *pci_dev);
void of_pci_check_probe_only(void);
#else
static inline int of_irq_parse_pci(const struct pci_dev *pdev, struct of_phandle_args *out_irq)
return -1;
}
-static inline void of_pci_dma_configure(struct pci_dev *pci_dev) { }
-
static inline void of_pci_check_probe_only(void) { }
#endif
DEV_PROP_MAX,
};
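+/*
+ * DMA capability of a device as described by firmware (e.g. the DT
+ * "dma-coherent" property or the ACPI _CCA object): not supported,
+ * supported but non-coherent, or fully coherent.
+ */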
+enum dev_dma_attr {
+ DEV_DMA_NOT_SUPPORTED,
+ DEV_DMA_NON_COHERENT,
+ DEV_DMA_COHERENT,
+};
+
bool device_property_present(struct device *dev, const char *propname);
int device_property_read_u8_array(struct device *dev, const char *propname,
u8 *val, size_t nval);
void device_add_property_set(struct device *dev, struct property_set *pset);
-bool device_dma_is_coherent(struct device *dev);
+bool device_dma_supported(struct device *dev);
+
+enum dev_dma_attr device_get_dma_attr(struct device *dev);
int device_get_phy_mode(struct device *dev);
* instructions to execute won't use any RCU read side critical section
* because this function sets RCU in extended quiescent state.
*/
-void context_tracking_enter(enum ctx_state state)
+void __context_tracking_enter(enum ctx_state state)
{
- unsigned long flags;
-
- /*
- * Repeat the user_enter() check here because some archs may be calling
- * this from asm and if no CPU needs context tracking, they shouldn't
- * go further. Repeat the check here until they support the inline static
- * key check.
- */
- if (!context_tracking_is_enabled())
- return;
-
- /*
- * Some contexts may involve an exception occuring in an irq,
- * leading to that nesting:
- * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit()
- * This would mess up the dyntick_nesting count though. And rcu_irq_*()
- * helpers are enough to protect RCU uses inside the exception. So
- * just return immediately if we detect we are in an IRQ.
- */
- if (in_interrupt())
- return;
-
/* Kernel threads aren't supposed to go to userspace */
WARN_ON_ONCE(!current->mm);
- local_irq_save(flags);
if (!context_tracking_recursion_enter())
- goto out_irq_restore;
+ return;
if ( __this_cpu_read(context_tracking.state) != state) {
if (__this_cpu_read(context_tracking.active)) {
__this_cpu_write(context_tracking.state, state);
}
context_tracking_recursion_exit();
-out_irq_restore:
+}
+NOKPROBE_SYMBOL(__context_tracking_enter);
+EXPORT_SYMBOL_GPL(__context_tracking_enter);
+
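+/*
+ * Wrapper for callers that cannot guarantee interrupts are disabled:
+ * it filters out IRQ context and masks interrupts itself around the
+ * low-level helper.
+ */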
+void context_tracking_enter(enum ctx_state state)
+{
+ unsigned long flags;
+
+ /*
+ * Some contexts may involve an exception occurring in an irq,
+ * leading to that nesting:
+ * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit()
+ * This would mess up the dyntick_nesting count though. And rcu_irq_*()
+ * helpers are enough to protect RCU uses inside the exception. So
+ * just return immediately if we detect we are in an IRQ.
+ */
+ if (in_interrupt())
+ return;
+
+ local_irq_save(flags);
+ __context_tracking_enter(state);
local_irq_restore(flags);
}
NOKPROBE_SYMBOL(context_tracking_enter);
void context_tracking_user_enter(void)
{
- context_tracking_enter(CONTEXT_USER);
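+	/* user_enter() applies the context_tracking_is_enabled() check
+	 * for us, so this legacy entry point stays cheap when tracking
+	 * is off. */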
+ user_enter();
}
NOKPROBE_SYMBOL(context_tracking_user_enter);
* This call supports re-entrancy. This way it can be called from any exception
* handler without needing to know if we came from userspace or not.
*/
-void context_tracking_exit(enum ctx_state state)
+void __context_tracking_exit(enum ctx_state state)
{
- unsigned long flags;
-
- if (!context_tracking_is_enabled())
- return;
-
- if (in_interrupt())
- return;
-
- local_irq_save(flags);
if (!context_tracking_recursion_enter())
- goto out_irq_restore;
+ return;
if (__this_cpu_read(context_tracking.state) == state) {
if (__this_cpu_read(context_tracking.active)) {
__this_cpu_write(context_tracking.state, CONTEXT_KERNEL);
}
context_tracking_recursion_exit();
-out_irq_restore:
+}
+NOKPROBE_SYMBOL(__context_tracking_exit);
+EXPORT_SYMBOL_GPL(__context_tracking_exit);
+
+void context_tracking_exit(enum ctx_state state)
+{
+ unsigned long flags;
+
+ if (in_interrupt())
+ return;
+
+ local_irq_save(flags);
+ __context_tracking_exit(state);
local_irq_restore(flags);
}
NOKPROBE_SYMBOL(context_tracking_exit);
void context_tracking_user_exit(void)
{
- context_tracking_exit(CONTEXT_USER);
+ user_exit();
}
NOKPROBE_SYMBOL(context_tracking_user_exit);
}
static struct option info_opts[] = {
- {.name = "numpst", .has_arg=no_argument, .flag=NULL, .val='n'},
+ {"numpst", no_argument, NULL, 'n'},
};
void print_help(void)
Enable a specific processor sleep state.
.TP
\fB\-D\fR \fB\-\-disable-by-latency\fR <LATENCY>
-Disable all idle states with a equal or higher latency than <LATENCY>
+Disable all idle states with an equal or higher latency than <LATENCY>.
+
+Enable all idle states with a latency lower than <LATENCY>.
.TP
\fB\-E\fR \fB\-\-enable-all\fR
Enable all idle states if not enabled already.
}
static struct option info_opts[] = {
- { .name = "debug", .has_arg = no_argument, .flag = NULL, .val = 'e'},
- { .name = "boost", .has_arg = no_argument, .flag = NULL, .val = 'b'},
- { .name = "freq", .has_arg = no_argument, .flag = NULL, .val = 'f'},
- { .name = "hwfreq", .has_arg = no_argument, .flag = NULL, .val = 'w'},
- { .name = "hwlimits", .has_arg = no_argument, .flag = NULL, .val = 'l'},
- { .name = "driver", .has_arg = no_argument, .flag = NULL, .val = 'd'},
- { .name = "policy", .has_arg = no_argument, .flag = NULL, .val = 'p'},
- { .name = "governors", .has_arg = no_argument, .flag = NULL, .val = 'g'},
- { .name = "related-cpus", .has_arg = no_argument, .flag = NULL, .val = 'r'},
- { .name = "affected-cpus",.has_arg = no_argument, .flag = NULL, .val = 'a'},
- { .name = "stats", .has_arg = no_argument, .flag = NULL, .val = 's'},
- { .name = "latency", .has_arg = no_argument, .flag = NULL, .val = 'y'},
- { .name = "proc", .has_arg = no_argument, .flag = NULL, .val = 'o'},
- { .name = "human", .has_arg = no_argument, .flag = NULL, .val = 'm'},
- { .name = "no-rounding", .has_arg = no_argument, .flag = NULL, .val = 'n'},
+ {"debug", no_argument, NULL, 'e'},
+ {"boost", no_argument, NULL, 'b'},
+ {"freq", no_argument, NULL, 'f'},
+ {"hwfreq", no_argument, NULL, 'w'},
+ {"hwlimits", no_argument, NULL, 'l'},
+ {"driver", no_argument, NULL, 'd'},
+ {"policy", no_argument, NULL, 'p'},
+ {"governors", no_argument, NULL, 'g'},
+ {"related-cpus", no_argument, NULL, 'r'},
+ {"affected-cpus", no_argument, NULL, 'a'},
+ {"stats", no_argument, NULL, 's'},
+ {"latency", no_argument, NULL, 'y'},
+ {"proc", no_argument, NULL, 'o'},
+ {"human", no_argument, NULL, 'm'},
+ {"no-rounding", no_argument, NULL, 'n'},
{ },
};
#define NORM_FREQ_LEN 32
static struct option set_opts[] = {
- { .name = "min", .has_arg = required_argument, .flag = NULL, .val = 'd'},
- { .name = "max", .has_arg = required_argument, .flag = NULL, .val = 'u'},
- { .name = "governor", .has_arg = required_argument, .flag = NULL, .val = 'g'},
- { .name = "freq", .has_arg = required_argument, .flag = NULL, .val = 'f'},
- { .name = "related", .has_arg = no_argument, .flag = NULL, .val='r'},
+ {"min", required_argument, NULL, 'd'},
+ {"max", required_argument, NULL, 'u'},
+ {"governor", required_argument, NULL, 'g'},
+ {"freq", required_argument, NULL, 'f'},
+ {"related", no_argument, NULL, 'r'},
{ },
};
}
static struct option info_opts[] = {
- { .name = "silent", .has_arg = no_argument, .flag = NULL, .val = 's'},
- { .name = "proc", .has_arg = no_argument, .flag = NULL, .val = 'o'},
+ {"silent", no_argument, NULL, 's'},
+ {"proc", no_argument, NULL, 'o'},
{ },
};
#include "helpers/sysfs.h"
static struct option info_opts[] = {
- { .name = "disable",
- .has_arg = required_argument, .flag = NULL, .val = 'd'},
- { .name = "enable",
- .has_arg = required_argument, .flag = NULL, .val = 'e'},
- { .name = "disable-by-latency",
- .has_arg = required_argument, .flag = NULL, .val = 'D'},
- { .name = "enable-all",
- .has_arg = no_argument, .flag = NULL, .val = 'E'},
- { },
+ {"disable", required_argument, NULL, 'd'},
+ {"enable", required_argument, NULL, 'e'},
+ {"disable-by-latency", required_argument, NULL, 'D'},
+ {"enable-all", no_argument, NULL, 'E'},
+ { },
};
(cpu, idlestate);
state_latency = sysfs_get_idlestate_latency
(cpu, idlestate);
- printf("CPU: %u - idlestate %u - state_latency: %llu - latency: %llu\n",
- cpu, idlestate, state_latency, latency);
- if (disabled == 1 || latency > state_latency)
+ if (disabled == 1) {
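+ /* State already disabled: re-enable it only if its exit
+ * latency is below the requested threshold. */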
+ if (latency > state_latency) {
+ ret = sysfs_idlestate_disable
+ (cpu, idlestate, 0);
+ if (ret == 0)
+ printf(_("Idlestate %u enabled on CPU %u\n"), idlestate, cpu);
+ }
continue;
- ret = sysfs_idlestate_disable
- (cpu, idlestate, 1);
- if (ret == 0)
+ }
+ if (latency <= state_latency) {
+ ret = sysfs_idlestate_disable
+ (cpu, idlestate, 1);
+ if (ret == 0)
printf(_("Idlestate %u disabled on CPU %u\n"), idlestate, cpu);
+ }
}
break;
case 'E':
#include "helpers/sysfs.h"
static struct option set_opts[] = {
- { .name = "perf-bias", .has_arg = optional_argument, .flag = NULL, .val = 'b'},
- { },
+ {"perf-bias", optional_argument, NULL, 'b'},
+ { },
};
static void print_wrong_arg_exit(void)
#include "helpers/bitmask.h"
static struct option set_opts[] = {
- { .name = "perf-bias", .has_arg = required_argument, .flag = NULL, .val = 'b'},
+ {"perf-bias", required_argument, NULL, 'b'},
{ },
};
for (cpu = 0; cpu < cpus; cpu++) {
cpu_top->core_info[cpu].cpu = cpu;
cpu_top->core_info[cpu].is_online = sysfs_is_cpu_online(cpu);
- if (!cpu_top->core_info[cpu].is_online)
- continue;
if(sysfs_topology_read_file(
cpu,
"physical_package_id",
- &(cpu_top->core_info[cpu].pkg)) < 0)
- return -1;
+ &(cpu_top->core_info[cpu].pkg)) < 0) {
+ cpu_top->core_info[cpu].pkg = -1;
+ cpu_top->core_info[cpu].core = -1;
+ continue;
+ }
if(sysfs_topology_read_file(
cpu,
"core_id",
- &(cpu_top->core_info[cpu].core)) < 0)
- return -1;
+ &(cpu_top->core_info[cpu].core)) < 0) {
+ cpu_top->core_info[cpu].pkg = -1;
+ cpu_top->core_info[cpu].core = -1;
+ continue;
+ }
}
qsort(cpu_top->core_info, cpus, sizeof(struct cpuid_core_info),
done by pkg value. */
last_pkg = cpu_top->core_info[0].pkg;
for(cpu = 1; cpu < cpus; cpu++) {
- if(cpu_top->core_info[cpu].pkg != last_pkg) {
+ if (cpu_top->core_info[cpu].pkg != last_pkg &&
+ cpu_top->core_info[cpu].pkg != -1) {
+
last_pkg = cpu_top->core_info[cpu].pkg;
cpu_top->pkgs++;
}
}
- cpu_top->pkgs++;
+ if (cpu_top->core_info[0].pkg != -1)
+ cpu_top->pkgs++;
/* Intel's cores count is not consecutively numbered, there may
* be a core_id of 3, but none of 2. Assume there always is 0
/* Be careful: CPUs may have been re-sorted by pkg value, so do not just use cpu */
if (!bitmask_isbitset(cpus_chosen, cpu_top.core_info[cpu].cpu))
return;
+ if (!cpu_top.core_info[cpu].is_online &&
+ cpu_top.core_info[cpu].pkg == -1)
+ return;
if (topology_depth > 2)
printf("%4d|", cpu_top.core_info[cpu].pkg);
* It's up to the monitor plug-in to check .is_online; this one
* is just for additional info.
*/
- if (!cpu_top.core_info[cpu].is_online) {
+ if (!cpu_top.core_info[cpu].is_online &&
+ cpu_top.core_info[cpu].pkg != -1) {
printf(_(" *is offline\n"));
return;
} else
return EXIT_FAILURE;
}
+ if (!cpu_top.core_info[0].is_online)
+ printf("WARNING: at least one cpu is offline\n");
+
/* Default is: monitor all CPUs */
if (bitmask_isallclear(cpus_chosen))
bitmask_setall(cpus_chosen);
int do_smi;
double bclk;
double base_hz;
+unsigned int has_base_hz;
double tsc_tweak = 1.0;
unsigned int show_pkg;
unsigned int show_core;
unsigned int crystal_hz;
unsigned long long tsc_hz;
int base_cpu;
+double discover_bclk(unsigned int family, unsigned int model);
#define RAPL_PKG (1 << 0)
/* 0x610 MSR_PKG_POWER_LIMIT */
}
/* Bzy_MHz */
- if (has_aperf)
- outp += sprintf(outp, "%8.0f",
- 1.0 * t->tsc * tsc_tweak / units * t->aperf / t->mperf / interval_float);
+ if (has_aperf) {
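+ /*
+ * When the base frequency is known, busy MHz follows directly from
+ * the APERF/MPERF ratio; otherwise fall back to scaling the TSC
+ * delta over the measurement interval.
+ */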
+ if (has_base_hz)
+ outp += sprintf(outp, "%8.0f", base_hz / units * t->aperf / t->mperf);
+ else
+ outp += sprintf(outp, "%8.0f",
+ 1.0 * t->tsc / units * t->aperf / t->mperf / interval_float);
+ }
/* TSC_MHz */
outp += sprintf(outp, "%8.0f", 1.0 * t->tsc/units/interval_float);
static void
calculate_tsc_tweak()
{
- unsigned long long msr;
- unsigned int base_ratio;
-
- get_msr(base_cpu, MSR_NHM_PLATFORM_INFO, &msr);
- base_ratio = (msr >> 8) & 0xFF;
- base_hz = base_ratio * bclk * 1000000;
tsc_tweak = base_hz / tsc_hz;
}
get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr);
fprintf(stderr, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr);
- fprintf(stderr, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0xEF);
+ fprintf(stderr, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0x7F);
fprintf(stderr, " lock=%d", (unsigned int)(msr >> 31) & 1);
fprintf(stderr, ")\n");
}
int probe_nhm_msrs(unsigned int family, unsigned int model)
{
unsigned long long msr;
+ unsigned int base_ratio;
int *pkg_cstate_limits;
if (!genuine_intel)
if (family != 6)
return 0;
+ bclk = discover_bclk(family, model);
+
switch (model) {
case 0x1A: /* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */
case 0x1E: /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
return 0;
}
get_msr(base_cpu, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr);
-
pkg_cstate_limit = pkg_cstate_limits[msr & 0xF];
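+ /* Bits 15:8 of MSR_NHM_PLATFORM_INFO hold the maximum non-turbo
+ * ratio; base frequency = ratio * bus clock. */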
+ get_msr(base_cpu, MSR_NHM_PLATFORM_INFO, &msr);
+ base_ratio = (msr >> 8) & 0xFF;
+
+ base_hz = base_ratio * bclk * 1000000;
+ has_base_hz = 1;
return 1;
}
int has_nhm_turbo_ratio_limit(unsigned int family, unsigned int model)
do_skl_residency = has_skl_msrs(family, model);
do_slm_cstates = is_slm(family, model);
do_knl_cstates = is_knl(family, model);
- bclk = discover_bclk(family, model);
rapl_probe(family, model);
perf_limit_reasons_probe(family, model);