#include <asm/desc.h>
+#include <asm/virtext.h>
+
#define __ex(x) __kvm_handle_fault_on_reboot(x)
MODULE_AUTHOR("Qumranet");
#define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
+/* Turn on to get debugging output*/
+/* #define NESTED_DEBUG */
+
+#ifdef NESTED_DEBUG
+#define nsvm_printk(fmt, args...) printk(KERN_INFO fmt, ## args)
+#else
+#define nsvm_printk(fmt, args...) do {} while(0)
+#endif
+
/* enable NPT for AMD64 and X86 with PAE */
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
static bool npt_enabled = true;
if (!npt_enabled && !(efer & EFER_LMA))
efer &= ~EFER_LME;
- to_svm(vcpu)->vmcb->save.efer = efer | MSR_EFER_SVME_MASK;
+ to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME;
vcpu->arch.shadow_efer = efer;
}
kvm_rip_write(vcpu, svm->next_rip);
svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
- vcpu->arch.interrupt_window_open = 1;
+ vcpu->arch.interrupt_window_open = (svm->vcpu.arch.hflags & HF_GIF_MASK);
}
static int has_svm(void)
{
- uint32_t eax, ebx, ecx, edx;
+ const char *msg;
- if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
- printk(KERN_INFO "has_svm: not amd\n");
+ if (!cpu_has_svm(&msg)) {
+ printk(KERN_INFO "has_svn: %s\n", msg);
return 0;
}
- cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
- if (eax < SVM_CPUID_FUNC) {
- printk(KERN_INFO "has_svm: can't execute cpuid_8000000a\n");
- return 0;
- }
-
- cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
- if (!(ecx & (1 << SVM_CPUID_FEATURE_SHIFT))) {
- printk(KERN_DEBUG "has_svm: svm not available\n");
- return 0;
- }
return 1;
}
static void svm_hardware_disable(void *garbage)
{
- uint64_t efer;
-
- wrmsrl(MSR_VM_HSAVE_PA, 0);
- rdmsrl(MSR_EFER, efer);
- wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK);
+ cpu_svm_disable();
}
static void svm_hardware_enable(void *garbage)
svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
rdmsrl(MSR_EFER, efer);
- wrmsrl(MSR_EFER, efer | MSR_EFER_SVME_MASK);
+ wrmsrl(MSR_EFER, efer | EFER_SVME);
wrmsrl(MSR_VM_HSAVE_PA,
page_to_pfn(svm_data->save_area) << PAGE_SHIFT);
init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
- save->efer = MSR_EFER_SVME_MASK;
+ save->efer = EFER_SVME;
save->dr6 = 0xffff0ff0;
save->dr7 = 0x400;
save->rflags = 2;
save->cr4 = 0;
}
force_new_asid(&svm->vcpu);
+
+ svm->vcpu.arch.hflags = HF_GIF_MASK;
}
static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
struct vcpu_svm *svm;
struct page *page;
struct page *msrpm_pages;
+ struct page *hsave_page;
int err;
svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
svm->msrpm = page_address(msrpm_pages);
svm_vcpu_init_msrpm(svm->msrpm);
+ hsave_page = alloc_page(GFP_KERNEL);
+ if (!hsave_page)
+ goto uninit;
+ svm->hsave = page_address(hsave_page);
+
svm->vmcb = page_address(page);
clear_page(svm->vmcb);
svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
__free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT));
__free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
+ __free_page(virt_to_page(svm->hsave));
kvm_vcpu_uninit(vcpu);
kmem_cache_free(kvm_vcpu_cache, svm);
}
to_svm(vcpu)->vmcb->save.rflags = rflags;
}
+static void svm_set_vintr(struct vcpu_svm *svm)
+{
+ svm->vmcb->control.intercept |= 1ULL << INTERCEPT_VINTR;
+}
+
+static void svm_clear_vintr(struct vcpu_svm *svm)
+{
+ svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VINTR);
+}
+
static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg)
{
struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1;
var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1;
var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1;
+
+ /*
+ * SVM always stores 0 for the 'G' bit in the CS selector in
+ * the VMCB on a VMEXIT. This hurts cross-vendor migration:
+ * Intel's VMENTRY has a check on the 'G' bit.
+ */
+ if (seg == VCPU_SREG_CS)
+ var->g = s->limit > 0xfffff;
+
+ /*
+ * Work around a bug where the busy flag in the tr selector
+ * isn't exposed
+ */
+ if (seg == VCPU_SREG_TR)
+ var->type |= 0x2;
+
var->unusable = !var->present;
}
rep = (io_info & SVM_IOIO_REP_MASK) != 0;
down = (svm->vmcb->save.rflags & X86_EFLAGS_DF) != 0;
+ skip_emulated_instruction(&svm->vcpu);
return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port);
}
return 1;
}
+static int nested_svm_check_permissions(struct vcpu_svm *svm)
+{
+ if (!(svm->vcpu.arch.shadow_efer & EFER_SVME)
+ || !is_paging(&svm->vcpu)) {
+ kvm_queue_exception(&svm->vcpu, UD_VECTOR);
+ return 1;
+ }
+
+ if (svm->vmcb->save.cpl) {
+ kvm_inject_gp(&svm->vcpu, 0);
+ return 1;
+ }
+
+ return 0;
+}
+
+static struct page *nested_svm_get_page(struct vcpu_svm *svm, u64 gpa)
+{
+ struct page *page;
+
+ down_read(¤t->mm->mmap_sem);
+ page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT);
+ up_read(¤t->mm->mmap_sem);
+
+ if (is_error_page(page)) {
+ printk(KERN_INFO "%s: could not find page at 0x%llx\n",
+ __func__, gpa);
+ kvm_release_page_clean(page);
+ kvm_inject_gp(&svm->vcpu, 0);
+ return NULL;
+ }
+ return page;
+}
+
+static int nested_svm_do(struct vcpu_svm *svm,
+ u64 arg1_gpa, u64 arg2_gpa, void *opaque,
+ int (*handler)(struct vcpu_svm *svm,
+ void *arg1,
+ void *arg2,
+ void *opaque))
+{
+ struct page *arg1_page;
+ struct page *arg2_page = NULL;
+ void *arg1;
+ void *arg2 = NULL;
+ int retval;
+
+ arg1_page = nested_svm_get_page(svm, arg1_gpa);
+ if(arg1_page == NULL)
+ return 1;
+
+ if (arg2_gpa) {
+ arg2_page = nested_svm_get_page(svm, arg2_gpa);
+ if(arg2_page == NULL) {
+ kvm_release_page_clean(arg1_page);
+ return 1;
+ }
+ }
+
+ arg1 = kmap_atomic(arg1_page, KM_USER0);
+ if (arg2_gpa)
+ arg2 = kmap_atomic(arg2_page, KM_USER1);
+
+ retval = handler(svm, arg1, arg2, opaque);
+
+ kunmap_atomic(arg1, KM_USER0);
+ if (arg2_gpa)
+ kunmap_atomic(arg2, KM_USER1);
+
+ kvm_release_page_dirty(arg1_page);
+ if (arg2_gpa)
+ kvm_release_page_dirty(arg2_page);
+
+ return retval;
+}
+
+static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+ if (nested_svm_check_permissions(svm))
+ return 1;
+
+ svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
+ skip_emulated_instruction(&svm->vcpu);
+
+ svm->vcpu.arch.hflags |= HF_GIF_MASK;
+
+ return 1;
+}
+
+static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+ if (nested_svm_check_permissions(svm))
+ return 1;
+
+ svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
+ skip_emulated_instruction(&svm->vcpu);
+
+ svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
+
+ /* After a CLGI no interrupts should come */
+ svm_clear_vintr(svm);
+ svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
+
+ return 1;
+}
+
static int invalid_op_interception(struct vcpu_svm *svm,
struct kvm_run *kvm_run)
{
case MSR_IA32_LASTINTTOIP:
*data = svm->vmcb->save.last_excp_to;
break;
+ case MSR_VM_HSAVE_PA:
+ *data = svm->hsave_msr;
+ break;
default:
return kvm_get_msr_common(vcpu, ecx, data);
}
*/
pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", ecx, data);
+ break;
+ case MSR_VM_HSAVE_PA:
+ svm->hsave_msr = data;
break;
default:
return kvm_set_msr_common(vcpu, ecx, data);
{
KVMTRACE_0D(PEND_INTR, &svm->vcpu, handler);
- svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VINTR);
+ svm_clear_vintr(svm);
svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
/*
* If the user space waits to inject interrupts, exit as soon as
[SVM_EXIT_VMMCALL] = vmmcall_interception,
[SVM_EXIT_VMLOAD] = invalid_op_interception,
[SVM_EXIT_VMSAVE] = invalid_op_interception,
- [SVM_EXIT_STGI] = invalid_op_interception,
- [SVM_EXIT_CLGI] = invalid_op_interception,
+ [SVM_EXIT_STGI] = stgi_interception,
+ [SVM_EXIT_CLGI] = clgi_interception,
[SVM_EXIT_SKINIT] = invalid_op_interception,
[SVM_EXIT_WBINVD] = emulate_on_interception,
[SVM_EXIT_MONITOR] = invalid_op_interception,
if (!kvm_cpu_has_interrupt(vcpu))
goto out;
+ if (!(svm->vcpu.arch.hflags & HF_GIF_MASK))
+ goto out;
+
if (!(vmcb->save.rflags & X86_EFLAGS_IF) ||
(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) ||
(vmcb->control.event_inj & SVM_EVTINJ_VALID)) {
/* unable to deliver irq, set pending irq */
- vmcb->control.intercept |= (1ULL << INTERCEPT_VINTR);
+ svm_set_vintr(svm);
svm_inject_irq(svm, 0x0);
goto out;
}
/* Okay, we can deliver the interrupt: grab it and update PIC state. */
intr_vector = kvm_cpu_get_interrupt(vcpu);
svm_inject_irq(svm, intr_vector);
- kvm_timer_intr_post(vcpu, intr_vector);
out:
update_cr8_intercept(vcpu);
}
}
svm->vcpu.arch.interrupt_window_open =
- !(control->int_state & SVM_INTERRUPT_SHADOW_MASK);
+ !(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
+ (svm->vcpu.arch.hflags & HF_GIF_MASK);
}
static void svm_do_inject_vector(struct vcpu_svm *svm)
svm->vcpu.arch.interrupt_window_open =
(!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
- (svm->vmcb->save.rflags & X86_EFLAGS_IF));
+ (svm->vmcb->save.rflags & X86_EFLAGS_IF) &&
+ (svm->vcpu.arch.hflags & HF_GIF_MASK));
if (svm->vcpu.arch.interrupt_window_open && svm->vcpu.arch.irq_summary)
/*
*/
if (!svm->vcpu.arch.interrupt_window_open &&
(svm->vcpu.arch.irq_summary || kvm_run->request_interrupt_window))
- control->intercept |= 1ULL << INTERCEPT_VINTR;
- else
- control->intercept &= ~(1ULL << INTERCEPT_VINTR);
+ svm_set_vintr(svm);
+ else
+ svm_clear_vintr(svm);
}
static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
#endif
}
+static int svm_get_mt_mask_shift(void)
+{
+ return 0;
+}
+
static struct kvm_x86_ops svm_x86_ops = {
.cpu_has_kvm_support = has_svm,
.disabled_by_bios = is_disabled,
.set_tss_addr = svm_set_tss_addr,
.get_tdp_level = get_npt_level,
+ .get_mt_mask_shift = svm_get_mt_mask_shift,
};
static int __init svm_init(void)