KVM: SVM: Implement hsave
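
This diff lays the groundwork for nested SVM. The VM_HSAVE_PA MSR is
implemented: writes are latched in a new hsave_msr field, reads return
it, and a host save-area page is allocated per vcpu. The global
interrupt flag (GIF) is tracked in vcpu->arch.hflags, with STGI and
CLGI intercepts to set and clear it and every interrupt-window check
gated on it. A nested_svm_do() helper is introduced for running
callbacks on mapped guest pages, and the open-coded SVM detection and
disable sequences are replaced by the cpu_has_svm()/cpu_svm_disable()
helpers from <asm/virtext.h>.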
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 9c4ce657d96389753ff9650bb06f12bf76e003c8..59aaff1c9597467d3c79926c5d7988282dcdc31c 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -28,6 +28,8 @@
 
 #include <asm/desc.h>
 
+#include <asm/virtext.h>
+
 #define __ex(x) __kvm_handle_fault_on_reboot(x)
 
 MODULE_AUTHOR("Qumranet");
@@ -48,6 +50,15 @@ MODULE_LICENSE("GPL");
 
 #define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
 
+/* Turn on to get debugging output */
+/* #define NESTED_DEBUG */
+
+#ifdef NESTED_DEBUG
+#define nsvm_printk(fmt, args...) printk(KERN_INFO fmt, ## args)
+#else
+#define nsvm_printk(fmt, args...) do {} while (0)
+#endif
+
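
The do {} while (0) expansion keeps nsvm_printk() safe as the sole
statement of an if/else body; with NESTED_DEBUG left undefined, the
nested-SVM debug output compiles away entirely.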
 /* enable NPT for AMD64 and X86 with PAE */
 #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
 static bool npt_enabled = true;
@@ -196,7 +207,7 @@ static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
        if (!npt_enabled && !(efer & EFER_LMA))
                efer &= ~EFER_LME;
 
-       to_svm(vcpu)->vmcb->save.efer = efer | MSR_EFER_SVME_MASK;
+       to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME;
        vcpu->arch.shadow_efer = efer;
 }
 
@@ -240,39 +251,24 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
        kvm_rip_write(vcpu, svm->next_rip);
        svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
 
-       vcpu->arch.interrupt_window_open = 1;
+       vcpu->arch.interrupt_window_open = (svm->vcpu.arch.hflags & HF_GIF_MASK);
 }
 
 static int has_svm(void)
 {
-       uint32_t eax, ebx, ecx, edx;
+       const char *msg;
 
-       if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
-               printk(KERN_INFO "has_svm: not amd\n");
+       if (!cpu_has_svm(&msg)) {
+               printk(KERN_INFO "has_svn: %s\n", msg);
                return 0;
        }
 
-       cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
-       if (eax < SVM_CPUID_FUNC) {
-               printk(KERN_INFO "has_svm: can't execute cpuid_8000000a\n");
-               return 0;
-       }
-
-       cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
-       if (!(ecx & (1 << SVM_CPUID_FEATURE_SHIFT))) {
-               printk(KERN_DEBUG "has_svm: svm not available\n");
-               return 0;
-       }
        return 1;
 }
 
 static void svm_hardware_disable(void *garbage)
 {
-       uint64_t efer;
-
-       wrmsrl(MSR_VM_HSAVE_PA, 0);
-       rdmsrl(MSR_EFER, efer);
-       wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK);
+       cpu_svm_disable();
 }
 
 static void svm_hardware_enable(void *garbage)
@@ -305,7 +301,7 @@ static void svm_hardware_enable(void *garbage)
        svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
 
        rdmsrl(MSR_EFER, efer);
-       wrmsrl(MSR_EFER, efer | MSR_EFER_SVME_MASK);
+       wrmsrl(MSR_EFER, efer | EFER_SVME);
 
        wrmsrl(MSR_VM_HSAVE_PA,
               page_to_pfn(svm_data->save_area) << PAGE_SHIFT);
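
has_svm() and svm_hardware_disable() now lean on the <asm/virtext.h>
helpers instead of open-coding the CPUID and MSR accesses. Judging from
the lines removed above, cpu_svm_disable() should boil down to:

	/* Sketch of cpu_svm_disable(), inferred from the deleted lines. */
	uint64_t efer;

	wrmsrl(MSR_VM_HSAVE_PA, 0);		/* forget the host save area */
	rdmsrl(MSR_EFER, efer);
	wrmsrl(MSR_EFER, efer & ~EFER_SVME);	/* clear EFER.SVME */

and cpu_has_svm() wraps the removed CPUID checks (AMD vendor, leaf
0x8000000a present, SVM feature bit), reporting the failing check
through its msg argument.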
@@ -572,7 +568,7 @@ static void init_vmcb(struct vcpu_svm *svm)
        init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
        init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
 
-       save->efer = MSR_EFER_SVME_MASK;
+       save->efer = EFER_SVME;
        save->dr6 = 0xffff0ff0;
        save->dr7 = 0x400;
        save->rflags = 2;
@@ -604,6 +600,8 @@ static void init_vmcb(struct vcpu_svm *svm)
                save->cr4 = 0;
        }
        force_new_asid(&svm->vcpu);
+
+       svm->vcpu.arch.hflags = HF_GIF_MASK;
 }
 
 static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
@@ -628,6 +626,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
        struct vcpu_svm *svm;
        struct page *page;
        struct page *msrpm_pages;
+       struct page *hsave_page;
        int err;
 
        svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
@@ -653,6 +652,11 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
        svm->msrpm = page_address(msrpm_pages);
        svm_vcpu_init_msrpm(svm->msrpm);
 
+       hsave_page = alloc_page(GFP_KERNEL);
+       if (!hsave_page)
+               goto uninit;
+       svm->hsave = page_address(hsave_page);
+
        svm->vmcb = page_address(page);
        clear_page(svm->vmcb);
        svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
@@ -682,6 +686,7 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
 
        __free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT));
        __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
+       __free_page(virt_to_page(svm->hsave));
        kvm_vcpu_uninit(vcpu);
        kmem_cache_free(kvm_vcpu_cache, svm);
 }
@@ -731,6 +736,16 @@ static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
        to_svm(vcpu)->vmcb->save.rflags = rflags;
 }
 
+static void svm_set_vintr(struct vcpu_svm *svm)
+{
+       svm->vmcb->control.intercept |= 1ULL << INTERCEPT_VINTR;
+}
+
+static void svm_clear_vintr(struct vcpu_svm *svm)
+{
+       svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VINTR);
+}
+
 static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg)
 {
        struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
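
svm_set_vintr() and svm_clear_vintr() give the VINTR intercept bit a
single point of control; the open-coded intercept-mask updates in
interrupt_window_interception(), svm_intr_assist() and
do_interrupt_requests() below are all converted to use them.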
@@ -772,6 +787,22 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
        var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1;
        var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1;
        var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1;
+
+       /*
+        * SVM always stores 0 for the 'G' bit in the CS selector in
+        * the VMCB on a VMEXIT. This hurts cross-vendor migration:
+        * Intel's VMENTRY has a check on the 'G' bit. Reconstruct it
+        * from the limit: a byte-granular limit cannot exceed 0xfffff.
+        */
+       if (seg == VCPU_SREG_CS)
+               var->g = s->limit > 0xfffff;
+
+       /*
+        * Work around a bug where the busy flag in the TR selector
+        * isn't exposed in the VMCB; report the TSS type as busy.
+        */
+       if (seg == VCPU_SREG_TR)
+               var->type |= 0x2;
+
        var->unusable = !var->present;
 }
 
@@ -1099,6 +1130,7 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
        rep = (io_info & SVM_IOIO_REP_MASK) != 0;
        down = (svm->vmcb->save.rflags & X86_EFLAGS_DF) != 0;
 
+       skip_emulated_instruction(&svm->vcpu);
        return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port);
 }
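
Skipping the emulated instruction before calling kvm_emulate_pio()
ensures RIP already points past the IN/OUT when the PIO has to be
completed in userspace, so the instruction is not re-executed on
reentry.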
 
@@ -1135,6 +1167,112 @@ static int vmmcall_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
        return 1;
 }
 
+static int nested_svm_check_permissions(struct vcpu_svm *svm)
+{
+       if (!(svm->vcpu.arch.shadow_efer & EFER_SVME)
+           || !is_paging(&svm->vcpu)) {
+               kvm_queue_exception(&svm->vcpu, UD_VECTOR);
+               return 1;
+       }
+
+       if (svm->vmcb->save.cpl) {
+               kvm_inject_gp(&svm->vcpu, 0);
+               return 1;
+       }
+
+       return 0;
+}
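
These checks mirror the conditions AMD defines for the SVM
instructions: #UD unless the guest runs with EFER.SVME set and paging
enabled, #GP(0) when not executed at CPL 0.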
+
+static struct page *nested_svm_get_page(struct vcpu_svm *svm, u64 gpa)
+{
+       struct page *page;
+
+       down_read(&current->mm->mmap_sem);
+       page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT);
+       up_read(&current->mm->mmap_sem);
+
+       if (is_error_page(page)) {
+               printk(KERN_INFO "%s: could not find page at 0x%llx\n",
+                      __func__, gpa);
+               kvm_release_page_clean(page);
+               kvm_inject_gp(&svm->vcpu, 0);
+               return NULL;
+       }
+       return page;
+}
+
+static int nested_svm_do(struct vcpu_svm *svm,
+                        u64 arg1_gpa, u64 arg2_gpa, void *opaque,
+                        int (*handler)(struct vcpu_svm *svm,
+                                       void *arg1,
+                                       void *arg2,
+                                       void *opaque))
+{
+       struct page *arg1_page;
+       struct page *arg2_page = NULL;
+       void *arg1;
+       void *arg2 = NULL;
+       int retval;
+
+       arg1_page = nested_svm_get_page(svm, arg1_gpa);
+       if (!arg1_page)
+               return 1;
+
+       if (arg2_gpa) {
+               arg2_page = nested_svm_get_page(svm, arg2_gpa);
+               if (!arg2_page) {
+                       kvm_release_page_clean(arg1_page);
+                       return 1;
+               }
+       }
+
+       arg1 = kmap_atomic(arg1_page, KM_USER0);
+       if (arg2_gpa)
+               arg2 = kmap_atomic(arg2_page, KM_USER1);
+
+       retval = handler(svm, arg1, arg2, opaque);
+
+       kunmap_atomic(arg1, KM_USER0);
+       if (arg2_gpa)
+               kunmap_atomic(arg2, KM_USER1);
+
+       kvm_release_page_dirty(arg1_page);
+       if (arg2_gpa)
+               kvm_release_page_dirty(arg2_page);
+
+       return retval;
+}
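
nested_svm_do() factors out a pattern the nested-SVM intercept handlers
can share: map one or (if arg2_gpa is non-zero) two guest-physical
pages, run a callback on the mappings, and release the pages as dirty.
Because the pages are mapped with kmap_atomic(), the callback must not
sleep. A minimal sketch of a caller (the handler name and usage are
illustrative, not part of this patch):

	/*
	 * Hypothetical handler for nested_svm_do(): peek at a guest VMCB
	 * page. arg1/arg2 arrive kmap_atomic()-mapped.
	 */
	static int nested_svm_peek(struct vcpu_svm *svm, void *arg1,
				   void *arg2, void *opaque)
	{
		struct vmcb *nested_vmcb = arg1;	/* page at arg1_gpa */

		nsvm_printk("nested rip: 0x%llx\n",
			    (unsigned long long)nested_vmcb->save.rip);
		return 0;	/* becomes nested_svm_do()'s return value */
	}

	/* In an intercept handler: map the page whose GPA the guest put
	 * in RAX (arg2_gpa == 0 maps only one page) and run the callback. */
	nested_svm_do(svm, svm->vmcb->save.rax, 0, NULL, nested_svm_peek);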
+
+static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+       if (nested_svm_check_permissions(svm))
+               return 1;
+
+       svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
+       skip_emulated_instruction(&svm->vcpu);
+
+       svm->vcpu.arch.hflags |= HF_GIF_MASK;
+
+       return 1;
+}
+
+static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+       if (nested_svm_check_permissions(svm))
+               return 1;
+
+       svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
+       skip_emulated_instruction(&svm->vcpu);
+
+       svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
+
+       /* After a CLGI no interrupts should be delivered */
+       svm_clear_vintr(svm);
+       svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
+
+       return 1;
+}
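
STGI and CLGI simply set and clear the guest's global interrupt flag,
mirrored here in HF_GIF_MASK; the interrupt-window computations below
are all extended to require GIF, so nothing is injected while the guest
holds interrupts globally disabled.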
+
 static int invalid_op_interception(struct vcpu_svm *svm,
                                   struct kvm_run *kvm_run)
 {
@@ -1246,6 +1384,9 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
        case MSR_IA32_LASTINTTOIP:
                *data = svm->vmcb->save.last_excp_to;
                break;
+       case MSR_VM_HSAVE_PA:
+               *data = svm->hsave_msr;
+               break;
        default:
                return kvm_get_msr_common(vcpu, ecx, data);
        }
@@ -1339,6 +1480,9 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
                 */
                pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", ecx, data);
 
+               break;
+       case MSR_VM_HSAVE_PA:
+               svm->hsave_msr = data;
                break;
        default:
                return kvm_set_msr_common(vcpu, ecx, data);
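
At this stage the VM_HSAVE_PA MSR is only latched: writes land in
svm->hsave_msr and reads return it. The per-vcpu hsave page allocated
in svm_create_vcpu() is where host state will actually be stashed once
VMRUN emulation is built on top of this groundwork.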
@@ -1376,7 +1520,7 @@ static int interrupt_window_interception(struct vcpu_svm *svm,
 {
        KVMTRACE_0D(PEND_INTR, &svm->vcpu, handler);
 
-       svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VINTR);
+       svm_clear_vintr(svm);
        svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
        /*
         * If the user space waits to inject interrupts, exit as soon as
@@ -1436,8 +1580,8 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
        [SVM_EXIT_VMMCALL]                      = vmmcall_interception,
        [SVM_EXIT_VMLOAD]                       = invalid_op_interception,
        [SVM_EXIT_VMSAVE]                       = invalid_op_interception,
-       [SVM_EXIT_STGI]                         = invalid_op_interception,
-       [SVM_EXIT_CLGI]                         = invalid_op_interception,
+       [SVM_EXIT_STGI]                         = stgi_interception,
+       [SVM_EXIT_CLGI]                         = clgi_interception,
        [SVM_EXIT_SKINIT]                       = invalid_op_interception,
        [SVM_EXIT_WBINVD]                       = emulate_on_interception,
        [SVM_EXIT_MONITOR]                      = invalid_op_interception,
@@ -1585,18 +1729,20 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu)
        if (!kvm_cpu_has_interrupt(vcpu))
                goto out;
 
+       if (!(svm->vcpu.arch.hflags & HF_GIF_MASK))
+               goto out;
+
        if (!(vmcb->save.rflags & X86_EFLAGS_IF) ||
            (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) ||
            (vmcb->control.event_inj & SVM_EVTINJ_VALID)) {
                /* unable to deliver irq, set pending irq */
-               vmcb->control.intercept |= (1ULL << INTERCEPT_VINTR);
+               svm_set_vintr(svm);
                svm_inject_irq(svm, 0x0);
                goto out;
        }
        /* Okay, we can deliver the interrupt: grab it and update PIC state. */
        intr_vector = kvm_cpu_get_interrupt(vcpu);
        svm_inject_irq(svm, intr_vector);
-       kvm_timer_intr_post(vcpu, intr_vector);
 out:
        update_cr8_intercept(vcpu);
 }
@@ -1612,7 +1758,8 @@ static void kvm_reput_irq(struct vcpu_svm *svm)
        }
 
        svm->vcpu.arch.interrupt_window_open =
-               !(control->int_state & SVM_INTERRUPT_SHADOW_MASK);
+               !(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
+                (svm->vcpu.arch.hflags & HF_GIF_MASK);
 }
 
 static void svm_do_inject_vector(struct vcpu_svm *svm)
@@ -1636,7 +1783,8 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 
        svm->vcpu.arch.interrupt_window_open =
                (!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
-                (svm->vmcb->save.rflags & X86_EFLAGS_IF));
+                (svm->vmcb->save.rflags & X86_EFLAGS_IF) &&
+                (svm->vcpu.arch.hflags & HF_GIF_MASK));
 
        if (svm->vcpu.arch.interrupt_window_open && svm->vcpu.arch.irq_summary)
                /*
@@ -1649,9 +1797,9 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
         */
        if (!svm->vcpu.arch.interrupt_window_open &&
            (svm->vcpu.arch.irq_summary || kvm_run->request_interrupt_window))
-               control->intercept |= 1ULL << INTERCEPT_VINTR;
-        else
-               control->intercept &= ~(1ULL << INTERCEPT_VINTR);
+               svm_set_vintr(svm);
+       else
+               svm_clear_vintr(svm);
 }
 
 static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
@@ -1912,6 +2060,11 @@ static int get_npt_level(void)
 #endif
 }
 
+static int svm_get_mt_mask_shift(void)
+{
+       return 0;
+}
+
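
Returning 0 from svm_get_mt_mask_shift() tells the common MMU code not
to shift a memory-type value into shadow PTEs; unlike VMX's EPT
entries, which embed a memory type, SVM uses the ordinary page-table
format.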
 static struct kvm_x86_ops svm_x86_ops = {
        .cpu_has_kvm_support = has_svm,
        .disabled_by_bios = is_disabled,
@@ -1967,6 +2120,7 @@ static struct kvm_x86_ops svm_x86_ops = {
 
        .set_tss_addr = svm_set_tss_addr,
        .get_tdp_level = get_npt_level,
+       .get_mt_mask_shift = svm_get_mt_mask_shift,
 };
 
 static int __init svm_init(void)