KVM: SVM: Implement hsave

[firefly-linux-kernel-4.4.55.git] / arch / x86 / kvm / svm.c
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c

index 9c4ce657d96389753ff9650bb06f12bf76e003c8..59aaff1c9597467d3c79926c5d7988282dcdc31c 100644 (file)
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -28,6 +28,8 @@
  
  #include <asm/desc.h>
  
+#include <asm/virtext.h>
+
  #define __ex(x) __kvm_handle_fault_on_reboot(x)
  
  MODULE_AUTHOR("Qumranet");
@@ -48,6 +50,15 @@ MODULE_LICENSE("GPL");
  
  #define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
  
+/* Uncomment the define below to enable nested-SVM debug output via nsvm_printk(). */
+/* #define NESTED_DEBUG */
+
+#ifdef NESTED_DEBUG
+#define nsvm_printk(fmt, args...) printk(KERN_INFO fmt, ## args) /* log at KERN_INFO */
+#else
+#define nsvm_printk(fmt, args...) do {} while(0) /* expands to an empty statement; arguments are dropped */
+#endif /* NESTED_DEBUG */
+
  /* enable NPT for AMD64 and X86 with PAE */
  #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
  static bool npt_enabled = true;
@@ -196,7 +207,7 @@ static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
         if (!npt_enabled && !(efer & EFER_LMA))
                 efer &= ~EFER_LME;
  
-       to_svm(vcpu)->vmcb->save.efer = efer | MSR_EFER_SVME_MASK;
+       to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME;
         vcpu->arch.shadow_efer = efer;
  }
  
@@ -240,39 +251,24 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
         kvm_rip_write(vcpu, svm->next_rip);
         svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
  
-       vcpu->arch.interrupt_window_open = 1;
+       vcpu->arch.interrupt_window_open = (svm->vcpu.arch.hflags & HF_GIF_MASK);
  }
  
  static int has_svm(void)
  {
-       uint32_t eax, ebx, ecx, edx;
+       const char *msg;
  
-       if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
-               printk(KERN_INFO "has_svm: not amd\n");
+       if (!cpu_has_svm(&msg)) {
+               printk(KERN_INFO "has_svn: %s\n", msg);
                 return 0;
         }
  
-       cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
-       if (eax < SVM_CPUID_FUNC) {
-               printk(KERN_INFO "has_svm: can't execute cpuid_8000000a\n");
-               return 0;
-       }
-
-       cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
-       if (!(ecx & (1 << SVM_CPUID_FEATURE_SHIFT))) {
-               printk(KERN_DEBUG "has_svm: svm not available\n");
-               return 0;
-       }
         return 1;
  }
  
  static void svm_hardware_disable(void *garbage)
  {
-       uint64_t efer;
-
-       wrmsrl(MSR_VM_HSAVE_PA, 0);
-       rdmsrl(MSR_EFER, efer);
-       wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK);
+       cpu_svm_disable();      /* presumably from <asm/virtext.h>; confirm it clears MSR_VM_HSAVE_PA and EFER.SVME */
  }
  
  static void svm_hardware_enable(void *garbage)
@@ -305,7 +301,7 @@ static void svm_hardware_enable(void *garbage)
         svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
  
         rdmsrl(MSR_EFER, efer);
-       wrmsrl(MSR_EFER, efer | MSR_EFER_SVME_MASK);
+       wrmsrl(MSR_EFER, efer | EFER_SVME);
  
         wrmsrl(MSR_VM_HSAVE_PA,
                page_to_pfn(svm_data->save_area) << PAGE_SHIFT);
@@ -572,7 +568,7 @@ static void init_vmcb(struct vcpu_svm *svm)
         init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
         init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
  
-       save->efer = MSR_EFER_SVME_MASK;
+       save->efer = EFER_SVME;
         save->dr6 = 0xffff0ff0;
         save->dr7 = 0x400;
         save->rflags = 2;
@@ -604,6 +600,8 @@ static void init_vmcb(struct vcpu_svm *svm)
                 save->cr4 = 0;
         }
         force_new_asid(&svm->vcpu);
+
+       svm->vcpu.arch.hflags = HF_GIF_MASK;
  }
  
  static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
@@ -628,6 +626,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
         struct vcpu_svm *svm;
         struct page *page;
         struct page *msrpm_pages;
+       struct page *hsave_page;
         int err;
  
         svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
@@ -653,6 +652,11 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
         svm->msrpm = page_address(msrpm_pages);
         svm_vcpu_init_msrpm(svm->msrpm);
  
+       hsave_page = alloc_page(GFP_KERNEL);
+       if (!hsave_page)
+               goto uninit;
+       svm->hsave = page_address(hsave_page);
+
         svm->vmcb = page_address(page);
         clear_page(svm->vmcb);
         svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
@@ -682,6 +686,7 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
  
         __free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT));
         __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
+       __free_page(virt_to_page(svm->hsave));
         kvm_vcpu_uninit(vcpu);
         kmem_cache_free(kvm_vcpu_cache, svm);
  }
@@ -731,6 +736,16 @@ static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
         to_svm(vcpu)->vmcb->save.rflags = rflags;
  }
  
+static void svm_set_vintr(struct vcpu_svm *svm)
+{      /* Set the INTERCEPT_VINTR bit in this vCPU's VMCB intercept mask. */
+       svm->vmcb->control.intercept |= 1ULL << INTERCEPT_VINTR;
+}
+
+static void svm_clear_vintr(struct vcpu_svm *svm)
+{      /* Clear the INTERCEPT_VINTR bit in this vCPU's VMCB intercept mask. */
+       svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VINTR);
+}
+
  static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg)
  {
         struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
@@ -772,6 +787,22 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
         var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1;
         var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1;
         var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1;
+
+       /*
+        * SVM always stores 0 for the 'G' bit in the CS selector in
+        * the VMCB on a VMEXIT. This hurts cross-vendor migration:
+        * Intel's VMENTRY has a check on the 'G' bit.
+        */
+       if (seg == VCPU_SREG_CS)
+               var->g = s->limit > 0xfffff;
+
+       /*
+        * Work around a bug where the busy flag in the tr selector
+        * isn't exposed
+        */
+       if (seg == VCPU_SREG_TR)
+               var->type |= 0x2;
+
         var->unusable = !var->present;
  }
  
@@ -1099,6 +1130,7 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
         rep = (io_info & SVM_IOIO_REP_MASK) != 0;
         down = (svm->vmcb->save.rflags & X86_EFLAGS_DF) != 0;
  
+       skip_emulated_instruction(&svm->vcpu);
         return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port);
  }
  
@@ -1135,6 +1167,112 @@ static int vmmcall_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
         return 1;
  }
  
+static int nested_svm_check_permissions(struct vcpu_svm *svm)
+{      /* Returns 0 when all checks pass, 1 after queuing #UD or #GP(0) on the vCPU. */
+       if (!(svm->vcpu.arch.shadow_efer & EFER_SVME)
+           || !is_paging(&svm->vcpu)) {        /* EFER.SVME clear or paging disabled: #UD */
+               kvm_queue_exception(&svm->vcpu, UD_VECTOR);
+               return 1;
+       }
+
+       if (svm->vmcb->save.cpl) {      /* non-zero CPL in the VMCB save area: #GP(0) */
+               kvm_inject_gp(&svm->vcpu, 0);
+               return 1;
+       }
+
+       return 0;
+}
+
+static struct page *nested_svm_get_page(struct vcpu_svm *svm, u64 gpa)
+{      /* Return the page for guest-physical address gpa, or NULL after injecting #GP(0). */
+       struct page *page;
+
+       down_read(&current->mm->mmap_sem);
+       page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT);   /* lookup is by frame number, so the page offset is dropped */
+       up_read(&current->mm->mmap_sem);
+
+       if (is_error_page(page)) {
+               printk(KERN_INFO "%s: could not find page at 0x%llx\n",
+                      __func__, gpa);
+               kvm_release_page_clean(page);   /* release the error page before bailing out */
+               kvm_inject_gp(&svm->vcpu, 0);
+               return NULL;
+       }
+       return page;    /* the caller is responsible for releasing the page */
+}
+
+static int nested_svm_do(struct vcpu_svm *svm,
+                        u64 arg1_gpa, u64 arg2_gpa, void *opaque,
+                        int (*handler)(struct vcpu_svm *svm,
+                                       void *arg1,
+                                       void *arg2,
+                                       void *opaque))
+{      /* Map one or two guest pages and run handler on them; returns handler's result, or 1 if a page lookup failed. */
+       struct page *arg1_page;
+       struct page *arg2_page = NULL;
+       void *arg1;
+       void *arg2 = NULL;      /* stays NULL when arg2_gpa is 0 */
+       int retval;
+
+       arg1_page = nested_svm_get_page(svm, arg1_gpa);
+       if(arg1_page == NULL)
+               return 1;       /* nested_svm_get_page() already injected #GP(0) */
+
+       if (arg2_gpa) { /* second page is optional; a zero gpa means "not used" */
+               arg2_page = nested_svm_get_page(svm, arg2_gpa);
+               if(arg2_page == NULL) {
+                       kvm_release_page_clean(arg1_page);      /* undo the first lookup */
+                       return 1;
+               }
+       }
+
+       arg1 = kmap_atomic(arg1_page, KM_USER0);
+       if (arg2_gpa)
+               arg2 = kmap_atomic(arg2_page, KM_USER1);
+
+       retval = handler(svm, arg1, arg2, opaque);      /* runs between kmap_atomic() and kunmap_atomic() */
+
+       kunmap_atomic(arg1, KM_USER0);
+       if (arg2_gpa)
+               kunmap_atomic(arg2, KM_USER1);
+
+       kvm_release_page_dirty(arg1_page);      /* released as dirty whether or not handler wrote to it */
+       if (arg2_gpa)
+               kvm_release_page_dirty(arg2_page);
+
+       return retval;
+}
+
+static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{      /* Handle an intercepted STGI: set HF_GIF_MASK in hflags. Always returns 1; kvm_run is unused. */
+       if (nested_svm_check_permissions(svm))
+               return 1;       /* a fault was queued; RIP is left unchanged */
+
+       svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;   /* STGI is 3 bytes long (0F 01 DC) */
+       skip_emulated_instruction(&svm->vcpu);
+
+       svm->vcpu.arch.hflags |= HF_GIF_MASK;   /* NOTE(review): set after skip_emulated_instruction() already read HF_GIF_MASK */
+
+       return 1;
+}
+
+static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{      /* Handle an intercepted CLGI: clear HF_GIF_MASK in hflags. Always returns 1; kvm_run is unused. */
+       if (nested_svm_check_permissions(svm))
+               return 1;       /* a fault was queued; RIP is left unchanged */
+
+       svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;   /* CLGI is 3 bytes long (0F 01 DD) */
+       skip_emulated_instruction(&svm->vcpu);
+
+       svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
+
+       /* After a CLGI no interrupts should come: drop the VINTR intercept and clear V_IRQ */
+       svm_clear_vintr(svm);
+       svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
+
+       return 1;
+}
+
  static int invalid_op_interception(struct vcpu_svm *svm,
                                    struct kvm_run *kvm_run)
  {
@@ -1246,6 +1384,9 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
         case MSR_IA32_LASTINTTOIP:
                 *data = svm->vmcb->save.last_excp_to;
                 break;
+       case MSR_VM_HSAVE_PA:
+               *data = svm->hsave_msr;
+               break;
         default:
                 return kvm_get_msr_common(vcpu, ecx, data);
         }
@@ -1339,6 +1480,9 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
                  */
                 pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", ecx, data);
  
+               break;
+       case MSR_VM_HSAVE_PA:
+               svm->hsave_msr = data;
                 break;
         default:
                 return kvm_set_msr_common(vcpu, ecx, data);
@@ -1376,7 +1520,7 @@ static int interrupt_window_interception(struct vcpu_svm *svm,
  {
         KVMTRACE_0D(PEND_INTR, &svm->vcpu, handler);
  
-       svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VINTR);
+       svm_clear_vintr(svm);
         svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
         /*
          * If the user space waits to inject interrupts, exit as soon as
@@ -1436,8 +1580,8 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
         [SVM_EXIT_VMMCALL]                      = vmmcall_interception,
         [SVM_EXIT_VMLOAD]                       = invalid_op_interception,
         [SVM_EXIT_VMSAVE]                       = invalid_op_interception,
-       [SVM_EXIT_STGI]                         = invalid_op_interception,
-       [SVM_EXIT_CLGI]                         = invalid_op_interception,
+       [SVM_EXIT_STGI]                         = stgi_interception,
+       [SVM_EXIT_CLGI]                         = clgi_interception,
         [SVM_EXIT_SKINIT]                       = invalid_op_interception,
         [SVM_EXIT_WBINVD]                       = emulate_on_interception,
         [SVM_EXIT_MONITOR]                      = invalid_op_interception,
@@ -1585,18 +1729,20 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu)
         if (!kvm_cpu_has_interrupt(vcpu))
                 goto out;
  
+       if (!(svm->vcpu.arch.hflags & HF_GIF_MASK))
+               goto out;
+
         if (!(vmcb->save.rflags & X86_EFLAGS_IF) ||
             (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) ||
             (vmcb->control.event_inj & SVM_EVTINJ_VALID)) {
                 /* unable to deliver irq, set pending irq */
-               vmcb->control.intercept |= (1ULL << INTERCEPT_VINTR);
+               svm_set_vintr(svm);
                 svm_inject_irq(svm, 0x0);
                 goto out;
         }
         /* Okay, we can deliver the interrupt: grab it and update PIC state. */
         intr_vector = kvm_cpu_get_interrupt(vcpu);
         svm_inject_irq(svm, intr_vector);
-       kvm_timer_intr_post(vcpu, intr_vector);
  out:
         update_cr8_intercept(vcpu);
  }
@@ -1612,7 +1758,8 @@ static void kvm_reput_irq(struct vcpu_svm *svm)
         }
  
         svm->vcpu.arch.interrupt_window_open =
-               !(control->int_state & SVM_INTERRUPT_SHADOW_MASK);
+               !(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
+                (svm->vcpu.arch.hflags & HF_GIF_MASK);
  }
  
  static void svm_do_inject_vector(struct vcpu_svm *svm)
@@ -1636,7 +1783,8 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
  
         svm->vcpu.arch.interrupt_window_open =
                 (!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
-                (svm->vmcb->save.rflags & X86_EFLAGS_IF));
+                (svm->vmcb->save.rflags & X86_EFLAGS_IF) &&
+                (svm->vcpu.arch.hflags & HF_GIF_MASK));
  
         if (svm->vcpu.arch.interrupt_window_open && svm->vcpu.arch.irq_summary)
                 /*
@@ -1649,9 +1797,9 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
          */
         if (!svm->vcpu.arch.interrupt_window_open &&
             (svm->vcpu.arch.irq_summary || kvm_run->request_interrupt_window))
-               control->intercept |= 1ULL << INTERCEPT_VINTR;
-        else
-               control->intercept &= ~(1ULL << INTERCEPT_VINTR);
+               svm_set_vintr(svm);
+       else
+               svm_clear_vintr(svm);
  }
  
  static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
@@ -1912,6 +2060,11 @@ static int get_npt_level(void)
  #endif
  }
  
+static int svm_get_mt_mask_shift(void)
+{      /* get_mt_mask_shift callback for svm_x86_ops; always reports a shift of 0. */
+       return 0;
+}
+
  static struct kvm_x86_ops svm_x86_ops = {
         .cpu_has_kvm_support = has_svm,
         .disabled_by_bios = is_disabled,
@@ -1967,6 +2120,7 @@ static struct kvm_x86_ops svm_x86_ops = {
  
         .set_tss_addr = svm_set_tss_addr,
         .get_tdp_level = get_npt_level,
+       .get_mt_mask_shift = svm_get_mt_mask_shift,
  };
  
  static int __init svm_init(void)