KVM: VMX: Separate saving pre-realmode state from setting segments

[firefly-linux-kernel-4.4.55.git] / arch / x86 / kvm / vmx.c
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c

index c39b60707e0262be788b6909d3e46d63d746cadf..4e49caf9224dcc51f8bb15bdedd86527447c4d35 100644 (file)
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -596,10 +596,9 @@ static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu)
  static struct page *nested_get_page(struct kvm_vcpu *vcpu, gpa_t addr)
  {
         struct page *page = gfn_to_page(vcpu->kvm, addr >> PAGE_SHIFT);
-       if (is_error_page(page)) {
-               kvm_release_page_clean(page);
+       if (is_error_page(page))
                 return NULL;
-       }
+
         return page;
  }
  
@@ -1343,7 +1342,7 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
         guest_efer = vmx->vcpu.arch.efer;
  
         /*
-        * NX is emulated; LMA and LME handled by hardware; SCE meaninless
+        * NX is emulated; LMA and LME handled by hardware; SCE meaningless
          * outside long mode
          */
         ignore_bits = EFER_NX | EFER_SCE;
@@ -1488,13 +1487,6 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
                 loadsegment(ds, vmx->host_state.ds_sel);
                 loadsegment(es, vmx->host_state.es_sel);
         }
-#else
-       /*
-        * The sysexit path does not restore ds/es, so we must set them to
-        * a reasonable value ourselves.
-        */
-       loadsegment(ds, __USER_DS);
-       loadsegment(es, __USER_DS);
  #endif
         reload_tss();
  #ifdef CONFIG_X86_64
@@ -1998,7 +1990,7 @@ static __init void nested_vmx_setup_ctls_msrs(void)
  #endif
                 CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING |
                 CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING |
-               CPU_BASED_RDPMC_EXITING |
+               CPU_BASED_RDPMC_EXITING | CPU_BASED_RDTSC_EXITING |
                 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
         /*
          * We can allow some features even when not supported by the
@@ -2776,7 +2768,7 @@ static gva_t rmode_tss_base(struct kvm *kvm)
         return kvm->arch.tss_addr;
  }
  
-static void fix_rmode_seg(int seg, struct kvm_save_segment *save)
+static void save_rmode_seg(int seg, struct kvm_save_segment *save)
  {
         struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
  
@@ -2784,6 +2776,12 @@ static void fix_rmode_seg(int seg, struct kvm_save_segment *save)
         save->base = vmcs_readl(sf->base);
         save->limit = vmcs_read32(sf->limit);
         save->ar = vmcs_read32(sf->ar_bytes);
+}
+
+static void fix_rmode_seg(int seg, struct kvm_save_segment *save)
+{
+       struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
+
         vmcs_write16(sf->selector, save->base >> 4);
         vmcs_write32(sf->base, save->base & 0xffff0);
         vmcs_write32(sf->limit, 0xffff);
@@ -2806,6 +2804,12 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
         vmx->emulation_required = 1;
         vmx->rmode.vm86_active = 1;
  
+       save_rmode_seg(VCPU_SREG_TR, &vmx->rmode.tr);
+       save_rmode_seg(VCPU_SREG_ES, &vmx->rmode.es);
+       save_rmode_seg(VCPU_SREG_DS, &vmx->rmode.ds);
+       save_rmode_seg(VCPU_SREG_FS, &vmx->rmode.fs);
+       save_rmode_seg(VCPU_SREG_GS, &vmx->rmode.gs);
+
         /*
          * Very old userspace does not call KVM_SET_TSS_ADDR before entering
          * vcpu. Call it here with phys address pointing 16M below 4G.
@@ -2820,14 +2824,8 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
  
         vmx_segment_cache_clear(vmx);
  
-       vmx->rmode.tr.selector = vmcs_read16(GUEST_TR_SELECTOR);
-       vmx->rmode.tr.base = vmcs_readl(GUEST_TR_BASE);
         vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm));
-
-       vmx->rmode.tr.limit = vmcs_read32(GUEST_TR_LIMIT);
         vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1);
-
-       vmx->rmode.tr.ar = vmcs_read32(GUEST_TR_AR_BYTES);
         vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
  
         flags = vmcs_readl(GUEST_RFLAGS);
@@ -3261,7 +3259,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
          * qemu binaries.
          *   IA32 arch specifies that at the time of processor reset the
          * "Accessed" bit in the AR field of segment registers is 1. And qemu
-        * is setting it to 0 in the usedland code. This causes invalid guest
+        * is setting it to 0 in the userland code. This causes invalid guest
          * state vmexit when "unrestricted guest" mode is turned on.
          *    Fix for this setup issue in cpu_reset is being pushed in the qemu
          * tree. Newer qemu binaries with that qemu fix would not need this
@@ -4446,7 +4444,7 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
         hypercall[2] = 0xc1;
  }
  
-/* called to set cr0 as approriate for a mov-to-cr0 exit. */
+/* called to set cr0 as appropriate for a mov-to-cr0 exit. */
  static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
  {
         if (to_vmx(vcpu)->nested.vmxon &&
@@ -6230,6 +6228,7 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
  static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
  {
         struct vcpu_vmx *vmx = to_vmx(vcpu);
+       unsigned long debugctlmsr;
  
         if (is_guest_mode(vcpu) && !vmx->nested.nested_run_pending) {
                 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
@@ -6269,6 +6268,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
                 vmx_set_interrupt_shadow(vcpu, 0);
  
         atomic_switch_perf_msrs(vmx);
+       debugctlmsr = get_debugctlmsr();
  
         vmx->__launched = vmx->loaded_vmcs->launched;
         asm(
@@ -6370,6 +6370,23 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
  #endif
               );
  
+       /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
+       if (debugctlmsr)
+               update_debugctlmsr(debugctlmsr);
+
+#ifndef CONFIG_X86_64
+       /*
+        * The sysexit path does not restore ds/es, so we must set them to
+        * a reasonable value ourselves.
+        *
+        * We can't defer this to vmx_load_host_state() since that function
+        * may be executed in interrupt context, which saves and restores segments
+        * around it, nullifying its effect.
+        */
+       loadsegment(ds, __USER_DS);
+       loadsegment(es, __USER_DS);
+#endif
+
         vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)
                                   | (1 << VCPU_EXREG_RFLAGS)
                                   | (1 << VCPU_EXREG_CPL)