kvm: x86: vmx: NULL out hwapic_isr_update() in case of !enable_apicv

[firefly-linux-kernel-4.4.55.git] / arch / x86 / kvm / vmx.c
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c

index feb852b04598b63d187b0870db26d008c81a13d8..6e71fac27d4eea41cf3f580b3b5f721ab948ff66 100644 (file)
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5840,53 +5840,10 @@ static __init int hardware_setup(void)
         memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
         memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
  
-       vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
-       vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
-       vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
-       vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
-       vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
-       vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
-       vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true);
-
-       memcpy(vmx_msr_bitmap_legacy_x2apic,
-                       vmx_msr_bitmap_legacy, PAGE_SIZE);
-       memcpy(vmx_msr_bitmap_longmode_x2apic,
-                       vmx_msr_bitmap_longmode, PAGE_SIZE);
-
-       if (enable_apicv) {
-               for (msr = 0x800; msr <= 0x8ff; msr++)
-                       vmx_disable_intercept_msr_read_x2apic(msr);
-
-               /* According SDM, in x2apic mode, the whole id reg is used.
-                * But in KVM, it only use the highest eight bits. Need to
-                * intercept it */
-               vmx_enable_intercept_msr_read_x2apic(0x802);
-               /* TMCCT */
-               vmx_enable_intercept_msr_read_x2apic(0x839);
-               /* TPR */
-               vmx_disable_intercept_msr_write_x2apic(0x808);
-               /* EOI */
-               vmx_disable_intercept_msr_write_x2apic(0x80b);
-               /* SELF-IPI */
-               vmx_disable_intercept_msr_write_x2apic(0x83f);
-       }
-
-       if (enable_ept) {
-               kvm_mmu_set_mask_ptes(0ull,
-                       (enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
-                       (enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull,
-                       0ull, VMX_EPT_EXECUTABLE_MASK);
-               ept_set_mmio_spte_mask();
-               kvm_enable_tdp();
-       } else
-               kvm_disable_tdp();
-
-       update_ple_window_actual_max();
-
         if (setup_vmcs_config(&vmcs_config) < 0) {
                 r = -EIO;
                 goto out7;
-    }
+       }
  
         if (boot_cpu_has(X86_FEATURE_NX))
                 kvm_enable_efer_bits(EFER_NX);
@@ -5938,6 +5895,7 @@ static __init int hardware_setup(void)
                 kvm_x86_ops->update_cr8_intercept = NULL;
         else {
                 kvm_x86_ops->hwapic_irr_update = NULL;
+               kvm_x86_ops->hwapic_isr_update = NULL;
                 kvm_x86_ops->deliver_posted_interrupt = NULL;
                 kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy;
         }
@@ -5945,6 +5903,49 @@ static __init int hardware_setup(void)
         if (nested)
                 nested_vmx_setup_ctls_msrs();
  
+       vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
+       vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
+       vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
+       vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
+       vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
+       vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
+       vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true);
+
+       memcpy(vmx_msr_bitmap_legacy_x2apic,
+                       vmx_msr_bitmap_legacy, PAGE_SIZE);
+       memcpy(vmx_msr_bitmap_longmode_x2apic,
+                       vmx_msr_bitmap_longmode, PAGE_SIZE);
+
+       if (enable_apicv) {
+               for (msr = 0x800; msr <= 0x8ff; msr++)
+                       vmx_disable_intercept_msr_read_x2apic(msr);
+
+               /* According SDM, in x2apic mode, the whole id reg is used.
+                * But in KVM, it only use the highest eight bits. Need to
+                * intercept it */
+               vmx_enable_intercept_msr_read_x2apic(0x802);
+               /* TMCCT */
+               vmx_enable_intercept_msr_read_x2apic(0x839);
+               /* TPR */
+               vmx_disable_intercept_msr_write_x2apic(0x808);
+               /* EOI */
+               vmx_disable_intercept_msr_write_x2apic(0x80b);
+               /* SELF-IPI */
+               vmx_disable_intercept_msr_write_x2apic(0x83f);
+       }
+
+       if (enable_ept) {
+               kvm_mmu_set_mask_ptes(0ull,
+                       (enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
+                       (enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull,
+                       0ull, VMX_EPT_EXECUTABLE_MASK);
+               ept_set_mmio_spte_mask();
+               kvm_enable_tdp();
+       } else
+               kvm_disable_tdp();
+
+       update_ple_window_actual_max();
+
         return alloc_kvm_area();
  
  out7:
@@ -6143,6 +6144,13 @@ static void nested_vmx_failValid(struct kvm_vcpu *vcpu,
          */
  }
  
+/*
+ * Handle a VMX abort raised while emulating a nested VM-exit.
+ *
+ * @vcpu:      vCPU on which the abort occurred
+ * @indicator: abort indicator supplied by the caller
+ *
+ * Requests KVM_REQ_TRIPLE_FAULT on the vCPU and logs the indicator.
+ */
+static void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 indicator)
+{
+       /* TODO: handle the abort properly instead of simply resetting the guest. */
+       kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+       /* indicator is unsigned, so print it with %u rather than %d. */
+       pr_warn("kvm: nested vmx abort, indicator %u\n", indicator);
+}
+
  static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer)
  {
         struct vcpu_vmx *vmx =
@@ -7471,9 +7479,6 @@ static void vmx_hwapic_isr_update(struct kvm *kvm, int isr)
         u16 status;
         u8 old;
  
-       if (!vmx_vm_has_apicv(kvm))
-               return;
-
         if (isr == -1)
                 isr = 0;
  
@@ -8199,6 +8204,18 @@ static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu)
         vcpu->arch.walk_mmu = &vcpu->arch.mmu;
  }
  
+/*
+ * Decide whether a page fault with the given error code is reported as a
+ * nested VM-exit according to vmcs12.
+ *
+ * Two conditions are evaluated:
+ *  - the PF_VECTOR bit is set in vmcs12->exception_bitmap;
+ *  - the error code, masked with page_fault_error_code_mask, differs from
+ *    page_fault_error_code_match.
+ *
+ * Returns true when exactly one of the two holds (i.e. their XOR).
+ */
+static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
+                                           u16 error_code)
+{
+       const bool pf_bit_set =
+               !!(vmcs12->exception_bitmap & (1u << PF_VECTOR));
+       const bool pfec_mismatch =
+               (error_code & vmcs12->page_fault_error_code_mask) !=
+               vmcs12->page_fault_error_code_match;
+
+       /* For two bools, "!=" is the same as the XOR of the two values. */
+       return pfec_mismatch != pf_bit_set;
+}
+
  static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
                 struct x86_exception *fault)
  {
@@ -8206,8 +8223,7 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
  
         WARN_ON(!is_guest_mode(vcpu));
  
-       /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */
-       if (vmcs12->exception_bitmap & (1u << PF_VECTOR))
+       if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code))
                 nested_vmx_vmexit(vcpu, to_vmx(vcpu)->exit_reason,
                                   vmcs_read32(VM_EXIT_INTR_INFO),
                                   vmcs_readl(EXIT_QUALIFICATION));
@@ -8286,6 +8302,162 @@ static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu)
                       ns_to_ktime(preemption_timeout), HRTIMER_MODE_REL);
  }
  
+/*
+ * Validate one MSR-switch area (count/address pair) read from vmcs12.
+ *
+ * @vcpu:        vCPU whose vmcs12 fields are read via vmcs12_read_any()
+ * @count_field: VMCS field encoding of the entry count
+ * @addr_field:  VMCS field encoding of the area address
+ * @maxphyaddr:  number of physical-address bits; any address with a bit set
+ *               at or above this position is rejected
+ *
+ * Returns 0 when the count is zero, or when the area is 16-byte aligned and
+ * both its first and last byte lie below 2^maxphyaddr. Returns -EINVAL
+ * otherwise, and also when either field cannot be read.
+ */
+static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu,
+                                      unsigned long count_field,
+                                      unsigned long addr_field,
+                                      int maxphyaddr)
+{
+       u64 count, addr;
+
+       /* Warn and reject if either field cannot be read from vmcs12. */
+       if (vmcs12_read_any(vcpu, count_field, &count) ||
+           vmcs12_read_any(vcpu, addr_field, &addr)) {
+               WARN_ON(1);
+               return -EINVAL;
+       }
+
+       /* An empty area needs no address validation. */
+       if (count == 0)
+               return 0;
+
+       /*
+        * Check alignment, the start address, and the address of the last
+        * byte of the area (addr + count * entry size - 1).
+        */
+       if (!IS_ALIGNED(addr, 16) || addr >> maxphyaddr ||
+           (addr + count * sizeof(struct vmx_msr_entry) - 1) >> maxphyaddr) {
+               /* Terminate the message with a newline like the other logs. */
+               pr_warn_ratelimited(
+                       "nVMX: invalid MSR switch (0x%lx, %d, %llu, 0x%08llx)\n",
+                       addr_field, maxphyaddr, count, addr);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+/*
+ * Validate the three MSR-switch areas described by vmcs12: VM-exit MSR load,
+ * VM-exit MSR store and VM-entry MSR load, checked in that order.
+ *
+ * Returns 0 when all three counts are zero or every area passes
+ * nested_vmx_check_msr_switch(); -EINVAL as soon as one area fails.
+ */
+static int nested_vmx_check_msr_switch_controls(struct kvm_vcpu *vcpu,
+                                               struct vmcs12 *vmcs12)
+{
+       int phys_bits;
+
+       /* Fast path: no MSR switching requested, nothing to validate. */
+       if (!vmcs12->vm_exit_msr_load_count &&
+           !vmcs12->vm_exit_msr_store_count &&
+           !vmcs12->vm_entry_msr_load_count)
+               return 0;
+
+       phys_bits = cpuid_maxphyaddr(vcpu);
+
+       if (nested_vmx_check_msr_switch(vcpu, VM_EXIT_MSR_LOAD_COUNT,
+                                       VM_EXIT_MSR_LOAD_ADDR, phys_bits))
+               return -EINVAL;
+
+       if (nested_vmx_check_msr_switch(vcpu, VM_EXIT_MSR_STORE_COUNT,
+                                       VM_EXIT_MSR_STORE_ADDR, phys_bits))
+               return -EINVAL;
+
+       if (nested_vmx_check_msr_switch(vcpu, VM_ENTRY_MSR_LOAD_COUNT,
+                                       VM_ENTRY_MSR_LOAD_ADDR, phys_bits))
+               return -EINVAL;
+
+       return 0;
+}
+
+/*
+ * Checks shared by the MSR load and MSR store paths for a single entry.
+ *
+ * Returns -EINVAL when:
+ *  - the vCPU's APIC is in x2APIC mode and the MSR index, shifted right by
+ *    eight bits, equals 0x8;
+ *  - the index is MSR_IA32_UCODE_WRITE or MSR_IA32_UCODE_REV;
+ *  - the entry's reserved field is non-zero.
+ * Returns 0 otherwise.
+ */
+static int nested_vmx_msr_check_common(struct kvm_vcpu *vcpu,
+                                      struct vmx_msr_entry *e)
+{
+       /* x2APIC MSR accesses are not allowed */
+       if (apic_x2apic_mode(vcpu->arch.apic) && (e->index >> 8) == 0x8)
+               return -EINVAL;
+
+       switch (e->index) {
+       case MSR_IA32_UCODE_WRITE:      /* SDM Table 35-2 */
+       case MSR_IA32_UCODE_REV:
+               return -EINVAL;
+       default:
+               break;
+       }
+
+       return e->reserved != 0 ? -EINVAL : 0;
+}
+
+/*
+ * Validate one entry of an MSR-load area before it is written.
+ *
+ * Rejects MSR_FS_BASE, MSR_GS_BASE and MSR_IA32_SMM_MONITOR_CTL outright,
+ * then applies nested_vmx_msr_check_common().
+ *
+ * Returns 0 when the entry is acceptable, -EINVAL otherwise.
+ */
+static int nested_vmx_load_msr_check(struct kvm_vcpu *vcpu,
+                                    struct vmx_msr_entry *e)
+{
+       switch (e->index) {
+       case MSR_FS_BASE:
+       case MSR_GS_BASE:
+       case MSR_IA32_SMM_MONITOR_CTL:  /* SMM is not supported */
+               return -EINVAL;
+       default:
+               break;
+       }
+
+       if (nested_vmx_msr_check_common(vcpu, e))
+               return -EINVAL;
+
+       return 0;
+}
+
+/*
+ * Validate one entry of an MSR-store area before the MSR is read.
+ *
+ * Rejects MSR_IA32_SMBASE outright, then applies
+ * nested_vmx_msr_check_common().
+ *
+ * Returns 0 when the entry is acceptable, -EINVAL otherwise.
+ */
+static int nested_vmx_store_msr_check(struct kvm_vcpu *vcpu,
+                                     struct vmx_msr_entry *e)
+{
+       /* SMM is not supported */
+       if (e->index == MSR_IA32_SMBASE)
+               return -EINVAL;
+
+       if (nested_vmx_msr_check_common(vcpu, e))
+               return -EINVAL;
+
+       return 0;
+}
+
+/*
+ * Load guest's/host's MSRs at nested entry/exit.
+ *
+ * Walks @count entries of the MSR-load area starting at guest physical
+ * address @gpa. Each entry is read from guest memory, validated with
+ * nested_vmx_load_msr_check() and written with kvm_set_msr() as a
+ * non-host-initiated access.
+ *
+ * Returns 0 on success. On failure, returns the 1-based position of the
+ * offending entry (its index + 1), so that 0 stays unambiguous.
+ */
+static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
+{
+       struct msr_data msr;
+       struct vmx_msr_entry e;
+       u32 i;
+
+       msr.host_initiated = false;
+
+       for (i = 0; i < count; i++) {
+               const u64 entry_gpa = gpa + i * sizeof(e);
+
+               if (kvm_read_guest(vcpu->kvm, entry_gpa, &e, sizeof(e))) {
+                       pr_warn_ratelimited(
+                               "%s cannot read MSR entry (%u, 0x%08llx)\n",
+                               __func__, i, entry_gpa);
+                       return i + 1;
+               }
+
+               if (nested_vmx_load_msr_check(vcpu, &e)) {
+                       pr_warn_ratelimited(
+                               "%s check failed (%u, 0x%x, 0x%x)\n",
+                               __func__, i, e.index, e.reserved);
+                       return i + 1;
+               }
+
+               msr.index = e.index;
+               msr.data = e.value;
+
+               if (kvm_set_msr(vcpu, &msr)) {
+                       pr_warn_ratelimited(
+                               "%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
+                               __func__, i, e.index, e.value);
+                       return i + 1;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Store MSR values into the MSR-store area at guest physical address @gpa.
+ *
+ * For each of the @count entries: the first two u32 fields of the entry are
+ * read from guest memory, the entry is validated with
+ * nested_vmx_store_msr_check(), the MSR is read with kvm_get_msr(), and the
+ * result is written back to the entry's value field in guest memory.
+ *
+ * Returns 0 on success, -EINVAL on the first entry that fails any step.
+ */
+static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
+{
+       struct vmx_msr_entry e;
+       u32 i;
+
+       for (i = 0; i < count; i++) {
+               const u64 entry_gpa = gpa + i * sizeof(e);
+
+               /* Only the leading two u32 fields are fetched here. */
+               if (kvm_read_guest(vcpu->kvm, entry_gpa,
+                                  &e, 2 * sizeof(u32))) {
+                       pr_warn_ratelimited(
+                               "%s cannot read MSR entry (%u, 0x%08llx)\n",
+                               __func__, i, entry_gpa);
+                       return -EINVAL;
+               }
+
+               if (nested_vmx_store_msr_check(vcpu, &e)) {
+                       pr_warn_ratelimited(
+                               "%s check failed (%u, 0x%x, 0x%x)\n",
+                               __func__, i, e.index, e.reserved);
+                       return -EINVAL;
+               }
+
+               if (kvm_get_msr(vcpu, e.index, &e.value)) {
+                       pr_warn_ratelimited(
+                               "%s cannot read MSR (%u, 0x%x)\n",
+                               __func__, i, e.index);
+                       return -EINVAL;
+               }
+
+               /* Write back only the value field of this entry. */
+               if (kvm_write_guest(vcpu->kvm,
+                                   entry_gpa +
+                                       offsetof(struct vmx_msr_entry, value),
+                                   &e.value, sizeof(e.value))) {
+                       pr_warn_ratelimited(
+                               "%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
+                               __func__, i, e.index, e.value);
+                       return -EINVAL;
+               }
+       }
+
+       return 0;
+}
+
  /*
   * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
   * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
@@ -8582,6 +8754,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
         int cpu;
         struct loaded_vmcs *vmcs02;
         bool ia32e;
+       u32 msr_entry_idx;
  
         if (!nested_vmx_check_permission(vcpu) ||
             !nested_vmx_check_vmcs12(vcpu))
@@ -8629,11 +8802,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
                 return 1;
         }
  
-       if (vmcs12->vm_entry_msr_load_count > 0 ||
-           vmcs12->vm_exit_msr_load_count > 0 ||
-           vmcs12->vm_exit_msr_store_count > 0) {
-               pr_warn_ratelimited("%s: VMCS MSR_{LOAD,STORE} unsupported\n",
-                                   __func__);
+       if (nested_vmx_check_msr_switch_controls(vcpu, vmcs12)) {
                 nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
                 return 1;
         }
@@ -8739,10 +8908,21 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
  
         vmx_segment_cache_clear(vmx);
  
-       vmcs12->launch_state = 1;
-
         prepare_vmcs02(vcpu, vmcs12);
  
+       msr_entry_idx = nested_vmx_load_msr(vcpu,
+                                           vmcs12->vm_entry_msr_load_addr,
+                                           vmcs12->vm_entry_msr_load_count);
+       if (msr_entry_idx) {
+               leave_guest_mode(vcpu);
+               vmx_load_vmcs01(vcpu);
+               nested_vmx_entry_failure(vcpu, vmcs12,
+                               EXIT_REASON_MSR_LOAD_FAIL, msr_entry_idx);
+               return 1;
+       }
+
+       vmcs12->launch_state = 1;
+
         if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT)
                 return kvm_emulate_halt(vcpu);
  
@@ -9172,6 +9352,10 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
  
         kvm_set_dr(vcpu, 7, 0x400);
         vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
+
+       if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr,
+                               vmcs12->vm_exit_msr_load_count))
+               nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL);
  }
  
  /*
@@ -9193,6 +9377,10 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
         prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info,
                        exit_qualification);
  
+       if (nested_vmx_store_msr(vcpu, vmcs12->vm_exit_msr_store_addr,
+                                vmcs12->vm_exit_msr_store_count))
+               nested_vmx_abort(vcpu, VMX_ABORT_SAVE_GUEST_MSR_FAIL);
+
         vmx_load_vmcs01(vcpu);
  
         if ((exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT)