kvm: do not handle APIC access page if in-kernel irqchip is not in use
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ca3d760dd5817f45ab6ea87a33fa075105095e2d..5430e4b0af29194af597902858c99e11250a3cfa 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -246,7 +246,7 @@ void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
 }
 EXPORT_SYMBOL_GPL(kvm_set_shared_msr);
 
-static void drop_user_return_notifiers(void *ignore)
+static void drop_user_return_notifiers(void)
 {
        unsigned int cpu = smp_processor_id();
        struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
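
Note: the unused `void *ignore` argument is dropped here and from the arch
hooks near the bottom of this patch; the function only ever touches the
current CPU's shared-MSR state. A minimal sketch of that per-CPU state,
with field names taken from the surrounding code and the array size
illustrative rather than exact:

    struct kvm_shared_msr_values {
            u64 host;       /* value the host expects on return to userspace */
            u64 curr;       /* value currently loaded for the guest */
    };

    struct kvm_shared_msrs {
            struct user_return_notifier urn;
            bool registered;                        /* notifier armed on this CPU? */
            struct kvm_shared_msr_values values[4]; /* size is an assumption */
    };
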
@@ -408,12 +408,14 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
 }
 EXPORT_SYMBOL_GPL(kvm_inject_page_fault);
 
-void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
+static bool kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
 {
        if (mmu_is_nested(vcpu) && !fault->nested_page_fault)
                vcpu->arch.nested_mmu.inject_page_fault(vcpu, fault);
        else
                vcpu->arch.mmu.inject_page_fault(vcpu, fault);
+
+       return fault->nested_page_fault;
 }
 
 void kvm_inject_nmi(struct kvm_vcpu *vcpu)
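
Note: returning `fault->nested_page_fault` lets callers learn whether the
fault was delivered to the L1 hypervisor (nested case) rather than to the
guest being emulated; `inject_emulated_exception()` further down in this
diff uses exactly this to abort the emulation completion path.
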
@@ -457,11 +459,12 @@ int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
                            gfn_t ngfn, void *data, int offset, int len,
                            u32 access)
 {
+       struct x86_exception exception;
        gfn_t real_gfn;
        gpa_t ngpa;
 
        ngpa     = gfn_to_gpa(ngfn);
-       real_gfn = mmu->translate_gpa(vcpu, ngpa, access);
+       real_gfn = mmu->translate_gpa(vcpu, ngpa, access, &exception);
        if (real_gfn == UNMAPPED_GVA)
                return -EFAULT;
 
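
Note: `translate_gpa` now takes an exception out-parameter so nested
page-table walks can report precise fault details instead of discarding
them; this caller only cares about the UNMAPPED_GVA result, so it passes a
scratch `exception`. Sketch of the callback signature after this change
(an assumption inferred from the call sites in this diff):

    gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
                           struct x86_exception *exception);
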
@@ -726,7 +729,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 {
        if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) {
                kvm_mmu_sync_roots(vcpu);
-               kvm_mmu_flush_tlb(vcpu);
+               kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
                return 0;
        }
 
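
Note: from here on the patch converts open-coded `set_bit()` on
`vcpu->requests` to `kvm_make_request()`, and the TLB flush itself is
deferred to `vcpu_enter_guest()` (see the `kvm_vcpu_flush_tlb()` hunk
below), where it can also be counted in per-vcpu stats. A sketch of the
helper, assuming the include/linux/kvm_host.h definition of this era:

    static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu)
    {
            set_bit(req, &vcpu->requests);
    }
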
@@ -1518,7 +1521,7 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
        pvclock_update_vm_gtod_copy(kvm);
 
        kvm_for_each_vcpu(i, vcpu, kvm)
-               set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
+               kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
 
        /* guest entries allowed */
        kvm_for_each_vcpu(i, vcpu, kvm)
@@ -1661,7 +1664,7 @@ static void kvmclock_update_fn(struct work_struct *work)
        struct kvm_vcpu *vcpu;
 
        kvm_for_each_vcpu(i, vcpu, kvm) {
-               set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
+               kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
                kvm_vcpu_kick(vcpu);
        }
 }
@@ -1670,7 +1673,7 @@ static void kvm_gen_kvmclock_update(struct kvm_vcpu *v)
 {
        struct kvm *kvm = v->kvm;
 
-       set_bit(KVM_REQ_CLOCK_UPDATE, &v->requests);
+       kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
        schedule_delayed_work(&kvm->arch.kvmclock_update_work,
                                        KVMCLOCK_UPDATE_DELAY);
 }
@@ -1723,9 +1726,10 @@ static bool valid_mtrr_type(unsigned t)
        return t < 8 && (1 << t) & 0x73; /* 0, 1, 4, 5, 6 */
 }
 
-static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 {
        int i;
+       u64 mask;
 
        if (!msr_mtrr_valid(msr))
                return false;
@@ -1747,14 +1751,31 @@ static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
        }
 
        /* variable MTRRs */
-       return valid_mtrr_type(data & 0xff);
+       WARN_ON(!(msr >= 0x200 && msr < 0x200 + 2 * KVM_NR_VAR_MTRR));
+
+       mask = (~0ULL) << cpuid_maxphyaddr(vcpu);
+       if ((msr & 1) == 0) {
+               /* MTRR base */
+               if (!valid_mtrr_type(data & 0xff))
+                       return false;
+               mask |= 0xf00;
+       } else
+               /* MTRR mask */
+               mask |= 0x7ff;
+       if (data & mask) {
+               kvm_inject_gp(vcpu, 0);
+               return false;
+       }
+
+       return true;
 }
+EXPORT_SYMBOL_GPL(kvm_mtrr_valid);
 
 static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 {
        u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
 
-       if (!mtrr_valid(vcpu, msr, data))
+       if (!kvm_mtrr_valid(vcpu, msr, data))
                return 1;
 
        if (msr == MSR_MTRRdefType) {
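
Note: the rewritten check validates the variable-range MTRR pairs bit by
bit. A worked example, assuming `cpuid_maxphyaddr(vcpu) == 36`:

    /*
     * PhysBase (even MSR): mask = 0xFFFFFFF000000F00
     *   bits 63:36 (above MAXPHYADDR) and bits 11:8 are reserved;
     *   bits 7:0 hold the memory type, checked by valid_mtrr_type().
     * PhysMask (odd MSR):  mask = 0xFFFFFFF0000007FF
     *   bits 63:36 and bits 10:0 are reserved; bit 11 is the V (valid) flag.
     *
     * A write with any reserved bit set now injects #GP(0) instead of
     * being silently accepted.
     */
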
@@ -1805,7 +1826,7 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data)
                break;
        default:
                if (msr >= MSR_IA32_MC0_CTL &&
-                   msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
+                   msr < MSR_IA32_MCx_CTL(bank_num)) {
                        u32 offset = msr - MSR_IA32_MC0_CTL;
                        /* only 0 or all 1s can be written to IA32_MCi_CTL
                         * some Linux kernels though clear bit 10 in bank 4 to
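
Note: `MSR_IA32_MCx_CTL()` replaces the open-coded `MC0_CTL + 4 * bank`
arithmetic; each MCE bank occupies four consecutive MSRs (CTL, STATUS,
ADDR, MISC). Sketch of the macro, assuming its msr-index.h form:

    #define MSR_IA32_MCx_CTL(x)     (MSR_IA32_MC0_CTL + 4*(x))
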
@@ -2164,7 +2185,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 
        case MSR_IA32_MCG_CTL:
        case MSR_IA32_MCG_STATUS:
-       case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
+       case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
                return set_msr_mce(vcpu, msr, data);
 
        /* Performance counters are not protected by a CPUID bit,
@@ -2330,7 +2351,7 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
                break;
        default:
                if (msr >= MSR_IA32_MC0_CTL &&
-                   msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
+                   msr < MSR_IA32_MCx_CTL(bank_num)) {
                        u32 offset = msr - MSR_IA32_MC0_CTL;
                        data = vcpu->arch.mce_banks[offset];
                        break;
@@ -2419,7 +2440,13 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
        case MSR_K7_HWCR:
        case MSR_VM_HSAVE_PA:
        case MSR_K7_EVNTSEL0:
+       case MSR_K7_EVNTSEL1:
+       case MSR_K7_EVNTSEL2:
+       case MSR_K7_EVNTSEL3:
        case MSR_K7_PERFCTR0:
+       case MSR_K7_PERFCTR1:
+       case MSR_K7_PERFCTR2:
+       case MSR_K7_PERFCTR3:
        case MSR_K8_INT_PENDING_MSG:
        case MSR_AMD64_NB_CFG:
        case MSR_FAM10H_MMIO_CONF_BASE:
@@ -2505,7 +2532,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
        case MSR_IA32_MCG_CAP:
        case MSR_IA32_MCG_CTL:
        case MSR_IA32_MCG_STATUS:
-       case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
+       case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
                return get_msr_mce(vcpu, msr, pdata);
        case MSR_K7_CLK_CTL:
                /*
@@ -2636,7 +2663,7 @@ out:
        return r;
 }
 
-int kvm_dev_ioctl_check_extension(long ext)
+int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 {
        int r;
 
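
Note: extension checking moves from a device-level to a VM-level ioctl
(`kvm_vm_ioctl_check_extension()` gains a `struct kvm *`), so a capability
answer can in principle depend on the VM it is asked about.
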
@@ -2823,7 +2850,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
        if (unlikely(vcpu->arch.tsc_offset_adjustment)) {
                adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment);
                vcpu->arch.tsc_offset_adjustment = 0;
-               set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
+               kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
        }
 
        if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) {
@@ -4040,16 +4067,16 @@ void kvm_get_segment(struct kvm_vcpu *vcpu,
        kvm_x86_ops->get_segment(vcpu, var, seg);
 }
 
-gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access)
+gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
+                          struct x86_exception *exception)
 {
        gpa_t t_gpa;
-       struct x86_exception exception;
 
        BUG_ON(!mmu_is_nested(vcpu));
 
        /* NPT walks are always user-walks */
        access |= PFERR_USER_MASK;
-       t_gpa  = vcpu->arch.mmu.gva_to_gpa(vcpu, gpa, access, &exception);
+       t_gpa  = vcpu->arch.mmu.gva_to_gpa(vcpu, gpa, access, exception);
 
        return t_gpa;
 }
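
Note: this is the same exception-threading change as in
`kvm_read_guest_page_mmu()` above; the scratch `x86_exception` on the
stack is gone and the caller now receives the fault details from the
nested walk.
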
@@ -4906,16 +4933,18 @@ static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
        }
 }
 
-static void inject_emulated_exception(struct kvm_vcpu *vcpu)
+static bool inject_emulated_exception(struct kvm_vcpu *vcpu)
 {
        struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
        if (ctxt->exception.vector == PF_VECTOR)
-               kvm_propagate_fault(vcpu, &ctxt->exception);
-       else if (ctxt->exception.error_code_valid)
+               return kvm_propagate_fault(vcpu, &ctxt->exception);
+
+       if (ctxt->exception.error_code_valid)
                kvm_queue_exception_e(vcpu, ctxt->exception.vector,
                                      ctxt->exception.error_code);
        else
                kvm_queue_exception(vcpu, ctxt->exception.vector);
+       return false;
 }
 
 static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
@@ -4972,7 +5001,7 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu)
 
        ++vcpu->stat.insn_emulation_fail;
        trace_kvm_emulate_insn_failed(vcpu);
-       if (!is_guest_mode(vcpu)) {
+       if (!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) {
                vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
                vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
                vcpu->run->internal.ndata = 0;
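
Note: an emulation failure triggered from guest user mode (CPL > 0) no
longer kills the VM with KVM_EXIT_INTERNAL_ERROR. Sketch of the resulting
behaviour (the hunk truncates the tail of the function):

    /*
     * not nested and CPL == 0  -> report KVM_EXIT_INTERNAL_ERROR to userspace
     * otherwise                -> just inject #UD into the guest
     */
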
@@ -5224,6 +5253,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
 
                ctxt->interruptibility = 0;
                ctxt->have_exception = false;
+               ctxt->exception.vector = -1;
                ctxt->perm_ok = false;
 
                ctxt->ud = emulation_type & EMULTYPE_TRAP_UD;
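
Note: vector 0 is #DE, a legitimate exception, so it cannot double as a
"no exception" marker; initializing `ctxt->exception.vector` to -1 gives
the emulator an unambiguous "nothing recorded yet" state.
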
@@ -5276,8 +5306,9 @@ restart:
        }
 
        if (ctxt->have_exception) {
-               inject_emulated_exception(vcpu);
                r = EMULATE_DONE;
+               if (inject_emulated_exception(vcpu))
+                       return r;
        } else if (vcpu->arch.pio.count) {
                if (!vcpu->arch.pio.in) {
                        /* FIXME: return into emulator if single-stepping.  */
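
Note: the reordering matters here: when `inject_emulated_exception()`
reports a nested page fault (see `kvm_propagate_fault()` above), the fault
has been handed to L1 and the usual completion work below this point must
be skipped, hence the early `return r`.
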
@@ -5545,7 +5576,7 @@ static void kvm_set_mmio_spte_mask(void)
         * entry to generate page fault with PFER.RSV = 1.
         */
         /* Mask the reserved physical address bits. */
-       mask = ((1ull << (51 - maxphyaddr + 1)) - 1) << maxphyaddr;
+       mask = rsvd_bits(maxphyaddr, 51);
 
        /* Bit 62 is always reserved for 32bit host. */
        mask |= 0x3ull << 62;
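
Note: `rsvd_bits()` builds exactly the mask the open-coded expression did.
Sketch of the helper, assuming its arch/x86/kvm/mmu.h form, plus a worked
value:

    static inline u64 rsvd_bits(int s, int e)
    {
            return ((1ULL << (e - s + 1)) - 1) << s;
    }

    /* e.g. maxphyaddr == 40: rsvd_bits(40, 51) == 0x000FFF0000000000,
     * i.e. bits 51:40 set. */
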
@@ -5576,7 +5607,7 @@ static void pvclock_gtod_update_fn(struct work_struct *work)
        spin_lock(&kvm_lock);
        list_for_each_entry(kvm, &vm_list, vm_list)
                kvm_for_each_vcpu(i, vcpu, kvm)
-                       set_bit(KVM_REQ_MASTERCLOCK_UPDATE, &vcpu->requests);
+                       kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
        atomic_set(&kvm_guest_has_master_clock, 0);
        spin_unlock(&kvm_lock);
 }
@@ -5989,6 +6020,44 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
        kvm_apic_update_tmr(vcpu, tmr);
 }
 
+static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
+{
+       ++vcpu->stat.tlb_flush;
+       kvm_x86_ops->tlb_flush(vcpu);
+}
+
+void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
+{
+       struct page *page = NULL;
+
+       if (!irqchip_in_kernel(vcpu->kvm))
+               return;
+
+       if (!kvm_x86_ops->set_apic_access_page_addr)
+               return;
+
+       page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
+       kvm_x86_ops->set_apic_access_page_addr(vcpu, page_to_phys(page));
+
+       /*
+        * Do not pin apic access page in memory, the MMU notifier
+        * will call us again if it is migrated or swapped out.
+        */
+       put_page(page);
+}
+EXPORT_SYMBOL_GPL(kvm_vcpu_reload_apic_access_page);
+
+void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
+                                          unsigned long address)
+{
+       /*
+        * The physical address of apic access page is stored in the VMCS.
+        * Update it when it becomes invalid.
+        */
+       if (address == gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT))
+               kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
+}
+
 /*
  * Returns 1 to let __vcpu_run() continue the guest execution loop without
  * exiting to the userspace.  Otherwise, the value will be returned to the
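
Note: this is the headline fix plus its supporting machinery. The two
early returns guard the cases where there is nothing to reload: with a
userspace irqchip there is no kernel-managed APIC access page at all, and
a vendor module that does not implement `set_apic_access_page_addr` keeps
no such hardware pointer. The round-trip the new code implements, as a
sketch:

    /*
     * MMU notifier: page at APIC_DEFAULT_PHYS_BASE is invalidated
     *   -> kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD)
     * vcpu_enter_guest(): request seen before the next guest entry
     *   -> kvm_vcpu_reload_apic_access_page()
     *      -> gfn_to_page() resolves the (possibly migrated) host page
     *      -> kvm_x86_ops->set_apic_access_page_addr() rewrites the field
     *      -> put_page() so the page is never pinned long-term
     */
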
@@ -6018,7 +6087,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
                if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
                        kvm_mmu_sync_roots(vcpu);
                if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
-                       kvm_x86_ops->tlb_flush(vcpu);
+                       kvm_vcpu_flush_tlb(vcpu);
                if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
                        vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
                        r = 0;
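
Note: routing the flush through the new `kvm_vcpu_flush_tlb()` wrapper
makes deferred flushes visible in `vcpu->stat.tlb_flush`.
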
@@ -6049,6 +6118,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
                        kvm_deliver_pmi(vcpu);
                if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
                        vcpu_scan_ioapic(vcpu);
+               if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
+                       kvm_vcpu_reload_apic_access_page(vcpu);
        }
 
        if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
@@ -6934,7 +7005,7 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector)
        kvm_rip_write(vcpu, 0);
 }
 
-int kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void)
 {
        struct kvm *kvm;
        struct kvm_vcpu *vcpu;
@@ -6945,7 +7016,7 @@ int kvm_arch_hardware_enable(void *garbage)
        bool stable, backwards_tsc = false;
 
        kvm_shared_msr_cpu_online();
-       ret = kvm_x86_ops->hardware_enable(garbage);
+       ret = kvm_x86_ops->hardware_enable();
        if (ret != 0)
                return ret;
 
@@ -6954,7 +7025,7 @@ int kvm_arch_hardware_enable(void *garbage)
        list_for_each_entry(kvm, &vm_list, vm_list) {
                kvm_for_each_vcpu(i, vcpu, kvm) {
                        if (!stable && vcpu->cpu == smp_processor_id())
-                               set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
+                               kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
                        if (stable && vcpu->arch.last_host_tsc > local_tsc) {
                                backwards_tsc = true;
                                if (vcpu->arch.last_host_tsc > max_tsc)
@@ -7008,8 +7079,7 @@ int kvm_arch_hardware_enable(void *garbage)
                        kvm_for_each_vcpu(i, vcpu, kvm) {
                                vcpu->arch.tsc_offset_adjustment += delta_cyc;
                                vcpu->arch.last_host_tsc = local_tsc;
-                               set_bit(KVM_REQ_MASTERCLOCK_UPDATE,
-                                       &vcpu->requests);
+                               kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
                        }
 
                        /*
@@ -7026,10 +7096,10 @@ int kvm_arch_hardware_enable(void *garbage)
        return 0;
 }
 
-void kvm_arch_hardware_disable(void *garbage)
+void kvm_arch_hardware_disable(void)
 {
-       kvm_x86_ops->hardware_disable(garbage);
-       drop_user_return_notifiers(garbage);
+       kvm_x86_ops->hardware_disable();
+       drop_user_return_notifiers();
 }
 
 int kvm_arch_hardware_setup(void)
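
Note: the `void *garbage` parameters are gone from the whole
enable/disable chain. Sketch of the matching vendor-ops signatures after
this change (an assumption mirroring the calls above):

    struct kvm_x86_ops {
            int  (*hardware_enable)(void);
            void (*hardware_disable)(void);
            /* ... */
    };
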
@@ -7146,6 +7216,11 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
                static_key_slow_dec(&kvm_no_apic_vcpu);
 }
 
+void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
+{
+       kvm_x86_ops->sched_in(vcpu, cpu);
+}
+
 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 {
        if (type)
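
Note: `kvm_arch_sched_in()` is a new arch hook invoked from the generic
preempt-notifier path when a vcpu thread is scheduled back in; x86
forwards it to the vendor module, which uses it for things like resizing
the PLE window (hence the `kvm_ple_window` tracepoint exported at the end
of this diff). A hedged sketch of the generic-side caller in
virt/kvm/kvm_main.c (the exact shape is an assumption):

    static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
    {
            struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);

            kvm_arch_sched_in(vcpu, cpu);   /* new hook added by this patch */
            kvm_arch_vcpu_load(vcpu, cpu);
    }
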
@@ -7237,10 +7312,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
        kfree(kvm->arch.vpic);
        kfree(kvm->arch.vioapic);
        kvm_free_vcpus(kvm);
-       if (kvm->arch.apic_access_page)
-               put_page(kvm->arch.apic_access_page);
-       if (kvm->arch.ept_identity_pagetable)
-               put_page(kvm->arch.ept_identity_pagetable);
        kfree(rcu_dereference_check(kvm->arch.apic_map, 1));
 }
 
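
Note: the APIC access page and the EPT identity pagetable are no longer
pinned for the lifetime of the VM, so there is no reference left to drop
at destroy time; the reload path added above keeps the hardware pointer
valid whenever the page moves.
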
@@ -7643,3 +7714,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window);