KVM: x86: disable paravirt mmu reporting
[firefly-linux-kernel-4.4.55.git] / arch / x86 / kvm / x86.c
index be451ee44249be80cdc4829df1f8a6ccc1f5d2f5..4693f915f3bde30493cf08667933d35203512c17 100644 (file)
@@ -1238,8 +1238,8 @@ int kvm_dev_ioctl_check_extension(long ext)
        case KVM_CAP_NR_MEMSLOTS:
                r = KVM_MEMORY_SLOTS;
                break;
-       case KVM_CAP_PV_MMU:
-               r = !tdp_enabled;
+       case KVM_CAP_PV_MMU:    /* obsolete */
+               r = 0;
                break;
        case KVM_CAP_IOMMU:
                r = iommu_found();
@@ -1326,6 +1326,8 @@ out:
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
        kvm_x86_ops->vcpu_load(vcpu, cpu);
+       if (unlikely(per_cpu(cpu_tsc_khz, cpu) == 0))
+               per_cpu(cpu_tsc_khz, cpu) = cpufreq_quick_get(cpu);
        kvm_request_guest_time_update(vcpu);
 }
 
@@ -1591,6 +1593,8 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
 
        if (cpuid->nent < 1)
                goto out;
+       if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
+               cpuid->nent = KVM_MAX_CPUID_ENTRIES;
        r = -ENOMEM;
        cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent);
        if (!cpuid_entries)
@@ -1690,7 +1694,7 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
        unsigned bank_num = mcg_cap & 0xff, bank;
 
        r = -EINVAL;
-       if (!bank_num)
+       if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS)
                goto out;
        if (mcg_cap & ~(KVM_MCE_CAP_SUPPORTED | 0xff | 0xff0000))
                goto out;
@@ -2036,9 +2040,7 @@ static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
                        sizeof(struct kvm_pic_state));
                break;
        case KVM_IRQCHIP_IOAPIC:
-               memcpy(&chip->chip.ioapic,
-                       ioapic_irqchip(kvm),
-                       sizeof(struct kvm_ioapic_state));
+               r = kvm_get_ioapic(kvm, &chip->chip.ioapic);
                break;
        default:
                r = -EINVAL;
@@ -2068,11 +2070,7 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
                spin_unlock(&pic_irqchip(kvm)->lock);
                break;
        case KVM_IRQCHIP_IOAPIC:
-               mutex_lock(&kvm->irq_lock);
-               memcpy(ioapic_irqchip(kvm),
-                       &chip->chip.ioapic,
-                       sizeof(struct kvm_ioapic_state));
-               mutex_unlock(&kvm->irq_lock);
+               r = kvm_set_ioapic(kvm, &chip->chip.ioapic);
                break;
        default:
                r = -EINVAL;
@@ -2180,7 +2178,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
 {
        struct kvm *kvm = filp->private_data;
        void __user *argp = (void __user *)arg;
-       int r = -EINVAL;
+       int r = -ENOTTY;
        /*
         * This union makes it completely explicit to gcc-3.x
         * that these two variables' stack usage should be
@@ -2290,10 +2288,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
                        goto out;
                if (irqchip_in_kernel(kvm)) {
                        __s32 status;
-                       mutex_lock(&kvm->irq_lock);
                        status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
                                        irq_event.irq, irq_event.level);
-                       mutex_unlock(&kvm->irq_lock);
                        if (ioctl == KVM_IRQ_LINE_STATUS) {
                                irq_event.status = status;
                                if (copy_to_user(argp, &irq_event,
@@ -2755,13 +2751,13 @@ static void cache_all_regs(struct kvm_vcpu *vcpu)
 }
 
 int emulate_instruction(struct kvm_vcpu *vcpu,
-                       struct kvm_run *run,
                        unsigned long cr2,
                        u16 error_code,
                        int emulation_type)
 {
        int r, shadow_mask;
        struct decode_cache *c;
+       struct kvm_run *run = vcpu->run;
 
        kvm_clear_exception_queue(vcpu);
        vcpu->arch.mmio_fault_cr2 = cr2;
@@ -2967,8 +2963,7 @@ static int pio_string_write(struct kvm_vcpu *vcpu)
        return r;
 }
 
-int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
-                 int size, unsigned port)
+int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, int size, unsigned port)
 {
        unsigned long val;
 
@@ -2997,7 +2992,7 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_pio);
 
-int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
+int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in,
                  int size, unsigned long count, int down,
                  gva_t address, int rep, unsigned port)
 {
@@ -3070,9 +3065,6 @@ static void bounce_off(void *info)
        /* nothing */
 }
 
-static unsigned int  ref_freq;
-static unsigned long tsc_khz_ref;
-
 static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
                                     void *data)
 {
@@ -3081,14 +3073,11 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
        struct kvm_vcpu *vcpu;
        int i, send_ipi = 0;
 
-       if (!ref_freq)
-               ref_freq = freq->old;
-
        if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
                return 0;
        if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
                return 0;
-       per_cpu(cpu_tsc_khz, freq->cpu) = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
+       per_cpu(cpu_tsc_khz, freq->cpu) = freq->new;
 
        spin_lock(&kvm_lock);
        list_for_each_entry(kvm, &vm_list, vm_list) {
@@ -3125,9 +3114,24 @@ static struct notifier_block kvmclock_cpufreq_notifier_block = {
         .notifier_call  = kvmclock_cpufreq_notifier
 };
 
+static void kvm_timer_init(void)
+{
+       int cpu;
+
+       if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
+               cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
+                                         CPUFREQ_TRANSITION_NOTIFIER);
+               for_each_online_cpu(cpu)
+                       per_cpu(cpu_tsc_khz, cpu) = cpufreq_get(cpu);
+       } else {
+               for_each_possible_cpu(cpu)
+                       per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
+       }
+}
+
 int kvm_arch_init(void *opaque)
 {
-       int r, cpu;
+       int r;
        struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque;
 
        if (kvm_x86_ops) {
@@ -3159,13 +3163,7 @@ int kvm_arch_init(void *opaque)
        kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
                        PT_DIRTY_MASK, PT64_NX_MASK, 0);
 
-       for_each_possible_cpu(cpu)
-               per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
-       if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
-               tsc_khz_ref = tsc_khz;
-               cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
-                                         CPUFREQ_TRANSITION_NOTIFIER);
-       }
+       kvm_timer_init();
 
        return 0;
 
@@ -3451,17 +3449,17 @@ EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
  *
  * No need to exit to userspace if we already have an interrupt queued.
  */
-static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
-                                         struct kvm_run *kvm_run)
+static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
 {
        return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) &&
-               kvm_run->request_interrupt_window &&
+               vcpu->run->request_interrupt_window &&
                kvm_arch_interrupt_allowed(vcpu));
 }
 
-static void post_kvm_run_save(struct kvm_vcpu *vcpu,
-                             struct kvm_run *kvm_run)
+static void post_kvm_run_save(struct kvm_vcpu *vcpu)
 {
+       struct kvm_run *kvm_run = vcpu->run;
+
        kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
        kvm_run->cr8 = kvm_get_cr8(vcpu);
        kvm_run->apic_base = kvm_get_apic_base(vcpu);
@@ -3523,7 +3521,7 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
        kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
 }
 
-static void inject_pending_event(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static void inject_pending_event(struct kvm_vcpu *vcpu)
 {
        /* try to reinject previous events if any */
        if (vcpu->arch.exception.pending) {
@@ -3559,11 +3557,11 @@ static void inject_pending_event(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        }
 }
 
-static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 {
        int r;
        bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
-               kvm_run->request_interrupt_window;
+               vcpu->run->request_interrupt_window;
 
        if (vcpu->requests)
                if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
@@ -3584,12 +3582,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
                        kvm_x86_ops->tlb_flush(vcpu);
                if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS,
                                       &vcpu->requests)) {
-                       kvm_run->exit_reason = KVM_EXIT_TPR_ACCESS;
+                       vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
                        r = 0;
                        goto out;
                }
                if (test_and_clear_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests)) {
-                       kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
+                       vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
                        r = 0;
                        goto out;
                }
@@ -3613,7 +3611,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
                goto out;
        }
 
-       inject_pending_event(vcpu, kvm_run);
+       inject_pending_event(vcpu);
 
        /* enable NMI/IRQ window open exits if needed */
        if (vcpu->arch.nmi_pending)
@@ -3639,7 +3637,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        }
 
        trace_kvm_entry(vcpu->vcpu_id);
-       kvm_x86_ops->run(vcpu, kvm_run);
+       kvm_x86_ops->run(vcpu);
 
        if (unlikely(vcpu->arch.switch_db_regs || test_thread_flag(TIF_DEBUG))) {
                set_debugreg(current->thread.debugreg0, 0);
@@ -3680,13 +3678,13 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
        kvm_lapic_sync_from_vapic(vcpu);
 
-       r = kvm_x86_ops->handle_exit(kvm_run, vcpu);
+       r = kvm_x86_ops->handle_exit(vcpu);
 out:
        return r;
 }
 
 
-static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static int __vcpu_run(struct kvm_vcpu *vcpu)
 {
        int r;
 
@@ -3706,7 +3704,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        r = 1;
        while (r > 0) {
                if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
-                       r = vcpu_enter_guest(vcpu, kvm_run);
+                       r = vcpu_enter_guest(vcpu);
                else {
                        up_read(&vcpu->kvm->slots_lock);
                        kvm_vcpu_block(vcpu);
@@ -3734,14 +3732,14 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
                if (kvm_cpu_has_pending_timer(vcpu))
                        kvm_inject_pending_timer_irqs(vcpu);
 
-               if (dm_request_for_irq_injection(vcpu, kvm_run)) {
+               if (dm_request_for_irq_injection(vcpu)) {
                        r = -EINTR;
-                       kvm_run->exit_reason = KVM_EXIT_INTR;
+                       vcpu->run->exit_reason = KVM_EXIT_INTR;
                        ++vcpu->stat.request_irq_exits;
                }
                if (signal_pending(current)) {
                        r = -EINTR;
-                       kvm_run->exit_reason = KVM_EXIT_INTR;
+                       vcpu->run->exit_reason = KVM_EXIT_INTR;
                        ++vcpu->stat.signal_exits;
                }
                if (need_resched()) {
@@ -3752,7 +3750,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        }
 
        up_read(&vcpu->kvm->slots_lock);
-       post_kvm_run_save(vcpu, kvm_run);
+       post_kvm_run_save(vcpu);
 
        vapic_exit(vcpu);
 
@@ -3792,8 +3790,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
                vcpu->mmio_needed = 0;
 
                down_read(&vcpu->kvm->slots_lock);
-               r = emulate_instruction(vcpu, kvm_run,
-                                       vcpu->arch.mmio_fault_cr2, 0,
+               r = emulate_instruction(vcpu, vcpu->arch.mmio_fault_cr2, 0,
                                        EMULTYPE_NO_DECODE);
                up_read(&vcpu->kvm->slots_lock);
                if (r == EMULATE_DO_MMIO) {
@@ -3809,7 +3806,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
                kvm_register_write(vcpu, VCPU_REGS_RAX,
                                     kvm_run->hypercall.ret);
 
-       r = __vcpu_run(vcpu, kvm_run);
+       r = __vcpu_run(vcpu);
 
 out:
        if (vcpu->sigset_active)
@@ -4049,7 +4046,7 @@ static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
        return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu);
 }
 
-static u32 get_tss_base_addr(struct kvm_vcpu *vcpu,
+static gpa_t get_tss_base_addr(struct kvm_vcpu *vcpu,
                             struct desc_struct *seg_desc)
 {
        u32 base_addr = get_desc_base(seg_desc);
@@ -4206,7 +4203,6 @@ static void save_state_to_tss16(struct kvm_vcpu *vcpu,
        tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS);
        tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS);
        tss->ldt = get_segment_selector(vcpu, VCPU_SREG_LDTR);
-       tss->prev_task_link = get_segment_selector(vcpu, VCPU_SREG_TR);
 }
 
 static int load_state_from_tss16(struct kvm_vcpu *vcpu,
@@ -4476,12 +4472,19 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
                                        struct kvm_guest_debug *dbg)
 {
-       int i, r;
+       unsigned long rflags;
+       int old_debug;
+       int i;
 
        vcpu_load(vcpu);
 
-       if ((dbg->control & (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) ==
-           (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) {
+       old_debug = vcpu->guest_debug;
+
+       vcpu->guest_debug = dbg->control;
+       if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE))
+               vcpu->guest_debug = 0;
+
+       if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
                for (i = 0; i < KVM_NR_DB_REGS; ++i)
                        vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
                vcpu->arch.switch_db_regs =
@@ -4492,16 +4495,23 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
                vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK);
        }
 
-       r = kvm_x86_ops->set_guest_debug(vcpu, dbg);
+       rflags = kvm_x86_ops->get_rflags(vcpu);
+       if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+               rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
+       else if (old_debug & KVM_GUESTDBG_SINGLESTEP)
+               rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
+       kvm_x86_ops->set_rflags(vcpu, rflags);
 
-       if (dbg->control & KVM_GUESTDBG_INJECT_DB)
+       kvm_x86_ops->set_guest_debug(vcpu, dbg);
+
+       if (vcpu->guest_debug & KVM_GUESTDBG_INJECT_DB)
                kvm_queue_exception(vcpu, DB_VECTOR);
-       else if (dbg->control & KVM_GUESTDBG_INJECT_BP)
+       else if (vcpu->guest_debug & KVM_GUESTDBG_INJECT_BP)
                kvm_queue_exception(vcpu, BP_VECTOR);
 
        vcpu_put(vcpu);
 
-       return r;
+       return 0;
 }
 
 /*
@@ -4699,9 +4709,17 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
        return kvm_x86_ops->vcpu_reset(vcpu);
 }
 
-void kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void *garbage)
 {
-       kvm_x86_ops->hardware_enable(garbage);
+       /*
+        * Since this may be called from a hotplug notifcation,
+        * we can't get the CPU frequency directly.
+        */
+       if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
+               int cpu = raw_smp_processor_id();
+               per_cpu(cpu_tsc_khz, cpu) = 0;
+       }
+       return kvm_x86_ops->hardware_enable(garbage);
 }
 
 void kvm_arch_hardware_disable(void *garbage)