KVM: arm-vgic: Support KVM_CREATE_DEVICE for VGIC
[firefly-linux-kernel-4.4.55.git] / virt / kvm / kvm_main.c
index 302681c4aa4465bb21b69524d7c0d3a5341f4a4e..e9a43b6455bea2e231c95067a801aee27276bab3 100644 (file)
@@ -70,7 +70,8 @@ MODULE_LICENSE("GPL");
  *             kvm->lock --> kvm->slots_lock --> kvm->irq_lock
  */
 
-DEFINE_RAW_SPINLOCK(kvm_lock);
+DEFINE_SPINLOCK(kvm_lock);
+static DEFINE_RAW_SPINLOCK(kvm_count_lock);
 LIST_HEAD(vm_list);
 
 static cpumask_var_t cpus_hardware_enabled;
@@ -102,28 +103,8 @@ static bool largepages_enabled = true;
 
 bool kvm_is_mmio_pfn(pfn_t pfn)
 {
-       if (pfn_valid(pfn)) {
-               int reserved;
-               struct page *tail = pfn_to_page(pfn);
-               struct page *head = compound_trans_head(tail);
-               reserved = PageReserved(head);
-               if (head != tail) {
-                       /*
-                        * "head" is not a dangling pointer
-                        * (compound_trans_head takes care of that)
-                        * but the hugepage may have been splitted
-                        * from under us (and we may not hold a
-                        * reference count on the head page so it can
-                        * be reused before we run PageReferenced), so
-                        * we've to check PageTail before returning
-                        * what we just read.
-                        */
-                       smp_rmb();
-                       if (PageTail(tail))
-                               return reserved;
-               }
-               return PageReserved(tail);
-       }
+       if (pfn_valid(pfn))
+               return PageReserved(pfn_to_page(pfn));
 
        return true;
 }
@@ -510,9 +491,9 @@ static struct kvm *kvm_create_vm(unsigned long type)
        if (r)
                goto out_err;
 
-       raw_spin_lock(&kvm_lock);
+       spin_lock(&kvm_lock);
        list_add(&kvm->vm_list, &vm_list);
-       raw_spin_unlock(&kvm_lock);
+       spin_unlock(&kvm_lock);
 
        return kvm;
 
@@ -560,13 +541,13 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
 /*
  * Free any memory in @free but not in @dont.
  */
-static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
+static void kvm_free_physmem_slot(struct kvm *kvm, struct kvm_memory_slot *free,
                                  struct kvm_memory_slot *dont)
 {
        if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
                kvm_destroy_dirty_bitmap(free);
 
-       kvm_arch_free_memslot(free, dont);
+       kvm_arch_free_memslot(kvm, free, dont);
 
        free->npages = 0;
 }
@@ -577,7 +558,7 @@ void kvm_free_physmem(struct kvm *kvm)
        struct kvm_memory_slot *memslot;
 
        kvm_for_each_memslot(memslot, slots)
-               kvm_free_physmem_slot(memslot, NULL);
+               kvm_free_physmem_slot(kvm, memslot, NULL);
 
        kfree(kvm->memslots);
 }
@@ -601,9 +582,9 @@ static void kvm_destroy_vm(struct kvm *kvm)
        struct mm_struct *mm = kvm->mm;
 
        kvm_arch_sync_events(kvm);
-       raw_spin_lock(&kvm_lock);
+       spin_lock(&kvm_lock);
        list_del(&kvm->vm_list);
-       raw_spin_unlock(&kvm_lock);
+       spin_unlock(&kvm_lock);
        kvm_free_irq_routing(kvm);
        for (i = 0; i < KVM_NR_BUSES; i++)
                kvm_io_bus_destroy(kvm->buses[i]);
@@ -731,7 +712,10 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
        update_memslots(slots, new, kvm->memslots->generation);
        rcu_assign_pointer(kvm->memslots, slots);
        synchronize_srcu_expedited(&kvm->srcu);
-       return old_memslots; 
+
+       kvm_arch_memslots_updated(kvm);
+
+       return old_memslots;
 }
 
 /*
@@ -838,7 +822,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
        if (change == KVM_MR_CREATE) {
                new.userspace_addr = mem->userspace_addr;
 
-               if (kvm_arch_create_memslot(&new, npages))
+               if (kvm_arch_create_memslot(kvm, &new, npages))
                        goto out_free;
        }
 
@@ -889,6 +873,19 @@ int __kvm_set_memory_region(struct kvm *kvm,
                        goto out_free;
        }
 
+       /* actual memory is freed via old in kvm_free_physmem_slot below */
+       if (change == KVM_MR_DELETE) {
+               new.dirty_bitmap = NULL;
+               memset(&new.arch, 0, sizeof(new.arch));
+       }
+
+       old_memslots = install_new_memslots(kvm, slots, &new);
+
+       kvm_arch_commit_memory_region(kvm, mem, &old, change);
+
+       kvm_free_physmem_slot(kvm, &old, &new);
+       kfree(old_memslots);
+
        /*
         * IOMMU mapping:  New slots need to be mapped.  Old slots need to be
         * un-mapped and re-mapped if their base changes.  Since base change
@@ -900,29 +897,15 @@ int __kvm_set_memory_region(struct kvm *kvm,
         */
        if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
                r = kvm_iommu_map_pages(kvm, &new);
-               if (r)
-                       goto out_slots;
-       }
-
-       /* actual memory is freed via old in kvm_free_physmem_slot below */
-       if (change == KVM_MR_DELETE) {
-               new.dirty_bitmap = NULL;
-               memset(&new.arch, 0, sizeof(new.arch));
+               return r;
        }
 
-       old_memslots = install_new_memslots(kvm, slots, &new);
-
-       kvm_arch_commit_memory_region(kvm, mem, &old, change);
-
-       kvm_free_physmem_slot(&old, &new);
-       kfree(old_memslots);
-
        return 0;
 
 out_slots:
        kfree(slots);
 out_free:
-       kvm_free_physmem_slot(&new, &old);
+       kvm_free_physmem_slot(kvm, &new, &old);
 out:
        return r;
 }
@@ -1075,12 +1058,18 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
 EXPORT_SYMBOL_GPL(gfn_to_hva);
 
 /*
- * The hva returned by this function is only allowed to be read.
- * It should pair with kvm_read_hva() or kvm_read_hva_atomic().
+ * If writable is set to false, the hva returned by this function is only
+ * allowed to be read.
  */
-static unsigned long gfn_to_hva_read(struct kvm *kvm, gfn_t gfn)
+unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable)
 {
-       return __gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL, false);
+       struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
+       unsigned long hva = __gfn_to_hva_many(slot, gfn, NULL, false);
+
+       if (!kvm_is_error_hva(hva) && writable)
+               *writable = !memslot_is_readonly(slot);
+
+       return hva;
 }
 
 static int kvm_read_hva(void *data, void __user *hva, int len)
@@ -1447,7 +1436,7 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
        int r;
        unsigned long addr;
 
-       addr = gfn_to_hva_read(kvm, gfn);
+       addr = gfn_to_hva_prot(kvm, gfn, NULL);
        if (kvm_is_error_hva(addr))
                return -EFAULT;
        r = kvm_read_hva(data, (void __user *)addr + offset, len);
@@ -1485,7 +1474,7 @@ int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data,
        gfn_t gfn = gpa >> PAGE_SHIFT;
        int offset = offset_in_page(gpa);
 
-       addr = gfn_to_hva_read(kvm, gfn);
+       addr = gfn_to_hva_prot(kvm, gfn, NULL);
        if (kvm_is_error_hva(addr))
                return -EFAULT;
        pagefault_disable();
@@ -1624,8 +1613,9 @@ EXPORT_SYMBOL_GPL(kvm_read_guest_cached);
 
 int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len)
 {
-       return kvm_write_guest_page(kvm, gfn, (const void *) empty_zero_page,
-                                   offset, len);
+       const void *zero_page = (const void *) __va(page_to_phys(ZERO_PAGE(0)));
+
+       return kvm_write_guest_page(kvm, gfn, zero_page, offset, len);
 }
 EXPORT_SYMBOL_GPL(kvm_clear_guest_page);
 
@@ -1716,14 +1706,6 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
 EXPORT_SYMBOL_GPL(kvm_vcpu_kick);
 #endif /* !CONFIG_S390 */
 
-void kvm_resched(struct kvm_vcpu *vcpu)
-{
-       if (!need_resched())
-               return;
-       cond_resched();
-}
-EXPORT_SYMBOL_GPL(kvm_resched);
-
 bool kvm_vcpu_yield_to(struct kvm_vcpu *target)
 {
        struct pid *pid;
@@ -1893,7 +1875,7 @@ static struct file_operations kvm_vcpu_fops = {
  */
 static int create_vcpu_fd(struct kvm_vcpu *vcpu)
 {
-       return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, O_RDWR);
+       return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, O_RDWR | O_CLOEXEC);
 }
 
 /*
@@ -1904,6 +1886,9 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
        int r;
        struct kvm_vcpu *vcpu, *v;
 
+       if (id >= KVM_MAX_VCPUS)
+               return -EINVAL;
+
        vcpu = kvm_arch_vcpu_create(kvm, id);
        if (IS_ERR(vcpu))
                return PTR_ERR(vcpu);
@@ -2281,6 +2266,16 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
        case KVM_DEV_TYPE_XICS:
                ops = &kvm_xics_ops;
                break;
+#endif
+#ifdef CONFIG_KVM_VFIO
+       case KVM_DEV_TYPE_VFIO:
+               ops = &kvm_vfio_ops;
+               break;
+#endif
+#ifdef CONFIG_KVM_ARM_VGIC
+       case KVM_DEV_TYPE_ARM_VGIC_V2:
+               ops = &kvm_arm_vgic_v2_ops;
+               break;
 #endif
        default:
                return -ENODEV;
@@ -2302,7 +2297,7 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
                return ret;
        }
 
-       ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR);
+       ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR | O_CLOEXEC);
        if (ret < 0) {
                ops->destroy(dev);
                return ret;
@@ -2530,44 +2525,12 @@ out:
 }
 #endif
 
-static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
-       struct page *page[1];
-       unsigned long addr;
-       int npages;
-       gfn_t gfn = vmf->pgoff;
-       struct kvm *kvm = vma->vm_file->private_data;
-
-       addr = gfn_to_hva(kvm, gfn);
-       if (kvm_is_error_hva(addr))
-               return VM_FAULT_SIGBUS;
-
-       npages = get_user_pages(current, current->mm, addr, 1, 1, 0, page,
-                               NULL);
-       if (unlikely(npages != 1))
-               return VM_FAULT_SIGBUS;
-
-       vmf->page = page[0];
-       return 0;
-}
-
-static const struct vm_operations_struct kvm_vm_vm_ops = {
-       .fault = kvm_vm_fault,
-};
-
-static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma)
-{
-       vma->vm_ops = &kvm_vm_vm_ops;
-       return 0;
-}
-
 static struct file_operations kvm_vm_fops = {
        .release        = kvm_vm_release,
        .unlocked_ioctl = kvm_vm_ioctl,
 #ifdef CONFIG_COMPAT
        .compat_ioctl   = kvm_vm_compat_ioctl,
 #endif
-       .mmap           = kvm_vm_mmap,
        .llseek         = noop_llseek,
 };
 
@@ -2586,7 +2549,7 @@ static int kvm_dev_ioctl_create_vm(unsigned long type)
                return r;
        }
 #endif
-       r = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR);
+       r = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR | O_CLOEXEC);
        if (r < 0)
                kvm_put_kvm(kvm);
 
@@ -2694,11 +2657,12 @@ static void hardware_enable_nolock(void *junk)
        }
 }
 
-static void hardware_enable(void *junk)
+static void hardware_enable(void)
 {
-       raw_spin_lock(&kvm_lock);
-       hardware_enable_nolock(junk);
-       raw_spin_unlock(&kvm_lock);
+       raw_spin_lock(&kvm_count_lock);
+       if (kvm_usage_count)
+               hardware_enable_nolock(NULL);
+       raw_spin_unlock(&kvm_count_lock);
 }
 
 static void hardware_disable_nolock(void *junk)
@@ -2711,11 +2675,12 @@ static void hardware_disable_nolock(void *junk)
        kvm_arch_hardware_disable(NULL);
 }
 
-static void hardware_disable(void *junk)
+static void hardware_disable(void)
 {
-       raw_spin_lock(&kvm_lock);
-       hardware_disable_nolock(junk);
-       raw_spin_unlock(&kvm_lock);
+       raw_spin_lock(&kvm_count_lock);
+       if (kvm_usage_count)
+               hardware_disable_nolock(NULL);
+       raw_spin_unlock(&kvm_count_lock);
 }
 
 static void hardware_disable_all_nolock(void)
@@ -2729,16 +2694,16 @@ static void hardware_disable_all_nolock(void)
 
 static void hardware_disable_all(void)
 {
-       raw_spin_lock(&kvm_lock);
+       raw_spin_lock(&kvm_count_lock);
        hardware_disable_all_nolock();
-       raw_spin_unlock(&kvm_lock);
+       raw_spin_unlock(&kvm_count_lock);
 }
 
 static int hardware_enable_all(void)
 {
        int r = 0;
 
-       raw_spin_lock(&kvm_lock);
+       raw_spin_lock(&kvm_count_lock);
 
        kvm_usage_count++;
        if (kvm_usage_count == 1) {
@@ -2751,7 +2716,7 @@ static int hardware_enable_all(void)
                }
        }
 
-       raw_spin_unlock(&kvm_lock);
+       raw_spin_unlock(&kvm_count_lock);
 
        return r;
 }
@@ -2761,20 +2726,17 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
 {
        int cpu = (long)v;
 
-       if (!kvm_usage_count)
-               return NOTIFY_OK;
-
        val &= ~CPU_TASKS_FROZEN;
        switch (val) {
        case CPU_DYING:
                printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
                       cpu);
-               hardware_disable(NULL);
+               hardware_disable();
                break;
        case CPU_STARTING:
                printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
                       cpu);
-               hardware_enable(NULL);
+               hardware_enable();
                break;
        }
        return NOTIFY_OK;
@@ -2987,10 +2949,10 @@ static int vm_stat_get(void *_offset, u64 *val)
        struct kvm *kvm;
 
        *val = 0;
-       raw_spin_lock(&kvm_lock);
+       spin_lock(&kvm_lock);
        list_for_each_entry(kvm, &vm_list, vm_list)
                *val += *(u32 *)((void *)kvm + offset);
-       raw_spin_unlock(&kvm_lock);
+       spin_unlock(&kvm_lock);
        return 0;
 }
 
@@ -3004,12 +2966,12 @@ static int vcpu_stat_get(void *_offset, u64 *val)
        int i;
 
        *val = 0;
-       raw_spin_lock(&kvm_lock);
+       spin_lock(&kvm_lock);
        list_for_each_entry(kvm, &vm_list, vm_list)
                kvm_for_each_vcpu(i, vcpu, kvm)
                        *val += *(u32 *)((void *)vcpu + offset);
 
-       raw_spin_unlock(&kvm_lock);
+       spin_unlock(&kvm_lock);
        return 0;
 }
 
@@ -3022,7 +2984,7 @@ static const struct file_operations *stat_fops[] = {
 
 static int kvm_init_debug(void)
 {
-       int r = -EFAULT;
+       int r = -EEXIST;
        struct kvm_stats_debugfs_item *p;
 
        kvm_debugfs_dir = debugfs_create_dir("kvm", NULL);
@@ -3064,7 +3026,7 @@ static int kvm_suspend(void)
 static void kvm_resume(void)
 {
        if (kvm_usage_count) {
-               WARN_ON(raw_spin_is_locked(&kvm_lock));
+               WARN_ON(raw_spin_is_locked(&kvm_count_lock));
                hardware_enable_nolock(NULL);
        }
 }
@@ -3181,6 +3143,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
 
 out_undebugfs:
        unregister_syscore_ops(&kvm_syscore_ops);
+       misc_deregister(&kvm_dev);
 out_unreg:
        kvm_async_pf_deinit();
 out_free: