Merge branch 'for-lsk' of git://git.linaro.org/arm/big.LITTLE/mp into linux-linaro-lsk
arch/arm/kvm/mmu.c
index 7789857d147034b8cbfd1e44d599f81db085215b..eea03069161b4b4c2824bd0908edac065f0959a8 100644
@@ -42,6 +42,8 @@ static unsigned long hyp_idmap_start;
 static unsigned long hyp_idmap_end;
 static phys_addr_t hyp_idmap_vector;
 
+#define pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t))
+
 #define kvm_pmd_huge(_x)       (pmd_huge(_x) || pmd_trans_huge(_x))
 
 static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
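
Note: the pgd_order macro added above sizes the HYP PGD allocation in whole
pages, and later hunks in this patch switch the PGDs and the init bounce page
from kmalloc()/kfree() to __get_free_pages()/free_pages(). The likely
motivation is alignment: the page allocator returns naturally aligned,
page-granular blocks, which kmalloc() does not guarantee, so the bounce page
cannot straddle a page boundary and the PGD gets its architectural alignment.
A minimal userspace sketch of the sizing arithmetic, assuming 4 KiB pages and
a hand-rolled get_order() that mirrors the kernel helper:

        #include <stdio.h>

        #define PAGE_SHIFT 12

        /* Mirrors the kernel's get_order(): smallest order such that
         * the size fits in 2^order pages. */
        static int get_order(unsigned long size)
        {
                int order = 0;

                size = (size - 1) >> PAGE_SHIFT;
                while (size) {
                        order++;
                        size >>= 1;
                }
                return order;
        }

        int main(void)
        {
                /* Illustrative values only: 2048 pgd_t entries of 8 bytes
                 * (a classic ARM PGD) need 16 KiB, i.e. an order-2 block. */
                printf("order(16 KiB) = %d\n", get_order(2048 * 8));
                /* A table that fits in a single page is order 0. */
                printf("order(32 B)   = %d\n", get_order(4 * 8));
                return 0;
        }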
@@ -88,103 +90,208 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
        return p;
 }
 
-static bool page_empty(void *ptr)
+static void clear_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr)
 {
-       struct page *ptr_page = virt_to_page(ptr);
-       return page_count(ptr_page) == 1;
+       pud_t *pud_table __maybe_unused = pud_offset(pgd, 0);
+       pgd_clear(pgd);
+       kvm_tlb_flush_vmid_ipa(kvm, addr);
+       pud_free(NULL, pud_table);
+       put_page(virt_to_page(pgd));
 }
 
 static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
 {
-       if (pud_huge(*pud)) {
-               pud_clear(pud);
-               kvm_tlb_flush_vmid_ipa(kvm, addr);
-       } else {
-               pmd_t *pmd_table = pmd_offset(pud, 0);
-               pud_clear(pud);
-               kvm_tlb_flush_vmid_ipa(kvm, addr);
-               pmd_free(NULL, pmd_table);
-       }
+       pmd_t *pmd_table = pmd_offset(pud, 0);
+       VM_BUG_ON(pud_huge(*pud));
+       pud_clear(pud);
+       kvm_tlb_flush_vmid_ipa(kvm, addr);
+       pmd_free(NULL, pmd_table);
        put_page(virt_to_page(pud));
 }
 
 static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
 {
-       if (kvm_pmd_huge(*pmd)) {
-               pmd_clear(pmd);
-               kvm_tlb_flush_vmid_ipa(kvm, addr);
-       } else {
-               pte_t *pte_table = pte_offset_kernel(pmd, 0);
-               pmd_clear(pmd);
-               kvm_tlb_flush_vmid_ipa(kvm, addr);
-               pte_free_kernel(NULL, pte_table);
-       }
+       pte_t *pte_table = pte_offset_kernel(pmd, 0);
+       VM_BUG_ON(kvm_pmd_huge(*pmd));
+       pmd_clear(pmd);
+       kvm_tlb_flush_vmid_ipa(kvm, addr);
+       pte_free_kernel(NULL, pte_table);
        put_page(virt_to_page(pmd));
 }
 
-static void clear_pte_entry(struct kvm *kvm, pte_t *pte, phys_addr_t addr)
+static void unmap_ptes(struct kvm *kvm, pmd_t *pmd,
+                      phys_addr_t addr, phys_addr_t end)
 {
-       if (pte_present(*pte)) {
-               kvm_set_pte(pte, __pte(0));
-               put_page(virt_to_page(pte));
-               kvm_tlb_flush_vmid_ipa(kvm, addr);
-       }
+       phys_addr_t start_addr = addr;
+       pte_t *pte, *start_pte;
+
+       start_pte = pte = pte_offset_kernel(pmd, addr);
+       do {
+               if (!pte_none(*pte)) {
+                       kvm_set_pte(pte, __pte(0));
+                       put_page(virt_to_page(pte));
+                       kvm_tlb_flush_vmid_ipa(kvm, addr);
+               }
+       } while (pte++, addr += PAGE_SIZE, addr != end);
+
+       if (kvm_pte_table_empty(start_pte))
+               clear_pmd_entry(kvm, pmd, start_addr);
+}
+
+static void unmap_pmds(struct kvm *kvm, pud_t *pud,
+                      phys_addr_t addr, phys_addr_t end)
+{
+       phys_addr_t next, start_addr = addr;
+       pmd_t *pmd, *start_pmd;
+
+       start_pmd = pmd = pmd_offset(pud, addr);
+       do {
+               next = kvm_pmd_addr_end(addr, end);
+               if (!pmd_none(*pmd)) {
+                       if (kvm_pmd_huge(*pmd)) {
+                               pmd_clear(pmd);
+                               kvm_tlb_flush_vmid_ipa(kvm, addr);
+                               put_page(virt_to_page(pmd));
+                       } else {
+                               unmap_ptes(kvm, pmd, addr, next);
+                       }
+               }
+       } while (pmd++, addr = next, addr != end);
+
+       if (kvm_pmd_table_empty(start_pmd))
+               clear_pud_entry(kvm, pud, start_addr);
+}
+
+static void unmap_puds(struct kvm *kvm, pgd_t *pgd,
+                      phys_addr_t addr, phys_addr_t end)
+{
+       phys_addr_t next, start_addr = addr;
+       pud_t *pud, *start_pud;
+
+       start_pud = pud = pud_offset(pgd, addr);
+       do {
+               next = kvm_pud_addr_end(addr, end);
+               if (!pud_none(*pud)) {
+                       if (pud_huge(*pud)) {
+                               pud_clear(pud);
+                               kvm_tlb_flush_vmid_ipa(kvm, addr);
+                               put_page(virt_to_page(pud));
+                       } else {
+                               unmap_pmds(kvm, pud, addr, next);
+                       }
+               }
+       } while (pud++, addr = next, addr != end);
+
+       if (kvm_pud_table_empty(start_pud))
+               clear_pgd_entry(kvm, pgd, start_addr);
 }
 
 static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
-                       unsigned long long start, u64 size)
+                       phys_addr_t start, u64 size)
 {
        pgd_t *pgd;
-       pud_t *pud;
-       pmd_t *pmd;
+       phys_addr_t addr = start, end = start + size;
+       phys_addr_t next;
+
+       pgd = pgdp + pgd_index(addr);
+       do {
+               next = kvm_pgd_addr_end(addr, end);
+               unmap_puds(kvm, pgd, addr, next);
+       } while (pgd++, addr = next, addr != end);
+}
+
+static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd,
+                             phys_addr_t addr, phys_addr_t end)
+{
        pte_t *pte;
-       unsigned long long addr = start, end = start + size;
-       u64 next;
 
-       while (addr < end) {
-               pgd = pgdp + pgd_index(addr);
-               pud = pud_offset(pgd, addr);
-               if (pud_none(*pud)) {
-                       addr = pud_addr_end(addr, end);
-                       continue;
+       pte = pte_offset_kernel(pmd, addr);
+       do {
+               if (!pte_none(*pte)) {
+                       hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
+                       kvm_flush_dcache_to_poc((void*)hva, PAGE_SIZE);
                }
+       } while (pte++, addr += PAGE_SIZE, addr != end);
+}
 
-               if (pud_huge(*pud)) {
-                       /*
-                        * If we are dealing with a huge pud, just clear it and
-                        * move on.
-                        */
-                       clear_pud_entry(kvm, pud, addr);
-                       addr = pud_addr_end(addr, end);
-                       continue;
-               }
+static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud,
+                             phys_addr_t addr, phys_addr_t end)
+{
+       pmd_t *pmd;
+       phys_addr_t next;
 
-               pmd = pmd_offset(pud, addr);
-               if (pmd_none(*pmd)) {
-                       addr = pmd_addr_end(addr, end);
-                       continue;
+       pmd = pmd_offset(pud, addr);
+       do {
+               next = kvm_pmd_addr_end(addr, end);
+               if (!pmd_none(*pmd)) {
+                       if (kvm_pmd_huge(*pmd)) {
+                               hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
+                               kvm_flush_dcache_to_poc((void*)hva, PMD_SIZE);
+                       } else {
+                               stage2_flush_ptes(kvm, pmd, addr, next);
+                       }
                }
+       } while (pmd++, addr = next, addr != end);
+}
 
-               if (!kvm_pmd_huge(*pmd)) {
-                       pte = pte_offset_kernel(pmd, addr);
-                       clear_pte_entry(kvm, pte, addr);
-                       next = addr + PAGE_SIZE;
-               }
+static void stage2_flush_puds(struct kvm *kvm, pgd_t *pgd,
+                             phys_addr_t addr, phys_addr_t end)
+{
+       pud_t *pud;
+       phys_addr_t next;
 
-               /*
-                * If the pmd entry is to be cleared, walk back up the ladder
-                */
-               if (kvm_pmd_huge(*pmd) || page_empty(pte)) {
-                       clear_pmd_entry(kvm, pmd, addr);
-                       next = pmd_addr_end(addr, end);
-                       if (page_empty(pmd) && !page_empty(pud)) {
-                               clear_pud_entry(kvm, pud, addr);
-                               next = pud_addr_end(addr, end);
+       pud = pud_offset(pgd, addr);
+       do {
+               next = kvm_pud_addr_end(addr, end);
+               if (!pud_none(*pud)) {
+                       if (pud_huge(*pud)) {
+                               hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
+                               kvm_flush_dcache_to_poc((void*)hva, PUD_SIZE);
+                       } else {
+                               stage2_flush_pmds(kvm, pud, addr, next);
                        }
                }
+       } while (pud++, addr = next, addr != end);
+}
 
-               addr = next;
-       }
+static void stage2_flush_memslot(struct kvm *kvm,
+                                struct kvm_memory_slot *memslot)
+{
+       phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
+       phys_addr_t end = addr + PAGE_SIZE * memslot->npages;
+       phys_addr_t next;
+       pgd_t *pgd;
+
+       pgd = kvm->arch.pgd + pgd_index(addr);
+       do {
+               next = kvm_pgd_addr_end(addr, end);
+               stage2_flush_puds(kvm, pgd, addr, next);
+       } while (pgd++, addr = next, addr != end);
+}
+
+/**
+ * stage2_flush_vm - Invalidate cache for pages mapped in stage 2
+ * @kvm: The struct kvm pointer
+ *
+ * Go through the stage 2 page tables and invalidate any cache lines
+ * backing memory already mapped to the VM.
+ */
+void stage2_flush_vm(struct kvm *kvm)
+{
+       struct kvm_memslots *slots;
+       struct kvm_memory_slot *memslot;
+       int idx;
+
+       idx = srcu_read_lock(&kvm->srcu);
+       spin_lock(&kvm->mmu_lock);
+
+       slots = kvm_memslots(kvm);
+       kvm_for_each_memslot(memslot, slots)
+               stage2_flush_memslot(kvm, memslot);
+
+       spin_unlock(&kvm->mmu_lock);
+       srcu_read_unlock(&kvm->srcu, idx);
 }
 
 /**
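
Note: the rewrite above replaces the single flat loop in unmap_range() with
one walker per page-table level (unmap_ptes/pmds/puds, and the parallel
stage2_flush_* family), each using the do/while range-splitting idiom from the
generic mm walkers: kvm_*_addr_end() clamps each step to the next level
boundary or the overall end, so huge mappings and table mappings are handled
uniformly, and an emptied table is freed on the way back up via the
kvm_*_table_empty() checks and clear_*_entry() helpers. A self-contained
sketch of the boundary-clamping walk, with a hypothetical 1 MiB level size
standing in for PMD_SIZE:

        #include <stdio.h>

        #define LEVEL_SHIFT 20                  /* hypothetical 1 MiB block */
        #define LEVEL_SIZE  (1UL << LEVEL_SHIFT)
        #define LEVEL_MASK  (~(LEVEL_SIZE - 1))

        /* Mirrors kvm_pmd_addr_end(): the next level boundary, clamped
         * to the end of the overall range. */
        static unsigned long addr_end(unsigned long addr, unsigned long end)
        {
                unsigned long boundary = (addr + LEVEL_SIZE) & LEVEL_MASK;

                return boundary < end ? boundary : end;
        }

        int main(void)
        {
                unsigned long addr = 0x1f0000, end = 0x410000, next;

                do {
                        next = addr_end(addr, end);
                        printf("visit [%#lx, %#lx)\n", addr, next);
                } while (addr = next, addr != end);
                return 0;
        }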
@@ -199,14 +306,14 @@ void free_boot_hyp_pgd(void)
        if (boot_hyp_pgd) {
                unmap_range(NULL, boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE);
                unmap_range(NULL, boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
-               kfree(boot_hyp_pgd);
+               free_pages((unsigned long)boot_hyp_pgd, pgd_order);
                boot_hyp_pgd = NULL;
        }
 
        if (hyp_pgd)
                unmap_range(NULL, hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
 
-       kfree(init_bounce_page);
+       free_page((unsigned long)init_bounce_page);
        init_bounce_page = NULL;
 
        mutex_unlock(&kvm_hyp_pgd_mutex);
@@ -236,7 +343,7 @@ void free_hyp_pgds(void)
                for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
                        unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
 
-               kfree(hyp_pgd);
+               free_pages((unsigned long)hyp_pgd, pgd_order);
                hyp_pgd = NULL;
        }
 
@@ -639,21 +746,29 @@ static bool transparent_hugepage_adjust(pfn_t *pfnp, phys_addr_t *ipap)
        return false;
 }
 
+static bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
+{
+       if (kvm_vcpu_trap_is_iabt(vcpu))
+               return false;
+
+       return kvm_vcpu_dabt_iswrite(vcpu);
+}
+
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
-                         struct kvm_memory_slot *memslot,
+                         struct kvm_memory_slot *memslot, unsigned long hva,
                          unsigned long fault_status)
 {
        int ret;
        bool write_fault, writable, hugetlb = false, force_pte = false;
        unsigned long mmu_seq;
        gfn_t gfn = fault_ipa >> PAGE_SHIFT;
-       unsigned long hva = gfn_to_hva(vcpu->kvm, gfn);
        struct kvm *kvm = vcpu->kvm;
        struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
        struct vm_area_struct *vma;
        pfn_t pfn;
+       pgprot_t mem_type = PAGE_S2;
 
-       write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu));
+       write_fault = kvm_is_write_fault(vcpu);
        if (fault_status == FSC_PERM && !write_fault) {
                kvm_err("Unexpected L2 read permission error\n");
                return -EFAULT;
@@ -702,6 +817,9 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
        if (is_error_pfn(pfn))
                return -EFAULT;
 
+       if (kvm_is_mmio_pfn(pfn))
+               mem_type = PAGE_S2_DEVICE;
+
        spin_lock(&kvm->mmu_lock);
        if (mmu_notifier_retry(kvm, mmu_seq))
                goto out_unlock;
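
Note: the mmu_notifier_retry() check under mmu_lock rechecks the sequence
count sampled earlier in this function (outside the shown hunk) before the
potentially sleeping pfn lookup; if an MMU-notifier invalidation ran in
between, the fault is replayed rather than installing a stale pfn. A
simplified model of the pattern, with hypothetical names (the real check also
accounts for invalidations still in progress):

        #include <stdbool.h>

        /* Hypothetical stand-in for kvm->mmu_notifier_seq. */
        struct mmu {
                unsigned long notifier_seq;
        };

        static bool notifier_retry(const struct mmu *m, unsigned long seq)
        {
                /* Count moved since we sampled it => an unmap ran: refault. */
                return m->notifier_seq != seq;
        }

        int main(void)
        {
                struct mmu m = { .notifier_seq = 0 };
                unsigned long seq = m.notifier_seq; /* sample before lookup */

                m.notifier_seq++;                   /* concurrent invalidation */
                return notifier_retry(&m, seq);     /* 1 => must retry */
        }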
@@ -709,22 +827,23 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa);
 
        if (hugetlb) {
-               pmd_t new_pmd = pfn_pmd(pfn, PAGE_S2);
+               pmd_t new_pmd = pfn_pmd(pfn, mem_type);
                new_pmd = pmd_mkhuge(new_pmd);
                if (writable) {
                        kvm_set_s2pmd_writable(&new_pmd);
                        kvm_set_pfn_dirty(pfn);
                }
-               coherent_icache_guest_page(kvm, hva & PMD_MASK, PMD_SIZE);
+               coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE);
                ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
        } else {
-               pte_t new_pte = pfn_pte(pfn, PAGE_S2);
+               pte_t new_pte = pfn_pte(pfn, mem_type);
                if (writable) {
                        kvm_set_s2pte_writable(&new_pte);
                        kvm_set_pfn_dirty(pfn);
                }
-               coherent_icache_guest_page(kvm, hva, PAGE_SIZE);
-               ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, false);
+               coherent_cache_guest_page(vcpu, hva, PAGE_SIZE);
+               ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte,
+                                    mem_type == PAGE_S2_DEVICE);
        }
 
 
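Note: coherent_icache_guest_page() becomes coherent_cache_guest_page(), which
takes the vcpu and (per the surrounding series) also performs data-cache
maintenance where needed, e.g. while the guest runs with caches off; the
maintenance window is the whole PMD-aligned 2 MiB block for a huge mapping and
a single page otherwise. PAGE_S2_DEVICE keeps MMIO pfns on device attributes
and flags the mapping as an iomap to stage2_set_pte(). A small sketch of the
range arithmetic, assuming 4 KiB pages and 2 MiB blocks:

        #include <stdio.h>

        #define PAGE_SIZE 0x1000UL
        #define PMD_SIZE  0x200000UL
        #define PMD_MASK  (~(PMD_SIZE - 1))

        int main(void)
        {
                unsigned long hva = 0x76543210; /* hypothetical faulting hva */

                /* Huge mapping: clean the 2 MiB window containing hva. */
                printf("pmd: [%#lx, +%#lx)\n", hva & PMD_MASK, PMD_SIZE);
                /* Page mapping: clean just the one page. */
                printf("pte: [%#lx, +%#lx)\n", hva, PAGE_SIZE);
                return 0;
        }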
@@ -751,7 +870,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
        unsigned long fault_status;
        phys_addr_t fault_ipa;
        struct kvm_memory_slot *memslot;
-       bool is_iabt;
+       unsigned long hva;
+       bool is_iabt, write_fault, writable;
        gfn_t gfn;
        int ret, idx;
 
@@ -762,17 +882,22 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
                              kvm_vcpu_get_hfar(vcpu), fault_ipa);
 
        /* Check that the stage-2 fault is a translation or permission fault */
-       fault_status = kvm_vcpu_trap_get_fault(vcpu);
+       fault_status = kvm_vcpu_trap_get_fault_type(vcpu);
        if (fault_status != FSC_FAULT && fault_status != FSC_PERM) {
-               kvm_err("Unsupported fault status: EC=%#x DFCS=%#lx\n",
-                       kvm_vcpu_trap_get_class(vcpu), fault_status);
+               kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
+                       kvm_vcpu_trap_get_class(vcpu),
+                       (unsigned long)kvm_vcpu_trap_get_fault(vcpu),
+                       (unsigned long)kvm_vcpu_get_hsr(vcpu));
                return -EFAULT;
        }
 
        idx = srcu_read_lock(&vcpu->kvm->srcu);
 
        gfn = fault_ipa >> PAGE_SHIFT;
-       if (!kvm_is_visible_gfn(vcpu->kvm, gfn)) {
+       memslot = gfn_to_memslot(vcpu->kvm, gfn);
+       hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable);
+       write_fault = kvm_is_write_fault(vcpu);
+       if (kvm_is_error_hva(hva) || (write_fault && !writable)) {
                if (is_iabt) {
                        /* Prefetch Abort on I/O address */
                        kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
@@ -780,13 +905,6 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
                        goto out_unlock;
                }
 
-               if (fault_status != FSC_FAULT) {
-                       kvm_err("Unsupported fault status on io memory: %#lx\n",
-                               fault_status);
-                       ret = -EFAULT;
-                       goto out_unlock;
-               }
-
                /*
                 * The IPA is reported as [MAX:12], so we need to
                 * complement it with the bottom 12 bits from the
@@ -798,9 +916,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
                goto out_unlock;
        }
 
-       memslot = gfn_to_memslot(vcpu->kvm, gfn);
-
-       ret = user_mem_abort(vcpu, fault_ipa, memslot, fault_status);
+       ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
        if (ret == 0)
                ret = 1;
 out_unlock:
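
Note: kvm_handle_guest_abort() now resolves the memslot and hva once, up
front. A fault outside any memslot, or a write fault on a read-only memslot,
is routed to MMIO emulation (or, for an instruction abort, a prefetch abort is
injected), so only faults on writable RAM reach user_mem_abort(); that is why
the separate FSC_FAULT-only check on the I/O path could be dropped. The
routing, as a standalone sketch with boolean stand-ins for the real
predicates:

        #include <stdbool.h>
        #include <stdio.h>

        static const char *route(bool bad_hva, bool write_fault,
                                 bool writable, bool is_iabt)
        {
                if (bad_hva || (write_fault && !writable)) {
                        if (is_iabt)
                                return "inject prefetch abort";
                        return "io_mem_abort (MMIO emulation)";
                }
                return "user_mem_abort (RAM fault)";
        }

        int main(void)
        {
                /* A write to a read-only memslot now takes the MMIO path. */
                printf("%s\n", route(false, true, false, false));
                /* An ordinary write fault on writable RAM. */
                printf("%s\n", route(false, true, true, false));
                return 0;
        }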
@@ -930,7 +1046,7 @@ int kvm_mmu_init(void)
                size_t len = __hyp_idmap_text_end - __hyp_idmap_text_start;
                phys_addr_t phys_base;
 
-               init_bounce_page = kmalloc(PAGE_SIZE, GFP_KERNEL);
+               init_bounce_page = (void *)__get_free_page(GFP_KERNEL);
                if (!init_bounce_page) {
                        kvm_err("Couldn't allocate HYP init bounce page\n");
                        err = -ENOMEM;
@@ -956,8 +1072,9 @@ int kvm_mmu_init(void)
                         (unsigned long)phys_base);
        }
 
-       hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL);
-       boot_hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL);
+       hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, pgd_order);
+       boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, pgd_order);
+
        if (!hyp_pgd || !boot_hyp_pgd) {
                kvm_err("Hyp mode PGD not allocated\n");
                err = -ENOMEM;
@@ -1003,3 +1120,49 @@ out:
        free_hyp_pgds();
        return err;
 }
+
+void kvm_arch_commit_memory_region(struct kvm *kvm,
+                                  struct kvm_userspace_memory_region *mem,
+                                  const struct kvm_memory_slot *old,
+                                  enum kvm_mr_change change)
+{
+       gpa_t gpa = old->base_gfn << PAGE_SHIFT;
+       phys_addr_t size = old->npages << PAGE_SHIFT;
+       if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) {
+               spin_lock(&kvm->mmu_lock);
+               unmap_stage2_range(kvm, gpa, size);
+               spin_unlock(&kvm->mmu_lock);
+       }
+}
+
+int kvm_arch_prepare_memory_region(struct kvm *kvm,
+                                  struct kvm_memory_slot *memslot,
+                                  struct kvm_userspace_memory_region *mem,
+                                  enum kvm_mr_change change)
+{
+       return 0;
+}
+
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
+                          struct kvm_memory_slot *dont)
+{
+}
+
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+                           unsigned long npages)
+{
+       return 0;
+}
+
+void kvm_arch_memslots_updated(struct kvm *kvm)
+{
+}
+
+void kvm_arch_flush_shadow_all(struct kvm *kvm)
+{
+}
+
+void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
+                                  struct kvm_memory_slot *slot)
+{
+}