UPSTREAM: arm64: add macro to extract ESR_ELx.EC
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 61a82d68330d00604b347275bf2258db1c194e7f..1cab2703f5a87e7074dbdbe8c932ff2317cf4bfe 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
 #include <linux/slab.h>
 #include <linux/stop_machine.h>
 
+#include <asm/barrier.h>
 #include <asm/cputype.h>
 #include <asm/fixmap.h>
+#include <asm/kasan.h>
 #include <asm/kernel-pgtable.h>
 #include <asm/sections.h>
 #include <asm/setup.h>
 
 u64 idmap_t0sz = TCR_T0SZ(VA_BITS);
 
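+/*
+ * Offset between the kernel image's virtual and physical addresses: a
+ * kernel-image virtual address equals its physical address plus
+ * kimage_voffset.
+ */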
+u64 kimage_voffset __read_mostly;
+EXPORT_SYMBOL(kimage_voffset);
+
 /*
  * Empty_zero_page is a special page that is used for zero-initialized data
  * and COW.
  */
-struct page *empty_zero_page;
+unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss;
 EXPORT_SYMBOL(empty_zero_page);
 
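+/*
+ * Statically allocated page tables backing the fixmap region; they are
+ * wired up by early_fixmap_init() before any memory allocator is up.
+ */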
+static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
+static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused;
+static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused;
+
 pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
                              unsigned long size, pgprot_t vma_prot)
 {
@@ -62,16 +71,30 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 }
 EXPORT_SYMBOL(phys_mem_access_prot);
 
-static void __init *early_alloc(unsigned long sz)
+static phys_addr_t __init early_pgtable_alloc(void)
 {
        phys_addr_t phys;
        void *ptr;
 
-       phys = memblock_alloc(sz, sz);
+       phys = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
        BUG_ON(!phys);
-       ptr = __va(phys);
-       memset(ptr, 0, sz);
-       return ptr;
+
+       /*
+        * The FIX_{PGD,PUD,PMD} slots may be in active use, but the FIX_PTE
+        * slot will be free, so we can (ab)use the FIX_PTE slot to initialise
+        * any level of table.
+        */
+       ptr = pte_set_fixmap(phys);
+
+       memset(ptr, 0, PAGE_SIZE);
+
+       /*
+        * Implicit barriers also ensure the zeroed page is visible to the page
+        * table walker
+        */
+       pte_clear_fixmap();
+
+       return phys;
 }
 
 /*
@@ -95,24 +118,30 @@ static void split_pmd(pmd_t *pmd, pte_t *pte)
 static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
                                  unsigned long end, unsigned long pfn,
                                  pgprot_t prot,
-                                 void *(*alloc)(unsigned long size))
+                                 phys_addr_t (*pgtable_alloc)(void))
 {
        pte_t *pte;
 
        if (pmd_none(*pmd) || pmd_sect(*pmd)) {
-               pte = alloc(PTRS_PER_PTE * sizeof(pte_t));
+               phys_addr_t pte_phys;
+               BUG_ON(!pgtable_alloc);
+               pte_phys = pgtable_alloc();
+               pte = pte_set_fixmap(pte_phys);
                if (pmd_sect(*pmd))
                        split_pmd(pmd, pte);
-               __pmd_populate(pmd, __pa(pte), PMD_TYPE_TABLE);
+               __pmd_populate(pmd, pte_phys, PMD_TYPE_TABLE);
                flush_tlb_all();
+               pte_clear_fixmap();
        }
        BUG_ON(pmd_bad(*pmd));
 
-       pte = pte_offset_kernel(pmd, addr);
+       pte = pte_set_fixmap_offset(pmd, addr);
        do {
                set_pte(pte, pfn_pte(pfn, prot));
                pfn++;
        } while (pte++, addr += PAGE_SIZE, addr != end);
+
+       pte_clear_fixmap();
 }
 
 static void split_pud(pud_t *old_pud, pmd_t *pmd)
@@ -127,10 +156,29 @@ static void split_pud(pud_t *old_pud, pmd_t *pmd)
        } while (pmd++, i++, i < PTRS_PER_PMD);
 }
 
-static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
-                                 unsigned long addr, unsigned long end,
+#ifdef CONFIG_DEBUG_PAGEALLOC
+static bool block_mappings_allowed(phys_addr_t (*pgtable_alloc)(void))
+{
+       /*
+        * If debug_page_alloc is enabled we must map the linear map
+        * using pages. However, other mappings created by
+        * create_mapping_noalloc must use sections in some cases. Allow
+        * sections to be used in those cases, where no pgtable_alloc
+        * function is provided.
+        */
+       return !pgtable_alloc || !debug_pagealloc_enabled();
+}
+#else
+static bool block_mappings_allowed(phys_addr_t (*pgtable_alloc)(void))
+{
+       return true;
+}
+#endif
+
+static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end,
                                  phys_addr_t phys, pgprot_t prot,
-                                 void *(*alloc)(unsigned long size))
+                                 phys_addr_t (*pgtable_alloc)(void))
 {
        pmd_t *pmd;
        unsigned long next;
@@ -139,7 +187,10 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
         * Check for initial section mappings in the pgd/pud and remove them.
         */
        if (pud_none(*pud) || pud_sect(*pud)) {
-               pmd = alloc(PTRS_PER_PMD * sizeof(pmd_t));
+               phys_addr_t pmd_phys;
+               BUG_ON(!pgtable_alloc);
+               pmd_phys = pgtable_alloc();
+               pmd = pmd_set_fixmap(pmd_phys);
                if (pud_sect(*pud)) {
                        /*
                         * need to have the 1G of mappings continue to be
@@ -147,19 +198,20 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
                         */
                        split_pud(pud, pmd);
                }
-               pud_populate(mm, pud, pmd);
+               __pud_populate(pud, pmd_phys, PUD_TYPE_TABLE);
                flush_tlb_all();
+               pmd_clear_fixmap();
        }
        BUG_ON(pud_bad(*pud));
 
-       pmd = pmd_offset(pud, addr);
+       pmd = pmd_set_fixmap_offset(pud, addr);
        do {
                next = pmd_addr_end(addr, end);
                /* try section mapping first */
-               if (((addr | next | phys) & ~SECTION_MASK) == 0) {
+               if (((addr | next | phys) & ~SECTION_MASK) == 0 &&
+                     block_mappings_allowed(pgtable_alloc)) {
                       pmd_t old_pmd = *pmd;
-                       set_pmd(pmd, __pmd(phys |
-                                          pgprot_val(mk_sect_prot(prot))));
+                       pmd_set_huge(pmd, phys, prot);
                        /*
                         * Check for previous table entries created during
                         * boot (__create_page_tables) and flush them.
@@ -167,17 +219,19 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
                        if (!pmd_none(old_pmd)) {
                                flush_tlb_all();
                                if (pmd_table(old_pmd)) {
-                                       phys_addr_t table = __pa(pte_offset_map(&old_pmd, 0));
+                                       phys_addr_t table = pmd_page_paddr(old_pmd);
                                        if (!WARN_ON_ONCE(slab_is_available()))
                                                memblock_free(table, PAGE_SIZE);
                                }
                        }
                } else {
                        alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys),
-                                      prot, alloc);
+                                      prot, pgtable_alloc);
                }
                phys += next - addr;
        } while (pmd++, addr = next, addr != end);
+
+       pmd_clear_fixmap();
 }
 
 static inline bool use_1G_block(unsigned long addr, unsigned long next,
@@ -192,31 +246,32 @@ static inline bool use_1G_block(unsigned long addr, unsigned long next,
        return true;
 }
 
-static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd,
-                                 unsigned long addr, unsigned long end,
+static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
                                  phys_addr_t phys, pgprot_t prot,
-                                 void *(*alloc)(unsigned long size))
+                                 phys_addr_t (*pgtable_alloc)(void))
 {
        pud_t *pud;
        unsigned long next;
 
        if (pgd_none(*pgd)) {
-               pud = alloc(PTRS_PER_PUD * sizeof(pud_t));
-               pgd_populate(mm, pgd, pud);
+               phys_addr_t pud_phys;
+               BUG_ON(!pgtable_alloc);
+               pud_phys = pgtable_alloc();
+               __pgd_populate(pgd, pud_phys, PUD_TYPE_TABLE);
        }
        BUG_ON(pgd_bad(*pgd));
 
-       pud = pud_offset(pgd, addr);
+       pud = pud_set_fixmap_offset(pgd, addr);
        do {
                next = pud_addr_end(addr, end);
 
                /*
                 * For 4K granule only, attempt to put down a 1GB block
                 */
-               if (use_1G_block(addr, next, phys)) {
+               if (use_1G_block(addr, next, phys) &&
+                   block_mappings_allowed(pgtable_alloc)) {
                        pud_t old_pud = *pud;
-                       set_pud(pud, __pud(phys |
-                                          pgprot_val(mk_sect_prot(prot))));
+                       pud_set_huge(pud, phys, prot);
 
                        /*
                         * If we have an old value for a pud, it will
@@ -228,26 +283,28 @@ static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd,
                        if (!pud_none(old_pud)) {
                                flush_tlb_all();
                                if (pud_table(old_pud)) {
-                                       phys_addr_t table = __pa(pmd_offset(&old_pud, 0));
+                                       phys_addr_t table = pud_page_paddr(old_pud);
                                        if (!WARN_ON_ONCE(slab_is_available()))
                                                memblock_free(table, PAGE_SIZE);
                                }
                        }
                } else {
-                       alloc_init_pmd(mm, pud, addr, next, phys, prot, alloc);
+                       alloc_init_pmd(pud, addr, next, phys, prot,
+                                      pgtable_alloc);
                }
                phys += next - addr;
        } while (pud++, addr = next, addr != end);
+
+       pud_clear_fixmap();
 }
 
 /*
  * Create the page directory entries and any necessary page tables for the
  * mapping specified by 'md'.
  */
-static void  __create_mapping(struct mm_struct *mm, pgd_t *pgd,
-                                   phys_addr_t phys, unsigned long virt,
+static void init_pgd(pgd_t *pgd, phys_addr_t phys, unsigned long virt,
                                    phys_addr_t size, pgprot_t prot,
-                                   void *(*alloc)(unsigned long size))
+                                   phys_addr_t (*pgtable_alloc)(void))
 {
        unsigned long addr, length, end, next;
 
@@ -258,28 +315,42 @@ static void  __create_mapping(struct mm_struct *mm, pgd_t *pgd,
        if (WARN_ON((phys ^ virt) & ~PAGE_MASK))
                return;
 
+       phys &= PAGE_MASK;
        addr = virt & PAGE_MASK;
        length = PAGE_ALIGN(size + (virt & ~PAGE_MASK));
 
        end = addr + length;
        do {
                next = pgd_addr_end(addr, end);
-               alloc_init_pud(mm, pgd, addr, next, phys, prot, alloc);
+               alloc_init_pud(pgd, addr, next, phys, prot, pgtable_alloc);
                phys += next - addr;
        } while (pgd++, addr = next, addr != end);
 }
 
-static void *late_alloc(unsigned long size)
+static phys_addr_t late_pgtable_alloc(void)
 {
-       void *ptr;
-
-       BUG_ON(size > PAGE_SIZE);
-       ptr = (void *)__get_free_page(PGALLOC_GFP);
+       void *ptr = (void *)__get_free_page(PGALLOC_GFP);
        BUG_ON(!ptr);
-       return ptr;
+
+       /* Ensure the zeroed page is visible to the page table walker */
+       dsb(ishst);
+       return __pa(ptr);
+}
+
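+/*
+ * Create a mapping under an arbitrary page-table root: pgd_offset_raw()
+ * locates the pgd entry for @virt within @pgdir, which need not be the
+ * currently active pgd.
+ */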
+static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
+                                unsigned long virt, phys_addr_t size,
+                                pgprot_t prot,
+                                phys_addr_t (*alloc)(void))
+{
+       init_pgd(pgd_offset_raw(pgdir, virt), phys, virt, size, prot, alloc);
 }
 
-static void __init create_mapping(phys_addr_t phys, unsigned long virt,
+/*
+ * This function can only be used to modify existing table entries,
+ * without allocating new levels of table. Note that this permits the
+ * creation of new section or page entries.
+ */
+static void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt,
                                  phys_addr_t size, pgprot_t prot)
 {
        if (virt < VMALLOC_START) {
@@ -287,16 +358,16 @@ static void __init create_mapping(phys_addr_t phys, unsigned long virt,
                        &phys, virt);
                return;
        }
-       __create_mapping(&init_mm, pgd_offset_k(virt & PAGE_MASK), phys, virt,
-                        size, prot, early_alloc);
+       __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot,
+                            NULL);
 }
 
 void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
                               unsigned long virt, phys_addr_t size,
                               pgprot_t prot)
 {
-       __create_mapping(mm, pgd_offset(mm, virt), phys, virt, size, prot,
-                               late_alloc);
+       __create_pgd_mapping(mm->pgd, phys, virt, size, prot,
+                            late_pgtable_alloc);
 }
 
 static void create_mapping_late(phys_addr_t phys, unsigned long virt,
@@ -308,69 +379,57 @@ static void create_mapping_late(phys_addr_t phys, unsigned long virt,
                return;
        }
 
-       return __create_mapping(&init_mm, pgd_offset_k(virt & PAGE_MASK),
-                               phys, virt, size, prot, late_alloc);
+       __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot,
+                            late_pgtable_alloc);
 }
 
-#ifdef CONFIG_DEBUG_RODATA
-static void __init __map_memblock(phys_addr_t start, phys_addr_t end)
+static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end)
 {
+       unsigned long kernel_start = __pa(_text);
+       unsigned long kernel_end = __pa(__init_begin);
+
        /*
-        * Set up the executable regions using the existing section mappings
-        * for now. This will get more fine grained later once all memory
-        * is mapped
+        * Take care not to create a writable alias for the
+        * read-only text and rodata sections of the kernel image.
         */
-       unsigned long kernel_x_start = round_down(__pa(_stext), SWAPPER_BLOCK_SIZE);
-       unsigned long kernel_x_end = round_up(__pa(__init_end), SWAPPER_BLOCK_SIZE);
-
-       if (end < kernel_x_start) {
-               create_mapping(start, __phys_to_virt(start),
-                       end - start, PAGE_KERNEL);
-       } else if (start >= kernel_x_end) {
-               create_mapping(start, __phys_to_virt(start),
-                       end - start, PAGE_KERNEL);
-       } else {
-               if (start < kernel_x_start)
-                       create_mapping(start, __phys_to_virt(start),
-                               kernel_x_start - start,
-                               PAGE_KERNEL);
-               create_mapping(kernel_x_start,
-                               __phys_to_virt(kernel_x_start),
-                               kernel_x_end - kernel_x_start,
-                               PAGE_KERNEL_EXEC);
-               if (kernel_x_end < end)
-                       create_mapping(kernel_x_end,
-                               __phys_to_virt(kernel_x_end),
-                               end - kernel_x_end,
-                               PAGE_KERNEL);
+
+       /* No overlap with the kernel text/rodata */
+       if (end < kernel_start || start >= kernel_end) {
+               __create_pgd_mapping(pgd, start, __phys_to_virt(start),
+                                    end - start, PAGE_KERNEL,
+                                    early_pgtable_alloc);
+               return;
        }
 
+       /*
+        * This block overlaps the kernel text/rodata mappings.
+        * Map the portion(s) which don't overlap.
+        */
+       if (start < kernel_start)
+               __create_pgd_mapping(pgd, start,
+                                    __phys_to_virt(start),
+                                    kernel_start - start, PAGE_KERNEL,
+                                    early_pgtable_alloc);
+       if (kernel_end < end)
+               __create_pgd_mapping(pgd, kernel_end,
+                                    __phys_to_virt(kernel_end),
+                                    end - kernel_end, PAGE_KERNEL,
+                                    early_pgtable_alloc);
+
+       /*
+        * Map the linear alias of the [_text, __init_begin) interval as
+        * read-only/non-executable. This makes the contents of the
+        * region accessible to subsystems such as hibernate, but
+        * protects it from inadvertent modification or execution.
+        */
+       __create_pgd_mapping(pgd, kernel_start, __phys_to_virt(kernel_start),
+                            kernel_end - kernel_start, PAGE_KERNEL_RO,
+                            early_pgtable_alloc);
 }
-#else
-static void __init __map_memblock(phys_addr_t start, phys_addr_t end)
-{
-       create_mapping(start, __phys_to_virt(start), end - start,
-                       PAGE_KERNEL_EXEC);
-}
-#endif
 
-static void __init map_mem(void)
+static void __init map_mem(pgd_t *pgd)
 {
        struct memblock_region *reg;
-       phys_addr_t limit;
-
-       /*
-        * Temporarily limit the memblock range. We need to do this as
-        * create_mapping requires puds, pmds and ptes to be allocated from
-        * memory addressable from the initial direct kernel mapping.
-        *
-        * The initial direct kernel mapping, located at swapper_pg_dir, gives
-        * us PUD_SIZE (with SECTION maps) or PMD_SIZE (without SECTION maps,
-        * memory starting from PHYS_OFFSET (which must be aligned to 2MB as
-        * per Documentation/arm64/booting.txt).
-        */
-       limit = PHYS_OFFSET + SWAPPER_INIT_MAP_SIZE;
-       memblock_set_current_limit(limit);
 
        /* map all the memory banks */
        for_each_memblock(memory, reg) {
@@ -379,70 +438,97 @@ static void __init map_mem(void)
 
                if (start >= end)
                        break;
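+               /*
+                * Regions marked NOMAP (e.g. firmware-reserved memory) must
+                * not be covered by the linear mapping.
+                */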
+               if (memblock_is_nomap(reg))
+                       continue;
 
-               if (ARM64_SWAPPER_USES_SECTION_MAPS) {
-                       /*
-                        * For the first memory bank align the start address and
-                        * current memblock limit to prevent create_mapping() from
-                        * allocating pte page tables from unmapped memory. With
-                        * the section maps, if the first block doesn't end on section
-                        * size boundary, create_mapping() will try to allocate a pte
-                        * page, which may be returned from an unmapped area.
-                        * When section maps are not used, the pte page table for the
-                        * current limit is already present in swapper_pg_dir.
-                        */
-                       if (start < limit)
-                               start = ALIGN(start, SECTION_SIZE);
-                       if (end < limit) {
-                               limit = end & SECTION_MASK;
-                               memblock_set_current_limit(limit);
-                       }
-               }
-               __map_memblock(start, end);
+               __map_memblock(pgd, start, end);
        }
-
-       /* Limit no longer required. */
-       memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
 }
 
-static void __init fixup_executable(void)
+void mark_rodata_ro(void)
 {
-#ifdef CONFIG_DEBUG_RODATA
-       /* now that we are actually fully mapped, make the start/end more fine grained */
-       if (!IS_ALIGNED((unsigned long)_stext, SWAPPER_BLOCK_SIZE)) {
-               unsigned long aligned_start = round_down(__pa(_stext),
-                                                        SWAPPER_BLOCK_SIZE);
+       unsigned long section_size;
 
-               create_mapping(aligned_start, __phys_to_virt(aligned_start),
-                               __pa(_stext) - aligned_start,
-                               PAGE_KERNEL);
-       }
+       section_size = (unsigned long)_etext - (unsigned long)_text;
+       create_mapping_late(__pa(_text), (unsigned long)_text,
+                           section_size, PAGE_KERNEL_ROX);
+       /*
+        * mark .rodata as read only. Use __init_begin rather than __end_rodata
+        * to cover NOTES and EXCEPTION_TABLE.
+        */
+       section_size = (unsigned long)__init_begin - (unsigned long)__start_rodata;
+       create_mapping_late(__pa(__start_rodata), (unsigned long)__start_rodata,
+                           section_size, PAGE_KERNEL_RO);
+}
 
-       if (!IS_ALIGNED((unsigned long)__init_end, SWAPPER_BLOCK_SIZE)) {
-               unsigned long aligned_end = round_up(__pa(__init_end),
-                                                         SWAPPER_BLOCK_SIZE);
-               create_mapping(__pa(__init_end), (unsigned long)__init_end,
-                               aligned_end - __pa(__init_end),
-                               PAGE_KERNEL);
-       }
-#endif
+void fixup_init(void)
+{
+       /*
+        * Unmap the __init region but leave the VM area in place. This
+        * prevents the region from being reused for kernel modules, which
+        * is not supported by kallsyms.
+        */
+       unmap_kernel_range((u64)__init_begin, (u64)(__init_end - __init_begin));
 }
 
-#ifdef CONFIG_DEBUG_RODATA
-void mark_rodata_ro(void)
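+/*
+ * Map a contiguous segment of the kernel image into @pgd with the given
+ * protection, and register a vm_struct for it so that the virtual range
+ * is reserved in the vmalloc region.
+ */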
+static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end,
+                                     pgprot_t prot, struct vm_struct *vma)
 {
-       create_mapping_late(__pa(_stext), (unsigned long)_stext,
-                               (unsigned long)_etext - (unsigned long)_stext,
-                               PAGE_KERNEL_ROX);
+       phys_addr_t pa_start = __pa(va_start);
+       unsigned long size = va_end - va_start;
+
+       BUG_ON(!PAGE_ALIGNED(pa_start));
+       BUG_ON(!PAGE_ALIGNED(size));
+
+       __create_pgd_mapping(pgd, pa_start, (unsigned long)va_start, size, prot,
+                            early_pgtable_alloc);
 
+       vma->addr       = va_start;
+       vma->phys_addr  = pa_start;
+       vma->size       = size;
+       vma->flags      = VM_MAP;
+       vma->caller     = __builtin_return_address(0);
+
+       vm_area_add_early(vma);
 }
-#endif
 
-void fixup_init(void)
+/*
+ * Create fine-grained mappings for the kernel.
+ */
+static void __init map_kernel(pgd_t *pgd)
 {
-       create_mapping_late(__pa(__init_begin), (unsigned long)__init_begin,
-                       (unsigned long)__init_end - (unsigned long)__init_begin,
-                       PAGE_KERNEL);
+       static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_init, vmlinux_data;
+
+       map_kernel_segment(pgd, _text, _etext, PAGE_KERNEL_EXEC, &vmlinux_text);
+       map_kernel_segment(pgd, __start_rodata, __init_begin, PAGE_KERNEL, &vmlinux_rodata);
+       map_kernel_segment(pgd, __init_begin, __init_end, PAGE_KERNEL_EXEC,
+                          &vmlinux_init);
+       map_kernel_segment(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data);
+
+       if (!pgd_val(*pgd_offset_raw(pgd, FIXADDR_START))) {
+               /*
+                * The fixmap falls in a separate pgd to the kernel, and doesn't
+                * live in the carveout for the swapper_pg_dir. We can simply
+                * re-use the existing dir for the fixmap.
+                */
+               set_pgd(pgd_offset_raw(pgd, FIXADDR_START),
+                       *pgd_offset_k(FIXADDR_START));
+       } else if (CONFIG_PGTABLE_LEVELS > 3) {
+               /*
+                * The fixmap shares its top level pgd entry with the kernel
+                * mapping. This can really only occur when we are running
+                * with 16k/4 levels, so we can simply reuse the pud level
+                * entry instead.
+                */
+               BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
+               set_pud(pud_set_fixmap_offset(pgd, FIXADDR_START),
+                       __pud(__pa(bm_pmd) | PUD_TYPE_TABLE));
+               pud_clear_fixmap();
+       } else {
+               BUG();
+       }
+
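+       /* Copy the KASAN shadow mappings into the new pgd (no-op without KASAN). */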
+       kasan_copy_shadow(pgd);
 }
 
 /*
@@ -451,28 +537,35 @@ void fixup_init(void)
  */
 void __init paging_init(void)
 {
-       void *zero_page;
-
-       map_mem();
-       fixup_executable();
+       phys_addr_t pgd_phys = early_pgtable_alloc();
+       pgd_t *pgd = pgd_set_fixmap(pgd_phys);
 
-       /* allocate the zero page. */
-       zero_page = early_alloc(PAGE_SIZE);
+       map_kernel(pgd);
+       map_mem(pgd);
 
-       bootmem_init();
+       /*
+        * We want to reuse the original swapper_pg_dir so we don't have to
+        * communicate the new address to non-coherent secondaries in
+        * secondary_entry, and so cpu_switch_mm can generate the address with
+        * adrp+add rather than a load from some global variable.
+        *
+        * To do this we need to go via a temporary pgd.
+        */
+       cpu_replace_ttbr1(__va(pgd_phys));
+       memcpy(swapper_pg_dir, pgd, PAGE_SIZE);
+       cpu_replace_ttbr1(swapper_pg_dir);
 
-       empty_zero_page = virt_to_page(zero_page);
-
-       /* Ensure the zero page is visible to the page table walker */
-       dsb(ishst);
+       pgd_clear_fixmap();
+       memblock_free(pgd_phys, PAGE_SIZE);
 
        /*
-        * TTBR0 is only used for the identity mapping at this stage. Make it
-        * point to zero page to avoid speculatively fetching new entries.
+        * We only reuse the PGD from the swapper_pg_dir, not the pud + pmd
+        * allocated with it.
         */
-       cpu_set_reserved_ttbr0();
-       local_flush_tlb_all();
-       cpu_set_default_tcr_t0sz();
+       memblock_free(__pa(swapper_pg_dir) + PAGE_SIZE,
+                     SWAPPER_DIR_SIZE - PAGE_SIZE);
+
+       bootmem_init();
 }
 
 /*
@@ -559,21 +652,13 @@ void vmemmap_free(unsigned long start, unsigned long end)
 }
 #endif /* CONFIG_SPARSEMEM_VMEMMAP */
 
-static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
-#if CONFIG_PGTABLE_LEVELS > 2
-static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss;
-#endif
-#if CONFIG_PGTABLE_LEVELS > 3
-static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss;
-#endif
-
 static inline pud_t * fixmap_pud(unsigned long addr)
 {
        pgd_t *pgd = pgd_offset_k(addr);
 
        BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd));
 
-       return pud_offset(pgd, addr);
+       return pud_offset_kimg(pgd, addr);
 }
 
 static inline pmd_t * fixmap_pmd(unsigned long addr)
@@ -582,16 +667,12 @@ static inline pmd_t * fixmap_pmd(unsigned long addr)
 
        BUG_ON(pud_none(*pud) || pud_bad(*pud));
 
-       return pmd_offset(pud, addr);
+       return pmd_offset_kimg(pud, addr);
 }
 
 static inline pte_t * fixmap_pte(unsigned long addr)
 {
-       pmd_t *pmd = fixmap_pmd(addr);
-
-       BUG_ON(pmd_none(*pmd) || pmd_bad(*pmd));
-
-       return pte_offset_kernel(pmd, addr);
+       return &bm_pte[pte_index(addr)];
 }
 
 void __init early_fixmap_init(void)
@@ -602,15 +683,26 @@ void __init early_fixmap_init(void)
        unsigned long addr = FIXADDR_START;
 
        pgd = pgd_offset_k(addr);
-       pgd_populate(&init_mm, pgd, bm_pud);
-       pud = pud_offset(pgd, addr);
+       if (CONFIG_PGTABLE_LEVELS > 3 &&
+           !(pgd_none(*pgd) || pgd_page_paddr(*pgd) == __pa(bm_pud))) {
+               /*
+                * We only end up here if the kernel mapping and the fixmap
+                * share the top level pgd entry, which should only happen on
+                * 16k/4 levels configurations.
+                */
+               BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
+               pud = pud_offset_kimg(pgd, addr);
+       } else {
+               pgd_populate(&init_mm, pgd, bm_pud);
+               pud = fixmap_pud(addr);
+       }
        pud_populate(&init_mm, pud, bm_pmd);
-       pmd = pmd_offset(pud, addr);
+       pmd = fixmap_pmd(addr);
        pmd_populate_kernel(&init_mm, pmd, bm_pte);
 
        /*
         * The boot-ioremap range spans multiple pmds, for which
-        * we are not preparted:
+        * we are not prepared:
         */
        BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
                     != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
@@ -649,19 +741,18 @@ void __set_fixmap(enum fixed_addresses idx,
        }
 }
 
-void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
+void *__init __fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot)
 {
        const u64 dt_virt_base = __fix_to_virt(FIX_FDT);
-       pgprot_t prot = PAGE_KERNEL_RO;
-       int size, offset;
+       int offset;
        void *dt_virt;
 
        /*
         * Check whether the physical FDT address is set and meets the minimum
         * alignment requirement. Since we are relying on MIN_FDT_ALIGN to be
-        * at least 8 bytes so that we can always access the size field of the
-        * FDT header after mapping the first chunk, double check here if that
-        * is indeed the case.
+        * at least 8 bytes so that we can always access the magic and size
+        * fields of the FDT header after mapping the first chunk, double check
+        * here if that is indeed the case.
         */
        BUILD_BUG_ON(MIN_FDT_ALIGN < 8);
        if (!dt_phys || dt_phys % MIN_FDT_ALIGN)
@@ -670,7 +761,7 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
        /*
         * Make sure that the FDT region can be mapped without the need to
         * allocate additional translation table pages, so that it is safe
-        * to call create_mapping() this early.
+        * to call create_mapping_noalloc() this early.
         *
         * On 64k pages, the FDT will be mapped using PTEs, so we need to
         * be in the same PMD as the rest of the fixmap.
@@ -686,21 +777,73 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
        dt_virt = (void *)dt_virt_base + offset;
 
        /* map the first chunk so we can read the size from the header */
-       create_mapping(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base,
-                      SWAPPER_BLOCK_SIZE, prot);
+       create_mapping_noalloc(round_down(dt_phys, SWAPPER_BLOCK_SIZE),
+                       dt_virt_base, SWAPPER_BLOCK_SIZE, prot);
 
-       if (fdt_check_header(dt_virt) != 0)
+       if (fdt_magic(dt_virt) != FDT_MAGIC)
                return NULL;
 
-       size = fdt_totalsize(dt_virt);
-       if (size > MAX_FDT_SIZE)
+       *size = fdt_totalsize(dt_virt);
+       if (*size > MAX_FDT_SIZE)
                return NULL;
 
-       if (offset + size > SWAPPER_BLOCK_SIZE)
-               create_mapping(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base,
-                              round_up(offset + size, SWAPPER_BLOCK_SIZE), prot);
+       if (offset + *size > SWAPPER_BLOCK_SIZE)
+               create_mapping_noalloc(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base,
+                              round_up(offset + *size, SWAPPER_BLOCK_SIZE), prot);
 
-       memblock_reserve(dt_phys, size);
+       return dt_virt;
+}
 
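+/*
+ * Map the FDT read-only via the fixmap and reserve its backing memory;
+ * returns the FDT's virtual address, or NULL if it is absent or invalid.
+ */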
+void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
+{
+       void *dt_virt;
+       int size;
+
+       dt_virt = __fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL_RO);
+       if (!dt_virt)
+               return NULL;
+
+       memblock_reserve(dt_phys, size);
        return dt_virt;
 }
+
+int __init arch_ioremap_pud_supported(void)
+{
+       /* only 4k granule supports level 1 block mappings */
+       return IS_ENABLED(CONFIG_ARM64_4K_PAGES);
+}
+
+int __init arch_ioremap_pmd_supported(void)
+{
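+       /* PMD-level (section) block mappings are supported with all granule sizes. */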
+       return 1;
+}
+
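+/*
+ * Install section (block) entries for huge ioremap/vmap mappings. The
+ * physical address must be aligned to the block size, hence the BUG_ON()s.
+ */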
+int pud_set_huge(pud_t *pud, phys_addr_t phys, pgprot_t prot)
+{
+       BUG_ON(phys & ~PUD_MASK);
+       set_pud(pud, __pud(phys | PUD_TYPE_SECT | pgprot_val(mk_sect_prot(prot))));
+       return 1;
+}
+
+int pmd_set_huge(pmd_t *pmd, phys_addr_t phys, pgprot_t prot)
+{
+       BUG_ON(phys & ~PMD_MASK);
+       set_pmd(pmd, __pmd(phys | PMD_TYPE_SECT | pgprot_val(mk_sect_prot(prot))));
+       return 1;
+}
+
+int pud_clear_huge(pud_t *pud)
+{
+       if (!pud_sect(*pud))
+               return 0;
+       pud_clear(pud);
+       return 1;
+}
+
+int pmd_clear_huge(pmd_t *pmd)
+{
+       if (!pmd_sect(*pmd))
+               return 0;
+       pmd_clear(pmd);
+       return 1;
+}