Merge remote-tracking branch 'lsk/v3.10/topic/gator' into linux-linaro-lsk
[firefly-linux-kernel-4.4.55.git] / arch / arm64 / kernel / head.S
index 999504b50c30a9aca6aa2ed4c14ac742929938ae..f1d3f693cac665d221b5eaff32fe67e56e3a29cb 100644 (file)
@@ -26,6 +26,7 @@
 #include <asm/assembler.h>
 #include <asm/ptrace.h>
 #include <asm/asm-offsets.h>
+#include <asm/cache.h>
 #include <asm/cputype.h>
 #include <asm/memory.h>
 #include <asm/thread_info.h>
 #include <asm/page.h>
 #include <asm/virt.h>
 
-/*
- * swapper_pg_dir is the virtual address of the initial page table. We place
- * the page tables 3 * PAGE_SIZE below KERNEL_RAM_VADDR. The idmap_pg_dir has
- * 2 pages and is placed below swapper_pg_dir.
- */
 #define KERNEL_RAM_VADDR       (PAGE_OFFSET + TEXT_OFFSET)
 
 #if (KERNEL_RAM_VADDR & 0xfffff) != 0x80000
 #error KERNEL_RAM_VADDR must start at 0xXXX80000
 #endif
 
-#define SWAPPER_DIR_SIZE       (3 * PAGE_SIZE)
-#define IDMAP_DIR_SIZE         (2 * PAGE_SIZE)
-
-       .globl  swapper_pg_dir
-       .equ    swapper_pg_dir, KERNEL_RAM_VADDR - SWAPPER_DIR_SIZE
-
-       .globl  idmap_pg_dir
-       .equ    idmap_pg_dir, swapper_pg_dir - IDMAP_DIR_SIZE
-
-       .macro  pgtbl, ttb0, ttb1, phys
-       add     \ttb1, \phys, #TEXT_OFFSET - SWAPPER_DIR_SIZE
-       sub     \ttb0, \ttb1, #IDMAP_DIR_SIZE
+       .macro  pgtbl, ttb0, ttb1, virt_to_phys
+       ldr     \ttb1, =swapper_pg_dir          // link-time address of swapper_pg_dir
+       ldr     \ttb0, =idmap_pg_dir            // link-time address of idmap_pg_dir
+       add     \ttb1, \ttb1, \virt_to_phys     // apply the caller-supplied offset
+       add     \ttb0, \ttb0, \virt_to_phys     // same offset for the idmap table
        .endm
 
 #ifdef CONFIG_ARM64_64K_PAGES
        /*
         * DO NOT MODIFY. Image header expected by Linux boot-loaders.
         */
+#ifdef CONFIG_EFI
+efi_head:
+       /*
+        * This add instruction has no meaningful effect except that
+        * its opcode forms the magic "MZ" signature required by UEFI.
+        */
+       add     x13, x18, #0x16
+       b       stext                           // branch to kernel start
+#else
        b       stext                           // branch to kernel start, magic
        .long   0                               // reserved
+#endif
        .quad   TEXT_OFFSET                     // Image load offset from start of RAM
        .quad   0                               // reserved
        .quad   0                               // reserved
+       .quad   0                               // reserved
+       .quad   0                               // reserved
+       .quad   0                               // reserved
+       .byte   0x41                            // Magic number, "ARM\x64"
+       .byte   0x52
+       .byte   0x4d
+       .byte   0x64
+#ifdef CONFIG_EFI
+       .long   pe_header - efi_head            // Offset to the PE header.
+#else
+       .word   0                               // reserved
+#endif
+
+#ifdef CONFIG_EFI
+       .align 3                                // 2^3 = 8-byte alignment for pe_header
+pe_header:
+       .ascii  "PE"                            // PE signature: "PE" plus two zero bytes
+       .short  0                               // zero bytes completing the signature
+coff_header:
+       .short  0xaa64                          // AArch64
+       .short  2                               // nr_sections
+       .long   0                               // TimeDateStamp
+       .long   0                               // PointerToSymbolTable
+       .long   1                               // NumberOfSymbols
+       .short  section_table - optional_header // SizeOfOptionalHeader
+       .short  0x206                           // Characteristics.
+                                               // IMAGE_FILE_DEBUG_STRIPPED |
+                                               // IMAGE_FILE_EXECUTABLE_IMAGE |
+                                               // IMAGE_FILE_LINE_NUMS_STRIPPED
+optional_header:
+       .short  0x20b                           // PE32+ format
+       .byte   0x02                            // MajorLinkerVersion
+       .byte   0x14                            // MinorLinkerVersion
+       .long   _edata - stext                  // SizeOfCode
+       .long   0                               // SizeOfInitializedData
+       .long   0                               // SizeOfUninitializedData
+       .long   efi_stub_entry - efi_head       // AddressOfEntryPoint
+       .long   stext - efi_head                // BaseOfCode
+
+extra_header_fields:
+       .quad   0                               // ImageBase
+       .long   0x20                            // SectionAlignment
+       .long   0x8                             // FileAlignment
+       .short  0                               // MajorOperatingSystemVersion
+       .short  0                               // MinorOperatingSystemVersion
+       .short  0                               // MajorImageVersion
+       .short  0                               // MinorImageVersion
+       .short  0                               // MajorSubsystemVersion
+       .short  0                               // MinorSubsystemVersion
+       .long   0                               // Win32VersionValue
+
+       .long   _edata - efi_head               // SizeOfImage
+
+       // Everything before the kernel image is considered part of the header
+       .long   stext - efi_head                // SizeOfHeaders
+       .long   0                               // CheckSum
+       .short  0xa                             // Subsystem (EFI application)
+       .short  0                               // DllCharacteristics
+       .quad   0                               // SizeOfStackReserve
+       .quad   0                               // SizeOfStackCommit
+       .quad   0                               // SizeOfHeapReserve
+       .quad   0                               // SizeOfHeapCommit
+       .long   0                               // LoaderFlags
+       .long   0x6                             // NumberOfRvaAndSizes
+
+       .quad   0                               // ExportTable
+       .quad   0                               // ImportTable
+       .quad   0                               // ResourceTable
+       .quad   0                               // ExceptionTable
+       .quad   0                               // CertificationTable
+       .quad   0                               // BaseRelocationTable
+
+       // Section table
+section_table:
+
+       /*
+        * The EFI application loader requires a relocation section
+        * because EFI applications must be relocatable.  This is a
+        * dummy section as far as we are concerned.
+        */
+       .ascii  ".reloc"
+       .byte   0
+       .byte   0                       // end of 0 padding of section name
+       .long   0                       // VirtualSize
+       .long   0                       // VirtualAddress
+       .long   0                       // SizeOfRawData
+       .long   0                       // PointerToRawData
+       .long   0                       // PointerToRelocations
+       .long   0                       // PointerToLineNumbers
+       .short  0                       // NumberOfRelocations
+       .short  0                       // NumberOfLineNumbers
+       .long   0x42100040              // Characteristics (section flags)
+
+
+       .ascii  ".text"
+       .byte   0
+       .byte   0
+       .byte   0                       // end of 0 padding of section name
+       .long   _edata - stext          // VirtualSize
+       .long   stext - efi_head        // VirtualAddress
+       .long   _edata - stext          // SizeOfRawData
+       .long   stext - efi_head        // PointerToRawData
+
+       .long   0               // PointerToRelocations (0 for executables)
+       .long   0               // PointerToLineNumbers (0 for executables)
+       .short  0               // NumberOfRelocations  (0 for executables)
+       .short  0               // NumberOfLineNumbers  (0 for executables)
+       .long   0xe0500020      // Characteristics (section flags)
+       .align 5                // 2^5 = 32 bytes, same value as SectionAlignment (0x20)
+#endif
 
 ENTRY(stext)
        mov     x21, x0                         // x21=FDT
@@ -232,8 +341,9 @@ ENDPROC(set_cpu_boot_mode_flag)
  * This is not in .bss, because we set it sufficiently early that the boot-time
  * zeroing of .bss would clobber it.
  */
-       .pushsection    .data
+       .pushsection    .data..cacheline_aligned
+       /*
+        * Align before the label, so that __boot_cpu_mode itself is
+        * cache-line aligned and names the first .long below rather than
+        * any padding the directive may insert.
+        */
+       .align  L1_CACHE_SHIFT
 ENTRY(__boot_cpu_mode)
        .long   BOOT_CPU_MODE_EL2
        .long   0
        .popsection
@@ -289,7 +399,7 @@ ENTRY(secondary_startup)
        mov     x23, x0                         // x23=current cpu_table
        cbz     x23, __error_p                  // invalid processor (x23=0)?
 
-       pgtbl   x25, x26, x24                   // x25=TTBR0, x26=TTBR1
+       pgtbl   x25, x26, x28                   // x25=TTBR0, x26=TTBR1
        ldr     x12, [x23, #CPU_INFO_SETUP]
        add     x12, x12, x28                   // __virt_to_phys
        blr     x12                             // initialise processor
@@ -331,8 +441,13 @@ ENDPROC(__enable_mmu)
  *  x27 = *virtual* address to jump to upon completion
  *
  * other registers depend on the function called upon completion
+ *
+ * We align the entire function to the smallest power of two larger than it to
+ * ensure it fits within a single block map entry. Otherwise were PHYS_OFFSET
+ * close to the end of a 512MB or 1GB block we might require an additional
+ * table to map the entire function.
  */
-       .align  6
+       .align  4
 __turn_mmu_on:
        msr     sctlr_el1, x0
        isb
@@ -375,26 +490,18 @@ ENDPROC(__calc_phys_offset)
  * Preserves:  tbl, flags
  * Corrupts:   phys, start, end, pstate
  */
-       .macro  create_block_map, tbl, flags, phys, start, end, idmap=0
+       .macro  create_block_map, tbl, flags, phys, start, end  // \end is inclusive: loops while index <= end index
        lsr     \phys, \phys, #BLOCK_SHIFT
-       .if     \idmap
-       and     \start, \phys, #PTRS_PER_PTE - 1        // table index
-       .else
        lsr     \start, \start, #BLOCK_SHIFT
        and     \start, \start, #PTRS_PER_PTE - 1       // table index
-       .endif
        orr     \phys, \flags, \phys, lsl #BLOCK_SHIFT  // table entry
-       .ifnc   \start,\end
        lsr     \end, \end, #BLOCK_SHIFT
        and     \end, \end, #PTRS_PER_PTE - 1           // table end index
-       .endif
 9999:  str     \phys, [\tbl, \start, lsl #3]           // store the entry
-       .ifnc   \start,\end
        add     \start, \start, #1                      // next entry
        add     \phys, \phys, #BLOCK_SIZE               // next block
        cmp     \start, \end
        b.ls    9999b
-       .endif
        .endm
 
 /*
@@ -403,10 +510,19 @@ ENDPROC(__calc_phys_offset)
  *   - identity mapping to enable the MMU (low address, TTBR0)
  *   - first few MB of the kernel linear mapping to jump to once the MMU has
  *     been enabled, including the FDT blob (TTBR1)
- *   - UART mapping if CONFIG_EARLY_PRINTK is enabled (TTBR1)
+ *   - pgd entry for fixed mappings (TTBR1)
  */
 __create_page_tables:
-       pgtbl   x25, x26, x24                   // idmap_pg_dir and swapper_pg_dir addresses
+       pgtbl   x25, x26, x28                   // idmap_pg_dir and swapper_pg_dir addresses
+       mov     x27, lr                         // save return address; the bl below overwrites lr
+
+       /*
+        * Invalidate the idmap and swapper page tables to avoid potential
+        * dirty cache lines being evicted.
+        */
+       mov     x0, x25                         // x0 = idmap_pg_dir
+       add     x1, x26, #SWAPPER_DIR_SIZE      // x1 = swapper_pg_dir + SWAPPER_DIR_SIZE
+       bl      __inval_cache_range
 
        /*
         * Clear the idmap and swapper page tables.
@@ -426,9 +542,13 @@ __create_page_tables:
         * Create the identity mapping.
         */
        add     x0, x25, #PAGE_SIZE             // section table address
-       adr     x3, __turn_mmu_on               // virtual/physical address
+       ldr     x3, =KERNEL_START
+       add     x3, x3, x28                     // __pa(KERNEL_START)
        create_pgd_entry x25, x0, x3, x5, x6
-       create_block_map x0, x7, x3, x5, x5, idmap=1
+       ldr     x6, =KERNEL_END
+       mov     x5, x3                          // __pa(KERNEL_START)
+       add     x6, x6, x28                     // __pa(KERNEL_END)
+       create_block_map x0, x7, x3, x5, x6
 
        /*
         * Map the kernel image (starting with PHYS_OFFSET).
@@ -436,7 +556,7 @@ __create_page_tables:
        add     x0, x26, #PAGE_SIZE             // section table address
        mov     x5, #PAGE_OFFSET
        create_pgd_entry x26, x0, x5, x3, x6
-       ldr     x6, =KERNEL_END - 1
+       ldr     x6, =KERNEL_END
        mov     x3, x24                         // phys offset
        create_block_map x0, x7, x3, x5, x6
 
@@ -456,15 +576,23 @@ __create_page_tables:
        sub     x6, x6, #1                      // inclusive range
        create_block_map x0, x7, x3, x5, x6
 1:
-#ifdef CONFIG_EARLY_PRINTK
        /*
-        * Create the pgd entry for the UART mapping. The full mapping is done
-        * later based earlyprintk kernel parameter.
+        * Create the pgd entry for the fixed mappings.
         */
-       ldr     x5, =EARLYCON_IOBASE            // UART virtual address
+       ldr     x5, =FIXADDR_TOP                // Fixed mapping virtual address
        add     x0, x26, #2 * PAGE_SIZE         // section table address
        create_pgd_entry x26, x0, x5, x6, x7
-#endif
+
+       /*
+        * Since the page tables have been populated with non-cacheable
+        * accesses (MMU disabled), invalidate the idmap and swapper page
+        * tables again to remove any speculatively loaded cache lines.
+        */
+       mov     x0, x25                         // x0 = idmap_pg_dir
+       add     x1, x26, #SWAPPER_DIR_SIZE      // x1 = swapper_pg_dir + SWAPPER_DIR_SIZE
+       bl      __inval_cache_range
+
+       mov     lr, x27                         // restore the return address saved on entry
        ret
 ENDPROC(__create_page_tables)
        .ltorg
@@ -473,10 +601,8 @@ ENDPROC(__create_page_tables)
        .type   __switch_data, %object
 __switch_data:
        .quad   __mmap_switched
-       .quad   __data_loc                      // x4
-       .quad   _data                           // x5
        .quad   __bss_start                     // x6
-       .quad   _end                            // x7
+       .quad   __bss_stop                      // x7
        .quad   processor_id                    // x4
        .quad   __fdt_pointer                   // x5
        .quad   memstart_addr                   // x6
@@ -489,15 +615,7 @@ __switch_data:
 __mmap_switched:
        adr     x3, __switch_data + 8
 
-       ldp     x4, x5, [x3], #16
        ldp     x6, x7, [x3], #16
-       cmp     x4, x5                          // Copy data segment if needed
-1:     ccmp    x5, x6, #4, ne
-       b.eq    2f
-       ldr     x16, [x4], #8
-       str     x16, [x5], #8
-       b       1b
-2:
 1:     cmp     x6, x7
        b.hs    2f
        str     xzr, [x6], #8                   // Clear BSS