config PHYS_OFFSET
hex "Physical address of main memory" if MMU
- depends on !ARM_PATCH_PHYS_VIRT && !NEED_MACH_MEMORY_H
+ depends on !ARM_PATCH_PHYS_VIRT
default DRAM_BASE if !MMU
+ default 0x00000000 if ARCH_EBSA110 || \
+ EP93XX_SDCE3_SYNC_PHYS_OFFSET || \
+ ARCH_FOOTBRIDGE || \
+ ARCH_INTEGRATOR || \
+ ARCH_IOP13XX || \
+ ARCH_KS8695 || \
+ (ARCH_REALVIEW && !REALVIEW_HIGH_PHYS_OFFSET)
+ default 0x10000000 if ARCH_OMAP1 || ARCH_RPC
+ default 0x20000000 if ARCH_S5PV210
+ default 0x70000000 if REALVIEW_HIGH_PHYS_OFFSET
+ default 0xc0000000 if EP93XX_SDCE0_PHYS_OFFSET || ARCH_SA1100
+ default 0xd0000000 if EP93XX_SDCE1_PHYS_OFFSET
+ default 0xe0000000 if EP93XX_SDCE2_PHYS_OFFSET
+ default 0xf0000000 if EP93XX_SDCE3_ASYNC_PHYS_OFFSET
help
Please provide the physical address corresponding to the
location of main memory in your system.
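
When ARM_PATCH_PHYS_VIRT is disabled, this value anchors the fixed virt<->phys translation, roughly as below. This is only a sketch of the non-patched macros in asm/memory.h; the PAGE_OFFSET value is an assumed 3G/1G split and 0x10000000 is just one of the defaults listed above.

#define PAGE_OFFSET	0xc0000000UL	/* assumed: typical 3G/1G kernel split */
#define PHYS_OFFSET	0x10000000UL	/* e.g. the ARCH_OMAP1/ARCH_RPC default above */

#define __virt_to_phys(x)	((x) - PAGE_OFFSET + PHYS_OFFSET)
#define __phys_to_virt(x)	((x) - PHYS_OFFSET + PAGE_OFFSET)
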
select ARM_VIC
select CLKDEV_LOOKUP
select CPU_ARM920T
- select NEED_MACH_MEMORY_H
help
This enables support for the Cirrus EP93xx series of CPUs.
# Default value
head-y := arch/arm/kernel/head$(MMUEXT).o
+
+# Text offset. This list is sorted numerically by address in order to
+# provide a means to avoid/resolve conflicts in multi-arch kernels.
textofs-y := 0x00008000
textofs-$(CONFIG_ARCH_CLPS711X) := 0x00028000
# We don't want the htc bootloader to corrupt the kernel during resume
ZBSSADDR := ALIGN(8)
endif
-SEDFLAGS = s/TEXT_START/$(ZTEXTADDR)/;s/BSS_START/$(ZBSSADDR)/
+CPPFLAGS_vmlinux.lds := -DTEXT_START="$(ZTEXTADDR)" -DBSS_START="$(ZBSSADDR)"
suffix_$(CONFIG_KERNEL_GZIP) = gzip
suffix_$(CONFIG_KERNEL_LZO) = lzo
$(obj)/font.c: $(FONTC)
$(call cmd,shipped)
-$(obj)/vmlinux.lds: $(obj)/vmlinux.lds.in arch/arm/boot/Makefile $(KCONFIG_CONFIG)
- @sed "$(SEDFLAGS)" < $< > $@
-
$(obj)/hyp-stub.S: $(srctree)/arch/$(SRCARCH)/kernel/hyp-stub.S
$(call cmd,shipped)
THUMB( adr r12, BSYM(1f) )
THUMB( bx r12 )
- .word 0x016f2818 @ Magic numbers to help the loader
- .word start @ absolute load/run zImage address
- .word _edata @ zImage end address
+ .word _magic_sig @ Magic numbers to help the loader
+ .word _magic_start @ absolute load/run zImage address
+ .word _magic_end @ zImage end address
+ .word 0x04030201 @ endianness flag
+
THUMB( .thumb )
1:
ARM_BE8( setend be ) @ go BE8 if compiled for BE8
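
For a boot loader, the words above live at fixed offsets in the image: the magic/start/end words have long been at 0x24/0x28/0x2c, and the new endianness flag is assumed here to land at the next word, 0x30. The ZIMAGE_MAGIC() wrapper in the linker script below appears intended to keep the magic/start/end words in little-endian byte order even for BE8 images, leaving the flag as the loader's endianness hint. A minimal loader-side sketch (read_le32() and zimage_endianness() are illustrative names, not kernel API):

#include <stdint.h>

static uint32_t read_le32(const uint8_t *p)
{
	return p[0] | ((uint32_t)p[1] << 8) |
	       ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
}

/* Returns 0 if not a zImage, 1 for a little-endian image, 2 for BE8. */
static int zimage_endianness(const uint8_t *image)
{
	if (read_le32(image + 0x24) != 0x016f2818)	/* magic missing */
		return 0;
	return read_le32(image + 0x30) == 0x04030201 ? 1 : 2;
}
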
--- /dev/null
+/*
+ * Copyright (C) 2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifdef CONFIG_CPU_ENDIAN_BE8
+#define ZIMAGE_MAGIC(x) ( (((x) >> 24) & 0x000000ff) | \
+ (((x) >> 8) & 0x0000ff00) | \
+ (((x) << 8) & 0x00ff0000) | \
+ (((x) << 24) & 0xff000000) )
+#else
+#define ZIMAGE_MAGIC(x) (x)
+#endif
+
+OUTPUT_ARCH(arm)
+ENTRY(_start)
+SECTIONS
+{
+ /DISCARD/ : {
+ *(.ARM.exidx*)
+ *(.ARM.extab*)
+ /*
+ * Discard any r/w data - this produces a link error if we have any,
+ * which is required for PIC decompression. Local data generates
+ * GOTOFF relocations, which prevents it being relocated independently
+ * of the text/got segments.
+ */
+ *(.data)
+ }
+
+ . = TEXT_START;
+ _text = .;
+
+ .text : {
+ _start = .;
+ *(.start)
+ *(.text)
+ *(.text.*)
+ *(.fixup)
+ *(.gnu.warning)
+ *(.glue_7t)
+ *(.glue_7)
+ }
+ .rodata : {
+ *(.rodata)
+ *(.rodata.*)
+ }
+ .piggydata : {
+ *(.piggydata)
+ }
+
+ . = ALIGN(4);
+ _etext = .;
+
+ .got.plt : { *(.got.plt) }
+ _got_start = .;
+ .got : { *(.got) }
+ _got_end = .;
+
+ /* ensure the zImage file size is always a multiple of 64 bits */
+ /* (without a dummy byte, ld just ignores the empty section) */
+ .pad : { BYTE(0); . = ALIGN(8); }
+ _edata = .;
+
+ _magic_sig = ZIMAGE_MAGIC(0x016f2818);
+ _magic_start = ZIMAGE_MAGIC(_start);
+ _magic_end = ZIMAGE_MAGIC(_edata);
+
+ . = BSS_START;
+ __bss_start = .;
+ .bss : { *(.bss) }
+ _end = .;
+
+ . = ALIGN(8); /* the stack must be 64-bit aligned */
+ .stack : { *(.stack) }
+
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+}
+++ /dev/null
-/*
- * linux/arch/arm/boot/compressed/vmlinux.lds.in
- *
- * Copyright (C) 2000 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-OUTPUT_ARCH(arm)
-ENTRY(_start)
-SECTIONS
-{
- /DISCARD/ : {
- *(.ARM.exidx*)
- *(.ARM.extab*)
- /*
- * Discard any r/w data - this produces a link error if we have any,
- * which is required for PIC decompression. Local data generates
- * GOTOFF relocations, which prevents it being relocated independently
- * of the text/got segments.
- */
- *(.data)
- }
-
- . = TEXT_START;
- _text = .;
-
- .text : {
- _start = .;
- *(.start)
- *(.text)
- *(.text.*)
- *(.fixup)
- *(.gnu.warning)
- *(.glue_7t)
- *(.glue_7)
- }
- .rodata : {
- *(.rodata)
- *(.rodata.*)
- }
- .piggydata : {
- *(.piggydata)
- }
-
- . = ALIGN(4);
- _etext = .;
-
- .got.plt : { *(.got.plt) }
- _got_start = .;
- .got : { *(.got) }
- _got_end = .;
-
- /* ensure the zImage file size is always a multiple of 64 bits */
- /* (without a dummy byte, ld just ignores the empty section) */
- .pad : { BYTE(0); . = ALIGN(8); }
- _edata = .;
-
- . = BSS_START;
- __bss_start = .;
- .bss : { *(.bss) }
- _end = .;
-
- . = ALIGN(8); /* the stack must be 64-bit aligned */
- .stack : { *(.stack) }
-
- .stab 0 : { *(.stab) }
- .stabstr 0 : { *(.stabstr) }
- .stab.excl 0 : { *(.stab.excl) }
- .stab.exclstr 0 : { *(.stab.exclstr) }
- .stab.index 0 : { *(.stab.index) }
- .stab.indexstr 0 : { *(.stab.indexstr) }
- .comment 0 : { *(.comment) }
-}
-
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/irqflags.h>
+#include <linux/cpu_pm.h>
#include <asm/mcpm.h>
#include <asm/cacheflush.h>
#include <asm/idmap.h>
#include <asm/cputype.h>
+#include <asm/suspend.h>
extern unsigned long mcpm_entry_vectors[MAX_NR_CLUSTERS][MAX_CPUS_PER_CLUSTER];
return 0;
}
+#ifdef CONFIG_ARM_CPU_SUSPEND
+
+static int __init nocache_trampoline(unsigned long _arg)
+{
+ void (*cache_disable)(void) = (void *)_arg;
+ unsigned int mpidr = read_cpuid_mpidr();
+ unsigned int cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+ unsigned int cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+ phys_reset_t phys_reset;
+
+ mcpm_set_entry_vector(cpu, cluster, cpu_resume);
+ setup_mm_for_reboot();
+
+ __mcpm_cpu_going_down(cpu, cluster);
+ BUG_ON(!__mcpm_outbound_enter_critical(cpu, cluster));
+ cache_disable();
+ __mcpm_outbound_leave_critical(cluster, CLUSTER_DOWN);
+ __mcpm_cpu_down(cpu, cluster);
+
+ phys_reset = (phys_reset_t)(unsigned long)virt_to_phys(cpu_reset);
+ phys_reset(virt_to_phys(mcpm_entry_point));
+ BUG();
+}
+
+int __init mcpm_loopback(void (*cache_disable)(void))
+{
+ int ret;
+
+ /*
+ * We're going to soft-restart the current CPU through the
+ * low-level MCPM code by leveraging the suspend/resume
+ * infrastructure. Let's play it safe by using cpu_pm_enter()
+ * in case the CPU init code path resets the VFP or similar.
+ */
+ local_irq_disable();
+ local_fiq_disable();
+ ret = cpu_pm_enter();
+ if (!ret) {
+ ret = cpu_suspend((unsigned long)cache_disable, nocache_trampoline);
+ cpu_pm_exit();
+ }
+ local_fiq_enable();
+ local_irq_enable();
+ if (ret)
+ pr_err("%s returned %d\n", __func__, ret);
+ return ret;
+}
+
+#endif
+
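
mcpm_loopback() is meant to be called once from a platform's MCPM backend init code, handing it the backend's own cache-disable routine so the boot CPU exercises the full power-down/power-up path (and its CPU init code) before secondaries come up. A hedged sketch of such a caller, with my_plat_cache_off()/my_plat_mcpm_init() as purely illustrative names and the mcpm_loopback() declaration assumed to be available from asm/mcpm.h:

#include <linux/init.h>
#include <asm/mcpm.h>

/* Illustrative only: a real backend would disable the C bit, clean and
 * invalidate the local L1, and drop out of SMP coherency here. */
static void my_plat_cache_off(void)
{
}

static int __init my_plat_mcpm_init(void)
{
	/* ... mcpm_platform_register(&my_plat_power_ops) would go here ... */
	return mcpm_loopback(my_plat_cache_off);
}
early_initcall(my_plat_mcpm_init);
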
struct sync_struct mcpm_sync;
/*
obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o
obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o
obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o
+obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
+obj-$(CONFIG_CRYPTO_SHA512_ARM_NEON) += sha512-arm-neon.o
aes-arm-y := aes-armv4.o aes_glue.o
aes-arm-bs-y := aesbs-core.o aesbs-glue.o
sha1-arm-y := sha1-armv4-large.o sha1_glue.o
+sha1-arm-neon-y := sha1-armv7-neon.o sha1_neon_glue.o
+sha512-arm-neon-y := sha512-armv7-neon.o sha512_neon_glue.o
quiet_cmd_perl = PERL $@
cmd_perl = $(PERL) $(<) > $(@)
@ that is being targeted.
#include <linux/linkage.h>
+#include <asm/assembler.h>
.text
.Ldone: mov r0,#0
ldmia sp!,{r4-r12,lr}
-.Labrt: mov pc,lr
+.Labrt: ret lr
ENDPROC(private_AES_set_encrypt_key)
.align 5
--- /dev/null
+/* sha1-armv7-neon.S - ARM/NEON accelerated SHA-1 transform function
+ *
+ * Copyright © 2013-2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <linux/linkage.h>
+
+
+.syntax unified
+.code 32
+.fpu neon
+
+.text
+
+
+/* Context structure */
+
+#define state_h0 0
+#define state_h1 4
+#define state_h2 8
+#define state_h3 12
+#define state_h4 16
+
+
+/* Constants */
+
+#define K1 0x5A827999
+#define K2 0x6ED9EBA1
+#define K3 0x8F1BBCDC
+#define K4 0xCA62C1D6
+.align 4
+.LK_VEC:
+.LK1: .long K1, K1, K1, K1
+.LK2: .long K2, K2, K2, K2
+.LK3: .long K3, K3, K3, K3
+.LK4: .long K4, K4, K4, K4
+
+
+/* Register macros */
+
+#define RSTATE r0
+#define RDATA r1
+#define RNBLKS r2
+#define ROLDSTACK r3
+#define RWK lr
+
+#define _a r4
+#define _b r5
+#define _c r6
+#define _d r7
+#define _e r8
+
+#define RT0 r9
+#define RT1 r10
+#define RT2 r11
+#define RT3 r12
+
+#define W0 q0
+#define W1 q1
+#define W2 q2
+#define W3 q3
+#define W4 q4
+#define W5 q5
+#define W6 q6
+#define W7 q7
+
+#define tmp0 q8
+#define tmp1 q9
+#define tmp2 q10
+#define tmp3 q11
+
+#define qK1 q12
+#define qK2 q13
+#define qK3 q14
+#define qK4 q15
+
+
+/* Round function macros. */
+
+#define WK_offs(i) (((i) & 15) * 4)
+
+#define _R_F1(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
+ W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ ldr RT3, [sp, WK_offs(i)]; \
+ pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
+ bic RT0, d, b; \
+ add e, e, a, ror #(32 - 5); \
+ and RT1, c, b; \
+ pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
+ add RT0, RT0, RT3; \
+ add e, e, RT1; \
+ ror b, #(32 - 30); \
+ pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
+ add e, e, RT0;
+
+#define _R_F2(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
+ W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ ldr RT3, [sp, WK_offs(i)]; \
+ pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
+ eor RT0, d, b; \
+ add e, e, a, ror #(32 - 5); \
+ eor RT0, RT0, c; \
+ pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
+ add e, e, RT3; \
+ ror b, #(32 - 30); \
+ pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
+ add e, e, RT0; \
+
+#define _R_F3(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
+ W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ ldr RT3, [sp, WK_offs(i)]; \
+ pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
+ eor RT0, b, c; \
+ and RT1, b, c; \
+ add e, e, a, ror #(32 - 5); \
+ pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
+ and RT0, RT0, d; \
+ add RT1, RT1, RT3; \
+ add e, e, RT0; \
+ ror b, #(32 - 30); \
+ pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
+ add e, e, RT1;
+
+#define _R_F4(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
+ W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ _R_F2(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
+ W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28)
+
+#define _R(a,b,c,d,e,f,i,pre1,pre2,pre3,i16,\
+ W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ _R_##f(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
+ W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28)
+
+#define R(a,b,c,d,e,f,i) \
+ _R_##f(a,b,c,d,e,i,dummy,dummy,dummy,i16,\
+ W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28)
+
+#define dummy(...)
+
+
+/* Input expansion macros. */
+
+/********* Precalc macros for rounds 0-15 *************************************/
+
+#define W_PRECALC_00_15() \
+ add RWK, sp, #(WK_offs(0)); \
+ \
+ vld1.32 {tmp0, tmp1}, [RDATA]!; \
+ vrev32.8 W0, tmp0; /* big => little */ \
+ vld1.32 {tmp2, tmp3}, [RDATA]!; \
+ vadd.u32 tmp0, W0, curK; \
+ vrev32.8 W7, tmp1; /* big => little */ \
+ vrev32.8 W6, tmp2; /* big => little */ \
+ vadd.u32 tmp1, W7, curK; \
+ vrev32.8 W5, tmp3; /* big => little */ \
+ vadd.u32 tmp2, W6, curK; \
+ vst1.32 {tmp0, tmp1}, [RWK]!; \
+ vadd.u32 tmp3, W5, curK; \
+ vst1.32 {tmp2, tmp3}, [RWK]; \
+
+#define WPRECALC_00_15_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vld1.32 {tmp0, tmp1}, [RDATA]!; \
+
+#define WPRECALC_00_15_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ add RWK, sp, #(WK_offs(0)); \
+
+#define WPRECALC_00_15_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vrev32.8 W0, tmp0; /* big => little */ \
+
+#define WPRECALC_00_15_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vld1.32 {tmp2, tmp3}, [RDATA]!; \
+
+#define WPRECALC_00_15_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vadd.u32 tmp0, W0, curK; \
+
+#define WPRECALC_00_15_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vrev32.8 W7, tmp1; /* big => little */ \
+
+#define WPRECALC_00_15_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vrev32.8 W6, tmp2; /* big => little */ \
+
+#define WPRECALC_00_15_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vadd.u32 tmp1, W7, curK; \
+
+#define WPRECALC_00_15_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vrev32.8 W5, tmp3; /* big => little */ \
+
+#define WPRECALC_00_15_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vadd.u32 tmp2, W6, curK; \
+
+#define WPRECALC_00_15_10(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vst1.32 {tmp0, tmp1}, [RWK]!; \
+
+#define WPRECALC_00_15_11(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vadd.u32 tmp3, W5, curK; \
+
+#define WPRECALC_00_15_12(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vst1.32 {tmp2, tmp3}, [RWK]; \
+
+
+/********* Precalc macros for rounds 16-31 ************************************/
+
+#define WPRECALC_16_31_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ veor tmp0, tmp0; \
+ vext.8 W, W_m16, W_m12, #8; \
+
+#define WPRECALC_16_31_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ add RWK, sp, #(WK_offs(i)); \
+ vext.8 tmp0, W_m04, tmp0, #4; \
+
+#define WPRECALC_16_31_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ veor tmp0, tmp0, W_m16; \
+ veor.32 W, W, W_m08; \
+
+#define WPRECALC_16_31_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ veor tmp1, tmp1; \
+ veor W, W, tmp0; \
+
+#define WPRECALC_16_31_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vshl.u32 tmp0, W, #1; \
+
+#define WPRECALC_16_31_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vext.8 tmp1, tmp1, W, #(16-12); \
+ vshr.u32 W, W, #31; \
+
+#define WPRECALC_16_31_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vorr tmp0, tmp0, W; \
+ vshr.u32 W, tmp1, #30; \
+
+#define WPRECALC_16_31_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vshl.u32 tmp1, tmp1, #2; \
+
+#define WPRECALC_16_31_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ veor tmp0, tmp0, W; \
+
+#define WPRECALC_16_31_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ veor W, tmp0, tmp1; \
+
+#define WPRECALC_16_31_10(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vadd.u32 tmp0, W, curK; \
+
+#define WPRECALC_16_31_11(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vst1.32 {tmp0}, [RWK];
+
+
+/********* Precalc macros for rounds 32-79 ************************************/
+
+#define WPRECALC_32_79_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ veor W, W_m28; \
+
+#define WPRECALC_32_79_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vext.8 tmp0, W_m08, W_m04, #8; \
+
+#define WPRECALC_32_79_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ veor W, W_m16; \
+
+#define WPRECALC_32_79_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ veor W, tmp0; \
+
+#define WPRECALC_32_79_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ add RWK, sp, #(WK_offs(i&~3)); \
+
+#define WPRECALC_32_79_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vshl.u32 tmp1, W, #2; \
+
+#define WPRECALC_32_79_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vshr.u32 tmp0, W, #30; \
+
+#define WPRECALC_32_79_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vorr W, tmp0, tmp1; \
+
+#define WPRECALC_32_79_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vadd.u32 tmp0, W, curK; \
+
+#define WPRECALC_32_79_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vst1.32 {tmp0}, [RWK];
+
+
+/*
+ * Transform nblks*64 bytes (nblks*16 32-bit words) at DATA.
+ *
+ * unsigned int
+ * sha1_transform_neon (void *ctx, const unsigned char *data,
+ * unsigned int nblks)
+ */
+.align 3
+ENTRY(sha1_transform_neon)
+ /* input:
+ * r0: ctx, CTX
+ * r1: data (64*nblks bytes)
+ * r2: nblks
+ */
+
+ cmp RNBLKS, #0;
+ beq .Ldo_nothing;
+
+ push {r4-r12, lr};
+ /*vpush {q4-q7};*/
+
+ adr RT3, .LK_VEC;
+
+ mov ROLDSTACK, sp;
+
+ /* Align stack. */
+ sub RT0, sp, #(16*4);
+ and RT0, #(~(16-1));
+ mov sp, RT0;
+
+ vld1.32 {qK1-qK2}, [RT3]!; /* Load K1,K2 */
+
+ /* Get the values of the chaining variables. */
+ ldm RSTATE, {_a-_e};
+
+ vld1.32 {qK3-qK4}, [RT3]; /* Load K3,K4 */
+
+#undef curK
+#define curK qK1
+ /* Precalc 0-15. */
+ W_PRECALC_00_15();
+
+.Loop:
+ /* Transform 0-15 + Precalc 16-31. */
+ _R( _a, _b, _c, _d, _e, F1, 0,
+ WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 16,
+ W4, W5, W6, W7, W0, _, _, _ );
+ _R( _e, _a, _b, _c, _d, F1, 1,
+ WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 16,
+ W4, W5, W6, W7, W0, _, _, _ );
+ _R( _d, _e, _a, _b, _c, F1, 2,
+ WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 16,
+ W4, W5, W6, W7, W0, _, _, _ );
+ _R( _c, _d, _e, _a, _b, F1, 3,
+ WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,16,
+ W4, W5, W6, W7, W0, _, _, _ );
+
+#undef curK
+#define curK qK2
+ _R( _b, _c, _d, _e, _a, F1, 4,
+ WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 20,
+ W3, W4, W5, W6, W7, _, _, _ );
+ _R( _a, _b, _c, _d, _e, F1, 5,
+ WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 20,
+ W3, W4, W5, W6, W7, _, _, _ );
+ _R( _e, _a, _b, _c, _d, F1, 6,
+ WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 20,
+ W3, W4, W5, W6, W7, _, _, _ );
+ _R( _d, _e, _a, _b, _c, F1, 7,
+ WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,20,
+ W3, W4, W5, W6, W7, _, _, _ );
+
+ _R( _c, _d, _e, _a, _b, F1, 8,
+ WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 24,
+ W2, W3, W4, W5, W6, _, _, _ );
+ _R( _b, _c, _d, _e, _a, F1, 9,
+ WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 24,
+ W2, W3, W4, W5, W6, _, _, _ );
+ _R( _a, _b, _c, _d, _e, F1, 10,
+ WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 24,
+ W2, W3, W4, W5, W6, _, _, _ );
+ _R( _e, _a, _b, _c, _d, F1, 11,
+ WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,24,
+ W2, W3, W4, W5, W6, _, _, _ );
+
+ _R( _d, _e, _a, _b, _c, F1, 12,
+ WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 28,
+ W1, W2, W3, W4, W5, _, _, _ );
+ _R( _c, _d, _e, _a, _b, F1, 13,
+ WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 28,
+ W1, W2, W3, W4, W5, _, _, _ );
+ _R( _b, _c, _d, _e, _a, F1, 14,
+ WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 28,
+ W1, W2, W3, W4, W5, _, _, _ );
+ _R( _a, _b, _c, _d, _e, F1, 15,
+ WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,28,
+ W1, W2, W3, W4, W5, _, _, _ );
+
+ /* Transform 16-63 + Precalc 32-79. */
+ _R( _e, _a, _b, _c, _d, F1, 16,
+ WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 32,
+ W0, W1, W2, W3, W4, W5, W6, W7);
+ _R( _d, _e, _a, _b, _c, F1, 17,
+ WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 32,
+ W0, W1, W2, W3, W4, W5, W6, W7);
+ _R( _c, _d, _e, _a, _b, F1, 18,
+ WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 32,
+ W0, W1, W2, W3, W4, W5, W6, W7);
+ _R( _b, _c, _d, _e, _a, F1, 19,
+ WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 32,
+ W0, W1, W2, W3, W4, W5, W6, W7);
+
+ _R( _a, _b, _c, _d, _e, F2, 20,
+ WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 36,
+ W7, W0, W1, W2, W3, W4, W5, W6);
+ _R( _e, _a, _b, _c, _d, F2, 21,
+ WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 36,
+ W7, W0, W1, W2, W3, W4, W5, W6);
+ _R( _d, _e, _a, _b, _c, F2, 22,
+ WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 36,
+ W7, W0, W1, W2, W3, W4, W5, W6);
+ _R( _c, _d, _e, _a, _b, F2, 23,
+ WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 36,
+ W7, W0, W1, W2, W3, W4, W5, W6);
+
+#undef curK
+#define curK qK3
+ _R( _b, _c, _d, _e, _a, F2, 24,
+ WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 40,
+ W6, W7, W0, W1, W2, W3, W4, W5);
+ _R( _a, _b, _c, _d, _e, F2, 25,
+ WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 40,
+ W6, W7, W0, W1, W2, W3, W4, W5);
+ _R( _e, _a, _b, _c, _d, F2, 26,
+ WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 40,
+ W6, W7, W0, W1, W2, W3, W4, W5);
+ _R( _d, _e, _a, _b, _c, F2, 27,
+ WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 40,
+ W6, W7, W0, W1, W2, W3, W4, W5);
+
+ _R( _c, _d, _e, _a, _b, F2, 28,
+ WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 44,
+ W5, W6, W7, W0, W1, W2, W3, W4);
+ _R( _b, _c, _d, _e, _a, F2, 29,
+ WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 44,
+ W5, W6, W7, W0, W1, W2, W3, W4);
+ _R( _a, _b, _c, _d, _e, F2, 30,
+ WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 44,
+ W5, W6, W7, W0, W1, W2, W3, W4);
+ _R( _e, _a, _b, _c, _d, F2, 31,
+ WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 44,
+ W5, W6, W7, W0, W1, W2, W3, W4);
+
+ _R( _d, _e, _a, _b, _c, F2, 32,
+ WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 48,
+ W4, W5, W6, W7, W0, W1, W2, W3);
+ _R( _c, _d, _e, _a, _b, F2, 33,
+ WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 48,
+ W4, W5, W6, W7, W0, W1, W2, W3);
+ _R( _b, _c, _d, _e, _a, F2, 34,
+ WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 48,
+ W4, W5, W6, W7, W0, W1, W2, W3);
+ _R( _a, _b, _c, _d, _e, F2, 35,
+ WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 48,
+ W4, W5, W6, W7, W0, W1, W2, W3);
+
+ _R( _e, _a, _b, _c, _d, F2, 36,
+ WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 52,
+ W3, W4, W5, W6, W7, W0, W1, W2);
+ _R( _d, _e, _a, _b, _c, F2, 37,
+ WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 52,
+ W3, W4, W5, W6, W7, W0, W1, W2);
+ _R( _c, _d, _e, _a, _b, F2, 38,
+ WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 52,
+ W3, W4, W5, W6, W7, W0, W1, W2);
+ _R( _b, _c, _d, _e, _a, F2, 39,
+ WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 52,
+ W3, W4, W5, W6, W7, W0, W1, W2);
+
+ _R( _a, _b, _c, _d, _e, F3, 40,
+ WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 56,
+ W2, W3, W4, W5, W6, W7, W0, W1);
+ _R( _e, _a, _b, _c, _d, F3, 41,
+ WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 56,
+ W2, W3, W4, W5, W6, W7, W0, W1);
+ _R( _d, _e, _a, _b, _c, F3, 42,
+ WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 56,
+ W2, W3, W4, W5, W6, W7, W0, W1);
+ _R( _c, _d, _e, _a, _b, F3, 43,
+ WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 56,
+ W2, W3, W4, W5, W6, W7, W0, W1);
+
+#undef curK
+#define curK qK4
+ _R( _b, _c, _d, _e, _a, F3, 44,
+ WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 60,
+ W1, W2, W3, W4, W5, W6, W7, W0);
+ _R( _a, _b, _c, _d, _e, F3, 45,
+ WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 60,
+ W1, W2, W3, W4, W5, W6, W7, W0);
+ _R( _e, _a, _b, _c, _d, F3, 46,
+ WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 60,
+ W1, W2, W3, W4, W5, W6, W7, W0);
+ _R( _d, _e, _a, _b, _c, F3, 47,
+ WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 60,
+ W1, W2, W3, W4, W5, W6, W7, W0);
+
+ _R( _c, _d, _e, _a, _b, F3, 48,
+ WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 64,
+ W0, W1, W2, W3, W4, W5, W6, W7);
+ _R( _b, _c, _d, _e, _a, F3, 49,
+ WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 64,
+ W0, W1, W2, W3, W4, W5, W6, W7);
+ _R( _a, _b, _c, _d, _e, F3, 50,
+ WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 64,
+ W0, W1, W2, W3, W4, W5, W6, W7);
+ _R( _e, _a, _b, _c, _d, F3, 51,
+ WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 64,
+ W0, W1, W2, W3, W4, W5, W6, W7);
+
+ _R( _d, _e, _a, _b, _c, F3, 52,
+ WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 68,
+ W7, W0, W1, W2, W3, W4, W5, W6);
+ _R( _c, _d, _e, _a, _b, F3, 53,
+ WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 68,
+ W7, W0, W1, W2, W3, W4, W5, W6);
+ _R( _b, _c, _d, _e, _a, F3, 54,
+ WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 68,
+ W7, W0, W1, W2, W3, W4, W5, W6);
+ _R( _a, _b, _c, _d, _e, F3, 55,
+ WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 68,
+ W7, W0, W1, W2, W3, W4, W5, W6);
+
+ _R( _e, _a, _b, _c, _d, F3, 56,
+ WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 72,
+ W6, W7, W0, W1, W2, W3, W4, W5);
+ _R( _d, _e, _a, _b, _c, F3, 57,
+ WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 72,
+ W6, W7, W0, W1, W2, W3, W4, W5);
+ _R( _c, _d, _e, _a, _b, F3, 58,
+ WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 72,
+ W6, W7, W0, W1, W2, W3, W4, W5);
+ _R( _b, _c, _d, _e, _a, F3, 59,
+ WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 72,
+ W6, W7, W0, W1, W2, W3, W4, W5);
+
+ subs RNBLKS, #1;
+
+ _R( _a, _b, _c, _d, _e, F4, 60,
+ WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 76,
+ W5, W6, W7, W0, W1, W2, W3, W4);
+ _R( _e, _a, _b, _c, _d, F4, 61,
+ WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 76,
+ W5, W6, W7, W0, W1, W2, W3, W4);
+ _R( _d, _e, _a, _b, _c, F4, 62,
+ WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 76,
+ W5, W6, W7, W0, W1, W2, W3, W4);
+ _R( _c, _d, _e, _a, _b, F4, 63,
+ WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 76,
+ W5, W6, W7, W0, W1, W2, W3, W4);
+
+ beq .Lend;
+
+ /* Transform 64-79 + Precalc 0-15 of next block. */
+#undef curK
+#define curK qK1
+ _R( _b, _c, _d, _e, _a, F4, 64,
+ WPRECALC_00_15_0, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+ _R( _a, _b, _c, _d, _e, F4, 65,
+ WPRECALC_00_15_1, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+ _R( _e, _a, _b, _c, _d, F4, 66,
+ WPRECALC_00_15_2, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+ _R( _d, _e, _a, _b, _c, F4, 67,
+ WPRECALC_00_15_3, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+
+ _R( _c, _d, _e, _a, _b, F4, 68,
+ dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+ _R( _b, _c, _d, _e, _a, F4, 69,
+ dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+ _R( _a, _b, _c, _d, _e, F4, 70,
+ WPRECALC_00_15_4, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+ _R( _e, _a, _b, _c, _d, F4, 71,
+ WPRECALC_00_15_5, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+
+ _R( _d, _e, _a, _b, _c, F4, 72,
+ dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+ _R( _c, _d, _e, _a, _b, F4, 73,
+ dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+ _R( _b, _c, _d, _e, _a, F4, 74,
+ WPRECALC_00_15_6, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+ _R( _a, _b, _c, _d, _e, F4, 75,
+ WPRECALC_00_15_7, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+
+ _R( _e, _a, _b, _c, _d, F4, 76,
+ WPRECALC_00_15_8, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+ _R( _d, _e, _a, _b, _c, F4, 77,
+ WPRECALC_00_15_9, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+ _R( _c, _d, _e, _a, _b, F4, 78,
+ WPRECALC_00_15_10, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+ _R( _b, _c, _d, _e, _a, F4, 79,
+ WPRECALC_00_15_11, dummy, WPRECALC_00_15_12, _, _, _, _, _, _, _, _, _ );
+
+ /* Update the chaining variables. */
+ ldm RSTATE, {RT0-RT3};
+ add _a, RT0;
+ ldr RT0, [RSTATE, #state_h4];
+ add _b, RT1;
+ add _c, RT2;
+ add _d, RT3;
+ add _e, RT0;
+ stm RSTATE, {_a-_e};
+
+ b .Loop;
+
+.Lend:
+ /* Transform 64-79 */
+ R( _b, _c, _d, _e, _a, F4, 64 );
+ R( _a, _b, _c, _d, _e, F4, 65 );
+ R( _e, _a, _b, _c, _d, F4, 66 );
+ R( _d, _e, _a, _b, _c, F4, 67 );
+ R( _c, _d, _e, _a, _b, F4, 68 );
+ R( _b, _c, _d, _e, _a, F4, 69 );
+ R( _a, _b, _c, _d, _e, F4, 70 );
+ R( _e, _a, _b, _c, _d, F4, 71 );
+ R( _d, _e, _a, _b, _c, F4, 72 );
+ R( _c, _d, _e, _a, _b, F4, 73 );
+ R( _b, _c, _d, _e, _a, F4, 74 );
+ R( _a, _b, _c, _d, _e, F4, 75 );
+ R( _e, _a, _b, _c, _d, F4, 76 );
+ R( _d, _e, _a, _b, _c, F4, 77 );
+ R( _c, _d, _e, _a, _b, F4, 78 );
+ R( _b, _c, _d, _e, _a, F4, 79 );
+
+ mov sp, ROLDSTACK;
+
+ /* Update the chaining variables. */
+ ldm RSTATE, {RT0-RT3};
+ add _a, RT0;
+ ldr RT0, [RSTATE, #state_h4];
+ add _b, RT1;
+ add _c, RT2;
+ add _d, RT3;
+ /*vpop {q4-q7};*/
+ add _e, RT0;
+ stm RSTATE, {_a-_e};
+
+ pop {r4-r12, pc};
+
+.Ldo_nothing:
+ bx lr
+ENDPROC(sha1_transform_neon)
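
For readers following the _R_F1/_R_F2/_R_F3 round macros above: they are register-scheduled forms of the standard SHA-1 round functions (Ch for rounds 0-19, Parity for 20-39 and 60-79, Maj for 40-59; F4 is simply F2 again). A plain-C sketch of one round, purely for reference; sha1_round() and rol32() are illustrative names, not kernel code:

#include <stdint.h>

static uint32_t rol32(uint32_t x, unsigned int n)
{
	return (x << n) | (x >> (32 - n));
}

/*
 * One round in the same in-place form as the macros: e += rol5(a) + f(b,c,d)
 * + w + k and b is rotated by 30; the roles of a..e rotate between calls,
 * and wk is the precomputed w[t] + K value held in the WK stack slots.
 */
static void sha1_round(uint32_t a, uint32_t *b, uint32_t c, uint32_t d,
		       uint32_t *e, uint32_t wk, int t)
{
	uint32_t f;

	if (t < 20)
		f = (*b & c) | (~*b & d);		/* F1: Ch */
	else if (t < 40 || t >= 60)
		f = *b ^ c ^ d;				/* F2/F4: Parity */
	else
		f = (*b & c) | (*b & d) | (c & d);	/* F3: Maj */

	*e += rol32(a, 5) + f + wk;
	*b = rol32(*b, 30);
}
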
#include <linux/types.h>
#include <crypto/sha.h>
#include <asm/byteorder.h>
+#include <asm/crypto/sha1.h>
-struct SHA1_CTX {
- uint32_t h0,h1,h2,h3,h4;
- u64 count;
- u8 data[SHA1_BLOCK_SIZE];
-};
-asmlinkage void sha1_block_data_order(struct SHA1_CTX *digest,
+asmlinkage void sha1_block_data_order(u32 *digest,
const unsigned char *data, unsigned int rounds);
static int sha1_init(struct shash_desc *desc)
{
- struct SHA1_CTX *sctx = shash_desc_ctx(desc);
- memset(sctx, 0, sizeof(*sctx));
- sctx->h0 = SHA1_H0;
- sctx->h1 = SHA1_H1;
- sctx->h2 = SHA1_H2;
- sctx->h3 = SHA1_H3;
- sctx->h4 = SHA1_H4;
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+
+ *sctx = (struct sha1_state){
+ .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
+ };
+
return 0;
}
-static int __sha1_update(struct SHA1_CTX *sctx, const u8 *data,
- unsigned int len, unsigned int partial)
+static int __sha1_update(struct sha1_state *sctx, const u8 *data,
+ unsigned int len, unsigned int partial)
{
unsigned int done = 0;
if (partial) {
done = SHA1_BLOCK_SIZE - partial;
- memcpy(sctx->data + partial, data, done);
- sha1_block_data_order(sctx, sctx->data, 1);
+ memcpy(sctx->buffer + partial, data, done);
+ sha1_block_data_order(sctx->state, sctx->buffer, 1);
}
if (len - done >= SHA1_BLOCK_SIZE) {
const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE;
- sha1_block_data_order(sctx, data + done, rounds);
+ sha1_block_data_order(sctx->state, data + done, rounds);
done += rounds * SHA1_BLOCK_SIZE;
}
- memcpy(sctx->data, data + done, len - done);
+ memcpy(sctx->buffer, data + done, len - done);
return 0;
}
-static int sha1_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
+int sha1_update_arm(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
{
- struct SHA1_CTX *sctx = shash_desc_ctx(desc);
+ struct sha1_state *sctx = shash_desc_ctx(desc);
unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
int res;
/* Handle the fast case right here */
if (partial + len < SHA1_BLOCK_SIZE) {
sctx->count += len;
- memcpy(sctx->data + partial, data, len);
+ memcpy(sctx->buffer + partial, data, len);
return 0;
}
res = __sha1_update(sctx, data, len, partial);
return res;
}
+EXPORT_SYMBOL_GPL(sha1_update_arm);
/* Add padding and return the message digest. */
static int sha1_final(struct shash_desc *desc, u8 *out)
{
- struct SHA1_CTX *sctx = shash_desc_ctx(desc);
+ struct sha1_state *sctx = shash_desc_ctx(desc);
unsigned int i, index, padlen;
__be32 *dst = (__be32 *)out;
__be64 bits;
/* We need to fill a whole block for __sha1_update() */
if (padlen <= 56) {
sctx->count += padlen;
- memcpy(sctx->data + index, padding, padlen);
+ memcpy(sctx->buffer + index, padding, padlen);
} else {
__sha1_update(sctx, padding, padlen, index);
}
/* Store state in digest */
for (i = 0; i < 5; i++)
- dst[i] = cpu_to_be32(((u32 *)sctx)[i]);
+ dst[i] = cpu_to_be32(sctx->state[i]);
/* Wipe context */
memset(sctx, 0, sizeof(*sctx));
static int sha1_export(struct shash_desc *desc, void *out)
{
- struct SHA1_CTX *sctx = shash_desc_ctx(desc);
+ struct sha1_state *sctx = shash_desc_ctx(desc);
memcpy(out, sctx, sizeof(*sctx));
return 0;
}
static int sha1_import(struct shash_desc *desc, const void *in)
{
- struct SHA1_CTX *sctx = shash_desc_ctx(desc);
+ struct sha1_state *sctx = shash_desc_ctx(desc);
memcpy(sctx, in, sizeof(*sctx));
return 0;
}
static struct shash_alg alg = {
.digestsize = SHA1_DIGEST_SIZE,
.init = sha1_init,
- .update = sha1_update,
+ .update = sha1_update_arm,
.final = sha1_final,
.export = sha1_export,
.import = sha1_import,
- .descsize = sizeof(struct SHA1_CTX),
- .statesize = sizeof(struct SHA1_CTX),
+ .descsize = sizeof(struct sha1_state),
+ .statesize = sizeof(struct sha1_state),
.base = {
.cra_name = "sha1",
.cra_driver_name= "sha1-asm",
--- /dev/null
+/*
+ * Glue code for the SHA1 Secure Hash Algorithm assembler implementation using
+ * ARM NEON instructions.
+ *
+ * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is based on sha1_generic.c and sha1_ssse3_glue.c:
+ * Copyright (c) Alan Smithee.
+ * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
+ * Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
+ * Copyright (c) Mathias Krause <minipli@googlemail.com>
+ * Copyright (c) Chandramouli Narayanan <mouli@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/cryptohash.h>
+#include <linux/types.h>
+#include <crypto/sha.h>
+#include <asm/byteorder.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <asm/crypto/sha1.h>
+
+
+asmlinkage void sha1_transform_neon(void *state_h, const char *data,
+ unsigned int rounds);
+
+
+static int sha1_neon_init(struct shash_desc *desc)
+{
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+
+ *sctx = (struct sha1_state){
+ .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
+ };
+
+ return 0;
+}
+
+static int __sha1_neon_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len, unsigned int partial)
+{
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+ unsigned int done = 0;
+
+ sctx->count += len;
+
+ if (partial) {
+ done = SHA1_BLOCK_SIZE - partial;
+ memcpy(sctx->buffer + partial, data, done);
+ sha1_transform_neon(sctx->state, sctx->buffer, 1);
+ }
+
+ if (len - done >= SHA1_BLOCK_SIZE) {
+ const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE;
+
+ sha1_transform_neon(sctx->state, data + done, rounds);
+ done += rounds * SHA1_BLOCK_SIZE;
+ }
+
+ memcpy(sctx->buffer, data + done, len - done);
+
+ return 0;
+}
+
+static int sha1_neon_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+{
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+ unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
+ int res;
+
+ /* Handle the fast case right here */
+ if (partial + len < SHA1_BLOCK_SIZE) {
+ sctx->count += len;
+ memcpy(sctx->buffer + partial, data, len);
+
+ return 0;
+ }
+
+ if (!may_use_simd()) {
+ res = sha1_update_arm(desc, data, len);
+ } else {
+ kernel_neon_begin();
+ res = __sha1_neon_update(desc, data, len, partial);
+ kernel_neon_end();
+ }
+
+ return res;
+}
+
+
+/* Add padding and return the message digest. */
+static int sha1_neon_final(struct shash_desc *desc, u8 *out)
+{
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+ unsigned int i, index, padlen;
+ __be32 *dst = (__be32 *)out;
+ __be64 bits;
+ static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
+
+ bits = cpu_to_be64(sctx->count << 3);
+
+ /* Pad out to 56 mod 64 and append length */
+ index = sctx->count % SHA1_BLOCK_SIZE;
+ padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index);
+ if (!may_use_simd()) {
+ sha1_update_arm(desc, padding, padlen);
+ sha1_update_arm(desc, (const u8 *)&bits, sizeof(bits));
+ } else {
+ kernel_neon_begin();
+ /* We need to fill a whole block for __sha1_neon_update() */
+ if (padlen <= 56) {
+ sctx->count += padlen;
+ memcpy(sctx->buffer + index, padding, padlen);
+ } else {
+ __sha1_neon_update(desc, padding, padlen, index);
+ }
+ __sha1_neon_update(desc, (const u8 *)&bits, sizeof(bits), 56);
+ kernel_neon_end();
+ }
+
+ /* Store state in digest */
+ for (i = 0; i < 5; i++)
+ dst[i] = cpu_to_be32(sctx->state[i]);
+
+ /* Wipe context */
+ memset(sctx, 0, sizeof(*sctx));
+
+ return 0;
+}
+
+static int sha1_neon_export(struct shash_desc *desc, void *out)
+{
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+
+ memcpy(out, sctx, sizeof(*sctx));
+
+ return 0;
+}
+
+static int sha1_neon_import(struct shash_desc *desc, const void *in)
+{
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+
+ memcpy(sctx, in, sizeof(*sctx));
+
+ return 0;
+}
+
+static struct shash_alg alg = {
+ .digestsize = SHA1_DIGEST_SIZE,
+ .init = sha1_neon_init,
+ .update = sha1_neon_update,
+ .final = sha1_neon_final,
+ .export = sha1_neon_export,
+ .import = sha1_neon_import,
+ .descsize = sizeof(struct sha1_state),
+ .statesize = sizeof(struct sha1_state),
+ .base = {
+ .cra_name = "sha1",
+ .cra_driver_name = "sha1-neon",
+ .cra_priority = 250,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA1_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+};
+
+static int __init sha1_neon_mod_init(void)
+{
+ if (!cpu_has_neon())
+ return -ENODEV;
+
+ return crypto_register_shash(&alg);
+}
+
+static void __exit sha1_neon_mod_fini(void)
+{
+ crypto_unregister_shash(&alg);
+}
+
+module_init(sha1_neon_mod_init);
+module_exit(sha1_neon_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, NEON accelerated");
+MODULE_ALIAS("sha1");
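
Because the driver registers under the generic "sha1" name with cra_priority 250, the crypto core prefers it over the lower-priority generic and plain-assembler drivers whenever it is loaded. A caller-side sketch of consuming it through the shash API (foo_sha1() is an illustrative name; error handling kept minimal):

#include <crypto/hash.h>
#include <crypto/sha.h>
#include <linux/err.h>
#include <linux/slab.h>

static int foo_sha1(const u8 *data, unsigned int len,
		    u8 digest[SHA1_DIGEST_SIZE])
{
	struct crypto_shash *tfm;
	struct shash_desc *desc;
	int ret;

	tfm = crypto_alloc_shash("sha1", 0, 0);	/* picks the highest-priority "sha1" */
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	desc = kzalloc(sizeof(*desc) + crypto_shash_descsize(tfm), GFP_KERNEL);
	if (!desc) {
		crypto_free_shash(tfm);
		return -ENOMEM;
	}
	desc->tfm = tfm;

	ret = crypto_shash_digest(desc, data, len, digest);

	kfree(desc);
	crypto_free_shash(tfm);
	return ret;
}
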
--- /dev/null
+/* sha512-armv7-neon.S - ARM/NEON assembly implementation of SHA-512 transform
+ *
+ * Copyright © 2013-2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <linux/linkage.h>
+
+
+.syntax unified
+.code 32
+.fpu neon
+
+.text
+
+/* structure of SHA512_CONTEXT */
+#define hd_a 0
+#define hd_b ((hd_a) + 8)
+#define hd_c ((hd_b) + 8)
+#define hd_d ((hd_c) + 8)
+#define hd_e ((hd_d) + 8)
+#define hd_f ((hd_e) + 8)
+#define hd_g ((hd_f) + 8)
+
+/* register macros */
+#define RK %r2
+
+#define RA d0
+#define RB d1
+#define RC d2
+#define RD d3
+#define RE d4
+#define RF d5
+#define RG d6
+#define RH d7
+
+#define RT0 d8
+#define RT1 d9
+#define RT2 d10
+#define RT3 d11
+#define RT4 d12
+#define RT5 d13
+#define RT6 d14
+#define RT7 d15
+
+#define RT01q q4
+#define RT23q q5
+#define RT45q q6
+#define RT67q q7
+
+#define RW0 d16
+#define RW1 d17
+#define RW2 d18
+#define RW3 d19
+#define RW4 d20
+#define RW5 d21
+#define RW6 d22
+#define RW7 d23
+#define RW8 d24
+#define RW9 d25
+#define RW10 d26
+#define RW11 d27
+#define RW12 d28
+#define RW13 d29
+#define RW14 d30
+#define RW15 d31
+
+#define RW01q q8
+#define RW23q q9
+#define RW45q q10
+#define RW67q q11
+#define RW89q q12
+#define RW1011q q13
+#define RW1213q q14
+#define RW1415q q15
+
+/***********************************************************************
+ * ARM assembly implementation of sha512 transform
+ ***********************************************************************/
+#define rounds2_0_63(ra, rb, rc, rd, re, rf, rg, rh, rw0, rw1, rw01q, rw2, \
+ rw23q, rw1415q, rw9, rw10, interleave_op, arg1) \
+ /* t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t]; */ \
+ vshr.u64 RT2, re, #14; \
+ vshl.u64 RT3, re, #64 - 14; \
+ interleave_op(arg1); \
+ vshr.u64 RT4, re, #18; \
+ vshl.u64 RT5, re, #64 - 18; \
+ vld1.64 {RT0}, [RK]!; \
+ veor.64 RT23q, RT23q, RT45q; \
+ vshr.u64 RT4, re, #41; \
+ vshl.u64 RT5, re, #64 - 41; \
+ vadd.u64 RT0, RT0, rw0; \
+ veor.64 RT23q, RT23q, RT45q; \
+ vmov.64 RT7, re; \
+ veor.64 RT1, RT2, RT3; \
+ vbsl.64 RT7, rf, rg; \
+ \
+ vadd.u64 RT1, RT1, rh; \
+ vshr.u64 RT2, ra, #28; \
+ vshl.u64 RT3, ra, #64 - 28; \
+ vadd.u64 RT1, RT1, RT0; \
+ vshr.u64 RT4, ra, #34; \
+ vshl.u64 RT5, ra, #64 - 34; \
+ vadd.u64 RT1, RT1, RT7; \
+ \
+ /* h = Sum0 (a) + Maj (a, b, c); */ \
+ veor.64 RT23q, RT23q, RT45q; \
+ vshr.u64 RT4, ra, #39; \
+ vshl.u64 RT5, ra, #64 - 39; \
+ veor.64 RT0, ra, rb; \
+ veor.64 RT23q, RT23q, RT45q; \
+ vbsl.64 RT0, rc, rb; \
+ vadd.u64 rd, rd, RT1; /* d+=t1; */ \
+ veor.64 rh, RT2, RT3; \
+ \
+ /* t1 = g + Sum1 (d) + Ch (d, e, f) + k[t] + w[t]; */ \
+ vshr.u64 RT2, rd, #14; \
+ vshl.u64 RT3, rd, #64 - 14; \
+ vadd.u64 rh, rh, RT0; \
+ vshr.u64 RT4, rd, #18; \
+ vshl.u64 RT5, rd, #64 - 18; \
+ vadd.u64 rh, rh, RT1; /* h+=t1; */ \
+ vld1.64 {RT0}, [RK]!; \
+ veor.64 RT23q, RT23q, RT45q; \
+ vshr.u64 RT4, rd, #41; \
+ vshl.u64 RT5, rd, #64 - 41; \
+ vadd.u64 RT0, RT0, rw1; \
+ veor.64 RT23q, RT23q, RT45q; \
+ vmov.64 RT7, rd; \
+ veor.64 RT1, RT2, RT3; \
+ vbsl.64 RT7, re, rf; \
+ \
+ vadd.u64 RT1, RT1, rg; \
+ vshr.u64 RT2, rh, #28; \
+ vshl.u64 RT3, rh, #64 - 28; \
+ vadd.u64 RT1, RT1, RT0; \
+ vshr.u64 RT4, rh, #34; \
+ vshl.u64 RT5, rh, #64 - 34; \
+ vadd.u64 RT1, RT1, RT7; \
+ \
+ /* g = Sum0 (h) + Maj (h, a, b); */ \
+ veor.64 RT23q, RT23q, RT45q; \
+ vshr.u64 RT4, rh, #39; \
+ vshl.u64 RT5, rh, #64 - 39; \
+ veor.64 RT0, rh, ra; \
+ veor.64 RT23q, RT23q, RT45q; \
+ vbsl.64 RT0, rb, ra; \
+ vadd.u64 rc, rc, RT1; /* c+=t1; */ \
+ veor.64 rg, RT2, RT3; \
+ \
+ /* w[0] += S1 (w[14]) + w[9] + S0 (w[1]); */ \
+ /* w[1] += S1 (w[15]) + w[10] + S0 (w[2]); */ \
+ \
+ /**** S0(w[1:2]) */ \
+ \
+ /* w[0:1] += w[9:10] */ \
+ /* RT23q = rw1:rw2 */ \
+ vext.u64 RT23q, rw01q, rw23q, #1; \
+ vadd.u64 rw0, rw9; \
+ vadd.u64 rg, rg, RT0; \
+ vadd.u64 rw1, rw10;\
+ vadd.u64 rg, rg, RT1; /* g+=t1; */ \
+ \
+ vshr.u64 RT45q, RT23q, #1; \
+ vshl.u64 RT67q, RT23q, #64 - 1; \
+ vshr.u64 RT01q, RT23q, #8; \
+ veor.u64 RT45q, RT45q, RT67q; \
+ vshl.u64 RT67q, RT23q, #64 - 8; \
+ veor.u64 RT45q, RT45q, RT01q; \
+ vshr.u64 RT01q, RT23q, #7; \
+ veor.u64 RT45q, RT45q, RT67q; \
+ \
+ /**** S1(w[14:15]) */ \
+ vshr.u64 RT23q, rw1415q, #6; \
+ veor.u64 RT01q, RT01q, RT45q; \
+ vshr.u64 RT45q, rw1415q, #19; \
+ vshl.u64 RT67q, rw1415q, #64 - 19; \
+ veor.u64 RT23q, RT23q, RT45q; \
+ vshr.u64 RT45q, rw1415q, #61; \
+ veor.u64 RT23q, RT23q, RT67q; \
+ vshl.u64 RT67q, rw1415q, #64 - 61; \
+ veor.u64 RT23q, RT23q, RT45q; \
+ vadd.u64 rw01q, RT01q; /* w[0:1] += S(w[1:2]) */ \
+ veor.u64 RT01q, RT23q, RT67q;
+#define vadd_RT01q(rw01q) \
+ /* w[0:1] += S(w[14:15]) */ \
+ vadd.u64 rw01q, RT01q;
+
+#define dummy(_) /*_*/
+
+#define rounds2_64_79(ra, rb, rc, rd, re, rf, rg, rh, rw0, rw1, \
+ interleave_op1, arg1, interleave_op2, arg2) \
+ /* t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t]; */ \
+ vshr.u64 RT2, re, #14; \
+ vshl.u64 RT3, re, #64 - 14; \
+ interleave_op1(arg1); \
+ vshr.u64 RT4, re, #18; \
+ vshl.u64 RT5, re, #64 - 18; \
+ interleave_op2(arg2); \
+ vld1.64 {RT0}, [RK]!; \
+ veor.64 RT23q, RT23q, RT45q; \
+ vshr.u64 RT4, re, #41; \
+ vshl.u64 RT5, re, #64 - 41; \
+ vadd.u64 RT0, RT0, rw0; \
+ veor.64 RT23q, RT23q, RT45q; \
+ vmov.64 RT7, re; \
+ veor.64 RT1, RT2, RT3; \
+ vbsl.64 RT7, rf, rg; \
+ \
+ vadd.u64 RT1, RT1, rh; \
+ vshr.u64 RT2, ra, #28; \
+ vshl.u64 RT3, ra, #64 - 28; \
+ vadd.u64 RT1, RT1, RT0; \
+ vshr.u64 RT4, ra, #34; \
+ vshl.u64 RT5, ra, #64 - 34; \
+ vadd.u64 RT1, RT1, RT7; \
+ \
+ /* h = Sum0 (a) + Maj (a, b, c); */ \
+ veor.64 RT23q, RT23q, RT45q; \
+ vshr.u64 RT4, ra, #39; \
+ vshl.u64 RT5, ra, #64 - 39; \
+ veor.64 RT0, ra, rb; \
+ veor.64 RT23q, RT23q, RT45q; \
+ vbsl.64 RT0, rc, rb; \
+ vadd.u64 rd, rd, RT1; /* d+=t1; */ \
+ veor.64 rh, RT2, RT3; \
+ \
+ /* t1 = g + Sum1 (d) + Ch (d, e, f) + k[t] + w[t]; */ \
+ vshr.u64 RT2, rd, #14; \
+ vshl.u64 RT3, rd, #64 - 14; \
+ vadd.u64 rh, rh, RT0; \
+ vshr.u64 RT4, rd, #18; \
+ vshl.u64 RT5, rd, #64 - 18; \
+ vadd.u64 rh, rh, RT1; /* h+=t1; */ \
+ vld1.64 {RT0}, [RK]!; \
+ veor.64 RT23q, RT23q, RT45q; \
+ vshr.u64 RT4, rd, #41; \
+ vshl.u64 RT5, rd, #64 - 41; \
+ vadd.u64 RT0, RT0, rw1; \
+ veor.64 RT23q, RT23q, RT45q; \
+ vmov.64 RT7, rd; \
+ veor.64 RT1, RT2, RT3; \
+ vbsl.64 RT7, re, rf; \
+ \
+ vadd.u64 RT1, RT1, rg; \
+ vshr.u64 RT2, rh, #28; \
+ vshl.u64 RT3, rh, #64 - 28; \
+ vadd.u64 RT1, RT1, RT0; \
+ vshr.u64 RT4, rh, #34; \
+ vshl.u64 RT5, rh, #64 - 34; \
+ vadd.u64 RT1, RT1, RT7; \
+ \
+ /* g = Sum0 (h) + Maj (h, a, b); */ \
+ veor.64 RT23q, RT23q, RT45q; \
+ vshr.u64 RT4, rh, #39; \
+ vshl.u64 RT5, rh, #64 - 39; \
+ veor.64 RT0, rh, ra; \
+ veor.64 RT23q, RT23q, RT45q; \
+ vbsl.64 RT0, rb, ra; \
+ vadd.u64 rc, rc, RT1; /* c+=t1; */ \
+ veor.64 rg, RT2, RT3;
+#define vadd_rg_RT0(rg) \
+ vadd.u64 rg, rg, RT0;
+#define vadd_rg_RT1(rg) \
+ vadd.u64 rg, rg, RT1; /* g+=t1; */
+
+.align 3
+ENTRY(sha512_transform_neon)
+ /* Input:
+ * %r0: SHA512_CONTEXT
+ * %r1: data
+ * %r2: u64 k[] constants
+ * %r3: nblks
+ */
+ push {%lr};
+
+ mov %lr, #0;
+
+ /* Load context to d0-d7 */
+ vld1.64 {RA-RD}, [%r0]!;
+ vld1.64 {RE-RH}, [%r0];
+ sub %r0, #(4*8);
+
+ /* Load input to w[16], d16-d31 */
+ /* NOTE: Assumes that on ARMv7 unaligned accesses are always allowed. */
+ vld1.64 {RW0-RW3}, [%r1]!;
+ vld1.64 {RW4-RW7}, [%r1]!;
+ vld1.64 {RW8-RW11}, [%r1]!;
+ vld1.64 {RW12-RW15}, [%r1]!;
+#ifdef __ARMEL__
+ /* byteswap */
+ vrev64.8 RW01q, RW01q;
+ vrev64.8 RW23q, RW23q;
+ vrev64.8 RW45q, RW45q;
+ vrev64.8 RW67q, RW67q;
+ vrev64.8 RW89q, RW89q;
+ vrev64.8 RW1011q, RW1011q;
+ vrev64.8 RW1213q, RW1213q;
+ vrev64.8 RW1415q, RW1415q;
+#endif
+
+ /* EABI says that d8-d15 must be preserved by callee. */
+ /*vpush {RT0-RT7};*/
+
+.Loop:
+ rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1, RW01q, RW2,
+ RW23q, RW1415q, RW9, RW10, dummy, _);
+ b .Lenter_rounds;
+
+.Loop_rounds:
+ rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1, RW01q, RW2,
+ RW23q, RW1415q, RW9, RW10, vadd_RT01q, RW1415q);
+.Lenter_rounds:
+ rounds2_0_63(RG, RH, RA, RB, RC, RD, RE, RF, RW2, RW3, RW23q, RW4,
+ RW45q, RW01q, RW11, RW12, vadd_RT01q, RW01q);
+ rounds2_0_63(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5, RW45q, RW6,
+ RW67q, RW23q, RW13, RW14, vadd_RT01q, RW23q);
+ rounds2_0_63(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7, RW67q, RW8,
+ RW89q, RW45q, RW15, RW0, vadd_RT01q, RW45q);
+ rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9, RW89q, RW10,
+ RW1011q, RW67q, RW1, RW2, vadd_RT01q, RW67q);
+ rounds2_0_63(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11, RW1011q, RW12,
+ RW1213q, RW89q, RW3, RW4, vadd_RT01q, RW89q);
+ add %lr, #16;
+ rounds2_0_63(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13, RW1213q, RW14,
+ RW1415q, RW1011q, RW5, RW6, vadd_RT01q, RW1011q);
+ cmp %lr, #64;
+ rounds2_0_63(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15, RW1415q, RW0,
+ RW01q, RW1213q, RW7, RW8, vadd_RT01q, RW1213q);
+ bne .Loop_rounds;
+
+ subs %r3, #1;
+
+ rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1,
+ vadd_RT01q, RW1415q, dummy, _);
+ rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW2, RW3,
+ vadd_rg_RT0, RG, vadd_rg_RT1, RG);
+ beq .Lhandle_tail;
+ vld1.64 {RW0-RW3}, [%r1]!;
+ rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5,
+ vadd_rg_RT0, RE, vadd_rg_RT1, RE);
+ rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7,
+ vadd_rg_RT0, RC, vadd_rg_RT1, RC);
+#ifdef __ARMEL__
+ vrev64.8 RW01q, RW01q;
+ vrev64.8 RW23q, RW23q;
+#endif
+ vld1.64 {RW4-RW7}, [%r1]!;
+ rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9,
+ vadd_rg_RT0, RA, vadd_rg_RT1, RA);
+ rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11,
+ vadd_rg_RT0, RG, vadd_rg_RT1, RG);
+#ifdef __ARMEL__
+ vrev64.8 RW45q, RW45q;
+ vrev64.8 RW67q, RW67q;
+#endif
+ vld1.64 {RW8-RW11}, [%r1]!;
+ rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13,
+ vadd_rg_RT0, RE, vadd_rg_RT1, RE);
+ rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15,
+ vadd_rg_RT0, RC, vadd_rg_RT1, RC);
+#ifdef __ARMEL__
+ vrev64.8 RW89q, RW89q;
+ vrev64.8 RW1011q, RW1011q;
+#endif
+ vld1.64 {RW12-RW15}, [%r1]!;
+ vadd_rg_RT0(RA);
+ vadd_rg_RT1(RA);
+
+ /* Load context */
+ vld1.64 {RT0-RT3}, [%r0]!;
+ vld1.64 {RT4-RT7}, [%r0];
+ sub %r0, #(4*8);
+
+#ifdef __ARMEL__
+ vrev64.8 RW1213q, RW1213q;
+ vrev64.8 RW1415q, RW1415q;
+#endif
+
+ vadd.u64 RA, RT0;
+ vadd.u64 RB, RT1;
+ vadd.u64 RC, RT2;
+ vadd.u64 RD, RT3;
+ vadd.u64 RE, RT4;
+ vadd.u64 RF, RT5;
+ vadd.u64 RG, RT6;
+ vadd.u64 RH, RT7;
+
+ /* Store the first half of context */
+ vst1.64 {RA-RD}, [%r0]!;
+ sub RK, $(8*80);
+ vst1.64 {RE-RH}, [%r0]; /* Store the last half of context */
+ mov %lr, #0;
+ sub %r0, #(4*8);
+
+ b .Loop;
+
+.Lhandle_tail:
+ rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5,
+ vadd_rg_RT0, RE, vadd_rg_RT1, RE);
+ rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7,
+ vadd_rg_RT0, RC, vadd_rg_RT1, RC);
+ rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9,
+ vadd_rg_RT0, RA, vadd_rg_RT1, RA);
+ rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11,
+ vadd_rg_RT0, RG, vadd_rg_RT1, RG);
+ rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13,
+ vadd_rg_RT0, RE, vadd_rg_RT1, RE);
+ rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15,
+ vadd_rg_RT0, RC, vadd_rg_RT1, RC);
+
+ /* Load context to d16-d23 */
+ vld1.64 {RW0-RW3}, [%r0]!;
+ vadd_rg_RT0(RA);
+ vld1.64 {RW4-RW7}, [%r0];
+ vadd_rg_RT1(RA);
+ sub %r0, #(4*8);
+
+ vadd.u64 RA, RW0;
+ vadd.u64 RB, RW1;
+ vadd.u64 RC, RW2;
+ vadd.u64 RD, RW3;
+ vadd.u64 RE, RW4;
+ vadd.u64 RF, RW5;
+ vadd.u64 RG, RW6;
+ vadd.u64 RH, RW7;
+
+ /* Store the first half of context */
+ vst1.64 {RA-RD}, [%r0]!;
+
+ /* Clear used registers */
+ /* d16-d31 */
+ veor.u64 RW01q, RW01q;
+ veor.u64 RW23q, RW23q;
+ veor.u64 RW45q, RW45q;
+ veor.u64 RW67q, RW67q;
+ vst1.64 {RE-RH}, [%r0]; /* Store the last half of context */
+ veor.u64 RW89q, RW89q;
+ veor.u64 RW1011q, RW1011q;
+ veor.u64 RW1213q, RW1213q;
+ veor.u64 RW1415q, RW1415q;
+ /* d8-d15 */
+ /*vpop {RT0-RT7};*/
+ /* d0-d7 (q0-q3) */
+ veor.u64 %q0, %q0;
+ veor.u64 %q1, %q1;
+ veor.u64 %q2, %q2;
+ veor.u64 %q3, %q3;
+
+ pop {%pc};
+ENDPROC(sha512_transform_neon)
--- /dev/null
+/*
+ * Glue code for the SHA512 Secure Hash Algorithm assembly implementation
+ * using NEON instructions.
+ *
+ * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is based on sha512_ssse3_glue.c:
+ * Copyright (C) 2013 Intel Corporation
+ * Author: Tim Chen <tim.c.chen@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/cryptohash.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <crypto/sha.h>
+#include <asm/byteorder.h>
+#include <asm/simd.h>
+#include <asm/neon.h>
+
+
+static const u64 sha512_k[] = {
+ 0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL,
+ 0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL,
+ 0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL,
+ 0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL,
+ 0xd807aa98a3030242ULL, 0x12835b0145706fbeULL,
+ 0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL,
+ 0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL,
+ 0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL,
+ 0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL,
+ 0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL,
+ 0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL,
+ 0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL,
+ 0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL,
+ 0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL,
+ 0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL,
+ 0x06ca6351e003826fULL, 0x142929670a0e6e70ULL,
+ 0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL,
+ 0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL,
+ 0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL,
+ 0x81c2c92e47edaee6ULL, 0x92722c851482353bULL,
+ 0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL,
+ 0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL,
+ 0xd192e819d6ef5218ULL, 0xd69906245565a910ULL,
+ 0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL,
+ 0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL,
+ 0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL,
+ 0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL,
+ 0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL,
+ 0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL,
+ 0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL,
+ 0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL,
+ 0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL,
+ 0xca273eceea26619cULL, 0xd186b8c721c0c207ULL,
+ 0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL,
+ 0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL,
+ 0x113f9804bef90daeULL, 0x1b710b35131c471bULL,
+ 0x28db77f523047d84ULL, 0x32caab7b40c72493ULL,
+ 0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL,
+ 0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL,
+ 0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL
+};
+
+
+asmlinkage void sha512_transform_neon(u64 *digest, const void *data,
+ const u64 k[], unsigned int num_blks);
+
+
+static int sha512_neon_init(struct shash_desc *desc)
+{
+ struct sha512_state *sctx = shash_desc_ctx(desc);
+
+ sctx->state[0] = SHA512_H0;
+ sctx->state[1] = SHA512_H1;
+ sctx->state[2] = SHA512_H2;
+ sctx->state[3] = SHA512_H3;
+ sctx->state[4] = SHA512_H4;
+ sctx->state[5] = SHA512_H5;
+ sctx->state[6] = SHA512_H6;
+ sctx->state[7] = SHA512_H7;
+ sctx->count[0] = sctx->count[1] = 0;
+
+ return 0;
+}
+
+static int __sha512_neon_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len, unsigned int partial)
+{
+ struct sha512_state *sctx = shash_desc_ctx(desc);
+ unsigned int done = 0;
+
+ sctx->count[0] += len;
+ if (sctx->count[0] < len)
+ sctx->count[1]++;
+
+ if (partial) {
+ done = SHA512_BLOCK_SIZE - partial;
+ memcpy(sctx->buf + partial, data, done);
+ sha512_transform_neon(sctx->state, sctx->buf, sha512_k, 1);
+ }
+
+ if (len - done >= SHA512_BLOCK_SIZE) {
+ const unsigned int rounds = (len - done) / SHA512_BLOCK_SIZE;
+
+ sha512_transform_neon(sctx->state, data + done, sha512_k,
+ rounds);
+
+ done += rounds * SHA512_BLOCK_SIZE;
+ }
+
+ memcpy(sctx->buf, data + done, len - done);
+
+ return 0;
+}
+
+static int sha512_neon_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+{
+ struct sha512_state *sctx = shash_desc_ctx(desc);
+ unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE;
+ int res;
+
+ /* Handle the fast case right here */
+ if (partial + len < SHA512_BLOCK_SIZE) {
+ sctx->count[0] += len;
+ if (sctx->count[0] < len)
+ sctx->count[1]++;
+ memcpy(sctx->buf + partial, data, len);
+
+ return 0;
+ }
+
+ if (!may_use_simd()) {
+ res = crypto_sha512_update(desc, data, len);
+ } else {
+ kernel_neon_begin();
+ res = __sha512_neon_update(desc, data, len, partial);
+ kernel_neon_end();
+ }
+
+ return res;
+}
+
+
+/* Add padding and return the message digest. */
+static int sha512_neon_final(struct shash_desc *desc, u8 *out)
+{
+ struct sha512_state *sctx = shash_desc_ctx(desc);
+ unsigned int i, index, padlen;
+ __be64 *dst = (__be64 *)out;
+ __be64 bits[2];
+ static const u8 padding[SHA512_BLOCK_SIZE] = { 0x80, };
+
+ /* save number of bits */
+ bits[1] = cpu_to_be64(sctx->count[0] << 3);
+ bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61);
+
+ /* Pad out to 112 mod 128 and append length */
+ index = sctx->count[0] & 0x7f;
+ padlen = (index < 112) ? (112 - index) : ((128+112) - index);
+
+ if (!may_use_simd()) {
+ crypto_sha512_update(desc, padding, padlen);
+ crypto_sha512_update(desc, (const u8 *)&bits, sizeof(bits));
+ } else {
+ kernel_neon_begin();
+ /* We need to fill a whole block for __sha512_neon_update() */
+ if (padlen <= 112) {
+ sctx->count[0] += padlen;
+ if (sctx->count[0] < padlen)
+ sctx->count[1]++;
+ memcpy(sctx->buf + index, padding, padlen);
+ } else {
+ __sha512_neon_update(desc, padding, padlen, index);
+ }
+ __sha512_neon_update(desc, (const u8 *)&bits,
+ sizeof(bits), 112);
+ kernel_neon_end();
+ }
+
+ /* Store state in digest */
+ for (i = 0; i < 8; i++)
+ dst[i] = cpu_to_be64(sctx->state[i]);
+
+ /* Wipe context */
+ memset(sctx, 0, sizeof(*sctx));
+
+ return 0;
+}
+
+static int sha512_neon_export(struct shash_desc *desc, void *out)
+{
+ struct sha512_state *sctx = shash_desc_ctx(desc);
+
+ memcpy(out, sctx, sizeof(*sctx));
+
+ return 0;
+}
+
+static int sha512_neon_import(struct shash_desc *desc, const void *in)
+{
+ struct sha512_state *sctx = shash_desc_ctx(desc);
+
+ memcpy(sctx, in, sizeof(*sctx));
+
+ return 0;
+}
+
+static int sha384_neon_init(struct shash_desc *desc)
+{
+ struct sha512_state *sctx = shash_desc_ctx(desc);
+
+ sctx->state[0] = SHA384_H0;
+ sctx->state[1] = SHA384_H1;
+ sctx->state[2] = SHA384_H2;
+ sctx->state[3] = SHA384_H3;
+ sctx->state[4] = SHA384_H4;
+ sctx->state[5] = SHA384_H5;
+ sctx->state[6] = SHA384_H6;
+ sctx->state[7] = SHA384_H7;
+
+ sctx->count[0] = sctx->count[1] = 0;
+
+ return 0;
+}
+
+static int sha384_neon_final(struct shash_desc *desc, u8 *hash)
+{
+ u8 D[SHA512_DIGEST_SIZE];
+
+ sha512_neon_final(desc, D);
+
+ memcpy(hash, D, SHA384_DIGEST_SIZE);
+ memset(D, 0, SHA512_DIGEST_SIZE);
+
+ return 0;
+}
+
+static struct shash_alg algs[] = { {
+ .digestsize = SHA512_DIGEST_SIZE,
+ .init = sha512_neon_init,
+ .update = sha512_neon_update,
+ .final = sha512_neon_final,
+ .export = sha512_neon_export,
+ .import = sha512_neon_import,
+ .descsize = sizeof(struct sha512_state),
+ .statesize = sizeof(struct sha512_state),
+ .base = {
+ .cra_name = "sha512",
+ .cra_driver_name = "sha512-neon",
+ .cra_priority = 250,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA512_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+}, {
+ .digestsize = SHA384_DIGEST_SIZE,
+ .init = sha384_neon_init,
+ .update = sha512_neon_update,
+ .final = sha384_neon_final,
+ .export = sha512_neon_export,
+ .import = sha512_neon_import,
+ .descsize = sizeof(struct sha512_state),
+ .statesize = sizeof(struct sha512_state),
+ .base = {
+ .cra_name = "sha384",
+ .cra_driver_name = "sha384-neon",
+ .cra_priority = 250,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA384_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+} };
+
+static int __init sha512_neon_mod_init(void)
+{
+ if (!cpu_has_neon())
+ return -ENODEV;
+
+ return crypto_register_shashes(algs, ARRAY_SIZE(algs));
+}
+
+static void __exit sha512_neon_mod_fini(void)
+{
+ crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
+}
+
+module_init(sha512_neon_mod_init);
+module_exit(sha512_neon_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, NEON accelerated");
+
+MODULE_ALIAS("sha512");
+MODULE_ALIAS("sha384");
#include <asm/domain.h>
#include <asm/opcodes-virt.h>
#include <asm/asm-offsets.h>
+#include <asm/page.h>
+#include <asm/thread_info.h>
#define IOMEM(x) (x)
* Get current thread_info.
*/
.macro get_thread_info, rd
- ARM( mov \rd, sp, lsr #13 )
+ ARM( mov \rd, sp, lsr #THREAD_SIZE_ORDER + PAGE_SHIFT )
THUMB( mov \rd, sp )
- THUMB( lsr \rd, \rd, #13 )
- mov \rd, \rd, lsl #13
+ THUMB( lsr \rd, \rd, #THREAD_SIZE_ORDER + PAGE_SHIFT )
+ mov \rd, \rd, lsl #THREAD_SIZE_ORDER + PAGE_SHIFT
.endm
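
The macro above simply rounds the stack pointer down to the base of the
THREAD_SIZE-aligned kernel stack; a C sketch of the same computation, assuming
THREAD_SIZE stays a power of two, would look like this (illustrative only):

/* What get_thread_info computes: mask sp down to the stack base. */
static inline struct thread_info *example_thread_info_from_sp(unsigned long sp)
{
	return (struct thread_info *)(sp & ~(THREAD_SIZE - 1));
}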
/*
#endif
.endm
+ .irp c,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo
+ .macro ret\c, reg
+#if __LINUX_ARM_ARCH__ < 6
+ mov\c pc, \reg
+#else
+ .ifeqs "\reg", "lr"
+ bx\c \reg
+ .else
+ mov\c pc, \reg
+ .endif
+#endif
+ .endm
+ .endr
+
+ .macro ret.w, reg
+ ret \reg
+#ifdef CONFIG_THUMB2_KERNEL
+ nop
+#endif
+ .endm
+
#endif /* __ASM_ASSEMBLER_H__ */
#define ARM_CPU_IMP_ARM 0x41
#define ARM_CPU_IMP_INTEL 0x69
-#define ARM_CPU_PART_ARM1136 0xB360
-#define ARM_CPU_PART_ARM1156 0xB560
-#define ARM_CPU_PART_ARM1176 0xB760
-#define ARM_CPU_PART_ARM11MPCORE 0xB020
-#define ARM_CPU_PART_CORTEX_A8 0xC080
-#define ARM_CPU_PART_CORTEX_A9 0xC090
-#define ARM_CPU_PART_CORTEX_A5 0xC050
-#define ARM_CPU_PART_CORTEX_A15 0xC0F0
-#define ARM_CPU_PART_CORTEX_A7 0xC070
-#define ARM_CPU_PART_CORTEX_A12 0xC0D0
-#define ARM_CPU_PART_CORTEX_A17 0xC0E0
+/* ARM implemented processors */
+#define ARM_CPU_PART_ARM1136 0x4100b360
+#define ARM_CPU_PART_ARM1156 0x4100b560
+#define ARM_CPU_PART_ARM1176 0x4100b760
+#define ARM_CPU_PART_ARM11MPCORE 0x4100b020
+#define ARM_CPU_PART_CORTEX_A8 0x4100c080
+#define ARM_CPU_PART_CORTEX_A9 0x4100c090
+#define ARM_CPU_PART_CORTEX_A5 0x4100c050
+#define ARM_CPU_PART_CORTEX_A7 0x4100c070
+#define ARM_CPU_PART_CORTEX_A12 0x4100c0d0
+#define ARM_CPU_PART_CORTEX_A17 0x4100c0e0
+#define ARM_CPU_PART_CORTEX_A15 0x4100c0f0
#define ARM_CPU_XSCALE_ARCH_MASK 0xe000
#define ARM_CPU_XSCALE_ARCH_V1 0x2000
return (read_cpuid_id() & 0xFF000000) >> 24;
}
-static inline unsigned int __attribute_const__ read_cpuid_part_number(void)
+/*
+ * The CPU part number is meaningless without referring to the CPU
+ * implementer: implementers are free to define their own part numbers
+ * which are permitted to clash with other implementers' part numbers.
+ */
+static inline unsigned int __attribute_const__ read_cpuid_part(void)
+{
+ return read_cpuid_id() & 0xff00fff0;
+}
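
A short sketch of the intended usage: since the returned value now carries the
implementer byte as well, a single comparison against the widened constants is
enough (the helper name below is made up; the PMU probe further down in this
series follows the same pattern):

static bool example_is_cortex_a15(void)
{
	return read_cpuid_part() == ARM_CPU_PART_CORTEX_A15;
}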
+
+static inline unsigned int __attribute_const__ __deprecated read_cpuid_part_number(void)
{
return read_cpuid_id() & 0xFFF0;
}
static inline unsigned int __attribute_const__ xscale_cpu_arch_version(void)
{
- return read_cpuid_part_number() & ARM_CPU_XSCALE_ARCH_MASK;
+ return read_cpuid_id() & ARM_CPU_XSCALE_ARCH_MASK;
}
static inline unsigned int __attribute_const__ read_cpuid_cachetype(void)
--- /dev/null
+#ifndef ASM_ARM_CRYPTO_SHA1_H
+#define ASM_ARM_CRYPTO_SHA1_H
+
+#include <linux/crypto.h>
+#include <crypto/sha.h>
+
+extern int sha1_update_arm(struct shash_desc *desc, const u8 *data,
+ unsigned int len);
+
+#endif
\symbol_name:
mov r8, lr
arch_irq_handler_default
- mov pc, r8
+ ret r8
.endm
# endif
#endif
-#ifdef CONFIG_CPU_V7
-# ifdef CPU_NAME
-# undef MULTI_CPU
-# define MULTI_CPU
-# else
-# define CPU_NAME cpu_v7
-# endif
-#endif
-
#ifdef CONFIG_CPU_V7M
# ifdef CPU_NAME
# undef MULTI_CPU
# endif
#endif
+#ifdef CONFIG_CPU_V7
+/*
+ * Cortex-A9 needs a different suspend/resume function, so we need
+ * multiple CPU support for ARMv7 anyway.
+ */
+# undef MULTI_CPU
+# define MULTI_CPU
+#endif
+
#ifndef MULTI_CPU
#define cpu_proc_init __glue(CPU_NAME,_proc_init)
#define cpu_proc_fin __glue(CPU_NAME,_proc_fin)
int __init mcpm_sync_init(
void (*power_up_setup)(unsigned int affinity_level));
+/**
+ * mcpm_loopback - make a run through the MCPM low-level code
+ *
+ * @cache_disable: pointer to function performing cache disabling
+ *
+ * This exercises the MCPM machinery by soft resetting the CPU and branching
+ * to the MCPM low-level entry code before returning to the caller.
+ * The @cache_disable function must do the necessary cache disabling to
+ * let the regular kernel init code turn it back on as if the CPU was
+ * hotplugged in. The MCPM state machine is set as if the cluster was
+ * initialized, meaning the power_up_setup callback passed to mcpm_sync_init()
+ * will be invoked for all affinity levels. This may be useful to initialize
+ * some resources such as enabling the CCI (which requires the cache to be
+ * off), or simply for testing purposes.
+ */
+int __init mcpm_loopback(void (*cache_disable)(void));
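
A hedged sketch of the expected call pattern; the cache-disable helper is a
stand-in for whatever routine a platform's MCPM backend already uses on its
power-down path, and the init function is illustrative:

static void example_cluster_cache_disable(void)
{
	v7_exit_coherency_flush(all);	/* clean caches and drop out of coherency */
}

static int __init example_mcpm_init(void)
{
	int ret;

	/* ... mcpm_platform_register() and power_up_setup wiring go here ... */

	ret = mcpm_loopback(example_cluster_cache_disable);
	if (!ret)
		mcpm_smp_set_ops();
	return ret;
}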
+
void __init mcpm_smp_set_ops(void);
#else
--- /dev/null
+#ifndef __ASM_MCS_LOCK_H
+#define __ASM_MCS_LOCK_H
+
+#ifdef CONFIG_SMP
+#include <asm/spinlock.h>
+
+/* MCS spin-locking. */
+#define arch_mcs_spin_lock_contended(lock) \
+do { \
+ /* Ensure prior stores are observed before we enter wfe. */ \
+ smp_mb(); \
+ while (!(smp_load_acquire(lock))) \
+ wfe(); \
+} while (0) \
+
+#define arch_mcs_spin_unlock_contended(lock) \
+do { \
+ smp_store_release(lock, 1); \
+ dsb_sev(); \
+} while (0)
+
+#endif /* CONFIG_SMP */
+#endif /* __ASM_MCS_LOCK_H */
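
For context, these hooks are meant to slot into the generic MCS queueing code;
a condensed, illustrative sketch of that caller (simplified from the generic
mcs_spin_lock()/mcs_spin_unlock() pattern, not part of this patch):

struct example_mcs_node {
	struct example_mcs_node *next;
	int locked;
};

static inline void example_mcs_lock(struct example_mcs_node **tail,
				    struct example_mcs_node *node)
{
	struct example_mcs_node *prev;

	node->locked = 0;
	node->next = NULL;

	prev = xchg(tail, node);	/* queue ourselves at the tail */
	if (!prev)
		return;			/* lock was free, we own it */

	ACCESS_ONCE(prev->next) = node;
	/* spin (mostly in wfe) until our predecessor hands the lock over */
	arch_mcs_spin_lock_contended(&node->locked);
}

static inline void example_mcs_unlock(struct example_mcs_node **tail,
				      struct example_mcs_node *node)
{
	struct example_mcs_node *next = ACCESS_ONCE(node->next);

	if (!next) {
		if (cmpxchg(tail, node, NULL) == node)
			return;		/* nobody queued behind us */
		while (!(next = ACCESS_ONCE(node->next)))
			cpu_relax();
	}
	/* wake the successor: store-release plus dsb/sev so its wfe returns */
	arch_mcs_spin_unlock_contended(&next->locked);
}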
* of this define that was meant to.
* Fortunately, there is no reference for this in noMMU mode, for now.
*/
-#ifndef TASK_SIZE
-#define TASK_SIZE (CONFIG_DRAM_SIZE)
-#endif
+#define TASK_SIZE UL(0xffffffff)
#ifndef TASK_UNMAPPED_BASE
#define TASK_UNMAPPED_BASE UL(0x00000000)
/*
* PLAT_PHYS_OFFSET is the offset (from zero) of the start of physical
- * memory. This is used for XIP and NoMMU kernels, or by kernels which
- * have their own mach/memory.h. Assembly code must always use
+ * memory. This is used for XIP and NoMMU kernels, and on platforms that don't
+ * have CONFIG_ARM_PATCH_PHYS_VIRT. Assembly code must always use
* PLAT_PHYS_OFFSET and not PHYS_OFFSET.
*/
-#ifndef PLAT_PHYS_OFFSET
#define PLAT_PHYS_OFFSET UL(CONFIG_PHYS_OFFSET)
-#endif
#ifndef __ASSEMBLY__
#ifndef __ARM_PERF_EVENT_H__
#define __ARM_PERF_EVENT_H__
-/*
- * The ARMv7 CPU PMU supports up to 32 event counters.
- */
-#define ARMPMU_MAX_HWEVENTS 32
-
-#define HW_OP_UNSUPPORTED 0xFFFF
-#define C(_x) PERF_COUNT_HW_CACHE_##_x
-#define CACHE_OP_UNSUPPORTED 0xFFFF
-
#ifdef CONFIG_HW_PERF_EVENTS
struct pt_regs;
extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
#define PMD_SECT_BUFFERABLE (_AT(pmdval_t, 1) << 2)
#define PMD_SECT_CACHEABLE (_AT(pmdval_t, 1) << 3)
#define PMD_SECT_USER (_AT(pmdval_t, 1) << 6) /* AP[1] */
-#define PMD_SECT_RDONLY (_AT(pmdval_t, 1) << 7) /* AP[2] */
+#define PMD_SECT_AP2 (_AT(pmdval_t, 1) << 7) /* read only */
#define PMD_SECT_S (_AT(pmdval_t, 3) << 8)
#define PMD_SECT_AF (_AT(pmdval_t, 1) << 10)
#define PMD_SECT_nG (_AT(pmdval_t, 1) << 11)
#define PTE_TABLE_BIT (_AT(pteval_t, 1) << 1)
#define PTE_BUFFERABLE (_AT(pteval_t, 1) << 2) /* AttrIndx[0] */
#define PTE_CACHEABLE (_AT(pteval_t, 1) << 3) /* AttrIndx[1] */
+#define PTE_AP2 (_AT(pteval_t, 1) << 7) /* AP[2] */
#define PTE_EXT_SHARED (_AT(pteval_t, 3) << 8) /* SH[1:0], inner shareable */
#define PTE_EXT_AF (_AT(pteval_t, 1) << 10) /* Access Flag */
#define PTE_EXT_NG (_AT(pteval_t, 1) << 11) /* nG */
#define L_PTE_PRESENT (_AT(pteval_t, 3) << 0) /* Present */
#define L_PTE_FILE (_AT(pteval_t, 1) << 2) /* only when !PRESENT */
#define L_PTE_USER (_AT(pteval_t, 1) << 6) /* AP[1] */
-#define L_PTE_RDONLY (_AT(pteval_t, 1) << 7) /* AP[2] */
#define L_PTE_SHARED (_AT(pteval_t, 3) << 8) /* SH[1:0], inner shareable */
#define L_PTE_YOUNG (_AT(pteval_t, 1) << 10) /* AF */
#define L_PTE_XN (_AT(pteval_t, 1) << 54) /* XN */
-#define L_PTE_DIRTY (_AT(pteval_t, 1) << 55) /* unused */
-#define L_PTE_SPECIAL (_AT(pteval_t, 1) << 56) /* unused */
+#define L_PTE_DIRTY (_AT(pteval_t, 1) << 55)
+#define L_PTE_SPECIAL (_AT(pteval_t, 1) << 56)
#define L_PTE_NONE (_AT(pteval_t, 1) << 57) /* PROT_NONE */
+#define L_PTE_RDONLY (_AT(pteval_t, 1) << 58) /* READ ONLY */
-#define PMD_SECT_VALID (_AT(pmdval_t, 1) << 0)
-#define PMD_SECT_DIRTY (_AT(pmdval_t, 1) << 55)
-#define PMD_SECT_SPLITTING (_AT(pmdval_t, 1) << 56)
-#define PMD_SECT_NONE (_AT(pmdval_t, 1) << 57)
+#define L_PMD_SECT_VALID (_AT(pmdval_t, 1) << 0)
+#define L_PMD_SECT_DIRTY (_AT(pmdval_t, 1) << 55)
+#define L_PMD_SECT_SPLITTING (_AT(pmdval_t, 1) << 56)
+#define L_PMD_SECT_NONE (_AT(pmdval_t, 1) << 57)
+#define L_PMD_SECT_RDONLY (_AT(pteval_t, 1) << 58)
/*
* To be used in assembly code with the upper page attributes.
#define pte_huge(pte) (pte_val(pte) && !(pte_val(pte) & PTE_TABLE_BIT))
#define pte_mkhuge(pte) (__pte(pte_val(pte) & ~PTE_TABLE_BIT))
-#define pmd_young(pmd) (pmd_val(pmd) & PMD_SECT_AF)
+#define pmd_isset(pmd, val) ((u32)(val) == (val) ? pmd_val(pmd) & (val) \
+ : !!(pmd_val(pmd) & (val)))
+#define pmd_isclear(pmd, val) (!(pmd_val(pmd) & (val)))
+
+#define pmd_young(pmd) (pmd_isset((pmd), PMD_SECT_AF))
#define __HAVE_ARCH_PMD_WRITE
-#define pmd_write(pmd) (!(pmd_val(pmd) & PMD_SECT_RDONLY))
+#define pmd_write(pmd) (pmd_isclear((pmd), L_PMD_SECT_RDONLY))
+#define pmd_dirty(pmd) (pmd_isset((pmd), L_PMD_SECT_DIRTY))
#define pmd_hugewillfault(pmd) (!pmd_young(pmd) || !pmd_write(pmd))
#define pmd_thp_or_huge(pmd) (pmd_huge(pmd) || pmd_trans_huge(pmd))
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define pmd_trans_huge(pmd) (pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT))
-#define pmd_trans_splitting(pmd) (pmd_val(pmd) & PMD_SECT_SPLITTING)
+#define pmd_trans_huge(pmd) (pmd_val(pmd) && !pmd_table(pmd))
+#define pmd_trans_splitting(pmd) (pmd_isset((pmd), L_PMD_SECT_SPLITTING))
#endif
#define PMD_BIT_FUNC(fn,op) \
static inline pmd_t pmd_##fn(pmd_t pmd) { pmd_val(pmd) op; return pmd; }
-PMD_BIT_FUNC(wrprotect, |= PMD_SECT_RDONLY);
+PMD_BIT_FUNC(wrprotect, |= L_PMD_SECT_RDONLY);
PMD_BIT_FUNC(mkold, &= ~PMD_SECT_AF);
-PMD_BIT_FUNC(mksplitting, |= PMD_SECT_SPLITTING);
-PMD_BIT_FUNC(mkwrite, &= ~PMD_SECT_RDONLY);
-PMD_BIT_FUNC(mkdirty, |= PMD_SECT_DIRTY);
+PMD_BIT_FUNC(mksplitting, |= L_PMD_SECT_SPLITTING);
+PMD_BIT_FUNC(mkwrite, &= ~L_PMD_SECT_RDONLY);
+PMD_BIT_FUNC(mkdirty, |= L_PMD_SECT_DIRTY);
PMD_BIT_FUNC(mkyoung, |= PMD_SECT_AF);
#define pmd_mkhuge(pmd) (__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT))
static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
{
- const pmdval_t mask = PMD_SECT_USER | PMD_SECT_XN | PMD_SECT_RDONLY |
- PMD_SECT_VALID | PMD_SECT_NONE;
+ const pmdval_t mask = PMD_SECT_USER | PMD_SECT_XN | L_PMD_SECT_RDONLY |
+ L_PMD_SECT_VALID | L_PMD_SECT_NONE;
pmd_val(pmd) = (pmd_val(pmd) & ~mask) | (pgprot_val(newprot) & mask);
return pmd;
}
BUG_ON(addr >= TASK_SIZE);
/* create a faulting entry if PROT_NONE protected */
- if (pmd_val(pmd) & PMD_SECT_NONE)
- pmd_val(pmd) &= ~PMD_SECT_VALID;
+ if (pmd_val(pmd) & L_PMD_SECT_NONE)
+ pmd_val(pmd) &= ~L_PMD_SECT_VALID;
+
+ if (pmd_write(pmd) && pmd_dirty(pmd))
+ pmd_val(pmd) &= ~PMD_SECT_AP2;
+ else
+ pmd_val(pmd) |= PMD_SECT_AP2;
*pmdp = __pmd(pmd_val(pmd) | PMD_SECT_nG);
flush_pmd_entry(pmdp);
#define pte_clear(mm,addr,ptep) set_pte_ext(ptep, __pte(0), 0)
+#define pte_isset(pte, val) ((u32)(val) == (val) ? pte_val(pte) & (val) \
+ : !!(pte_val(pte) & (val)))
+#define pte_isclear(pte, val) (!(pte_val(pte) & (val)))
+
#define pte_none(pte) (!pte_val(pte))
-#define pte_present(pte) (pte_val(pte) & L_PTE_PRESENT)
-#define pte_valid(pte) (pte_val(pte) & L_PTE_VALID)
+#define pte_present(pte) (pte_isset((pte), L_PTE_PRESENT))
+#define pte_valid(pte) (pte_isset((pte), L_PTE_VALID))
#define pte_accessible(mm, pte) (mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid(pte))
-#define pte_write(pte) (!(pte_val(pte) & L_PTE_RDONLY))
-#define pte_dirty(pte) (pte_val(pte) & L_PTE_DIRTY)
-#define pte_young(pte) (pte_val(pte) & L_PTE_YOUNG)
-#define pte_exec(pte) (!(pte_val(pte) & L_PTE_XN))
+#define pte_write(pte) (pte_isclear((pte), L_PTE_RDONLY))
+#define pte_dirty(pte) (pte_isset((pte), L_PTE_DIRTY))
+#define pte_young(pte) (pte_isset((pte), L_PTE_YOUNG))
+#define pte_exec(pte) (pte_isclear((pte), L_PTE_XN))
#define pte_special(pte) (0)
#define pte_valid_user(pte) \
- (pte_valid(pte) && (pte_val(pte) & L_PTE_USER) && pte_young(pte))
+ (pte_valid(pte) && pte_isset((pte), L_PTE_USER) && pte_young(pte))
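
The ternary in pte_isset()/pmd_isset() matters on LPAE because pte_val() is 64
bits wide there: for flags that live above bit 31 the masked value must be
collapsed to 0/1, or it is lost when the caller narrows the result to an int.
A contrived illustration (not part of the patch):

/* L_PTE_DIRTY is bit 55 on LPAE: a plain mask narrowed to int reads as 0. */
static int example_dirty_broken(pte_t pte)
{
	return pte_val(pte) & L_PTE_DIRTY;	/* truncated: always 0 */
}

static int example_dirty_ok(pte_t pte)
{
	return pte_isset(pte, L_PTE_DIRTY);	/* collapsed to 0 or 1 first */
}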
#if __LINUX_ARM_ARCH__ < 6
static inline void __sync_icache_dcache(pte_t pteval)
#ifdef CONFIG_HW_PERF_EVENTS
+/*
+ * The ARMv7 CPU PMU supports up to 32 event counters.
+ */
+#define ARMPMU_MAX_HWEVENTS 32
+
+#define HW_OP_UNSUPPORTED 0xFFFF
+#define C(_x) PERF_COUNT_HW_CACHE_##_x
+#define CACHE_OP_UNSUPPORTED 0xFFFF
+
+#define PERF_MAP_ALL_UNSUPPORTED \
+ [0 ... PERF_COUNT_HW_MAX - 1] = HW_OP_UNSUPPORTED
+
+#define PERF_CACHE_MAP_ALL_UNSUPPORTED \
+[0 ... C(MAX) - 1] = { \
+ [0 ... C(OP_MAX) - 1] = { \
+ [0 ... C(RESULT_MAX) - 1] = CACHE_OP_UNSUPPORTED, \
+ }, \
+}
+
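Both helpers lean on GCC's designated range initializers: later entries in an
initializer list override earlier ones, so a table can default every slot to
"unsupported" and then spell out only the events the core really has. A tiny
stand-alone illustration (values made up):

#define EXAMPLE_UNSUPPORTED	0xFFFF

static const unsigned int example_event_map[8] = {
	[0 ... 7] = EXAMPLE_UNSUPPORTED,	/* default everything first */
	[2] = 0x11,				/* then override the real events */
	[5] = 0x23,
};
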
/* The events for a given PMU register set. */
struct pmu_hw_events {
/*
#define instruction_pointer(regs) (regs)->ARM_pc
+#ifdef CONFIG_THUMB2_KERNEL
+#define frame_pointer(regs) (regs)->ARM_r7
+#else
+#define frame_pointer(regs) (regs)->ARM_fp
+#endif
+
static inline void instruction_pointer_set(struct pt_regs *regs,
unsigned long val)
{
static inline bool scu_a9_has_base(void)
{
- return read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9;
+ return read_cpuid_part() == ARM_CPU_PART_CORTEX_A9;
}
static inline unsigned long scu_a9_get_base(void)
#ifndef __ASM_STACKTRACE_H
#define __ASM_STACKTRACE_H
+#include <asm/ptrace.h>
+
struct stackframe {
+ /*
+ * FP member should hold R7 when CONFIG_THUMB2_KERNEL is enabled
+ * and R11 otherwise.
+ */
unsigned long fp;
unsigned long sp;
unsigned long lr;
unsigned long pc;
};
+static __always_inline
+void arm_get_current_stackframe(struct pt_regs *regs, struct stackframe *frame)
+{
+ frame->fp = frame_pointer(regs);
+ frame->sp = regs->ARM_sp;
+ frame->lr = regs->ARM_lr;
+ frame->pc = regs->ARM_pc;
+}
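
A short sketch of the intended call pattern, pairing the new helper with
walk_stackframe() (the callback and its purpose are illustrative):

static int example_count_one_frame(struct stackframe *frame, void *data)
{
	(*(unsigned int *)data)++;
	return 0;	/* 0 means keep unwinding */
}

static unsigned int example_stack_depth(struct pt_regs *regs)
{
	struct stackframe frame;
	unsigned int depth = 0;

	arm_get_current_stackframe(regs, &frame);
	walk_stackframe(&frame, example_count_one_frame, &depth);
	return depth;
}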
+
extern int unwind_frame(struct stackframe *frame);
extern void walk_stackframe(struct stackframe *frame,
int (*fn)(struct stackframe *, void *), void *data);
#include <linux/compiler.h>
#include <asm/fpstate.h>
+#include <asm/page.h>
#define THREAD_SIZE_ORDER 1
-#define THREAD_SIZE 8192
+#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
#define THREAD_START_SP (THREAD_SIZE - 8)
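
With the usual 4 KiB pages nothing changes numerically; a quick illustrative
compile-time check of what the new expression evaluates to (assuming 4 KiB
pages, not part of the patch):

static inline void example_check_thread_size(void)
{
	BUILD_BUG_ON(THREAD_SIZE != 8192);	/* 4096 << 1 */
	BUILD_BUG_ON(THREAD_START_SP != 8184);	/* 8192 - 8 */
}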
#ifndef __ASSEMBLY__
extern int __get_user_1(void *);
extern int __get_user_2(void *);
extern int __get_user_4(void *);
+extern int __get_user_lo8(void *);
+extern int __get_user_8(void *);
#define __GUP_CLOBBER_1 "lr", "cc"
#ifdef CONFIG_CPU_USE_DOMAINS
#define __GUP_CLOBBER_2 "lr", "cc"
#endif
#define __GUP_CLOBBER_4 "lr", "cc"
+#define __GUP_CLOBBER_lo8 "lr", "cc"
+#define __GUP_CLOBBER_8 "lr", "cc"
#define __get_user_x(__r2,__p,__e,__l,__s) \
__asm__ __volatile__ ( \
: "0" (__p), "r" (__l) \
: __GUP_CLOBBER_##__s)
+/* narrowing a double-word get into a single 32-bit word register: */
+#ifdef __ARMEB__
+#define __get_user_xb(__r2, __p, __e, __l, __s) \
+ __get_user_x(__r2, __p, __e, __l, lo8)
+#else
+#define __get_user_xb __get_user_x
+#endif
+
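In practice the narrowing path is what lets a 32-bit kernel variable receive a
64-bit user-space field: the full 8-byte fetch is done, then only the low word
is kept, which is why big-endian needs the separate lo8 helper above. A hedged
usage sketch (names are illustrative):

static int example_read_low_word(const u64 __user *uptr, u32 *dst)
{
	u32 val;

	if (get_user(val, uptr))	/* 8-byte access, narrowed to 32 bits */
		return -EFAULT;

	*dst = val;
	return 0;
}
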
#define __get_user_check(x,p) \
({ \
unsigned long __limit = current_thread_info()->addr_limit - 1; \
register const typeof(*(p)) __user *__p asm("r0") = (p);\
- register unsigned long __r2 asm("r2"); \
+ register typeof(x) __r2 asm("r2"); \
register unsigned long __l asm("r1") = __limit; \
register int __e asm("r0"); \
switch (sizeof(*(__p))) { \
case 4: \
__get_user_x(__r2, __p, __e, __l, 4); \
break; \
+ case 8: \
+ if (sizeof((x)) < 8) \
+ __get_user_xb(__r2, __p, __e, __l, 4); \
+ else \
+ __get_user_x(__r2, __p, __e, __l, 8); \
+ break; \
default: __e = __get_user_bad(); break; \
} \
x = (typeof(*(p))) __r2; \
#define access_ok(type,addr,size) (__range_ok(addr,size) == 0)
#define user_addr_max() \
- (segment_eq(get_fs(), USER_DS) ? TASK_SIZE : ~0UL)
+ (segment_eq(get_fs(), KERNEL_DS) ? ~0UL : get_fs())
/*
* The "__xxx" versions of the user access functions do not verify the
#include <uapi/asm/unistd.h>
+/*
+ * This may need to be greater than __NR_last_syscall+1 in order to
+ * account for the padding in the syscall table
+ */
#define __NR_syscalls (384)
+
+/*
+ * *NOTE*: This is a ghost syscall private to the kernel. Only the
+ * __kuser_cmpxchg code in entry-armv.S should be aware of its
+ * existence. Don't ever use this from user code.
+ */
#define __ARM_NR_cmpxchg (__ARM_NR_BASE+0x00fff0)
#define __ARCH_WANT_STAT64
#define __NR_sched_getattr (__NR_SYSCALL_BASE+381)
#define __NR_renameat2 (__NR_SYSCALL_BASE+382)
-/*
- * This may need to be greater than __NR_last_syscall+1 in order to
- * account for the padding in the syscall table
- */
-
/*
* The following SWIs are ARM private.
*/
#define __ARM_NR_usr32 (__ARM_NR_BASE+4)
#define __ARM_NR_set_tls (__ARM_NR_BASE+5)
-/*
- * *NOTE*: This is a ghost syscall private to the kernel. Only the
- * __kuser_cmpxchg code in entry-armv.S should be aware of its
- * existence. Don't ever use this from user code.
- */
-
/*
* The following syscalls are obsolete and no longer available for EABI.
*/
ldrneb r1, [r0], #1
teqne r1, #0
bne 1b
- mov pc, lr
+ ret lr
ENDPROC(printascii)
ENTRY(printch)
addruart r2, r3, ip
str r2, [r0]
str r3, [r1]
- mov pc, lr
+ ret lr
ENDPROC(debug_ll_addr)
#endif
mov r0, #0x04 @ SYS_WRITE0
ARM( svc #0x123456 )
THUMB( svc #0xab )
- mov pc, lr
+ ret lr
ENDPROC(printascii)
ENTRY(printch)
mov r0, #0x03 @ SYS_WRITEC
ARM( svc #0x123456 )
THUMB( svc #0xab )
- mov pc, lr
+ ret lr
ENDPROC(printch)
ENTRY(debug_ll_addr)
mov r2, #0
str r2, [r0]
str r2, [r1]
- mov pc, lr
+ ret lr
ENDPROC(debug_ll_addr)
#endif
1: bl preempt_schedule_irq @ irq en/disable is done inside
ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS
tst r0, #_TIF_NEED_RESCHED
- moveq pc, r8 @ go again
+ reteq r8 @ go again
b 1b
#endif
.pushsection .fixup, "ax"
.align 2
4: str r4, [sp, #S_PC] @ retry current instruction
- mov pc, r9
+ ret r9
.popsection
.pushsection __ex_table,"a"
.long 1b, 4b
#endif
tst r0, #0x08000000 @ only CDP/CPRT/LDC/STC have bit 27
tstne r0, #0x04000000 @ bit 26 set on both ARM and Thumb-2
- moveq pc, lr
+ reteq lr
and r8, r0, #0x00000f00 @ mask out CP number
THUMB( lsr r8, r8, #8 )
mov r7, #1
THUMB( add pc, r8 )
nop
- movw_pc lr @ CP#0
+ ret.w lr @ CP#0
W(b) do_fpe @ CP#1 (FPE)
W(b) do_fpe @ CP#2 (FPE)
- movw_pc lr @ CP#3
+ ret.w lr @ CP#3
#ifdef CONFIG_CRUNCH
b crunch_task_enable @ CP#4 (MaverickCrunch)
b crunch_task_enable @ CP#5 (MaverickCrunch)
b crunch_task_enable @ CP#6 (MaverickCrunch)
#else
- movw_pc lr @ CP#4
- movw_pc lr @ CP#5
- movw_pc lr @ CP#6
+ ret.w lr @ CP#4
+ ret.w lr @ CP#5
+ ret.w lr @ CP#6
#endif
- movw_pc lr @ CP#7
- movw_pc lr @ CP#8
- movw_pc lr @ CP#9
+ ret.w lr @ CP#7
+ ret.w lr @ CP#8
+ ret.w lr @ CP#9
#ifdef CONFIG_VFP
W(b) do_vfp @ CP#10 (VFP)
W(b) do_vfp @ CP#11 (VFP)
#else
- movw_pc lr @ CP#10 (VFP)
- movw_pc lr @ CP#11 (VFP)
+ ret.w lr @ CP#10 (VFP)
+ ret.w lr @ CP#11 (VFP)
#endif
- movw_pc lr @ CP#12
- movw_pc lr @ CP#13
- movw_pc lr @ CP#14 (Debug)
- movw_pc lr @ CP#15 (Control)
+ ret.w lr @ CP#12
+ ret.w lr @ CP#13
+ ret.w lr @ CP#14 (Debug)
+ ret.w lr @ CP#15 (Control)
#ifdef NEED_CPU_ARCHITECTURE
.align 2
.popsection
ENTRY(no_fp)
- mov pc, lr
+ ret lr
ENDPROC(no_fp)
__und_usr_fault_32:
#ifdef CONFIG_ARM_THUMB
bx \reg
#else
- mov pc, \reg
+ ret \reg
#endif
.endm
#if __LINUX_ARM_ARCH__ < 6
bcc kuser_cmpxchg32_fixup
#endif
- mov pc, lr
+ ret lr
.previous
#else
subs r8, r4, r7
rsbcss r8, r8, #(2b - 1b)
strcs r7, [sp, #S_PC]
- mov pc, lr
+ ret lr
.previous
#else
* published by the Free Software Foundation.
*/
+#include <asm/assembler.h>
#include <asm/unistd.h>
#include <asm/ftrace.h>
#include <asm/unwind.h>
cmp r5, #0
movne r0, r4
adrne lr, BSYM(1f)
- movne pc, r5
+ retne r5
1: get_thread_info tsk
b ret_slow_syscall
ENDPROC(ret_from_fork)
.macro mcount_exit
ldmia sp!, {r0-r3, ip, lr}
- mov pc, ip
+ ret ip
.endm
ENTRY(__gnu_mcount_nc)
#ifdef CONFIG_DYNAMIC_FTRACE
mov ip, lr
ldmia sp!, {lr}
- mov pc, ip
+ ret ip
#else
__mcount
#endif
bl ftrace_return_to_handler
mov lr, r0 @ r0 has real ret addr
ldmia sp!, {r0-r3}
- mov pc, lr
+ ret lr
#endif
ENTRY(ftrace_stub)
.Lftrace_stub:
- mov pc, lr
+ ret lr
ENDPROC(ftrace_stub)
#endif /* CONFIG_FUNCTION_TRACER */
streq r5, [sp, #4]
beq sys_mmap_pgoff
mov r0, #-EINVAL
- mov pc, lr
+ ret lr
#else
str r5, [sp, #4]
b sys_mmap_pgoff
movs pc, lr @ return & move spsr_svc into cpsr
.endm
- @
- @ 32-bit wide "mov pc, reg"
- @
- .macro movw_pc, reg
- mov pc, \reg
- .endm
#else /* CONFIG_THUMB2_KERNEL */
.macro svc_exit, rpsr, irq = 0
.if \irq != 0
movs pc, lr @ return & move spsr_svc into cpsr
.endm
#endif /* ifdef CONFIG_CPU_V7M / else */
-
- @
- @ 32-bit wide "mov pc, reg"
- @
- .macro movw_pc, reg
- mov pc, \reg
- nop
- .endm
#endif /* !CONFIG_THUMB2_KERNEL */
/*
ldr lr, [r0]
msr cpsr_c, r1 @ return to SVC mode
mov r0, r0 @ avoid hazard prior to ARMv4
- mov pc, lr
+ ret lr
ENDPROC(__set_fiq_regs)
ENTRY(__get_fiq_regs)
str lr, [r0]
msr cpsr_c, r1 @ return to SVC mode
mov r0, r0 @ avoid hazard prior to ARMv4
- mov pc, lr
+ ret lr
ENDPROC(__get_fiq_regs)
* published by the Free Software Foundation.
*
*/
+#include <asm/assembler.h>
#define ATAG_CORE 0x54410001
#define ATAG_CORE_SIZE ((2*4 + 3*4) >> 2)
cmp r5, r6
bne 1f
-2: mov pc, lr @ atag/dtb pointer is ok
+2: ret lr @ atag/dtb pointer is ok
1: mov r2, #0
- mov pc, lr
+ ret lr
ENDPROC(__vet_atags)
/*
cmp r5, r6
blo 1b
mov r5, #0 @ unknown processor
-2: mov pc, lr
+2: ret lr
ENDPROC(__lookup_processor_type)
/*
adr lr, BSYM(1f) @ return (PIC) address
ARM( add pc, r10, #PROCINFO_INITFUNC )
THUMB( add r12, r10, #PROCINFO_INITFUNC )
- THUMB( mov pc, r12 )
+ THUMB( ret r12 )
1: b __after_proc_init
ENDPROC(stext)
mov r13, r12 @ __secondary_switched address
ARM( add pc, r10, #PROCINFO_INITFUNC )
THUMB( add r12, r10, #PROCINFO_INITFUNC )
- THUMB( mov pc, r12 )
+ THUMB( ret r12 )
ENDPROC(secondary_startup)
ENTRY(__secondary_switched)
#endif
mcr p15, 0, r0, c1, c0, 0 @ write control reg
#endif /* CONFIG_CPU_CP15 */
- mov pc, r13
+ ret r13
ENDPROC(__after_proc_init)
.ltorg
orr r0, r0, #CR_M @ Set SCTRL.M (MPU on)
mcr p15, 0, r0, c1, c0, 0 @ Enable MPU
isb
- mov pc,lr
+ ret lr
ENDPROC(__setup_mpu)
#endif
#include "head-common.S"
mov r8, r4 @ set TTBR1 to swapper_pg_dir
ARM( add pc, r10, #PROCINFO_INITFUNC )
THUMB( add r12, r10, #PROCINFO_INITFUNC )
- THUMB( mov pc, r12 )
+ THUMB( ret r12 )
1: b __enable_mmu
ENDPROC(stext)
.ltorg
sub r4, r4, #0x1000 @ point to the PGD table
mov r4, r4, lsr #ARCH_PGD_SHIFT
#endif
- mov pc, lr
+ ret lr
ENDPROC(__create_page_tables)
.ltorg
.align
ARM( add pc, r10, #PROCINFO_INITFUNC ) @ initialise processor
@ (return control reg)
THUMB( add r12, r10, #PROCINFO_INITFUNC )
- THUMB( mov pc, r12 )
+ THUMB( ret r12 )
ENDPROC(secondary_startup)
/*
instr_sync
mov r3, r3
mov r3, r13
- mov pc, r3
+ ret r3
__turn_mmu_on_end:
ENDPROC(__turn_mmu_on)
.popsection
orr r4, r4, #0x0000b000
orr r4, r4, #0x00000020 @ val 0x4100b020
teq r3, r4 @ ARM 11MPCore?
- moveq pc, lr @ yes, assume SMP
+ reteq lr @ yes, assume SMP
mrc p15, 0, r0, c0, c0, 5 @ read MPIDR
and r0, r0, #0xc0000000 @ multiprocessing extensions and
orr r4, r4, #0x0000c000
orr r4, r4, #0x00000090
teq r3, r4 @ Check for ARM Cortex-A9
- movne pc, lr @ Not ARM Cortex-A9,
+ retne lr @ Not ARM Cortex-A9,
@ If a future SoC *does* use 0x0 as the PERIPH_BASE, then the
@ below address check will need to be #ifdef'd or equivalent
ARM_BE8(rev r0, r0) @ byteswap if big endian
and r0, r0, #0x3 @ number of CPUs
teq r0, #0x0 @ is 1?
- movne pc, lr
+ retne lr
__fixup_smp_on_up:
adr r0, 1f
.text
__do_fixup_smp_on_up:
cmp r4, r5
- movhs pc, lr
+ reths lr
ldmia r4!, {r0, r6}
ARM( str r6, [r0, r3] )
THUMB( add r0, r0, r3 )
2: cmp r4, r5
ldrcc r7, [r4], #4 @ use branch for delay slot
bcc 1b
- mov pc, lr
+ ret lr
#endif
ENDPROC(__fixup_a_pv_table)
* immediately.
*/
compare_cpu_mode_with_primary r4, r5, r6, r7
- movne pc, lr
+ retne lr
/*
* Once we have given up on one CPU, we do not try to install the
*/
cmp r4, #HYP_MODE
- movne pc, lr @ give up if the CPU is not in HYP mode
+ retne lr @ give up if the CPU is not in HYP mode
/*
* Configure HSCTLR to set correct exception endianness/instruction set
@ fall through
ENTRY(__hyp_set_vectors)
__HVC(0)
- mov pc, lr
+ ret lr
ENDPROC(__hyp_set_vectors)
#ifndef ZIMAGE
get_thread_info r10
#endif
4: dec_preempt_count r10, r3
- mov pc, r9 @ normal exit from exception
+ ret r9 @ normal exit from exception
concan_save:
wstrd wR15, [r1, #MMX_WR15]
2: teq r0, #0 @ anything to load?
- moveq pc, lr @ if not, return
+ reteq lr @ if not, return
concan_load:
@ clear CUP/MUP (only if r1 != 0)
teq r1, #0
mov r2, #0
- moveq pc, lr
+ reteq lr
tmcr wCon, r2
- mov pc, lr
+ ret lr
/*
* Back up Concan regs to save area and disable access to them
mov r3, lr @ preserve return address
bl concan_dump
msr cpsr_c, ip @ restore interrupt mode
- mov pc, r3
+ ret r3
/*
* Restore Concan state from given memory address
mov r3, lr @ preserve return address
bl concan_load
msr cpsr_c, ip @ restore interrupt mode
- mov pc, r3
+ ret r3
/*
* Concan handling on task switch
add r3, r0, #TI_IWMMXT_STATE @ get next task Concan save area
ldr r2, [r2] @ get current Concan owner
teq r2, r3 @ next task owns it?
- movne pc, lr @ no: leave Concan disabled
+ retne lr @ no: leave Concan disabled
1: @ flip Concan access
XSC(eor r1, r1, #0x3)
eors r0, r0, r1 @ if equal...
streq r0, [r3] @ then clear ownership
msr cpsr_c, r2 @ restore interrupts
- mov pc, lr
+ ret lr
.data
concan_owner:
struct perf_callchain_entry *entry)
{
struct frame_tail buftail;
+ unsigned long err;
- /* Also check accessibility of one struct frame_tail beyond */
if (!access_ok(VERIFY_READ, tail, sizeof(buftail)))
return NULL;
- if (__copy_from_user_inatomic(&buftail, tail, sizeof(buftail)))
+
+ pagefault_disable();
+ err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail));
+ pagefault_enable();
+
+ if (err)
return NULL;
perf_callchain_store(entry, buftail.lr);
}
perf_callchain_store(entry, regs->ARM_pc);
+
+ if (!current->mm)
+ return;
+
tail = (struct frame_tail __user *)regs->ARM_fp - 1;
while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
return;
}
- fr.fp = regs->ARM_fp;
- fr.sp = regs->ARM_sp;
- fr.lr = regs->ARM_lr;
- fr.pc = regs->ARM_pc;
+ arm_get_current_stackframe(regs, &fr);
walk_stackframe(&fr, callchain_trace, entry);
}
{.compatible = "arm,cortex-a7-pmu", .data = armv7_a7_pmu_init},
{.compatible = "arm,cortex-a5-pmu", .data = armv7_a5_pmu_init},
{.compatible = "arm,arm11mpcore-pmu", .data = armv6mpcore_pmu_init},
- {.compatible = "arm,arm1176-pmu", .data = armv6pmu_init},
- {.compatible = "arm,arm1136-pmu", .data = armv6pmu_init},
+ {.compatible = "arm,arm1176-pmu", .data = armv6_1176_pmu_init},
+ {.compatible = "arm,arm1136-pmu", .data = armv6_1136_pmu_init},
{.compatible = "qcom,krait-pmu", .data = krait_pmu_init},
{},
};
static struct platform_device_id cpu_pmu_plat_device_ids[] = {
{.name = "arm-pmu"},
+ {.name = "armv6-pmu"},
+ {.name = "armv7-pmu"},
+ {.name = "xscale-pmu"},
{},
};
static int probe_current_pmu(struct arm_pmu *pmu)
{
int cpu = get_cpu();
- unsigned long implementor = read_cpuid_implementor();
- unsigned long part_number = read_cpuid_part_number();
int ret = -ENODEV;
pr_info("probing PMU on CPU %d\n", cpu);
+ switch (read_cpuid_part()) {
/* ARM Ltd CPUs. */
- if (implementor == ARM_CPU_IMP_ARM) {
- switch (part_number) {
- case ARM_CPU_PART_ARM1136:
- case ARM_CPU_PART_ARM1156:
- case ARM_CPU_PART_ARM1176:
- ret = armv6pmu_init(pmu);
- break;
- case ARM_CPU_PART_ARM11MPCORE:
- ret = armv6mpcore_pmu_init(pmu);
- break;
- case ARM_CPU_PART_CORTEX_A8:
- ret = armv7_a8_pmu_init(pmu);
- break;
- case ARM_CPU_PART_CORTEX_A9:
- ret = armv7_a9_pmu_init(pmu);
- break;
- }
- /* Intel CPUs [xscale]. */
- } else if (implementor == ARM_CPU_IMP_INTEL) {
- switch (xscale_cpu_arch_version()) {
- case ARM_CPU_XSCALE_ARCH_V1:
- ret = xscale1pmu_init(pmu);
- break;
- case ARM_CPU_XSCALE_ARCH_V2:
- ret = xscale2pmu_init(pmu);
- break;
+ case ARM_CPU_PART_ARM1136:
+ ret = armv6_1136_pmu_init(pmu);
+ break;
+ case ARM_CPU_PART_ARM1156:
+ ret = armv6_1156_pmu_init(pmu);
+ break;
+ case ARM_CPU_PART_ARM1176:
+ ret = armv6_1176_pmu_init(pmu);
+ break;
+ case ARM_CPU_PART_ARM11MPCORE:
+ ret = armv6mpcore_pmu_init(pmu);
+ break;
+ case ARM_CPU_PART_CORTEX_A8:
+ ret = armv7_a8_pmu_init(pmu);
+ break;
+ case ARM_CPU_PART_CORTEX_A9:
+ ret = armv7_a9_pmu_init(pmu);
+ break;
+
+ default:
+ if (read_cpuid_implementor() == ARM_CPU_IMP_INTEL) {
+ switch (xscale_cpu_arch_version()) {
+ case ARM_CPU_XSCALE_ARCH_V1:
+ ret = xscale1pmu_init(pmu);
+ break;
+ case ARM_CPU_XSCALE_ARCH_V2:
+ ret = xscale2pmu_init(pmu);
+ break;
+ }
}
+ break;
}
put_cpu();
* accesses/misses in hardware.
*/
static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
[PERF_COUNT_HW_CPU_CYCLES] = ARMV6_PERFCTR_CPU_CYCLES,
[PERF_COUNT_HW_INSTRUCTIONS] = ARMV6_PERFCTR_INSTR_EXEC,
- [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
- [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC,
[PERF_COUNT_HW_BRANCH_MISSES] = ARMV6_PERFCTR_BR_MISPREDICT,
- [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV6_PERFCTR_IBUF_STALL,
[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV6_PERFCTR_LSU_FULL_STALL,
};
static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
- [C(L1D)] = {
- /*
- * The performance counters don't differentiate between read
- * and write accesses/misses so this isn't strictly correct,
- * but it's the best we can do. Writes and reads get
- * combined.
- */
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS,
- [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS,
- [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(L1I)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(LL)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(DTLB)] = {
- /*
- * The ARM performance counters can count micro DTLB misses,
- * micro ITLB misses and main TLB misses. There isn't an event
- * for TLB misses, so use the micro misses here and if users
- * want the main TLB misses they can use a raw counter.
- */
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(ITLB)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(BPU)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(NODE)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ /*
+ * The performance counters don't differentiate between read and write
+ * accesses/misses so this isn't strictly correct, but it's the best we
+ * can do. Writes and reads get combined.
+ */
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS,
+
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS,
+
+ /*
+ * The ARM performance counters can count micro DTLB misses, micro ITLB
+ * misses and main TLB misses. There isn't an event for TLB misses, so
+ * use the micro misses here and if users want the main TLB misses they
+ * can use a raw counter.
+ */
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,
+
+ [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
+ [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
};
enum armv6mpcore_perf_types {
* accesses/misses in hardware.
*/
static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
[PERF_COUNT_HW_CPU_CYCLES] = ARMV6MPCORE_PERFCTR_CPU_CYCLES,
[PERF_COUNT_HW_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_INSTR_EXEC,
- [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
- [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_BR_EXEC,
[PERF_COUNT_HW_BRANCH_MISSES] = ARMV6MPCORE_PERFCTR_BR_MISPREDICT,
- [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV6MPCORE_PERFCTR_IBUF_STALL,
[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV6MPCORE_PERFCTR_LSU_FULL_STALL,
};
static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
- [C(L1D)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] =
- ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS,
- [C(RESULT_MISS)] =
- ARMV6MPCORE_PERFCTR_DCACHE_RDMISS,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] =
- ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS,
- [C(RESULT_MISS)] =
- ARMV6MPCORE_PERFCTR_DCACHE_WRMISS,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(L1I)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(LL)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(DTLB)] = {
- /*
- * The ARM performance counters can count micro DTLB misses,
- * micro ITLB misses and main TLB misses. There isn't an event
- * for TLB misses, so use the micro misses here and if users
- * want the main TLB misses they can use a raw counter.
- */
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(ITLB)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(BPU)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(NODE)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DCACHE_RDMISS,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DCACHE_WRMISS,
+
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
+
+ /*
+ * The ARM performance counters can count micro DTLB misses, micro ITLB
+ * misses and main TLB misses. There isn't an event for TLB misses, so
+ * use the micro misses here and if users want the main TLB misses they
+ * can use a raw counter.
+ */
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS,
+
+ [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS,
+ [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS,
};
static inline unsigned long
&armv6_perf_cache_map, 0xFF);
}
-static int armv6pmu_init(struct arm_pmu *cpu_pmu)
+static void armv6pmu_init(struct arm_pmu *cpu_pmu)
{
- cpu_pmu->name = "v6";
cpu_pmu->handle_irq = armv6pmu_handle_irq;
cpu_pmu->enable = armv6pmu_enable_event;
cpu_pmu->disable = armv6pmu_disable_event;
cpu_pmu->map_event = armv6_map_event;
cpu_pmu->num_events = 3;
cpu_pmu->max_period = (1LLU << 32) - 1;
+}
+
+static int armv6_1136_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ armv6pmu_init(cpu_pmu);
+ cpu_pmu->name = "armv6_1136";
+ return 0;
+}
+static int armv6_1156_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ armv6pmu_init(cpu_pmu);
+ cpu_pmu->name = "armv6_1156";
+ return 0;
+}
+
+static int armv6_1176_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ armv6pmu_init(cpu_pmu);
+ cpu_pmu->name = "armv6_1176";
return 0;
}
static int armv6mpcore_pmu_init(struct arm_pmu *cpu_pmu)
{
- cpu_pmu->name = "v6mpcore";
+ cpu_pmu->name = "armv6_11mpcore";
cpu_pmu->handle_irq = armv6pmu_handle_irq;
cpu_pmu->enable = armv6pmu_enable_event;
cpu_pmu->disable = armv6mpcore_pmu_disable_event;
return 0;
}
#else
-static int armv6pmu_init(struct arm_pmu *cpu_pmu)
+static int armv6_1136_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ return -ENODEV;
+}
+
+static int armv6_1156_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ return -ENODEV;
+}
+
+static int armv6_1176_pmu_init(struct arm_pmu *cpu_pmu)
{
return -ENODEV;
}
* accesses/misses in hardware.
*/
static const unsigned armv7_a8_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
[PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
[PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
[PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
[PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
[PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
- [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV7_A8_PERFCTR_STALL_ISIDE,
- [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = HW_OP_UNSUPPORTED,
};
static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
- [C(L1D)] = {
- /*
- * The performance counters don't differentiate between read
- * and write accesses/misses so this isn't strictly correct,
- * but it's the best we can do. Writes and reads get
- * combined.
- */
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(L1I)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = ARMV7_A8_PERFCTR_L1_ICACHE_ACCESS,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(LL)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = ARMV7_A8_PERFCTR_L2_CACHE_ACCESS,
- [C(RESULT_MISS)] = ARMV7_A8_PERFCTR_L2_CACHE_REFILL,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = ARMV7_A8_PERFCTR_L2_CACHE_ACCESS,
- [C(RESULT_MISS)] = ARMV7_A8_PERFCTR_L2_CACHE_REFILL,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(DTLB)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(ITLB)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(BPU)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(NODE)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ /*
+ * The performance counters don't differentiate between read and write
+ * accesses/misses so this isn't strictly correct, but it's the best we
+ * can do. Writes and reads get combined.
+ */
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+
+ [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A8_PERFCTR_L1_ICACHE_ACCESS,
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL,
+
+ [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A8_PERFCTR_L2_CACHE_ACCESS,
+ [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_A8_PERFCTR_L2_CACHE_REFILL,
+ [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A8_PERFCTR_L2_CACHE_ACCESS,
+ [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_A8_PERFCTR_L2_CACHE_REFILL,
+
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
+
+ [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+ [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+
+ [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
};
/*
* Cortex-A9 HW events mapping
*/
static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
[PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
[PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_A9_PERFCTR_INSTR_CORE_RENAME,
[PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
[PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
[PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
- [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV7_A9_PERFCTR_STALL_ICACHE,
[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV7_A9_PERFCTR_STALL_DISPATCH,
};
static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
- [C(L1D)] = {
- /*
- * The performance counters don't differentiate between read
- * and write accesses/misses so this isn't strictly correct,
- * but it's the best we can do. Writes and reads get
- * combined.
- */
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(L1I)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(LL)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(DTLB)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(ITLB)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(BPU)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(NODE)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ /*
+ * The performance counters don't differentiate between read and write
+ * accesses/misses so this isn't strictly correct, but it's the best we
+ * can do. Writes and reads get combined.
+ */
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL,
+
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
+
+ [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+ [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+
+ [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
};
/*
* Cortex-A5 HW events mapping
*/
static const unsigned armv7_a5_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
[PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
[PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
[PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
[PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
[PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
- [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
- [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = HW_OP_UNSUPPORTED,
- [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = HW_OP_UNSUPPORTED,
};
static const unsigned armv7_a5_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
- [C(L1D)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL,
- [C(RESULT_MISS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL_DROP,
- },
- },
- [C(L1I)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- /*
- * The prefetch counters don't differentiate between the I
- * side and the D side.
- */
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL,
- [C(RESULT_MISS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL_DROP,
- },
- },
- [C(LL)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(DTLB)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(ITLB)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(BPU)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(NODE)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [C(L1D)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL,
+ [C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL_DROP,
+
+ [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS,
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL,
+ /*
+ * The prefetch counters don't differentiate between the I side and the
+ * D side.
+ */
+ [C(L1I)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL,
+ [C(L1I)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL_DROP,
+
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
+
+ [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+ [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+
+ [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
};
/*
* Cortex-A15 HW events mapping
*/
static const unsigned armv7_a15_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
[PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
[PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
[PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_A15_PERFCTR_PC_WRITE_SPEC,
[PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
[PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_BUS_CYCLES,
- [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = HW_OP_UNSUPPORTED,
- [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = HW_OP_UNSUPPORTED,
};
static const unsigned armv7_a15_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
- [C(L1D)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_READ,
- [C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_READ,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_WRITE,
- [C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_WRITE,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(L1I)] = {
- /*
- * Not all performance counters differentiate between read
- * and write accesses/misses so we're not always strictly
- * correct, but it's the best we can do. Writes and reads get
- * combined in these cases.
- */
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(LL)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_READ,
- [C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L2_CACHE_REFILL_READ,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_WRITE,
- [C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L2_CACHE_REFILL_WRITE,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(DTLB)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV7_A15_PERFCTR_DTLB_REFILL_L1_READ,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV7_A15_PERFCTR_DTLB_REFILL_L1_WRITE,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(ITLB)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(BPU)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(NODE)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_READ,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_READ,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_WRITE,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_WRITE,
+
+ /*
+ * Not all performance counters differentiate between read and write
+ * accesses/misses so we're not always strictly correct, but it's the
+ * best we can do. Writes and reads get combined in these cases.
+ */
+ [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS,
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL,
+
+ [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_READ,
+ [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L2_CACHE_REFILL_READ,
+ [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_WRITE,
+ [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L2_CACHE_REFILL_WRITE,
+
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_DTLB_REFILL_L1_READ,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_DTLB_REFILL_L1_WRITE,
+
+ [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+ [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+
+ [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
};
/*
* Cortex-A7 HW events mapping
*/
static const unsigned armv7_a7_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
[PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
[PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
[PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
[PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
[PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_BUS_CYCLES,
- [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = HW_OP_UNSUPPORTED,
- [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = HW_OP_UNSUPPORTED,
};
static const unsigned armv7_a7_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
- [C(L1D)] = {
- /*
- * The performance counters don't differentiate between read
- * and write accesses/misses so this isn't strictly correct,
- * but it's the best we can do. Writes and reads get
- * combined.
- */
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(L1I)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(LL)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_CACHE_ACCESS,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_CACHE_ACCESS,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(DTLB)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(ITLB)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(BPU)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(NODE)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ /*
+ * The performance counters don't differentiate between read and write
+ * accesses/misses so this isn't strictly correct, but it's the best we
+ * can do. Writes and reads get combined.
+ */
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+
+ [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS,
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL,
+
+ [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_CACHE_ACCESS,
+ [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL,
+ [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_CACHE_ACCESS,
+ [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL,
+
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
+
+ [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+ [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+
+ [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
};
/*
* Cortex-A12 HW events mapping
*/
static const unsigned armv7_a12_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
[PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
[PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
[PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_A12_PERFCTR_PC_WRITE_SPEC,
[PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
[PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_BUS_CYCLES,
- [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = HW_OP_UNSUPPORTED,
- [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = HW_OP_UNSUPPORTED,
};
static const unsigned armv7_a12_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
- [C(L1D)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_READ,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_WRITE,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(L1I)] = {
- /*
- * Not all performance counters differentiate between read
- * and write accesses/misses so we're not always strictly
- * correct, but it's the best we can do. Writes and reads get
- * combined in these cases.
- */
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(LL)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_READ,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_WRITE,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(DTLB)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV7_A12_PERFCTR_PF_TLB_REFILL,
- },
- },
- [C(ITLB)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(BPU)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(NODE)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_READ,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_WRITE,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+
+ /*
+ * Not all performance counters differentiate between read and write
+ * accesses/misses so we're not always strictly correct, but it's the
+ * best we can do. Writes and reads get combined in these cases.
+ */
+ [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS,
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL,
+
+ [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_READ,
+ [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL,
+ [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_WRITE,
+ [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL,
+
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
+ [C(DTLB)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV7_A12_PERFCTR_PF_TLB_REFILL,
+
+ [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+ [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL,
+
+ [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
};
/*
* Krait HW events mapping
*/
static const unsigned krait_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
[PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
[PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
- [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
- [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
[PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
[PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES,
};
static const unsigned krait_perf_map_no_branch[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
[PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
[PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
- [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
- [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
[PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES,
};
static const unsigned krait_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
- [C(L1D)] = {
- /*
- * The performance counters don't differentiate between read
- * and write accesses/misses so this isn't strictly correct,
- * but it's the best we can do. Writes and reads get
- * combined.
- */
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(L1I)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_ICACHE_ACCESS,
- [C(RESULT_MISS)] = KRAIT_PERFCTR_L1_ICACHE_MISS,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(LL)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(DTLB)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_DTLB_ACCESS,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_DTLB_ACCESS,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(ITLB)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_ITLB_ACCESS,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_ITLB_ACCESS,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(BPU)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
- [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(NODE)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ /*
+ * The performance counters don't differentiate between read and write
+ * accesses/misses so this isn't strictly correct, but it's the best we
+ * can do. Writes and reads get combined.
+ */
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL,
+
+ [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_ICACHE_ACCESS,
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = KRAIT_PERFCTR_L1_ICACHE_MISS,
+
+ [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_DTLB_ACCESS,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_DTLB_ACCESS,
+
+ [C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_ITLB_ACCESS,
+ [C(ITLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_ITLB_ACCESS,
+
+ [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
};
/*
static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu)
{
armv7pmu_init(cpu_pmu);
- cpu_pmu->name = "ARMv7 Cortex-A8";
+ cpu_pmu->name = "armv7_cortex_a8";
cpu_pmu->map_event = armv7_a8_map_event;
cpu_pmu->num_events = armv7_read_num_pmnc_events();
return 0;
static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu)
{
armv7pmu_init(cpu_pmu);
- cpu_pmu->name = "ARMv7 Cortex-A9";
+ cpu_pmu->name = "armv7_cortex_a9";
cpu_pmu->map_event = armv7_a9_map_event;
cpu_pmu->num_events = armv7_read_num_pmnc_events();
return 0;
static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu)
{
armv7pmu_init(cpu_pmu);
- cpu_pmu->name = "ARMv7 Cortex-A5";
+ cpu_pmu->name = "armv7_cortex_a5";
cpu_pmu->map_event = armv7_a5_map_event;
cpu_pmu->num_events = armv7_read_num_pmnc_events();
return 0;
static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu)
{
armv7pmu_init(cpu_pmu);
- cpu_pmu->name = "ARMv7 Cortex-A15";
+ cpu_pmu->name = "armv7_cortex_a15";
cpu_pmu->map_event = armv7_a15_map_event;
cpu_pmu->num_events = armv7_read_num_pmnc_events();
cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
static int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu)
{
armv7pmu_init(cpu_pmu);
- cpu_pmu->name = "ARMv7 Cortex-A7";
+ cpu_pmu->name = "armv7_cortex_a7";
cpu_pmu->map_event = armv7_a7_map_event;
cpu_pmu->num_events = armv7_read_num_pmnc_events();
cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
static int armv7_a12_pmu_init(struct arm_pmu *cpu_pmu)
{
armv7pmu_init(cpu_pmu);
- cpu_pmu->name = "ARMv7 Cortex-A12";
+ cpu_pmu->name = "armv7_cortex_a12";
cpu_pmu->map_event = armv7_a12_map_event;
cpu_pmu->num_events = armv7_read_num_pmnc_events();
cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
static int armv7_a17_pmu_init(struct arm_pmu *cpu_pmu)
{
armv7_a12_pmu_init(cpu_pmu);
- cpu_pmu->name = "ARMv7 Cortex-A17";
+ cpu_pmu->name = "armv7_cortex_a17";
return 0;
}
unsigned long flags;
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
+ struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
struct pmu_hw_events *events = cpu_pmu->get_hw_events();
/* Disable counter and interrupt */
unsigned long flags;
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
+ struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
struct pmu_hw_events *events = cpu_pmu->get_hw_events();
/*
static int krait_pmu_init(struct arm_pmu *cpu_pmu)
{
armv7pmu_init(cpu_pmu);
- cpu_pmu->name = "ARMv7 Krait";
+ cpu_pmu->name = "armv7_krait";
/* Some early versions of Krait don't support PC write events */
if (of_property_read_bool(cpu_pmu->plat_device->dev.of_node,
"qcom,no-pc-write"))
};
static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
[PERF_COUNT_HW_CPU_CYCLES] = XSCALE_PERFCTR_CCNT,
[PERF_COUNT_HW_INSTRUCTIONS] = XSCALE_PERFCTR_INSTRUCTION,
- [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
- [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XSCALE_PERFCTR_BRANCH,
[PERF_COUNT_HW_BRANCH_MISSES] = XSCALE_PERFCTR_BRANCH_MISS,
- [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = XSCALE_PERFCTR_ICACHE_NO_DELIVER,
- [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = HW_OP_UNSUPPORTED,
};
static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
- [C(L1D)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS,
- [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS,
- [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(L1I)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(LL)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(DTLB)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(ITLB)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(BPU)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(NODE)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS,
+
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS,
+
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS,
+
+ [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS,
+ [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS,
};
#define XSCALE_PMU_ENABLE 0x001
static int xscale1pmu_init(struct arm_pmu *cpu_pmu)
{
- cpu_pmu->name = "xscale1";
+ cpu_pmu->name = "armv5_xscale1";
cpu_pmu->handle_irq = xscale1pmu_handle_irq;
cpu_pmu->enable = xscale1pmu_enable_event;
cpu_pmu->disable = xscale1pmu_disable_event;
static int xscale2pmu_init(struct arm_pmu *cpu_pmu)
{
- cpu_pmu->name = "xscale2";
+ cpu_pmu->name = "armv5_xscale2";
cpu_pmu->handle_irq = xscale2pmu_handle_irq;
cpu_pmu->enable = xscale2pmu_enable_event;
cpu_pmu->disable = xscale2pmu_disable_event;
*/
#include <linux/linkage.h>
+#include <asm/assembler.h>
#include <asm/kexec.h>
.align 3 /* not needed for this code, but keeps fncpy() happy */
mov r0,#0
ldr r1,kexec_mach_type
ldr r2,kexec_boot_atags
- ARM( mov pc, lr )
+ ARM( ret lr )
THUMB( bx lr )
.align
elf_hwcap |= HWCAP_LPAE;
}
-static void __init feat_v6_fixup(void)
+static void __init elf_hwcap_fixup(void)
{
- int id = read_cpuid_id();
-
- if ((id & 0xff0f0000) != 0x41070000)
- return;
+ unsigned id = read_cpuid_id();
+ unsigned sync_prim;
/*
* HWCAP_TLS is available only on 1136 r1p0 and later,
* see also kuser_get_tls_init.
*/
- if ((((id >> 4) & 0xfff) == 0xb36) && (((id >> 20) & 3) == 0))
+ if (read_cpuid_part() == ARM_CPU_PART_ARM1136 &&
+ ((id >> 20) & 3) == 0) {
elf_hwcap &= ~HWCAP_TLS;
+ return;
+ }
+
+ /* Verify if CPUID scheme is implemented */
+ if ((id & 0x000f0000) != 0x000f0000)
+ return;
+
+ /*
+ * If the CPU supports LDREX/STREX and LDREXB/STREXB,
+ * avoid advertising SWP; it may not be atomic with
+ * multiprocessing cores.
+ */
+ sync_prim = ((read_cpuid_ext(CPUID_EXT_ISAR3) >> 8) & 0xf0) |
+ ((read_cpuid_ext(CPUID_EXT_ISAR4) >> 20) & 0x0f);
+ if (sync_prim >= 0x13)
+ elf_hwcap &= ~HWCAP_SWP;
}
/*
#endif
erratum_a15_798181_init();
- feat_v6_fixup();
+ elf_hwcap_fixup();
cacheid_init();
cpu_init();
instr_sync
mov r0, r0
mov r0, r0
- mov pc, r3 @ jump to virtual address
+ ret r3 @ jump to virtual address
ENDPROC(cpu_resume_mmu)
.popsection
cpu_resume_after_mmu:
#include <asm/cputype.h>
#define SCU_CTRL 0x00
+#define SCU_ENABLE (1 << 0)
+#define SCU_STANDBY_ENABLE (1 << 5)
#define SCU_CONFIG 0x04
#define SCU_CPU_STATUS 0x08
#define SCU_INVALIDATE 0x0c
scu_ctrl = readl_relaxed(scu_base + SCU_CTRL);
/* already enabled? */
- if (scu_ctrl & 1)
+ if (scu_ctrl & SCU_ENABLE)
return;
- scu_ctrl |= 1;
+ scu_ctrl |= SCU_ENABLE;
+
+ /* Cortex-A9 earlier than r2p0 has no standby bit in SCU */
+ if ((read_cpuid_id() & 0xff0ffff0) == 0x410fc090 &&
+ (read_cpuid_id() & 0x00f0000f) >= 0x00200000)
+ scu_ctrl |= SCU_STANDBY_ENABLE;
+
writel_relaxed(scu_ctrl, scu_base + SCU_CTRL);
/*
unsigned int midr = read_cpuid_id();
unsigned int revidr = read_cpuid(CPUID_REVIDR);
- /* Cortex-A15 r0p0..r3p2 w/o ECO fix affected */
- if ((midr & 0xff0ffff0) != 0x410fc0f0 || midr > 0x413fc0f2 ||
- (revidr & 0x210) == 0x210) {
- return;
- }
- if (revidr & 0x10)
- erratum_a15_798181_handler = erratum_a15_798181_partial;
- else
+ /* Brahma-B15 r0p0..r0p2 affected
+ * Cortex-A15 r0p0..r3p2 w/o ECO fix affected */
+ if ((midr & 0xff0ffff0) == 0x420f00f0 && midr <= 0x420f00f2)
erratum_a15_798181_handler = erratum_a15_798181_broadcast;
+ else if ((midr & 0xff0ffff0) == 0x410fc0f0 && midr <= 0x413fc0f2 &&
+ (revidr & 0x210) != 0x210) {
+ if (revidr & 0x10)
+ erratum_a15_798181_handler =
+ erratum_a15_798181_partial;
+ else
+ erratum_a15_798181_handler =
+ erratum_a15_798181_broadcast;
+ }
}
#endif
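Both the SCU standby enable and the erratum 798181 handler selection above compare raw MIDR values against masked constants. As a worked illustration of the fields those masks isolate (assuming the standard ARM MIDR layout: implementer [31:24], variant [23:20], architecture [19:16], part number [15:4], revision [3:0]), the rNpM revision can be pulled out like this:

/* Hypothetical helper, for illustration only: once the implementer/part bits
 * have matched (e.g. 0x410fc0f0 for an ARM Cortex-A15), the variant field is
 * the N and the revision field the M of "rNpM", so midr <= 0x413fc0f2 means
 * r3p2 or earlier.
 */
static inline void midr_to_rnpm(unsigned int midr,
				unsigned int *variant, unsigned int *rev)
{
	*variant = (midr >> 20) & 0xf;	/* the N in rNpM */
	*rev = midr & 0xf;		/* the M in rNpM */
}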
#include <linux/perf_event.h>
#include <asm/opcodes.h>
+#include <asm/system_info.h>
#include <asm/traps.h>
#include <asm/uaccess.h>
*/
static int __init swp_emulation_init(void)
{
+ if (cpu_architecture() < CPU_ARCH_ARMv7)
+ return 0;
+
#ifdef CONFIG_PROC_FS
if (!proc_create("cpu/swp_emulation", S_IRUGO, NULL, &proc_status_fops))
return -ENOMEM;
if (!in_lock_functions(regs->ARM_pc))
return regs->ARM_pc;
- frame.fp = regs->ARM_fp;
- frame.sp = regs->ARM_sp;
- frame.lr = regs->ARM_lr;
- frame.pc = regs->ARM_pc;
+ arm_get_current_stackframe(regs, &frame);
do {
int ret = unwind_frame(&frame);
if (ret < 0)
#include <asm/exception.h>
#include <asm/unistd.h>
#include <asm/traps.h>
+#include <asm/ptrace.h>
#include <asm/unwind.h>
#include <asm/tls.h>
#include <asm/system_misc.h>
#include <asm/opcodes.h>
+
static const char *handler[]= {
"prefetch abort",
"data abort",
tsk = current;
if (regs) {
- fp = regs->ARM_fp;
+ fp = frame_pointer(regs);
mode = processor_mode(regs);
} else if (tsk != current) {
fp = thread_saved_fp(tsk);
dump_instr("", regs);
if (user_mode(regs)) {
__show_regs(regs);
- c_backtrace(regs->ARM_fp, processor_mode(regs));
+ c_backtrace(frame_pointer(regs), processor_mode(regs));
}
}
#endif
tsk = current;
if (regs) {
- frame.fp = regs->ARM_fp;
- frame.sp = regs->ARM_sp;
- frame.lr = regs->ARM_lr;
+ arm_get_current_stackframe(regs, &frame);
/* PC might be corrupted, use LR in that case. */
- frame.pc = kernel_text_address(regs->ARM_pc)
- ? regs->ARM_pc : regs->ARM_lr;
+ if (!kernel_text_address(regs->ARM_pc))
+ frame.pc = regs->ARM_lr;
} else if (tsk == current) {
frame.fp = (unsigned long)__builtin_frame_address(0);
frame.sp = current_sp;
_end = .;
STABS_DEBUG
- .comment 0 : { *(.comment) }
}
/*
int __attribute_const__ kvm_target_cpu(void)
{
- unsigned long implementor = read_cpuid_implementor();
- unsigned long part_number = read_cpuid_part_number();
-
- if (implementor != ARM_CPU_IMP_ARM)
- return -EINVAL;
-
- switch (part_number) {
+ switch (read_cpuid_part()) {
case ARM_CPU_PART_CORTEX_A7:
return KVM_ARM_TARGET_CORTEX_A7;
case ARM_CPU_PART_CORTEX_A15:
*/
#include <linux/linkage.h>
+#include <asm/assembler.h>
#include <asm/unified.h>
#include <asm/asm-offsets.h>
#include <asm/kvm_asm.h>
ldr r0, =TRAMPOLINE_VA
adr r1, target
bfi r0, r1, #0, #PAGE_SHIFT
- mov pc, r0
+ ret r0
target: @ We're now in the trampoline code, switch page tables
mcrr p15, 4, r2, r3, c2
#include <linux/linkage.h>
+#include <asm/assembler.h>
#ifdef __ARMEB__
#define al r1
THUMB( lsrmi r3, al, ip )
THUMB( orrmi ah, ah, r3 )
mov al, al, lsl r2
- mov pc, lr
+ ret lr
ENDPROC(__ashldi3)
ENDPROC(__aeabi_llsl)
#include <linux/linkage.h>
+#include <asm/assembler.h>
#ifdef __ARMEB__
#define al r1
THUMB( lslmi r3, ah, ip )
THUMB( orrmi al, al, r3 )
mov ah, ah, asr r2
- mov pc, lr
+ ret lr
ENDPROC(__ashrdi3)
ENDPROC(__aeabi_lasr)
ENTRY(c_backtrace)
#if !defined(CONFIG_FRAME_POINTER) || !defined(CONFIG_PRINTK)
- mov pc, lr
+ ret lr
ENDPROC(c_backtrace)
#else
stmfd sp!, {r4 - r8, lr} @ Save an extra register so we have a location...
+#include <asm/assembler.h>
#include <asm/unwind.h>
#if __LINUX_ARM_ARCH__ >= 6
\instr r2, r2, r3
str r2, [r1, r0, lsl #2]
restore_irqs ip
- mov pc, lr
+ ret lr
UNWIND( .fnend )
ENDPROC(\name )
.endm
\store r2, [r1]
moveq r0, #0
restore_irqs ip
- mov pc, lr
+ ret lr
UNWIND( .fnend )
ENDPROC(\name )
.endm
#include <linux/linkage.h>
+#include <asm/assembler.h>
#if __LINUX_ARM_ARCH__ >= 6
ENTRY(__bswapsi2)
mov r3, r3, lsr #8
bic r3, r3, #0xff00
eor r0, r3, r0, ror #8
- mov pc, lr
+ ret lr
ENDPROC(__bswapsi2)
ENTRY(__bswapdi2)
bic r1, r1, #0xff00
eor r1, r1, r0, ror #8
eor r0, r3, ip, ror #8
- mov pc, lr
+ ret lr
ENDPROC(__bswapdi2)
#endif
mov r0, r1
adr lr, BSYM(1f)
- mov pc, r2
+ ret r2
1: ldr lr, [sp]
ldr sp, [sp, #4]
- mov pc, lr
+ ret lr
ENDPROC(call_with_stack)
#endif
#endif
adcnes sum, sum, td0 @ update checksum
- mov pc, lr
+ ret lr
ENTRY(csum_partial)
stmfd sp!, {buf, lr}
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
+#include <asm/assembler.h>
/*
* unsigned int
adcs sum, sum, ip, put_byte_1 @ update checksum
strb ip, [dst], #1
tst dst, #2
- moveq pc, lr @ dst is now 32bit aligned
+ reteq lr @ dst is now 32bit aligned
.Ldst_16bit: load2b r8, ip
sub len, len, #2
strb r8, [dst], #1
adcs sum, sum, ip, put_byte_1
strb ip, [dst], #1
- mov pc, lr @ dst is now 32bit aligned
+ ret lr @ dst is now 32bit aligned
/*
* Handle 0 to 7 bytes, with any alignment of source and
mul r0, r2, r0 @ max = 2^32-1
add r0, r0, r1, lsr #32-6
movs r0, r0, lsr #6
- moveq pc, lr
+ reteq lr
/*
* loops = r0 * HZ * loops_per_jiffy / 1000000
ENTRY(__loop_delay)
subs r0, r0, #1
#if 0
- movls pc, lr
+ retls lr
subs r0, r0, #1
- movls pc, lr
+ retls lr
subs r0, r0, #1
- movls pc, lr
+ retls lr
subs r0, r0, #1
- movls pc, lr
+ retls lr
subs r0, r0, #1
- movls pc, lr
+ retls lr
subs r0, r0, #1
- movls pc, lr
+ retls lr
subs r0, r0, #1
- movls pc, lr
+ retls lr
subs r0, r0, #1
#endif
bhi __loop_delay
- mov pc, lr
+ ret lr
ENDPROC(__loop_udelay)
ENDPROC(__loop_const_udelay)
ENDPROC(__loop_delay)
*/
#include <linux/linkage.h>
+#include <asm/assembler.h>
#include <asm/unwind.h>
#ifdef __ARMEB__
mov yl, #0
cmpeq xl, r4
movlo xh, xl
- movlo pc, lr
+ retlo lr
@ The division loop for lower bit positions.
@ Here we shift remainder bits leftwards rather than moving the
subcs xh, xh, r4
movs ip, ip, lsr #1
bne 4b
- mov pc, lr
+ ret lr
@ The top part of remainder became zero. If carry is set
@ (the 33th bit) this is a false positive so resume the loop.
@ Otherwise, if lower part is also null then we are done.
6: bcs 5b
cmp xl, #0
- moveq pc, lr
+ reteq lr
@ We still have remainder bits in the low part. Bring them up.
movs ip, ip, lsr #1
mov xh, #1
bne 4b
- mov pc, lr
+ ret lr
8: @ Division by a power of 2: determine what that divisor order is
@ then simply shift values around
THUMB( orr yl, yl, xh )
mov xh, xl, lsl ip
mov xh, xh, lsr ip
- mov pc, lr
+ ret lr
@ eq -> division by 1: obvious enough...
9: moveq yl, xl
moveq yh, xh
moveq xh, #0
- moveq pc, lr
+ reteq lr
UNWIND(.fnend)
UNWIND(.fnstart)
2: cmp r2, r1 @ any more?
blo 1b
3: mov r0, r1 @ no free bits
- mov pc, lr
+ ret lr
ENDPROC(_find_first_zero_bit_le)
/*
2: cmp r2, r1 @ any more?
blo 1b
3: mov r0, r1 @ no free bits
- mov pc, lr
+ ret lr
ENDPROC(_find_first_bit_le)
/*
2: cmp r2, r1 @ any more?
blo 1b
3: mov r0, r1 @ no free bits
- mov pc, lr
+ ret lr
ENDPROC(_find_first_zero_bit_be)
ENTRY(_find_next_zero_bit_be)
2: cmp r2, r1 @ any more?
blo 1b
3: mov r0, r1 @ no free bits
- mov pc, lr
+ ret lr
ENDPROC(_find_first_bit_be)
ENTRY(_find_next_bit_be)
#endif
cmp r1, r0 @ Clamp to maxbit
movlo r0, r1
- mov pc, lr
+ ret lr
* Inputs: r0 contains the address
* r1 contains the address limit, which must be preserved
* Outputs: r0 is the error code
- * r2 contains the zero-extended value
+ * r2, r3 contain the zero-extended value
* lr corrupted
*
* No other registers must be altered. (see <asm/uaccess.h>
check_uaccess r0, 1, r1, r2, __get_user_bad
1: TUSER(ldrb) r2, [r0]
mov r0, #0
- mov pc, lr
+ ret lr
ENDPROC(__get_user_1)
ENTRY(__get_user_2)
orr r2, rb, r2, lsl #8
#endif
mov r0, #0
- mov pc, lr
+ ret lr
ENDPROC(__get_user_2)
ENTRY(__get_user_4)
check_uaccess r0, 4, r1, r2, __get_user_bad
4: TUSER(ldr) r2, [r0]
mov r0, #0
- mov pc, lr
+ ret lr
ENDPROC(__get_user_4)
+ENTRY(__get_user_8)
+ check_uaccess r0, 8, r1, r2, __get_user_bad
+#ifdef CONFIG_THUMB2_KERNEL
+5: TUSER(ldr) r2, [r0]
+6: TUSER(ldr) r3, [r0, #4]
+#else
+5: TUSER(ldr) r2, [r0], #4
+6: TUSER(ldr) r3, [r0]
+#endif
+ mov r0, #0
+ ret lr
+ENDPROC(__get_user_8)
+
+#ifdef __ARMEB__
+ENTRY(__get_user_lo8)
+ check_uaccess r0, 8, r1, r2, __get_user_bad
+#ifdef CONFIG_CPU_USE_DOMAINS
+ add r0, r0, #4
+7: ldrt r2, [r0]
+#else
+7: ldr r2, [r0, #4]
+#endif
+ mov r0, #0
+ ret lr
+ENDPROC(__get_user_lo8)
+#endif
+
+__get_user_bad8:
+ mov r3, #0
__get_user_bad:
mov r2, #0
mov r0, #-EFAULT
- mov pc, lr
+ ret lr
ENDPROC(__get_user_bad)
+ENDPROC(__get_user_bad8)
.pushsection __ex_table, "a"
.long 1b, __get_user_bad
.long 2b, __get_user_bad
.long 3b, __get_user_bad
.long 4b, __get_user_bad
+ .long 5b, __get_user_bad8
+ .long 6b, __get_user_bad8
+#ifdef __ARMEB__
+ .long 7b, __get_user_bad
+#endif
.popsection
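With __get_user_8 available, a 64-bit get_user() no longer has to be rejected on ARM. A minimal, hypothetical caller (names invented for illustration) might look like:

#include <linux/types.h>
#include <linux/uaccess.h>

/* Fetch a 64-bit value from user space in a single call; on ARM this now
 * resolves to __get_user_8 instead of falling through to __get_user_bad.
 */
static int read_user_u64(u64 __user *uptr, u64 *out)
{
	u64 val;

	if (get_user(val, uptr))	/* non-zero return means -EFAULT */
		return -EFAULT;

	*out = val;
	return 0;
}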
ENTRY(__raw_readsb)
teq r2, #0 @ do we have to check for the zero len?
- moveq pc, lr
+ reteq lr
ands ip, r1, #3
bne .Linsb_align
ENTRY(__raw_readsl)
teq r2, #0 @ do we have to check for the zero len?
- moveq pc, lr
+ reteq lr
ands ip, r1, #3
bne 3f
stmcsia r1!, {r3, ip}
ldrne r3, [r0, #0]
strne r3, [r1, #0]
- mov pc, lr
+ ret lr
3: ldr r3, [r0]
cmp ip, #2
strb r3, [r1, #1]
8: mov r3, ip, get_byte_0
strb r3, [r1, #0]
- mov pc, lr
+ ret lr
ENDPROC(__raw_readsl)
strb r3, [r1], #1
subs r2, r2, #1
- moveq pc, lr
+ reteq lr
ENTRY(__raw_readsw)
teq r2, #0 @ do we have to check for the zero len?
- moveq pc, lr
+ reteq lr
tst r1, #3
bne .Linsw_align
ENTRY(__raw_readsw)
teq r2, #0
- moveq pc, lr
+ reteq lr
tst r1, #3
bne .Linsw_align
ENTRY(__raw_writesb)
teq r2, #0 @ do we have to check for the zero len?
- moveq pc, lr
+ reteq lr
ands ip, r1, #3
bne .Loutsb_align
ENTRY(__raw_writesl)
teq r2, #0 @ do we have to check for the zero len?
- moveq pc, lr
+ reteq lr
ands ip, r1, #3
bne 3f
ldrne r3, [r1, #0]
strcs ip, [r0, #0]
strne r3, [r0, #0]
- mov pc, lr
+ ret lr
3: bic r1, r1, #3
ldr r3, [r1], #4
orr ip, ip, r3, lspush #16
str ip, [r0]
bne 4b
- mov pc, lr
+ ret lr
5: mov ip, r3, lspull #8
ldr r3, [r1], #4
orr ip, ip, r3, lspush #24
str ip, [r0]
bne 5b
- mov pc, lr
+ ret lr
6: mov ip, r3, lspull #24
ldr r3, [r1], #4
orr ip, ip, r3, lspush #8
str ip, [r0]
bne 6b
- mov pc, lr
+ ret lr
ENDPROC(__raw_writesl)
orr r3, r3, r3, lsl #16
str r3, [r0]
subs r2, r2, #1
- moveq pc, lr
+ reteq lr
ENTRY(__raw_writesw)
teq r2, #0 @ do we have to check for the zero len?
- moveq pc, lr
+ reteq lr
tst r1, #3
bne .Loutsw_align
ENTRY(__raw_writesw)
teq r2, #0
- moveq pc, lr
+ reteq lr
ands r3, r1, #3
bne .Loutsw_align
tst r2, #1
3: movne ip, r3, lsr #8
strneh ip, [r0]
- mov pc, lr
+ ret lr
ENDPROC(__raw_writesw)
UNWIND(.fnstart)
subs r2, r1, #1
- moveq pc, lr
+ reteq lr
bcc Ldiv0
cmp r0, r1
bls 11f
ARM_DIV_BODY r0, r1, r2, r3
mov r0, r2
- mov pc, lr
+ ret lr
11: moveq r0, #1
movne r0, #0
- mov pc, lr
+ ret lr
12: ARM_DIV2_ORDER r1, r2
mov r0, r0, lsr r2
- mov pc, lr
+ ret lr
UNWIND(.fnend)
ENDPROC(__udivsi3)
moveq r0, #0
tsthi r1, r2 @ see if divisor is power of 2
andeq r0, r0, r2
- movls pc, lr
+ retls lr
ARM_MOD_BODY r0, r1, r2, r3
- mov pc, lr
+ ret lr
UNWIND(.fnend)
ENDPROC(__umodsi3)
cmp ip, #0
rsbmi r0, r0, #0
- mov pc, lr
+ ret lr
10: teq ip, r0 @ same sign ?
rsbmi r0, r0, #0
- mov pc, lr
+ ret lr
11: movlo r0, #0
moveq r0, ip, asr #31
orreq r0, r0, #1
- mov pc, lr
+ ret lr
12: ARM_DIV2_ORDER r1, r2
cmp ip, #0
mov r0, r3, lsr r2
rsbmi r0, r0, #0
- mov pc, lr
+ ret lr
UNWIND(.fnend)
ENDPROC(__divsi3)
10: cmp ip, #0
rsbmi r0, r0, #0
- mov pc, lr
+ ret lr
UNWIND(.fnend)
ENDPROC(__modsi3)
ldmfd sp!, {r1, r2, ip, lr}
mul r3, r0, r2
sub r1, r1, r3
- mov pc, lr
+ ret lr
UNWIND(.fnend)
ENDPROC(__aeabi_uidivmod)
ldmfd sp!, {r1, r2, ip, lr}
mul r3, r0, r2
sub r1, r1, r3
- mov pc, lr
+ ret lr
UNWIND(.fnend)
ENDPROC(__aeabi_idivmod)
#include <linux/linkage.h>
+#include <asm/assembler.h>
#ifdef __ARMEB__
#define al r1
THUMB( lslmi r3, ah, ip )
THUMB( orrmi al, al, r3 )
mov ah, ah, lsr r2
- mov pc, lr
+ ret lr
ENDPROC(__lshrdi3)
ENDPROC(__aeabi_llsr)
bne 1b
sub r0, r0, #1
2: movne r0, #0
- mov pc, lr
+ ret lr
ENDPROC(memchr)
strneb r1, [ip], #1
tst r2, #1
strneb r1, [ip], #1
- mov pc, lr
+ ret lr
6: subs r2, r2, #4 @ 1 do we have enough
blt 5b @ 1 bytes to align with?
strneb r2, [r0], #1 @ 1
tst r1, #1 @ 1 a byte left over
strneb r2, [r0], #1 @ 1
- mov pc, lr @ 1
+ ret lr @ 1
ENDPROC(__memzero)
*/
#include <linux/linkage.h>
+#include <asm/assembler.h>
#ifdef __ARMEB__
#define xh r0
adc xh, xh, yh, lsr #16
adds xl, xl, ip, lsl #16
adc xh, xh, ip, lsr #16
- mov pc, lr
+ ret lr
ENDPROC(__muldi3)
ENDPROC(__aeabi_lmul)
check_uaccess r0, 1, r1, ip, __put_user_bad
1: TUSER(strb) r2, [r0]
mov r0, #0
- mov pc, lr
+ ret lr
ENDPROC(__put_user_1)
ENTRY(__put_user_2)
#endif
#endif /* CONFIG_THUMB2_KERNEL */
mov r0, #0
- mov pc, lr
+ ret lr
ENDPROC(__put_user_2)
ENTRY(__put_user_4)
check_uaccess r0, 4, r1, ip, __put_user_bad
4: TUSER(str) r2, [r0]
mov r0, #0
- mov pc, lr
+ ret lr
ENDPROC(__put_user_4)
ENTRY(__put_user_8)
6: TUSER(str) r3, [r0]
#endif
mov r0, #0
- mov pc, lr
+ ret lr
ENDPROC(__put_user_8)
__put_user_bad:
mov r0, #-EFAULT
- mov pc, lr
+ ret lr
ENDPROC(__put_user_bad)
.pushsection __ex_table, "a"
teq r2, r1
movne r0, #0
subeq r0, r0, #1
- mov pc, lr
+ ret lr
ENDPROC(strchr)
teq r2, #0
bne 1b
mov r0, r3
- mov pc, lr
+ ret lr
ENDPROC(strrchr)
*/
#include <linux/linkage.h>
+#include <asm/assembler.h>
#ifdef __ARMEB__
#define xh r0
movlo r0, #0
moveq r0, #1
movhi r0, #2
- mov pc, lr
+ ret lr
ENDPROC(__ucmpdi2)
movlo r0, #-1
moveq r0, #0
movhi r0, #1
- mov pc, lr
+ ret lr
ENDPROC(__aeabi_ulcmp)
cmp ip, r0
bne ddr2clk_stop_done
- mov pc, lr
+ ret lr
ENDPROC(davinci_ddr_psc_config)
CACHE_FLUSH:
#ifndef __ASM_ARCH_MEMORY_H
#define __ASM_ARCH_MEMORY_H
-/*
- * Physical DRAM offset.
- */
-#define PLAT_PHYS_OFFSET UL(0x00000000)
-
/*
* Cache flushing area - SRAM
*/
get_thread_info r10
#endif
2: dec_preempt_count r10, r3
- mov pc, lr
+ ret lr
/*
* Back up crunch regs to save area and disable access to them
mov r3, lr @ preserve return address
bl crunch_save
msr cpsr_c, ip @ restore interrupt mode
- mov pc, r3
+ ret r3
/*
* Restore crunch state from given memory address
mov r3, lr @ preserve return address
bl crunch_load
msr cpsr_c, ip @ restore interrupt mode
- mov pc, r3
+ ret r3
+++ /dev/null
-/*
- * arch/arm/mach-ep93xx/include/mach/memory.h
- */
-
-#ifndef __ASM_ARCH_MEMORY_H
-#define __ASM_ARCH_MEMORY_H
-
-#if defined(CONFIG_EP93XX_SDCE3_SYNC_PHYS_OFFSET)
-#define PLAT_PHYS_OFFSET UL(0x00000000)
-#elif defined(CONFIG_EP93XX_SDCE0_PHYS_OFFSET)
-#define PLAT_PHYS_OFFSET UL(0xc0000000)
-#elif defined(CONFIG_EP93XX_SDCE1_PHYS_OFFSET)
-#define PLAT_PHYS_OFFSET UL(0xd0000000)
-#elif defined(CONFIG_EP93XX_SDCE2_PHYS_OFFSET)
-#define PLAT_PHYS_OFFSET UL(0xe0000000)
-#elif defined(CONFIG_EP93XX_SDCE3_ASYNC_PHYS_OFFSET)
-#define PLAT_PHYS_OFFSET UL(0xf0000000)
-#else
-#error "Kconfig bug: No EP93xx PHYS_OFFSET set"
-#endif
-
-#endif
bool "Exynos5420 Multi-Cluster PM support"
depends on MCPM && SOC_EXYNOS5420
select ARM_CCI
+ select ARM_CPU_SUSPEND
help
This is needed to provide CPU and cluster power management
on Exynos5420 implementing big.LITTLE.
if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) {
arch_spin_unlock(&exynos_mcpm_lock);
- if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A15) {
+ if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A15) {
/*
* On the Cortex-A15 we need to disable
* L2 prefetching before flushing the cache.
"b cci_enable_port_for_self");
}
+static void __init exynos_cache_off(void)
+{
+ if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A15) {
+ /* disable L2 prefetching on the Cortex-A15 */
+ asm volatile(
+ "mcr p15, 1, %0, c15, c0, 3\n\t"
+ "isb\n\t"
+ "dsb"
+ : : "r" (0x400));
+ }
+ exynos_v7_exit_coherency_flush(all);
+}
+
static const struct of_device_id exynos_dt_mcpm_match[] = {
{ .compatible = "samsung,exynos5420" },
{ .compatible = "samsung,exynos5800" },
ret = mcpm_platform_register(&exynos_power_ops);
if (!ret)
ret = mcpm_sync_init(exynos_pm_power_up_setup);
+ if (!ret)
+ ret = mcpm_loopback(exynos_cache_off); /* turn on the CCI */
if (ret) {
iounmap(ns_sram_base_addr);
return ret;
void __iomem *scu_base = scu_base_addr();
unsigned int i, ncores;
- if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9)
+ if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9)
ncores = scu_base ? scu_get_core_count(scu_base) : 1;
else
/*
exynos_sysram_init();
- if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9)
+ if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9)
scu_enable(scu_base_addr());
/*
tmp = (S5P_USE_STANDBY_WFI0 | S5P_USE_STANDBY_WFE0);
__raw_writel(tmp, S5P_CENTRAL_SEQ_OPTION);
- if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9)
+ if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9)
exynos_cpu_save_register();
return 0;
if (exynos_pm_central_resume())
goto early_wakeup;
- if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9)
+ if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9)
exynos_cpu_restore_register();
/* For release retention */
s3c_pm_do_restore_core(exynos_core_save, ARRAY_SIZE(exynos_core_save));
- if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9)
+ if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9)
scu_enable(S5P_VA_SCU);
early_wakeup:
case CPU_PM_ENTER:
if (cpu == 0) {
exynos_pm_central_suspend();
- if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9)
+ if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9)
exynos_cpu_save_register();
}
break;
case CPU_PM_EXIT:
if (cpu == 0) {
- if (read_cpuid_part_number() ==
- ARM_CPU_PART_CORTEX_A9) {
+ if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9) {
scu_enable(S5P_VA_SCU);
exynos_cpu_restore_register();
}
*/
#define FLUSH_BASE 0xf9000000
-/*
- * Physical DRAM offset.
- */
-#define PLAT_PHYS_OFFSET UL(0x00000000)
-
#define FLUSH_BASE_PHYS 0x50000000
#endif
*/
#include <linux/linkage.h>
+#include <asm/assembler.h>
#include <asm/asm-offsets.h>
#include <asm/hardware/cache-l2x0.h>
#include "hardware.h"
resume_mmdc
/* return to suspend finish */
- mov pc, lr
+ ret lr
resume:
/* invalidate L1 I-cache first */
mov r5, #0x1
resume_mmdc
- mov pc, lr
+ ret lr
ENDPROC(imx6_suspend)
/*
#ifndef __ASM_ARCH_MEMORY_H
#define __ASM_ARCH_MEMORY_H
-/*
- * Physical DRAM offset.
- */
-#define PLAT_PHYS_OFFSET UL(0x00000000)
-
#define BUS_OFFSET UL(0x80000000)
#define __virt_to_bus(x) ((x) - PAGE_OFFSET + BUS_OFFSET)
#define __bus_to_virt(x) ((x) - BUS_OFFSET + PAGE_OFFSET)
#ifndef __ASSEMBLY__
-#include <linux/reboot.h>
+enum reboot_mode;
/* The ATU offsets can change based on the strapping */
extern u32 iop13xx_atux_pmmr_offset;
#include <mach/hardware.h>
-/*
- * Physical DRAM offset.
- */
-#define PLAT_PHYS_OFFSET UL(0x00000000)
-
#ifndef __ASSEMBLY__
#if defined(CONFIG_ARCH_IOP13XX)
#include <linux/dma-mapping.h>
#include <linux/serial_8250.h>
#include <linux/io.h>
+#include <linux/reboot.h>
#ifdef CONFIG_MTD_PHYSMAP
#include <linux/mtd/physmap.h>
#endif
#include <mach/hardware.h>
-/*
- * Physical SRAM offset.
- */
-#define PLAT_PHYS_OFFSET KS8695_SDRAM_PA
-
#ifndef __ASSEMBLY__
#ifdef CONFIG_PCI
ldr r1, =coherency_base
ldr r1, [r1]
2:
- mov pc, lr
+ ret lr
ENDPROC(ll_get_coherency_base)
/*
mov r2, #(1 << 24)
lsl r3, r2, r3
ARM_BE8(rev r3, r3)
- mov pc, lr
+ ret lr
ENDPROC(ll_get_coherency_cpumask)
/*
strex r1, r2, [r0]
cmp r1, #0
bne 1b
- mov pc, lr
+ ret lr
ENDPROC(ll_add_cpu_to_smp_group)
ENTRY(ll_enable_coherency)
bne 1b
dsb
mov r0, #0
- mov pc, lr
+ ret lr
ENDPROC(ll_enable_coherency)
ENTRY(ll_disable_coherency)
cmp r1, #0
bne 1b
dsb
- mov pc, lr
+ ret lr
ENDPROC(ll_disable_coherency)
.align 2
ldr r0, [r0]
ldr r1, [r0]
ARM_BE8(rev r1, r1)
- mov pc, r1
+ ret r1
1:
.word CPU_RESUME_ADDR_REG
armada_375_smp_cpu1_enable_code_end:
#ifndef __ASM_ARCH_MEMORY_H
#define __ASM_ARCH_MEMORY_H
-/*
- * Physical DRAM offset.
- */
-#define PLAT_PHYS_OFFSET UL(0x10000000)
-
/*
* Bus address is physical address, except for OMAP-1510 Local Bus.
* OMAP-1510 bus address is translated into a Local Bus address if the
*/
#include <linux/linkage.h>
+#include <asm/assembler.h>
#include <asm/smp_scu.h>
#include <asm/memory.h>
#include <asm/hardware/cache-l2x0.h>
#ifndef CONFIG_OMAP4_ERRATA_I688
ENTRY(omap_bus_sync)
- mov pc, lr
+ ret lr
ENDPROC(omap_bus_sync)
#endif
i_dll_delay:
subs r4, r4, #0x1
bne i_dll_delay
- mov pc, lr
+ ret lr
/*
* shift up or down voltage, use R9 as input to tell level.
ldr r7, [r3] @ get timer value
cmp r5, r7 @ time up?
bhi volt_delay @ not yet->branch
- mov pc, lr @ back to caller.
+ ret lr @ back to caller.
omap242x_sdi_cm_clksel2_pll:
.word OMAP2420_CM_REGADDR(PLL_MOD, CM_CLKSEL2)
ldr r7, [r10] @ get timer value
cmp r8, r7 @ time up?
bhi volt_delay_c @ not yet->branch
- mov pc, lr @ back to caller
+ ret lr @ back to caller
omap242x_srs_cm_clksel2_pll:
.word OMAP2420_CM_REGADDR(PLL_MOD, CM_CLKSEL2)
i_dll_delay:
subs r4, r4, #0x1
bne i_dll_delay
- mov pc, lr
+ ret lr
/*
* shift up or down voltage, use R9 as input to tell level.
ldr r7, [r3] @ get timer value
cmp r5, r7 @ time up?
bhi volt_delay @ not yet->branch
- mov pc, lr @ back to caller.
+ ret lr @ back to caller.
omap243x_sdi_cm_clksel2_pll:
.word OMAP2430_CM_REGADDR(PLL_MOD, CM_CLKSEL2)
ldr r7, [r10] @ get timer value
cmp r8, r7 @ time up?
bhi volt_delay_c @ not yet->branch
- mov pc, lr @ back to caller
+ ret lr @ back to caller
omap243x_srs_cm_clksel2_pll:
.word OMAP2430_CM_REGADDR(PLL_MOD, CM_CLKSEL2)
str r1, [r0] @ Early disable resume for next boot
ldr r0, mioa701_jumpaddr @ (Murphy's Law)
ldr r0, [r0]
- mov pc, r0
+ ret r0
2:
ENTRY(mioa701_bootstrap_lg)
.align 5
1: mcr p14, 0, r2, c7, c0, 0 @ put the system into Standby
str r1, [r0] @ make sure PSSR_PH/STS are clear
- mov pc, lr
+ ret lr
#endif
bic r0, r0, #0x20000000
str r0, [r1, #PXA3_DMCIER]
- mov pc, lr
+ ret lr
ENTRY(pm_enter_standby_end)
#endif
#ifndef __ASM_ARCH_MEMORY_H
#define __ASM_ARCH_MEMORY_H
-/*
- * Physical DRAM offset.
- */
-#ifdef CONFIG_REALVIEW_HIGH_PHYS_OFFSET
-#define PLAT_PHYS_OFFSET UL(0x70000000)
-#else
-#define PLAT_PHYS_OFFSET UL(0x00000000)
-#endif
-
#ifdef CONFIG_SPARSEMEM
/*
#ifndef __ASM_ARCH_MEMORY_H
#define __ASM_ARCH_MEMORY_H
-/*
- * Physical DRAM offset.
- */
-#define PLAT_PHYS_OFFSET UL(0x10000000)
-
/*
* Cache flushing area - ROM
*/
streq r8, [r5] @ SDRAM power-down config
streq r9, [r6] @ CPU sleep
1: beq 1b
- mov pc, r14
+ ret lr
strne r9, [r3]
bne s3c2412_sleep_enter1
- mov pc, r14
+ ret lr
#ifndef __ASM_ARCH_MEMORY_H
#define __ASM_ARCH_MEMORY_H
-#define PLAT_PHYS_OFFSET UL(0x20000000)
-
/*
* Sparsemem support
* Physical memory can be located from 0x20000000 to 0x7fffffff,
#include <asm/sizes.h>
-/*
- * Physical DRAM offset is 0xc0000000 on the SA1100
- */
-#define PLAT_PHYS_OFFSET UL(0xc0000000)
-
/*
* Because of the wide memory address space between physical RAM banks on the
* SA1100, it's much more convenient to use Linux's SparseMEM support to implement
*/
#include <linux/linkage.h>
#include <linux/init.h>
+#include <asm/assembler.h>
#include <asm/memory.h>
ENTRY(shmobile_invalidate_start)
shmobile_smp_boot_found:
ldr r0, [r7, r1, lsl #2]
- mov pc, r9
+ ret r9
ENDPROC(shmobile_smp_boot)
ENTRY(shmobile_smp_sleep)
/* Put this CPU down */
cpu_id r0
bl tegra20_cpu_shutdown
- mov pc, lr @ should never get here
+ ret lr @ should never get here
ENDPROC(tegra20_hotplug_shutdown)
/*
*/
ENTRY(tegra20_cpu_shutdown)
cmp r0, #0
- moveq pc, lr @ must not be called for CPU 0
+ reteq lr @ must not be called for CPU 0
mov32 r1, TEGRA_PMC_VIRT + PMC_SCRATCH41
mov r12, #CPU_RESETTABLE
str r12, [r1]
cpu_id r3
cmp r3, r0
beq .
- mov pc, lr
+ ret lr
ENDPROC(tegra20_cpu_shutdown)
#endif
cmpeq r12, r0 @ !turn == cpu?
beq 1b @ while !turn == cpu && flag[!cpu] == 1
- mov pc, lr @ locked
+ ret lr @ locked
ENDPROC(tegra_pen_lock)
ENTRY(tegra_pen_unlock)
addne r2, r3, #PMC_SCRATCH39
mov r12, #0
str r12, [r2]
- mov pc, lr
+ ret lr
ENDPROC(tegra_pen_unlock)
/*
mov32 r1, TEGRA_PMC_VIRT + PMC_SCRATCH41
mov r12, #CPU_NOT_RESETTABLE
str r12, [r1]
- mov pc, lr
+ ret lr
ENDPROC(tegra20_cpu_clear_resettable)
/*
mov32 r1, TEGRA_PMC_VIRT + PMC_SCRATCH41
mov r12, #CPU_RESETTABLE_SOON
str r12, [r1]
- mov pc, lr
+ ret lr
ENDPROC(tegra20_cpu_set_resettable_soon)
/*
cmp r12, #CPU_RESETTABLE_SOON
moveq r0, #1
movne r0, #0
- mov pc, lr
+ ret lr
ENDPROC(tegra20_cpu_is_resettable_soon)
/*
mov32 r1, TEGRA_IRAM_LPx_RESUME_AREA
add r0, r0, r1
- mov pc, r3
+ ret r3
ENDPROC(tegra20_sleep_core_finish)
/*
mov32 r0, TEGRA_PMC_BASE
ldr r0, [r0, #PMC_SCRATCH41]
- mov pc, r0 @ jump to tegra_resume
+ ret r0 @ jump to tegra_resume
ENDPROC(tegra20_lp1_reset)
/*
mov r0, #0 /* burst policy = 32KHz */
str r0, [r5, #CLK_RESET_SCLK_BURST]
- mov pc, lr
+ ret lr
/*
* tegra20_enter_sleep
adr r2, tegra20_sclk_save
str r0, [r2]
dsb
- mov pc, lr
+ ret lr
tegra20_sdram_pad_address:
.word TEGRA_APB_MISC_BASE + APB_MISC_XM2CFGCPADCTRL
/* Powergate this CPU */
mov r0, #TEGRA30_POWER_HOTPLUG_SHUTDOWN
bl tegra30_cpu_shutdown
- mov pc, lr @ should never get here
+ ret lr @ should never get here
ENDPROC(tegra30_hotplug_shutdown)
/*
bne _no_cpu0_chk @ It's not Tegra30
cmp r3, #0
- moveq pc, lr @ Must never be called for CPU 0
+ reteq lr @ Must never be called for CPU 0
_no_cpu0_chk:
ldr r12, =TEGRA_FLOW_CTRL_VIRT
mov32 r1, TEGRA_IRAM_LPx_RESUME_AREA
add r0, r0, r1
- mov pc, r3
+ ret r3
ENDPROC(tegra30_sleep_core_finish)
/*
mov r0, #0 @ power mode flags (!hotplug)
bl tegra30_cpu_shutdown
mov r0, #1 @ never return here
- mov pc, r7
+ ret r7
ENDPROC(tegra30_sleep_cpu_secondary_finish)
/*
mov32 r0, TEGRA_PMC_BASE
ldr r0, [r0, #PMC_SCRATCH41]
- mov pc, r0 @ jump to tegra_resume
+ ret r0 @ jump to tegra_resume
ENDPROC(tegra30_lp1_reset)
.align L1_CACHE_SHIFT
mov r0, #0 /* burst policy = 32KHz */
str r0, [r5, #CLK_RESET_SCLK_BURST]
- mov pc, lr
+ ret lr
/*
* tegra30_enter_sleep
dsb
- mov pc, lr
+ ret lr
.ltorg
/* dummy symbol for end of IRAM */
mcrne p15, 0x1, r0, c9, c0, 2
_exit_init_l2_a15:
- mov pc, lr
+ ret lr
ENDPROC(tegra_init_l2_for_a15)
/*
add r3, r3, r0
mov r0, r1
- mov pc, r3
+ ret r3
ENDPROC(tegra_sleep_cpu_finish)
/*
moveq r3, #0
streq r3, [r2, #L2X0_CTRL]
#endif
- mov pc, r0
+ ret r0
ENDPROC(tegra_shut_off_mmu)
.popsection
str r0, [r5, #CLK_RESET_CCLK_BURST]
mov r0, #0
str r0, [r5, #CLK_RESET_CCLK_DIVIDER]
- mov pc, lr
+ ret lr
ENDPROC(tegra_switch_cpu_to_pllp)
#endif
if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) {
arch_spin_unlock(&tc2_pm_lock);
- if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A15) {
+ if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A15) {
/*
* On the Cortex-A15 we need to disable
* L2 prefetching before flushing the cache.
" b cci_enable_port_for_self ");
}
+static void __init tc2_cache_off(void)
+{
+ pr_info("TC2: disabling cache during MCPM loopback test\n");
+ if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A15) {
+ /* disable L2 prefetching on the Cortex-A15 */
+ asm volatile(
+ "mcr p15, 1, %0, c15, c0, 3 \n\t"
+ "isb \n\t"
+ "dsb "
+ : : "r" (0x400) );
+ }
+ v7_exit_coherency_flush(all);
+ cci_disable_port_by_cpu(read_cpuid_mpidr());
+}
+
static int __init tc2_pm_init(void)
{
int ret, irq;
ret = mcpm_platform_register(&tc2_pm_power_ops);
if (!ret) {
mcpm_sync_init(tc2_pm_power_up_setup);
+ /* test if we can (re)enable the CCI on our own */
+ BUG_ON(mcpm_loopback(tc2_cache_off) != 0);
pr_info("TC2 power management initialized\n");
}
return ret;
details.
config SWP_EMULATE
- bool "Emulate SWP/SWPB instructions"
+ bool "Emulate SWP/SWPB instructions" if !SMP
depends on CPU_V7
default y if SMP
select HAVE_PROC_CPU if PROC_FS
They are architecturally defined to behave as the execution of a
clean operation followed immediately by an invalidate operation,
both performing to the same memory location. This functionality
- is not correctly implemented in PL310 as clean lines are not
- invalidated as a result of these operations.
+ is not correctly implemented in PL310 prior to r2p0 (fixed in r2p0)
+ as clean lines are not invalidated as a result of these operations.
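For reference, the workaround this option enables replaces the faulty combined Clean & Invalidate by PA operation with two separate background operations. A minimal C sketch of that idea (illustrative only; l2x0_base and cache_wait() are hypothetical helpers, while L2X0_CLEAN_LINE_PA and L2X0_INV_LINE_PA are the usual register offsets from <asm/hardware/cache-l2x0.h>):

	static void l2x0_flush_line_588369(unsigned long addr)
	{
		void __iomem *base = l2x0_base;		/* hypothetical mapped L2C-310 base */

		/* Clean by PA, then Invalidate by PA, instead of the broken
		 * combined Clean & Invalidate by PA operation. */
		cache_wait(base + L2X0_CLEAN_LINE_PA, 1);
		writel_relaxed(addr, base + L2X0_CLEAN_LINE_PA);
		cache_wait(base + L2X0_INV_LINE_PA, 1);
		writel_relaxed(addr, base + L2X0_INV_LINE_PA);
	}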
config PL310_ERRATA_727915
bool "PL310 errata: Background Clean & Invalidate by Way operation can cause data corruption"
PL310 can handle normal accesses while it is in progress. Under very
rare circumstances, due to this erratum, write data can be lost when
PL310 treats a cacheable write transaction during a Clean &
- Invalidate by Way operation.
+ Invalidate by Way operation. Revisions prior to r3p1 are affected by
+ this erratum (fixed in r3p1).
config PL310_ERRATA_753970
bool "PL310 errata: cache sync operation may be faulty"
static unsigned long ai_user;
static unsigned long ai_sys;
+static void *ai_sys_last_pc;
static unsigned long ai_skipped;
static unsigned long ai_half;
static unsigned long ai_word;
static int alignment_proc_show(struct seq_file *m, void *v)
{
seq_printf(m, "User:\t\t%lu\n", ai_user);
- seq_printf(m, "System:\t\t%lu\n", ai_sys);
+ seq_printf(m, "System:\t\t%lu (%pF)\n", ai_sys, ai_sys_last_pc);
seq_printf(m, "Skipped:\t%lu\n", ai_skipped);
seq_printf(m, "Half:\t\t%lu\n", ai_half);
seq_printf(m, "Word:\t\t%lu\n", ai_word);
goto user;
ai_sys += 1;
+ ai_sys_last_pc = (void *)instruction_pointer(regs);
fixup:
*/
#include <linux/linkage.h>
#include <linux/init.h>
+#include <asm/assembler.h>
#include <asm/memory.h>
#include <asm/page.h>
ENTRY(fa_flush_icache_all)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
- mov pc, lr
+ ret lr
ENDPROC(fa_flush_icache_all)
/*
mcrne p15, 0, ip, c7, c5, 6 @ invalidate BTB
mcrne p15, 0, ip, c7, c10, 4 @ drain write buffer
mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush
- mov pc, lr
+ ret lr
/*
* flush_user_cache_range(start, end, flags)
mcrne p15, 0, ip, c7, c5, 6 @ invalidate BTB
mcrne p15, 0, ip, c7, c10, 4 @ data write barrier
mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush
- mov pc, lr
+ ret lr
/*
* coherent_kern_range(start, end)
mcr p15, 0, r0, c7, c5, 6 @ invalidate BTB
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
mcr p15, 0, r0, c7, c5, 4 @ prefetch flush
- mov pc, lr
+ ret lr
/*
* flush_kern_dcache_area(void *addr, size_t size)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
- mov pc, lr
+ ret lr
/*
* dma_inv_range(start, end)
blo 1b
mov r0, #0
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
- mov pc, lr
+ ret lr
/*
* dma_clean_range(start, end)
blo 1b
mov r0, #0
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
- mov pc, lr
+ ret lr
/*
* dma_flush_range(start,end)
blo 1b
mov r0, #0
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
- mov pc, lr
+ ret lr
/*
* dma_map_area(start, size, dir)
* - dir - DMA direction
*/
ENTRY(fa_dma_unmap_area)
- mov pc, lr
+ ret lr
ENDPROC(fa_dma_unmap_area)
.globl fa_flush_kern_cache_louis
static void __init l2c310_enable(void __iomem *base, u32 aux, unsigned num_lock)
{
unsigned rev = readl_relaxed(base + L2X0_CACHE_ID) & L2X0_CACHE_ID_RTL_MASK;
- bool cortex_a9 = read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9;
+ bool cortex_a9 = read_cpuid_part() == ARM_CPU_PART_CORTEX_A9;
if (rev >= L310_CACHE_ID_RTL_R2P0) {
if (cortex_a9) {
*/
#include <linux/linkage.h>
#include <linux/init.h>
+#include <asm/assembler.h>
#include "proc-macros.S"
ENTRY(nop_flush_icache_all)
- mov pc, lr
+ ret lr
ENDPROC(nop_flush_icache_all)
.globl nop_flush_kern_cache_all
ENTRY(nop_coherent_user_range)
mov r0, 0
- mov pc, lr
+ ret lr
ENDPROC(nop_coherent_user_range)
.globl nop_flush_kern_dcache_area
*/
#include <linux/linkage.h>
#include <linux/init.h>
+#include <asm/assembler.h>
#include <asm/page.h>
#include "proc-macros.S"
* Unconditionally clean and invalidate the entire icache.
*/
ENTRY(v4_flush_icache_all)
- mov pc, lr
+ ret lr
ENDPROC(v4_flush_icache_all)
/*
#ifdef CONFIG_CPU_CP15
mov r0, #0
mcr p15, 0, r0, c7, c7, 0 @ flush ID cache
- mov pc, lr
+ ret lr
#else
/* FALLTHROUGH */
#endif
#ifdef CONFIG_CPU_CP15
mov ip, #0
mcr p15, 0, ip, c7, c7, 0 @ flush ID cache
- mov pc, lr
+ ret lr
#else
/* FALLTHROUGH */
#endif
*/
ENTRY(v4_coherent_user_range)
mov r0, #0
- mov pc, lr
+ ret lr
/*
* flush_kern_dcache_area(void *addr, size_t size)
mov r0, #0
mcr p15, 0, r0, c7, c7, 0 @ flush ID cache
#endif
- mov pc, lr
+ ret lr
/*
* dma_unmap_area(start, size, dir)
* - dir - DMA direction
*/
ENTRY(v4_dma_map_area)
- mov pc, lr
+ ret lr
ENDPROC(v4_dma_unmap_area)
ENDPROC(v4_dma_map_area)
*/
#include <linux/linkage.h>
#include <linux/init.h>
+#include <asm/assembler.h>
#include <asm/memory.h>
#include <asm/page.h>
#include "proc-macros.S"
ENTRY(v4wb_flush_icache_all)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
- mov pc, lr
+ ret lr
ENDPROC(v4wb_flush_icache_all)
/*
blo 1b
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain write buffer
- mov pc, lr
+ ret lr
/*
* flush_user_cache_range(start, end, flags)
blo 1b
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c10, 4 @ drain write buffer
- mov pc, lr
+ ret lr
/*
* flush_kern_dcache_area(void *addr, size_t size)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
- mov pc, lr
+ ret lr
/*
* dma_clean_range(start, end)
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
- mov pc, lr
+ ret lr
/*
* dma_flush_range(start, end)
* - dir - DMA direction
*/
ENTRY(v4wb_dma_unmap_area)
- mov pc, lr
+ ret lr
ENDPROC(v4wb_dma_unmap_area)
.globl v4wb_flush_kern_cache_louis
*/
#include <linux/linkage.h>
#include <linux/init.h>
+#include <asm/assembler.h>
#include <asm/page.h>
#include "proc-macros.S"
ENTRY(v4wt_flush_icache_all)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
- mov pc, lr
+ ret lr
ENDPROC(v4wt_flush_icache_all)
/*
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache
- mov pc, lr
+ ret lr
/*
* flush_user_cache_range(start, end, flags)
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
- mov pc, lr
+ ret lr
/*
* coherent_kern_range(start, end)
cmp r0, r1
blo 1b
mov r0, #0
- mov pc, lr
+ ret lr
/*
* flush_kern_dcache_area(void *addr, size_t size)
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
- mov pc, lr
+ ret lr
/*
* dma_flush_range(start, end)
* - dir - DMA direction
*/
ENTRY(v4wt_dma_map_area)
- mov pc, lr
+ ret lr
ENDPROC(v4wt_dma_unmap_area)
ENDPROC(v4wt_dma_map_area)
#else
mcr p15, 0, r0, c7, c5, 0 @ invalidate I-cache
#endif
- mov pc, lr
+ ret lr
ENDPROC(v6_flush_icache_all)
/*
#else
mcr p15, 0, r0, c7, c15, 0 @ Cache clean+invalidate
#endif
- mov pc, lr
+ ret lr
/*
* v6_flush_cache_all()
* - we have a VIPT cache.
*/
ENTRY(v6_flush_user_cache_range)
- mov pc, lr
+ ret lr
/*
* v6_coherent_kern_range(start,end)
#else
mcr p15, 0, r0, c7, c5, 6 @ invalidate BTB
#endif
- mov pc, lr
+ ret lr
/*
* Fault handling for the cache operation above. If the virtual address in r0
*/
9001:
mov r0, #-EFAULT
- mov pc, lr
+ ret lr
UNWIND(.fnend )
ENDPROC(v6_coherent_user_range)
ENDPROC(v6_coherent_kern_range)
mov r0, #0
mcr p15, 0, r0, c7, c10, 4
#endif
- mov pc, lr
+ ret lr
/*
blo 1b
mov r0, #0
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
- mov pc, lr
+ ret lr
/*
* v6_dma_clean_range(start,end)
blo 1b
mov r0, #0
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
- mov pc, lr
+ ret lr
/*
* v6_dma_flush_range(start,end)
blo 1b
mov r0, #0
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
- mov pc, lr
+ ret lr
/*
* dma_map_area(start, size, dir)
teq r2, #DMA_TO_DEVICE
bne v6_dma_inv_range
#endif
- mov pc, lr
+ ret lr
ENDPROC(v6_dma_unmap_area)
.globl v6_flush_kern_cache_louis
bgt 1b
dsb st
isb
- mov pc, lr
+ ret lr
ENDPROC(v7_invalidate_l1)
/*
mov r0, #0
ALT_SMP(mcr p15, 0, r0, c7, c1, 0) @ invalidate I-cache inner shareable
ALT_UP(mcr p15, 0, r0, c7, c5, 0) @ I+BTB cache invalidate
- mov pc, lr
+ ret lr
ENDPROC(v7_flush_icache_all)
/*
ALT_UP(ands r3, r0, #(7 << 27)) @ extract LoUU from clidr
#ifdef CONFIG_ARM_ERRATA_643719
ALT_SMP(mrceq p15, 0, r2, c0, c0, 0) @ read main ID register
- ALT_UP(moveq pc, lr) @ LoUU is zero, so nothing to do
+ ALT_UP(reteq lr) @ LoUU is zero, so nothing to do
ldreq r1, =0x410fc090 @ ID of ARM Cortex A9 r0p?
biceq r2, r2, #0x0000000f @ clear minor revision number
teqeq r2, r1 @ test for errata affected core and if so...
#endif
ALT_SMP(mov r3, r3, lsr #20) @ r3 = LoUIS * 2
ALT_UP(mov r3, r3, lsr #26) @ r3 = LoUU * 2
- moveq pc, lr @ return if level == 0
+ reteq lr @ return if level == 0
mov r10, #0 @ r10 (starting level) = 0
b flush_levels @ start flushing cache levels
ENDPROC(v7_flush_dcache_louis)
mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr
dsb st
isb
- mov pc, lr
+ ret lr
ENDPROC(v7_flush_dcache_all)
/*
ALT_UP(mcr p15, 0, r0, c7, c5, 0) @ I+BTB cache invalidate
ARM( ldmfd sp!, {r4-r5, r7, r9-r11, lr} )
THUMB( ldmfd sp!, {r4-r7, r9-r11, lr} )
- mov pc, lr
+ ret lr
ENDPROC(v7_flush_kern_cache_all)
/*
ALT_UP(mcr p15, 0, r0, c7, c5, 0) @ I+BTB cache invalidate
ARM( ldmfd sp!, {r4-r5, r7, r9-r11, lr} )
THUMB( ldmfd sp!, {r4-r7, r9-r11, lr} )
- mov pc, lr
+ ret lr
ENDPROC(v7_flush_kern_cache_louis)
/*
* - we have a VIPT cache.
*/
ENTRY(v7_flush_user_cache_range)
- mov pc, lr
+ ret lr
ENDPROC(v7_flush_user_cache_all)
ENDPROC(v7_flush_user_cache_range)
ALT_UP(mcr p15, 0, r0, c7, c5, 6) @ invalidate BTB
dsb ishst
isb
- mov pc, lr
+ ret lr
/*
* Fault handling for the cache operation above. If the virtual address in r0
dsb
#endif
mov r0, #-EFAULT
- mov pc, lr
+ ret lr
UNWIND(.fnend )
ENDPROC(v7_coherent_kern_range)
ENDPROC(v7_coherent_user_range)
cmp r0, r1
blo 1b
dsb st
- mov pc, lr
+ ret lr
ENDPROC(v7_flush_kern_dcache_area)
/*
cmp r0, r1
blo 1b
dsb st
- mov pc, lr
+ ret lr
ENDPROC(v7_dma_inv_range)
/*
cmp r0, r1
blo 1b
dsb st
- mov pc, lr
+ ret lr
ENDPROC(v7_dma_clean_range)
/*
cmp r0, r1
blo 1b
dsb st
- mov pc, lr
+ ret lr
ENDPROC(v7_dma_flush_range)
/*
add r1, r1, r0
teq r2, #DMA_TO_DEVICE
bne v7_dma_inv_range
- mov pc, lr
+ ret lr
ENDPROC(v7_dma_unmap_area)
__INITDATA
.val = PMD_SECT_USER,
.set = "USR",
}, {
- .mask = PMD_SECT_RDONLY,
- .val = PMD_SECT_RDONLY,
+ .mask = L_PMD_SECT_RDONLY,
+ .val = L_PMD_SECT_RDONLY,
.set = "ro",
.clear = "RW",
#elif __LINUX_ARM_ARCH__ >= 6
* This code can only be used if you are running in the secure world.
*/
#include <linux/linkage.h>
+#include <asm/assembler.h>
#include <asm/hardware/cache-l2x0.h>
.text
@ Check that the address has been initialised
teq r1, #0
- moveq pc, lr
+ reteq lr
@ The prefetch and power control registers are revision dependent
@ and can be written whether or not the L2 cache is enabled
@ Don't setup the L2 cache if it is already enabled
ldr r0, [r1, #L2X0_CTRL]
tst r0, #L2X0_CTRL_EN
- movne pc, lr
+ retne lr
str r3, [r1, #L310_TAG_LATENCY_CTRL]
str r4, [r1, #L310_DATA_LATENCY_CTRL]
str r2, [r1, #L2X0_AUX_CTRL]
mov r9, #L2X0_CTRL_EN
str r9, [r1, #L2X0_CTRL]
- mov pc, lr
+ ret lr
ENDPROC(l2c310_early_resume)
.align
dsb(ishst);
isb();
- /* remap level 1 table */
+ /*
+ * FIXME: This code is not architecturally compliant: we modify
+ * the mappings in-place, indeed while they are in use by this
+ * very same code. This may lead to unpredictable behaviour of
+ * the CPU.
+ *
+ * Even modifying the mappings in a separate page table does
+ * not resolve this.
+ *
+ * The architecture strongly recommends that when a mapping is
+ * changed, it is changed by first going via an invalid
+ * mapping and back to the new mapping. This is to ensure that
+ * no TLB conflicts (caused by the TLB having more than one TLB
+ * entry match a translation) can occur. However, doing that
+ * here will result in unmapping the code we are running.
+ */
+ pr_warn("WARNING: unsafe modification of in-place page tables - tainting kernel\n");
+ add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
+
+ /*
+ * Remap level 1 table. This changes the physical addresses
+ * used to refer to the level 2 page tables to the high
+ * physical address alias, leaving everything else the same.
+ */
for (i = 0; i < PTRS_PER_PGD; pud0++, i++) {
set_pud(pud0,
__pud(__pa(pmd0) | PMD_TYPE_TABLE | L_PGD_SWAPPER));
pmd0 += PTRS_PER_PMD;
}
- /* remap pmds for kernel mapping */
+ /*
+ * Remap the level 2 table, pointing the mappings at the high
+ * physical address alias of these pages.
+ */
phys = __pa(map_start);
do {
*pmdk++ = __pmd(phys | pmdprot);
phys += PMD_SIZE;
} while (phys < map_end);
+ /*
+ * Ensure that the above updates are flushed out of the cache.
+ * This is not strictly correct; on a system where the caches
+ * are coherent with each other, but the MMU page table walks
+ * may not be coherent, flush_cache_all() may be a no-op, and
+ * this will fail.
+ */
flush_cache_all();
+
+ /*
+ * Re-write the TTBR values to point them at the high physical
+ * alias of the page tables. We expect __va() will work on
+ * cpu_get_pgd(), which returns the value of TTBR0.
+ */
cpu_switch_mm(pgd0, &init_mm);
cpu_set_ttbr(1, __pa(pgd0) + TTBR1_OFFSET);
+
+ /* Finally flush any stale TLB values. */
local_flush_bp_all();
local_flush_tlb_all();
}
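For illustration, the break-before-make sequence the FIXME above refers to would look roughly like the sketch below, using the same set_pud()/__pud() and local_flush_tlb_all() helpers seen in this function. It cannot be used here because step 1 would unmap the code performing the update; remap_entry_bbm is a hypothetical name:

	static void remap_entry_bbm(pud_t *pudp, pud_t new_entry)
	{
		set_pud(pudp, __pud(0));	/* 1. break: install an invalid entry      */
		local_flush_tlb_all();		/* 2. discard any cached translation       */
		set_pud(pudp, new_entry);	/* 3. make: install the new, valid mapping */
	}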
* cpu_arm1020_proc_init()
*/
ENTRY(cpu_arm1020_proc_init)
- mov pc, lr
+ ret lr
/*
* cpu_arm1020_proc_fin()
bic r0, r0, #0x1000 @ ...i............
bic r0, r0, #0x000e @ ............wca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
- mov pc, lr
+ ret lr
/*
* cpu_arm1020_reset(loc)
bic ip, ip, #0x000f @ ............wcam
bic ip, ip, #0x1100 @ ...i...s........
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
- mov pc, r0
+ ret r0
ENDPROC(cpu_arm1020_reset)
.popsection
.align 5
ENTRY(cpu_arm1020_do_idle)
mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt
- mov pc, lr
+ ret lr
/* ================================= CACHE ================================ */
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
#endif
- mov pc, lr
+ ret lr
ENDPROC(arm1020_flush_icache_all)
/*
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
#endif
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* flush_user_cache_range(start, end, flags)
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
#endif
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* coherent_kern_range(start, end)
blo 1b
mcr p15, 0, ip, c7, c10, 4 @ drain WB
mov r0, #0
- mov pc, lr
+ ret lr
/*
* flush_kern_dcache_area(void *addr, size_t size)
blo 1b
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_inv_range(start, end)
blo 1b
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_clean_range(start, end)
blo 1b
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_flush_range(start, end)
blo 1b
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_map_area(start, size, dir)
* - dir - DMA direction
*/
ENTRY(arm1020_dma_unmap_area)
- mov pc, lr
+ ret lr
ENDPROC(arm1020_dma_unmap_area)
.globl arm1020_flush_kern_cache_louis
subs r1, r1, #CACHE_DLINESIZE
bhi 1b
#endif
- mov pc, lr
+ ret lr
/* =============================== PageTable ============================== */
mcr p15, 0, r0, c2, c0, 0 @ load page table pointer
mcr p15, 0, r1, c8, c7, 0 @ invalidate I & D TLBs
#endif /* CONFIG_MMU */
- mov pc, lr
+ ret lr
/*
* cpu_arm1020_set_pte(ptep, pte)
#endif
mcr p15, 0, r0, c7, c10, 4 @ drain WB
#endif /* CONFIG_MMU */
- mov pc, lr
+ ret lr
.type __arm1020_setup, #function
__arm1020_setup:
#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN
orr r0, r0, #0x4000 @ .R.. .... .... ....
#endif
- mov pc, lr
+ ret lr
.size __arm1020_setup, . - __arm1020_setup
/*
* cpu_arm1020e_proc_init()
*/
ENTRY(cpu_arm1020e_proc_init)
- mov pc, lr
+ ret lr
/*
* cpu_arm1020e_proc_fin()
bic r0, r0, #0x1000 @ ...i............
bic r0, r0, #0x000e @ ............wca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
- mov pc, lr
+ ret lr
/*
* cpu_arm1020e_reset(loc)
bic ip, ip, #0x000f @ ............wcam
bic ip, ip, #0x1100 @ ...i...s........
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
- mov pc, r0
+ ret r0
ENDPROC(cpu_arm1020e_reset)
.popsection
.align 5
ENTRY(cpu_arm1020e_do_idle)
mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt
- mov pc, lr
+ ret lr
/* ================================= CACHE ================================ */
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
#endif
- mov pc, lr
+ ret lr
ENDPROC(arm1020e_flush_icache_all)
/*
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
#endif
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* flush_user_cache_range(start, end, flags)
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
#endif
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* coherent_kern_range(start, end)
blo 1b
mcr p15, 0, ip, c7, c10, 4 @ drain WB
mov r0, #0
- mov pc, lr
+ ret lr
/*
* flush_kern_dcache_area(void *addr, size_t size)
blo 1b
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_inv_range(start, end)
blo 1b
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_clean_range(start, end)
blo 1b
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_flush_range(start, end)
blo 1b
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_map_area(start, size, dir)
* - dir - DMA direction
*/
ENTRY(arm1020e_dma_unmap_area)
- mov pc, lr
+ ret lr
ENDPROC(arm1020e_dma_unmap_area)
.globl arm1020e_flush_kern_cache_louis
subs r1, r1, #CACHE_DLINESIZE
bhi 1b
#endif
- mov pc, lr
+ ret lr
/* =============================== PageTable ============================== */
mcr p15, 0, r0, c2, c0, 0 @ load page table pointer
mcr p15, 0, r1, c8, c7, 0 @ invalidate I & D TLBs
#endif
- mov pc, lr
+ ret lr
/*
* cpu_arm1020e_set_pte(ptep, pte)
mcr p15, 0, r0, c7, c10, 1 @ clean D entry
#endif
#endif /* CONFIG_MMU */
- mov pc, lr
+ ret lr
.type __arm1020e_setup, #function
__arm1020e_setup:
#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN
orr r0, r0, #0x4000 @ .R.. .... .... ....
#endif
- mov pc, lr
+ ret lr
.size __arm1020e_setup, . - __arm1020e_setup
/*
* cpu_arm1022_proc_init()
*/
ENTRY(cpu_arm1022_proc_init)
- mov pc, lr
+ ret lr
/*
* cpu_arm1022_proc_fin()
bic r0, r0, #0x1000 @ ...i............
bic r0, r0, #0x000e @ ............wca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
- mov pc, lr
+ ret lr
/*
* cpu_arm1022_reset(loc)
bic ip, ip, #0x000f @ ............wcam
bic ip, ip, #0x1100 @ ...i...s........
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
- mov pc, r0
+ ret r0
ENDPROC(cpu_arm1022_reset)
.popsection
.align 5
ENTRY(cpu_arm1022_do_idle)
mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt
- mov pc, lr
+ ret lr
/* ================================= CACHE ================================ */
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
#endif
- mov pc, lr
+ ret lr
ENDPROC(arm1022_flush_icache_all)
/*
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
#endif
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* flush_user_cache_range(start, end, flags)
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
#endif
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* coherent_kern_range(start, end)
blo 1b
mcr p15, 0, ip, c7, c10, 4 @ drain WB
mov r0, #0
- mov pc, lr
+ ret lr
/*
* flush_kern_dcache_area(void *addr, size_t size)
blo 1b
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_inv_range(start, end)
blo 1b
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_clean_range(start, end)
blo 1b
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_flush_range(start, end)
blo 1b
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_map_area(start, size, dir)
* - dir - DMA direction
*/
ENTRY(arm1022_dma_unmap_area)
- mov pc, lr
+ ret lr
ENDPROC(arm1022_dma_unmap_area)
.globl arm1022_flush_kern_cache_louis
subs r1, r1, #CACHE_DLINESIZE
bhi 1b
#endif
- mov pc, lr
+ ret lr
/* =============================== PageTable ============================== */
mcr p15, 0, r0, c2, c0, 0 @ load page table pointer
mcr p15, 0, r1, c8, c7, 0 @ invalidate I & D TLBs
#endif
- mov pc, lr
+ ret lr
/*
* cpu_arm1022_set_pte_ext(ptep, pte, ext)
mcr p15, 0, r0, c7, c10, 1 @ clean D entry
#endif
#endif /* CONFIG_MMU */
- mov pc, lr
+ ret lr
.type __arm1022_setup, #function
__arm1022_setup:
#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN
orr r0, r0, #0x4000 @ .R..............
#endif
- mov pc, lr
+ ret lr
.size __arm1022_setup, . - __arm1022_setup
/*
* cpu_arm1026_proc_init()
*/
ENTRY(cpu_arm1026_proc_init)
- mov pc, lr
+ ret lr
/*
* cpu_arm1026_proc_fin()
bic r0, r0, #0x1000 @ ...i............
bic r0, r0, #0x000e @ ............wca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
- mov pc, lr
+ ret lr
/*
* cpu_arm1026_reset(loc)
bic ip, ip, #0x000f @ ............wcam
bic ip, ip, #0x1100 @ ...i...s........
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
- mov pc, r0
+ ret r0
ENDPROC(cpu_arm1026_reset)
.popsection
.align 5
ENTRY(cpu_arm1026_do_idle)
mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt
- mov pc, lr
+ ret lr
/* ================================= CACHE ================================ */
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
#endif
- mov pc, lr
+ ret lr
ENDPROC(arm1026_flush_icache_all)
/*
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
#endif
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* flush_user_cache_range(start, end, flags)
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
#endif
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* coherent_kern_range(start, end)
blo 1b
mcr p15, 0, ip, c7, c10, 4 @ drain WB
mov r0, #0
- mov pc, lr
+ ret lr
/*
* flush_kern_dcache_area(void *addr, size_t size)
blo 1b
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_inv_range(start, end)
blo 1b
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_clean_range(start, end)
blo 1b
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_flush_range(start, end)
blo 1b
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_map_area(start, size, dir)
* - dir - DMA direction
*/
ENTRY(arm1026_dma_unmap_area)
- mov pc, lr
+ ret lr
ENDPROC(arm1026_dma_unmap_area)
.globl arm1026_flush_kern_cache_louis
subs r1, r1, #CACHE_DLINESIZE
bhi 1b
#endif
- mov pc, lr
+ ret lr
/* =============================== PageTable ============================== */
mcr p15, 0, r0, c2, c0, 0 @ load page table pointer
mcr p15, 0, r1, c8, c7, 0 @ invalidate I & D TLBs
#endif
- mov pc, lr
+ ret lr
/*
* cpu_arm1026_set_pte_ext(ptep, pte, ext)
mcr p15, 0, r0, c7, c10, 1 @ clean D entry
#endif
#endif /* CONFIG_MMU */
- mov pc, lr
+ ret lr
.type __arm1026_setup, #function
__arm1026_setup:
#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN
orr r0, r0, #0x4000 @ .R.. .... .... ....
#endif
- mov pc, lr
+ ret lr
.size __arm1026_setup, . - __arm1026_setup
/*
*/
ENTRY(cpu_arm720_dcache_clean_area)
ENTRY(cpu_arm720_proc_init)
- mov pc, lr
+ ret lr
ENTRY(cpu_arm720_proc_fin)
mrc p15, 0, r0, c1, c0, 0
bic r0, r0, #0x1000 @ ...i............
bic r0, r0, #0x000e @ ............wca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
- mov pc, lr
+ ret lr
/*
* Function: arm720_proc_do_idle(void)
* Purpose : put the processor in proper idle mode
*/
ENTRY(cpu_arm720_do_idle)
- mov pc, lr
+ ret lr
/*
* Function: arm720_switch_mm(unsigned long pgd_phys)
mcr p15, 0, r0, c2, c0, 0 @ update page table ptr
mcr p15, 0, r1, c8, c7, 0 @ flush TLB (v4)
#endif
- mov pc, lr
+ ret lr
/*
* Function: arm720_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext)
#ifdef CONFIG_MMU
armv3_set_pte_ext wc_disable=0
#endif
- mov pc, lr
+ ret lr
/*
* Function: arm720_reset
bic ip, ip, #0x000f @ ............wcam
bic ip, ip, #0x2100 @ ..v....s........
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
- mov pc, r0
+ ret r0
ENDPROC(cpu_arm720_reset)
.popsection
bic r0, r0, r5
ldr r5, arm710_cr1_set
orr r0, r0, r5
- mov pc, lr @ __ret (head.S)
+ ret lr @ __ret (head.S)
.size __arm710_setup, . - __arm710_setup
/*
mrc p15, 0, r0, c1, c0 @ get control register
bic r0, r0, r5
orr r0, r0, r6
- mov pc, lr @ __ret (head.S)
+ ret lr @ __ret (head.S)
.size __arm720_setup, . - __arm720_setup
/*
ENTRY(cpu_arm740_do_idle)
ENTRY(cpu_arm740_dcache_clean_area)
ENTRY(cpu_arm740_switch_mm)
- mov pc, lr
+ ret lr
/*
* cpu_arm740_proc_fin()
bic r0, r0, #0x3f000000 @ bank/f/lock/s
bic r0, r0, #0x0000000c @ w-buffer/cache
mcr p15, 0, r0, c1, c0, 0 @ disable caches
- mov pc, lr
+ ret lr
/*
* cpu_arm740_reset(loc)
mrc p15, 0, ip, c1, c0, 0 @ get ctrl register
bic ip, ip, #0x0000000c @ ............wc..
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
- mov pc, r0
+ ret r0
ENDPROC(cpu_arm740_reset)
.popsection
@ need some benchmark
orr r0, r0, #0x0000000d @ MPU/Cache/WB
- mov pc, lr
+ ret lr
.size __arm740_setup, . - __arm740_setup
ENTRY(cpu_arm7tdmi_do_idle)
ENTRY(cpu_arm7tdmi_dcache_clean_area)
ENTRY(cpu_arm7tdmi_switch_mm)
- mov pc, lr
+ ret lr
/*
* cpu_arm7tdmi_proc_fin()
*/
ENTRY(cpu_arm7tdmi_proc_fin)
- mov pc, lr
+ ret lr
/*
* Function: cpu_arm7tdmi_reset(loc)
*/
.pushsection .idmap.text, "ax"
ENTRY(cpu_arm7tdmi_reset)
- mov pc, r0
+ ret r0
ENDPROC(cpu_arm7tdmi_reset)
.popsection
.type __arm7tdmi_setup, #function
__arm7tdmi_setup:
- mov pc, lr
+ ret lr
.size __arm7tdmi_setup, . - __arm7tdmi_setup
__INITDATA
* cpu_arm920_proc_init()
*/
ENTRY(cpu_arm920_proc_init)
- mov pc, lr
+ ret lr
/*
* cpu_arm920_proc_fin()
bic r0, r0, #0x1000 @ ...i............
bic r0, r0, #0x000e @ ............wca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
- mov pc, lr
+ ret lr
/*
* cpu_arm920_reset(loc)
bic ip, ip, #0x000f @ ............wcam
bic ip, ip, #0x1100 @ ...i...s........
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
- mov pc, r0
+ ret r0
ENDPROC(cpu_arm920_reset)
.popsection
.align 5
ENTRY(cpu_arm920_do_idle)
mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt
- mov pc, lr
+ ret lr
#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
ENTRY(arm920_flush_icache_all)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
- mov pc, lr
+ ret lr
ENDPROC(arm920_flush_icache_all)
/*
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* flush_user_cache_range(start, end, flags)
blo 1b
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* coherent_kern_range(start, end)
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
mov r0, #0
- mov pc, lr
+ ret lr
/*
* flush_kern_dcache_area(void *addr, size_t size)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_inv_range(start, end)
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_clean_range(start, end)
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_flush_range(start, end)
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_map_area(start, size, dir)
* - dir - DMA direction
*/
ENTRY(arm920_dma_unmap_area)
- mov pc, lr
+ ret lr
ENDPROC(arm920_dma_unmap_area)
.globl arm920_flush_kern_cache_louis
add r0, r0, #CACHE_DLINESIZE
subs r1, r1, #CACHE_DLINESIZE
bhi 1b
- mov pc, lr
+ ret lr
/* =============================== PageTable ============================== */
mcr p15, 0, r0, c2, c0, 0 @ load page table pointer
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
#endif
- mov pc, lr
+ ret lr
/*
* cpu_arm920_set_pte(ptep, pte, ext)
mcr p15, 0, r0, c7, c10, 1 @ clean D entry
mcr p15, 0, r0, c7, c10, 4 @ drain WB
#endif
- mov pc, lr
+ ret lr
/* Suspend/resume support: taken from arch/arm/plat-s3c24xx/sleep.S */
.globl cpu_arm920_suspend_size
mrc p15, 0, r0, c1, c0 @ get control register v4
bic r0, r0, r5
orr r0, r0, r6
- mov pc, lr
+ ret lr
.size __arm920_setup, . - __arm920_setup
/*
* cpu_arm922_proc_init()
*/
ENTRY(cpu_arm922_proc_init)
- mov pc, lr
+ ret lr
/*
* cpu_arm922_proc_fin()
bic r0, r0, #0x1000 @ ...i............
bic r0, r0, #0x000e @ ............wca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
- mov pc, lr
+ ret lr
/*
* cpu_arm922_reset(loc)
bic ip, ip, #0x000f @ ............wcam
bic ip, ip, #0x1100 @ ...i...s........
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
- mov pc, r0
+ ret r0
ENDPROC(cpu_arm922_reset)
.popsection
.align 5
ENTRY(cpu_arm922_do_idle)
mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt
- mov pc, lr
+ ret lr
#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
ENTRY(arm922_flush_icache_all)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
- mov pc, lr
+ ret lr
ENDPROC(arm922_flush_icache_all)
/*
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* flush_user_cache_range(start, end, flags)
blo 1b
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* coherent_kern_range(start, end)
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
mov r0, #0
- mov pc, lr
+ ret lr
/*
* flush_kern_dcache_area(void *addr, size_t size)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_inv_range(start, end)
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_clean_range(start, end)
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_flush_range(start, end)
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_map_area(start, size, dir)
* - dir - DMA direction
*/
ENTRY(arm922_dma_unmap_area)
- mov pc, lr
+ ret lr
ENDPROC(arm922_dma_unmap_area)
.globl arm922_flush_kern_cache_louis
subs r1, r1, #CACHE_DLINESIZE
bhi 1b
#endif
- mov pc, lr
+ ret lr
/* =============================== PageTable ============================== */
mcr p15, 0, r0, c2, c0, 0 @ load page table pointer
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
#endif
- mov pc, lr
+ ret lr
/*
* cpu_arm922_set_pte_ext(ptep, pte, ext)
mcr p15, 0, r0, c7, c10, 1 @ clean D entry
mcr p15, 0, r0, c7, c10, 4 @ drain WB
#endif /* CONFIG_MMU */
- mov pc, lr
+ ret lr
.type __arm922_setup, #function
__arm922_setup:
mrc p15, 0, r0, c1, c0 @ get control register v4
bic r0, r0, r5
orr r0, r0, r6
- mov pc, lr
+ ret lr
.size __arm922_setup, . - __arm922_setup
/*
* cpu_arm925_proc_init()
*/
ENTRY(cpu_arm925_proc_init)
- mov pc, lr
+ ret lr
/*
* cpu_arm925_proc_fin()
bic r0, r0, #0x1000 @ ...i............
bic r0, r0, #0x000e @ ............wca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
- mov pc, lr
+ ret lr
/*
* cpu_arm925_reset(loc)
bic ip, ip, #0x000f @ ............wcam
bic ip, ip, #0x1100 @ ...i...s........
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
- mov pc, r0
+ ret r0
/*
* cpu_arm925_do_idle()
mcr p15, 0, r2, c1, c0, 0 @ Disable I cache
mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt
mcr p15, 0, r1, c1, c0, 0 @ Restore ICache enable
- mov pc, lr
+ ret lr
/*
* flush_icache_all()
ENTRY(arm925_flush_icache_all)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
- mov pc, lr
+ ret lr
ENDPROC(arm925_flush_icache_all)
/*
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* flush_user_cache_range(start, end, flags)
blo 1b
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* coherent_kern_range(start, end)
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
mov r0, #0
- mov pc, lr
+ ret lr
/*
* flush_kern_dcache_area(void *addr, size_t size)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_inv_range(start, end)
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_clean_range(start, end)
blo 1b
#endif
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_flush_range(start, end)
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_map_area(start, size, dir)
* - dir - DMA direction
*/
ENTRY(arm925_dma_unmap_area)
- mov pc, lr
+ ret lr
ENDPROC(arm925_dma_unmap_area)
.globl arm925_flush_kern_cache_louis
bhi 1b
#endif
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/* =============================== PageTable ============================== */
mcr p15, 0, r0, c2, c0, 0 @ load page table pointer
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
#endif
- mov pc, lr
+ ret lr
/*
* cpu_arm925_set_pte_ext(ptep, pte, ext)
#endif
mcr p15, 0, r0, c7, c10, 4 @ drain WB
#endif /* CONFIG_MMU */
- mov pc, lr
+ ret lr
.type __arm925_setup, #function
__arm925_setup:
#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN
orr r0, r0, #0x4000 @ .1.. .... .... ....
#endif
- mov pc, lr
+ ret lr
.size __arm925_setup, . - __arm925_setup
/*
* cpu_arm926_proc_init()
*/
ENTRY(cpu_arm926_proc_init)
- mov pc, lr
+ ret lr
/*
* cpu_arm926_proc_fin()
bic r0, r0, #0x1000 @ ...i............
bic r0, r0, #0x000e @ ............wca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
- mov pc, lr
+ ret lr
/*
* cpu_arm926_reset(loc)
bic ip, ip, #0x000f @ ............wcam
bic ip, ip, #0x1100 @ ...i...s........
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
- mov pc, r0
+ ret r0
ENDPROC(cpu_arm926_reset)
.popsection
mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt
mcr p15, 0, r1, c1, c0, 0 @ Restore ICache enable
msr cpsr_c, r3 @ Restore FIQ state
- mov pc, lr
+ ret lr
/*
* flush_icache_all()
ENTRY(arm926_flush_icache_all)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
- mov pc, lr
+ ret lr
ENDPROC(arm926_flush_icache_all)
/*
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* flush_user_cache_range(start, end, flags)
blo 1b
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* coherent_kern_range(start, end)
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
mov r0, #0
- mov pc, lr
+ ret lr
/*
* flush_kern_dcache_area(void *addr, size_t size)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_inv_range(start, end)
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_clean_range(start, end)
blo 1b
#endif
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_flush_range(start, end)
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_map_area(start, size, dir)
* - dir - DMA direction
*/
ENTRY(arm926_dma_unmap_area)
- mov pc, lr
+ ret lr
ENDPROC(arm926_dma_unmap_area)
.globl arm926_flush_kern_cache_louis
bhi 1b
#endif
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/* =============================== PageTable ============================== */
mcr p15, 0, r0, c2, c0, 0 @ load page table pointer
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
#endif
- mov pc, lr
+ ret lr
/*
* cpu_arm926_set_pte_ext(ptep, pte, ext)
#endif
mcr p15, 0, r0, c7, c10, 4 @ drain WB
#endif
- mov pc, lr
+ ret lr
/* Suspend/resume support: taken from arch/arm/plat-s3c24xx/sleep.S */
.globl cpu_arm926_suspend_size
#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN
orr r0, r0, #0x4000 @ .1.. .... .... ....
#endif
- mov pc, lr
+ ret lr
.size __arm926_setup, . - __arm926_setup
/*
*/
ENTRY(cpu_arm940_proc_init)
ENTRY(cpu_arm940_switch_mm)
- mov pc, lr
+ ret lr
/*
* cpu_arm940_proc_fin()
bic r0, r0, #0x00001000 @ i-cache
bic r0, r0, #0x00000004 @ d-cache
mcr p15, 0, r0, c1, c0, 0 @ disable caches
- mov pc, lr
+ ret lr
/*
* cpu_arm940_reset(loc)
bic ip, ip, #0x00000005 @ .............c.p
bic ip, ip, #0x00001000 @ i-cache
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
- mov pc, r0
+ ret r0
ENDPROC(cpu_arm940_reset)
.popsection
.align 5
ENTRY(cpu_arm940_do_idle)
mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt
- mov pc, lr
+ ret lr
/*
* flush_icache_all()
ENTRY(arm940_flush_icache_all)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
- mov pc, lr
+ ret lr
ENDPROC(arm940_flush_icache_all)
/*
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* coherent_kern_range(start, end)
bcs 1b @ segments 7 to 0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_inv_range(start, end)
subs r1, r1, #1 << 4
bcs 1b @ segments 7 to 0
mcr p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_clean_range(start, end)
bcs 1b @ segments 7 to 0
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_flush_range(start, end)
subs r1, r1, #1 << 4
bcs 1b @ segments 7 to 0
mcr p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_map_area(start, size, dir)
* - dir - DMA direction
*/
ENTRY(arm940_dma_unmap_area)
- mov pc, lr
+ ret lr
ENDPROC(arm940_dma_unmap_area)
.globl arm940_flush_kern_cache_louis
orr r0, r0, #0x00001000 @ I-cache
orr r0, r0, #0x00000005 @ MPU/D-cache
- mov pc, lr
+ ret lr
.size __arm940_setup, . - __arm940_setup
*/
ENTRY(cpu_arm946_proc_init)
ENTRY(cpu_arm946_switch_mm)
- mov pc, lr
+ ret lr
/*
* cpu_arm946_proc_fin()
bic r0, r0, #0x00001000 @ i-cache
bic r0, r0, #0x00000004 @ d-cache
mcr p15, 0, r0, c1, c0, 0 @ disable caches
- mov pc, lr
+ ret lr
/*
* cpu_arm946_reset(loc)
bic ip, ip, #0x00000005 @ .............c.p
bic ip, ip, #0x00001000 @ i-cache
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
- mov pc, r0
+ ret r0
ENDPROC(cpu_arm946_reset)
.popsection
.align 5
ENTRY(cpu_arm946_do_idle)
mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt
- mov pc, lr
+ ret lr
/*
* flush_icache_all()
ENTRY(arm946_flush_icache_all)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
- mov pc, lr
+ ret lr
ENDPROC(arm946_flush_icache_all)
/*
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c5, 0 @ flush I cache
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* flush_user_cache_range(start, end, flags)
blo 1b
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* coherent_kern_range(start, end)
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
mov r0, #0
- mov pc, lr
+ ret lr
/*
* flush_kern_dcache_area(void *addr, size_t size)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_inv_range(start, end)
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_clean_range(start, end)
blo 1b
#endif
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_flush_range(start, end)
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_map_area(start, size, dir)
* - dir - DMA direction
*/
ENTRY(arm946_dma_unmap_area)
- mov pc, lr
+ ret lr
ENDPROC(arm946_dma_unmap_area)
.globl arm946_flush_kern_cache_louis
bhi 1b
#endif
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
.type __arm946_setup, #function
__arm946_setup:
#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN
orr r0, r0, #0x00004000 @ .1.. .... .... ....
#endif
- mov pc, lr
+ ret lr
.size __arm946_setup, . - __arm946_setup
ENTRY(cpu_arm9tdmi_do_idle)
ENTRY(cpu_arm9tdmi_dcache_clean_area)
ENTRY(cpu_arm9tdmi_switch_mm)
- mov pc, lr
+ ret lr
/*
* cpu_arm9tdmi_proc_fin()
*/
ENTRY(cpu_arm9tdmi_proc_fin)
- mov pc, lr
+ ret lr
/*
* Function: cpu_arm9tdmi_reset(loc)
*/
.pushsection .idmap.text, "ax"
ENTRY(cpu_arm9tdmi_reset)
- mov pc, r0
+ ret r0
ENDPROC(cpu_arm9tdmi_reset)
.popsection
.type __arm9tdmi_setup, #function
__arm9tdmi_setup:
- mov pc, lr
+ ret lr
.size __arm9tdmi_setup, . - __arm9tdmi_setup
__INITDATA
* cpu_fa526_proc_init()
*/
ENTRY(cpu_fa526_proc_init)
- mov pc, lr
+ ret lr
/*
* cpu_fa526_proc_fin()
mcr p15, 0, r0, c1, c0, 0 @ disable caches
nop
nop
- mov pc, lr
+ ret lr
/*
* cpu_fa526_reset(loc)
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
nop
nop
- mov pc, r0
+ ret r0
ENDPROC(cpu_fa526_reset)
.popsection
*/
.align 4
ENTRY(cpu_fa526_do_idle)
- mov pc, lr
+ ret lr
ENTRY(cpu_fa526_dcache_clean_area)
subs r1, r1, #CACHE_DLINESIZE
bhi 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/* =============================== PageTable ============================== */
mcr p15, 0, r0, c2, c0, 0 @ load page table pointer
mcr p15, 0, ip, c8, c7, 0 @ invalidate UTLB
#endif
- mov pc, lr
+ ret lr
/*
* cpu_fa526_set_pte_ext(ptep, pte, ext)
mov r0, #0
mcr p15, 0, r0, c7, c10, 4 @ drain WB
#endif
- mov pc, lr
+ ret lr
.type __fa526_setup, #function
__fa526_setup:
bic r0, r0, r5
ldr r5, fa526_cr1_set
orr r0, r0, r5
- mov pc, lr
+ ret lr
.size __fa526_setup, . - __fa526_setup
/*
movne r2, r2, lsr #2 @ turned into # of sets
sub r2, r2, #(1 << 5)
stmia r1, {r2, r3}
- mov pc, lr
+ ret lr
/*
* cpu_feroceon_proc_fin()
bic r0, r0, #0x1000 @ ...i............
bic r0, r0, #0x000e @ ............wca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
- mov pc, lr
+ ret lr
/*
* cpu_feroceon_reset(loc)
bic ip, ip, #0x000f @ ............wcam
bic ip, ip, #0x1100 @ ...i...s........
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
- mov pc, r0
+ ret r0
ENDPROC(cpu_feroceon_reset)
.popsection
mov r0, #0
mcr p15, 0, r0, c7, c10, 4 @ Drain write buffer
mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt
- mov pc, lr
+ ret lr
/*
* flush_icache_all()
ENTRY(feroceon_flush_icache_all)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
- mov pc, lr
+ ret lr
ENDPROC(feroceon_flush_icache_all)
/*
mov ip, #0
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* flush_user_cache_range(start, end, flags)
tst r2, #VM_EXEC
mov ip, #0
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* coherent_kern_range(start, end)
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
mov r0, #0
- mov pc, lr
+ ret lr
/*
* flush_kern_dcache_area(void *addr, size_t size)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
.align 5
ENTRY(feroceon_range_flush_kern_dcache_area)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_inv_range(start, end)
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
.align 5
feroceon_range_dma_inv_range:
mcr p15, 5, r0, c15, c14, 0 @ D inv range start
mcr p15, 5, r1, c15, c14, 1 @ D inv range top
msr cpsr_c, r2 @ restore interrupts
- mov pc, lr
+ ret lr
/*
* dma_clean_range(start, end)
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
.align 5
feroceon_range_dma_clean_range:
mcr p15, 5, r1, c15, c13, 1 @ D clean range top
msr cpsr_c, r2 @ restore interrupts
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_flush_range(start, end)
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
.align 5
ENTRY(feroceon_range_dma_flush_range)
mcr p15, 5, r1, c15, c15, 1 @ D clean/inv range top
msr cpsr_c, r2 @ restore interrupts
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_map_area(start, size, dir)
* - dir - DMA direction
*/
ENTRY(feroceon_dma_unmap_area)
- mov pc, lr
+ ret lr
ENDPROC(feroceon_dma_unmap_area)
.globl feroceon_flush_kern_cache_louis
bhi 1b
#endif
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/* =============================== PageTable ============================== */
mcr p15, 0, r0, c2, c0, 0 @ load page table pointer
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
- mov pc, r2
+ ret r2
#else
- mov pc, lr
+ ret lr
#endif
/*
#endif
mcr p15, 0, r0, c7, c10, 4 @ drain WB
#endif
- mov pc, lr
+ ret lr
/* Suspend/resume support: taken from arch/arm/mm/proc-arm926.S */
.globl cpu_feroceon_suspend_size
mrc p15, 0, r0, c1, c0 @ get control register v4
bic r0, r0, r5
orr r0, r0, r6
- mov pc, lr
+ ret lr
.size __feroceon_setup, . - __feroceon_setup
/*
* cpu_mohawk_proc_init()
*/
ENTRY(cpu_mohawk_proc_init)
- mov pc, lr
+ ret lr
/*
* cpu_mohawk_proc_fin()
bic r0, r0, #0x1800 @ ...iz...........
bic r0, r0, #0x0006 @ .............ca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
- mov pc, lr
+ ret lr
/*
* cpu_mohawk_reset(loc)
bic ip, ip, #0x0007 @ .............cam
bic ip, ip, #0x1100 @ ...i...s........
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
- mov pc, r0
+ ret r0
ENDPROC(cpu_mohawk_reset)
.popsection
mov r0, #0
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
mcr p15, 0, r0, c7, c0, 4 @ wait for interrupt
- mov pc, lr
+ ret lr
/*
* flush_icache_all()
ENTRY(mohawk_flush_icache_all)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
- mov pc, lr
+ ret lr
ENDPROC(mohawk_flush_icache_all)
/*
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
mcrne p15, 0, ip, c7, c10, 0 @ drain write buffer
- mov pc, lr
+ ret lr
/*
* flush_user_cache_range(start, end, flags)
blo 1b
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* coherent_kern_range(start, end)
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
mov r0, #0
- mov pc, lr
+ ret lr
/*
* flush_kern_dcache_area(void *addr, size_t size)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_inv_range(start, end)
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_clean_range(start, end)
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_flush_range(start, end)
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* dma_map_area(start, size, dir)
* - dir - DMA direction
*/
ENTRY(mohawk_dma_unmap_area)
- mov pc, lr
+ ret lr
ENDPROC(mohawk_dma_unmap_area)
.globl mohawk_flush_kern_cache_louis
subs r1, r1, #CACHE_DLINESIZE
bhi 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
/*
* cpu_mohawk_switch_mm(pgd)
orr r0, r0, #0x18 @ cache the page table in L2
mcr p15, 0, r0, c2, c0, 0 @ load page table pointer
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
- mov pc, lr
+ ret lr
/*
* cpu_mohawk_set_pte_ext(ptep, pte, ext)
mov r0, r0
mcr p15, 0, r0, c7, c10, 1 @ clean D entry
mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
+ ret lr
.globl cpu_mohawk_suspend_size
.equ cpu_mohawk_suspend_size, 4 * 6
mrc p15, 0, r0, c1, c0 @ get control register
bic r0, r0, r5
orr r0, r0, r6
- mov pc, lr
+ ret lr
.size __mohawk_setup, . - __mohawk_setup
ENTRY(cpu_sa110_proc_init)
mov r0, #0
mcr p15, 0, r0, c15, c1, 2 @ Enable clock switching
- mov pc, lr
+ ret lr
/*
* cpu_sa110_proc_fin()
bic r0, r0, #0x1000 @ ...i............
bic r0, r0, #0x000e @ ............wca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
- mov pc, lr
+ ret lr
/*
* cpu_sa110_reset(loc)
bic ip, ip, #0x000f @ ............wcam
bic ip, ip, #0x1100 @ ...i...s........
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
- mov pc, r0
+ ret r0
ENDPROC(cpu_sa110_reset)
.popsection
mov r0, r0 @ safety
mov r0, r0 @ safety
mcr p15, 0, r0, c15, c1, 2 @ enable clock switching
- mov pc, lr
+ ret lr
/* ================================= CACHE ================================ */
add r0, r0, #DCACHELINESIZE
subs r1, r1, #DCACHELINESIZE
bhi 1b
- mov pc, lr
+ ret lr
/* =============================== PageTable ============================== */
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
ldr pc, [sp], #4
#else
- mov pc, lr
+ ret lr
#endif
/*
mcr p15, 0, r0, c7, c10, 1 @ clean D entry
mcr p15, 0, r0, c7, c10, 4 @ drain WB
#endif
- mov pc, lr
+ ret lr
.type __sa110_setup, #function
__sa110_setup:
mrc p15, 0, r0, c1, c0 @ get control register v4
bic r0, r0, r5
orr r0, r0, r6
- mov pc, lr
+ ret lr
.size __sa110_setup, . - __sa110_setup
/*
mov r0, #0
mcr p15, 0, r0, c15, c1, 2 @ Enable clock switching
mcr p15, 0, r0, c9, c0, 5 @ Allow read-buffer operations from userland
- mov pc, lr
+ ret lr
/*
* cpu_sa1100_proc_fin()
bic r0, r0, #0x1000 @ ...i............
bic r0, r0, #0x000e @ ............wca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
- mov pc, lr
+ ret lr
/*
* cpu_sa1100_reset(loc)
bic ip, ip, #0x000f @ ............wcam
bic ip, ip, #0x1100 @ ...i...s........
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
- mov pc, r0
+ ret r0
ENDPROC(cpu_sa1100_reset)
.popsection
mcr p15, 0, r0, c15, c8, 2 @ wait for interrupt
mov r0, r0 @ safety
mcr p15, 0, r0, c15, c1, 2 @ enable clock switching
- mov pc, lr
+ ret lr
/* ================================= CACHE ================================ */
add r0, r0, #DCACHELINESIZE
subs r1, r1, #DCACHELINESIZE
bhi 1b
- mov pc, lr
+ ret lr
/* =============================== PageTable ============================== */
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
ldr pc, [sp], #4
#else
- mov pc, lr
+ ret lr
#endif
/*
mcr p15, 0, r0, c7, c10, 1 @ clean D entry
mcr p15, 0, r0, c7, c10, 4 @ drain WB
#endif
- mov pc, lr
+ ret lr
.globl cpu_sa1100_suspend_size
.equ cpu_sa1100_suspend_size, 4 * 3
mrc p15, 0, r0, c1, c0 @ get control register v4
bic r0, r0, r5
orr r0, r0, r6
- mov pc, lr
+ ret lr
.size __sa1100_setup, . - __sa1100_setup
/*
#define PMD_FLAGS_SMP PMD_SECT_WBWA|PMD_SECT_S
ENTRY(cpu_v6_proc_init)
- mov pc, lr
+ ret lr
ENTRY(cpu_v6_proc_fin)
mrc p15, 0, r0, c1, c0, 0 @ ctrl register
bic r0, r0, #0x1000 @ ...i............
bic r0, r0, #0x0006 @ .............ca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
- mov pc, lr
+ ret lr
/*
* cpu_v6_reset(loc)
mcr p15, 0, r1, c1, c0, 0 @ disable MMU
mov r1, #0
mcr p15, 0, r1, c7, c5, 4 @ ISB
- mov pc, r0
+ ret r0
ENDPROC(cpu_v6_reset)
.popsection
mov r1, #0
mcr p15, 0, r1, c7, c10, 4 @ DWB - WFI may enter a low-power mode
mcr p15, 0, r1, c7, c0, 4 @ wait for interrupt
- mov pc, lr
+ ret lr
ENTRY(cpu_v6_dcache_clean_area)
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
add r0, r0, #D_CACHE_LINE_SIZE
subs r1, r1, #D_CACHE_LINE_SIZE
bhi 1b
- mov pc, lr
+ ret lr
/*
* cpu_v6_switch_mm(pgd_phys, tsk)
#endif
mcr p15, 0, r1, c13, c0, 1 @ set context ID
#endif
- mov pc, lr
+ ret lr
/*
* cpu_v6_set_pte_ext(ptep, pte, ext)
#ifdef CONFIG_MMU
armv6_set_pte_ext cpu_v6
#endif
- mov pc, lr
+ ret lr
/* Suspend/resume support: taken from arch/arm/mach-s3c64xx/sleep.S */
.globl cpu_v6_suspend_size
mcreq p15, 0, r5, c1, c0, 1 @ write aux control reg
orreq r0, r0, #(1 << 21) @ low interrupt latency configuration
#endif
- mov pc, lr @ return to head.S:__ret
+ ret lr @ return to head.S:__ret
/*
* V X F I D LR
mcr p15, 0, r0, c2, c0, 0 @ set TTB 0
isb
#endif
- mov pc, lr
+ bx lr
ENDPROC(cpu_v7_switch_mm)
/*
ALT_SMP(W(nop))
ALT_UP (mcr p15, 0, r0, c7, c10, 1) @ flush_pte
#endif
- mov pc, lr
+ bx lr
ENDPROC(cpu_v7_set_pte_ext)
/*
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
+#include <asm/assembler.h>
#define TTB_IRGN_NC (0 << 8)
#define TTB_IRGN_WBWA (1 << 8)
mcrr p15, 0, rpgdl, rpgdh, c2 @ set TTB 0
isb
#endif
- mov pc, lr
+ ret lr
ENDPROC(cpu_v7_switch_mm)
#ifdef __ARMEB__
tst rh, #1 << (57 - 32) @ L_PTE_NONE
bicne rl, #L_PTE_VALID
bne 1f
- tst rh, #1 << (55 - 32) @ L_PTE_DIRTY
- orreq rl, #L_PTE_RDONLY
+
+ eor ip, rh, #1 << (55 - 32) @ toggle L_PTE_DIRTY in temp reg to
+ @ test for !L_PTE_DIRTY || L_PTE_RDONLY
+ tst ip, #1 << (55 - 32) | 1 << (58 - 32)
+ orrne rl, #PTE_AP2
+ biceq rl, #PTE_AP2
+
1: strd r2, r3, [r0]
ALT_SMP(W(nop))
ALT_UP (mcr p15, 0, r0, c7, c10, 1) @ flush_pte
#endif
- mov pc, lr
+ ret lr
ENDPROC(cpu_v7_set_pte_ext)
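Restated in C, the eor/tst pair above computes the following condition (a sketch for illustration only; pte_needs_ap2 is not a real kernel helper):

	/* AP[2] (the hardware read-only bit) must be set whenever the Linux
	 * PTE is either clean or read-only, so that the first write to a
	 * clean page still faults and the kernel can mark it dirty in
	 * software before clearing AP[2]. */
	static inline bool pte_needs_ap2(bool l_pte_dirty, bool l_pte_rdonly)
	{
		return !l_pte_dirty || l_pte_rdonly;
	}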
/*
#endif
ENTRY(cpu_v7_proc_init)
- mov pc, lr
+ ret lr
ENDPROC(cpu_v7_proc_init)
ENTRY(cpu_v7_proc_fin)
bic r0, r0, #0x1000 @ ...i............
bic r0, r0, #0x0006 @ .............ca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
- mov pc, lr
+ ret lr
ENDPROC(cpu_v7_proc_fin)
/*
ENTRY(cpu_v7_do_idle)
dsb @ WFI may enter a low-power mode
wfi
- mov pc, lr
+ ret lr
ENDPROC(cpu_v7_do_idle)
ENTRY(cpu_v7_dcache_clean_area)
ALT_SMP(W(nop)) @ MP extensions imply L1 PTW
ALT_UP_B(1f)
- mov pc, lr
+ ret lr
1: dcache_line_size r2, r3
2: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
add r0, r0, r2
subs r1, r1, r2
bhi 2b
dsb ishst
- mov pc, lr
+ ret lr
ENDPROC(cpu_v7_dcache_clean_area)
string cpu_v7_name, "ARMv7 Processor"
ENDPROC(cpu_v7_do_resume)
#endif
+/*
+ * Cortex-A9 processor functions
+ */
+ globl_equ cpu_ca9mp_proc_init, cpu_v7_proc_init
+ globl_equ cpu_ca9mp_proc_fin, cpu_v7_proc_fin
+ globl_equ cpu_ca9mp_reset, cpu_v7_reset
+ globl_equ cpu_ca9mp_do_idle, cpu_v7_do_idle
+ globl_equ cpu_ca9mp_dcache_clean_area, cpu_v7_dcache_clean_area
+ globl_equ cpu_ca9mp_switch_mm, cpu_v7_switch_mm
+ globl_equ cpu_ca9mp_set_pte_ext, cpu_v7_set_pte_ext
+.globl cpu_ca9mp_suspend_size
+.equ cpu_ca9mp_suspend_size, cpu_v7_suspend_size + 4 * 2
+#ifdef CONFIG_ARM_CPU_SUSPEND
+ENTRY(cpu_ca9mp_do_suspend)
+ stmfd sp!, {r4 - r5}
+ mrc p15, 0, r4, c15, c0, 1 @ Diagnostic register
+ mrc p15, 0, r5, c15, c0, 0 @ Power register
+ stmia r0!, {r4 - r5}
+ ldmfd sp!, {r4 - r5}
+ b cpu_v7_do_suspend
+ENDPROC(cpu_ca9mp_do_suspend)
+
+ENTRY(cpu_ca9mp_do_resume)
+ ldmia r0!, {r4 - r5}
+ mrc p15, 0, r10, c15, c0, 1 @ Read Diagnostic register
+ teq r4, r10 @ Already restored?
+ mcrne p15, 0, r4, c15, c0, 1 @ No, so restore it
+ mrc p15, 0, r10, c15, c0, 0 @ Read Power register
+ teq r5, r10 @ Already restored?
+ mcrne p15, 0, r5, c15, c0, 0 @ No, so restore it
+ b cpu_v7_do_resume
+ENDPROC(cpu_ca9mp_do_resume)
+#endif
+
#ifdef CONFIG_CPU_PJ4B
globl_equ cpu_pj4b_switch_mm, cpu_v7_switch_mm
globl_equ cpu_pj4b_set_pte_ext, cpu_v7_set_pte_ext
dsb @ WFI may enter a low-power mode
wfi
dsb @barrier
- mov pc, lr
+ ret lr
ENDPROC(cpu_pj4b_do_idle)
#else
globl_equ cpu_pj4b_do_idle, cpu_v7_do_idle
ENTRY(cpu_pj4b_do_resume)
ldmia r0!, {r6 - r10}
- mcr p15, 1, r6, c15, c1, 0 @ save CP15 - extra features
- mcr p15, 1, r7, c15, c2, 0 @ save CP15 - Aux Func Modes Ctrl 0
- mcr p15, 1, r8, c15, c1, 2 @ save CP15 - Aux Debug Modes Ctrl 2
- mcr p15, 1, r9, c15, c1, 1 @ save CP15 - Aux Debug Modes Ctrl 1
- mcr p15, 0, r10, c9, c14, 0 @ save CP15 - PMC
+ mcr p15, 1, r6, c15, c1, 0 @ restore CP15 - extra features
+ mcr p15, 1, r7, c15, c2, 0 @ restore CP15 - Aux Func Modes Ctrl 0
+ mcr p15, 1, r8, c15, c1, 2 @ restore CP15 - Aux Debug Modes Ctrl 2
+ mcr p15, 1, r9, c15, c1, 1 @ restore CP15 - Aux Debug Modes Ctrl 1
+ mcr p15, 0, r10, c9, c14, 0 @ restore CP15 - PMC
b cpu_v7_do_resume
ENDPROC(cpu_pj4b_do_resume)
#endif
.globl cpu_pj4b_suspend_size
-.equ cpu_pj4b_suspend_size, 4 * 14
+.equ cpu_pj4b_suspend_size, cpu_v7_suspend_size + 4 * 5
#endif
__v7_ca7mp_setup:
__v7_ca12mp_setup:
__v7_ca15mp_setup:
+__v7_b15mp_setup:
__v7_ca17mp_setup:
mov r10, #0
1:
bic r0, r0, r5 @ clear bits them
orr r0, r0, r6 @ set them
THUMB( orr r0, r0, #1 << 30 ) @ Thumb exceptions
- mov pc, lr @ return to head.S:__ret
+ ret lr @ return to head.S:__ret
ENDPROC(__v7_setup)
.align 2
@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
define_processor_functions v7, dabort=v7_early_abort, pabort=v7_pabort, suspend=1
+ define_processor_functions ca9mp, dabort=v7_early_abort, pabort=v7_pabort, suspend=1
#ifdef CONFIG_CPU_PJ4B
define_processor_functions pj4b, dabort=v7_early_abort, pabort=v7_pabort, suspend=1
#endif
__v7_ca9mp_proc_info:
.long 0x410fc090
.long 0xff0ffff0
- __v7_proc __v7_ca9mp_setup
+ __v7_proc __v7_ca9mp_setup, proc_fns = ca9mp_processor_functions
.size __v7_ca9mp_proc_info, . - __v7_ca9mp_proc_info
#endif /* CONFIG_ARM_LPAE */
__v7_proc __v7_ca15mp_setup
.size __v7_ca15mp_proc_info, . - __v7_ca15mp_proc_info
+ /*
+ * Broadcom Corporation Brahma-B15 processor.
+ */
+ .type __v7_b15mp_proc_info, #object
+__v7_b15mp_proc_info:
+ .long 0x420f00f0
+ .long 0xff0ffff0
+ __v7_proc __v7_b15mp_setup, hwcaps = HWCAP_IDIV
+ .size __v7_b15mp_proc_info, . - __v7_b15mp_proc_info
+
/*
* ARM Ltd. Cortex A17 processor.
*/
#include "proc-macros.S"
ENTRY(cpu_v7m_proc_init)
- mov pc, lr
+ ret lr
ENDPROC(cpu_v7m_proc_init)
ENTRY(cpu_v7m_proc_fin)
- mov pc, lr
+ ret lr
ENDPROC(cpu_v7m_proc_fin)
/*
*/
.align 5
ENTRY(cpu_v7m_reset)
- mov pc, r0
+ ret r0
ENDPROC(cpu_v7m_reset)
/*
*/
ENTRY(cpu_v7m_do_idle)
wfi
- mov pc, lr
+ ret lr
ENDPROC(cpu_v7m_do_idle)
ENTRY(cpu_v7m_dcache_clean_area)
- mov pc, lr
+ ret lr
ENDPROC(cpu_v7m_dcache_clean_area)
/*
* There is no MMU, so here is nothing to do.
*/
ENTRY(cpu_v7m_switch_mm)
- mov pc, lr
+ ret lr
ENDPROC(cpu_v7m_switch_mm)
.globl cpu_v7m_suspend_size
#ifdef CONFIG_ARM_CPU_SUSPEND
ENTRY(cpu_v7m_do_suspend)
- mov pc, lr
+ ret lr
ENDPROC(cpu_v7m_do_suspend)
ENTRY(cpu_v7m_do_resume)
- mov pc, lr
+ ret lr
ENDPROC(cpu_v7m_do_resume)
#endif
ldr r12, [r0, V7M_SCB_CCR] @ system control register
orr r12, #V7M_SCB_CCR_STKALIGN
str r12, [r0, V7M_SCB_CCR]
- mov pc, lr
+ ret lr
ENDPROC(__v7m_setup)
.align 2
* Nothing too exciting at the moment
*/
ENTRY(cpu_xsc3_proc_init)
- mov pc, lr
+ ret lr
/*
* cpu_xsc3_proc_fin()
bic r0, r0, #0x1800 @ ...IZ...........
bic r0, r0, #0x0006 @ .............CA.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
- mov pc, lr
+ ret lr
/*
* cpu_xsc3_reset(loc)
@ CAUTION: MMU turned off from this point. We count on the pipeline
@ already containing those two last instructions to survive.
mcr p15, 0, ip, c8, c7, 0 @ invalidate I and D TLBs
- mov pc, r0
+ ret r0
ENDPROC(cpu_xsc3_reset)
.popsection
ENTRY(cpu_xsc3_do_idle)
mov r0, #1
mcr p14, 0, r0, c7, c0, 0 @ go to idle
- mov pc, lr
+ ret lr
/* ================================= CACHE ================================ */
ENTRY(xsc3_flush_icache_all)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
- mov pc, lr
+ ret lr
ENDPROC(xsc3_flush_icache_all)
/*
mcrne p15, 0, ip, c7, c5, 0 @ invalidate L1 I cache and BTB
mcrne p15, 0, ip, c7, c10, 4 @ data write barrier
mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush
- mov pc, lr
+ ret lr
/*
* flush_user_cache_range(start, end, vm_flags)
mcrne p15, 0, ip, c7, c5, 6 @ invalidate BTB
mcrne p15, 0, ip, c7, c10, 4 @ data write barrier
mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush
- mov pc, lr
+ ret lr
/*
* coherent_kern_range(start, end)
mcr p15, 0, r0, c7, c5, 0 @ invalidate L1 I cache and BTB
mcr p15, 0, r0, c7, c10, 4 @ data write barrier
mcr p15, 0, r0, c7, c5, 4 @ prefetch flush
- mov pc, lr
+ ret lr
/*
* flush_kern_dcache_area(void *addr, size_t size)
mcr p15, 0, r0, c7, c5, 0 @ invalidate L1 I cache and BTB
mcr p15, 0, r0, c7, c10, 4 @ data write barrier
mcr p15, 0, r0, c7, c5, 4 @ prefetch flush
- mov pc, lr
+ ret lr
/*
* dma_inv_range(start, end)
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ data write barrier
- mov pc, lr
+ ret lr
/*
* dma_clean_range(start, end)
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ data write barrier
- mov pc, lr
+ ret lr
/*
* dma_flush_range(start, end)
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ data write barrier
- mov pc, lr
+ ret lr
/*
* dma_map_area(start, size, dir)
* - dir - DMA direction
*/
ENTRY(xsc3_dma_unmap_area)
- mov pc, lr
+ ret lr
ENDPROC(xsc3_dma_unmap_area)
.globl xsc3_flush_kern_cache_louis
add r0, r0, #CACHELINESIZE
subs r1, r1, #CACHELINESIZE
bhi 1b
- mov pc, lr
+ ret lr
/* =============================== PageTable ============================== */
orr r2, r2, ip
xscale_set_pte_ext_epilogue
- mov pc, lr
+ ret lr
.ltorg
.align
bic r0, r0, r5 @ ..V. ..R. .... ..A.
orr r0, r0, r6 @ ..VI Z..S .... .C.M (mmu)
@ ...I Z..S .... .... (uc)
- mov pc, lr
+ ret lr
.size __xsc3_setup, . - __xsc3_setup
mrc p15, 0, r1, c1, c0, 1
bic r1, r1, #1
mcr p15, 0, r1, c1, c0, 1
- mov pc, lr
+ ret lr
/*
* cpu_xscale_proc_fin()
bic r0, r0, #0x1800 @ ...IZ...........
bic r0, r0, #0x0006 @ .............CA.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
- mov pc, lr
+ ret lr
/*
* cpu_xscale_reset(loc)
@ CAUTION: MMU turned off from this point. We count on the pipeline
@ already containing those two last instructions to survive.
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
- mov pc, r0
+ ret r0
ENDPROC(cpu_xscale_reset)
.popsection
ENTRY(cpu_xscale_do_idle)
mov r0, #1
mcr p14, 0, r0, c7, c0, 0 @ Go to IDLE
- mov pc, lr
+ ret lr
/* ================================= CACHE ================================ */
ENTRY(xscale_flush_icache_all)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
- mov pc, lr
+ ret lr
ENDPROC(xscale_flush_icache_all)
/*
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c5, 0 @ Invalidate I cache & BTB
mcrne p15, 0, ip, c7, c10, 4 @ Drain Write (& Fill) Buffer
- mov pc, lr
+ ret lr
/*
* flush_user_cache_range(start, end, vm_flags)
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c5, 6 @ Invalidate BTB
mcrne p15, 0, ip, c7, c10, 4 @ Drain Write (& Fill) Buffer
- mov pc, lr
+ ret lr
/*
* coherent_kern_range(start, end)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ Invalidate I cache & BTB
mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer
- mov pc, lr
+ ret lr
/*
* coherent_user_range(start, end)
mov r0, #0
mcr p15, 0, r0, c7, c5, 6 @ Invalidate BTB
mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer
- mov pc, lr
+ ret lr
/*
* flush_kern_dcache_area(void *addr, size_t size)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ Invalidate I cache & BTB
mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer
- mov pc, lr
+ ret lr
/*
* dma_inv_range(start, end)
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer
- mov pc, lr
+ ret lr
/*
* dma_clean_range(start, end)
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer
- mov pc, lr
+ ret lr
/*
* dma_flush_range(start, end)
cmp r0, r1
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer
- mov pc, lr
+ ret lr
/*
* dma_map_area(start, size, dir)
* - dir - DMA direction
*/
ENTRY(xscale_dma_unmap_area)
- mov pc, lr
+ ret lr
ENDPROC(xscale_dma_unmap_area)
.globl xscale_flush_kern_cache_louis
add r0, r0, #CACHELINESIZE
subs r1, r1, #CACHELINESIZE
bhi 1b
- mov pc, lr
+ ret lr
/* =============================== PageTable ============================== */
orr r2, r2, ip
xscale_set_pte_ext_epilogue
- mov pc, lr
+ ret lr
.ltorg
.align
mrc p15, 0, r0, c1, c0, 0 @ get control register
bic r0, r0, r5
orr r0, r0, r6
- mov pc, lr
+ ret lr
.size __xscale_setup, . - __xscale_setup
/*
*/
#include <linux/linkage.h>
#include <linux/init.h>
+#include <asm/assembler.h>
#include <asm/asm-offsets.h>
#include <asm/tlbflush.h>
#include "proc-macros.S"
vma_vm_mm ip, r2
act_mm r3 @ get current->active_mm
eors r3, ip, r3 @ == mm ?
- movne pc, lr @ no, we dont do anything
+ retne lr @ no, we don't do anything
mov r3, #0
mcr p15, 0, r3, c7, c10, 4 @ drain WB
bic r0, r0, #0x0ff
cmp r0, r1
blo 1b
mcr p15, 0, r3, c7, c10, 4 @ data write barrier
- mov pc, lr
+ ret lr
ENTRY(fa_flush_kern_tlb_range)
blo 1b
mcr p15, 0, r3, c7, c10, 4 @ data write barrier
mcr p15, 0, r3, c7, c5, 4 @ prefetch flush (isb)
- mov pc, lr
+ ret lr
__INITDATA
*/
#include <linux/linkage.h>
#include <linux/init.h>
+#include <asm/assembler.h>
#include <asm/asm-offsets.h>
#include <asm/tlbflush.h>
#include "proc-macros.S"
vma_vm_mm ip, r2
act_mm r3 @ get current->active_mm
eors r3, ip, r3 @ == mm ?
- movne pc, lr @ no, we dont do anything
+ retne lr @ no, we don't do anything
.v4_flush_kern_tlb_range:
bic r0, r0, #0x0ff
bic r0, r0, #0xf00
add r0, r0, #PAGE_SZ
cmp r0, r1
blo 1b
- mov pc, lr
+ ret lr
/*
* v4_flush_kern_tlb_range(start, end)
*/
#include <linux/linkage.h>
#include <linux/init.h>
+#include <asm/assembler.h>
#include <asm/asm-offsets.h>
#include <asm/tlbflush.h>
#include "proc-macros.S"
vma_vm_mm ip, r2
act_mm r3 @ get current->active_mm
eors r3, ip, r3 @ == mm ?
- movne pc, lr @ no, we dont do anything
+ retne lr @ no, we don't do anything
vma_vm_flags r2, r2
mcr p15, 0, r3, c7, c10, 4 @ drain WB
tst r2, #VM_EXEC
add r0, r0, #PAGE_SZ
cmp r0, r1
blo 1b
- mov pc, lr
+ ret lr
/*
* v4_flush_kern_tlb_range(start, end)
add r0, r0, #PAGE_SZ
cmp r0, r1
blo 1b
- mov pc, lr
+ ret lr
__INITDATA
*/
#include <linux/linkage.h>
#include <linux/init.h>
+#include <asm/assembler.h>
#include <asm/asm-offsets.h>
#include <asm/tlbflush.h>
#include "proc-macros.S"
vma_vm_mm ip, r2
act_mm r3 @ get current->active_mm
eors r3, ip, r3 @ == mm ?
- movne pc, lr @ no, we dont do anything
+ retne lr @ no, we don't do anything
mov r3, #0
mcr p15, 0, r3, c7, c10, 4 @ drain WB
vma_vm_flags r2, r2
add r0, r0, #PAGE_SZ
cmp r0, r1
blo 1b
- mov pc, lr
+ ret lr
ENTRY(v4wbi_flush_kern_tlb_range)
mov r3, #0
add r0, r0, #PAGE_SZ
cmp r0, r1
blo 1b
- mov pc, lr
+ ret lr
__INITDATA
#include <linux/init.h>
#include <linux/linkage.h>
#include <asm/asm-offsets.h>
+#include <asm/assembler.h>
#include <asm/page.h>
#include <asm/tlbflush.h>
#include "proc-macros.S"
cmp r0, r1
blo 1b
mcr p15, 0, ip, c7, c10, 4 @ data synchronization barrier
- mov pc, lr
+ ret lr
/*
* v6wbi_flush_kern_tlb_range(start,end)
blo 1b
mcr p15, 0, r2, c7, c10, 4 @ data synchronization barrier
mcr p15, 0, r2, c7, c5, 4 @ prefetch flush (isb)
- mov pc, lr
+ ret lr
__INIT
cmp r0, r1
blo 1b
dsb ish
- mov pc, lr
+ ret lr
ENDPROC(v7wbi_flush_user_tlb_range)
/*
blo 1b
dsb ish
isb
- mov pc, lr
+ ret lr
ENDPROC(v7wbi_flush_kern_tlb_range)
__INIT
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
-
+#include <asm/assembler.h>
#include <asm/opcodes.h>
/* This is the kernel's entry point into the floating point emulator.
mov r0, r6 @ prepare for EmulateAll()
bl EmulateAll @ emulate the instruction
cmp r0, #0 @ was emulation successful
- moveq pc, r4 @ no, return failure
+ reteq r4 @ no, return failure
next:
.Lx1: ldrt r6, [r5], #4 @ get the next instruction and
teq r2, #0x0C000000
teqne r2, #0x0D000000
teqne r2, #0x0E000000
- movne pc, r9 @ return ok if not a fp insn
+ retne r9 @ return ok if not a fp insn
str r5, [sp, #S_PC] @ update PC copy in regs
@ plain LDR instruction. Weird, but it seems harmless.
.pushsection .fixup,"ax"
.align 2
-.Lfix: mov pc, r9 @ let the user eat segfaults
+.Lfix: ret r9 @ let the user eat segfaults
.popsection
.pushsection __ex_table,"a"
char *perf_name;
char *op_name;
} op_perf_name_map[] = {
- { "xscale1", "arm/xscale1" },
- { "xscale1", "arm/xscale2" },
- { "v6", "arm/armv6" },
- { "v6mpcore", "arm/mpcore" },
- { "ARMv7 Cortex-A8", "arm/armv7" },
- { "ARMv7 Cortex-A9", "arm/armv7-ca9" },
+ { "armv5_xscale1", "arm/xscale1" },
+ { "armv5_xscale2", "arm/xscale2" },
+ { "armv6_1136", "arm/armv6" },
+ { "armv6_1156", "arm/armv6" },
+ { "armv6_1176", "arm/armv6" },
+ { "armv6_11mpcore", "arm/mpcore" },
+ { "armv7_cortex_a8", "arm/armv7" },
+ { "armv7_cortex_a9", "arm/armv7-ca9" },
};
char *op_name_from_perf_id(void)
if (!user_mode(regs)) {
struct stackframe frame;
- frame.fp = regs->ARM_fp;
- frame.sp = regs->ARM_sp;
- frame.lr = regs->ARM_lr;
- frame.pc = regs->ARM_pc;
+ arm_get_current_stackframe(regs, &frame);
walk_stackframe(&frame, report_trace, &depth);
return;
}
unsigned long flags;
struct omap_dma_lch *chan;
+ WARN(strcmp(dev_name, "DMA engine"), "Using deprecated platform DMA API - please update to DMA engine");
+
spin_lock_irqsave(&dma_chan_lock, flags);
for (ch = 0; ch < dma_chan_count; ch++) {
if (free_ch == -1 && dma_chan[ch].dev_id == -1) {
ENTRY(vfp_null_entry)
dec_preempt_count_ti r10, r4
- mov pc, lr
+ ret lr
ENDPROC(vfp_null_entry)
.align 2
dec_preempt_count_ti r10, r4
ldr r0, VFP_arch_address
str r0, [r0] @ set to non-zero value
- mov pc, r9 @ we have handled the fault
+ ret r9 @ we have handled the fault
ENDPROC(vfp_testing_entry)
.align 2
@ always subtract 4 from the following
@ instruction address.
dec_preempt_count_ti r10, r4
- mov pc, r9 @ we think we have handled things
+ ret r9 @ we think we have handled things
look_for_VFP_exceptions:
DBGSTR "not VFP"
dec_preempt_count_ti r10, r4
- mov pc, lr
+ ret lr
process_exception:
DBGSTR "bounce"
VFPFMRX r12, FPINST2 @ FPINST2 if needed (and present)
1:
stmia r0, {r1, r2, r3, r12} @ save FPEXC, FPSCR, FPINST, FPINST2
- mov pc, lr
+ ret lr
ENDPROC(vfp_save_state)
.align
#ifdef CONFIG_THUMB2_KERNEL
adr \tmp, 1f
add \tmp, \tmp, \base, lsl \shift
- mov pc, \tmp
+ ret \tmp
#else
add pc, pc, \base, lsl \shift
mov r0, r0
tbl_branch r0, r3, #3
.irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
1: mrc p10, 0, r0, c\dr, c0, 0 @ fmrs r0, s0
- mov pc, lr
+ ret lr
.org 1b + 8
1: mrc p10, 0, r0, c\dr, c0, 4 @ fmrs r0, s1
- mov pc, lr
+ ret lr
.org 1b + 8
.endr
ENDPROC(vfp_get_float)
tbl_branch r1, r3, #3
.irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
1: mcr p10, 0, r0, c\dr, c0, 0 @ fmsr r0, s0
- mov pc, lr
+ ret lr
.org 1b + 8
1: mcr p10, 0, r0, c\dr, c0, 4 @ fmsr r0, s1
- mov pc, lr
+ ret lr
.org 1b + 8
.endr
ENDPROC(vfp_put_float)
tbl_branch r0, r3, #3
.irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
1: fmrrd r0, r1, d\dr
- mov pc, lr
+ ret lr
.org 1b + 8
.endr
#ifdef CONFIG_VFPv3
@ d16 - d31 registers
.irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
1: mrrc p11, 3, r0, r1, c\dr @ fmrrd r0, r1, d\dr
- mov pc, lr
+ ret lr
.org 1b + 8
.endr
#endif
@ virtual register 16 (or 32 if VFPv3) for compare with zero
mov r0, #0
mov r1, #0
- mov pc, lr
+ ret lr
ENDPROC(vfp_get_double)
ENTRY(vfp_put_double)
tbl_branch r2, r3, #3
.irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
1: fmdrr d\dr, r0, r1
- mov pc, lr
+ ret lr
.org 1b + 8
.endr
#ifdef CONFIG_VFPv3
@ d16 - d31 registers
.irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
1: mcrr p11, 3, r0, r1, c\dr @ fmdrr r0, r1, d\dr
- mov pc, lr
+ ret lr
.org 1b + 8
.endr
#endif
ENTRY(HYPERVISOR_##hypercall) \
mov r12, #__HYPERVISOR_##hypercall; \
__HVC(XEN_IMM); \
- mov pc, lr; \
+ ret lr; \
ENDPROC(HYPERVISOR_##hypercall)
#define HYPERCALL0 HYPERCALL_SIMPLE
mov r12, #__HYPERVISOR_##hypercall; \
__HVC(XEN_IMM); \
ldm sp!, {r4} \
- mov pc, lr \
+ ret lr \
ENDPROC(HYPERVISOR_##hypercall)
.text
ldr r4, [sp, #4]
__HVC(XEN_IMM)
ldm sp!, {r4}
- mov pc, lr
+ ret lr
ENDPROC(privcmd_call);
SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
using optimized ARM assembler.
+config CRYPTO_SHA1_ARM_NEON
+ tristate "SHA1 digest algorithm (ARM NEON)"
+ depends on ARM && KERNEL_MODE_NEON && !CPU_BIG_ENDIAN
+ select CRYPTO_SHA1_ARM
+ select CRYPTO_SHA1
+ select CRYPTO_HASH
+ help
+ SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
+ using optimized ARM NEON assembly, when NEON instructions are
+ available.
+
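The option above only registers another implementation of the existing "sha1" shash; in-kernel users keep going through the crypto API, and the core picks the highest-priority implementation that can run on the CPU (the NEON one when NEON is usable). A minimal, hypothetical kernel-side sketch - the helper name is made up for illustration:

#include <crypto/hash.h>
#include <crypto/sha.h>
#include <linux/err.h>
#include <linux/slab.h>

/* Hash a buffer with whichever "sha1" provider the crypto core selects. */
static int sha1_digest_example(const u8 *data, unsigned int len,
			       u8 out[SHA1_DIGEST_SIZE])
{
	struct crypto_shash *tfm;
	struct shash_desc *desc;
	int err;

	tfm = crypto_alloc_shash("sha1", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	/* shash_desc is followed by implementation-private state. */
	desc = kzalloc(sizeof(*desc) + crypto_shash_descsize(tfm), GFP_KERNEL);
	if (!desc) {
		err = -ENOMEM;
		goto out_free_tfm;
	}
	desc->tfm = tfm;

	err = crypto_shash_digest(desc, data, len, out);

	kfree(desc);
out_free_tfm:
	crypto_free_shash(tfm);
	return err;
}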
config CRYPTO_SHA1_PPC
tristate "SHA1 digest algorithm (powerpc)"
depends on PPC
SHA-512 secure hash standard (DFIPS 180-2) implemented
using sparc64 crypto instructions, when available.
+config CRYPTO_SHA512_ARM_NEON
+ tristate "SHA384 and SHA512 digest algorithm (ARM NEON)"
+ depends on ARM && KERNEL_MODE_NEON && !CPU_BIG_ENDIAN
+ select CRYPTO_SHA512
+ select CRYPTO_HASH
+ help
+ SHA-512 secure hash standard (DFIPS 180-2) implemented
+ using ARM NEON instructions, when available.
+
+ This version of SHA implements a 512 bit hash with 256 bits of
+ security against collision attacks.
+
+ This code also includes SHA-384, a 384 bit hash with 192 bits
+ of security against collision attacks.
+
config CRYPTO_TGR192
tristate "Tiger digest algorithms"
select CRYPTO_HASH
* fire when the timer value is greater than or equal to. In previous
* revisions the comparators fired when the timer value was equal to.
*/
- if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9
+ if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9
&& (read_cpuid_id() & 0xf0000f) < 0x200000) {
pr_warn("global-timer: non support for this cpu version.\n");
return;
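For reference, the revision test next to that warning relies on the standard MIDR layout: the variant ("rN") sits in bits [23:20] and the revision ("pM") in bits [3:0], so masking with 0xf0000f and comparing against 0x200000 selects everything earlier than r2p0 - the Cortex-A9 revisions whose global-timer comparators fire only on an exact match. A small sketch of just that test, with a hypothetical helper name:

#include <linux/types.h>

/* True for revisions before r2p0, judged only by MIDR variant/revision
 * (the caller still has to check that the part number is Cortex-A9). */
static inline bool gt_a9_pre_r2p0(u32 midr)
{
	return (midr & 0xf0000f) < 0x200000;
}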