static const char *fault_name(unsigned int esr);
+#ifdef CONFIG_KPROBES
+static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr)
+{
+ int ret = 0;
+
+ /* kprobe_running() needs smp_processor_id() */
+ if (!user_mode(regs)) {
+ preempt_disable();
+ if (kprobe_running() && kprobe_fault_handler(regs, esr))
+ ret = 1;
+ preempt_enable();
+ }
+
+ return ret;
+}
+#else
+static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr)
+{
+ return 0;
+}
+#endif
+
/*
* Dump out the page tables associated with 'addr' in mm 'mm'.
*/
printk("\n");
}
+#ifdef CONFIG_ARM64_HW_AFDBM
+/*
+ * This function sets the access flags (dirty, accessed), as well as write
+ * permission, and only to a more permissive setting.
+ *
+ * It needs to cope with hardware update of the accessed/dirty state by other
+ * agents in the system and can safely skip the __sync_icache_dcache() call as,
+ * like set_pte_at(), the PTE is never changed from no-exec to exec here.
+ *
+ * Returns whether or not the PTE actually changed.
+ */
+int ptep_set_access_flags(struct vm_area_struct *vma,
+ unsigned long address, pte_t *ptep,
+ pte_t entry, int dirty)
+{
+ pteval_t old_pteval;
+ unsigned int tmp;
+
+ if (pte_same(*ptep, entry))
+ return 0;
+
+ /* only preserve the access flags and write permission */
+ pte_val(entry) &= PTE_AF | PTE_WRITE | PTE_DIRTY;
+
+ /*
+ * PTE_RDONLY is cleared by default in the asm below, so set it in
+ * back if necessary (read-only or clean PTE).
+ */
+ if (!pte_write(entry) || !pte_sw_dirty(entry))
+ pte_val(entry) |= PTE_RDONLY;
+
+ /*
+ * Setting the flags must be done atomically to avoid racing with the
+ * hardware update of the access/dirty state.
+ */
+ asm volatile("// ptep_set_access_flags\n"
+ " prfm pstl1strm, %2\n"
+ "1: ldxr %0, %2\n"
+ " and %0, %0, %3 // clear PTE_RDONLY\n"
+ " orr %0, %0, %4 // set flags\n"
+ " stxr %w1, %0, %2\n"
+ " cbnz %w1, 1b\n"
+ : "=&r" (old_pteval), "=&r" (tmp), "+Q" (pte_val(*ptep))
+ : "L" (~PTE_RDONLY), "r" (pte_val(entry)));
+
+ flush_tlb_fix_spurious_fault(vma, address);
+ return 1;
+}
+#endif
+
+static bool is_el1_instruction_abort(unsigned int esr)
+{
+ return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_CUR;
+}
+
/*
* The kernel tried to access some page that wasn't present.
*/
{
/*
* Are we prepared to handle this kernel fault?
+ * We are almost certainly not prepared to handle instruction faults.
*/
- if (fixup_exception(regs))
+ if (!is_el1_instruction_abort(esr) && fixup_exception(regs))
return;
/*
#define VM_FAULT_BADMAP 0x010000
#define VM_FAULT_BADACCESS 0x020000
-#define ESR_LNX_EXEC (1 << 24)
-
static int __do_page_fault(struct mm_struct *mm, unsigned long addr,
unsigned int mm_flags, unsigned long vm_flags,
struct task_struct *tsk)
return fault;
}
+static inline bool is_permission_fault(unsigned int esr, struct pt_regs *regs)
+{
+ unsigned int ec = ESR_ELx_EC(esr);
+ unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE;
+
+ if (ec != ESR_ELx_EC_DABT_CUR && ec != ESR_ELx_EC_IABT_CUR)
+ return false;
+
+ if (system_uses_ttbr0_pan())
+ return fsc_type == ESR_ELx_FSC_FAULT &&
+ (regs->pstate & PSR_PAN_BIT);
+ else
+ return fsc_type == ESR_ELx_FSC_PERM;
+}
+
+static bool is_el0_instruction_abort(unsigned int esr)
+{
+ return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW;
+}
+
static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
struct pt_regs *regs)
{
unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC;
unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+ if (notify_page_fault(regs, esr))
+ return 0;
+
tsk = current;
mm = tsk->mm;
if (user_mode(regs))
mm_flags |= FAULT_FLAG_USER;
- if (esr & ESR_LNX_EXEC) {
+ if (is_el0_instruction_abort(esr)) {
vm_flags = VM_EXEC;
} else if ((esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM)) {
vm_flags = VM_WRITE;
mm_flags |= FAULT_FLAG_WRITE;
}
- /*
- * PAN bit set implies the fault happened in kernel space, but not
- * in the arch's user access functions.
- */
- if (IS_ENABLED(CONFIG_ARM64_PAN) && (regs->pstate & PSR_PAN_BIT))
- goto no_context;
+ if (addr < USER_DS && is_permission_fault(esr, regs)) {
+ if (get_fs() == KERNEL_DS)
+ die("Accessing user space memory with fs=KERNEL_DS", regs, esr);
+
+ if (is_el1_instruction_abort(esr))
+ die("Attempting to execute userspace memory", regs, esr);
+
+ if (!search_exception_tables(regs->pc))
+ die("Accessing user space memory outside uaccess.h routines", regs, esr);
+ }
/*
* As per x86, we may deadlock here. However, since the kernel only
return 1;
}
-static struct fault_info {
+static const struct fault_info {
int (*fn)(unsigned long addr, unsigned int esr, struct pt_regs *regs);
int sig;
int code;
{ do_bad, SIGBUS, 0, "unknown 17" },
{ do_bad, SIGBUS, 0, "unknown 18" },
{ do_bad, SIGBUS, 0, "unknown 19" },
- { do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" },
- { do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" },
- { do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" },
- { do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" },
+ { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" },
+ { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" },
+ { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" },
+ { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" },
{ do_bad, SIGBUS, 0, "synchronous parity error" },
{ do_bad, SIGBUS, 0, "unknown 25" },
{ do_bad, SIGBUS, 0, "unknown 26" },
return 0;
}
+NOKPROBE_SYMBOL(do_debug_exception);
#ifdef CONFIG_ARM64_PAN
void cpu_enable_pan(void *__unused)
config_sctlr_el1(SCTLR_EL1_SPAN, 0);
}
#endif /* CONFIG_ARM64_PAN */
+
+#ifdef CONFIG_ARM64_UAO
+/*
+ * Kernel threads have fs=KERNEL_DS by default, and don't need to call
+ * set_fs(), devtmpfs in particular relies on this behaviour.
+ * We need to enable the feature at runtime (instead of adding it to
+ * PSR_MODE_EL1h) as the feature may not be implemented by the cpu.
+ */
+void cpu_enable_uao(void *__unused)
+{
+ asm(SET_PSTATE_UAO(1));
+}
+#endif /* CONFIG_ARM64_UAO */