commit 759496ba6407c6994d6a5ce3a5e74937d7816208 upstream.
Unlike global OOM handling, memory cgroup code will invoke the OOM killer
in any OOM situation because it has no way of telling faults occurring in
kernel context - which could be handled more gracefully - from
user-triggered faults.
Pass a flag that identifies faults originating in user space from the
architecture-specific fault handlers to generic code so that memcg OOM
handling can be improved.
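In condensed form, each architecture's fault handler changes along the
following lines (an illustrative composite, not the code of any single
architecture; "write" stands for whatever write indicator that arch decodes):

    /* Before: only write-ness was encoded in the fault flags. */
    unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
                         (write ? FAULT_FLAG_WRITE : 0);

    /* After: build the flags up so the fault's origin is recorded too. */
    unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;

    if (user_mode(regs))
        flags |= FAULT_FLAG_USER;
    if (write)
        flags |= FAULT_FLAG_WRITE;

    fault = handle_mm_fault(mm, vma, address, flags);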
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Cc: David Rientjes <rientjes@google.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: azurIt <azurit@pobox.sk>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
29 files changed:
const struct exception_table_entry *fixup;
int fault, si_code = SEGV_MAPERR;
siginfo_t info;
- unsigned int flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
- (cause > 0 ? FAULT_FLAG_WRITE : 0));
+ unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
/* As of EV6, a load into $31/$f31 is a prefetch, and never faults
(or is suppressed by the PALcode). Support that for older CPUs
if (address >= TASK_SIZE)
goto vmalloc_fault;
#endif
+ if (user_mode(regs))
+ flags |= FAULT_FLAG_USER;
retry:
down_read(&mm->mmap_sem);
vma = find_vma(mm, address);
} else {
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
+ flags |= FAULT_FLAG_WRITE;
}
/* If for any reason at all we couldn't handle the fault,
struct mm_struct *mm = tsk->mm;
siginfo_t info;
int fault, ret;
- unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
- (write ? FAULT_FLAG_WRITE : 0);
+ unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
/*
* We fault-in kernel-space virtual memory on-demand. The
if (in_atomic() || !mm)
goto no_context;
+ if (user_mode(regs))
+ flags |= FAULT_FLAG_USER;
retry:
down_read(&mm->mmap_sem);
vma = find_vma(mm, address);
if (write) {
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
+ flags |= FAULT_FLAG_WRITE;
} else {
if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
goto bad_area;
struct task_struct *tsk;
struct mm_struct *mm;
int fault, sig, code;
- int write = fsr & FSR_WRITE;
- unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
- (write ? FAULT_FLAG_WRITE : 0);
+ unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
if (notify_page_fault(regs, fsr))
return 0;
if (in_atomic() || !mm)
goto no_context;
+ if (user_mode(regs))
+ flags |= FAULT_FLAG_USER;
+ if (fsr & FSR_WRITE)
+ flags |= FAULT_FLAG_WRITE;
+
/*
* As per x86, we may deadlock here. However, since the kernel only
* validly references user space from well defined areas of the code,
unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC;
unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
- if (esr & ESR_LNX_EXEC) {
- vm_flags = VM_EXEC;
- } else if ((esr & ESR_WRITE) && !(esr & ESR_CM)) {
- vm_flags = VM_WRITE;
- mm_flags |= FAULT_FLAG_WRITE;
- }
-
tsk = current;
mm = tsk->mm;
if (in_atomic() || !mm)
goto no_context;
+ if (user_mode(regs))
+ mm_flags |= FAULT_FLAG_USER;
+
+ if (esr & ESR_LNX_EXEC) {
+ vm_flags = VM_EXEC;
+ } else if ((esr & ESR_WRITE) && !(esr & ESR_CM)) {
+ vm_flags = VM_WRITE;
+ mm_flags |= FAULT_FLAG_WRITE;
+ }
+
/*
* As per x86, we may deadlock here. However, since the kernel only
* validly references user space from well defined areas of the code,
+ if (user_mode(regs))
+ flags |= FAULT_FLAG_USER;
retry:
down_read(&mm->mmap_sem);
struct vm_area_struct * vma;
siginfo_t info;
int fault;
- unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
- ((writeaccess & 1) ? FAULT_FLAG_WRITE : 0);
+ unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
D(printk(KERN_DEBUG
"Page fault for %lX on %X at %lX, prot %d write %d\n",
if (in_atomic() || !mm)
goto no_context;
+ if (user_mode(regs))
+ flags |= FAULT_FLAG_USER;
retry:
down_read(&mm->mmap_sem);
vma = find_vma(mm, address);
} else if (writeaccess == 1) {
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
+ flags |= FAULT_FLAG_WRITE;
} else {
if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
goto bad_area;
struct vm_area_struct *vma;
struct mm_struct *mm;
unsigned long _pme, lrai, lrad, fixup;
+ unsigned long flags = 0;
siginfo_t info;
pgd_t *pge;
pud_t *pue;
pte_t *pte;
if (in_atomic() || !mm)
goto no_context;
+ if (user_mode(__frame))
+ flags |= FAULT_FLAG_USER;
+
down_read(&mm->mmap_sem);
vma = find_vma(mm, ear0);
*/
good_area:
info.si_code = SEGV_ACCERR;
switch (esr0 & ESR0_ATXC) {
default:
/* handle write to write protected page */
#endif
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
+ flags |= FAULT_FLAG_WRITE;
break;
/* handle read from protected page */
* make sure we exit gracefully rather than endlessly redo
* the fault.
*/
- fault = handle_mm_fault(mm, vma, ear0, write ? FAULT_FLAG_WRITE : 0);
+ fault = handle_mm_fault(mm, vma, ear0, flags);
if (unlikely(fault & VM_FAULT_ERROR)) {
if (fault & VM_FAULT_OOM)
goto out_of_memory;
int si_code = SEGV_MAPERR;
int fault;
const struct exception_table_entry *fixup;
- unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
- (cause > 0 ? FAULT_FLAG_WRITE : 0);
+ unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
/*
* If we're in an interrupt or have no user context,
+ if (user_mode(regs))
+ flags |= FAULT_FLAG_USER;
retry:
down_read(&mm->mmap_sem);
vma = find_vma(mm, address);
case FLT_STORE:
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
+ flags |= FAULT_FLAG_WRITE;
mask = ((((isr >> IA64_ISR_X_BIT) & 1UL) << VM_EXEC_BIT)
| (((isr >> IA64_ISR_W_BIT) & 1UL) << VM_WRITE_BIT));
- flags |= ((mask & VM_WRITE) ? FAULT_FLAG_WRITE : 0);
-
/* mmap_sem is performance critical.... */
prefetchw(&mm->mmap_sem);
if (notify_page_fault(regs, TRAP_BRKPT))
return;
+ if (user_mode(regs))
+ flags |= FAULT_FLAG_USER;
+ if (mask & VM_WRITE)
+ flags |= FAULT_FLAG_WRITE;
retry:
down_read(&mm->mmap_sem);
struct mm_struct *mm;
struct vm_area_struct * vma;
unsigned long page, addr;
+ unsigned long flags = 0;
int fault;
siginfo_t info;
if (in_atomic() || !mm)
goto bad_area_nosemaphore;
+ if (error_code & ACE_USERMODE)
+ flags |= FAULT_FLAG_USER;
+
/* When running in the kernel we expect faults to occur only to
* addresses in user space. All other faults represent errors in the
* kernel and should generate an OOPS. Unfortunately, in the case of an
*/
good_area:
info.si_code = SEGV_ACCERR;
switch (error_code & (ACE_WRITE|ACE_PROTECTION)) {
default: /* 3: write, present */
/* fall through */
case ACE_WRITE: /* write, not present */
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
+ flags |= FAULT_FLAG_WRITE;
break;
case ACE_PROTECTION: /* read, present */
case 0: /* read, not present */
*/
addr = (address & PAGE_MASK);
set_thread_fault_code(error_code);
- fault = handle_mm_fault(mm, vma, addr, write ? FAULT_FLAG_WRITE : 0);
+ fault = handle_mm_fault(mm, vma, addr, flags);
if (unlikely(fault & VM_FAULT_ERROR)) {
if (fault & VM_FAULT_OOM)
goto out_of_memory;
if (in_atomic() || !mm)
goto no_context;
+ if (user_mode(regs))
+ flags |= FAULT_FLAG_USER;
retry:
down_read(&mm->mmap_sem);
struct vm_area_struct *vma, *prev_vma;
siginfo_t info;
int fault;
- unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
- (write_access ? FAULT_FLAG_WRITE : 0);
+ unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
if (in_atomic() || !mm)
goto no_context;
+ if (user_mode(regs))
+ flags |= FAULT_FLAG_USER;
retry:
down_read(&mm->mmap_sem);
if (write_access) {
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
+ flags |= FAULT_FLAG_WRITE;
} else {
if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
goto bad_area;
int code = SEGV_MAPERR;
int is_write = error_code & ESR_S;
int fault;
- unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
- (is_write ? FAULT_FLAG_WRITE : 0);
+ unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
regs->ear = address;
regs->esr = error_code;
die("Weird page fault", regs, SIGSEGV);
}
die("Weird page fault", regs, SIGSEGV);
}
+ if (user_mode(regs))
+ flags |= FAULT_FLAG_USER;
+
/* When running in the kernel we expect faults to occur only to
* addresses in user space. All other faults represent errors in the
* kernel and should generate an OOPS. Unfortunately, in the case of an
if (unlikely(is_write)) {
if (unlikely(!(vma->vm_flags & VM_WRITE)))
goto bad_area;
+ flags |= FAULT_FLAG_WRITE;
/* a read */
} else {
/* protection fault */
const int field = sizeof(unsigned long) * 2;
siginfo_t info;
int fault;
- unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
- (write ? FAULT_FLAG_WRITE : 0);
+ unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
#if 0
printk("Cpu%d[%s:%d:%0*lx:%ld:%0*lx]\n", raw_smp_processor_id(),
if (in_atomic() || !mm)
goto bad_area_nosemaphore;
+ if (user_mode(regs))
+ flags |= FAULT_FLAG_USER;
retry:
down_read(&mm->mmap_sem);
vma = find_vma(mm, address);
if (write) {
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
+ flags |= FAULT_FLAG_WRITE;
} else {
if (cpu_has_rixi) {
if (address == regs->cp0_epc && !(vma->vm_flags & VM_EXEC)) {
if (in_atomic() || !mm)
goto no_context;
+ if ((fault_code & MMUFCR_xFC_ACCESS) == MMUFCR_xFC_ACCESS_USR)
+ flags |= FAULT_FLAG_USER;
retry:
down_read(&mm->mmap_sem);
if (user_mode(regs)) {
/* Exception was in userspace: reenable interrupts */
local_irq_enable();
+ flags |= FAULT_FLAG_USER;
} else {
/* If exception was in a syscall, then IRQ's may have
* been enabled or disabled. If they were enabled,
if (in_atomic() || !mm)
goto no_context;
+ if (user_mode(regs))
+ flags |= FAULT_FLAG_USER;
+ if (acc_type & VM_WRITE)
+ flags |= FAULT_FLAG_WRITE;
retry:
down_read(&mm->mmap_sem);
vma = find_vma_prev(mm, address, &prev_vma);
- fault = handle_mm_fault(mm, vma, address,
- flags | ((acc_type & VM_WRITE) ? FAULT_FLAG_WRITE : 0));
+ fault = handle_mm_fault(mm, vma, address, flags);
if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
return;
is_write = error_code & ESR_DST;
#endif /* CONFIG_4xx || CONFIG_BOOKE */
- if (is_write)
- flags |= FAULT_FLAG_WRITE;
-
#ifdef CONFIG_PPC_ICSWX
/*
* we need to do this early because this "data storage
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
+ if (user_mode(regs))
+ flags |= FAULT_FLAG_USER;
+
/* When running in the kernel we expect faults to occur only to
* addresses in user space. All other faults represent errors in the
* kernel and should generate an OOPS. Unfortunately, in the case of an
} else if (is_write) {
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
+ flags |= FAULT_FLAG_WRITE;
/* a read */
} else {
/* protection fault */
address = trans_exc_code & __FAIL_ADDR_MASK;
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+ if (user_mode(regs))
+ flags |= FAULT_FLAG_USER;
if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400)
flags |= FAULT_FLAG_WRITE;
down_read(&mm->mmap_sem);
struct task_struct *tsk = current;
struct mm_struct *mm = tsk->mm;
const int field = sizeof(unsigned long) * 2;
+ unsigned long flags = 0;
siginfo_t info;
int fault;
if (in_atomic() || !mm)
goto bad_area_nosemaphore;
+ if (user_mode(regs))
+ flags |= FAULT_FLAG_USER;
+
down_read(&mm->mmap_sem);
vma = find_vma(mm, address);
if (!vma)
if (write) {
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
+ flags |= FAULT_FLAG_WRITE;
} else {
if (!(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)))
goto bad_area;
* make sure we exit gracefully rather than endlessly redo
* the fault.
*/
- fault = handle_mm_fault(mm, vma, address, write);
+ fault = handle_mm_fault(mm, vma, address, flags);
if (unlikely(fault & VM_FAULT_ERROR)) {
if (fault & VM_FAULT_OOM)
goto out_of_memory;
struct mm_struct *mm;
struct vm_area_struct * vma;
int fault;
- int write = error_code & FAULT_CODE_WRITE;
- unsigned int flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
- (write ? FAULT_FLAG_WRITE : 0));
+ unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
tsk = current;
mm = tsk->mm;
set_thread_fault_code(error_code);
+ if (user_mode(regs))
+ flags |= FAULT_FLAG_USER;
+ if (error_code & FAULT_CODE_WRITE)
+ flags |= FAULT_FLAG_WRITE;
+
/*
* If for any reason at all we couldn't handle the fault,
* make sure we exit gracefully rather than endlessly redo
unsigned long g2;
int from_user = !(regs->psr & PSR_PS);
int fault, code;
- unsigned int flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
- (write ? FAULT_FLAG_WRITE : 0));
+ unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
if (text_fault)
address = regs->pc;
+ if (from_user)
+ flags |= FAULT_FLAG_USER;
+ if (write)
+ flags |= FAULT_FLAG_WRITE;
+
/*
* If for any reason at all we couldn't handle the fault,
* make sure we exit gracefully rather than endlessly redo
struct vm_area_struct *vma;
struct task_struct *tsk = current;
struct mm_struct *mm = tsk->mm;
+ unsigned int flags = FAULT_FLAG_USER;
int code;
code = SEGV_MAPERR;
if (write) {
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
+ flags |= FAULT_FLAG_WRITE;
} else {
if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
goto bad_area;
}
- switch (handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0)) {
+ switch (handle_mm_fault(mm, vma, address, flags)) {
case VM_FAULT_SIGBUS:
case VM_FAULT_OOM:
goto do_sigbus;
bad_kernel_pc(regs, address);
return;
}
+ } else
+ flags |= FAULT_FLAG_USER;
/*
* If we're in an interrupt or have no user
vma->vm_file != NULL)
set_thread_fault_code(fault_code |
FAULT_CODE_BLKCOMMIT);
+
+ flags |= FAULT_FLAG_WRITE;
} else {
/* Allow reads even for write-only mappings */
if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
goto bad_area;
}
- flags |= ((fault_code & FAULT_CODE_WRITE) ? FAULT_FLAG_WRITE : 0);
fault = handle_mm_fault(mm, vma, address, flags);
if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
if (!is_page_fault)
write = 1;
- flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
- (write ? FAULT_FLAG_WRITE : 0));
+ flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
is_kernel_mode = (EX1_PL(regs->ex1) != USER_PL);
goto bad_area_nosemaphore;
}
+ if (!is_kernel_mode)
+ flags |= FAULT_FLAG_USER;
+
/*
* When running in the kernel we expect faults to occur only to
* addresses in user space. All other faults represent errors in the
#endif
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
+ flags |= FAULT_FLAG_WRITE;
} else {
if (!is_page_fault || !(vma->vm_flags & VM_READ))
goto bad_area;
pmd_t *pmd;
pte_t *pte;
int err = -EFAULT;
- unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
- (is_write ? FAULT_FLAG_WRITE : 0);
+ unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
if (in_atomic())
goto out_nosemaphore;
+ if (is_user)
+ flags |= FAULT_FLAG_USER;
retry:
down_read(&mm->mmap_sem);
vma = find_vma(mm, address);
good_area:
*code_out = SEGV_ACCERR;
- if (is_write && !(vma->vm_flags & VM_WRITE))
- goto out;
-
- /* Don't require VM_READ|VM_EXEC for write faults! */
- if (!is_write && !(vma->vm_flags & (VM_READ | VM_EXEC)))
- goto out;
+ if (is_write) {
+ if (!(vma->vm_flags & VM_WRITE))
+ goto out;
+ flags |= FAULT_FLAG_WRITE;
+ } else {
+ /* Don't require VM_READ|VM_EXEC for write faults! */
+ if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
+ goto out;
+ }
struct task_struct *tsk;
struct mm_struct *mm;
int fault, sig, code;
- unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
- ((!(fsr ^ 0x12)) ? FAULT_FLAG_WRITE : 0);
+ unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
tsk = current;
mm = tsk->mm;
if (in_atomic() || !mm)
goto no_context;
+ if (user_mode(regs))
+ flags |= FAULT_FLAG_USER;
+ if (!(fsr ^ 0x12))
+ flags |= FAULT_FLAG_WRITE;
+
/*
* As per x86, we may deadlock here. However, since the kernel only
* validly references user space from well defined areas of the code,
unsigned long address;
struct mm_struct *mm;
int fault;
- int write = error_code & PF_WRITE;
- unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
- (write ? FAULT_FLAG_WRITE : 0);
+ unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
tsk = current;
mm = tsk->mm;
if (user_mode_vm(regs)) {
local_irq_enable();
error_code |= PF_USER;
+ flags |= FAULT_FLAG_USER;
} else {
if (regs->flags & X86_EFLAGS_IF)
local_irq_enable();
+ if (error_code & PF_WRITE)
+ flags |= FAULT_FLAG_WRITE;
+
/*
* When running in the kernel we expect faults to occur only to
* addresses in user space. All other faults represent errors in
address, exccause, regs->pc, is_write? "w":"", is_exec? "x":"");
#endif
+ if (user_mode(regs))
+ flags |= FAULT_FLAG_USER;
retry:
down_read(&mm->mmap_sem);
vma = find_vma(mm, address);
#define FAULT_FLAG_RETRY_NOWAIT 0x10 /* Don't drop mmap_sem and wait when retrying */
#define FAULT_FLAG_KILLABLE 0x20 /* The fault task is in SIGKILL killable region */
#define FAULT_FLAG_TRIED 0x40 /* second try */
+#define FAULT_FLAG_USER 0x80 /* The fault originated in userspace */
/*
* vm_fault is filled by the pagefault handler and passed to the vma's
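With the flag defined, generic mm code can distinguish the two cases. A
minimal sketch of the kind of check this enables in the memcg OOM path
(the helper below is hypothetical, for illustration only, and not part of
this patch):

    /* Hypothetical helper: only user-context faults may invoke the OOM killer. */
    static bool fault_may_invoke_oom_killer(unsigned int flags)
    {
        /*
         * A fault from kernel context (FAULT_FLAG_USER clear) can have
         * VM_FAULT_OOM propagated back to its caller and be unwound
         * gracefully; OOM killing is reserved for faults that originate
         * in userspace.
         */
        return flags & FAULT_FLAG_USER;
    }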