x86/asm/entry/32, selftests: Add 'test_syscall_vdso' test
author Denys Vlasenko <dvlasenk@redhat.com>
Wed, 16 Sep 2015 18:23:29 +0000 (20:23 +0200)
committer Ingo Molnar <mingo@kernel.org>
Fri, 18 Sep 2015 07:40:48 +0000 (09:40 +0200)
This new test checks that all x86 registers are preserved across
32-bit syscalls. It tests syscalls through VDSO (if available)
and through INT 0x80, normally and under ptrace.

If the kernel is a 64-bit one, the high registers (r8..r15) are poisoned
before the syscall is made and are checked afterwards.

They must be either preserved, or cleared to zero (but r11 is
special); r12..15 must be preserved for INT 0x80.

EFLAGS is checked for changes too, but change there is not
considered to be a bug (paravirt kernels do not preserve
arithmetic flags).

Run-tested on 64-bit kernel:

$ ./test_syscall_vdso_32
[RUN] Executing 6-argument 32-bit syscall via VDSO
[OK] Arguments are preserved across syscall
[NOTE] R11 has changed:0000000000200ed7 - assuming clobbered by SYSRET insn
[OK] R8..R15 did not leak kernel data
[RUN] Executing 6-argument 32-bit syscall via INT 80
[OK] Arguments are preserved across syscall
[OK] R8..R15 did not leak kernel data
[RUN] Running tests under ptrace
[RUN] Executing 6-argument 32-bit syscall via VDSO
[OK] Arguments are preserved across syscall
[OK] R8..R15 did not leak kernel data
[RUN] Executing 6-argument 32-bit syscall via INT 80
[OK] Arguments are preserved across syscall
[OK] R8..R15 did not leak kernel data

On 32-bit paravirt kernel:

$ ./test_syscall_vdso_32
[NOTE] Not a 64-bit kernel, won't test R8..R15 leaks
[RUN] Executing 6-argument 32-bit syscall via VDSO
[WARN] Flags before=0000000000200ed7 id 0 00 o d i s z 0 a 0 p 1 c
[WARN] Flags  after=0000000000200246 id 0 00 i z 0 0 p 1
[WARN] Flags change=0000000000000c91 0 00 o d s 0 a 0 0 c
[OK] Arguments are preserved across syscall
[RUN] Executing 6-argument 32-bit syscall via INT 80
[OK] Arguments are preserved across syscall
[RUN] Running tests under ptrace
[RUN] Executing 6-argument 32-bit syscall via VDSO
[OK] Arguments are preserved across syscall
[RUN] Executing 6-argument 32-bit syscall via INT 80
[OK] Arguments are preserved across syscall

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
Acked-by: Andy Lutomirski <luto@amacapital.net>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Shuah Khan <shuahkh@osg.samsung.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Drewry <wad@chromium.org>
Cc: linux-kernel@vger.kernel.org
Link: http://lkml.kernel.org/r/1442427809-2027-1-git-send-email-dvlasenk@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
tools/testing/selftests/x86/Makefile
tools/testing/selftests/x86/test_syscall_vdso.c [new file with mode: 0644]
tools/testing/selftests/x86/thunks_32.S [new file with mode: 0644]

index 29089b24d18bca1bd3b4a3b8ab92a65cf6316053..fd55bc37fa18c29fc64ab611f28b23d9c421c9fa 100644 (file)
@@ -5,7 +5,7 @@ include ../lib.mk
 .PHONY: all all_32 all_64 warn_32bit_failure clean
 
 TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs ldt_gdt syscall_nt
-TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault sigreturn
+TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault sigreturn test_syscall_vdso
 
 TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY)
 BINARIES_32 := $(TARGETS_C_32BIT_ALL:%=%_32)
@@ -60,3 +60,4 @@ endif
 
 # Some tests have additional dependencies.
 sysret_ss_attrs_64: thunks.S
+test_syscall_vdso_32: thunks_32.S
diff --git a/tools/testing/selftests/x86/test_syscall_vdso.c b/tools/testing/selftests/x86/test_syscall_vdso.c
new file mode 100644 (file)
index 0000000..4037035
--- /dev/null
@@ -0,0 +1,401 @@
+/*
+ * 32-bit syscall ABI conformance test.
+ *
+ * Copyright (c) 2015 Denys Vlasenko
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+/*
+ * Can be built statically:
+ * gcc -Os -Wall -static -m32 test_syscall_vdso.c thunks_32.S
+ */
+#undef _GNU_SOURCE
+#define _GNU_SOURCE 1
+#undef __USE_GNU
+#define __USE_GNU 1
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/select.h>
+#include <sys/time.h>
+#include <elf.h>
+#include <sys/ptrace.h>
+#include <sys/wait.h>
+
+#if !defined(__i386__)
+/*
+ * Fallback entry point used when the program is not compiled for
+ * 32-bit x86 (__i386__ undefined): prints a [SKIP] notice and
+ * returns 0 without running any test.
+ */
+int main(int argc, char **argv, char **envp)
+{
+       printf("[SKIP]\tNot a 32-bit x86 userspace\n");
+       return 0;
+}
+#else
+
+/*
+ * Address that run_syscall() calls to enter the kernel.  main() sets it
+ * from get_syscall(); run_syscall_twice() temporarily points it at int80.
+ */
+long syscall_addr;
+/*
+ * Look up the AT_SYSINFO entry of the ELF auxiliary vector.
+ *
+ * The vector is read from the memory that immediately follows the NULL
+ * terminator of the envp[] array.
+ *
+ * Returns the AT_SYSINFO value, or 0 (after printing a [WARN] line)
+ * when the vector contains no such entry.
+ */
+long get_syscall(char **envp)
+{
+       Elf32_auxv_t *auxv;
+       /* Skip all environment pointers; envp ends up one past the NULL */
+       while (*envp++ != NULL)
+               continue;
+       /* Scan auxv entries up to the AT_NULL terminator */
+       for (auxv = (void *)envp; auxv->a_type != AT_NULL; auxv++)
+               if (auxv->a_type == AT_SYSINFO)
+                       return auxv->a_un.a_val;
+       printf("[WARN]\tAT_SYSINFO not supplied\n");
+       return 0;
+}
+
+/*
+ * int80: minimal stub that issues INT 0x80 and returns to its caller.
+ * run_syscall_twice() stores its address in syscall_addr so that
+ * run_syscall() can reach it with the same indirect call it uses for
+ * the AT_SYSINFO entry point.
+ */
+asm (
+       "       .pushsection .text\n"
+       "       .global int80\n"
+       "int80:\n"
+       "       int     $0x80\n"
+       "       ret\n"
+       "       .popsection\n"
+);
+/* Only the address of this symbol (&int80) is ever used from C */
+extern char int80;
+
+/*
+ * Snapshot of the sixteen 64-bit general purpose registers.
+ * The field order must match the n*8 offsets that the get_regs64
+ * assembly routine stores to (rax at 0*8 ... r15 at 15*8).
+ */
+struct regs64 {
+       uint64_t rax, rbx, rcx, rdx;
+       uint64_t rsi, rdi, rbp, rsp;
+       uint64_t r8,  r9,  r10, r11;
+       uint64_t r12, r13, r14, r15;
+};
+/* Filled in by get_regs64; examined by check_regs64() and print_regs64() */
+struct regs64 regs64;
+/* Set in main() from the value of %cs; gates all 64-bit register checks */
+int kernel_is_64bit;
+
+/*
+ * 64-bit helper routines, assembled with .code64 inside this 32-bit
+ * program.  run_syscall() invokes them through call64_from_32().
+ *
+ * get_regs64:    stores rax, rbx, ..., r15 into the global regs64.
+ * poison_regs64: loads 0x7f7f7f7f7f7f7f7f into each of r8..r15.
+ */
+asm (
+       "       .pushsection .text\n"
+       "       .code64\n"
+       "get_regs64:\n"
+       /* rax is needed as the base pointer: park its value on the stack... */
+       "       push    %rax\n"
+       "       mov     $regs64, %eax\n"
+       /* ...and pop it straight into the rax slot of regs64 */
+       "       pop     0*8(%rax)\n"
+       "       movq    %rbx, 1*8(%rax)\n"
+       "       movq    %rcx, 2*8(%rax)\n"
+       "       movq    %rdx, 3*8(%rax)\n"
+       "       movq    %rsi, 4*8(%rax)\n"
+       "       movq    %rdi, 5*8(%rax)\n"
+       "       movq    %rbp, 6*8(%rax)\n"
+       /* push/pop above are balanced, so this is rsp as it was on entry */
+       "       movq    %rsp, 7*8(%rax)\n"
+       "       movq    %r8,  8*8(%rax)\n"
+       "       movq    %r9,  9*8(%rax)\n"
+       "       movq    %r10, 10*8(%rax)\n"
+       "       movq    %r11, 11*8(%rax)\n"
+       "       movq    %r12, 12*8(%rax)\n"
+       "       movq    %r13, 13*8(%rax)\n"
+       "       movq    %r14, 14*8(%rax)\n"
+       "       movq    %r15, 15*8(%rax)\n"
+       "       ret\n"
+       "poison_regs64:\n"
+       /* Build the pattern in r8: low half, shift it up, OR the low half in again */
+       "       movq    $0x7f7f7f7f, %r8\n"
+       "       shl     $32, %r8\n"
+       "       orq     $0x7f7f7f7f, %r8\n"
+       /* Replicate it into r9..r15 */
+       "       movq    %r8, %r9\n"
+       "       movq    %r8, %r10\n"
+       "       movq    %r8, %r11\n"
+       "       movq    %r8, %r12\n"
+       "       movq    %r8, %r13\n"
+       "       movq    %r8, %r14\n"
+       "       movq    %r8, %r15\n"
+       "       ret\n"
+       "       .code32\n"
+       "       .popsection\n"
+);
+/* 64-bit code: only ever passed to call64_from_32(), never called directly from C */
+extern void get_regs64(void);
+extern void poison_regs64(void);
+/* Implemented in thunks_32.S */
+extern unsigned long call64_from_32(void (*function)(void));
+/*
+ * Debugging aid: dump the contents of regs64, four registers per line.
+ * Prints nothing unless kernel_is_64bit is set.
+ * The calls to it in run_syscall() are commented out.
+ */
+void print_regs64(void)
+{
+       if (!kernel_is_64bit)
+               return;
+       printf("ax:%016llx bx:%016llx cx:%016llx dx:%016llx\n", regs64.rax,  regs64.rbx,  regs64.rcx,  regs64.rdx);
+       printf("si:%016llx di:%016llx bp:%016llx sp:%016llx\n", regs64.rsi,  regs64.rdi,  regs64.rbp,  regs64.rsp);
+       printf(" 8:%016llx  9:%016llx 10:%016llx 11:%016llx\n", regs64.r8 ,  regs64.r9 ,  regs64.r10,  regs64.r11);
+       printf("12:%016llx 13:%016llx 14:%016llx 15:%016llx\n", regs64.r12,  regs64.r13,  regs64.r14,  regs64.r15);
+}
+
+/*
+ * Compare the R8..R15 values captured in regs64 with the poison pattern
+ * loaded by poison_regs64.
+ *
+ * A register passes if it still holds 0x7f7f7f7f7f7f7f7f, or if it
+ * changed in one of the ways tolerated for the entry point currently
+ * selected by syscall_addr (see the comments in the loop).  Any other
+ * value is reported with a [FAIL] line.
+ *
+ * Returns the number of failing registers; always 0 when
+ * kernel_is_64bit is not set.
+ */
+int check_regs64(void)
+{
+       int err = 0;
+       int num = 8;
+       uint64_t *r64 = &regs64.r8;
+
+       if (!kernel_is_64bit)
+               return 0;
+
+       /*
+        * Note: "continue" in a do-while jumps to the loop condition,
+        * so r64 and num are still advanced for registers that pass.
+        */
+       do {
+               if (*r64 == 0x7f7f7f7f7f7f7f7fULL)
+                       continue; /* register did not change */
+               if (syscall_addr != (long)&int80) {
+                       /*
+                        * Non-INT80 syscall entrypoints are allowed to clobber R8+ regs:
+                        * either clear them to 0, or for R11, load EFLAGS.
+                        */
+                       if (*r64 == 0)
+                               continue;
+                       if (num == 11) {
+                               printf("[NOTE]\tR11 has changed:%016llx - assuming clobbered by SYSRET insn\n", *r64);
+                               continue;
+                       }
+               } else {
+                       /* INT80 syscall entrypoint can be used by
+                        * 64-bit programs too, unlike SYSCALL/SYSENTER.
+                        * Therefore it must preserve R12+
+                        * (they are callee-saved registers in 64-bit C ABI).
+                        *
+                        * This was probably historically not intended,
+                        * but R8..11 are clobbered (cleared to 0).
+                        * IOW: they are the only registers which aren't
+                        * preserved across INT80 syscall.
+                        */
+                       if (*r64 == 0 && num <= 11)
+                               continue;
+               }
+               printf("[FAIL]\tR%d has changed:%016llx\n", num, *r64);
+               err++;
+       } while (r64++, ++num < 16);
+
+       if (!err)
+               printf("[OK]\tR8..R15 did not leak kernel data\n");
+       return err;
+}
+
+/*
+ * Arguments of the 6-argument pselect call made by run_syscall().
+ * The inline asm there refers to these objects by symbol name;
+ * prep_args() fills them in before each call.
+ */
+int nfds;
+fd_set rfds;
+fd_set wfds;
+fd_set efds;
+struct timespec timeout;
+sigset_t sigmask;
+/* Sixth argument: pointer to the signal mask together with its size */
+struct {
+       sigset_t *sp;
+       int sz;
+} sigmask_desc;
+
+/*
+ * Initialise the global pselect arguments used by run_syscall():
+ * fd 0 in the read set, fd 1 in the write set, fd 2 in the exception
+ * set, a 123 ns timeout, and a signal mask containing SIGINT, SIGUSR2
+ * and SIGRTMAX described by sigmask_desc.
+ */
+void prep_args()
+{
+       nfds = 42;
+       FD_ZERO(&rfds);
+       FD_ZERO(&wfds);
+       FD_ZERO(&efds);
+       FD_SET(0, &rfds);
+       FD_SET(1, &wfds);
+       FD_SET(2, &efds);
+       /* Tiny timeout: the syscall is expected to return quickly */
+       timeout.tv_sec = 0;
+       timeout.tv_nsec = 123;
+       sigemptyset(&sigmask);
+       sigaddset(&sigmask, SIGINT);
+       sigaddset(&sigmask, SIGUSR2);
+       sigaddset(&sigmask, SIGRTMAX);
+       sigmask_desc.sp = &sigmask;
+       sigmask_desc.sz = 8; /* bytes */
+}
+
+/*
+ * Print an EFLAGS value in hex followed by a symbolic decoding.
+ *
+ * @name: text printed in front of the "=" sign
+ * @r:    flags value to decode
+ *
+ * Bits 21..0 are decoded from the most significant one downwards.
+ * If any bit above 21 is set, "(extra bits are set) " is printed first.
+ * The strings for bit 0 carry the terminating newline.
+ */
+static void print_flags(const char *name, unsigned long r)
+{
+       /*
+        * Two entries per flag bit, starting with bit 0: the text shown
+        * when the bit is clear, then the text shown when it is set.
+        * An empty string means "print nothing".
+        */
+       static const char *bitarray[] = {
+       "\n" ,"c\n" ,/* Carry Flag */
+       "0 " ,"1 "  ,/* Bit 1 - always on */
+       ""   ,"p "  ,/* Parity Flag */
+       "0 " ,"3? " ,
+       ""   ,"a "  ,/* Auxiliary carry Flag */
+       "0 " ,"5? " ,
+       ""   ,"z "  ,/* Zero Flag */
+       ""   ,"s "  ,/* Sign Flag */
+       ""   ,"t "  ,/* Trap Flag */
+       ""   ,"i "  ,/* Interrupt Flag */
+       ""   ,"d "  ,/* Direction Flag */
+       ""   ,"o "  ,/* Overflow Flag */
+       "0 " ,"1 "  ,/* I/O Privilege Level (2 bits) */
+       "0"  ,"1"   ,/* I/O Privilege Level (2 bits) */
+       ""   ,"n "  ,/* Nested Task */
+       "0 " ,"15? ",
+       ""   ,"r "  ,/* Resume Flag */
+       ""   ,"v "  ,/* Virtual Mode */
+       ""   ,"ac " ,/* Alignment Check/Access Control */
+       ""   ,"vif ",/* Virtual Interrupt Flag */
+       ""   ,"vip ",/* Virtual Interrupt Pending */
+       ""   ,"id " ,/* CPUID detection */
+       NULL
+       };
+       int bit;
+
+       printf("%s=%016lx ", name, r);
+       if ((r >> 22) != 0)
+               printf("(extra bits are set) ");
+       /*
+        * Index the table directly instead of walking a pointer down
+        * through it: a walking pointer would end up before the start
+        * of the array after bit 0, which is undefined behaviour.
+        */
+       for (bit = 21; bit >= 0; bit--) {
+               const char *str = bitarray[2 * bit + ((r >> bit) & 1)];
+
+               if (str[0])
+                       fputs(str, stdout);
+       }
+}
+
+/*
+ * Execute one pselect syscall through the entry point in syscall_addr
+ * and check what the kernel did to the registers.
+ *
+ * Steps:
+ *  - prepare the global syscall arguments (prep_args());
+ *  - on a 64-bit kernel, poison r8..r15;
+ *  - load the six argument registers and EFLAGS, make the call, then
+ *    compare every argument register with the value that was loaded;
+ *  - on a 64-bit kernel, capture the 64-bit registers into regs64;
+ *  - print [WARN] lines if EFLAGS differs from the value loaded
+ *    (this is not counted as a failure).
+ *
+ * Returns 1 if an argument register was clobbered, otherwise the
+ * result of check_regs64().
+ */
+int run_syscall(void)
+{
+       long flags, bad_arg;
+
+       prep_args();
+
+       if (kernel_is_64bit)
+               call64_from_32(poison_regs64);
+       /*print_regs64();*/
+
+       /*
+        * "volatile" and the "memory" clobber: the asm performs a syscall
+        * and reads globals (nfds, syscall_addr, the argument objects) that
+        * do not appear in its operand lists, so the compiler must neither
+        * move it nor keep the stores done by prep_args() pending across it.
+        */
+       asm volatile("\n"
+       /* Try 6-arg syscall: pselect. It should return quickly */
+       "       push    %%ebp\n"
+       "       mov     $308, %%eax\n"     /* PSELECT */
+       "       mov     nfds, %%ebx\n"     /* ebx  arg1 */
+       "       mov     $rfds, %%ecx\n"    /* ecx  arg2 */
+       "       mov     $wfds, %%edx\n"    /* edx  arg3 */
+       "       mov     $efds, %%esi\n"    /* esi  arg4 */
+       "       mov     $timeout, %%edi\n" /* edi  arg5 */
+       "       mov     $sigmask_desc, %%ebp\n" /* %ebp arg6 */
+       "       push    $0x200ed7\n"      /* set almost all flags */
+       "       popf\n"         /* except TF, IOPL, NT, RF, VM, AC, VIF, VIP */
+       "       call    *syscall_addr\n"
+       /* Check that registers are not clobbered */
+       "       pushf\n"
+       "       pop     %%eax\n"           /* eax = flags after the syscall */
+       "       cld\n"                    /* the popf above had set DF */
+       /*
+        * After each cmp, ebx is loaded with the number of the argument
+        * just compared (mov leaves the comparison result intact); on a
+        * mismatch it is therefore the number of the clobbered argument.
+        */
+       "       cmp     nfds, %%ebx\n"     /* ebx  arg1 */
+       "       mov     $1, %%ebx\n"
+       "       jne     1f\n"
+       "       cmp     $rfds, %%ecx\n"    /* ecx  arg2 */
+       "       mov     $2, %%ebx\n"
+       "       jne     1f\n"
+       "       cmp     $wfds, %%edx\n"    /* edx  arg3 */
+       "       mov     $3, %%ebx\n"
+       "       jne     1f\n"
+       "       cmp     $efds, %%esi\n"    /* esi  arg4 */
+       "       mov     $4, %%ebx\n"
+       "       jne     1f\n"
+       "       cmp     $timeout, %%edi\n" /* edi  arg5 */
+       "       mov     $5, %%ebx\n"
+       "       jne     1f\n"
+       "       cmpl    $sigmask_desc, %%ebp\n" /* %ebp arg6 */
+       "       mov     $6, %%ebx\n"
+       "       jne     1f\n"
+       "       mov     $0, %%ebx\n"       /* all six arguments intact */
+       "1:\n"
+       "       pop     %%ebp\n"
+       : "=a" (flags), "=b" (bad_arg)
+       :
+       : "cx", "dx", "si", "di", "memory", "cc"
+       );
+
+       if (kernel_is_64bit) {
+               /* Pre-fill so that slots get_regs64 fails to write stand out */
+               memset(&regs64, 0x77, sizeof(regs64));
+               call64_from_32(get_regs64);
+               /*print_regs64();*/
+       }
+
+       /*
+        * On paravirt kernels, flags are not preserved across syscalls.
+        * Thus, we do not consider it a bug if some are changed.
+        * We just show ones which do.
+        */
+       if ((0x200ed7 ^ flags) != 0) {
+               print_flags("[WARN]\tFlags before", 0x200ed7);
+               print_flags("[WARN]\tFlags  after", flags);
+               print_flags("[WARN]\tFlags change", (0x200ed7 ^ flags));
+       }
+
+       if (bad_arg) {
+               printf("[FAIL]\targ#%ld clobbered\n", bad_arg);
+               return 1;
+       }
+       printf("[OK]\tArguments are preserved across syscall\n");
+
+       return check_regs64();
+}
+
+/*
+ * Run the syscall test through both entry points: first through the
+ * address in syscall_addr (skipped when that is 0), then through the
+ * int80 stub.  syscall_addr is restored before returning.
+ *
+ * Returns the sum of the run_syscall() results, i.e. 0 on success.
+ */
+int run_syscall_twice()
+{
+       int exitcode = 0;
+       long sv;
+
+       if (syscall_addr) {
+               printf("[RUN]\tExecuting 6-argument 32-bit syscall via VDSO\n");
+               exitcode = run_syscall();
+       }
+       /* Temporarily redirect the entry point to the INT 0x80 stub */
+       sv = syscall_addr;
+       syscall_addr = (long)&int80;
+       printf("[RUN]\tExecuting 6-argument 32-bit syscall via INT 80\n");
+       exitcode += run_syscall();
+       syscall_addr = sv;
+       return exitcode;
+}
+
+/*
+ * Fork, and let the child continue under ptrace.
+ *
+ * Child:  requests tracing with PTRACE_TRACEME, stops itself with
+ *         SIGSTOP and then returns to the caller.  If PTRACE_TRACEME
+ *         fails it exits with status 0.
+ * Parent: never returns.  It restarts the child with PTRACE_SYSCALL
+ *         after every stop and exits with the child's exit status, or
+ *         with the number of the signal that terminated the child, or
+ *         with 255 if waitpid() fails or reports an unexpected state.
+ *         If fork() fails the process exits with status 1.
+ */
+void ptrace_me()
+{
+       pid_t pid;
+
+       /* Flush stdio first so buffered output is not emitted by both processes */
+       fflush(NULL);
+       pid = fork();
+       if (pid < 0)
+               exit(1);
+       if (pid == 0) {
+               /* child */
+               if (ptrace(PTRACE_TRACEME, 0L, 0L, 0L) != 0)
+                       exit(0);
+               raise(SIGSTOP);
+               return;
+       }
+       /* parent */
+       printf("[RUN]\tRunning tests under ptrace\n");
+       while (1) {
+               int status;
+               pid = waitpid(-1, &status, __WALL);
+               /*
+                * Check the return value before looking at status:
+                * status is not written when waitpid() fails.
+                */
+               if (pid <= 0)
+                       exit(255);
+               if (WIFEXITED(status))
+                       exit(WEXITSTATUS(status));
+               if (WIFSIGNALED(status))
+                       exit(WTERMSIG(status));
+               if (!WIFSTOPPED(status)) /* paranoia */
+                       exit(255);
+               /*
+                * Note: we do not inject sig = WSTOPSIG(status).
+                * We probably should, but careful: do not inject SIGTRAP
+                * generated by syscall entry/exit stops.
+                * That kills the child.
+                */
+               ptrace(PTRACE_SYSCALL, pid, 0L, 0L /*sig*/);
+       }
+}
+
+/*
+ * Test driver.
+ *
+ * Decides from %cs whether the 64-bit register checks apply, looks up
+ * the AT_SYSINFO entry point, runs the tests once directly, and then
+ * once more in a forked child that is being ptraced (ptrace_me()
+ * returns only in that child).
+ *
+ * Returns the accumulated failure count, 0 meaning success.
+ */
+int main(int argc, char **argv, char **envp)
+{
+       int exitcode = 0;
+       int cs;
+
+       /* Read the current code segment selector */
+       asm("\n"
+       "       movl    %%cs, %%eax\n"
+       : "=a" (cs)
+       );
+       /* 0x23 is the selector that thunks_32.S labels USER32_CS */
+       kernel_is_64bit = (cs == 0x23);
+       if (!kernel_is_64bit)
+               printf("[NOTE]\tNot a 64-bit kernel, won't test R8..R15 leaks\n");
+
+       /* This only works for non-static builds:
+        * syscall_addr = dlsym(dlopen("linux-gate.so.1", RTLD_NOW), "__kernel_vsyscall");
+        */
+       syscall_addr = get_syscall(envp);
+
+       exitcode += run_syscall_twice();
+       /* From here on only the traced child keeps running this code */
+       ptrace_me();
+       exitcode += run_syscall_twice();
+
+       return exitcode;
+}
+#endif
diff --git a/tools/testing/selftests/x86/thunks_32.S b/tools/testing/selftests/x86/thunks_32.S
new file mode 100644 (file)
index 0000000..29b644b
--- /dev/null
@@ -0,0 +1,55 @@
+/*
+ * thunks_32.S - assembly helpers for mixed-bitness code
+ * Copyright (c) 2015 Denys Vlasenko
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * These are little helpers that make it easier to switch bitness on
+ * the fly.
+ */
+
+       .text
+       .code32
+
+       .global call64_from_32
+       .type call64_from_32, @function
+
+       // Call a 64-bit function from 32-bit code.
+       // 4(%esp): function to call
+       // %ecx, %edx, %esi and %edi are saved and restored around the call;
+       // %eax carries the function address and is not restored.
+call64_from_32:
+       // Fetch function address
+       mov     4(%esp), %eax
+
+       // Save registers which are callee-clobbered by 64-bit ABI
+       push    %ecx
+       push    %edx
+       push    %esi
+       push    %edi
+
+       // Switch to long mode
+       jmp     $0x33,$1f
+1:     .code64
+
+       // Call the function
+       call    *%rax
+
+       // Switch to compatibility mode
+       // (far return to selector 0x23 and the 32-bit label below)
+       push    $0x23  /* USER32_CS */
+       .code32; push $1f; .code64 /* hack: can't have X86_64_32S relocation in 32-bit ELF */
+       lretq
+1:     .code32
+
+       pop     %edi
+       pop     %esi
+       pop     %edx
+       pop     %ecx
+
+       ret
+
+.size call64_from_32, .-call64_from_32