x86/asm/entry, x86/vdso: Move the vDSO code to arch/x86/entry/vdso/
authorIngo Molnar <mingo@kernel.org>
Wed, 3 Jun 2015 16:05:44 +0000 (18:05 +0200)
committerIngo Molnar <mingo@kernel.org>
Wed, 3 Jun 2015 16:51:37 +0000 (18:51 +0200)
Cc: Borislav Petkov <bp@alien8.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
48 files changed:
MAINTAINERS
arch/x86/Kbuild
arch/x86/Makefile
arch/x86/entry/Makefile
arch/x86/entry/vdso/.gitignore [new file with mode: 0644]
arch/x86/entry/vdso/Makefile [new file with mode: 0644]
arch/x86/entry/vdso/checkundef.sh [new file with mode: 0755]
arch/x86/entry/vdso/vclock_gettime.c [new file with mode: 0644]
arch/x86/entry/vdso/vdso-layout.lds.S [new file with mode: 0644]
arch/x86/entry/vdso/vdso-note.S [new file with mode: 0644]
arch/x86/entry/vdso/vdso.lds.S [new file with mode: 0644]
arch/x86/entry/vdso/vdso2c.c [new file with mode: 0644]
arch/x86/entry/vdso/vdso2c.h [new file with mode: 0644]
arch/x86/entry/vdso/vdso32-setup.c [new file with mode: 0644]
arch/x86/entry/vdso/vdso32/.gitignore [new file with mode: 0644]
arch/x86/entry/vdso/vdso32/int80.S [new file with mode: 0644]
arch/x86/entry/vdso/vdso32/note.S [new file with mode: 0644]
arch/x86/entry/vdso/vdso32/sigreturn.S [new file with mode: 0644]
arch/x86/entry/vdso/vdso32/syscall.S [new file with mode: 0644]
arch/x86/entry/vdso/vdso32/sysenter.S [new file with mode: 0644]
arch/x86/entry/vdso/vdso32/vclock_gettime.c [new file with mode: 0644]
arch/x86/entry/vdso/vdso32/vdso-fakesections.c [new file with mode: 0644]
arch/x86/entry/vdso/vdso32/vdso32.lds.S [new file with mode: 0644]
arch/x86/entry/vdso/vdsox32.lds.S [new file with mode: 0644]
arch/x86/entry/vdso/vgetcpu.c [new file with mode: 0644]
arch/x86/entry/vdso/vma.c [new file with mode: 0644]
arch/x86/vdso/.gitignore [deleted file]
arch/x86/vdso/Makefile [deleted file]
arch/x86/vdso/checkundef.sh [deleted file]
arch/x86/vdso/vclock_gettime.c [deleted file]
arch/x86/vdso/vdso-layout.lds.S [deleted file]
arch/x86/vdso/vdso-note.S [deleted file]
arch/x86/vdso/vdso.lds.S [deleted file]
arch/x86/vdso/vdso2c.c [deleted file]
arch/x86/vdso/vdso2c.h [deleted file]
arch/x86/vdso/vdso32-setup.c [deleted file]
arch/x86/vdso/vdso32/.gitignore [deleted file]
arch/x86/vdso/vdso32/int80.S [deleted file]
arch/x86/vdso/vdso32/note.S [deleted file]
arch/x86/vdso/vdso32/sigreturn.S [deleted file]
arch/x86/vdso/vdso32/syscall.S [deleted file]
arch/x86/vdso/vdso32/sysenter.S [deleted file]
arch/x86/vdso/vdso32/vclock_gettime.c [deleted file]
arch/x86/vdso/vdso32/vdso-fakesections.c [deleted file]
arch/x86/vdso/vdso32/vdso32.lds.S [deleted file]
arch/x86/vdso/vdsox32.lds.S [deleted file]
arch/x86/vdso/vgetcpu.c [deleted file]
arch/x86/vdso/vma.c [deleted file]

index f8e0afb708b4fe2a60f3f6a3b697bd1dd60753d4..68c0cc3654328d07d232ac5a2c7a014bb6990623 100644 (file)
@@ -10892,7 +10892,7 @@ M:      Andy Lutomirski <luto@amacapital.net>
 L:     linux-kernel@vger.kernel.org
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/vdso
 S:     Maintained
-F:     arch/x86/vdso/
+F:     arch/x86/entry/vdso/
 
 XC2028/3028 TUNER DRIVER
 M:     Mauro Carvalho Chehab <mchehab@osg.samsung.com>
index b9b816277e722a96f795f17dd6e44caf90e04255..1538562cc720e78132d0eb31c38116b029d56ce0 100644 (file)
@@ -14,7 +14,7 @@ obj-y += kernel/
 obj-y += mm/
 
 obj-y += crypto/
-obj-y += vdso/
+
 obj-$(CONFIG_IA32_EMULATION) += ia32/
 
 obj-y += platform/
index 43e8328a23e4cf1780ae87cca8103e918d23e2b0..90b1c3bfec468002612f706a8c5d4dc0b2ee1b8e 100644 (file)
@@ -244,7 +244,7 @@ install:
 
 PHONY += vdso_install
 vdso_install:
-       $(Q)$(MAKE) $(build)=arch/x86/vdso $@
+       $(Q)$(MAKE) $(build)=arch/x86/entry/vdso $@
 
 archclean:
        $(Q)rm -rf $(objtree)/arch/i386
index 4a626594fa790e603fb7619a5f6c5561f2feb4d8..c97532492ef577c0400d26adb5ff1c98fb77af81 100644 (file)
@@ -3,5 +3,7 @@
 #
 obj-y                          := entry_$(BITS).o
 
+obj-y                          += vdso/
+
 obj-$(CONFIG_IA32_EMULATION)   += ia32entry.o
 
diff --git a/arch/x86/entry/vdso/.gitignore b/arch/x86/entry/vdso/.gitignore
new file mode 100644 (file)
index 0000000..aae8ffd
--- /dev/null
@@ -0,0 +1,7 @@
+vdso.lds
+vdsox32.lds
+vdso32-syscall-syms.lds
+vdso32-sysenter-syms.lds
+vdso32-int80-syms.lds
+vdso-image-*.c
+vdso2c
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
new file mode 100644 (file)
index 0000000..e970320
--- /dev/null
@@ -0,0 +1,209 @@
+#
+# Building vDSO images for x86.
+#
+
+KBUILD_CFLAGS += $(DISABLE_LTO)
+KASAN_SANITIZE := n
+
+VDSO64-$(CONFIG_X86_64)                := y
+VDSOX32-$(CONFIG_X86_X32_ABI)  := y
+VDSO32-$(CONFIG_X86_32)                := y
+VDSO32-$(CONFIG_COMPAT)                := y
+
+# files to link into the vdso
+vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o
+
+# files to link into kernel
+obj-y                          += vma.o
+
+# vDSO images to build
+vdso_img-$(VDSO64-y)           += 64
+vdso_img-$(VDSOX32-y)          += x32
+vdso_img-$(VDSO32-y)           += 32-int80
+vdso_img-$(CONFIG_COMPAT)      += 32-syscall
+vdso_img-$(VDSO32-y)           += 32-sysenter
+
+obj-$(VDSO32-y)                        += vdso32-setup.o
+
+vobjs := $(foreach F,$(vobjs-y),$(obj)/$F)
+
+$(obj)/vdso.o: $(obj)/vdso.so
+
+targets += vdso.lds $(vobjs-y)
+
+# Build the vDSO image C files and link them in.
+vdso_img_objs := $(vdso_img-y:%=vdso-image-%.o)
+vdso_img_cfiles := $(vdso_img-y:%=vdso-image-%.c)
+vdso_img_sodbg := $(vdso_img-y:%=vdso%.so.dbg)
+obj-y += $(vdso_img_objs)
+targets += $(vdso_img_cfiles)
+targets += $(vdso_img_sodbg)
+.SECONDARY: $(vdso_img-y:%=$(obj)/vdso-image-%.c) \
+       $(vdso_img-y:%=$(obj)/vdso%.so)
+
+export CPPFLAGS_vdso.lds += -P -C
+
+VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \
+                       -Wl,--no-undefined \
+                       -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 \
+                       $(DISABLE_LTO)
+
+$(obj)/vdso64.so.dbg: $(src)/vdso.lds $(vobjs) FORCE
+       $(call if_changed,vdso)
+
+HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi -I$(srctree)/arch/x86/include/uapi
+hostprogs-y                    += vdso2c
+
+quiet_cmd_vdso2c = VDSO2C  $@
+define cmd_vdso2c
+       $(obj)/vdso2c $< $(<:%.dbg=%) $@
+endef
+
+$(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c FORCE
+       $(call if_changed,vdso2c)
+
+#
+# Don't omit frame pointers for ease of userspace debugging, but do
+# optimize sibling calls.
+#
+CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \
+       $(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector) \
+       -fno-omit-frame-pointer -foptimize-sibling-calls \
+       -DDISABLE_BRANCH_PROFILING
+
+$(vobjs): KBUILD_CFLAGS += $(CFL)
+
+#
+# vDSO code runs in userspace and -pg doesn't help with profiling anyway.
+#
+CFLAGS_REMOVE_vdso-note.o = -pg
+CFLAGS_REMOVE_vclock_gettime.o = -pg
+CFLAGS_REMOVE_vgetcpu.o = -pg
+CFLAGS_REMOVE_vvar.o = -pg
+
+#
+# X32 processes use x32 vDSO to access 64bit kernel data.
+#
+# Build x32 vDSO image:
+# 1. Compile x32 vDSO as 64bit.
+# 2. Convert object files to x32.
+# 3. Build x32 VDSO image with x32 objects, which contains 64bit codes
+# so that it can reach 64bit address space with 64bit pointers.
+#
+
+CPPFLAGS_vdsox32.lds = $(CPPFLAGS_vdso.lds)
+VDSO_LDFLAGS_vdsox32.lds = -Wl,-m,elf32_x86_64 \
+                          -Wl,-soname=linux-vdso.so.1 \
+                          -Wl,-z,max-page-size=4096 \
+                          -Wl,-z,common-page-size=4096
+
+# 64-bit objects to re-brand as x32
+vobjs64-for-x32 := $(filter-out $(vobjs-nox32),$(vobjs-y))
+
+# x32-rebranded versions
+vobjx32s-y := $(vobjs64-for-x32:.o=-x32.o)
+
+# same thing, but in the output directory
+vobjx32s := $(foreach F,$(vobjx32s-y),$(obj)/$F)
+
+# Convert 64bit object file to x32 for x32 vDSO.
+quiet_cmd_x32 = X32     $@
+      cmd_x32 = $(OBJCOPY) -O elf32-x86-64 $< $@
+
+$(obj)/%-x32.o: $(obj)/%.o FORCE
+       $(call if_changed,x32)
+
+targets += vdsox32.lds $(vobjx32s-y)
+
+$(obj)/%.so: OBJCOPYFLAGS := -S
+$(obj)/%.so: $(obj)/%.so.dbg
+       $(call if_changed,objcopy)
+
+$(obj)/vdsox32.so.dbg: $(src)/vdsox32.lds $(vobjx32s) FORCE
+       $(call if_changed,vdso)
+
+#
+# Build multiple 32-bit vDSO images to choose from at boot time.
+#
+vdso32.so-$(VDSO32-y)          += int80
+vdso32.so-$(CONFIG_COMPAT)     += syscall
+vdso32.so-$(VDSO32-y)          += sysenter
+
+vdso32-images                  = $(vdso32.so-y:%=vdso32-%.so)
+
+CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds)
+VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-m,elf_i386 -Wl,-soname=linux-gate.so.1
+
+# This makes sure the $(obj) subdirectory exists even though vdso32/
+# is not a kbuild sub-make subdirectory.
+override obj-dirs = $(dir $(obj)) $(obj)/vdso32/
+
+targets += vdso32/vdso32.lds
+targets += vdso32/note.o vdso32/vclock_gettime.o $(vdso32.so-y:%=vdso32/%.o)
+targets += vdso32/vclock_gettime.o
+
+$(obj)/vdso32.o: $(vdso32-images:%=$(obj)/%)
+
+KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS))
+$(vdso32-images:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32)
+$(vdso32-images:%=$(obj)/%.dbg): asflags-$(CONFIG_X86_64) += -m32
+
+KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS))
+KBUILD_CFLAGS_32 := $(filter-out -mcmodel=kernel,$(KBUILD_CFLAGS_32))
+KBUILD_CFLAGS_32 := $(filter-out -fno-pic,$(KBUILD_CFLAGS_32))
+KBUILD_CFLAGS_32 := $(filter-out -mfentry,$(KBUILD_CFLAGS_32))
+KBUILD_CFLAGS_32 += -m32 -msoft-float -mregparm=0 -fpic
+KBUILD_CFLAGS_32 += $(call cc-option, -fno-stack-protector)
+KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls)
+KBUILD_CFLAGS_32 += -fno-omit-frame-pointer
+KBUILD_CFLAGS_32 += -DDISABLE_BRANCH_PROFILING
+$(vdso32-images:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)
+
+$(vdso32-images:%=$(obj)/%.dbg): $(obj)/vdso32-%.so.dbg: FORCE \
+                                $(obj)/vdso32/vdso32.lds \
+                                $(obj)/vdso32/vclock_gettime.o \
+                                $(obj)/vdso32/note.o \
+                                $(obj)/vdso32/%.o
+       $(call if_changed,vdso)
+
+#
+# The DSO images are built using a special linker script.
+#
+quiet_cmd_vdso = VDSO    $@
+      cmd_vdso = $(CC) -nostdlib -o $@ \
+                      $(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \
+                      -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^) && \
+                sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@'
+
+VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) \
+       $(call cc-ldoption, -Wl$(comma)--build-id) -Wl,-Bsymbolic $(LTO_CFLAGS)
+GCOV_PROFILE := n
+
+#
+# Install the unstripped copies of vdso*.so.  If our toolchain supports
+# build-id, install .build-id links as well.
+#
+quiet_cmd_vdso_install = INSTALL $(@:install_%=%)
+define cmd_vdso_install
+       cp $< "$(MODLIB)/vdso/$(@:install_%=%)"; \
+       if readelf -n $< |grep -q 'Build ID'; then \
+         buildid=`readelf -n $< |grep 'Build ID' |sed -e 's/^.*Build ID: \(.*\)$$/\1/'`; \
+         first=`echo $$buildid | cut -b-2`; \
+         last=`echo $$buildid | cut -b3-`; \
+         mkdir -p "$(MODLIB)/vdso/.build-id/$$first"; \
+         ln -sf "../../$(@:install_%=%)" "$(MODLIB)/vdso/.build-id/$$first/$$last.debug"; \
+       fi
+endef
+
+vdso_img_insttargets := $(vdso_img_sodbg:%.dbg=install_%)
+
+$(MODLIB)/vdso: FORCE
+       @mkdir -p $(MODLIB)/vdso
+
+$(vdso_img_insttargets): install_%: $(obj)/%.dbg $(MODLIB)/vdso FORCE
+       $(call cmd,vdso_install)
+
+PHONY += vdso_install $(vdso_img_insttargets)
+vdso_install: $(vdso_img_insttargets) FORCE
+
+clean-files := vdso32-syscall* vdso32-sysenter* vdso32-int80* vdso64* vdso-image-*.c vdsox32.so*
diff --git a/arch/x86/entry/vdso/checkundef.sh b/arch/x86/entry/vdso/checkundef.sh
new file mode 100755 (executable)
index 0000000..7ee90a9
--- /dev/null
@@ -0,0 +1,10 @@
+#!/bin/sh
+nm="$1"
+file="$2"
+$nm "$file" | grep '^ *U' > /dev/null 2>&1
+if [ $? -eq 1 ]; then
+    exit 0
+else
+    echo "$file: undefined symbols found" >&2
+    exit 1
+fi
diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
new file mode 100644 (file)
index 0000000..9793322
--- /dev/null
@@ -0,0 +1,351 @@
+/*
+ * Copyright 2006 Andi Kleen, SUSE Labs.
+ * Subject to the GNU Public License, v.2
+ *
+ * Fast user context implementation of clock_gettime, gettimeofday, and time.
+ *
+ * 32 Bit compat layer by Stefani Seibold <stefani@seibold.net>
+ *  sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
+ *
+ * The code should have no internal unresolved relocations.
+ * Check with readelf after changing.
+ */
+
+#include <uapi/linux/time.h>
+#include <asm/vgtod.h>
+#include <asm/hpet.h>
+#include <asm/vvar.h>
+#include <asm/unistd.h>
+#include <asm/msr.h>
+#include <linux/math64.h>
+#include <linux/time.h>
+
+#define gtod (&VVAR(vsyscall_gtod_data))
+
+extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts);
+extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz);
+extern time_t __vdso_time(time_t *t);
+
+#ifdef CONFIG_HPET_TIMER
+extern u8 hpet_page
+       __attribute__((visibility("hidden")));
+
+static notrace cycle_t vread_hpet(void)
+{
+       return *(const volatile u32 *)(&hpet_page + HPET_COUNTER);
+}
+#endif
+
+#ifndef BUILD_VDSO32
+
+#include <linux/kernel.h>
+#include <asm/vsyscall.h>
+#include <asm/fixmap.h>
+#include <asm/pvclock.h>
+
+notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
+{
+       long ret;
+       asm("syscall" : "=a" (ret) :
+           "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : "memory");
+       return ret;
+}
+
+notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
+{
+       long ret;
+
+       asm("syscall" : "=a" (ret) :
+           "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory");
+       return ret;
+}
+
+#ifdef CONFIG_PARAVIRT_CLOCK
+
+static notrace const struct pvclock_vsyscall_time_info *get_pvti(int cpu)
+{
+       const struct pvclock_vsyscall_time_info *pvti_base;
+       int idx = cpu / (PAGE_SIZE/PVTI_SIZE);
+       int offset = cpu % (PAGE_SIZE/PVTI_SIZE);
+
+       BUG_ON(PVCLOCK_FIXMAP_BEGIN + idx > PVCLOCK_FIXMAP_END);
+
+       pvti_base = (struct pvclock_vsyscall_time_info *)
+                   __fix_to_virt(PVCLOCK_FIXMAP_BEGIN+idx);
+
+       return &pvti_base[offset];
+}
+
+static notrace cycle_t vread_pvclock(int *mode)
+{
+       const struct pvclock_vsyscall_time_info *pvti;
+       cycle_t ret;
+       u64 last;
+       u32 version;
+       u8 flags;
+       unsigned cpu, cpu1;
+
+
+       /*
+        * Note: hypervisor must guarantee that:
+        * 1. cpu ID number maps 1:1 to per-CPU pvclock time info.
+        * 2. that per-CPU pvclock time info is updated if the
+        *    underlying CPU changes.
+        * 3. that version is increased whenever underlying CPU
+        *    changes.
+        *
+        */
+       do {
+               cpu = __getcpu() & VGETCPU_CPU_MASK;
+               /* TODO: We can put vcpu id into higher bits of pvti.version.
+                * This will save a couple of cycles by getting rid of
+                * __getcpu() calls (Gleb).
+                */
+
+               pvti = get_pvti(cpu);
+
+               version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags);
+
+               /*
+                * Test we're still on the cpu as well as the version.
+                * We could have been migrated just after the first
+                * vgetcpu but before fetching the version, so we
+                * wouldn't notice a version change.
+                */
+               cpu1 = __getcpu() & VGETCPU_CPU_MASK;
+       } while (unlikely(cpu != cpu1 ||
+                         (pvti->pvti.version & 1) ||
+                         pvti->pvti.version != version));
+
+       if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT)))
+               *mode = VCLOCK_NONE;
+
+       /* refer to tsc.c read_tsc() comment for rationale */
+       last = gtod->cycle_last;
+
+       if (likely(ret >= last))
+               return ret;
+
+       return last;
+}
+#endif
+
+#else
+
+notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
+{
+       long ret;
+
+       asm(
+               "mov %%ebx, %%edx \n"
+               "mov %2, %%ebx \n"
+               "call __kernel_vsyscall \n"
+               "mov %%edx, %%ebx \n"
+               : "=a" (ret)
+               : "0" (__NR_clock_gettime), "g" (clock), "c" (ts)
+               : "memory", "edx");
+       return ret;
+}
+
+notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
+{
+       long ret;
+
+       asm(
+               "mov %%ebx, %%edx \n"
+               "mov %2, %%ebx \n"
+               "call __kernel_vsyscall \n"
+               "mov %%edx, %%ebx \n"
+               : "=a" (ret)
+               : "0" (__NR_gettimeofday), "g" (tv), "c" (tz)
+               : "memory", "edx");
+       return ret;
+}
+
+#ifdef CONFIG_PARAVIRT_CLOCK
+
+static notrace cycle_t vread_pvclock(int *mode)
+{
+       *mode = VCLOCK_NONE;
+       return 0;
+}
+#endif
+
+#endif
+
+notrace static cycle_t vread_tsc(void)
+{
+       cycle_t ret;
+       u64 last;
+
+       /*
+        * Empirically, a fence (of type that depends on the CPU)
+        * before rdtsc is enough to ensure that rdtsc is ordered
+        * with respect to loads.  The various CPU manuals are unclear
+        * as to whether rdtsc can be reordered with later loads,
+        * but no one has ever seen it happen.
+        */
+       rdtsc_barrier();
+       ret = (cycle_t)__native_read_tsc();
+
+       last = gtod->cycle_last;
+
+       if (likely(ret >= last))
+               return ret;
+
+       /*
+        * GCC likes to generate cmov here, but this branch is extremely
+        * predictable (it's just a funciton of time and the likely is
+        * very likely) and there's a data dependence, so force GCC
+        * to generate a branch instead.  I don't barrier() because
+        * we don't actually need a barrier, and if this function
+        * ever gets inlined it will generate worse code.
+        */
+       asm volatile ("");
+       return last;
+}
+
+notrace static inline u64 vgetsns(int *mode)
+{
+       u64 v;
+       cycles_t cycles;
+
+       if (gtod->vclock_mode == VCLOCK_TSC)
+               cycles = vread_tsc();
+#ifdef CONFIG_HPET_TIMER
+       else if (gtod->vclock_mode == VCLOCK_HPET)
+               cycles = vread_hpet();
+#endif
+#ifdef CONFIG_PARAVIRT_CLOCK
+       else if (gtod->vclock_mode == VCLOCK_PVCLOCK)
+               cycles = vread_pvclock(mode);
+#endif
+       else
+               return 0;
+       v = (cycles - gtod->cycle_last) & gtod->mask;
+       return v * gtod->mult;
+}
+
+/* Code size doesn't matter (vdso is 4k anyway) and this is faster. */
+notrace static int __always_inline do_realtime(struct timespec *ts)
+{
+       unsigned long seq;
+       u64 ns;
+       int mode;
+
+       do {
+               seq = gtod_read_begin(gtod);
+               mode = gtod->vclock_mode;
+               ts->tv_sec = gtod->wall_time_sec;
+               ns = gtod->wall_time_snsec;
+               ns += vgetsns(&mode);
+               ns >>= gtod->shift;
+       } while (unlikely(gtod_read_retry(gtod, seq)));
+
+       ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
+       ts->tv_nsec = ns;
+
+       return mode;
+}
+
+notrace static int __always_inline do_monotonic(struct timespec *ts)
+{
+       unsigned long seq;
+       u64 ns;
+       int mode;
+
+       do {
+               seq = gtod_read_begin(gtod);
+               mode = gtod->vclock_mode;
+               ts->tv_sec = gtod->monotonic_time_sec;
+               ns = gtod->monotonic_time_snsec;
+               ns += vgetsns(&mode);
+               ns >>= gtod->shift;
+       } while (unlikely(gtod_read_retry(gtod, seq)));
+
+       ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
+       ts->tv_nsec = ns;
+
+       return mode;
+}
+
+notrace static void do_realtime_coarse(struct timespec *ts)
+{
+       unsigned long seq;
+       do {
+               seq = gtod_read_begin(gtod);
+               ts->tv_sec = gtod->wall_time_coarse_sec;
+               ts->tv_nsec = gtod->wall_time_coarse_nsec;
+       } while (unlikely(gtod_read_retry(gtod, seq)));
+}
+
+notrace static void do_monotonic_coarse(struct timespec *ts)
+{
+       unsigned long seq;
+       do {
+               seq = gtod_read_begin(gtod);
+               ts->tv_sec = gtod->monotonic_time_coarse_sec;
+               ts->tv_nsec = gtod->monotonic_time_coarse_nsec;
+       } while (unlikely(gtod_read_retry(gtod, seq)));
+}
+
+notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
+{
+       switch (clock) {
+       case CLOCK_REALTIME:
+               if (do_realtime(ts) == VCLOCK_NONE)
+                       goto fallback;
+               break;
+       case CLOCK_MONOTONIC:
+               if (do_monotonic(ts) == VCLOCK_NONE)
+                       goto fallback;
+               break;
+       case CLOCK_REALTIME_COARSE:
+               do_realtime_coarse(ts);
+               break;
+       case CLOCK_MONOTONIC_COARSE:
+               do_monotonic_coarse(ts);
+               break;
+       default:
+               goto fallback;
+       }
+
+       return 0;
+fallback:
+       return vdso_fallback_gettime(clock, ts);
+}
+int clock_gettime(clockid_t, struct timespec *)
+       __attribute__((weak, alias("__vdso_clock_gettime")));
+
+notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
+{
+       if (likely(tv != NULL)) {
+               if (unlikely(do_realtime((struct timespec *)tv) == VCLOCK_NONE))
+                       return vdso_fallback_gtod(tv, tz);
+               tv->tv_usec /= 1000;
+       }
+       if (unlikely(tz != NULL)) {
+               tz->tz_minuteswest = gtod->tz_minuteswest;
+               tz->tz_dsttime = gtod->tz_dsttime;
+       }
+
+       return 0;
+}
+int gettimeofday(struct timeval *, struct timezone *)
+       __attribute__((weak, alias("__vdso_gettimeofday")));
+
+/*
+ * This will break when the xtime seconds get inaccurate, but that is
+ * unlikely
+ */
+notrace time_t __vdso_time(time_t *t)
+{
+       /* This is atomic on x86 so we don't need any locks. */
+       time_t result = ACCESS_ONCE(gtod->wall_time_sec);
+
+       if (t)
+               *t = result;
+       return result;
+}
+int time(time_t *t)
+       __attribute__((weak, alias("__vdso_time")));
diff --git a/arch/x86/entry/vdso/vdso-layout.lds.S b/arch/x86/entry/vdso/vdso-layout.lds.S
new file mode 100644 (file)
index 0000000..de2c921
--- /dev/null
@@ -0,0 +1,118 @@
+#include <asm/vdso.h>
+
+/*
+ * Linker script for vDSO.  This is an ELF shared object prelinked to
+ * its virtual address, and with only one read-only segment.
+ * This script controls its layout.
+ */
+
+#if defined(BUILD_VDSO64)
+# define SHDR_SIZE 64
+#elif defined(BUILD_VDSO32) || defined(BUILD_VDSOX32)
+# define SHDR_SIZE 40
+#else
+# error unknown VDSO target
+#endif
+
+#define NUM_FAKE_SHDRS 13
+
+SECTIONS
+{
+       /*
+        * User/kernel shared data is before the vDSO.  This may be a little
+        * uglier than putting it after the vDSO, but it avoids issues with
+        * non-allocatable things that dangle past the end of the PT_LOAD
+        * segment.
+        */
+
+       vvar_start = . - 2 * PAGE_SIZE;
+       vvar_page = vvar_start;
+
+       /* Place all vvars at the offsets in asm/vvar.h. */
+#define EMIT_VVAR(name, offset) vvar_ ## name = vvar_page + offset;
+#define __VVAR_KERNEL_LDS
+#include <asm/vvar.h>
+#undef __VVAR_KERNEL_LDS
+#undef EMIT_VVAR
+
+       hpet_page = vvar_start + PAGE_SIZE;
+
+       . = SIZEOF_HEADERS;
+
+       .hash           : { *(.hash) }                  :text
+       .gnu.hash       : { *(.gnu.hash) }
+       .dynsym         : { *(.dynsym) }
+       .dynstr         : { *(.dynstr) }
+       .gnu.version    : { *(.gnu.version) }
+       .gnu.version_d  : { *(.gnu.version_d) }
+       .gnu.version_r  : { *(.gnu.version_r) }
+
+       .dynamic        : { *(.dynamic) }               :text   :dynamic
+
+       .rodata         : {
+               *(.rodata*)
+               *(.data*)
+               *(.sdata*)
+               *(.got.plt) *(.got)
+               *(.gnu.linkonce.d.*)
+               *(.bss*)
+               *(.dynbss*)
+               *(.gnu.linkonce.b.*)
+
+               /*
+                * Ideally this would live in a C file, but that won't
+                * work cleanly for x32 until we start building the x32
+                * C code using an x32 toolchain.
+                */
+               VDSO_FAKE_SECTION_TABLE_START = .;
+               . = . + NUM_FAKE_SHDRS * SHDR_SIZE;
+               VDSO_FAKE_SECTION_TABLE_END = .;
+       }                                               :text
+
+       .fake_shstrtab  : { *(.fake_shstrtab) }         :text
+
+
+       .note           : { *(.note.*) }                :text   :note
+
+       .eh_frame_hdr   : { *(.eh_frame_hdr) }          :text   :eh_frame_hdr
+       .eh_frame       : { KEEP (*(.eh_frame)) }       :text
+
+
+       /*
+        * Text is well-separated from actual data: there's plenty of
+        * stuff that isn't used at runtime in between.
+        */
+
+       .text           : { *(.text*) }                 :text   =0x90909090,
+
+       /*
+        * At the end so that eu-elflint stays happy when vdso2c strips
+        * these.  A better implementation would avoid allocating space
+        * for these.
+        */
+       .altinstructions        : { *(.altinstructions) }       :text
+       .altinstr_replacement   : { *(.altinstr_replacement) }  :text
+
+       /DISCARD/ : {
+               *(.discard)
+               *(.discard.*)
+               *(__bug_table)
+       }
+}
+
+/*
+ * Very old versions of ld do not recognize this name token; use the constant.
+ */
+#define PT_GNU_EH_FRAME        0x6474e550
+
+/*
+ * We must supply the ELF program headers explicitly to get just one
+ * PT_LOAD segment, and set the flags explicitly to make segments read-only.
+ */
+PHDRS
+{
+       text            PT_LOAD         FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */
+       dynamic         PT_DYNAMIC      FLAGS(4);               /* PF_R */
+       note            PT_NOTE         FLAGS(4);               /* PF_R */
+       eh_frame_hdr    PT_GNU_EH_FRAME;
+}
diff --git a/arch/x86/entry/vdso/vdso-note.S b/arch/x86/entry/vdso/vdso-note.S
new file mode 100644 (file)
index 0000000..79a071e
--- /dev/null
@@ -0,0 +1,12 @@
+/*
+ * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
+ * Here we can supply some information useful to userland.
+ */
+
+#include <linux/uts.h>
+#include <linux/version.h>
+#include <linux/elfnote.h>
+
+ELFNOTE_START(Linux, 0, "a")
+       .long LINUX_VERSION_CODE
+ELFNOTE_END
diff --git a/arch/x86/entry/vdso/vdso.lds.S b/arch/x86/entry/vdso/vdso.lds.S
new file mode 100644 (file)
index 0000000..6807932
--- /dev/null
@@ -0,0 +1,29 @@
+/*
+ * Linker script for 64-bit vDSO.
+ * We #include the file to define the layout details.
+ *
+ * This file defines the version script giving the user-exported symbols in
+ * the DSO.
+ */
+
+#define BUILD_VDSO64
+
+#include "vdso-layout.lds.S"
+
+/*
+ * This controls what userland symbols we export from the vDSO.
+ */
+VERSION {
+       LINUX_2.6 {
+       global:
+               clock_gettime;
+               __vdso_clock_gettime;
+               gettimeofday;
+               __vdso_gettimeofday;
+               getcpu;
+               __vdso_getcpu;
+               time;
+               __vdso_time;
+       local: *;
+       };
+}
diff --git a/arch/x86/entry/vdso/vdso2c.c b/arch/x86/entry/vdso/vdso2c.c
new file mode 100644 (file)
index 0000000..8627db2
--- /dev/null
@@ -0,0 +1,253 @@
+/*
+ * vdso2c - A vdso image preparation tool
+ * Copyright (c) 2014 Andy Lutomirski and others
+ * Licensed under the GPL v2
+ *
+ * vdso2c requires stripped and unstripped input.  It would be trivial
+ * to fully strip the input in here, but, for reasons described below,
+ * we need to write a section table.  Doing this is more or less
+ * equivalent to dropping all non-allocatable sections, but it's
+ * easier to let objcopy handle that instead of doing it ourselves.
+ * If we ever need to do something fancier than what objcopy provides,
+ * it would be straightforward to add here.
+ *
+ * We're keep a section table for a few reasons:
+ *
+ * The Go runtime had a couple of bugs: it would read the section
+ * table to try to figure out how many dynamic symbols there were (it
+ * shouldn't have looked at the section table at all) and, if there
+ * were no SHT_SYNDYM section table entry, it would use an
+ * uninitialized value for the number of symbols.  An empty DYNSYM
+ * table would work, but I see no reason not to write a valid one (and
+ * keep full performance for old Go programs).  This hack is only
+ * needed on x86_64.
+ *
+ * The bug was introduced on 2012-08-31 by:
+ * https://code.google.com/p/go/source/detail?r=56ea40aac72b
+ * and was fixed on 2014-06-13 by:
+ * https://code.google.com/p/go/source/detail?r=fc1cd5e12595
+ *
+ * Binutils has issues debugging the vDSO: it reads the section table to
+ * find SHT_NOTE; it won't look at PT_NOTE for the in-memory vDSO, which
+ * would break build-id if we removed the section table.  Binutils
+ * also requires that shstrndx != 0.  See:
+ * https://sourceware.org/bugzilla/show_bug.cgi?id=17064
+ *
+ * elfutils might not look for PT_NOTE if there is a section table at
+ * all.  I don't know whether this matters for any practical purpose.
+ *
+ * For simplicity, rather than hacking up a partial section table, we
+ * just write a mostly complete one.  We omit non-dynamic symbols,
+ * though, since they're rather large.
+ *
+ * Once binutils gets fixed, we might be able to drop this for all but
+ * the 64-bit vdso, since build-id only works in kernel RPMs, and
+ * systems that update to new enough kernel RPMs will likely update
+ * binutils in sync.  build-id has never worked for home-built kernel
+ * RPMs without manual symlinking, and I suspect that no one ever does
+ * that.
+ */
+
+#include <inttypes.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <err.h>
+
+#include <sys/mman.h>
+#include <sys/types.h>
+
+#include <tools/le_byteshift.h>
+
+#include <linux/elf.h>
+#include <linux/types.h>
+
+const char *outfilename;
+
+/* Symbols that we need in vdso2c. */
+enum {
+       sym_vvar_start,
+       sym_vvar_page,
+       sym_hpet_page,
+       sym_VDSO_FAKE_SECTION_TABLE_START,
+       sym_VDSO_FAKE_SECTION_TABLE_END,
+};
+
+const int special_pages[] = {
+       sym_vvar_page,
+       sym_hpet_page,
+};
+
+struct vdso_sym {
+       const char *name;
+       bool export;
+};
+
+struct vdso_sym required_syms[] = {
+       [sym_vvar_start] = {"vvar_start", true},
+       [sym_vvar_page] = {"vvar_page", true},
+       [sym_hpet_page] = {"hpet_page", true},
+       [sym_VDSO_FAKE_SECTION_TABLE_START] = {
+               "VDSO_FAKE_SECTION_TABLE_START", false
+       },
+       [sym_VDSO_FAKE_SECTION_TABLE_END] = {
+               "VDSO_FAKE_SECTION_TABLE_END", false
+       },
+       {"VDSO32_NOTE_MASK", true},
+       {"VDSO32_SYSENTER_RETURN", true},
+       {"__kernel_vsyscall", true},
+       {"__kernel_sigreturn", true},
+       {"__kernel_rt_sigreturn", true},
+};
+
+__attribute__((format(printf, 1, 2))) __attribute__((noreturn))
+static void fail(const char *format, ...)
+{
+       va_list ap;
+       va_start(ap, format);
+       fprintf(stderr, "Error: ");
+       vfprintf(stderr, format, ap);
+       if (outfilename)
+               unlink(outfilename);
+       exit(1);
+       va_end(ap);
+}
+
+/*
+ * Evil macros for little-endian reads and writes
+ */
+#define GLE(x, bits, ifnot)                                            \
+       __builtin_choose_expr(                                          \
+               (sizeof(*(x)) == bits/8),                               \
+               (__typeof__(*(x)))get_unaligned_le##bits(x), ifnot)
+
+extern void bad_get_le(void);
+#define LAST_GLE(x)                                                    \
+       __builtin_choose_expr(sizeof(*(x)) == 1, *(x), bad_get_le())
+
+#define GET_LE(x)                                                      \
+       GLE(x, 64, GLE(x, 32, GLE(x, 16, LAST_GLE(x))))
+
+#define PLE(x, val, bits, ifnot)                                       \
+       __builtin_choose_expr(                                          \
+               (sizeof(*(x)) == bits/8),                               \
+               put_unaligned_le##bits((val), (x)), ifnot)
+
+extern void bad_put_le(void);
+#define LAST_PLE(x, val)                                               \
+       __builtin_choose_expr(sizeof(*(x)) == 1, *(x) = (val), bad_put_le())
+
+#define PUT_LE(x, val)                                 \
+       PLE(x, val, 64, PLE(x, val, 32, PLE(x, val, 16, LAST_PLE(x, val))))
+
+
+#define NSYMS (sizeof(required_syms) / sizeof(required_syms[0]))
+
+#define BITSFUNC3(name, bits, suffix) name##bits##suffix
+#define BITSFUNC2(name, bits, suffix) BITSFUNC3(name, bits, suffix)
+#define BITSFUNC(name) BITSFUNC2(name, ELF_BITS, )
+
+#define INT_BITS BITSFUNC2(int, ELF_BITS, _t)
+
+#define ELF_BITS_XFORM2(bits, x) Elf##bits##_##x
+#define ELF_BITS_XFORM(bits, x) ELF_BITS_XFORM2(bits, x)
+#define ELF(x) ELF_BITS_XFORM(ELF_BITS, x)
+
+#define ELF_BITS 64
+#include "vdso2c.h"
+#undef ELF_BITS
+
+#define ELF_BITS 32
+#include "vdso2c.h"
+#undef ELF_BITS
+
+static void go(void *raw_addr, size_t raw_len,
+              void *stripped_addr, size_t stripped_len,
+              FILE *outfile, const char *name)
+{
+       Elf64_Ehdr *hdr = (Elf64_Ehdr *)raw_addr;
+
+       if (hdr->e_ident[EI_CLASS] == ELFCLASS64) {
+               go64(raw_addr, raw_len, stripped_addr, stripped_len,
+                    outfile, name);
+       } else if (hdr->e_ident[EI_CLASS] == ELFCLASS32) {
+               go32(raw_addr, raw_len, stripped_addr, stripped_len,
+                    outfile, name);
+       } else {
+               fail("unknown ELF class\n");
+       }
+}
+
+static void map_input(const char *name, void **addr, size_t *len, int prot)
+{
+       off_t tmp_len;
+
+       int fd = open(name, O_RDONLY);
+       if (fd == -1)
+               err(1, "%s", name);
+
+       tmp_len = lseek(fd, 0, SEEK_END);
+       if (tmp_len == (off_t)-1)
+               err(1, "lseek");
+       *len = (size_t)tmp_len;
+
+       *addr = mmap(NULL, tmp_len, prot, MAP_PRIVATE, fd, 0);
+       if (*addr == MAP_FAILED)
+               err(1, "mmap");
+
+       close(fd);
+}
+
+int main(int argc, char **argv)
+{
+       size_t raw_len, stripped_len;
+       void *raw_addr, *stripped_addr;
+       FILE *outfile;
+       char *name, *tmp;
+       int namelen;
+
+       if (argc != 4) {
+               printf("Usage: vdso2c RAW_INPUT STRIPPED_INPUT OUTPUT\n");
+               return 1;
+       }
+
+       /*
+        * Figure out the struct name.  If we're writing to a .so file,
+        * generate raw output insted.
+        */
+       name = strdup(argv[3]);
+       namelen = strlen(name);
+       if (namelen >= 3 && !strcmp(name + namelen - 3, ".so")) {
+               name = NULL;
+       } else {
+               tmp = strrchr(name, '/');
+               if (tmp)
+                       name = tmp + 1;
+               tmp = strchr(name, '.');
+               if (tmp)
+                       *tmp = '\0';
+               for (tmp = name; *tmp; tmp++)
+                       if (*tmp == '-')
+                               *tmp = '_';
+       }
+
+       map_input(argv[1], &raw_addr, &raw_len, PROT_READ);
+       map_input(argv[2], &stripped_addr, &stripped_len, PROT_READ);
+
+       outfilename = argv[3];
+       outfile = fopen(outfilename, "w");
+       if (!outfile)
+               err(1, "%s", argv[2]);
+
+       go(raw_addr, raw_len, stripped_addr, stripped_len, outfile, name);
+
+       munmap(raw_addr, raw_len);
+       munmap(stripped_addr, stripped_len);
+       fclose(outfile);
+
+       return 0;
+}
diff --git a/arch/x86/entry/vdso/vdso2c.h b/arch/x86/entry/vdso/vdso2c.h
new file mode 100644 (file)
index 0000000..0224987
--- /dev/null
@@ -0,0 +1,175 @@
+/*
+ * This file is included twice from vdso2c.c.  It generates code for 32-bit
+ * and 64-bit vDSOs.  We need both for 64-bit builds, since 32-bit vDSOs
+ * are built for 32-bit userspace.
+ */
+
+static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
+                        void *stripped_addr, size_t stripped_len,
+                        FILE *outfile, const char *name)
+{
+       int found_load = 0;
+       unsigned long load_size = -1;  /* Work around bogus warning */
+       unsigned long mapping_size;
+       ELF(Ehdr) *hdr = (ELF(Ehdr) *)raw_addr;
+       int i;
+       unsigned long j;
+       ELF(Shdr) *symtab_hdr = NULL, *strtab_hdr, *secstrings_hdr,
+               *alt_sec = NULL;
+       ELF(Dyn) *dyn = 0, *dyn_end = 0;
+       const char *secstrings;
+       INT_BITS syms[NSYMS] = {};
+
+       ELF(Phdr) *pt = (ELF(Phdr) *)(raw_addr + GET_LE(&hdr->e_phoff));
+
+       /* Walk the segment table. */
+       for (i = 0; i < GET_LE(&hdr->e_phnum); i++) {
+               if (GET_LE(&pt[i].p_type) == PT_LOAD) {
+                       if (found_load)
+                               fail("multiple PT_LOAD segs\n");
+
+                       if (GET_LE(&pt[i].p_offset) != 0 ||
+                           GET_LE(&pt[i].p_vaddr) != 0)
+                               fail("PT_LOAD in wrong place\n");
+
+                       if (GET_LE(&pt[i].p_memsz) != GET_LE(&pt[i].p_filesz))
+                               fail("cannot handle memsz != filesz\n");
+
+                       load_size = GET_LE(&pt[i].p_memsz);
+                       found_load = 1;
+               } else if (GET_LE(&pt[i].p_type) == PT_DYNAMIC) {
+                       dyn = raw_addr + GET_LE(&pt[i].p_offset);
+                       dyn_end = raw_addr + GET_LE(&pt[i].p_offset) +
+                               GET_LE(&pt[i].p_memsz);
+               }
+       }
+       if (!found_load)
+               fail("no PT_LOAD seg\n");
+
+       if (stripped_len < load_size)
+               fail("stripped input is too short\n");
+
+       /* Walk the dynamic table */
+       for (i = 0; dyn + i < dyn_end &&
+                    GET_LE(&dyn[i].d_tag) != DT_NULL; i++) {
+               typeof(dyn[i].d_tag) tag = GET_LE(&dyn[i].d_tag);
+               if (tag == DT_REL || tag == DT_RELSZ || tag == DT_RELA ||
+                   tag == DT_RELENT || tag == DT_TEXTREL)
+                       fail("vdso image contains dynamic relocations\n");
+       }
+
+       /* Walk the section table */
+       secstrings_hdr = raw_addr + GET_LE(&hdr->e_shoff) +
+               GET_LE(&hdr->e_shentsize)*GET_LE(&hdr->e_shstrndx);
+       secstrings = raw_addr + GET_LE(&secstrings_hdr->sh_offset);
+       for (i = 0; i < GET_LE(&hdr->e_shnum); i++) {
+               ELF(Shdr) *sh = raw_addr + GET_LE(&hdr->e_shoff) +
+                       GET_LE(&hdr->e_shentsize) * i;
+               if (GET_LE(&sh->sh_type) == SHT_SYMTAB)
+                       symtab_hdr = sh;
+
+               if (!strcmp(secstrings + GET_LE(&sh->sh_name),
+                           ".altinstructions"))
+                       alt_sec = sh;
+       }
+
+       if (!symtab_hdr)
+               fail("no symbol table\n");
+
+       strtab_hdr = raw_addr + GET_LE(&hdr->e_shoff) +
+               GET_LE(&hdr->e_shentsize) * GET_LE(&symtab_hdr->sh_link);
+
+       /* Walk the symbol table */
+       for (i = 0;
+            i < GET_LE(&symtab_hdr->sh_size) / GET_LE(&symtab_hdr->sh_entsize);
+            i++) {
+               int k;
+               ELF(Sym) *sym = raw_addr + GET_LE(&symtab_hdr->sh_offset) +
+                       GET_LE(&symtab_hdr->sh_entsize) * i;
+               const char *name = raw_addr + GET_LE(&strtab_hdr->sh_offset) +
+                       GET_LE(&sym->st_name);
+
+               for (k = 0; k < NSYMS; k++) {
+                       if (!strcmp(name, required_syms[k].name)) {
+                               if (syms[k]) {
+                                       fail("duplicate symbol %s\n",
+                                            required_syms[k].name);
+                               }
+
+                               /*
+                                * Careful: we use negative addresses, but
+                                * st_value is unsigned, so we rely
+                                * on syms[k] being a signed type of the
+                                * correct width.
+                                */
+                               syms[k] = GET_LE(&sym->st_value);
+                       }
+               }
+       }
+
+       /* Validate mapping addresses. */
+       for (i = 0; i < sizeof(special_pages) / sizeof(special_pages[0]); i++) {
+               INT_BITS symval = syms[special_pages[i]];
+
+               if (!symval)
+                       continue;  /* The mapping isn't used; ignore it. */
+
+               if (symval % 4096)
+                       fail("%s must be a multiple of 4096\n",
+                            required_syms[i].name);
+               if (symval + 4096 < syms[sym_vvar_start])
+                       fail("%s underruns vvar_start\n",
+                            required_syms[i].name);
+               if (symval + 4096 > 0)
+                       fail("%s is on the wrong side of the vdso text\n",
+                            required_syms[i].name);
+       }
+       if (syms[sym_vvar_start] % 4096)
+               fail("vvar_begin must be a multiple of 4096\n");
+
+       if (!name) {
+               fwrite(stripped_addr, stripped_len, 1, outfile);
+               return;
+       }
+
+       mapping_size = (stripped_len + 4095) / 4096 * 4096;
+
+       fprintf(outfile, "/* AUTOMATICALLY GENERATED -- DO NOT EDIT */\n\n");
+       fprintf(outfile, "#include <linux/linkage.h>\n");
+       fprintf(outfile, "#include <asm/page_types.h>\n");
+       fprintf(outfile, "#include <asm/vdso.h>\n");
+       fprintf(outfile, "\n");
+       fprintf(outfile,
+               "static unsigned char raw_data[%lu] __page_aligned_data = {",
+               mapping_size);
+       for (j = 0; j < stripped_len; j++) {
+               if (j % 10 == 0)
+                       fprintf(outfile, "\n\t");
+               fprintf(outfile, "0x%02X, ",
+                       (int)((unsigned char *)stripped_addr)[j]);
+       }
+       fprintf(outfile, "\n};\n\n");
+
+       fprintf(outfile, "static struct page *pages[%lu];\n\n",
+               mapping_size / 4096);
+
+       fprintf(outfile, "const struct vdso_image %s = {\n", name);
+       fprintf(outfile, "\t.data = raw_data,\n");
+       fprintf(outfile, "\t.size = %lu,\n", mapping_size);
+       fprintf(outfile, "\t.text_mapping = {\n");
+       fprintf(outfile, "\t\t.name = \"[vdso]\",\n");
+       fprintf(outfile, "\t\t.pages = pages,\n");
+       fprintf(outfile, "\t},\n");
+       if (alt_sec) {
+               fprintf(outfile, "\t.alt = %lu,\n",
+                       (unsigned long)GET_LE(&alt_sec->sh_offset));
+               fprintf(outfile, "\t.alt_len = %lu,\n",
+                       (unsigned long)GET_LE(&alt_sec->sh_size));
+       }
+       for (i = 0; i < NSYMS; i++) {
+               if (required_syms[i].export && syms[i])
+                       fprintf(outfile, "\t.sym_%s = %" PRIi64 ",\n",
+                               required_syms[i].name, (int64_t)syms[i]);
+       }
+       fprintf(outfile, "};\n");
+}
diff --git a/arch/x86/entry/vdso/vdso32-setup.c b/arch/x86/entry/vdso/vdso32-setup.c
new file mode 100644 (file)
index 0000000..e904c27
--- /dev/null
@@ -0,0 +1,120 @@
+/*
+ * (C) Copyright 2002 Linus Torvalds
+ * Portions based on the vdso-randomization code from exec-shield:
+ * Copyright(C) 2005-2006, Red Hat, Inc., Ingo Molnar
+ *
+ * This file contains the needed initializations to support sysenter.
+ */
+
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/kernel.h>
+#include <linux/mm_types.h>
+
+#include <asm/cpufeature.h>
+#include <asm/processor.h>
+#include <asm/vdso.h>
+
+#ifdef CONFIG_COMPAT_VDSO
+#define VDSO_DEFAULT   0
+#else
+#define VDSO_DEFAULT   1
+#endif
+
+/*
+ * Should the kernel map a VDSO page into processes and pass its
+ * address down to glibc upon exec()?
+ */
+unsigned int __read_mostly vdso32_enabled = VDSO_DEFAULT;
+
+static int __init vdso32_setup(char *s)
+{
+       vdso32_enabled = simple_strtoul(s, NULL, 0);
+
+       if (vdso32_enabled > 1)
+               pr_warn("vdso32 values other than 0 and 1 are no longer allowed; vdso disabled\n");
+
+       return 1;
+}
+
+/*
+ * For consistency, the argument vdso32=[012] affects the 32-bit vDSO
+ * behavior on both 64-bit and 32-bit kernels.
+ * On 32-bit kernels, vdso=[012] means the same thing.
+ */
+__setup("vdso32=", vdso32_setup);
+
+#ifdef CONFIG_X86_32
+__setup_param("vdso=", vdso_setup, vdso32_setup, 0);
+#endif
+
+#ifdef CONFIG_X86_64
+
+#define        vdso32_sysenter()       (boot_cpu_has(X86_FEATURE_SYSENTER32))
+#define        vdso32_syscall()        (boot_cpu_has(X86_FEATURE_SYSCALL32))
+
+#else  /* CONFIG_X86_32 */
+
+#define vdso32_sysenter()      (boot_cpu_has(X86_FEATURE_SEP))
+#define vdso32_syscall()       (0)
+
+#endif /* CONFIG_X86_64 */
+
+#if defined(CONFIG_X86_32) || defined(CONFIG_COMPAT)
+const struct vdso_image *selected_vdso32;
+#endif
+
+int __init sysenter_setup(void)
+{
+#ifdef CONFIG_COMPAT
+       if (vdso32_syscall())
+               selected_vdso32 = &vdso_image_32_syscall;
+       else
+#endif
+       if (vdso32_sysenter())
+               selected_vdso32 = &vdso_image_32_sysenter;
+       else
+               selected_vdso32 = &vdso_image_32_int80;
+
+       init_vdso_image(selected_vdso32);
+
+       return 0;
+}
+
+#ifdef CONFIG_X86_64
+
+subsys_initcall(sysenter_setup);
+
+#ifdef CONFIG_SYSCTL
+/* Register vsyscall32 into the ABI table */
+#include <linux/sysctl.h>
+
+static struct ctl_table abi_table2[] = {
+       {
+               .procname       = "vsyscall32",
+               .data           = &vdso32_enabled,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec
+       },
+       {}
+};
+
+static struct ctl_table abi_root_table2[] = {
+       {
+               .procname = "abi",
+               .mode = 0555,
+               .child = abi_table2
+       },
+       {}
+};
+
+static __init int ia32_binfmt_init(void)
+{
+       register_sysctl_table(abi_root_table2);
+       return 0;
+}
+__initcall(ia32_binfmt_init);
+#endif /* CONFIG_SYSCTL */
+
+#endif /* CONFIG_X86_64 */
diff --git a/arch/x86/entry/vdso/vdso32/.gitignore b/arch/x86/entry/vdso/vdso32/.gitignore
new file mode 100644 (file)
index 0000000..e45fba9
--- /dev/null
@@ -0,0 +1 @@
+vdso32.lds
diff --git a/arch/x86/entry/vdso/vdso32/int80.S b/arch/x86/entry/vdso/vdso32/int80.S
new file mode 100644 (file)
index 0000000..b15b7c0
--- /dev/null
@@ -0,0 +1,56 @@
+/*
+ * Code for the vDSO.  This version uses the old int $0x80 method.
+ *
+ * First get the common code for the sigreturn entry points.
+ * This must come first.
+ */
+#include "sigreturn.S"
+
+       .text
+       .globl __kernel_vsyscall
+       .type __kernel_vsyscall,@function
+       ALIGN
+__kernel_vsyscall:
+.LSTART_vsyscall:
+       int $0x80
+       ret
+.LEND_vsyscall:
+       .size __kernel_vsyscall,.-.LSTART_vsyscall
+       .previous
+
+       .section .eh_frame,"a",@progbits
+.LSTARTFRAMEDLSI:
+       .long .LENDCIEDLSI-.LSTARTCIEDLSI
+.LSTARTCIEDLSI:
+       .long 0                 /* CIE ID */
+       .byte 1                 /* Version number */
+       .string "zR"            /* NUL-terminated augmentation string */
+       .uleb128 1              /* Code alignment factor */
+       .sleb128 -4             /* Data alignment factor */
+       .byte 8                 /* Return address register column */
+       .uleb128 1              /* Augmentation value length */
+       .byte 0x1b              /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
+       .byte 0x0c              /* DW_CFA_def_cfa */
+       .uleb128 4
+       .uleb128 4
+       .byte 0x88              /* DW_CFA_offset, column 0x8 */
+       .uleb128 1
+       .align 4
+.LENDCIEDLSI:
+       .long .LENDFDEDLSI-.LSTARTFDEDLSI /* Length FDE */
+.LSTARTFDEDLSI:
+       .long .LSTARTFDEDLSI-.LSTARTFRAMEDLSI /* CIE pointer */
+       .long .LSTART_vsyscall-.        /* PC-relative start address */
+       .long .LEND_vsyscall-.LSTART_vsyscall
+       .uleb128 0
+       .align 4
+.LENDFDEDLSI:
+       .previous
+
+       /*
+        * Pad out the segment to match the size of the sysenter.S version.
+        */
+VDSO32_vsyscall_eh_frame_size = 0x40
+       .section .data,"aw",@progbits
+       .space VDSO32_vsyscall_eh_frame_size-(.LENDFDEDLSI-.LSTARTFRAMEDLSI), 0
+       .previous
diff --git a/arch/x86/entry/vdso/vdso32/note.S b/arch/x86/entry/vdso/vdso32/note.S
new file mode 100644 (file)
index 0000000..c83f257
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
+ * Here we can supply some information useful to userland.
+ */
+
+#include <linux/version.h>
+#include <linux/elfnote.h>
+
+/* Ideally this would use UTS_NAME, but using a quoted string here
+   doesn't work. Remember to change this when changing the
+   kernel's name. */
+ELFNOTE_START(Linux, 0, "a")
+       .long LINUX_VERSION_CODE
+ELFNOTE_END
+
+#ifdef CONFIG_XEN
+/*
+ * Add a special note telling glibc's dynamic linker a fake hardware
+ * flavor that it will use to choose the search path for libraries in the
+ * same way it uses real hardware capabilities like "mmx".
+ * We supply "nosegneg" as the fake capability, to indicate that we
+ * do not like negative offsets in instructions using segment overrides,
+ * since we implement those inefficiently.  This makes it possible to
+ * install libraries optimized to avoid those access patterns in someplace
+ * like /lib/i686/tls/nosegneg.  Note that an /etc/ld.so.conf.d/file
+ * corresponding to the bits here is needed to make ldconfig work right.
+ * It should contain:
+ *     hwcap 1 nosegneg
+ * to match the mapping of bit to name that we give here.
+ *
+ * At runtime, the fake hardware feature will be considered to be present
+ * if its bit is set in the mask word.  So, we start with the mask 0, and
+ * at boot time we set VDSO_NOTE_NONEGSEG_BIT if running under Xen.
+ */
+
+#include "../../xen/vdso.h"    /* Defines VDSO_NOTE_NONEGSEG_BIT.  */
+
+ELFNOTE_START(GNU, 2, "a")
+       .long 1                 /* ncaps */
+VDSO32_NOTE_MASK:              /* Symbol used by arch/x86/xen/setup.c */
+       .long 0                 /* mask */
+       .byte VDSO_NOTE_NONEGSEG_BIT; .asciz "nosegneg" /* bit, name */
+ELFNOTE_END
+#endif
diff --git a/arch/x86/entry/vdso/vdso32/sigreturn.S b/arch/x86/entry/vdso/vdso32/sigreturn.S
new file mode 100644 (file)
index 0000000..d7ec4e2
--- /dev/null
@@ -0,0 +1,145 @@
+/*
+ * Common code for the sigreturn entry points in vDSO images.
+ * So far this code is the same for both int80 and sysenter versions.
+ * This file is #include'd by int80.S et al to define them first thing.
+ * The kernel assumes that the addresses of these routines are constant
+ * for all vDSO implementations.
+ */
+
+#include <linux/linkage.h>
+#include <asm/unistd_32.h>
+#include <asm/asm-offsets.h>
+
+#ifndef SYSCALL_ENTER_KERNEL
+#define        SYSCALL_ENTER_KERNEL    int $0x80
+#endif
+
+       .text
+       .globl __kernel_sigreturn
+       .type __kernel_sigreturn,@function
+       nop /* this guy is needed for .LSTARTFDEDLSI1 below (watch for HACK) */
+       ALIGN
+__kernel_sigreturn:
+.LSTART_sigreturn:
+       popl %eax               /* XXX does this mean it needs unwind info? */
+       movl $__NR_sigreturn, %eax
+       SYSCALL_ENTER_KERNEL
+.LEND_sigreturn:
+       nop
+       .size __kernel_sigreturn,.-.LSTART_sigreturn
+
+       .globl __kernel_rt_sigreturn
+       .type __kernel_rt_sigreturn,@function
+       ALIGN
+__kernel_rt_sigreturn:
+.LSTART_rt_sigreturn:
+       movl $__NR_rt_sigreturn, %eax
+       SYSCALL_ENTER_KERNEL
+.LEND_rt_sigreturn:
+       nop
+       .size __kernel_rt_sigreturn,.-.LSTART_rt_sigreturn
+       .previous
+
+       .section .eh_frame,"a",@progbits
+.LSTARTFRAMEDLSI1:
+       .long .LENDCIEDLSI1-.LSTARTCIEDLSI1
+.LSTARTCIEDLSI1:
+       .long 0                 /* CIE ID */
+       .byte 1                 /* Version number */
+       .string "zRS"           /* NUL-terminated augmentation string */
+       .uleb128 1              /* Code alignment factor */
+       .sleb128 -4             /* Data alignment factor */
+       .byte 8                 /* Return address register column */
+       .uleb128 1              /* Augmentation value length */
+       .byte 0x1b              /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
+       .byte 0                 /* DW_CFA_nop */
+       .align 4
+.LENDCIEDLSI1:
+       .long .LENDFDEDLSI1-.LSTARTFDEDLSI1 /* Length FDE */
+.LSTARTFDEDLSI1:
+       .long .LSTARTFDEDLSI1-.LSTARTFRAMEDLSI1 /* CIE pointer */
+       /* HACK: The dwarf2 unwind routines will subtract 1 from the
+          return address to get an address in the middle of the
+          presumed call instruction.  Since we didn't get here via
+          a call, we need to include the nop before the real start
+          to make up for it.  */
+       .long .LSTART_sigreturn-1-.     /* PC-relative start address */
+       .long .LEND_sigreturn-.LSTART_sigreturn+1
+       .uleb128 0                      /* Augmentation */
+       /* What follows are the instructions for the table generation.
+          We record the locations of each register saved.  This is
+          complicated by the fact that the "CFA" is always assumed to
+          be the value of the stack pointer in the caller.  This means
+          that we must define the CFA of this body of code to be the
+          saved value of the stack pointer in the sigcontext.  Which
+          also means that there is no fixed relation to the other
+          saved registers, which means that we must use DW_CFA_expression
+          to compute their addresses.  It also means that when we
+          adjust the stack with the popl, we have to do it all over again.  */
+
+#define do_cfa_expr(offset)                                            \
+       .byte 0x0f;                     /* DW_CFA_def_cfa_expression */ \
+       .uleb128 1f-0f;                 /*   length */                  \
+0:     .byte 0x74;                     /*     DW_OP_breg4 */           \
+       .sleb128 offset;                /*      offset */               \
+       .byte 0x06;                     /*     DW_OP_deref */           \
+1:
+
+#define do_expr(regno, offset)                                         \
+       .byte 0x10;                     /* DW_CFA_expression */         \
+       .uleb128 regno;                 /*   regno */                   \
+       .uleb128 1f-0f;                 /*   length */                  \
+0:     .byte 0x74;                     /*     DW_OP_breg4 */           \
+       .sleb128 offset;                /*       offset */              \
+1:
+
+       do_cfa_expr(IA32_SIGCONTEXT_sp+4)
+       do_expr(0, IA32_SIGCONTEXT_ax+4)
+       do_expr(1, IA32_SIGCONTEXT_cx+4)
+       do_expr(2, IA32_SIGCONTEXT_dx+4)
+       do_expr(3, IA32_SIGCONTEXT_bx+4)
+       do_expr(5, IA32_SIGCONTEXT_bp+4)
+       do_expr(6, IA32_SIGCONTEXT_si+4)
+       do_expr(7, IA32_SIGCONTEXT_di+4)
+       do_expr(8, IA32_SIGCONTEXT_ip+4)
+
+       .byte 0x42      /* DW_CFA_advance_loc 2 -- nop; popl eax. */
+
+       do_cfa_expr(IA32_SIGCONTEXT_sp)
+       do_expr(0, IA32_SIGCONTEXT_ax)
+       do_expr(1, IA32_SIGCONTEXT_cx)
+       do_expr(2, IA32_SIGCONTEXT_dx)
+       do_expr(3, IA32_SIGCONTEXT_bx)
+       do_expr(5, IA32_SIGCONTEXT_bp)
+       do_expr(6, IA32_SIGCONTEXT_si)
+       do_expr(7, IA32_SIGCONTEXT_di)
+       do_expr(8, IA32_SIGCONTEXT_ip)
+
+       .align 4
+.LENDFDEDLSI1:
+
+       .long .LENDFDEDLSI2-.LSTARTFDEDLSI2 /* Length FDE */
+.LSTARTFDEDLSI2:
+       .long .LSTARTFDEDLSI2-.LSTARTFRAMEDLSI1 /* CIE pointer */
+       /* HACK: See above wrt unwind library assumptions.  */
+       .long .LSTART_rt_sigreturn-1-.  /* PC-relative start address */
+       .long .LEND_rt_sigreturn-.LSTART_rt_sigreturn+1
+       .uleb128 0                      /* Augmentation */
+       /* What follows are the instructions for the table generation.
+          We record the locations of each register saved.  This is
+          slightly less complicated than the above, since we don't
+          modify the stack pointer in the process.  */
+
+       do_cfa_expr(IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_sp)
+       do_expr(0, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ax)
+       do_expr(1, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_cx)
+       do_expr(2, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_dx)
+       do_expr(3, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_bx)
+       do_expr(5, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_bp)
+       do_expr(6, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_si)
+       do_expr(7, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_di)
+       do_expr(8, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ip)
+
+       .align 4
+.LENDFDEDLSI2:
+       .previous
diff --git a/arch/x86/entry/vdso/vdso32/syscall.S b/arch/x86/entry/vdso/vdso32/syscall.S
new file mode 100644 (file)
index 0000000..6b286bb
--- /dev/null
@@ -0,0 +1,75 @@
+/*
+ * Code for the vDSO.  This version uses the syscall instruction.
+ *
+ * First get the common code for the sigreturn entry points.
+ * This must come first.
+ */
+#define SYSCALL_ENTER_KERNEL   syscall
+#include "sigreturn.S"
+
+#include <asm/segment.h>
+
+       .text
+       .globl __kernel_vsyscall
+       .type __kernel_vsyscall,@function
+       ALIGN
+__kernel_vsyscall:
+.LSTART_vsyscall:
+       push    %ebp
+.Lpush_ebp:
+       movl    %ecx, %ebp
+       syscall
+       movl    %ebp, %ecx
+       popl    %ebp
+.Lpop_ebp:
+       ret
+.LEND_vsyscall:
+       .size __kernel_vsyscall,.-.LSTART_vsyscall
+
+       .section .eh_frame,"a",@progbits
+.LSTARTFRAME:
+       .long .LENDCIE-.LSTARTCIE
+.LSTARTCIE:
+       .long 0                 /* CIE ID */
+       .byte 1                 /* Version number */
+       .string "zR"            /* NUL-terminated augmentation string */
+       .uleb128 1              /* Code alignment factor */
+       .sleb128 -4             /* Data alignment factor */
+       .byte 8                 /* Return address register column */
+       .uleb128 1              /* Augmentation value length */
+       .byte 0x1b              /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
+       .byte 0x0c              /* DW_CFA_def_cfa */
+       .uleb128 4
+       .uleb128 4
+       .byte 0x88              /* DW_CFA_offset, column 0x8 */
+       .uleb128 1
+       .align 4
+.LENDCIE:
+
+       .long .LENDFDE1-.LSTARTFDE1     /* Length FDE */
+.LSTARTFDE1:
+       .long .LSTARTFDE1-.LSTARTFRAME  /* CIE pointer */
+       .long .LSTART_vsyscall-.        /* PC-relative start address */
+       .long .LEND_vsyscall-.LSTART_vsyscall
+       .uleb128 0                      /* Augmentation length */
+       /* What follows are the instructions for the table generation.
+          We have to record all changes of the stack pointer.  */
+       .byte 0x40 + .Lpush_ebp-.LSTART_vsyscall /* DW_CFA_advance_loc */
+       .byte 0x0e              /* DW_CFA_def_cfa_offset */
+       .uleb128 8
+       .byte 0x85, 0x02        /* DW_CFA_offset %ebp -8 */
+       .byte 0x40 + .Lpop_ebp-.Lpush_ebp /* DW_CFA_advance_loc */
+       .byte 0xc5              /* DW_CFA_restore %ebp */
+       .byte 0x0e              /* DW_CFA_def_cfa_offset */
+       .uleb128 4
+       .align 4
+.LENDFDE1:
+       .previous
+
+       /*
+        * Pad out the segment to match the size of the sysenter.S version.
+        */
+VDSO32_vsyscall_eh_frame_size = 0x40
+       .section .data,"aw",@progbits
+       .space VDSO32_vsyscall_eh_frame_size-(.LENDFDE1-.LSTARTFRAME), 0
+       .previous
diff --git a/arch/x86/entry/vdso/vdso32/sysenter.S b/arch/x86/entry/vdso/vdso32/sysenter.S
new file mode 100644 (file)
index 0000000..e354bce
--- /dev/null
@@ -0,0 +1,116 @@
+/*
+ * Code for the vDSO.  This version uses the sysenter instruction.
+ *
+ * First get the common code for the sigreturn entry points.
+ * This must come first.
+ */
+#include "sigreturn.S"
+
+/*
+ * The caller puts arg2 in %ecx, which gets pushed. The kernel will use
+ * %ecx itself for arg2. The pushing is because the sysexit instruction
+ * (found in entry.S) requires that we clobber %ecx with the desired %esp.
+ * User code might expect that %ecx is unclobbered though, as it would be
+ * for returning via the iret instruction, so we must push and pop.
+ *
+ * The caller puts arg3 in %edx, which the sysexit instruction requires
+ * for %eip. Thus, exactly as for arg2, we must push and pop.
+ *
+ * Arg6 is different. The caller puts arg6 in %ebp. Since the sysenter
+ * instruction clobbers %esp, the user's %esp won't even survive entry
+ * into the kernel. We store %esp in %ebp. Code in entry.S must fetch
+ * arg6 from the stack.
+ *
+ * You can not use this vsyscall for the clone() syscall because the
+ * three words on the parent stack do not get copied to the child.
+ */
+       .text
+       .globl __kernel_vsyscall
+       .type __kernel_vsyscall,@function
+       ALIGN
+__kernel_vsyscall:
+.LSTART_vsyscall:
+       push %ecx
+.Lpush_ecx:
+       push %edx
+.Lpush_edx:
+       push %ebp
+.Lenter_kernel:
+       movl %esp,%ebp
+       sysenter
+
+       /* 7: align return point with nop's to make disassembly easier */
+       .space 7,0x90
+
+       /* 14: System call restart point is here! (SYSENTER_RETURN-2) */
+       int $0x80
+       /* 16: System call normal return point is here! */
+VDSO32_SYSENTER_RETURN:        /* Symbol used by sysenter.c via vdso32-syms.h */
+       pop %ebp
+.Lpop_ebp:
+       pop %edx
+.Lpop_edx:
+       pop %ecx
+.Lpop_ecx:
+       ret
+.LEND_vsyscall:
+       .size __kernel_vsyscall,.-.LSTART_vsyscall
+       .previous
+
+       .section .eh_frame,"a",@progbits
+.LSTARTFRAMEDLSI:
+       .long .LENDCIEDLSI-.LSTARTCIEDLSI
+.LSTARTCIEDLSI:
+       .long 0                 /* CIE ID */
+       .byte 1                 /* Version number */
+       .string "zR"            /* NUL-terminated augmentation string */
+       .uleb128 1              /* Code alignment factor */
+       .sleb128 -4             /* Data alignment factor */
+       .byte 8                 /* Return address register column */
+       .uleb128 1              /* Augmentation value length */
+       .byte 0x1b              /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
+       .byte 0x0c              /* DW_CFA_def_cfa */
+       .uleb128 4
+       .uleb128 4
+       .byte 0x88              /* DW_CFA_offset, column 0x8 */
+       .uleb128 1
+       .align 4
+.LENDCIEDLSI:
+       .long .LENDFDEDLSI-.LSTARTFDEDLSI /* Length FDE */
+.LSTARTFDEDLSI:
+       .long .LSTARTFDEDLSI-.LSTARTFRAMEDLSI /* CIE pointer */
+       .long .LSTART_vsyscall-.        /* PC-relative start address */
+       .long .LEND_vsyscall-.LSTART_vsyscall
+       .uleb128 0
+       /* What follows are the instructions for the table generation.
+          We have to record all changes of the stack pointer.  */
+       .byte 0x40 + (.Lpush_ecx-.LSTART_vsyscall) /* DW_CFA_advance_loc */
+       .byte 0x0e              /* DW_CFA_def_cfa_offset */
+       .byte 0x08              /* RA at offset 8 now */
+       .byte 0x40 + (.Lpush_edx-.Lpush_ecx) /* DW_CFA_advance_loc */
+       .byte 0x0e              /* DW_CFA_def_cfa_offset */
+       .byte 0x0c              /* RA at offset 12 now */
+       .byte 0x40 + (.Lenter_kernel-.Lpush_edx) /* DW_CFA_advance_loc */
+       .byte 0x0e              /* DW_CFA_def_cfa_offset */
+       .byte 0x10              /* RA at offset 16 now */
+       .byte 0x85, 0x04        /* DW_CFA_offset %ebp -16 */
+       /* Finally the epilogue.  */
+       .byte 0x40 + (.Lpop_ebp-.Lenter_kernel) /* DW_CFA_advance_loc */
+       .byte 0x0e              /* DW_CFA_def_cfa_offset */
+       .byte 0x0c              /* RA at offset 12 now */
+       .byte 0xc5              /* DW_CFA_restore %ebp */
+       .byte 0x40 + (.Lpop_edx-.Lpop_ebp) /* DW_CFA_advance_loc */
+       .byte 0x0e              /* DW_CFA_def_cfa_offset */
+       .byte 0x08              /* RA at offset 8 now */
+       .byte 0x40 + (.Lpop_ecx-.Lpop_edx) /* DW_CFA_advance_loc */
+       .byte 0x0e              /* DW_CFA_def_cfa_offset */
+       .byte 0x04              /* RA at offset 4 now */
+       .align 4
+.LENDFDEDLSI:
+       .previous
+
+       /*
+        * Emit a symbol with the size of this .eh_frame data,
+        * to verify it matches the other versions.
+        */
+VDSO32_vsyscall_eh_frame_size = (.LENDFDEDLSI-.LSTARTFRAMEDLSI)
diff --git a/arch/x86/entry/vdso/vdso32/vclock_gettime.c b/arch/x86/entry/vdso/vdso32/vclock_gettime.c
new file mode 100644 (file)
index 0000000..175cc72
--- /dev/null
@@ -0,0 +1,30 @@
+#define BUILD_VDSO32
+
+#ifndef CONFIG_CC_OPTIMIZE_FOR_SIZE
+#undef CONFIG_OPTIMIZE_INLINING
+#endif
+
+#undef CONFIG_X86_PPRO_FENCE
+
+#ifdef CONFIG_X86_64
+
+/*
+ * in case of a 32 bit VDSO for a 64 bit kernel fake a 32 bit kernel
+ * configuration
+ */
+#undef CONFIG_64BIT
+#undef CONFIG_X86_64
+#undef CONFIG_ILLEGAL_POINTER_VALUE
+#undef CONFIG_SPARSEMEM_VMEMMAP
+#undef CONFIG_NR_CPUS
+
+#define CONFIG_X86_32 1
+#define CONFIG_PAGE_OFFSET 0
+#define CONFIG_ILLEGAL_POINTER_VALUE 0
+#define CONFIG_NR_CPUS 1
+
+#define BUILD_VDSO32_64
+
+#endif
+
+#include "../vclock_gettime.c"
diff --git a/arch/x86/entry/vdso/vdso32/vdso-fakesections.c b/arch/x86/entry/vdso/vdso32/vdso-fakesections.c
new file mode 100644 (file)
index 0000000..541468e
--- /dev/null
@@ -0,0 +1 @@
+#include "../vdso-fakesections.c"
diff --git a/arch/x86/entry/vdso/vdso32/vdso32.lds.S b/arch/x86/entry/vdso/vdso32/vdso32.lds.S
new file mode 100644 (file)
index 0000000..31056cf
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Linker script for 32-bit vDSO.
+ * We #include the file to define the layout details.
+ *
+ * This file defines the version script giving the user-exported symbols in
+ * the DSO.
+ */
+
+#include <asm/page.h>
+
+#define BUILD_VDSO32
+
+#include "../vdso-layout.lds.S"
+
+/* The ELF entry point can be used to set the AT_SYSINFO value.  */
+ENTRY(__kernel_vsyscall);
+
+/*
+ * This controls what userland symbols we export from the vDSO.
+ */
+VERSION
+{
+       LINUX_2.6 {
+       global:
+               __vdso_clock_gettime;
+               __vdso_gettimeofday;
+               __vdso_time;
+       };
+
+       LINUX_2.5 {
+       global:
+               __kernel_vsyscall;
+               __kernel_sigreturn;
+               __kernel_rt_sigreturn;
+       local: *;
+       };
+}
diff --git a/arch/x86/entry/vdso/vdsox32.lds.S b/arch/x86/entry/vdso/vdsox32.lds.S
new file mode 100644 (file)
index 0000000..697c11e
--- /dev/null
@@ -0,0 +1,25 @@
+/*
+ * Linker script for x32 vDSO.
+ * We #include the file to define the layout details.
+ *
+ * This file defines the version script giving the user-exported symbols in
+ * the DSO.
+ */
+
+#define BUILD_VDSOX32
+
+#include "vdso-layout.lds.S"
+
+/*
+ * This controls what userland symbols we export from the vDSO.
+ */
+VERSION {
+       LINUX_2.6 {
+       global:
+               __vdso_clock_gettime;
+               __vdso_gettimeofday;
+               __vdso_getcpu;
+               __vdso_time;
+       local: *;
+       };
+}
diff --git a/arch/x86/entry/vdso/vgetcpu.c b/arch/x86/entry/vdso/vgetcpu.c
new file mode 100644 (file)
index 0000000..8ec3d1f
--- /dev/null
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2006 Andi Kleen, SUSE Labs.
+ * Subject to the GNU Public License, v.2
+ *
+ * Fast user context implementation of getcpu()
+ */
+
+#include <linux/kernel.h>
+#include <linux/getcpu.h>
+#include <linux/time.h>
+#include <asm/vgtod.h>
+
+notrace long
+__vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused)
+{
+       unsigned int p;
+
+       p = __getcpu();
+
+       if (cpu)
+               *cpu = p & VGETCPU_CPU_MASK;
+       if (node)
+               *node = p >> 12;
+       return 0;
+}
+
+long getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
+       __attribute__((weak, alias("__vdso_getcpu")));
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
new file mode 100644 (file)
index 0000000..1c9f750
--- /dev/null
@@ -0,0 +1,300 @@
+/*
+ * Copyright 2007 Andi Kleen, SUSE Labs.
+ * Subject to the GPL, v.2
+ *
+ * This contains most of the x86 vDSO kernel-side code.
+ */
+#include <linux/mm.h>
+#include <linux/err.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/random.h>
+#include <linux/elf.h>
+#include <linux/cpu.h>
+#include <asm/vgtod.h>
+#include <asm/proto.h>
+#include <asm/vdso.h>
+#include <asm/vvar.h>
+#include <asm/page.h>
+#include <asm/hpet.h>
+#include <asm/desc.h>
+
+#if defined(CONFIG_X86_64)
+unsigned int __read_mostly vdso64_enabled = 1;
+#endif
+
+void __init init_vdso_image(const struct vdso_image *image)
+{
+       int i;
+       int npages = (image->size) / PAGE_SIZE;
+
+       BUG_ON(image->size % PAGE_SIZE != 0);
+       for (i = 0; i < npages; i++)
+               image->text_mapping.pages[i] =
+                       virt_to_page(image->data + i*PAGE_SIZE);
+
+       apply_alternatives((struct alt_instr *)(image->data + image->alt),
+                          (struct alt_instr *)(image->data + image->alt +
+                                               image->alt_len));
+}
+
+struct linux_binprm;
+
+/*
+ * Put the vdso above the (randomized) stack with another randomized
+ * offset.  This way there is no hole in the middle of address space.
+ * To save memory make sure it is still in the same PTE as the stack
+ * top.  This doesn't give that many random bits.
+ *
+ * Note that this algorithm is imperfect: the distribution of the vdso
+ * start address within a PMD is biased toward the end.
+ *
+ * Only used for the 64-bit and x32 vdsos.
+ */
+static unsigned long vdso_addr(unsigned long start, unsigned len)
+{
+#ifdef CONFIG_X86_32
+       return 0;
+#else
+       unsigned long addr, end;
+       unsigned offset;
+
+       /*
+        * Round up the start address.  It can start out unaligned as a result
+        * of stack start randomization.
+        */
+       start = PAGE_ALIGN(start);
+
+       /* Round the lowest possible end address up to a PMD boundary. */
+       end = (start + len + PMD_SIZE - 1) & PMD_MASK;
+       if (end >= TASK_SIZE_MAX)
+               end = TASK_SIZE_MAX;
+       end -= len;
+
+       if (end > start) {
+               offset = get_random_int() % (((end - start) >> PAGE_SHIFT) + 1);
+               addr = start + (offset << PAGE_SHIFT);
+       } else {
+               addr = start;
+       }
+
+       /*
+        * Forcibly align the final address in case we have a hardware
+        * issue that requires alignment for performance reasons.
+        */
+       addr = align_vdso_addr(addr);
+
+       return addr;
+#endif
+}
+
+static int map_vdso(const struct vdso_image *image, bool calculate_addr)
+{
+       struct mm_struct *mm = current->mm;
+       struct vm_area_struct *vma;
+       unsigned long addr, text_start;
+       int ret = 0;
+       static struct page *no_pages[] = {NULL};
+       static struct vm_special_mapping vvar_mapping = {
+               .name = "[vvar]",
+               .pages = no_pages,
+       };
+
+       if (calculate_addr) {
+               addr = vdso_addr(current->mm->start_stack,
+                                image->size - image->sym_vvar_start);
+       } else {
+               addr = 0;
+       }
+
+       down_write(&mm->mmap_sem);
+
+       addr = get_unmapped_area(NULL, addr,
+                                image->size - image->sym_vvar_start, 0, 0);
+       if (IS_ERR_VALUE(addr)) {
+               ret = addr;
+               goto up_fail;
+       }
+
+       text_start = addr - image->sym_vvar_start;
+       current->mm->context.vdso = (void __user *)text_start;
+
+       /*
+        * MAYWRITE to allow gdb to COW and set breakpoints
+        */
+       vma = _install_special_mapping(mm,
+                                      text_start,
+                                      image->size,
+                                      VM_READ|VM_EXEC|
+                                      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
+                                      &image->text_mapping);
+
+       if (IS_ERR(vma)) {
+               ret = PTR_ERR(vma);
+               goto up_fail;
+       }
+
+       vma = _install_special_mapping(mm,
+                                      addr,
+                                      -image->sym_vvar_start,
+                                      VM_READ|VM_MAYREAD,
+                                      &vvar_mapping);
+
+       if (IS_ERR(vma)) {
+               ret = PTR_ERR(vma);
+               goto up_fail;
+       }
+
+       if (image->sym_vvar_page)
+               ret = remap_pfn_range(vma,
+                                     text_start + image->sym_vvar_page,
+                                     __pa_symbol(&__vvar_page) >> PAGE_SHIFT,
+                                     PAGE_SIZE,
+                                     PAGE_READONLY);
+
+       if (ret)
+               goto up_fail;
+
+#ifdef CONFIG_HPET_TIMER
+       if (hpet_address && image->sym_hpet_page) {
+               ret = io_remap_pfn_range(vma,
+                       text_start + image->sym_hpet_page,
+                       hpet_address >> PAGE_SHIFT,
+                       PAGE_SIZE,
+                       pgprot_noncached(PAGE_READONLY));
+
+               if (ret)
+                       goto up_fail;
+       }
+#endif
+
+up_fail:
+       if (ret)
+               current->mm->context.vdso = NULL;
+
+       up_write(&mm->mmap_sem);
+       return ret;
+}
+
+#if defined(CONFIG_X86_32) || defined(CONFIG_COMPAT)
+static int load_vdso32(void)
+{
+       int ret;
+
+       if (vdso32_enabled != 1)  /* Other values all mean "disabled" */
+               return 0;
+
+       ret = map_vdso(selected_vdso32, false);
+       if (ret)
+               return ret;
+
+       if (selected_vdso32->sym_VDSO32_SYSENTER_RETURN)
+               current_thread_info()->sysenter_return =
+                       current->mm->context.vdso +
+                       selected_vdso32->sym_VDSO32_SYSENTER_RETURN;
+
+       return 0;
+}
+#endif
+
+#ifdef CONFIG_X86_64
+int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+{
+       if (!vdso64_enabled)
+               return 0;
+
+       return map_vdso(&vdso_image_64, true);
+}
+
+#ifdef CONFIG_COMPAT
+int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
+                                      int uses_interp)
+{
+#ifdef CONFIG_X86_X32_ABI
+       if (test_thread_flag(TIF_X32)) {
+               if (!vdso64_enabled)
+                       return 0;
+
+               return map_vdso(&vdso_image_x32, true);
+       }
+#endif
+
+       return load_vdso32();
+}
+#endif
+#else
+int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+{
+       return load_vdso32();
+}
+#endif
+
+#ifdef CONFIG_X86_64
+static __init int vdso_setup(char *s)
+{
+       vdso64_enabled = simple_strtoul(s, NULL, 0);
+       return 0;
+}
+__setup("vdso=", vdso_setup);
+#endif
+
+#ifdef CONFIG_X86_64
+static void vgetcpu_cpu_init(void *arg)
+{
+       int cpu = smp_processor_id();
+       struct desc_struct d = { };
+       unsigned long node = 0;
+#ifdef CONFIG_NUMA
+       node = cpu_to_node(cpu);
+#endif
+       if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP))
+               write_rdtscp_aux((node << 12) | cpu);
+
+       /*
+        * Store cpu number in limit so that it can be loaded
+        * quickly in user space in vgetcpu. (12 bits for the CPU
+        * and 8 bits for the node)
+        */
+       d.limit0 = cpu | ((node & 0xf) << 12);
+       d.limit = node >> 4;
+       d.type = 5;             /* RO data, expand down, accessed */
+       d.dpl = 3;              /* Visible to user code */
+       d.s = 1;                /* Not a system segment */
+       d.p = 1;                /* Present */
+       d.d = 1;                /* 32-bit */
+
+       write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
+}
+
+static int
+vgetcpu_cpu_notifier(struct notifier_block *n, unsigned long action, void *arg)
+{
+       long cpu = (long)arg;
+
+       if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
+               smp_call_function_single(cpu, vgetcpu_cpu_init, NULL, 1);
+
+       return NOTIFY_DONE;
+}
+
+static int __init init_vdso(void)
+{
+       init_vdso_image(&vdso_image_64);
+
+#ifdef CONFIG_X86_X32_ABI
+       init_vdso_image(&vdso_image_x32);
+#endif
+
+       cpu_notifier_register_begin();
+
+       on_each_cpu(vgetcpu_cpu_init, NULL, 1);
+       /* notifier priority > KVM */
+       __hotcpu_notifier(vgetcpu_cpu_notifier, 30);
+
+       cpu_notifier_register_done();
+
+       return 0;
+}
+subsys_initcall(init_vdso);
+#endif /* CONFIG_X86_64 */
diff --git a/arch/x86/vdso/.gitignore b/arch/x86/vdso/.gitignore
deleted file mode 100644 (file)
index aae8ffd..0000000
+++ /dev/null
@@ -1,7 +0,0 @@
-vdso.lds
-vdsox32.lds
-vdso32-syscall-syms.lds
-vdso32-sysenter-syms.lds
-vdso32-int80-syms.lds
-vdso-image-*.c
-vdso2c
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
deleted file mode 100644 (file)
index e970320..0000000
+++ /dev/null
@@ -1,209 +0,0 @@
-#
-# Building vDSO images for x86.
-#
-
-KBUILD_CFLAGS += $(DISABLE_LTO)
-KASAN_SANITIZE := n
-
-VDSO64-$(CONFIG_X86_64)                := y
-VDSOX32-$(CONFIG_X86_X32_ABI)  := y
-VDSO32-$(CONFIG_X86_32)                := y
-VDSO32-$(CONFIG_COMPAT)                := y
-
-# files to link into the vdso
-vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o
-
-# files to link into kernel
-obj-y                          += vma.o
-
-# vDSO images to build
-vdso_img-$(VDSO64-y)           += 64
-vdso_img-$(VDSOX32-y)          += x32
-vdso_img-$(VDSO32-y)           += 32-int80
-vdso_img-$(CONFIG_COMPAT)      += 32-syscall
-vdso_img-$(VDSO32-y)           += 32-sysenter
-
-obj-$(VDSO32-y)                        += vdso32-setup.o
-
-vobjs := $(foreach F,$(vobjs-y),$(obj)/$F)
-
-$(obj)/vdso.o: $(obj)/vdso.so
-
-targets += vdso.lds $(vobjs-y)
-
-# Build the vDSO image C files and link them in.
-vdso_img_objs := $(vdso_img-y:%=vdso-image-%.o)
-vdso_img_cfiles := $(vdso_img-y:%=vdso-image-%.c)
-vdso_img_sodbg := $(vdso_img-y:%=vdso%.so.dbg)
-obj-y += $(vdso_img_objs)
-targets += $(vdso_img_cfiles)
-targets += $(vdso_img_sodbg)
-.SECONDARY: $(vdso_img-y:%=$(obj)/vdso-image-%.c) \
-       $(vdso_img-y:%=$(obj)/vdso%.so)
-
-export CPPFLAGS_vdso.lds += -P -C
-
-VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \
-                       -Wl,--no-undefined \
-                       -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 \
-                       $(DISABLE_LTO)
-
-$(obj)/vdso64.so.dbg: $(src)/vdso.lds $(vobjs) FORCE
-       $(call if_changed,vdso)
-
-HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi -I$(srctree)/arch/x86/include/uapi
-hostprogs-y                    += vdso2c
-
-quiet_cmd_vdso2c = VDSO2C  $@
-define cmd_vdso2c
-       $(obj)/vdso2c $< $(<:%.dbg=%) $@
-endef
-
-$(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c FORCE
-       $(call if_changed,vdso2c)
-
-#
-# Don't omit frame pointers for ease of userspace debugging, but do
-# optimize sibling calls.
-#
-CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \
-       $(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector) \
-       -fno-omit-frame-pointer -foptimize-sibling-calls \
-       -DDISABLE_BRANCH_PROFILING
-
-$(vobjs): KBUILD_CFLAGS += $(CFL)
-
-#
-# vDSO code runs in userspace and -pg doesn't help with profiling anyway.
-#
-CFLAGS_REMOVE_vdso-note.o = -pg
-CFLAGS_REMOVE_vclock_gettime.o = -pg
-CFLAGS_REMOVE_vgetcpu.o = -pg
-CFLAGS_REMOVE_vvar.o = -pg
-
-#
-# X32 processes use x32 vDSO to access 64bit kernel data.
-#
-# Build x32 vDSO image:
-# 1. Compile x32 vDSO as 64bit.
-# 2. Convert object files to x32.
-# 3. Build x32 VDSO image with x32 objects, which contains 64bit codes
-# so that it can reach 64bit address space with 64bit pointers.
-#
-
-CPPFLAGS_vdsox32.lds = $(CPPFLAGS_vdso.lds)
-VDSO_LDFLAGS_vdsox32.lds = -Wl,-m,elf32_x86_64 \
-                          -Wl,-soname=linux-vdso.so.1 \
-                          -Wl,-z,max-page-size=4096 \
-                          -Wl,-z,common-page-size=4096
-
-# 64-bit objects to re-brand as x32
-vobjs64-for-x32 := $(filter-out $(vobjs-nox32),$(vobjs-y))
-
-# x32-rebranded versions
-vobjx32s-y := $(vobjs64-for-x32:.o=-x32.o)
-
-# same thing, but in the output directory
-vobjx32s := $(foreach F,$(vobjx32s-y),$(obj)/$F)
-
-# Convert 64bit object file to x32 for x32 vDSO.
-quiet_cmd_x32 = X32     $@
-      cmd_x32 = $(OBJCOPY) -O elf32-x86-64 $< $@
-
-$(obj)/%-x32.o: $(obj)/%.o FORCE
-       $(call if_changed,x32)
-
-targets += vdsox32.lds $(vobjx32s-y)
-
-$(obj)/%.so: OBJCOPYFLAGS := -S
-$(obj)/%.so: $(obj)/%.so.dbg
-       $(call if_changed,objcopy)
-
-$(obj)/vdsox32.so.dbg: $(src)/vdsox32.lds $(vobjx32s) FORCE
-       $(call if_changed,vdso)
-
-#
-# Build multiple 32-bit vDSO images to choose from at boot time.
-#
-vdso32.so-$(VDSO32-y)          += int80
-vdso32.so-$(CONFIG_COMPAT)     += syscall
-vdso32.so-$(VDSO32-y)          += sysenter
-
-vdso32-images                  = $(vdso32.so-y:%=vdso32-%.so)
-
-CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds)
-VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-m,elf_i386 -Wl,-soname=linux-gate.so.1
-
-# This makes sure the $(obj) subdirectory exists even though vdso32/
-# is not a kbuild sub-make subdirectory.
-override obj-dirs = $(dir $(obj)) $(obj)/vdso32/
-
-targets += vdso32/vdso32.lds
-targets += vdso32/note.o vdso32/vclock_gettime.o $(vdso32.so-y:%=vdso32/%.o)
-targets += vdso32/vclock_gettime.o
-
-$(obj)/vdso32.o: $(vdso32-images:%=$(obj)/%)
-
-KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS))
-$(vdso32-images:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32)
-$(vdso32-images:%=$(obj)/%.dbg): asflags-$(CONFIG_X86_64) += -m32
-
-KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS))
-KBUILD_CFLAGS_32 := $(filter-out -mcmodel=kernel,$(KBUILD_CFLAGS_32))
-KBUILD_CFLAGS_32 := $(filter-out -fno-pic,$(KBUILD_CFLAGS_32))
-KBUILD_CFLAGS_32 := $(filter-out -mfentry,$(KBUILD_CFLAGS_32))
-KBUILD_CFLAGS_32 += -m32 -msoft-float -mregparm=0 -fpic
-KBUILD_CFLAGS_32 += $(call cc-option, -fno-stack-protector)
-KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls)
-KBUILD_CFLAGS_32 += -fno-omit-frame-pointer
-KBUILD_CFLAGS_32 += -DDISABLE_BRANCH_PROFILING
-$(vdso32-images:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)
-
-$(vdso32-images:%=$(obj)/%.dbg): $(obj)/vdso32-%.so.dbg: FORCE \
-                                $(obj)/vdso32/vdso32.lds \
-                                $(obj)/vdso32/vclock_gettime.o \
-                                $(obj)/vdso32/note.o \
-                                $(obj)/vdso32/%.o
-       $(call if_changed,vdso)
-
-#
-# The DSO images are built using a special linker script.
-#
-quiet_cmd_vdso = VDSO    $@
-      cmd_vdso = $(CC) -nostdlib -o $@ \
-                      $(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \
-                      -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^) && \
-                sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@'
-
-VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) \
-       $(call cc-ldoption, -Wl$(comma)--build-id) -Wl,-Bsymbolic $(LTO_CFLAGS)
-GCOV_PROFILE := n
-
-#
-# Install the unstripped copies of vdso*.so.  If our toolchain supports
-# build-id, install .build-id links as well.
-#
-quiet_cmd_vdso_install = INSTALL $(@:install_%=%)
-define cmd_vdso_install
-       cp $< "$(MODLIB)/vdso/$(@:install_%=%)"; \
-       if readelf -n $< |grep -q 'Build ID'; then \
-         buildid=`readelf -n $< |grep 'Build ID' |sed -e 's/^.*Build ID: \(.*\)$$/\1/'`; \
-         first=`echo $$buildid | cut -b-2`; \
-         last=`echo $$buildid | cut -b3-`; \
-         mkdir -p "$(MODLIB)/vdso/.build-id/$$first"; \
-         ln -sf "../../$(@:install_%=%)" "$(MODLIB)/vdso/.build-id/$$first/$$last.debug"; \
-       fi
-endef
-
-vdso_img_insttargets := $(vdso_img_sodbg:%.dbg=install_%)
-
-$(MODLIB)/vdso: FORCE
-       @mkdir -p $(MODLIB)/vdso
-
-$(vdso_img_insttargets): install_%: $(obj)/%.dbg $(MODLIB)/vdso FORCE
-       $(call cmd,vdso_install)
-
-PHONY += vdso_install $(vdso_img_insttargets)
-vdso_install: $(vdso_img_insttargets) FORCE
-
-clean-files := vdso32-syscall* vdso32-sysenter* vdso32-int80* vdso64* vdso-image-*.c vdsox32.so*
diff --git a/arch/x86/vdso/checkundef.sh b/arch/x86/vdso/checkundef.sh
deleted file mode 100755 (executable)
index 7ee90a9..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/bin/sh
-nm="$1"
-file="$2"
-$nm "$file" | grep '^ *U' > /dev/null 2>&1
-if [ $? -eq 1 ]; then
-    exit 0
-else
-    echo "$file: undefined symbols found" >&2
-    exit 1
-fi
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
deleted file mode 100644 (file)
index 9793322..0000000
+++ /dev/null
@@ -1,351 +0,0 @@
-/*
- * Copyright 2006 Andi Kleen, SUSE Labs.
- * Subject to the GNU Public License, v.2
- *
- * Fast user context implementation of clock_gettime, gettimeofday, and time.
- *
- * 32 Bit compat layer by Stefani Seibold <stefani@seibold.net>
- *  sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
- *
- * The code should have no internal unresolved relocations.
- * Check with readelf after changing.
- */
-
-#include <uapi/linux/time.h>
-#include <asm/vgtod.h>
-#include <asm/hpet.h>
-#include <asm/vvar.h>
-#include <asm/unistd.h>
-#include <asm/msr.h>
-#include <linux/math64.h>
-#include <linux/time.h>
-
-#define gtod (&VVAR(vsyscall_gtod_data))
-
-extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts);
-extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz);
-extern time_t __vdso_time(time_t *t);
-
-#ifdef CONFIG_HPET_TIMER
-extern u8 hpet_page
-       __attribute__((visibility("hidden")));
-
-static notrace cycle_t vread_hpet(void)
-{
-       return *(const volatile u32 *)(&hpet_page + HPET_COUNTER);
-}
-#endif
-
-#ifndef BUILD_VDSO32
-
-#include <linux/kernel.h>
-#include <asm/vsyscall.h>
-#include <asm/fixmap.h>
-#include <asm/pvclock.h>
-
-notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
-{
-       long ret;
-       asm("syscall" : "=a" (ret) :
-           "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : "memory");
-       return ret;
-}
-
-notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
-{
-       long ret;
-
-       asm("syscall" : "=a" (ret) :
-           "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory");
-       return ret;
-}
-
-#ifdef CONFIG_PARAVIRT_CLOCK
-
-static notrace const struct pvclock_vsyscall_time_info *get_pvti(int cpu)
-{
-       const struct pvclock_vsyscall_time_info *pvti_base;
-       int idx = cpu / (PAGE_SIZE/PVTI_SIZE);
-       int offset = cpu % (PAGE_SIZE/PVTI_SIZE);
-
-       BUG_ON(PVCLOCK_FIXMAP_BEGIN + idx > PVCLOCK_FIXMAP_END);
-
-       pvti_base = (struct pvclock_vsyscall_time_info *)
-                   __fix_to_virt(PVCLOCK_FIXMAP_BEGIN+idx);
-
-       return &pvti_base[offset];
-}
-
-static notrace cycle_t vread_pvclock(int *mode)
-{
-       const struct pvclock_vsyscall_time_info *pvti;
-       cycle_t ret;
-       u64 last;
-       u32 version;
-       u8 flags;
-       unsigned cpu, cpu1;
-
-
-       /*
-        * Note: hypervisor must guarantee that:
-        * 1. cpu ID number maps 1:1 to per-CPU pvclock time info.
-        * 2. that per-CPU pvclock time info is updated if the
-        *    underlying CPU changes.
-        * 3. that version is increased whenever underlying CPU
-        *    changes.
-        *
-        */
-       do {
-               cpu = __getcpu() & VGETCPU_CPU_MASK;
-               /* TODO: We can put vcpu id into higher bits of pvti.version.
-                * This will save a couple of cycles by getting rid of
-                * __getcpu() calls (Gleb).
-                */
-
-               pvti = get_pvti(cpu);
-
-               version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags);
-
-               /*
-                * Test we're still on the cpu as well as the version.
-                * We could have been migrated just after the first
-                * vgetcpu but before fetching the version, so we
-                * wouldn't notice a version change.
-                */
-               cpu1 = __getcpu() & VGETCPU_CPU_MASK;
-       } while (unlikely(cpu != cpu1 ||
-                         (pvti->pvti.version & 1) ||
-                         pvti->pvti.version != version));
-
-       if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT)))
-               *mode = VCLOCK_NONE;
-
-       /* refer to tsc.c read_tsc() comment for rationale */
-       last = gtod->cycle_last;
-
-       if (likely(ret >= last))
-               return ret;
-
-       return last;
-}
-#endif
-
-#else
-
-notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
-{
-       long ret;
-
-       asm(
-               "mov %%ebx, %%edx \n"
-               "mov %2, %%ebx \n"
-               "call __kernel_vsyscall \n"
-               "mov %%edx, %%ebx \n"
-               : "=a" (ret)
-               : "0" (__NR_clock_gettime), "g" (clock), "c" (ts)
-               : "memory", "edx");
-       return ret;
-}
-
-notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
-{
-       long ret;
-
-       asm(
-               "mov %%ebx, %%edx \n"
-               "mov %2, %%ebx \n"
-               "call __kernel_vsyscall \n"
-               "mov %%edx, %%ebx \n"
-               : "=a" (ret)
-               : "0" (__NR_gettimeofday), "g" (tv), "c" (tz)
-               : "memory", "edx");
-       return ret;
-}
-
-#ifdef CONFIG_PARAVIRT_CLOCK
-
-static notrace cycle_t vread_pvclock(int *mode)
-{
-       *mode = VCLOCK_NONE;
-       return 0;
-}
-#endif
-
-#endif
-
-notrace static cycle_t vread_tsc(void)
-{
-       cycle_t ret;
-       u64 last;
-
-       /*
-        * Empirically, a fence (of type that depends on the CPU)
-        * before rdtsc is enough to ensure that rdtsc is ordered
-        * with respect to loads.  The various CPU manuals are unclear
-        * as to whether rdtsc can be reordered with later loads,
-        * but no one has ever seen it happen.
-        */
-       rdtsc_barrier();
-       ret = (cycle_t)__native_read_tsc();
-
-       last = gtod->cycle_last;
-
-       if (likely(ret >= last))
-               return ret;
-
-       /*
-        * GCC likes to generate cmov here, but this branch is extremely
-        * predictable (it's just a funciton of time and the likely is
-        * very likely) and there's a data dependence, so force GCC
-        * to generate a branch instead.  I don't barrier() because
-        * we don't actually need a barrier, and if this function
-        * ever gets inlined it will generate worse code.
-        */
-       asm volatile ("");
-       return last;
-}
-
-notrace static inline u64 vgetsns(int *mode)
-{
-       u64 v;
-       cycles_t cycles;
-
-       if (gtod->vclock_mode == VCLOCK_TSC)
-               cycles = vread_tsc();
-#ifdef CONFIG_HPET_TIMER
-       else if (gtod->vclock_mode == VCLOCK_HPET)
-               cycles = vread_hpet();
-#endif
-#ifdef CONFIG_PARAVIRT_CLOCK
-       else if (gtod->vclock_mode == VCLOCK_PVCLOCK)
-               cycles = vread_pvclock(mode);
-#endif
-       else
-               return 0;
-       v = (cycles - gtod->cycle_last) & gtod->mask;
-       return v * gtod->mult;
-}
-
-/* Code size doesn't matter (vdso is 4k anyway) and this is faster. */
-notrace static int __always_inline do_realtime(struct timespec *ts)
-{
-       unsigned long seq;
-       u64 ns;
-       int mode;
-
-       do {
-               seq = gtod_read_begin(gtod);
-               mode = gtod->vclock_mode;
-               ts->tv_sec = gtod->wall_time_sec;
-               ns = gtod->wall_time_snsec;
-               ns += vgetsns(&mode);
-               ns >>= gtod->shift;
-       } while (unlikely(gtod_read_retry(gtod, seq)));
-
-       ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
-       ts->tv_nsec = ns;
-
-       return mode;
-}
-
-notrace static int __always_inline do_monotonic(struct timespec *ts)
-{
-       unsigned long seq;
-       u64 ns;
-       int mode;
-
-       do {
-               seq = gtod_read_begin(gtod);
-               mode = gtod->vclock_mode;
-               ts->tv_sec = gtod->monotonic_time_sec;
-               ns = gtod->monotonic_time_snsec;
-               ns += vgetsns(&mode);
-               ns >>= gtod->shift;
-       } while (unlikely(gtod_read_retry(gtod, seq)));
-
-       ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
-       ts->tv_nsec = ns;
-
-       return mode;
-}
-
-notrace static void do_realtime_coarse(struct timespec *ts)
-{
-       unsigned long seq;
-       do {
-               seq = gtod_read_begin(gtod);
-               ts->tv_sec = gtod->wall_time_coarse_sec;
-               ts->tv_nsec = gtod->wall_time_coarse_nsec;
-       } while (unlikely(gtod_read_retry(gtod, seq)));
-}
-
-notrace static void do_monotonic_coarse(struct timespec *ts)
-{
-       unsigned long seq;
-       do {
-               seq = gtod_read_begin(gtod);
-               ts->tv_sec = gtod->monotonic_time_coarse_sec;
-               ts->tv_nsec = gtod->monotonic_time_coarse_nsec;
-       } while (unlikely(gtod_read_retry(gtod, seq)));
-}
-
-notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
-{
-       switch (clock) {
-       case CLOCK_REALTIME:
-               if (do_realtime(ts) == VCLOCK_NONE)
-                       goto fallback;
-               break;
-       case CLOCK_MONOTONIC:
-               if (do_monotonic(ts) == VCLOCK_NONE)
-                       goto fallback;
-               break;
-       case CLOCK_REALTIME_COARSE:
-               do_realtime_coarse(ts);
-               break;
-       case CLOCK_MONOTONIC_COARSE:
-               do_monotonic_coarse(ts);
-               break;
-       default:
-               goto fallback;
-       }
-
-       return 0;
-fallback:
-       return vdso_fallback_gettime(clock, ts);
-}
-int clock_gettime(clockid_t, struct timespec *)
-       __attribute__((weak, alias("__vdso_clock_gettime")));
-
-notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
-{
-       if (likely(tv != NULL)) {
-               if (unlikely(do_realtime((struct timespec *)tv) == VCLOCK_NONE))
-                       return vdso_fallback_gtod(tv, tz);
-               tv->tv_usec /= 1000;
-       }
-       if (unlikely(tz != NULL)) {
-               tz->tz_minuteswest = gtod->tz_minuteswest;
-               tz->tz_dsttime = gtod->tz_dsttime;
-       }
-
-       return 0;
-}
-int gettimeofday(struct timeval *, struct timezone *)
-       __attribute__((weak, alias("__vdso_gettimeofday")));
-
-/*
- * This will break when the xtime seconds get inaccurate, but that is
- * unlikely
- */
-notrace time_t __vdso_time(time_t *t)
-{
-       /* This is atomic on x86 so we don't need any locks. */
-       time_t result = ACCESS_ONCE(gtod->wall_time_sec);
-
-       if (t)
-               *t = result;
-       return result;
-}
-int time(time_t *t)
-       __attribute__((weak, alias("__vdso_time")));
diff --git a/arch/x86/vdso/vdso-layout.lds.S b/arch/x86/vdso/vdso-layout.lds.S
deleted file mode 100644 (file)
index de2c921..0000000
+++ /dev/null
@@ -1,118 +0,0 @@
-#include <asm/vdso.h>
-
-/*
- * Linker script for vDSO.  This is an ELF shared object prelinked to
- * its virtual address, and with only one read-only segment.
- * This script controls its layout.
- */
-
-#if defined(BUILD_VDSO64)
-# define SHDR_SIZE 64
-#elif defined(BUILD_VDSO32) || defined(BUILD_VDSOX32)
-# define SHDR_SIZE 40
-#else
-# error unknown VDSO target
-#endif
-
-#define NUM_FAKE_SHDRS 13
-
-SECTIONS
-{
-       /*
-        * User/kernel shared data is before the vDSO.  This may be a little
-        * uglier than putting it after the vDSO, but it avoids issues with
-        * non-allocatable things that dangle past the end of the PT_LOAD
-        * segment.
-        */
-
-       vvar_start = . - 2 * PAGE_SIZE;
-       vvar_page = vvar_start;
-
-       /* Place all vvars at the offsets in asm/vvar.h. */
-#define EMIT_VVAR(name, offset) vvar_ ## name = vvar_page + offset;
-#define __VVAR_KERNEL_LDS
-#include <asm/vvar.h>
-#undef __VVAR_KERNEL_LDS
-#undef EMIT_VVAR
-
-       hpet_page = vvar_start + PAGE_SIZE;
-
-       . = SIZEOF_HEADERS;
-
-       .hash           : { *(.hash) }                  :text
-       .gnu.hash       : { *(.gnu.hash) }
-       .dynsym         : { *(.dynsym) }
-       .dynstr         : { *(.dynstr) }
-       .gnu.version    : { *(.gnu.version) }
-       .gnu.version_d  : { *(.gnu.version_d) }
-       .gnu.version_r  : { *(.gnu.version_r) }
-
-       .dynamic        : { *(.dynamic) }               :text   :dynamic
-
-       .rodata         : {
-               *(.rodata*)
-               *(.data*)
-               *(.sdata*)
-               *(.got.plt) *(.got)
-               *(.gnu.linkonce.d.*)
-               *(.bss*)
-               *(.dynbss*)
-               *(.gnu.linkonce.b.*)
-
-               /*
-                * Ideally this would live in a C file, but that won't
-                * work cleanly for x32 until we start building the x32
-                * C code using an x32 toolchain.
-                */
-               VDSO_FAKE_SECTION_TABLE_START = .;
-               . = . + NUM_FAKE_SHDRS * SHDR_SIZE;
-               VDSO_FAKE_SECTION_TABLE_END = .;
-       }                                               :text
-
-       .fake_shstrtab  : { *(.fake_shstrtab) }         :text
-
-
-       .note           : { *(.note.*) }                :text   :note
-
-       .eh_frame_hdr   : { *(.eh_frame_hdr) }          :text   :eh_frame_hdr
-       .eh_frame       : { KEEP (*(.eh_frame)) }       :text
-
-
-       /*
-        * Text is well-separated from actual data: there's plenty of
-        * stuff that isn't used at runtime in between.
-        */
-
-       .text           : { *(.text*) }                 :text   =0x90909090,
-
-       /*
-        * At the end so that eu-elflint stays happy when vdso2c strips
-        * these.  A better implementation would avoid allocating space
-        * for these.
-        */
-       .altinstructions        : { *(.altinstructions) }       :text
-       .altinstr_replacement   : { *(.altinstr_replacement) }  :text
-
-       /DISCARD/ : {
-               *(.discard)
-               *(.discard.*)
-               *(__bug_table)
-       }
-}
-
-/*
- * Very old versions of ld do not recognize this name token; use the constant.
- */
-#define PT_GNU_EH_FRAME        0x6474e550
-
-/*
- * We must supply the ELF program headers explicitly to get just one
- * PT_LOAD segment, and set the flags explicitly to make segments read-only.
- */
-PHDRS
-{
-       text            PT_LOAD         FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */
-       dynamic         PT_DYNAMIC      FLAGS(4);               /* PF_R */
-       note            PT_NOTE         FLAGS(4);               /* PF_R */
-       eh_frame_hdr    PT_GNU_EH_FRAME;
-}
diff --git a/arch/x86/vdso/vdso-note.S b/arch/x86/vdso/vdso-note.S
deleted file mode 100644 (file)
index 79a071e..0000000
+++ /dev/null
@@ -1,12 +0,0 @@
-/*
- * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
- * Here we can supply some information useful to userland.
- */
-
-#include <linux/uts.h>
-#include <linux/version.h>
-#include <linux/elfnote.h>
-
-ELFNOTE_START(Linux, 0, "a")
-       .long LINUX_VERSION_CODE
-ELFNOTE_END
diff --git a/arch/x86/vdso/vdso.lds.S b/arch/x86/vdso/vdso.lds.S
deleted file mode 100644 (file)
index 6807932..0000000
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Linker script for 64-bit vDSO.
- * We #include the file to define the layout details.
- *
- * This file defines the version script giving the user-exported symbols in
- * the DSO.
- */
-
-#define BUILD_VDSO64
-
-#include "vdso-layout.lds.S"
-
-/*
- * This controls what userland symbols we export from the vDSO.
- */
-VERSION {
-       LINUX_2.6 {
-       global:
-               clock_gettime;
-               __vdso_clock_gettime;
-               gettimeofday;
-               __vdso_gettimeofday;
-               getcpu;
-               __vdso_getcpu;
-               time;
-               __vdso_time;
-       local: *;
-       };
-}
diff --git a/arch/x86/vdso/vdso2c.c b/arch/x86/vdso/vdso2c.c
deleted file mode 100644 (file)
index 8627db2..0000000
+++ /dev/null
@@ -1,253 +0,0 @@
-/*
- * vdso2c - A vdso image preparation tool
- * Copyright (c) 2014 Andy Lutomirski and others
- * Licensed under the GPL v2
- *
- * vdso2c requires stripped and unstripped input.  It would be trivial
- * to fully strip the input in here, but, for reasons described below,
- * we need to write a section table.  Doing this is more or less
- * equivalent to dropping all non-allocatable sections, but it's
- * easier to let objcopy handle that instead of doing it ourselves.
- * If we ever need to do something fancier than what objcopy provides,
- * it would be straightforward to add here.
- *
- * We're keep a section table for a few reasons:
- *
- * The Go runtime had a couple of bugs: it would read the section
- * table to try to figure out how many dynamic symbols there were (it
- * shouldn't have looked at the section table at all) and, if there
- * were no SHT_SYNDYM section table entry, it would use an
- * uninitialized value for the number of symbols.  An empty DYNSYM
- * table would work, but I see no reason not to write a valid one (and
- * keep full performance for old Go programs).  This hack is only
- * needed on x86_64.
- *
- * The bug was introduced on 2012-08-31 by:
- * https://code.google.com/p/go/source/detail?r=56ea40aac72b
- * and was fixed on 2014-06-13 by:
- * https://code.google.com/p/go/source/detail?r=fc1cd5e12595
- *
- * Binutils has issues debugging the vDSO: it reads the section table to
- * find SHT_NOTE; it won't look at PT_NOTE for the in-memory vDSO, which
- * would break build-id if we removed the section table.  Binutils
- * also requires that shstrndx != 0.  See:
- * https://sourceware.org/bugzilla/show_bug.cgi?id=17064
- *
- * elfutils might not look for PT_NOTE if there is a section table at
- * all.  I don't know whether this matters for any practical purpose.
- *
- * For simplicity, rather than hacking up a partial section table, we
- * just write a mostly complete one.  We omit non-dynamic symbols,
- * though, since they're rather large.
- *
- * Once binutils gets fixed, we might be able to drop this for all but
- * the 64-bit vdso, since build-id only works in kernel RPMs, and
- * systems that update to new enough kernel RPMs will likely update
- * binutils in sync.  build-id has never worked for home-built kernel
- * RPMs without manual symlinking, and I suspect that no one ever does
- * that.
- */
-
-#include <inttypes.h>
-#include <stdint.h>
-#include <unistd.h>
-#include <stdarg.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <fcntl.h>
-#include <err.h>
-
-#include <sys/mman.h>
-#include <sys/types.h>
-
-#include <tools/le_byteshift.h>
-
-#include <linux/elf.h>
-#include <linux/types.h>
-
-const char *outfilename;
-
-/* Symbols that we need in vdso2c. */
-enum {
-       sym_vvar_start,
-       sym_vvar_page,
-       sym_hpet_page,
-       sym_VDSO_FAKE_SECTION_TABLE_START,
-       sym_VDSO_FAKE_SECTION_TABLE_END,
-};
-
-const int special_pages[] = {
-       sym_vvar_page,
-       sym_hpet_page,
-};
-
-struct vdso_sym {
-       const char *name;
-       bool export;
-};
-
-struct vdso_sym required_syms[] = {
-       [sym_vvar_start] = {"vvar_start", true},
-       [sym_vvar_page] = {"vvar_page", true},
-       [sym_hpet_page] = {"hpet_page", true},
-       [sym_VDSO_FAKE_SECTION_TABLE_START] = {
-               "VDSO_FAKE_SECTION_TABLE_START", false
-       },
-       [sym_VDSO_FAKE_SECTION_TABLE_END] = {
-               "VDSO_FAKE_SECTION_TABLE_END", false
-       },
-       {"VDSO32_NOTE_MASK", true},
-       {"VDSO32_SYSENTER_RETURN", true},
-       {"__kernel_vsyscall", true},
-       {"__kernel_sigreturn", true},
-       {"__kernel_rt_sigreturn", true},
-};
-
-__attribute__((format(printf, 1, 2))) __attribute__((noreturn))
-static void fail(const char *format, ...)
-{
-       va_list ap;
-       va_start(ap, format);
-       fprintf(stderr, "Error: ");
-       vfprintf(stderr, format, ap);
-       if (outfilename)
-               unlink(outfilename);
-       exit(1);
-       va_end(ap);
-}
-
-/*
- * Evil macros for little-endian reads and writes
- */
-#define GLE(x, bits, ifnot)                                            \
-       __builtin_choose_expr(                                          \
-               (sizeof(*(x)) == bits/8),                               \
-               (__typeof__(*(x)))get_unaligned_le##bits(x), ifnot)
-
-extern void bad_get_le(void);
-#define LAST_GLE(x)                                                    \
-       __builtin_choose_expr(sizeof(*(x)) == 1, *(x), bad_get_le())
-
-#define GET_LE(x)                                                      \
-       GLE(x, 64, GLE(x, 32, GLE(x, 16, LAST_GLE(x))))
-
-#define PLE(x, val, bits, ifnot)                                       \
-       __builtin_choose_expr(                                          \
-               (sizeof(*(x)) == bits/8),                               \
-               put_unaligned_le##bits((val), (x)), ifnot)
-
-extern void bad_put_le(void);
-#define LAST_PLE(x, val)                                               \
-       __builtin_choose_expr(sizeof(*(x)) == 1, *(x) = (val), bad_put_le())
-
-#define PUT_LE(x, val)                                 \
-       PLE(x, val, 64, PLE(x, val, 32, PLE(x, val, 16, LAST_PLE(x, val))))
-
-
-#define NSYMS (sizeof(required_syms) / sizeof(required_syms[0]))
-
-#define BITSFUNC3(name, bits, suffix) name##bits##suffix
-#define BITSFUNC2(name, bits, suffix) BITSFUNC3(name, bits, suffix)
-#define BITSFUNC(name) BITSFUNC2(name, ELF_BITS, )
-
-#define INT_BITS BITSFUNC2(int, ELF_BITS, _t)
-
-#define ELF_BITS_XFORM2(bits, x) Elf##bits##_##x
-#define ELF_BITS_XFORM(bits, x) ELF_BITS_XFORM2(bits, x)
-#define ELF(x) ELF_BITS_XFORM(ELF_BITS, x)
-
-#define ELF_BITS 64
-#include "vdso2c.h"
-#undef ELF_BITS
-
-#define ELF_BITS 32
-#include "vdso2c.h"
-#undef ELF_BITS
-
-static void go(void *raw_addr, size_t raw_len,
-              void *stripped_addr, size_t stripped_len,
-              FILE *outfile, const char *name)
-{
-       Elf64_Ehdr *hdr = (Elf64_Ehdr *)raw_addr;
-
-       if (hdr->e_ident[EI_CLASS] == ELFCLASS64) {
-               go64(raw_addr, raw_len, stripped_addr, stripped_len,
-                    outfile, name);
-       } else if (hdr->e_ident[EI_CLASS] == ELFCLASS32) {
-               go32(raw_addr, raw_len, stripped_addr, stripped_len,
-                    outfile, name);
-       } else {
-               fail("unknown ELF class\n");
-       }
-}
-
-static void map_input(const char *name, void **addr, size_t *len, int prot)
-{
-       off_t tmp_len;
-
-       int fd = open(name, O_RDONLY);
-       if (fd == -1)
-               err(1, "%s", name);
-
-       tmp_len = lseek(fd, 0, SEEK_END);
-       if (tmp_len == (off_t)-1)
-               err(1, "lseek");
-       *len = (size_t)tmp_len;
-
-       *addr = mmap(NULL, tmp_len, prot, MAP_PRIVATE, fd, 0);
-       if (*addr == MAP_FAILED)
-               err(1, "mmap");
-
-       close(fd);
-}
-
-int main(int argc, char **argv)
-{
-       size_t raw_len, stripped_len;
-       void *raw_addr, *stripped_addr;
-       FILE *outfile;
-       char *name, *tmp;
-       int namelen;
-
-       if (argc != 4) {
-               printf("Usage: vdso2c RAW_INPUT STRIPPED_INPUT OUTPUT\n");
-               return 1;
-       }
-
-       /*
-        * Figure out the struct name.  If we're writing to a .so file,
-        * generate raw output insted.
-        */
-       name = strdup(argv[3]);
-       namelen = strlen(name);
-       if (namelen >= 3 && !strcmp(name + namelen - 3, ".so")) {
-               name = NULL;
-       } else {
-               tmp = strrchr(name, '/');
-               if (tmp)
-                       name = tmp + 1;
-               tmp = strchr(name, '.');
-               if (tmp)
-                       *tmp = '\0';
-               for (tmp = name; *tmp; tmp++)
-                       if (*tmp == '-')
-                               *tmp = '_';
-       }
-
-       map_input(argv[1], &raw_addr, &raw_len, PROT_READ);
-       map_input(argv[2], &stripped_addr, &stripped_len, PROT_READ);
-
-       outfilename = argv[3];
-       outfile = fopen(outfilename, "w");
-       if (!outfile)
-               err(1, "%s", argv[2]);
-
-       go(raw_addr, raw_len, stripped_addr, stripped_len, outfile, name);
-
-       munmap(raw_addr, raw_len);
-       munmap(stripped_addr, stripped_len);
-       fclose(outfile);
-
-       return 0;
-}
diff --git a/arch/x86/vdso/vdso2c.h b/arch/x86/vdso/vdso2c.h
deleted file mode 100644 (file)
index 0224987..0000000
+++ /dev/null
@@ -1,175 +0,0 @@
-/*
- * This file is included twice from vdso2c.c.  It generates code for 32-bit
- * and 64-bit vDSOs.  We need both for 64-bit builds, since 32-bit vDSOs
- * are built for 32-bit userspace.
- */
-
-static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
-                        void *stripped_addr, size_t stripped_len,
-                        FILE *outfile, const char *name)
-{
-       int found_load = 0;
-       unsigned long load_size = -1;  /* Work around bogus warning */
-       unsigned long mapping_size;
-       ELF(Ehdr) *hdr = (ELF(Ehdr) *)raw_addr;
-       int i;
-       unsigned long j;
-       ELF(Shdr) *symtab_hdr = NULL, *strtab_hdr, *secstrings_hdr,
-               *alt_sec = NULL;
-       ELF(Dyn) *dyn = 0, *dyn_end = 0;
-       const char *secstrings;
-       INT_BITS syms[NSYMS] = {};
-
-       ELF(Phdr) *pt = (ELF(Phdr) *)(raw_addr + GET_LE(&hdr->e_phoff));
-
-       /* Walk the segment table. */
-       for (i = 0; i < GET_LE(&hdr->e_phnum); i++) {
-               if (GET_LE(&pt[i].p_type) == PT_LOAD) {
-                       if (found_load)
-                               fail("multiple PT_LOAD segs\n");
-
-                       if (GET_LE(&pt[i].p_offset) != 0 ||
-                           GET_LE(&pt[i].p_vaddr) != 0)
-                               fail("PT_LOAD in wrong place\n");
-
-                       if (GET_LE(&pt[i].p_memsz) != GET_LE(&pt[i].p_filesz))
-                               fail("cannot handle memsz != filesz\n");
-
-                       load_size = GET_LE(&pt[i].p_memsz);
-                       found_load = 1;
-               } else if (GET_LE(&pt[i].p_type) == PT_DYNAMIC) {
-                       dyn = raw_addr + GET_LE(&pt[i].p_offset);
-                       dyn_end = raw_addr + GET_LE(&pt[i].p_offset) +
-                               GET_LE(&pt[i].p_memsz);
-               }
-       }
-       if (!found_load)
-               fail("no PT_LOAD seg\n");
-
-       if (stripped_len < load_size)
-               fail("stripped input is too short\n");
-
-       /* Walk the dynamic table */
-       for (i = 0; dyn + i < dyn_end &&
-                    GET_LE(&dyn[i].d_tag) != DT_NULL; i++) {
-               typeof(dyn[i].d_tag) tag = GET_LE(&dyn[i].d_tag);
-               if (tag == DT_REL || tag == DT_RELSZ || tag == DT_RELA ||
-                   tag == DT_RELENT || tag == DT_TEXTREL)
-                       fail("vdso image contains dynamic relocations\n");
-       }
-
-       /* Walk the section table */
-       secstrings_hdr = raw_addr + GET_LE(&hdr->e_shoff) +
-               GET_LE(&hdr->e_shentsize)*GET_LE(&hdr->e_shstrndx);
-       secstrings = raw_addr + GET_LE(&secstrings_hdr->sh_offset);
-       for (i = 0; i < GET_LE(&hdr->e_shnum); i++) {
-               ELF(Shdr) *sh = raw_addr + GET_LE(&hdr->e_shoff) +
-                       GET_LE(&hdr->e_shentsize) * i;
-               if (GET_LE(&sh->sh_type) == SHT_SYMTAB)
-                       symtab_hdr = sh;
-
-               if (!strcmp(secstrings + GET_LE(&sh->sh_name),
-                           ".altinstructions"))
-                       alt_sec = sh;
-       }
-
-       if (!symtab_hdr)
-               fail("no symbol table\n");
-
-       strtab_hdr = raw_addr + GET_LE(&hdr->e_shoff) +
-               GET_LE(&hdr->e_shentsize) * GET_LE(&symtab_hdr->sh_link);
-
-       /* Walk the symbol table */
-       for (i = 0;
-            i < GET_LE(&symtab_hdr->sh_size) / GET_LE(&symtab_hdr->sh_entsize);
-            i++) {
-               int k;
-               ELF(Sym) *sym = raw_addr + GET_LE(&symtab_hdr->sh_offset) +
-                       GET_LE(&symtab_hdr->sh_entsize) * i;
-               const char *name = raw_addr + GET_LE(&strtab_hdr->sh_offset) +
-                       GET_LE(&sym->st_name);
-
-               for (k = 0; k < NSYMS; k++) {
-                       if (!strcmp(name, required_syms[k].name)) {
-                               if (syms[k]) {
-                                       fail("duplicate symbol %s\n",
-                                            required_syms[k].name);
-                               }
-
-                               /*
-                                * Careful: we use negative addresses, but
-                                * st_value is unsigned, so we rely
-                                * on syms[k] being a signed type of the
-                                * correct width.
-                                */
-                               syms[k] = GET_LE(&sym->st_value);
-                       }
-               }
-       }
-
-       /* Validate mapping addresses. */
-       for (i = 0; i < sizeof(special_pages) / sizeof(special_pages[0]); i++) {
-               INT_BITS symval = syms[special_pages[i]];
-
-               if (!symval)
-                       continue;  /* The mapping isn't used; ignore it. */
-
-               if (symval % 4096)
-                       fail("%s must be a multiple of 4096\n",
-                            required_syms[i].name);
-               if (symval + 4096 < syms[sym_vvar_start])
-                       fail("%s underruns vvar_start\n",
-                            required_syms[i].name);
-               if (symval + 4096 > 0)
-                       fail("%s is on the wrong side of the vdso text\n",
-                            required_syms[i].name);
-       }
-       if (syms[sym_vvar_start] % 4096)
-               fail("vvar_begin must be a multiple of 4096\n");
-
-       if (!name) {
-               fwrite(stripped_addr, stripped_len, 1, outfile);
-               return;
-       }
-
-       mapping_size = (stripped_len + 4095) / 4096 * 4096;
-
-       fprintf(outfile, "/* AUTOMATICALLY GENERATED -- DO NOT EDIT */\n\n");
-       fprintf(outfile, "#include <linux/linkage.h>\n");
-       fprintf(outfile, "#include <asm/page_types.h>\n");
-       fprintf(outfile, "#include <asm/vdso.h>\n");
-       fprintf(outfile, "\n");
-       fprintf(outfile,
-               "static unsigned char raw_data[%lu] __page_aligned_data = {",
-               mapping_size);
-       for (j = 0; j < stripped_len; j++) {
-               if (j % 10 == 0)
-                       fprintf(outfile, "\n\t");
-               fprintf(outfile, "0x%02X, ",
-                       (int)((unsigned char *)stripped_addr)[j]);
-       }
-       fprintf(outfile, "\n};\n\n");
-
-       fprintf(outfile, "static struct page *pages[%lu];\n\n",
-               mapping_size / 4096);
-
-       fprintf(outfile, "const struct vdso_image %s = {\n", name);
-       fprintf(outfile, "\t.data = raw_data,\n");
-       fprintf(outfile, "\t.size = %lu,\n", mapping_size);
-       fprintf(outfile, "\t.text_mapping = {\n");
-       fprintf(outfile, "\t\t.name = \"[vdso]\",\n");
-       fprintf(outfile, "\t\t.pages = pages,\n");
-       fprintf(outfile, "\t},\n");
-       if (alt_sec) {
-               fprintf(outfile, "\t.alt = %lu,\n",
-                       (unsigned long)GET_LE(&alt_sec->sh_offset));
-               fprintf(outfile, "\t.alt_len = %lu,\n",
-                       (unsigned long)GET_LE(&alt_sec->sh_size));
-       }
-       for (i = 0; i < NSYMS; i++) {
-               if (required_syms[i].export && syms[i])
-                       fprintf(outfile, "\t.sym_%s = %" PRIi64 ",\n",
-                               required_syms[i].name, (int64_t)syms[i]);
-       }
-       fprintf(outfile, "};\n");
-}
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
deleted file mode 100644 (file)
index e904c27..0000000
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * (C) Copyright 2002 Linus Torvalds
- * Portions based on the vdso-randomization code from exec-shield:
- * Copyright(C) 2005-2006, Red Hat, Inc., Ingo Molnar
- *
- * This file contains the needed initializations to support sysenter.
- */
-
-#include <linux/init.h>
-#include <linux/smp.h>
-#include <linux/kernel.h>
-#include <linux/mm_types.h>
-
-#include <asm/cpufeature.h>
-#include <asm/processor.h>
-#include <asm/vdso.h>
-
-#ifdef CONFIG_COMPAT_VDSO
-#define VDSO_DEFAULT   0
-#else
-#define VDSO_DEFAULT   1
-#endif
-
-/*
- * Should the kernel map a VDSO page into processes and pass its
- * address down to glibc upon exec()?
- */
-unsigned int __read_mostly vdso32_enabled = VDSO_DEFAULT;
-
-static int __init vdso32_setup(char *s)
-{
-       vdso32_enabled = simple_strtoul(s, NULL, 0);
-
-       if (vdso32_enabled > 1)
-               pr_warn("vdso32 values other than 0 and 1 are no longer allowed; vdso disabled\n");
-
-       return 1;
-}
-
-/*
- * For consistency, the argument vdso32=[012] affects the 32-bit vDSO
- * behavior on both 64-bit and 32-bit kernels.
- * On 32-bit kernels, vdso=[012] means the same thing.
- */
-__setup("vdso32=", vdso32_setup);
-
-#ifdef CONFIG_X86_32
-__setup_param("vdso=", vdso_setup, vdso32_setup, 0);
-#endif
-
-#ifdef CONFIG_X86_64
-
-#define        vdso32_sysenter()       (boot_cpu_has(X86_FEATURE_SYSENTER32))
-#define        vdso32_syscall()        (boot_cpu_has(X86_FEATURE_SYSCALL32))
-
-#else  /* CONFIG_X86_32 */
-
-#define vdso32_sysenter()      (boot_cpu_has(X86_FEATURE_SEP))
-#define vdso32_syscall()       (0)
-
-#endif /* CONFIG_X86_64 */
-
-#if defined(CONFIG_X86_32) || defined(CONFIG_COMPAT)
-const struct vdso_image *selected_vdso32;
-#endif
-
-int __init sysenter_setup(void)
-{
-#ifdef CONFIG_COMPAT
-       if (vdso32_syscall())
-               selected_vdso32 = &vdso_image_32_syscall;
-       else
-#endif
-       if (vdso32_sysenter())
-               selected_vdso32 = &vdso_image_32_sysenter;
-       else
-               selected_vdso32 = &vdso_image_32_int80;
-
-       init_vdso_image(selected_vdso32);
-
-       return 0;
-}
-
-#ifdef CONFIG_X86_64
-
-subsys_initcall(sysenter_setup);
-
-#ifdef CONFIG_SYSCTL
-/* Register vsyscall32 into the ABI table */
-#include <linux/sysctl.h>
-
-static struct ctl_table abi_table2[] = {
-       {
-               .procname       = "vsyscall32",
-               .data           = &vdso32_enabled,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec
-       },
-       {}
-};
-
-static struct ctl_table abi_root_table2[] = {
-       {
-               .procname = "abi",
-               .mode = 0555,
-               .child = abi_table2
-       },
-       {}
-};
-
-static __init int ia32_binfmt_init(void)
-{
-       register_sysctl_table(abi_root_table2);
-       return 0;
-}
-__initcall(ia32_binfmt_init);
-#endif /* CONFIG_SYSCTL */
-
-#endif /* CONFIG_X86_64 */
diff --git a/arch/x86/vdso/vdso32/.gitignore b/arch/x86/vdso/vdso32/.gitignore
deleted file mode 100644 (file)
index e45fba9..0000000
+++ /dev/null
@@ -1 +0,0 @@
-vdso32.lds
diff --git a/arch/x86/vdso/vdso32/int80.S b/arch/x86/vdso/vdso32/int80.S
deleted file mode 100644 (file)
index b15b7c0..0000000
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Code for the vDSO.  This version uses the old int $0x80 method.
- *
- * First get the common code for the sigreturn entry points.
- * This must come first.
- */
-#include "sigreturn.S"
-
-       .text
-       .globl __kernel_vsyscall
-       .type __kernel_vsyscall,@function
-       ALIGN
-__kernel_vsyscall:
-.LSTART_vsyscall:
-       int $0x80
-       ret
-.LEND_vsyscall:
-       .size __kernel_vsyscall,.-.LSTART_vsyscall
-       .previous
-
-       .section .eh_frame,"a",@progbits
-.LSTARTFRAMEDLSI:
-       .long .LENDCIEDLSI-.LSTARTCIEDLSI
-.LSTARTCIEDLSI:
-       .long 0                 /* CIE ID */
-       .byte 1                 /* Version number */
-       .string "zR"            /* NUL-terminated augmentation string */
-       .uleb128 1              /* Code alignment factor */
-       .sleb128 -4             /* Data alignment factor */
-       .byte 8                 /* Return address register column */
-       .uleb128 1              /* Augmentation value length */
-       .byte 0x1b              /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
-       .byte 0x0c              /* DW_CFA_def_cfa */
-       .uleb128 4
-       .uleb128 4
-       .byte 0x88              /* DW_CFA_offset, column 0x8 */
-       .uleb128 1
-       .align 4
-.LENDCIEDLSI:
-       .long .LENDFDEDLSI-.LSTARTFDEDLSI /* Length FDE */
-.LSTARTFDEDLSI:
-       .long .LSTARTFDEDLSI-.LSTARTFRAMEDLSI /* CIE pointer */
-       .long .LSTART_vsyscall-.        /* PC-relative start address */
-       .long .LEND_vsyscall-.LSTART_vsyscall
-       .uleb128 0
-       .align 4
-.LENDFDEDLSI:
-       .previous
-
-       /*
-        * Pad out the segment to match the size of the sysenter.S version.
-        */
-VDSO32_vsyscall_eh_frame_size = 0x40
-       .section .data,"aw",@progbits
-       .space VDSO32_vsyscall_eh_frame_size-(.LENDFDEDLSI-.LSTARTFRAMEDLSI), 0
-       .previous
diff --git a/arch/x86/vdso/vdso32/note.S b/arch/x86/vdso/vdso32/note.S
deleted file mode 100644 (file)
index c83f257..0000000
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
- * Here we can supply some information useful to userland.
- */
-
-#include <linux/version.h>
-#include <linux/elfnote.h>
-
-/* Ideally this would use UTS_NAME, but using a quoted string here
-   doesn't work. Remember to change this when changing the
-   kernel's name. */
-ELFNOTE_START(Linux, 0, "a")
-       .long LINUX_VERSION_CODE
-ELFNOTE_END
-
-#ifdef CONFIG_XEN
-/*
- * Add a special note telling glibc's dynamic linker a fake hardware
- * flavor that it will use to choose the search path for libraries in the
- * same way it uses real hardware capabilities like "mmx".
- * We supply "nosegneg" as the fake capability, to indicate that we
- * do not like negative offsets in instructions using segment overrides,
- * since we implement those inefficiently.  This makes it possible to
- * install libraries optimized to avoid those access patterns in someplace
- * like /lib/i686/tls/nosegneg.  Note that an /etc/ld.so.conf.d/file
- * corresponding to the bits here is needed to make ldconfig work right.
- * It should contain:
- *     hwcap 1 nosegneg
- * to match the mapping of bit to name that we give here.
- *
- * At runtime, the fake hardware feature will be considered to be present
- * if its bit is set in the mask word.  So, we start with the mask 0, and
- * at boot time we set VDSO_NOTE_NONEGSEG_BIT if running under Xen.
- */
-
-#include "../../xen/vdso.h"    /* Defines VDSO_NOTE_NONEGSEG_BIT.  */
-
-ELFNOTE_START(GNU, 2, "a")
-       .long 1                 /* ncaps */
-VDSO32_NOTE_MASK:              /* Symbol used by arch/x86/xen/setup.c */
-       .long 0                 /* mask */
-       .byte VDSO_NOTE_NONEGSEG_BIT; .asciz "nosegneg" /* bit, name */
-ELFNOTE_END
-#endif
diff --git a/arch/x86/vdso/vdso32/sigreturn.S b/arch/x86/vdso/vdso32/sigreturn.S
deleted file mode 100644 (file)
index d7ec4e2..0000000
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- * Common code for the sigreturn entry points in vDSO images.
- * So far this code is the same for both int80 and sysenter versions.
- * This file is #include'd by int80.S et al to define them first thing.
- * The kernel assumes that the addresses of these routines are constant
- * for all vDSO implementations.
- */
-
-#include <linux/linkage.h>
-#include <asm/unistd_32.h>
-#include <asm/asm-offsets.h>
-
-#ifndef SYSCALL_ENTER_KERNEL
-#define        SYSCALL_ENTER_KERNEL    int $0x80
-#endif
-
-       .text
-       .globl __kernel_sigreturn
-       .type __kernel_sigreturn,@function
-       nop /* this guy is needed for .LSTARTFDEDLSI1 below (watch for HACK) */
-       ALIGN
-__kernel_sigreturn:
-.LSTART_sigreturn:
-       popl %eax               /* XXX does this mean it needs unwind info? */
-       movl $__NR_sigreturn, %eax
-       SYSCALL_ENTER_KERNEL
-.LEND_sigreturn:
-       nop
-       .size __kernel_sigreturn,.-.LSTART_sigreturn
-
-       .globl __kernel_rt_sigreturn
-       .type __kernel_rt_sigreturn,@function
-       ALIGN
-__kernel_rt_sigreturn:
-.LSTART_rt_sigreturn:
-       movl $__NR_rt_sigreturn, %eax
-       SYSCALL_ENTER_KERNEL
-.LEND_rt_sigreturn:
-       nop
-       .size __kernel_rt_sigreturn,.-.LSTART_rt_sigreturn
-       .previous
-
-       .section .eh_frame,"a",@progbits
-.LSTARTFRAMEDLSI1:
-       .long .LENDCIEDLSI1-.LSTARTCIEDLSI1
-.LSTARTCIEDLSI1:
-       .long 0                 /* CIE ID */
-       .byte 1                 /* Version number */
-       .string "zRS"           /* NUL-terminated augmentation string */
-       .uleb128 1              /* Code alignment factor */
-       .sleb128 -4             /* Data alignment factor */
-       .byte 8                 /* Return address register column */
-       .uleb128 1              /* Augmentation value length */
-       .byte 0x1b              /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
-       .byte 0                 /* DW_CFA_nop */
-       .align 4
-.LENDCIEDLSI1:
-       .long .LENDFDEDLSI1-.LSTARTFDEDLSI1 /* Length FDE */
-.LSTARTFDEDLSI1:
-       .long .LSTARTFDEDLSI1-.LSTARTFRAMEDLSI1 /* CIE pointer */
-       /* HACK: The dwarf2 unwind routines will subtract 1 from the
-          return address to get an address in the middle of the
-          presumed call instruction.  Since we didn't get here via
-          a call, we need to include the nop before the real start
-          to make up for it.  */
-       .long .LSTART_sigreturn-1-.     /* PC-relative start address */
-       .long .LEND_sigreturn-.LSTART_sigreturn+1
-       .uleb128 0                      /* Augmentation */
-       /* What follows are the instructions for the table generation.
-          We record the locations of each register saved.  This is
-          complicated by the fact that the "CFA" is always assumed to
-          be the value of the stack pointer in the caller.  This means
-          that we must define the CFA of this body of code to be the
-          saved value of the stack pointer in the sigcontext.  Which
-          also means that there is no fixed relation to the other
-          saved registers, which means that we must use DW_CFA_expression
-          to compute their addresses.  It also means that when we
-          adjust the stack with the popl, we have to do it all over again.  */
-
-#define do_cfa_expr(offset)                                            \
-       .byte 0x0f;                     /* DW_CFA_def_cfa_expression */ \
-       .uleb128 1f-0f;                 /*   length */                  \
-0:     .byte 0x74;                     /*     DW_OP_breg4 */           \
-       .sleb128 offset;                /*      offset */               \
-       .byte 0x06;                     /*     DW_OP_deref */           \
-1:
-
-#define do_expr(regno, offset)                                         \
-       .byte 0x10;                     /* DW_CFA_expression */         \
-       .uleb128 regno;                 /*   regno */                   \
-       .uleb128 1f-0f;                 /*   length */                  \
-0:     .byte 0x74;                     /*     DW_OP_breg4 */           \
-       .sleb128 offset;                /*       offset */              \
-1:
-
-       do_cfa_expr(IA32_SIGCONTEXT_sp+4)
-       do_expr(0, IA32_SIGCONTEXT_ax+4)
-       do_expr(1, IA32_SIGCONTEXT_cx+4)
-       do_expr(2, IA32_SIGCONTEXT_dx+4)
-       do_expr(3, IA32_SIGCONTEXT_bx+4)
-       do_expr(5, IA32_SIGCONTEXT_bp+4)
-       do_expr(6, IA32_SIGCONTEXT_si+4)
-       do_expr(7, IA32_SIGCONTEXT_di+4)
-       do_expr(8, IA32_SIGCONTEXT_ip+4)
-
-       .byte 0x42      /* DW_CFA_advance_loc 2 -- nop; popl eax. */
-
-       do_cfa_expr(IA32_SIGCONTEXT_sp)
-       do_expr(0, IA32_SIGCONTEXT_ax)
-       do_expr(1, IA32_SIGCONTEXT_cx)
-       do_expr(2, IA32_SIGCONTEXT_dx)
-       do_expr(3, IA32_SIGCONTEXT_bx)
-       do_expr(5, IA32_SIGCONTEXT_bp)
-       do_expr(6, IA32_SIGCONTEXT_si)
-       do_expr(7, IA32_SIGCONTEXT_di)
-       do_expr(8, IA32_SIGCONTEXT_ip)
-
-       .align 4
-.LENDFDEDLSI1:
-
-       .long .LENDFDEDLSI2-.LSTARTFDEDLSI2 /* Length FDE */
-.LSTARTFDEDLSI2:
-       .long .LSTARTFDEDLSI2-.LSTARTFRAMEDLSI1 /* CIE pointer */
-       /* HACK: See above wrt unwind library assumptions.  */
-       .long .LSTART_rt_sigreturn-1-.  /* PC-relative start address */
-       .long .LEND_rt_sigreturn-.LSTART_rt_sigreturn+1
-       .uleb128 0                      /* Augmentation */
-       /* What follows are the instructions for the table generation.
-          We record the locations of each register saved.  This is
-          slightly less complicated than the above, since we don't
-          modify the stack pointer in the process.  */
-
-       do_cfa_expr(IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_sp)
-       do_expr(0, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ax)
-       do_expr(1, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_cx)
-       do_expr(2, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_dx)
-       do_expr(3, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_bx)
-       do_expr(5, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_bp)
-       do_expr(6, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_si)
-       do_expr(7, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_di)
-       do_expr(8, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ip)
-
-       .align 4
-.LENDFDEDLSI2:
-       .previous
diff --git a/arch/x86/vdso/vdso32/syscall.S b/arch/x86/vdso/vdso32/syscall.S
deleted file mode 100644 (file)
index 6b286bb..0000000
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Code for the vDSO.  This version uses the syscall instruction.
- *
- * First get the common code for the sigreturn entry points.
- * This must come first.
- */
-#define SYSCALL_ENTER_KERNEL   syscall
-#include "sigreturn.S"
-
-#include <asm/segment.h>
-
-       .text
-       .globl __kernel_vsyscall
-       .type __kernel_vsyscall,@function
-       ALIGN
-__kernel_vsyscall:
-.LSTART_vsyscall:
-       push    %ebp
-.Lpush_ebp:
-       movl    %ecx, %ebp
-       syscall
-       movl    %ebp, %ecx
-       popl    %ebp
-.Lpop_ebp:
-       ret
-.LEND_vsyscall:
-       .size __kernel_vsyscall,.-.LSTART_vsyscall
-
-       .section .eh_frame,"a",@progbits
-.LSTARTFRAME:
-       .long .LENDCIE-.LSTARTCIE
-.LSTARTCIE:
-       .long 0                 /* CIE ID */
-       .byte 1                 /* Version number */
-       .string "zR"            /* NUL-terminated augmentation string */
-       .uleb128 1              /* Code alignment factor */
-       .sleb128 -4             /* Data alignment factor */
-       .byte 8                 /* Return address register column */
-       .uleb128 1              /* Augmentation value length */
-       .byte 0x1b              /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
-       .byte 0x0c              /* DW_CFA_def_cfa */
-       .uleb128 4
-       .uleb128 4
-       .byte 0x88              /* DW_CFA_offset, column 0x8 */
-       .uleb128 1
-       .align 4
-.LENDCIE:
-
-       .long .LENDFDE1-.LSTARTFDE1     /* Length FDE */
-.LSTARTFDE1:
-       .long .LSTARTFDE1-.LSTARTFRAME  /* CIE pointer */
-       .long .LSTART_vsyscall-.        /* PC-relative start address */
-       .long .LEND_vsyscall-.LSTART_vsyscall
-       .uleb128 0                      /* Augmentation length */
-       /* What follows are the instructions for the table generation.
-          We have to record all changes of the stack pointer.  */
-       .byte 0x40 + .Lpush_ebp-.LSTART_vsyscall /* DW_CFA_advance_loc */
-       .byte 0x0e              /* DW_CFA_def_cfa_offset */
-       .uleb128 8
-       .byte 0x85, 0x02        /* DW_CFA_offset %ebp -8 */
-       .byte 0x40 + .Lpop_ebp-.Lpush_ebp /* DW_CFA_advance_loc */
-       .byte 0xc5              /* DW_CFA_restore %ebp */
-       .byte 0x0e              /* DW_CFA_def_cfa_offset */
-       .uleb128 4
-       .align 4
-.LENDFDE1:
-       .previous
-
-       /*
-        * Pad out the segment to match the size of the sysenter.S version.
-        */
-VDSO32_vsyscall_eh_frame_size = 0x40
-       .section .data,"aw",@progbits
-       .space VDSO32_vsyscall_eh_frame_size-(.LENDFDE1-.LSTARTFRAME), 0
-       .previous
diff --git a/arch/x86/vdso/vdso32/sysenter.S b/arch/x86/vdso/vdso32/sysenter.S
deleted file mode 100644 (file)
index e354bce..0000000
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Code for the vDSO.  This version uses the sysenter instruction.
- *
- * First get the common code for the sigreturn entry points.
- * This must come first.
- */
-#include "sigreturn.S"
-
-/*
- * The caller puts arg2 in %ecx, which gets pushed. The kernel will use
- * %ecx itself for arg2. The pushing is because the sysexit instruction
- * (found in entry.S) requires that we clobber %ecx with the desired %esp.
- * User code might expect that %ecx is unclobbered though, as it would be
- * for returning via the iret instruction, so we must push and pop.
- *
- * The caller puts arg3 in %edx, which the sysexit instruction requires
- * for %eip. Thus, exactly as for arg2, we must push and pop.
- *
- * Arg6 is different. The caller puts arg6 in %ebp. Since the sysenter
- * instruction clobbers %esp, the user's %esp won't even survive entry
- * into the kernel. We store %esp in %ebp. Code in entry.S must fetch
- * arg6 from the stack.
- *
- * You can not use this vsyscall for the clone() syscall because the
- * three words on the parent stack do not get copied to the child.
- */
-       .text
-       .globl __kernel_vsyscall
-       .type __kernel_vsyscall,@function
-       ALIGN
-__kernel_vsyscall:
-.LSTART_vsyscall:
-       push %ecx
-.Lpush_ecx:
-       push %edx
-.Lpush_edx:
-       push %ebp
-.Lenter_kernel:
-       movl %esp,%ebp
-       sysenter
-
-       /* 7: align return point with nop's to make disassembly easier */
-       .space 7,0x90
-
-       /* 14: System call restart point is here! (SYSENTER_RETURN-2) */
-       int $0x80
-       /* 16: System call normal return point is here! */
-VDSO32_SYSENTER_RETURN:        /* Symbol used by sysenter.c via vdso32-syms.h */
-       pop %ebp
-.Lpop_ebp:
-       pop %edx
-.Lpop_edx:
-       pop %ecx
-.Lpop_ecx:
-       ret
-.LEND_vsyscall:
-       .size __kernel_vsyscall,.-.LSTART_vsyscall
-       .previous
-
-       .section .eh_frame,"a",@progbits
-.LSTARTFRAMEDLSI:
-       .long .LENDCIEDLSI-.LSTARTCIEDLSI
-.LSTARTCIEDLSI:
-       .long 0                 /* CIE ID */
-       .byte 1                 /* Version number */
-       .string "zR"            /* NUL-terminated augmentation string */
-       .uleb128 1              /* Code alignment factor */
-       .sleb128 -4             /* Data alignment factor */
-       .byte 8                 /* Return address register column */
-       .uleb128 1              /* Augmentation value length */
-       .byte 0x1b              /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
-       .byte 0x0c              /* DW_CFA_def_cfa */
-       .uleb128 4
-       .uleb128 4
-       .byte 0x88              /* DW_CFA_offset, column 0x8 */
-       .uleb128 1
-       .align 4
-.LENDCIEDLSI:
-       .long .LENDFDEDLSI-.LSTARTFDEDLSI /* Length FDE */
-.LSTARTFDEDLSI:
-       .long .LSTARTFDEDLSI-.LSTARTFRAMEDLSI /* CIE pointer */
-       .long .LSTART_vsyscall-.        /* PC-relative start address */
-       .long .LEND_vsyscall-.LSTART_vsyscall
-       .uleb128 0
-       /* What follows are the instructions for the table generation.
-          We have to record all changes of the stack pointer.  */
-       .byte 0x40 + (.Lpush_ecx-.LSTART_vsyscall) /* DW_CFA_advance_loc */
-       .byte 0x0e              /* DW_CFA_def_cfa_offset */
-       .byte 0x08              /* RA at offset 8 now */
-       .byte 0x40 + (.Lpush_edx-.Lpush_ecx) /* DW_CFA_advance_loc */
-       .byte 0x0e              /* DW_CFA_def_cfa_offset */
-       .byte 0x0c              /* RA at offset 12 now */
-       .byte 0x40 + (.Lenter_kernel-.Lpush_edx) /* DW_CFA_advance_loc */
-       .byte 0x0e              /* DW_CFA_def_cfa_offset */
-       .byte 0x10              /* RA at offset 16 now */
-       .byte 0x85, 0x04        /* DW_CFA_offset %ebp -16 */
-       /* Finally the epilogue.  */
-       .byte 0x40 + (.Lpop_ebp-.Lenter_kernel) /* DW_CFA_advance_loc */
-       .byte 0x0e              /* DW_CFA_def_cfa_offset */
-       .byte 0x0c              /* RA at offset 12 now */
-       .byte 0xc5              /* DW_CFA_restore %ebp */
-       .byte 0x40 + (.Lpop_edx-.Lpop_ebp) /* DW_CFA_advance_loc */
-       .byte 0x0e              /* DW_CFA_def_cfa_offset */
-       .byte 0x08              /* RA at offset 8 now */
-       .byte 0x40 + (.Lpop_ecx-.Lpop_edx) /* DW_CFA_advance_loc */
-       .byte 0x0e              /* DW_CFA_def_cfa_offset */
-       .byte 0x04              /* RA at offset 4 now */
-       .align 4
-.LENDFDEDLSI:
-       .previous
-
-       /*
-        * Emit a symbol with the size of this .eh_frame data,
-        * to verify it matches the other versions.
-        */
-VDSO32_vsyscall_eh_frame_size = (.LENDFDEDLSI-.LSTARTFRAMEDLSI)
diff --git a/arch/x86/vdso/vdso32/vclock_gettime.c b/arch/x86/vdso/vdso32/vclock_gettime.c
deleted file mode 100644 (file)
index 175cc72..0000000
+++ /dev/null
@@ -1,30 +0,0 @@
-#define BUILD_VDSO32
-
-#ifndef CONFIG_CC_OPTIMIZE_FOR_SIZE
-#undef CONFIG_OPTIMIZE_INLINING
-#endif
-
-#undef CONFIG_X86_PPRO_FENCE
-
-#ifdef CONFIG_X86_64
-
-/*
- * in case of a 32 bit VDSO for a 64 bit kernel fake a 32 bit kernel
- * configuration
- */
-#undef CONFIG_64BIT
-#undef CONFIG_X86_64
-#undef CONFIG_ILLEGAL_POINTER_VALUE
-#undef CONFIG_SPARSEMEM_VMEMMAP
-#undef CONFIG_NR_CPUS
-
-#define CONFIG_X86_32 1
-#define CONFIG_PAGE_OFFSET 0
-#define CONFIG_ILLEGAL_POINTER_VALUE 0
-#define CONFIG_NR_CPUS 1
-
-#define BUILD_VDSO32_64
-
-#endif
-
-#include "../vclock_gettime.c"
diff --git a/arch/x86/vdso/vdso32/vdso-fakesections.c b/arch/x86/vdso/vdso32/vdso-fakesections.c
deleted file mode 100644 (file)
index 541468e..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include "../vdso-fakesections.c"
diff --git a/arch/x86/vdso/vdso32/vdso32.lds.S b/arch/x86/vdso/vdso32/vdso32.lds.S
deleted file mode 100644 (file)
index 31056cf..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Linker script for 32-bit vDSO.
- * We #include the file to define the layout details.
- *
- * This file defines the version script giving the user-exported symbols in
- * the DSO.
- */
-
-#include <asm/page.h>
-
-#define BUILD_VDSO32
-
-#include "../vdso-layout.lds.S"
-
-/* The ELF entry point can be used to set the AT_SYSINFO value.  */
-ENTRY(__kernel_vsyscall);
-
-/*
- * This controls what userland symbols we export from the vDSO.
- */
-VERSION
-{
-       LINUX_2.6 {
-       global:
-               __vdso_clock_gettime;
-               __vdso_gettimeofday;
-               __vdso_time;
-       };
-
-       LINUX_2.5 {
-       global:
-               __kernel_vsyscall;
-               __kernel_sigreturn;
-               __kernel_rt_sigreturn;
-       local: *;
-       };
-}
diff --git a/arch/x86/vdso/vdsox32.lds.S b/arch/x86/vdso/vdsox32.lds.S
deleted file mode 100644 (file)
index 697c11e..0000000
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Linker script for x32 vDSO.
- * We #include the file to define the layout details.
- *
- * This file defines the version script giving the user-exported symbols in
- * the DSO.
- */
-
-#define BUILD_VDSOX32
-
-#include "vdso-layout.lds.S"
-
-/*
- * This controls what userland symbols we export from the vDSO.
- */
-VERSION {
-       LINUX_2.6 {
-       global:
-               __vdso_clock_gettime;
-               __vdso_gettimeofday;
-               __vdso_getcpu;
-               __vdso_time;
-       local: *;
-       };
-}
diff --git a/arch/x86/vdso/vgetcpu.c b/arch/x86/vdso/vgetcpu.c
deleted file mode 100644 (file)
index 8ec3d1f..0000000
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright 2006 Andi Kleen, SUSE Labs.
- * Subject to the GNU Public License, v.2
- *
- * Fast user context implementation of getcpu()
- */
-
-#include <linux/kernel.h>
-#include <linux/getcpu.h>
-#include <linux/time.h>
-#include <asm/vgtod.h>
-
-notrace long
-__vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused)
-{
-       unsigned int p;
-
-       p = __getcpu();
-
-       if (cpu)
-               *cpu = p & VGETCPU_CPU_MASK;
-       if (node)
-               *node = p >> 12;
-       return 0;
-}
-
-long getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
-       __attribute__((weak, alias("__vdso_getcpu")));
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
deleted file mode 100644 (file)
index 1c9f750..0000000
+++ /dev/null
@@ -1,300 +0,0 @@
-/*
- * Copyright 2007 Andi Kleen, SUSE Labs.
- * Subject to the GPL, v.2
- *
- * This contains most of the x86 vDSO kernel-side code.
- */
-#include <linux/mm.h>
-#include <linux/err.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/init.h>
-#include <linux/random.h>
-#include <linux/elf.h>
-#include <linux/cpu.h>
-#include <asm/vgtod.h>
-#include <asm/proto.h>
-#include <asm/vdso.h>
-#include <asm/vvar.h>
-#include <asm/page.h>
-#include <asm/hpet.h>
-#include <asm/desc.h>
-
-#if defined(CONFIG_X86_64)
-unsigned int __read_mostly vdso64_enabled = 1;
-#endif
-
-void __init init_vdso_image(const struct vdso_image *image)
-{
-       int i;
-       int npages = (image->size) / PAGE_SIZE;
-
-       BUG_ON(image->size % PAGE_SIZE != 0);
-       for (i = 0; i < npages; i++)
-               image->text_mapping.pages[i] =
-                       virt_to_page(image->data + i*PAGE_SIZE);
-
-       apply_alternatives((struct alt_instr *)(image->data + image->alt),
-                          (struct alt_instr *)(image->data + image->alt +
-                                               image->alt_len));
-}
-
-struct linux_binprm;
-
-/*
- * Put the vdso above the (randomized) stack with another randomized
- * offset.  This way there is no hole in the middle of address space.
- * To save memory make sure it is still in the same PTE as the stack
- * top.  This doesn't give that many random bits.
- *
- * Note that this algorithm is imperfect: the distribution of the vdso
- * start address within a PMD is biased toward the end.
- *
- * Only used for the 64-bit and x32 vdsos.
- */
-static unsigned long vdso_addr(unsigned long start, unsigned len)
-{
-#ifdef CONFIG_X86_32
-       return 0;
-#else
-       unsigned long addr, end;
-       unsigned offset;
-
-       /*
-        * Round up the start address.  It can start out unaligned as a result
-        * of stack start randomization.
-        */
-       start = PAGE_ALIGN(start);
-
-       /* Round the lowest possible end address up to a PMD boundary. */
-       end = (start + len + PMD_SIZE - 1) & PMD_MASK;
-       if (end >= TASK_SIZE_MAX)
-               end = TASK_SIZE_MAX;
-       end -= len;
-
-       if (end > start) {
-               offset = get_random_int() % (((end - start) >> PAGE_SHIFT) + 1);
-               addr = start + (offset << PAGE_SHIFT);
-       } else {
-               addr = start;
-       }
-
-       /*
-        * Forcibly align the final address in case we have a hardware
-        * issue that requires alignment for performance reasons.
-        */
-       addr = align_vdso_addr(addr);
-
-       return addr;
-#endif
-}
-
-static int map_vdso(const struct vdso_image *image, bool calculate_addr)
-{
-       struct mm_struct *mm = current->mm;
-       struct vm_area_struct *vma;
-       unsigned long addr, text_start;
-       int ret = 0;
-       static struct page *no_pages[] = {NULL};
-       static struct vm_special_mapping vvar_mapping = {
-               .name = "[vvar]",
-               .pages = no_pages,
-       };
-
-       if (calculate_addr) {
-               addr = vdso_addr(current->mm->start_stack,
-                                image->size - image->sym_vvar_start);
-       } else {
-               addr = 0;
-       }
-
-       down_write(&mm->mmap_sem);
-
-       addr = get_unmapped_area(NULL, addr,
-                                image->size - image->sym_vvar_start, 0, 0);
-       if (IS_ERR_VALUE(addr)) {
-               ret = addr;
-               goto up_fail;
-       }
-
-       text_start = addr - image->sym_vvar_start;
-       current->mm->context.vdso = (void __user *)text_start;
-
-       /*
-        * MAYWRITE to allow gdb to COW and set breakpoints
-        */
-       vma = _install_special_mapping(mm,
-                                      text_start,
-                                      image->size,
-                                      VM_READ|VM_EXEC|
-                                      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
-                                      &image->text_mapping);
-
-       if (IS_ERR(vma)) {
-               ret = PTR_ERR(vma);
-               goto up_fail;
-       }
-
-       vma = _install_special_mapping(mm,
-                                      addr,
-                                      -image->sym_vvar_start,
-                                      VM_READ|VM_MAYREAD,
-                                      &vvar_mapping);
-
-       if (IS_ERR(vma)) {
-               ret = PTR_ERR(vma);
-               goto up_fail;
-       }
-
-       if (image->sym_vvar_page)
-               ret = remap_pfn_range(vma,
-                                     text_start + image->sym_vvar_page,
-                                     __pa_symbol(&__vvar_page) >> PAGE_SHIFT,
-                                     PAGE_SIZE,
-                                     PAGE_READONLY);
-
-       if (ret)
-               goto up_fail;
-
-#ifdef CONFIG_HPET_TIMER
-       if (hpet_address && image->sym_hpet_page) {
-               ret = io_remap_pfn_range(vma,
-                       text_start + image->sym_hpet_page,
-                       hpet_address >> PAGE_SHIFT,
-                       PAGE_SIZE,
-                       pgprot_noncached(PAGE_READONLY));
-
-               if (ret)
-                       goto up_fail;
-       }
-#endif
-
-up_fail:
-       if (ret)
-               current->mm->context.vdso = NULL;
-
-       up_write(&mm->mmap_sem);
-       return ret;
-}
-
-#if defined(CONFIG_X86_32) || defined(CONFIG_COMPAT)
-static int load_vdso32(void)
-{
-       int ret;
-
-       if (vdso32_enabled != 1)  /* Other values all mean "disabled" */
-               return 0;
-
-       ret = map_vdso(selected_vdso32, false);
-       if (ret)
-               return ret;
-
-       if (selected_vdso32->sym_VDSO32_SYSENTER_RETURN)
-               current_thread_info()->sysenter_return =
-                       current->mm->context.vdso +
-                       selected_vdso32->sym_VDSO32_SYSENTER_RETURN;
-
-       return 0;
-}
-#endif
-
-#ifdef CONFIG_X86_64
-int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
-{
-       if (!vdso64_enabled)
-               return 0;
-
-       return map_vdso(&vdso_image_64, true);
-}
-
-#ifdef CONFIG_COMPAT
-int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
-                                      int uses_interp)
-{
-#ifdef CONFIG_X86_X32_ABI
-       if (test_thread_flag(TIF_X32)) {
-               if (!vdso64_enabled)
-                       return 0;
-
-               return map_vdso(&vdso_image_x32, true);
-       }
-#endif
-
-       return load_vdso32();
-}
-#endif
-#else
-int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
-{
-       return load_vdso32();
-}
-#endif
-
-#ifdef CONFIG_X86_64
-static __init int vdso_setup(char *s)
-{
-       vdso64_enabled = simple_strtoul(s, NULL, 0);
-       return 0;
-}
-__setup("vdso=", vdso_setup);
-#endif
-
-#ifdef CONFIG_X86_64
-static void vgetcpu_cpu_init(void *arg)
-{
-       int cpu = smp_processor_id();
-       struct desc_struct d = { };
-       unsigned long node = 0;
-#ifdef CONFIG_NUMA
-       node = cpu_to_node(cpu);
-#endif
-       if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP))
-               write_rdtscp_aux((node << 12) | cpu);
-
-       /*
-        * Store cpu number in limit so that it can be loaded
-        * quickly in user space in vgetcpu. (12 bits for the CPU
-        * and 8 bits for the node)
-        */
-       d.limit0 = cpu | ((node & 0xf) << 12);
-       d.limit = node >> 4;
-       d.type = 5;             /* RO data, expand down, accessed */
-       d.dpl = 3;              /* Visible to user code */
-       d.s = 1;                /* Not a system segment */
-       d.p = 1;                /* Present */
-       d.d = 1;                /* 32-bit */
-
-       write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
-}
-
-static int
-vgetcpu_cpu_notifier(struct notifier_block *n, unsigned long action, void *arg)
-{
-       long cpu = (long)arg;
-
-       if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
-               smp_call_function_single(cpu, vgetcpu_cpu_init, NULL, 1);
-
-       return NOTIFY_DONE;
-}
-
-static int __init init_vdso(void)
-{
-       init_vdso_image(&vdso_image_64);
-
-#ifdef CONFIG_X86_X32_ABI
-       init_vdso_image(&vdso_image_x32);
-#endif
-
-       cpu_notifier_register_begin();
-
-       on_each_cpu(vgetcpu_cpu_init, NULL, 1);
-       /* notifier priority > KVM */
-       __hotcpu_notifier(vgetcpu_cpu_notifier, 30);
-
-       cpu_notifier_register_done();
-
-       return 0;
-}
-subsys_initcall(init_vdso);
-#endif /* CONFIG_X86_64 */