From bab06ba696694e7f62f964af7ee5290a13f78340 Mon Sep 17 00:00:00 2001 From: JF Bastien Date: Fri, 17 May 2013 23:49:01 +0000 Subject: [PATCH] Support unaligned load/store on more ARM targets This patch matches GCC behavior: the code used to only allow unaligned load/store on ARM for v6+ Darwin; it will now allow unaligned load/store for v6+ Darwin as well as for v7+ on Linux and NaCl. The distinction is made because v6 doesn't guarantee support (but LLVM assumes that Apple controls hardware+kernel and therefore has conformant v6 CPUs), whereas v7 does provide this guarantee (and Linux/NaCl behave sanely). The patch keeps the -arm-strict-align command line option, and adds -arm-no-strict-align. They behave similarly to GCC's -mstrict-align and -mno-strict-align. I originally encountered this discrepancy in FastIsel tests which expect unaligned load/store generation. Overall this should slightly improve performance in most cases because of reduced I$ pressure. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@182175 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMSubtarget.cpp | 51 ++++++++-- lib/Target/ARM/ARMSubtarget.h | 5 +- test/CodeGen/ARM/fast-isel-align.ll | 144 ++++++++++++++++++++++++++++ test/CodeGen/ARM/fast-isel.ll | 133 +------------------------ 4 files changed, 193 insertions(+), 140 deletions(-) create mode 100644 test/CodeGen/ARM/fast-isel-align.ll diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 8653c462f06..c7d97437906 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -38,9 +38,24 @@ static cl::opt UseFusedMulOps("arm-use-mulops", cl::init(true), cl::Hidden); -static cl::opt -StrictAlign("arm-strict-align", cl::Hidden, - cl::desc("Disallow all unaligned memory accesses")); +enum AlignMode { + DefaultAlign, + StrictAlign, + NoStrictAlign +}; + +static cl::opt +Align(cl::desc("Load/store alignment support"), + cl::Hidden, cl::init(DefaultAlign), + cl::values( + 
clEnumValN(DefaultAlign, "arm-default-align", + "Generate unaligned accesses only on hardware/OS " + "combinations that are known to support them"), + clEnumValN(StrictAlign, "arm-strict-align", + "Disallow all unaligned memory accesses"), + clEnumValN(NoStrictAlign, "arm-no-strict-align", + "Allow unaligned memory accesses"), + clEnumValEnd)); ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, const std::string &FS, const TargetOptions &Options) @@ -162,10 +177,32 @@ void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { if (!isThumb() || hasThumb2()) PostRAScheduler = true; - // v6+ may or may not support unaligned mem access depending on the system - // configuration. - if (!StrictAlign && hasV6Ops() && isTargetDarwin()) - AllowsUnalignedMem = true; + switch (Align) { + case DefaultAlign: + // Assume pre-ARMv6 doesn't support unaligned accesses. + // + // ARMv6 may or may not support unaligned accesses depending on the + // SCTLR.U bit, which is architecture-specific. We assume ARMv6 + // Darwin targets support unaligned accesses, and others don't. + // + // ARMv7 always has SCTLR.U set to 1, but it has a new SCTLR.A bit + // which raises an alignment fault on unaligned accesses. Linux + // defaults this bit to 0 and handles it as a system-wide (not + // per-process) setting. It is therefore safe to assume that ARMv7+ + // Linux targets support unaligned accesses. The same goes for NaCl. + // + // The above behavior is consistent with GCC. + AllowsUnalignedMem = ( + (hasV7Ops() && (isTargetLinux() || isTargetNaCl())) || + (hasV6Ops() && isTargetDarwin())); + break; + case StrictAlign: + AllowsUnalignedMem = false; + break; + case NoStrictAlign: + AllowsUnalignedMem = true; + break; + } // NEON f32 ops are non-IEEE 754 compliant. Darwin is ok with it by default. 
uint64_t Bits = getFeatureBits(); diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 038eb76ae1d..d01316511c4 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -270,9 +270,8 @@ public: bool isTargetIOS() const { return TargetTriple.getOS() == Triple::IOS; } bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } - bool isTargetNaCl() const { - return TargetTriple.getOS() == Triple::NaCl; - } + bool isTargetNaCl() const { return TargetTriple.getOS() == Triple::NaCl; } + bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; } bool isTargetELF() const { return !isTargetDarwin(); } bool isAPCS_ABI() const { return TargetABI == ARM_ABI_APCS; } diff --git a/test/CodeGen/ARM/fast-isel-align.ll b/test/CodeGen/ARM/fast-isel-align.ll new file mode 100644 index 00000000000..4e28a10cd15 --- /dev/null +++ b/test/CodeGen/ARM/fast-isel-align.ll @@ -0,0 +1,144 @@ +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -arm-strict-align -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM-STRICT-ALIGN +; RUN: llc < %s -O0 -arm-strict-align -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB-STRICT-ALIGN + +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-linux-gnueabi | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -arm-strict-align -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM-STRICT-ALIGN +; RUN: llc < %s -O0 -arm-strict-align 
-relocation-model=dynamic-no-pic -mtriple=thumbv7-linux-gnueabi | FileCheck %s --check-prefix=THUMB-STRICT-ALIGN + +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-nacl | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -arm-strict-align -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-nacl | FileCheck %s --check-prefix=ARM-STRICT-ALIGN + +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-unknown | FileCheck %s --check-prefix=ARM-STRICT-ALIGN +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-unknown-unknown | FileCheck %s --check-prefix=THUMB-STRICT-ALIGN +; RUN: llc < %s -O0 -arm-no-strict-align -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-unknown | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -arm-no-strict-align -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-unknown-unknown | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-unknown | FileCheck %s --check-prefix=ARM-STRICT-ALIGN +; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=thumbv7-unknown-unknown | FileCheck %s --check-prefix=THUMB-STRICT-ALIGN + +; Check unaligned stores +%struct.anon = type <{ float }> + +@a = common global %struct.anon* null, align 4 + +define void @unaligned_store(float %x, float %y) nounwind { +entry: +; ARM: @unaligned_store +; ARM: vmov r1, s0 +; ARM: str r1, [r0] + +; THUMB: @unaligned_store +; THUMB: vmov r1, s0 +; THUMB: str r1, [r0] + + %add = fadd float %x, %y + %0 = load %struct.anon** @a, align 4 + %x1 = getelementptr inbounds %struct.anon* %0, i32 0, i32 0 + store float %add, float* %x1, align 1 + ret void +} + +; Doublewords require only word-alignment. 
+; rdar://10528060 +%struct.anon.0 = type { double } + +@foo_unpacked = common global %struct.anon.0 zeroinitializer, align 4 + +define void @word_aligned_f64_store(double %a, double %b) nounwind { +entry: +; ARM: @word_aligned_f64_store +; THUMB: @word_aligned_f64_store + %add = fadd double %a, %b + store double %add, double* getelementptr inbounds (%struct.anon.0* @foo_unpacked, i32 0, i32 0), align 4 +; ARM: vstr d16, [r0] +; THUMB: vstr d16, [r0] + ret void +} + +; Check unaligned loads of floats +%class.TAlignTest = type <{ i16, float }> + +define zeroext i1 @unaligned_f32_load(%class.TAlignTest* %this) nounwind align 2 { +entry: +; ARM: @unaligned_f32_load +; THUMB: @unaligned_f32_load + %0 = alloca %class.TAlignTest*, align 4 + store %class.TAlignTest* %this, %class.TAlignTest** %0, align 4 + %1 = load %class.TAlignTest** %0 + %2 = getelementptr inbounds %class.TAlignTest* %1, i32 0, i32 1 + %3 = load float* %2, align 1 + %4 = fcmp une float %3, 0.000000e+00 +; ARM: ldr r[[R:[0-9]+]], [r0, #2] +; ARM: vmov s0, r[[R]] +; ARM: vcmpe.f32 s0, #0 +; THUMB: ldr.w r[[R:[0-9]+]], [r0, #2] +; THUMB: vmov s0, r[[R]] +; THUMB: vcmpe.f32 s0, #0 + ret i1 %4 +} + +define void @unaligned_i16_store(i16 %x, i16* %y) nounwind { +entry: +; ARM-STRICT-ALIGN: @unaligned_i16_store +; ARM-STRICT-ALIGN: strb +; ARM-STRICT-ALIGN: strb + +; THUMB-STRICT-ALIGN: @unaligned_i16_store +; THUMB-STRICT-ALIGN: strb +; THUMB-STRICT-ALIGN: strb + + store i16 %x, i16* %y, align 1 + ret void +} + +define i16 @unaligned_i16_load(i16* %x) nounwind { +entry: +; ARM-STRICT-ALIGN: @unaligned_i16_load +; ARM-STRICT-ALIGN: ldrb +; ARM-STRICT-ALIGN: ldrb + +; THUMB-STRICT-ALIGN: @unaligned_i16_load +; THUMB-STRICT-ALIGN: ldrb +; THUMB-STRICT-ALIGN: ldrb + + %0 = load i16* %x, align 1 + ret i16 %0 +} + +define void @unaligned_i32_store(i32 %x, i32* %y) nounwind { +entry: +; ARM-STRICT-ALIGN: @unaligned_i32_store +; ARM-STRICT-ALIGN: strb +; ARM-STRICT-ALIGN: strb +; ARM-STRICT-ALIGN: strb +; 
ARM-STRICT-ALIGN: strb + +; THUMB-STRICT-ALIGN: @unaligned_i32_store +; THUMB-STRICT-ALIGN: strb +; THUMB-STRICT-ALIGN: strb +; THUMB-STRICT-ALIGN: strb +; THUMB-STRICT-ALIGN: strb + + store i32 %x, i32* %y, align 1 + ret void +} + +define i32 @unaligned_i32_load(i32* %x) nounwind { +entry: +; ARM-STRICT-ALIGN: @unaligned_i32_load +; ARM-STRICT-ALIGN: ldrb +; ARM-STRICT-ALIGN: ldrb +; ARM-STRICT-ALIGN: ldrb +; ARM-STRICT-ALIGN: ldrb + +; THUMB-STRICT-ALIGN: @unaligned_i32_load +; THUMB-STRICT-ALIGN: ldrb +; THUMB-STRICT-ALIGN: ldrb +; THUMB-STRICT-ALIGN: ldrb +; THUMB-STRICT-ALIGN: ldrb + + %0 = load i32* %x, align 1 + ret i32 %0 +} diff --git a/test/CodeGen/ARM/fast-isel.ll b/test/CodeGen/ARM/fast-isel.ll index 39ffcac2922..c4274c5eb5e 100644 --- a/test/CodeGen/ARM/fast-isel.ll +++ b/test/CodeGen/ARM/fast-isel.ll @@ -1,7 +1,5 @@ ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB -; RUN: llc < %s -O0 -arm-strict-align -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM-STRICT-ALIGN -; RUN: llc < %s -O0 -arm-strict-align -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB-STRICT-ALIGN ; Very basic fast-isel functionality. 
define i32 @add(i32 %a, i32 %b) nounwind { @@ -163,67 +161,6 @@ define void @test4() { ; ARM: str r1, [r0] } -; Check unaligned stores -%struct.anon = type <{ float }> - -@a = common global %struct.anon* null, align 4 - -define void @unaligned_store(float %x, float %y) nounwind { -entry: -; ARM: @unaligned_store -; ARM: vmov r1, s0 -; ARM: str r1, [r0] - -; THUMB: @unaligned_store -; THUMB: vmov r1, s0 -; THUMB: str r1, [r0] - - %add = fadd float %x, %y - %0 = load %struct.anon** @a, align 4 - %x1 = getelementptr inbounds %struct.anon* %0, i32 0, i32 0 - store float %add, float* %x1, align 1 - ret void -} - -; Doublewords require only word-alignment. -; rdar://10528060 -%struct.anon.0 = type { double } - -@foo_unpacked = common global %struct.anon.0 zeroinitializer, align 4 - -define void @test5(double %a, double %b) nounwind { -entry: -; ARM: @test5 -; THUMB: @test5 - %add = fadd double %a, %b - store double %add, double* getelementptr inbounds (%struct.anon.0* @foo_unpacked, i32 0, i32 0), align 4 -; ARM: vstr d16, [r0] -; THUMB: vstr d16, [r0] - ret void -} - -; Check unaligned loads of floats -%class.TAlignTest = type <{ i16, float }> - -define zeroext i1 @test6(%class.TAlignTest* %this) nounwind align 2 { -entry: -; ARM: @test6 -; THUMB: @test6 - %0 = alloca %class.TAlignTest*, align 4 - store %class.TAlignTest* %this, %class.TAlignTest** %0, align 4 - %1 = load %class.TAlignTest** %0 - %2 = getelementptr inbounds %class.TAlignTest* %1, i32 0, i32 1 - %3 = load float* %2, align 1 - %4 = fcmp une float %3, 0.000000e+00 -; ARM: ldr r0, [r0, #2] -; ARM: vmov s0, r0 -; ARM: vcmpe.f32 s0, #0 -; THUMB: ldr.w r0, [r0, #2] -; THUMB: vmov s0, r0 -; THUMB: vcmpe.f32 s0, #0 - ret i1 %4 -} - ; ARM: @urem_fold ; THUMB: @urem_fold ; ARM: and r0, r0, #31 @@ -233,10 +170,10 @@ define i32 @urem_fold(i32 %a) nounwind { ret i32 %rem } -define i32 @test7() noreturn nounwind { +define i32 @trap_intrinsic() noreturn nounwind { entry: -; ARM: @test7 -; THUMB: @test7 +; ARM: 
@trap_intrinsic +; THUMB: @trap_intrinsic ; ARM: trap ; THUMB: trap tail call void @llvm.trap( ) @@ -244,67 +181,3 @@ entry: } declare void @llvm.trap() nounwind - -define void @unaligned_i16_store(i16 %x, i16* %y) nounwind { -entry: -; ARM-STRICT-ALIGN: @unaligned_i16_store -; ARM-STRICT-ALIGN: strb -; ARM-STRICT-ALIGN: strb - -; THUMB-STRICT-ALIGN: @unaligned_i16_store -; THUMB-STRICT-ALIGN: strb -; THUMB-STRICT-ALIGN: strb - - store i16 %x, i16* %y, align 1 - ret void -} - -define i16 @unaligned_i16_load(i16* %x) nounwind { -entry: -; ARM-STRICT-ALIGN: @unaligned_i16_load -; ARM-STRICT-ALIGN: ldrb -; ARM-STRICT-ALIGN: ldrb - -; THUMB-STRICT-ALIGN: @unaligned_i16_load -; THUMB-STRICT-ALIGN: ldrb -; THUMB-STRICT-ALIGN: ldrb - - %0 = load i16* %x, align 1 - ret i16 %0 -} - -define void @unaligned_i32_store(i32 %x, i32* %y) nounwind { -entry: -; ARM-STRICT-ALIGN: @unaligned_i32_store -; ARM-STRICT-ALIGN: strb -; ARM-STRICT-ALIGN: strb -; ARM-STRICT-ALIGN: strb -; ARM-STRICT-ALIGN: strb - -; THUMB-STRICT-ALIGN: @unaligned_i32_store -; THUMB-STRICT-ALIGN: strb -; THUMB-STRICT-ALIGN: strb -; THUMB-STRICT-ALIGN: strb -; THUMB-STRICT-ALIGN: strb - - store i32 %x, i32* %y, align 1 - ret void -} - -define i32 @unaligned_i32_load(i32* %x) nounwind { -entry: -; ARM-STRICT-ALIGN: @unaligned_i32_load -; ARM-STRICT-ALIGN: ldrb -; ARM-STRICT-ALIGN: ldrb -; ARM-STRICT-ALIGN: ldrb -; ARM-STRICT-ALIGN: ldrb - -; THUMB-STRICT-ALIGN: @unaligned_i32_load -; THUMB-STRICT-ALIGN: ldrb -; THUMB-STRICT-ALIGN: ldrb -; THUMB-STRICT-ALIGN: ldrb -; THUMB-STRICT-ALIGN: ldrb - - %0 = load i32* %x, align 1 - ret i32 %0 -} -- 2.34.1