From e0364b64d12330f6f8c47ef98fc658468e2b72e4 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 17 Jul 2013 05:57:45 +0000 Subject: [PATCH 1/1] Make x86 fast-isel correctly choose between aligned and unaligned operations for vector stores. Fixes PR16640. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186491 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FastISel.cpp | 40 +++++++++++----- .../X86/2011-10-18-FastISel-VectorParams.ll | 2 +- test/CodeGen/X86/fast-isel-store.ll | 48 +++++++++++++++++++ test/CodeGen/X86/fast-isel-unaligned-store.ll | 18 ------- 4 files changed, 77 insertions(+), 31 deletions(-) create mode 100644 test/CodeGen/X86/fast-isel-store.ll delete mode 100644 test/CodeGen/X86/fast-isel-unaligned-store.ll diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 9c91e935f0b..7419822b673 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -79,8 +79,10 @@ private: bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR); - bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM); - bool X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM); + bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM, + bool Aligned = false); + bool X86FastEmitStore(EVT VT, unsigned ValReg, const X86AddressMode &AM, + bool Aligned = false); bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT, unsigned &ResultReg); @@ -233,7 +235,8 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM, /// and a displacement offset, or a GlobalAddress, /// i.e. V. Return true if it is possible. bool -X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM) { +X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, + const X86AddressMode &AM, bool Aligned) { // Get opcode and regclass of the output for the given store instruction. 
unsigned Opc = 0; switch (VT.getSimpleVT().SimpleTy) { @@ -243,8 +246,8 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM) { // Mask out all but lowest bit. unsigned AndResult = createResultReg(&X86::GR8RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(X86::AND8ri), AndResult).addReg(Val).addImm(1); - Val = AndResult; + TII.get(X86::AND8ri), AndResult).addReg(ValReg).addImm(1); + ValReg = AndResult; } // FALLTHROUGH, handling i1 as i8. case MVT::i8: Opc = X86::MOV8mr; break; @@ -260,26 +263,35 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM) { (Subtarget->hasAVX() ? X86::VMOVSDmr : X86::MOVSDmr) : X86::ST_Fp64m; break; case MVT::v4f32: - Opc = X86::MOVAPSmr; + if (Aligned) + Opc = X86::MOVAPSmr; + else + Opc = X86::MOVUPSmr; break; case MVT::v2f64: - Opc = X86::MOVAPDmr; + if (Aligned) + Opc = X86::MOVAPDmr; + else + Opc = X86::MOVUPDmr; break; case MVT::v4i32: case MVT::v2i64: case MVT::v8i16: case MVT::v16i8: - Opc = X86::MOVDQAmr; + if (Aligned) + Opc = X86::MOVDQAmr; + else + Opc = X86::MOVDQUmr; break; } addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, - DL, TII.get(Opc)), AM).addReg(Val); + DL, TII.get(Opc)), AM).addReg(ValReg); return true; } bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val, - const X86AddressMode &AM) { + const X86AddressMode &AM, bool Aligned) { // Handle 'null' like i32/i64 0. 
if (isa<ConstantPointerNull>(Val)) Val = Constant::getNullValue(TD.getIntPtrType(Val->getContext())); @@ -314,7 +326,7 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val, if (ValReg == 0) return false; - return X86FastEmitStore(VT, ValReg, AM); + return X86FastEmitStore(VT, ValReg, AM, Aligned); } /// X86FastEmitExtend - Emit a machine instruction to extend a value Src of @@ -688,6 +700,10 @@ bool X86FastISel::X86SelectStore(const Instruction *I) { if (S->isAtomic()) return false; + unsigned SABIAlignment = + TD.getABITypeAlignment(S->getValueOperand()->getType()); + bool Aligned = S->getAlignment() == 0 || S->getAlignment() >= SABIAlignment; + MVT VT; if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true)) return false; @@ -696,7 +712,7 @@ bool X86FastISel::X86SelectStore(const Instruction *I) { if (!X86SelectAddress(I->getOperand(1), AM)) return false; - return X86FastEmitStore(VT, I->getOperand(0), AM); + return X86FastEmitStore(VT, I->getOperand(0), AM, Aligned); } /// X86SelectRet - Select and emit code to implement ret instructions. 
diff --git a/test/CodeGen/X86/2011-10-18-FastISel-VectorParams.ll b/test/CodeGen/X86/2011-10-18-FastISel-VectorParams.ll index 8c09d97f08d..e7d1e194d9c 100644 --- a/test/CodeGen/X86/2011-10-18-FastISel-VectorParams.ll +++ b/test/CodeGen/X86/2011-10-18-FastISel-VectorParams.ll @@ -20,7 +20,7 @@ entry: %2 = load <4 x float>* %p3, align 16 %3 = load <4 x float>* %p4, align 16 %4 = load <4 x float>* %p5, align 16 -; CHECK: movaps {{%xmm[0-7]}}, (%esp) +; CHECK: movups {{%xmm[0-7]}}, (%esp) ; CHECK-NEXT: calll _dovectortest call void @dovectortest(<4 x float> %0, <4 x float> %1, <4 x float> %2, <4 x float> %3, <4 x float> %4) ret void diff --git a/test/CodeGen/X86/fast-isel-store.ll b/test/CodeGen/X86/fast-isel-store.ll new file mode 100644 index 00000000000..06f5b6621a1 --- /dev/null +++ b/test/CodeGen/X86/fast-isel-store.ll @@ -0,0 +1,48 @@ +; RUN: llc -mtriple=x86_64-none-linux -fast-isel -fast-isel-abort -mattr=+sse2 < %s | FileCheck %s +; RUN: llc -mtriple=i686-none-linux -fast-isel -fast-isel-abort -mattr=+sse2 < %s | FileCheck %s + +define i32 @test_store_32(i32* nocapture %addr, i32 %value) { +entry: + store i32 %value, i32* %addr, align 1 + ret i32 %value +} + +; CHECK: ret + +define i16 @test_store_16(i16* nocapture %addr, i16 %value) { +entry: + store i16 %value, i16* %addr, align 1 + ret i16 %value +} + +; CHECK: ret + +define <4 x i32> @test_store_4xi32(<4 x i32>* nocapture %addr, <4 x i32> %value, <4 x i32> %value2) { +; CHECK: movdqu +; CHECK: ret + %foo = add <4 x i32> %value, %value2 ; to force integer type on store + store <4 x i32> %foo, <4 x i32>* %addr, align 1 + ret <4 x i32> %foo +} + +define <4 x i32> @test_store_4xi32_aligned(<4 x i32>* nocapture %addr, <4 x i32> %value, <4 x i32> %value2) { +; CHECK: movdqa +; CHECK: ret + %foo = add <4 x i32> %value, %value2 ; to force integer type on store + store <4 x i32> %foo, <4 x i32>* %addr, align 16 + ret <4 x i32> %foo +} + +define <4 x float> @test_store_4xf32(<4 x float>* nocapture %addr, <4 x 
float> %value) { +; CHECK: movups +; CHECK: ret + store <4 x float> %value, <4 x float>* %addr, align 1 + ret <4 x float> %value +} + +define <4 x float> @test_store_4xf32_aligned(<4 x float>* nocapture %addr, <4 x float> %value) { +; CHECK: movaps +; CHECK: ret + store <4 x float> %value, <4 x float>* %addr, align 16 + ret <4 x float> %value +} diff --git a/test/CodeGen/X86/fast-isel-unaligned-store.ll b/test/CodeGen/X86/fast-isel-unaligned-store.ll deleted file mode 100644 index 7ce7f676add..00000000000 --- a/test/CodeGen/X86/fast-isel-unaligned-store.ll +++ /dev/null @@ -1,18 +0,0 @@ -; RUN: llc -mtriple=x86_64-none-linux -fast-isel -fast-isel-abort < %s | FileCheck %s -; RUN: llc -mtriple=i686-none-linux -fast-isel -fast-isel-abort < %s | FileCheck %s - -define i32 @test_store_32(i32* nocapture %addr, i32 %value) { -entry: - store i32 %value, i32* %addr, align 1 - ret i32 %value -} - -; CHECK: ret - -define i16 @test_store_16(i16* nocapture %addr, i16 %value) { -entry: - store i16 %value, i16* %addr, align 1 - ret i16 %value -} - -; CHECK: ret -- 2.34.1