Make x86 fast-isel correctly choose between aligned and unaligned operations for vector stores.

author Craig Topper <craig.topper@gmail.com>

Wed, 17 Jul 2013 05:57:45 +0000 (05:57 +0000)

committer Craig Topper <craig.topper@gmail.com>

Wed, 17 Jul 2013 05:57:45 +0000 (05:57 +0000)
author Craig Topper <craig.topper@gmail.com>
Wed, 17 Jul 2013 05:57:45 +0000 (05:57 +0000)
committer Craig Topper <craig.topper@gmail.com>
Wed, 17 Jul 2013 05:57:45 +0000 (05:57 +0000)
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp

index 9c91e935f0b644f5cf00012def71af75d9405f8a..7419822b673a875193089dc6db764d4545451d57 100644 (file)
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -79,8 +79,10 @@ private:
  
    bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR);
  
-  bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM);
-  bool X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM);
+  bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM,
+                        bool Aligned = false);
+  bool X86FastEmitStore(EVT VT, unsigned ValReg, const X86AddressMode &AM,
+                        bool Aligned = false);
  
    bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
                           unsigned &ResultReg);
@@ -233,7 +235,8 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
  /// and a displacement offset, or a GlobalAddress,
  /// i.e. V. Return true if it is possible.
  bool
-X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM) {
+X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg,
+                              const X86AddressMode &AM, bool Aligned) {
    // Get opcode and regclass of the output for the given store instruction.
    unsigned Opc = 0;
    switch (VT.getSimpleVT().SimpleTy) {
@@ -243,8 +246,8 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM) {
      // Mask out all but lowest bit.
      unsigned AndResult = createResultReg(&X86::GR8RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
-            TII.get(X86::AND8ri), AndResult).addReg(Val).addImm(1);
-    Val = AndResult;
+            TII.get(X86::AND8ri), AndResult).addReg(ValReg).addImm(1);
+    ValReg = AndResult;
    }
    // FALLTHROUGH, handling i1 as i8.
    case MVT::i8:  Opc = X86::MOV8mr;  break;
@@ -260,26 +263,35 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM) {
            (Subtarget->hasAVX() ? X86::VMOVSDmr : X86::MOVSDmr) : X86::ST_Fp64m;
      break;
    case MVT::v4f32:
-    Opc = X86::MOVAPSmr;
+    if (Aligned)
+      Opc = X86::MOVAPSmr;
+    else
+      Opc = X86::MOVUPSmr;
      break;
    case MVT::v2f64:
-    Opc = X86::MOVAPDmr;
+    if (Aligned)
+      Opc = X86::MOVAPDmr;
+    else
+      Opc = X86::MOVUPDmr;
      break;
    case MVT::v4i32:
    case MVT::v2i64:
    case MVT::v8i16:
    case MVT::v16i8:
-    Opc = X86::MOVDQAmr;
+    if (Aligned)
+      Opc = X86::MOVDQAmr;
+    else
+      Opc = X86::MOVDQUmr;
      break;
    }
  
    addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
-                         DL, TII.get(Opc)), AM).addReg(Val);
+                         DL, TII.get(Opc)), AM).addReg(ValReg);
    return true;
  }
  
  bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
-                                   const X86AddressMode &AM) {
+                                   const X86AddressMode &AM, bool Aligned) {
    // Handle 'null' like i32/i64 0.
    if (isa<ConstantPointerNull>(Val))
      Val = Constant::getNullValue(TD.getIntPtrType(Val->getContext()));
@@ -314,7 +326,7 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
    if (ValReg == 0)
      return false;
  
-  return X86FastEmitStore(VT, ValReg, AM);
+  return X86FastEmitStore(VT, ValReg, AM, Aligned);
  }
  
  /// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
@@ -688,6 +700,10 @@ bool X86FastISel::X86SelectStore(const Instruction *I) {
    if (S->isAtomic())
      return false;
  
+  unsigned SABIAlignment =
+    TD.getABITypeAlignment(S->getValueOperand()->getType());
+  bool Aligned = S->getAlignment() == 0 || S->getAlignment() >= SABIAlignment;
+
    MVT VT;
    if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true))
      return false;
@@ -696,7 +712,7 @@ bool X86FastISel::X86SelectStore(const Instruction *I) {
    if (!X86SelectAddress(I->getOperand(1), AM))
      return false;
  
-  return X86FastEmitStore(VT, I->getOperand(0), AM);
+  return X86FastEmitStore(VT, I->getOperand(0), AM, Aligned);
  }
  
  /// X86SelectRet - Select and emit code to implement ret instructions.
diff --git a/test/CodeGen/X86/2011-10-18-FastISel-VectorParams.ll b/test/CodeGen/X86/2011-10-18-FastISel-VectorParams.ll

index 8c09d97f08d2fa9382ca3e0fda8f2b41a92b26db..e7d1e194d9cdf6964953189e04dafa09f19005de 100644 (file)
--- a/test/CodeGen/X86/2011-10-18-FastISel-VectorParams.ll
+++ b/test/CodeGen/X86/2011-10-18-FastISel-VectorParams.ll
@@ -20,7 +20,7 @@ entry:
    %2 = load <4 x float>* %p3, align 16
    %3 = load <4 x float>* %p4, align 16
    %4 = load <4 x float>* %p5, align 16
-; CHECK:      movaps {{%xmm[0-7]}}, (%esp)
+; CHECK:      movups {{%xmm[0-7]}}, (%esp)
  ; CHECK-NEXT: calll _dovectortest 
    call void @dovectortest(<4 x float> %0, <4 x float> %1, <4 x float> %2, <4 x float> %3, <4 x float> %4)
    ret void
diff --git a/test/CodeGen/X86/fast-isel-store.ll b/test/CodeGen/X86/fast-isel-store.ll

new file mode 100644 (file)

index 0000000..06f5b66
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-store.ll
@@ -0,0 +1,48 @@
+; RUN: llc -mtriple=x86_64-none-linux -fast-isel -fast-isel-abort -mattr=+sse2 < %s | FileCheck %s
+; RUN: llc -mtriple=i686-none-linux -fast-isel -fast-isel-abort -mattr=+sse2 < %s | FileCheck %s
+
+define i32 @test_store_32(i32* nocapture %addr, i32 %value) {
+entry:
+  store i32 %value, i32* %addr, align 1
+  ret i32 %value
+}
+
+; CHECK: ret
+
+define i16 @test_store_16(i16* nocapture %addr, i16 %value) {
+entry:
+  store i16 %value, i16* %addr, align 1
+  ret i16 %value
+}
+
+; CHECK: ret
+
+define <4 x i32> @test_store_4xi32(<4 x i32>* nocapture %addr, <4 x i32> %value, <4 x i32> %value2) {
+; CHECK: movdqu
+; CHECK: ret
+  %foo = add <4 x i32> %value, %value2 ; to force integer type on store
+  store <4 x i32> %foo, <4 x i32>* %addr, align 1
+  ret <4 x i32> %foo
+}
+
+define <4 x i32> @test_store_4xi32_aligned(<4 x i32>* nocapture %addr, <4 x i32> %value, <4 x i32> %value2) {
+; CHECK: movdqa
+; CHECK: ret
+  %foo = add <4 x i32> %value, %value2 ; to force integer type on store
+  store <4 x i32> %foo, <4 x i32>* %addr, align 16
+  ret <4 x i32> %foo
+}
+
+define <4 x float> @test_store_4xf32(<4 x float>* nocapture %addr, <4 x float> %value) {
+; CHECK: movups
+; CHECK: ret
+  store <4 x float> %value, <4 x float>* %addr, align 1
+  ret <4 x float> %value
+}
+
+define <4 x float> @test_store_4xf32_aligned(<4 x float>* nocapture %addr, <4 x float> %value) {
+; CHECK: movaps
+; CHECK: ret
+  store <4 x float> %value, <4 x float>* %addr, align 16
+  ret <4 x float> %value
+}
diff --git a/test/CodeGen/X86/fast-isel-unaligned-store.ll b/test/CodeGen/X86/fast-isel-unaligned-store.ll

deleted file mode 100644 (file)

index 7ce7f67..0000000
--- a/test/CodeGen/X86/fast-isel-unaligned-store.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llc -mtriple=x86_64-none-linux -fast-isel -fast-isel-abort < %s | FileCheck %s
-; RUN: llc -mtriple=i686-none-linux -fast-isel -fast-isel-abort < %s | FileCheck %s
-
-define i32 @test_store_32(i32* nocapture %addr, i32 %value) {
-entry:
-  store i32 %value, i32* %addr, align 1
-  ret i32 %value
-}
-
-; CHECK: ret
-
-define i16 @test_store_16(i16* nocapture %addr, i16 %value) {
-entry:
-  store i16 %value, i16* %addr, align 1
-  ret i16 %value
-}
-
-; CHECK: ret
author	Craig Topper <craig.topper@gmail.com>
	Wed, 17 Jul 2013 05:57:45 +0000 (05:57 +0000)
committer	Craig Topper <craig.topper@gmail.com>
	Wed, 17 Jul 2013 05:57:45 +0000 (05:57 +0000)
lib/Target/X86/X86FastISel.cpp		patch \| blob \| history
test/CodeGen/X86/2011-10-18-FastISel-VectorParams.ll		patch \| blob \| history
test/CodeGen/X86/fast-isel-store.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/X86/fast-isel-unaligned-store.ll	[deleted file]	patch \| blob \| history