Expand V_SET0 to xorps by default.

author Jakob Stoklund Olesen <stoklund@2pi.dk>

Mon, 7 Nov 2011 19:15:58 +0000 (19:15 +0000)

committer Jakob Stoklund Olesen <stoklund@2pi.dk>

Mon, 7 Nov 2011 19:15:58 +0000 (19:15 +0000)
author Jakob Stoklund Olesen <stoklund@2pi.dk>
Mon, 7 Nov 2011 19:15:58 +0000 (19:15 +0000)
committer Jakob Stoklund Olesen <stoklund@2pi.dk>
Mon, 7 Nov 2011 19:15:58 +0000 (19:15 +0000)
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp

index 3a02de0aa01b91f64774b0999eaf859135825066..8dcd637a966fcaa974b23e123b75e0d265a2d53b 100644 (file)
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -2420,7 +2420,7 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
    bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
    switch (MI->getOpcode()) {
    case X86::V_SET0:
-    return Expand2AddrUndef(MI, get(HasAVX ? X86::VPXORrr : X86::PXORrr));
+    return Expand2AddrUndef(MI, get(HasAVX ? X86::VXORPSrr : X86::XORPSrr));
    case X86::TEST8ri_NOREX:
      MI->setDesc(get(X86::TEST8ri));
      return true;
diff --git a/test/CodeGen/X86/avx-basic.ll b/test/CodeGen/X86/avx-basic.ll

index 0a46b0828a8cbc6ecbaaa78bdf7e7e752abb5ecf..edbdc06a1c94be3d929c2d09fac2211fe1f19505 100644 (file)
--- a/test/CodeGen/X86/avx-basic.ll
+++ b/test/CodeGen/X86/avx-basic.ll
@@ -6,7 +6,7 @@
  
  define void @zero128() nounwind ssp {
  entry:
-  ; CHECK: vpxor
+  ; CHECK: vxorps
    ; CHECK: vmovaps
    store <4 x float> zeroinitializer, <4 x float>* @z, align 16
    ret void
diff --git a/test/CodeGen/X86/sse2-blend.ll b/test/CodeGen/X86/sse2-blend.ll

index 0007cab0961f1adab2c59039e75dfc81156eca43..4ff1d035e4d6a349f7f2f94ae1841c54ce22725c 100644 (file)
--- a/test/CodeGen/X86/sse2-blend.ll
+++ b/test/CodeGen/X86/sse2-blend.ll
@@ -26,8 +26,10 @@ define void@vsel_i32(<4 x i32>* %v1, <4 x i32>* %v2) {
    ret void
  }
  
+; FIXME: The -mattr=+sse2,-sse41 flags disable the ExecutionDepsFix pass,
+; causing the mixed domains here.
  ; CHECK: vsel_i64
-; CHECK: pxor
+; CHECK: xorps
  ; CHECK: pand
  ; CHECK: andnps
  ; CHECK: orps
@@ -41,8 +43,10 @@ define void@vsel_i64(<4 x i64>* %v1, <4 x i64>* %v2) {
    ret void
  }
  
+; FIXME: The -mattr=+sse2,-sse41 flags disable the ExecutionDepsFix pass,
+; causing the mixed domains here.
  ; CHECK: vsel_double
-; CHECK: pxor
+; CHECK: xorps
  ; CHECK: pand
  ; CHECK: andnps
  ; CHECK: orps
diff --git a/test/CodeGen/X86/sse2.ll b/test/CodeGen/X86/sse2.ll

index 70e0a8a177e051bf0c8bf56afe5f9803ea52b727..d520d5c1e317a485f4dc69aba1c46b01ed548658 100644 (file)
--- a/test/CodeGen/X86/sse2.ll
+++ b/test/CodeGen/X86/sse2.ll
@@ -98,7 +98,7 @@ define void @test7() nounwind {
          ret void
          
  ; CHECK: test7:
-; CHECK:       pxor    %xmm0, %xmm0
+; CHECK:       xorps   %xmm0, %xmm0
  ; CHECK:       movaps  %xmm0, 0
  }
  
diff --git a/test/CodeGen/X86/vec_return.ll b/test/CodeGen/X86/vec_return.ll

index 676be9b7179cfd9847dffb2fbeefe2c6fb5d3693..d5fc11ecd54c607d3f4b4aae2504372beab7e124 100644 (file)
--- a/test/CodeGen/X86/vec_return.ll
+++ b/test/CodeGen/X86/vec_return.ll
@@ -1,12 +1,17 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 > %t
-; RUN: grep pxor %t | count 1
-; RUN: grep movaps %t | count 1
-; RUN: not grep shuf %t
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
  
+; Without any typed operations, always use the smaller xorps.
+; CHECK: test
+; CHECK: xorps
  define <2 x double> @test() {
         ret <2 x double> zeroinitializer
  }
  
+; Prefer a constant pool load here.
+; CHECK: test2
+; CHECK-NOT: shuf
+; CHECK: movaps LCP
+; CHECK-NEXT: ret
  define <4 x i32> @test2() nounwind  {
         ret <4 x i32> < i32 0, i32 0, i32 1, i32 0 >
  }
diff --git a/test/CodeGen/X86/vec_zero.ll b/test/CodeGen/X86/vec_zero.ll

index 4d1f05629b414d233ffded4678903179d4c74c64..682a0dfca806f0726ff441ffefe1f59fe08f43fa 100644 (file)
--- a/test/CodeGen/X86/vec_zero.ll
+++ b/test/CodeGen/X86/vec_zero.ll
@@ -1,5 +1,6 @@
  ; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
  
+; CHECK: foo
  ; CHECK: xorps
  define void @foo(<4 x float>* %P) {
          %T = load <4 x float>* %P               ; <<4 x float>> [#uses=1]
@@ -8,6 +9,7 @@ define void @foo(<4 x float>* %P) {
          ret void
  }
  
+; CHECK: bar
  ; CHECK: pxor
  define void @bar(<4 x i32>* %P) {
          %T = load <4 x i32>* %P         ; <<4 x i32>> [#uses=1]
@@ -16,3 +18,13 @@ define void @bar(<4 x i32>* %P) {
          ret void
  }
  
+; Without any type hints from operations, we fall back to the smaller xorps.
+; The IR type <4 x i32> is ignored.
+; CHECK: untyped_zero
+; CHECK: xorps
+; CHECK: movaps
+define void @untyped_zero(<4 x i32>* %p) {
+entry:
+  store <4 x i32> zeroinitializer, <4 x i32>* %p, align 16
+  ret void
+}
diff --git a/test/CodeGen/X86/vec_zero_cse.ll b/test/CodeGen/X86/vec_zero_cse.ll

index 8aa50945e635ab23c94dab3f99c3cc476363f14b..41ea0245ed86426ed226a481bf1dcb7716fa08b1 100644 (file)
--- a/test/CodeGen/X86/vec_zero_cse.ll
+++ b/test/CodeGen/X86/vec_zero_cse.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep pxor | count 1
+; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep xorps | count 1
  ; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep pcmpeqd | count 1
  ; 64-bit stores here do not use MMX.
  
diff --git a/test/CodeGen/X86/xor.ll b/test/CodeGen/X86/xor.ll

index 178c59dbaa97bd949101c814690452471cc238ba..ddc4cab14a4c55d52c221e14d77fc58ac8815f49 100644 (file)
--- a/test/CodeGen/X86/xor.ll
+++ b/test/CodeGen/X86/xor.ll
@@ -8,7 +8,7 @@ define <4 x i32> @test1() nounwind {
         ret <4 x i32> %tmp
          
  ; X32: test1:
-; X32: pxor    %xmm0, %xmm0
+; X32: xorps   %xmm0, %xmm0
  ; X32: ret
  }
author	Jakob Stoklund Olesen <stoklund@2pi.dk>
	Mon, 7 Nov 2011 19:15:58 +0000 (19:15 +0000)
committer	Jakob Stoklund Olesen <stoklund@2pi.dk>
	Mon, 7 Nov 2011 19:15:58 +0000 (19:15 +0000)
lib/Target/X86/X86InstrInfo.cpp		patch | blob | history
test/CodeGen/X86/avx-basic.ll		patch | blob | history
test/CodeGen/X86/sse2-blend.ll		patch | blob | history
test/CodeGen/X86/sse2.ll		patch | blob | history
test/CodeGen/X86/vec_return.ll		patch | blob | history
test/CodeGen/X86/vec_zero.ll		patch | blob | history
test/CodeGen/X86/vec_zero_cse.ll		patch | blob | history
test/CodeGen/X86/xor.ll		patch | blob | history