Use MOVSSmr instead of EXTRACTPSmr in the case of extracting element 0 of a vector and storing it, since MOVSSmr is smaller and faster.

author Dan Gohman <gohman@apple.com>

Fri, 31 Oct 2008 00:57:24 +0000 (00:57 +0000)

committer Dan Gohman <gohman@apple.com>

Fri, 31 Oct 2008 00:57:24 +0000 (00:57 +0000)
author Dan Gohman <gohman@apple.com>
Fri, 31 Oct 2008 00:57:24 +0000 (00:57 +0000)
committer Dan Gohman <gohman@apple.com>
Fri, 31 Oct 2008 00:57:24 +0000 (00:57 +0000)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 7e1b7a0c76d699deffbab1c9c272b87ee4c9def9..cdf167078137ebb318de46ae1f9825433982bf8c 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -4194,11 +4194,15 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op,
    } else if (VT == MVT::f32) {
      // EXTRACTPS outputs to a GPR32 register which will require a movd to copy
      // the result back to FR32 register. It's only worth matching if the
-    // result has a single use which is a store or a bitcast to i32.
+    // result has a single use which is a store or a bitcast to i32.  And in
+    // the case of a store, it's not worth it if the index is a constant 0,
+    // because a MOVSSmr can be used instead, which is smaller and faster.
      if (!Op.hasOneUse())
        return SDValue();
      SDNode *User = *Op.getNode()->use_begin();
-    if (User->getOpcode() != ISD::STORE &&
+    if ((User->getOpcode() != ISD::STORE ||
+         (isa<ConstantSDNode>(Op.getOperand(1)) &&
+          cast<ConstantSDNode>(Op.getOperand(1))->isNullValue())) &&
          (User->getOpcode() != ISD::BIT_CONVERT ||
           User->getValueType(0) != MVT::i32))
        return SDValue();
diff --git a/test/CodeGen/X86/extractps.ll b/test/CodeGen/X86/extractps.ll

index cc6d92c8d3967aed8673337b2fc26b3406beb436..484d2c4e5e100d0b5e80c88a0487647434bb742e 100644 (file)
--- a/test/CodeGen/X86/extractps.ll
+++ b/test/CodeGen/X86/extractps.ll
@@ -1,7 +1,7 @@
  ; RUN: llvm-as < %s | llc -march=x86 -mcpu=penryn > %t
  ; RUN: not grep movd %t
-; RUN: not grep movss %t
-; RUN: grep {extractps \\\$0, %xmm0, } %t
+; RUN: grep {movss     %xmm} %t | count 1
+; RUN: grep {extractps \\\$1, %xmm0, } %t | count 1
  ; PR2647
  
  external global float, align 16         ; <float*>:0 [#uses=2]
@@ -14,6 +14,14 @@ define internal void @""() nounwind {
          store float %4, float* @0, align 16
          ret void
  }
+define internal void @""() nounwind {
+        load float* @0, align 16                ; <float>:1 [#uses=1]
+        insertelement <4 x float> undef, float %1, i32 1                ; <<4 x float>>:2 [#uses=1]
+        call <4 x float> @llvm.x86.sse.rsqrt.ss( <4 x float> %2 )              ; <<4 x float>>:3 [#uses=1]
+        extractelement <4 x float> %3, i32 1            ; <float>:4 [#uses=1]
+        store float %4, float* @0, align 16
+        ret void
+}
  
  declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
author	Dan Gohman <gohman@apple.com>
	Fri, 31 Oct 2008 00:57:24 +0000 (00:57 +0000)
committer	Dan Gohman <gohman@apple.com>
	Fri, 31 Oct 2008 00:57:24 +0000 (00:57 +0000)
lib/Target/X86/X86ISelLowering.cpp		patch | blob | history
test/CodeGen/X86/extractps.ll		patch | blob | history