The type-legalizer often scalarizes code. One of the common patterns is extract-and...

author Nadav Rotem <nadav.rotem@intel.com>

Fri, 3 Feb 2012 13:18:25 +0000 (13:18 +0000)

committer Nadav Rotem <nadav.rotem@intel.com>

Fri, 3 Feb 2012 13:18:25 +0000 (13:18 +0000)
author Nadav Rotem <nadav.rotem@intel.com>
Fri, 3 Feb 2012 13:18:25 +0000 (13:18 +0000)
committer Nadav Rotem <nadav.rotem@intel.com>
Fri, 3 Feb 2012 13:18:25 +0000 (13:18 +0000)
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index 979f79478b052a683ecdb9bf4faff76c09f59a1c..48b9a3939e7a99d01c77fd2da12a20495c3affc5 100644 (file)
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4957,6 +4957,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
  SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
    SDValue N0 = N->getOperand(0);
    EVT VT = N->getValueType(0);
+  bool isLE = TLI.isLittleEndian();
  
    // noop truncate
    if (N0.getValueType() == N->getValueType(0))
@@ -4984,6 +4985,39 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
        return N0.getOperand(0);
    }
  
+  // Fold Extract-and-trunc into a narrow extract:
+  //    trunc(extract(x)) -> extract(bitcast(x))
+  // We only run this optimization after type legalization (which often
+  // creates this pattern) and before operation legalization, after which
+  // we need to be more careful about the vector instructions that we generate.
+  if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+      LegalTypes && !LegalOperations && N0->hasOneUse()) {
+
+    EVT VecTy = N0.getOperand(0).getValueType();
+    EVT ExTy = N0.getValueType();
+    EVT TrTy = N->getValueType(0);
+
+    unsigned NumElem = VecTy.getVectorNumElements();
+    unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
+
+    EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
+    assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
+
+    SDValue EltNo = N0->getOperand(1);
+    if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
+      int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+
+      int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
+
+      SDValue V = DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
+                              NVT, N0.getOperand(0));
+
+      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+                         N->getDebugLoc(), TrTy, V,
+                         DAG.getConstant(Index, MVT::i32));
+    }
+  }
+
    // See if we can simplify the input to this truncate through knowledge that
    // only the low bits are being used.
    // For example "trunc (or (shl x, 8), y)" // -> trunc y
diff --git a/test/CodeGen/X86/2011-12-8-bitcastintprom.ll b/test/CodeGen/X86/2011-12-8-bitcastintprom.ll

index ceee8e6041ebdd09dfd3ca734cc75a5a5cd5d244..e2b3ebcf76ebb231caddad2893eb60e5f3bb0d6d 100644 (file)
--- a/test/CodeGen/X86/2011-12-8-bitcastintprom.ll
+++ b/test/CodeGen/X86/2011-12-8-bitcastintprom.ll
@@ -2,8 +2,8 @@
  
  ; Make sure that the conversion between v4i8 to v2i16 is not a simple bitcast.
  ; CHECK: prom_bug
-; CHECK: movd
  ; CHECK: shufb
+; CHECK: movd
  ; CHECK: movw
  ; CHECK: ret
  define void @prom_bug(<4 x i8> %t, i16* %p) {
diff --git a/test/CodeGen/X86/avx-shuffle.ll b/test/CodeGen/X86/avx-shuffle.ll

index 08b668c983bc1bb9aeea811223c4a36e8516ad6b..5a5c35333f2c60c5422f53020ed76960f5c74b04 100644 (file)
--- a/test/CodeGen/X86/avx-shuffle.ll
+++ b/test/CodeGen/X86/avx-shuffle.ll
@@ -109,3 +109,11 @@ define <4 x float> @test12(<4 x float>* %a) nounwind  {
    %tmp1 = shufflevector <4 x float> %tmp0, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
    ret <4 x float> %tmp1
  }
+
+;CHECK: test13
+;CHECK: shufd
+;CHECK: ret
+define <4 x i32> @test13(<2 x i32>%x) nounwind readnone {
+  %x1 = shufflevector <2 x i32> %x, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  ret <4 x i32>%x1
+}
author	Nadav Rotem <nadav.rotem@intel.com>
	Fri, 3 Feb 2012 13:18:25 +0000 (13:18 +0000)
committer	Nadav Rotem <nadav.rotem@intel.com>
	Fri, 3 Feb 2012 13:18:25 +0000 (13:18 +0000)
lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch | blob | history
test/CodeGen/X86/2011-12-8-bitcastintprom.ll		patch | blob | history
test/CodeGen/X86/avx-shuffle.ll		patch | blob | history