Adapt the x86 build_vector dagcombine to the current state of the legalizer.

author Nate Begeman <natebegeman@mac.com>

Fri, 5 Jun 2009 21:37:30 +0000 (21:37 +0000)

committer Nate Begeman <natebegeman@mac.com>

Fri, 5 Jun 2009 21:37:30 +0000 (21:37 +0000)
author Nate Begeman <natebegeman@mac.com>
Fri, 5 Jun 2009 21:37:30 +0000 (21:37 +0000)
committer Nate Begeman <natebegeman@mac.com>
Fri, 5 Jun 2009 21:37:30 +0000 (21:37 +0000)
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h

index 163f4c5ae50ebfc9b41de49261474d7f504e16a9..ef166a26c553e56f9707100d98f796d9128e6363 100644 (file)
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -825,11 +825,11 @@ public:
    virtual bool
    isGAPlusOffset(SDNode *N, GlobalValue* &GA, int64_t &Offset) const;
  
-  /// isConsecutiveLoad - Return true if LD (which must be a LoadSDNode) is
-  /// loading 'Bytes' bytes from a location that is 'Dist' units away from the
-  /// location that the 'Base' load is loading from.
-  bool isConsecutiveLoad(SDNode *LD, SDNode *Base, unsigned Bytes, int Dist,
-                         const MachineFrameInfo *MFI) const;
+  /// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a 
+  /// location that is 'Dist' units away from the location that the 'Base' load 
+  /// is loading from.
+  bool isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes,
+                         int Dist, const MachineFrameInfo *MFI) const;
  
    /// PerformDAGCombine - This method will be invoked for all target nodes and
    /// for any target-independent nodes that the target has registered with
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index 5d1b2a3ea2825833aefe261509ac226c5bb514a3..609ec82c5ad1929e3453bc9034cc446ca5ff5841 100644 (file)
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3626,30 +3626,29 @@ static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
  SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, MVT VT) {
    assert(N->getOpcode() == ISD::BUILD_PAIR);
  
-  SDNode *LD1 = getBuildPairElt(N, 0);
-  if (!ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse())
+  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
+  LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
+  if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse())
      return SDValue();
    MVT LD1VT = LD1->getValueType(0);
-  SDNode *LD2 = getBuildPairElt(N, 1);
    const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
  
    if (ISD::isNON_EXTLoad(LD2) &&
        LD2->hasOneUse() &&
        // If both are volatile this would reduce the number of volatile loads.
        // If one is volatile it might be ok, but play conservative and bail out.
-      !cast<LoadSDNode>(LD1)->isVolatile() &&
-      !cast<LoadSDNode>(LD2)->isVolatile() &&
+      !LD1->isVolatile() &&
+      !LD2->isVolatile() &&
        TLI.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1, MFI)) {
-    LoadSDNode *LD = cast<LoadSDNode>(LD1);
-    unsigned Align = LD->getAlignment();
+    unsigned Align = LD1->getAlignment();
      unsigned NewAlign = TLI.getTargetData()->
        getABITypeAlignment(VT.getTypeForMVT());
  
      if (NewAlign <= Align &&
          (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
-      return DAG.getLoad(VT, N->getDebugLoc(), LD->getChain(), LD->getBasePtr(),
-                         LD->getSrcValue(), LD->getSrcValueOffset(),
-                         false, Align);
+      return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(),
+                         LD1->getBasePtr(), LD1->getSrcValue(),
+                         LD1->getSrcValueOffset(), false, Align);
    }
  
    return SDValue();
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp

index 3334e53f0fbc3e834f1934bd528bda34b6097138..ab4cd515531c42bc13ca8b3303073db00ca8022e 100644 (file)
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -2070,13 +2070,13 @@ bool TargetLowering::isGAPlusOffset(SDNode *N, GlobalValue* &GA,
  }
  
  
-/// isConsecutiveLoad - Return true if LD (which must be a LoadSDNode) is
-/// loading 'Bytes' bytes from a location that is 'Dist' units away from the
-/// location that the 'Base' load is loading from.
-bool TargetLowering::isConsecutiveLoad(SDNode *LD, SDNode *Base,
-                                       unsigned Bytes, int Dist,
+/// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a 
+/// location that is 'Dist' units away from the location that the 'Base' load 
+/// is loading from.
+bool TargetLowering::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, 
+                                       unsigned Bytes, int Dist, 
                                         const MachineFrameInfo *MFI) const {
-  if (LD->getOperand(0).getNode() != Base->getOperand(0).getNode())
+  if (LD->getChain() != Base->getChain())
      return false;
    MVT VT = LD->getValueType(0);
    if (VT.getSizeInBits() / 8 != Bytes)
@@ -2094,6 +2094,11 @@ bool TargetLowering::isConsecutiveLoad(SDNode *LD, SDNode *Base,
      if (FS != BFS || FS != (int)Bytes) return false;
      return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
    }
+  if (Loc.getOpcode() == ISD::ADD && Loc.getOperand(0) == BaseLoc) {
+    ConstantSDNode *V = dyn_cast<ConstantSDNode>(Loc.getOperand(1));
+    if (V && (V->getSExtValue() == Dist*Bytes))
+      return true;
+  }
  
    GlobalValue *GV1 = NULL;
    GlobalValue *GV2 = NULL;
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 924155c45050b328f5215582dd67c7b4d209ea8c..77c9f3d02a6fed9533bf429618a2e554d47d162b 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -7675,8 +7675,9 @@ static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems,
      if (Elt.getOpcode() == ISD::UNDEF)
        continue;
  
-    if (!TLI.isConsecutiveLoad(Elt.getNode(), Base,
-                               EVT.getSizeInBits()/8, i, MFI))
+    LoadSDNode *LD = cast<LoadSDNode>(Elt);
+    LoadSDNode *LDBase = cast<LoadSDNode>(Base);
+    if (!TLI.isConsecutiveLoad(LD, LDBase, EVT.getSizeInBits()/8, i, MFI))
        return false;
    }
    return true;
@@ -7751,44 +7752,82 @@ static SDValue PerformBuildVectorCombine(SDNode *N, SelectionDAG &DAG,
  
    MVT VT = N->getValueType(0);
    MVT EVT = VT.getVectorElementType();
-  if ((EVT != MVT::i64 && EVT != MVT::f64) || Subtarget->is64Bit())
-    // We are looking for load i64 and zero extend. We want to transform
-    // it before legalizer has a chance to expand it. Also look for i64
-    // BUILD_PAIR bit casted to f64.
-    return SDValue();
-  // This must be an insertion into a zero vector.
-  SDValue HighElt = N->getOperand(1);
-  if (!isZeroNode(HighElt))
-    return SDValue();
+  
+  // Before or during type legalization, we want to try and convert a
+  // build_vector of an i64 load and a zero value into vzext_movl before the 
+  // legalizer can break it up.  
+  // FIXME: does the case below remove the need to do this?
+  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) {
+    if ((EVT != MVT::i64 && EVT != MVT::f64) || Subtarget->is64Bit())
+      return SDValue();
+    
+    // This must be an insertion into a zero vector.
+    SDValue HighElt = N->getOperand(1);
+    if (!isZeroNode(HighElt))
+      return SDValue();
+    
+    // Value must be a load.
+    SDNode *Base = N->getOperand(0).getNode();
+    if (!isa<LoadSDNode>(Base)) {
+      if (Base->getOpcode() != ISD::BIT_CONVERT)
+        return SDValue();
+      Base = Base->getOperand(0).getNode();
+      if (!isa<LoadSDNode>(Base))
+        return SDValue();
+    }
+    
+    // Transform it into VZEXT_LOAD addr.
+    LoadSDNode *LD = cast<LoadSDNode>(Base);
+    
+    // Load must not be an extload.
+    if (LD->getExtensionType() != ISD::NON_EXTLOAD)
+      return SDValue();
+    
+    // Load type should legal type so we don't have to legalize it.
+    if (!TLI.isTypeLegal(VT))
+      return SDValue();
+    
+    SDVTList Tys = DAG.getVTList(VT, MVT::Other);
+    SDValue Ops[] = { LD->getChain(), LD->getBasePtr() };
+    SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2);
+    TargetLowering::TargetLoweringOpt TLO(DAG);
+    TLO.CombineTo(SDValue(Base, 1), ResNode.getValue(1));
+    DCI.CommitTargetLoweringOpt(TLO);
+    return ResNode;
+  }
+
+  // The type legalizer will have broken apart v2i64 build_vector created during
+  // widening before the code which handles that case is run.  Look for build
+  // vector (load, load + 4, 0/undef, 0/undef)
+  if (VT == MVT::v4i32 || VT == MVT::v4f32) {
+    LoadSDNode *LD0 = dyn_cast<LoadSDNode>(N->getOperand(0));
+    LoadSDNode *LD1 = dyn_cast<LoadSDNode>(N->getOperand(1));
+    if (!LD0 || !LD1)
+      return SDValue();
+    if (LD0->getExtensionType() != ISD::NON_EXTLOAD ||
+        LD1->getExtensionType() != ISD::NON_EXTLOAD)
+      return SDValue();
+    // Make sure the second elt is a consecutive load.
+    if (!TLI.isConsecutiveLoad(LD1, LD0, EVT.getSizeInBits()/8, 1,
+                               DAG.getMachineFunction().getFrameInfo()))
+      return SDValue();
  
-  // Value must be a load.
-  SDNode *Base = N->getOperand(0).getNode();
-  if (!isa<LoadSDNode>(Base)) {
-    if (Base->getOpcode() != ISD::BIT_CONVERT)
+    SDValue N2 = N->getOperand(2);
+    SDValue N3 = N->getOperand(3);
+    if (!isZeroNode(N2) && N2.getOpcode() != ISD::UNDEF)
        return SDValue();
-    Base = Base->getOperand(0).getNode();
-    if (!isa<LoadSDNode>(Base))
+    if (!isZeroNode(N3) && N3.getOpcode() != ISD::UNDEF)
        return SDValue();
+    
+    SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
+    SDValue Ops[] = { LD0->getChain(), LD0->getBasePtr() };
+    SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2);
+    TargetLowering::TargetLoweringOpt TLO(DAG);
+    TLO.CombineTo(SDValue(LD0, 1), ResNode.getValue(1));
+    DCI.CommitTargetLoweringOpt(TLO);
+    return DAG.getNode(ISD::BIT_CONVERT, dl, VT, ResNode);
    }
-
-  // Transform it into VZEXT_LOAD addr.
-  LoadSDNode *LD = cast<LoadSDNode>(Base);
-
-  // Load must not be an extload.
-  if (LD->getExtensionType() != ISD::NON_EXTLOAD)
-    return SDValue();
-
-  // Load type should legal type so we don't have to legalize it.
-  if (!TLI.isTypeLegal(VT))
-    return SDValue();
-
-  SDVTList Tys = DAG.getVTList(VT, MVT::Other);
-  SDValue Ops[] = { LD->getChain(), LD->getBasePtr() };
-  SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2);
-  TargetLowering::TargetLoweringOpt TLO(DAG);
-  TLO.CombineTo(SDValue(Base, 1), ResNode.getValue(1));
-  DCI.CommitTargetLoweringOpt(TLO);
-  return ResNode;
+  return SDValue();
  }
  
  /// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes.
diff --git a/test/CodeGen/X86/dagcombine-buildvector.ll b/test/CodeGen/X86/dagcombine-buildvector.ll

index c89a296d0db9630181503d069d8941b26a359325..b96fdfc03c68d5180ee48f4c0cee69eb6f8fcbe6 100644 (file)
--- a/test/CodeGen/X86/dagcombine-buildvector.ll
+++ b/test/CodeGen/X86/dagcombine-buildvector.ll
@@ -1,13 +1,25 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t -f
+; RUN: llvm-as < %s | llc -march=x86 -mcpu=penryn -disable-mmx -o %t -f
  ; RUN: grep unpcklpd %t | count 1
  ; RUN: grep movapd %t | count 1
+; RUN: grep movaps %t | count 1
  
  ; Shows a dag combine bug that will generate an illegal build vector
  ; with v2i64 build_vector i32, i32.
  
-define void @test(<2 x double>* %dst, <4 x double> %src) {
+define void @test(<2 x double>* %dst, <4 x double> %src) nounwind {
  entry:
          %tmp7.i = shufflevector <4 x double> %src, <4 x double> undef, <2 x i32> < i32 0, i32 2 >
          store <2 x double> %tmp7.i, <2 x double>* %dst
          ret void
  }
+
+define void @test2(<4 x i16>* %src, <4 x i32>* %dest) nounwind {
+entry:
+        %tmp1 = load <4 x i16>* %src
+        %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+        %0 = tail call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %tmp3)
+        store <4 x i32> %0, <4 x i32>* %dest
+        ret void
+}
+
+declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
author	Nate Begeman <natebegeman@mac.com>
	Fri, 5 Jun 2009 21:37:30 +0000 (21:37 +0000)
committer	Nate Begeman <natebegeman@mac.com>
	Fri, 5 Jun 2009 21:37:30 +0000 (21:37 +0000)
include/llvm/Target/TargetLowering.h		patch | blob | history
lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch | blob | history
lib/CodeGen/SelectionDAG/TargetLowering.cpp		patch | blob | history
lib/Target/X86/X86ISelLowering.cpp		patch | blob | history
test/CodeGen/X86/dagcombine-buildvector.ll		patch | blob | history