Teach dag combine to fold the following transformation more aggressively:

author Evan Cheng <evan.cheng@apple.com>

Wed, 6 Jan 2010 19:38:29 +0000 (19:38 +0000)

committer Evan Cheng <evan.cheng@apple.com>

Wed, 6 Jan 2010 19:38:29 +0000 (19:38 +0000)
author Evan Cheng <evan.cheng@apple.com>
Wed, 6 Jan 2010 19:38:29 +0000 (19:38 +0000)
committer Evan Cheng <evan.cheng@apple.com>
Wed, 6 Jan 2010 19:38:29 +0000 (19:38 +0000)
diff --git a/include/llvm/CodeGen/SelectionDAGISel.h b/include/llvm/CodeGen/SelectionDAGISel.h

index bf5572684743958d88308681f71dff26cd2bdf42..b33b21da42ad903a0dd5fa5d2226a8f74d758983 100644 (file)
--- a/include/llvm/CodeGen/SelectionDAGISel.h
+++ b/include/llvm/CodeGen/SelectionDAGISel.h
@@ -131,6 +131,7 @@ private:
    void CodeGenAndEmitDAG();
    void LowerArguments(BasicBlock *BB);
    
+  void ShrinkDemandedOps();
    void ComputeLiveOutVRegInfo();
  
    void HandlePHINodesInSuccessorBlocks(BasicBlock *LLVMBB);
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h

index dd28a87938ffefd1aa0d497b4285ed2359922aba..15da8456f1726a171152f79c7994bbce1e69bb06 100644 (file)
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -774,10 +774,12 @@ public:
    /// that want to combine 
    struct TargetLoweringOpt {
      SelectionDAG &DAG;
+    bool ShrinkOps;
      SDValue Old;
      SDValue New;
  
-    explicit TargetLoweringOpt(SelectionDAG &InDAG) : DAG(InDAG) {}
+    explicit TargetLoweringOpt(SelectionDAG &InDAG, bool Shrink = false) :
+      DAG(InDAG), ShrinkOps(Shrink) {}
      
      bool CombineTo(SDValue O, SDValue N) { 
        Old = O; 
@@ -1478,7 +1480,7 @@ public:
    }
  
    /// isZExtFree - Return true if any actual instruction that defines a
-  /// value of type Ty1 implicit zero-extends the value to Ty2 in the result
+  /// value of type Ty1 implicitly zero-extends the value to Ty2 in the result
    /// register. This does not necessarily include registers defined in
    /// unknown ways, such as incoming arguments, or copies from unknown
    /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index 759fa0e112565c49a9ae752d70d65e1f1f3640b4..5ab92805f1aad70ae611ec3c6db358f2955014cc 100644 (file)
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1688,18 +1688,18 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
    // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
    // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
    // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
-  // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
+  // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y))
    //
    // do not sink logical op inside of a vector extend, since it may combine
    // into a vsetcc.
-  if ((N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND||
+  EVT Op0VT = N0.getOperand(0).getValueType();
+  if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
+       N0.getOpcode() == ISD::ANY_EXTEND  ||
         N0.getOpcode() == ISD::SIGN_EXTEND ||
-       (N0.getOpcode() == ISD::TRUNCATE &&
-        !TLI.isTruncateFree(N0.getOperand(0).getValueType(), VT))) &&
+       (N0.getOpcode() == ISD::TRUNCATE && TLI.isTypeLegal(Op0VT))) &&
        !VT.isVector() &&
-      N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
-      (!LegalOperations ||
-       TLI.isOperationLegal(N->getOpcode(), N0.getOperand(0).getValueType()))) {
+      Op0VT == N1.getOperand(0).getValueType() &&
+      (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
      SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(),
                                   N0.getOperand(0).getValueType(),
                                   N0.getOperand(0), N1.getOperand(0));
@@ -1839,6 +1839,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
    if (!VT.isVector() &&
        SimplifyDemandedBits(SDValue(N, 0)))
      return SDValue(N, 0);
+
    // fold (zext_inreg (extload x)) -> (zextload x)
    if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
@@ -1885,48 +1886,89 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
  
    // fold (and (load x), 255) -> (zextload x, i8)
    // fold (and (extload x, i16), 255) -> (zextload x, i8)
-  if (N1C && N0.getOpcode() == ISD::LOAD) {
-    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+  // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
+  if (N1C && (N0.getOpcode() == ISD::LOAD ||
+              (N0.getOpcode() == ISD::ANY_EXTEND &&
+               N0.getOperand(0).getOpcode() == ISD::LOAD))) {
+    bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND;
+    LoadSDNode *LN0 = HasAnyExt
+      ? cast<LoadSDNode>(N0.getOperand(0))
+      : cast<LoadSDNode>(N0);
      if (LN0->getExtensionType() != ISD::SEXTLOAD &&
-        LN0->isUnindexed() && N0.hasOneUse() &&
-        // Do not change the width of a volatile load.
-        !LN0->isVolatile()) {
-      EVT ExtVT = MVT::Other;
+        LN0->isUnindexed() && N0.hasOneUse()) {
        uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits();
-      if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue()))
-        ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
-
-      EVT LoadedVT = LN0->getMemoryVT();
-
-      // Do not generate loads of non-round integer types since these can
-      // be expensive (and would be wrong if the type is not byte sized).
-      if (ExtVT != MVT::Other && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() &&
-          (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
-        EVT PtrType = N0.getOperand(1).getValueType();
-
-        // For big endian targets, we need to add an offset to the pointer to
-        // load the correct bytes.  For little endian systems, we merely need to
-        // read fewer bytes from the same pointer.
-        unsigned LVTStoreBytes = LoadedVT.getStoreSize();
-        unsigned EVTStoreBytes = ExtVT.getStoreSize();
-        unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
-        unsigned Alignment = LN0->getAlignment();
-        SDValue NewPtr = LN0->getBasePtr();
-
-        if (TLI.isBigEndian()) {
-          NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), PtrType,
-                               NewPtr, DAG.getConstant(PtrOff, PtrType));
-          Alignment = MinAlign(Alignment, PtrOff);
+      if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){
+        EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
+        EVT LoadedVT = LN0->getMemoryVT();
+
+        if (ExtVT == LoadedVT &&
+            (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
+          if (HasAnyExt) {
+            SDValue Load = 
+              DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(),
+                             LN0->getValueType(0),
+                             LN0->getChain(), LN0->getBasePtr(),
+                             LN0->getSrcValue(), LN0->getSrcValueOffset(),
+                             ExtVT, LN0->isVolatile(), LN0->getAlignment());
+            AddToWorkList(N);
+            CombineTo(N0.getOperand(0).getNode(), Load, Load.getValue(1));
+            return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+          } else {
+            SDValue Load =
+              DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), VT,
+                             LN0->getChain(), LN0->getBasePtr(),
+                             LN0->getSrcValue(), LN0->getSrcValueOffset(),
+                             ExtVT, LN0->isVolatile(), LN0->getAlignment());
+            AddToWorkList(N);
+            CombineTo(N0.getNode(), Load, Load.getValue(1));
+            return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+          }
+        } else if (!LN0->isVolatile()) {
+          // Do not change the width of a volatile load.
+          // Do not generate loads of non-round integer types since these can
+          // be expensive (and would be wrong if the type is not byte sized).
+          if (LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() &&
+              (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
+            EVT PtrType = LN0->getOperand(1).getValueType();
+
+            // For big endian targets, we need to add an offset to the pointer
+            // to load the correct bytes.  For little endian systems, we merely
+            // need to read fewer bytes from the same pointer.
+            unsigned LVTStoreBytes = LoadedVT.getStoreSize();
+            unsigned EVTStoreBytes = ExtVT.getStoreSize();
+            unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
+            unsigned Alignment = LN0->getAlignment();
+            SDValue NewPtr = LN0->getBasePtr();
+
+            if (TLI.isBigEndian()) {
+              NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), PtrType,
+                                   NewPtr, DAG.getConstant(PtrOff, PtrType));
+              Alignment = MinAlign(Alignment, PtrOff);
+            }
+
+            AddToWorkList(NewPtr.getNode());
+            if (HasAnyExt) {
+              SDValue Load =
+                DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(),
+                               LN0->getValueType(0),
+                               LN0->getChain(), NewPtr,
+                               LN0->getSrcValue(), LN0->getSrcValueOffset(),
+                               ExtVT, LN0->isVolatile(), Alignment);
+              AddToWorkList(N);
+              CombineTo(N0.getOperand(0).getNode(), Load, Load.getValue(1));
+              return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+            } else {
+              SDValue Load =
+                DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), VT,
+                               LN0->getChain(), NewPtr,
+                               LN0->getSrcValue(), LN0->getSrcValueOffset(),
+                               ExtVT, LN0->isVolatile(), Alignment);
+              AddToWorkList(N);
+              CombineTo(N0.getNode(), Load, Load.getValue(1));
+              return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+            }
+          }
          }
-
-        AddToWorkList(NewPtr.getNode());
-        SDValue Load =
-          DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), VT, LN0->getChain(),
-                         NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset(),
-                         ExtVT, LN0->isVolatile(), Alignment);
-        AddToWorkList(N);
-        CombineTo(N0.getNode(), Load, Load.getValue(1));
-        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
        }
      }
    }
@@ -2778,9 +2820,17 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
    // However when after the source operand of SRL is optimized into AND, the SRL
    // itself may not be optimized further. Look for it and add the BRCOND into
    // the worklist.
-  if (N->hasOneUse() &&
-      N->use_begin()->getOpcode() == ISD::BRCOND)
-    AddToWorkList(*N->use_begin());
+  if (N->hasOneUse()) {
+    SDNode *Use = *N->use_begin();
+    if (Use->getOpcode() == ISD::BRCOND)
+      AddToWorkList(Use);
+    else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
+      // Also look past the truncate.
+      Use = *Use->use_begin();
+      if (Use->getOpcode() == ISD::BRCOND)
+        AddToWorkList(Use);
+    }
+  }
  
    return SDValue();
  }
@@ -3198,7 +3248,10 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
  
    // fold (zext (truncate x)) -> (and x, mask)
    if (N0.getOpcode() == ISD::TRUNCATE &&
-      (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) {
+      (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) &&
+      (!TLI.isTruncateFree(N0.getOperand(0).getValueType(),
+                           N0.getValueType()) ||
+       !TLI.isZExtFree(N0.getValueType(), VT))) {
      SDValue Op = N0.getOperand(0);
      if (Op.getValueType().bitsLT(VT)) {
        Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op);
@@ -3704,7 +3757,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
        return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0));
      else
        // if the source and dest are the same type, we can drop both the extend
-      // and the truncate
+      // and the truncate.
        return N0.getOperand(0);
    }
  
@@ -4515,6 +4568,13 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
                         N1.getOperand(0), N1.getOperand(1), N2);
    }
  
+  SDNode *Trunc = 0;
+  if (N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) {
+    // Look past the truncate.
+    Trunc = N1.getNode();
+    N1 = N1.getOperand(0);
+  }
+
    if (N1.hasOneUse() && N1.getOpcode() == ISD::SRL) {
      // Match this pattern so that we can generate simpler code:
      //
@@ -4526,7 +4586,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
      // into
      // 
      //   %a = ...
-    //   %b = and %a, 2
+    //   %b = and i32 %a, 2
      //   %c = setcc eq %b, 0
      //   brcond %c ...
      //
@@ -4537,7 +4597,6 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
      SDValue Op1 = N1.getOperand(1);
  
      if (Op0.getOpcode() == ISD::AND &&
-        Op0.hasOneUse() &&
          Op1.getOpcode() == ISD::Constant) {
        SDValue AndOp1 = Op0.getOperand(1);
  
@@ -4552,12 +4611,21 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
                           Op0, DAG.getConstant(0, Op0.getValueType()),
                           ISD::SETNE);
  
+          SDValue NewBRCond = DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
+                                          MVT::Other, Chain, SetCC, N2);
+          // Don't add the new BRCond into the worklist or else SimplifySelectCC
+          // will convert it back to (X & C1) >> C2.
+          CombineTo(N, NewBRCond, false);
+          // Truncate is dead.
+          if (Trunc) {
+            removeFromWorkList(Trunc);
+            DAG.DeleteNode(Trunc);
+          }
            // Replace the uses of SRL with SETCC
            DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
            removeFromWorkList(N1.getNode());
            DAG.DeleteNode(N1.getNode());
-          return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
-                             MVT::Other, Chain, SetCC, N2);
+          return SDValue(N, 0);   // Return N so it doesn't get rechecked!
          }
        }
      }
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

index f1c8650728ce9eae0ebbf839d31edf50a88a6115..ca8c17beffa06f31a79a99915ebc4ba27fa86022 100644 (file)
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2656,6 +2656,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
      // size of the value, the shift/rotate count is guaranteed to be zero.
      if (VT == MVT::i1)
        return N1;
+    if (N2C && N2C->isNullValue())
+      return N1;
      break;
    case ISD::FP_ROUND_INREG: {
      EVT EVT = cast<VTSDNode>(N2)->getVT();
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp

index 3073dfe9cc604dcfefd0b9b14e8a1b0be7de15dc..8ed24cce744f039346e6075950b237cf289d3ee1 100644 (file)
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -438,6 +438,75 @@ void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB,
    SDB->clear();
  }
  
+void SelectionDAGISel::ShrinkDemandedOps() {
+  SmallVector<SDNode*, 128> Worklist;
+
+  // Add all the dag nodes to the worklist.
+  Worklist.reserve(CurDAG->allnodes_size());
+  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
+       E = CurDAG->allnodes_end(); I != E; ++I)
+    Worklist.push_back(I);
+
+  APInt Mask;
+  APInt KnownZero;
+  APInt KnownOne;
+
+  TargetLowering::TargetLoweringOpt TLO(*CurDAG, true);
+  while (!Worklist.empty()) {
+    SDNode *N = Worklist.back();
+    Worklist.pop_back();
+
+    if (N->use_empty() && N != CurDAG->getRoot().getNode()) {
+      CurDAG->DeleteNode(N);
+      continue;
+    }
+
+    // Run ShrinkDemandedOp on scalar binary operations.
+    if (N->getNumValues() == 1 &&
+        N->getValueType(0).isSimple() && N->getValueType(0).isInteger()) {
+      DebugLoc dl = N->getDebugLoc();
+      unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
+      APInt Demanded = APInt::getAllOnesValue(BitWidth);
+      APInt KnownZero, KnownOne;
+      if (TLI.SimplifyDemandedBits(SDValue(N, 0), Demanded,
+                                   KnownZero, KnownOne, TLO)) {
+        // Revisit the node.
+        Worklist.erase(std::remove(Worklist.begin(), Worklist.end(), N),
+                       Worklist.end());
+        Worklist.push_back(N);
+
+        // Replace the old value with the new one.
+        DEBUG(errs() << "\nReplacing "; 
+              TLO.Old.getNode()->dump(CurDAG);
+              errs() << "\nWith: ";
+              TLO.New.getNode()->dump(CurDAG);
+              errs() << '\n');
+
+        Worklist.push_back(TLO.New.getNode());
+        CurDAG->ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
+
+        if (TLO.Old.getNode()->use_empty()) {
+          for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands();
+               i != e; ++i) {
+            SDNode *OpNode = TLO.Old.getNode()->getOperand(i).getNode(); 
+            if (OpNode->hasOneUse()) {
+              Worklist.erase(std::remove(Worklist.begin(), Worklist.end(),
+                                         OpNode),
+                             Worklist.end());
+              Worklist.push_back(TLO.Old.getNode()->getOperand(i).getNode());
+            }
+          }
+
+          Worklist.erase(std::remove(Worklist.begin(), Worklist.end(),
+                                     TLO.Old.getNode()),
+                         Worklist.end());
+          CurDAG->DeleteNode(TLO.Old.getNode());
+        }
+      }
+    }
+  }
+}
+
  void SelectionDAGISel::ComputeLiveOutVRegInfo() {
    SmallPtrSet<SDNode*, 128> VisitedNodes;
    SmallVector<SDNode*, 128> Worklist;
@@ -609,8 +678,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
  
    if (ViewISelDAGs) CurDAG->viewGraph("isel input for " + BlockName);
  
-  if (OptLevel != CodeGenOpt::None)
+  if (OptLevel != CodeGenOpt::None) {
+    ShrinkDemandedOps();
      ComputeLiveOutVRegInfo();
+  }
  
    // Third, instruction select all of the operations to machine code, adding the
    // code to the MachineBasicBlock.
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp

index d9a5a13666b31c5ff64461076a36047c4682d29b..f7694dbd3e5b160e488390f0d7a2bc3a64452a4e 100644 (file)
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -990,7 +990,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
      if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask))
        return true;
      // If the operation can be done in a smaller type, do so.
-    if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+    if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
        return true;
  
      // Output known-1 bits are only known if set in both the LHS & RHS.
@@ -1024,7 +1024,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
      if (TLO.ShrinkDemandedConstant(Op, NewMask))
        return true;
      // If the operation can be done in a smaller type, do so.
-    if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+    if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
        return true;
  
      // Output known-0 bits are only known if clear in both the LHS & RHS.
@@ -1049,7 +1049,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
      if ((KnownZero2 & NewMask) == NewMask)
        return TLO.CombineTo(Op, Op.getOperand(1));
      // If the operation can be done in a smaller type, do so.
-    if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+    if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
        return true;
  
      // If all of the unknown bits are known to be zero on one side or the other
@@ -1480,7 +1480,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
                               KnownOne2, TLO, Depth+1))
        return true;
      // See if the operation should be performed at a smaller bit width.
-    if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+    if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
        return true;
    }
    // FALL THROUGH
@@ -1876,7 +1876,9 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
  
      // Fold bit comparisons when we can.
      if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
-        VT == N0.getValueType() && N0.getOpcode() == ISD::AND)
+        (VT == N0.getValueType() ||
+         (isTypeLegal(VT) && VT.bitsLE(N0.getValueType()))) &&
+        N0.getOpcode() == ISD::AND)
        if (ConstantSDNode *AndRHS =
                    dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
          EVT ShiftTy = DCI.isBeforeLegalize() ?
@@ -1884,16 +1886,18 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
          if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0  -->  (X & 8) >> 3
            // Perform the xform if the AND RHS is a single bit.
            if (isPowerOf2_64(AndRHS->getZExtValue())) {
-            return DAG.getNode(ISD::SRL, dl, VT, N0,
+            return DAG.getNode(ISD::TRUNCATE, dl, VT,
+                              DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
                                  DAG.getConstant(Log2_64(AndRHS->getZExtValue()),
-                                                ShiftTy));
+                                                ShiftTy)));
            }
          } else if (Cond == ISD::SETEQ && C1 == AndRHS->getZExtValue()) {
            // (X & 8) == 8  -->  (X & 8) >> 3
            // Perform the xform if C1 is a single bit.
            if (C1.isPowerOf2()) {
-            return DAG.getNode(ISD::SRL, dl, VT, N0,
-                                DAG.getConstant(C1.logBase2(), ShiftTy));
+            return DAG.getNode(ISD::TRUNCATE, dl, VT,
+                               DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
+                                      DAG.getConstant(C1.logBase2(), ShiftTy)));
            }
          }
        }
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 0367165f5c414595d4969446f8826f5752fea1b9..7d92fd8fad62881e7b93d42b2af086507c2c22af 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -5681,58 +5681,51 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
    return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1);
  }
  
-static SDValue LowerToBT(SDValue Op0, SDValue Op1, ISD::CondCode CC,
+/// LowerToBT - Result of 'and' is compared against zero. Turn it into a BT node
+/// if it's possible.
+static SDValue LowerToBT(SDValue Op0, ISD::CondCode CC,
                           DebugLoc dl, SelectionDAG &DAG) {
-  // Lower (X & (1 << N)) == 0 to BT(X, N).
-  // Lower ((X >>u N) & 1) != 0 to BT(X, N).
-  // Lower ((X >>s N) & 1) != 0 to BT(X, N).
-  if (Op0.getOpcode() == ISD::AND &&
-      Op0.hasOneUse() &&
-      Op1.getOpcode() == ISD::Constant &&
-      cast<ConstantSDNode>(Op1)->getZExtValue() == 0 &&
-      (CC == ISD::SETEQ || CC == ISD::SETNE)) {
-    SDValue LHS, RHS;
-    if (Op0.getOperand(1).getOpcode() == ISD::SHL) {
-      if (ConstantSDNode *Op010C =
-          dyn_cast<ConstantSDNode>(Op0.getOperand(1).getOperand(0)))
-        if (Op010C->getZExtValue() == 1) {
-          LHS = Op0.getOperand(0);
-          RHS = Op0.getOperand(1).getOperand(1);
-        }
-    } else if (Op0.getOperand(0).getOpcode() == ISD::SHL) {
-      if (ConstantSDNode *Op000C =
-          dyn_cast<ConstantSDNode>(Op0.getOperand(0).getOperand(0)))
-        if (Op000C->getZExtValue() == 1) {
-          LHS = Op0.getOperand(1);
-          RHS = Op0.getOperand(0).getOperand(1);
-        }
-    } else if (Op0.getOperand(1).getOpcode() == ISD::Constant) {
-      ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op0.getOperand(1));
-      SDValue AndLHS = Op0.getOperand(0);
-      if (AndRHS->getZExtValue() == 1 && AndLHS.getOpcode() == ISD::SRL) {
-        LHS = AndLHS.getOperand(0);
-        RHS = AndLHS.getOperand(1);
+  SDValue LHS, RHS;
+  if (Op0.getOperand(1).getOpcode() == ISD::SHL) {
+    if (ConstantSDNode *Op010C =
+        dyn_cast<ConstantSDNode>(Op0.getOperand(1).getOperand(0)))
+      if (Op010C->getZExtValue() == 1) {
+        LHS = Op0.getOperand(0);
+        RHS = Op0.getOperand(1).getOperand(1);
        }
+  } else if (Op0.getOperand(0).getOpcode() == ISD::SHL) {
+    if (ConstantSDNode *Op000C =
+        dyn_cast<ConstantSDNode>(Op0.getOperand(0).getOperand(0)))
+      if (Op000C->getZExtValue() == 1) {
+        LHS = Op0.getOperand(1);
+        RHS = Op0.getOperand(0).getOperand(1);
+      }
+  } else if (Op0.getOperand(1).getOpcode() == ISD::Constant) {
+    ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op0.getOperand(1));
+    SDValue AndLHS = Op0.getOperand(0);
+    if (AndRHS->getZExtValue() == 1 && AndLHS.getOpcode() == ISD::SRL) {
+      LHS = AndLHS.getOperand(0);
+      RHS = AndLHS.getOperand(1);
      }
+  }
  
-    if (LHS.getNode()) {
-      // If LHS is i8, promote it to i16 with any_extend.  There is no i8 BT
-      // instruction.  Since the shift amount is in-range-or-undefined, we know
-      // that doing a bittest on the i16 value is ok.  We extend to i32 because
-      // the encoding for the i16 version is larger than the i32 version.
-      if (LHS.getValueType() == MVT::i8)
-        LHS = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS);
+  if (LHS.getNode()) {
+    // If LHS is i8, promote it to i16 with any_extend.  There is no i8 BT
+    // instruction.  Since the shift amount is in-range-or-undefined, we know
+    // that doing a bittest on the i16 value is ok.  We extend to i32 because
+    // the encoding for the i16 version is larger than the i32 version.
+    if (LHS.getValueType() == MVT::i8)
+      LHS = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS);
  
-      // If the operand types disagree, extend the shift amount to match.  Since
-      // BT ignores high bits (like shifts) we can use anyextend.
-      if (LHS.getValueType() != RHS.getValueType())
-        RHS = DAG.getNode(ISD::ANY_EXTEND, dl, LHS.getValueType(), RHS);
+    // If the operand types disagree, extend the shift amount to match.  Since
+    // BT ignores high bits (like shifts) we can use anyextend.
+    if (LHS.getValueType() != RHS.getValueType())
+      RHS = DAG.getNode(ISD::ANY_EXTEND, dl, LHS.getValueType(), RHS);
  
-      SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, LHS, RHS);
-      unsigned Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;
-      return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
-                         DAG.getConstant(Cond, MVT::i8), BT);
-    }
+    SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, LHS, RHS);
+    unsigned Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;
+    return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+                       DAG.getConstant(Cond, MVT::i8), BT);
    }
  
    return SDValue();
@@ -5746,9 +5739,18 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) {
    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
  
    // Optimize to BT if possible.
-  SDValue NewCond = LowerToBT(Op0, Op1, CC, dl, DAG);
-  if (NewCond.getNode())
-    return NewCond;
+  // Lower (X & (1 << N)) == 0 to BT(X, N).
+  // Lower ((X >>u N) & 1) != 0 to BT(X, N).
+  // Lower ((X >>s N) & 1) != 0 to BT(X, N).
+  if (Op0.getOpcode() == ISD::AND &&
+      Op0.hasOneUse() &&
+      Op1.getOpcode() == ISD::Constant &&
+      cast<ConstantSDNode>(Op1)->getZExtValue() == 0 &&
+      (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+    SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG);
+    if (NewSetCC.getNode())
+      return NewSetCC;
+  }
  
    bool isFP = Op.getOperand(1).getValueType().isFloatingPoint();
    unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG);
@@ -5946,6 +5948,23 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) {
      }
    }
  
+  if (addTest) {
+    // Look past the truncate.
+    if (Cond.getOpcode() == ISD::TRUNCATE)
+      Cond = Cond.getOperand(0);
+
+    // We know the result of AND is compared against zero. Try to match
+    // it to BT.
+    if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) { 
+      SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG);
+      if (NewSetCC.getNode()) {
+        CC = NewSetCC.getOperand(0);
+        Cond = NewSetCC.getOperand(1);
+        addTest = false;
+      }
+    }
+  }
+
    if (addTest) {
      CC = DAG.getConstant(X86::COND_NE, MVT::i8);
      Cond = EmitTest(Cond, X86::COND_NE, DAG);
@@ -6103,6 +6122,23 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) {
      }
    }
  
+  if (addTest) {
+    // Look past the truncate.
+    if (Cond.getOpcode() == ISD::TRUNCATE)
+      Cond = Cond.getOperand(0);
+
+    // We know the result of AND is compared against zero. Try to match
+    // it to BT.
+    if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) { 
+      SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG);
+      if (NewSetCC.getNode()) {
+        CC = NewSetCC.getOperand(0);
+        Cond = NewSetCC.getOperand(1);
+        addTest = false;
+      }
+    }
+  }
+
    if (addTest) {
      CC = DAG.getConstant(X86::COND_NE, MVT::i8);
      Cond = EmitTest(Cond, X86::COND_NE, DAG);
diff --git a/test/CodeGen/Blackfin/2009-08-15-SetCC-Undef.ll b/test/CodeGen/Blackfin/2009-08-15-SetCC-Undef.ll

index f21da52315faf20f0759f9a1d5e72094dc60431e..b6cd2d40d1af978469e7b216bcd830ed8ceb9229 100644 (file)
--- a/test/CodeGen/Blackfin/2009-08-15-SetCC-Undef.ll
+++ b/test/CodeGen/Blackfin/2009-08-15-SetCC-Undef.ll
@@ -1,5 +1,4 @@
  ; RUN: llc < %s -march=bfin -verify-machineinstrs
-; XFAIL: *
  
  ; An undef argument causes a setugt node to escape instruction selection.
  
diff --git a/test/CodeGen/Blackfin/promote-logic.ll b/test/CodeGen/Blackfin/promote-logic.ll

index c247aca0a5b0ad0f7347d7b143ae8e032d824bdf..46da56681d4fb34fb2ee9ade75281286626c39b9 100644 (file)
--- a/test/CodeGen/Blackfin/promote-logic.ll
+++ b/test/CodeGen/Blackfin/promote-logic.ll
@@ -1,4 +1,5 @@
  ; RUN: llc < %s -march=bfin > %t
+; XFAIL: *
  
  ; DAGCombiner::SimplifyBinOpWithSameOpcodeHands can produce an illegal i16 OR
  ; operation after LegalizeOps.
diff --git a/test/CodeGen/CellSPU/mul_ops.ll b/test/CodeGen/CellSPU/mul_ops.ll

index 031d6c37ce70669938034910dd35f7f3f6e03cc4..1e28fc7a918d23bd161cb44d40640d366d04b96c 100644 (file)
--- a/test/CodeGen/CellSPU/mul_ops.ll
+++ b/test/CodeGen/CellSPU/mul_ops.ll
@@ -11,7 +11,6 @@
  ; RUN: grep shli    %t1.s | count 4
  ; RUN: grep shlhi   %t1.s | count 4
  ; RUN: grep ila     %t1.s | count 2
-; RUN: grep xsbh    %t1.s | count 4
  target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
  target triple = "spu"
  
diff --git a/test/CodeGen/SystemZ/2009-06-02-Rotate.ll b/test/CodeGen/SystemZ/2009-06-02-Rotate.ll

index 3317864c01473f73f6819516baa6c0a4cf22e277..07a164d42645b37e32dc5b7840f2b008b53bc69f 100644 (file)
--- a/test/CodeGen/SystemZ/2009-06-02-Rotate.ll
+++ b/test/CodeGen/SystemZ/2009-06-02-Rotate.ll
@@ -5,8 +5,8 @@ target triple = "s390x-linux"
  
  define i32 @rotl(i32 %x, i32 %y, i32 %z) nounwind readnone {
  entry:
-       %shl = shl i32 %x, 0            ; <i32> [#uses=1]
-       %sub = sub i32 32, 0            ; <i32> [#uses=1]
+       %shl = shl i32 %x, 1            ; <i32> [#uses=1]
+       %sub = sub i32 32, 1            ; <i32> [#uses=1]
         %shr = lshr i32 %x, %sub                ; <i32> [#uses=1]
         %or = or i32 %shr, %shl         ; <i32> [#uses=1]
         ret i32 %or
diff --git a/test/CodeGen/X86/xor-icmp.ll b/test/CodeGen/X86/xor-icmp.ll

new file mode 100644 (file)

index 0000000..a6bdb13
--- /dev/null
+++ b/test/CodeGen/X86/xor-icmp.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -march=x86    | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -march=x86-64 | FileCheck %s -check-prefix=X64
+
+define i32 @t(i32 %a, i32 %b) nounwind ssp {
+entry:
+; X32:     t:
+; X32:     xorb
+; X32-NOT: andb
+; X32-NOT: shrb
+; X32:     testb $64
+; X32:     jne
+
+; X64:     t:
+; X64-NOT: setne
+; X64:     xorl
+; X64:     testb $64
+; X64:     jne
+  %0 = and i32 %a, 16384
+  %1 = icmp ne i32 %0, 0
+  %2 = and i32 %b, 16384
+  %3 = icmp ne i32 %2, 0
+  %4 = xor i1 %1, %3
+  br i1 %4, label %bb1, label %bb
+
+bb:                                               ; preds = %entry
+  %5 = tail call i32 (...)* @foo() nounwind       ; <i32> [#uses=1]
+  ret i32 %5
+
+bb1:                                              ; preds = %entry
+  %6 = tail call i32 (...)* @bar() nounwind       ; <i32> [#uses=1]
+  ret i32 %6
+}
+
+declare i32 @foo(...)
+
+declare i32 @bar(...)
author	Evan Cheng <evan.cheng@apple.com>
	Wed, 6 Jan 2010 19:38:29 +0000 (19:38 +0000)
committer	Evan Cheng <evan.cheng@apple.com>
	Wed, 6 Jan 2010 19:38:29 +0000 (19:38 +0000)
include/llvm/CodeGen/SelectionDAGISel.h		patch \| blob \| history
include/llvm/Target/TargetLowering.h		patch \| blob \| history
lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch \| blob \| history
lib/CodeGen/SelectionDAG/SelectionDAG.cpp		patch \| blob \| history
lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp		patch \| blob \| history
lib/CodeGen/SelectionDAG/TargetLowering.cpp		patch \| blob \| history
lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
test/CodeGen/Blackfin/2009-08-15-SetCC-Undef.ll		patch \| blob \| history
test/CodeGen/Blackfin/promote-logic.ll		patch \| blob \| history
test/CodeGen/CellSPU/mul_ops.ll		patch \| blob \| history
test/CodeGen/SystemZ/2009-06-02-Rotate.ll		patch \| blob \| history
test/CodeGen/X86/xor-icmp.ll	[new file with mode: 0644]	patch \| blob