Implement support for modeling implicit zero-extension on x86-64

author Dan Gohman <gohman@apple.com>

Wed, 8 Apr 2009 00:15:30 +0000 (00:15 +0000)

committer Dan Gohman <gohman@apple.com>

Wed, 8 Apr 2009 00:15:30 +0000 (00:15 +0000)
author Dan Gohman <gohman@apple.com>
Wed, 8 Apr 2009 00:15:30 +0000 (00:15 +0000)
committer Dan Gohman <gohman@apple.com>
Wed, 8 Apr 2009 00:15:30 +0000 (00:15 +0000)
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h

index cbe4ca4f5384dd23856df4df1b17c1d8c28d4a7a..477505e2f79f87f8d827875b1760a78f66bf41a5 100644 (file)
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -747,6 +747,13 @@ public:
      /// there are any bits set in the constant that are not demanded.  If so,
      /// shrink the constant and return true.
      bool ShrinkDemandedConstant(SDValue Op, const APInt &Demanded);
+
+    /// ShrinkDemandedOp - Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the
+    /// casts are free.  This uses isZExtFree and ZERO_EXTEND for the widening
+    /// cast, but it could be generalized for targets with other types of
+    /// implicit widening casts.
+    bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &Demanded,
+                          DebugLoc dl);
    };
                                                  
    /// SimplifyDemandedBits - Look at Op.  At this point, we know that only the
@@ -1386,7 +1393,23 @@ public:
    virtual bool isTruncateFree(MVT VT1, MVT VT2) const {
      return false;
    }
-  
+
+  /// isZExtFree - Return true if any actual instruction that defines a
+  /// value of type Ty1 implicitly zero-extends the value to Ty2 in the result
+  /// register. This does not necessarily include registers defined in
+  /// unknown ways, such as incoming arguments, or copies from unknown
+  /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
+  /// does not necessarily apply to truncate instructions. e.g. on x86-64,
+  /// all instructions that define 32-bit values implicitly zero-extend the
+  /// result out to 64 bits.
+  virtual bool isZExtFree(const Type *Ty1, const Type *Ty2) const {
+    return false;
+  }
+
+  virtual bool isZExtFree(MVT VT1, MVT VT2) const {
+    return false;
+  }
+
    //===--------------------------------------------------------------------===//
    // Div utility functions
    //
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp

index 75cd36dec2c43187ab67eee44ccfda6ff9f171ef..3f8714085aef64b6d0b7a8936fad286c1586ed5a 100644 (file)
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -579,24 +579,41 @@ void LiveInterval::MergeInClobberRanges(const LiveInterval &Clobbers,
    
    iterator IP = begin();
    for (const_iterator I = Clobbers.begin(), E = Clobbers.end(); I != E; ++I) {
+    bool Done = false;
      unsigned Start = I->start, End = I->end;
-    IP = std::upper_bound(IP, end(), Start);
-    
-    // If the start of this range overlaps with an existing liverange, trim it.
-    if (IP != begin() && IP[-1].end > Start) {
-      Start = IP[-1].end;
-      // Trimmed away the whole range?
-      if (Start >= End) continue;
-    }
-    // If the end of this range overlaps with an existing liverange, trim it.
-    if (IP != end() && End > IP->start) {
-      End = IP->start;
-      // If this trimmed away the whole range, ignore it.
-      if (Start == End) continue;
+    // If a clobber range starts before an existing range and ends after
+    // it, the clobber range will need to be split into multiple ranges.
+    // Loop until the entire clobber range is handled.
+    while (!Done) {
+      Done = true;
+      IP = std::upper_bound(IP, end(), Start);
+      unsigned SubRangeStart = Start;
+      unsigned SubRangeEnd = End;
+
+      // If the start of this range overlaps with an existing liverange, trim it.
+      if (IP != begin() && IP[-1].end > SubRangeStart) {
+        SubRangeStart = IP[-1].end;
+        // Trimmed away the whole range?
+        if (SubRangeStart >= SubRangeEnd) continue;
+      }
+      // If the end of this range overlaps with an existing liverange, trim it.
+      if (IP != end() && SubRangeEnd > IP->start) {
+        // If the clobber live range extends beyond the existing live range,
+        // it'll need at least another live range, so set the flag to keep
+        // iterating.
+        if (SubRangeEnd > IP->end) {
+          Start = IP->end;
+          Done = false;
+        }
+        SubRangeEnd = IP->start;
+        // If this trimmed away the whole range, ignore it.
+        if (SubRangeStart == SubRangeEnd) continue;
+      }
+
+      // Insert the clobber interval.
+      IP = addRangeFrom(LiveRange(SubRangeStart, SubRangeEnd, ClobberValNo),
+                        IP);
      }
-    
-    // Insert the clobber interval.
-    IP = addRangeFrom(LiveRange(Start, End, ClobberValNo), IP);
    }
  }
  
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp

index 8c7fa1b1ac22f5a611d4feba8a7491aa0b2bd966..cb08fe759f509690e6c5e2ab84c7af43ee69c042 100644 (file)
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -399,6 +399,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
      unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
      if (mi->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG ||
          mi->getOpcode() == TargetInstrInfo::INSERT_SUBREG ||
+        mi->getOpcode() == TargetInstrInfo::SUBREG_TO_REG ||
          tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg))
        CopyMI = mi;
      // Earlyclobbers move back one.
@@ -556,6 +557,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
        unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
        if (mi->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG ||
            mi->getOpcode() == TargetInstrInfo::INSERT_SUBREG ||
+          mi->getOpcode() == TargetInstrInfo::SUBREG_TO_REG ||
            tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg))
          CopyMI = mi;
        ValNo = interval.getNextValue(defIndex, CopyMI, VNInfoAllocator);
@@ -658,6 +660,7 @@ void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB,
      unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
      if (MI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG ||
          MI->getOpcode() == TargetInstrInfo::INSERT_SUBREG ||
+        MI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG ||
          tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg))
        CopyMI = MI;
      handlePhysicalRegisterDef(MBB, MI, MIIdx, MO, 
@@ -855,7 +858,8 @@ unsigned LiveIntervals::getVNInfoSourceReg(const VNInfo *VNI) const {
      if (TargetRegisterInfo::isPhysicalRegister(Reg))
        Reg = tri_->getSubReg(Reg, VNI->copy->getOperand(2).getImm());
      return Reg;
-  } else if (VNI->copy->getOpcode() == TargetInstrInfo::INSERT_SUBREG)
+  } else if (VNI->copy->getOpcode() == TargetInstrInfo::INSERT_SUBREG ||
+             VNI->copy->getOpcode() == TargetInstrInfo::SUBREG_TO_REG)
      return VNI->copy->getOperand(2).getReg();
  
    unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index 78d5d403e8625ddc5c1d9477b0a4ee4a890fc9e5..e874f1b1ec18afec70234be1cc56c9af04f36299 100644 (file)
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1666,9 +1666,11 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
    // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
    // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
    // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
-  // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y))
+  // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
    if ((N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND||
-       N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::TRUNCATE) &&
+       N0.getOpcode() == ISD::SIGN_EXTEND ||
+       (N0.getOpcode() == ISD::TRUNCATE &&
+        !TLI.isTruncateFree(N0.getOperand(0).getValueType(), VT))) &&
        N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()) {
      SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(),
                                   N0.getOperand(0).getValueType(),
@@ -3121,10 +3123,14 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
      return DAG.getZeroExtendInReg(Op, N->getDebugLoc(), N0.getValueType());
    }
  
-  // fold (zext (and (trunc x), cst)) -> (and x, cst).
+  // Fold (zext (and (trunc x), cst)) -> (and x, cst),
+  // if either of the casts is not free.
    if (N0.getOpcode() == ISD::AND &&
        N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
-      N0.getOperand(1).getOpcode() == ISD::Constant) {
+      N0.getOperand(1).getOpcode() == ISD::Constant &&
+      (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
+                           N0.getValueType()) ||
+       !TLI.isZExtFree(N0.getValueType(), VT))) {
      SDValue X = N0.getOperand(0).getOperand(0);
      if (X.getValueType().bitsLT(VT)) {
        X = DAG.getNode(ISD::ANY_EXTEND, X.getDebugLoc(), VT, X);
@@ -3252,10 +3258,13 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
      return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, TruncOp);
    }
  
-  // fold (aext (and (trunc x), cst)) -> (and x, cst).
+  // Fold (aext (and (trunc x), cst)) -> (and x, cst)
+  // if the trunc is not free.
    if (N0.getOpcode() == ISD::AND &&
        N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
-      N0.getOperand(1).getOpcode() == ISD::Constant) {
+      N0.getOperand(1).getOpcode() == ISD::Constant &&
+      !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
+                          N0.getValueType())) {
      SDValue X = N0.getOperand(0).getOperand(0);
      if (X.getValueType().bitsLT(VT)) {
        X = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, X);
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp

index 79a48a6fd9cfe96f5248d3034ccfd430b8157ed7..7b83a127074ba756b5d73c0a358f937c6d2e846d 100644 (file)
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -777,6 +777,48 @@ bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op,
    return false;
  }
  
+/// ShrinkDemandedOp - Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the
+/// casts are free.  This uses isZExtFree and ZERO_EXTEND for the widening
+/// cast, but it could be generalized for targets with other types of
+/// implicit widening casts.
+bool
+TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op,
+                                                    unsigned BitWidth,
+                                                    const APInt &Demanded,
+                                                    DebugLoc dl) {
+  assert(Op.getNumOperands() == 2 &&
+         "ShrinkDemandedOp only supports binary operators!");
+  assert(Op.getNode()->getNumValues() == 1 &&
+         "ShrinkDemandedOp only supports nodes with one result!");
+
+  // Don't do this if the node has another user, which may require the
+  // full value.
+  if (!Op.getNode()->hasOneUse())
+    return false;
+
+  // Search for the smallest integer type with free casts to and from
+  // Op's type. For expedience, just check power-of-2 integer types.
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  unsigned SmallVTBits = BitWidth - Demanded.countLeadingZeros();
+  if (!isPowerOf2_32(SmallVTBits))
+    SmallVTBits = NextPowerOf2(SmallVTBits);
+  for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
+    MVT SmallVT = MVT::getIntegerVT(SmallVTBits);
+    if (TLI.isTruncateFree(Op.getValueType(), SmallVT) &&
+        TLI.isZExtFree(SmallVT, Op.getValueType())) {
+      // We found a type with free casts.
+      SDValue X = DAG.getNode(Op.getOpcode(), dl, SmallVT,
+                              DAG.getNode(ISD::TRUNCATE, dl, SmallVT,
+                                          Op.getNode()->getOperand(0)),
+                              DAG.getNode(ISD::TRUNCATE, dl, SmallVT,
+                                          Op.getNode()->getOperand(1)));
+      SDValue Z = DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), X);
+      return CombineTo(Op, Z);
+    }
+  }
+  return false;
+}
+
  /// SimplifyDemandedBits - Look at Op.  At this point, we know that only the
  /// DemandedMask bits of the result of Op are ever used downstream.  If we can
  /// use this information to simplify Op, create a new simplified DAG node and
@@ -865,7 +907,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
      // If the RHS is a constant, see if we can simplify it.
      if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask))
        return true;
-      
+    // If the operation can be done in a smaller type, do so.
+    if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+      return true;
+
      // Output known-1 bits are only known if set in both the LHS & RHS.
      KnownOne &= KnownOne2;
      // Output known-0 are known to be clear if zero in either the LHS | RHS.
@@ -896,7 +941,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
      // If the RHS is a constant, see if we can simplify it.
      if (TLO.ShrinkDemandedConstant(Op, NewMask))
        return true;
-          
+    // If the operation can be done in a smaller type, do so.
+    if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+      return true;
+
      // Output known-0 bits are only known if clear in both the LHS & RHS.
      KnownZero &= KnownZero2;
      // Output known-1 are known to be set if set in either the LHS | RHS.
@@ -918,7 +966,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
        return TLO.CombineTo(Op, Op.getOperand(0));
      if ((KnownZero2 & NewMask) == NewMask)
        return TLO.CombineTo(Op, Op.getOperand(1));
-      
+    // If the operation can be done in a smaller type, do so.
+    if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+      return true;
+
      // If all of the unknown bits are known to be zero on one side or the other
      // (but not both) turn this into an *inclusive* or.
      //    e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
@@ -1333,6 +1384,24 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
      }
  #endif
      break;
+  case ISD::ADD:
+  case ISD::MUL:
+  case ISD::SUB: {
+    // Add, Sub, and Mul don't demand any bits in positions beyond that
+    // of the highest bit demanded of them.
+    APInt LoMask = APInt::getLowBitsSet(BitWidth,
+                                        BitWidth - NewMask.countLeadingZeros());
+    if (SimplifyDemandedBits(Op.getOperand(0), LoMask, KnownZero2,
+                             KnownOne2, TLO, Depth+1))
+      return true;
+    if (SimplifyDemandedBits(Op.getOperand(1), LoMask, KnownZero2,
+                             KnownOne2, TLO, Depth+1))
+      return true;
+    // See if the operation should be performed at a smaller bit width.
+    if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+      return true;
+  }
+  // FALL THROUGH
    default:
      // Just use ComputeMaskedBits to compute output bits.
      TLO.DAG.ComputeMaskedBits(Op, NewMask, KnownZero, KnownOne, Depth);
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp

index f60b23a6c5077d84809551239b1c1ffd11b0ece0..60f7f403a093504763926eb23efb9bddaaf6d379 100644 (file)
--- a/lib/CodeGen/SimpleRegisterCoalescing.cpp
+++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp
@@ -1070,7 +1070,8 @@ SimpleRegisterCoalescing::HasIncompatibleSubRegDefUse(MachineInstr *CopyMI,
            return true;
        }
      }
-    if (MI->getOpcode() == TargetInstrInfo::INSERT_SUBREG) {
+    if (MI->getOpcode() == TargetInstrInfo::INSERT_SUBREG ||
+        MI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) {
        SubIdx = MI->getOperand(3).getImm();
        if (VirtReg == MI->getOperand(0).getReg()) {
          if (!tri_->getSubReg(PhysReg, SubIdx))
@@ -1164,11 +1165,12 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
    unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
    bool isExtSubReg = CopyMI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG;
    bool isInsSubReg = CopyMI->getOpcode() == TargetInstrInfo::INSERT_SUBREG;
+  bool isSubRegToReg = CopyMI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG;
    unsigned SubIdx = 0;
    if (isExtSubReg) {
      DstReg = CopyMI->getOperand(0).getReg();
      SrcReg = CopyMI->getOperand(1).getReg();
-  } else if (isInsSubReg) {
+  } else if (isInsSubReg || isSubRegToReg) {
      if (CopyMI->getOperand(2).getSubReg()) {
        DOUT << "\tSource of insert_subreg is already coalesced "
             << "to another register.\n";
@@ -1212,7 +1214,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
    MachineBasicBlock *CopyMBB = CopyMI->getParent();
    unsigned RealDstReg = 0;
    unsigned RealSrcReg = 0;
-  if (isExtSubReg || isInsSubReg) {
+  if (isExtSubReg || isInsSubReg || isSubRegToReg) {
      SubIdx = CopyMI->getOperand(isExtSubReg ? 2 : 3).getImm();
      if (SrcIsPhys && isExtSubReg) {
        // r1024 = EXTRACT_SUBREG EAX, 0 then r1024 is really going to be
@@ -1228,7 +1230,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
        } else
          SrcReg = tri_->getSubReg(SrcReg, SubIdx);
        SubIdx = 0;
-    } else if (DstIsPhys && isInsSubReg) {
+    } else if (DstIsPhys && (isInsSubReg || isSubRegToReg)) {
        // EAX = INSERT_SUBREG EAX, r1024, 0
        unsigned SrcSubIdx = CopyMI->getOperand(2).getSubReg();
        if (SrcSubIdx) {
@@ -1241,8 +1243,9 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
        } else
          DstReg = tri_->getSubReg(DstReg, SubIdx);
        SubIdx = 0;
-    } else if ((DstIsPhys && isExtSubReg) || (SrcIsPhys && isInsSubReg)) {
-      if (CopyMI->getOperand(1).getSubReg()) {
+    } else if ((DstIsPhys && isExtSubReg) ||
+               (SrcIsPhys && (isInsSubReg || isSubRegToReg))) {
+      if (!isSubRegToReg && CopyMI->getOperand(1).getSubReg()) {
          DOUT << "\tSrc of extract_subreg already coalesced with reg"
               << " of a super-class.\n";
          return false; // Not coalescable.
@@ -1295,20 +1298,32 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
  
      // Process moves where one of the registers have a sub-register index.
      MachineOperand *DstMO = CopyMI->findRegisterDefOperand(DstReg);
-    if (DstMO->getSubReg())
-      // FIXME: Can we handle this?
-      return false;
      MachineOperand *SrcMO = CopyMI->findRegisterUseOperand(SrcReg);
-    SubIdx = SrcMO->getSubReg();
+    SubIdx = DstMO->getSubReg();
      if (SubIdx) {
-      // This is not a extract_subreg but it looks like one.
-      // e.g. %cl = MOV16rr %reg1024:2
-      isExtSubReg = true;
-      if (DstIsPhys) {
-        if (!CanJoinExtractSubRegToPhysReg(DstReg, SrcReg, SubIdx,RealDstReg))
+      if (SrcMO->getSubReg())
+        // FIXME: can we handle this?
+        return false;
+      // This is not an insert_subreg but it looks like one.
+      // e.g. %reg1024:3 = MOV32rr %EAX
+      isInsSubReg = true;
+      if (SrcIsPhys) {
+        if (!CanJoinInsertSubRegToPhysReg(DstReg, SrcReg, SubIdx, RealSrcReg))
            return false; // Not coalescable
          SubIdx = 0;
        }
+    } else {
+      SubIdx = SrcMO->getSubReg();
+      if (SubIdx) {
+        // This is not an extract_subreg but it looks like one.
+        // e.g. %cl = MOV16rr %reg1024:2
+        isExtSubReg = true;
+        if (DstIsPhys) {
+          if (!CanJoinExtractSubRegToPhysReg(DstReg, SrcReg, SubIdx,RealDstReg))
+            return false; // Not coalescable
+          SubIdx = 0;
+        }
+      }
      }
  
      const TargetRegisterClass *SrcRC= SrcIsPhys ? 0 : mri_->getRegClass(SrcReg);
@@ -1393,7 +1408,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
      SavedLI = li_->dupInterval(&DstInt);
  
    // Check if it is necessary to propagate "isDead" property.
-  if (!isExtSubReg && !isInsSubReg) {
+  if (!isExtSubReg && !isInsSubReg && !isSubRegToReg) {
      MachineOperand *mopd = CopyMI->findRegisterDefOperand(DstReg, false);
      bool isDead = mopd->isDead();
  
@@ -1446,12 +1461,12 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
  
      // If definition of source is defined by trivial computation, try
      // rematerializing it.
-    if (!isExtSubReg && !isInsSubReg &&
+    if (!isExtSubReg && !isInsSubReg && !isSubRegToReg &&
          ReMaterializeTrivialDef(SrcInt, DstInt.reg, CopyMI))
        return true;
      
      // If we can eliminate the copy without merging the live ranges, do so now.
-    if (!isExtSubReg && !isInsSubReg &&
+    if (!isExtSubReg && !isInsSubReg && !isSubRegToReg &&
          (AdjustCopiesBackFrom(SrcInt, DstInt, CopyMI) ||
           RemoveCopyByCommutingDef(SrcInt, DstInt, CopyMI))) {
        JoinedCopies.insert(CopyMI);
@@ -1505,8 +1520,10 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
  
    // If this is a EXTRACT_SUBREG, make sure the result of coalescing is the
    // larger super-register.
-  if ((isExtSubReg || isInsSubReg) && !SrcIsPhys && !DstIsPhys) {
-    if ((isExtSubReg && !Swapped) || (isInsSubReg && Swapped)) {
+  if ((isExtSubReg || isInsSubReg || isSubRegToReg) &&
+      !SrcIsPhys && !DstIsPhys) {
+    if ((isExtSubReg && !Swapped) ||
+        ((isInsSubReg || isSubRegToReg) && Swapped)) {
        ResSrcInt->Copy(*ResDstInt, li_->getVNInfoAllocator());
        std::swap(SrcReg, DstReg);
        std::swap(ResSrcInt, ResDstInt);
@@ -1594,7 +1611,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
  
    // If resulting interval has a preference that no longer fits because of subreg
    // coalescing, just clear the preference.
-  if (ResDstInt->preference && (isExtSubReg || isInsSubReg) &&
+  if (ResDstInt->preference && (isExtSubReg || isInsSubReg || isSubRegToReg) &&
        TargetRegisterInfo::isVirtualRegister(ResDstInt->reg)) {
      const TargetRegisterClass *RC = mri_->getRegClass(ResDstInt->reg);
      if (!RC->contains(ResDstInt->preference))
@@ -1847,7 +1864,13 @@ bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){
    LHS.weight += RHS.weight;
    if (RHS.preference && !LHS.preference)
      LHS.preference = RHS.preference;
-  
+
+  // Update the liveintervals of sub-registers.
+  if (TargetRegisterInfo::isPhysicalRegister(LHS.reg))
+    for (const unsigned *AS = tri_->getSubRegisters(LHS.reg); *AS; ++AS)
+      li_->getOrCreateInterval(*AS).MergeInClobberRanges(LHS,
+                                                    li_->getVNInfoAllocator());
+
    return true;
  }
  
@@ -2183,7 +2206,8 @@ void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB,
      if (Inst->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) {
        DstReg = Inst->getOperand(0).getReg();
        SrcReg = Inst->getOperand(1).getReg();
-    } else if (Inst->getOpcode() == TargetInstrInfo::INSERT_SUBREG) {
+    } else if (Inst->getOpcode() == TargetInstrInfo::INSERT_SUBREG ||
+               Inst->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) {
        DstReg = Inst->getOperand(0).getReg();
        SrcReg = Inst->getOperand(2).getReg();
      } else if (!tii_->isMoveInstr(*Inst, SrcReg, DstReg, SrcSubIdx, DstSubIdx))
@@ -2498,7 +2522,8 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
          // Delete all coalesced copies.
          if (!tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) {
            assert((MI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG ||
-                  MI->getOpcode() == TargetInstrInfo::INSERT_SUBREG) &&
+                  MI->getOpcode() == TargetInstrInfo::INSERT_SUBREG ||
+                  MI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) &&
                   "Unrecognized copy instruction");
            DstReg = MI->getOperand(0).getReg();
          }
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp

index e8ae988f0c15bef138e1740bc3e2b160bdd3ef5a..8aa866ea29b026fb059a8c7d7db39e9e65fcd79a 100644 (file)
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -177,7 +177,7 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB,
      break;
    }
  
-  if (!KillMI || KillMI->getParent() != MBB)
+  if (!KillMI || KillMI->getParent() != MBB || KillMI == MI)
      return false;
  
    // If any of the definitions are used by another instruction between the
@@ -326,6 +326,9 @@ static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII,
      } else if (MI.getOpcode() == TargetInstrInfo::INSERT_SUBREG) {
        DstReg = MI.getOperand(0).getReg();
        SrcReg = MI.getOperand(2).getReg();
+    } else if (MI.getOpcode() == TargetInstrInfo::SUBREG_TO_REG) {
+      DstReg = MI.getOperand(0).getReg();
+      SrcReg = MI.getOperand(2).getReg();
      }
    }
  
@@ -337,6 +340,46 @@ static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII,
    return false;
  }
  
+/// isKilled - Test if the given register value, which is used by the given
+/// instruction, is killed by the given instruction. This looks through
+/// coalescable copies to see if the original value is potentially not killed.
+///
+/// For example, in this code:
+///
+///   %reg1034 = copy %reg1024
+///   %reg1035 = copy %reg1025<kill>
+///   %reg1036 = add %reg1034<kill>, %reg1035<kill>
+///
+/// %reg1034 is not considered to be killed, since it is copied from a
+/// register which is not killed. Treating it as not killed lets the
+/// normal heuristics commute the (two-address) add, which lets
+/// coalescing eliminate the extra copy.
+///
+static bool isKilled(MachineInstr &MI, unsigned Reg,
+                     const MachineRegisterInfo *MRI,
+                     const TargetInstrInfo *TII) {
+  MachineInstr *DefMI = &MI;
+  for (;;) {
+    if (!DefMI->killsRegister(Reg))
+      return false;
+    if (TargetRegisterInfo::isPhysicalRegister(Reg))
+      return true;
+    MachineRegisterInfo::def_iterator Begin = MRI->def_begin(Reg);
+    // If there are multiple defs, we can't do a simple analysis, so just
+    // go with what the kill flag says.
+    if (next(Begin) != MRI->def_end())
+      return true;
+    DefMI = &*Begin;
+    bool IsSrcPhys, IsDstPhys;
+    unsigned SrcReg,  DstReg;
+    // If the def is something other than a copy, then it isn't going to
+    // be coalesced, so follow the kill flag.
+    if (!isCopyToReg(*DefMI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys))
+      return true;
+    Reg = SrcReg;
+  }
+}
+
  /// isTwoAddrUse - Return true if the specified MI uses the specified register
  /// as a two-address use. If so, return the destination register by reference.
  static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) {
@@ -735,7 +778,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
            // rearrange the code to make it so.  Making it the killing user will
            // allow us to coalesce A and B together, eliminating the copy we are
            // about to insert.
-          if (!mi->killsRegister(regB)) {
+          if (!isKilled(*mi, regB, MRI, TII)) {
              // If regA is dead and the instruction can be deleted, just delete
              // it so it doesn't clobber regB.
              if (mi->getOperand(ti).isDead() && isSafeToDelete(mi, TII)) {
@@ -753,7 +796,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
                assert(mi->getOperand(3-si).isReg() &&
                       "Not a proper commutative instruction!");
                unsigned regC = mi->getOperand(3-si).getReg();
-              if (mi->killsRegister(regC)) {
+              if (isKilled(*mi, regC, MRI, TII)) {
                  if (CommuteInstruction(mi, mbbi, regB, regC, Dist)) {
                    ++NumCommuted;
                    regB = regC;
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index c5a6acbf7afb6439bb64ee12a68b33ff516e460e..6bdb92fbcb703d07cff73c6d899b1532c588ec89 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -7258,6 +7258,16 @@ bool X86TargetLowering::isTruncateFree(MVT VT1, MVT VT2) const {
    return Subtarget->is64Bit() || NumBits1 < 64;
  }
  
+bool X86TargetLowering::isZExtFree(const Type *Ty1, const Type *Ty2) const {
+  // x86-64 implicitly zero-extends 32-bit results in 64-bit registers.
+  return Ty1 == Type::Int32Ty && Ty2 == Type::Int64Ty && Subtarget->is64Bit();
+}
+
+bool X86TargetLowering::isZExtFree(MVT VT1, MVT VT2) const {
+  // x86-64 implicitly zero-extends 32-bit results in 64-bit registers.
+  return VT1 == MVT::i32 && VT2 == MVT::i64 && Subtarget->is64Bit();
+}
+
  /// isShuffleMaskLegal - Targets can use this to indicate that they only
  /// support *some* VECTOR_SHUFFLE operations, those with specific masks.
  /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h

index ca4af634226e5911558af142b5b891fa10fbef3e..45b3e974ae4a94528f1e830ba1028ffe868a6845 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -458,7 +458,18 @@ namespace llvm {
      /// register EAX to i16 by referencing its sub-register AX.
      virtual bool isTruncateFree(const Type *Ty1, const Type *Ty2) const;
      virtual bool isTruncateFree(MVT VT1, MVT VT2) const;
-  
+
+    /// isZExtFree - Return true if any actual instruction that defines a
+    /// value of type Ty1 implicitly zero-extends the value to Ty2 in the result
+    /// register. This does not necessarily include registers defined in
+    /// unknown ways, such as incoming arguments, or copies from unknown
+    /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
+    /// does not necessarily apply to truncate instructions. e.g. on x86-64,
+    /// all instructions that define 32-bit values implicitly zero-extend the
+    /// result out to 64 bits.
+    virtual bool isZExtFree(const Type *Ty1, const Type *Ty2) const;
+    virtual bool isZExtFree(MVT VT1, MVT VT2) const;
+
      /// isShuffleMaskLegal - Targets can use this to indicate that they only
      /// support *some* VECTOR_SHUFFLE operations, those with specific masks.
      /// By default, if a target supports the VECTOR_SHUFFLE node, all mask
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td

index ce5a8a37032fac2a8d230c5c9a33e06c88e421a1..f88834e4a28f3cad93b6a4aff00c9edd1227a834 100644 (file)
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -292,10 +292,12 @@ def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
                     [(set GR64:$dst, (zextloadi64i16 addr:$src))]>, TB;
  
  // There's no movzlq instruction, but movl can be used for this purpose, using
-// implicit zero-extension. We need this because the seeming alternative for
-// implementing zext from 32 to 64, an EXTRACT_SUBREG/SUBREG_TO_REG pair, isn't
-// safe because both instructions could be optimized away in the
-// register-to-register case, leaving nothing behind to do the zero extension.
+// implicit zero-extension. The preferred way to do 32-bit-to-64-bit zero
+// extension on x86-64 is to use a SUBREG_TO_REG to utilize implicit
+// zero-extension, however this isn't possible when the 32-bit value is
+// defined by a truncate or is copied from something where the high bits aren't
+// necessarily all zero. In such cases, we fall back to these explicit zext
+// instructions.
  def MOVZX64rr32 : I<0x89, MRMDestReg, (outs GR64:$dst), (ins GR32:$src),
                      "mov{l}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
                      [(set GR64:$dst, (zext GR32:$src))]>;
@@ -303,6 +305,21 @@ def MOVZX64rm32 : I<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
                      "mov{l}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
                      [(set GR64:$dst, (zextloadi64i32 addr:$src))]>;
  
+// Any instruction that defines a 32-bit result zeroes the high half of the
+// register. Truncate can be lowered to EXTRACT_SUBREG, and CopyFromReg may
+// be copying from a truncate, but any other 32-bit operation will zero-extend
+// up to 64 bits.
+def def32 : PatLeaf<(i32 GR32:$src), [{
+  return N->getOpcode() != ISD::TRUNCATE &&
+         N->getOpcode() != TargetInstrInfo::EXTRACT_SUBREG &&
+         N->getOpcode() != ISD::CopyFromReg;
+}]>;
+
+// In the case of a 32-bit def that is known to implicitly zero-extend,
+// we can use a SUBREG_TO_REG.
+def : Pat<(i64 (zext def32:$src)),
+          (SUBREG_TO_REG (i64 0), GR32:$src, x86_subreg_32bit)>;
+
  let neverHasSideEffects = 1 in {
    let Defs = [RAX], Uses = [EAX] in
    def CDQE : RI<0x98, RawFrm, (outs), (ins),
@@ -1443,10 +1460,6 @@ def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_O, EFLAGS),
  def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_NO, EFLAGS),
            (CMOVO64rm GR64:$src2, addr:$src1)>;
  
-// Zero-extension
-def : Pat<(i64 (zext GR32:$src)), 
-          (SUBREG_TO_REG (i64 0), GR32:$src, x86_subreg_32bit)>;
-
  // zextload bool -> zextload byte
  def : Pat<(zextloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>;
  
diff --git a/test/CodeGen/X86/2008-05-06-SpillerBug.ll b/test/CodeGen/X86/2008-05-06-SpillerBug.ll

deleted file mode 100644 (file)

index e13f398..0000000
--- a/test/CodeGen/X86/2008-05-06-SpillerBug.ll
+++ /dev/null
@@ -1,39 +0,0 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -relocation-model=pic -disable-fp-elim | grep addb | grep ebp
-
-       %struct.rc4_state = type { i32, i32, [256 x i32] }
-@.str1 = internal constant [65 x i8] c"m[%d] = 0x%02x, m[%d] = 0x%02x, 0x%02x, k = %d, key[k] = 0x%02x\0A\00"          ; <[65 x i8]*> [#uses=1]
-@keys = internal constant [7 x [30 x i8]] [ [30 x i8] c"\08\01#Eg\89\AB\CD\EF\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", [30 x i8] c"\08\01#Eg\89\AB\CD\EF\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", [30 x i8] c"\08\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", [30 x i8] c"\04\EF\01#E\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", [30 x i8] c"\08\01#Eg\89\AB\CD\EF\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", [30 x i8] c"\04\EF\01#E\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", [30 x i8] zeroinitializer ]            ; <[7 x [30 x i8]]*> [#uses=1]
-
-declare i32 @printf(i8*, ...) nounwind 
-
-define i32 @main(i32 %argc, i8** %argv) nounwind  {
-entry:
-       br label %bb25
-
-bb25:          ; preds = %bb25, %entry
-  %foo = phi i1 [ 0, %bb25], [ 1, %entry]
-       br i1 %foo, label %bb.i, label %bb25
-
-bb.i:          ; preds = %bb.i, %bb25
-  %foo2 = phi i1 [ 0, %bb.i], [1, %bb25]
-       br i1 %foo2, label %bb21.i, label %bb.i
-
-bb21.i:                ; preds = %bb21.i, %bb.i
-       %k.0.reg2mem.0.i = phi i32 [ %k.1.i, %bb21.i ], [ 0, %bb.i ]            ; <i32> [#uses=2]
-       %j.0.reg2mem.0.i = phi i8 [ %tmp35.i, %bb21.i ], [ 0, %bb.i ]           ; <i8> [#uses=1]
-       %tmp25.i = load i32* null, align 4              ; <i32> [#uses=4]
-       %tmp2829.i = trunc i32 %tmp25.i to i8           ; <i8> [#uses=1]
-       %.sum = add i32 %k.0.reg2mem.0.i, 1             ; <i32> [#uses=3]
-       %tmp33.i = getelementptr [7 x [30 x i8]]* @keys, i32 0, i32 0, i32 %.sum                ; <i8*> [#uses=1]
-       %tmp34.i = load i8* %tmp33.i, align 1           ; <i8> [#uses=1]
-       %tmp30.i = add i8 %tmp2829.i, %j.0.reg2mem.0.i          ; <i8> [#uses=1]
-       %tmp35.i = add i8 %tmp30.i, %tmp34.i            ; <i8> [#uses=2]
-       %tmp3536.i = zext i8 %tmp35.i to i32            ; <i32> [#uses=2]
-       %tmp39.i = getelementptr %struct.rc4_state* null, i32 0, i32 2, i32 %tmp3536.i          ; <i32*> [#uses=1]
-       store i32 %tmp25.i, i32* %tmp39.i, align 4
-       %tmp60.i = load i32* null, align 4              ; <i32> [#uses=1]
-       %tmp65.i = call i32 (i8*, ...)* @printf( i8* getelementptr ([65 x i8]* @.str1, i32 0, i32 0), i32 0, i32 %tmp60.i, i32 %tmp3536.i, i32 %tmp25.i, i32 %tmp25.i, i32 %k.0.reg2mem.0.i, i32 0 ) nounwind           ; <i32> [#uses=0]
-       %tmp70.i = icmp slt i32 %.sum, 8                ; <i1> [#uses=1]
-       %k.1.i = select i1 %tmp70.i, i32 %.sum, i32 0           ; <i32> [#uses=1]
-       br label %bb21.i
-}
diff --git a/test/CodeGen/X86/byval7.ll b/test/CodeGen/X86/byval7.ll

index fcbc59b838abc826d96c2ca2a74707f2346eb1ed..6b64c6ce4dabd147374adc8a1f0affcbba183684 100644 (file)
--- a/test/CodeGen/X86/byval7.ll
+++ b/test/CodeGen/X86/byval7.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | grep add | grep 16
+; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | egrep {add|lea} | grep 16
  
         %struct.S = type { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>,
                             <2 x i64> }
diff --git a/test/CodeGen/X86/iv-users-in-other-loops.ll b/test/CodeGen/X86/iv-users-in-other-loops.ll

index b47b856052d8fe06feb7f2366204fade9c365599..b21586173f926ece9e80f5d38a2681b65c67e3f5 100644 (file)
--- a/test/CodeGen/X86/iv-users-in-other-loops.ll
+++ b/test/CodeGen/X86/iv-users-in-other-loops.ll
@@ -1,7 +1,7 @@
  ; RUN: llvm-as < %s | llc -march=x86-64 -f -o %t
  ; RUN: grep inc %t | count 2
  ; RUN: grep addq %t | count 13
-; RUN: grep leaq %t | count 10
+; RUN: grep leaq %t | count 9
  ; RUN: grep movq %t | count 5
  
  ; IV users in each of the loops from other loops shouldn't cause LSR
diff --git a/test/CodeGen/X86/subreg-to-reg-1.ll b/test/CodeGen/X86/subreg-to-reg-1.ll

index 4e487e18472a5e63f8885548663e6eb59922c25b..cf9f2d814253c9fbb2e2223e708b9df3ae18eb87 100644 (file)
--- a/test/CodeGen/X86/subreg-to-reg-1.ll
+++ b/test/CodeGen/X86/subreg-to-reg-1.ll
@@ -1,6 +1,9 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep {movl   %e.\*, %e.\*} | count 1
+; RUN: llvm-as < %s | llc -march=x86-64 | grep {leal   .*), %e.\*} | count 1
  
  ; Don't eliminate or coalesce away the explicit zero-extension!
+; This is currently using an leal because of a 3-addressification detail,
+; though this isn't necessary; the point of this test is to make sure
+; a 32-bit add is used.
  
  define i64 @foo(i64 %a) {
    %b = add i64 %a, 4294967295
diff --git a/test/CodeGen/X86/subreg-to-reg-3.ll b/test/CodeGen/X86/subreg-to-reg-3.ll

new file mode 100644 (file)

index 0000000..6634538
--- /dev/null
+++ b/test/CodeGen/X86/subreg-to-reg-3.ll
@@ -0,0 +1,10 @@
+; RUN: llvm-as < %s | llc -march=x86-64 | grep imull
+
+; Don't eliminate or coalesce away the explicit zero-extension!
+
+define i64 @foo(i64 %a) {
+  %b = mul i64 %a, 7823
+  %c = and i64 %b, 4294967295
+  %d = add i64 %c, 1
+  ret i64 %d
+}
diff --git a/test/CodeGen/X86/subreg-to-reg-4.ll b/test/CodeGen/X86/subreg-to-reg-4.ll

new file mode 100644 (file)

index 0000000..bb6af39
--- /dev/null
+++ b/test/CodeGen/X86/subreg-to-reg-4.ll
@@ -0,0 +1,135 @@
+; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: not grep leaq %t
+; RUN: not grep incq %t
+; RUN: not grep decq %t
+; RUN: not grep negq %t
+; RUN: not grep addq %t
+; RUN: not grep subq %t
+; RUN: not grep {movl  %} %t
+
+; Utilize implicit zero-extension on x86-64 to eliminate explicit
+; zero-extensions. Shrink 64-bit adds to 32-bit when the high
+; 32 bits will be zeroed.
+
+define void @bar(i64 %x, i64 %y, i64* %z) nounwind readnone {
+entry:
+       %t0 = add i64 %x, %y
+       %t1 = and i64 %t0, 4294967295
+        store i64 %t1, i64* %z
+       ret void
+}
+define void @easy(i32 %x, i32 %y, i64* %z) nounwind readnone {
+entry:
+       %t0 = add i32 %x, %y
+        %tn = zext i32 %t0 to i64
+       %t1 = and i64 %tn, 4294967295
+        store i64 %t1, i64* %z
+       ret void
+}
+define void @cola(i64 *%x, i64 %y, i64* %z, i64 %u) nounwind readnone {
+entry:
+        %p = load i64* %x
+       %t0 = add i64 %p, %y
+       %t1 = and i64 %t0, 4294967295
+        %t2 = xor i64 %t1, %u
+        store i64 %t2, i64* %z
+       ret void
+}
+define void @yaks(i64 *%x, i64 %y, i64* %z, i64 %u) nounwind readnone {
+entry:
+        %p = load i64* %x
+       %t0 = add i64 %p, %y
+        %t1 = xor i64 %t0, %u
+       %t2 = and i64 %t1, 4294967295
+        store i64 %t2, i64* %z
+       ret void
+}
+define void @foo(i64 *%x, i64 *%y, i64* %z) nounwind readnone {
+entry:
+        %a = load i64* %x
+        %b = load i64* %y
+       %t0 = add i64 %a, %b
+       %t1 = and i64 %t0, 4294967295
+        store i64 %t1, i64* %z
+       ret void
+}
+define void @avo(i64 %x, i64* %z, i64 %u) nounwind readnone {
+entry:
+       %t0 = add i64 %x, 734847
+       %t1 = and i64 %t0, 4294967295
+        %t2 = xor i64 %t1, %u
+        store i64 %t2, i64* %z
+       ret void
+}
+define void @phe(i64 %x, i64* %z, i64 %u) nounwind readnone {
+entry:
+       %t0 = add i64 %x, 734847
+        %t1 = xor i64 %t0, %u
+       %t2 = and i64 %t1, 4294967295
+        store i64 %t2, i64* %z
+       ret void
+}
+define void @oze(i64 %y, i64* %z) nounwind readnone {
+entry:
+       %t0 = add i64 %y, 1
+       %t1 = and i64 %t0, 4294967295
+        store i64 %t1, i64* %z
+       ret void
+}
+
+define void @sbar(i64 %x, i64 %y, i64* %z) nounwind readnone {
+entry:
+       %t0 = sub i64 %x, %y
+       %t1 = and i64 %t0, 4294967295
+        store i64 %t1, i64* %z
+       ret void
+}
+define void @seasy(i32 %x, i32 %y, i64* %z) nounwind readnone {
+entry:
+       %t0 = sub i32 %x, %y
+        %tn = zext i32 %t0 to i64
+       %t1 = and i64 %tn, 4294967295
+        store i64 %t1, i64* %z
+       ret void
+}
+define void @scola(i64 *%x, i64 %y, i64* %z, i64 %u) nounwind readnone {
+entry:
+        %p = load i64* %x
+       %t0 = sub i64 %p, %y
+       %t1 = and i64 %t0, 4294967295
+        %t2 = xor i64 %t1, %u
+        store i64 %t2, i64* %z
+       ret void
+}
+define void @syaks(i64 *%x, i64 %y, i64* %z, i64 %u) nounwind readnone {
+entry:
+        %p = load i64* %x
+       %t0 = sub i64 %p, %y
+        %t1 = xor i64 %t0, %u
+       %t2 = and i64 %t1, 4294967295
+        store i64 %t2, i64* %z
+       ret void
+}
+define void @sfoo(i64 *%x, i64 *%y, i64* %z) nounwind readnone {
+entry:
+        %a = load i64* %x
+        %b = load i64* %y
+       %t0 = sub i64 %a, %b
+       %t1 = and i64 %t0, 4294967295
+        store i64 %t1, i64* %z
+       ret void
+}
+define void @swya(i64 %y, i64* %z) nounwind readnone {
+entry:
+       %t0 = sub i64 0, %y
+       %t1 = and i64 %t0, 4294967295
+        store i64 %t1, i64* %z
+       ret void
+}
+define void @soze(i64 %y, i64* %z) nounwind readnone {
+entry:
+       %t0 = sub i64 %y, 1
+       %t1 = and i64 %t0, 4294967295
+        store i64 %t1, i64* %z
+       ret void
+}
diff --git a/test/CodeGen/X86/subreg-to-reg-5.ll b/test/CodeGen/X86/subreg-to-reg-5.ll

new file mode 100644 (file)

index 0000000..eee751a
--- /dev/null
+++ b/test/CodeGen/X86/subreg-to-reg-5.ll
@@ -0,0 +1,34 @@
+; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: grep addl %t
+; RUN: not egrep {movl|movq} %t
+
+define float @foo(float* %B) nounwind {
+entry:
+       br label %bb2
+
+bb2:           ; preds = %bb3, %entry
+       %B_addr.0.rec = phi i64 [ %indvar.next154, %bb3 ], [ 0, %entry ]                ; <i64> [#uses=2]
+       br i1 false, label %bb3, label %bb4
+
+bb3:           ; preds = %bb2
+       %indvar.next154 = add i64 %B_addr.0.rec, 1              ; <i64> [#uses=1]
+       br label %bb2
+
+bb4:           ; preds = %bb2
+       %B_addr.0 = getelementptr float* %B, i64 %B_addr.0.rec          ; <float*> [#uses=1]
+       %t1 = ptrtoint float* %B_addr.0 to i64          ; <i64> [#uses=1]
+       %t2 = and i64 %t1, 15           ; <i64> [#uses=1]
+       %t3 = icmp eq i64 %t2, 0                ; <i1> [#uses=1]
+       br i1 %t3, label %bb5, label %bb10.preheader
+
+bb10.preheader:                ; preds = %bb4
+       br label %bb9
+
+bb5:           ; preds = %bb4
+       unreachable
+
+bb9:           ; preds = %bb10.preheader
+       %t5 = getelementptr float* %B, i64 0            ; <float*> [#uses=1]
+       %t7 = load float* %t5           ; <float> [#uses=1]
+       ret float %t7
+}
diff --git a/test/CodeGen/X86/subreg-to-reg-6.ll b/test/CodeGen/X86/subreg-to-reg-6.ll

new file mode 100644 (file)

index 0000000..f18eef7
--- /dev/null
+++ b/test/CodeGen/X86/subreg-to-reg-6.ll
@@ -0,0 +1,29 @@
+; RUN: llvm-as < %s | llc -march=x86-64
+
+define i64 @foo() nounwind {
+entry:
+       %t0 = load i32* null, align 8
+       switch i32 %t0, label %bb65 [
+               i32 16, label %bb
+               i32 12, label %bb56
+       ]
+
+bb:
+       br label %bb65
+
+bb56:
+       unreachable
+
+bb65:
+       %a = phi i64 [ 0, %bb ], [ 0, %entry ]
+       tail call void asm "", "{cx}"(i64 %a) nounwind
+       %t15 = and i64 %a, 4294967295
+       ret i64 %t15
+}
+
+define i64 @bar(i64 %t0) nounwind {
+       call void asm "", "{cx}"(i64 0) nounwind
+       %t1 = sub i64 0, %t0
+       %t2 = and i64 %t1, 4294967295
+       ret i64 %t2
+}
diff --git a/test/CodeGen/X86/twoaddr-coalesce.ll b/test/CodeGen/X86/twoaddr-coalesce.ll

index ba98ae8252ee72700a7f438223dad1db3f8e1f72..5293b778796011e7cd4d0bf414364cb122d7ad9d 100644 (file)
--- a/test/CodeGen/X86/twoaddr-coalesce.ll
+++ b/test/CodeGen/X86/twoaddr-coalesce.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -stats |& \
-; RUN:   grep {twoaddrinstr} | grep {Number of instructions aggressively commuted}
+; RUN: llvm-as < %s | llc -march=x86 | grep mov | count 5
  ; rdar://6523745
  
  @"\01LC" = internal constant [4 x i8] c"%d\0A\00"              ; <[4 x i8]*> [#uses=1]
author	Dan Gohman <gohman@apple.com>
	Wed, 8 Apr 2009 00:15:30 +0000 (00:15 +0000)
committer	Dan Gohman <gohman@apple.com>
	Wed, 8 Apr 2009 00:15:30 +0000 (00:15 +0000)
include/llvm/Target/TargetLowering.h		patch \| blob \| history
lib/CodeGen/LiveInterval.cpp		patch \| blob \| history
lib/CodeGen/LiveIntervalAnalysis.cpp		patch \| blob \| history
lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch \| blob \| history
lib/CodeGen/SelectionDAG/TargetLowering.cpp		patch \| blob \| history
lib/CodeGen/SimpleRegisterCoalescing.cpp		patch \| blob \| history
lib/CodeGen/TwoAddressInstructionPass.cpp		patch \| blob \| history
lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
lib/Target/X86/X86ISelLowering.h		patch \| blob \| history
lib/Target/X86/X86Instr64bit.td		patch \| blob \| history
test/CodeGen/X86/2008-05-06-SpillerBug.ll	[deleted file]	patch \| blob \| history
test/CodeGen/X86/byval7.ll		patch \| blob \| history
test/CodeGen/X86/iv-users-in-other-loops.ll		patch \| blob \| history
test/CodeGen/X86/subreg-to-reg-1.ll		patch \| blob \| history
test/CodeGen/X86/subreg-to-reg-3.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/X86/subreg-to-reg-4.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/X86/subreg-to-reg-5.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/X86/subreg-to-reg-6.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/X86/twoaddr-coalesce.ll		patch \| blob \| history