#define DEBUG_TYPE "dagcombine"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/DerivedTypes.h"
+#include "llvm/LLVMContext.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
-#include <set>
using namespace llvm;
STATISTIC(NodesCombined , "Number of dag nodes combined");
//------------------------------ DAGCombiner ---------------------------------//
- class VISIBILITY_HIDDEN DAGCombiner {
+ class DAGCombiner {
SelectionDAG &DAG;
const TargetLowering &TLI;
CombineLevel Level;
/// it can be simplified or if things it uses can be simplified by bit
/// propagation. If so, return true.
bool SimplifyDemandedBits(SDValue Op) {
- APInt Demanded = APInt::getAllOnesValue(Op.getValueSizeInBits());
+ unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
+ APInt Demanded = APInt::getAllOnesValue(BitWidth);
return SimplifyDemandedBits(Op, Demanded);
}
SDValue SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2,
SDValue N3, ISD::CondCode CC,
bool NotExtCompare = false);
- SDValue SimplifySetCC(MVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
+ SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
DebugLoc DL, bool foldBooleans = true);
SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
unsigned HiOp);
- SDValue CombineConsecutiveLoads(SDNode *N, MVT VT);
- SDValue ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *, MVT);
+ SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
+ SDValue ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *, EVT);
SDValue BuildSDIV(SDNode *N);
SDValue BuildUDIV(SDNode *N);
SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL);
/// overlap.
bool isAlias(SDValue Ptr1, int64_t Size1,
const Value *SrcValue1, int SrcValueOffset1,
+ unsigned SrcValueAlign1,
SDValue Ptr2, int64_t Size2,
- const Value *SrcValue2, int SrcValueOffset2) const;
+ const Value *SrcValue2, int SrcValueOffset2,
+ unsigned SrcValueAlign2) const;
/// FindAliasInfo - Extracts the relevant alias information from the memory
/// node. Returns true if the operand was a load.
bool FindAliasInfo(SDNode *N,
SDValue &Ptr, int64_t &Size,
- const Value *&SrcValue, int &SrcValueOffset) const;
+ const Value *&SrcValue, int &SrcValueOffset,
+ unsigned &SrcValueAlignment) const;
/// FindBetterChain - Walk up chain skipping non-aliasing memory nodes,
/// looking for a better chain (aliasing node.)
/// getShiftAmountTy - Returns a type large enough to hold any valid
/// shift amount - before type legalization these can be huge.
- MVT getShiftAmountTy() {
+ EVT getShiftAmountTy() {
return LegalTypes ? TLI.getShiftAmountTy() : TLI.getPointerTy();
}
namespace {
/// WorkListRemover - This class is a DAGUpdateListener that removes any deleted
/// nodes from the worklist.
-class VISIBILITY_HIDDEN WorkListRemover :
- public SelectionDAG::DAGUpdateListener {
+class WorkListRemover : public SelectionDAG::DAGUpdateListener {
DAGCombiner &DC;
public:
explicit WorkListRemover(DAGCombiner &dc) : DC(dc) {}
assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
switch (Op.getOpcode()) {
- default: assert(0 && "Unknown code");
+ default: llvm_unreachable("Unknown code");
case ISD::ConstantFP: {
APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
V.changeSign();
SDValue DAGCombiner::ReassociateOps(unsigned Opc, DebugLoc DL,
SDValue N0, SDValue N1) {
- MVT VT = N0.getValueType();
+ EVT VT = N0.getValueType();
if (N0.getOpcode() == Opc && isa<ConstantSDNode>(N0.getOperand(1))) {
if (isa<ConstantSDNode>(N1)) {
// reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
bool AddTo) {
assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
++NodesCombined;
- DOUT << "\nReplacing.1 "; DEBUG(N->dump(&DAG));
- DOUT << "\nWith: "; DEBUG(To[0].getNode()->dump(&DAG));
- DOUT << " and " << NumTo-1 << " other values\n";
- DEBUG(for (unsigned i = 0, e = NumTo; i != e; ++i)
- assert(N->getValueType(i) == To[i].getValueType() &&
+ DEBUG(dbgs() << "\nReplacing.1 ";
+ N->dump(&DAG);
+ dbgs() << "\nWith: ";
+ To[0].getNode()->dump(&DAG);
+ dbgs() << " and " << NumTo-1 << " other values\n";
+ for (unsigned i = 0, e = NumTo; i != e; ++i)
+ assert((!To[i].getNode() ||
+ N->getValueType(i) == To[i].getValueType()) &&
"Cannot combine value to value of different type!"));
WorkListRemover DeadNodes(*this);
DAG.ReplaceAllUsesWith(N, To, &DeadNodes);
// Replace the old value with the new one.
++NodesCombined;
- DOUT << "\nReplacing.2 "; DEBUG(TLO.Old.getNode()->dump(&DAG));
- DOUT << "\nWith: "; DEBUG(TLO.New.getNode()->dump(&DAG));
- DOUT << '\n';
+ DEBUG(dbgs() << "\nReplacing.2 ";
+ TLO.Old.getNode()->dump(&DAG);
+ dbgs() << "\nWith: ";
+ TLO.New.getNode()->dump(&DAG);
+ dbgs() << '\n');
CommitTargetLoweringOpt(TLO);
return true;
RV.getNode()->getOpcode() != ISD::DELETED_NODE &&
"Node was deleted but visit returned new node!");
- DOUT << "\nReplacing.3 "; DEBUG(N->dump(&DAG));
- DOUT << "\nWith: "; DEBUG(RV.getNode()->dump(&DAG));
- DOUT << '\n';
+ DEBUG(dbgs() << "\nReplacing.3 ";
+ N->dump(&DAG);
+ dbgs() << "\nWith: ";
+ RV.getNode()->dump(&DAG);
+ dbgs() << '\n');
WorkListRemover DeadNodes(*this);
if (N->getNumValues() == RV.getNode()->getNumValues())
DAG.ReplaceAllUsesWith(N, RV.getNode(), &DeadNodes);
// Expose the DAG combiner to the target combiner impls.
TargetLowering::DAGCombinerInfo
- DagCombineInfo(DAG, Level == Unrestricted, false, this);
+ DagCombineInfo(DAG, !LegalTypes, !LegalOperations, false, this);
RV = TLI.PerformDAGCombine(N, DagCombineInfo);
}
break;
case ISD::TokenFactor:
- if ((CombinerAA || Op.hasOneUse()) &&
+ if (Op.hasOneUse() &&
std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) {
// Queue up for processing.
TFs.push_back(Op.getNode());
}
}
}
-
+
SDValue Result;
// If we've change things around then replace token factor.
/// MERGE_VALUES can always be eliminated.
SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
WorkListRemover DeadNodes(*this);
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i),
- &DeadNodes);
+ // Replacing results may cause a different MERGE_VALUES to suddenly
+ // be CSE'd with N, and carry its uses with it. Iterate until no
+ // uses remain, to ensure that the node can be safely deleted.
+ do {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i),
+ &DeadNodes);
+ } while (!N->use_empty());
removeFromWorkList(N);
DAG.DeleteNode(N);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
static
SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1,
SelectionDAG &DAG) {
- MVT VT = N0.getValueType();
+ EVT VT = N0.getValueType();
SDValue N00 = N0.getOperand(0);
SDValue N01 = N0.getOperand(1);
ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N01);
SDValue N1 = N->getOperand(1);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- MVT VT = N0.getValueType();
+ EVT VT = N0.getValueType();
// fold vector ops
if (VT.isVector()) {
if (VT.isInteger() && !VT.isVector()) {
APInt LHSZero, LHSOne;
APInt RHSZero, RHSOne;
- APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits());
+ APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne);
if (LHSZero.getBoolValue()) {
if (Result.getNode()) return Result;
}
+ // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
+ if (N1.getOpcode() == ISD::SHL &&
+ N1.getOperand(0).getOpcode() == ISD::SUB)
+ if (ConstantSDNode *C =
+ dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(0)))
+ if (C->getAPIntValue() == 0)
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0,
+ DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
+ N1.getOperand(0).getOperand(1),
+ N1.getOperand(1)));
+ if (N0.getOpcode() == ISD::SHL &&
+ N0.getOperand(0).getOpcode() == ISD::SUB)
+ if (ConstantSDNode *C =
+ dyn_cast<ConstantSDNode>(N0.getOperand(0).getOperand(0)))
+ if (C->getAPIntValue() == 0)
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1,
+ DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
+ N0.getOperand(0).getOperand(1),
+ N0.getOperand(1)));
+
return SDValue();
}
SDValue N1 = N->getOperand(1);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- MVT VT = N0.getValueType();
+ EVT VT = N0.getValueType();
// If the flag result is dead, turn this into an ADD.
if (N->hasNUsesOfValue(0, 1))
// fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
APInt LHSZero, LHSOne;
APInt RHSZero, RHSOne;
- APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits());
+ APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne);
if (LHSZero.getBoolValue()) {
SDValue N1 = N->getOperand(1);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
- MVT VT = N0.getValueType();
+ EVT VT = N0.getValueType();
// fold vector ops
if (VT.isVector()) {
if (N1C)
return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0,
DAG.getConstant(-N1C->getAPIntValue(), VT));
+ // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
+ if (N0C && N0C->isAllOnesValue())
+ return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0);
// fold (A+B)-A -> B
if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
return N0.getOperand(1);
SDValue N1 = N->getOperand(1);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- MVT VT = N0.getValueType();
+ EVT VT = N0.getValueType();
// fold vector ops
if (VT.isVector()) {
SDValue N1 = N->getOperand(1);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
// fold vector ops
if (VT.isVector()) {
SDValue N1 = N->getOperand(1);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
// fold vector ops
if (VT.isVector()) {
if (N1.getOpcode() == ISD::SHL) {
if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
if (SHC->getAPIntValue().isPowerOf2()) {
- MVT ADDVT = N1.getOperand(1).getValueType();
+ EVT ADDVT = N1.getOperand(1).getValueType();
SDValue Add = DAG.getNode(ISD::ADD, N->getDebugLoc(), ADDVT,
N1.getOperand(1),
DAG.getConstant(SHC->getAPIntValue()
SDValue N1 = N->getOperand(1);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
// fold (srem c1, c2) -> c1%c2
if (N0C && N1C && !N1C->isNullValue())
SDValue N1 = N->getOperand(1);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
// fold (urem c1, c2) -> c1%c2
if (N0C && N1C && !N1C->isNullValue())
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
// fold (mulhs x, 0) -> 0
if (N1C && N1C->isNullValue())
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
// fold (mulhu x, 0) -> 0
if (N1C && N1C->isNullValue())
/// two operands of the same opcode, try to simplify it.
SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
- MVT VT = N0.getValueType();
+ EVT VT = N0.getValueType();
assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
+ // Bail early if none of these transforms apply.
+ if (N0.getNode()->getNumOperands() == 0) return SDValue();
+
// For each of OP in AND/OR/XOR:
// fold (OP (zext x), (zext y)) -> (zext (OP x, y))
// fold (OP (sext x), (sext y)) -> (sext (OP x, y))
// fold (OP (aext x), (aext y)) -> (aext (OP x, y))
- // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
- if ((N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND||
+ // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y))
+ //
+ // do not sink logical op inside of a vector extend, since it may combine
+ // into a vsetcc.
+ EVT Op0VT = N0.getOperand(0).getValueType();
+ if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
+ N0.getOpcode() == ISD::ANY_EXTEND ||
N0.getOpcode() == ISD::SIGN_EXTEND ||
- (N0.getOpcode() == ISD::TRUNCATE &&
- !TLI.isTruncateFree(N0.getOperand(0).getValueType(), VT))) &&
- N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()) {
+ (N0.getOpcode() == ISD::TRUNCATE && TLI.isTypeLegal(Op0VT))) &&
+ !VT.isVector() &&
+ Op0VT == N1.getOperand(0).getValueType() &&
+ (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(),
N0.getOperand(0).getValueType(),
N0.getOperand(0), N1.getOperand(0));
SDValue LL, LR, RL, RR, CC0, CC1;
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- MVT VT = N1.getValueType();
- unsigned BitWidth = VT.getSizeInBits();
+ EVT VT = N1.getValueType();
+ unsigned BitWidth = VT.getScalarType().getSizeInBits();
// fold vector ops
if (VT.isVector()) {
SDValue RAND = ReassociateOps(ISD::AND, N->getDebugLoc(), N0, N1);
if (RAND.getNode() != 0)
return RAND;
- // fold (and (or x, 0xFFFF), 0xFF) -> 0xFF
+ // fold (and (or x, C), D) -> D if (C & D) == D
if (N1C && N0.getOpcode() == ISD::OR)
if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue())
if (!VT.isVector() &&
SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
+
// fold (zext_inreg (extload x)) -> (zextload x)
if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- MVT EVT = LN0->getMemoryVT();
+ EVT MemVT = LN0->getMemoryVT();
// If we zero all the possible extended bits, then we can turn this into
// a zextload if we are running before legalize or the operation is legal.
- unsigned BitWidth = N1.getValueSizeInBits();
+ unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
- BitWidth - EVT.getSizeInBits())) &&
+ BitWidth - MemVT.getScalarType().getSizeInBits())) &&
((!LegalOperations && !LN0->isVolatile()) ||
- TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT))) {
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,
LN0->getChain(), LN0->getBasePtr(),
LN0->getSrcValue(),
- LN0->getSrcValueOffset(), EVT,
- LN0->isVolatile(), LN0->getAlignment());
+ LN0->getSrcValueOffset(), MemVT,
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
AddToWorkList(N);
CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
N0.hasOneUse()) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- MVT EVT = LN0->getMemoryVT();
+ EVT MemVT = LN0->getMemoryVT();
// If we zero all the possible extended bits, then we can turn this into
// a zextload if we are running before legalize or the operation is legal.
- unsigned BitWidth = N1.getValueSizeInBits();
+ unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
- BitWidth - EVT.getSizeInBits())) &&
+ BitWidth - MemVT.getScalarType().getSizeInBits())) &&
((!LegalOperations && !LN0->isVolatile()) ||
- TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT))) {
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,
LN0->getChain(),
LN0->getBasePtr(), LN0->getSrcValue(),
- LN0->getSrcValueOffset(), EVT,
- LN0->isVolatile(), LN0->getAlignment());
+ LN0->getSrcValueOffset(), MemVT,
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
AddToWorkList(N);
CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
// fold (and (load x), 255) -> (zextload x, i8)
// fold (and (extload x, i16), 255) -> (zextload x, i8)
- if (N1C && N0.getOpcode() == ISD::LOAD) {
- LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
+ if (N1C && (N0.getOpcode() == ISD::LOAD ||
+ (N0.getOpcode() == ISD::ANY_EXTEND &&
+ N0.getOperand(0).getOpcode() == ISD::LOAD))) {
+ bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND;
+ LoadSDNode *LN0 = HasAnyExt
+ ? cast<LoadSDNode>(N0.getOperand(0))
+ : cast<LoadSDNode>(N0);
if (LN0->getExtensionType() != ISD::SEXTLOAD &&
- LN0->isUnindexed() && N0.hasOneUse() &&
- // Do not change the width of a volatile load.
- !LN0->isVolatile()) {
- MVT EVT = MVT::Other;
+ LN0->isUnindexed() && N0.hasOneUse() && LN0->hasOneUse()) {
uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits();
- if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue()))
- EVT = MVT::getIntegerVT(ActiveBits);
-
- MVT LoadedVT = LN0->getMemoryVT();
-
- // Do not generate loads of non-round integer types since these can
- // be expensive (and would be wrong if the type is not byte sized).
- if (EVT != MVT::Other && LoadedVT.bitsGT(EVT) && EVT.isRound() &&
- (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT))) {
- MVT PtrType = N0.getOperand(1).getValueType();
-
- // For big endian targets, we need to add an offset to the pointer to
- // load the correct bytes. For little endian systems, we merely need to
- // read fewer bytes from the same pointer.
- unsigned LVTStoreBytes = LoadedVT.getStoreSizeInBits()/8;
- unsigned EVTStoreBytes = EVT.getStoreSizeInBits()/8;
- unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
- unsigned Alignment = LN0->getAlignment();
- SDValue NewPtr = LN0->getBasePtr();
-
- if (TLI.isBigEndian()) {
- NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), PtrType,
- NewPtr, DAG.getConstant(PtrOff, PtrType));
- Alignment = MinAlign(Alignment, PtrOff);
+ if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){
+ EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
+ EVT LoadedVT = LN0->getMemoryVT();
+
+ if (ExtVT == LoadedVT &&
+ (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
+ EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
+
+ SDValue NewLoad =
+ DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy,
+ LN0->getChain(), LN0->getBasePtr(),
+ LN0->getSrcValue(), LN0->getSrcValueOffset(),
+ ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ AddToWorkList(N);
+ CombineTo(LN0, NewLoad, NewLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
+
+ // Do not change the width of a volatile load.
+ // Do not generate loads of non-round integer types since these can
+ // be expensive (and would be wrong if the type is not byte sized).
+ if (!LN0->isVolatile() && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() &&
+ (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
+ EVT PtrType = LN0->getOperand(1).getValueType();
+
+ unsigned Alignment = LN0->getAlignment();
+ SDValue NewPtr = LN0->getBasePtr();
+
+ // For big endian targets, we need to add an offset to the pointer
+ // to load the correct bytes. For little endian systems, we merely
+ // need to read fewer bytes from the same pointer.
+ if (TLI.isBigEndian()) {
+ unsigned LVTStoreBytes = LoadedVT.getStoreSize();
+ unsigned EVTStoreBytes = ExtVT.getStoreSize();
+ unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
+ NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), PtrType,
+ NewPtr, DAG.getConstant(PtrOff, PtrType));
+ Alignment = MinAlign(Alignment, PtrOff);
+ }
- AddToWorkList(NewPtr.getNode());
- SDValue Load =
- DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), VT, LN0->getChain(),
- NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset(),
- EVT, LN0->isVolatile(), Alignment);
- AddToWorkList(N);
- CombineTo(N0.getNode(), Load, Load.getValue(1));
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ AddToWorkList(NewPtr.getNode());
+
+ EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
+ SDValue Load =
+ DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy,
+ LN0->getChain(), NewPtr,
+ LN0->getSrcValue(), LN0->getSrcValueOffset(),
+ ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
+ Alignment);
+ AddToWorkList(N);
+ CombineTo(LN0, Load, Load.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
}
}
}
SDValue LL, LR, RL, RR, CC0, CC1;
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- MVT VT = N1.getValueType();
+ EVT VT = N1.getValueType();
// fold vector ops
if (VT.isVector()) {
}
// fold (or x, undef) -> -1
- if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(~0ULL, VT);
+ if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) {
+ EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
+ return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
+ }
// fold (or c1, c2) -> c1|c2
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C);
if (ROR.getNode() != 0)
return ROR;
// Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
+ // iff (c1 & c2) == 0.
if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
isa<ConstantSDNode>(N0.getOperand(1))) {
ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
- return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
- DAG.getNode(ISD::OR, N0.getDebugLoc(), VT,
- N0.getOperand(0), N1),
- DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1));
+ if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0)
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ DAG.getNode(ISD::OR, N0.getDebugLoc(), VT,
+ N0.getOperand(0), N1),
+ DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1));
}
// fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
// a rot[lr].
SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) {
// Must be a legal type. Expanded 'n promoted things won't work with rotates.
- MVT VT = LHS.getValueType();
+ EVT VT = LHS.getValueType();
if (!TLI.isTypeLegal(VT)) return 0;
// The target must have at least one rotate flavor.
SDValue LHS, RHS, CC;
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- MVT VT = N0.getValueType();
+ EVT VT = N0.getValueType();
// fold vector ops
if (VT.isVector()) {
if (!LegalOperations || TLI.isCondCodeLegal(NotCC, LHS.getValueType())) {
switch (N0.getOpcode()) {
default:
- assert(0 && "Unhandled SetCC Equivalent!");
- abort();
+ llvm_unreachable("Unhandled SetCC Equivalent!");
case ISD::SETCC:
return DAG.getSetCC(N->getDebugLoc(), VT, LHS, RHS, NotCC);
case ISD::SELECT_CC:
!isa<ConstantSDNode>(BinOpLHSVal->getOperand(1)))
return SDValue();
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
// If this is a signed shift right, and the high bit is modified by the
// logical operation, do not perform the transformation. The highBitSet
SDValue N1 = N->getOperand(1);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- MVT VT = N0.getValueType();
- unsigned OpSizeInBits = VT.getSizeInBits();
+ EVT VT = N0.getValueType();
+ unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
// fold (shl c1, c2) -> c1<<c2
if (N0C && N1C)
return N0;
// if (shl x, c) is known to be zero, return 0
if (DAG.MaskedValueIsZero(SDValue(N, 0),
- APInt::getAllOnesValue(VT.getSizeInBits())))
+ APInt::getAllOnesValue(OpSizeInBits)))
return DAG.getConstant(0, VT);
// fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
if (N1.getOpcode() == ISD::TRUNCATE &&
N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
SDValue N101 = N1.getOperand(0).getOperand(1);
if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
- MVT TruncVT = N1.getValueType();
+ EVT TruncVT = N1.getValueType();
SDValue N100 = N1.getOperand(0).getOperand(0);
APInt TruncC = N101C->getAPIntValue();
TruncC.trunc(TruncVT.getSizeInBits());
if (N1C && N0.getOpcode() == ISD::SRL &&
N0.getOperand(1).getOpcode() == ISD::Constant) {
uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
- uint64_t c2 = N1C->getZExtValue();
- SDValue Mask = DAG.getNode(ISD::AND, N0.getDebugLoc(), VT, N0.getOperand(0),
- DAG.getConstant(~0ULL << c1, VT));
- if (c2 > c1)
- return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, Mask,
- DAG.getConstant(c2-c1, N1.getValueType()));
- else
- return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Mask,
- DAG.getConstant(c1-c2, N1.getValueType()));
+ if (c1 < VT.getSizeInBits()) {
+ uint64_t c2 = N1C->getZExtValue();
+ SDValue HiBitsMask =
+ DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(),
+ VT.getSizeInBits() - c1),
+ VT);
+ SDValue Mask = DAG.getNode(ISD::AND, N0.getDebugLoc(), VT,
+ N0.getOperand(0),
+ HiBitsMask);
+ if (c2 > c1)
+ return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, Mask,
+ DAG.getConstant(c2-c1, N1.getValueType()));
+ else
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Mask,
+ DAG.getConstant(c1-c2, N1.getValueType()));
+ }
}
// fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
- if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1))
+ if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) {
+ SDValue HiBitsMask =
+ DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(),
+ VT.getSizeInBits() -
+ N1C->getZExtValue()),
+ VT);
return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0),
- DAG.getConstant(~0ULL << N1C->getZExtValue(), VT));
+ HiBitsMask);
+ }
return N1C ? visitShiftByConstant(N, N1C->getZExtValue()) : SDValue();
}
SDValue N1 = N->getOperand(1);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- MVT VT = N0.getValueType();
+ EVT VT = N0.getValueType();
+ unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
// fold (sra c1, c2) -> (sra c1, c2)
if (N0C && N1C)
if (N0C && N0C->isAllOnesValue())
return N0;
// fold (sra x, (setge c, size(x))) -> undef
- if (N1C && N1C->getZExtValue() >= VT.getSizeInBits())
+ if (N1C && N1C->getZExtValue() >= OpSizeInBits)
return DAG.getUNDEF(VT);
// fold (sra x, 0) -> x
if (N1C && N1C->isNullValue())
// fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
// sext_inreg.
if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
- unsigned LowBits = VT.getSizeInBits() - (unsigned)N1C->getZExtValue();
- MVT EVT = MVT::getIntegerVT(LowBits);
- if ((!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, EVT)))
+ unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
+ EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
+ if (VT.isVector())
+ ExtVT = EVT::getVectorVT(*DAG.getContext(),
+ ExtVT, VT.getVectorNumElements());
+ if ((!LegalOperations ||
+ TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
- N0.getOperand(0), DAG.getValueType(EVT));
+ N0.getOperand(0), DAG.getValueType(ExtVT));
}
// fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
if (N1C && N0.getOpcode() == ISD::SRA) {
if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
unsigned Sum = N1C->getZExtValue() + C1->getZExtValue();
- if (Sum >= VT.getSizeInBits()) Sum = VT.getSizeInBits()-1;
+ if (Sum >= OpSizeInBits) Sum = OpSizeInBits-1;
return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0.getOperand(0),
DAG.getConstant(Sum, N1C->getValueType(0)));
}
const ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
if (N01C && N1C) {
// Determine what the truncate's result bitsize and type would be.
- unsigned VTValSize = VT.getSizeInBits();
- MVT TruncVT =
- MVT::getIntegerVT(VTValSize - N1C->getZExtValue());
+ EVT TruncVT =
+ EVT::getIntegerVT(*DAG.getContext(), OpSizeInBits - N1C->getZExtValue());
// Determine the residual right-shift amount.
signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
// If the shift is not a no-op (in which case this should be just a sign
// extend already), the truncated to type is legal, sign_extend is legal
- // on that type, and the the truncate to that type is both legal and free,
+ // on that type, and the truncate to that type is both legal and free,
// perform the transform.
if ((ShiftAmt > 0) &&
TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
SDValue N101 = N1.getOperand(0).getOperand(1);
if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
- MVT TruncVT = N1.getValueType();
+ EVT TruncVT = N1.getValueType();
SDValue N100 = N1.getOperand(0).getOperand(0);
APInt TruncC = N101C->getAPIntValue();
- TruncC.trunc(TruncVT.getSizeInBits());
+ TruncC.trunc(TruncVT.getScalarType().getSizeInBits());
return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0,
DAG.getNode(ISD::AND, N->getDebugLoc(),
TruncVT,
SDValue N1 = N->getOperand(1);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- MVT VT = N0.getValueType();
- unsigned OpSizeInBits = VT.getSizeInBits();
+ EVT VT = N0.getValueType();
+ unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
// fold (srl c1, c2) -> c1 >>u c2
if (N0C && N1C)
// fold (srl (anyextend x), c) -> (anyextend (srl x, c))
if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
// Shifting in all undef bits?
- MVT SmallVT = N0.getOperand(0).getValueType();
+ EVT SmallVT = N0.getOperand(0).getValueType();
if (N1C->getZExtValue() >= SmallVT.getSizeInBits())
return DAG.getUNDEF(VT);
if (N1C && N0.getOpcode() == ISD::CTLZ &&
N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) {
APInt KnownZero, KnownOne;
- APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits());
+ APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
DAG.ComputeMaskedBits(N0.getOperand(0), Mask, KnownZero, KnownOne);
// If any of the input bits are KnownOne, then the input couldn't be all
N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
SDValue N101 = N1.getOperand(0).getOperand(1);
if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
- MVT TruncVT = N1.getValueType();
+ EVT TruncVT = N1.getValueType();
SDValue N100 = N1.getOperand(0).getOperand(0);
APInt TruncC = N101C->getAPIntValue();
TruncC.trunc(TruncVT.getSizeInBits());
if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
- return N1C ? visitShiftByConstant(N, N1C->getZExtValue()) : SDValue();
+ if (N1C) {
+ SDValue NewSRL = visitShiftByConstant(N, N1C->getZExtValue());
+ if (NewSRL.getNode())
+ return NewSRL;
+ }
+
+ // Here is a common situation. We want to optimize:
+ //
+ // %a = ...
+ // %b = and i32 %a, 2
+ // %c = srl i32 %b, 1
+ // brcond i32 %c ...
+ //
+ // into
+ //
+ // %a = ...
+ // %b = and %a, 2
+ // %c = setcc eq %b, 0
+ // brcond %c ...
+ //
+ // However when after the source operand of SRL is optimized into AND, the SRL
+ // itself may not be optimized further. Look for it and add the BRCOND into
+ // the worklist.
+ if (N->hasOneUse()) {
+ SDNode *Use = *N->use_begin();
+ if (Use->getOpcode() == ISD::BRCOND)
+ AddToWorkList(Use);
+ else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
+ // Also look pass the truncate.
+ Use = *Use->use_begin();
+ if (Use->getOpcode() == ISD::BRCOND)
+ AddToWorkList(Use);
+ }
+ }
+
+ return SDValue();
}
SDValue DAGCombiner::visitCTLZ(SDNode *N) {
SDValue N0 = N->getOperand(0);
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
// fold (ctlz c1) -> c2
if (isa<ConstantSDNode>(N0))
SDValue DAGCombiner::visitCTTZ(SDNode *N) {
SDValue N0 = N->getOperand(0);
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
// fold (cttz c1) -> c2
if (isa<ConstantSDNode>(N0))
SDValue DAGCombiner::visitCTPOP(SDNode *N) {
SDValue N0 = N->getOperand(0);
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
// fold (ctpop c1) -> c2
if (isa<ConstantSDNode>(N0))
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
- MVT VT = N->getValueType(0);
- MVT VT0 = N0.getValueType();
+ EVT VT = N->getValueType(0);
+ EVT VT0 = N0.getValueType();
// fold (select C, X, X) -> X
if (N1 == N2)
// Check against MVT::Other for SELECT_CC, which is a workaround for targets
// having to say they don't support SELECT_CC on every type the DAG knows
// about, since there is no way to mark an opcode illegal at all value types
- if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other))
+ if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other) &&
+ TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))
return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT,
N0.getOperand(0), N0.getOperand(1),
N1, N2, N0.getOperand(2));
SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
// fold (sext c1) -> c1
if (isa<ConstantSDNode>(N0))
// See if the value being truncated is already sign extended. If so, just
// eliminate the trunc/sext pair.
SDValue Op = N0.getOperand(0);
- unsigned OpBits = Op.getValueType().getSizeInBits();
- unsigned MidBits = N0.getValueType().getSizeInBits();
- unsigned DestBits = VT.getSizeInBits();
+ unsigned OpBits = Op.getValueType().getScalarType().getSizeInBits();
+ unsigned MidBits = N0.getValueType().getScalarType().getSizeInBits();
+ unsigned DestBits = VT.getScalarType().getSizeInBits();
unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
if (OpBits == DestBits) {
// fold (sext (truncate x)) -> (sextinreg x).
if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
N0.getValueType())) {
- if (Op.getValueType().bitsLT(VT))
+ if (OpBits < DestBits)
Op = DAG.getNode(ISD::ANY_EXTEND, N0.getDebugLoc(), VT, Op);
- else if (Op.getValueType().bitsGT(VT))
+ else if (OpBits > DestBits)
Op = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), VT, Op);
return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, Op,
DAG.getValueType(N0.getValueType()));
LN0->getBasePtr(), LN0->getSrcValue(),
LN0->getSrcValueOffset(),
N0.getValueType(),
- LN0->isVolatile(), LN0->getAlignment());
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
CombineTo(N, ExtLoad);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
N0.getValueType(), ExtLoad);
if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- MVT EVT = LN0->getMemoryVT();
+ EVT MemVT = LN0->getMemoryVT();
if ((!LegalOperations && !LN0->isVolatile()) ||
- TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT)) {
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) {
SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
LN0->getChain(),
LN0->getBasePtr(), LN0->getSrcValue(),
- LN0->getSrcValueOffset(), EVT,
- LN0->isVolatile(), LN0->getAlignment());
+ LN0->getSrcValueOffset(), MemVT,
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
CombineTo(N, ExtLoad);
CombineTo(N0.getNode(),
DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
}
}
- // sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc)
if (N0.getOpcode() == ISD::SETCC) {
+ // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
+ if (VT.isVector() &&
+ // We know that the # elements of the results is the same as the
+ // # elements of the compare (and the # elements of the compare result
+ // for that matter). Check to see that they are the same size. If so,
+ // we know that the element size of the sext'd result matches the
+ // element size of the compare operands.
+ VT.getSizeInBits() == N0.getOperand(0).getValueType().getSizeInBits() &&
+
+ // Only do this before legalize for now.
+ !LegalOperations) {
+ return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ }
+
+ // sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc)
+ SDValue NegOne =
+ DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT);
SDValue SCC =
SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
- DAG.getConstant(~0ULL, VT), DAG.getConstant(0, VT),
+ NegOne, DAG.getConstant(0, VT),
cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
if (SCC.getNode()) return SCC;
+ if (!LegalOperations ||
+ TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(VT)))
+ return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
+ DAG.getSetCC(N->getDebugLoc(),
+ TLI.getSetCCResultType(VT),
+ N0.getOperand(0), N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get()),
+ NegOne, DAG.getConstant(0, VT));
}
+
+
// fold (sext x) -> (zext x) if the sign bit is known zero.
if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
// fold (zext c1) -> c1
if (isa<ConstantSDNode>(N0))
// fold (zext (truncate x)) -> (and x, mask)
if (N0.getOpcode() == ISD::TRUNCATE &&
- (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) {
+ (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) &&
+ (!TLI.isTruncateFree(N0.getOperand(0).getValueType(),
+ N0.getValueType()) ||
+ !TLI.isZExtFree(N0.getValueType(), VT))) {
SDValue Op = N0.getOperand(0);
if (Op.getValueType().bitsLT(VT)) {
Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op);
} else if (Op.getValueType().bitsGT(VT)) {
Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op);
}
- return DAG.getZeroExtendInReg(Op, N->getDebugLoc(), N0.getValueType());
+ return DAG.getZeroExtendInReg(Op, N->getDebugLoc(),
+ N0.getValueType().getScalarType());
}
// Fold (zext (and (trunc x), cst)) -> (and x, cst),
LN0->getBasePtr(), LN0->getSrcValue(),
LN0->getSrcValueOffset(),
N0.getValueType(),
- LN0->isVolatile(), LN0->getAlignment());
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
CombineTo(N, ExtLoad);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
N0.getValueType(), ExtLoad);
if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- MVT EVT = LN0->getMemoryVT();
+ EVT MemVT = LN0->getMemoryVT();
if ((!LegalOperations && !LN0->isVolatile()) ||
- TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT)) {
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) {
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT,
LN0->getChain(),
LN0->getBasePtr(), LN0->getSrcValue(),
- LN0->getSrcValueOffset(), EVT,
- LN0->isVolatile(), LN0->getAlignment());
+ LN0->getSrcValueOffset(), MemVT,
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
CombineTo(N, ExtLoad);
CombineTo(N0.getNode(),
DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(),
if (SCC.getNode()) return SCC;
}
+ // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
+ if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
+ isa<ConstantSDNode>(N0.getOperand(1)) &&
+ N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
+ N0.hasOneUse()) {
+ if (N0.getOpcode() == ISD::SHL) {
+ // If the original shl may be shifting out bits, do not perform this
+ // transformation.
+ unsigned ShAmt = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+ unsigned KnownZeroBits = N0.getOperand(0).getValueType().getSizeInBits() -
+ N0.getOperand(0).getOperand(0).getValueType().getSizeInBits();
+ if (ShAmt > KnownZeroBits)
+ return SDValue();
+ }
+ DebugLoc dl = N->getDebugLoc();
+ return DAG.getNode(N0.getOpcode(), dl, VT,
+ DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(0)),
+ DAG.getNode(ISD::ZERO_EXTEND, dl,
+ N0.getOperand(1).getValueType(),
+ N0.getOperand(1)));
+ }
+
return SDValue();
}
SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
// fold (aext c1) -> c1
if (isa<ConstantSDNode>(N0))
LN0->getBasePtr(), LN0->getSrcValue(),
LN0->getSrcValueOffset(),
N0.getValueType(),
- LN0->isVolatile(), LN0->getAlignment());
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
CombineTo(N, ExtLoad);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
N0.getValueType(), ExtLoad);
!ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
N0.hasOneUse()) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- MVT EVT = LN0->getMemoryVT();
+ EVT MemVT = LN0->getMemoryVT();
SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), N->getDebugLoc(),
VT, LN0->getChain(), LN0->getBasePtr(),
LN0->getSrcValue(),
- LN0->getSrcValueOffset(), EVT,
- LN0->isVolatile(), LN0->getAlignment());
+ LN0->getSrcValueOffset(), MemVT,
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
CombineTo(N, ExtLoad);
CombineTo(N0.getNode(),
DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
unsigned Opc = N->getOpcode();
ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
SDValue N0 = N->getOperand(0);
- MVT VT = N->getValueType(0);
- MVT EVT = VT;
+ EVT VT = N->getValueType(0);
+ EVT ExtVT = VT;
// This transformation isn't valid for vector loads.
if (VT.isVector())
return SDValue();
- // Special case: SIGN_EXTEND_INREG is basically truncating to EVT then
+ // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
// extended to VT.
if (Opc == ISD::SIGN_EXTEND_INREG) {
ExtType = ISD::SEXTLOAD;
- EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
- if (LegalOperations && !TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))
+ ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+ if (LegalOperations && !TLI.isLoadExtLegal(ISD::SEXTLOAD, ExtVT))
return SDValue();
}
- unsigned EVTBits = EVT.getSizeInBits();
+ unsigned EVTBits = ExtVT.getSizeInBits();
unsigned ShAmt = 0;
- if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
+ if (N0.getOpcode() == ISD::SRL && N0.hasOneUse() && ExtVT.isRound()) {
if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
ShAmt = N01->getZExtValue();
// Is the shift amount a multiple of size of VT?
if ((ShAmt & (EVTBits-1)) == 0) {
N0 = N0.getOperand(0);
- if (N0.getValueType().getSizeInBits() <= EVTBits)
+ // Is the load width a multiple of size of VT?
+ if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0)
return SDValue();
}
}
// Do not generate loads of non-round integer types since these can
// be expensive (and would be wrong if the type is not byte sized).
- if (isa<LoadSDNode>(N0) && N0.hasOneUse() && EVT.isRound() &&
+ if (isa<LoadSDNode>(N0) && N0.hasOneUse() && ExtVT.isRound() &&
cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() > EVTBits &&
// Do not change the width of a volatile load.
!cast<LoadSDNode>(N0)->isVolatile()) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- MVT PtrType = N0.getOperand(1).getValueType();
+ EVT PtrType = N0.getOperand(1).getValueType();
// For big endian targets, we need to adjust the offset to the pointer to
// load the correct bytes.
if (TLI.isBigEndian()) {
unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
- unsigned EVTStoreBits = EVT.getStoreSizeInBits();
+ unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
}
SDValue Load = (ExtType == ISD::NON_EXTLOAD)
? DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr,
LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff,
- LN0->isVolatile(), NewAlign)
+ LN0->isVolatile(), LN0->isNonTemporal(), NewAlign)
: DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(), NewPtr,
LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff,
- EVT, LN0->isVolatile(), NewAlign);
+ ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
+ NewAlign);
// Replace the old load's chain with the new load's chain.
WorkListRemover DeadNodes(*this);
SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- MVT VT = N->getValueType(0);
- MVT EVT = cast<VTSDNode>(N1)->getVT();
- unsigned VTBits = VT.getSizeInBits();
- unsigned EVTBits = EVT.getSizeInBits();
+ EVT VT = N->getValueType(0);
+ EVT EVT = cast<VTSDNode>(N1)->getVT();
+ unsigned VTBits = VT.getScalarType().getSizeInBits();
+ unsigned EVTBits = EVT.getScalarType().getSizeInBits();
// fold (sext_in_reg c1) -> c1
if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF)
return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, N0, N1);
// If the input is already sign extended, just drop the extension.
- if (DAG.ComputeNumSignBits(N0) >= VT.getSizeInBits()-EVTBits+1)
+ if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
return N0;
// fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
// if x is small enough.
if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
SDValue N00 = N0.getOperand(0);
- if (N00.getValueType().getSizeInBits() < EVTBits)
+ if (N00.getValueType().getScalarType().getSizeInBits() < EVTBits)
return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N00, N1);
}
// We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
if (N0.getOpcode() == ISD::SRL) {
if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
- if (ShAmt->getZExtValue()+EVTBits <= VT.getSizeInBits()) {
+ if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
// We can turn this into an SRA iff the input to the SRL is already sign
// extended enough.
unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
- if (VT.getSizeInBits()-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
+ if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT,
N0.getOperand(0), N0.getOperand(1));
}
LN0->getChain(),
LN0->getBasePtr(), LN0->getSrcValue(),
LN0->getSrcValueOffset(), EVT,
- LN0->isVolatile(), LN0->getAlignment());
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
CombineTo(N, ExtLoad);
CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
LN0->getChain(),
LN0->getBasePtr(), LN0->getSrcValue(),
LN0->getSrcValueOffset(), EVT,
- LN0->isVolatile(), LN0->getAlignment());
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
CombineTo(N, ExtLoad);
CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
SDValue N0 = N->getOperand(0);
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
// noop truncate
if (N0.getValueType() == N->getValueType(0))
return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0));
else
// if the source and dest are the same type, we can drop both the extend
- // and the truncate
+ // and the truncate.
return N0.getOperand(0);
}
/// CombineConsecutiveLoads - build_pair (load, load) -> load
/// if load locations are consecutive.
-SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, MVT VT) {
+SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
assert(N->getOpcode() == ISD::BUILD_PAIR);
- SDNode *LD1 = getBuildPairElt(N, 0);
- if (!ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse())
+ LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
+ LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
+ if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse())
return SDValue();
- MVT LD1VT = LD1->getValueType(0);
- SDNode *LD2 = getBuildPairElt(N, 1);
- const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ EVT LD1VT = LD1->getValueType(0);
if (ISD::isNON_EXTLoad(LD2) &&
LD2->hasOneUse() &&
// If both are volatile this would reduce the number of volatile loads.
// If one is volatile it might be ok, but play conservative and bail out.
- !cast<LoadSDNode>(LD1)->isVolatile() &&
- !cast<LoadSDNode>(LD2)->isVolatile() &&
- TLI.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1, MFI)) {
- LoadSDNode *LD = cast<LoadSDNode>(LD1);
- unsigned Align = LD->getAlignment();
+ !LD1->isVolatile() &&
+ !LD2->isVolatile() &&
+ DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) {
+ unsigned Align = LD1->getAlignment();
unsigned NewAlign = TLI.getTargetData()->
- getABITypeAlignment(VT.getTypeForMVT());
+ getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
if (NewAlign <= Align &&
(!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
- return DAG.getLoad(VT, N->getDebugLoc(), LD->getChain(), LD->getBasePtr(),
- LD->getSrcValue(), LD->getSrcValueOffset(),
- false, Align);
+ return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(),
+ LD1->getBasePtr(), LD1->getSrcValue(),
+ LD1->getSrcValueOffset(), false, false, Align);
}
return SDValue();
SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {
SDValue N0 = N->getOperand(0);
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
// If the input is a BUILD_VECTOR with all constant elements, fold this now.
// Only do this before legalize, since afterward the target may be depending
break;
}
- MVT DestEltVT = N->getValueType(0).getVectorElementType();
+ EVT DestEltVT = N->getValueType(0).getVectorElementType();
assert(!DestEltVT.isVector() &&
"Element type of vector ValueType must not be vector!");
if (isSimple)
// If the input is a constant, let getNode fold it.
if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
SDValue Res = DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, N0);
- if (Res.getNode() != N) return Res;
+ if (Res.getNode() != N) {
+ if (!LegalOperations ||
+ TLI.isOperationLegal(Res.getNode()->getOpcode(), VT))
+ return Res;
+
+ // Folding it resulted in an illegal node, and it's too late to
+ // do that. Clean up the old node and forego the transformation.
+ // Ideally this won't happen very often, because instcombine
+ // and the earlier dagcombine runs (where illegal nodes are
+ // permitted) should have folded most of them already.
+ DAG.DeleteNode(Res.getNode());
+ }
}
// (conv (conv x, t1), t2) -> (conv x, t2)
(!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
unsigned Align = TLI.getTargetData()->
- getABITypeAlignment(VT.getTypeForMVT());
+ getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
unsigned OrigAlign = LN0->getAlignment();
if (Align <= OrigAlign) {
SDValue Load = DAG.getLoad(VT, N->getDebugLoc(), LN0->getChain(),
LN0->getBasePtr(),
LN0->getSrcValue(), LN0->getSrcValueOffset(),
- LN0->isVolatile(), OrigAlign);
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ OrigAlign);
AddToWorkList(N);
CombineTo(N0.getNode(),
DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(),
isa<ConstantFPSDNode>(N0.getOperand(0)) &&
VT.isInteger() && !VT.isVector()) {
unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits();
- MVT IntXVT = MVT::getIntegerVT(OrigXWidth);
+ EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
if (TLI.isTypeLegal(IntXVT) || !LegalTypes) {
SDValue X = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(),
IntXVT, N0.getOperand(1));
}
SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
return CombineConsecutiveLoads(N, VT);
}
/// node with Constant, ConstantFP or Undef operands. DstEltVT indicates the
/// destination element value type.
SDValue DAGCombiner::
-ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT DstEltVT) {
- MVT SrcEltVT = BV->getValueType(0).getVectorElementType();
+ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
+ EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
// If this is already the right type, we're done.
if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
DstEltVT, Op));
AddToWorkList(Ops.back().getNode());
}
- MVT VT = MVT::getVectorVT(DstEltVT,
+ EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
BV->getValueType(0).getVectorNumElements());
return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
&Ops[0], Ops.size());
// Convert the input float vector to a int vector where the elements are the
// same sizes.
assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!");
- MVT IntVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits());
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
BV = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, IntVT).getNode();
SrcEltVT = IntVT;
}
// convert to integer first, then to FP of the right size.
if (DstEltVT.isFloatingPoint()) {
assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!");
- MVT TmpVT = MVT::getIntegerVT(DstEltVT.getSizeInBits());
+ EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
SDNode *Tmp = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, TmpVT).getNode();
// Next, convert to FP elements of the same size.
Ops.push_back(DAG.getConstant(NewBits, DstEltVT));
}
- MVT VT = MVT::getVectorVT(DstEltVT, Ops.size());
+ EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
&Ops[0], Ops.size());
}
// turns into multiple outputs.
bool isS2V = ISD::isScalarToVector(BV);
unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
- MVT VT = MVT::getVectorVT(DstEltVT, NumOutputsPerInput*BV->getNumOperands());
+ EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
+ NumOutputsPerInput*BV->getNumOperands());
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
SDValue N1 = N->getOperand(1);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
// fold vector ops
if (VT.isVector()) {
SDValue N1 = N->getOperand(1);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
// fold vector ops
if (VT.isVector()) {
SDValue N1 = N->getOperand(1);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
// fold vector ops
if (VT.isVector()) {
// fold (fmul A, 0) -> 0
if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
return N1;
+ // fold (fmul A, 0) -> 0, vector edition.
+ if (UnsafeFPMath && ISD::isBuildVectorAllZeros(N1.getNode()))
+ return N1;
// fold (fmul X, 2.0) -> (fadd X, X)
if (N1CFP && N1CFP->isExactlyValue(+2.0))
return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N0);
- // fold (fmul X, (fneg 1.0)) -> (fneg X)
+ // fold (fmul X, -1.0) -> (fneg X)
if (N1CFP && N1CFP->isExactlyValue(-1.0))
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N0);
SDValue N1 = N->getOperand(1);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
// fold vector ops
if (VT.isVector()) {
SDValue N1 = N->getOperand(1);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
// fold (frem c1, c2) -> fmod(c1,c2)
if (N0CFP && N1CFP && VT != MVT::ppcf128)
SDValue N1 = N->getOperand(1);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
if (N0CFP && N1CFP && VT != MVT::ppcf128) // Constant fold
return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, N0, N1);
SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
SDValue N0 = N->getOperand(0);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- MVT VT = N->getValueType(0);
- MVT OpVT = N0.getValueType();
+ EVT VT = N->getValueType(0);
+ EVT OpVT = N0.getValueType();
// fold (sint_to_fp c1) -> c1fp
if (N0C && OpVT != MVT::ppcf128)
SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
SDValue N0 = N->getOperand(0);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- MVT VT = N->getValueType(0);
- MVT OpVT = N0.getValueType();
+ EVT VT = N->getValueType(0);
+ EVT OpVT = N0.getValueType();
// fold (uint_to_fp c1) -> c1fp
if (N0C && OpVT != MVT::ppcf128)
SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
SDValue N0 = N->getOperand(0);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
// fold (fp_to_sint c1fp) -> c1
if (N0CFP)
SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
SDValue N0 = N->getOperand(0);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
// fold (fp_to_uint c1fp) -> c1
if (N0CFP && VT != MVT::ppcf128)
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
// fold (fp_round c1fp) -> c1fp
if (N0CFP && N0.getValueType() != MVT::ppcf128)
SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
SDValue N0 = N->getOperand(0);
- MVT VT = N->getValueType(0);
- MVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+ EVT VT = N->getValueType(0);
+ EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
// fold (fp_round_inreg c1fp) -> c1fp
SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
// If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
if (N->hasOneUse() &&
LN0->getBasePtr(), LN0->getSrcValue(),
LN0->getSrcValueOffset(),
N0.getValueType(),
- LN0->isVolatile(), LN0->getAlignment());
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
CombineTo(N, ExtLoad);
CombineTo(N0.getNode(),
DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(),
SDValue DAGCombiner::visitFNEG(SDNode *N) {
SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
if (isNegatibleForFree(N0, LegalOperations))
return GetNegatedExpression(N0, DAG, LegalOperations);
// Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading
// constant pool values.
- if (N0.getOpcode() == ISD::BIT_CONVERT && N0.getNode()->hasOneUse() &&
- N0.getOperand(0).getValueType().isInteger() &&
- !N0.getOperand(0).getValueType().isVector()) {
+ if (N0.getOpcode() == ISD::BIT_CONVERT &&
+ !VT.isVector() &&
+ N0.getNode()->hasOneUse() &&
+ N0.getOperand(0).getValueType().isInteger()) {
SDValue Int = N0.getOperand(0);
- MVT IntVT = Int.getValueType();
+ EVT IntVT = Int.getValueType();
if (IntVT.isInteger() && !IntVT.isVector()) {
Int = DAG.getNode(ISD::XOR, N0.getDebugLoc(), IntVT, Int,
DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
AddToWorkList(Int.getNode());
return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(),
- N->getValueType(0), Int);
+ VT, Int);
}
}
SDValue DAGCombiner::visitFABS(SDNode *N) {
SDValue N0 = N->getOperand(0);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
// fold (fabs c1) -> fabs(c1)
if (N0CFP && VT != MVT::ppcf128)
N0.getOperand(0).getValueType().isInteger() &&
!N0.getOperand(0).getValueType().isVector()) {
SDValue Int = N0.getOperand(0);
- MVT IntVT = Int.getValueType();
+ EVT IntVT = Int.getValueType();
if (IntVT.isInteger() && !IntVT.isVector()) {
Int = DAG.getNode(ISD::AND, N0.getDebugLoc(), IntVT, Int,
DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
SDValue Chain = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- // never taken branch, fold to chain
- if (N1C && N1C->isNullValue())
- return Chain;
- // unconditional branch
- if (N1C && N1C->getAPIntValue() == 1)
- return DAG.getNode(ISD::BR, N->getDebugLoc(), MVT::Other, Chain, N2);
+ // If N is a constant we could fold this into a fallthrough or unconditional
+ // branch. However that doesn't happen very often in normal code, because
+ // Instcombine/SimplifyCFG should have handled the available opportunities.
+ // If we did this folding here, it would be necessary to update the
+ // MachineBasicBlock CFG, which is awkward.
+
// fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
// on the target.
if (N1.getOpcode() == ISD::SETCC &&
N1.getOperand(0), N1.getOperand(1), N2);
}
+ SDNode *Trunc = 0;
+ if (N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) {
+ // Look pass truncate.
+ Trunc = N1.getNode();
+ N1 = N1.getOperand(0);
+ }
+
if (N1.hasOneUse() && N1.getOpcode() == ISD::SRL) {
// Match this pattern so that we can generate simpler code:
//
// into
//
// %a = ...
- // %b = and %a, 2
+ // %b = and i32 %a, 2
// %c = setcc eq %b, 0
// brcond %c ...
//
SDValue Op1 = N1.getOperand(1);
if (Op0.getOpcode() == ISD::AND &&
- Op0.hasOneUse() &&
Op1.getOpcode() == ISD::Constant) {
- SDValue AndOp0 = Op0.getOperand(0);
SDValue AndOp1 = Op0.getOperand(1);
if (AndOp1.getOpcode() == ISD::Constant) {
Op0, DAG.getConstant(0, Op0.getValueType()),
ISD::SETNE);
+ SDValue NewBRCond = DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
+ MVT::Other, Chain, SetCC, N2);
+ // Don't add the new BRCond into the worklist or else SimplifySelectCC
+ // will convert it back to (X & C1) >> C2.
+ CombineTo(N, NewBRCond, false);
+ // Truncate is dead.
+ if (Trunc) {
+ removeFromWorkList(Trunc);
+ DAG.DeleteNode(Trunc);
+ }
// Replace the uses of SRL with SETCC
- DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N1, SetCC, &DeadNodes);
removeFromWorkList(N1.getNode());
DAG.DeleteNode(N1.getNode());
- return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
- MVT::Other, Chain, SetCC, N2);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
}
}
+
+ // Transform br(xor(x, y)) -> br(x != y)
+ // Transform br(xor(xor(x,y), 1)) -> br (x == y)
+ if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
+ SDNode *TheXor = N1.getNode();
+ SDValue Op0 = TheXor->getOperand(0);
+ SDValue Op1 = TheXor->getOperand(1);
+ if (Op0.getOpcode() == Op1.getOpcode()) {
+ // Avoid missing important xor optimizations.
+ SDValue Tmp = visitXOR(TheXor);
+ if (Tmp.getNode()) {
+ DEBUG(dbgs() << "\nReplacing.8 ";
+ TheXor->dump(&DAG);
+ dbgs() << "\nWith: ";
+ Tmp.getNode()->dump(&DAG);
+ dbgs() << '\n');
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N1, Tmp, &DeadNodes);
+ removeFromWorkList(TheXor);
+ DAG.DeleteNode(TheXor);
+ return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
+ MVT::Other, Chain, Tmp, N2);
+ }
+ }
+
+ if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
+ bool Equal = false;
+ if (ConstantSDNode *RHSCI = dyn_cast<ConstantSDNode>(Op0))
+ if (RHSCI->getAPIntValue() == 1 && Op0.hasOneUse() &&
+ Op0.getOpcode() == ISD::XOR) {
+ TheXor = Op0.getNode();
+ Equal = true;
+ }
+
+ EVT SetCCVT = N1.getValueType();
+ if (LegalTypes)
+ SetCCVT = TLI.getSetCCResultType(SetCCVT);
+ SDValue SetCC = DAG.getSetCC(TheXor->getDebugLoc(),
+ SetCCVT,
+ Op0, Op1,
+ Equal ? ISD::SETEQ : ISD::SETNE);
+ // Replace the uses of XOR with SETCC
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N1, SetCC, &DeadNodes);
+ removeFromWorkList(N1.getNode());
+ DAG.DeleteNode(N1.getNode());
+ return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
+ MVT::Other, Chain, SetCC, N2);
+ }
+ }
return SDValue();
}
CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
+ // If N is a constant we could fold this into a fallthrough or unconditional
+ // branch. However that doesn't happen very often in normal code, because
+ // Instcombine/SimplifyCFG should have handled the available opportunities.
+ // If we did this folding here, it would be necessary to update the
+ // MachineBasicBlock CFG, which is awkward.
+
// Use SimplifySetCC to simplify SETCC's.
SDValue Simp = SimplifySetCC(TLI.getSetCCResultType(CondLHS.getValueType()),
CondLHS, CondRHS, CC->get(), N->getDebugLoc(),
false);
if (Simp.getNode()) AddToWorkList(Simp.getNode());
- ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(Simp.getNode());
-
- // fold br_cc true, dest -> br dest (unconditional branch)
- if (SCCC && !SCCC->isNullValue())
- return DAG.getNode(ISD::BR, N->getDebugLoc(), MVT::Other,
- N->getOperand(0), N->getOperand(4));
- // fold br_cc false, dest -> unconditional fall through
- if (SCCC && SCCC->isNullValue())
- return N->getOperand(0);
-
// fold to a simpler setcc
if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other,
bool isLoad = true;
SDValue Ptr;
- MVT VT;
+ EVT VT;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
if (LD->isIndexed())
return false;
BasePtr, Offset, AM);
++PreIndexedNodes;
++NodesCombined;
- DOUT << "\nReplacing.4 "; DEBUG(N->dump(&DAG));
- DOUT << "\nWith: "; DEBUG(Result.getNode()->dump(&DAG));
- DOUT << '\n';
+ DEBUG(dbgs() << "\nReplacing.4 ";
+ N->dump(&DAG);
+ dbgs() << "\nWith: ";
+ Result.getNode()->dump(&DAG);
+ dbgs() << '\n');
WorkListRemover DeadNodes(*this);
if (isLoad) {
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0),
bool isLoad = true;
SDValue Ptr;
- MVT VT;
+ EVT VT;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
if (LD->isIndexed())
return false;
SDValue Offset;
ISD::MemIndexedMode AM = ISD::UNINDEXED;
if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
- if (Ptr == Offset)
+ if (Ptr == Offset && Op->getOpcode() == ISD::ADD)
std::swap(BasePtr, Offset);
if (Ptr != BasePtr)
continue;
BasePtr, Offset, AM);
++PostIndexedNodes;
++NodesCombined;
- DOUT << "\nReplacing.5 "; DEBUG(N->dump(&DAG));
- DOUT << "\nWith: "; DEBUG(Result.getNode()->dump(&DAG));
- DOUT << '\n';
+ DEBUG(dbgs() << "\nReplacing.5 ";
+ N->dump(&DAG);
+ dbgs() << "\nWith: ";
+ Result.getNode()->dump(&DAG);
+ dbgs() << '\n');
WorkListRemover DeadNodes(*this);
if (isLoad) {
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0),
return false;
}
-/// InferAlignment - If we can infer some alignment information from this
-/// pointer, return it.
-static unsigned InferAlignment(SDValue Ptr, SelectionDAG &DAG) {
- // If this is a direct reference to a stack slot, use information about the
- // stack slot's alignment.
- int FrameIdx = 1 << 31;
- int64_t FrameOffset = 0;
- if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) {
- FrameIdx = FI->getIndex();
- } else if (Ptr.getOpcode() == ISD::ADD &&
- isa<ConstantSDNode>(Ptr.getOperand(1)) &&
- isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
- FrameIdx = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
- FrameOffset = Ptr.getConstantOperandVal(1);
- }
-
- if (FrameIdx != (1 << 31)) {
- // FIXME: Handle FI+CST.
- const MachineFrameInfo &MFI = *DAG.getMachineFunction().getFrameInfo();
- if (MFI.isFixedObjectIndex(FrameIdx)) {
- int64_t ObjectOffset = MFI.getObjectOffset(FrameIdx) + FrameOffset;
-
- // The alignment of the frame index can be determined from its offset from
- // the incoming frame position. If the frame object is at offset 32 and
- // the stack is guaranteed to be 16-byte aligned, then we know that the
- // object is 16-byte aligned.
- unsigned StackAlign = DAG.getTarget().getFrameInfo()->getStackAlignment();
- unsigned Align = MinAlign(ObjectOffset, StackAlign);
-
- // Finally, the frame object itself may have a known alignment. Factor
- // the alignment + offset into a new alignment. For example, if we know
- // the FI is 8 byte aligned, but the pointer is 4 off, we really have a
- // 4-byte alignment of the resultant pointer. Likewise align 4 + 4-byte
- // offset = 4-byte alignment, align 4 + 1-byte offset = align 1, etc.
- unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx),
- FrameOffset);
- return std::max(Align, FIInfoAlign);
- }
- }
-
- return 0;
-}
-
SDValue DAGCombiner::visitLOAD(SDNode *N) {
LoadSDNode *LD = cast<LoadSDNode>(N);
SDValue Chain = LD->getChain();
// Try to infer better alignment information than the load already has.
if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
- if (unsigned Align = InferAlignment(Ptr, DAG)) {
+ if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
if (Align > LD->getAlignment())
return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(),
LD->getValueType(0),
Chain, Ptr, LD->getSrcValue(),
LD->getSrcValueOffset(), LD->getMemoryVT(),
- LD->isVolatile(), Align);
+ LD->isVolatile(), LD->isNonTemporal(), Align);
}
}
// v3 = add v2, c
// Now we replace use of chain2 with chain1. This makes the second load
// isomorphic to the one we are deleting, and thus makes this load live.
- DOUT << "\nReplacing.6 "; DEBUG(N->dump(&DAG));
- DOUT << "\nWith chain: "; DEBUG(Chain.getNode()->dump(&DAG));
- DOUT << "\n";
+ DEBUG(dbgs() << "\nReplacing.6 ";
+ N->dump(&DAG);
+ dbgs() << "\nWith chain: ";
+ Chain.getNode()->dump(&DAG);
+ dbgs() << "\n");
WorkListRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain, &DeadNodes);
assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
if (N->hasNUsesOfValue(0, 0) && N->hasNUsesOfValue(0, 1)) {
SDValue Undef = DAG.getUNDEF(N->getValueType(0));
- DOUT << "\nReplacing.6 "; DEBUG(N->dump(&DAG));
- DOUT << "\nWith: "; DEBUG(Undef.getNode()->dump(&DAG));
- DOUT << " and 2 other values\n";
+ DEBUG(dbgs() << "\nReplacing.7 ";
+ N->dump(&DAG);
+ dbgs() << "\nWith: ";
+ Undef.getNode()->dump(&DAG);
+ dbgs() << " and 2 other values\n");
WorkListRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef, &DeadNodes);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1),
ReplLoad = DAG.getLoad(N->getValueType(0), LD->getDebugLoc(),
BetterChain, Ptr,
LD->getSrcValue(), LD->getSrcValueOffset(),
- LD->isVolatile(), LD->getAlignment());
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->getAlignment());
} else {
ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(),
LD->getValueType(0),
LD->getSrcValueOffset(),
LD->getMemoryVT(),
LD->isVolatile(),
+ LD->isNonTemporal(),
LD->getAlignment());
}
// Create token factor to keep old chain connected.
SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
MVT::Other, Chain, ReplLoad.getValue(1));
-
+
+ // Make sure the new and old chains are cleaned up.
+ AddToWorkList(Token.getNode());
+
// Replace uses with load result and token factor. Don't add users
// to work list.
return CombineTo(N, ReplLoad.getValue(0), Token, false);
SDValue Chain = ST->getChain();
SDValue Value = ST->getValue();
SDValue Ptr = ST->getBasePtr();
- MVT VT = Value.getValueType();
+ EVT VT = Value.getValueType();
if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
return SDValue();
APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
if (Opc == ISD::AND)
Imm ^= APInt::getAllOnesValue(BitWidth);
+ if (Imm == 0 || Imm.isAllOnesValue())
+ return SDValue();
unsigned ShAmt = Imm.countTrailingZeros();
unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
unsigned NewBW = NextPowerOf2(MSB - ShAmt);
- MVT NewVT = MVT::getIntegerVT(NewBW);
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
while (NewBW < BitWidth &&
!(TLI.isOperationLegalOrCustom(Opc, NewVT) &&
TLI.isNarrowingProfitable(VT, NewVT))) {
NewBW = NextPowerOf2(NewBW);
- NewVT = MVT::getIntegerVT(NewBW);
+ NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
}
if (NewBW >= BitWidth)
return SDValue();
unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
if (NewAlign <
- TLI.getTargetData()->getABITypeAlignment(NewVT.getTypeForMVT()))
+ TLI.getTargetData()->getABITypeAlignment(NewVT.getTypeForEVT(*DAG.getContext())))
return SDValue();
SDValue NewPtr = DAG.getNode(ISD::ADD, LD->getDebugLoc(),
SDValue NewLD = DAG.getLoad(NewVT, N0.getDebugLoc(),
LD->getChain(), NewPtr,
LD->getSrcValue(), LD->getSrcValueOffset(),
- LD->isVolatile(), NewAlign);
+ LD->isVolatile(), LD->isNonTemporal(),
+ NewAlign);
SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD,
DAG.getConstant(NewImm, NewVT));
SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(),
NewVal, NewPtr,
ST->getSrcValue(), ST->getSrcValueOffset(),
- false, NewAlign);
+ false, false, NewAlign);
AddToWorkList(NewPtr.getNode());
AddToWorkList(NewLD.getNode());
// Try to infer better alignment information than the store already has.
if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
- if (unsigned Align = InferAlignment(Ptr, DAG)) {
+ if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
if (Align > ST->getAlignment())
return DAG.getTruncStore(Chain, N->getDebugLoc(), Value,
Ptr, ST->getSrcValue(),
ST->getSrcValueOffset(), ST->getMemoryVT(),
- ST->isVolatile(), Align);
+ ST->isVolatile(), ST->isNonTemporal(), Align);
}
}
if (Value.getOpcode() == ISD::BIT_CONVERT && !ST->isTruncatingStore() &&
ST->isUnindexed()) {
unsigned OrigAlign = ST->getAlignment();
- MVT SVT = Value.getOperand(0).getValueType();
+ EVT SVT = Value.getOperand(0).getValueType();
unsigned Align = TLI.getTargetData()->
- getABITypeAlignment(SVT.getTypeForMVT());
+ getABITypeAlignment(SVT.getTypeForEVT(*DAG.getContext()));
if (Align <= OrigAlign &&
((!LegalOperations && !ST->isVolatile()) ||
TLI.isOperationLegalOrCustom(ISD::STORE, SVT)))
return DAG.getStore(Chain, N->getDebugLoc(), Value.getOperand(0),
Ptr, ST->getSrcValue(),
- ST->getSrcValueOffset(), ST->isVolatile(), OrigAlign);
+ ST->getSrcValueOffset(), ST->isVolatile(),
+ ST->isNonTemporal(), OrigAlign);
}
// Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
// transform should not be done in this case.
if (Value.getOpcode() != ISD::TargetConstantFP) {
SDValue Tmp;
- switch (CFP->getValueType(0).getSimpleVT()) {
- default: assert(0 && "Unknown FP type");
+ switch (CFP->getValueType(0).getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unknown FP type");
case MVT::f80: // We don't do this for these yet.
case MVT::f128:
case MVT::ppcf128:
return DAG.getStore(Chain, N->getDebugLoc(), Tmp,
Ptr, ST->getSrcValue(),
ST->getSrcValueOffset(), ST->isVolatile(),
- ST->getAlignment());
+ ST->isNonTemporal(), ST->getAlignment());
}
break;
case MVT::f64:
return DAG.getStore(Chain, N->getDebugLoc(), Tmp,
Ptr, ST->getSrcValue(),
ST->getSrcValueOffset(), ST->isVolatile(),
- ST->getAlignment());
+ ST->isNonTemporal(), ST->getAlignment());
} else if (!ST->isVolatile() &&
TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
// Many FP stores are not made apparent until after legalize, e.g. for
int SVOffset = ST->getSrcValueOffset();
unsigned Alignment = ST->getAlignment();
bool isVolatile = ST->isVolatile();
+ bool isNonTemporal = ST->isNonTemporal();
SDValue St0 = DAG.getStore(Chain, ST->getDebugLoc(), Lo,
Ptr, ST->getSrcValue(),
ST->getSrcValueOffset(),
- isVolatile, ST->getAlignment());
+ isVolatile, isNonTemporal,
+ ST->getAlignment());
Ptr = DAG.getNode(ISD::ADD, N->getDebugLoc(), Ptr.getValueType(), Ptr,
DAG.getConstant(4, Ptr.getValueType()));
SVOffset += 4;
Alignment = MinAlign(Alignment, 4U);
SDValue St1 = DAG.getStore(Chain, ST->getDebugLoc(), Hi,
Ptr, ST->getSrcValue(),
- SVOffset, isVolatile, Alignment);
+ SVOffset, isVolatile, isNonTemporal,
+ Alignment);
return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
St0, St1);
}
// If there is a better chain.
if (Chain != BetterChain) {
- // Replace the chain to avoid dependency.
SDValue ReplStore;
+
+ // Replace the chain to avoid dependency.
if (ST->isTruncatingStore()) {
ReplStore = DAG.getTruncStore(BetterChain, N->getDebugLoc(), Value, Ptr,
ST->getSrcValue(),ST->getSrcValueOffset(),
- ST->getMemoryVT(),
- ST->isVolatile(), ST->getAlignment());
+ ST->getMemoryVT(), ST->isVolatile(),
+ ST->isNonTemporal(), ST->getAlignment());
} else {
ReplStore = DAG.getStore(BetterChain, N->getDebugLoc(), Value, Ptr,
ST->getSrcValue(), ST->getSrcValueOffset(),
- ST->isVolatile(), ST->getAlignment());
+ ST->isVolatile(), ST->isNonTemporal(),
+ ST->getAlignment());
}
// Create token to keep both nodes around.
SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
MVT::Other, Chain, ReplStore);
+ // Make sure the new and old chains are cleaned up.
+ AddToWorkList(Token.getNode());
+
// Don't add users to work list.
return CombineTo(N, Token, false);
}
return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter,
Ptr, ST->getSrcValue(),
ST->getSrcValueOffset(), ST->getMemoryVT(),
- ST->isVolatile(), ST->getAlignment());
+ ST->isVolatile(), ST->isNonTemporal(),
+ ST->getAlignment());
// Otherwise, see if we can simplify the operation with
// SimplifyDemandedBits, which only works if the value has a single use.
if (SimplifyDemandedBits(Value,
APInt::getLowBitsSet(
- Value.getValueSizeInBits(),
+ Value.getValueType().getScalarType().getSizeInBits(),
ST->getMemoryVT().getSizeInBits())))
return SDValue(N, 0);
}
return DAG.getTruncStore(Chain, N->getDebugLoc(), Value.getOperand(0),
Ptr, ST->getSrcValue(),
ST->getSrcValueOffset(), ST->getMemoryVT(),
- ST->isVolatile(), ST->getAlignment());
+ ST->isVolatile(), ST->isNonTemporal(),
+ ST->getAlignment());
}
return ReduceLoadOpStoreWidth(N);
// BUILD_VECTOR with undef elements and the inserted element.
if (!LegalOperations && InVec.getOpcode() == ISD::UNDEF &&
isa<ConstantSDNode>(EltNo)) {
- MVT VT = InVec.getValueType();
- MVT EVT = VT.getVectorElementType();
+ EVT VT = InVec.getValueType();
+ EVT EltVT = VT.getVectorElementType();
unsigned NElts = VT.getVectorNumElements();
- SmallVector<SDValue, 8> Ops(NElts, DAG.getUNDEF(EVT));
+ SmallVector<SDValue, 8> Ops(NElts, DAG.getUNDEF(EltVT));
unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
if (Elt < Ops.size())
SDValue InVec = N->getOperand(0);
if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
- // If the operand is wider than the vector element type then it is implicitly
- // truncated. Make that explicit here.
- MVT EltVT = InVec.getValueType().getVectorElementType();
+ // Check if the result type doesn't match the inserted element type. A
+ // SCALAR_TO_VECTOR may truncate the inserted element and the
+ // EXTRACT_VECTOR_ELT may widen the extracted vector.
+ EVT EltVT = InVec.getValueType().getVectorElementType();
SDValue InOp = InVec.getOperand(0);
- if (InOp.getValueType() != EltVT)
- return DAG.getNode(ISD::TRUNCATE, InVec.getDebugLoc(), EltVT, InOp);
+ EVT NVT = N->getValueType(0);
+ if (InOp.getValueType() != NVT) {
+ assert(InOp.getValueType().isInteger() && NVT.isInteger());
+ return DAG.getSExtOrTrunc(InOp, InVec.getDebugLoc(), NVT);
+ }
return InOp;
}
unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
bool NewLoad = false;
bool BCNumEltsChanged = false;
- MVT VT = InVec.getValueType();
- MVT EVT = VT.getVectorElementType();
- MVT LVT = EVT;
+ EVT VT = InVec.getValueType();
+ EVT ExtVT = VT.getVectorElementType();
+ EVT LVT = ExtVT;
if (InVec.getOpcode() == ISD::BIT_CONVERT) {
- MVT BCVT = InVec.getOperand(0).getValueType();
- if (!BCVT.isVector() || EVT.bitsGT(BCVT.getVectorElementType()))
+ EVT BCVT = InVec.getOperand(0).getValueType();
+ if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
return SDValue();
if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
BCNumEltsChanged = true;
InVec = InVec.getOperand(0);
- EVT = BCVT.getVectorElementType();
+ ExtVT = BCVT.getVectorElementType();
NewLoad = true;
}
if (ISD::isNormalLoad(InVec.getNode())) {
LN0 = cast<LoadSDNode>(InVec);
} else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
- InVec.getOperand(0).getValueType() == EVT &&
+ InVec.getOperand(0).getValueType() == ExtVT &&
ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
LN0 = cast<LoadSDNode>(InVec.getOperand(0));
} else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
// Check the resultant load doesn't need a higher alignment than the
// original load.
unsigned NewAlign =
- TLI.getTargetData()->getABITypeAlignment(LVT.getTypeForMVT());
+ TLI.getTargetData()->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext()));
if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT))
return SDValue();
SDValue NewPtr = LN0->getBasePtr();
if (Elt) {
unsigned PtrOff = LVT.getSizeInBits() * Elt / 8;
- MVT PtrType = NewPtr.getValueType();
+ EVT PtrType = NewPtr.getValueType();
if (TLI.isBigEndian())
PtrOff = VT.getSizeInBits() / 8 - PtrOff;
NewPtr = DAG.getNode(ISD::ADD, N->getDebugLoc(), PtrType, NewPtr,
return DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr,
LN0->getSrcValue(), LN0->getSrcValueOffset(),
- LN0->isVolatile(), Align);
+ LN0->isVolatile(), LN0->isNonTemporal(), Align);
}
return SDValue();
SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
unsigned NumInScalars = N->getNumOperands();
- MVT VT = N->getValueType(0);
- MVT EltType = VT.getVectorElementType();
+ EVT VT = N->getValueType(0);
// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
// operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
return SDValue();
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
unsigned NumElts = VT.getVectorNumElements();
SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
assert(N0.getValueType().getVectorNumElements() == NumElts &&
"Vector shuffle must be normalized in DAG");
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
/// vector_shuffle V, Zero, <0, 4, 2, 4>
SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
DebugLoc dl = N->getDebugLoc();
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
}
// Let's see if the target supports this vector_shuffle.
- MVT RVT = RHS.getValueType();
+ EVT RVT = RHS.getValueType();
if (!TLI.isVectorClearMaskLegal(Indices, RVT))
return SDValue();
// Return the new VECTOR_SHUFFLE node.
- MVT EVT = RVT.getVectorElementType();
+ EVT EltVT = RVT.getVectorElementType();
SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(),
- DAG.getConstant(0, EVT));
+ DAG.getConstant(0, EltVT));
SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
RVT, &ZeroOps[0], ZeroOps.size());
LHS = DAG.getNode(ISD::BIT_CONVERT, dl, RVT, LHS);
// things. Simplifying them may result in a loss of legality.
if (LegalOperations) return SDValue();
- MVT VT = N->getValueType(0);
+ EVT VT = N->getValueType(0);
assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
- MVT EltType = VT.getVectorElementType();
+ EVT EltType = VT.getVectorElementType();
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
SDValue Shuffle = XformToShuffleWithZero(N);
}
if (Ops.size() == LHS.getNumOperands()) {
- MVT VT = LHS.getValueType();
+ EVT VT = LHS.getValueType();
return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
&Ops[0], Ops.size());
}
// If this is an EXTLOAD, the VT's must match.
if (LLD->getMemoryVT() == RLD->getMemoryVT()) {
- // FIXME: this conflates two src values, discarding one. This is not
- // the right thing to do, but nothing uses srcvalues now. When they do,
- // turn SrcValue into a list of locations.
+ // FIXME: this discards src value information. This is
+ // over-conservative. It would be beneficial to be able to remember
+ // both potential memory locations. Since we are discarding
+ // src value info, don't do the transformation if the memory
+ // locations are not in the default address space.
+ unsigned LLDAddrSpace = 0, RLDAddrSpace = 0;
+ if (const Value *LLDVal = LLD->getMemOperand()->getValue()) {
+ if (const PointerType *PT = dyn_cast<PointerType>(LLDVal->getType()))
+ LLDAddrSpace = PT->getAddressSpace();
+ }
+ if (const Value *RLDVal = RLD->getMemOperand()->getValue()) {
+ if (const PointerType *PT = dyn_cast<PointerType>(RLDVal->getType()))
+ RLDAddrSpace = PT->getAddressSpace();
+ }
SDValue Addr;
- if (TheSelect->getOpcode() == ISD::SELECT) {
- // Check that the condition doesn't reach either load. If so, folding
- // this will induce a cycle into the DAG.
- if (!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
- !RLD->isPredecessorOf(TheSelect->getOperand(0).getNode())) {
- Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(),
- LLD->getBasePtr().getValueType(),
- TheSelect->getOperand(0), LLD->getBasePtr(),
- RLD->getBasePtr());
- }
- } else {
- // Check that the condition doesn't reach either load. If so, folding
- // this will induce a cycle into the DAG.
- if (!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
- !RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
- !LLD->isPredecessorOf(TheSelect->getOperand(1).getNode()) &&
- !RLD->isPredecessorOf(TheSelect->getOperand(1).getNode())) {
- Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(),
- LLD->getBasePtr().getValueType(),
- TheSelect->getOperand(0),
- TheSelect->getOperand(1),
- LLD->getBasePtr(), RLD->getBasePtr(),
- TheSelect->getOperand(4));
+ if (LLDAddrSpace == 0 && RLDAddrSpace == 0) {
+ if (TheSelect->getOpcode() == ISD::SELECT) {
+ // Check that the condition doesn't reach either load. If so, folding
+ // this will induce a cycle into the DAG.
+ if ((!LLD->hasAnyUseOfValue(1) ||
+ !LLD->isPredecessorOf(TheSelect->getOperand(0).getNode())) &&
+ (!RLD->hasAnyUseOfValue(1) ||
+ !RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()))) {
+ Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(),
+ LLD->getBasePtr().getValueType(),
+ TheSelect->getOperand(0), LLD->getBasePtr(),
+ RLD->getBasePtr());
+ }
+ } else {
+ // Check that the condition doesn't reach either load. If so, folding
+ // this will induce a cycle into the DAG.
+ if ((!LLD->hasAnyUseOfValue(1) ||
+ (!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
+ !LLD->isPredecessorOf(TheSelect->getOperand(1).getNode()))) &&
+ (!RLD->hasAnyUseOfValue(1) ||
+ (!RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
+ !RLD->isPredecessorOf(TheSelect->getOperand(1).getNode())))) {
+ Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(),
+ LLD->getBasePtr().getValueType(),
+ TheSelect->getOperand(0),
+ TheSelect->getOperand(1),
+ LLD->getBasePtr(), RLD->getBasePtr(),
+ TheSelect->getOperand(4));
+ }
}
}
Load = DAG.getLoad(TheSelect->getValueType(0),
TheSelect->getDebugLoc(),
LLD->getChain(),
- Addr,LLD->getSrcValue(),
- LLD->getSrcValueOffset(),
+ Addr, 0, 0,
LLD->isVolatile(),
+ LLD->isNonTemporal(),
LLD->getAlignment());
} else {
Load = DAG.getExtLoad(LLD->getExtensionType(),
TheSelect->getDebugLoc(),
TheSelect->getValueType(0),
- LLD->getChain(), Addr, LLD->getSrcValue(),
- LLD->getSrcValueOffset(),
+ LLD->getChain(), Addr, 0, 0,
LLD->getMemoryVT(),
LLD->isVolatile(),
+ LLD->isNonTemporal(),
LLD->getAlignment());
}
// (x ? y : y) -> y.
if (N2 == N3) return N2;
- MVT VT = N2.getValueType();
+ EVT VT = N2.getValueType();
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
CstOffset);
return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
PseudoSourceValue::getConstantPool(), 0, false,
- Alignment);
+ false, Alignment);
}
}
N2.getValueType().isInteger() &&
(N1C->isNullValue() || // (a < 0) ? b : 0
(N1C->getAPIntValue() == 1 && N0 == N2))) { // (a < 1) ? a : 0
- MVT XType = N0.getValueType();
- MVT AType = N2.getValueType();
+ EVT XType = N0.getValueType();
+ EVT AType = N2.getValueType();
if (XType.bitsGE(AType)) {
// and (sra X, size(X)-1, A) -> "and (srl X, C2), A" iff A is a
// single-bit constant.
// FIXME: Turn all of these into setcc if setcc if setcc is legal
// otherwise, go ahead with the folds.
if (0 && N3C && N3C->isNullValue() && N2C && (N2C->getAPIntValue() == 1ULL)) {
- MVT XType = N0.getValueType();
+ EVT XType = N0.getValueType();
if (!LegalOperations ||
TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(XType))) {
SDValue Res = DAG.getSetCC(DL, TLI.getSetCCResultType(XType), N0, N1, CC);
if (N1C && N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE) &&
N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1) &&
N2.getOperand(0) == N1 && N0.getValueType().isInteger()) {
- MVT XType = N0.getValueType();
+ EVT XType = N0.getValueType();
SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType, N0,
DAG.getConstant(XType.getSizeInBits()-1,
getShiftAmountTy()));
if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT &&
N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1)) {
if (ConstantSDNode *SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0))) {
- MVT XType = N0.getValueType();
+ EVT XType = N0.getValueType();
if (SubC->isNullValue() && XType.isInteger()) {
SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType,
N0,
}
/// SimplifySetCC - This is a stub for TargetLowering::SimplifySetCC.
-SDValue DAGCombiner::SimplifySetCC(MVT VT, SDValue N0,
+SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0,
SDValue N1, ISD::CondCode Cond,
DebugLoc DL, bool foldBooleans) {
TargetLowering::DAGCombinerInfo
- DagCombineInfo(DAG, Level == Unrestricted, false, this);
+ DagCombineInfo(DAG, !LegalTypes, !LegalOperations, false, this);
return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
}
return S;
}
-/// FindBaseOffset - Return true if base is known not to alias with anything
-/// but itself. Provides base object and offset as results.
-static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset) {
+/// FindBaseOffset - Return true if base is a frame index, which is known not
+// to alias with anything but itself. Provides base object and offset as results.
+static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
+ GlobalValue *&GV, void *&CV) {
// Assume it is a primitive operation.
- Base = Ptr; Offset = 0;
+ Base = Ptr; Offset = 0; GV = 0; CV = 0;
// If it's an adding a simple constant then integrate the offset.
if (Base.getOpcode() == ISD::ADD) {
Offset += C->getZExtValue();
}
}
+
+ // Return the underlying GlobalValue, and update the Offset. Return false
+ // for GlobalAddressSDNode since the same GlobalAddress may be represented
+ // by multiple nodes with different offsets.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Base)) {
+ GV = G->getGlobal();
+ Offset += G->getOffset();
+ return false;
+ }
+ // Return the underlying Constant value, and update the Offset. Return false
+ // for ConstantSDNodes since the same constant pool entry may be represented
+ // by multiple nodes with different offsets.
+ if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) {
+ CV = C->isMachineConstantPoolEntry() ? (void *)C->getMachineCPVal()
+ : (void *)C->getConstVal();
+ Offset += C->getOffset();
+ return false;
+ }
// If it's any of the following then it can't alias with anything but itself.
- return isa<FrameIndexSDNode>(Base) ||
- isa<ConstantPoolSDNode>(Base) ||
- isa<GlobalAddressSDNode>(Base);
+ return isa<FrameIndexSDNode>(Base);
}
/// isAlias - Return true if there is any possibility that the two addresses
/// overlap.
bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
const Value *SrcValue1, int SrcValueOffset1,
+ unsigned SrcValueAlign1,
SDValue Ptr2, int64_t Size2,
- const Value *SrcValue2, int SrcValueOffset2) const {
+ const Value *SrcValue2, int SrcValueOffset2,
+ unsigned SrcValueAlign2) const {
// If they are the same then they must be aliases.
if (Ptr1 == Ptr2) return true;
// Gather base node and offset information.
SDValue Base1, Base2;
int64_t Offset1, Offset2;
- bool KnownBase1 = FindBaseOffset(Ptr1, Base1, Offset1);
- bool KnownBase2 = FindBaseOffset(Ptr2, Base2, Offset2);
+ GlobalValue *GV1, *GV2;
+ void *CV1, *CV2;
+ bool isFrameIndex1 = FindBaseOffset(Ptr1, Base1, Offset1, GV1, CV1);
+ bool isFrameIndex2 = FindBaseOffset(Ptr2, Base2, Offset2, GV2, CV2);
- // If they have a same base address then...
- if (Base1 == Base2)
- // Check to see if the addresses overlap.
+ // If they have a same base address then check to see if they overlap.
+ if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2)))
return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
- // If we know both bases then they can't alias.
- if (KnownBase1 && KnownBase2) return false;
+ // If we know what the bases are, and they aren't identical, then we know they
+ // cannot alias.
+ if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2))
+ return false;
+ // If we know required SrcValue1 and SrcValue2 have relatively large alignment
+ // compared to the size and offset of the access, we may be able to prove they
+ // do not alias. This check is conservative for now to catch cases created by
+ // splitting vector types.
+ if ((SrcValueAlign1 == SrcValueAlign2) &&
+ (SrcValueOffset1 != SrcValueOffset2) &&
+ (Size1 == Size2) && (SrcValueAlign1 > Size1)) {
+ int64_t OffAlign1 = SrcValueOffset1 % SrcValueAlign1;
+ int64_t OffAlign2 = SrcValueOffset2 % SrcValueAlign1;
+
+ // There is no overlap between these relatively aligned accesses of similar
+ // size, return no alias.
+ if ((OffAlign1 + Size1) <= OffAlign2 || (OffAlign2 + Size2) <= OffAlign1)
+ return false;
+ }
+
if (CombinerGlobalAA) {
// Use alias analysis information.
int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2);
/// node. Returns true if the operand was a load.
bool DAGCombiner::FindAliasInfo(SDNode *N,
SDValue &Ptr, int64_t &Size,
- const Value *&SrcValue, int &SrcValueOffset) const {
+ const Value *&SrcValue,
+ int &SrcValueOffset,
+ unsigned &SrcValueAlign) const {
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
Ptr = LD->getBasePtr();
Size = LD->getMemoryVT().getSizeInBits() >> 3;
SrcValue = LD->getSrcValue();
SrcValueOffset = LD->getSrcValueOffset();
+ SrcValueAlign = LD->getOriginalAlignment();
return true;
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
Ptr = ST->getBasePtr();
Size = ST->getMemoryVT().getSizeInBits() >> 3;
SrcValue = ST->getSrcValue();
SrcValueOffset = ST->getSrcValueOffset();
+ SrcValueAlign = ST->getOriginalAlignment();
} else {
- assert(0 && "FindAliasInfo expected a memory operand");
+ llvm_unreachable("FindAliasInfo expected a memory operand");
}
return false;
void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
SmallVector<SDValue, 8> &Aliases) {
SmallVector<SDValue, 8> Chains; // List of chains to visit.
- std::set<SDNode *> Visited; // Visited node set.
+ SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
// Get alias information for node.
SDValue Ptr;
- int64_t Size = 0;
- const Value *SrcValue = 0;
- int SrcValueOffset = 0;
- bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset);
+ int64_t Size;
+ const Value *SrcValue;
+ int SrcValueOffset;
+ unsigned SrcValueAlign;
+ bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset,
+ SrcValueAlign);
// Starting off.
Chains.push_back(OriginalChain);
-
+ unsigned Depth = 0;
+
// Look at each chain and determine if it is an alias. If so, add it to the
// aliases list. If not, then continue up the chain looking for the next
// candidate.
while (!Chains.empty()) {
SDValue Chain = Chains.back();
Chains.pop_back();
+
+ // For TokenFactor nodes, look at each operand and only continue up the
+ // chain until we find two aliases. If we've seen two aliases, assume we'll
+ // find more and revert to original chain since the xform is unlikely to be
+ // profitable.
+ //
+ // FIXME: The depth check could be made to return the last non-aliasing
+ // chain we found before we hit a tokenfactor rather than the original
+ // chain.
+ if (Depth > 6 || Aliases.size() == 2) {
+ Aliases.clear();
+ Aliases.push_back(OriginalChain);
+ break;
+ }
- // Don't bother if we've been before.
- if (Visited.find(Chain.getNode()) != Visited.end()) continue;
- Visited.insert(Chain.getNode());
+ // Don't bother if we've been before.
+ if (!Visited.insert(Chain.getNode()))
+ continue;
switch (Chain.getOpcode()) {
case ISD::EntryToken:
case ISD::STORE: {
// Get alias information for Chain.
SDValue OpPtr;
- int64_t OpSize = 0;
- const Value *OpSrcValue = 0;
- int OpSrcValueOffset = 0;
+ int64_t OpSize;
+ const Value *OpSrcValue;
+ int OpSrcValueOffset;
+ unsigned OpSrcValueAlign;
bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize,
- OpSrcValue, OpSrcValueOffset);
+ OpSrcValue, OpSrcValueOffset,
+ OpSrcValueAlign);
// If chain is alias then stop here.
if (!(IsLoad && IsOpLoad) &&
- isAlias(Ptr, Size, SrcValue, SrcValueOffset,
- OpPtr, OpSize, OpSrcValue, OpSrcValueOffset)) {
+ isAlias(Ptr, Size, SrcValue, SrcValueOffset, SrcValueAlign,
+ OpPtr, OpSize, OpSrcValue, OpSrcValueOffset,
+ OpSrcValueAlign)) {
Aliases.push_back(Chain);
} else {
// Look further up the chain.
Chains.push_back(Chain.getOperand(0));
- // Clean up old chain.
- AddToWorkList(Chain.getNode());
+ ++Depth;
}
break;
}
case ISD::TokenFactor:
- // We have to check each of the operands of the token factor, so we queue
- // then up. Adding the operands to the queue (stack) in reverse order
- // maintains the original order and increases the likelihood that getNode
- // will find a matching token factor (CSE.)
+ // We have to check each of the operands of the token factor for "small"
+ // token factors, so we queue them up. Adding the operands to the queue
+ // (stack) in reverse order maintains the original order and increases the
+ // likelihood that getNode will find a matching token factor (CSE.)
+ if (Chain.getNumOperands() > 16) {
+ Aliases.push_back(Chain);
+ break;
+ }
for (unsigned n = Chain.getNumOperands(); n;)
Chains.push_back(Chain.getOperand(--n));
- // Eliminate the token factor if we can.
- AddToWorkList(Chain.getNode());
+ ++Depth;
break;
default:
// If a single operand then chain to it. We don't need to revisit it.
return Aliases[0];
}
-
+
// Construct a custom tailored token factor.
- SDValue NewChain = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
- &Aliases[0], Aliases.size());
-
- // Make sure the old chain gets cleaned up.
- if (NewChain != OldChain) AddToWorkList(OldChain.getNode());
-
- return NewChain;
+ return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
+ &Aliases[0], Aliases.size());
}
// SelectionDAG::Combine - This is the entry point for the file.