SDValue visitINSERT_SUBVECTOR(SDNode *N);
SDValue visitMLOAD(SDNode *N);
SDValue visitMSTORE(SDNode *N);
+ SDValue visitFP_TO_FP16(SDNode *N);
+
+ SDValue visitFADDForFMACombine(SDNode *N);
+ SDValue visitFSUBForFMACombine(SDNode *N);
SDValue XformToShuffleWithZero(SDNode *N);
SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS);
case ISD::ConstantFP: {
APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
V.changeSign();
- return DAG.getConstantFP(V, Op.getValueType());
+ return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
}
case ISD::FADD:
// FIXME: determine better conditions for this xform.
if (SDNode *L = isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
if (SDNode *R = isConstantIntBuildVectorOrConstantInt(N1)) {
// reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
- if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, L, R))
+ if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
return SDValue();
}
if (SDNode *R = isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
if (SDNode *L = isConstantIntBuildVectorOrConstantInt(N0)) {
// reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
- if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, R, L))
+ if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
return SDValue();
}
case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
case ISD::MLOAD: return visitMLOAD(N);
case ISD::MSTORE: return visitMSTORE(N);
+ case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
}
return SDValue();
}
SDNode *CSENode;
if (const BinaryWithFlagsSDNode *BinNode =
dyn_cast<BinaryWithFlagsSDNode>(N)) {
- CSENode = DAG.getNodeIfExists(
- N->getOpcode(), N->getVTList(), Ops, BinNode->hasNoUnsignedWrap(),
- BinNode->hasNoSignedWrap(), BinNode->isExact());
+ CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
+ BinNode->Flags.hasNoUnsignedWrap(),
+ BinNode->Flags.hasNoSignedWrap(),
+ BinNode->Flags.hasExact());
} else {
CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops);
}
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N0C && N1C)
- return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C);
+ return DAG.FoldConstantArithmetic(ISD::ADD, SDLoc(N), VT, N0C, N1C);
// canonicalize constant to RHS
- if (N0C && !N1C)
+ if (isConstantIntBuildVectorOrConstantInt(N0) &&
+ !isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0);
// fold (add x, 0) -> x
if (N1C && N1C->isNullValue())
(uint64_t)N1C->getSExtValue());
// fold ((c1-A)+c2) -> (c1+c2)-A
if (N1C && N0.getOpcode() == ISD::SUB)
- if (ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
- return DAG.getNode(ISD::SUB, SDLoc(N), VT,
+ if (ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::SUB, DL, VT,
DAG.getConstant(N1C->getAPIntValue()+
- N0C->getAPIntValue(), VT),
+ N0C->getAPIntValue(), DL, VT),
N0.getOperand(1));
+ }
// reassociate add
if (SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1))
return RADD;
if (TN->getVT() == MVT::i1) {
SDLoc DL(N);
SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
- DAG.getConstant(1, VT));
+ DAG.getConstant(1, DL, VT));
return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
}
}
SelectionDAG &DAG,
bool LegalOperations, bool LegalTypes) {
if (!VT.isVector())
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, DL, VT);
if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, DL, VT);
return SDValue();
}
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
if (N0C && N1C)
- return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C);
+ return DAG.FoldConstantArithmetic(ISD::SUB, SDLoc(N), VT, N0C, N1C);
// fold (sub x, c) -> (add x, -c)
- if (N1C)
- return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0,
- DAG.getConstant(-N1C->getAPIntValue(), VT));
+ if (N1C) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::ADD, DL, VT, N0,
+ DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
+ }
// Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
if (N0C && N0C->isAllOnesValue())
return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr :
dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode());
if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
+ SDLoc DL(N);
SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(),
- VT);
- return DAG.getNode(ISD::SUB, SDLoc(N), VT, NewC,
+ DL, VT);
+ return DAG.getNode(ISD::SUB, DL, VT, NewC,
N1.getOperand(0));
}
// fold ((A+(B+or-C))-B) -> A+or-C
if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
if (GA->getGlobal() == GB->getGlobal())
return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
- VT);
+ SDLoc(N), VT);
}
// sub X, (sextinreg Y i1) -> add X, (and Y 1)
if (TN->getVT() == MVT::i1) {
SDLoc DL(N);
SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
- DAG.getConstant(1, VT));
+ DAG.getConstant(1, DL, VT));
return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
}
}
MVT::Glue));
// fold (subc x, x) -> 0 + no borrow
- if (N0 == N1)
- return CombineTo(N, DAG.getConstant(0, VT),
- DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
+ if (N0 == N1) {
+ SDLoc DL(N);
+ return CombineTo(N, DAG.getConstant(0, DL, VT),
+ DAG.getNode(ISD::CARRY_FALSE, DL,
MVT::Glue));
+ }
// fold (subc x, 0) -> x + no borrow
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
// fold (mul x, undef) -> 0
if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, SDLoc(N), VT);
bool N0IsConst = false;
bool N1IsConst = false;
N0IsConst = isConstantSplatVector(N0.getNode(), ConstValue0);
N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1);
} else {
- N0IsConst = dyn_cast<ConstantSDNode>(N0) != nullptr;
- ConstValue0 = N0IsConst ? (dyn_cast<ConstantSDNode>(N0))->getAPIntValue()
- : APInt();
- N1IsConst = dyn_cast<ConstantSDNode>(N1) != nullptr;
- ConstValue1 = N1IsConst ? (dyn_cast<ConstantSDNode>(N1))->getAPIntValue()
- : APInt();
+ N0IsConst = isa<ConstantSDNode>(N0);
+ if (N0IsConst)
+ ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
+ N1IsConst = isa<ConstantSDNode>(N1);
+ if (N1IsConst)
+ ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
}
// fold (mul c1, c2) -> c1*c2
if (N0IsConst && N1IsConst)
- return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0.getNode(), N1.getNode());
+ return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
+ N0.getNode(), N1.getNode());
- // canonicalize constant to RHS
- if (N0IsConst && !N1IsConst)
+ // canonicalize constant to RHS (vector doesn't have to splat)
+ if (isConstantIntBuildVectorOrConstantInt(N0) &&
+ !isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
// fold (mul x, 0) -> 0
if (N1IsConst && ConstValue1 == 0)
if (N1IsConst && ConstValue1 == 1 && IsFullSplat)
return N0;
// fold (mul x, -1) -> 0-x
- if (N1IsConst && ConstValue1.isAllOnesValue())
- return DAG.getNode(ISD::SUB, SDLoc(N), VT,
- DAG.getConstant(0, VT), N0);
+ if (N1IsConst && ConstValue1.isAllOnesValue()) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::SUB, DL, VT,
+ DAG.getConstant(0, DL, VT), N0);
+ }
// fold (mul x, (1 << c)) -> x << c
- if (N1IsConst && ConstValue1.isPowerOf2() && IsFullSplat)
- return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
- DAG.getConstant(ConstValue1.logBase2(),
+ if (N1IsConst && ConstValue1.isPowerOf2() && IsFullSplat) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::SHL, DL, VT, N0,
+ DAG.getConstant(ConstValue1.logBase2(), DL,
getShiftAmountTy(N0.getValueType())));
+ }
// fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
if (N1IsConst && (-ConstValue1).isPowerOf2() && IsFullSplat) {
unsigned Log2Val = (-ConstValue1).logBase2();
+ SDLoc DL(N);
// FIXME: If the input is something that is easily negated (e.g. a
// single-use add), we should put the negate there.
- return DAG.getNode(ISD::SUB, SDLoc(N), VT,
- DAG.getConstant(0, VT),
- DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
- DAG.getConstant(Log2Val,
+ return DAG.getNode(ISD::SUB, DL, VT,
+ DAG.getConstant(0, DL, VT),
+ DAG.getNode(ISD::SHL, DL, VT, N0,
+ DAG.getConstant(Log2Val, DL,
getShiftAmountTy(N0.getValueType()))));
}
ConstantSDNode *N0C = isConstOrConstSplat(N0);
ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N0C && N1C && !N1C->isNullValue())
- return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C);
+ return DAG.FoldConstantArithmetic(ISD::SDIV, SDLoc(N), VT, N0C, N1C);
// fold (sdiv X, 1) -> X
if (N1C && N1C->getAPIntValue() == 1LL)
return N0;
// fold (sdiv X, -1) -> 0-X
- if (N1C && N1C->isAllOnesValue())
- return DAG.getNode(ISD::SUB, SDLoc(N), VT,
- DAG.getConstant(0, VT), N0);
+ if (N1C && N1C->isAllOnesValue()) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::SUB, DL, VT,
+ DAG.getConstant(0, DL, VT), N0);
+ }
// If we know the sign bits of both operands are zero, strength reduce to a
// udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
if (!VT.isVector()) {
return Res;
unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();
+ SDLoc DL(N);
// Splat the sign bit into the register
SDValue SGN =
- DAG.getNode(ISD::SRA, SDLoc(N), VT, N0,
- DAG.getConstant(VT.getScalarSizeInBits() - 1,
+ DAG.getNode(ISD::SRA, DL, VT, N0,
+ DAG.getConstant(VT.getScalarSizeInBits() - 1, DL,
getShiftAmountTy(N0.getValueType())));
AddToWorklist(SGN.getNode());
// Add (N0 < 0) ? abs2 - 1 : 0;
SDValue SRL =
- DAG.getNode(ISD::SRL, SDLoc(N), VT, SGN,
- DAG.getConstant(VT.getScalarSizeInBits() - lg2,
+ DAG.getNode(ISD::SRL, DL, VT, SGN,
+ DAG.getConstant(VT.getScalarSizeInBits() - lg2, DL,
getShiftAmountTy(SGN.getValueType())));
- SDValue ADD = DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, SRL);
+ SDValue ADD = DAG.getNode(ISD::ADD, DL, VT, N0, SRL);
AddToWorklist(SRL.getNode());
AddToWorklist(ADD.getNode()); // Divide by pow2
- SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), VT, ADD,
- DAG.getConstant(lg2, getShiftAmountTy(ADD.getValueType())));
+ SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, ADD,
+ DAG.getConstant(lg2, DL,
+ getShiftAmountTy(ADD.getValueType())));
// If we're dividing by a positive value, we're done. Otherwise, we must
// negate the result.
return SRA;
AddToWorklist(SRA.getNode());
- return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), SRA);
+ return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
}
// If integer divide is expensive and we satisfy the requirements, emit an
// undef / X -> 0
if (N0.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, SDLoc(N), VT);
// X / undef -> undef
if (N1.getOpcode() == ISD::UNDEF)
return N1;
ConstantSDNode *N0C = isConstOrConstSplat(N0);
ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N0C && N1C && !N1C->isNullValue())
- return DAG.FoldConstantArithmetic(ISD::UDIV, VT, N0C, N1C);
+ return DAG.FoldConstantArithmetic(ISD::UDIV, SDLoc(N), VT, N0C, N1C);
// fold (udiv x, (1 << c)) -> x >>u c
- if (N1C && N1C->getAPIntValue().isPowerOf2())
- return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0,
- DAG.getConstant(N1C->getAPIntValue().logBase2(),
+ if (N1C && N1C->getAPIntValue().isPowerOf2()) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::SRL, DL, VT, N0,
+ DAG.getConstant(N1C->getAPIntValue().logBase2(), DL,
getShiftAmountTy(N0.getValueType())));
+ }
// fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
if (N1.getOpcode() == ISD::SHL) {
if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
if (SHC->getAPIntValue().isPowerOf2()) {
EVT ADDVT = N1.getOperand(1).getValueType();
- SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N), ADDVT,
+ SDLoc DL(N);
+ SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT,
N1.getOperand(1),
DAG.getConstant(SHC->getAPIntValue()
.logBase2(),
- ADDVT));
+ DL, ADDVT));
AddToWorklist(Add.getNode());
- return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, Add);
+ return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
}
}
}
// undef / X -> 0
if (N0.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, SDLoc(N), VT);
// X / undef -> undef
if (N1.getOpcode() == ISD::UNDEF)
return N1;
ConstantSDNode *N0C = isConstOrConstSplat(N0);
ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N0C && N1C && !N1C->isNullValue())
- return DAG.FoldConstantArithmetic(ISD::SREM, VT, N0C, N1C);
+ return DAG.FoldConstantArithmetic(ISD::SREM, SDLoc(N), VT, N0C, N1C);
// If we know the sign bits of both operands are zero, strength reduce to a
// urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
if (!VT.isVector()) {
// undef % X -> 0
if (N0.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, SDLoc(N), VT);
// X % undef -> undef
if (N1.getOpcode() == ISD::UNDEF)
return N1;
ConstantSDNode *N0C = isConstOrConstSplat(N0);
ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N0C && N1C && !N1C->isNullValue())
- return DAG.FoldConstantArithmetic(ISD::UREM, VT, N0C, N1C);
+ return DAG.FoldConstantArithmetic(ISD::UREM, SDLoc(N), VT, N0C, N1C);
// fold (urem x, pow2) -> (and x, pow2-1)
- if (N1C && !N1C->isNullValue() && N1C->getAPIntValue().isPowerOf2())
- return DAG.getNode(ISD::AND, SDLoc(N), VT, N0,
- DAG.getConstant(N1C->getAPIntValue()-1,VT));
+ if (N1C && !N1C->isNullValue() && N1C->getAPIntValue().isPowerOf2()) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::AND, DL, VT, N0,
+ DAG.getConstant(N1C->getAPIntValue() - 1, DL, VT));
+ }
// fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
if (N1.getOpcode() == ISD::SHL) {
if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
if (SHC->getAPIntValue().isPowerOf2()) {
+ SDLoc DL(N);
SDValue Add =
- DAG.getNode(ISD::ADD, SDLoc(N), VT, N1,
- DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()),
+ DAG.getNode(ISD::ADD, DL, VT, N1,
+ DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL,
VT));
AddToWorklist(Add.getNode());
- return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, Add);
+ return DAG.getNode(ISD::AND, DL, VT, N0, Add);
}
}
}
// undef % X -> 0
if (N0.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, SDLoc(N), VT);
// X % undef -> undef
if (N1.getOpcode() == ISD::UNDEF)
return N1;
if (N1C && N1C->isNullValue())
return N1;
// fold (mulhs x, 1) -> (sra x, size(x)-1)
- if (N1C && N1C->getAPIntValue() == 1)
- return DAG.getNode(ISD::SRA, SDLoc(N), N0.getValueType(), N0,
+ if (N1C && N1C->getAPIntValue() == 1) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
DAG.getConstant(N0.getValueType().getSizeInBits() - 1,
+ DL,
getShiftAmountTy(N0.getValueType())));
+ }
// fold (mulhs x, undef) -> 0
if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, SDLoc(N), VT);
// If the type twice as wide is legal, transform the mulhs to a wider multiply
// plus a shift.
N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
- DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType())));
+ DAG.getConstant(SimpleSize, DL,
+ getShiftAmountTy(N1.getValueType())));
return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
}
}
return N1;
// fold (mulhu x, 1) -> 0
if (N1C && N1C->getAPIntValue() == 1)
- return DAG.getConstant(0, N0.getValueType());
+ return DAG.getConstant(0, DL, N0.getValueType());
// fold (mulhu x, undef) -> 0
if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, DL, VT);
// If the type twice as wide is legal, transform the mulhu to a wider multiply
// plus a shift.
N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
- DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType())));
+ DAG.getConstant(SimpleSize, DL,
+ getShiftAmountTy(N1.getValueType())));
return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
}
}
Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
// Compute the high part as N1.
Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
- DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType())));
+ DAG.getConstant(SimpleSize, DL,
+ getShiftAmountTy(Lo.getValueType())));
Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
// Compute the low part as N0.
Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
// Compute the high part as N1.
Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
- DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType())));
+ DAG.getConstant(SimpleSize, DL,
+ getShiftAmountTy(Lo.getValueType())));
Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
// Compute the low part as N0.
Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
// build vector of all zeros that might be illegal at this stage.
if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) {
if (!LegalTypes)
- ShOp = DAG.getConstant(0, VT);
+ ShOp = DAG.getConstant(0, SDLoc(N), VT);
else
ShOp = SDValue();
}
ShOp = N0->getOperand(0);
if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) {
if (!LegalTypes)
- ShOp = DAG.getConstant(0, VT);
+ ShOp = DAG.getConstant(0, SDLoc(N), VT);
else
ShOp = SDValue();
}
// fold (and x, undef) -> 0
if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, SDLoc(LocReference), VT);
// fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
SDValue LL, LR, RL, RR, CC0, CC1;
if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
cast<ConstantSDNode>(RR)->isAllOnesValue()) ||
(cast<ConstantSDNode>(LR)->isAllOnesValue() &&
cast<ConstantSDNode>(RR)->isNullValue()))) {
- SDValue ADDNode = DAG.getNode(ISD::ADD, SDLoc(N0), LL.getValueType(),
- LL, DAG.getConstant(1, LL.getValueType()));
+ SDLoc DL(N0);
+ SDValue ADDNode = DAG.getNode(ISD::ADD, DL, LL.getValueType(),
+ LL, DAG.getConstant(1, DL,
+ LL.getValueType()));
AddToWorklist(ADDNode.getNode());
return DAG.getSetCC(SDLoc(LocReference), VT, ADDNode,
- DAG.getConstant(2, LL.getValueType()), ISD::SETUGE);
+ DAG.getConstant(2, DL, LL.getValueType()),
+ ISD::SETUGE);
}
// canonicalize equivalent to ll == rl
if (LL == RR && LR == RL) {
if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
ADDC |= Mask;
if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
+ SDLoc DL(N0);
SDValue NewAdd =
- DAG.getNode(ISD::ADD, SDLoc(N0), VT,
- N0.getOperand(0), DAG.getConstant(ADDC, VT));
+ DAG.getNode(ISD::ADD, DL, VT,
+ N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
CombineTo(N0.getNode(), NewAdd);
// Return N so it doesn't get rechecked!
return SDValue(LocReference, 0);
return DAG.getConstant(
APInt::getNullValue(
N0.getValueType().getScalarType().getSizeInBits()),
- N0.getValueType());
+ SDLoc(N), N0.getValueType());
if (ISD::isBuildVectorAllZeros(N1.getNode()))
// do not return N1, because undef node may exist in N1
return DAG.getConstant(
APInt::getNullValue(
N1.getValueType().getScalarType().getSizeInBits()),
- N1.getValueType());
+ SDLoc(N), N1.getValueType());
// fold (and x, -1) -> x, vector edition
if (ISD::isBuildVectorAllOnes(N0.getNode()))
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N0C && N1C)
- return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C);
+ return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
// canonicalize constant to RHS
- if (N0C && !N1C)
+ if (isConstantIntBuildVectorOrConstantInt(N0) &&
+ !isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
// fold (and x, -1) -> x
if (N1C && N1C->isAllOnesValue())
unsigned BitWidth = VT.getScalarType().getSizeInBits();
if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
APInt::getAllOnesValue(BitWidth)))
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, SDLoc(N), VT);
// reassociate and
if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1))
return RAND;
unsigned LVTStoreBytes = LoadedVT.getStoreSize();
unsigned EVTStoreBytes = ExtVT.getStoreSize();
unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
- NewPtr = DAG.getNode(ISD::ADD, SDLoc(LN0), PtrType,
- NewPtr, DAG.getConstant(PtrOff, PtrType));
+ SDLoc DL(LN0);
+ NewPtr = DAG.getNode(ISD::ADD, DL, PtrType,
+ NewPtr, DAG.getConstant(PtrOff, DL, PtrType));
Alignment = MinAlign(Alignment, PtrOff);
}
}
SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
- if (OpSizeInBits > 16)
- Res = DAG.getNode(ISD::SRL, SDLoc(N), VT, Res,
- DAG.getConstant(OpSizeInBits-16, getShiftAmountTy(VT)));
+ if (OpSizeInBits > 16) {
+ SDLoc DL(N);
+ Res = DAG.getNode(ISD::SRL, DL, VT, Res,
+ DAG.getConstant(OpSizeInBits - 16, DL,
+ getShiftAmountTy(VT)));
+ }
return Res;
}
if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
return SDValue();
- SDValue BSwap = DAG.getNode(ISD::BSWAP, SDLoc(N), VT,
- SDValue(Parts[0],0));
+ SDLoc DL(N);
+ SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
+ SDValue(Parts[0], 0));
// Result of the bswap should be rotated by 16. If it's not legal, then
// do (x << 16) | (x >> 16).
- SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT));
+ SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
- return DAG.getNode(ISD::ROTL, SDLoc(N), VT, BSwap, ShAmt);
+ return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
- return DAG.getNode(ISD::ROTR, SDLoc(N), VT, BSwap, ShAmt);
- return DAG.getNode(ISD::OR, SDLoc(N), VT,
- DAG.getNode(ISD::SHL, SDLoc(N), VT, BSwap, ShAmt),
- DAG.getNode(ISD::SRL, SDLoc(N), VT, BSwap, ShAmt));
+ return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
+ return DAG.getNode(ISD::OR, DL, VT,
+ DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
+ DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
}
/// This contains all DAGCombine rules which reduce two values combined by
if (!LegalOperations &&
(N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) {
EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
- return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
+ return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()),
+ SDLoc(LocReference), VT);
}
// fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
SDValue LL, LR, RL, RR, CC0, CC1;
DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
N0.getOperand(0), N1.getOperand(0));
- return DAG.getNode(ISD::AND, SDLoc(LocReference), VT, X,
- DAG.getConstant(LHSMask | RHSMask, VT));
+ SDLoc DL(LocReference);
+ return DAG.getNode(ISD::AND, DL, VT, X,
+ DAG.getConstant(LHSMask | RHSMask, DL, VT));
}
}
return DAG.getConstant(
APInt::getAllOnesValue(
N0.getValueType().getScalarType().getSizeInBits()),
- N0.getValueType());
+ SDLoc(N), N0.getValueType());
if (ISD::isBuildVectorAllOnes(N1.getNode()))
// do not return N1, because undef node may exist in N1
return DAG.getConstant(
APInt::getAllOnesValue(
N1.getValueType().getScalarType().getSizeInBits()),
- N1.getValueType());
+ SDLoc(N), N1.getValueType());
// fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask1)
// fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf B, A, Mask2)
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N0C && N1C)
- return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C);
+ return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
// canonicalize constant to RHS
- if (N0C && !N1C)
+ if (isConstantIntBuildVectorOrConstantInt(N0) &&
+ !isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
// fold (or x, 0) -> x
if (N1C && N1C->isNullValue())
isa<ConstantSDNode>(N0.getOperand(1))) {
ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) {
- if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1))
+ if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
+ N1C, C1))
return DAG.getNode(
ISD::AND, SDLoc(N), VT,
DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR);
Mask &= cast<ConstantSDNode>(RHSMask)->getAPIntValue() | LHSBits;
}
- Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, VT));
+ Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, DL, VT));
}
return Rot.getNode();
// fold (xor undef, undef) -> 0. This is a common idiom (misuse).
if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, SDLoc(N), VT);
// fold (xor x, undef) -> undef
if (N0.getOpcode() == ISD::UNDEF)
return N0;
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N0C && N1C)
- return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C);
+ return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C);
// canonicalize constant to RHS
- if (N0C && !N1C)
+ if (isConstantIntBuildVectorOrConstantInt(N0) &&
+ !isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
// fold (xor x, 0) -> x
if (N1C && N1C->isNullValue())
N0.getNode()->hasOneUse() &&
isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
SDValue V = N0.getOperand(0);
- V = DAG.getNode(ISD::XOR, SDLoc(N0), V.getValueType(), V,
- DAG.getConstant(1, V.getValueType()));
+ SDLoc DL(N0);
+ V = DAG.getNode(ISD::XOR, DL, V.getValueType(), V,
+ DAG.getConstant(1, DL, V.getValueType()));
AddToWorklist(V.getNode());
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V);
}
if (N1C && N0.getOpcode() == ISD::XOR) {
ConstantSDNode *N00C = dyn_cast<ConstantSDNode>(N0.getOperand(0));
ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
- if (N00C)
- return DAG.getNode(ISD::XOR, SDLoc(N), VT, N0.getOperand(1),
+ if (N00C) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
DAG.getConstant(N1C->getAPIntValue() ^
- N00C->getAPIntValue(), VT));
- if (N01C)
- return DAG.getNode(ISD::XOR, SDLoc(N), VT, N0.getOperand(0),
+ N00C->getAPIntValue(), DL, VT));
+ }
+ if (N01C) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
DAG.getConstant(N1C->getAPIntValue() ^
- N01C->getAPIntValue(), VT));
+ N01C->getAPIntValue(), DL, VT));
+ }
}
// fold (xor x, x) -> 0
if (N0 == N1)
if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode()))
if (N0.getOpcode() == ISD::SHL)
if (auto *ShlLHS = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
- if (N1C->isAllOnesValue() && ShlLHS->isOne())
- return DAG.getNode(ISD::ROTL, SDLoc(N), VT, DAG.getConstant(~1, VT),
+ if (N1C->isAllOnesValue() && ShlLHS->isOne()) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
N0.getOperand(1));
+ }
// Simplify: xor (op x...), (op y...) -> (op (xor x, y))
if (N0.getOpcode() == N1.getOpcode()) {
SDValue N00 = N->getOperand(0).getOperand(0);
APInt TruncC = N01C->getAPIntValue();
TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits());
+ SDLoc DL(N);
- return DAG.getNode(ISD::AND, SDLoc(N), TruncVT,
- DAG.getNode(ISD::TRUNCATE, SDLoc(N), TruncVT, N00),
- DAG.getConstant(TruncC, TruncVT));
+ return DAG.getNode(ISD::AND, DL, TruncVT,
+ DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00),
+ DAG.getConstant(TruncC, DL, TruncVT));
}
}
if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
TargetLowering::ZeroOrNegativeOneBooleanContent) {
- if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, VT, N01CV, N1CV))
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
+ N01CV, N1CV))
return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
}
} else {
// fold (shl c1, c2) -> c1<<c2
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
if (N0C && N1C)
- return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C);
+ return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
// fold (shl 0, x) -> 0
if (N0C && N0C->isNullValue())
return N0;
return N0;
// fold (shl undef, x) -> 0
if (N0.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, SDLoc(N), VT);
// if (shl x, c) is known to be zero, return 0
if (DAG.MaskedValueIsZero(SDValue(N, 0),
APInt::getAllOnesValue(OpSizeInBits)))
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, SDLoc(N), VT);
// fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
if (N1.getOpcode() == ISD::TRUNCATE &&
N1.getOperand(0).getOpcode() == ISD::AND) {
if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
uint64_t c1 = N0C1->getZExtValue();
uint64_t c2 = N1C->getZExtValue();
+ SDLoc DL(N);
if (c1 + c2 >= OpSizeInBits)
- return DAG.getConstant(0, VT);
- return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0),
- DAG.getConstant(c1 + c2, N1.getValueType()));
+ return DAG.getConstant(0, DL, VT);
+ return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
+ DAG.getConstant(c1 + c2, DL, N1.getValueType()));
}
}
EVT InnerShiftVT = N0Op0.getValueType();
uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
if (c2 >= OpSizeInBits - InnerShiftSize) {
+ SDLoc DL(N0);
if (c1 + c2 >= OpSizeInBits)
- return DAG.getConstant(0, VT);
- return DAG.getNode(ISD::SHL, SDLoc(N0), VT,
- DAG.getNode(N0.getOpcode(), SDLoc(N0), VT,
+ return DAG.getConstant(0, DL, VT);
+ return DAG.getNode(ISD::SHL, DL, VT,
+ DAG.getNode(N0.getOpcode(), DL, VT,
N0Op0->getOperand(0)),
- DAG.getConstant(c1 + c2, N1.getValueType()));
+ DAG.getConstant(c1 + c2, DL, N1.getValueType()));
}
}
}
if (c1 == c2) {
SDValue NewOp0 = N0.getOperand(0);
EVT CountVT = NewOp0.getOperand(1).getValueType();
- SDValue NewSHL = DAG.getNode(ISD::SHL, SDLoc(N), NewOp0.getValueType(),
- NewOp0, DAG.getConstant(c2, CountVT));
+ SDLoc DL(N);
+ SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(),
+ NewOp0,
+ DAG.getConstant(c2, DL, CountVT));
AddToWorklist(NewSHL.getNode());
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
}
SDValue Shift;
if (c2 > c1) {
Mask = Mask.shl(c2 - c1);
- Shift = DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0),
- DAG.getConstant(c2 - c1, N1.getValueType()));
+ SDLoc DL(N);
+ Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
+ DAG.getConstant(c2 - c1, DL, N1.getValueType()));
} else {
Mask = Mask.lshr(c1 - c2);
- Shift = DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0),
- DAG.getConstant(c1 - c2, N1.getValueType()));
+ SDLoc DL(N);
+ Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
+ DAG.getConstant(c1 - c2, DL, N1.getValueType()));
}
- return DAG.getNode(ISD::AND, SDLoc(N0), VT, Shift,
- DAG.getConstant(Mask, VT));
+ SDLoc DL(N0);
+ return DAG.getNode(ISD::AND, DL, VT, Shift,
+ DAG.getConstant(Mask, DL, VT));
}
}
}
// fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) {
unsigned BitSize = VT.getScalarSizeInBits();
+ SDLoc DL(N);
SDValue HiBitsMask =
DAG.getConstant(APInt::getHighBitsSet(BitSize,
- BitSize - N1C->getZExtValue()), VT);
- return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0),
+ BitSize - N1C->getZExtValue()),
+ DL, VT);
+ return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0),
HiBitsMask);
}
// fold (sra c1, c2) -> (sra c1, c2)
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
if (N0C && N1C)
- return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C);
+ return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
// fold (sra 0, x) -> 0
if (N0C && N0C->isNullValue())
return N0;
unsigned Sum = N1C->getZExtValue() + C1->getZExtValue();
if (Sum >= OpSizeInBits)
Sum = OpSizeInBits - 1;
- return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
- DAG.getConstant(Sum, N1.getValueType()));
+ SDLoc DL(N);
+ return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0),
+ DAG.getConstant(Sum, DL, N1.getValueType()));
}
}
TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
TLI.isTruncateFree(VT, TruncVT)) {
- SDValue Amt = DAG.getConstant(ShiftAmt,
- getShiftAmountTy(N0.getOperand(0).getValueType()));
- SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0), VT,
- N0.getOperand(0), Amt);
- SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), TruncVT,
- Shift);
- return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N),
- N->getValueType(0), Trunc);
+ SDLoc DL(N);
+ SDValue Amt = DAG.getConstant(ShiftAmt, DL,
+ getShiftAmountTy(N0.getOperand(0).getValueType()));
+ SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
+ N0.getOperand(0), Amt);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
+ Shift);
+ return DAG.getNode(ISD::SIGN_EXTEND, DL,
+ N->getValueType(0), Trunc);
}
}
}
EVT LargeVT = N0Op0.getValueType();
if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
+ SDLoc DL(N);
SDValue Amt =
- DAG.getConstant(LargeShiftVal + N1C->getZExtValue(),
+ DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL,
getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
- SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), LargeVT,
+ SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT,
N0Op0.getOperand(0), Amt);
- return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, SRA);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
}
}
}
// fold (srl c1, c2) -> c1 >>u c2
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
if (N0C && N1C)
- return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C);
+ return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
// fold (srl 0, x) -> 0
if (N0C && N0C->isNullValue())
return N0;
// if (srl x, c) is known to be zero, return 0
if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
APInt::getAllOnesValue(OpSizeInBits)))
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, SDLoc(N), VT);
// fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
if (N1C && N0.getOpcode() == ISD::SRL) {
if (ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1))) {
uint64_t c1 = N01C->getZExtValue();
uint64_t c2 = N1C->getZExtValue();
+ SDLoc DL(N);
if (c1 + c2 >= OpSizeInBits)
- return DAG.getConstant(0, VT);
- return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0),
- DAG.getConstant(c1 + c2, N1.getValueType()));
+ return DAG.getConstant(0, DL, VT);
+ return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
+ DAG.getConstant(c1 + c2, DL, N1.getValueType()));
}
}
uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
// This is only valid if the OpSizeInBits + c1 = size of inner shift.
if (c1 + OpSizeInBits == InnerShiftSize) {
+ SDLoc DL(N0);
if (c1 + c2 >= InnerShiftSize)
- return DAG.getConstant(0, VT);
- return DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT,
- DAG.getNode(ISD::SRL, SDLoc(N0), InnerShiftVT,
+ return DAG.getConstant(0, DL, VT);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT,
+ DAG.getNode(ISD::SRL, DL, InnerShiftVT,
N0.getOperand(0)->getOperand(0),
- DAG.getConstant(c1 + c2, ShiftCountVT)));
+ DAG.getConstant(c1 + c2, DL,
+ ShiftCountVT)));
}
}
unsigned BitSize = N0.getScalarValueSizeInBits();
if (BitSize <= 64) {
uint64_t ShAmt = N1C->getZExtValue() + 64 - BitSize;
- return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0),
- DAG.getConstant(~0ULL >> ShAmt, VT));
+ SDLoc DL(N);
+ return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0),
+ DAG.getConstant(~0ULL >> ShAmt, DL, VT));
}
}
if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
uint64_t ShiftAmt = N1C->getZExtValue();
- SDValue SmallShift = DAG.getNode(ISD::SRL, SDLoc(N0), SmallVT,
+ SDLoc DL0(N0);
+ SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
N0.getOperand(0),
- DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT)));
+ DAG.getConstant(ShiftAmt, DL0,
+ getShiftAmountTy(SmallVT)));
AddToWorklist(SmallShift.getNode());
APInt Mask = APInt::getAllOnesValue(OpSizeInBits).lshr(ShiftAmt);
- return DAG.getNode(ISD::AND, SDLoc(N), VT,
- DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SmallShift),
- DAG.getConstant(Mask, VT));
+ SDLoc DL(N);
+ return DAG.getNode(ISD::AND, DL, VT,
+ DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
+ DAG.getConstant(Mask, DL, VT));
}
}
// If any of the input bits are KnownOne, then the input couldn't be all
// zeros, thus the result of the srl will always be zero.
- if (KnownOne.getBoolValue()) return DAG.getConstant(0, VT);
+ if (KnownOne.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
// If all of the bits input the to ctlz node are known to be zero, then
// the result of the ctlz is "32" and the result of the shift is one.
APInt UnknownBits = ~KnownZero;
- if (UnknownBits == 0) return DAG.getConstant(1, VT);
+ if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
// Otherwise, check to see if there is exactly one bit input to the ctlz.
if ((UnknownBits & (UnknownBits - 1)) == 0) {
SDValue Op = N0.getOperand(0);
if (ShAmt) {
- Op = DAG.getNode(ISD::SRL, SDLoc(N0), VT, Op,
- DAG.getConstant(ShAmt, getShiftAmountTy(Op.getValueType())));
+ SDLoc DL(N0);
+ Op = DAG.getNode(ISD::SRL, DL, VT, Op,
+ DAG.getConstant(ShAmt, DL,
+ getShiftAmountTy(Op.getValueType())));
AddToWorklist(Op.getNode());
}
- return DAG.getNode(ISD::XOR, SDLoc(N), VT,
- Op, DAG.getConstant(1, VT));
+ SDLoc DL(N);
+ return DAG.getNode(ISD::XOR, DL, VT,
+ Op, DAG.getConstant(1, DL, VT));
}
}
TargetLowering::ZeroOrOneBooleanContent)) &&
N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) {
SDValue XORNode;
- if (VT == VT0)
- return DAG.getNode(ISD::XOR, SDLoc(N), VT0,
- N0, DAG.getConstant(1, VT0));
- XORNode = DAG.getNode(ISD::XOR, SDLoc(N0), VT0,
- N0, DAG.getConstant(1, VT0));
+ if (VT == VT0) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::XOR, DL, VT0,
+ N0, DAG.getConstant(1, DL, VT0));
+ }
+ SDLoc DL0(N0);
+ XORNode = DAG.getNode(ISD::XOR, DL0, VT0,
+ N0, DAG.getConstant(1, DL0, VT0));
AddToWorklist(XORNode.getNode());
if (VT.bitsGT(VT0))
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, XORNode);
SDValue N1_0 = N1->getOperand(0);
SDValue N1_1 = N1->getOperand(1);
SDValue N1_2 = N1->getOperand(2);
- if (N1_2 == N2) {
+ if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
// Create the actual and node if we can generate good code for it.
if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) {
SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(),
SDValue N2_0 = N2->getOperand(0);
SDValue N2_1 = N2->getOperand(1);
SDValue N2_2 = N2->getOperand(2);
- if (N2_1 == N1) {
+ if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
// Create the actual or node if we can generate good code for it.
if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) {
SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(),
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, Ptr.getValueType()));
+ DAG.getConstant(IncrementSize, DL, Ptr.getValueType()));
MMO = DAG.getMachineFunction().
getMachineMemOperand(MST->getPointerInfo(),
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, Ptr.getValueType()));
+ DAG.getConstant(IncrementSize, DL, Ptr.getValueType()));
MMO = DAG.getMachineFunction().
getMachineMemOperand(MLD->getPointerInfo(),
EVT VT = LHS.getValueType();
SDValue Shift = DAG.getNode(
ISD::SRA, DL, VT, LHS,
- DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, VT));
+ DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, DL, VT));
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
AddToWorklist(Shift.getNode());
AddToWorklist(Add.getNode());
}
}
+ if (SimplifySelectOps(N, N1, N2))
+ return SDValue(N, 0); // Don't revisit N.
+
// If the VSELECT result requires splitting and the mask is provided by a
// SETCC, then split both nodes and its operands before legalization. This
// prevents the type legalizer from unrolling SETCC into scalar comparisons
continue;
}
+ SDLoc DL(Op);
ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op);
const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue());
if (Opcode == ISD::SIGN_EXTEND)
Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(),
- SVT));
+ DL, SVT));
else
Elts.push_back(DAG.getConstant(C.shl(ShAmt).lshr(ShAmt).getZExtValue(),
- SVT));
+ DL, SVT));
}
return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Elts).getNode();
Align, LN0->getAAInfo());
BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
- DAG.getConstant(Stride, BasePtr.getValueType()));
+ DAG.getConstant(Stride, DL, BasePtr.getValueType()));
Loads.push_back(SplitLoad.getValue(0));
Chains.push_back(SplitLoad.getValue(1));
LN0->getMemOperand());
APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
Mask = Mask.sext(VT.getSizeInBits());
- SDValue And = DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
- ExtLoad, DAG.getConstant(Mask, VT));
+ SDLoc DL(N);
+ SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
+ ExtLoad, DAG.getConstant(Mask, DL, VT));
SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
SDLoc(N0.getOperand(0)),
N0.getOperand(0).getValueType(), ExtLoad);
CombineTo(N, And);
CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
- ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL,
ISD::SIGN_EXTEND);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
// sext(setcc x, y, cc) -> (select (setcc x, y, cc), -1, 0)
unsigned ElementWidth = VT.getScalarType().getSizeInBits();
+ SDLoc DL(N);
SDValue NegOne =
- DAG.getConstant(APInt::getAllOnesValue(ElementWidth), VT);
+ DAG.getConstant(APInt::getAllOnesValue(ElementWidth), DL, VT);
SDValue SCC =
- SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1),
- NegOne, DAG.getConstant(0, VT),
+ SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1),
+ NegOne, DAG.getConstant(0, DL, VT),
cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
if (SCC.getNode()) return SCC;
SDValue SetCC = DAG.getSetCC(DL, SetCCVT,
N0.getOperand(0), N0.getOperand(1), CC);
return DAG.getSelect(DL, VT, SetCC,
- NegOne, DAG.getConstant(0, VT));
+ NegOne, DAG.getConstant(0, DL, VT));
}
}
}
}
APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
Mask = Mask.zext(VT.getSizeInBits());
- return DAG.getNode(ISD::AND, SDLoc(N), VT,
- X, DAG.getConstant(Mask, VT));
+ SDLoc DL(N);
+ return DAG.getNode(ISD::AND, DL, VT,
+ X, DAG.getConstant(Mask, DL, VT));
}
// fold (zext (load x)) -> (zext (truncate (zextload x)))
LN0->getMemOperand());
APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
Mask = Mask.zext(VT.getSizeInBits());
- SDValue And = DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
- ExtLoad, DAG.getConstant(Mask, VT));
+ SDLoc DL(N);
+ SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
+ ExtLoad, DAG.getConstant(Mask, DL, VT));
SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
SDLoc(N0.getOperand(0)),
N0.getOperand(0).getValueType(), ExtLoad);
CombineTo(N, And);
CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
- ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL,
ISD::ZERO_EXTEND);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
// zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
// Only do this before legalize for now.
EVT EltVT = VT.getVectorElementType();
+ SDLoc DL(N);
SmallVector<SDValue,8> OneOps(VT.getVectorNumElements(),
- DAG.getConstant(1, EltVT));
+ DAG.getConstant(1, DL, EltVT));
if (VT.getSizeInBits() == N0VT.getSizeInBits())
// We know that the # elements of the results is the same as the
// # elements of the compare (and the # elements of the compare result
// for that matter). Check to see that they are the same size. If so,
// we know that the element size of the sext'd result matches the
// element size of the compare operands.
- return DAG.getNode(ISD::AND, SDLoc(N), VT,
- DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
+ return DAG.getNode(ISD::AND, DL, VT,
+ DAG.getSetCC(DL, VT, N0.getOperand(0),
N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get()),
- DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, VT,
OneOps));
// If the desired elements are smaller or larger than the source
EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
N0VT.getVectorNumElements());
SDValue VsetCC =
- DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
+ DAG.getSetCC(DL, MatchingVectorType, N0.getOperand(0),
N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get());
- return DAG.getNode(ISD::AND, SDLoc(N), VT,
- DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT),
- DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, OneOps));
+ return DAG.getNode(ISD::AND, DL, VT,
+ DAG.getSExtOrTrunc(VsetCC, DL, VT),
+ DAG.getNode(ISD::BUILD_VECTOR, DL, VT, OneOps));
}
// zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
+ SDLoc DL(N);
SDValue SCC =
- SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1),
- DAG.getConstant(1, VT), DAG.getConstant(0, VT),
+ SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1),
+ DAG.getConstant(1, DL, VT), DAG.getConstant(0, DL, VT),
cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
if (SCC.getNode()) return SCC;
}
}
APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
Mask = Mask.zext(VT.getSizeInBits());
- return DAG.getNode(ISD::AND, SDLoc(N), VT,
- X, DAG.getConstant(Mask, VT));
+ SDLoc DL(N);
+ return DAG.getNode(ISD::AND, DL, VT,
+ X, DAG.getConstant(Mask, DL, VT));
}
// fold (aext (load x)) -> (aext (truncate (extload x)))
}
// aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
+ SDLoc DL(N);
SDValue SCC =
- SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1),
- DAG.getConstant(1, VT), DAG.getConstant(0, VT),
+ SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1),
+ DAG.getConstant(1, DL, VT), DAG.getConstant(0, DL, VT),
cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
if (SCC.getNode())
return SCC;
const APInt &CVal = CV->getAPIntValue();
APInt NewVal = CVal & Mask;
if (NewVal != CVal)
- return DAG.getConstant(NewVal, V.getValueType());
+ return DAG.getConstant(NewVal, SDLoc(V), V.getValueType());
break;
}
case ISD::OR:
uint64_t PtrOff = ShAmt / 8;
unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
- SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LN0),
+ SDLoc DL(LN0);
+ SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
PtrType, LN0->getBasePtr(),
- DAG.getConstant(PtrOff, PtrType));
+ DAG.getConstant(PtrOff, DL, PtrType));
AddToWorklist(NewPtr.getNode());
SDValue Load;
// no larger than the source) then the useful bits of the result are
// zero; we can't simply return the shortened shift, because the result
// of that operation is undefined.
+ SDLoc DL(N0);
if (ShLeftAmt >= VT.getSizeInBits())
- Result = DAG.getConstant(0, VT);
+ Result = DAG.getConstant(0, DL, VT);
else
- Result = DAG.getNode(ISD::SHL, SDLoc(N0), VT,
- Result, DAG.getConstant(ShLeftAmt, ShImmTy));
+ Result = DAG.getNode(ISD::SHL, DL, VT,
+ Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
}
// Return the new loaded value.
ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op);
const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue());
Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(),
- Op.getValueType()));
+ SDLoc(Op), Op.getValueType()));
}
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Elts);
SDValue V = DAG.getNode(ISD::BITCAST, SDLoc(N),
NVT, N0.getOperand(0));
+ SDLoc DL(N);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
- SDLoc(N), TrTy, V,
- DAG.getConstant(Index, IndexTy));
+ DL, TrTy, V,
+ DAG.getConstant(Index, DL, IndexTy));
}
}
N0.getOperand(0));
AddToWorklist(NewConv.getNode());
+ SDLoc DL(N);
APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
if (N0.getOpcode() == ISD::FNEG)
- return DAG.getNode(ISD::XOR, SDLoc(N), VT,
- NewConv, DAG.getConstant(SignBit, VT));
+ return DAG.getNode(ISD::XOR, DL, VT,
+ NewConv, DAG.getConstant(SignBit, DL, VT));
assert(N0.getOpcode() == ISD::FABS);
- return DAG.getNode(ISD::AND, SDLoc(N), VT,
- NewConv, DAG.getConstant(~SignBit, VT));
+ return DAG.getNode(ISD::AND, DL, VT,
+ NewConv, DAG.getConstant(~SignBit, DL, VT));
}
// fold (bitconvert (fcopysign cst, x)) ->
} else if (OrigXWidth > VTWidth) {
// To get the sign bit in the right place, we have to shift it right
// before truncating.
- X = DAG.getNode(ISD::SRL, SDLoc(X),
+ SDLoc DL(X);
+ X = DAG.getNode(ISD::SRL, DL,
X.getValueType(), X,
- DAG.getConstant(OrigXWidth-VTWidth, X.getValueType()));
+ DAG.getConstant(OrigXWidth-VTWidth, DL,
+ X.getValueType()));
AddToWorklist(X.getNode());
X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
AddToWorklist(X.getNode());
APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
X = DAG.getNode(ISD::AND, SDLoc(X), VT,
- X, DAG.getConstant(SignBit, VT));
+ X, DAG.getConstant(SignBit, SDLoc(X), VT));
AddToWorklist(X.getNode());
SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0),
VT, N0.getOperand(0));
Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
- Cst, DAG.getConstant(~SignBit, VT));
+ Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
AddToWorklist(Cst.getNode());
return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
return CombineLD;
}
+ // Remove double bitcasts from shuffles - this is often a legacy of
+ // XformToShuffleWithZero being used to combine bitmaskings (of
+ // float vectors bitcast to integer vectors) into shuffles.
+ // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
+ if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
+ N0->getOpcode() == ISD::VECTOR_SHUFFLE &&
+ VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
+ !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
+
+ // If operands are a bitcast, peek through if it casts the original VT.
+ // If operands are a UNDEF or constant, just bitcast back to original VT.
+ auto PeekThroughBitcast = [&](SDValue Op) {
+ if (Op.getOpcode() == ISD::BITCAST &&
+ Op.getOperand(0)->getValueType(0) == VT)
+ return SDValue(Op.getOperand(0));
+ if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
+ ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
+ return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
+ return SDValue();
+ };
+
+ SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
+ SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
+ if (!(SV0 && SV1))
+ return SDValue();
+
+ int MaskScale =
+ VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
+ SmallVector<int, 8> NewMask;
+ for (int M : SVN->getMask())
+ for (int i = 0; i != MaskScale; ++i)
+ NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
+
+ bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
+ if (!LegalMask) {
+ std::swap(SV0, SV1);
+ ShuffleVectorSDNode::commuteMask(NewMask);
+ LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
+ }
+
+ if (LegalMask)
+ return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask);
+ }
+
return SDValue();
}
return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
}
+ SDLoc DL(BV);
+
// Okay, we know the src/dst types are both integers of differing types.
// Handling growing first.
assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
if (EltIsUndef)
Ops.push_back(DAG.getUNDEF(DstEltVT));
else
- Ops.push_back(DAG.getConstant(NewBits, DstEltVT));
+ Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
}
EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops);
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
}
// Finally, this must be the case where we are shrinking elements: each input
// turns into multiple outputs.
- bool isS2V = ISD::isScalarToVector(BV);
unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
NumOutputsPerInput*BV->getNumOperands());
for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
APInt ThisVal = OpVal.trunc(DstBitSize);
- Ops.push_back(DAG.getConstant(ThisVal, DstEltVT));
- if (isS2V && i == 0 && j == 0 && ThisVal.zext(SrcBitSize) == OpVal)
- // Simply turn this into a SCALAR_TO_VECTOR of the new type.
- return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT,
- Ops[0]);
+ Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
OpVal = OpVal.lshr(DstBitSize);
}
std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
}
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops);
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
}
-// Attempt different variants of (fadd (fmul a, b), c) -> fma or fmad
-static SDValue performFaddFmulCombines(unsigned FusedOpcode,
- bool Aggressive,
- SDNode *N,
- const TargetLowering &TLI,
- SelectionDAG &DAG) {
+/// Try to perform FMA combining on a given FADD node.
+SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
+
+
+
+
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
+ SDLoc SL(N);
+
+ const TargetOptions &Options = DAG.getTarget().Options;
+ bool UnsafeFPMath = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
+ Options.UnsafeFPMath);
+
+ // Floating-point multiply-add with intermediate rounding.
+ bool HasFMAD = (LegalOperations &&
+ TLI.isOperationLegal(ISD::FMAD, VT));
+
+ // Floating-point multiply-add without intermediate rounding.
+ bool HasFMA = ((!LegalOperations ||
+ TLI.isOperationLegalOrCustom(ISD::FMA, VT)) &&
+ TLI.isFMAFasterThanFMulAndFAdd(VT) &&
+ UnsafeFPMath);
+
+ // No valid opcode, do not combine.
+ if (!HasFMAD && !HasFMA)
+ return SDValue();
+
+ // Always prefer FMAD to FMA for precision.
+ unsigned int PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
+ bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
+ bool LookThroughFPExt = TLI.isFPExtFree(VT);
// fold (fadd (fmul x, y), z) -> (fma x, y, z)
if (N0.getOpcode() == ISD::FMUL &&
(Aggressive || N0->hasOneUse())) {
- return DAG.getNode(FusedOpcode, SDLoc(N), VT,
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(0), N0.getOperand(1), N1);
}
// Note: Commutes FADD operands.
if (N1.getOpcode() == ISD::FMUL &&
(Aggressive || N1->hasOneUse())) {
- return DAG.getNode(FusedOpcode, SDLoc(N), VT,
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
N1.getOperand(0), N1.getOperand(1), N0);
}
+ // Look through FP_EXTEND nodes to do more combining.
+ if (UnsafeFPMath && LookThroughFPExt) {
+ // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::FMUL)
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(1)), N1);
+ }
+
+ // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
+ // Note: Commutes FADD operands.
+ if (N1.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N10 = N1.getOperand(0);
+ if (N10.getOpcode() == ISD::FMUL)
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N10.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N10.getOperand(1)), N0);
+ }
+ }
+
// More folding opportunities when target permits.
- if (Aggressive) {
+ if ((UnsafeFPMath || HasFMAD) && Aggressive) {
// fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
- if (N0.getOpcode() == ISD::FMA &&
+ if (N0.getOpcode() == PreferredFusedOpcode &&
N0.getOperand(2).getOpcode() == ISD::FMUL) {
- return DAG.getNode(FusedOpcode, SDLoc(N), VT,
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(0), N0.getOperand(1),
- DAG.getNode(FusedOpcode, SDLoc(N), VT,
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(2).getOperand(0),
N0.getOperand(2).getOperand(1),
N1));
}
// fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
- if (N1->getOpcode() == ISD::FMA &&
+ if (N1->getOpcode() == PreferredFusedOpcode &&
N1.getOperand(2).getOpcode() == ISD::FMUL) {
- return DAG.getNode(FusedOpcode, SDLoc(N), VT,
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
N1.getOperand(0), N1.getOperand(1),
- DAG.getNode(FusedOpcode, SDLoc(N), VT,
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
N1.getOperand(2).getOperand(0),
N1.getOperand(2).getOperand(1),
N0));
}
+
+ if (UnsafeFPMath && LookThroughFPExt) {
+ // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
+ // -> (fma x, y, (fma (fpext u), (fpext v), z))
+ auto FoldFAddFMAFPExtFMul = [&] (
+ SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
+ Z));
+ };
+ if (N0.getOpcode() == PreferredFusedOpcode) {
+ SDValue N02 = N0.getOperand(2);
+ if (N02.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N020 = N02.getOperand(0);
+ if (N020.getOpcode() == ISD::FMUL)
+ return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
+ N020.getOperand(0), N020.getOperand(1),
+ N1);
+ }
+ }
+
+ // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
+ // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ auto FoldFAddFPExtFMAFMul = [&] (
+ SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
+ Z));
+ };
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == PreferredFusedOpcode) {
+ SDValue N002 = N00.getOperand(2);
+ if (N002.getOpcode() == ISD::FMUL)
+ return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
+ N002.getOperand(0), N002.getOperand(1),
+ N1);
+ }
+ }
+
+ // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
+ // -> (fma y, z, (fma (fpext u), (fpext v), x))
+ if (N1.getOpcode() == PreferredFusedOpcode) {
+ SDValue N12 = N1.getOperand(2);
+ if (N12.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N120 = N12.getOperand(0);
+ if (N120.getOpcode() == ISD::FMUL)
+ return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
+ N120.getOperand(0), N120.getOperand(1),
+ N0);
+ }
+ }
+
+ // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
+ // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ if (N1.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N10 = N1.getOperand(0);
+ if (N10.getOpcode() == PreferredFusedOpcode) {
+ SDValue N102 = N10.getOperand(2);
+ if (N102.getOpcode() == ISD::FMUL)
+ return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
+ N102.getOperand(0), N102.getOperand(1),
+ N0);
+ }
+ }
+ }
}
return SDValue();
}
-static SDValue performFsubFmulCombines(unsigned FusedOpcode,
- bool Aggressive,
- SDNode *N,
- const TargetLowering &TLI,
- SelectionDAG &DAG) {
+/// Try to perform FMA combining on a given FSUB node.
+SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
+
+
+
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
SDLoc SL(N);
+ const TargetOptions &Options = DAG.getTarget().Options;
+ bool UnsafeFPMath = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
+ Options.UnsafeFPMath);
+
+ // Floating-point multiply-add with intermediate rounding.
+ bool HasFMAD = (LegalOperations &&
+ TLI.isOperationLegal(ISD::FMAD, VT));
+
+ // Floating-point multiply-add without intermediate rounding.
+ bool HasFMA = ((!LegalOperations ||
+ TLI.isOperationLegalOrCustom(ISD::FMA, VT)) &&
+ TLI.isFMAFasterThanFMulAndFAdd(VT) &&
+ UnsafeFPMath);
+
+ // No valid opcode, do not combine.
+ if (!HasFMAD && !HasFMA)
+ return SDValue();
+
+ // Always prefer FMAD to FMA for precision.
+ unsigned int PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
+ bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
+ bool LookThroughFPExt = TLI.isFPExtFree(VT);
+
// fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
if (N0.getOpcode() == ISD::FMUL &&
(Aggressive || N0->hasOneUse())) {
- return DAG.getNode(FusedOpcode, SL, VT,
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(0), N0.getOperand(1),
DAG.getNode(ISD::FNEG, SL, VT, N1));
}
// Note: Commutes FSUB operands.
if (N1.getOpcode() == ISD::FMUL &&
(Aggressive || N1->hasOneUse()))
- return DAG.getNode(FusedOpcode, SL, VT,
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT,
N1.getOperand(0)),
N1.getOperand(1), N0);
(Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
SDValue N00 = N0.getOperand(0).getOperand(0);
SDValue N01 = N0.getOperand(0).getOperand(1);
- return DAG.getNode(FusedOpcode, SL, VT,
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
DAG.getNode(ISD::FNEG, SL, VT, N1));
}
+ // Look through FP_EXTEND nodes to do more combining.
+ if (UnsafeFPMath && LookThroughFPExt) {
+ // fold (fsub (fpext (fmul x, y)), z)
+ // -> (fma (fpext x), (fpext y), (fneg z))
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::FMUL)
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(1)),
+ DAG.getNode(ISD::FNEG, SL, VT, N1));
+ }
+
+ // fold (fsub x, (fpext (fmul y, z)))
+ // -> (fma (fneg (fpext y)), (fpext z), x)
+ // Note: Commutes FSUB operands.
+ if (N1.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N10 = N1.getOperand(0);
+ if (N10.getOpcode() == ISD::FMUL)
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N10.getOperand(0))),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N10.getOperand(1)),
+ N0);
+ }
+
+ // fold (fsub (fpext (fneg (fmul, x, y))), z)
+ // -> (fneg (fma (fpext x), (fpext y), z))
+ // Note: This could be removed with appropriate canonicalization of the
+ // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
+ // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
+ // from implementing the canonicalization in visitFSUB.
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::FNEG) {
+ SDValue N000 = N00.getOperand(0);
+ if (N000.getOpcode() == ISD::FMUL) {
+ return DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N000.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N000.getOperand(1)),
+ N1));
+ }
+ }
+ }
+
+ // fold (fsub (fneg (fpext (fmul, x, y))), z)
+ // -> (fneg (fma (fpext x)), (fpext y), z)
+ // Note: This could be removed with appropriate canonicalization of the
+ // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
+ // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
+ // from implementing the canonicalization in visitFSUB.
+ if (N0.getOpcode() == ISD::FNEG) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N000 = N00.getOperand(0);
+ if (N000.getOpcode() == ISD::FMUL) {
+ return DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N000.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N000.getOperand(1)),
+ N1));
+ }
+ }
+ }
+
+ }
+
// More folding opportunities when target permits.
- if (Aggressive) {
+ if ((UnsafeFPMath || HasFMAD) && Aggressive) {
// fold (fsub (fma x, y, (fmul u, v)), z)
// -> (fma x, y (fma u, v, (fneg z)))
- if (N0.getOpcode() == FusedOpcode &&
+ if (N0.getOpcode() == PreferredFusedOpcode &&
N0.getOperand(2).getOpcode() == ISD::FMUL) {
- return DAG.getNode(FusedOpcode, SDLoc(N), VT,
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(0), N0.getOperand(1),
- DAG.getNode(FusedOpcode, SDLoc(N), VT,
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(2).getOperand(0),
N0.getOperand(2).getOperand(1),
- DAG.getNode(ISD::FNEG, SDLoc(N), VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
N1)));
}
// fold (fsub x, (fma y, z, (fmul u, v)))
// -> (fma (fneg y), z, (fma (fneg u), v, x))
- if (N1.getOpcode() == FusedOpcode &&
+ if (N1.getOpcode() == PreferredFusedOpcode &&
N1.getOperand(2).getOpcode() == ISD::FMUL) {
SDValue N20 = N1.getOperand(2).getOperand(0);
SDValue N21 = N1.getOperand(2).getOperand(1);
- return DAG.getNode(FusedOpcode, SDLoc(N), VT,
- DAG.getNode(ISD::FNEG, SDLoc(N), VT,
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
N1.getOperand(0)),
N1.getOperand(1),
- DAG.getNode(FusedOpcode, SDLoc(N), VT,
- DAG.getNode(ISD::FNEG, SDLoc(N), VT,
- N20),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT, N20),
+
N21, N0));
}
+
+ if (UnsafeFPMath && LookThroughFPExt) {
+ // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
+ // -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
+ if (N0.getOpcode() == PreferredFusedOpcode) {
+ SDValue N02 = N0.getOperand(2);
+ if (N02.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N020 = N02.getOperand(0);
+ if (N020.getOpcode() == ISD::FMUL)
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ N0.getOperand(0), N0.getOperand(1),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N020.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N020.getOperand(1)),
+ DAG.getNode(ISD::FNEG, SL, VT,
+ N1)));
+ }
+ }
+
+ // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
+ // -> (fma (fpext x), (fpext y),
+ // (fma (fpext u), (fpext v), (fneg z)))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == PreferredFusedOpcode) {
+ SDValue N002 = N00.getOperand(2);
+ if (N002.getOpcode() == ISD::FMUL)
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(1)),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N002.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N002.getOperand(1)),
+ DAG.getNode(ISD::FNEG, SL, VT,
+ N1)));
+ }
+ }
+
+ // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
+ // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
+ if (N1.getOpcode() == PreferredFusedOpcode &&
+ N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
+ SDValue N120 = N1.getOperand(2).getOperand(0);
+ if (N120.getOpcode() == ISD::FMUL) {
+ SDValue N1200 = N120.getOperand(0);
+ SDValue N1201 = N120.getOperand(1);
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
+ N1.getOperand(1),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL,
+ VT, N1200)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N1201),
+ N0));
+ }
+ }
+
+ // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
+ // -> (fma (fneg (fpext y)), (fpext z),
+ // (fma (fneg (fpext u)), (fpext v), x))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ if (N1.getOpcode() == ISD::FP_EXTEND &&
+ N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
+ SDValue N100 = N1.getOperand(0).getOperand(0);
+ SDValue N101 = N1.getOperand(0).getOperand(1);
+ SDValue N102 = N1.getOperand(0).getOperand(2);
+ if (N102.getOpcode() == ISD::FMUL) {
+ SDValue N1020 = N102.getOperand(0);
+ SDValue N1021 = N102.getOperand(1);
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N100)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL,
+ VT, N1020)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N1021),
+ N0));
+ }
+ }
+ }
}
return SDValue();
// If allowed, fold (fadd (fneg x), x) -> 0.0
if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
- return DAG.getConstantFP(0.0, VT);
+ return DAG.getConstantFP(0.0, SDLoc(N), VT);
// If allowed, fold (fadd x, (fneg x)) -> 0.0
if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
- return DAG.getConstantFP(0.0, VT);
+ return DAG.getConstantFP(0.0, SDLoc(N), VT);
// We can fold chains of FADD's of the same value into multiplications.
// This transform is not safe in general because we are reducing the number
// (fadd (fmul x, c), x) -> (fmul x, c+1)
if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
+ SDLoc DL(N);
+ SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT,
SDValue(CFP01, 0),
- DAG.getConstantFP(1.0, VT));
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, NewCFP);
+ DAG.getConstantFP(1.0, DL, VT));
+ return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
}
// (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
N1.getOperand(0) == N1.getOperand(1) &&
N0.getOperand(0) == N1.getOperand(0)) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
+ SDLoc DL(N);
+ SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT,
SDValue(CFP01, 0),
- DAG.getConstantFP(2.0, VT));
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
+ DAG.getConstantFP(2.0, DL, VT));
+ return DAG.getNode(ISD::FMUL, DL, VT,
N0.getOperand(0), NewCFP);
}
}
// (fadd x, (fmul x, c)) -> (fmul x, c+1)
if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
+ SDLoc DL(N);
+ SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT,
SDValue(CFP11, 0),
- DAG.getConstantFP(1.0, VT));
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, NewCFP);
+ DAG.getConstantFP(1.0, DL, VT));
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
}
// (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
N0.getOperand(0) == N0.getOperand(1) &&
N1.getOperand(0) == N0.getOperand(0)) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
+ SDLoc DL(N);
+ SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT,
SDValue(CFP11, 0),
- DAG.getConstantFP(2.0, VT));
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1.getOperand(0), NewCFP);
+ DAG.getConstantFP(2.0, DL, VT));
+ return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
}
}
ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
// (fadd (fadd x, x), x) -> (fmul x, 3.0)
if (!CFP && N0.getOperand(0) == N0.getOperand(1) &&
- (N0.getOperand(0) == N1))
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
- N1, DAG.getConstantFP(3.0, VT));
+ (N0.getOperand(0) == N1)) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::FMUL, DL, VT,
+ N1, DAG.getConstantFP(3.0, DL, VT));
+ }
}
if (N1.getOpcode() == ISD::FADD && AllowNewConst) {
ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
// (fadd x, (fadd x, x)) -> (fmul x, 3.0)
if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
- N1.getOperand(0) == N0)
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
- N0, DAG.getConstantFP(3.0, VT));
+ N1.getOperand(0) == N0) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::FMUL, DL, VT,
+ N0, DAG.getConstantFP(3.0, DL, VT));
+ }
}
// (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
N0.getOperand(0) == N0.getOperand(1) &&
N1.getOperand(0) == N1.getOperand(1) &&
- N0.getOperand(0) == N1.getOperand(0))
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
- N0.getOperand(0), DAG.getConstantFP(4.0, VT));
- }
- } // enable-unsafe-fp-math
-
- if (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)) {
- // Assume if there is an fmad instruction that it should be aggressively
- // used.
- if (SDValue Fused = performFaddFmulCombines(ISD::FMAD, true, N, TLI, DAG))
- return Fused;
- }
-
- // FADD -> FMA combines:
- if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
- TLI.isFMAFasterThanFMulAndFAdd(VT) &&
- (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {
-
- if (!TLI.isOperationLegal(ISD::FMAD, VT)) {
- // Don't form FMA if we are preferring FMAD.
- if (SDValue Fused
- = performFaddFmulCombines(ISD::FMA,
- TLI.enableAggressiveFMAFusion(VT),
- N, TLI, DAG)) {
- return Fused;
+ N0.getOperand(0) == N1.getOperand(0)) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::FMUL, DL, VT,
+ N0.getOperand(0), DAG.getConstantFP(4.0, DL, VT));
}
}
- // When FP_EXTEND nodes are free on the target, and there is an opportunity
- // to combine into FMA, arrange such nodes accordingly.
- if (TLI.isFPExtFree(VT)) {
-
- // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
- if (N0.getOpcode() == ISD::FP_EXTEND) {
- SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == ISD::FMUL)
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
- N00.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
- N00.getOperand(1)), N1);
- }
-
- // fold (fadd x, (fpext (fmul y, z)), z) -> (fma (fpext y), (fpext z), x)
- // Note: Commutes FADD operands.
- if (N1.getOpcode() == ISD::FP_EXTEND) {
- SDValue N10 = N1.getOperand(0);
- if (N10.getOpcode() == ISD::FMUL)
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
- N10.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
- N10.getOperand(1)), N0);
+ // Canonicalize chains of adds to LHS to simplify the following transform.
+ if (N0.getOpcode() != ISD::FADD && N1.getOpcode() == ISD::FADD)
+ return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N0);
+
+ // Convert a chain of 3 dependent operations into 2 independent operations
+ // and 1 dependent operation:
+ // (fadd N0: (fadd N00: (fadd z, w), N01: y), N1: x) ->
+ // (fadd N00: (fadd z, w), (fadd N1: x, N01: y))
+ if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
+ N1.getOpcode() != ISD::FADD) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::FADD) {
+ SDValue N01 = N0.getOperand(1);
+ SDValue NewAdd = DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N01);
+ return DAG.getNode(ISD::FADD, SDLoc(N), VT, N00, NewAdd);
}
}
+ } // enable-unsafe-fp-math
+
+ // FADD -> FMA combines:
+ SDValue Fused = visitFADDForFMACombine(N);
+ if (Fused) {
+ AddToWorklist(Fused.getNode());
+ return Fused;
}
return SDValue();
// fold (fsub c1, c2) -> c1-c2
if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0, N1);
+ return DAG.getNode(ISD::FSUB, dl, VT, N0, N1);
// fold (fsub A, (fneg B)) -> (fadd A, B)
if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
// (fsub x, x) -> 0.0
if (N0 == N1)
- return DAG.getConstantFP(0.0f, VT);
+ return DAG.getConstantFP(0.0f, dl, VT);
// (fsub x, (fadd x, y)) -> (fneg y)
// (fsub x, (fadd y, x)) -> (fneg y)
}
}
- if (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)) {
- // Assume if there is an fmad instruction that it should be aggressively
- // used.
- if (SDValue Fused = performFsubFmulCombines(ISD::FMAD, true, N, TLI, DAG))
- return Fused;
- }
-
// FSUB -> FMA combines:
- if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
- TLI.isFMAFasterThanFMulAndFAdd(VT) &&
- (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {
-
- if (!TLI.isOperationLegal(ISD::FMAD, VT)) {
- // Don't form FMA if we are preferring FMAD.
-
- if (SDValue Fused
- = performFsubFmulCombines(ISD::FMA,
- TLI.enableAggressiveFMAFusion(VT),
- N, TLI, DAG)) {
- return Fused;
- }
- }
-
- // When FP_EXTEND nodes are free on the target, and there is an opportunity
- // to combine into FMA, arrange such nodes accordingly.
- if (TLI.isFPExtFree(VT)) {
- // fold (fsub (fpext (fmul x, y)), z)
- // -> (fma (fpext x), (fpext y), (fneg z))
- if (N0.getOpcode() == ISD::FP_EXTEND) {
- SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == ISD::FMUL)
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
- N00.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
- N00.getOperand(1)),
- DAG.getNode(ISD::FNEG, SDLoc(N), VT, N1));
- }
-
- // fold (fsub x, (fpext (fmul y, z)))
- // -> (fma (fneg (fpext y)), (fpext z), x)
- // Note: Commutes FSUB operands.
- if (N1.getOpcode() == ISD::FP_EXTEND) {
- SDValue N10 = N1.getOperand(0);
- if (N10.getOpcode() == ISD::FMUL)
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- DAG.getNode(ISD::FNEG, SDLoc(N), VT,
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N),
- VT, N10.getOperand(0))),
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
- N10.getOperand(1)),
- N0);
- }
-
- // fold (fsub (fpext (fneg (fmul, x, y))), z)
- // -> (fma (fneg (fpext x)), (fpext y), (fneg z))
- if (N0.getOpcode() == ISD::FP_EXTEND) {
- SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == ISD::FNEG) {
- SDValue N000 = N00.getOperand(0);
- if (N000.getOpcode() == ISD::FMUL) {
- return DAG.getNode(ISD::FMA, dl, VT,
- DAG.getNode(ISD::FNEG, dl, VT,
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N),
- VT, N000.getOperand(0))),
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
- N000.getOperand(1)),
- DAG.getNode(ISD::FNEG, dl, VT, N1));
- }
- }
- }
-
- // fold (fsub (fneg (fpext (fmul, x, y))), z)
- // -> (fma (fneg (fpext x)), (fpext y), (fneg z))
- if (N0.getOpcode() == ISD::FNEG) {
- SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == ISD::FP_EXTEND) {
- SDValue N000 = N00.getOperand(0);
- if (N000.getOpcode() == ISD::FMUL) {
- return DAG.getNode(ISD::FMA, dl, VT,
- DAG.getNode(ISD::FNEG, dl, VT,
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N),
- VT, N000.getOperand(0))),
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
- N000.getOperand(1)),
- DAG.getNode(ISD::FNEG, dl, VT, N1));
- }
- }
- }
- }
+ SDValue Fused = visitFSUBForFMACombine(N);
+ if (Fused) {
+ AddToWorklist(Fused.getNode());
+ return Fused;
}
return SDValue();
// This just handles C1 * C2 for vectors. Other vector folds are below.
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
-
- // Canonicalize vector constant to RHS.
- if (N0.getOpcode() == ISD::BUILD_VECTOR &&
- N1.getOpcode() != ISD::BUILD_VECTOR)
- if (auto *BV0 = dyn_cast<BuildVectorSDNode>(N0))
- if (BV0->isConstant())
- return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
}
// fold (fmul c1, c2) -> c1*c2
return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, N1);
// canonicalize constant to RHS
- if (N0CFP && !N1CFP)
+ if (isConstantFPBuildVectorOrConstantFP(N0) &&
+ !isConstantFPBuildVectorOrConstantFP(N1))
return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, N0);
// fold (fmul A, 1.0) -> A
// inserted during lowering.
if (N0.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1)) {
SDLoc SL(N);
- const SDValue Two = DAG.getConstantFP(2.0, VT);
+ const SDValue Two = DAG.getConstantFP(2.0, SL, VT);
SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, Two, N1);
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), MulConsts);
+ return DAG.getNode(ISD::FMUL, SL, VT, N0.getOperand(0), MulConsts);
}
}
if (Options.UnsafeFPMath && N1CFP && N0 == N2)
return DAG.getNode(ISD::FMUL, dl, VT, N0,
DAG.getNode(ISD::FADD, dl, VT,
- N1, DAG.getConstantFP(1.0, VT)));
+ N1, DAG.getConstantFP(1.0, dl, VT)));
// (fma x, c, (fneg x)) -> (fmul x, (c-1))
if (Options.UnsafeFPMath && N1CFP &&
N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0)
return DAG.getNode(ISD::FMUL, dl, VT, N0,
DAG.getNode(ISD::FADD, dl, VT,
- N1, DAG.getConstantFP(-1.0, VT)));
+ N1, DAG.getConstantFP(-1.0, dl, VT)));
return SDValue();
// TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) ||
TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
TLI.isFPImmLegal(Recip, VT)))
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0,
- DAG.getConstantFP(Recip, VT));
+ return DAG.getNode(ISD::FMUL, DL, VT, N0,
+ DAG.getConstantFP(Recip, DL, VT));
}
// If this FDIV is part of a reciprocal square root, it may be folded
}
if (TLI.combineRepeatedFPDivisors(Users.size())) {
- SDValue FPOne = DAG.getConstantFP(1.0, VT); // floating point 1.0
- SDValue Reciprocal = DAG.getNode(ISD::FDIV, SDLoc(N), VT, FPOne, N1);
+ SDLoc DL(N);
+ SDValue FPOne = DAG.getConstantFP(1.0, DL, VT); // floating point 1.0
+ SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1);
// Dividend / Divisor -> Dividend * Reciprocal
for (auto I = Users.begin(), E = Users.end(); I != E; ++I) {
// Compute this as X * (1/sqrt(X)) = X * (X ** -0.5)
if (SDValue RV = BuildRsqrtEstimate(N->getOperand(0))) {
EVT VT = RV.getValueType();
- RV = DAG.getNode(ISD::FMUL, SDLoc(N), VT, N->getOperand(0), RV);
+ SDLoc DL(N);
+ RV = DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV);
AddToWorklist(RV.getNode());
// Unfortunately, RV is now NaN if the input was exactly 0.
// Select out this case and force the answer to 0.
- SDValue Zero = DAG.getConstantFP(0.0, VT);
+ SDValue Zero = DAG.getConstantFP(0.0, DL, VT);
SDValue ZeroCmp =
- DAG.getSetCC(SDLoc(N), TLI.getSetCCResultType(*DAG.getContext(), VT),
+ DAG.getSetCC(DL, TLI.getSetCCResultType(*DAG.getContext(), VT),
N->getOperand(0), Zero, ISD::SETEQ);
AddToWorklist(ZeroCmp.getNode());
AddToWorklist(RV.getNode());
RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT,
- SDLoc(N), VT, ZeroCmp, Zero, RV);
+ DL, VT, ZeroCmp, Zero, RV);
return RV;
}
}
!VT.isVector() &&
(!LegalOperations ||
TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
+ SDLoc DL(N);
SDValue Ops[] =
{ N0.getOperand(0), N0.getOperand(1),
- DAG.getConstantFP(-1.0, VT) , DAG.getConstantFP(0.0, VT),
+ DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
N0.getOperand(2) };
- return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops);
+ return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
}
// fold (sint_to_fp (zext (setcc x, y, cc))) ->
N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
(!LegalOperations ||
TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
+ SDLoc DL(N);
SDValue Ops[] =
{ N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
- DAG.getConstantFP(1.0, VT) , DAG.getConstantFP(0.0, VT),
+ DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
N0.getOperand(0).getOperand(2) };
- return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops);
+ return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
}
}
if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
(!LegalOperations ||
TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
+ SDLoc DL(N);
SDValue Ops[] =
{ N0.getOperand(0), N0.getOperand(1),
- DAG.getConstantFP(1.0, VT), DAG.getConstantFP(0.0, VT),
+ DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
N0.getOperand(2) };
- return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops);
+ return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
}
}
// single-step fp_round we want to fold to.
// In other words, double rounding isn't the same as rounding.
// Also, this is a value preserving truncation iff both fp_round's are.
- if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc)
- return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0.getOperand(0),
- DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc));
+ if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
+ DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
+ }
}
// fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
// fold (fp_round_inreg c1fp) -> c1fp
if (N0CFP && isTypeLegal(EVT)) {
- SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), EVT);
- return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, Round);
+ SDLoc DL(N);
+ SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT);
+ return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round);
}
return SDValue();
if (isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
+ // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
+ if (N0.getOpcode() == ISD::FP16_TO_FP &&
+ TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
+ return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
+
// Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
// value of X.
if (N0.getOpcode() == ISD::FP_ROUND
CombineTo(N, ExtLoad);
CombineTo(N0.getNode(),
DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
- N0.getValueType(), ExtLoad, DAG.getIntPtrConstant(1)),
+ N0.getValueType(), ExtLoad,
+ DAG.getIntPtrConstant(1, SDLoc(N0))),
ExtLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
SDValue DAGCombiner::visitFCEIL(SDNode *N) {
SDValue N0 = N->getOperand(0);
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
EVT VT = N->getValueType(0);
// fold (fceil c1) -> fceil(c1)
- if (N0CFP)
+ if (isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
return SDValue();
SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
SDValue N0 = N->getOperand(0);
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
EVT VT = N->getValueType(0);
// fold (ftrunc c1) -> ftrunc(c1)
- if (N0CFP)
+ if (isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
return SDValue();
SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
EVT VT = N->getValueType(0);
// fold (ffloor c1) -> ffloor(c1)
- if (N0CFP)
+ if (isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
return SDValue();
// For a scalar, just generate 0x80...
SignMask = APInt::getSignBit(IntVT.getSizeInBits());
}
- Int = DAG.getNode(ISD::XOR, SDLoc(N0), IntVT, Int,
- DAG.getConstant(SignMask, IntVT));
+ SDLoc DL0(N0);
+ Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
+ DAG.getConstant(SignMask, DL0, IntVT));
AddToWorklist(Int.getNode());
return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Int);
}
if (N0CFP && N1CFP) {
const APFloat &C0 = N0CFP->getValueAPF();
const APFloat &C1 = N1CFP->getValueAPF();
- return DAG.getConstantFP(minnum(C0, C1), N->getValueType(0));
+ return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), N->getValueType(0));
}
if (N0CFP) {
if (N0CFP && N1CFP) {
const APFloat &C0 = N0CFP->getValueAPF();
const APFloat &C1 = N1CFP->getValueAPF();
- return DAG.getConstantFP(maxnum(C0, C1), N->getValueType(0));
+ return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), N->getValueType(0));
}
if (N0CFP) {
// For a scalar, just generate 0x7f...
SignMask = ~APInt::getSignBit(IntVT.getSizeInBits());
}
- Int = DAG.getNode(ISD::AND, SDLoc(N0), IntVT, Int,
- DAG.getConstant(SignMask, IntVT));
+ SDLoc DL(N0);
+ Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
+ DAG.getConstant(SignMask, DL, IntVT));
AddToWorklist(Int.getNode());
return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Int);
}
if (AndConst.isPowerOf2() &&
cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) {
+ SDLoc DL(N);
SDValue SetCC =
- DAG.getSetCC(SDLoc(N),
+ DAG.getSetCC(DL,
getSetCCResultType(Op0.getValueType()),
- Op0, DAG.getConstant(0, Op0.getValueType()),
+ Op0, DAG.getConstant(0, DL, Op0.getValueType()),
ISD::SETNE);
- SDValue NewBRCond = DAG.getNode(ISD::BRCOND, SDLoc(N),
+ SDValue NewBRCond = DAG.getNode(ISD::BRCOND, DL,
MVT::Other, Chain, SetCC, N2);
// Don't add the new BRCond into the worklist or else SimplifySelectCC
// will convert it back to (X & C1) >> C2.
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
return false;
- VT = Use->getValueType(0);
+ VT = LD->getMemoryVT();
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
return false;
- VT = ST->getValue().getValueType();
+ VT = ST->getMemoryVT();
} else
return false;
if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
else CNV = CNV - Offset1;
+ SDLoc DL(OtherUses[i]);
+
// We can now generate the new expression.
- SDValue NewOp1 = DAG.getConstant(CNV, CN->getValueType(0));
+ SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);
SDValue NewUse = DAG.getNode(Opcode,
- SDLoc(OtherUses[i]),
+ DL,
OtherUses[i]->getValueType(0), NewOp1, NewOp2);
DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
deleteAndRecombine(OtherUses[i]);
"Cannot split out indexing using opaque target constants");
if (Inc.getOpcode() == ISD::TargetConstant) {
ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
- Inc = DAG.getConstant(*ConstInc->getConstantIntValue(),
+ Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
ConstInc->getValueType(0));
}
if (Offset) {
// BaseAddr = BaseAddr + Offset.
EVT ArithType = BaseAddr.getValueType();
- BaseAddr = DAG->getNode(ISD::ADD, SDLoc(Origin), ArithType, BaseAddr,
- DAG->getConstant(Offset, ArithType));
+ SDLoc DL(Origin);
+ BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
+ DAG->getConstant(Offset, DL, ArithType));
}
// Create the type of the loaded slice according to its size.
// Okay, we can do this! Replace the 'St' store with a store of IVal that is
// shifted by ByteShift and truncated down to NumBytes.
- if (ByteShift)
- IVal = DAG.getNode(ISD::SRL, SDLoc(IVal), IVal.getValueType(), IVal,
- DAG.getConstant(ByteShift*8,
+ if (ByteShift) {
+ SDLoc DL(IVal);
+ IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
+ DAG.getConstant(ByteShift*8, DL,
DC->getShiftAmountTy(IVal.getValueType())));
+ }
// Figure out the offset for the store and the alignment of the access.
unsigned StOffset;
SDValue Ptr = St->getBasePtr();
if (StOffset) {
- Ptr = DAG.getNode(ISD::ADD, SDLoc(IVal), Ptr.getValueType(),
- Ptr, DAG.getConstant(StOffset, Ptr.getValueType()));
+ SDLoc DL(IVal);
+ Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
+ Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
NewAlign = MinAlign(NewAlign, StOffset);
}
SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
Ptr.getValueType(), Ptr,
- DAG.getConstant(PtrOff, Ptr.getValueType()));
+ DAG.getConstant(PtrOff, SDLoc(LD),
+ Ptr.getValueType()));
SDValue NewLD = DAG.getLoad(NewVT, SDLoc(N0),
LD->getChain(), NewPtr,
LD->getPointerInfo().getWithOffset(PtrOff),
LD->isInvariant(), NewAlign,
LD->getAAInfo());
SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
- DAG.getConstant(NewImm, NewVT));
+ DAG.getConstant(NewImm, SDLoc(Value),
+ NewVT));
SDValue NewST = DAG.getStore(Chain, SDLoc(N),
NewVal, NewPtr,
ST->getPointerInfo().getWithOffset(PtrOff),
int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
- unsigned EarliestNodeUsed = 0;
+ unsigned LatestNodeUsed = 0;
for (unsigned i=0; i < NumElem; ++i) {
// Find a chain for the new wide-store operand. Notice that some
// of the store nodes that we found may not be selected for inclusion
// in the wide store. The chain we use needs to be the chain of the
- // earliest store node which is *used* and replaced by the wide store.
- if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum)
- EarliestNodeUsed = i;
+ // latest store node which is *used* and replaced by the wide store.
+ if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum)
+ LatestNodeUsed = i;
}
- // The earliest Node in the DAG.
- LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode;
+ // The latest Node in the DAG.
+ LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode;
SDLoc DL(StoreNodes[0].MemNode);
SDValue StoredVal;
// can be materialized without a load.
// It may be beneficial to loosen this restriction to allow non-zero
// store merging.
- StoredVal = DAG.getConstant(0, Ty);
+ StoredVal = DAG.getConstant(0, DL, Ty);
} else {
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i < NumElem ; ++i) {
// Create the new Load and Store operations.
EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
- StoredVal = DAG.getConstant(StoreInt, StoreTy);
+ StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
}
- SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal,
+ SDValue NewStore = DAG.getStore(LatestOp->getChain(), DL, StoredVal,
FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(),
false, false,
FirstInChain->getAlignment());
- // Replace the first store with the new store
- CombineTo(EarliestOp, NewStore);
+ // Replace the last store with the new store
+ CombineTo(LatestOp, NewStore);
// Erase all other stores.
for (unsigned i = 0; i < NumElem ; ++i) {
- if (StoreNodes[i].MemNode == EarliestOp)
+ if (StoreNodes[i].MemNode == LatestOp)
continue;
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
// ReplaceAllUsesWith will replace all uses that existed when it was
if (NumElem < 2)
return false;
- // The earliest Node in the DAG.
- unsigned EarliestNodeUsed = 0;
- LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode;
+ // The latest Node in the DAG.
+ unsigned LatestNodeUsed = 0;
for (unsigned i=1; i<NumElem; ++i) {
// Find a chain for the new wide-store operand. Notice that some
// of the store nodes that we found may not be selected for inclusion
// in the wide store. The chain we use needs to be the chain of the
- // earliest store node which is *used* and replaced by the wide store.
- if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum)
- EarliestNodeUsed = i;
+ // latest store node which is *used* and replaced by the wide store.
+ if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum)
+ LatestNodeUsed = i;
}
+ LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode;
+
// Find if it is better to use vectors or integers to load and store
// to memory.
EVT JointMemOpVT;
false, false, false,
FirstLoad->getAlignment());
- SDValue NewStore = DAG.getStore(EarliestOp->getChain(), StoreDL, NewLoad,
+ SDValue NewStore = DAG.getStore(LatestOp->getChain(), StoreDL, NewLoad,
FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(), false, false,
FirstInChain->getAlignment());
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Ld->getChain());
}
- // Replace the first store with the new store.
- CombineTo(EarliestOp, NewStore);
+ // Replace the last store with the new store.
+ CombineTo(LatestOp, NewStore);
// Erase all other stores.
for (unsigned i = 0; i < NumElem ; ++i) {
// Remove all Store nodes.
- if (StoreNodes[i].MemNode == EarliestOp)
+ if (StoreNodes[i].MemNode == LatestOp)
continue;
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain());
case MVT::f32:
if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
+ ;
Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
- bitcastToAPInt().getZExtValue(), MVT::i32);
+ bitcastToAPInt().getZExtValue(), SDLoc(CFP),
+ MVT::i32);
return DAG.getStore(Chain, SDLoc(N), Tmp,
Ptr, ST->getMemOperand());
}
if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
!ST->isVolatile()) ||
TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
+ ;
Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
- getZExtValue(), MVT::i64);
+ getZExtValue(), SDLoc(CFP), MVT::i64);
return DAG.getStore(Chain, SDLoc(N), Tmp,
Ptr, ST->getMemOperand());
}
// argument passing. Since this is so common, custom legalize the
// 64-bit integer store into two 32-bit stores.
uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
- SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, MVT::i32);
- SDValue Hi = DAG.getConstant(Val >> 32, MVT::i32);
+ SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
+ SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
if (TLI.isBigEndian()) std::swap(Lo, Hi);
unsigned Alignment = ST->getAlignment();
bool isNonTemporal = ST->isNonTemporal();
AAMDNodes AAInfo = ST->getAAInfo();
+ SDLoc DL(N);
+
SDValue St0 = DAG.getStore(Chain, SDLoc(ST), Lo,
Ptr, ST->getPointerInfo(),
isVolatile, isNonTemporal,
ST->getAlignment(), AAInfo);
- Ptr = DAG.getNode(ISD::ADD, SDLoc(N), Ptr.getValueType(), Ptr,
- DAG.getConstant(4, Ptr.getValueType()));
+ Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+ DAG.getConstant(4, DL, Ptr.getValueType()));
Alignment = MinAlign(Alignment, 4U);
SDValue St1 = DAG.getStore(Chain, SDLoc(ST), Hi,
Ptr, ST->getPointerInfo().getWithOffset(4),
isVolatile, isNonTemporal,
Alignment, AAInfo);
- return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other,
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
St0, St1);
}
SDValue Offset;
EVT PtrType = NewPtr.getValueType();
MachinePointerInfo MPI;
+ SDLoc DL(EVE);
if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
int Elt = ConstEltNo->getZExtValue();
unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
if (TLI.isBigEndian())
PtrOff = InVecVT.getSizeInBits() / 8 - PtrOff;
- Offset = DAG.getConstant(PtrOff, PtrType);
+ Offset = DAG.getConstant(PtrOff, DL, PtrType);
MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
} else {
Offset = DAG.getNode(
- ISD::MUL, SDLoc(EVE), EltNo.getValueType(), EltNo,
- DAG.getConstant(VecEltVT.getStoreSize(), EltNo.getValueType()));
+ ISD::MUL, DL, EltNo.getValueType(), EltNo,
+ DAG.getConstant(VecEltVT.getStoreSize(), DL, EltNo.getValueType()));
if (TLI.isBigEndian())
Offset = DAG.getNode(
- ISD::SUB, SDLoc(EVE), EltNo.getValueType(),
- DAG.getConstant(InVecVT.getStoreSize(), EltNo.getValueType()), Offset);
+ ISD::SUB, DL, EltNo.getValueType(),
+ DAG.getConstant(InVecVT.getStoreSize(), DL, EltNo.getValueType()),
+ Offset);
MPI = OriginalLoad->getPointerInfo();
}
- NewPtr = DAG.getNode(ISD::ADD, SDLoc(EVE), PtrType, NewPtr, Offset);
+ NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);
// The replacement we need to do here is a little tricky: we need to
// replace an extractelement of a load with a load.
if (!LegalOperations) {
EVT IndexTy = TLI.getVectorIdxTy();
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT,
- SVInVec, DAG.getConstant(OrigElt, IndexTy));
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec,
+ DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy));
}
}
if (ISD::isNormalLoad(InVec.getNode())) {
LN0 = cast<LoadSDNode>(InVec);
Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
- EltNo = DAG.getConstant(Elt, EltNo.getValueType());
+ EltNo = DAG.getConstant(Elt, SDLoc(EltNo), EltNo.getValueType());
}
}
unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
assert(ElemRatio > 1 && "Invalid element size ratio");
SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
- DAG.getConstant(0, SourceType);
+ DAG.getConstant(0, SDLoc(N), SourceType);
unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
// Try to replace VecIn1 with two extract_subvectors
// No need to update the masks, they should still be correct.
VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
- DAG.getConstant(VT.getVectorNumElements(), TLI.getVectorIdxTy()));
+ DAG.getConstant(VT.getVectorNumElements(), dl, TLI.getVectorIdxTy()));
VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
- DAG.getConstant(0, TLI.getVectorIdxTy()));
+ DAG.getConstant(0, dl, TLI.getVectorIdxTy()));
} else
return SDValue();
}
if (UsesZeroVector)
- VecIn2 = VT.isInteger() ? DAG.getConstant(0, VT) :
- DAG.getConstantFP(0.0, VT);
+ VecIn2 = VT.isInteger() ? DAG.getConstant(0, dl, VT) :
+ DAG.getConstantFP(0.0, dl, VT);
else
// If VecIn2 is unused then change it to undef.
VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
return SDValue();
}
+static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT OpVT = N->getOperand(0).getValueType();
+
+ // If the operands are legal vectors, leave them alone.
+ if (TLI.isTypeLegal(OpVT))
+ return SDValue();
+
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ SmallVector<SDValue, 8> Ops;
+
+ EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
+ SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
+
+ // Keep track of what we encounter.
+ bool AnyInteger = false;
+ bool AnyFP = false;
+ for (const SDValue &Op : N->ops()) {
+ if (ISD::BITCAST == Op.getOpcode() &&
+ !Op.getOperand(0).getValueType().isVector())
+ Ops.push_back(Op.getOperand(0));
+ else if (ISD::UNDEF == Op.getOpcode())
+ Ops.push_back(ScalarUndef);
+ else
+ return SDValue();
+
+ // Note whether we encounter an integer or floating point scalar.
+ // If it's neither, bail out, it could be something weird like x86mmx.
+ EVT LastOpVT = Ops.back().getValueType();
+ if (LastOpVT.isFloatingPoint())
+ AnyFP = true;
+ else if (LastOpVT.isInteger())
+ AnyInteger = true;
+ else
+ return SDValue();
+ }
+
+ // If any of the operands is a floating point scalar bitcast to a vector,
+ // use floating point types throughout, and bitcast everything.
+ // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
+ if (AnyFP) {
+ SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
+ ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
+ if (AnyInteger) {
+ for (SDValue &Op : Ops) {
+ if (Op.getValueType() == SVT)
+ continue;
+ if (Op.getOpcode() == ISD::UNDEF)
+ Op = ScalarUndef;
+ else
+ Op = DAG.getNode(ISD::BITCAST, DL, SVT, Op);
+ }
+ }
+ }
+
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
+ VT.getSizeInBits() / SVT.getSizeInBits());
+ return DAG.getNode(ISD::BITCAST, DL, VT,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, Ops));
+}
+
SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
// TODO: Check to see if this is a CONCAT_VECTORS of a bunch of
// EXTRACT_SUBVECTOR operations. If so, and if the EXTRACT_SUBVECTOR vector
if (ISD::allOperandsUndef(N))
return DAG.getUNDEF(VT);
- // Optimize concat_vectors where one of the vectors is undef.
- if (N->getNumOperands() == 2 &&
- N->getOperand(1)->getOpcode() == ISD::UNDEF) {
+ // Optimize concat_vectors where all but the first of the vectors are undef.
+ if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
+ return Op.getOpcode() == ISD::UNDEF;
+ })) {
SDValue In = N->getOperand(0);
assert(In.getValueType().isVector() && "Must concat vectors");
if (In->getOpcode() == ISD::BITCAST &&
!In->getOperand(0)->getValueType(0).isVector()) {
SDValue Scalar = In->getOperand(0);
+
+ // If the bitcast type isn't legal, it might be a trunc of a legal type;
+ // look through the trunc so we can still do the transform:
+ // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
+ if (Scalar->getOpcode() == ISD::TRUNCATE &&
+ !TLI.isTypeLegal(Scalar.getValueType()) &&
+ TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
+ Scalar = Scalar->getOperand(0);
+
EVT SclTy = Scalar->getValueType(0);
if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds);
}
+ // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
+ if (SDValue V = combineConcatVectorOfScalars(N, DAG))
+ return V;
+
// Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
// nodes often generate nop CONCAT_VECTOR nodes.
// Scan the CONCAT_VECTOR operands and look for a CONCAT operations that
// type.
if (V->getOperand(0).getValueType() != NVT)
return SDValue();
- unsigned Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ unsigned Idx = N->getConstantOperandVal(1);
unsigned NumElems = NVT.getVectorNumElements();
assert((Idx % NumElems) == 0 &&
"IDX in concat is not a multiple of the result vector length.");
return V;
}
+ // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
+ // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
+ if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
+ SmallVector<SDValue, 8> Ops;
+ for (int M : SVN->getMask()) {
+ SDValue Op = DAG.getUNDEF(VT.getScalarType());
+ if (M >= 0) {
+ int Idx = M % NumElts;
+ SDValue &S = (M < (int)NumElts ? N0 : N1);
+ if (S.getOpcode() == ISD::BUILD_VECTOR && S.hasOneUse()) {
+ Op = S.getOperand(Idx);
+ } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR && S.hasOneUse()) {
+ if (Idx == 0)
+ Op = S.getOperand(0);
+ } else {
+ // Operand can't be combined - bail out.
+ break;
+ }
+ }
+ Ops.push_back(Op);
+ }
+ if (Ops.size() == VT.getVectorNumElements()) {
+ // BUILD_VECTOR requires all inputs to be of the same type, find the
+ // maximum type and extend them all.
+ EVT SVT = VT.getScalarType();
+ if (SVT.isInteger())
+ for (SDValue &Op : Ops)
+ SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
+ if (SVT != VT.getScalarType())
+ for (SDValue &Op : Ops)
+ Op = TLI.isZExtFree(Op.getValueType(), SVT)
+ ? DAG.getZExtOrTrunc(Op, SDLoc(N), SVT)
+ : DAG.getSExtOrTrunc(Op, SDLoc(N), SVT);
+ return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Ops);
+ }
+ }
+
// If this shuffle only has a single input that is a bitcasted shuffle,
// attempt to merge the 2 shuffles and suitably bitcast the inputs/output
// back to their original types.
return SDValue();
}
+SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+
+ // fold (fp_to_fp16 (fp16_to_fp op)) -> op
+ if (N0->getOpcode() == ISD::FP16_TO_FP)
+ return N0->getOperand(0);
+
+ return SDValue();
+}
+
/// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
/// with the destination vector and a zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
// Return the new VECTOR_SHUFFLE node.
EVT EltVT = RVT.getVectorElementType();
SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(),
- DAG.getConstant(0, EltVT));
- SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), RVT, ZeroOps);
+ DAG.getConstant(0, dl, EltVT));
+ SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, RVT, ZeroOps);
LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS);
SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]);
return DAG.getNode(ISD::BITCAST, dl, VT, Shuf);
bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
SDValue RHS) {
+ // fold (select (setcc x, -0.0, *lt), NaN, (fsqrt x))
+ // The select + setcc is redundant, because fsqrt returns NaN for X < -0.
+ if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
+ if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
+ // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
+ SDValue Sqrt = RHS;
+ ISD::CondCode CC;
+ SDValue CmpLHS;
+ const ConstantFPSDNode *NegZero = nullptr;
+
+ if (TheSelect->getOpcode() == ISD::SELECT_CC) {
+ CC = dyn_cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
+ CmpLHS = TheSelect->getOperand(0);
+ NegZero = isConstOrConstSplatFP(TheSelect->getOperand(1));
+ } else {
+ // SELECT or VSELECT
+ SDValue Cmp = TheSelect->getOperand(0);
+ if (Cmp.getOpcode() == ISD::SETCC) {
+ CC = dyn_cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
+ CmpLHS = Cmp.getOperand(0);
+ NegZero = isConstOrConstSplatFP(Cmp.getOperand(1));
+ }
+ }
+ if (NegZero && NegZero->isNegative() && NegZero->isZero() &&
+ Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
+ CC == ISD::SETULT || CC == ISD::SETLT)) {
+ // We have: (select (setcc x, -0.0, *lt), NaN, (fsqrt x))
+ CombineTo(TheSelect, Sqrt);
+ return true;
+ }
+ }
+ }
// Cannot simplify select with vector condition
if (TheSelect->getOperand(0).getValueType().isVector()) return false;
if (LHS.getOperand(0) != RHS.getOperand(0) ||
// Do not let this transformation reduce the number of volatile loads.
LLD->isVolatile() || RLD->isVolatile() ||
+ // FIXME: If either is a pre/post inc/dec load,
+ // we'd need to split out the address adjustment.
+ LLD->isIndexed() || RLD->isIndexed() ||
// If this is an EXTLOAD, the VT's must match.
LLD->getMemoryVT() != RLD->getMemoryVT() ||
// If this is an EXTLOAD, the kind of extension must match.
// Get the offsets to the 0 and 1 element of the array so that we can
// select between them.
- SDValue Zero = DAG.getIntPtrConstant(0);
+ SDValue Zero = DAG.getIntPtrConstant(0, DL);
unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
- SDValue One = DAG.getIntPtrConstant(EltSize);
+ SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
SDValue Cond = DAG.getSetCC(DL,
getSetCCResultType(N0.getValueType()),
return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(), false,
false, false, Alignment);
-
}
}
if (XType.bitsGE(AType)) {
// and (sra X, size(X)-1, A) -> "and (srl X, C2), A" iff A is a
// single-bit constant.
- if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue()-1)) == 0)) {
+ if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
unsigned ShCtV = N2C->getAPIntValue().logBase2();
- ShCtV = XType.getSizeInBits()-ShCtV-1;
- SDValue ShCt = DAG.getConstant(ShCtV,
+ ShCtV = XType.getSizeInBits() - ShCtV - 1;
+ SDValue ShCt = DAG.getConstant(ShCtV, SDLoc(N0),
getShiftAmountTy(N0.getValueType()));
SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0),
XType, N0, ShCt);
SDValue Shift = DAG.getNode(ISD::SRA, SDLoc(N0),
XType, N0,
- DAG.getConstant(XType.getSizeInBits()-1,
+ DAG.getConstant(XType.getSizeInBits() - 1,
+ SDLoc(N0),
getShiftAmountTy(N0.getValueType())));
AddToWorklist(Shift.getNode());
// Shift the tested bit over the sign bit.
APInt AndMask = ConstAndRHS->getAPIntValue();
SDValue ShlAmt =
- DAG.getConstant(AndMask.countLeadingZeros(),
+ DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
getShiftAmountTy(AndLHS.getValueType()));
SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
// Now arithmetic right shift it all the way over, so the result is either
// all-ones, or zero.
SDValue ShrAmt =
- DAG.getConstant(AndMask.getBitWidth()-1,
+ DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl),
getShiftAmountTy(Shl.getValueType()));
SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
// shl setcc result by log2 n2c
return DAG.getNode(
ISD::SHL, DL, N2.getValueType(), Temp,
- DAG.getConstant(N2C->getAPIntValue().logBase2(),
+ DAG.getConstant(N2C->getAPIntValue().logBase2(), SDLoc(Temp),
getShiftAmountTy(Temp.getValueType())));
}
}
SDValue Ctlz = DAG.getNode(ISD::CTLZ, SDLoc(N0), XType, N0);
return DAG.getNode(ISD::SRL, DL, XType, Ctlz,
DAG.getConstant(Log2_32(XType.getSizeInBits()),
+ SDLoc(Ctlz),
getShiftAmountTy(Ctlz.getValueType())));
}
// fold (setgt X, 0) -> (srl (and (-X, ~X), size(X)-1))
if (N1C && N1C->isNullValue() && CC == ISD::SETGT) {
- SDValue NegN0 = DAG.getNode(ISD::SUB, SDLoc(N0),
- XType, DAG.getConstant(0, XType), N0);
- SDValue NotN0 = DAG.getNOT(SDLoc(N0), N0, XType);
+ SDLoc DL(N0);
+ SDValue NegN0 = DAG.getNode(ISD::SUB, DL,
+ XType, DAG.getConstant(0, DL, XType), N0);
+ SDValue NotN0 = DAG.getNOT(DL, N0, XType);
return DAG.getNode(ISD::SRL, DL, XType,
DAG.getNode(ISD::AND, DL, XType, NegN0, NotN0),
- DAG.getConstant(XType.getSizeInBits()-1,
+ DAG.getConstant(XType.getSizeInBits() - 1, DL,
getShiftAmountTy(XType)));
}
// fold (setgt X, -1) -> (xor (srl (X, size(X)-1), 1))
if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT) {
- SDValue Sign = DAG.getNode(ISD::SRL, SDLoc(N0), XType, N0,
- DAG.getConstant(XType.getSizeInBits()-1,
+ SDLoc DL(N0);
+ SDValue Sign = DAG.getNode(ISD::SRL, DL, XType, N0,
+ DAG.getConstant(XType.getSizeInBits() - 1, DL,
getShiftAmountTy(N0.getValueType())));
- return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, XType));
+ return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, DL,
+ XType));
}
}
EVT XType = N0.getValueType();
if (SubC && SubC->isNullValue() && XType.isInteger()) {
- SDValue Shift = DAG.getNode(ISD::SRA, SDLoc(N0), XType,
+ SDLoc DL(N0);
+ SDValue Shift = DAG.getNode(ISD::SRA, DL, XType,
N0,
- DAG.getConstant(XType.getSizeInBits()-1,
+ DAG.getConstant(XType.getSizeInBits() - 1, DL,
getShiftAmountTy(N0.getValueType())));
- SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0),
+ SDValue Add = DAG.getNode(ISD::ADD, DL,
XType, N0, Shift);
AddToWorklist(Shift.getNode());
AddToWorklist(Add.getNode());
// does not require additional intermediate precision]
EVT VT = Op.getValueType();
SDLoc DL(Op);
- SDValue FPOne = DAG.getConstantFP(1.0, VT);
+ SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
AddToWorklist(Est.getNode());
unsigned Iterations) {
EVT VT = Arg.getValueType();
SDLoc DL(Arg);
- SDValue ThreeHalves = DAG.getConstantFP(1.5, VT);
+ SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
// We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
// this entire sequence requires only one FP constant.
unsigned Iterations) {
EVT VT = Arg.getValueType();
SDLoc DL(Arg);
- SDValue MinusThree = DAG.getConstantFP(-3.0, VT);
- SDValue MinusHalf = DAG.getConstantFP(-0.5, VT);
+ SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
+ SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
// Newton iterations: Est = -0.5 * Est * (-3.0 + Arg * Est * Est)
for (unsigned i = 0; i < Iterations; ++i) {