cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
cl::init(false));
+/// Value type used for condition codes.
+static const MVT MVT_CC = MVT::i32;
+
AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
const AArch64Subtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Custom);
setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Custom);
- // [SU][MIN|MAX] are available for all NEON types apart from i64.
+ // [SU][MIN|MAX] and [SU]ABSDIFF are available for all NEON types apart from
+ // i64.
if (!VT.isFloatingPoint() &&
VT.getSimpleVT() != MVT::v2i64 && VT.getSimpleVT() != MVT::v1i64)
- for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
+ for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX,
+ ISD::SABSDIFF, ISD::UABSDIFF})
setOperationAction(Opcode, VT.getSimpleVT(), Legal);
if (Subtarget->isLittleEndian()) {
addTypeForNEON(VT, MVT::v4i32);
}
-EVT AArch64TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
+EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
+ EVT VT) const {
if (!VT.isVector())
return MVT::i32;
return VT.changeVectorElementTypeToInteger();
}
}
-MVT AArch64TargetLowering::getScalarShiftAmountTy(EVT LHSTy) const {
+MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
+ EVT) const {
return MVT::i64;
}
case AArch64ISD::ADCS: return "AArch64ISD::ADCS";
case AArch64ISD::SBCS: return "AArch64ISD::SBCS";
case AArch64ISD::ANDS: return "AArch64ISD::ANDS";
+ case AArch64ISD::CCMP: return "AArch64ISD::CCMP";
+ case AArch64ISD::CCMN: return "AArch64ISD::CCMN";
+ case AArch64ISD::FCCMP: return "AArch64ISD::FCCMP";
case AArch64ISD::FCMP: return "AArch64ISD::FCMP";
case AArch64ISD::FMIN: return "AArch64ISD::FMIN";
case AArch64ISD::FMAX: return "AArch64ISD::FMAX";
LHS = LHS.getOperand(0);
}
- return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS)
+ return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT_CC), LHS, RHS)
.getValue(1);
}
+/// \defgroup AArch64CCMP CMP;CCMP matching
+///
+/// These functions deal with the formation of CMP;CCMP;... sequences.
+/// The CCMP/CCMN/FCCMP/FCCMPE instructions allow the conditional execution of
+/// a comparison. They set the NZCV flags to a predefined value if their
+/// predicate is false. This allows to express arbitrary conjunctions, for
+/// example "cmp 0 (and (setCA (cmp A)) (setCB (cmp B))))"
+/// expressed as:
+/// cmp A
+/// ccmp B, inv(CB), CA
+/// check for CB flags
+///
+/// In general we can create code for arbitrary "... (and (and A B) C)"
+/// sequences. We can also implement some "or" expressions, because "(or A B)"
+/// is equivalent to "not (and (not A) (not B))" and we can implement some
+/// negation operations:
+/// We can negate the results of a single comparison by inverting the flags
+/// used when the predicate fails and inverting the flags tested in the next
+/// instruction; We can also negate the results of the whole previous
+/// conditional compare sequence by inverting the flags tested in the next
+/// instruction. However there is no way to negate the result of a partial
+/// sequence.
+///
+/// Therefore on encountering an "or" expression we can negate the subtree on
+/// one side and have to be able to push the negate to the leafs of the subtree
+/// on the other side (see also the comments in code). As complete example:
+/// "or (or (setCA (cmp A)) (setCB (cmp B)))
+/// (and (setCC (cmp C)) (setCD (cmp D)))"
+/// is transformed to
+/// "not (and (not (and (setCC (cmp C)) (setCC (cmp D))))
+/// (and (not (setCA (cmp A)) (not (setCB (cmp B))))))"
+/// and implemented as:
+/// cmp C
+/// ccmp D, inv(CD), CC
+/// ccmp A, CA, inv(CD)
+/// ccmp B, CB, inv(CA)
+/// check for CB flags
+/// A counterexample is "or (and A B) (and C D)" which cannot be implemented
+/// by conditional compare sequences.
+/// @{
+
+/// Create a conditional comparison; Use CCMP, CCMN or FCCMP as appropriate.
+static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
+ ISD::CondCode CC, SDValue CCOp,
+ SDValue Condition, unsigned NZCV,
+ SDLoc DL, SelectionDAG &DAG) {
+ unsigned Opcode = 0;
+ if (LHS.getValueType().isFloatingPoint())
+ Opcode = AArch64ISD::FCCMP;
+ else if (RHS.getOpcode() == ISD::SUB) {
+ SDValue SubOp0 = RHS.getOperand(0);
+ if (const ConstantSDNode *SubOp0C = dyn_cast<ConstantSDNode>(SubOp0))
+ if (SubOp0C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+ // See emitComparison() on why we can only do this for SETEQ and SETNE.
+ Opcode = AArch64ISD::CCMN;
+ RHS = RHS.getOperand(1);
+ }
+ }
+ if (Opcode == 0)
+ Opcode = AArch64ISD::CCMP;
+
+ SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
+ return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp);
+}
+
+/// Returns true if @p Val is a tree of AND/OR/SETCC operations.
+/// CanPushNegate is set to true if we can push a negate operation through
+/// the tree in a was that we are left with AND operations and negate operations
+/// at the leafs only. i.e. "not (or (or x y) z)" can be changed to
+/// "and (and (not x) (not y)) (not z)"; "not (or (and x y) z)" cannot be
+/// brought into such a form.
+static bool isConjunctionDisjunctionTree(const SDValue Val, bool &CanPushNegate,
+ unsigned Depth = 0) {
+ if (!Val.hasOneUse())
+ return false;
+ unsigned Opcode = Val->getOpcode();
+ if (Opcode == ISD::SETCC) {
+ CanPushNegate = true;
+ return true;
+ }
+ // Protect against stack overflow.
+ if (Depth > 15)
+ return false;
+ if (Opcode == ISD::AND || Opcode == ISD::OR) {
+ SDValue O0 = Val->getOperand(0);
+ SDValue O1 = Val->getOperand(1);
+ bool CanPushNegateL;
+ if (!isConjunctionDisjunctionTree(O0, CanPushNegateL, Depth+1))
+ return false;
+ bool CanPushNegateR;
+ if (!isConjunctionDisjunctionTree(O1, CanPushNegateR, Depth+1))
+ return false;
+ // We cannot push a negate through an AND operation (it would become an OR),
+ // we can however change a (not (or x y)) to (and (not x) (not y)) if we can
+ // push the negate through the x/y subtrees.
+ CanPushNegate = (Opcode == ISD::OR) && CanPushNegateL && CanPushNegateR;
+ return true;
+ }
+ return false;
+}
+
+/// Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain
+/// of CCMP/CFCMP ops. See @ref AArch64CCMP.
+/// Tries to transform the given i1 producing node @p Val to a series compare
+/// and conditional compare operations. @returns an NZCV flags producing node
+/// and sets @p OutCC to the flags that should be tested or returns SDValue() if
+/// transformation was not possible.
+/// On recursive invocations @p PushNegate may be set to true to have negation
+/// effects pushed to the tree leafs; @p Predicate is an NZCV flag predicate
+/// for the comparisons in the current subtree; @p Depth limits the search
+/// depth to avoid stack overflow.
+static SDValue emitConjunctionDisjunctionTree(SelectionDAG &DAG, SDValue Val,
+ AArch64CC::CondCode &OutCC, bool PushNegate = false,
+ SDValue CCOp = SDValue(), AArch64CC::CondCode Predicate = AArch64CC::AL,
+ unsigned Depth = 0) {
+ // We're at a tree leaf, produce a conditional comparison operation.
+ unsigned Opcode = Val->getOpcode();
+ if (Opcode == ISD::SETCC) {
+ SDValue LHS = Val->getOperand(0);
+ SDValue RHS = Val->getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get();
+ bool isInteger = LHS.getValueType().isInteger();
+ if (PushNegate)
+ CC = getSetCCInverse(CC, isInteger);
+ SDLoc DL(Val);
+ // Determine OutCC and handle FP special case.
+ if (isInteger) {
+ OutCC = changeIntCCToAArch64CC(CC);
+ } else {
+ assert(LHS.getValueType().isFloatingPoint());
+ AArch64CC::CondCode ExtraCC;
+ changeFPCCToAArch64CC(CC, OutCC, ExtraCC);
+ // Surpisingly some floating point conditions can't be tested with a
+ // single condition code. Construct an additional comparison in this case.
+ // See comment below on how we deal with OR conditions.
+ if (ExtraCC != AArch64CC::AL) {
+ SDValue ExtraCmp;
+ if (!CCOp.getNode())
+ ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG);
+ else {
+ SDValue ConditionOp = DAG.getConstant(Predicate, DL, MVT_CC);
+ // Note that we want the inverse of ExtraCC, so NZCV is not inversed.
+ unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(ExtraCC);
+ ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, ConditionOp,
+ NZCV, DL, DAG);
+ }
+ CCOp = ExtraCmp;
+ Predicate = AArch64CC::getInvertedCondCode(ExtraCC);
+ OutCC = AArch64CC::getInvertedCondCode(OutCC);
+ }
+ }
+
+ // Produce a normal comparison if we are first in the chain
+ if (!CCOp.getNode())
+ return emitComparison(LHS, RHS, CC, DL, DAG);
+ // Otherwise produce a ccmp.
+ SDValue ConditionOp = DAG.getConstant(Predicate, DL, MVT_CC);
+ AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
+ unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
+ return emitConditionalComparison(LHS, RHS, CC, CCOp, ConditionOp, NZCV, DL,
+ DAG);
+ } else if (Opcode != ISD::AND && Opcode != ISD::OR)
+ return SDValue();
+
+ assert((Opcode == ISD::OR || !PushNegate)
+ && "Can only push negate through OR operation");
+
+ // Check if both sides can be transformed.
+ SDValue LHS = Val->getOperand(0);
+ SDValue RHS = Val->getOperand(1);
+ bool CanPushNegateL;
+ if (!isConjunctionDisjunctionTree(LHS, CanPushNegateL, Depth+1))
+ return SDValue();
+ bool CanPushNegateR;
+ if (!isConjunctionDisjunctionTree(RHS, CanPushNegateR, Depth+1))
+ return SDValue();
+
+ // Do we need to negate our operands?
+ bool NegateOperands = Opcode == ISD::OR;
+ // We can negate the results of all previous operations by inverting the
+ // predicate flags giving us a free negation for one side. For the other side
+ // we need to be able to push the negation to the leafs of the tree.
+ if (NegateOperands) {
+ if (!CanPushNegateL && !CanPushNegateR)
+ return SDValue();
+ // Order the side where we can push the negate through to LHS.
+ if (!CanPushNegateL && CanPushNegateR) {
+ std::swap(LHS, RHS);
+ CanPushNegateL = true;
+ }
+ }
+
+ // Emit RHS. If we want to negate the tree we only need to push a negate
+ // through if we are already in a PushNegate case, otherwise we can negate
+ // the "flags to test" afterwards.
+ AArch64CC::CondCode RHSCC;
+ SDValue CmpR = emitConjunctionDisjunctionTree(DAG, RHS, RHSCC, PushNegate,
+ CCOp, Predicate, Depth+1);
+ if (NegateOperands && !PushNegate)
+ RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
+ // Emit LHS. We must push the negate through if we need to negate it.
+ SDValue CmpL = emitConjunctionDisjunctionTree(DAG, LHS, OutCC, NegateOperands,
+ CmpR, RHSCC, Depth+1);
+ // If we transformed an OR to and AND then we have to negate the result
+ // (or absorb a PushNegate resulting in a double negation).
+ if (Opcode == ISD::OR && !PushNegate)
+ OutCC = AArch64CC::getInvertedCondCode(OutCC);
+ return CmpL;
+}
+
+/// @}
+
static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
SDValue &AArch64cc, SelectionDAG &DAG, SDLoc dl) {
- SDValue Cmp;
- AArch64CC::CondCode AArch64CC;
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
EVT VT = RHS.getValueType();
uint64_t C = RHSC->getZExtValue();
}
}
}
- // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.
- // For the i8 operand, the largest immediate is 255, so this can be easily
- // encoded in the compare instruction. For the i16 operand, however, the
- // largest immediate cannot be encoded in the compare.
- // Therefore, use a sign extending load and cmn to avoid materializing the -1
- // constant. For example,
- // movz w1, #65535
- // ldrh w0, [x0, #0]
- // cmp w0, w1
- // >
- // ldrsh w0, [x0, #0]
- // cmn w0, #1
- // Fundamental, we're relying on the property that (zext LHS) == (zext RHS)
- // if and only if (sext LHS) == (sext RHS). The checks are in place to ensure
- // both the LHS and RHS are truely zero extended and to make sure the
- // transformation is profitable.
+ SDValue Cmp;
+ AArch64CC::CondCode AArch64CC;
if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
- if ((cast<ConstantSDNode>(RHS)->getZExtValue() >> 16 == 0) &&
- isa<LoadSDNode>(LHS)) {
- if (cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
- cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
- LHS.getNode()->hasNUsesOfValue(1, 0)) {
- int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
- if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
- SDValue SExt =
- DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
- DAG.getValueType(MVT::i16));
- Cmp = emitComparison(SExt,
- DAG.getConstant(ValueofRHS, dl,
- RHS.getValueType()),
- CC, dl, DAG);
- AArch64CC = changeIntCCToAArch64CC(CC);
- AArch64cc = DAG.getConstant(AArch64CC, dl, MVT::i32);
- return Cmp;
- }
+ const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS);
+
+ // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.
+ // For the i8 operand, the largest immediate is 255, so this can be easily
+ // encoded in the compare instruction. For the i16 operand, however, the
+ // largest immediate cannot be encoded in the compare.
+ // Therefore, use a sign extending load and cmn to avoid materializing the
+ // -1 constant. For example,
+ // movz w1, #65535
+ // ldrh w0, [x0, #0]
+ // cmp w0, w1
+ // >
+ // ldrsh w0, [x0, #0]
+ // cmn w0, #1
+ // Fundamental, we're relying on the property that (zext LHS) == (zext RHS)
+ // if and only if (sext LHS) == (sext RHS). The checks are in place to
+ // ensure both the LHS and RHS are truely zero extended and to make sure the
+ // transformation is profitable.
+ if ((RHSC->getZExtValue() >> 16 == 0) && isa<LoadSDNode>(LHS) &&
+ cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
+ cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
+ LHS.getNode()->hasNUsesOfValue(1, 0)) {
+ int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
+ if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
+ SDValue SExt =
+ DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
+ DAG.getValueType(MVT::i16));
+ Cmp = emitComparison(SExt, DAG.getConstant(ValueofRHS, dl,
+ RHS.getValueType()),
+ CC, dl, DAG);
+ AArch64CC = changeIntCCToAArch64CC(CC);
}
}
+
+ if (!Cmp && (RHSC->isNullValue() || RHSC->isOne())) {
+ if ((Cmp = emitConjunctionDisjunctionTree(DAG, LHS, AArch64CC))) {
+ if ((CC == ISD::SETNE) ^ RHSC->isNullValue())
+ AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC);
+ }
+ }
+ }
+
+ if (!Cmp) {
+ Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
+ AArch64CC = changeIntCCToAArch64CC(CC);
}
- Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
- AArch64CC = changeIntCCToAArch64CC(CC);
- AArch64cc = DAG.getConstant(AArch64CC, dl, MVT::i32);
+ AArch64cc = DAG.getConstant(AArch64CC, dl, MVT_CC);
return Cmp;
}
const char *LibcallName =
(ArgVT == MVT::f64) ? "__sincos_stret" : "__sincosf_stret";
- SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy());
+ SDValue Callee =
+ DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));
StructType *RetTy = StructType::get(ArgTy, ArgTy, nullptr);
TargetLowering::CallLoweringInfo CLI(DAG);
CurArgIdx = Ins[i].getOrigArgIndex();
// Get type of the original argument.
- EVT ActualVT = getValueType(CurOrigArg->getType(), /*AllowUnknown*/ true);
+ EVT ActualVT = getValueType(DAG.getDataLayout(), CurOrigArg->getType(),
+ /*AllowUnknown*/ true);
MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
// If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
if (Ins[i].Flags.isByVal()) {
// Byval is used for HFAs in the PCS, but the system should work in a
// non-compliant manner for larger structs.
- EVT PtrTy = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
int Size = Ins[i].Flags.getByValSize();
unsigned NumRegs = (Size + 7) / 8;
// case. It should also work for fundamental types too.
unsigned FrameIdx =
MFI->CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
- SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrTy);
+ SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrVT);
InVals.push_back(FrameIdxN);
continue;
int FI = MFI->CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);
// Create load nodes to retrieve arguments from the stack.
- SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
SDValue ArgValue;
// For NON_EXTLOAD, generic code in getLoad assert(ValVT == MemVT)
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
SmallVector<SDValue, 8> MemOps;
if (GPRSaveSize != 0) {
GPRIdx = MFI->CreateStackObject(GPRSaveSize, 8, false);
- SDValue FIN = DAG.getFrameIndex(GPRIdx, getPointerTy());
+ SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT);
for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass);
DAG.getStore(Val.getValue(1), DL, Val, FIN,
MachinePointerInfo::getStack(i * 8), false, false, 0);
MemOps.push_back(Store);
- FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN,
- DAG.getConstant(8, DL, getPointerTy()));
+ FIN =
+ DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(8, DL, PtrVT));
}
}
FuncInfo->setVarArgsGPRIndex(GPRIdx);
if (FPRSaveSize != 0) {
FPRIdx = MFI->CreateStackObject(FPRSaveSize, 16, false);
- SDValue FIN = DAG.getFrameIndex(FPRIdx, getPointerTy());
+ SDValue FIN = DAG.getFrameIndex(FPRIdx, PtrVT);
for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass);
DAG.getStore(Val.getValue(1), DL, Val, FIN,
MachinePointerInfo::getStack(i * 16), false, false, 0);
MemOps.push_back(Store);
- FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN,
- DAG.getConstant(16, DL, getPointerTy()));
+ FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
+ DAG.getConstant(16, DL, PtrVT));
}
}
FuncInfo->setVarArgsFPRIndex(FPRIdx);
for (unsigned i = 0; i != NumArgs; ++i) {
MVT ValVT = Outs[i].VT;
// Get type of the original argument.
- EVT ActualVT = getValueType(CLI.getArgs()[Outs[i].OrigArgIndex].Ty,
+ EVT ActualVT = getValueType(DAG.getDataLayout(),
+ CLI.getArgs()[Outs[i].OrigArgIndex].Ty,
/*AllowUnknown*/ true);
MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ValVT;
ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
true),
DL);
- SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP, getPointerTy());
+ SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP,
+ getPointerTy(DAG.getDataLayout()));
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
// Walk the register/memloc assignments, inserting copies/loads.
for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); i != e;
unsigned LocMemOffset = VA.getLocMemOffset();
int32_t Offset = LocMemOffset + BEAlign;
SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
- PtrOff = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, PtrOff);
+ PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
if (IsTailCall) {
Offset = Offset + FPDiff;
int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
- DstAddr = DAG.getFrameIndex(FI, getPointerTy());
+ DstAddr = DAG.getFrameIndex(FI, PtrVT);
DstInfo = MachinePointerInfo::getFixedStack(FI);
// Make sure any stack arguments overlapping with where we're storing
} else {
SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
- DstAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, PtrOff);
+ DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
DstInfo = MachinePointerInfo::getStack(LocMemOffset);
}
const GlobalValue *GV = G->getGlobal();
bool InternalLinkage = GV->hasInternalLinkage();
if (InternalLinkage)
- Callee = DAG.getTargetGlobalAddress(GV, DL, getPointerTy(), 0, 0);
+ Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
else {
- Callee = DAG.getTargetGlobalAddress(GV, DL, getPointerTy(), 0,
- AArch64II::MO_GOT);
- Callee = DAG.getNode(AArch64ISD::LOADgot, DL, getPointerTy(), Callee);
+ Callee =
+ DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_GOT);
+ Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
}
} else if (ExternalSymbolSDNode *S =
dyn_cast<ExternalSymbolSDNode>(Callee)) {
const char *Sym = S->getSymbol();
- Callee =
- DAG.getTargetExternalSymbol(Sym, getPointerTy(), AArch64II::MO_GOT);
- Callee = DAG.getNode(AArch64ISD::LOADgot, DL, getPointerTy(), Callee);
+ Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, AArch64II::MO_GOT);
+ Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
}
} else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
const GlobalValue *GV = G->getGlobal();
- Callee = DAG.getTargetGlobalAddress(GV, DL, getPointerTy(), 0, 0);
+ Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
const char *Sym = S->getSymbol();
- Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), 0);
+ Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0);
}
// We don't usually want to end the call-sequence here because we would tidy
SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc DL(Op);
const GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = GN->getGlobal();
assert(Subtarget->isTargetDarwin() && "TLS only supported on Darwin");
SDLoc DL(Op);
- MVT PtrVT = getPointerTy();
+ MVT PtrVT = getPointerTy(DAG.getDataLayout());
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
SDValue TLVPAddr =
/// the sequence is produced as per above.
SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(SDValue SymAddr, SDLoc DL,
SelectionDAG &DAG) const {
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Chain = DAG.getEntryNode();
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
}
SDValue TPOff;
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc DL(Op);
const GlobalValue *GV = GA->getGlobal();
// Jump table entries as PC relative offsets. No additional tweaking
// is necessary here. Just get the address of the jump table.
JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc DL(Op);
if (getTargetMachine().getCodeModel() == CodeModel::Large &&
SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op,
SelectionDAG &DAG) const {
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc DL(Op);
if (getTargetMachine().getCodeModel() == CodeModel::Large) {
SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op,
SelectionDAG &DAG) const {
const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc DL(Op);
if (getTargetMachine().getCodeModel() == CodeModel::Large &&
!Subtarget->isTargetMachO()) {
DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
SDLoc DL(Op);
- SDValue FR =
- DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), getPointerTy());
+ SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(),
+ getPointerTy(DAG.getDataLayout()));
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
MachinePointerInfo(SV), false, false, 0);
// Standard, section B.3.
MachineFunction &MF = DAG.getMachineFunction();
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc DL(Op);
SDValue Chain = Op.getOperand(0);
SmallVector<SDValue, 4> MemOps;
// void *__stack at offset 0
- SDValue Stack =
- DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), getPointerTy());
+ SDValue Stack = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), PtrVT);
MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList,
MachinePointerInfo(SV), false, false, 8));
if (GPRSize > 0) {
SDValue GRTop, GRTopAddr;
- GRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
- DAG.getConstant(8, DL, getPointerTy()));
+ GRTopAddr =
+ DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(8, DL, PtrVT));
- GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), getPointerTy());
- GRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), GRTop,
- DAG.getConstant(GPRSize, DL, getPointerTy()));
+ GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), PtrVT);
+ GRTop = DAG.getNode(ISD::ADD, DL, PtrVT, GRTop,
+ DAG.getConstant(GPRSize, DL, PtrVT));
MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr,
MachinePointerInfo(SV, 8), false, false, 8));
int FPRSize = FuncInfo->getVarArgsFPRSize();
if (FPRSize > 0) {
SDValue VRTop, VRTopAddr;
- VRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
- DAG.getConstant(16, DL, getPointerTy()));
+ VRTopAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
+ DAG.getConstant(16, DL, PtrVT));
- VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), getPointerTy());
- VRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), VRTop,
- DAG.getConstant(FPRSize, DL, getPointerTy()));
+ VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), PtrVT);
+ VRTop = DAG.getNode(ISD::ADD, DL, PtrVT, VRTop,
+ DAG.getConstant(FPRSize, DL, PtrVT));
MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr,
MachinePointerInfo(SV, 16), false, false, 8));
}
// int __gr_offs at offset 24
- SDValue GROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
- DAG.getConstant(24, DL, getPointerTy()));
+ SDValue GROffsAddr =
+ DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(24, DL, PtrVT));
MemOps.push_back(DAG.getStore(Chain, DL,
DAG.getConstant(-GPRSize, DL, MVT::i32),
GROffsAddr, MachinePointerInfo(SV, 24), false,
false, 4));
// int __vr_offs at offset 28
- SDValue VROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
- DAG.getConstant(28, DL, getPointerTy()));
+ SDValue VROffsAddr =
+ DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(28, DL, PtrVT));
MemOps.push_back(DAG.getStore(Chain, DL,
DAG.getConstant(-FPRSize, DL, MVT::i32),
VROffsAddr, MachinePointerInfo(SV, 28), false,
SDValue Chain = Op.getOperand(0);
SDValue Addr = Op.getOperand(1);
unsigned Align = Op.getConstantOperandVal(3);
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
- SDValue VAList = DAG.getLoad(getPointerTy(), DL, Chain, Addr,
- MachinePointerInfo(V), false, false, false, 0);
+ SDValue VAList = DAG.getLoad(PtrVT, DL, Chain, Addr, MachinePointerInfo(V),
+ false, false, false, 0);
Chain = VAList.getValue(1);
if (Align > 8) {
assert(((Align & (Align - 1)) == 0) && "Expected Align to be a power of 2");
- VAList = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
- DAG.getConstant(Align - 1, DL, getPointerTy()));
- VAList = DAG.getNode(ISD::AND, DL, getPointerTy(), VAList,
- DAG.getConstant(-(int64_t)Align, DL, getPointerTy()));
+ VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
+ DAG.getConstant(Align - 1, DL, PtrVT));
+ VAList = DAG.getNode(ISD::AND, DL, PtrVT, VAList,
+ DAG.getConstant(-(int64_t)Align, DL, PtrVT));
}
Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
- uint64_t ArgSize = getDataLayout()->getTypeAllocSize(ArgTy);
+ uint64_t ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy);
// Scalar integer and FP values smaller than 64 bits are implicitly extended
// up to 64 bits. At the very least, we have to increase the striding of the
}
// Increment the pointer, VAList, to the next vaarg
- SDValue VANext = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
- DAG.getConstant(ArgSize, DL, getPointerTy()));
+ SDValue VANext = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
+ DAG.getConstant(ArgSize, DL, PtrVT));
// Store the incremented VAList to the legalized pointer
SDValue APStore = DAG.getStore(Chain, DL, VANext, Addr, MachinePointerInfo(V),
false, false, 0);
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
-unsigned AArch64TargetLowering::getRegisterByName(const char* RegName,
- EVT VT) const {
+unsigned AArch64TargetLowering::getRegisterByName(const char* RegName, EVT VT,
+ SelectionDAG &DAG) const {
unsigned Reg = StringSwitch<unsigned>(RegName)
.Case("sp", AArch64::SP)
.Default(0);
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
if (Depth) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
- SDValue Offset = DAG.getConstant(8, DL, getPointerTy());
+ SDValue Offset = DAG.getConstant(8, DL, getPointerTy(DAG.getDataLayout()));
return DAG.getLoad(VT, DL, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
MachinePointerInfo(), false, false, false, 0);
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
AArch64TargetLowering::ConstraintType
-AArch64TargetLowering::getConstraintType(const std::string &Constraint) const {
+AArch64TargetLowering::getConstraintType(StringRef Constraint) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
default:
std::pair<unsigned, const TargetRegisterClass *>
AArch64TargetLowering::getRegForInlineAsmConstraint(
- const TargetRegisterInfo *TRI, const std::string &Constraint,
- MVT VT) const {
+ const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'r':
unsigned Size = Constraint.size();
if ((Size == 4 || Size == 5) && Constraint[0] == '{' &&
tolower(Constraint[1]) == 'v' && Constraint[Size - 1] == '}') {
- const std::string Reg =
- std::string(&Constraint[2], &Constraint[Size - 1]);
- int RegNo = atoi(Reg.c_str());
- if (RegNo >= 0 && RegNo <= 31) {
+ int RegNo;
+ bool Failed = Constraint.slice(2, Size - 1).getAsInteger(10, RegNo);
+ if (!Failed && RegNo >= 0 && RegNo <= 31) {
// v0 - v31 are aliases of q0 - q31.
// By default we'll emit v0-v31 for this unless there's a modifier where
// we'll emit the correct register as well.
bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I,
unsigned Intrinsic) const {
+ auto &DL = I.getModule()->getDataLayout();
switch (Intrinsic) {
case Intrinsic::aarch64_neon_ld2:
case Intrinsic::aarch64_neon_ld3:
case Intrinsic::aarch64_neon_ld4r: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
// Conservatively set memVT to the entire set of vectors loaded.
- uint64_t NumElts = getDataLayout()->getTypeAllocSize(I.getType()) / 8;
+ uint64_t NumElts = DL.getTypeAllocSize(I.getType()) / 8;
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
Info.offset = 0;
Type *ArgTy = I.getArgOperand(ArgI)->getType();
if (!ArgTy->isVectorTy())
break;
- NumElts += getDataLayout()->getTypeAllocSize(ArgTy) / 8;
+ NumElts += DL.getTypeAllocSize(ArgTy) / 8;
}
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
Info.memVT = MVT::getVT(PtrTy->getElementType());
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
- Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType());
+ Info.align = DL.getABITypeAlignment(PtrTy->getElementType());
Info.vol = true;
Info.readMem = true;
Info.writeMem = false;
Info.memVT = MVT::getVT(PtrTy->getElementType());
Info.ptrVal = I.getArgOperand(1);
Info.offset = 0;
- Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType());
+ Info.align = DL.getABITypeAlignment(PtrTy->getElementType());
Info.vol = true;
Info.readMem = false;
Info.writeMem = true;
return true;
const TargetOptions &Options = getTargetMachine().Options;
- EVT VT = getValueType(User->getOperand(0)->getType());
+ const DataLayout &DL = I->getModule()->getDataLayout();
+ EVT VT = getValueType(DL, User->getOperand(0)->getType());
if (isFMAFasterThanFMulAndFAdd(VT) &&
isOperationLegalOrCustom(ISD::FMA, VT) &&
break;
case Instruction::GetElementPtr: {
gep_type_iterator GTI = gep_type_begin(Instr);
+ auto &DL = Ext->getModule()->getDataLayout();
std::advance(GTI, U.getOperandNo());
Type *IdxTy = *GTI;
// This extension will end up with a shift because of the scaling factor.
// Get the shift amount based on the scaling factor:
// log2(sizeof(IdxTy)) - log2(8).
uint64_t ShiftAmt =
- countTrailingZeros(getDataLayout()->getTypeStoreSizeInBits(IdxTy)) - 3;
+ countTrailingZeros(DL.getTypeStoreSizeInBits(IdxTy)) - 3;
// Is the constant foldable in the shift of the addressing mode?
// I.e., shift amount is between 1 and 4 inclusive.
if (ShiftAmt == 0 || ShiftAmt > 4)
assert(Shuffles.size() == Indices.size() &&
"Unmatched number of shufflevectors and indices");
- const DataLayout *DL = getDataLayout();
+ const DataLayout &DL = LI->getModule()->getDataLayout();
VectorType *VecTy = Shuffles[0]->getType();
- unsigned VecSize = DL->getTypeAllocSizeInBits(VecTy);
+ unsigned VecSize = DL.getTypeAllocSizeInBits(VecTy);
// Skip illegal vector types.
if (VecSize != 64 && VecSize != 128)
// load integer vectors first and then convert to pointer vectors.
Type *EltTy = VecTy->getVectorElementType();
if (EltTy->isPointerTy())
- VecTy = VectorType::get(DL->getIntPtrType(EltTy),
- VecTy->getVectorNumElements());
+ VecTy =
+ VectorType::get(DL.getIntPtrType(EltTy), VecTy->getVectorNumElements());
Type *PtrTy = VecTy->getPointerTo(LI->getPointerAddressSpace());
Type *Tys[2] = {VecTy, PtrTy};
Type *EltTy = VecTy->getVectorElementType();
VectorType *SubVecTy = VectorType::get(EltTy, NumSubElts);
- const DataLayout *DL = getDataLayout();
- unsigned SubVecSize = DL->getTypeAllocSizeInBits(SubVecTy);
+ const DataLayout &DL = SI->getModule()->getDataLayout();
+ unsigned SubVecSize = DL.getTypeAllocSizeInBits(SubVecTy);
// Skip illegal vector types.
if (SubVecSize != 64 && SubVecSize != 128)
// StN intrinsics don't support pointer vectors as arguments. Convert pointer
// vectors to integer vectors.
if (EltTy->isPointerTy()) {
- Type *IntTy = DL->getIntPtrType(EltTy);
+ Type *IntTy = DL.getIntPtrType(EltTy);
unsigned NumOpElts =
dyn_cast<VectorType>(Op0->getType())->getVectorNumElements();
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
-bool AArch64TargetLowering::isLegalAddressingMode(const AddrMode &AM,
- Type *Ty,
+bool AArch64TargetLowering::isLegalAddressingMode(const DataLayout &DL,
+ const AddrMode &AM, Type *Ty,
unsigned AS) const {
// AArch64 has five basic addressing modes:
// reg
// i.e., reg + 0, reg + imm9, reg + SIZE_IN_BYTES * uimm12
uint64_t NumBytes = 0;
if (Ty->isSized()) {
- uint64_t NumBits = getDataLayout()->getTypeSizeInBits(Ty);
+ uint64_t NumBits = DL.getTypeSizeInBits(Ty);
NumBytes = NumBits / 8;
if (!isPowerOf2_64(NumBits))
NumBytes = 0;
return false;
}
-int AArch64TargetLowering::getScalingFactorCost(const AddrMode &AM,
- Type *Ty,
+int AArch64TargetLowering::getScalingFactorCost(const DataLayout &DL,
+ const AddrMode &AM, Type *Ty,
unsigned AS) const {
// Scaling factors are not free at all.
// Operands | Rt Latency
// -------------------------------------------
// Rt, [Xn, Xm, lsl #imm] | Rn: 4 Rm: 5
// Rt, [Xn, Wm, <extend> #imm] |
- if (isLegalAddressingMode(AM, Ty, AS))
+ if (isLegalAddressingMode(DL, AM, Ty, AS))
// Scale represents reg2 * scale, thus account for 1 if
// it is not equal to 0 or 1.
return AM.Scale != 0 && AM.Scale != 1;
// (aarch64_neon_umull (extract_high (v2i64 vec)))
// (extract_high (v2i64 (dup128 scalar)))))
//
-static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N,
+static SDValue tryCombineLongOpWithDup(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
- SDValue LHS = N->getOperand(1);
- SDValue RHS = N->getOperand(2);
+ bool IsIntrinsic = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN;
+ SDValue LHS = N->getOperand(IsIntrinsic ? 1 : 0);
+ SDValue RHS = N->getOperand(IsIntrinsic ? 2 : 1);
assert(LHS.getValueType().is64BitVector() &&
RHS.getValueType().is64BitVector() &&
"unexpected shape for long operation");
return SDValue();
}
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), N->getValueType(0),
- N->getOperand(0), LHS, RHS);
+ // N could either be an intrinsic or a sabsdiff/uabsdiff node.
+ if (IsIntrinsic)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), N->getValueType(0),
+ N->getOperand(0), LHS, RHS);
+ else
+ return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
+ LHS, RHS);
}
static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) {
case Intrinsic::aarch64_neon_fmin:
return DAG.getNode(AArch64ISD::FMIN, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
+ case Intrinsic::aarch64_neon_sabd:
+ return DAG.getNode(ISD::SABSDIFF, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2));
+ case Intrinsic::aarch64_neon_uabd:
+ return DAG.getNode(ISD::UABSDIFF, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_neon_smull:
case Intrinsic::aarch64_neon_umull:
case Intrinsic::aarch64_neon_pmull:
case Intrinsic::aarch64_neon_sqdmull:
- return tryCombineLongOpWithDup(IID, N, DCI, DAG);
+ return tryCombineLongOpWithDup(N, DCI, DAG);
case Intrinsic::aarch64_neon_sqshl:
case Intrinsic::aarch64_neon_uqshl:
case Intrinsic::aarch64_neon_sqshlu:
// helps the backend to decide that an sabdl2 would be useful, saving a real
// extract_high operation.
if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ZERO_EXTEND &&
- N->getOperand(0).getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
+ (N->getOperand(0).getOpcode() == ISD::SABSDIFF ||
+ N->getOperand(0).getOpcode() == ISD::UABSDIFF)) {
SDNode *ABDNode = N->getOperand(0).getNode();
- unsigned IID = getIntrinsicID(ABDNode);
- if (IID == Intrinsic::aarch64_neon_sabd ||
- IID == Intrinsic::aarch64_neon_uabd) {
- SDValue NewABD = tryCombineLongOpWithDup(IID, ABDNode, DCI, DAG);
- if (!NewABD.getNode())
- return SDValue();
+ SDValue NewABD = tryCombineLongOpWithDup(ABDNode, DCI, DAG);
+ if (!NewABD.getNode())
+ return SDValue();
- return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0),
- NewABD);
- }
+ return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0),
+ NewABD);
}
// This is effectively a custom type legalization for AArch64.
Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
return Ty->isArrayTy();
}
+
+bool AArch64TargetLowering::shouldNormalizeToSelectSequence(LLVMContext &,
+ EVT) const {
+ return false;
+}