setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Custom);
setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Custom);
- // [SU][MIN|MAX] are available for all NEON types apart from i64.
+ // [SU][MIN|MAX] and [SU]ABSDIFF are available for all NEON types apart from
+ // i64.
if (!VT.isFloatingPoint() &&
VT.getSimpleVT() != MVT::v2i64 && VT.getSimpleVT() != MVT::v1i64)
- for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
+ for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX,
+ ISD::SABSDIFF, ISD::UABSDIFF})
setOperationAction(Opcode, VT.getSimpleVT(), Legal);
if (Subtarget->isLittleEndian()) {
addTypeForNEON(VT, MVT::v4i32);
}
-EVT AArch64TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
+EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
+ EVT VT) const {
if (!VT.isVector())
return MVT::i32;
return VT.changeVectorElementTypeToInteger();
}
}
-MVT AArch64TargetLowering::getScalarShiftAmountTy(EVT LHSTy) const {
+MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
+ EVT) const {
return MVT::i64;
}
.getValue(1);
}
+/// \defgroup AArch64CCMP CMP;CCMP matching
+///
+/// These functions deal with the formation of CMP;CCMP;... sequences.
+/// The CCMP/CCMN/FCCMP/FCCMPE instructions allow the conditional execution of
+/// a comparison. They set the NZCV flags to a predefined value if their
+/// predicate is false. This allows to express arbitrary conjunctions, for
+/// example "cmp 0 (and (setCA (cmp A)) (setCB (cmp B))))"
+/// expressed as:
+/// cmp A
+/// ccmp B, inv(CB), CA
+/// check for CB flags
+///
+/// In general we can create code for arbitrary "... (and (and A B) C)"
+/// sequences. We can also implement some "or" expressions, because "(or A B)"
+/// is equivalent to "not (and (not A) (not B))" and we can implement some
+/// negation operations:
+/// We can negate the results of a single comparison by inverting the flags
+/// used when the predicate fails and inverting the flags tested in the next
+/// instruction; We can also negate the results of the whole previous
+/// conditional compare sequence by inverting the flags tested in the next
+/// instruction. However there is no way to negate the result of a partial
+/// sequence.
+///
+/// Therefore on encountering an "or" expression we can negate the subtree on
+/// one side and have to be able to push the negate to the leafs of the subtree
+/// on the other side (see also the comments in code). As complete example:
+/// "or (or (setCA (cmp A)) (setCB (cmp B)))
+/// (and (setCC (cmp C)) (setCD (cmp D)))"
+/// is transformed to
+/// "not (and (not (and (setCC (cmp C)) (setCC (cmp D))))
+/// (and (not (setCA (cmp A)) (not (setCB (cmp B))))))"
+/// and implemented as:
+/// cmp C
+/// ccmp D, inv(CD), CC
+/// ccmp A, CA, inv(CD)
+/// ccmp B, CB, inv(CA)
+/// check for CB flags
+/// A counterexample is "or (and A B) (and C D)" which cannot be implemented
+/// by conditional compare sequences.
+/// @{
+
+/// Create a conditional comparison; Use CCMP, CCMN or FCCMP as appropriate.
static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
ISD::CondCode CC, SDValue CCOp,
SDValue Condition, unsigned NZCV,
}
/// Returns true if @p Val is a tree of AND/OR/SETCC operations.
-static bool isConjunctionDisjunctionTree(const SDValue Val, unsigned Depth) {
+/// CanPushNegate is set to true if we can push a negate operation through
+/// the tree in a was that we are left with AND operations and negate operations
+/// at the leafs only. i.e. "not (or (or x y) z)" can be changed to
+/// "and (and (not x) (not y)) (not z)"; "not (or (and x y) z)" cannot be
+/// brought into such a form.
+static bool isConjunctionDisjunctionTree(const SDValue Val, bool &CanPushNegate,
+ unsigned Depth = 0) {
if (!Val.hasOneUse())
return false;
- if (Val->getOpcode() == ISD::SETCC)
+ unsigned Opcode = Val->getOpcode();
+ if (Opcode == ISD::SETCC) {
+ CanPushNegate = true;
return true;
+ }
// Protect against stack overflow.
- if (Depth > 1000)
+ if (Depth > 15)
return false;
- if (Val->getOpcode() == ISD::AND || Val->getOpcode() == ISD::OR) {
+ if (Opcode == ISD::AND || Opcode == ISD::OR) {
SDValue O0 = Val->getOperand(0);
SDValue O1 = Val->getOperand(1);
- return isConjunctionDisjunctionTree(O0, Depth+1) &&
- isConjunctionDisjunctionTree(O1, Depth+1);
+ bool CanPushNegateL;
+ if (!isConjunctionDisjunctionTree(O0, CanPushNegateL, Depth+1))
+ return false;
+ bool CanPushNegateR;
+ if (!isConjunctionDisjunctionTree(O1, CanPushNegateR, Depth+1))
+ return false;
+ // We cannot push a negate through an AND operation (it would become an OR),
+ // we can however change a (not (or x y)) to (and (not x) (not y)) if we can
+ // push the negate through the x/y subtrees.
+ CanPushNegate = (Opcode == ISD::OR) && CanPushNegateL && CanPushNegateR;
+ return true;
}
return false;
}
/// Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain
-/// of CCMP/CFCMP ops. For example (SETCC_0 & SETCC_1) with condition cond0 and
-/// cond1 can be transformed into "CMP; CCMP" with CCMP executing on cond_0
-/// and setting flags to inversed(cond_1) otherwise.
-/// This recursive function produces DAG nodes that produce condition flags
-/// suitable to determine the truth value of @p Val (which is AND/OR/SETCC)
-/// by testing the result for the condition set to @p OutCC. If @p Negate is
-/// set the opposite truth value is produced. If @p CCOp and @p Condition are
-/// given then conditional comparison are created so that false is reported
-/// when they are false.
-static SDValue emitConjunctionDisjunctionTree(
- SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC, bool Negate,
- SDValue CCOp = SDValue(), AArch64CC::CondCode Condition = AArch64CC::AL) {
- assert(isConjunctionDisjunctionTree(Val, 0));
- // We're at a tree leaf, produce a c?f?cmp.
+/// of CCMP/CFCMP ops. See @ref AArch64CCMP.
+/// Tries to transform the given i1 producing node @p Val to a series compare
+/// and conditional compare operations. @returns an NZCV flags producing node
+/// and sets @p OutCC to the flags that should be tested or returns SDValue() if
+/// transformation was not possible.
+/// On recursive invocations @p PushNegate may be set to true to have negation
+/// effects pushed to the tree leafs; @p Predicate is an NZCV flag predicate
+/// for the comparisons in the current subtree; @p Depth limits the search
+/// depth to avoid stack overflow.
+static SDValue emitConjunctionDisjunctionTree(SelectionDAG &DAG, SDValue Val,
+ AArch64CC::CondCode &OutCC, bool PushNegate = false,
+ SDValue CCOp = SDValue(), AArch64CC::CondCode Predicate = AArch64CC::AL,
+ unsigned Depth = 0) {
+ // We're at a tree leaf, produce a conditional comparison operation.
unsigned Opcode = Val->getOpcode();
if (Opcode == ISD::SETCC) {
SDValue LHS = Val->getOperand(0);
SDValue RHS = Val->getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get();
bool isInteger = LHS.getValueType().isInteger();
- if (Negate)
+ if (PushNegate)
CC = getSetCCInverse(CC, isInteger);
SDLoc DL(Val);
// Determine OutCC and handle FP special case.
if (!CCOp.getNode())
ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG);
else {
- SDValue ConditionOp = DAG.getConstant(Condition, DL, MVT_CC);
+ SDValue ConditionOp = DAG.getConstant(Predicate, DL, MVT_CC);
// Note that we want the inverse of ExtraCC, so NZCV is not inversed.
unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(ExtraCC);
ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, ConditionOp,
NZCV, DL, DAG);
}
CCOp = ExtraCmp;
- Condition = AArch64CC::getInvertedCondCode(ExtraCC);
+ Predicate = AArch64CC::getInvertedCondCode(ExtraCC);
OutCC = AArch64CC::getInvertedCondCode(OutCC);
}
}
if (!CCOp.getNode())
return emitComparison(LHS, RHS, CC, DL, DAG);
// Otherwise produce a ccmp.
- SDValue ConditionOp = DAG.getConstant(Condition, DL, MVT_CC);
+ SDValue ConditionOp = DAG.getConstant(Predicate, DL, MVT_CC);
AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
return emitConditionalComparison(LHS, RHS, CC, CCOp, ConditionOp, NZCV, DL,
DAG);
- }
+ } else if (Opcode != ISD::AND && Opcode != ISD::OR)
+ return SDValue();
+
+ assert((Opcode == ISD::OR || !PushNegate)
+ && "Can only push negate through OR operation");
- // Construct comparison sequence for the left hand side.
+ // Check if both sides can be transformed.
SDValue LHS = Val->getOperand(0);
SDValue RHS = Val->getOperand(1);
+ bool CanPushNegateL;
+ if (!isConjunctionDisjunctionTree(LHS, CanPushNegateL, Depth+1))
+ return SDValue();
+ bool CanPushNegateR;
+ if (!isConjunctionDisjunctionTree(RHS, CanPushNegateR, Depth+1))
+ return SDValue();
- // We can only implement AND-like behaviour here, but negation is free. So we
- // use (not (and (not x) (not y))) to implement (or x y).
- bool isOr = Val->getOpcode() == ISD::OR;
- assert((isOr || Val->getOpcode() == ISD::AND) && "Should have AND or OR.");
- Negate ^= isOr;
+ // Do we need to negate our operands?
+ bool NegateOperands = Opcode == ISD::OR;
+ // We can negate the results of all previous operations by inverting the
+ // predicate flags giving us a free negation for one side. For the other side
+ // we need to be able to push the negation to the leafs of the tree.
+ if (NegateOperands) {
+ if (!CanPushNegateL && !CanPushNegateR)
+ return SDValue();
+ // Order the side where we can push the negate through to LHS.
+ if (!CanPushNegateL && CanPushNegateR) {
+ std::swap(LHS, RHS);
+ CanPushNegateL = true;
+ }
+ }
+ // Emit RHS. If we want to negate the tree we only need to push a negate
+ // through if we are already in a PushNegate case, otherwise we can negate
+ // the "flags to test" afterwards.
AArch64CC::CondCode RHSCC;
- SDValue CmpR =
- emitConjunctionDisjunctionTree(DAG, RHS, RHSCC, isOr, CCOp, Condition);
- SDValue CmpL =
- emitConjunctionDisjunctionTree(DAG, LHS, OutCC, isOr, CmpR, RHSCC);
- if (Negate)
+ SDValue CmpR = emitConjunctionDisjunctionTree(DAG, RHS, RHSCC, PushNegate,
+ CCOp, Predicate, Depth+1);
+ if (NegateOperands && !PushNegate)
+ RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
+ // Emit LHS. We must push the negate through if we need to negate it.
+ SDValue CmpL = emitConjunctionDisjunctionTree(DAG, LHS, OutCC, NegateOperands,
+ CmpR, RHSCC, Depth+1);
+ // If we transformed an OR to and AND then we have to negate the result
+ // (or absorb a PushNegate resulting in a double negation).
+ if (Opcode == ISD::OR && !PushNegate)
OutCC = AArch64CC::getInvertedCondCode(OutCC);
return CmpL;
}
+/// @}
+
static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
SDValue &AArch64cc, SelectionDAG &DAG, SDLoc dl) {
- SDValue Cmp;
- AArch64CC::CondCode AArch64CC;
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
EVT VT = RHS.getValueType();
uint64_t C = RHSC->getZExtValue();
}
}
}
+ SDValue Cmp;
+ AArch64CC::CondCode AArch64CC;
if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS);
RHS.getValueType()),
CC, dl, DAG);
AArch64CC = changeIntCCToAArch64CC(CC);
- goto CreateCCNode;
}
}
- if ((RHSC->isNullValue() || RHSC->isOne()) &&
- isConjunctionDisjunctionTree(LHS, 0)) {
- bool Negate = (CC == ISD::SETNE) ^ RHSC->isNullValue();
- Cmp = emitConjunctionDisjunctionTree(DAG, LHS, AArch64CC, Negate);
- goto CreateCCNode;
+ if (!Cmp && (RHSC->isNullValue() || RHSC->isOne())) {
+ if ((Cmp = emitConjunctionDisjunctionTree(DAG, LHS, AArch64CC))) {
+ if ((CC == ISD::SETNE) ^ RHSC->isNullValue())
+ AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC);
+ }
}
}
- Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
- AArch64CC = changeIntCCToAArch64CC(CC);
-
-CreateCCNode:
+ if (!Cmp) {
+ Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
+ AArch64CC = changeIntCCToAArch64CC(CC);
+ }
AArch64cc = DAG.getConstant(AArch64CC, dl, MVT_CC);
return Cmp;
}
ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
- // The the values aren't constants, this isn't the pattern we're looking for.
+ // The values aren't constants, this isn't the pattern we're looking for.
if (!CFVal || !CTVal)
return Op;
const char *LibcallName =
(ArgVT == MVT::f64) ? "__sincos_stret" : "__sincosf_stret";
- SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy());
+ SDValue Callee =
+ DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));
StructType *RetTy = StructType::get(ArgTy, ArgTy, nullptr);
TargetLowering::CallLoweringInfo CLI(DAG);
if (N->getOpcode() != ISD::BUILD_VECTOR)
return false;
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
- SDNode *Elt = N->getOperand(i).getNode();
+ for (const SDValue &Elt : N->op_values()) {
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
unsigned EltSize = VT.getVectorElementType().getSizeInBits();
unsigned HalfSize = EltSize / 2;
CurArgIdx = Ins[i].getOrigArgIndex();
// Get type of the original argument.
- EVT ActualVT = getValueType(CurOrigArg->getType(), /*AllowUnknown*/ true);
+ EVT ActualVT = getValueType(DAG.getDataLayout(), CurOrigArg->getType(),
+ /*AllowUnknown*/ true);
MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
// If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
if (Ins[i].Flags.isByVal()) {
// Byval is used for HFAs in the PCS, but the system should work in a
// non-compliant manner for larger structs.
- EVT PtrTy = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
int Size = Ins[i].Flags.getByValSize();
unsigned NumRegs = (Size + 7) / 8;
// case. It should also work for fundamental types too.
unsigned FrameIdx =
MFI->CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
- SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrTy);
+ SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrVT);
InVals.push_back(FrameIdxN);
continue;
int FI = MFI->CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);
// Create load nodes to retrieve arguments from the stack.
- SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
SDValue ArgValue;
// For NON_EXTLOAD, generic code in getLoad assert(ValVT == MemVT)
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
SmallVector<SDValue, 8> MemOps;
if (GPRSaveSize != 0) {
GPRIdx = MFI->CreateStackObject(GPRSaveSize, 8, false);
- SDValue FIN = DAG.getFrameIndex(GPRIdx, getPointerTy());
+ SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT);
for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass);
DAG.getStore(Val.getValue(1), DL, Val, FIN,
MachinePointerInfo::getStack(i * 8), false, false, 0);
MemOps.push_back(Store);
- FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN,
- DAG.getConstant(8, DL, getPointerTy()));
+ FIN =
+ DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(8, DL, PtrVT));
}
}
FuncInfo->setVarArgsGPRIndex(GPRIdx);
if (FPRSaveSize != 0) {
FPRIdx = MFI->CreateStackObject(FPRSaveSize, 16, false);
- SDValue FIN = DAG.getFrameIndex(FPRIdx, getPointerTy());
+ SDValue FIN = DAG.getFrameIndex(FPRIdx, PtrVT);
for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass);
DAG.getStore(Val.getValue(1), DL, Val, FIN,
MachinePointerInfo::getStack(i * 16), false, false, 0);
MemOps.push_back(Store);
- FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN,
- DAG.getConstant(16, DL, getPointerTy()));
+ FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
+ DAG.getConstant(16, DL, PtrVT));
}
}
FuncInfo->setVarArgsFPRIndex(FPRIdx);
// cannot rely on the linker replacing the tail call with a return.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
const GlobalValue *GV = G->getGlobal();
- const Triple TT(getTargetMachine().getTargetTriple());
+ const Triple &TT = getTargetMachine().getTargetTriple();
if (GV->hasExternalWeakLinkage() &&
(!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
return false;
for (unsigned i = 0; i != NumArgs; ++i) {
MVT ValVT = Outs[i].VT;
// Get type of the original argument.
- EVT ActualVT = getValueType(CLI.getArgs()[Outs[i].OrigArgIndex].Ty,
+ EVT ActualVT = getValueType(DAG.getDataLayout(),
+ CLI.getArgs()[Outs[i].OrigArgIndex].Ty,
/*AllowUnknown*/ true);
MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ValVT;
ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
true),
DL);
- SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP, getPointerTy());
+ SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP,
+ getPointerTy(DAG.getDataLayout()));
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
// Walk the register/memloc assignments, inserting copies/loads.
for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); i != e;
unsigned LocMemOffset = VA.getLocMemOffset();
int32_t Offset = LocMemOffset + BEAlign;
SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
- PtrOff = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, PtrOff);
+ PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
if (IsTailCall) {
Offset = Offset + FPDiff;
int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
- DstAddr = DAG.getFrameIndex(FI, getPointerTy());
+ DstAddr = DAG.getFrameIndex(FI, PtrVT);
DstInfo = MachinePointerInfo::getFixedStack(FI);
// Make sure any stack arguments overlapping with where we're storing
} else {
SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
- DstAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, PtrOff);
+ DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
DstInfo = MachinePointerInfo::getStack(LocMemOffset);
}
const GlobalValue *GV = G->getGlobal();
bool InternalLinkage = GV->hasInternalLinkage();
if (InternalLinkage)
- Callee = DAG.getTargetGlobalAddress(GV, DL, getPointerTy(), 0, 0);
+ Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
else {
- Callee = DAG.getTargetGlobalAddress(GV, DL, getPointerTy(), 0,
- AArch64II::MO_GOT);
- Callee = DAG.getNode(AArch64ISD::LOADgot, DL, getPointerTy(), Callee);
+ Callee =
+ DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_GOT);
+ Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
}
} else if (ExternalSymbolSDNode *S =
dyn_cast<ExternalSymbolSDNode>(Callee)) {
const char *Sym = S->getSymbol();
- Callee =
- DAG.getTargetExternalSymbol(Sym, getPointerTy(), AArch64II::MO_GOT);
- Callee = DAG.getNode(AArch64ISD::LOADgot, DL, getPointerTy(), Callee);
+ Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, AArch64II::MO_GOT);
+ Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
}
} else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
const GlobalValue *GV = G->getGlobal();
- Callee = DAG.getTargetGlobalAddress(GV, DL, getPointerTy(), 0, 0);
+ Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
const char *Sym = S->getSymbol();
- Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), 0);
+ Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0);
}
// We don't usually want to end the call-sequence here because we would tidy
SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc DL(Op);
const GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = GN->getGlobal();
assert(Subtarget->isTargetDarwin() && "TLS only supported on Darwin");
SDLoc DL(Op);
- MVT PtrVT = getPointerTy();
+ MVT PtrVT = getPointerTy(DAG.getDataLayout());
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
SDValue TLVPAddr =
/// the sequence is produced as per above.
SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(SDValue SymAddr, SDLoc DL,
SelectionDAG &DAG) const {
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Chain = DAG.getEntryNode();
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
}
SDValue TPOff;
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc DL(Op);
const GlobalValue *GV = GA->getGlobal();
EltVT = MVT::i64;
VecVT = MVT::v2i64;
- // We want to materialize a mask with the the high bit set, but the AdvSIMD
+ // We want to materialize a mask with the high bit set, but the AdvSIMD
// immediate moves cannot materialize that in a single instruction for
// 64-bit elements. Instead, materialize zero and then negate it.
EltMask = 0;
// Jump table entries as PC relative offsets. No additional tweaking
// is necessary here. Just get the address of the jump table.
JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc DL(Op);
if (getTargetMachine().getCodeModel() == CodeModel::Large &&
SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op,
SelectionDAG &DAG) const {
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc DL(Op);
if (getTargetMachine().getCodeModel() == CodeModel::Large) {
SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op,
SelectionDAG &DAG) const {
const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc DL(Op);
if (getTargetMachine().getCodeModel() == CodeModel::Large &&
!Subtarget->isTargetMachO()) {
DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
SDLoc DL(Op);
- SDValue FR =
- DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), getPointerTy());
+ SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(),
+ getPointerTy(DAG.getDataLayout()));
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
MachinePointerInfo(SV), false, false, 0);
// Standard, section B.3.
MachineFunction &MF = DAG.getMachineFunction();
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc DL(Op);
SDValue Chain = Op.getOperand(0);
SmallVector<SDValue, 4> MemOps;
// void *__stack at offset 0
- SDValue Stack =
- DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), getPointerTy());
+ SDValue Stack = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), PtrVT);
MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList,
MachinePointerInfo(SV), false, false, 8));
if (GPRSize > 0) {
SDValue GRTop, GRTopAddr;
- GRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
- DAG.getConstant(8, DL, getPointerTy()));
+ GRTopAddr =
+ DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(8, DL, PtrVT));
- GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), getPointerTy());
- GRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), GRTop,
- DAG.getConstant(GPRSize, DL, getPointerTy()));
+ GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), PtrVT);
+ GRTop = DAG.getNode(ISD::ADD, DL, PtrVT, GRTop,
+ DAG.getConstant(GPRSize, DL, PtrVT));
MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr,
MachinePointerInfo(SV, 8), false, false, 8));
int FPRSize = FuncInfo->getVarArgsFPRSize();
if (FPRSize > 0) {
SDValue VRTop, VRTopAddr;
- VRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
- DAG.getConstant(16, DL, getPointerTy()));
+ VRTopAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
+ DAG.getConstant(16, DL, PtrVT));
- VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), getPointerTy());
- VRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), VRTop,
- DAG.getConstant(FPRSize, DL, getPointerTy()));
+ VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), PtrVT);
+ VRTop = DAG.getNode(ISD::ADD, DL, PtrVT, VRTop,
+ DAG.getConstant(FPRSize, DL, PtrVT));
MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr,
MachinePointerInfo(SV, 16), false, false, 8));
}
// int __gr_offs at offset 24
- SDValue GROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
- DAG.getConstant(24, DL, getPointerTy()));
+ SDValue GROffsAddr =
+ DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(24, DL, PtrVT));
MemOps.push_back(DAG.getStore(Chain, DL,
DAG.getConstant(-GPRSize, DL, MVT::i32),
GROffsAddr, MachinePointerInfo(SV, 24), false,
false, 4));
// int __vr_offs at offset 28
- SDValue VROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
- DAG.getConstant(28, DL, getPointerTy()));
+ SDValue VROffsAddr =
+ DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(28, DL, PtrVT));
MemOps.push_back(DAG.getStore(Chain, DL,
DAG.getConstant(-FPRSize, DL, MVT::i32),
VROffsAddr, MachinePointerInfo(SV, 28), false,
SDValue Chain = Op.getOperand(0);
SDValue Addr = Op.getOperand(1);
unsigned Align = Op.getConstantOperandVal(3);
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
- SDValue VAList = DAG.getLoad(getPointerTy(), DL, Chain, Addr,
- MachinePointerInfo(V), false, false, false, 0);
+ SDValue VAList = DAG.getLoad(PtrVT, DL, Chain, Addr, MachinePointerInfo(V),
+ false, false, false, 0);
Chain = VAList.getValue(1);
if (Align > 8) {
assert(((Align & (Align - 1)) == 0) && "Expected Align to be a power of 2");
- VAList = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
- DAG.getConstant(Align - 1, DL, getPointerTy()));
- VAList = DAG.getNode(ISD::AND, DL, getPointerTy(), VAList,
- DAG.getConstant(-(int64_t)Align, DL, getPointerTy()));
+ VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
+ DAG.getConstant(Align - 1, DL, PtrVT));
+ VAList = DAG.getNode(ISD::AND, DL, PtrVT, VAList,
+ DAG.getConstant(-(int64_t)Align, DL, PtrVT));
}
Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
- uint64_t ArgSize = getDataLayout()->getTypeAllocSize(ArgTy);
+ uint64_t ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy);
// Scalar integer and FP values smaller than 64 bits are implicitly extended
// up to 64 bits. At the very least, we have to increase the striding of the
}
// Increment the pointer, VAList, to the next vaarg
- SDValue VANext = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
- DAG.getConstant(ArgSize, DL, getPointerTy()));
+ SDValue VANext = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
+ DAG.getConstant(ArgSize, DL, PtrVT));
// Store the incremented VAList to the legalized pointer
SDValue APStore = DAG.getStore(Chain, DL, VANext, Addr, MachinePointerInfo(V),
false, false, 0);
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
-unsigned AArch64TargetLowering::getRegisterByName(const char* RegName,
- EVT VT) const {
+unsigned AArch64TargetLowering::getRegisterByName(const char* RegName, EVT VT,
+ SelectionDAG &DAG) const {
unsigned Reg = StringSwitch<unsigned>(RegName)
.Case("sp", AArch64::SP)
.Default(0);
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
if (Depth) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
- SDValue Offset = DAG.getConstant(8, DL, getPointerTy());
+ SDValue Offset = DAG.getConstant(8, DL, getPointerTy(DAG.getDataLayout()));
return DAG.getLoad(VT, DL, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
MachinePointerInfo(), false, false, false, 0);
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
AArch64TargetLowering::ConstraintType
-AArch64TargetLowering::getConstraintType(const std::string &Constraint) const {
+AArch64TargetLowering::getConstraintType(StringRef Constraint) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
default:
std::pair<unsigned, const TargetRegisterClass *>
AArch64TargetLowering::getRegForInlineAsmConstraint(
- const TargetRegisterInfo *TRI, const std::string &Constraint,
- MVT VT) const {
+ const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'r':
unsigned Size = Constraint.size();
if ((Size == 4 || Size == 5) && Constraint[0] == '{' &&
tolower(Constraint[1]) == 'v' && Constraint[Size - 1] == '}') {
- const std::string Reg =
- std::string(&Constraint[2], &Constraint[Size - 1]);
- int RegNo = atoi(Reg.c_str());
- if (RegNo >= 0 && RegNo <= 31) {
+ int RegNo;
+ bool Failed = Constraint.slice(2, Size - 1).getAsInteger(10, RegNo);
+ if (!Failed && RegNo >= 0 && RegNo <= 31) {
// v0 - v31 are aliases of q0 - q31.
// By default we'll emit v0-v31 for this unless there's a modifier where
// we'll emit the correct register as well.
bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I,
unsigned Intrinsic) const {
+ auto &DL = I.getModule()->getDataLayout();
switch (Intrinsic) {
case Intrinsic::aarch64_neon_ld2:
case Intrinsic::aarch64_neon_ld3:
case Intrinsic::aarch64_neon_ld4r: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
// Conservatively set memVT to the entire set of vectors loaded.
- uint64_t NumElts = getDataLayout()->getTypeAllocSize(I.getType()) / 8;
+ uint64_t NumElts = DL.getTypeAllocSize(I.getType()) / 8;
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
Info.offset = 0;
Type *ArgTy = I.getArgOperand(ArgI)->getType();
if (!ArgTy->isVectorTy())
break;
- NumElts += getDataLayout()->getTypeAllocSize(ArgTy) / 8;
+ NumElts += DL.getTypeAllocSize(ArgTy) / 8;
}
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
Info.memVT = MVT::getVT(PtrTy->getElementType());
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
- Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType());
+ Info.align = DL.getABITypeAlignment(PtrTy->getElementType());
Info.vol = true;
Info.readMem = true;
Info.writeMem = false;
Info.memVT = MVT::getVT(PtrTy->getElementType());
Info.ptrVal = I.getArgOperand(1);
Info.offset = 0;
- Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType());
+ Info.align = DL.getABITypeAlignment(PtrTy->getElementType());
Info.vol = true;
Info.readMem = false;
Info.writeMem = true;
return true;
const TargetOptions &Options = getTargetMachine().Options;
- EVT VT = getValueType(User->getOperand(0)->getType());
+ const DataLayout &DL = I->getModule()->getDataLayout();
+ EVT VT = getValueType(DL, User->getOperand(0)->getType());
if (isFMAFasterThanFMulAndFAdd(VT) &&
isOperationLegalOrCustom(ISD::FMA, VT) &&
break;
case Instruction::GetElementPtr: {
gep_type_iterator GTI = gep_type_begin(Instr);
+ auto &DL = Ext->getModule()->getDataLayout();
std::advance(GTI, U.getOperandNo());
Type *IdxTy = *GTI;
// This extension will end up with a shift because of the scaling factor.
// Get the shift amount based on the scaling factor:
// log2(sizeof(IdxTy)) - log2(8).
uint64_t ShiftAmt =
- countTrailingZeros(getDataLayout()->getTypeStoreSizeInBits(IdxTy)) - 3;
+ countTrailingZeros(DL.getTypeStoreSizeInBits(IdxTy)) - 3;
// Is the constant foldable in the shift of the addressing mode?
// I.e., shift amount is between 1 and 4 inclusive.
if (ShiftAmt == 0 || ShiftAmt > 4)
return NumBits == 32 || NumBits == 64;
}
+/// \brief Lower an interleaved load into a ldN intrinsic.
+///
+/// E.g. Lower an interleaved load (Factor = 2):
+/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
+/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
+/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
+///
+/// Into:
+/// %ld2 = { <4 x i32>, <4 x i32> } call llvm.aarch64.neon.ld2(%ptr)
+/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
+/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
+bool AArch64TargetLowering::lowerInterleavedLoad(
+ LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
+ ArrayRef<unsigned> Indices, unsigned Factor) const {
+ assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
+ "Invalid interleave factor");
+ assert(!Shuffles.empty() && "Empty shufflevector input");
+ assert(Shuffles.size() == Indices.size() &&
+ "Unmatched number of shufflevectors and indices");
+
+ const DataLayout &DL = LI->getModule()->getDataLayout();
+
+ VectorType *VecTy = Shuffles[0]->getType();
+ unsigned VecSize = DL.getTypeAllocSizeInBits(VecTy);
+
+ // Skip illegal vector types.
+ if (VecSize != 64 && VecSize != 128)
+ return false;
+
+ // A pointer vector can not be the return type of the ldN intrinsics. Need to
+ // load integer vectors first and then convert to pointer vectors.
+ Type *EltTy = VecTy->getVectorElementType();
+ if (EltTy->isPointerTy())
+ VecTy =
+ VectorType::get(DL.getIntPtrType(EltTy), VecTy->getVectorNumElements());
+
+ Type *PtrTy = VecTy->getPointerTo(LI->getPointerAddressSpace());
+ Type *Tys[2] = {VecTy, PtrTy};
+ static const Intrinsic::ID LoadInts[3] = {Intrinsic::aarch64_neon_ld2,
+ Intrinsic::aarch64_neon_ld3,
+ Intrinsic::aarch64_neon_ld4};
+ Function *LdNFunc =
+ Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
+
+ IRBuilder<> Builder(LI);
+ Value *Ptr = Builder.CreateBitCast(LI->getPointerOperand(), PtrTy);
+
+ CallInst *LdN = Builder.CreateCall(LdNFunc, Ptr, "ldN");
+
+ // Replace uses of each shufflevector with the corresponding vector loaded
+ // by ldN.
+ for (unsigned i = 0; i < Shuffles.size(); i++) {
+ ShuffleVectorInst *SVI = Shuffles[i];
+ unsigned Index = Indices[i];
+
+ Value *SubVec = Builder.CreateExtractValue(LdN, Index);
+
+ // Convert the integer vector to pointer vector if the element is pointer.
+ if (EltTy->isPointerTy())
+ SubVec = Builder.CreateIntToPtr(SubVec, SVI->getType());
+
+ SVI->replaceAllUsesWith(SubVec);
+ }
+
+ return true;
+}
+
+/// \brief Get a mask consisting of sequential integers starting from \p Start.
+///
+/// I.e. <Start, Start + 1, ..., Start + NumElts - 1>
+static Constant *getSequentialMask(IRBuilder<> &Builder, unsigned Start,
+ unsigned NumElts) {
+ SmallVector<Constant *, 16> Mask;
+ for (unsigned i = 0; i < NumElts; i++)
+ Mask.push_back(Builder.getInt32(Start + i));
+
+ return ConstantVector::get(Mask);
+}
+
+/// \brief Lower an interleaved store into a stN intrinsic.
+///
+/// E.g. Lower an interleaved store (Factor = 3):
+/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
+/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
+/// store <12 x i32> %i.vec, <12 x i32>* %ptr
+///
+/// Into:
+/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
+/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
+/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
+/// call void llvm.aarch64.neon.st3(%sub.v0, %sub.v1, %sub.v2, %ptr)
+///
+/// Note that the new shufflevectors will be removed and we'll only generate one
+/// st3 instruction in CodeGen.
+bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
+ ShuffleVectorInst *SVI,
+ unsigned Factor) const {
+ assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
+ "Invalid interleave factor");
+
+ VectorType *VecTy = SVI->getType();
+ assert(VecTy->getVectorNumElements() % Factor == 0 &&
+ "Invalid interleaved store");
+
+ unsigned NumSubElts = VecTy->getVectorNumElements() / Factor;
+ Type *EltTy = VecTy->getVectorElementType();
+ VectorType *SubVecTy = VectorType::get(EltTy, NumSubElts);
+
+ const DataLayout &DL = SI->getModule()->getDataLayout();
+ unsigned SubVecSize = DL.getTypeAllocSizeInBits(SubVecTy);
+
+ // Skip illegal vector types.
+ if (SubVecSize != 64 && SubVecSize != 128)
+ return false;
+
+ Value *Op0 = SVI->getOperand(0);
+ Value *Op1 = SVI->getOperand(1);
+ IRBuilder<> Builder(SI);
+
+ // StN intrinsics don't support pointer vectors as arguments. Convert pointer
+ // vectors to integer vectors.
+ if (EltTy->isPointerTy()) {
+ Type *IntTy = DL.getIntPtrType(EltTy);
+ unsigned NumOpElts =
+ dyn_cast<VectorType>(Op0->getType())->getVectorNumElements();
+
+ // Convert to the corresponding integer vector.
+ Type *IntVecTy = VectorType::get(IntTy, NumOpElts);
+ Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
+ Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
+
+ SubVecTy = VectorType::get(IntTy, NumSubElts);
+ }
+
+ Type *PtrTy = SubVecTy->getPointerTo(SI->getPointerAddressSpace());
+ Type *Tys[2] = {SubVecTy, PtrTy};
+ static const Intrinsic::ID StoreInts[3] = {Intrinsic::aarch64_neon_st2,
+ Intrinsic::aarch64_neon_st3,
+ Intrinsic::aarch64_neon_st4};
+ Function *StNFunc =
+ Intrinsic::getDeclaration(SI->getModule(), StoreInts[Factor - 2], Tys);
+
+ SmallVector<Value *, 5> Ops;
+
+ // Split the shufflevector operands into sub vectors for the new stN call.
+ for (unsigned i = 0; i < Factor; i++)
+ Ops.push_back(Builder.CreateShuffleVector(
+ Op0, Op1, getSequentialMask(Builder, NumSubElts * i, NumSubElts)));
+
+ Ops.push_back(Builder.CreateBitCast(SI->getPointerOperand(), PtrTy));
+ Builder.CreateCall(StNFunc, Ops);
+ return true;
+}
+
static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
unsigned AlignCheck) {
return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) &&
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
-bool AArch64TargetLowering::isLegalAddressingMode(const AddrMode &AM,
- Type *Ty,
+bool AArch64TargetLowering::isLegalAddressingMode(const DataLayout &DL,
+ const AddrMode &AM, Type *Ty,
unsigned AS) const {
// AArch64 has five basic addressing modes:
// reg
// i.e., reg + 0, reg + imm9, reg + SIZE_IN_BYTES * uimm12
uint64_t NumBytes = 0;
if (Ty->isSized()) {
- uint64_t NumBits = getDataLayout()->getTypeSizeInBits(Ty);
+ uint64_t NumBits = DL.getTypeSizeInBits(Ty);
NumBytes = NumBits / 8;
if (!isPowerOf2_64(NumBits))
NumBytes = 0;
return false;
}
-int AArch64TargetLowering::getScalingFactorCost(const AddrMode &AM,
- Type *Ty,
+int AArch64TargetLowering::getScalingFactorCost(const DataLayout &DL,
+ const AddrMode &AM, Type *Ty,
unsigned AS) const {
// Scaling factors are not free at all.
// Operands | Rt Latency
// -------------------------------------------
// Rt, [Xn, Xm, lsl #imm] | Rn: 4 Rm: 5
// Rt, [Xn, Wm, <extend> #imm] |
- if (isLegalAddressingMode(AM, Ty, AS))
+ if (isLegalAddressingMode(DL, AM, Ty, AS))
// Scale represents reg2 * scale, thus account for 1 if
// it is not equal to 0 or 1.
return AM.Scale != 0 && AM.Scale != 1;
// (aarch64_neon_umull (extract_high (v2i64 vec)))
// (extract_high (v2i64 (dup128 scalar)))))
//
-static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N,
+static SDValue tryCombineLongOpWithDup(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
- SDValue LHS = N->getOperand(1);
- SDValue RHS = N->getOperand(2);
+ bool IsIntrinsic = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN;
+ SDValue LHS = N->getOperand(IsIntrinsic ? 1 : 0);
+ SDValue RHS = N->getOperand(IsIntrinsic ? 2 : 1);
assert(LHS.getValueType().is64BitVector() &&
RHS.getValueType().is64BitVector() &&
"unexpected shape for long operation");
return SDValue();
}
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), N->getValueType(0),
- N->getOperand(0), LHS, RHS);
+ // N could either be an intrinsic or a sabsdiff/uabsdiff node.
+ if (IsIntrinsic)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), N->getValueType(0),
+ N->getOperand(0), LHS, RHS);
+ else
+ return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
+ LHS, RHS);
}
static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) {
case Intrinsic::aarch64_neon_fmin:
return DAG.getNode(AArch64ISD::FMIN, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
+ case Intrinsic::aarch64_neon_sabd:
+ return DAG.getNode(ISD::SABSDIFF, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2));
+ case Intrinsic::aarch64_neon_uabd:
+ return DAG.getNode(ISD::UABSDIFF, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_neon_smull:
case Intrinsic::aarch64_neon_umull:
case Intrinsic::aarch64_neon_pmull:
case Intrinsic::aarch64_neon_sqdmull:
- return tryCombineLongOpWithDup(IID, N, DCI, DAG);
+ return tryCombineLongOpWithDup(N, DCI, DAG);
case Intrinsic::aarch64_neon_sqshl:
case Intrinsic::aarch64_neon_uqshl:
case Intrinsic::aarch64_neon_sqshlu:
// helps the backend to decide that an sabdl2 would be useful, saving a real
// extract_high operation.
if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ZERO_EXTEND &&
- N->getOperand(0).getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
+ (N->getOperand(0).getOpcode() == ISD::SABSDIFF ||
+ N->getOperand(0).getOpcode() == ISD::UABSDIFF)) {
SDNode *ABDNode = N->getOperand(0).getNode();
- unsigned IID = getIntrinsicID(ABDNode);
- if (IID == Intrinsic::aarch64_neon_sabd ||
- IID == Intrinsic::aarch64_neon_uabd) {
- SDValue NewABD = tryCombineLongOpWithDup(IID, ABDNode, DCI, DAG);
- if (!NewABD.getNode())
- return SDValue();
+ SDValue NewABD = tryCombineLongOpWithDup(ABDNode, DCI, DAG);
+ if (!NewABD.getNode())
+ return SDValue();
- return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0),
- NewABD);
- }
+ return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0),
+ NewABD);
}
// This is effectively a custom type legalization for AArch64.