setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::FREM, VT, Expand);
- if (VT.isInteger()) {
- setOperationAction(ISD::SABSDIFF, VT, Legal);
- setOperationAction(ISD::UABSDIFF, VT, Legal);
- }
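+ // NEON has integer VMIN/VMAX only for 8-, 16- and 32-bit lanes, so the
+ // 64-bit element types are excluded below.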
if (!VT.isFloatingPoint() &&
VT != MVT::v2i64 && VT != MVT::v1i64)
for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
setOperationAction(Opcode, VT, Legal);
-
}
void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
{ RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
{ RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
{ RTLIB::SDIV_I32, "__rt_sdiv", CallingConv::ARM_AAPCS_VFP },
+ { RTLIB::UDIV_I32, "__rt_udiv", CallingConv::ARM_AAPCS_VFP },
{ RTLIB::SDIV_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS_VFP },
+ { RTLIB::UDIV_I64, "__rt_udiv64", CallingConv::ARM_AAPCS_VFP },
};
for (const auto &LC : LibraryCalls) {
  setLibcallName(LC.Op, LC.Name);
  setLibcallCallingConv(LC.Op, LC.CC);
}
setOperationAction(ISD::UDIV, MVT::i32, LibCall);
- if (Subtarget->isTargetWindows() && !Subtarget->hasDivide()) {
- setOperationAction(ISD::UDIV, MVT::i32, Custom);
-
- setOperationAction(ISD::UDIV, MVT::i64, Custom);
- }
-
setOperationAction(ISD::SREM, MVT::i32, Expand);
setOperationAction(ISD::UREM, MVT::i32, Expand);
// Register based DivRem for AEABI (RTABI 4.2)
return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
}
+/// \brief Convert a TLS address reference into the correct sequence of loads
+/// and calls to compute the variable's address for Darwin, and return an
+/// SDValue containing the final node.
+
+/// Darwin only has one TLS scheme which must be capable of dealing with the
+/// fully general situation, in the worst case. This means:
+/// + "extern __thread" declaration.
+/// + Defined in a possibly unknown dynamic library.
+///
+/// The general system is that each __thread variable has a [3 x i32] descriptor
+/// which contains information used by the runtime to calculate the address. The
+/// only part of this the compiler needs to know about is the first word, which
+/// contains a function pointer that must be called with the address of the
+/// entire descriptor in "r0".
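+///
+/// (Conceptually, the descriptor resembles:
+///     struct TLVDescriptor { void *(*thunk)(TLVDescriptor *);
+///                            unsigned long key, offset; };
+/// though only the thunk slot matters to this lowering.)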
+///
+/// Since this descriptor may be in a different unit, in general access must
+/// proceed along the usual ARM rules. A common sequence to produce is:
+///
+/// movw rT1, :lower16:_var$non_lazy_ptr
+/// movt rT1, :upper16:_var$non_lazy_ptr
+/// ldr r0, [rT1]
+/// ldr rT2, [r0]
+/// blx rT2
+/// [...address now in r0...]
+SDValue
+ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Subtarget->isTargetDarwin() && "TLS only supported on Darwin");
+ SDLoc DL(Op);
+
+ // The first step is to get the address of the actual global symbol. This is
+ // where the TLS descriptor lives.
+ SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
+
+ // The first entry in the descriptor is a function pointer that we must call
+ // to obtain the address of the variable.
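+ // The descriptor is not expected to change at run time, so the load is
+ // marked invariant and non-temporal (the trailing bool flags to getLoad)
+ // with natural 4-byte alignment.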
+ SDValue Chain = DAG.getEntryNode();
+ SDValue FuncTLVGet =
+ DAG.getLoad(MVT::i32, DL, Chain, DescAddr,
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()),
+ false, true, true, 4);
+ Chain = FuncTLVGet.getValue(1);
+
+ MachineFunction &F = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = F.getFrameInfo();
+ MFI->setAdjustsStack(true);
+
+ // TLS calls preserve all registers except those that absolutely must be
+ // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
+ // silly).
+ auto TRI =
+ getTargetMachine().getSubtargetImpl(*F.getFunction())->getRegisterInfo();
+ auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
+ const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());
+
+ // Finally, we can make the call. This is just a degenerate version of a
+ // normal ARM call node: r0 takes the address of the descriptor, and the
+ // address of the variable in this thread comes back in r0.
+ Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
+ Chain =
+ DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
+ Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
+ DAG.getRegisterMask(Mask), Chain.getValue(1));
+ return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
+}
+
// Lower ISD::GlobalTLSAddress using the "general dynamic" model
SDValue
ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
SDValue
ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
+ if (Subtarget->isTargetDarwin())
+ return LowerGlobalTLSAddressDarwin(Op, DAG);
+
// TODO: implement the "local dynamic" model
- assert(Subtarget->isTargetELF() &&
- "TLS not implemented for non-ELF targets");
+ assert(Subtarget->isTargetELF() && "Only ELF implemented here");
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
if (DAG.getTarget().Options.EmulatedTLS)
return LowerToTLSEmulatedModel(GA, DAG);
// Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64)
// created by LowerConstantFP().
SDValue BitcastOp = Op->getOperand(0);
- if (BitcastOp->getOpcode() == ARMISD::VMOVIMM) {
- SDValue MoveOp = BitcastOp->getOperand(0);
- if (MoveOp->getOpcode() == ISD::TargetConstant &&
- cast<ConstantSDNode>(MoveOp)->getZExtValue() == 0) {
- return true;
- }
- }
+ if (BitcastOp->getOpcode() == ARMISD::VMOVIMM &&
+ isNullConstant(BitcastOp->getOperand(0)))
+ return true;
}
return false;
}
Results.push_back(Read.getOperand(0));
}
+/// \p BC is a bitcast that is about to be turned into a VMOVDRR.
+/// When \p DstVT, the destination type of \p BC, is on the vector
+/// register bank and the source of the bitcast, \p Op, operates on the same
+/// bank, it might be possible to combine them, such that everything stays on
+/// the vector register bank.
+/// \return The node that would replace \p BC, if the combine
+/// is possible.
+static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC,
+ SelectionDAG &DAG) {
+ SDValue Op = BC->getOperand(0);
+ EVT DstVT = BC->getValueType(0);
+
+ // The only vector instruction that can produce a scalar (remember,
+ // since the bitcast was about to be turned into VMOVDRR, the source
+ // type is i64) from a vector is EXTRACT_VECTOR_ELT.
+ // Moreover, we can do this combine only if there is one use.
+ // Finally, if the destination type is not a vector, there is not
+ // much point in forcing everything onto the vector bank.
+ if (!DstVT.isVector() || Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ !Op.hasOneUse())
+ return SDValue();
+
+ // If the index is not constant, we would introduce an additional
+ // multiply for the new index that would not be folded away.
+ // Give up in that case.
+ ConstantSDNode *Index = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ if (!Index)
+ return SDValue();
+ unsigned DstNumElt = DstVT.getVectorNumElements();
+
+ // Compute the new index.
+ const APInt &APIntIndex = Index->getAPIntValue();
+ APInt NewIndex(APIntIndex.getBitWidth(), DstNumElt);
+ NewIndex *= APIntIndex;
+ // Check if the new constant index fits into i32.
+ if (NewIndex.getBitWidth() > 32)
+ return SDValue();
+
+ // vMTy (bitcast (i64 extractelt vNi64 src, i32 index)) ->
+ // vMTy (extractsubvector (vNxMTy bitcast vNi64 src), i32 index*M)
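+ // For example (illustrative values): with DstVT = v2f32 (M = 2) and
+ //   i64 extractelt (v2i64 src), i32 1
+ // this becomes
+ //   v2f32 extractsubvector (v4f32 bitcast src), i32 2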
+ SDLoc dl(Op);
+ SDValue ExtractSrc = Op.getOperand(0);
+ EVT VecVT = EVT::getVectorVT(
+ *DAG.getContext(), DstVT.getScalarType(),
+ ExtractSrc.getValueType().getVectorNumElements() * DstNumElt);
+ SDValue BitCast = DAG.getNode(ISD::BITCAST, dl, VecVT, ExtractSrc);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DstVT, BitCast,
+ DAG.getConstant(NewIndex.getZExtValue(), dl, MVT::i32));
+}
+
/// ExpandBITCAST - If the target supports VFP, this function is called to
/// expand a bit convert where either the source or destination type is i64 to
/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
// Turn i64->f64 into VMOVDRR.
if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
+ // Do not force values to GPRs (this is what VMOVDRR does for the inputs)
+ // if we can combine the bitcast with its source.
+ if (SDValue Val = CombineVMOVDRRCandidateWithVecOp(N, DAG))
+ return Val;
+
SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
DAG.getConstant(0, dl, MVT::i32));
SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
"Unknown shift to lower!");
// We only lower SRA, SRL of 1 here, all others use generic lowering.
- if (!isa<ConstantSDNode>(N->getOperand(1)) ||
- cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1)
+ if (!isOneConstant(N->getOperand(1)))
return SDValue();
// If we are in thumb mode, we don't have RRX.
// just use VDUPLANE. We can only do this if the lane being extracted
// is at a constant index, as the VDUP from lane instructions only have
// constant-index forms.
+ ConstantSDNode *constIndex;
if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
- isa<ConstantSDNode>(Value->getOperand(1))) {
+ (constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1)))) {
// We need to create a new undef vector to use for the VDUPLANE if the
// size of the vector from which we get the value is different than the
// size of the vector that we need to create. We will insert the element
// such that the register coalescer will remove unnecessary copies.
if (VT != Value->getOperand(0).getValueType()) {
- ConstantSDNode *constIndex;
- constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1));
- assert(constIndex && "The index is not a constant!");
unsigned index = constIndex->getAPIntValue().getLimitedValue() %
VT.getVectorNumElements();
N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
}
SDValue ARMTargetLowering::LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG,
+ bool Signed,
SDValue &Chain) const {
EVT VT = Op.getValueType();
assert((VT == MVT::i32 || VT == MVT::i64) &&
const auto &TLI = DAG.getTargetLoweringInfo();
const char *Name = nullptr;
- Name = (VT == MVT::i32) ? "__rt_udiv" : "__rt_udiv64";
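+ // Note: unlike the AEABI helpers, the Windows __rt_{s,u}div routines take
+ // the divisor before the dividend (per the Windows on ARM ABI) and return
+ // both the quotient and the remainder.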
+ if (Signed)
+ Name = (VT == MVT::i32) ? "__rt_sdiv" : "__rt_sdiv64";
+ else
+ Name = (VT == MVT::i32) ? "__rt_udiv" : "__rt_udiv64";
SDValue ES = DAG.getExternalSymbol(Name, TLI.getPointerTy(DL));
return LowerCallTo(CLI).first;
}
-SDValue ARMTargetLowering::LowerDIV_Windows(SDValue Op,
- SelectionDAG &DAG) const {
+SDValue ARMTargetLowering::LowerDIV_Windows(SDValue Op, SelectionDAG &DAG,
+ bool Signed) const {
assert(Op.getValueType() == MVT::i32 &&
"unexpected type for custom lowering DIV");
SDLoc dl(Op);
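+ // WIN__DBZCHK makes the divide-by-zero check explicit: it traps when the
+ // divisor operand is zero, before the runtime helper is reached.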
SDValue DBZCHK = DAG.getNode(ARMISD::WIN__DBZCHK, dl, MVT::Other,
DAG.getEntryNode(), Op.getOperand(1));
- return LowerWindowsDIVLibCall(Op, DAG, DBZCHK);
+ return LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
}
void ARMTargetLowering::ExpandDIV_Windows(
- SDValue Op, SelectionDAG &DAG,
+ SDValue Op, SelectionDAG &DAG, bool Signed,
SmallVectorImpl<SDValue> &Results) const {
const auto &DL = DAG.getDataLayout();
const auto &TLI = DAG.getTargetLoweringInfo();
SDValue DBZCHK =
DAG.getNode(ARMISD::WIN__DBZCHK, dl, MVT::Other, DAG.getEntryNode(), Or);
- SDValue Result = LowerWindowsDIVLibCall(Op, DAG, DBZCHK);
+ SDValue Result = LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
SDValue Lower = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Result);
SDValue Upper = DAG.getNode(ISD::SRL, dl, MVT::i64, Result,
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
case ISD::MUL: return LowerMUL(Op, DAG);
case ISD::SDIV: return LowerSDIV(Op, DAG);
- case ISD::UDIV:
- if (Subtarget->isTargetWindows())
- return LowerDIV_Windows(Op, DAG);
- return LowerUDIV(Op, DAG);
+ case ISD::UDIV: return LowerUDIV(Op, DAG);
case ISD::ADDC:
case ISD::ADDE:
case ISD::SUBC:
ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);
return;
case ISD::UDIV:
+ case ISD::SDIV:
assert(Subtarget->isTargetWindows() && "can only expand DIV on Windows");
- return ExpandDIV_Windows(SDValue(N, 0), DAG, Results);
+ return ExpandDIV_Windows(SDValue(N, 0), DAG, N->getOpcode() == ISD::SDIV,
+ Results);
}
if (Res.getNode())
Results.push_back(Res);
}
}
- BB->addSuccessor(DispatchBB);
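+ // The dispatch edge is only taken when an exception actually unwinds, so
+ // give it zero probability and renormalize the block's other successors.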
+ BB->addSuccessor(DispatchBB, BranchProbability::getZero());
+ BB->normalizeSuccProbs();
// Find the invoke call and mark all of the callee-saved registers as
// 'implicit defined' so that they're spilled. This prevents code from
// Helper function that checks if N is a null or all ones constant.
static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) {
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
- if (!C)
- return false;
- return AllOnes ? C->isAllOnesValue() : C->isNullValue();
+ return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
}
// Return true if N is conditionally 0 or all ones.
// their position in "to" (Rd).
static SDValue ParseBFI(SDNode *N, APInt &ToMask, APInt &FromMask) {
assert(N->getOpcode() == ARMISD::BFI);
-
+
SDValue From = N->getOperand(1);
ToMask = ~cast<ConstantSDNode>(N->getOperand(2))->getAPIntValue();
FromMask = APInt::getLowBitsSet(ToMask.getBitWidth(), ToMask.countPopulation());
if (BitsProperlyConcatenate(NewToMask, ToMask) &&
BitsProperlyConcatenate(NewFromMask, FromMask))
return V;
-
+
// We've seen a write to some bits, so track it.
CombinedToMask |= NewToMask;
// Keep going...
SDValue From2 = ParseBFI(CombineBFI.getNode(), ToMask2, FromMask2);
assert(From1 == From2);
(void)From2;
-
+
// First, unlink CombineBFI.
DCI.DAG.ReplaceAllUsesWith(CombineBFI, CombineBFI.getOperand(0));
// Then create a new BFI, combining the two together.
// Don't do anything for most intrinsics.
break;
- case Intrinsic::arm_neon_vabds:
- if (!N->getValueType(0).isInteger())
- return SDValue();
- return DAG.getNode(ISD::SABSDIFF, SDLoc(N), N->getValueType(0),
- N->getOperand(1), N->getOperand(2));
- case Intrinsic::arm_neon_vabdu:
- return DAG.getNode(ISD::UABSDIFF, SDLoc(N), N->getValueType(0),
- N->getOperand(1), N->getOperand(2));
-
// Vector shifts: check for immediate versions and lower them.
// Note: This is done during DAG combining instead of DAG legalizing because
// the build_vectors for 64-bit vector element shift counts are generally
SDValue CmpZ = CMOV->getOperand(4);
// The compare must be against zero.
- SDValue Zero = CmpZ->getOperand(1);
- if (!isa<ConstantSDNode>(Zero.getNode()) ||
- !cast<ConstantSDNode>(Zero.getNode())->isNullValue())
+ if (!isNullConstant(CmpZ->getOperand(1)))
return SDValue();
assert(CmpZ->getOpcode() == ARMISD::CMPZ);
return;
case 'J':
- if (Subtarget->isThumb()) { // FIXME thumb2
+ if (Subtarget->isThumb1Only()) {
// This must be a constant between -255 and -1, for negated ADD
// immediates. This can be used in GCC with an "n" modifier that
// prints the negated value, for use with SUB instructions. It is
return;
case 'M':
- if (Subtarget->isThumb()) { // FIXME thumb2
+ if (Subtarget->isThumb1Only()) {
// This must be a multiple of 4 between 0 and 1020, for
// ADD sp + immediate.
if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
Info.opc = ISD::INTRINSIC_W_CHAIN;
// Conservatively set memVT to the entire set of vectors loaded.
auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
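+ // Use the exact size in bits rather than the alloc size (which is padded
+ // out to the ABI alignment) when counting the i64 elements touched.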
- uint64_t NumElts = DL.getTypeAllocSize(I.getType()) / 8;
+ uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Type *ArgTy = I.getArgOperand(ArgI)->getType();
if (!ArgTy->isVectorTy())
break;
- NumElts += DL.getTypeAllocSize(ArgTy) / 8;
+ NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
}
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
Info.ptrVal = I.getArgOperand(0);
TargetLowering::AtomicExpansionKind
ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
unsigned Size = LI->getType()->getPrimitiveSizeInBits();
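+ // A 64-bit atomic load needs only the load-exclusive half of an LL/SC
+ // pair: LDREXD reads the pair atomically, and no store-exclusive retry
+ // loop is required just to load.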
- return ((Size == 64) && !Subtarget->isMClass()) ? AtomicExpansionKind::LLSC
+ return ((Size == 64) && !Subtarget->isMClass()) ? AtomicExpansionKind::LLOnly
: AtomicExpansionKind::None;
}
Type *EltTy = VecTy->getVectorElementType();
const DataLayout &DL = LI->getModule()->getDataLayout();
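+ // Compare the exact bit-size: the alloc size may be rounded up to the ABI
+ // alignment, which could make an oddly-sized vector (e.g. 96 bits) look
+ // like a legal 64- or 128-bit access.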
- unsigned VecSize = DL.getTypeAllocSizeInBits(VecTy);
- bool EltIs64Bits = DL.getTypeAllocSizeInBits(EltTy) == 64;
+ unsigned VecSize = DL.getTypeSizeInBits(VecTy);
+ bool EltIs64Bits = DL.getTypeSizeInBits(EltTy) == 64;
// Skip if we do not have NEON and skip illegal vector types and vector types
// with i64/f64 elements (vldN doesn't support i64/f64 elements).
VectorType *SubVecTy = VectorType::get(EltTy, NumSubElts);
const DataLayout &DL = SI->getModule()->getDataLayout();
- unsigned SubVecSize = DL.getTypeAllocSizeInBits(SubVecTy);
- bool EltIs64Bits = DL.getTypeAllocSizeInBits(EltTy) == 64;
+ unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy);
+ bool EltIs64Bits = DL.getTypeSizeInBits(EltTy) == 64;
// Skip if we do not have NEON and skip illegal vector types and vector types
// with i64/f64 elements (vstN doesn't support i64/f64 elements).