addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
}
-static TargetLoweringObjectFile *createTLOF(const Triple &TT) {
- if (TT.isOSBinFormatMachO())
- return new TargetLoweringObjectFileMachO();
- if (TT.isOSWindows())
- return new TargetLoweringObjectFileCOFF();
- return new ARMElfTargetObjectFile();
-}
-
-ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
- : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))) {
+ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM)
+ : TargetLowering(TM) {
Subtarget = &TM.getSubtarget<ARMSubtarget>();
RegInfo = TM.getSubtargetImpl()->getRegisterInfo();
Itins = TM.getSubtargetImpl()->getInstrItineraryData();
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
}
- // v8 adds f64 <-> f16 conversion. Before that it should be expanded.
- if (!Subtarget->hasV8Ops()) {
+ // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
+ if (!Subtarget->hasFPARMv8() || Subtarget->isFPOnlySP()) {
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
}
if (Subtarget->hasSinCos()) {
setLibcallName(RTLIB::SINCOS_F32, "sincosf");
setLibcallName(RTLIB::SINCOS_F64, "sincos");
- if (Subtarget->getTargetTriple().getOS() == Triple::IOS) {
+ if (Subtarget->getTargetTriple().isiOS()) {
// For iOS, we don't want to the normal expansion of a libcall to
// sincos. We want to issue a libcall to __sincos_stret.
setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
}
}
- // ARMv8 implements a lot of rounding-like FP operations.
- if (Subtarget->hasV8Ops()) {
- static MVT RoundingTypes[] = {MVT::f32, MVT::f64};
- for (const auto Ty : RoundingTypes) {
- setOperationAction(ISD::FFLOOR, Ty, Legal);
- setOperationAction(ISD::FCEIL, Ty, Legal);
- setOperationAction(ISD::FROUND, Ty, Legal);
- setOperationAction(ISD::FTRUNC, Ty, Legal);
- setOperationAction(ISD::FNEARBYINT, Ty, Legal);
- setOperationAction(ISD::FRINT, Ty, Legal);
+ // FP-ARMv8 implements a lot of rounding-like FP operations.
+ if (Subtarget->hasFPARMv8()) {
+ setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::f32, Legal);
+ setOperationAction(ISD::FROUND, MVT::f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
+ setOperationAction(ISD::FRINT, MVT::f32, Legal);
+ if (!Subtarget->isFPOnlySP()) {
+ setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
+ setOperationAction(ISD::FCEIL, MVT::f64, Legal);
+ setOperationAction(ISD::FROUND, MVT::f64, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
+ setOperationAction(ISD::FRINT, MVT::f64, Legal);
}
}
// We have target-specific dag combine patterns for the following nodes:
// True if this byval aggregate will be split between registers
// and memory.
unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
- unsigned CurByValIdx = CCInfo.getInRegsParamsProceed();
+ unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
if (CurByValIdx < ByValArgsCount) {
if (Copies.count(UseChain.getNode()))
// Second CopyToReg
Copy = *UI;
- else
+ else {
+ // We are at the top of this chain.
+ // If the copy has a glue operand, we conservatively assume it
+ // isn't safe to perform a tail call.
+ if (UI->getOperand(UI->getNumOperands()-1).getValueType() == MVT::Glue)
+ return false;
// First CopyToReg
TCChain = UseChain;
+ }
}
} else if (Copy->getOpcode() == ISD::BITCAST) {
// f32 returned in a single GPR.
Copy = *Copy->use_begin();
if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
return false;
+ // If the copy has a glue operand, we conservatively assume it isn't safe to
+ // perform a tail call.
+ if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
+ return false;
TCChain = Copy->getOperand(0);
} else {
return false;
if (Flags.isByVal()) {
unsigned ExtraArgRegsSize;
unsigned ExtraArgRegsSaveSize;
- computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsProceed(),
+ computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsProcessed(),
Flags.getByValSize(),
ExtraArgRegsSize, ExtraArgRegsSaveSize);
else if (RegVT == MVT::v2f64)
RC = &ARM::QPRRegClass;
else if (RegVT == MVT::i32)
- RC = AFI->isThumb1OnlyFunction() ?
- (const TargetRegisterClass*)&ARM::tGPRRegClass :
- (const TargetRegisterClass*)&ARM::GPRRegClass;
+ RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
+ : &ARM::GPRRegClass;
else
llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
// Since they could be overwritten by lowering of arguments in case of
// a tail call.
if (Flags.isByVal()) {
- unsigned CurByValIndex = CCInfo.getInRegsParamsProceed();
+ unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
ByValStoreOffset = RoundUpToAlignment(ByValStoreOffset, Flags.getByValAlign());
int FrameIndex = StoreByValRegs(
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
return CFP->getValueAPF().isPosZero();
}
+ } else if (Op->getOpcode() == ISD::BITCAST &&
+ Op->getValueType(0) == MVT::f64) {
+ // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64)
+ // created by LowerConstantFP().
+ SDValue BitcastOp = Op->getOperand(0);
+ if (BitcastOp->getOpcode() == ARMISD::VMOVIMM) {
+ SDValue MoveOp = BitcastOp->getOperand(0);
+ if (MoveOp->getOpcode() == ISD::TargetConstant &&
+ cast<ConstantSDNode>(MoveOp)->getZExtValue() == 0) {
+ return true;
+ }
+ }
}
return false;
}
// select c, a, b
// We only do this in unsafe-fp-math, because signed zeros and NaNs are
// handled differently than the original code sequence.
- if (getTargetMachine().Options.UnsafeFPMath && LHS == TrueVal &&
- RHS == FalseVal) {
- if (CC == ISD::SETOGT || CC == ISD::SETUGT)
- return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal);
- if (CC == ISD::SETOLT || CC == ISD::SETULT)
- return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal);
+ if (getTargetMachine().Options.UnsafeFPMath) {
+ if (LHS == TrueVal && RHS == FalseVal) {
+ if (CC == ISD::SETOGT || CC == ISD::SETUGT)
+ return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal);
+ if (CC == ISD::SETOLT || CC == ISD::SETULT)
+ return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal);
+ } else if (LHS == FalseVal && RHS == TrueVal) {
+ if (CC == ISD::SETOLT || CC == ISD::SETULT)
+ return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal);
+ if (CC == ISD::SETOGT || CC == ISD::SETUGT)
+ return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal);
+ }
}
bool swpCmpOps = false;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// Pair of floats / doubles used to pass the result.
- StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL);
+ StructType *RetTy = StructType::get(ArgTy, ArgTy, nullptr);
// Create stack object for sret.
const uint64_t ByteSize = TLI.getDataLayout()->getTypeAllocSize(RetTy);
ARMConstantPoolMBB::Create(F->getContext(), DispatchBB, PCLabelId, PCAdj);
unsigned CPI = MCP->getConstantPoolIndex(CPV, 4);
- const TargetRegisterClass *TRC = isThumb ?
- (const TargetRegisterClass*)&ARM::tGPRRegClass :
- (const TargetRegisterClass*)&ARM::GPRRegClass;
+ const TargetRegisterClass *TRC = isThumb ? &ARM::tGPRRegClass
+ : &ARM::GPRRegClass;
// Grab constant pool and fixed stack memory operands.
MachineMemOperand *CPMMO =
.addReg(NewVReg2, RegState::Kill)
.addReg(NewVReg3, RegState::Kill));
unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tADDrSPi), NewVReg5)
- .addFrameIndex(FI)
- .addImm(36)); // &jbuf[1] :: pc
+ BuildMI(*MBB, MI, dl, TII->get(ARM::tADDframe), NewVReg5)
+ .addFrameIndex(FI)
+ .addImm(36); // &jbuf[1] :: pc
AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi))
.addReg(NewVReg4, RegState::Kill)
.addReg(NewVReg5, RegState::Kill)
MachineFrameInfo *MFI = MF->getFrameInfo();
int FI = MFI->getFunctionContextIndex();
- const TargetRegisterClass *TRC = Subtarget->isThumb() ?
- (const TargetRegisterClass*)&ARM::tGPRRegClass :
- (const TargetRegisterClass*)&ARM::GPRnopcRegClass;
+ const TargetRegisterClass *TRC = Subtarget->isThumb() ? &ARM::tGPRRegClass
+ : &ARM::GPRnopcRegClass;
// Get a mapping of the call site numbers to all of the landing pads they're
// associated with.
for (std::vector<MachineBasicBlock*>::iterator
I = LPadList.begin(), E = LPadList.end(); I != E; ++I) {
MachineBasicBlock *CurMBB = *I;
- if (SeenMBBs.insert(CurMBB))
+ if (SeenMBBs.insert(CurMBB).second)
DispContBB->addSuccessor(CurMBB);
}
// Select the correct opcode and register class for unit size load/store
bool IsNeon = UnitSize >= 8;
- TRC = (IsThumb1 || IsThumb2) ? (const TargetRegisterClass *)&ARM::tGPRRegClass
- : (const TargetRegisterClass *)&ARM::GPRRegClass;
+ TRC = (IsThumb1 || IsThumb2) ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
if (IsNeon)
- VecTRC = UnitSize == 16
- ? (const TargetRegisterClass *)&ARM::DPairRegClass
- : UnitSize == 8
- ? (const TargetRegisterClass *)&ARM::DPRRegClass
- : nullptr;
+ VecTRC = UnitSize == 16 ? &ARM::DPairRegClass
+ : UnitSize == 8 ? &ARM::DPRRegClass
+ : nullptr;
unsigned BytesLeft = SizeVal % UnitSize;
unsigned LoopSize = SizeVal - BytesLeft;
MachineRegisterInfo &MRI = Fn->getRegInfo();
// In Thumb mode S must not be specified if source register is the SP or
// PC and if destination register is the SP, so restrict register class
- unsigned NewRsbDstReg = MRI.createVirtualRegister(isThumb2 ?
- (const TargetRegisterClass*)&ARM::rGPRRegClass :
- (const TargetRegisterClass*)&ARM::GPRRegClass);
+ unsigned NewRsbDstReg =
+ MRI.createVirtualRegister(isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass);
// Transfer the remainder of BB and its successor edges to sinkMBB.
SinkBB->splice(SinkBB->begin(), BB,
void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
SDNode *Node) const {
- if (!MI->hasPostISelHook()) {
- assert(!convertAddSubFlagsOpcode(MI->getOpcode()) &&
- "Pseudo flag-setting opcodes must be marked with 'hasPostISelHook'");
- return;
- }
-
const MCInstrDesc *MCID = &MI->getDesc();
// Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB,
// RSC. Coming out of isel, they have an implicit CPSR def, but the optional
return SDValue();
}
-/// PerformSTORECombine - Target-specific dag combine xforms for
-/// ISD::STORE.
-static SDValue PerformSTORECombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI) {
- StoreSDNode *St = cast<StoreSDNode>(N);
- if (St->isVolatile())
- return SDValue();
-
- // Optimize trunc store (of multiple scalars) to shuffle and store. First,
- // pack all of the elements in one place. Next, store to memory in fewer
- // chunks.
- SDValue StVal = St->getValue();
- EVT VT = StVal.getValueType();
- if (St->isTruncatingStore() && VT.isVector()) {
- SelectionDAG &DAG = DCI.DAG;
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- EVT StVT = St->getMemoryVT();
- unsigned NumElems = VT.getVectorNumElements();
- assert(StVT != VT && "Cannot truncate to the same type");
- unsigned FromEltSz = VT.getVectorElementType().getSizeInBits();
- unsigned ToEltSz = StVT.getVectorElementType().getSizeInBits();
-
- // From, To sizes and ElemCount must be pow of two
- if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz)) return SDValue();
-
- // We are going to use the original vector elt for storing.
- // Accumulated smaller vector elements must be a multiple of the store size.
- if (0 != (NumElems * FromEltSz) % ToEltSz) return SDValue();
-
- unsigned SizeRatio = FromEltSz / ToEltSz;
- assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits());
-
- // Create a type on which we perform the shuffle.
- EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(),
- NumElems*SizeRatio);
- assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
-
- SDLoc DL(St);
- SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);
- SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
- for (unsigned i = 0; i < NumElems; ++i)
- ShuffleVec[i] = TLI.isBigEndian() ? (i+1) * SizeRatio - 1 : i * SizeRatio;
-
- // Can't shuffle using an illegal type.
- if (!TLI.isTypeLegal(WideVecVT)) return SDValue();
-
- SDValue Shuff = DAG.getVectorShuffle(WideVecVT, DL, WideVec,
- DAG.getUNDEF(WideVec.getValueType()),
- ShuffleVec.data());
- // At this point all of the data is stored at the bottom of the
- // register. We now need to save it to mem.
-
- // Find the largest store unit
- MVT StoreType = MVT::i8;
- for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE;
- tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) {
- MVT Tp = (MVT::SimpleValueType)tp;
- if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz)
- StoreType = Tp;
- }
- // Didn't find a legal store type.
- if (!TLI.isTypeLegal(StoreType))
- return SDValue();
-
- // Bitcast the original vector into a vector of store-size units
- EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(),
- StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits());
- assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
- SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff);
- SmallVector<SDValue, 8> Chains;
- SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8,
- TLI.getPointerTy());
- SDValue BasePtr = St->getBasePtr();
-
- // Perform one or more big stores into memory.
- unsigned E = (ToEltSz*NumElems)/StoreType.getSizeInBits();
- for (unsigned I = 0; I < E; I++) {
- SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
- StoreType, ShuffWide,
- DAG.getIntPtrConstant(I));
- SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr,
- St->getPointerInfo(), St->isVolatile(),
- St->isNonTemporal(), St->getAlignment());
- BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
- Increment);
- Chains.push_back(Ch);
- }
- return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
- }
-
- if (!ISD::isNormalStore(St))
- return SDValue();
-
- // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and
- // ARM stores of arguments in the same cache line.
- if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
- StVal.getNode()->hasOneUse()) {
- SelectionDAG &DAG = DCI.DAG;
- bool isBigEndian = DAG.getTargetLoweringInfo().isBigEndian();
- SDLoc DL(St);
- SDValue BasePtr = St->getBasePtr();
- SDValue NewST1 = DAG.getStore(St->getChain(), DL,
- StVal.getNode()->getOperand(isBigEndian ? 1 : 0 ),
- BasePtr, St->getPointerInfo(), St->isVolatile(),
- St->isNonTemporal(), St->getAlignment());
-
- SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
- DAG.getConstant(4, MVT::i32));
- return DAG.getStore(NewST1.getValue(0), DL,
- StVal.getNode()->getOperand(isBigEndian ? 0 : 1),
- OffsetPtr, St->getPointerInfo(), St->isVolatile(),
- St->isNonTemporal(),
- std::min(4U, St->getAlignment() / 2));
- }
-
- if (StVal.getValueType() != MVT::i64 ||
- StVal.getNode()->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
- return SDValue();
-
- // Bitcast an i64 store extracted from a vector to f64.
- // Otherwise, the i64 value will be legalized to a pair of i32 values.
- SelectionDAG &DAG = DCI.DAG;
- SDLoc dl(StVal);
- SDValue IntVec = StVal.getOperand(0);
- EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
- IntVec.getValueType().getVectorNumElements());
- SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec);
- SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
- Vec, StVal.getOperand(1));
- dl = SDLoc(N);
- SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt);
- // Make the DAGCombiner fold the bitcasts.
- DCI.AddToWorklist(Vec.getNode());
- DCI.AddToWorklist(ExtElt.getNode());
- DCI.AddToWorklist(V.getNode());
- return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
- St->getPointerInfo(), St->isVolatile(),
- St->isNonTemporal(), St->getAlignment(),
- St->getAAInfo());
-}
-
/// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node
/// are normal, non-volatile loads. If so, it is profitable to bitcast an
/// i64 vector to have f64 elements, since the value can then be loaded
/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP and
/// NEON load/store intrinsics to merge base address updates.
+/// The caller is assumed to have checked legality.
static SDValue CombineBaseUpdate(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
- if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
- return SDValue();
-
SelectionDAG &DAG = DCI.DAG;
bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
}
// Create the new updating load/store node.
+ // First, create an SDVTList for the new updating node's results.
EVT Tys[6];
unsigned NumResultVecs = (isLoad ? NumVecs : 0);
unsigned n;
Tys[n++] = MVT::i32;
Tys[n] = MVT::Other;
SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs+2));
+
+ // Then, gather the new node's operands.
SmallVector<SDValue, 8> Ops;
Ops.push_back(N->getOperand(0)); // incoming chain
Ops.push_back(N->getOperand(AddrOpIdx));
for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i) {
Ops.push_back(N->getOperand(i));
}
- MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
+ MemSDNode *MemInt = cast<MemSDNode>(N);
SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys,
Ops, MemInt->getMemoryVT(),
MemInt->getMemOperand());
return SDValue();
}
+static SDValue PerformVLDCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
+ return SDValue();
+
+ return CombineBaseUpdate(N, DCI);
+}
+
/// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a
/// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic
/// are also VDUPLANEs. If so, combine them to a vldN-dup operation and
return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
}
+/// PerformSTORECombine - Target-specific dag combine xforms for
+/// ISD::STORE.
+static SDValue PerformSTORECombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ StoreSDNode *St = cast<StoreSDNode>(N);
+ if (St->isVolatile())
+ return SDValue();
+
+ // Optimize trunc store (of multiple scalars) to shuffle and store. First,
+ // pack all of the elements in one place. Next, store to memory in fewer
+ // chunks.
+ SDValue StVal = St->getValue();
+ EVT VT = StVal.getValueType();
+ if (St->isTruncatingStore() && VT.isVector()) {
+ SelectionDAG &DAG = DCI.DAG;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT StVT = St->getMemoryVT();
+ unsigned NumElems = VT.getVectorNumElements();
+ assert(StVT != VT && "Cannot truncate to the same type");
+ unsigned FromEltSz = VT.getVectorElementType().getSizeInBits();
+ unsigned ToEltSz = StVT.getVectorElementType().getSizeInBits();
+
+ // From, To sizes and ElemCount must be pow of two
+ if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz)) return SDValue();
+
+ // We are going to use the original vector elt for storing.
+ // Accumulated smaller vector elements must be a multiple of the store size.
+ if (0 != (NumElems * FromEltSz) % ToEltSz) return SDValue();
+
+ unsigned SizeRatio = FromEltSz / ToEltSz;
+ assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits());
+
+ // Create a type on which we perform the shuffle.
+ EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(),
+ NumElems*SizeRatio);
+ assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
+
+ SDLoc DL(St);
+ SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);
+ SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
+ for (unsigned i = 0; i < NumElems; ++i)
+ ShuffleVec[i] = TLI.isBigEndian() ? (i+1) * SizeRatio - 1 : i * SizeRatio;
+
+ // Can't shuffle using an illegal type.
+ if (!TLI.isTypeLegal(WideVecVT)) return SDValue();
+
+ SDValue Shuff = DAG.getVectorShuffle(WideVecVT, DL, WideVec,
+ DAG.getUNDEF(WideVec.getValueType()),
+ ShuffleVec.data());
+ // At this point all of the data is stored at the bottom of the
+ // register. We now need to save it to mem.
+
+ // Find the largest store unit
+ MVT StoreType = MVT::i8;
+ for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE;
+ tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) {
+ MVT Tp = (MVT::SimpleValueType)tp;
+ if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz)
+ StoreType = Tp;
+ }
+ // Didn't find a legal store type.
+ if (!TLI.isTypeLegal(StoreType))
+ return SDValue();
+
+ // Bitcast the original vector into a vector of store-size units
+ EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(),
+ StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits());
+ assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
+ SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff);
+ SmallVector<SDValue, 8> Chains;
+ SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8,
+ TLI.getPointerTy());
+ SDValue BasePtr = St->getBasePtr();
+
+ // Perform one or more big stores into memory.
+ unsigned E = (ToEltSz*NumElems)/StoreType.getSizeInBits();
+ for (unsigned I = 0; I < E; I++) {
+ SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ StoreType, ShuffWide,
+ DAG.getIntPtrConstant(I));
+ SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr,
+ St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(), St->getAlignment());
+ BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
+ Increment);
+ Chains.push_back(Ch);
+ }
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
+ }
+
+ if (!ISD::isNormalStore(St))
+ return SDValue();
+
+ // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and
+ // ARM stores of arguments in the same cache line.
+ if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
+ StVal.getNode()->hasOneUse()) {
+ SelectionDAG &DAG = DCI.DAG;
+ bool isBigEndian = DAG.getTargetLoweringInfo().isBigEndian();
+ SDLoc DL(St);
+ SDValue BasePtr = St->getBasePtr();
+ SDValue NewST1 = DAG.getStore(St->getChain(), DL,
+ StVal.getNode()->getOperand(isBigEndian ? 1 : 0 ),
+ BasePtr, St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(), St->getAlignment());
+
+ SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
+ DAG.getConstant(4, MVT::i32));
+ return DAG.getStore(NewST1.getValue(0), DL,
+ StVal.getNode()->getOperand(isBigEndian ? 0 : 1),
+ OffsetPtr, St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(),
+ std::min(4U, St->getAlignment() / 2));
+ }
+
+ if (StVal.getValueType() == MVT::i64 &&
+ StVal.getNode()->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+
+ // Bitcast an i64 store extracted from a vector to f64.
+ // Otherwise, the i64 value will be legalized to a pair of i32 values.
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc dl(StVal);
+ SDValue IntVec = StVal.getOperand(0);
+ EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
+ IntVec.getValueType().getVectorNumElements());
+ SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec);
+ SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
+ Vec, StVal.getOperand(1));
+ dl = SDLoc(N);
+ SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt);
+ // Make the DAGCombiner fold the bitcasts.
+ DCI.AddToWorklist(Vec.getNode());
+ DCI.AddToWorklist(ExtElt.getNode());
+ DCI.AddToWorklist(V.getNode());
+ return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
+ St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(), St->getAlignment(),
+ St->getAAInfo());
+ }
+
+ return SDValue();
+}
+
// isConstVecPow2 - Return true if each vector element is a power of 2, all
// elements are the same constant, C, and Log2(C) ranges from 1 to 32.
static bool isConstVecPow2(SDValue ConstVec, bool isSigned, uint64_t &C)
case ARMISD::VLD2DUP:
case ARMISD::VLD3DUP:
case ARMISD::VLD4DUP:
- return CombineBaseUpdate(N, DCI);
+ return PerformVLDCombine(N, DCI);
case ARMISD::BUILD_VECTOR:
return PerformARMBUILD_VECTORCombine(N, DCI);
case ISD::INTRINSIC_VOID:
case Intrinsic::arm_neon_vst2lane:
case Intrinsic::arm_neon_vst3lane:
case Intrinsic::arm_neon_vst4lane:
- return CombineBaseUpdate(N, DCI);
+ return PerformVLDCombine(N, DCI);
default: break;
}
break;
return RCPair(0U, &ARM::hGPRRegClass);
break;
case 'r':
+ if (Subtarget->isThumb1Only())
+ return RCPair(0U, &ARM::tGPRRegClass);
return RCPair(0U, &ARM::GPRRegClass);
case 'w':
if (VT == MVT::Other)
SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
getPointerTy());
- Type *RetTy = (Type*)StructType::get(Ty, Ty, NULL);
+ Type *RetTy = (Type*)StructType::get(Ty, Ty, nullptr);
SDLoc dl(Op);
TargetLowering::CallLoweringInfo CLI(DAG);
}
// Based on http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
-void ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
+Instruction* ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
AtomicOrdering Ord, bool IsStore,
bool IsLoad) const {
if (!getInsertFencesForAtomic())
- return;
+ return nullptr;
switch (Ord) {
case NotAtomic:
llvm_unreachable("Invalid fence: unordered/non-atomic");
case Monotonic:
case Acquire:
- return; // Nothing to do
+ return nullptr; // Nothing to do
case SequentiallyConsistent:
if (!IsStore)
- return; // Nothing to do
- /*FALLTHROUGH*/
+ return nullptr; // Nothing to do
+ /*FALLTHROUGH*/
case Release:
case AcquireRelease:
if (Subtarget->isSwift())
- makeDMB(Builder, ARM_MB::ISHST);
+ return makeDMB(Builder, ARM_MB::ISHST);
// FIXME: add a comment with a link to documentation justifying this.
else
- makeDMB(Builder, ARM_MB::ISH);
- return;
+ return makeDMB(Builder, ARM_MB::ISH);
}
+ llvm_unreachable("Unknown fence ordering in emitLeadingFence");
}
-void ARMTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
+Instruction* ARMTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
AtomicOrdering Ord, bool IsStore,
bool IsLoad) const {
if (!getInsertFencesForAtomic())
- return;
+ return nullptr;
switch (Ord) {
case NotAtomic:
llvm_unreachable("Invalid fence: unordered/not-atomic");
case Monotonic:
case Release:
- return; // Nothing to do
+ return nullptr; // Nothing to do
case Acquire:
case AcquireRelease:
- case SequentiallyConsistent:
- makeDMB(Builder, ARM_MB::ISH);
- return;
+ case SequentiallyConsistent:
+ return makeDMB(Builder, ARM_MB::ISH);
}
+ llvm_unreachable("Unknown fence ordering in emitTrailingFence");
}
// Loads and stores less than 64-bits are already atomic; ones above that
// are doomed anyway, so defer to the default libcall and blame the OS when
// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
// anything for those.
+// FIXME: ldrd and strd are atomic if the CPU has LPAE (e.g. A15 has that
+// guarantee, see DDI0406C ARM architecture reference manual,
+// sections A8.8.72-74 LDRD)
bool ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
unsigned Size = LI->getType()->getPrimitiveSizeInBits();
return (Size == 64) && !Subtarget->isMClass();
// This has so far only been implemented for MachO.
bool ARMTargetLowering::useLoadStackGuardNode() const {
- return Subtarget->getTargetTriple().getObjectFormat() == Triple::MachO;
+ return Subtarget->isTargetMachO();
+}
+
+bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
+ unsigned &Cost) const {
+ // If we do not have NEON, vector types are not natively supported.
+ if (!Subtarget->hasNEON())
+ return false;
+
+ // Floating point values and vector values map to the same register file.
+ // Therefore, althought we could do a store extract of a vector type, this is
+ // better to leave at float as we have more freedom in the addressing mode for
+ // those.
+ if (VectorTy->isFPOrFPVectorTy())
+ return false;
+
+ // If the index is unknown at compile time, this is very expensive to lower
+ // and it is not possible to combine the store with the extract.
+ if (!isa<ConstantInt>(Idx))
+ return false;
+
+ assert(VectorTy->isVectorTy() && "VectorTy is not a vector type");
+ unsigned BitWidth = cast<VectorType>(VectorTy)->getBitWidth();
+ // We can do a store + vector extract on any vector that fits perfectly in a D
+ // or Q register.
+ if (BitWidth == 64 || BitWidth == 128) {
+ Cost = 0;
+ return true;
+ }
+ return false;
}
Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,