diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 275ae6e5c42..6bddf46da19 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "AArch64ISelLowering.h"
+#include "AArch64CallingConvention.h"
 #include "AArch64MachineFunctionInfo.h"
 #include "AArch64PerfectShuffle.h"
 #include "AArch64Subtarget.h"
@@ -386,6 +387,13 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM)
     setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
   }
 
+  // Make floating-point constants legal for the large code model, so they don't
+  // become loads from the constant pool.
+  if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
+    setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+    setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
+  }
+
   // AArch64 does not have floating-point extending loads, i1 sign-extending
   // load, floating-point truncating stores, or v2i32->v2i16 truncating store.
   setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand);
@@ -1642,7 +1650,7 @@ SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
       (ArgVT == MVT::f64) ? "__sincos_stret" : "__sincosf_stret";
   SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy());
 
-  StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL);
+  StructType *RetTy = StructType::get(ArgTy, ArgTy, nullptr);
   TargetLowering::CallLoweringInfo CLI(DAG);
   CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
     .setCallee(CallingConv::Fast, RetTy, Callee, std::move(Args), 0);
@@ -2106,7 +2114,8 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
       unsigned ArgSize = VA.getValVT().getSizeInBits() / 8;
 
       uint32_t BEAlign = 0;
-      if (ArgSize < 8 && !Subtarget->isLittleEndian())
+      if (!Subtarget->isLittleEndian() && ArgSize < 8 &&
+          !Ins[i].Flags.isInConsecutiveRegs())
         BEAlign = 8 - ArgSize;
 
       int FI = MFI->CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);
@@ -2349,7 +2358,9 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
   // cannot rely on the linker replacing the tail call with a return.
   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
     const GlobalValue *GV = G->getGlobal();
-    if (GV->hasExternalWeakLinkage())
+    const Triple TT(getTargetMachine().getTargetTriple());
+    if (GV->hasExternalWeakLinkage() &&
+        (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
       return false;
   }
 
@@ -2660,7 +2671,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
         unsigned OpSize = Flags.isByVal() ?
             Flags.getByValSize() * 8 : VA.getValVT().getSizeInBits();
         OpSize = (OpSize + 7) / 8;
-        if (!Subtarget->isLittleEndian() && !Flags.isByVal()) {
+        if (!Subtarget->isLittleEndian() && !Flags.isByVal() &&
+            !Flags.isInConsecutiveRegs()) {
           if (OpSize < 8)
             BEAlign = 8 - OpSize;
         }
@@ -4615,19 +4627,21 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
       // The extraction can just take the second half
       Src.ShuffleVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT,
                                    Src.ShuffleVec,
-                                   DAG.getIntPtrConstant(NumSrcElts));
+                                   DAG.getConstant(NumSrcElts, MVT::i64));
       Src.WindowBase = -NumSrcElts;
     } else if (Src.MaxElt < NumSrcElts) {
       // The extraction can just take the first half
-      Src.ShuffleVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT,
-                                   Src.ShuffleVec, DAG.getIntPtrConstant(0));
+      Src.ShuffleVec =
+          DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
+                      DAG.getConstant(0, MVT::i64));
     } else {
       // An actual VEXT is needed
-      SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT,
-                                     Src.ShuffleVec, DAG.getIntPtrConstant(0));
+      SDValue VEXTSrc1 =
+          DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
+                      DAG.getConstant(0, MVT::i64));
       SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT,
                                      Src.ShuffleVec,
-                                     DAG.getIntPtrConstant(NumSrcElts));
+                                     DAG.getConstant(NumSrcElts, MVT::i64));
       unsigned Imm = Src.MinElt * getExtFactor(VEXTSrc1);
 
       Src.ShuffleVec = DAG.getNode(AArch64ISD::EXT, dl, DestVT, VEXTSrc1,
@@ -6948,7 +6962,8 @@ static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N,
   return SDValue();
 }
 
-static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
+                                     const AArch64Subtarget *Subtarget) {
   // First try to optimize away the conversion when it's conditionally from
   // a constant. Vectors only.
   SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG);
@@ -6967,7 +6982,7 @@ static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG) {
   // conversion, use an fp load and an AdvSIMD scalar {S|U}CVTF instead.
   // This eliminates an "integer-to-vector-move" UOP and improves throughput.
   SDValue N0 = N->getOperand(0);
-  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
+  if (Subtarget->hasNEON() && ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
       // Do not change the width of a volatile load.
       !cast<LoadSDNode>(N0)->isVolatile()) {
     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
@@ -7756,9 +7771,9 @@ static SDValue performExtendCombine(SDNode *N,
   EVT InNVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getVectorElementType(),
                                LoVT.getVectorNumElements());
   Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src,
-                   DAG.getIntPtrConstant(0));
+                   DAG.getConstant(0, MVT::i64));
   Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src,
-                   DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+                   DAG.getConstant(InNVT.getVectorNumElements(), MVT::i64));
 
   Lo = DAG.getNode(N->getOpcode(), DL, LoVT, Lo);
   Hi = DAG.getNode(N->getOpcode(), DL, HiVT, Hi);
@@ -7880,9 +7895,9 @@ static SDValue performSTORECombine(SDNode *N,
   EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
                                 NumElts);
   SDValue SubVector0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
-                                   DAG.getIntPtrConstant(0));
+                                   DAG.getConstant(0, MVT::i64));
   SDValue SubVector1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
-                                   DAG.getIntPtrConstant(NumElts));
+                                   DAG.getConstant(NumElts, MVT::i64));
   SDValue BasePtr = S->getBasePtr();
   SDValue NewST1 =
       DAG.getStore(S->getChain(), DL, SubVector0, BasePtr, S->getPointerInfo(),
@@ -8478,6 +8493,12 @@ static SDValue performSelectCombine(SDNode *N, SelectionDAG &DAG) {
   // largest real NEON comparison is 64-bits per lane, which means the result is
   // at most 32-bits and an illegal vector. Just bail out for now.
   EVT SrcVT = N0.getOperand(0).getValueType();
+
+  // Don't try to do this optimization when the setcc itself has i1 operands.
+  // There are no legal vectors of i1, so this would be pointless.
+  if (SrcVT == MVT::i1)
+    return SDValue();
+
   int NumMaskElts = ResVT.getSizeInBits() / SrcVT.getSizeInBits();
   if (!ResVT.isVector() || NumMaskElts == 0)
     return SDValue();
@@ -8518,7 +8539,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
     return performMulCombine(N, DAG, DCI, Subtarget);
   case ISD::SINT_TO_FP:
   case ISD::UINT_TO_FP:
-    return performIntToFpCombine(N, DAG);
+    return performIntToFpCombine(N, DAG, Subtarget);
   case ISD::OR:
     return performORCombine(N, DCI, Subtarget);
   case ISD::INTRINSIC_WO_CHAIN:
@@ -8696,13 +8717,12 @@ bool AArch64TargetLowering::getPostIndexedAddressParts(
 
 static void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                                   SelectionDAG &DAG) {
-  if (N->getValueType(0) != MVT::i16)
-    return;
-
   SDLoc DL(N);
   SDValue Op = N->getOperand(0);
-  assert(Op.getValueType() == MVT::f16 &&
-         "Inconsistent bitcast? Only 16-bit types should be i16 or f16");
+
+  if (N->getValueType(0) != MVT::i16 || Op.getValueType() != MVT::f16)
+    return;
+
   Op = SDValue(
       DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f32,
                          DAG.getUNDEF(MVT::i32), Op,
@@ -8732,6 +8752,12 @@ bool AArch64TargetLowering::useLoadStackGuardNode() const {
   return true;
 }
 
+bool AArch64TargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const {
+  // Combine multiple FDIVs with the same divisor into multiple FMULs by the
+  // reciprocal if there are three or more FDIVs.
+  return NumUsers > 2;
+}
+
 TargetLoweringBase::LegalizeTypeAction
 AArch64TargetLowering::getPreferredVectorAction(EVT VT) const {
   MVT SVT = VT.getSimpleVT();
@@ -8836,3 +8862,8 @@ Value *AArch64TargetLowering::emitStoreConditional(IRBuilder<> &Builder,
                        Val, Stxr->getFunctionType()->getParamType(0)),
                    Addr);
 }
+
+bool AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters(
+    Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
+  return Ty->isArrayTy();
+}
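
Note on the LowerFSINCOS hunk: beyond the NULL -> nullptr cleanup, the surrounding code builds a call to Darwin's __sincos_stret, which returns sine and cosine together as a two-element struct in FP registers. A minimal source-level sketch of the pattern this serves (the function and names below are illustrative, not from the patch; the pairing happens when sin and cos of the same operand both survive to the DAG):

    #include <cmath>

    // sin and cos of the same value can be merged into ISD::FSINCOS,
    // which on MachO becomes a single __sincos_stret libcall yielding
    // both results, instead of two separate calls.
    void polar_to_cart(double r, double theta, double &x, double &y) {
      x = r * std::cos(theta);
      y = r * std::sin(theta);
    }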
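
Note on the combineRepeatedFPDivisors override: the DAG combiner consults this hook (only when unsafe FP math is enabled) to decide whether several divisions by a common divisor should be rewritten as one reciprocal plus multiplies; returning NumUsers > 2 puts the break-even point at three divisions. A sketch of the shape it targets, assuming fast-math is in effect:

    // Under fast-math, the three fdivs below can become a single fdiv
    // computing 1.0 / d followed by three cheaper fmuls; with only two
    // divisions the hook reports the rewrite as unprofitable.
    void normalize(double d, double &a, double &b, double &c) {
      a /= d;
      b /= d;
      c /= d;
    }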
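
Note on functionArgumentNeedsConsecutiveRegisters and the new isInConsecutiveRegs checks: AAPCS64 requires a homogeneous aggregate to occupy consecutive FP registers or be passed as one contiguous block on the stack, and Clang typically lowers such aggregates to IR array arguments, which is what Ty->isArrayTy() keys on; the big-endian BEAlign exemptions above then avoid shifting those pieces within their 8-byte stack slots. A hedged source-level example of an argument this affects (names are illustrative):

    // A homogeneous floating-point aggregate: under AAPCS64 it travels
    // in four consecutive registers (s0-s3) or, failing that, as one
    // contiguous stack block rather than independently placed pieces.
    struct Vec4 { float v[4]; };
    float sum(Vec4 q) { return q.v[0] + q.v[1] + q.v[2] + q.v[3]; }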