#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Type.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
#include <utility>
using namespace llvm;
addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
}
-static TargetLoweringObjectFile *createTLOF(const Triple &TT) {
- if (TT.isOSBinFormatMachO())
- return new TargetLoweringObjectFileMachO();
- if (TT.isOSWindows())
- return new TargetLoweringObjectFileCOFF();
- return new ARMElfTargetObjectFile();
-}
-
-ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM)
- : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))) {
- Subtarget = &TM.getSubtarget<ARMSubtarget>();
- RegInfo = TM.getSubtargetImpl()->getRegisterInfo();
- Itins = TM.getSubtargetImpl()->getInstrItineraryData();
+ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
+ const ARMSubtarget &STI)
+ : TargetLowering(TM), Subtarget(&STI) {
+ RegInfo = Subtarget->getRegisterInfo();
+ Itins = Subtarget->getInstrItineraryData();
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
addRegisterClass(MVT::f64, &ARM::DPRRegClass);
}
- for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
- VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
- for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
- InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
- setTruncStoreAction((MVT::SimpleValueType)VT,
- (MVT::SimpleValueType)InnerVT, Expand);
- setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand);
- setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand);
- setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);
+ for (MVT VT : MVT::vector_valuetypes()) {
+ for (MVT InnerVT : MVT::vector_valuetypes()) {
+ setTruncStoreAction(VT, InnerVT, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
+ }
- setOperationAction(ISD::MULHS, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::SMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::MULHU, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::UMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::MULHS, VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::MULHU, VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, VT, Expand);
- setOperationAction(ISD::BSWAP, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::BSWAP, VT, Expand);
}
setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
setTargetDAGCombine(ISD::FP_TO_SINT);
setTargetDAGCombine(ISD::FP_TO_UINT);
setTargetDAGCombine(ISD::FDIV);
+ setTargetDAGCombine(ISD::LOAD);
// It is legal to extload from v4i8 to v4i16 or v4i32.
- MVT Tys[6] = {MVT::v8i8, MVT::v4i8, MVT::v2i8,
- MVT::v4i16, MVT::v2i16,
- MVT::v2i32};
- for (unsigned i = 0; i < 6; ++i) {
- setLoadExtAction(ISD::EXTLOAD, Tys[i], Legal);
- setLoadExtAction(ISD::ZEXTLOAD, Tys[i], Legal);
- setLoadExtAction(ISD::SEXTLOAD, Tys[i], Legal);
+ for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16,
+ MVT::v2i32}) {
+ for (MVT VT : MVT::integer_vector_valuetypes()) {
+ setLoadExtAction(ISD::EXTLOAD, VT, Ty, Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, Ty, Legal);
+ setLoadExtAction(ISD::SEXTLOAD, VT, Ty, Legal);
+ }
}
}
setOperationAction(ISD::FRINT, MVT::f64, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand);
setOperationAction(ISD::FFLOOR, MVT::f64, Expand);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::f64, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::f64, Custom);
setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
}
- computeRegisterProperties();
+ computeRegisterProperties(Subtarget->getRegisterInfo());
// ARM does not have floating-point extending loads.
- setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
- setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand);
+ for (MVT VT : MVT::fp_valuetypes()) {
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
+ }
// ... or truncating stores
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
// ARM does not have i1 sign extending load.
- setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ for (MVT VT : MVT::integer_valuetypes())
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
// ARM supports all 4 flavors of integer indexed load / store.
if (!Subtarget->isThumb1Only()) {
// Various VFP goodness
if (!TM.Options.UseSoftFloat && !Subtarget->isThumb1Only()) {
- // int <-> fp are custom expanded into bit_convert + ARMISD ops.
- if (Subtarget->hasVFP2()) {
- setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
- setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
- setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
- }
-
// FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
if (!Subtarget->hasFPARMv8() || Subtarget->isFPOnlySP()) {
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
// of the difficulty prior to coalescing of modeling operand register classes
// due to the common occurrence of cross class copies and subregister insertions
// and extractions.
-std::pair<const TargetRegisterClass*, uint8_t>
-ARMTargetLowering::findRepresentativeClass(MVT VT) const{
+std::pair<const TargetRegisterClass *, uint8_t>
+ARMTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
+ MVT VT) const {
const TargetRegisterClass *RRC = nullptr;
uint8_t Cost = 1;
switch (VT.SimpleTy) {
default:
- return TargetLowering::findRepresentativeClass(VT);
+ return TargetLowering::findRepresentativeClass(TRI, VT);
// Use DPR as representative register class for all floating point
// and vector types. Since there are 32 SPR registers and 32 DPR registers so
// the cost is 1 for both f32 and f64.
case ARMISD::RBIT: return "ARMISD::RBIT";
- case ARMISD::FTOSI: return "ARMISD::FTOSI";
- case ARMISD::FTOUI: return "ARMISD::FTOUI";
- case ARMISD::SITOF: return "ARMISD::SITOF";
- case ARMISD::UITOF: return "ARMISD::UITOF";
-
case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
case ARMISD::RRX: return "ARMISD::RRX";
return TargetLowering::getRegClassFor(VT);
}
+// memcpy, and other memory intrinsics, typically tries to use LDM/STM if the
+// source/dest is aligned and the copy size is large enough. We therefore want
+// to align such objects passed to memory intrinsics.
+bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
+ unsigned &PrefAlign) const {
+ if (!isa<MemIntrinsic>(CI))
+ return false;
+ MinSize = 8;
+ // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
+ // cycle faster than 4-byte aligned LDM.
+ PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4);
+ return true;
+}
+
// Create a fast isel object.
FastISel *
ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
return ARM::createFastISel(funcInfo, libInfo);
}
-/// getMaximalGlobalOffset - Returns the maximal possible offset which can
-/// be used for loads / stores from the global.
-unsigned ARMTargetLowering::getMaximalGlobalOffset() const {
- return (Subtarget->isThumb1Only() ? 127 : 4095);
-}
-
Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
unsigned NumVals = N->getNumValues();
if (!NumVals)
// Load are scheduled for latency even if there instruction itinerary
// is not available.
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
if (MCID.getNumDefs() == 0)
// True if this byval aggregate will be split between registers
// and memory.
unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
- unsigned CurByValIdx = CCInfo.getInRegsParamsProceed();
+ unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
if (CurByValIdx < ByValArgsCount) {
// FIXME: handle tail calls differently.
unsigned CallOpc;
- bool HasMinSizeAttr = MF.getFunction()->getAttributes().hasAttribute(
- AttributeSet::FunctionIndex, Attribute::MinSize);
+ bool HasMinSizeAttr = MF.getFunction()->hasFnAttribute(Attribute::MinSize);
if (Subtarget->isThumb()) {
if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
CallOpc = ARMISD::CALL_NOLINK;
// Add a register mask operand representing the call-preserved registers.
if (!isTailCall) {
const uint32_t *Mask;
- const TargetRegisterInfo *TRI =
- getTargetMachine().getSubtargetImpl()->getRegisterInfo();
- const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo*>(TRI);
+ const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
if (isThisReturn) {
// For 'this' returns, use the R0-preserving mask if applicable
- Mask = ARI->getThisReturnPreservedMask(CallConv);
+ Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
if (!Mask) {
// Set isThisReturn to false if the calling convention is not one that
// allows 'returned' to be modeled in this way, so LowerCallResult does
// not try to pass 'this' straight through
isThisReturn = false;
- Mask = ARI->getCallPreservedMask(CallConv);
+ Mask = ARI->getCallPreservedMask(MF, CallConv);
}
} else
- Mask = ARI->getCallPreservedMask(CallConv);
+ Mask = ARI->getCallPreservedMask(MF, CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
/// on the stack. Remember the next parameter register to allocate,
/// and then confiscate the rest of the parameter registers to insure
/// this.
-void
-ARMTargetLowering::HandleByVal(
- CCState *State, unsigned &size, unsigned Align) const {
- unsigned reg = State->AllocateReg(GPRArgRegs, 4);
+void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
+ unsigned Align) const {
assert((State->getCallOrPrologue() == Prologue ||
State->getCallOrPrologue() == Call) &&
"unhandled ParmContext");
- if ((ARM::R0 <= reg) && (reg <= ARM::R3)) {
- if (Subtarget->isAAPCS_ABI() && Align > 4) {
- unsigned AlignInRegs = Align / 4;
- unsigned Waste = (ARM::R4 - reg) % AlignInRegs;
- for (unsigned i = 0; i < Waste; ++i)
- reg = State->AllocateReg(GPRArgRegs, 4);
- }
- if (reg != 0) {
- unsigned excess = 4 * (ARM::R4 - reg);
-
- // Special case when NSAA != SP and parameter size greater than size of
- // all remained GPR regs. In that case we can't split parameter, we must
- // send it to stack. We also must set NCRN to R4, so waste all
- // remained registers.
- const unsigned NSAAOffset = State->getNextStackOffset();
- if (Subtarget->isAAPCS_ABI() && NSAAOffset != 0 && size > excess) {
- while (State->AllocateReg(GPRArgRegs, 4))
- ;
- return;
- }
+ // Byval (as with any stack) slots are always at least 4 byte aligned.
+ Align = std::max(Align, 4U);
- // First register for byval parameter is the first register that wasn't
- // allocated before this method call, so it would be "reg".
- // If parameter is small enough to be saved in range [reg, r4), then
- // the end (first after last) register would be reg + param-size-in-regs,
- // else parameter would be splitted between registers and stack,
- // end register would be r4 in this case.
- unsigned ByValRegBegin = reg;
- unsigned ByValRegEnd = (size < excess) ? reg + size/4 : (unsigned)ARM::R4;
- State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
- // Note, first register is allocated in the beginning of function already,
- // allocate remained amount of registers we need.
- for (unsigned i = reg+1; i != ByValRegEnd; ++i)
- State->AllocateReg(GPRArgRegs, 4);
- // A byval parameter that is split between registers and memory needs its
- // size truncated here.
- // In the case where the entire structure fits in registers, we set the
- // size in memory to zero.
- if (size < excess)
- size = 0;
- else
- size -= excess;
- }
+ unsigned Reg = State->AllocateReg(GPRArgRegs);
+ if (!Reg)
+ return;
+
+ unsigned AlignInRegs = Align / 4;
+ unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
+ for (unsigned i = 0; i < Waste; ++i)
+ Reg = State->AllocateReg(GPRArgRegs);
+
+ if (!Reg)
+ return;
+
+ unsigned Excess = 4 * (ARM::R4 - Reg);
+
+ // Special case when NSAA != SP and parameter size greater than size of
+ // all remained GPR regs. In that case we can't split parameter, we must
+ // send it to stack. We also must set NCRN to R4, so waste all
+ // remained registers.
+ const unsigned NSAAOffset = State->getNextStackOffset();
+ if (NSAAOffset != 0 && Size > Excess) {
+ while (State->AllocateReg(GPRArgRegs))
+ ;
+ return;
}
+
+ // First register for byval parameter is the first register that wasn't
+ // allocated before this method call, so it would be "reg".
+ // If parameter is small enough to be saved in range [reg, r4), then
+ // the end (first after last) register would be reg + param-size-in-regs,
+ // else parameter would be splitted between registers and stack,
+ // end register would be r4 in this case.
+ unsigned ByValRegBegin = Reg;
+ unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
+ State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
+ // Note, first register is allocated in the beginning of function already,
+ // allocate remained amount of registers we need.
+ for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
+ State->AllocateReg(GPRArgRegs);
+ // A byval parameter that is split between registers and memory needs its
+ // size truncated here.
+ // In the case where the entire structure fits in registers, we set the
+ // size in memory to zero.
+ Size = std::max<int>(Size - Excess, 0);
}
+
/// MatchingStackOffset - Return true if the given stack call argument is
/// already available in the same position (relatively) of the caller's
/// incoming argument stack.
if (isCalleeStructRet || isCallerStructRet)
return false;
- // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo::
+ // FIXME: Completely disable sibcall for Thumb1 since ThumbRegisterInfo::
// emitEpilogue is not ready for them. Thumb tail calls also use t2B, as
// the Thumb1 16-bit unconditional branch doesn't have sufficient relocation
// support in the assembler and linker to be used. This would need to be
// cannot rely on the linker replacing the tail call with a return.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
const GlobalValue *GV = G->getGlobal();
- if (GV->hasExternalWeakLinkage())
+ const Triple TT(getTargetMachine().getTargetTriple());
+ if (GV->hasExternalWeakLinkage() &&
+ (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
return false;
}
// the caller's fixed stack objects.
MachineFrameInfo *MFI = MF.getFrameInfo();
const MachineRegisterInfo *MRI = &MF.getRegInfo();
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
i != e;
++i, ++realArgIdx) {
return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
}
-void
-ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
- unsigned InRegsParamRecordIdx,
- unsigned ArgSize,
- unsigned &ArgRegsSize,
- unsigned &ArgRegsSaveSize)
- const {
- unsigned NumGPRs;
- if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
- unsigned RBegin, REnd;
- CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
- NumGPRs = REnd - RBegin;
- } else {
- unsigned int firstUnalloced;
- firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs,
- sizeof(GPRArgRegs) /
- sizeof(GPRArgRegs[0]));
- NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0;
- }
-
- unsigned Align = MF.getTarget()
- .getSubtargetImpl()
- ->getFrameLowering()
- ->getStackAlignment();
- ArgRegsSize = NumGPRs * 4;
-
- // If parameter is split between stack and GPRs...
- if (NumGPRs && Align > 4 &&
- (ArgRegsSize < ArgSize ||
- InRegsParamRecordIdx >= CCInfo.getInRegsParamsCount())) {
- // Add padding for part of param recovered from GPRs. For example,
- // if Align == 8, its last byte must be at address K*8 - 1.
- // We need to do it, since remained (stack) part of parameter has
- // stack alignment, and we need to "attach" "GPRs head" without gaps
- // to it:
- // Stack:
- // |---- 8 bytes block ----| |---- 8 bytes block ----| |---- 8 bytes...
- // [ [padding] [GPRs head] ] [ Tail passed via stack ....
- //
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- unsigned Padding =
- OffsetToAlignment(ArgRegsSize + AFI->getArgRegsSaveSize(), Align);
- ArgRegsSaveSize = ArgRegsSize + Padding;
- } else
- // We don't need to extend regs save size for byval parameters if they
- // are passed via GPRs only.
- ArgRegsSaveSize = ArgRegsSize;
-}
-
// The remaining GPRs hold either the beginning of variable-argument
// data, or the beginning of an aggregate passed by value (usually
// byval). Either way, we allocate stack slots adjacent to the data
SDLoc dl, SDValue &Chain,
const Value *OrigArg,
unsigned InRegsParamRecordIdx,
- unsigned OffsetFromOrigArg,
- unsigned ArgOffset,
- unsigned ArgSize,
- bool ForceMutable,
- unsigned ByValStoreOffset,
- unsigned TotalArgRegsSaveSize) const {
-
+ int ArgOffset,
+ unsigned ArgSize) const {
// Currently, two use-cases possible:
// Case #1. Non-var-args function, and we meet first byval parameter.
// Setup first unallocated register as first byval register;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- unsigned firstRegToSaveIndex, lastRegToSaveIndex;
unsigned RBegin, REnd;
if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
- firstRegToSaveIndex = RBegin - ARM::R0;
- lastRegToSaveIndex = REnd - ARM::R0;
} else {
- firstRegToSaveIndex = CCInfo.getFirstUnallocated
- (GPRArgRegs, array_lengthof(GPRArgRegs));
- lastRegToSaveIndex = 4;
- }
-
- unsigned ArgRegsSize, ArgRegsSaveSize;
- computeRegArea(CCInfo, MF, InRegsParamRecordIdx, ArgSize,
- ArgRegsSize, ArgRegsSaveSize);
-
- // Store any by-val regs to their spots on the stack so that they may be
- // loaded by deferencing the result of formal parameter pointer or va_next.
- // Note: once stack area for byval/varargs registers
- // was initialized, it can't be initialized again.
- if (ArgRegsSaveSize) {
- unsigned Padding = ArgRegsSaveSize - ArgRegsSize;
-
- if (Padding) {
- assert(AFI->getStoredByValParamsPadding() == 0 &&
- "The only parameter may be padded.");
- AFI->setStoredByValParamsPadding(Padding);
- }
-
- int FrameIndex = MFI->CreateFixedObject(ArgRegsSaveSize,
- Padding +
- ByValStoreOffset -
- (int64_t)TotalArgRegsSaveSize,
- false);
- SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy());
- if (Padding) {
- MFI->CreateFixedObject(Padding,
- ArgOffset + ByValStoreOffset -
- (int64_t)ArgRegsSaveSize,
- false);
- }
-
- SmallVector<SDValue, 4> MemOps;
- for (unsigned i = 0; firstRegToSaveIndex < lastRegToSaveIndex;
- ++firstRegToSaveIndex, ++i) {
- const TargetRegisterClass *RC;
- if (AFI->isThumb1OnlyFunction())
- RC = &ARM::tGPRRegClass;
- else
- RC = &ARM::GPRRegClass;
+ unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
+ RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx];
+ REnd = ARM::R4;
+ }
- unsigned VReg = MF.addLiveIn(GPRArgRegs[firstRegToSaveIndex], RC);
- SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
- SDValue Store =
- DAG.getStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo(OrigArg, OffsetFromOrigArg + 4*i),
- false, false, 0);
- MemOps.push_back(Store);
- FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
- DAG.getConstant(4, getPointerTy()));
- }
+ if (REnd != RBegin)
+ ArgOffset = -4 * (ARM::R4 - RBegin);
- AFI->setArgRegsSaveSize(ArgRegsSaveSize + AFI->getArgRegsSaveSize());
+ int FrameIndex = MFI->CreateFixedObject(ArgSize, ArgOffset, false);
+ SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy());
- if (!MemOps.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
- return FrameIndex;
- } else {
- if (ArgSize == 0) {
- // We cannot allocate a zero-byte object for the first variadic argument,
- // so just make up a size.
- ArgSize = 4;
- }
- // This will point to the next argument passed via stack.
- return MFI->CreateFixedObject(
- ArgSize, ArgOffset, !ForceMutable);
+ SmallVector<SDValue, 4> MemOps;
+ const TargetRegisterClass *RC =
+ AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
+
+ for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
+ unsigned VReg = MF.addLiveIn(Reg, RC);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
+ SDValue Store =
+ DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(OrigArg, 4 * i), false, false, 0);
+ MemOps.push_back(Store);
+ FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
+ DAG.getConstant(4, getPointerTy()));
}
+
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
+ return FrameIndex;
}
// Setup stack frame, the va_list pointer will start from.
// the result of va_next.
// If there is no regs to be stored, just point address after last
// argument passed via stack.
- int FrameIndex =
- StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
- CCInfo.getInRegsParamsCount(), 0, ArgOffset, 0, ForceMutable,
- 0, TotalArgRegsSaveSize);
-
+ int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
+ CCInfo.getInRegsParamsCount(),
+ CCInfo.getNextStackOffset(), 4);
AFI->setVarArgsFrameIndex(FrameIndex);
}
isVarArg));
SmallVector<SDValue, 16> ArgValues;
- int lastInsIndex = -1;
SDValue ArgValue;
Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
unsigned CurArgIdx = 0;
// We also increase this value in case of varargs function.
AFI->setArgRegsSaveSize(0);
- unsigned ByValStoreOffset = 0;
- unsigned TotalArgRegsSaveSize = 0;
- unsigned ArgRegsSaveSizeMaxAlign = 4;
-
// Calculate the amount of stack space that we need to allocate to store
// byval and variadic arguments that are passed in registers.
// We need to know this before we allocate the first byval or variadic
// argument, as they will be allocated a stack slot below the CFA (Canonical
// Frame Address, the stack pointer at entry to the function).
+ unsigned ArgRegBegin = ARM::R4;
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())
+ break;
+
CCValAssign &VA = ArgLocs[i];
- if (VA.isMemLoc()) {
- int index = VA.getValNo();
- if (index != lastInsIndex) {
- ISD::ArgFlagsTy Flags = Ins[index].Flags;
- if (Flags.isByVal()) {
- unsigned ExtraArgRegsSize;
- unsigned ExtraArgRegsSaveSize;
- computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsProceed(),
- Flags.getByValSize(),
- ExtraArgRegsSize, ExtraArgRegsSaveSize);
-
- TotalArgRegsSaveSize += ExtraArgRegsSaveSize;
- if (Flags.getByValAlign() > ArgRegsSaveSizeMaxAlign)
- ArgRegsSaveSizeMaxAlign = Flags.getByValAlign();
- CCInfo.nextInRegsParam();
- }
- lastInsIndex = index;
- }
- }
+ unsigned Index = VA.getValNo();
+ ISD::ArgFlagsTy Flags = Ins[Index].Flags;
+ if (!Flags.isByVal())
+ continue;
+
+ assert(VA.isMemLoc() && "unexpected byval pointer in reg");
+ unsigned RBegin, REnd;
+ CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);
+ ArgRegBegin = std::min(ArgRegBegin, RBegin);
+
+ CCInfo.nextInRegsParam();
}
CCInfo.rewindByValRegsInfo();
- lastInsIndex = -1;
+
+ int lastInsIndex = -1;
if (isVarArg && MFI->hasVAStart()) {
- unsigned ExtraArgRegsSize;
- unsigned ExtraArgRegsSaveSize;
- computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsCount(), 0,
- ExtraArgRegsSize, ExtraArgRegsSaveSize);
- TotalArgRegsSaveSize += ExtraArgRegsSaveSize;
+ unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
+ if (RegIdx != array_lengthof(GPRArgRegs))
+ ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
}
- // If the arg regs save area contains N-byte aligned values, the
- // bottom of it must be at least N-byte aligned.
- TotalArgRegsSaveSize = RoundUpToAlignment(TotalArgRegsSaveSize, ArgRegsSaveSizeMaxAlign);
- TotalArgRegsSaveSize = std::min(TotalArgRegsSaveSize, 16U);
+
+ unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
+ AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
- std::advance(CurOrigArg, Ins[VA.getValNo()].OrigArgIndex - CurArgIdx);
- CurArgIdx = Ins[VA.getValNo()].OrigArgIndex;
+ if (Ins[VA.getValNo()].isOrigArg()) {
+ std::advance(CurOrigArg,
+ Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
+ CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();
+ }
// Arguments stored in registers.
if (VA.isRegLoc()) {
EVT RegVT = VA.getLocVT();
else if (RegVT == MVT::v2f64)
RC = &ARM::QPRRegClass;
else if (RegVT == MVT::i32)
- RC = AFI->isThumb1OnlyFunction() ?
- (const TargetRegisterClass*)&ARM::tGPRRegClass :
- (const TargetRegisterClass*)&ARM::GPRRegClass;
+ RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
+ : &ARM::GPRRegClass;
else
llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
assert(VA.isMemLoc());
assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
- int index = ArgLocs[i].getValNo();
+ int index = VA.getValNo();
// Some Ins[] entries become multiple ArgLoc[] entries.
// Process them only once.
// Since they could be overwritten by lowering of arguments in case of
// a tail call.
if (Flags.isByVal()) {
- unsigned CurByValIndex = CCInfo.getInRegsParamsProceed();
-
- ByValStoreOffset = RoundUpToAlignment(ByValStoreOffset, Flags.getByValAlign());
- int FrameIndex = StoreByValRegs(
- CCInfo, DAG, dl, Chain, CurOrigArg,
- CurByValIndex,
- Ins[VA.getValNo()].PartOffset,
- VA.getLocMemOffset(),
- Flags.getByValSize(),
- true /*force mutable frames*/,
- ByValStoreOffset,
- TotalArgRegsSaveSize);
- ByValStoreOffset += Flags.getByValSize();
- ByValStoreOffset = std::min(ByValStoreOffset, 16U);
+ assert(Ins[index].isOrigArg() &&
+ "Byval arguments cannot be implicit");
+ unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
+
+ int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, CurOrigArg,
+ CurByValIndex, VA.getLocMemOffset(),
+ Flags.getByValSize());
InVals.push_back(DAG.getFrameIndex(FrameIndex, getPointerTy()));
CCInfo.nextInRegsParam();
} else {
// inverting the compare condition, swapping 'less' and 'greater') and
// sometimes need to swap the operands to the VSEL (which inverts the
// condition in the sense of firing whenever the previous condition didn't)
- if (getSubtarget()->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
- TrueVal.getValueType() == MVT::f64)) {
+ if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
+ TrueVal.getValueType() == MVT::f64)) {
ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
ARMCC::CondCodes CondCode, CondCode2;
FPCCToARMCC(CC, CondCode, CondCode2);
- // Try to generate VSEL on ARMv8.
- if (getSubtarget()->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
- TrueVal.getValueType() == MVT::f64)) {
- // We can select VMAXNM/VMINNM from a compare followed by a select with the
+ // Try to generate VMAXNM/VMINNM on ARMv8.
+ if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
+ TrueVal.getValueType() == MVT::f64)) {
+ // We can use VMAXNM/VMINNM for a compare followed by a select with the
// same operands, as follows:
- // c = fcmp [ogt, olt, ugt, ult] a, b
+ // c = fcmp [?gt, ?ge, ?lt, ?le] a, b
// select c, a, b
- // We only do this in unsafe-fp-math, because signed zeros and NaNs are
- // handled differently than the original code sequence.
+ // In NoNaNsFPMath the CC will have been changed from, e.g., 'ogt' to 'gt'.
+ // We only do this transformation in UnsafeFPMath and for no-NaNs
+ // comparisons, because signed zeros and NaNs are handled differently than
+ // the original code sequence.
+ // FIXME: There are more cases that can be transformed even with NaNs,
+ // signed zeroes and safe math. E.g. in the following, the result will be
+ // FalseVal if a is a NaN or -0./0. and that's what vmaxnm will give, too.
+ // c = fcmp ogt, a, 0. ; select c, a, 0. => vmaxnm a, 0.
+ // FIXME: There is similar code that allows some extensions in
+ // AArch64TargetLowering::LowerSELECT_CC that should be shared with this
+ // code.
if (getTargetMachine().Options.UnsafeFPMath) {
if (LHS == TrueVal && RHS == FalseVal) {
- if (CC == ISD::SETOGT || CC == ISD::SETUGT)
+ if (CC == ISD::SETGT || CC == ISD::SETGE)
return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal);
- if (CC == ISD::SETOLT || CC == ISD::SETULT)
+ if (CC == ISD::SETLT || CC == ISD::SETLE)
return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal);
} else if (LHS == FalseVal && RHS == TrueVal) {
- if (CC == ISD::SETOLT || CC == ISD::SETULT)
+ if (CC == ISD::SETLT || CC == ISD::SETLE)
return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal);
- if (CC == ISD::SETOGT || CC == ISD::SETUGT)
+ if (CC == ISD::SETGT || CC == ISD::SETGE)
return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal);
}
}
EVT VT = Op.getValueType();
if (VT.isVector())
return LowerVectorFP_TO_INT(Op, DAG);
-
if (Subtarget->isFPOnlySP() && Op.getOperand(0).getValueType() == MVT::f64) {
RTLIB::Libcall LC;
if (Op.getOpcode() == ISD::FP_TO_SINT)
/*isSigned*/ false, SDLoc(Op)).first;
}
- SDLoc dl(Op);
- unsigned Opc;
-
- switch (Op.getOpcode()) {
- default: llvm_unreachable("Invalid opcode!");
- case ISD::FP_TO_SINT:
- Opc = ARMISD::FTOSI;
- break;
- case ISD::FP_TO_UINT:
- Opc = ARMISD::FTOUI;
- break;
- }
- Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0));
- return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
+ return Op;
}
static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
if (VT.isVector())
return LowerVectorINT_TO_FP(Op, DAG);
-
if (Subtarget->isFPOnlySP() && Op.getValueType() == MVT::f64) {
RTLIB::Libcall LC;
if (Op.getOpcode() == ISD::SINT_TO_FP)
/*isSigned*/ false, SDLoc(Op)).first;
}
- SDLoc dl(Op);
- unsigned Opc;
-
- switch (Op.getOpcode()) {
- default: llvm_unreachable("Invalid opcode!");
- case ISD::SINT_TO_FP:
- Opc = ARMISD::SITOF;
- break;
- case ISD::UINT_TO_FP:
- Opc = ARMISD::UITOF;
- break;
- }
-
- Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0));
- return DAG.getNode(Opc, dl, VT, Op);
+ return Op;
}
SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue CC = Op.getOperand(2);
+ EVT CmpVT = Op0.getValueType().changeVectorElementTypeToInteger();
EVT VT = Op.getValueType();
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
SDLoc dl(Op);
TmpOp0 = Op0;
TmpOp1 = Op1;
Opc = ISD::OR;
- Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
- Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp0, TmpOp1);
+ Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0);
+ Op1 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp0, TmpOp1);
break;
case ISD::SETUO: Invert = true; // Fallthrough
case ISD::SETO:
TmpOp0 = Op0;
TmpOp1 = Op1;
Opc = ISD::OR;
- Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
- Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, TmpOp0, TmpOp1);
+ Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0);
+ Op1 = DAG.getNode(ARMISD::VCGE, dl, CmpVT, TmpOp0, TmpOp1);
break;
}
} else {
if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
Opc = ARMISD::VTST;
- Op0 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(0));
- Op1 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(1));
+ Op0 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(0));
+ Op1 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(1));
Invert = !Invert;
}
}
if (SingleOp.getNode()) {
switch (Opc) {
case ARMISD::VCEQ:
- Result = DAG.getNode(ARMISD::VCEQZ, dl, VT, SingleOp); break;
+ Result = DAG.getNode(ARMISD::VCEQZ, dl, CmpVT, SingleOp); break;
case ARMISD::VCGE:
- Result = DAG.getNode(ARMISD::VCGEZ, dl, VT, SingleOp); break;
+ Result = DAG.getNode(ARMISD::VCGEZ, dl, CmpVT, SingleOp); break;
case ARMISD::VCLEZ:
- Result = DAG.getNode(ARMISD::VCLEZ, dl, VT, SingleOp); break;
+ Result = DAG.getNode(ARMISD::VCLEZ, dl, CmpVT, SingleOp); break;
case ARMISD::VCGT:
- Result = DAG.getNode(ARMISD::VCGTZ, dl, VT, SingleOp); break;
+ Result = DAG.getNode(ARMISD::VCGTZ, dl, CmpVT, SingleOp); break;
case ARMISD::VCLTZ:
- Result = DAG.getNode(ARMISD::VCLTZ, dl, VT, SingleOp); break;
+ Result = DAG.getNode(ARMISD::VCLTZ, dl, CmpVT, SingleOp); break;
default:
- Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
+ Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1);
}
} else {
- Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
+ Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1);
}
+ Result = DAG.getSExtOrTrunc(Result, dl, VT);
+
if (Invert)
Result = DAG.getNOT(dl, Result, VT);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// Pair of floats / doubles used to pass the result.
- StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL);
+ StructType *RetTy = StructType::get(ArgTy, ArgTy, nullptr);
// Create stack object for sret.
const uint64_t ByteSize = TLI.getDataLayout()->getTypeAllocSize(RetTy);
void ARMTargetLowering::
SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,
MachineBasicBlock *DispatchBB, int FI) const {
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo *MRI = &MF->getRegInfo();
ARMConstantPoolMBB::Create(F->getContext(), DispatchBB, PCLabelId, PCAdj);
unsigned CPI = MCP->getConstantPoolIndex(CPV, 4);
- const TargetRegisterClass *TRC = isThumb ?
- (const TargetRegisterClass*)&ARM::tGPRRegClass :
- (const TargetRegisterClass*)&ARM::GPRRegClass;
+ const TargetRegisterClass *TRC = isThumb ? &ARM::tGPRRegClass
+ : &ARM::GPRRegClass;
// Grab constant pool and fixed stack memory operands.
MachineMemOperand *CPMMO =
}
}
-MachineBasicBlock *ARMTargetLowering::
-EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo *MRI = &MF->getRegInfo();
MachineFrameInfo *MFI = MF->getFrameInfo();
int FI = MFI->getFunctionContextIndex();
- const TargetRegisterClass *TRC = Subtarget->isThumb() ?
- (const TargetRegisterClass*)&ARM::tGPRRegClass :
- (const TargetRegisterClass*)&ARM::GPRnopcRegClass;
+ const TargetRegisterClass *TRC = Subtarget->isThumb() ? &ARM::tGPRRegClass
+ : &ARM::GPRnopcRegClass;
// Get a mapping of the call site numbers to all of the landing pads they're
// associated with.
for (std::vector<MachineBasicBlock*>::iterator
I = LPadList.begin(), E = LPadList.end(); I != E; ++I) {
MachineBasicBlock *CurMBB = *I;
- if (SeenMBBs.insert(CurMBB))
+ if (SeenMBBs.insert(CurMBB).second)
DispContBB->addSuccessor(CurMBB);
}
// The instruction is gone now.
MI->eraseFromParent();
-
- return MBB;
}
static
// This pseudo instruction has 3 operands: dst, src, size
// We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
// Otherwise, we will generate unrolled scalar copies.
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction::iterator It = BB;
++It;
UnitSize = 2;
} else {
// Check whether we can use NEON instructions.
- if (!MF->getFunction()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex,
- Attribute::NoImplicitFloat) &&
+ if (!MF->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat) &&
Subtarget->hasNEON()) {
if ((Align % 16 == 0) && SizeVal >= 16)
UnitSize = 16;
// Select the correct opcode and register class for unit size load/store
bool IsNeon = UnitSize >= 8;
- TRC = (IsThumb1 || IsThumb2) ? (const TargetRegisterClass *)&ARM::tGPRRegClass
- : (const TargetRegisterClass *)&ARM::GPRRegClass;
+ TRC = (IsThumb1 || IsThumb2) ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
if (IsNeon)
- VecTRC = UnitSize == 16
- ? (const TargetRegisterClass *)&ARM::DPairRegClass
- : UnitSize == 8
- ? (const TargetRegisterClass *)&ARM::DPRRegClass
- : nullptr;
+ VecTRC = UnitSize == 16 ? &ARM::DPairRegClass
+ : UnitSize == 8 ? &ARM::DPRRegClass
+ : nullptr;
unsigned BytesLeft = SizeVal % UnitSize;
unsigned LoopSize = SizeVal - BytesLeft;
// Load an immediate to varEnd.
unsigned varEnd = MRI.createVirtualRegister(TRC);
- if (IsThumb2) {
+ if (Subtarget->useMovt(*MF)) {
unsigned Vtmp = varEnd;
if ((LoopSize & 0xFFFF0000) != 0)
Vtmp = MRI.createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVi16), Vtmp)
- .addImm(LoopSize & 0xFFFF));
+ AddDefaultPred(BuildMI(BB, dl,
+ TII->get(IsThumb2 ? ARM::t2MOVi16 : ARM::MOVi16),
+ Vtmp).addImm(LoopSize & 0xFFFF));
if ((LoopSize & 0xFFFF0000) != 0)
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVTi16), varEnd)
- .addReg(Vtmp).addImm(LoopSize >> 16));
+ AddDefaultPred(BuildMI(BB, dl,
+ TII->get(IsThumb2 ? ARM::t2MOVTi16 : ARM::MOVTi16),
+ varEnd)
+ .addReg(Vtmp)
+ .addImm(LoopSize >> 16));
} else {
MachineConstantPool *ConstantPool = MF->getConstantPool();
Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI,
MachineBasicBlock *MBB) const {
const TargetMachine &TM = getTargetMachine();
- const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
assert(Subtarget->isTargetWindows() &&
MachineBasicBlock *
ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
bool isThumb2 = Subtarget->isThumb2();
switch (MI->getOpcode()) {
MachineRegisterInfo &MRI = Fn->getRegInfo();
// In Thumb mode S must not be specified if source register is the SP or
// PC and if destination register is the SP, so restrict register class
- unsigned NewRsbDstReg = MRI.createVirtualRegister(isThumb2 ?
- (const TargetRegisterClass*)&ARM::rGPRRegClass :
- (const TargetRegisterClass*)&ARM::GPRRegClass);
+ unsigned NewRsbDstReg =
+ MRI.createVirtualRegister(isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass);
// Transfer the remainder of BB and its successor edges to sinkMBB.
SinkBB->splice(SinkBB->begin(), BB,
// Rename pseudo opcodes.
unsigned NewOpc = convertAddSubFlagsOpcode(MI->getOpcode());
if (NewOpc) {
- const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
- getTargetMachine().getSubtargetImpl()->getInstrInfo());
+ const ARMBaseInstrInfo *TII = Subtarget->getInstrInfo();
MCID = &TII->get(NewOpc);
assert(MCID->getNumOperands() == MI->getDesc().getNumOperands() + 1 &&
else
IsLeftOperandMUL = true;
if (MULOp == SDValue())
- return SDValue();
+ return SDValue();
// Figure out the right opcode.
unsigned Opc = MULOp->getOpcode();
unsigned FinalOpc = (Opc == ISD::SMUL_LOHI) ? ARMISD::SMLAL : ARMISD::UMLAL;
// Figure out the high and low input values to the MLAL node.
- SDValue* HiMul = &MULOp;
SDValue* HiAdd = nullptr;
SDValue* LoMul = nullptr;
SDValue* LowAdd = nullptr;
+ // Ensure that ADDE is from high result of ISD::SMUL_LOHI.
+ if ((AddeOp0 != MULOp.getValue(1)) && (AddeOp1 != MULOp.getValue(1)))
+ return SDValue();
+
if (IsLeftOperandMUL)
HiAdd = &AddeOp1;
else
HiAdd = &AddeOp0;
- if (AddcOp0->getOpcode() == Opc) {
+ // Ensure that LoMul and LowAdd are taken from correct ISD::SMUL_LOHI node
+ // whose low result is fed to the ADDC we are checking.
+
+ if (AddcOp0 == MULOp.getValue(0)) {
LoMul = &AddcOp0;
LowAdd = &AddcOp1;
}
- if (AddcOp1->getOpcode() == Opc) {
+ if (AddcOp1 == MULOp.getValue(0)) {
LoMul = &AddcOp1;
LowAdd = &AddcOp0;
}
if (!LoMul)
return SDValue();
- if (LoMul->getNode() != HiMul->getNode())
- return SDValue();
-
// Create the merged node.
SelectionDAG &DAG = DCI.DAG;
unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
unsigned LSB = countTrailingZeros(~InvMask);
unsigned Width = (32 - countLeadingZeros(~InvMask)) - LSB;
- unsigned Mask = (1 << Width)-1;
+ assert(Width <
+ static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&
+ "undefined behavior");
+ unsigned Mask = (1u << Width) - 1;
unsigned Mask2 = N11C->getZExtValue();
if ((Mask & (~Mask2)) == 0)
return DCI.DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0),
return SDValue();
}
-/// PerformSTORECombine - Target-specific dag combine xforms for
-/// ISD::STORE.
-static SDValue PerformSTORECombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI) {
- StoreSDNode *St = cast<StoreSDNode>(N);
- if (St->isVolatile())
- return SDValue();
-
- // Optimize trunc store (of multiple scalars) to shuffle and store. First,
- // pack all of the elements in one place. Next, store to memory in fewer
- // chunks.
- SDValue StVal = St->getValue();
- EVT VT = StVal.getValueType();
- if (St->isTruncatingStore() && VT.isVector()) {
- SelectionDAG &DAG = DCI.DAG;
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- EVT StVT = St->getMemoryVT();
- unsigned NumElems = VT.getVectorNumElements();
- assert(StVT != VT && "Cannot truncate to the same type");
- unsigned FromEltSz = VT.getVectorElementType().getSizeInBits();
- unsigned ToEltSz = StVT.getVectorElementType().getSizeInBits();
-
- // From, To sizes and ElemCount must be pow of two
- if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz)) return SDValue();
-
- // We are going to use the original vector elt for storing.
- // Accumulated smaller vector elements must be a multiple of the store size.
- if (0 != (NumElems * FromEltSz) % ToEltSz) return SDValue();
-
- unsigned SizeRatio = FromEltSz / ToEltSz;
- assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits());
-
- // Create a type on which we perform the shuffle.
- EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(),
- NumElems*SizeRatio);
- assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
-
- SDLoc DL(St);
- SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);
- SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
- for (unsigned i = 0; i < NumElems; ++i)
- ShuffleVec[i] = TLI.isBigEndian() ? (i+1) * SizeRatio - 1 : i * SizeRatio;
-
- // Can't shuffle using an illegal type.
- if (!TLI.isTypeLegal(WideVecVT)) return SDValue();
-
- SDValue Shuff = DAG.getVectorShuffle(WideVecVT, DL, WideVec,
- DAG.getUNDEF(WideVec.getValueType()),
- ShuffleVec.data());
- // At this point all of the data is stored at the bottom of the
- // register. We now need to save it to mem.
-
- // Find the largest store unit
- MVT StoreType = MVT::i8;
- for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE;
- tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) {
- MVT Tp = (MVT::SimpleValueType)tp;
- if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz)
- StoreType = Tp;
- }
- // Didn't find a legal store type.
- if (!TLI.isTypeLegal(StoreType))
- return SDValue();
-
- // Bitcast the original vector into a vector of store-size units
- EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(),
- StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits());
- assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
- SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff);
- SmallVector<SDValue, 8> Chains;
- SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8,
- TLI.getPointerTy());
- SDValue BasePtr = St->getBasePtr();
-
- // Perform one or more big stores into memory.
- unsigned E = (ToEltSz*NumElems)/StoreType.getSizeInBits();
- for (unsigned I = 0; I < E; I++) {
- SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
- StoreType, ShuffWide,
- DAG.getIntPtrConstant(I));
- SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr,
- St->getPointerInfo(), St->isVolatile(),
- St->isNonTemporal(), St->getAlignment());
- BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
- Increment);
- Chains.push_back(Ch);
- }
- return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
- }
-
- if (!ISD::isNormalStore(St))
- return SDValue();
-
- // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and
- // ARM stores of arguments in the same cache line.
- if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
- StVal.getNode()->hasOneUse()) {
- SelectionDAG &DAG = DCI.DAG;
- bool isBigEndian = DAG.getTargetLoweringInfo().isBigEndian();
- SDLoc DL(St);
- SDValue BasePtr = St->getBasePtr();
- SDValue NewST1 = DAG.getStore(St->getChain(), DL,
- StVal.getNode()->getOperand(isBigEndian ? 1 : 0 ),
- BasePtr, St->getPointerInfo(), St->isVolatile(),
- St->isNonTemporal(), St->getAlignment());
-
- SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
- DAG.getConstant(4, MVT::i32));
- return DAG.getStore(NewST1.getValue(0), DL,
- StVal.getNode()->getOperand(isBigEndian ? 0 : 1),
- OffsetPtr, St->getPointerInfo(), St->isVolatile(),
- St->isNonTemporal(),
- std::min(4U, St->getAlignment() / 2));
- }
-
- if (StVal.getValueType() != MVT::i64 ||
- StVal.getNode()->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
- return SDValue();
-
- // Bitcast an i64 store extracted from a vector to f64.
- // Otherwise, the i64 value will be legalized to a pair of i32 values.
- SelectionDAG &DAG = DCI.DAG;
- SDLoc dl(StVal);
- SDValue IntVec = StVal.getOperand(0);
- EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
- IntVec.getValueType().getVectorNumElements());
- SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec);
- SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
- Vec, StVal.getOperand(1));
- dl = SDLoc(N);
- SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt);
- // Make the DAGCombiner fold the bitcasts.
- DCI.AddToWorklist(Vec.getNode());
- DCI.AddToWorklist(ExtElt.getNode());
- DCI.AddToWorklist(V.getNode());
- return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
- St->getPointerInfo(), St->isVolatile(),
- St->isNonTemporal(), St->getAlignment(),
- St->getAAInfo());
-}
-
/// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node
/// are normal, non-volatile loads. If so, it is profitable to bitcast an
/// i64 vector to have f64 elements, since the value can then be loaded
DAG.getUNDEF(VT), NewMask.data());
}
-/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP and
-/// NEON load/store intrinsics to merge base address updates.
+/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP,
+/// NEON load/store intrinsics, and generic vector load/stores, to merge
+/// base address updates.
+/// For generic load/stores, the memory type is assumed to be a vector.
+/// The caller is assumed to have checked legality.
static SDValue CombineBaseUpdate(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
- if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
- return SDValue();
-
SelectionDAG &DAG = DCI.DAG;
- bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
- N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
- unsigned AddrOpIdx = (isIntrinsic ? 2 : 1);
+ const bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
+ N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
+ const bool isStore = N->getOpcode() == ISD::STORE;
+ const unsigned AddrOpIdx = ((isIntrinsic || isStore) ? 2 : 1);
SDValue Addr = N->getOperand(AddrOpIdx);
+ MemSDNode *MemN = cast<MemSDNode>(N);
// Search for a use of the address operand that is an increment.
for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
continue;
// Find the new opcode for the updating load/store.
- bool isLoad = true;
+ bool isLoadOp = true;
bool isLaneOp = false;
unsigned NewOpc = 0;
unsigned NumVecs = 0;
case Intrinsic::arm_neon_vld4lane: NewOpc = ARMISD::VLD4LN_UPD;
NumVecs = 4; isLaneOp = true; break;
case Intrinsic::arm_neon_vst1: NewOpc = ARMISD::VST1_UPD;
- NumVecs = 1; isLoad = false; break;
+ NumVecs = 1; isLoadOp = false; break;
case Intrinsic::arm_neon_vst2: NewOpc = ARMISD::VST2_UPD;
- NumVecs = 2; isLoad = false; break;
+ NumVecs = 2; isLoadOp = false; break;
case Intrinsic::arm_neon_vst3: NewOpc = ARMISD::VST3_UPD;
- NumVecs = 3; isLoad = false; break;
+ NumVecs = 3; isLoadOp = false; break;
case Intrinsic::arm_neon_vst4: NewOpc = ARMISD::VST4_UPD;
- NumVecs = 4; isLoad = false; break;
+ NumVecs = 4; isLoadOp = false; break;
case Intrinsic::arm_neon_vst2lane: NewOpc = ARMISD::VST2LN_UPD;
- NumVecs = 2; isLoad = false; isLaneOp = true; break;
+ NumVecs = 2; isLoadOp = false; isLaneOp = true; break;
case Intrinsic::arm_neon_vst3lane: NewOpc = ARMISD::VST3LN_UPD;
- NumVecs = 3; isLoad = false; isLaneOp = true; break;
+ NumVecs = 3; isLoadOp = false; isLaneOp = true; break;
case Intrinsic::arm_neon_vst4lane: NewOpc = ARMISD::VST4LN_UPD;
- NumVecs = 4; isLoad = false; isLaneOp = true; break;
+ NumVecs = 4; isLoadOp = false; isLaneOp = true; break;
}
} else {
isLaneOp = true;
case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break;
case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break;
case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break;
+ case ISD::LOAD: NewOpc = ARMISD::VLD1_UPD;
+ NumVecs = 1; isLaneOp = false; break;
+ case ISD::STORE: NewOpc = ARMISD::VST1_UPD;
+ NumVecs = 1; isLaneOp = false; isLoadOp = false; break;
}
}
// Find the size of memory referenced by the load/store.
EVT VecTy;
- if (isLoad)
+ if (isLoadOp) {
VecTy = N->getValueType(0);
- else
+ } else if (isIntrinsic) {
VecTy = N->getOperand(AddrOpIdx+1).getValueType();
+ } else {
+ assert(isStore && "Node has to be a load, a store, or an intrinsic!");
+ VecTy = N->getOperand(1).getValueType();
+ }
+
unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
if (isLaneOp)
NumBytes /= VecTy.getVectorNumElements();
continue;
}
+ // OK, we found an ADD we can fold into the base update.
+ // Now, create a _UPD node, taking care of not breaking alignment.
+
+ EVT AlignedVecTy = VecTy;
+ unsigned Alignment = MemN->getAlignment();
+
+ // If this is a less-than-standard-aligned load/store, change the type to
+ // match the standard alignment.
+ // The alignment is overlooked when selecting _UPD variants; and it's
+ // easier to introduce bitcasts here than fix that.
+ // There are 3 ways to get to this base-update combine:
+ // - intrinsics: they are assumed to be properly aligned (to the standard
+ // alignment of the memory type), so we don't need to do anything.
+ // - ARMISD::VLDx nodes: they are only generated from the aforementioned
+ // intrinsics, so, likewise, there's nothing to do.
+ // - generic load/store instructions: the alignment is specified as an
+ // explicit operand, rather than implicitly as the standard alignment
+ // of the memory type (like the intrisics). We need to change the
+ // memory type to match the explicit alignment. That way, we don't
+ // generate non-standard-aligned ARMISD::VLDx nodes.
+ if (isa<LSBaseSDNode>(N)) {
+ if (Alignment == 0)
+ Alignment = 1;
+ if (Alignment < VecTy.getScalarSizeInBits() / 8) {
+ MVT EltTy = MVT::getIntegerVT(Alignment * 8);
+ assert(NumVecs == 1 && "Unexpected multi-element generic load/store.");
+ assert(!isLaneOp && "Unexpected generic load/store lane.");
+ unsigned NumElts = NumBytes / (EltTy.getSizeInBits() / 8);
+ AlignedVecTy = MVT::getVectorVT(EltTy, NumElts);
+ }
+ // Don't set an explicit alignment on regular load/stores that we want
+ // to transform to VLD/VST 1_UPD nodes.
+ // This matches the behavior of regular load/stores, which only get an
+ // explicit alignment if the MMO alignment is larger than the standard
+ // alignment of the memory type.
+ // Intrinsics, however, always get an explicit alignment, set to the
+ // alignment of the MMO.
+ Alignment = 1;
+ }
+
// Create the new updating load/store node.
+ // First, create an SDVTList for the new updating node's results.
EVT Tys[6];
- unsigned NumResultVecs = (isLoad ? NumVecs : 0);
+ unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
unsigned n;
for (n = 0; n < NumResultVecs; ++n)
- Tys[n] = VecTy;
+ Tys[n] = AlignedVecTy;
Tys[n++] = MVT::i32;
Tys[n] = MVT::Other;
SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs+2));
+
+ // Then, gather the new node's operands.
SmallVector<SDValue, 8> Ops;
Ops.push_back(N->getOperand(0)); // incoming chain
Ops.push_back(N->getOperand(AddrOpIdx));
Ops.push_back(Inc);
- for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i) {
- Ops.push_back(N->getOperand(i));
+
+ if (StoreSDNode *StN = dyn_cast<StoreSDNode>(N)) {
+ // Try to match the intrinsic's signature
+ Ops.push_back(StN->getValue());
+ } else {
+ // Loads (and of course intrinsics) match the intrinsics' signature,
+ // so just add all but the alignment operand.
+ for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands() - 1; ++i)
+ Ops.push_back(N->getOperand(i));
+ }
+
+ // For all node types, the alignment operand is always the last one.
+ Ops.push_back(DAG.getConstant(Alignment, MVT::i32));
+
+ // If this is a non-standard-aligned STORE, the penultimate operand is the
+ // stored value. Bitcast it to the aligned type.
+ if (AlignedVecTy != VecTy && N->getOpcode() == ISD::STORE) {
+ SDValue &StVal = Ops[Ops.size()-2];
+ StVal = DAG.getNode(ISD::BITCAST, SDLoc(N), AlignedVecTy, StVal);
}
- MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
+
SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys,
- Ops, MemInt->getMemoryVT(),
- MemInt->getMemOperand());
+ Ops, AlignedVecTy,
+ MemN->getMemOperand());
// Update the uses.
- std::vector<SDValue> NewResults;
- for (unsigned i = 0; i < NumResultVecs; ++i) {
+ SmallVector<SDValue, 5> NewResults;
+ for (unsigned i = 0; i < NumResultVecs; ++i)
NewResults.push_back(SDValue(UpdN.getNode(), i));
+
+ // If this is an non-standard-aligned LOAD, the first result is the loaded
+ // value. Bitcast it to the expected result type.
+ if (AlignedVecTy != VecTy && N->getOpcode() == ISD::LOAD) {
+ SDValue &LdVal = NewResults[0];
+ LdVal = DAG.getNode(ISD::BITCAST, SDLoc(N), VecTy, LdVal);
}
+
NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain
DCI.CombineTo(N, NewResults);
DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
return SDValue();
}
+static SDValue PerformVLDCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
+ return SDValue();
+
+ return CombineBaseUpdate(N, DCI);
+}
+
/// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a
/// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic
/// are also VDUPLANEs. If so, combine them to a vldN-dup operation and
return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
}
+static SDValue PerformLOADCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ EVT VT = N->getValueType(0);
+
+ // If this is a legal vector load, try to combine it into a VLD1_UPD.
+ if (ISD::isNormalLoad(N) && VT.isVector() &&
+ DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return CombineBaseUpdate(N, DCI);
+
+ return SDValue();
+}
+
+/// PerformSTORECombine - Target-specific dag combine xforms for
+/// ISD::STORE.
+static SDValue PerformSTORECombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ StoreSDNode *St = cast<StoreSDNode>(N);
+ if (St->isVolatile())
+ return SDValue();
+
+ // Optimize trunc store (of multiple scalars) to shuffle and store. First,
+ // pack all of the elements in one place. Next, store to memory in fewer
+ // chunks.
+ SDValue StVal = St->getValue();
+ EVT VT = StVal.getValueType();
+ if (St->isTruncatingStore() && VT.isVector()) {
+ SelectionDAG &DAG = DCI.DAG;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT StVT = St->getMemoryVT();
+ unsigned NumElems = VT.getVectorNumElements();
+ assert(StVT != VT && "Cannot truncate to the same type");
+ unsigned FromEltSz = VT.getVectorElementType().getSizeInBits();
+ unsigned ToEltSz = StVT.getVectorElementType().getSizeInBits();
+
+ // From, To sizes and ElemCount must be pow of two
+ if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz)) return SDValue();
+
+ // We are going to use the original vector elt for storing.
+ // Accumulated smaller vector elements must be a multiple of the store size.
+ if (0 != (NumElems * FromEltSz) % ToEltSz) return SDValue();
+
+ unsigned SizeRatio = FromEltSz / ToEltSz;
+ assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits());
+
+ // Create a type on which we perform the shuffle.
+ EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(),
+ NumElems*SizeRatio);
+ assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
+
+ SDLoc DL(St);
+ SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);
+ SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
+ for (unsigned i = 0; i < NumElems; ++i)
+ ShuffleVec[i] = TLI.isBigEndian() ? (i+1) * SizeRatio - 1 : i * SizeRatio;
+
+ // Can't shuffle using an illegal type.
+ if (!TLI.isTypeLegal(WideVecVT)) return SDValue();
+
+ SDValue Shuff = DAG.getVectorShuffle(WideVecVT, DL, WideVec,
+ DAG.getUNDEF(WideVec.getValueType()),
+ ShuffleVec.data());
+ // At this point all of the data is stored at the bottom of the
+ // register. We now need to save it to mem.
+
+ // Find the largest store unit
+ MVT StoreType = MVT::i8;
+ for (MVT Tp : MVT::integer_valuetypes()) {
+ if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz)
+ StoreType = Tp;
+ }
+ // Didn't find a legal store type.
+ if (!TLI.isTypeLegal(StoreType))
+ return SDValue();
+
+ // Bitcast the original vector into a vector of store-size units
+ EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(),
+ StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits());
+ assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
+ SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff);
+ SmallVector<SDValue, 8> Chains;
+ SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8,
+ TLI.getPointerTy());
+ SDValue BasePtr = St->getBasePtr();
+
+ // Perform one or more big stores into memory.
+ unsigned E = (ToEltSz*NumElems)/StoreType.getSizeInBits();
+ for (unsigned I = 0; I < E; I++) {
+ SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ StoreType, ShuffWide,
+ DAG.getIntPtrConstant(I));
+ SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr,
+ St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(), St->getAlignment());
+ BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
+ Increment);
+ Chains.push_back(Ch);
+ }
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
+ }
+
+ if (!ISD::isNormalStore(St))
+ return SDValue();
+
+ // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and
+ // ARM stores of arguments in the same cache line.
+ if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
+ StVal.getNode()->hasOneUse()) {
+ SelectionDAG &DAG = DCI.DAG;
+ bool isBigEndian = DAG.getTargetLoweringInfo().isBigEndian();
+ SDLoc DL(St);
+ SDValue BasePtr = St->getBasePtr();
+ SDValue NewST1 = DAG.getStore(St->getChain(), DL,
+ StVal.getNode()->getOperand(isBigEndian ? 1 : 0 ),
+ BasePtr, St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(), St->getAlignment());
+
+ SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
+ DAG.getConstant(4, MVT::i32));
+ return DAG.getStore(NewST1.getValue(0), DL,
+ StVal.getNode()->getOperand(isBigEndian ? 0 : 1),
+ OffsetPtr, St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(),
+ std::min(4U, St->getAlignment() / 2));
+ }
+
+ if (StVal.getValueType() == MVT::i64 &&
+ StVal.getNode()->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+
+ // Bitcast an i64 store extracted from a vector to f64.
+ // Otherwise, the i64 value will be legalized to a pair of i32 values.
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc dl(StVal);
+ SDValue IntVec = StVal.getOperand(0);
+ EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
+ IntVec.getValueType().getVectorNumElements());
+ SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec);
+ SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
+ Vec, StVal.getOperand(1));
+ dl = SDLoc(N);
+ SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt);
+ // Make the DAGCombiner fold the bitcasts.
+ DCI.AddToWorklist(Vec.getNode());
+ DCI.AddToWorklist(ExtElt.getNode());
+ DCI.AddToWorklist(V.getNode());
+ return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
+ St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(), St->getAlignment(),
+ St->getAAInfo());
+ }
+
+ // If this is a legal vector store, try to combine it into a VST1_UPD.
+ if (ISD::isNormalStore(N) && VT.isVector() &&
+ DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return CombineBaseUpdate(N, DCI);
+
+ return SDValue();
+}
+
// isConstVecPow2 - Return true if each vector element is a power of 2, all
// elements are the same constant, C, and Log2(C) ranges from 1 to 32.
static bool isConstVecPow2(SDValue ConstVec, bool isSigned, uint64_t &C)
MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
- if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32) {
+ unsigned NumLanes = Op.getValueType().getVectorNumElements();
+ if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32 ||
+ NumLanes > 4) {
// These instructions only exist converting from f32 to i32. We can handle
// smaller integers by generating an extra truncate, but larger ones would
- // be lossy.
+ // be lossy. We also can't handle more then 4 lanes, since these intructions
+ // only support v2i32/v4i32 types.
return SDValue();
}
unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
Intrinsic::arm_neon_vcvtfp2fxu;
- unsigned NumLanes = Op.getValueType().getVectorNumElements();
SDValue FixConv = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N),
NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
DAG.getConstant(IntrinsicOpcode, MVT::i32), N0,
case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
case ISD::SELECT_CC: return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget);
case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG);
+ case ISD::LOAD: return PerformLOADCombine(N, DCI);
case ARMISD::VLD2DUP:
case ARMISD::VLD3DUP:
case ARMISD::VLD4DUP:
- return CombineBaseUpdate(N, DCI);
+ return PerformVLDCombine(N, DCI);
case ARMISD::BUILD_VECTOR:
return PerformARMBUILD_VECTORCombine(N, DCI);
case ISD::INTRINSIC_VOID:
case Intrinsic::arm_neon_vst2lane:
case Intrinsic::arm_neon_vst3lane:
case Intrinsic::arm_neon_vst4lane:
- return CombineBaseUpdate(N, DCI);
+ return PerformVLDCombine(N, DCI);
default: break;
}
break;
const Function *F = MF.getFunction();
// See if we can use NEON instructions for this...
- if ((!IsMemset || ZeroMemset) &&
- Subtarget->hasNEON() &&
- !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::NoImplicitFloat)) {
+ if ((!IsMemset || ZeroMemset) && Subtarget->hasNEON() &&
+ !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
bool Fast;
if (Size >= 16 &&
(memOpAlign(SrcAlign, DstAlign, 16) ||
return false;
}
+bool ARMTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
+ EVT VT = ExtVal.getValueType();
+
+ if (!isTypeLegal(VT))
+ return false;
+
+ // Don't create a loadext if we can fold the extension into a wide/long
+ // instruction.
+ // If there's more than one user instruction, the loadext is desirable no
+ // matter what. There can be two uses by the same instruction.
+ if (ExtVal->use_empty() ||
+ !ExtVal->use_begin()->isOnlyUserOf(ExtVal.getNode()))
+ return true;
+
+ SDNode *U = *ExtVal->use_begin();
+ if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB ||
+ U->getOpcode() == ISD::SHL || U->getOpcode() == ARMISD::VSHL))
+ return false;
+
+ return true;
+}
+
bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
return false;
bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
// Thumb2 and ARM modes can use cmn for negative immediates.
if (!Subtarget->isThumb())
- return ARM_AM::getSOImmVal(llvm::abs64(Imm)) != -1;
+ return ARM_AM::getSOImmVal(std::abs(Imm)) != -1;
if (Subtarget->isThumb2())
- return ARM_AM::getT2SOImmVal(llvm::abs64(Imm)) != -1;
+ return ARM_AM::getT2SOImmVal(std::abs(Imm)) != -1;
// Thumb1 doesn't have cmn, and only 8-bit immediates.
return Imm >= 0 && Imm <= 255;
}
/// immediate into a register.
bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const {
// Same encoding for add/sub, just flip the sign.
- int64_t AbsImm = llvm::abs64(Imm);
+ int64_t AbsImm = std::abs(Imm);
if (!Subtarget->isThumb())
return ARM_AM::getSOImmVal(AbsImm) != -1;
if (Subtarget->isThumb2())
typedef std::pair<unsigned, const TargetRegisterClass*> RCPair;
RCPair
-ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+ARMTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ const std::string &Constraint,
MVT VT) const {
if (Constraint.size() == 1) {
// GCC ARM Constraint Letters
return RCPair(0U, &ARM::hGPRRegClass);
break;
case 'r':
+ if (Subtarget->isThumb1Only())
+ return RCPair(0U, &ARM::tGPRRegClass);
return RCPair(0U, &ARM::GPRRegClass);
case 'w':
if (VT == MVT::Other)
if (StringRef("{cc}").equals_lower(Constraint))
return std::make_pair(unsigned(ARM::CPSR), &ARM::CCRRegClass);
- return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+ return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
getPointerTy());
- Type *RetTy = (Type*)StructType::get(Ty, Ty, NULL);
+ Type *RetTy = (Type*)StructType::get(Ty, Ty, nullptr);
SDLoc dl(Op);
TargetLowering::CallLoweringInfo CLI(DAG);
// there can be 1's on either or both "outsides", all the "inside"
// bits must be 0's
- unsigned TO = CountTrailingOnes_32(v);
- unsigned LO = CountLeadingOnes_32(v);
- v = (v >> TO) << TO;
- v = (v << LO) >> LO;
- return v == 0;
+ return isShiftedMask_32(~v);
}
/// isFPImmLegal - Returns true if the target can instruction select the
// For the real atomic operations, we have ldrex/strex up to 32 bits,
// and up to 64 bits on the non-M profiles
-bool ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+TargetLoweringBase::AtomicRMWExpansionKind
+ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
- return Size <= (Subtarget->isMClass() ? 32U : 64U);
+ return (Size <= (Subtarget->isMClass() ? 32U : 64U))
+ ? AtomicRMWExpansionKind::LLSC
+ : AtomicRMWExpansionKind::None;
}
// This has so far only been implemented for MachO.
bool ARMTargetLowering::useLoadStackGuardNode() const {
- return Subtarget->getTargetTriple().getObjectFormat() == Triple::MachO;
+ return Subtarget->isTargetMachO();
+}
+
+bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
+ unsigned &Cost) const {
+ // If we do not have NEON, vector types are not natively supported.
+ if (!Subtarget->hasNEON())
+ return false;
+
+ // Floating point values and vector values map to the same register file.
+ // Therefore, althought we could do a store extract of a vector type, this is
+ // better to leave at float as we have more freedom in the addressing mode for
+ // those.
+ if (VectorTy->isFPOrFPVectorTy())
+ return false;
+
+ // If the index is unknown at compile time, this is very expensive to lower
+ // and it is not possible to combine the store with the extract.
+ if (!isa<ConstantInt>(Idx))
+ return false;
+
+ assert(VectorTy->isVectorTy() && "VectorTy is not a vector type");
+ unsigned BitWidth = cast<VectorType>(VectorTy)->getBitWidth();
+ // We can do a store + vector extract on any vector that fits perfectly in a D
+ // or Q register.
+ if (BitWidth == 64 || BitWidth == 128) {
+ Cost = 0;
+ return true;
+ }
+ return false;
}
Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
return (Members > 0 && Members <= 4);
}
-/// \brief Return true if a type is an AAPCS-VFP homogeneous aggregate.
+/// \brief Return true if a type is an AAPCS-VFP homogeneous aggregate or one of
+/// [N x i32] or [N x i64]. This allows front-ends to skip emitting padding when
+/// passing according to AAPCS rules.
bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters(
Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
if (getEffectiveCallingConv(CallConv, isVarArg) !=
HABaseType Base = HA_UNKNOWN;
uint64_t Members = 0;
- bool result = isHomogeneousAggregate(Ty, Base, Members);
- DEBUG(dbgs() << "isHA: " << result << " "; Ty->dump());
- return result;
+ bool IsHA = isHomogeneousAggregate(Ty, Base, Members);
+ DEBUG(dbgs() << "isHA: " << IsHA << " "; Ty->dump());
+
+ bool IsIntArray = Ty->isArrayTy() && Ty->getArrayElementType()->isIntegerTy();
+ return IsHA || IsIntArray;
}