#define DEBUG_TYPE "arm-isel"
#include "ARMISelLowering.h"
-#include "ARM.h"
#include "ARMCallingConv.h"
#include "ARMConstantPoolValue.h"
#include "ARMMachineFunctionInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
+#include <utility>
using namespace llvm;
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
-// This option should go away when tail calls fully work.
-static cl::opt<bool>
-EnableARMTailCalls("arm-tail-calls", cl::Hidden,
- cl::desc("Generate tail calls (TEMPORARY OPTION)."),
- cl::init(false));
-
cl::opt<bool>
EnableARMLongCalls("arm-long-calls", cl::Hidden,
cl::desc("Generate calls via indirect call instructions"),
}
void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
- addRegisterClass(VT, &ARM::QPRRegClass);
+ addRegisterClass(VT, &ARM::DPairRegClass);
addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
}
static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
- if (TM.getSubtarget<ARMSubtarget>().isTargetDarwin())
+ if (TM.getSubtarget<ARMSubtarget>().isTargetMachO())
return new TargetLoweringObjectFileMachO();
return new ARMElfTargetObjectFile();
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
- if (Subtarget->isTargetDarwin()) {
+ if (Subtarget->isTargetMachO()) {
// Uses VFP for Thumb libfuncs if available.
- if (Subtarget->isThumb() && Subtarget->hasVFP2()) {
+ if (Subtarget->isThumb() && Subtarget->hasVFP2() &&
+ Subtarget->hasARMOps()) {
// Single-precision floating-point arithmetic.
setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
setLibcallName(RTLIB::SRL_I128, 0);
setLibcallName(RTLIB::SRA_I128, 0);
- if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetDarwin()) {
+ if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetMachO()) {
// Double-precision floating-point arithmetic helper functions
// RTABI chapter 4.1.2, Table 2
setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd");
}
// Use divmod compiler-rt calls for iOS 5.0 and later.
- if (Subtarget->getTargetTriple().getOS() == Triple::IOS &&
+ if (Subtarget->getTargetTriple().isiOS() &&
!Subtarget->getTargetTriple().isOSVersionLT(5, 0)) {
setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
}
setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
+ setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
if (Subtarget->hasNEON()) {
addDRTypeForNEON(MVT::v2f32);
setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand);
setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
- // Custom expand long extensions to vectors.
- setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
- setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom);
- setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
- setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
-
// NEON does not have single instruction CTPOP for vectors with element
// types wider than 8-bits. However, custom lowering can leverage the
// v8i8/v16i8 vcnt instruction.
setOperationAction(ISD::SDIV, MVT::i32, Expand);
setOperationAction(ISD::UDIV, MVT::i32, Expand);
}
+
+ // FIXME: Also set divmod for SREM on EABI
setOperationAction(ISD::SREM, MVT::i32, Expand);
setOperationAction(ISD::UREM, MVT::i32, Expand);
- setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
- setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+ // Register based DivRem for AEABI (RTABI 4.2)
+ if (Subtarget->isTargetAEABI()) {
+ setLibcallName(RTLIB::SDIVREM_I8, "__aeabi_idivmod");
+ setLibcallName(RTLIB::SDIVREM_I16, "__aeabi_idivmod");
+ setLibcallName(RTLIB::SDIVREM_I32, "__aeabi_idivmod");
+ setLibcallName(RTLIB::SDIVREM_I64, "__aeabi_ldivmod");
+ setLibcallName(RTLIB::UDIVREM_I8, "__aeabi_uidivmod");
+ setLibcallName(RTLIB::UDIVREM_I16, "__aeabi_uidivmod");
+ setLibcallName(RTLIB::UDIVREM_I32, "__aeabi_uidivmod");
+ setLibcallName(RTLIB::UDIVREM_I64, "__aeabi_uldivmod");
+
+ setLibcallCallingConv(RTLIB::SDIVREM_I8, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SDIVREM_I16, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SDIVREM_I32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SDIVREM_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UDIVREM_I8, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UDIVREM_I16, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UDIVREM_I32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UDIVREM_I64, CallingConv::ARM_AAPCS);
+
+ setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
+ } else {
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+ }
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
- if (!Subtarget->isTargetDarwin()) {
- // Non-Darwin platforms may return values in these registers via the
+ if (!Subtarget->isTargetMachO()) {
+ // Non-MachO platforms may return values in these registers via the
// personality function.
setExceptionPointerRegister(ARM::R0);
setExceptionSelectorRegister(ARM::R1);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
// ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
// the default expansion.
- // FIXME: This should be checking for v6k, not just v6.
- if (Subtarget->hasDataBarrier() ||
- (Subtarget->hasV6Ops() && !Subtarget->isThumb())) {
- // membarrier needs custom lowering; the rest are legal and handled
- // normally.
- setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+ if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) {
+ // ATOMIC_FENCE needs custom lowering; the other 32-bit ones are legal and
+ // handled normally.
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
// Custom lowering for 64-bit ops
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
- // Automatically insert fences (dmb ist) around ATOMIC_SWAP etc.
- setInsertFencesForAtomic(true);
+ // On v8, we have particularly efficient implementations of atomic fences
+ // if they can be combined with nearby atomic loads and stores.
+ if (!Subtarget->hasV8Ops()) {
+ // Automatically insert fences (dmb ist) around ATOMIC_SWAP etc.
+ setInsertFencesForAtomic(true);
+ }
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
} else {
+ // If there's anything we can use as a barrier, go through custom lowering
+ // for ATOMIC_FENCE.
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other,
+ Subtarget->hasAnyDataBarrier() ? Custom : Expand);
+
// Set them all for expansion, which will force libcalls.
- setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
}
}
+
+ // Combine sin / cos into one node or libcall if possible.
+ if (Subtarget->hasSinCos()) {
+ setLibcallName(RTLIB::SINCOS_F32, "sincosf");
+ setLibcallName(RTLIB::SINCOS_F64, "sincos");
+ if (Subtarget->getTargetTriple().getOS() == Triple::IOS) {
+ // For iOS, we don't want to the normal expansion of a libcall to
+ // sincos. We want to issue a libcall to __sincos_stret.
+ setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
+ }
+ }
// We have target-specific dag combine patterns for the following nodes:
// ARMISD::VMOVRRD - No need to call setTargetDAGCombine
setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
}
+static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord,
+ bool isThumb2, unsigned &LdrOpc,
+ unsigned &StrOpc) {
+ static const unsigned LoadBares[4][2] = {{ARM::LDREXB, ARM::t2LDREXB},
+ {ARM::LDREXH, ARM::t2LDREXH},
+ {ARM::LDREX, ARM::t2LDREX},
+ {ARM::LDREXD, ARM::t2LDREXD}};
+ static const unsigned LoadAcqs[4][2] = {{ARM::LDAEXB, ARM::t2LDAEXB},
+ {ARM::LDAEXH, ARM::t2LDAEXH},
+ {ARM::LDAEX, ARM::t2LDAEX},
+ {ARM::LDAEXD, ARM::t2LDAEXD}};
+ static const unsigned StoreBares[4][2] = {{ARM::STREXB, ARM::t2STREXB},
+ {ARM::STREXH, ARM::t2STREXH},
+ {ARM::STREX, ARM::t2STREX},
+ {ARM::STREXD, ARM::t2STREXD}};
+ static const unsigned StoreRels[4][2] = {{ARM::STLEXB, ARM::t2STLEXB},
+ {ARM::STLEXH, ARM::t2STLEXH},
+ {ARM::STLEX, ARM::t2STLEX},
+ {ARM::STLEXD, ARM::t2STLEXD}};
+
+ const unsigned (*LoadOps)[2], (*StoreOps)[2];
+ if (Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent)
+ LoadOps = LoadAcqs;
+ else
+ LoadOps = LoadBares;
+
+ if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent)
+ StoreOps = StoreRels;
+ else
+ StoreOps = StoreBares;
+
+ assert(isPowerOf2_32(Size) && Size <= 8 &&
+ "unsupported size for atomic binary op!");
+
+ LdrOpc = LoadOps[Log2_32(Size)][isThumb2];
+ StrOpc = StoreOps[Log2_32(Size)][isThumb2];
+}
+
// FIXME: It might make sense to define the representative register class as the
// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
// a super-register of SPR, and DPR is a superset if DPR_VFP2. Consequently,
switch (Opcode) {
default: return 0;
case ARMISD::Wrapper: return "ARMISD::Wrapper";
- case ARMISD::WrapperDYN: return "ARMISD::WrapperDYN";
case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC";
case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
case ARMISD::CALL: return "ARMISD::CALL";
case ARMISD::BR_JT: return "ARMISD::BR_JT";
case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
+ case ARMISD::INTRET_FLAG: return "ARMISD::INTRET_FLAG";
case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
case ARMISD::CMP: return "ARMISD::CMP";
case ARMISD::CMN: return "ARMISD::CMN";
case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";
- case ARMISD::MEMBARRIER: return "ARMISD::MEMBARRIER";
case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
case ARMISD::VSHL: return "ARMISD::VSHL";
case ARMISD::VSHRs: return "ARMISD::VSHRs";
case ARMISD::VSHRu: return "ARMISD::VSHRu";
- case ARMISD::VSHLLs: return "ARMISD::VSHLLs";
- case ARMISD::VSHLLu: return "ARMISD::VSHLLu";
- case ARMISD::VSHLLi: return "ARMISD::VSHLLi";
- case ARMISD::VSHRN: return "ARMISD::VSHRN";
case ARMISD::VRSHRs: return "ARMISD::VRSHRs";
case ARMISD::VRSHRu: return "ARMISD::VRSHRu";
case ARMISD::VRSHRN: return "ARMISD::VRSHRN";
case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
case ARMISD::FMAX: return "ARMISD::FMAX";
case ARMISD::FMIN: return "ARMISD::FMIN";
+ case ARMISD::VMAXNM: return "ARMISD::VMAX";
+ case ARMISD::VMINNM: return "ARMISD::VMIN";
case ARMISD::BFI: return "ARMISD::BFI";
case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
bool isThisReturn = false;
bool isSibCall = false;
+
// Disable tail calls if they're not supported.
- if (!EnableARMTailCalls && !Subtarget->supportsTailCall())
+ if (!Subtarget->supportsTailCall() || MF.getTarget().Options.DisableTailCalls)
isTailCall = false;
+
if (isTailCall) {
// Check if it's really possible to do a tail call.
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
MachinePointerInfo(),
- false, false, false, 0);
+ false, false, false,
+ DAG.InferPtrAlignment(AddArg));
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(j, Load));
}
const GlobalValue *GV = G->getGlobal();
isDirect = true;
bool isExt = GV->isDeclaration() || GV->isWeakForLinker();
- bool isStub = (isExt && Subtarget->isTargetDarwin()) &&
+ bool isStub = (isExt && Subtarget->isTargetMachO()) &&
getTargetMachine().getRelocationModel() != Reloc::Static;
isARMFunc = !Subtarget->isThumb() || isStub;
// ARM call to a local ARM function is predicable.
isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking);
// tBX takes a register source operand.
- if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
- unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
- ARMConstantPoolValue *CPV =
- ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 4);
- SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
- CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
- Callee = DAG.getLoad(getPointerTy(), dl,
- DAG.getEntryNode(), CPAddr,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 0);
- SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
- Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
- getPointerTy(), Callee, PICLabel);
+ if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
+ assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
+ Callee = DAG.getNode(ARMISD::WrapperPIC, dl, getPointerTy(),
+ DAG.getTargetGlobalAddress(GV, dl, getPointerTy()));
} else {
// On ELF targets for PIC code, direct calls should go through the PLT
unsigned OpFlags = 0;
}
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
isDirect = true;
- bool isStub = Subtarget->isTargetDarwin() &&
+ bool isStub = Subtarget->isTargetMachO() &&
getTargetMachine().getRelocationModel() != Reloc::Static;
isARMFunc = !Subtarget->isThumb() || isStub;
// tBX takes a register source operand.
// FIXME: handle tail calls differently.
unsigned CallOpc;
- bool HasMinSizeAttr = MF.getFunction()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
+ bool HasMinSizeAttr = Subtarget->isMinSize();
if (Subtarget->isThumb()) {
if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
CallOpc = ARMISD::CALL_NOLINK;
RegsToPass[i].second.getValueType()));
// Add a register mask operand representing the call-preserved registers.
- const uint32_t *Mask;
- const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
- const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo*>(TRI);
- if (isThisReturn) {
- // For 'this' returns, use the R0-preserving mask if applicable
- Mask = ARI->getThisReturnPreservedMask(CallConv);
- if (!Mask) {
- // Set isThisReturn to false if the calling convention is not one that
- // allows 'returned' to be modeled in this way, so LowerCallResult does
- // not try to pass 'this' straight through
- isThisReturn = false;
+ if (!isTailCall) {
+ const uint32_t *Mask;
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo*>(TRI);
+ if (isThisReturn) {
+ // For 'this' returns, use the R0-preserving mask if applicable
+ Mask = ARI->getThisReturnPreservedMask(CallConv);
+ if (!Mask) {
+ // Set isThisReturn to false if the calling convention is not one that
+ // allows 'returned' to be modeled in this way, so LowerCallResult does
+ // not try to pass 'this' straight through
+ isThisReturn = false;
+ Mask = ARI->getCallPreservedMask(CallConv);
+ }
+ } else
Mask = ARI->getCallPreservedMask(CallConv);
- }
- } else
- Mask = ARI->getCallPreservedMask(CallConv);
- assert(Mask && "Missing call preserved mask for calling convention");
- Ops.push_back(DAG.getRegisterMask(Mask));
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
+ }
if (InFlag.getNode())
Ops.push_back(InFlag);
State->getCallOrPrologue() == Call) &&
"unhandled ParmContext");
- // For in-prologue parameters handling, we also introduce stack offset
- // for byval registers: see CallingConvLower.cpp, CCState::HandleByVal.
- // This behaviour outsides AAPCS rules (5.5 Parameters Passing) of how
- // NSAA should be evaluted (NSAA means "next stacked argument address").
- // So: NextStackOffset = NSAAOffset + SizeOfByValParamsStoredInRegs.
- // Then: NSAAOffset = NextStackOffset - SizeOfByValParamsStoredInRegs.
- unsigned NSAAOffset = State->getNextStackOffset();
- if (State->getCallOrPrologue() != Call) {
- for (unsigned i = 0, e = State->getInRegsParamsCount(); i != e; ++i) {
- unsigned RB, RE;
- State->getInRegsParamInfo(i, RB, RE);
- assert(NSAAOffset >= (RE-RB)*4 &&
- "Stack offset for byval regs doesn't introduced anymore?");
- NSAAOffset -= (RE-RB)*4;
- }
- }
if ((ARM::R0 <= reg) && (reg <= ARM::R3)) {
if (Subtarget->isAAPCS_ABI() && Align > 4) {
unsigned AlignInRegs = Align / 4;
// all remained GPR regs. In that case we can't split parameter, we must
// send it to stack. We also must set NCRN to R4, so waste all
// remained registers.
+ const unsigned NSAAOffset = State->getNextStackOffset();
if (Subtarget->isAAPCS_ABI() && NSAAOffset != 0 && size > excess) {
while (State->AllocateReg(GPRArgRegs, 4))
;
// allocate remained amount of registers we need.
for (unsigned i = reg+1; i != ByValRegEnd; ++i)
State->AllocateReg(GPRArgRegs, 4);
- // At a call site, a byval parameter that is split between
- // registers and memory needs its size truncated here. In a
- // function prologue, such byval parameters are reassembled in
- // memory, and are not truncated.
- if (State->getCallOrPrologue() == Call) {
- // Make remained size equal to 0 in case, when
- // the whole structure may be stored into registers.
- if (size < excess)
- size = 0;
- else
- size -= excess;
- }
+ // A byval parameter that is split between registers and memory needs its
+ // size truncated here.
+ // In the case where the entire structure fits in registers, we set the
+ // size in memory to zero.
+ if (size < excess)
+ size = 0;
+ else
+ size -= excess;
}
}
}
if (isVarArg && !Outs.empty())
return false;
+ // Exception-handling functions need a special set of instructions to indicate
+ // a return to the hardware. Tail-calling another function would probably
+ // break this.
+ if (CallerF->hasFnAttribute("interrupt"))
+ return false;
+
// Also avoid sibcall optimization if either caller or callee uses struct
// return semantics.
if (isCalleeStructRet || isCallerStructRet)
isVarArg));
}
+static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
+ SDLoc DL, SelectionDAG &DAG) {
+ const MachineFunction &MF = DAG.getMachineFunction();
+ const Function *F = MF.getFunction();
+
+ StringRef IntKind = F->getFnAttribute("interrupt").getValueAsString();
+
+ // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
+ // version of the "preferred return address". These offsets affect the return
+ // instruction if this is a return from PL1 without hypervisor extensions.
+ // IRQ/FIQ: +4 "subs pc, lr, #4"
+ // SWI: 0 "subs pc, lr, #0"
+ // ABORT: +4 "subs pc, lr, #4"
+ // UNDEF: +4/+2 "subs pc, lr, #0"
+ // UNDEF varies depending on where the exception came from ARM or Thumb
+ // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
+
+ int64_t LROffset;
+ if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
+ IntKind == "ABORT")
+ LROffset = 4;
+ else if (IntKind == "SWI" || IntKind == "UNDEF")
+ LROffset = 0;
+ else
+ report_fatal_error("Unsupported interrupt attribute. If present, value "
+ "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
+
+ RetOps.insert(RetOps.begin() + 1, DAG.getConstant(LROffset, MVT::i32, false));
+
+ return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other,
+ RetOps.data(), RetOps.size());
+}
+
SDValue
ARMTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
if (Flag.getNode())
RetOps.push_back(Flag);
+ // CPUs which aren't M-class use a special sequence to return from
+ // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
+ // though we use "subs pc, lr, #N").
+ //
+ // M-class CPUs actually use a normal return sequence with a special
+ // (hardware-provided) value in LR, so the normal code path works.
+ if (DAG.getMachineFunction().getFunction()->hasFnAttribute("interrupt") &&
+ !Subtarget->isMClass()) {
+ if (Subtarget->isThumb1Only())
+ report_fatal_error("interrupt attribute is not supported in Thumb1");
+ return LowerInterruptReturn(RetOps, dl, DAG);
+ }
+
return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other,
RetOps.data(), RetOps.size());
}
bool HasRet = false;
for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
UI != UE; ++UI) {
- if (UI->getOpcode() != ARMISD::RET_FLAG)
+ if (UI->getOpcode() != ARMISD::RET_FLAG &&
+ UI->getOpcode() != ARMISD::INTRET_FLAG)
return false;
HasRet = true;
}
}
bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
- if (!EnableARMTailCalls && !Subtarget->supportsTailCall())
+ if (!Subtarget->supportsTailCall())
return false;
- if (!CI->isTailCall())
+ if (!CI->isTailCall() || getTargetMachine().Options.DisableTailCalls)
return false;
return !Subtarget->isThumb1Only();
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
Reloc::Model RelocM = getTargetMachine().getRelocationModel();
- // FIXME: Enable this for static codegen when tool issues are fixed. Also
- // update ARMFastISel::ARMMaterializeGV.
- if (Subtarget->useMovt() && RelocM != Reloc::Static) {
+ if (Subtarget->useMovt())
++NumMovwMovt;
- // FIXME: Once remat is capable of dealing with instructions with register
- // operands, expand this into two nodes.
- if (RelocM == Reloc::Static)
- return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
- DAG.getTargetGlobalAddress(GV, dl, PtrVT));
-
- unsigned Wrapper = (RelocM == Reloc::PIC_)
- ? ARMISD::WrapperPIC : ARMISD::WrapperDYN;
- SDValue Result = DAG.getNode(Wrapper, dl, PtrVT,
- DAG.getTargetGlobalAddress(GV, dl, PtrVT));
- if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
- Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
- MachinePointerInfo::getGOT(),
- false, false, false, 0);
- return Result;
- }
-
- unsigned ARMPCLabelIndex = 0;
- SDValue CPAddr;
- if (RelocM == Reloc::Static) {
- CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
- } else {
- ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
- ARMPCLabelIndex = AFI->createPICLabelUId();
- unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8);
- ARMConstantPoolValue *CPV =
- ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue,
- PCAdj);
- CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
- }
- CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
- SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 0);
- SDValue Chain = Result.getValue(1);
+ // FIXME: Once remat is capable of dealing with instructions with register
+ // operands, expand this into multiple nodes
+ unsigned Wrapper =
+ RelocM == Reloc::PIC_ ? ARMISD::WrapperPIC : ARMISD::Wrapper;
- if (RelocM == Reloc::PIC_) {
- SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
- Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
- }
+ SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
+ SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
- Result = DAG.getLoad(PtrVT, dl, Chain, Result, MachinePointerInfo::getGOT(),
- false, false, false, 0);
-
+ Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
+ MachinePointerInfo::getGOT(), false, false, false, 0);
return Result;
}
// Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
// here.
assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
- "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
+ "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
DAG.getConstant(0, MVT::i32));
}
ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1));
AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue());
unsigned Domain = ARM_MB::ISH;
- if (Subtarget->isSwift() && Ord == Release) {
+ if (Subtarget->isMClass()) {
+ // Only a full system barrier exists in the M-class architectures.
+ Domain = ARM_MB::SY;
+ } else if (Subtarget->isSwift() && Ord == Release) {
// Swift happens to implement ISHST barriers in a way that's compatible with
// Release semantics but weaker than ISH so we'd be fools not to use
// it. Beware: other processors probably don't!
Domain = ARM_MB::ISHST;
}
- return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
+ return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
+ DAG.getConstant(Intrinsic::arm_dmb, MVT::i32),
DAG.getConstant(Domain, MVT::i32));
}
ArgRegsSize = NumGPRs * 4;
// If parameter is split between stack and GPRs...
- if (NumGPRs && Align == 8 &&
+ if (NumGPRs && Align > 4 &&
(ArgRegsSize < ArgSize ||
InRegsParamRecordIdx >= CCInfo.getInRegsParamsCount())) {
- // Add padding for part of param recovered from GPRs, so
- // its last byte must be at address K*8 - 1.
+ // Add padding for part of param recovered from GPRs. For example,
+ // if Align == 8, its last byte must be at address K*8 - 1.
// We need to do it, since remained (stack) part of parameter has
// stack alignment, and we need to "attach" "GPRs head" without gaps
// to it:
//
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned Padding =
- ((ArgRegsSize + AFI->getArgRegsSaveSize() + Align - 1) & ~(Align-1)) -
- (ArgRegsSize + AFI->getArgRegsSaveSize());
+ OffsetToAlignment(ArgRegsSize + AFI->getArgRegsSaveSize(), Align);
ArgRegsSaveSize = ArgRegsSize + Padding;
} else
// We don't need to extend regs save size for byval parameters if they
unsigned OffsetFromOrigArg,
unsigned ArgOffset,
unsigned ArgSize,
- bool ForceMutable) const {
+ bool ForceMutable,
+ unsigned ByValStoreOffset,
+ unsigned TotalArgRegsSaveSize) const {
// Currently, two use-cases possible:
- // Case #1. Non var-args function, and we meet first byval parameter.
+ // Case #1. Non-var-args function, and we meet first byval parameter.
// Setup first unallocated register as first byval register;
// eat all remained registers
// (these two actions are performed by HandleByVal method).
// Note: once stack area for byval/varargs registers
// was initialized, it can't be initialized again.
if (ArgRegsSaveSize) {
-
unsigned Padding = ArgRegsSaveSize - ArgRegsSize;
if (Padding) {
AFI->setStoredByValParamsPadding(Padding);
}
- int FrameIndex = MFI->CreateFixedObject(
- ArgRegsSaveSize,
- Padding + ArgOffset,
- false);
+ int FrameIndex = MFI->CreateFixedObject(ArgRegsSaveSize,
+ Padding +
+ ByValStoreOffset -
+ (int64_t)TotalArgRegsSaveSize,
+ false);
SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy());
+ if (Padding) {
+ MFI->CreateFixedObject(Padding,
+ ArgOffset + ByValStoreOffset -
+ (int64_t)ArgRegsSaveSize,
+ false);
+ }
SmallVector<SDValue, 4> MemOps;
for (unsigned i = 0; firstRegToSaveIndex < lastRegToSaveIndex;
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&MemOps[0], MemOps.size());
return FrameIndex;
- } else
+ } else {
+ if (ArgSize == 0) {
+ // We cannot allocate a zero-byte object for the first variadic argument,
+ // so just make up a size.
+ ArgSize = 4;
+ }
// This will point to the next argument passed via stack.
return MFI->CreateFixedObject(
- 4, AFI->getStoredByValParamsPadding() + ArgOffset, !ForceMutable);
+ ArgSize, ArgOffset, !ForceMutable);
+ }
}
// Setup stack frame, the va_list pointer will start from.
ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
SDLoc dl, SDValue &Chain,
unsigned ArgOffset,
+ unsigned TotalArgRegsSaveSize,
bool ForceMutable) const {
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
// argument passed via stack.
int FrameIndex =
StoreByValRegs(CCInfo, DAG, dl, Chain, 0, CCInfo.getInRegsParamsCount(),
- 0, ArgOffset, 0, ForceMutable);
+ 0, ArgOffset, 0, ForceMutable, 0, TotalArgRegsSaveSize);
AFI->setVarArgsFrameIndex(FrameIndex);
}
// We also increase this value in case of varargs function.
AFI->setArgRegsSaveSize(0);
+ unsigned ByValStoreOffset = 0;
+ unsigned TotalArgRegsSaveSize = 0;
+ unsigned ArgRegsSaveSizeMaxAlign = 4;
+
+ // Calculate the amount of stack space that we need to allocate to store
+ // byval and variadic arguments that are passed in registers.
+ // We need to know this before we allocate the first byval or variadic
+ // argument, as they will be allocated a stack slot below the CFA (Canonical
+ // Frame Address, the stack pointer at entry to the function).
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ if (VA.isMemLoc()) {
+ int index = VA.getValNo();
+ if (index != lastInsIndex) {
+ ISD::ArgFlagsTy Flags = Ins[index].Flags;
+ if (Flags.isByVal()) {
+ unsigned ExtraArgRegsSize;
+ unsigned ExtraArgRegsSaveSize;
+ computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsProceed(),
+ Flags.getByValSize(),
+ ExtraArgRegsSize, ExtraArgRegsSaveSize);
+
+ TotalArgRegsSaveSize += ExtraArgRegsSaveSize;
+ if (Flags.getByValAlign() > ArgRegsSaveSizeMaxAlign)
+ ArgRegsSaveSizeMaxAlign = Flags.getByValAlign();
+ CCInfo.nextInRegsParam();
+ }
+ lastInsIndex = index;
+ }
+ }
+ }
+ CCInfo.rewindByValRegsInfo();
+ lastInsIndex = -1;
+ if (isVarArg) {
+ unsigned ExtraArgRegsSize;
+ unsigned ExtraArgRegsSaveSize;
+ computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsCount(), 0,
+ ExtraArgRegsSize, ExtraArgRegsSaveSize);
+ TotalArgRegsSaveSize += ExtraArgRegsSaveSize;
+ }
+ // If the arg regs save area contains N-byte aligned values, the
+ // bottom of it must be at least N-byte aligned.
+ TotalArgRegsSaveSize = RoundUpToAlignment(TotalArgRegsSaveSize, ArgRegsSaveSizeMaxAlign);
+ TotalArgRegsSaveSize = std::min(TotalArgRegsSaveSize, 16U);
+
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
std::advance(CurOrigArg, Ins[VA.getValNo()].OrigArgIndex - CurArgIdx);
// a tail call.
if (Flags.isByVal()) {
unsigned CurByValIndex = CCInfo.getInRegsParamsProceed();
+
+ ByValStoreOffset = RoundUpToAlignment(ByValStoreOffset, Flags.getByValAlign());
int FrameIndex = StoreByValRegs(
CCInfo, DAG, dl, Chain, CurOrigArg,
CurByValIndex,
Ins[VA.getValNo()].PartOffset,
VA.getLocMemOffset(),
Flags.getByValSize(),
- true /*force mutable frames*/);
+ true /*force mutable frames*/,
+ ByValStoreOffset,
+ TotalArgRegsSaveSize);
+ ByValStoreOffset += Flags.getByValSize();
+ ByValStoreOffset = std::min(ByValStoreOffset, 16U);
InVals.push_back(DAG.getFrameIndex(FrameIndex, getPointerTy()));
CCInfo.nextInRegsParam();
} else {
- unsigned FIOffset = VA.getLocMemOffset() +
- AFI->getStoredByValParamsPadding();
+ unsigned FIOffset = VA.getLocMemOffset();
int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
FIOffset, true);
// varargs
if (isVarArg)
VarArgStyleRegisters(CCInfo, DAG, dl, Chain,
- CCInfo.getNextStackOffset());
+ CCInfo.getNextStackOffset(),
+ TotalArgRegsSaveSize);
return Chain;
}
SelectTrue, SelectFalse, ISD::SETNE);
}
+static ISD::CondCode getInverseCCForVSEL(ISD::CondCode CC) {
+ if (CC == ISD::SETNE)
+ return ISD::SETEQ;
+ return ISD::getSetCCInverse(CC, true);
+}
+
+static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
+ bool &swpCmpOps, bool &swpVselOps) {
+ // Start by selecting the GE condition code for opcodes that return true for
+ // 'equality'
+ if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE ||
+ CC == ISD::SETULE)
+ CondCode = ARMCC::GE;
+
+ // and GT for opcodes that return false for 'equality'.
+ else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT ||
+ CC == ISD::SETULT)
+ CondCode = ARMCC::GT;
+
+ // Since we are constrained to GE/GT, if the opcode contains 'less', we need
+ // to swap the compare operands.
+ if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT ||
+ CC == ISD::SETULT)
+ swpCmpOps = true;
+
+ // Both GT and GE are ordered comparisons, and return false for 'unordered'.
+ // If we have an unordered opcode, we need to swap the operands to the VSEL
+ // instruction (effectively negating the condition).
+ //
+ // This also has the effect of swapping which one of 'less' or 'greater'
+ // returns true, so we also swap the compare operands. It also switches
+ // whether we return true for 'equality', so we compensate by picking the
+ // opposite condition code to our original choice.
+ if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE ||
+ CC == ISD::SETUGT) {
+ swpCmpOps = !swpCmpOps;
+ swpVselOps = !swpVselOps;
+ CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT;
+ }
+
+ // 'ordered' is 'anything but unordered', so use the VS condition code and
+ // swap the VSEL operands.
+ if (CC == ISD::SETO) {
+ CondCode = ARMCC::VS;
+ swpVselOps = true;
+ }
+
+ // 'unordered or not equal' is 'anything but equal', so use the EQ condition
+ // code and swap the VSEL operands.
+ if (CC == ISD::SETUNE) {
+ CondCode = ARMCC::EQ;
+ swpVselOps = true;
+ }
+}
+
SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDValue LHS = Op.getOperand(0);
SDLoc dl(Op);
if (LHS.getValueType() == MVT::i32) {
+ // Try to generate VSEL on ARMv8.
+ // The VSEL instruction can't use all the usual ARM condition
+ // codes: it only has two bits to select the condition code, so it's
+ // constrained to use only GE, GT, VS and EQ.
+ //
+ // To implement all the various ISD::SETXXX opcodes, we sometimes need to
+ // swap the operands of the previous compare instruction (effectively
+ // inverting the compare condition, swapping 'less' and 'greater') and
+ // sometimes need to swap the operands to the VSEL (which inverts the
+ // condition in the sense of firing whenever the previous condition didn't)
+ if (getSubtarget()->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
+ TrueVal.getValueType() == MVT::f64)) {
+ ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
+ if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
+ CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
+ CC = getInverseCCForVSEL(CC);
+ std::swap(TrueVal, FalseVal);
+ }
+ }
+
SDValue ARMcc;
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
- return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp);
+ return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
+ Cmp);
}
ARMCC::CondCodes CondCode, CondCode2;
FPCCToARMCC(CC, CondCode, CondCode2);
+ // Try to generate VSEL on ARMv8.
+ if (getSubtarget()->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
+ TrueVal.getValueType() == MVT::f64)) {
+ // We can select VMAXNM/VMINNM from a compare followed by a select with the
+ // same operands, as follows:
+ // c = fcmp [ogt, olt, ugt, ult] a, b
+ // select c, a, b
+ // We only do this in unsafe-fp-math, because signed zeros and NaNs are
+ // handled differently than the original code sequence.
+ if (getTargetMachine().Options.UnsafeFPMath && LHS == TrueVal &&
+ RHS == FalseVal) {
+ if (CC == ISD::SETOGT || CC == ISD::SETUGT)
+ return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal);
+ if (CC == ISD::SETOLT || CC == ISD::SETULT)
+ return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal);
+ }
+
+ bool swpCmpOps = false;
+ bool swpVselOps = false;
+ checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
+
+ if (CondCode == ARMCC::GT || CondCode == ARMCC::GE ||
+ CondCode == ARMCC::VS || CondCode == ARMCC::EQ) {
+ if (swpCmpOps)
+ std::swap(LHS, RHS);
+ if (swpVselOps)
+ std::swap(TrueVal, FalseVal);
+ }
+ }
+
SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
MachineFrameInfo *MFI = MF.getFrameInfo();
MFI->setReturnAddressIsTaken(true);
+ if (verifyReturnAddressArgumentIsConstant(Op, DAG))
+ return SDValue();
+
EVT VT = Op.getValueType();
SDLoc dl(Op);
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
EVT VT = Op.getValueType();
SDLoc dl(Op); // FIXME probably not meaningful
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
- unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin())
+ unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetMachO())
? ARM::R7 : ARM::R11;
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
while (Depth--)
return FrameAddr;
}
-/// Custom Expand long vector extensions, where size(DestVec) > 2*size(SrcVec),
-/// and size(DestVec) > 128-bits.
-/// This is achieved by doing the one extension from the SrcVec, splitting the
-/// result, extending these parts, and then concatenating these into the
-/// destination.
-static SDValue ExpandVectorExtension(SDNode *N, SelectionDAG &DAG) {
- SDValue Op = N->getOperand(0);
- EVT SrcVT = Op.getValueType();
- EVT DestVT = N->getValueType(0);
-
- assert(DestVT.getSizeInBits() > 128 &&
- "Custom sext/zext expansion needs >128-bit vector.");
- // If this is a normal length extension, use the default expansion.
- if (SrcVT.getSizeInBits()*4 != DestVT.getSizeInBits() &&
- SrcVT.getSizeInBits()*8 != DestVT.getSizeInBits())
- return SDValue();
-
- SDLoc dl(N);
- unsigned SrcEltSize = SrcVT.getVectorElementType().getSizeInBits();
- unsigned DestEltSize = DestVT.getVectorElementType().getSizeInBits();
- unsigned NumElts = SrcVT.getVectorNumElements();
- LLVMContext &Ctx = *DAG.getContext();
- SDValue Mid, SplitLo, SplitHi, ExtLo, ExtHi;
-
- EVT MidVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, SrcEltSize*2),
- NumElts);
- EVT SplitVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, SrcEltSize*2),
- NumElts/2);
- EVT ExtVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, DestEltSize),
- NumElts/2);
-
- Mid = DAG.getNode(N->getOpcode(), dl, MidVT, Op);
- SplitLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, Mid,
- DAG.getIntPtrConstant(0));
- SplitHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, Mid,
- DAG.getIntPtrConstant(NumElts/2));
- ExtLo = DAG.getNode(N->getOpcode(), dl, ExtVT, SplitLo);
- ExtHi = DAG.getNode(N->getOpcode(), dl, ExtVT, SplitHi);
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, ExtLo, ExtHi);
-}
-
/// ExpandBITCAST - If the target supports VFP, this function is called to
/// expand a bit convert where either the source or destination type is i64 to
/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
// Value = 0x0000nnff: Op=x, Cmode=1100.
OpCmode = 0xc;
Imm = SplatBits >> 8;
- SplatBits |= 0xff;
break;
}
// Value = 0x00nnffff: Op=x, Cmode=1101.
OpCmode = 0xd;
Imm = SplatBits >> 16;
- SplatBits |= 0xffff;
break;
}
}
// Op=1, Cmode=1110.
OpCmode = 0x1e;
- SplatBits = Val;
VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
break;
}
SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *ST) const {
- if (!ST->useNEONForSinglePrecisionFP() || !ST->hasVFP3() || ST->hasD16())
+ if (!ST->hasVFP3())
return SDValue();
+ bool IsDouble = Op.getValueType() == MVT::f64;
ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
- assert(Op.getValueType() == MVT::f32 &&
- "ConstantFP custom lowering should only occur for f32.");
// Try splatting with a VMOV.f32...
APFloat FPVal = CFP->getValueAPF();
- int ImmVal = ARM_AM::getFP32Imm(FPVal);
+ int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);
+
if (ImmVal != -1) {
+ if (IsDouble || !ST->useNEONForSinglePrecisionFP()) {
+ // We have code in place to select a valid ConstantFP already, no need to
+ // do any mangling.
+ return Op;
+ }
+
+ // It's a float and we are trying to use NEON operations where
+ // possible. Lower it to a splat followed by an extract.
SDLoc DL(Op);
SDValue NewVal = DAG.getTargetConstant(ImmVal, MVT::i32);
SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
DAG.getConstant(0, MVT::i32));
}
- // If that fails, try a VMOV.i32
+ // The rest of our options are NEON only, make sure that's allowed before
+ // proceeding..
+ if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP()))
+ return SDValue();
+
EVT VMovVT;
- unsigned iVal = FPVal.bitcastToAPInt().getZExtValue();
- SDValue NewVal = isNEONModifiedImm(iVal, 0, 32, DAG, VMovVT, false,
- VMOVModImm);
+ uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue();
+
+ // It wouldn't really be worth bothering for doubles except for one very
+ // important value, which does happen to match: 0.0. So make sure we don't do
+ // anything stupid.
+ if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32))
+ return SDValue();
+
+ // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too).
+ SDValue NewVal = isNEONModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, VMovVT,
+ false, VMOVModImm);
if (NewVal != SDValue()) {
SDLoc DL(Op);
SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
NewVal);
+ if (IsDouble)
+ return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
+
+ // It's a float: cast and extract a vector element.
SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
VecConstant);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
}
// Finally, try a VMVN.i32
- NewVal = isNEONModifiedImm(~iVal & 0xffffffff, 0, 32, DAG, VMovVT, false,
- VMVNModImm);
+ NewVal = isNEONModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, VMovVT,
+ false, VMVNModImm);
if (NewVal != SDValue()) {
SDLoc DL(Op);
SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
+
+ if (IsDouble)
+ return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
+
+ // It's a float: cast and extract a vector element.
SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
VecConstant);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
if (ValueCounts.size() == 0)
return DAG.getUNDEF(VT);
- if (isOnlyLowElement)
+ // Loads are better lowered with insert_vector_elt/ARMISD::BUILD_VECTOR.
+ // Keep going if we are hitting this case.
+ if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode()))
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
unsigned EltSize = VT.getVectorElementType().getSizeInBits();
Op.getOperand(1), Op.getOperand(2));
}
+SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
+ assert(Subtarget->isTargetDarwin());
+
+ // For iOS, we want to call an alternative entry point: __sincos_stret,
+ // return values are passed via sret.
+ SDLoc dl(Op);
+ SDValue Arg = Op.getOperand(0);
+ EVT ArgVT = Arg.getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+
+ MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // Pair of floats / doubles used to pass the result.
+ StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL);
+
+ // Create stack object for sret.
+ const uint64_t ByteSize = TLI.getDataLayout()->getTypeAllocSize(RetTy);
+ const unsigned StackAlign = TLI.getDataLayout()->getPrefTypeAlignment(RetTy);
+ int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false);
+ SDValue SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy());
+
+ ArgListTy Args;
+ ArgListEntry Entry;
+
+ Entry.Node = SRet;
+ Entry.Ty = RetTy->getPointerTo();
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Entry.isSRet = true;
+ Args.push_back(Entry);
+
+ Entry.Node = Arg;
+ Entry.Ty = ArgTy;
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Args.push_back(Entry);
+
+ const char *LibcallName = (ArgVT == MVT::f64)
+ ? "__sincos_stret" : "__sincosf_stret";
+ SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy());
+
+ TargetLowering::
+ CallLoweringInfo CLI(DAG.getEntryNode(), Type::getVoidTy(*DAG.getContext()),
+ false, false, false, false, 0,
+ CallingConv::C, /*isTaillCall=*/false,
+ /*doesNotRet=*/false, /*isReturnValueUsed*/false,
+ Callee, Args, DAG, dl);
+ std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+
+ SDValue LoadSin = DAG.getLoad(ArgVT, dl, CallResult.second, SRet,
+ MachinePointerInfo(), false, false, false, 0);
+
+ // Address of cos field.
+ SDValue Add = DAG.getNode(ISD::ADD, dl, getPointerTy(), SRet,
+ DAG.getIntPtrConstant(ArgVT.getStoreSize()));
+ SDValue LoadCos = DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add,
+ MachinePointerInfo(), false, false, false, 0);
+
+ SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
+ return DAG.getNode(ISD::MERGE_VALUES, dl, Tys,
+ LoadSin.getValue(0), LoadCos.getValue(0));
+}
+
static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
// Monotonic load/store is legal for all targets
if (cast<AtomicSDNode>(Op)->getOrdering() <= Monotonic)
return Op;
- // Aquire/Release load/store is not legal for targets without a
+ // Acquire/Release load/store is not legal for targets without a
// dmb or equivalent available.
return SDValue();
}
static void
ReplaceATOMIC_OP_64(SDNode *Node, SmallVectorImpl<SDValue>& Results,
- SelectionDAG &DAG, unsigned NewOp) {
+ SelectionDAG &DAG) {
SDLoc dl(Node);
assert (Node->getValueType(0) == MVT::i64 &&
"Only know how to expand i64 atomics");
+ AtomicSDNode *AN = cast<AtomicSDNode>(Node);
SmallVector<SDValue, 6> Ops;
Ops.push_back(Node->getOperand(0)); // Chain
Ops.push_back(Node->getOperand(1)); // Ptr
- // Low part of Val1
- Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- Node->getOperand(2), DAG.getIntPtrConstant(0)));
- // High part of Val1
- Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- Node->getOperand(2), DAG.getIntPtrConstant(1)));
- if (NewOp == ARMISD::ATOMCMPXCHG64_DAG) {
- // High part of Val1
+ for(unsigned i=2; i<Node->getNumOperands(); i++) {
+ // Low part
Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- Node->getOperand(3), DAG.getIntPtrConstant(0)));
- // High part of Val2
+ Node->getOperand(i), DAG.getIntPtrConstant(0)));
+ // High part
Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- Node->getOperand(3), DAG.getIntPtrConstant(1)));
+ Node->getOperand(i), DAG.getIntPtrConstant(1)));
}
SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
- SDValue Result =
- DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops.data(), Ops.size(), MVT::i64,
- cast<MemSDNode>(Node)->getMemOperand());
+ SDValue Result = DAG.getAtomic(
+ Node->getOpcode(), dl, MVT::i64, Tys, Ops.data(), Ops.size(),
+ cast<MemSDNode>(Node)->getMemOperand(), AN->getSuccessOrdering(),
+ AN->getFailureOrdering(), AN->getSynchScope());
SDValue OpsF[] = { Result.getValue(0), Result.getValue(1) };
Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2));
Results.push_back(Result.getValue(2));
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
case ISD::GlobalAddress:
- return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) :
+ return Subtarget->isTargetMachO() ? LowerGlobalAddressDarwin(Op, DAG) :
LowerGlobalAddressELF(Op, DAG);
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
case ISD::ATOMIC_LOAD:
case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
+ case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
+ case ISD::SDIVREM:
+ case ISD::UDIVREM: return LowerDivRem(Op, DAG);
}
}
case ISD::BITCAST:
Res = ExpandBITCAST(N, DAG);
break;
- case ISD::SIGN_EXTEND:
- case ISD::ZERO_EXTEND:
- Res = ExpandVectorExtension(N, DAG);
- break;
case ISD::SRL:
case ISD::SRA:
Res = Expand64BitShift(N, DAG, Subtarget);
case ISD::READCYCLECOUNTER:
ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);
return;
+ case ISD::ATOMIC_STORE:
+ case ISD::ATOMIC_LOAD:
case ISD::ATOMIC_LOAD_ADD:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMADD64_DAG);
- return;
case ISD::ATOMIC_LOAD_AND:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMAND64_DAG);
- return;
case ISD::ATOMIC_LOAD_NAND:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMNAND64_DAG);
- return;
case ISD::ATOMIC_LOAD_OR:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMOR64_DAG);
- return;
case ISD::ATOMIC_LOAD_SUB:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSUB64_DAG);
- return;
case ISD::ATOMIC_LOAD_XOR:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMXOR64_DAG);
- return;
case ISD::ATOMIC_SWAP:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSWAP64_DAG);
- return;
case ISD::ATOMIC_CMP_SWAP:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMCMPXCHG64_DAG);
- return;
case ISD::ATOMIC_LOAD_MIN:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMMIN64_DAG);
- return;
case ISD::ATOMIC_LOAD_UMIN:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMUMIN64_DAG);
- return;
case ISD::ATOMIC_LOAD_MAX:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMMAX64_DAG);
- return;
case ISD::ATOMIC_LOAD_UMAX:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMUMAX64_DAG);
+ ReplaceATOMIC_OP_64(N, Results, DAG);
return;
}
if (Res.getNode())
unsigned oldval = MI->getOperand(2).getReg();
unsigned newval = MI->getOperand(3).getReg();
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(4).getImm());
DebugLoc dl = MI->getDebugLoc();
bool isThumb2 = Subtarget->isThumb2();
}
unsigned ldrOpc, strOpc;
- switch (Size) {
- default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
- case 1:
- ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
- strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
- break;
- case 2:
- ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
- strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
- break;
- case 4:
- ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
- strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
- break;
- }
+ getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc);
MachineFunction *MF = BB->getParent();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
// Transfer the remainder of BB and its successor edges to exitMBB.
exitMBB->splice(exitMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
// thisMBB:
unsigned dest = MI->getOperand(0).getReg();
unsigned ptr = MI->getOperand(1).getReg();
unsigned incr = MI->getOperand(2).getReg();
+ AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
DebugLoc dl = MI->getDebugLoc();
bool isThumb2 = Subtarget->isThumb2();
if (isThumb2) {
MRI.constrainRegClass(dest, &ARM::rGPRRegClass);
MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(incr, &ARM::rGPRRegClass);
}
unsigned ldrOpc, strOpc;
- switch (Size) {
- default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
- case 1:
- ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
- strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
- break;
- case 2:
- ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
- strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
- break;
- case 4:
- ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
- strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
- break;
- }
+ getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc);
MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
// Transfer the remainder of BB and its successor edges to exitMBB.
exitMBB->splice(exitMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
const TargetRegisterClass *TRC = isThumb2 ?
unsigned ptr = MI->getOperand(1).getReg();
unsigned incr = MI->getOperand(2).getReg();
unsigned oldval = dest;
+ AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
DebugLoc dl = MI->getDebugLoc();
bool isThumb2 = Subtarget->isThumb2();
if (isThumb2) {
MRI.constrainRegClass(dest, &ARM::rGPRRegClass);
MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(incr, &ARM::rGPRRegClass);
}
unsigned ldrOpc, strOpc, extendOpc;
+ getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc);
switch (Size) {
- default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
+ default: llvm_unreachable("unsupported size for AtomicBinaryMinMax!");
case 1:
- ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
- strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
extendOpc = isThumb2 ? ARM::t2SXTB : ARM::SXTB;
break;
case 2:
- ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
- strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
extendOpc = isThumb2 ? ARM::t2SXTH : ARM::SXTH;
break;
case 4:
- ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
- strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
extendOpc = 0;
break;
}
// Transfer the remainder of BB and its successor edges to exitMBB.
exitMBB->splice(exitMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
const TargetRegisterClass *TRC = isThumb2 ?
// Sign extend the value, if necessary.
if (signExtend && extendOpc) {
- oldval = MRI.createVirtualRegister(&ARM::GPRRegClass);
+ oldval = MRI.createVirtualRegister(isThumb2 ? &ARM::rGPRRegClass
+ : &ARM::GPRnopcRegClass);
+ if (!isThumb2)
+ MRI.constrainRegClass(dest, &ARM::GPRnopcRegClass);
AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval)
.addReg(dest)
.addImm(0));
unsigned Op1, unsigned Op2,
bool NeedsCarry, bool IsCmpxchg,
bool IsMinMax, ARMCC::CondCodes CC) const {
- // This also handles ATOMIC_SWAP, indicated by Op1==0.
+ // This also handles ATOMIC_SWAP and ATOMIC_STORE, indicated by Op1==0.
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
unsigned ptr = MI->getOperand(2).getReg();
unsigned vallo = MI->getOperand(3).getReg();
unsigned valhi = MI->getOperand(4).getReg();
+ AtomicOrdering Ord =
+ static_cast<AtomicOrdering>(MI->getOperand(IsCmpxchg ? 7 : 5).getImm());
DebugLoc dl = MI->getDebugLoc();
bool isThumb2 = Subtarget->isThumb2();
MRI.constrainRegClass(destlo, &ARM::rGPRRegClass);
MRI.constrainRegClass(desthi, &ARM::rGPRRegClass);
MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(vallo, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(valhi, &ARM::rGPRRegClass);
}
+ unsigned ldrOpc, strOpc;
+ getExclusiveOperation(8, Ord, isThumb2, ldrOpc, strOpc);
+
MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *contBB = 0, *cont2BB = 0;
if (IsCmpxchg || IsMinMax)
// Transfer the remainder of BB and its successor edges to exitMBB.
exitMBB->splice(exitMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
const TargetRegisterClass *TRC = isThumb2 ?
// Load
if (isThumb2) {
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2LDREXD))
- .addReg(destlo, RegState::Define)
- .addReg(desthi, RegState::Define)
- .addReg(ptr));
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc))
+ .addReg(destlo, RegState::Define)
+ .addReg(desthi, RegState::Define)
+ .addReg(ptr));
} else {
unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDREXD))
- .addReg(GPRPair0, RegState::Define).addReg(ptr));
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc))
+ .addReg(GPRPair0, RegState::Define)
+ .addReg(ptr));
// Copy r2/r3 into dest. (This copy will normally be coalesced.)
BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo)
- .addReg(GPRPair0, 0, ARM::gsub_0);
+ .addReg(GPRPair0, 0, ARM::gsub_0);
BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi)
- .addReg(GPRPair0, 0, ARM::gsub_1);
+ .addReg(GPRPair0, 0, ARM::gsub_1);
}
unsigned StoreLo, StoreHi;
// Store
if (isThumb2) {
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2STREXD), storesuccess)
+ MRI.constrainRegClass(StoreLo, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(StoreHi, &ARM::rGPRRegClass);
+ AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess)
.addReg(StoreLo).addReg(StoreHi).addReg(ptr));
} else {
// Marshal a pair...
.addImm(ARM::gsub_1);
// ...and store it
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::STREXD), storesuccess)
+ AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess)
.addReg(StorePair).addReg(ptr));
}
// Cmp+jump
return BB;
}
+MachineBasicBlock *
+ARMTargetLowering::EmitAtomicLoad64(MachineInstr *MI, MachineBasicBlock *BB) const {
+
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ unsigned destlo = MI->getOperand(0).getReg();
+ unsigned desthi = MI->getOperand(1).getReg();
+ unsigned ptr = MI->getOperand(2).getReg();
+ AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
+ DebugLoc dl = MI->getDebugLoc();
+ bool isThumb2 = Subtarget->isThumb2();
+
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ if (isThumb2) {
+ MRI.constrainRegClass(destlo, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(desthi, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
+ }
+ unsigned ldrOpc, strOpc;
+ getExclusiveOperation(8, Ord, isThumb2, ldrOpc, strOpc);
+
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(ldrOpc));
+
+ if (isThumb2) {
+ MIB.addReg(destlo, RegState::Define)
+ .addReg(desthi, RegState::Define)
+ .addReg(ptr);
+
+ } else {
+ unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ MIB.addReg(GPRPair0, RegState::Define).addReg(ptr);
+
+ // Copy GPRPair0 into dest. (This copy will normally be coalesced.)
+ BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), destlo)
+ .addReg(GPRPair0, 0, ARM::gsub_0);
+ BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), desthi)
+ .addReg(GPRPair0, 0, ARM::gsub_1);
+ }
+ AddDefaultPred(MIB);
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return BB;
+}
+
/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
/// registers the function context.
void ARMTargetLowering::
llvm_unreachable("Expecting a BB with two successors!");
}
-MachineBasicBlock *ARMTargetLowering::
-EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
+/// Return the load opcode for a given load size. If load size >= 8,
+/// neon opcode will be returned.
+static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2) {
+ if (LdSize >= 8)
+ return LdSize == 16 ? ARM::VLD1q32wb_fixed
+ : LdSize == 8 ? ARM::VLD1d32wb_fixed : 0;
+ if (IsThumb1)
+ return LdSize == 4 ? ARM::tLDRi
+ : LdSize == 2 ? ARM::tLDRHi
+ : LdSize == 1 ? ARM::tLDRBi : 0;
+ if (IsThumb2)
+ return LdSize == 4 ? ARM::t2LDR_POST
+ : LdSize == 2 ? ARM::t2LDRH_POST
+ : LdSize == 1 ? ARM::t2LDRB_POST : 0;
+ return LdSize == 4 ? ARM::LDR_POST_IMM
+ : LdSize == 2 ? ARM::LDRH_POST
+ : LdSize == 1 ? ARM::LDRB_POST_IMM : 0;
+}
+
+/// Return the store opcode for a given store size. If store size >= 8,
+/// neon opcode will be returned.
+static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2) {
+ if (StSize >= 8)
+ return StSize == 16 ? ARM::VST1q32wb_fixed
+ : StSize == 8 ? ARM::VST1d32wb_fixed : 0;
+ if (IsThumb1)
+ return StSize == 4 ? ARM::tSTRi
+ : StSize == 2 ? ARM::tSTRHi
+ : StSize == 1 ? ARM::tSTRBi : 0;
+ if (IsThumb2)
+ return StSize == 4 ? ARM::t2STR_POST
+ : StSize == 2 ? ARM::t2STRH_POST
+ : StSize == 1 ? ARM::t2STRB_POST : 0;
+ return StSize == 4 ? ARM::STR_POST_IMM
+ : StSize == 2 ? ARM::STRH_POST
+ : StSize == 1 ? ARM::STRB_POST_IMM : 0;
+}
+
+/// Emit a post-increment load operation with given size. The instructions
+/// will be added to BB at Pos.
+static void emitPostLd(MachineBasicBlock *BB, MachineInstr *Pos,
+ const TargetInstrInfo *TII, DebugLoc dl,
+ unsigned LdSize, unsigned Data, unsigned AddrIn,
+ unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
+ unsigned LdOpc = getLdOpcode(LdSize, IsThumb1, IsThumb2);
+ assert(LdOpc != 0 && "Should have a load opcode");
+ if (LdSize >= 8) {
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
+ .addReg(AddrOut, RegState::Define).addReg(AddrIn)
+ .addImm(0));
+ } else if (IsThumb1) {
+ // load + update AddrIn
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
+ .addReg(AddrIn).addImm(0));
+ MachineInstrBuilder MIB =
+ BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut);
+ MIB = AddDefaultT1CC(MIB);
+ MIB.addReg(AddrIn).addImm(LdSize);
+ AddDefaultPred(MIB);
+ } else if (IsThumb2) {
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
+ .addReg(AddrOut, RegState::Define).addReg(AddrIn)
+ .addImm(LdSize));
+ } else { // arm
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
+ .addReg(AddrOut, RegState::Define).addReg(AddrIn)
+ .addReg(0).addImm(LdSize));
+ }
+}
+
+/// Emit a post-increment store operation with given size. The instructions
+/// will be added to BB at Pos.
+static void emitPostSt(MachineBasicBlock *BB, MachineInstr *Pos,
+ const TargetInstrInfo *TII, DebugLoc dl,
+ unsigned StSize, unsigned Data, unsigned AddrIn,
+ unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
+ unsigned StOpc = getStOpcode(StSize, IsThumb1, IsThumb2);
+ assert(StOpc != 0 && "Should have a store opcode");
+ if (StSize >= 8) {
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
+ .addReg(AddrIn).addImm(0).addReg(Data));
+ } else if (IsThumb1) {
+ // store + update AddrIn
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc)).addReg(Data)
+ .addReg(AddrIn).addImm(0));
+ MachineInstrBuilder MIB =
+ BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut);
+ MIB = AddDefaultT1CC(MIB);
+ MIB.addReg(AddrIn).addImm(StSize);
+ AddDefaultPred(MIB);
+ } else if (IsThumb2) {
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
+ .addReg(Data).addReg(AddrIn).addImm(StSize));
+ } else { // arm
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
+ .addReg(Data).addReg(AddrIn).addReg(0)
+ .addImm(StSize));
+ }
+}
+
+MachineBasicBlock *
+ARMTargetLowering::EmitStructByval(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
// This pseudo instruction has 3 operands: dst, src, size
// We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
// Otherwise, we will generate unrolled scalar copies.
unsigned Align = MI->getOperand(3).getImm();
DebugLoc dl = MI->getDebugLoc();
- bool isThumb2 = Subtarget->isThumb2();
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
- unsigned ldrOpc, strOpc, UnitSize = 0;
+ unsigned UnitSize = 0;
+ const TargetRegisterClass *TRC = 0;
+ const TargetRegisterClass *VecTRC = 0;
- const TargetRegisterClass *TRC = isThumb2 ?
- (const TargetRegisterClass*)&ARM::tGPRRegClass :
- (const TargetRegisterClass*)&ARM::GPRRegClass;
- const TargetRegisterClass *TRC_Vec = 0;
+ bool IsThumb1 = Subtarget->isThumb1Only();
+ bool IsThumb2 = Subtarget->isThumb2();
if (Align & 1) {
- ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
- strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM;
UnitSize = 1;
} else if (Align & 2) {
- ldrOpc = isThumb2 ? ARM::t2LDRH_POST : ARM::LDRH_POST;
- strOpc = isThumb2 ? ARM::t2STRH_POST : ARM::STRH_POST;
UnitSize = 2;
} else {
// Check whether we can use NEON instructions.
hasAttribute(AttributeSet::FunctionIndex,
Attribute::NoImplicitFloat) &&
Subtarget->hasNEON()) {
- if ((Align % 16 == 0) && SizeVal >= 16) {
- ldrOpc = ARM::VLD1q32wb_fixed;
- strOpc = ARM::VST1q32wb_fixed;
+ if ((Align % 16 == 0) && SizeVal >= 16)
UnitSize = 16;
- TRC_Vec = (const TargetRegisterClass*)&ARM::DPairRegClass;
- }
- else if ((Align % 8 == 0) && SizeVal >= 8) {
- ldrOpc = ARM::VLD1d32wb_fixed;
- strOpc = ARM::VST1d32wb_fixed;
+ else if ((Align % 8 == 0) && SizeVal >= 8)
UnitSize = 8;
- TRC_Vec = (const TargetRegisterClass*)&ARM::DPRRegClass;
- }
}
// Can't use NEON instructions.
- if (UnitSize == 0) {
- ldrOpc = isThumb2 ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
- strOpc = isThumb2 ? ARM::t2STR_POST : ARM::STR_POST_IMM;
+ if (UnitSize == 0)
UnitSize = 4;
- }
}
+ // Select the correct opcode and register class for unit size load/store
+ bool IsNeon = UnitSize >= 8;
+ TRC = (IsThumb1 || IsThumb2) ? (const TargetRegisterClass *)&ARM::tGPRRegClass
+ : (const TargetRegisterClass *)&ARM::GPRRegClass;
+ if (IsNeon)
+ VecTRC = UnitSize == 16
+ ? (const TargetRegisterClass *)&ARM::DPairRegClass
+ : UnitSize == 8
+ ? (const TargetRegisterClass *)&ARM::DPRRegClass
+ : 0;
+
unsigned BytesLeft = SizeVal % UnitSize;
unsigned LoopSize = SizeVal - BytesLeft;
unsigned srcIn = src;
unsigned destIn = dest;
for (unsigned i = 0; i < LoopSize; i+=UnitSize) {
- unsigned scratch = MRI.createVirtualRegister(UnitSize >= 8 ? TRC_Vec:TRC);
unsigned srcOut = MRI.createVirtualRegister(TRC);
unsigned destOut = MRI.createVirtualRegister(TRC);
- if (UnitSize >= 8) {
- AddDefaultPred(BuildMI(*BB, MI, dl,
- TII->get(ldrOpc), scratch)
- .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(0));
-
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
- .addReg(destIn).addImm(0).addReg(scratch));
- } else if (isThumb2) {
- AddDefaultPred(BuildMI(*BB, MI, dl,
- TII->get(ldrOpc), scratch)
- .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(UnitSize));
-
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
- .addReg(scratch).addReg(destIn)
- .addImm(UnitSize));
- } else {
- AddDefaultPred(BuildMI(*BB, MI, dl,
- TII->get(ldrOpc), scratch)
- .addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0)
- .addImm(UnitSize));
-
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
- .addReg(scratch).addReg(destIn)
- .addReg(0).addImm(UnitSize));
- }
+ unsigned scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
+ emitPostLd(BB, MI, TII, dl, UnitSize, scratch, srcIn, srcOut,
+ IsThumb1, IsThumb2);
+ emitPostSt(BB, MI, TII, dl, UnitSize, scratch, destIn, destOut,
+ IsThumb1, IsThumb2);
srcIn = srcOut;
destIn = destOut;
}
// Handle the leftover bytes with LDRB and STRB.
// [scratch, srcOut] = LDRB_POST(srcIn, 1)
// [destOut] = STRB_POST(scratch, destIn, 1)
- ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
- strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM;
for (unsigned i = 0; i < BytesLeft; i++) {
- unsigned scratch = MRI.createVirtualRegister(TRC);
unsigned srcOut = MRI.createVirtualRegister(TRC);
unsigned destOut = MRI.createVirtualRegister(TRC);
- if (isThumb2) {
- AddDefaultPred(BuildMI(*BB, MI, dl,
- TII->get(ldrOpc),scratch)
- .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1));
-
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
- .addReg(scratch).addReg(destIn)
- .addReg(0).addImm(1));
- } else {
- AddDefaultPred(BuildMI(*BB, MI, dl,
- TII->get(ldrOpc),scratch)
- .addReg(srcOut, RegState::Define).addReg(srcIn)
- .addReg(0).addImm(1));
-
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
- .addReg(scratch).addReg(destIn)
- .addReg(0).addImm(1));
- }
+ unsigned scratch = MRI.createVirtualRegister(TRC);
+ emitPostLd(BB, MI, TII, dl, 1, scratch, srcIn, srcOut,
+ IsThumb1, IsThumb2);
+ emitPostSt(BB, MI, TII, dl, 1, scratch, destIn, destOut,
+ IsThumb1, IsThumb2);
srcIn = srcOut;
destIn = destOut;
}
// Transfer the remainder of BB and its successor edges to exitMBB.
exitMBB->splice(exitMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
// Load an immediate to varEnd.
unsigned varEnd = MRI.createVirtualRegister(TRC);
- if (isThumb2) {
- unsigned VReg1 = varEnd;
+ if (IsThumb2) {
+ unsigned Vtmp = varEnd;
if ((LoopSize & 0xFFFF0000) != 0)
- VReg1 = MRI.createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVi16), VReg1)
- .addImm(LoopSize & 0xFFFF));
+ Vtmp = MRI.createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVi16), Vtmp)
+ .addImm(LoopSize & 0xFFFF));
if ((LoopSize & 0xFFFF0000) != 0)
AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVTi16), varEnd)
- .addReg(VReg1)
- .addImm(LoopSize >> 16));
+ .addReg(Vtmp).addImm(LoopSize >> 16));
} else {
MachineConstantPool *ConstantPool = MF->getConstantPool();
Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
Align = getDataLayout()->getTypeAllocSize(C->getType());
unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDRcp))
- .addReg(varEnd, RegState::Define)
- .addConstantPoolIndex(Idx)
- .addImm(0));
+ if (IsThumb1)
+ AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci)).addReg(
+ varEnd, RegState::Define).addConstantPoolIndex(Idx));
+ else
+ AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp)).addReg(
+ varEnd, RegState::Define).addConstantPoolIndex(Idx).addImm(0));
}
BB->addSuccessor(loopMBB);
// [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
// [destLoop] = STR_POST(scratch, destPhi, UnitSiz)
- unsigned scratch = MRI.createVirtualRegister(UnitSize >= 8 ? TRC_Vec:TRC);
- if (UnitSize >= 8) {
- AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
- .addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(0));
-
- AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
- .addReg(destPhi).addImm(0).addReg(scratch));
- } else if (isThumb2) {
- AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
- .addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(UnitSize));
-
- AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
- .addReg(scratch).addReg(destPhi)
- .addImm(UnitSize));
- } else {
- AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
- .addReg(srcLoop, RegState::Define).addReg(srcPhi).addReg(0)
- .addImm(UnitSize));
-
- AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
- .addReg(scratch).addReg(destPhi)
- .addReg(0).addImm(UnitSize));
- }
+ unsigned scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
+ emitPostLd(BB, BB->end(), TII, dl, UnitSize, scratch, srcPhi, srcLoop,
+ IsThumb1, IsThumb2);
+ emitPostSt(BB, BB->end(), TII, dl, UnitSize, scratch, destPhi, destLoop,
+ IsThumb1, IsThumb2);
// Decrement loop variable by UnitSize.
- MachineInstrBuilder MIB = BuildMI(BB, dl,
- TII->get(isThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
- AddDefaultCC(AddDefaultPred(MIB.addReg(varPhi).addImm(UnitSize)));
- MIB->getOperand(5).setReg(ARM::CPSR);
- MIB->getOperand(5).setIsDef(true);
-
- BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
- .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+ if (IsThumb1) {
+ MachineInstrBuilder MIB =
+ BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop);
+ MIB = AddDefaultT1CC(MIB);
+ MIB.addReg(varPhi).addImm(UnitSize);
+ AddDefaultPred(MIB);
+ } else {
+ MachineInstrBuilder MIB =
+ BuildMI(*BB, BB->end(), dl,
+ TII->get(IsThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
+ AddDefaultCC(AddDefaultPred(MIB.addReg(varPhi).addImm(UnitSize)));
+ MIB->getOperand(5).setReg(ARM::CPSR);
+ MIB->getOperand(5).setIsDef(true);
+ }
+ BuildMI(*BB, BB->end(), dl,
+ TII->get(IsThumb1 ? ARM::tBcc : IsThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
// loopMBB can loop back to loopMBB or fall through to exitMBB.
BB->addSuccessor(loopMBB);
// Add epilogue to handle BytesLeft.
BB = exitMBB;
MachineInstr *StartOfExit = exitMBB->begin();
- ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
- strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM;
// [scratch, srcOut] = LDRB_POST(srcLoop, 1)
// [destOut] = STRB_POST(scratch, destLoop, 1)
unsigned srcIn = srcLoop;
unsigned destIn = destLoop;
for (unsigned i = 0; i < BytesLeft; i++) {
- unsigned scratch = MRI.createVirtualRegister(TRC);
unsigned srcOut = MRI.createVirtualRegister(TRC);
unsigned destOut = MRI.createVirtualRegister(TRC);
- if (isThumb2) {
- AddDefaultPred(BuildMI(*BB, StartOfExit, dl,
- TII->get(ldrOpc),scratch)
- .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1));
-
- AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut)
- .addReg(scratch).addReg(destIn)
- .addImm(1));
- } else {
- AddDefaultPred(BuildMI(*BB, StartOfExit, dl,
- TII->get(ldrOpc),scratch)
- .addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0).addImm(1));
-
- AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut)
- .addReg(scratch).addReg(destIn)
- .addReg(0).addImm(1));
- }
+ unsigned scratch = MRI.createVirtualRegister(TRC);
+ emitPostLd(BB, StartOfExit, TII, dl, 1, scratch, srcIn, srcOut,
+ IsThumb1, IsThumb2);
+ emitPostSt(BB, StartOfExit, TII, dl, 1, scratch, destIn, destOut,
+ IsThumb1, IsThumb2);
srcIn = srcOut;
destIn = destOut;
}
case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2);
case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4);
+ case ARM::ATOMIC_LOAD_I64:
+ return EmitAtomicLoad64(MI, BB);
- case ARM::ATOMADD6432:
+ case ARM::ATOMIC_LOAD_ADD_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr,
isThumb2 ? ARM::t2ADCrr : ARM::ADCrr,
/*NeedsCarry*/ true);
- case ARM::ATOMSUB6432:
+ case ARM::ATOMIC_LOAD_SUB_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
/*NeedsCarry*/ true);
- case ARM::ATOMOR6432:
+ case ARM::ATOMIC_LOAD_OR_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr,
isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
- case ARM::ATOMXOR6432:
+ case ARM::ATOMIC_LOAD_XOR_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2EORrr : ARM::EORrr,
isThumb2 ? ARM::t2EORrr : ARM::EORrr);
- case ARM::ATOMAND6432:
+ case ARM::ATOMIC_LOAD_AND_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr,
isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
- case ARM::ATOMSWAP6432:
+ case ARM::ATOMIC_SWAP_I64:
return EmitAtomicBinary64(MI, BB, 0, 0, false);
- case ARM::ATOMCMPXCHG6432:
+ case ARM::ATOMIC_CMP_SWAP_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
/*NeedsCarry*/ false, /*IsCmpxchg*/true);
- case ARM::ATOMMIN6432:
+ case ARM::ATOMIC_LOAD_MIN_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
/*NeedsCarry*/ true, /*IsCmpxchg*/false,
/*IsMinMax*/ true, ARMCC::LT);
- case ARM::ATOMMAX6432:
+ case ARM::ATOMIC_LOAD_MAX_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
/*NeedsCarry*/ true, /*IsCmpxchg*/false,
/*IsMinMax*/ true, ARMCC::GE);
- case ARM::ATOMUMIN6432:
+ case ARM::ATOMIC_LOAD_UMIN_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
/*NeedsCarry*/ true, /*IsCmpxchg*/false,
/*IsMinMax*/ true, ARMCC::LO);
- case ARM::ATOMUMAX6432:
+ case ARM::ATOMIC_LOAD_UMAX_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
/*NeedsCarry*/ true, /*IsCmpxchg*/false,
// Transfer the remainder of BB and its successor edges to sinkMBB.
sinkMBB->splice(sinkMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
BB->addSuccessor(copy0MBB);
case ARM::BCCi64:
case ARM::BCCZi64: {
// If there is an unconditional branch to the other successor, remove it.
- BB->erase(llvm::next(MachineBasicBlock::iterator(MI)), BB->end());
+ BB->erase(std::next(MachineBasicBlock::iterator(MI)), BB->end());
// Compare both parts that make up the double comparison separately for
// equality.
// Transfer the remainder of BB and its successor edges to sinkMBB.
SinkBB->splice(SinkBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
SinkBB->transferSuccessorsAndUpdatePHIs(BB);
BB->addSuccessor(RSBBB);
/// is faster than
/// vadd d3, d0, d1
/// vmul d3, d3, d2
+// However, for (A + B) * (A + B),
+// vadd d2, d0, d1
+// vmul d3, d0, d2
+// vmla d3, d1, d2
+// is slower than
+// vadd d2, d0, d1
+// vmul d3, d2, d2
static SDValue PerformVMULCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
std::swap(N0, N1);
}
+ if (N0 == N1)
+ return SDValue();
+
EVT VT = N->getValueType(0);
SDLoc DL(N);
SDValue N00 = N0->getOperand(0);
unsigned SplatBitSize;
bool HasAnyUndefs;
+ APInt SplatBits0, SplatBits1;
BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
- APInt SplatBits0;
+ BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
+ // Ensure that the second operand of both ands are constants
if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
- HasAnyUndefs) && !HasAnyUndefs) {
- BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
- APInt SplatBits1;
- if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
- HasAnyUndefs) && !HasAnyUndefs &&
- SplatBits0 == ~SplatBits1) {
- // Canonicalize the vector type to make instruction selection simpler.
- EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
- SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT,
- N0->getOperand(1), N0->getOperand(0),
- N1->getOperand(0));
- return DAG.getNode(ISD::BITCAST, dl, VT, Result);
- }
+ HasAnyUndefs) && !HasAnyUndefs) {
+ if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
+ HasAnyUndefs) && !HasAnyUndefs) {
+ // Ensure that the bit width of the constants are the same and that
+ // the splat arguments are logical inverses as per the pattern we
+ // are trying to simplify.
+ if (SplatBits0.getBitWidth() == SplatBits1.getBitWidth() &&
+ SplatBits0 == ~SplatBits1) {
+ // Canonicalize the vector type to make instruction selection
+ // simpler.
+ EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
+ SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT,
+ N0->getOperand(1),
+ N0->getOperand(0),
+ N1->getOperand(0));
+ return DAG.getNode(ISD::BITCAST, dl, VT, Result);
+ }
+ }
}
}
// loads from a constant pool.
case Intrinsic::arm_neon_vshifts:
case Intrinsic::arm_neon_vshiftu:
- case Intrinsic::arm_neon_vshiftls:
- case Intrinsic::arm_neon_vshiftlu:
- case Intrinsic::arm_neon_vshiftn:
case Intrinsic::arm_neon_vrshifts:
case Intrinsic::arm_neon_vrshiftu:
case Intrinsic::arm_neon_vrshiftn:
}
return SDValue();
- case Intrinsic::arm_neon_vshiftls:
- case Intrinsic::arm_neon_vshiftlu:
- if (isVShiftLImm(N->getOperand(2), VT, true, Cnt))
- break;
- llvm_unreachable("invalid shift count for vshll intrinsic");
-
case Intrinsic::arm_neon_vrshifts:
case Intrinsic::arm_neon_vrshiftu:
if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt))
break;
llvm_unreachable("invalid shift count for vqshlu intrinsic");
- case Intrinsic::arm_neon_vshiftn:
case Intrinsic::arm_neon_vrshiftn:
case Intrinsic::arm_neon_vqshiftns:
case Intrinsic::arm_neon_vqshiftnu:
case Intrinsic::arm_neon_vshiftu:
// Opcode already set above.
break;
- case Intrinsic::arm_neon_vshiftls:
- case Intrinsic::arm_neon_vshiftlu:
- if (Cnt == VT.getVectorElementType().getSizeInBits())
- VShiftOpc = ARMISD::VSHLLi;
- else
- VShiftOpc = (IntNo == Intrinsic::arm_neon_vshiftls ?
- ARMISD::VSHLLs : ARMISD::VSHLLu);
- break;
- case Intrinsic::arm_neon_vshiftn:
- VShiftOpc = ARMISD::VSHRN; break;
case Intrinsic::arm_neon_vrshifts:
VShiftOpc = ARMISD::VRSHRs; break;
case Intrinsic::arm_neon_vrshiftu:
return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
}
-bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const {
+bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, unsigned,
+ bool *Fast) const {
// The AllowsUnaliged flag models the SCTLR.A setting in ARM cpus
bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
case MVT::v2f64: {
// For any little-endian targets with neon, we can support unaligned ld/st
// of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8.
- // A big-endian target may also explictly support unaligned accesses
+ // A big-endian target may also explicitly support unaligned accesses
if (Subtarget->hasNEON() && (AllowsUnaligned || isLittleEndian())) {
if (Fast)
*Fast = true;
bool Fast;
if (Size >= 16 &&
(memOpAlign(SrcAlign, DstAlign, 16) ||
- (allowsUnalignedMemoryAccesses(MVT::v2f64, &Fast) && Fast))) {
+ (allowsUnalignedMemoryAccesses(MVT::v2f64, 0, &Fast) && Fast))) {
return MVT::v2f64;
} else if (Size >= 8 &&
(memOpAlign(SrcAlign, DstAlign, 8) ||
- (allowsUnalignedMemoryAccesses(MVT::f64, &Fast) && Fast))) {
+ (allowsUnalignedMemoryAccesses(MVT::f64, 0, &Fast) && Fast))) {
return MVT::f64;
}
}
return false;
}
+bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
+ if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
+ return false;
+
+ if (!isTypeLegal(EVT::getEVT(Ty1)))
+ return false;
+
+ assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop");
+
+ // Assuming the caller doesn't have a zeroext or signext return parameter,
+ // truncation all the way down to i1 is valid.
+ return true;
+}
+
+
static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
if (V < 0)
return false;
case 'r':
return RCPair(0U, &ARM::GPRRegClass);
case 'w':
+ if (VT == MVT::Other)
+ break;
if (VT == MVT::f32)
return RCPair(0U, &ARM::SPRRegClass);
if (VT.getSizeInBits() == 64)
return RCPair(0U, &ARM::QPRRegClass);
break;
case 'x':
+ if (VT == MVT::Other)
+ break;
if (VT == MVT::f32)
return RCPair(0U, &ARM::SPR_8RegClass);
if (VT.getSizeInBits() == 64)
return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
+SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
+ assert(Subtarget->isTargetAEABI() && "Register-based DivRem lowering only");
+ unsigned Opcode = Op->getOpcode();
+ assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
+ "Invalid opcode for Div/Rem lowering");
+ bool isSigned = (Opcode == ISD::SDIVREM);
+ EVT VT = Op->getValueType(0);
+ Type *Ty = VT.getTypeForEVT(*DAG.getContext());
+
+ RTLIB::Libcall LC;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected request for libcall!");
+ case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
+ case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
+ case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
+ case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
+ }
+
+ SDValue InChain = DAG.getEntryNode();
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) {
+ EVT ArgVT = Op->getOperand(i).getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = Op->getOperand(i);
+ Entry.Ty = ArgTy;
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+
+ SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
+ getPointerTy());
+
+ Type *RetTy = (Type*)StructType::get(Ty, Ty, NULL);
+
+ SDLoc dl(Op);
+ TargetLowering::
+ CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, true,
+ 0, getLibcallCallingConv(LC), /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, dl);
+ std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
+
+ return CallInfo.first;
+}
+
bool
ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// The ARM target isn't yet aware of offsets.
Info.writeMem = true;
return true;
}
+ case Intrinsic::arm_ldrex: {
+ PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::getVT(PtrTy->getElementType());
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType());
+ Info.vol = true;
+ Info.readMem = true;
+ Info.writeMem = false;
+ return true;
+ }
+ case Intrinsic::arm_strex: {
+ PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::getVT(PtrTy->getElementType());
+ Info.ptrVal = I.getArgOperand(1);
+ Info.offset = 0;
+ Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType());
+ Info.vol = true;
+ Info.readMem = false;
+ Info.writeMem = true;
+ return true;
+ }
case Intrinsic::arm_strexd: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::i64;
return false;
}
+
+/// \brief Returns true if it is beneficial to convert a load of a constant
+/// to just the constant itself.
+bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
+ Type *Ty) const {
+ assert(Ty->isIntegerTy());
+
+ unsigned Bits = Ty->getPrimitiveSizeInBits();
+ if (Bits == 0 || Bits > 32)
+ return false;
+ return true;
+}