cl::desc("Enable / disable ARM interworking (for debugging only)"),
cl::init(true));
+namespace llvm {
+ class ARMCCState : public CCState {
+ public:
+ ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
+ const TargetMachine &TM, SmallVector<CCValAssign, 16> &locs,
+ LLVMContext &C, ParmContext PC)
+ : CCState(CC, isVarArg, MF, TM, locs, C) {
+ assert(((PC == Call) || (PC == Prologue)) &&
+ "ARMCCState users must specify whether their context is call"
+ "or prologue generation.");
+ CallOrPrologue = PC;
+ }
+ };
+}
+
+// The APCS parameter registers.
+static const unsigned GPRArgRegs[] = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3
+};
+
void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
EVT PromotedBitwiseVT) {
if (VT != PromotedLdStVT) {
setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS);
+
+ // Memory operations
+ // RTABI chapter 4.3.4
+ setLibcallName(RTLIB::MEMCPY, "__aeabi_memcpy");
+ setLibcallName(RTLIB::MEMMOVE, "__aeabi_memmove");
+ setLibcallName(RTLIB::MEMSET, "__aeabi_memset");
}
if (Subtarget->isThumb1Only())
setOperationAction(ISD::MUL, MVT::v8i16, Custom);
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
+ // Custom handling for some vector types to avoid expensive expansions
+ setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
+ setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
+ setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
+ setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
setOperationAction(ISD::VSETCC, MVT::v1i64, Expand);
setOperationAction(ISD::VSETCC, MVT::v2i64, Expand);
+ // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
+ // a destination type that is wider than the source.
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
setTargetDAGCombine(ISD::INTRINSIC_VOID);
setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
}
// i64 operation support.
+ setOperationAction(ISD::MUL, MVT::i64, Expand);
+ setOperationAction(ISD::MULHU, MVT::i32, Expand);
if (Subtarget->isThumb1Only()) {
- setOperationAction(ISD::MUL, MVT::i64, Expand);
- setOperationAction(ISD::MULHU, MVT::i32, Expand);
- setOperationAction(ISD::MULHS, MVT::i32, Expand);
setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
- } else {
- setOperationAction(ISD::MUL, MVT::i64, Expand);
- setOperationAction(ISD::MULHU, MVT::i32, Expand);
- if (!Subtarget->hasV6Ops())
- setOperationAction(ISD::MULHS, MVT::i32, Expand);
}
+ if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops())
+ setOperationAction(ISD::MULHS, MVT::i32, Expand);
+
setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i8, Expand);
setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i16, Expand);
setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
// Since the libcalls include locking, fold in the fences
setShouldFoldAtomicFences(true);
}
setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
setOperationAction(ISD::EH_SJLJ_DISPATCHSETUP, MVT::Other, Custom);
+ setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
}
setOperationAction(ISD::SETCC, MVT::i32, Expand);
setMinStackArgumentAlignment(4);
benefitFromCodePlacementOpt = true;
+
+ setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
}
// FIXME: It might make sense to define the representative register class as the
// pressure of the register class's representative and all of it's super
// classes' representatives transitively. We have not implemented this because
// of the difficulty prior to coalescing of modeling operand register classes
-// due to the common occurence of cross class copies and subregister insertions
+// due to the common occurrence of cross class copies and subregister insertions
// and extractions.
std::pair<const TargetRegisterClass*, uint8_t>
ARMTargetLowering::findRepresentativeClass(EVT VT) const{
case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
case ARMISD::CMOV: return "ARMISD::CMOV";
- case ARMISD::CNEG: return "ARMISD::CNEG";
case ARMISD::RBIT: return "ARMISD::RBIT";
case ARMISD::VZIP: return "ARMISD::VZIP";
case ARMISD::VUZP: return "ARMISD::VUZP";
case ARMISD::VTRN: return "ARMISD::VTRN";
+ case ARMISD::VTBL1: return "ARMISD::VTBL1";
+ case ARMISD::VTBL2: return "ARMISD::VTBL2";
case ARMISD::VMULLs: return "ARMISD::VMULLs";
case ARMISD::VMULLu: return "ARMISD::VMULLu";
case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
case ARMISD::BFI: return "ARMISD::BFI";
case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
+ case ARMISD::VBSL: return "ARMISD::VBSL";
case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP";
case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP";
return ARM::createFastISel(funcInfo);
}
-/// getFunctionAlignment - Return the Log2 alignment of this function.
-unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const {
- return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 1 : 2;
-}
-
/// getMaximalGlobalOffset - Returns the maximal possible offset which can
/// be used for loads / stores from the global.
unsigned ARMTargetLowering::getMaximalGlobalOffset() const {
return Sched::RegPressure;
}
-// FIXME: Move to RegInfo
-unsigned
-ARMTargetLowering::getRegPressureLimit(const TargetRegisterClass *RC,
- MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
- switch (RC->getID()) {
- default:
- return 0;
- case ARM::tGPRRegClassID:
- return TFI->hasFP(MF) ? 4 : 5;
- case ARM::GPRRegClassID: {
- unsigned FP = TFI->hasFP(MF) ? 1 : 0;
- return 10 - FP - (Subtarget->isR9Reserved() ? 1 : 0);
- }
- case ARM::SPRRegClassID: // Currently not used as 'rep' register class.
- case ARM::DPRRegClassID:
- return 32 - 10;
- }
-}
-
//===----------------------------------------------------------------------===//
// Lowering Code
//===----------------------------------------------------------------------===//
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
- RVLocs, *DAG.getContext());
+ ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext(), Call);
CCInfo.AnalyzeCallResult(Ins,
CCAssignFnForNode(CallConv, /* Return*/ true,
isVarArg));
return Chain;
}
-/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
-/// by "Src" to address "Dst" of size "Size". Alignment information is
-/// specified by the specific parameter attribute. The copy will be passed as
-/// a byval function parameter.
-/// Sometimes what we are copying is the end of a larger object, the part that
-/// does not fit in registers.
-static SDValue
-CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
- ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
- DebugLoc dl) {
- SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
- return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
- /*isVolatile=*/false, /*AlwaysInline=*/false,
- MachinePointerInfo(0), MachinePointerInfo(0));
-}
-
/// LowerMemOpCallTo - Store the argument to the stack.
SDValue
ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
unsigned LocMemOffset = VA.getLocMemOffset();
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
- if (Flags.isByVal())
- return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
-
return DAG.getStore(Chain, dl, Arg, PtrOff,
MachinePointerInfo::getStack(LocMemOffset),
false, false, 0);
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
- *DAG.getContext());
+ ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext(), Call);
CCInfo.AnalyzeCallOperands(Outs,
CCAssignFnForNode(CallConv, /* Return*/ false,
isVarArg));
CCValAssign &VA = ArgLocs[i];
SDValue Arg = OutVals[realArgIdx];
ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
+ bool isByVal = Flags.isByVal();
// Promote the value if needed.
switch (VA.getLocInfo()) {
}
} else if (VA.isRegLoc()) {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else if (isByVal) {
+ assert(VA.isMemLoc());
+ unsigned offset = 0;
+
+ // True if this byval aggregate will be split between registers
+ // and memory.
+ if (CCInfo.isFirstByValRegValid()) {
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ unsigned int i, j;
+ for (i = 0, j = CCInfo.getFirstByValReg(); j < ARM::R4; i++, j++) {
+ SDValue Const = DAG.getConstant(4*i, MVT::i32);
+ SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
+ SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
+ MachinePointerInfo(),
+ false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(j, Load));
+ }
+ offset = ARM::R4 - CCInfo.getFirstByValReg();
+ CCInfo.clearFirstByValReg();
+ }
+
+ unsigned LocMemOffset = VA.getLocMemOffset();
+ SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset);
+ SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
+ StkPtrOff);
+ SDValue SrcOffset = DAG.getIntPtrConstant(4*offset);
+ SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset);
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset,
+ MVT::i32);
+ MemOpChains.push_back(DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
+ Flags.getByValAlign(),
+ /*isVolatile=*/false,
+ /*AlwaysInline=*/false,
+ MachinePointerInfo(0),
+ MachinePointerInfo(0)));
+
} else if (!IsSibCall) {
assert(VA.isMemLoc());
// than necessary, because it means that each store effectively depends
// on every argument instead of just those arguments it would clobber.
- // Do not flag preceeding copytoreg stuff together with the following stuff.
+ // Do not flag preceding copytoreg stuff together with the following stuff.
InFlag = SDValue();
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
dl, DAG, InVals);
}
+/// HandleByVal - Every parameter *after* a byval parameter is passed
+/// on the stack. Remember the next parameter register to allocate,
+/// and then confiscate the rest of the parameter registers to insure
+/// this.
+void
+llvm::ARMTargetLowering::HandleByVal(CCState *State, unsigned &size) const {
+ unsigned reg = State->AllocateReg(GPRArgRegs, 4);
+ assert((State->getCallOrPrologue() == Prologue ||
+ State->getCallOrPrologue() == Call) &&
+ "unhandled ParmContext");
+ if ((!State->isFirstByValRegValid()) &&
+ (ARM::R0 <= reg) && (reg <= ARM::R3)) {
+ State->setFirstByValReg(reg);
+ // At a call site, a byval parameter that is split between
+ // registers and memory needs its size truncated here. In a
+ // function prologue, such byval parameters are reassembled in
+ // memory, and are not truncated.
+ if (State->getCallOrPrologue() == Call) {
+ unsigned excess = 4 * (ARM::R4 - reg);
+ assert(size >= excess && "expected larger existing stack allocation");
+ size -= excess;
+ }
+ }
+ // Confiscate any remaining parameter registers to preclude their
+ // assignment to subsequent parameters.
+ while (State->AllocateReg(GPRArgRegs, 4))
+ ;
+}
+
/// MatchingStackOffset - Return true if the given stack call argument is
/// already available in the same position (relatively) of the caller's
/// incoming argument stack.
// results are returned in the same way as what the caller expects.
if (!CCMatch) {
SmallVector<CCValAssign, 16> RVLocs1;
- CCState CCInfo1(CalleeCC, false, getTargetMachine(),
- RVLocs1, *DAG.getContext());
+ ARMCCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs1, *DAG.getContext(), Call);
CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg));
SmallVector<CCValAssign, 16> RVLocs2;
- CCState CCInfo2(CallerCC, false, getTargetMachine(),
- RVLocs2, *DAG.getContext());
+ ARMCCState CCInfo2(CallerCC, false, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs2, *DAG.getContext(), Call);
CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg));
if (RVLocs1.size() != RVLocs2.size())
// Check if stack adjustment is needed. For now, do not do this if any
// argument is passed on the stack.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CalleeCC, isVarArg, getTargetMachine(),
- ArgLocs, *DAG.getContext());
+ ARMCCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext(), Call);
CCInfo.AnalyzeCallOperands(Outs,
CCAssignFnForNode(CalleeCC, false, isVarArg));
if (CCInfo.getNextStackOffset()) {
SmallVector<CCValAssign, 16> RVLocs;
// CCState - Info about the registers and stack slots.
- CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs,
- *DAG.getContext());
+ ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext(), Call);
// Analyze outgoing return values.
CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true,
return HasRet;
}
+bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
+ if (!EnableARMTailCalls)
+ return false;
+
+ if (!CI->isTailCall())
+ return false;
+
+ return !Subtarget->isThumb1Only();
+}
+
// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
// one of the above mentioned nodes. It has to be wrapped because otherwise
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- if (Subtarget->useMovt()) {
+ // FIXME: Enable this for static codegen when tool issues are fixed.
+ if (Subtarget->useMovt() && RelocM != Reloc::Static) {
++NumMovwMovt;
// FIXME: Once remat is capable of dealing with instructions with register
// operands, expand this into two nodes.
}
return Result;
}
+ case Intrinsic::arm_neon_vmulls:
+ case Intrinsic::arm_neon_vmullu: {
+ unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
+ ? ARMISD::VMULLs : ARMISD::VMULLu;
+ return DAG.getNode(NewOpc, Op.getDebugLoc(), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ }
}
}
// ARMv7 with MP extension has PLDW.
return Op.getOperand(0);
- if (Subtarget->isThumb())
+ unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
+ if (Subtarget->isThumb()) {
// Invert the bits.
isRead = ~isRead & 1;
- unsigned isData = Subtarget->isThumb() ? 0 : 1;
+ isData = ~isData & 1;
+ }
- // Currently there is no intrinsic that matches pli.
return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
Op.getOperand(1), DAG.getConstant(isRead, MVT::i32),
DAG.getConstant(isData, MVT::i32));
RC = ARM::GPRRegisterClass;
// Transform the arguments stored in physical registers into virtual ones.
- unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC, dl);
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
SDValue ArgValue2;
MachinePointerInfo::getFixedStack(FI),
false, false, 0);
} else {
- Reg = MF.addLiveIn(NextVA.getLocReg(), RC, dl);
+ Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
}
return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
}
+void
+ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
+ unsigned &VARegSize, unsigned &VARegSaveSize)
+ const {
+ unsigned NumGPRs;
+ if (CCInfo.isFirstByValRegValid())
+ NumGPRs = ARM::R4 - CCInfo.getFirstByValReg();
+ else {
+ unsigned int firstUnalloced;
+ firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs,
+ sizeof(GPRArgRegs) /
+ sizeof(GPRArgRegs[0]));
+ NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0;
+ }
+
+ unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
+ VARegSize = NumGPRs * 4;
+ VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1);
+}
+
+// The remaining GPRs hold either the beginning of variable-argument
+// data, or the beginning of an aggregate passed by value (usuall
+// byval). Either way, we allocate stack slots adjacent to the data
+// provided by our caller, and store the unallocated registers there.
+// If this is a variadic function, the va_list pointer will begin with
+// these values; otherwise, this reassembles a (byval) structure that
+// was split between registers and memory.
+void
+ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
+ DebugLoc dl, SDValue &Chain,
+ unsigned ArgOffset) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned firstRegToSaveIndex;
+ if (CCInfo.isFirstByValRegValid())
+ firstRegToSaveIndex = CCInfo.getFirstByValReg() - ARM::R0;
+ else {
+ firstRegToSaveIndex = CCInfo.getFirstUnallocated
+ (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0]));
+ }
+
+ unsigned VARegSize, VARegSaveSize;
+ computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize);
+ if (VARegSaveSize) {
+ // If this function is vararg, store any remaining integer argument regs
+ // to their spots on the stack so that they may be loaded by deferencing
+ // the result of va_next.
+ AFI->setVarArgsRegSaveSize(VARegSaveSize);
+ AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(VARegSaveSize,
+ ArgOffset + VARegSaveSize
+ - VARegSize,
+ false));
+ SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(),
+ getPointerTy());
+
+ SmallVector<SDValue, 4> MemOps;
+ for (; firstRegToSaveIndex < 4; ++firstRegToSaveIndex) {
+ TargetRegisterClass *RC;
+ if (AFI->isThumb1OnlyFunction())
+ RC = ARM::tGPRRegisterClass;
+ else
+ RC = ARM::GPRRegisterClass;
+
+ unsigned VReg = MF.addLiveIn(GPRArgRegs[firstRegToSaveIndex], RC);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
+ SDValue Store =
+ DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo::getFixedStack(AFI->getVarArgsFrameIndex()),
+ false, false, 0);
+ MemOps.push_back(Store);
+ FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
+ DAG.getConstant(4, getPointerTy()));
+ }
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOps[0], MemOps.size());
+ } else
+ // This will point to the next argument passed via stack.
+ AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, true));
+}
+
SDValue
ARMTargetLowering::LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals)
const {
-
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
- *DAG.getContext());
+ ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext(), Prologue);
CCInfo.AnalyzeFormalArguments(Ins,
CCAssignFnForNode(CallConv, /* Return*/ false,
isVarArg));
SmallVector<SDValue, 16> ArgValues;
+ int lastInsIndex = -1;
+ SDValue ArgValue;
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
if (VA.isRegLoc()) {
EVT RegVT = VA.getLocVT();
- SDValue ArgValue;
if (VA.needsCustom()) {
// f64 and vector types are split up into multiple registers or
// combinations of registers and stack slots.
llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
// Transform the arguments in physical registers into virtual ones.
- unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC, dl);
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
}
assert(VA.isMemLoc());
assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
- unsigned ArgSize = VA.getLocVT().getSizeInBits()/8;
- int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), true);
+ int index = ArgLocs[i].getValNo();
+
+ // Some Ins[] entries become multiple ArgLoc[] entries.
+ // Process them only once.
+ if (index != lastInsIndex)
+ {
+ ISD::ArgFlagsTy Flags = Ins[index].Flags;
+ // FIXME: For now, all byval parameter objects are marked mutable.
+ // This can be changed with more analysis.
+ // In case of tail call optimization mark all arguments mutable.
+ // Since they could be overwritten by lowering of arguments in case of
+ // a tail call.
+ if (Flags.isByVal()) {
+ unsigned VARegSize, VARegSaveSize;
+ computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize);
+ VarArgStyleRegisters(CCInfo, DAG, dl, Chain, 0);
+ unsigned Bytes = Flags.getByValSize() - VARegSize;
+ if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
+ int FI = MFI->CreateFixedObject(Bytes,
+ VA.getLocMemOffset(), false);
+ InVals.push_back(DAG.getFrameIndex(FI, getPointerTy()));
+ } else {
+ int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
+ VA.getLocMemOffset(), true);
- // Create load nodes to retrieve arguments from the stack.
- SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
- InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
- MachinePointerInfo::getFixedStack(FI),
- false, false, 0));
+ // Create load nodes to retrieve arguments from the stack.
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, 0));
+ }
+ lastInsIndex = index;
+ }
}
}
// varargs
- if (isVarArg) {
- static const unsigned GPRArgRegs[] = {
- ARM::R0, ARM::R1, ARM::R2, ARM::R3
- };
-
- unsigned NumGPRs = CCInfo.getFirstUnallocated
- (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0]));
-
- unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
- unsigned VARegSize = (4 - NumGPRs) * 4;
- unsigned VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1);
- unsigned ArgOffset = CCInfo.getNextStackOffset();
- if (VARegSaveSize) {
- // If this function is vararg, store any remaining integer argument regs
- // to their spots on the stack so that they may be loaded by deferencing
- // the result of va_next.
- AFI->setVarArgsRegSaveSize(VARegSaveSize);
- AFI->setVarArgsFrameIndex(
- MFI->CreateFixedObject(VARegSaveSize,
- ArgOffset + VARegSaveSize - VARegSize,
- false));
- SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(),
- getPointerTy());
-
- SmallVector<SDValue, 4> MemOps;
- for (; NumGPRs < 4; ++NumGPRs) {
- TargetRegisterClass *RC;
- if (AFI->isThumb1OnlyFunction())
- RC = ARM::tGPRRegisterClass;
- else
- RC = ARM::GPRRegisterClass;
-
- unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC, dl);
- SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
- SDValue Store =
- DAG.getStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo::getFixedStack(AFI->getVarArgsFrameIndex()),
- false, false, 0);
- MemOps.push_back(Store);
- FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
- DAG.getConstant(4, getPointerTy()));
- }
- if (!MemOps.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &MemOps[0], MemOps.size());
- } else
- // This will point to the next argument passed via stack.
- AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, true));
- }
+ if (isVarArg)
+ VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getNextStackOffset());
return Chain;
}
return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
}
+/// duplicateCmp - Glue values can have only one use, so this function
+/// duplicates a comparison node.
+SDValue
+ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
+ unsigned Opc = Cmp.getOpcode();
+ DebugLoc DL = Cmp.getDebugLoc();
+ if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
+ return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
+
+ assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
+ Cmp = Cmp.getOperand(0);
+ Opc = Cmp.getOpcode();
+ if (Opc == ARMISD::CMPFP)
+ Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
+ else {
+ assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
+ Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0));
+ }
+ return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
+}
+
SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue Cond = Op.getOperand(0);
SDValue SelectTrue = Op.getOperand(1);
}
if (True.getNode() && False.getNode()) {
- EVT VT = Cond.getValueType();
+ EVT VT = Op.getValueType();
SDValue ARMcc = Cond.getOperand(2);
SDValue CCR = Cond.getOperand(3);
- SDValue Cmp = Cond.getOperand(4);
+ SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
+ assert(True.getValueType() == VT);
return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp);
}
}
// If one of the operand is zero, it's safe to ignore the NaN case since
// we only care about equality comparisons.
(SeenZero || (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS)))) {
- // If unsafe fp math optimization is enabled and there are no othter uses of
- // the CMP operands, and the condition code is EQ oe NE, we can optimize it
+ // If unsafe fp math optimization is enabled and there are no other uses of
+ // the CMP operands, and the condition code is EQ or NE, we can optimize it
// to an integer comparison.
if (CC == ISD::SETOEQ)
CC = ISD::SETEQ;
return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
}
+static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+
+ EVT OperandVT = Op.getOperand(0).getValueType();
+ assert(OperandVT == MVT::v4i16 && "Invalid type for custom lowering!");
+ if (VT != MVT::v4f32)
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ unsigned CastOpc;
+ unsigned Opc;
+ switch (Op.getOpcode()) {
+ default:
+ assert(0 && "Invalid opcode!");
+ case ISD::SINT_TO_FP:
+ CastOpc = ISD::SIGN_EXTEND;
+ Opc = ISD::SINT_TO_FP;
+ break;
+ case ISD::UINT_TO_FP:
+ CastOpc = ISD::ZERO_EXTEND;
+ Opc = ISD::UINT_TO_FP;
+ break;
+ }
+
+ Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0));
+ return DAG.getNode(Opc, dl, VT, Op);
+}
+
static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
+ if (VT.isVector())
+ return LowerVectorINT_TO_FP(Op, DAG);
+
DebugLoc dl = Op.getDebugLoc();
unsigned Opc;
DebugLoc dl = Op.getDebugLoc();
EVT VT = Op.getValueType();
EVT SrcVT = Tmp1.getValueType();
- SDValue AbsVal = DAG.getNode(ISD::FABS, dl, VT, Tmp0);
- SDValue ARMcc = DAG.getConstant(ARMCC::LT, MVT::i32);
- SDValue FP0 = DAG.getConstantFP(0.0, SrcVT);
- SDValue Cmp = getVFPCmp(Tmp1, FP0, DAG, dl);
- SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMcc, CCR, Cmp);
+ bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
+ Tmp0.getOpcode() == ARMISD::VMOVDRR;
+ bool UseNEON = !InGPR && Subtarget->hasNEON();
+
+ if (UseNEON) {
+ // Use VBSL to copy the sign bit.
+ unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80);
+ SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
+ DAG.getTargetConstant(EncodedVal, MVT::i32));
+ EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
+ if (VT == MVT::f64)
+ Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT,
+ DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
+ DAG.getConstant(32, MVT::i32));
+ else /*if (VT == MVT::f32)*/
+ Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
+ if (SrcVT == MVT::f32) {
+ Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
+ if (VT == MVT::f64)
+ Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT,
+ DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
+ DAG.getConstant(32, MVT::i32));
+ } else if (VT == MVT::f32)
+ Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
+ DAG.getConstant(32, MVT::i32));
+ Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
+
+ SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff),
+ MVT::i32);
+ AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
+ SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
+ DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
+
+ SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
+ DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
+ DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
+ if (VT == MVT::f32) {
+ Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
+ Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
+ DAG.getConstant(0, MVT::i32));
+ } else {
+ Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
+ }
+
+ return Res;
+ }
+
+ // Bitcast operand 1 to i32.
+ if (SrcVT == MVT::f64)
+ Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
+ &Tmp1, 1).getValue(1);
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
+
+ // Or in the signbit with integer operations.
+ SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32);
+ SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32);
+ Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
+ if (VT == MVT::f32) {
+ Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
+ DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
+ DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
+ }
+
+ // f64: Or the high part with signbit and then combine two parts.
+ Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
+ &Tmp0, 1);
+ SDValue Lo = Tmp0.getValue(0);
+ SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
+ Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
+ return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
}
SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
}
// Return LR, which contains the return address. Mark it an implicit live-in.
- unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32), dl);
+ unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
}
return true;
}
+static bool isVTBLMask(const SmallVectorImpl<int> &M, EVT VT) {
+ // We can handle <8 x i8> vector shuffles. If the index in the mask is out of
+ // range, then 0 is placed into the resulting vector. So pretty much any mask
+ // of 8 elements can work here.
+ return VT == MVT::v8i8 && M.size() == 8;
+}
+
static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT,
unsigned &WhichResult) {
unsigned EltSz = VT.getVectorElementType().getSizeInBits();
isVREVMask(M, VT, 32) ||
isVREVMask(M, VT, 16) ||
isVEXTMask(M, VT, ReverseVEXT, Imm) ||
+ isVTBLMask(M, VT) ||
isVTRNMask(M, VT, WhichResult) ||
isVUZPMask(M, VT, WhichResult) ||
isVZIPMask(M, VT, WhichResult) ||
switch (OpNum) {
default: llvm_unreachable("Unknown shuffle opcode!");
case OP_VREV:
- return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
+ // VREV divides the vector in half and swaps within the half.
+ if (VT.getVectorElementType() == MVT::i32 ||
+ VT.getVectorElementType() == MVT::f32)
+ return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
+ // vrev <4 x i16> -> VREV32
+ if (VT.getVectorElementType() == MVT::i16)
+ return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS);
+ // vrev <4 x i8> -> VREV16
+ assert(VT.getVectorElementType() == MVT::i8);
+ return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS);
case OP_VDUP0:
case OP_VDUP1:
case OP_VDUP2:
}
}
+static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
+ SmallVectorImpl<int> &ShuffleMask,
+ SelectionDAG &DAG) {
+ // Check to see if we can use the VTBL instruction.
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ DebugLoc DL = Op.getDebugLoc();
+
+ SmallVector<SDValue, 8> VTBLMask;
+ for (SmallVectorImpl<int>::iterator
+ I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I)
+ VTBLMask.push_back(DAG.getConstant(*I, MVT::i32));
+
+ if (V2.getNode()->getOpcode() == ISD::UNDEF)
+ return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8,
+ &VTBLMask[0], 8));
+
+ return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8,
+ &VTBLMask[0], 8));
+}
+
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
return DAG.getNode(ISD::BITCAST, dl, VT, Val);
}
+ if (VT == MVT::v8i8) {
+ SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG);
+ if (NewOp.getNode())
+ return NewOp;
+ }
+
return SDValue();
}
MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts);
}
+static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
+ unsigned Opcode = N->getOpcode();
+ if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
+ SDNode *N0 = N->getOperand(0).getNode();
+ SDNode *N1 = N->getOperand(1).getNode();
+ return N0->hasOneUse() && N1->hasOneUse() &&
+ isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
+ }
+ return false;
+}
+
+static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
+ unsigned Opcode = N->getOpcode();
+ if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
+ SDNode *N0 = N->getOperand(0).getNode();
+ SDNode *N1 = N->getOperand(1).getNode();
+ return N0->hasOneUse() && N1->hasOneUse() &&
+ isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
+ }
+ return false;
+}
+
static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
// Multiplications are only custom-lowered for 128-bit vectors so that
// VMULL can be detected. Otherwise v2i64 multiplications are not legal.
SDNode *N0 = Op.getOperand(0).getNode();
SDNode *N1 = Op.getOperand(1).getNode();
unsigned NewOpc = 0;
- if (isSignExtended(N0, DAG) && isSignExtended(N1, DAG))
+ bool isMLA = false;
+ bool isN0SExt = isSignExtended(N0, DAG);
+ bool isN1SExt = isSignExtended(N1, DAG);
+ if (isN0SExt && isN1SExt)
NewOpc = ARMISD::VMULLs;
- else if (isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG))
- NewOpc = ARMISD::VMULLu;
- else if (VT == MVT::v2i64)
- // Fall through to expand this. It is not legal.
- return SDValue();
- else
- // Other vector multiplications are legal.
- return Op;
+ else {
+ bool isN0ZExt = isZeroExtended(N0, DAG);
+ bool isN1ZExt = isZeroExtended(N1, DAG);
+ if (isN0ZExt && isN1ZExt)
+ NewOpc = ARMISD::VMULLu;
+ else if (isN1SExt || isN1ZExt) {
+ // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
+ // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
+ if (isN1SExt && isAddSubSExt(N0, DAG)) {
+ NewOpc = ARMISD::VMULLs;
+ isMLA = true;
+ } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
+ NewOpc = ARMISD::VMULLu;
+ isMLA = true;
+ } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
+ std::swap(N0, N1);
+ NewOpc = ARMISD::VMULLu;
+ isMLA = true;
+ }
+ }
+
+ if (!NewOpc) {
+ if (VT == MVT::v2i64)
+ // Fall through to expand this. It is not legal.
+ return SDValue();
+ else
+ // Other vector multiplications are legal.
+ return Op;
+ }
+ }
// Legalize to a VMULL instruction.
DebugLoc DL = Op.getDebugLoc();
- SDValue Op0 = SkipExtension(N0, DAG);
+ SDValue Op0;
SDValue Op1 = SkipExtension(N1, DAG);
+ if (!isMLA) {
+ Op0 = SkipExtension(N0, DAG);
+ assert(Op0.getValueType().is64BitVector() &&
+ Op1.getValueType().is64BitVector() &&
+ "unexpected types for extended operands to VMULL");
+ return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
+ }
+
+ // Optimizing (zext A + zext B) * C, to (VMULL A, C) + (VMULL B, C) during
+ // isel lowering to take advantage of no-stall back to back vmul + vmla.
+ // vmull q0, d4, d6
+ // vmlal q0, d5, d6
+ // is faster than
+ // vaddl q0, d4, d5
+ // vmovl q1, d6
+ // vmul q0, q0, q1
+ SDValue N00 = SkipExtension(N0->getOperand(0).getNode(), DAG);
+ SDValue N01 = SkipExtension(N0->getOperand(1).getNode(), DAG);
+ EVT Op1VT = Op1.getValueType();
+ return DAG.getNode(N0->getOpcode(), DL, VT,
+ DAG.getNode(NewOpc, DL, VT,
+ DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
+ DAG.getNode(NewOpc, DL, VT,
+ DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
+}
+
+static SDValue
+LowerSDIV_v4i8(SDValue X, SDValue Y, DebugLoc dl, SelectionDAG &DAG) {
+ // Convert to float
+ // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
+ // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
+ X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X);
+ Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y);
+ X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X);
+ Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y);
+ // Get reciprocal estimate.
+ // float4 recip = vrecpeq_f32(yf);
+ Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+ DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), Y);
+ // Because char has a smaller range than uchar, we can actually get away
+ // without any newton steps. This requires that we use a weird bias
+ // of 0xb000, however (again, this has been exhaustively tested).
+ // float4 result = as_float4(as_int4(xf*recip) + 0xb000);
+ X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);
+ X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);
+ Y = DAG.getConstant(0xb000, MVT::i32);
+ Y = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Y, Y, Y, Y);
+ X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);
+ X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);
+ // Convert back to short.
+ X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X);
+ X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X);
+ return X;
+}
+
+static SDValue
+LowerSDIV_v4i16(SDValue N0, SDValue N1, DebugLoc dl, SelectionDAG &DAG) {
+ SDValue N2;
+ // Convert to float.
+ // float4 yf = vcvt_f32_s32(vmovl_s16(y));
+ // float4 xf = vcvt_f32_s32(vmovl_s16(x));
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0);
+ N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1);
+ N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
+ N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
+
+ // Use reciprocal estimate and one refinement step.
+ // float4 recip = vrecpeq_f32(yf);
+ // recip *= vrecpsq_f32(yf, recip);
+ N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+ DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), N1);
+ N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+ DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
+ N1, N2);
+ N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
+ // Because short has a smaller range than ushort, we can actually get away
+ // with only a single newton step. This requires that we use a weird bias
+ // of 89, however (again, this has been exhaustively tested).
+ // float4 result = as_float4(as_int4(xf*recip) + 0x89);
+ N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
+ N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
+ N1 = DAG.getConstant(0x89, MVT::i32);
+ N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1);
+ N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
+ N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
+ // Convert back to integer and return.
+ // return vmovn_s32(vcvt_s32_f32(result));
+ N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
+ N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
+ return N0;
+}
+
+static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
+ "unexpected type for custom-lowering ISD::SDIV");
+
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ SDValue N2, N3;
+
+ if (VT == MVT::v8i8) {
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0);
+ N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1);
+
+ N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
+ DAG.getIntPtrConstant(4));
+ N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
+ DAG.getIntPtrConstant(4));
+ N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
+ DAG.getIntPtrConstant(0));
+ N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
+ DAG.getIntPtrConstant(0));
- assert(Op0.getValueType().is64BitVector() &&
- Op1.getValueType().is64BitVector() &&
- "unexpected types for extended operands to VMULL");
- return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
+ N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16
+ N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16
+
+ N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
+ N0 = LowerCONCAT_VECTORS(N0, DAG);
+
+ N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0);
+ return N0;
+ }
+ return LowerSDIV_v4i16(N0, N1, dl, DAG);
+}
+
+static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
+ "unexpected type for custom-lowering ISD::UDIV");
+
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ SDValue N2, N3;
+
+ if (VT == MVT::v8i8) {
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0);
+ N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1);
+
+ N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
+ DAG.getIntPtrConstant(4));
+ N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
+ DAG.getIntPtrConstant(4));
+ N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
+ DAG.getIntPtrConstant(0));
+ N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
+ DAG.getIntPtrConstant(0));
+
+ N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16
+ N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16
+
+ N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
+ N0 = LowerCONCAT_VECTORS(N0, DAG);
+
+ N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8,
+ DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, MVT::i32),
+ N0);
+ return N0;
+ }
+
+ // v4i16 sdiv ... Convert to float.
+ // float4 yf = vcvt_f32_s32(vmovl_u16(y));
+ // float4 xf = vcvt_f32_s32(vmovl_u16(x));
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0);
+ N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1);
+ N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
+ SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
+
+ // Use reciprocal estimate and two refinement steps.
+ // float4 recip = vrecpeq_f32(yf);
+ // recip *= vrecpsq_f32(yf, recip);
+ // recip *= vrecpsq_f32(yf, recip);
+ N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+ DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), BN1);
+ N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+ DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
+ BN1, N2);
+ N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
+ N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+ DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
+ BN1, N2);
+ N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
+ // Simply multiplying by the reciprocal estimate can leave us a few ulps
+ // too low, so we add 2 ulps (exhaustive testing shows that this is enough,
+ // and that it will never cause us to return an answer too large).
+ // float4 result = as_float4(as_int4(xf*recip) + 2);
+ N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
+ N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
+ N1 = DAG.getConstant(2, MVT::i32);
+ N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1);
+ N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
+ N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
+ // Convert back to integer and return.
+ // return vmovn_u32(vcvt_s32_f32(result));
+ N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
+ N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
+ return N0;
}
SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::GlobalAddress:
return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) :
LowerGlobalAddressELF(Op, DAG);
- case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::BR_CC: return LowerBR_CC(Op, DAG);
case ISD::EH_SJLJ_DISPATCHSETUP: return LowerEH_SJLJ_DISPATCHSETUP(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
Subtarget);
- case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG);
+ case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG);
case ISD::SHL:
case ISD::SRL:
case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget);
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
case ISD::MUL: return LowerMUL(Op, DAG);
+ case ISD::SDIV: return LowerSDIV(Op, DAG);
+ case ISD::UDIV: return LowerUDIV(Op, DAG);
}
return SDValue();
}
unsigned ptr = MI->getOperand(1).getReg();
unsigned oldval = MI->getOperand(2).getReg();
unsigned newval = MI->getOperand(3).getReg();
- unsigned scratch = BB->getParent()->getRegInfo()
- .createVirtualRegister(ARM::GPRRegisterClass);
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
bool isThumb2 = Subtarget->isThumb2();
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ unsigned scratch =
+ MRI.createVirtualRegister(isThumb2 ? ARM::rGPRRegisterClass
+ : ARM::GPRRegisterClass);
+
+ if (isThumb2) {
+ MRI.constrainRegClass(dest, ARM::rGPRRegisterClass);
+ MRI.constrainRegClass(oldval, ARM::rGPRRegisterClass);
+ MRI.constrainRegClass(newval, ARM::rGPRRegisterClass);
+ }
+
unsigned ldrOpc, strOpc;
switch (Size) {
default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
case 1:
ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
- strOpc = isThumb2 ? ARM::t2LDREXB : ARM::STREXB;
+ strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
break;
case 2:
ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
unsigned ptr = MI->getOperand(1).getReg();
unsigned incr = MI->getOperand(2).getReg();
DebugLoc dl = MI->getDebugLoc();
-
bool isThumb2 = Subtarget->isThumb2();
+
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ if (isThumb2) {
+ MRI.constrainRegClass(dest, ARM::rGPRRegisterClass);
+ MRI.constrainRegClass(ptr, ARM::rGPRRegisterClass);
+ }
+
unsigned ldrOpc, strOpc;
switch (Size) {
default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
- MachineRegisterInfo &RegInfo = MF->getRegInfo();
- unsigned scratch = RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
- unsigned scratch2 = (!BinOpcode) ? incr :
- RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
+ TargetRegisterClass *TRC =
+ isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
+ unsigned scratch = MRI.createVirtualRegister(TRC);
+ unsigned scratch2 = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC);
// thisMBB:
// ...
return BB;
}
+MachineBasicBlock *
+ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size,
+ bool signExtend,
+ ARMCC::CondCodes Cond) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction *MF = BB->getParent();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptr = MI->getOperand(1).getReg();
+ unsigned incr = MI->getOperand(2).getReg();
+ unsigned oldval = dest;
+ DebugLoc dl = MI->getDebugLoc();
+ bool isThumb2 = Subtarget->isThumb2();
+
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ if (isThumb2) {
+ MRI.constrainRegClass(dest, ARM::rGPRRegisterClass);
+ MRI.constrainRegClass(ptr, ARM::rGPRRegisterClass);
+ }
+
+ unsigned ldrOpc, strOpc, extendOpc;
+ switch (Size) {
+ default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
+ case 1:
+ ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
+ strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
+ extendOpc = isThumb2 ? ARM::t2SXTBr : ARM::SXTBr;
+ break;
+ case 2:
+ ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
+ strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
+ extendOpc = isThumb2 ? ARM::t2SXTHr : ARM::SXTHr;
+ break;
+ case 4:
+ ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
+ strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
+ extendOpc = 0;
+ break;
+ }
+
+ MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(It, loopMBB);
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ TargetRegisterClass *TRC =
+ isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
+ unsigned scratch = MRI.createVirtualRegister(TRC);
+ unsigned scratch2 = MRI.createVirtualRegister(TRC);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loopMBB);
+
+ // loopMBB:
+ // ldrex dest, ptr
+ // (sign extend dest, if required)
+ // cmp dest, incr
+ // cmov.cond scratch2, dest, incr
+ // strex scratch, scratch2, ptr
+ // cmp scratch, #0
+ // bne- loopMBB
+ // fallthrough --> exitMBB
+ BB = loopMBB;
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));
+
+ // Sign extend the value, if necessary.
+ if (signExtend && extendOpc) {
+ oldval = MRI.createVirtualRegister(ARM::GPRRegisterClass);
+ AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval).addReg(dest));
+ }
+
+ // Build compare and cmov instructions.
+ AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
+ .addReg(oldval).addReg(incr));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr), scratch2)
+ .addReg(oldval).addReg(incr).addImm(Cond).addReg(ARM::CPSR);
+
+ AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2)
+ .addReg(ptr));
+ AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+ .addReg(scratch).addImm(0));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return BB;
+}
+
static
MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
llvm_unreachable("Expecting a BB with two successors!");
}
+// FIXME: This opcode table should obviously be expressed in the target
+// description. We probably just need a "machine opcode" value in the pseudo
+// instruction. But the ideal solution maybe to simply remove the "S" version
+// of the opcode altogether.
+struct AddSubFlagsOpcodePair {
+ unsigned PseudoOpc;
+ unsigned MachineOpc;
+};
+
+static AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
+ {ARM::ADCSri, ARM::ADCri},
+ {ARM::ADCSrr, ARM::ADCrr},
+ {ARM::ADCSrs, ARM::ADCrs},
+ {ARM::SBCSri, ARM::SBCri},
+ {ARM::SBCSrr, ARM::SBCrr},
+ {ARM::SBCSrs, ARM::SBCrs},
+ {ARM::RSBSri, ARM::RSBri},
+ {ARM::RSBSrr, ARM::RSBrr},
+ {ARM::RSBSrs, ARM::RSBrs},
+ {ARM::RSCSri, ARM::RSCri},
+ {ARM::RSCSrs, ARM::RSCrs},
+ {ARM::t2ADCSri, ARM::t2ADCri},
+ {ARM::t2ADCSrr, ARM::t2ADCrr},
+ {ARM::t2ADCSrs, ARM::t2ADCrs},
+ {ARM::t2SBCSri, ARM::t2SBCri},
+ {ARM::t2SBCSrr, ARM::t2SBCrr},
+ {ARM::t2SBCSrs, ARM::t2SBCrs},
+ {ARM::t2RSBSri, ARM::t2RSBri},
+ {ARM::t2RSBSrs, ARM::t2RSBrs},
+};
+
+// Convert and Add or Subtract with Carry and Flags to a generic opcode with
+// CPSR<def> operand. e.g. ADCS (...) -> ADC (... CPSR<def>).
+//
+// FIXME: Somewhere we should assert that CPSR<def> is in the correct
+// position to be recognized by the target descrition as the 'S' bit.
+bool ARMTargetLowering::RemapAddSubWithFlags(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ unsigned OldOpc = MI->getOpcode();
+ unsigned NewOpc = 0;
+
+ // This is only called for instructions that need remapping, so iterating over
+ // the tiny opcode table is not costly.
+ static const int NPairs =
+ sizeof(AddSubFlagsOpcodeMap) / sizeof(AddSubFlagsOpcodePair);
+ for (AddSubFlagsOpcodePair *Pair = &AddSubFlagsOpcodeMap[0],
+ *End = &AddSubFlagsOpcodeMap[NPairs]; Pair != End; ++Pair) {
+ if (OldOpc == Pair->PseudoOpc) {
+ NewOpc = Pair->MachineOpc;
+ break;
+ }
+ }
+ if (!NewOpc)
+ return false;
+
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
+ for (unsigned i = 0; i < MI->getNumOperands(); ++i)
+ MIB.addOperand(MI->getOperand(i));
+ AddDefaultPred(MIB);
+ MIB.addReg(ARM::CPSR, RegState::Define); // S bit
+ MI->eraseFromParent();
+ return true;
+}
+
MachineBasicBlock *
ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
DebugLoc dl = MI->getDebugLoc();
bool isThumb2 = Subtarget->isThumb2();
switch (MI->getOpcode()) {
- default:
+ default: {
+ if (RemapAddSubWithFlags(MI, BB))
+ return BB;
+
MI->dump();
llvm_unreachable("Unexpected instr type to insert");
-
+ }
case ARM::ATOMIC_LOAD_ADD_I8:
return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
case ARM::ATOMIC_LOAD_ADD_I16:
case ARM::ATOMIC_LOAD_SUB_I32:
return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
+ case ARM::ATOMIC_LOAD_MIN_I8:
+ return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::LT);
+ case ARM::ATOMIC_LOAD_MIN_I16:
+ return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::LT);
+ case ARM::ATOMIC_LOAD_MIN_I32:
+ return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::LT);
+
+ case ARM::ATOMIC_LOAD_MAX_I8:
+ return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::GT);
+ case ARM::ATOMIC_LOAD_MAX_I16:
+ return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::GT);
+ case ARM::ATOMIC_LOAD_MAX_I32:
+ return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::GT);
+
+ case ARM::ATOMIC_LOAD_UMIN_I8:
+ return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::LO);
+ case ARM::ATOMIC_LOAD_UMIN_I16:
+ return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::LO);
+ case ARM::ATOMIC_LOAD_UMIN_I32:
+ return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::LO);
+
+ case ARM::ATOMIC_LOAD_UMAX_I8:
+ return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::HI);
+ case ARM::ATOMIC_LOAD_UMAX_I16:
+ return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::HI);
+ case ARM::ATOMIC_LOAD_UMAX_I32:
+ return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::HI);
+
case ARM::ATOMIC_SWAP_I8: return EmitAtomicBinary(MI, BB, 1, 0);
case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0);
case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0);
return SDValue();
}
+// AddCombineToVPADDL- For pair-wise add on neon, use the vpaddl instruction
+// (only after legalization).
+static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+
+ // Only perform optimization if after legalize, and if NEON is available. We
+ // also expected both operands to be BUILD_VECTORs.
+ if (DCI.isBeforeLegalize() || !Subtarget->hasNEON()
+ || N0.getOpcode() != ISD::BUILD_VECTOR
+ || N1.getOpcode() != ISD::BUILD_VECTOR)
+ return SDValue();
+
+ // Check output type since VPADDL operand elements can only be 8, 16, or 32.
+ EVT VT = N->getValueType(0);
+ if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64)
+ return SDValue();
+
+ // Check that the vector operands are of the right form.
+ // N0 and N1 are BUILD_VECTOR nodes with N number of EXTRACT_VECTOR
+ // operands, where N is the size of the formed vector.
+ // Each EXTRACT_VECTOR should have the same input vector and odd or even
+ // index such that we have a pair wise add pattern.
+
+ // Grab the vector that all EXTRACT_VECTOR nodes should be referencing.
+ if (N0->getOperand(0)->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return SDValue();
+ SDValue Vec = N0->getOperand(0)->getOperand(0);
+ SDNode *V = Vec.getNode();
+ unsigned nextIndex = 0;
+
+ // For each operands to the ADD which are BUILD_VECTORs,
+ // check to see if each of their operands are an EXTRACT_VECTOR with
+ // the same vector and appropriate index.
+ for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) {
+ if (N0->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT
+ && N1->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+
+ SDValue ExtVec0 = N0->getOperand(i);
+ SDValue ExtVec1 = N1->getOperand(i);
+
+ // First operand is the vector, verify its the same.
+ if (V != ExtVec0->getOperand(0).getNode() ||
+ V != ExtVec1->getOperand(0).getNode())
+ return SDValue();
+
+ // Second is the constant, verify its correct.
+ ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1));
+ ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1));
+
+ // For the constant, we want to see all the even or all the odd.
+ if (!C0 || !C1 || C0->getZExtValue() != nextIndex
+ || C1->getZExtValue() != nextIndex+1)
+ return SDValue();
+
+ // Increment index.
+ nextIndex+=2;
+ } else
+ return SDValue();
+ }
+
+ // Create VPADDL node.
+ SelectionDAG &DAG = DCI.DAG;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ DebugLoc DL = N->getDebugLoc();
+
+ // Build operand list.
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls,
+ TLI.getPointerTy()));
+
+ // Input is the vector.
+ Ops.push_back(Vec);
+
+ // Get widened type and narrowed type.
+ MVT widenType;
+ unsigned numElem = VT.getVectorNumElements();
+ switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
+ case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break;
+ case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break;
+ case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break;
+ default:
+ assert(0 && "Invalid vector element type for padd optimization.");
+ }
+
+ SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(),
+ widenType, &Ops[0], Ops.size());
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, tmp);
+}
+
/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
/// operands N0 and N1. This is a helper for PerformADDCombine that is
/// called with the default operands, and if that fails, with commuted
/// operands.
static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
- TargetLowering::DAGCombinerInfo &DCI) {
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget){
+
+ // Attempt to create vpaddl for this add.
+ SDValue Result = AddCombineToVPADDL(N, N0, N1, DCI, Subtarget);
+ if (Result.getNode())
+ return Result;
+
// fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) {
SDValue Result = combineSelectAndUse(N, N0, N1, DCI);
/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
///
static SDValue PerformADDCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI) {
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
// First try with the default operand order.
- SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI);
+ SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget);
if (Result.getNode())
return Result;
// If that didn't work, try again with the operands commuted.
- return PerformADDCombineWithOperands(N, N1, N0, DCI);
+ return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget);
}
/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
return SDValue();
}
+/// PerformVMULCombine
+/// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the
+/// special multiplier accumulator forwarding.
+/// vmul d3, d0, d2
+/// vmla d3, d1, d2
+/// is faster than
+/// vadd d3, d0, d1
+/// vmul d3, d3, d2
+static SDValue PerformVMULCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ if (!Subtarget->hasVMLxForwarding())
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ unsigned Opcode = N0.getOpcode();
+ if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
+ Opcode != ISD::FADD && Opcode != ISD::FSUB) {
+ Opcode = N1.getOpcode();
+ if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
+ Opcode != ISD::FADD && Opcode != ISD::FSUB)
+ return SDValue();
+ std::swap(N0, N1);
+ }
+
+ EVT VT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
+ SDValue N00 = N0->getOperand(0);
+ SDValue N01 = N0->getOperand(1);
+ return DAG.getNode(Opcode, DL, VT,
+ DAG.getNode(ISD::MUL, DL, VT, N00, N1),
+ DAG.getNode(ISD::MUL, DL, VT, N01, N1));
+}
+
static SDValue PerformMULCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
return SDValue();
EVT VT = N->getValueType(0);
+ if (VT.is64BitVector() || VT.is128BitVector())
+ return PerformVMULCombine(N, DCI, Subtarget);
if (VT != MVT::i32)
return SDValue();
static SDValue PerformANDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
+
// Attempt to use immediate-form VBIC
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
DebugLoc dl = N->getDebugLoc();
EVT VT = N->getValueType(0);
SelectionDAG &DAG = DCI.DAG;
+ if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return SDValue();
+
APInt SplatBits, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
EVT VT = N->getValueType(0);
SelectionDAG &DAG = DCI.DAG;
+ if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return SDValue();
+
APInt SplatBits, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
}
}
+ SDValue N0 = N->getOperand(0);
+ if (N0.getOpcode() != ISD::AND)
+ return SDValue();
+ SDValue N1 = N->getOperand(1);
+
+ // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
+ if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() &&
+ DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
+ APInt SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+
+ BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
+ APInt SplatBits0;
+ if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
+ HasAnyUndefs) && !HasAnyUndefs) {
+ BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
+ APInt SplatBits1;
+ if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
+ HasAnyUndefs) && !HasAnyUndefs &&
+ SplatBits0 == ~SplatBits1) {
+ // Canonicalize the vector type to make instruction selection simpler.
+ EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
+ SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT,
+ N0->getOperand(1), N0->getOperand(0),
+ N1->getOperand(0));
+ return DAG.getNode(ISD::BITCAST, dl, VT, Result);
+ }
+ }
+ }
+
// Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
// reasonable.
if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
return SDValue();
- SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
DebugLoc DL = N->getDebugLoc();
// 1) or (and A, mask), val => ARMbfi A, val, mask
// iff (val & mask) == val
//
// 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
// 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)
- // && CountPopulation_32(mask) == CountPopulation_32(~mask2)
+ // && mask == ~mask2
// 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)
- // && CountPopulation_32(mask) == CountPopulation_32(~mask2)
+ // && ~mask == mask2
// (i.e., copy a bitfield value into another bitfield of the same width)
- if (N0.getOpcode() != ISD::AND)
- return SDValue();
if (VT != MVT::i32)
return SDValue();
return SDValue();
unsigned Mask2 = N11C->getZExtValue();
+ // Mask and ~Mask2 (or reverse) must be equivalent for the BFI pattern
+ // as is to match.
if (ARM::isBitFieldInvertedMask(Mask) &&
- ARM::isBitFieldInvertedMask(~Mask2) &&
- (CountPopulation_32(Mask) == CountPopulation_32(~Mask2))) {
+ (Mask == ~Mask2)) {
// The pack halfword instruction works better for masks that fit it,
// so use that when it's available.
if (Subtarget->hasT2ExtractPack() &&
(Mask == 0xffff || Mask == 0xffff0000))
return SDValue();
// 2a
- unsigned lsb = CountTrailingZeros_32(Mask2);
+ unsigned amt = CountTrailingZeros_32(Mask2);
Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
- DAG.getConstant(lsb, MVT::i32));
+ DAG.getConstant(amt, MVT::i32));
Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,
DAG.getConstant(Mask, MVT::i32));
// Do not add new nodes to DAG combiner worklist.
DCI.CombineTo(N, Res, false);
return SDValue();
} else if (ARM::isBitFieldInvertedMask(~Mask) &&
- ARM::isBitFieldInvertedMask(Mask2) &&
- (CountPopulation_32(~Mask) == CountPopulation_32(Mask2))) {
+ (~Mask == Mask2)) {
// The pack halfword instruction works better for masks that fit it,
// so use that when it's available.
if (Subtarget->hasT2ExtractPack() &&
Res = DAG.getNode(ISD::SRL, DL, VT, N00,
DAG.getConstant(lsb, MVT::i32));
Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
- DAG.getConstant(Mask2, MVT::i32));
+ DAG.getConstant(Mask2, MVT::i32));
// Do not add new nodes to DAG combiner worklist.
DCI.CombineTo(N, Res, false);
return SDValue();
return SDValue();
}
-/// PerformBFICombine - (bfi A, (and B, C1), C2) -> (bfi A, B, C2) iff
-/// C1 & C2 == C1.
+/// PerformBFICombine - (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
+/// the bits being cleared by the AND are not demanded by the BFI.
static SDValue PerformBFICombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
SDValue N1 = N->getOperand(1);
ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
if (!N11C)
return SDValue();
- unsigned Mask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
+ unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
+ unsigned LSB = CountTrailingZeros_32(~InvMask);
+ unsigned Width = (32 - CountLeadingZeros_32(~InvMask)) - LSB;
+ unsigned Mask = (1 << Width)-1;
unsigned Mask2 = N11C->getZExtValue();
- if ((Mask & Mask2) == Mask2)
+ if ((Mask & (~Mask2)) == 0)
return DCI.DAG.getNode(ARMISD::BFI, N->getDebugLoc(), N->getValueType(0),
N->getOperand(0), N1.getOperand(0),
N->getOperand(2));
SDValue InDouble = N->getOperand(0);
if (InDouble.getOpcode() == ARMISD::VMOVDRR)
return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
+
+ // vmovrrd(load f64) -> (load i32), (load i32)
+ SDNode *InNode = InDouble.getNode();
+ if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() &&
+ InNode->getValueType(0) == MVT::f64 &&
+ InNode->getOperand(1).getOpcode() == ISD::FrameIndex &&
+ !cast<LoadSDNode>(InNode)->isVolatile()) {
+ // TODO: Should this be done for non-FrameIndex operands?
+ LoadSDNode *LD = cast<LoadSDNode>(InNode);
+
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc DL = LD->getDebugLoc();
+ SDValue BasePtr = LD->getBasePtr();
+ SDValue NewLD1 = DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr,
+ LD->getPointerInfo(), LD->isVolatile(),
+ LD->isNonTemporal(), LD->getAlignment());
+
+ SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
+ DAG.getConstant(4, MVT::i32));
+ SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, NewLD1.getValue(1), OffsetPtr,
+ LD->getPointerInfo(), LD->isVolatile(),
+ LD->isNonTemporal(),
+ std::min(4U, LD->getAlignment() / 2));
+
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
+ SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2);
+ DCI.RemoveFromWorklist(LD);
+ DAG.DeleteNode(LD);
+ return Result;
+ }
+
return SDValue();
}
// Otherwise, the i64 value will be legalized to a pair of i32 values.
StoreSDNode *St = cast<StoreSDNode>(N);
SDValue StVal = St->getValue();
- if (!ISD::isNormalStore(St) || St->isVolatile() ||
- StVal.getValueType() != MVT::i64 ||
+ if (!ISD::isNormalStore(St) || St->isVolatile())
+ return SDValue();
+
+ if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
+ StVal.getNode()->hasOneUse() && !St->isVolatile()) {
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc DL = St->getDebugLoc();
+ SDValue BasePtr = St->getBasePtr();
+ SDValue NewST1 = DAG.getStore(St->getChain(), DL,
+ StVal.getNode()->getOperand(0), BasePtr,
+ St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(), St->getAlignment());
+
+ SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
+ DAG.getConstant(4, MVT::i32));
+ return DAG.getStore(NewST1.getValue(0), DL, StVal.getNode()->getOperand(1),
+ OffsetPtr, St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(),
+ std::min(4U, St->getAlignment() / 2));
+ }
+
+ if (StVal.getValueType() != MVT::i64 ||
StVal.getNode()->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
return SDValue();
EVT VecTy;
if (isLoad)
VecTy = N->getValueType(0);
- else
+ else
VecTy = N->getOperand(AddrOpIdx+1).getValueType();
unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
if (isLaneOp)
DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
break;
- }
+ }
return SDValue();
}
DAGCombinerInfo &DCI) const {
switch (N->getOpcode()) {
default: break;
- case ISD::ADD: return PerformADDCombine(N, DCI);
+ case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget);
case ISD::SUB: return PerformSUBCombine(N, DCI);
case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
return Imm >= 0 && Imm <= 255;
}
+/// isLegalAddImmediate - Return true if the specified immediate is legal
+/// add immediate, that is the target has add instructions which can add
+/// a register with the immediate without having to materialize the
+/// immediate into a register.
+bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const {
+ return ARM_AM::getSOImmVal(Imm) != -1;
+}
+
static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
bool isSEXTLoad, SDValue &Base,
SDValue &Offset, bool &isInc,
case 'l': return C_RegisterClass;
case 'w': return C_RegisterClass;
}
+ } else {
+ if (Constraint == "Uv")
+ return C_Memory;
}
return TargetLowering::getConstraintType(Constraint);
}
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
- char Constraint,
+ std::string &Constraint,
std::vector<SDValue>&Ops,
SelectionDAG &DAG) const {
SDValue Result(0, 0);
- switch (Constraint) {
+ // Currently only support length 1 constraints.
+ if (Constraint.length() != 1) return;
+
+ char ConstraintLetter = Constraint[0];
+ switch (ConstraintLetter) {
default: break;
case 'I': case 'J': case 'K': case 'L':
case 'M': case 'N': case 'O':
if (CVal != CVal64)
return;
- switch (Constraint) {
+ switch (ConstraintLetter) {
case 'I':
if (Subtarget->isThumb1Only()) {
// This must be a constant between 0 and 255, for ADD
Info.writeMem = true;
return true;
}
+ case Intrinsic::arm_strexd: {
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::i64;
+ Info.ptrVal = I.getArgOperand(2);
+ Info.offset = 0;
+ Info.align = 8;
+ Info.vol = false;
+ Info.readMem = false;
+ Info.writeMem = true;
+ return true;
+ }
+ case Intrinsic::arm_ldrexd: {
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::i64;
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.align = 8;
+ Info.vol = false;
+ Info.readMem = true;
+ Info.writeMem = false;
+ return true;
+ }
default:
break;
}