}
static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
- if (TM.getSubtarget<ARMSubtarget>().isTargetDarwin())
+ if (TM.getSubtarget<ARMSubtarget>().isTargetMachO())
return new TargetLoweringObjectFileMachO();
return new ARMElfTargetObjectFile();
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
- if (Subtarget->isTargetDarwin()) {
+ if (Subtarget->isTargetMachO()) {
// Uses VFP for Thumb libfuncs if available.
- if (Subtarget->isThumb() && Subtarget->hasVFP2()) {
+ if (Subtarget->isThumb() && Subtarget->hasVFP2() &&
+ Subtarget->hasARMOps()) {
// Single-precision floating-point arithmetic.
setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
setLibcallName(RTLIB::SRL_I128, 0);
setLibcallName(RTLIB::SRA_I128, 0);
- if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetDarwin()) {
+ if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetMachO()) {
// Double-precision floating-point arithmetic helper functions
// RTABI chapter 4.1.2, Table 2
setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd");
setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand);
setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
- // Custom expand long extensions to vectors.
- setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
- setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom);
- setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
- setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
-
// NEON does not have single instruction CTPOP for vectors with element
// types wider than 8-bits. However, custom lowering can leverage the
// v8i8/v16i8 vcnt instruction.
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
- if (!Subtarget->isTargetDarwin()) {
- // Non-Darwin platforms may return values in these registers via the
+ if (!Subtarget->isTargetMachO()) {
+ // Non-MachO platforms may return values in these registers via the
// personality function.
setExceptionPointerRegister(ARM::R0);
setExceptionSelectorRegister(ARM::R1);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
// ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
// the default expansion.
- // FIXME: This should be checking for v6k, not just v6.
- if (Subtarget->hasDataBarrier() ||
- (Subtarget->hasV6Ops() && !Subtarget->isThumb())) {
- // membarrier needs custom lowering; the rest are legal and handled
- // normally.
- setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+ if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) {
+ // ATOMIC_FENCE needs custom lowering; the other 32-bit ones are legal and
+ // handled normally.
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
// Custom lowering for 64-bit ops
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
setInsertFencesForAtomic(true);
}
setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
- //setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Custom);
} else {
+ // If there's anything we can use as a barrier, go through custom lowering
+ // for ATOMIC_FENCE.
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other,
+ Subtarget->hasAnyDataBarrier() ? Custom : Expand);
+
// Set them all for expansion, which will force libcalls.
- setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
}
}
+
+ // Combine sin / cos into one node or libcall if possible.
+ if (Subtarget->hasSinCos()) {
+ setLibcallName(RTLIB::SINCOS_F32, "sincosf");
+ setLibcallName(RTLIB::SINCOS_F64, "sincos");
+ if (Subtarget->getTargetTriple().getOS() == Triple::IOS) {
+ // For iOS, we don't want the normal expansion of a libcall to
+ // sincos. We want to issue a libcall to __sincos_stret.
+ setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
+ }
+ }
// We have target-specific dag combine patterns for the following nodes:
// ARMISD::VMOVRRD - No need to call setTargetDAGCombine
switch (Opcode) {
default: return 0;
case ARMISD::Wrapper: return "ARMISD::Wrapper";
- case ARMISD::WrapperDYN: return "ARMISD::WrapperDYN";
case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC";
case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
case ARMISD::CALL: return "ARMISD::CALL";
const GlobalValue *GV = G->getGlobal();
isDirect = true;
bool isExt = GV->isDeclaration() || GV->isWeakForLinker();
- bool isStub = (isExt && Subtarget->isTargetDarwin()) &&
+ bool isStub = (isExt && Subtarget->isTargetMachO()) &&
getTargetMachine().getRelocationModel() != Reloc::Static;
isARMFunc = !Subtarget->isThumb() || isStub;
// ARM call to a local ARM function is predicable.
isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking);
// tBX takes a register source operand.
- if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
- unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
- ARMConstantPoolValue *CPV =
- ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 4);
- SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
- CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
- Callee = DAG.getLoad(getPointerTy(), dl,
- DAG.getEntryNode(), CPAddr,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 0);
- SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
- Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
- getPointerTy(), Callee, PICLabel);
+ if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
+ assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
+ Callee = DAG.getNode(ARMISD::WrapperPIC, dl, getPointerTy(),
+ DAG.getTargetGlobalAddress(GV, dl, getPointerTy()));
} else {
// On ELF targets for PIC code, direct calls should go through the PLT
unsigned OpFlags = 0;
}
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
isDirect = true;
- bool isStub = Subtarget->isTargetDarwin() &&
+ bool isStub = Subtarget->isTargetMachO() &&
getTargetMachine().getRelocationModel() != Reloc::Static;
isARMFunc = !Subtarget->isThumb() || isStub;
// tBX takes a register source operand.
// FIXME: handle tail calls differently.
unsigned CallOpc;
- bool HasMinSizeAttr = MF.getFunction()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
+ bool HasMinSizeAttr = Subtarget->isMinSize();
if (Subtarget->isThumb()) {
if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
CallOpc = ARMISD::CALL_NOLINK;
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
Reloc::Model RelocM = getTargetMachine().getRelocationModel();
- // FIXME: Enable this for static codegen when tool issues are fixed. Also
- // update ARMFastISel::ARMMaterializeGV.
- if (Subtarget->useMovt() && RelocM != Reloc::Static) {
+ if (Subtarget->useMovt())
++NumMovwMovt;
- // FIXME: Once remat is capable of dealing with instructions with register
- // operands, expand this into two nodes.
- if (RelocM == Reloc::Static)
- return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
- DAG.getTargetGlobalAddress(GV, dl, PtrVT));
-
- unsigned Wrapper = (RelocM == Reloc::PIC_)
- ? ARMISD::WrapperPIC : ARMISD::WrapperDYN;
- SDValue Result = DAG.getNode(Wrapper, dl, PtrVT,
- DAG.getTargetGlobalAddress(GV, dl, PtrVT));
- if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
- Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
- MachinePointerInfo::getGOT(),
- false, false, false, 0);
- return Result;
- }
- unsigned ARMPCLabelIndex = 0;
- SDValue CPAddr;
- if (RelocM == Reloc::Static) {
- CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
- } else {
- ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
- ARMPCLabelIndex = AFI->createPICLabelUId();
- unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8);
- ARMConstantPoolValue *CPV =
- ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue,
- PCAdj);
- CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
- }
- CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
-
- SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 0);
- SDValue Chain = Result.getValue(1);
+ // FIXME: Once remat is capable of dealing with instructions with register
+ // operands, expand this into multiple nodes
+ unsigned Wrapper =
+ RelocM == Reloc::PIC_ ? ARMISD::WrapperPIC : ARMISD::Wrapper;
- if (RelocM == Reloc::PIC_) {
- SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
- Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
- }
+ SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
+ SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
- Result = DAG.getLoad(PtrVT, dl, Chain, Result, MachinePointerInfo::getGOT(),
- false, false, false, 0);
-
+ Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
+ MachinePointerInfo::getGOT(), false, false, false, 0);
return Result;
}
// Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
// here.
assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
- "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
+ "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
DAG.getConstant(0, MVT::i32));
}
bool ForceMutable) const {
// Currently, two use-cases possible:
- // Case #1. Non var-args function, and we meet first byval parameter.
+ // Case #1. Non-var-args function, and we meet first byval parameter.
// Setup first unallocated register as first byval register;
// eat all remained registers
// (these two actions are performed by HandleByVal method).
/// getInverseCCForVSEL - Return the logical inverse of \p CC, used when a
/// VSEL is selected with the operands in the opposite order.
///
/// SETNE is special-cased to SETEQ; everything else goes through
/// ISD::getSetCCInverse. (The removed getSetCCSwappedOperands call was
/// wrong here: it swaps the comparison's operands, it does not invert it.)
static ISD::CondCode getInverseCCForVSEL(ISD::CondCode CC) {
  if (CC == ISD::SETNE)
    return ISD::SETEQ;
  // The bool flag selects the integer (as opposed to FP) inversion table.
  return ISD::getSetCCInverse(CC, true);
}
static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
MachineFrameInfo *MFI = MF.getFrameInfo();
MFI->setReturnAddressIsTaken(true);
+ if (verifyReturnAddressArgumentIsConstant(Op, DAG))
+ return SDValue();
+
EVT VT = Op.getValueType();
SDLoc dl(Op);
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
EVT VT = Op.getValueType();
SDLoc dl(Op); // FIXME probably not meaningful
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
- unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin())
+ unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetMachO())
? ARM::R7 : ARM::R11;
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
while (Depth--)
return FrameAddr;
}
// ExpandVectorExtension has been removed: the SIGN_EXTEND/ZERO_EXTEND
// Custom actions for >128-bit vectors were dropped in this same change
// (constructor and ReplaceNodeResults), so the generic type legalizer now
// handles oversized vector extensions and this splitting helper is dead.
/// ExpandBITCAST - If the target supports VFP, this function is called to
/// expand a bit convert where either the source or destination type is i64 to
/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
Op.getOperand(1), Op.getOperand(2));
}
+SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
+ assert(Subtarget->isTargetDarwin());
+
+ // For iOS, we want to call an alternative entry point: __sincos_stret,
+ // return values are passed via sret.
+ SDLoc dl(Op);
+ SDValue Arg = Op.getOperand(0);
+ EVT ArgVT = Arg.getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+
+ MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // Pair of floats / doubles used to pass the result.
+ StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL);
+
+ // Create stack object for sret.
+ const uint64_t ByteSize = TLI.getDataLayout()->getTypeAllocSize(RetTy);
+ const unsigned StackAlign = TLI.getDataLayout()->getPrefTypeAlignment(RetTy);
+ int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false);
+ SDValue SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy());
+
+ ArgListTy Args;
+ ArgListEntry Entry;
+
+ Entry.Node = SRet;
+ Entry.Ty = RetTy->getPointerTo();
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Entry.isSRet = true;
+ Args.push_back(Entry);
+
+ Entry.Node = Arg;
+ Entry.Ty = ArgTy;
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Args.push_back(Entry);
+
+ const char *LibcallName = (ArgVT == MVT::f64)
+ ? "__sincos_stret" : "__sincosf_stret";
+ SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy());
+
+ TargetLowering::
+ CallLoweringInfo CLI(DAG.getEntryNode(), Type::getVoidTy(*DAG.getContext()),
+ false, false, false, false, 0,
+ CallingConv::C, /*isTaillCall=*/false,
+ /*doesNotRet=*/false, /*isReturnValueUsed*/false,
+ Callee, Args, DAG, dl);
+ std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+
+ SDValue LoadSin = DAG.getLoad(ArgVT, dl, CallResult.second, SRet,
+ MachinePointerInfo(), false, false, false, 0);
+
+ // Address of cos field.
+ SDValue Add = DAG.getNode(ISD::ADD, dl, getPointerTy(), SRet,
+ DAG.getIntPtrConstant(ArgVT.getStoreSize()));
+ SDValue LoadCos = DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add,
+ MachinePointerInfo(), false, false, false, 0);
+
+ SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
+ return DAG.getNode(ISD::MERGE_VALUES, dl, Tys,
+ LoadSin.getValue(0), LoadCos.getValue(0));
+}
+
static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
// Monotonic load/store is legal for all targets
if (cast<AtomicSDNode>(Op)->getOrdering() <= Monotonic)
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
case ISD::GlobalAddress:
- return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) :
+ return Subtarget->isTargetMachO() ? LowerGlobalAddressDarwin(Op, DAG) :
LowerGlobalAddressELF(Op, DAG);
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
case ISD::ATOMIC_LOAD:
case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
+ case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
case ISD::SDIVREM:
case ISD::UDIVREM: return LowerDivRem(Op, DAG);
}
case ISD::BITCAST:
Res = ExpandBITCAST(N, DAG);
break;
- case ISD::SIGN_EXTEND:
- case ISD::ZERO_EXTEND:
- Res = ExpandVectorExtension(N, DAG);
- break;
case ISD::SRL:
case ISD::SRA:
Res = Expand64BitShift(N, DAG, Subtarget);
llvm_unreachable("Expecting a BB with two successors!");
}
-namespace {
-// This class is a helper for lowering the COPY_STRUCT_BYVAL_I32 instruction.
-// It defines the operations needed to lower the byval copy. We use a helper
-// class because the opcodes and machine instructions are different for each
-// subtarget, but the overall algorithm for the lowering is the same. The
-// implementation of each operation will be defined separately for arm, thumb1,
-// and thumb2 targets by subclassing this base class. See
-// ARMTargetLowering::EmitStructByval() for how these operations are used.
-class TargetStructByvalEmitter {
-public:
- TargetStructByvalEmitter(const TargetInstrInfo *TII_,
- MachineRegisterInfo &MRI_,
- const TargetRegisterClass *TRC_)
- : TII(TII_), MRI(MRI_), TRC(TRC_) {}
-
- // Emit a post-increment load of "unit" size. The unit size is based on the
- // alignment of the struct being copied (4, 2, or 1 bytes). Alignments higher
- // than 4 are handled separately by using NEON instructions.
- //
- // \param baseReg the register holding the address to load.
- // \param baseOut the register to recieve the incremented address.
- // \returns the register holding the loaded value.
- virtual unsigned emitUnitLoad(MachineBasicBlock *BB, MachineInstr *MI,
- DebugLoc &dl, unsigned baseReg,
- unsigned baseOut) = 0;
-
- // Emit a post-increment store of "unit" size. The unit size is based on the
- // alignment of the struct being copied (4, 2, or 1 bytes). Alignments higher
- // than 4 are handled separately by using NEON instructions.
- //
- // \param baseReg the register holding the address to store.
- // \param storeReg the register holding the value to store.
- // \param baseOut the register to recieve the incremented address.
- virtual void emitUnitStore(MachineBasicBlock *BB, MachineInstr *MI,
- DebugLoc &dl, unsigned baseReg, unsigned storeReg,
- unsigned baseOut) = 0;
-
- // Emit a post-increment load of one byte.
- //
- // \param baseReg the register holding the address to load.
- // \param baseOut the register to recieve the incremented address.
- // \returns the register holding the loaded value.
- virtual unsigned emitByteLoad(MachineBasicBlock *BB, MachineInstr *MI,
- DebugLoc &dl, unsigned baseReg,
- unsigned baseOut) = 0;
-
- // Emit a post-increment store of one byte.
- //
- // \param baseReg the register holding the address to store.
- // \param storeReg the register holding the value to store.
- // \param baseOut the register to recieve the incremented address.
- virtual void emitByteStore(MachineBasicBlock *BB, MachineInstr *MI,
- DebugLoc &dl, unsigned baseReg, unsigned storeReg,
- unsigned baseOut) = 0;
-
- // Emit a load of a constant value.
- //
- // \param Constant the register holding the address to store.
- // \returns the register holding the loaded value.
- virtual unsigned emitConstantLoad(MachineBasicBlock *BB, MachineInstr *MI,
- DebugLoc &dl, unsigned Constant,
- const DataLayout *DL) = 0;
-
- // Emit a subtract of a register minus immediate, with the immediate equal to
- // the "unit" size. The unit size is based on the alignment of the struct
- // being copied (16, 8, 4, 2, or 1 bytes).
- //
- // \param InReg the register holding the initial value.
- // \param OutReg the register to recieve the subtracted value.
- virtual void emitSubImm(MachineBasicBlock *BB, MachineInstr *MI, DebugLoc &dl,
- unsigned InReg, unsigned OutReg) = 0;
-
- // Emit a branch based on a condition code of not equal.
- //
- // \param TargetBB the destination of the branch.
- virtual void emitBranchNE(MachineBasicBlock *BB, MachineInstr *MI,
- DebugLoc &dl, MachineBasicBlock *TargetBB) = 0;
-
- // Find the constant pool index for the given constant. This method is
- // implemented in the base class because it is the same for all subtargets.
- //
- // \param LoopSize the constant value for which the index should be returned.
- // \returns the constant pool index for the constant.
- unsigned getConstantPoolIndex(MachineFunction *MF, const DataLayout *DL,
- unsigned LoopSize) {
- MachineConstantPool *ConstantPool = MF->getConstantPool();
- Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
- const Constant *C = ConstantInt::get(Int32Ty, LoopSize);
-
- // MachineConstantPool wants an explicit alignment.
- unsigned Align = DL->getPrefTypeAlignment(Int32Ty);
- if (Align == 0)
- Align = DL->getTypeAllocSize(C->getType());
- return ConstantPool->getConstantPoolIndex(C, Align);
- }
-
- // Return the register class used by the subtarget.
- //
- // \returns the target register class.
- const TargetRegisterClass *getTRC() const { return TRC; }
-
- virtual ~TargetStructByvalEmitter() {};
-
-protected:
- const TargetInstrInfo *TII;
- MachineRegisterInfo &MRI;
- const TargetRegisterClass *TRC;
-};
-
-class ARMStructByvalEmitter : public TargetStructByvalEmitter {
-public:
- ARMStructByvalEmitter(const TargetInstrInfo *TII, MachineRegisterInfo &MRI,
- unsigned LoadStoreSize)
- : TargetStructByvalEmitter(
- TII, MRI, (const TargetRegisterClass *)&ARM::GPRRegClass),
- UnitSize(LoadStoreSize),
- UnitLdOpc(LoadStoreSize == 4
- ? ARM::LDR_POST_IMM
- : LoadStoreSize == 2
- ? ARM::LDRH_POST
- : LoadStoreSize == 1 ? ARM::LDRB_POST_IMM : 0),
- UnitStOpc(LoadStoreSize == 4
- ? ARM::STR_POST_IMM
- : LoadStoreSize == 2
- ? ARM::STRH_POST
- : LoadStoreSize == 1 ? ARM::STRB_POST_IMM : 0) {}
-
- unsigned emitUnitLoad(MachineBasicBlock *BB, MachineInstr *MI, DebugLoc &dl,
- unsigned baseReg, unsigned baseOut) {
- unsigned scratch = MRI.createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(UnitLdOpc), scratch).addReg(
- baseOut, RegState::Define).addReg(baseReg).addReg(0).addImm(UnitSize));
- return scratch;
- }
-
- void emitUnitStore(MachineBasicBlock *BB, MachineInstr *MI, DebugLoc &dl,
- unsigned baseReg, unsigned storeReg, unsigned baseOut) {
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(UnitStOpc), baseOut).addReg(
- storeReg).addReg(baseReg).addReg(0).addImm(UnitSize));
- }
-
- unsigned emitByteLoad(MachineBasicBlock *BB, MachineInstr *MI, DebugLoc &dl,
- unsigned baseReg, unsigned baseOut) {
- unsigned scratch = MRI.createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::LDRB_POST_IMM), scratch)
- .addReg(baseOut, RegState::Define).addReg(baseReg)
- .addReg(0).addImm(1));
- return scratch;
- }
-
- void emitByteStore(MachineBasicBlock *BB, MachineInstr *MI, DebugLoc &dl,
- unsigned baseReg, unsigned storeReg, unsigned baseOut) {
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::STRB_POST_IMM), baseOut)
- .addReg(storeReg).addReg(baseReg).addReg(0).addImm(1));
- }
-
- unsigned emitConstantLoad(MachineBasicBlock *BB, MachineInstr *MI,
- DebugLoc &dl, unsigned Constant,
- const DataLayout *DL) {
- unsigned constReg = MRI.createVirtualRegister(TRC);
- unsigned Idx = getConstantPoolIndex(BB->getParent(), DL, Constant);
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp)).addReg(
- constReg, RegState::Define).addConstantPoolIndex(Idx).addImm(0));
- return constReg;
- }
-
- void emitSubImm(MachineBasicBlock *BB, MachineInstr *MI, DebugLoc &dl,
- unsigned InReg, unsigned OutReg) {
+/// Return the load opcode for a given load size. If load size >= 8,
+/// neon opcode will be returned.
+static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2) {
+ if (LdSize >= 8)
+ return LdSize == 16 ? ARM::VLD1q32wb_fixed
+ : LdSize == 8 ? ARM::VLD1d32wb_fixed : 0;
+ if (IsThumb1)
+ return LdSize == 4 ? ARM::tLDRi
+ : LdSize == 2 ? ARM::tLDRHi
+ : LdSize == 1 ? ARM::tLDRBi : 0;
+ if (IsThumb2)
+ return LdSize == 4 ? ARM::t2LDR_POST
+ : LdSize == 2 ? ARM::t2LDRH_POST
+ : LdSize == 1 ? ARM::t2LDRB_POST : 0;
+ return LdSize == 4 ? ARM::LDR_POST_IMM
+ : LdSize == 2 ? ARM::LDRH_POST
+ : LdSize == 1 ? ARM::LDRB_POST_IMM : 0;
+}
+
+/// Return the store opcode for a given store size. If store size >= 8,
+/// neon opcode will be returned.
+static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2) {
+ if (StSize >= 8)
+ return StSize == 16 ? ARM::VST1q32wb_fixed
+ : StSize == 8 ? ARM::VST1d32wb_fixed : 0;
+ if (IsThumb1)
+ return StSize == 4 ? ARM::tSTRi
+ : StSize == 2 ? ARM::tSTRHi
+ : StSize == 1 ? ARM::tSTRBi : 0;
+ if (IsThumb2)
+ return StSize == 4 ? ARM::t2STR_POST
+ : StSize == 2 ? ARM::t2STRH_POST
+ : StSize == 1 ? ARM::t2STRB_POST : 0;
+ return StSize == 4 ? ARM::STR_POST_IMM
+ : StSize == 2 ? ARM::STRH_POST
+ : StSize == 1 ? ARM::STRB_POST_IMM : 0;
+}
+
+/// Emit a post-increment load operation with given size. The instructions
+/// will be added to BB at Pos.
+static void emitPostLd(MachineBasicBlock *BB, MachineInstr *Pos,
+ const TargetInstrInfo *TII, DebugLoc dl,
+ unsigned LdSize, unsigned Data, unsigned AddrIn,
+ unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
+ unsigned LdOpc = getLdOpcode(LdSize, IsThumb1, IsThumb2);
+ assert(LdOpc != 0 && "Should have a load opcode");
+ if (LdSize >= 8) {
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
+ .addReg(AddrOut, RegState::Define).addReg(AddrIn)
+ .addImm(0));
+ } else if (IsThumb1) {
+ // load + update AddrIn
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
+ .addReg(AddrIn).addImm(0));
MachineInstrBuilder MIB =
- BuildMI(*BB, MI, dl, TII->get(ARM::SUBri), OutReg);
- AddDefaultCC(AddDefaultPred(MIB.addReg(InReg).addImm(UnitSize)));
- MIB->getOperand(5).setReg(ARM::CPSR);
- MIB->getOperand(5).setIsDef(true);
- }
-
- void emitBranchNE(MachineBasicBlock *BB, MachineInstr *MI, DebugLoc &dl,
- MachineBasicBlock *TargetBB) {
- BuildMI(*BB, MI, dl, TII->get(ARM::Bcc)).addMBB(TargetBB).addImm(ARMCC::NE)
- .addReg(ARM::CPSR);
- }
-
-private:
- const unsigned UnitSize;
- const unsigned UnitLdOpc;
- const unsigned UnitStOpc;
-};
-
-class Thumb2StructByvalEmitter : public TargetStructByvalEmitter {
-public:
- Thumb2StructByvalEmitter(const TargetInstrInfo *TII, MachineRegisterInfo &MRI,
- unsigned LoadStoreSize)
- : TargetStructByvalEmitter(
- TII, MRI, (const TargetRegisterClass *)&ARM::tGPRRegClass),
- UnitSize(LoadStoreSize),
- UnitLdOpc(LoadStoreSize == 4
- ? ARM::t2LDR_POST
- : LoadStoreSize == 2
- ? ARM::t2LDRH_POST
- : LoadStoreSize == 1 ? ARM::t2LDRB_POST : 0),
- UnitStOpc(LoadStoreSize == 4
- ? ARM::t2STR_POST
- : LoadStoreSize == 2
- ? ARM::t2STRH_POST
- : LoadStoreSize == 1 ? ARM::t2STRB_POST : 0) {}
-
- unsigned emitUnitLoad(MachineBasicBlock *BB, MachineInstr *MI, DebugLoc &dl,
- unsigned baseReg, unsigned baseOut) {
- unsigned scratch = MRI.createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(UnitLdOpc), scratch).addReg(
- baseOut, RegState::Define).addReg(baseReg).addImm(UnitSize));
- return scratch;
- }
-
- void emitUnitStore(MachineBasicBlock *BB, MachineInstr *MI, DebugLoc &dl,
- unsigned baseReg, unsigned storeReg, unsigned baseOut) {
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(UnitStOpc), baseOut)
- .addReg(storeReg).addReg(baseReg).addImm(UnitSize));
- }
-
- unsigned emitByteLoad(MachineBasicBlock *BB, MachineInstr *MI, DebugLoc &dl,
- unsigned baseReg, unsigned baseOut) {
- unsigned scratch = MRI.createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::t2LDRB_POST), scratch)
- .addReg(baseOut, RegState::Define).addReg(baseReg)
- .addImm(1));
- return scratch;
- }
-
- void emitByteStore(MachineBasicBlock *BB, MachineInstr *MI, DebugLoc &dl,
- unsigned baseReg, unsigned storeReg, unsigned baseOut) {
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::t2STRB_POST), baseOut)
- .addReg(storeReg).addReg(baseReg).addImm(1));
- }
-
- unsigned emitConstantLoad(MachineBasicBlock *BB, MachineInstr *MI,
- DebugLoc &dl, unsigned Constant,
- const DataLayout *DL) {
- unsigned VConst = MRI.createVirtualRegister(TRC);
- unsigned Vtmp = VConst;
- if ((Constant & 0xFFFF0000) != 0)
- Vtmp = MRI.createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVi16), Vtmp)
- .addImm(Constant & 0xFFFF));
-
- if ((Constant & 0xFFFF0000) != 0)
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVTi16), VConst)
- .addReg(Vtmp).addImm(Constant >> 16));
- return VConst;
- }
-
- void emitSubImm(MachineBasicBlock *BB, MachineInstr *MI, DebugLoc &dl,
- unsigned InReg, unsigned OutReg) {
+ BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut);
+ MIB = AddDefaultT1CC(MIB);
+ MIB.addReg(AddrIn).addImm(LdSize);
+ AddDefaultPred(MIB);
+ } else if (IsThumb2) {
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
+ .addReg(AddrOut, RegState::Define).addReg(AddrIn)
+ .addImm(LdSize));
+ } else { // arm
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
+ .addReg(AddrOut, RegState::Define).addReg(AddrIn)
+ .addReg(0).addImm(LdSize));
+ }
+}
+
+/// Emit a post-increment store operation with given size. The instructions
+/// will be added to BB at Pos.
+static void emitPostSt(MachineBasicBlock *BB, MachineInstr *Pos,
+ const TargetInstrInfo *TII, DebugLoc dl,
+ unsigned StSize, unsigned Data, unsigned AddrIn,
+ unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
+ unsigned StOpc = getStOpcode(StSize, IsThumb1, IsThumb2);
+ assert(StOpc != 0 && "Should have a store opcode");
+ if (StSize >= 8) {
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
+ .addReg(AddrIn).addImm(0).addReg(Data));
+ } else if (IsThumb1) {
+ // store + update AddrIn
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc)).addReg(Data)
+ .addReg(AddrIn).addImm(0));
MachineInstrBuilder MIB =
- BuildMI(*BB, MI, dl, TII->get(ARM::t2SUBri), OutReg);
- AddDefaultCC(AddDefaultPred(MIB.addReg(InReg).addImm(UnitSize)));
- MIB->getOperand(5).setReg(ARM::CPSR);
- MIB->getOperand(5).setIsDef(true);
- }
-
- void emitBranchNE(MachineBasicBlock *BB, MachineInstr *MI, DebugLoc &dl,
- MachineBasicBlock *TargetBB) {
- BuildMI(BB, dl, TII->get(ARM::t2Bcc)).addMBB(TargetBB).addImm(ARMCC::NE)
- .addReg(ARM::CPSR);
- }
-
-private:
- const unsigned UnitSize;
- const unsigned UnitLdOpc;
- const unsigned UnitStOpc;
-};
-
-class Thumb1StructByvalEmitter : public TargetStructByvalEmitter {
-public:
- Thumb1StructByvalEmitter(const TargetInstrInfo *TII, MachineRegisterInfo &MRI,
- unsigned LoadStoreSize)
- : TargetStructByvalEmitter(
- TII, MRI, (const TargetRegisterClass *)&ARM::tGPRRegClass),
- UnitSize(LoadStoreSize),
- UnitLdOpc(LoadStoreSize == 4 ? ARM::tLDRi : LoadStoreSize == 2
- ? ARM::tLDRHi
- : LoadStoreSize == 1
- ? ARM::tLDRBi
- : 0),
- UnitStOpc(LoadStoreSize == 4 ? ARM::tSTRi : LoadStoreSize == 2
- ? ARM::tSTRHi
- : LoadStoreSize == 1
- ? ARM::tSTRBi
- : 0) {}
-
- void emitAddSubi8(MachineBasicBlock *BB, MachineInstr *MI, DebugLoc &dl,
- unsigned opcode, unsigned baseReg, unsigned Imm,
- unsigned baseOut) {
- MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(opcode), baseOut);
+ BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut);
MIB = AddDefaultT1CC(MIB);
- MIB.addReg(baseReg).addImm(Imm);
+ MIB.addReg(AddrIn).addImm(StSize);
AddDefaultPred(MIB);
+ } else if (IsThumb2) {
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
+ .addReg(Data).addReg(AddrIn).addImm(StSize));
+ } else { // arm
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
+ .addReg(Data).addReg(AddrIn).addReg(0)
+ .addImm(StSize));
}
-
- unsigned emitUnitLoad(MachineBasicBlock *BB, MachineInstr *MI, DebugLoc &dl,
- unsigned baseReg, unsigned baseOut) {
- // load into scratch
- unsigned scratch = MRI.createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(UnitLdOpc), scratch)
- .addReg(baseReg).addImm(0));
-
- // update base pointer
- emitAddSubi8(BB, MI, dl, ARM::tADDi8, baseReg, UnitSize, baseOut);
- return scratch;
- }
-
- void emitUnitStore(MachineBasicBlock *BB, MachineInstr *MI, DebugLoc &dl,
- unsigned baseReg, unsigned storeReg, unsigned baseOut) {
- // load into scratch
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(UnitStOpc)).addReg(storeReg)
- .addReg(baseReg).addImm(0));
-
- // update base pointer
- emitAddSubi8(BB, MI, dl, ARM::tADDi8, baseReg, UnitSize, baseOut);
- }
-
- unsigned emitByteLoad(MachineBasicBlock *BB, MachineInstr *MI, DebugLoc &dl,
- unsigned baseReg, unsigned baseOut) {
- // load into scratch
- unsigned scratch = MRI.createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::tLDRBi), scratch)
- .addReg(baseReg).addImm(0));
-
- // update base pointer
- emitAddSubi8(BB, MI, dl, ARM::tADDi8, baseReg, 1, baseOut);
- return scratch;
- }
-
- void emitByteStore(MachineBasicBlock *BB, MachineInstr *MI, DebugLoc &dl,
- unsigned baseReg, unsigned storeReg, unsigned baseOut) {
- // load into scratch
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::tSTRBi)).addReg(storeReg)
- .addReg(baseReg).addImm(0));
-
- // update base pointer
- emitAddSubi8(BB, MI, dl, ARM::tADDi8, baseReg, 1, baseOut);
- }
-
- unsigned emitConstantLoad(MachineBasicBlock *BB, MachineInstr *MI,
- DebugLoc &dl, unsigned Constant,
- const DataLayout *DL) {
- unsigned constReg = MRI.createVirtualRegister(TRC);
- unsigned Idx = getConstantPoolIndex(BB->getParent(), DL, Constant);
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci)).addReg(
- constReg, RegState::Define).addConstantPoolIndex(Idx));
- return constReg;
- }
-
- void emitSubImm(MachineBasicBlock *BB, MachineInstr *MI, DebugLoc &dl,
- unsigned InReg, unsigned OutReg) {
- emitAddSubi8(BB, MI, dl, ARM::tSUBi8, InReg, UnitSize, OutReg);
- }
-
- void emitBranchNE(MachineBasicBlock *BB, MachineInstr *MI, DebugLoc &dl,
- MachineBasicBlock *TargetBB) {
- BuildMI(*BB, MI, dl, TII->get(ARM::tBcc)).addMBB(TargetBB).addImm(ARMCC::NE)
- .addReg(ARM::CPSR);
- }
-
-private:
- const unsigned UnitSize;
- const unsigned UnitLdOpc;
- const unsigned UnitStOpc;
-};
-
-// This class is a thin wrapper that delegates most of the work to the correct
-// TargetStructByvalEmitter implementation. It also handles the lowering for
-// targets that support neon because the neon implementation is the same for all
-// targets that support it.
-class StructByvalEmitter {
-public:
- StructByvalEmitter(unsigned LoadStoreSize, const ARMSubtarget *Subtarget,
- const TargetInstrInfo *TII_, MachineRegisterInfo &MRI_,
- const DataLayout *DL_)
- : UnitSize(LoadStoreSize),
- TargetEmitter(
- Subtarget->isThumb1Only()
- ? static_cast<TargetStructByvalEmitter *>(
- new Thumb1StructByvalEmitter(TII_, MRI_, LoadStoreSize))
- : Subtarget->isThumb2()
- ? static_cast<TargetStructByvalEmitter *>(
- new Thumb2StructByvalEmitter(TII_, MRI_,
- LoadStoreSize))
- : static_cast<TargetStructByvalEmitter *>(
- new ARMStructByvalEmitter(TII_, MRI_,
- LoadStoreSize))),
- TII(TII_), MRI(MRI_), DL(DL_),
- VecTRC(UnitSize == 16
- ? (const TargetRegisterClass *)&ARM::DPairRegClass
- : UnitSize == 8
- ? (const TargetRegisterClass *)&ARM::DPRRegClass
- : 0),
- VecLdOpc(UnitSize == 16 ? ARM::VLD1q32wb_fixed
- : UnitSize == 8 ? ARM::VLD1d32wb_fixed : 0),
- VecStOpc(UnitSize == 16 ? ARM::VST1q32wb_fixed
- : UnitSize == 8 ? ARM::VST1d32wb_fixed : 0) {}
-
- // Emit a post-increment load of "unit" size. The unit size is based on the
- // alignment of the struct being copied (16, 8, 4, 2, or 1 bytes). Loads of 16
- // or 8 bytes use NEON instructions to load the value.
- //
- // \param baseReg the register holding the address to load.
- // \param baseOut the register to recieve the incremented address. If baseOut
- // is 0 then a new register is created to hold the incremented address.
- // \returns a pair of registers holding the loaded value and the updated
- // address.
- std::pair<unsigned, unsigned> emitUnitLoad(MachineBasicBlock *BB,
- MachineInstr *MI, DebugLoc &dl,
- unsigned baseReg,
- unsigned baseOut = 0) {
- unsigned scratch = 0;
- if (baseOut == 0)
- baseOut = MRI.createVirtualRegister(TargetEmitter->getTRC());
- if (UnitSize >= 8) { // neon
- scratch = MRI.createVirtualRegister(VecTRC);
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(VecLdOpc), scratch).addReg(
- baseOut, RegState::Define).addReg(baseReg).addImm(0));
- } else {
- scratch = TargetEmitter->emitUnitLoad(BB, MI, dl, baseReg, baseOut);
- }
- return std::make_pair(scratch, baseOut);
- }
-
- // Emit a post-increment store of "unit" size. The unit size is based on the
- // alignment of the struct being copied (16, 8, 4, 2, or 1 bytes). Stores of
- // 16 or 8 bytes use NEON instructions to store the value.
- //
- // \param baseReg the register holding the address to store.
- // \param storeReg the register holding the value to store.
- // \param baseOut the register to recieve the incremented address. If baseOut
- // is 0 then a new register is created to hold the incremented address.
- // \returns the register holding the updated address.
- unsigned emitUnitStore(MachineBasicBlock *BB, MachineInstr *MI, DebugLoc &dl,
- unsigned baseReg, unsigned storeReg,
- unsigned baseOut = 0) {
- if (baseOut == 0)
- baseOut = MRI.createVirtualRegister(TargetEmitter->getTRC());
- if (UnitSize >= 8) { // neon
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(VecStOpc), baseOut)
- .addReg(baseReg).addImm(0).addReg(storeReg));
- } else {
- TargetEmitter->emitUnitStore(BB, MI, dl, baseReg, storeReg, baseOut);
- }
- return baseOut;
- }
-
- // Emit a post-increment load of one byte.
- //
- // \param baseReg the register holding the address to load.
- // \returns a pair of registers holding the loaded value and the updated
- // address.
- std::pair<unsigned, unsigned> emitByteLoad(MachineBasicBlock *BB,
- MachineInstr *MI, DebugLoc &dl,
- unsigned baseReg) {
- unsigned baseOut = MRI.createVirtualRegister(TargetEmitter->getTRC());
- unsigned scratch =
- TargetEmitter->emitByteLoad(BB, MI, dl, baseReg, baseOut);
- return std::make_pair(scratch, baseOut);
- }
-
- // Emit a post-increment store of one byte.
- //
- // \param baseReg the register holding the address to store.
- // \param storeReg the register holding the value to store.
- // \returns the register holding the updated address.
- unsigned emitByteStore(MachineBasicBlock *BB, MachineInstr *MI, DebugLoc &dl,
- unsigned baseReg, unsigned storeReg) {
- unsigned baseOut = MRI.createVirtualRegister(TargetEmitter->getTRC());
- TargetEmitter->emitByteStore(BB, MI, dl, baseReg, storeReg, baseOut);
- return baseOut;
- }
-
- // Emit a load of the constant LoopSize.
- //
- // \param LoopSize the constant to load.
- // \returns the register holding the loaded constant.
- unsigned emitConstantLoad(MachineBasicBlock *BB, MachineInstr *MI,
- DebugLoc &dl, unsigned LoopSize) {
- return TargetEmitter->emitConstantLoad(BB, MI, dl, LoopSize, DL);
- }
-
- // Emit a subtract of a register minus immediate, with the immediate equal to
- // the "unit" size. The unit size is based on the alignment of the struct
- // being copied (16, 8, 4, 2, or 1 bytes).
- //
- // \param InReg the register holding the initial value.
- // \param OutReg the register to recieve the subtracted value.
- void emitSubImm(MachineBasicBlock *BB, MachineInstr *MI, DebugLoc &dl,
- unsigned InReg, unsigned OutReg) {
- TargetEmitter->emitSubImm(BB, MI, dl, InReg, OutReg);
- }
-
- // Emit a branch based on a condition code of not equal.
- //
- // \param TargetBB the destination of the branch.
- void emitBranchNE(MachineBasicBlock *BB, MachineInstr *MI, DebugLoc &dl,
- MachineBasicBlock *TargetBB) {
- TargetEmitter->emitBranchNE(BB, MI, dl, TargetBB);
- }
-
- // Return the register class used by the subtarget.
- //
- // \returns the target register class.
- const TargetRegisterClass *getTRC() const { return TargetEmitter->getTRC(); }
-
-private:
- const unsigned UnitSize;
- OwningPtr<TargetStructByvalEmitter> TargetEmitter;
- const TargetInstrInfo *TII;
- MachineRegisterInfo &MRI;
- const DataLayout *DL;
-
- const TargetRegisterClass *VecTRC;
- const unsigned VecLdOpc;
- const unsigned VecStOpc;
-};
}
MachineBasicBlock *
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
unsigned UnitSize = 0;
+ const TargetRegisterClass *TRC = 0;
+ const TargetRegisterClass *VecTRC = 0;
+
+ bool IsThumb1 = Subtarget->isThumb1Only();
+ bool IsThumb2 = Subtarget->isThumb2();
if (Align & 1) {
UnitSize = 1;
UnitSize = 4;
}
- StructByvalEmitter ByvalEmitter(UnitSize, Subtarget, TII, MRI,
- getDataLayout());
+ // Select the correct opcode and register class for unit size load/store
+ bool IsNeon = UnitSize >= 8;
+ TRC = (IsThumb1 || IsThumb2) ? (const TargetRegisterClass *)&ARM::tGPRRegClass
+ : (const TargetRegisterClass *)&ARM::GPRRegClass;
+ if (IsNeon)
+ VecTRC = UnitSize == 16
+ ? (const TargetRegisterClass *)&ARM::DPairRegClass
+ : UnitSize == 8
+ ? (const TargetRegisterClass *)&ARM::DPRRegClass
+ : 0;
+
unsigned BytesLeft = SizeVal % UnitSize;
unsigned LoopSize = SizeVal - BytesLeft;
unsigned srcIn = src;
unsigned destIn = dest;
for (unsigned i = 0; i < LoopSize; i+=UnitSize) {
- std::pair<unsigned, unsigned> res =
- ByvalEmitter.emitUnitLoad(BB, MI, dl, srcIn);
- unsigned scratch = res.first;
- srcIn = res.second;
- destIn = ByvalEmitter.emitUnitStore(BB, MI, dl, destIn, scratch);
+ unsigned srcOut = MRI.createVirtualRegister(TRC);
+ unsigned destOut = MRI.createVirtualRegister(TRC);
+ unsigned scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
+ emitPostLd(BB, MI, TII, dl, UnitSize, scratch, srcIn, srcOut,
+ IsThumb1, IsThumb2);
+ emitPostSt(BB, MI, TII, dl, UnitSize, scratch, destIn, destOut,
+ IsThumb1, IsThumb2);
+ srcIn = srcOut;
+ destIn = destOut;
}
// Handle the leftover bytes with LDRB and STRB.
// [scratch, srcOut] = LDRB_POST(srcIn, 1)
// [destOut] = STRB_POST(scratch, destIn, 1)
for (unsigned i = 0; i < BytesLeft; i++) {
- std::pair<unsigned, unsigned> res =
- ByvalEmitter.emitByteLoad(BB, MI, dl, srcIn);
- unsigned scratch = res.first;
- srcIn = res.second;
- destIn = ByvalEmitter.emitByteStore(BB, MI, dl, destIn, scratch);
+ unsigned srcOut = MRI.createVirtualRegister(TRC);
+ unsigned destOut = MRI.createVirtualRegister(TRC);
+ unsigned scratch = MRI.createVirtualRegister(TRC);
+ emitPostLd(BB, MI, TII, dl, 1, scratch, srcIn, srcOut,
+ IsThumb1, IsThumb2);
+ emitPostSt(BB, MI, TII, dl, 1, scratch, destIn, destOut,
+ IsThumb1, IsThumb2);
+ srcIn = srcOut;
+ destIn = destOut;
}
MI->eraseFromParent(); // The instruction is gone now.
return BB;
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
// Load an immediate to varEnd.
- unsigned varEnd = ByvalEmitter.emitConstantLoad(BB, MI, dl, LoopSize);
+ unsigned varEnd = MRI.createVirtualRegister(TRC);
+ if (IsThumb2) {
+ unsigned Vtmp = varEnd;
+ if ((LoopSize & 0xFFFF0000) != 0)
+ Vtmp = MRI.createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVi16), Vtmp)
+ .addImm(LoopSize & 0xFFFF));
+
+ if ((LoopSize & 0xFFFF0000) != 0)
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVTi16), varEnd)
+ .addReg(Vtmp).addImm(LoopSize >> 16));
+ } else {
+ MachineConstantPool *ConstantPool = MF->getConstantPool();
+ Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
+ const Constant *C = ConstantInt::get(Int32Ty, LoopSize);
+
+ // MachineConstantPool wants an explicit alignment.
+ unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty);
+ if (Align == 0)
+ Align = getDataLayout()->getTypeAllocSize(C->getType());
+ unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
+
+ if (IsThumb1)
+ AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci)).addReg(
+ varEnd, RegState::Define).addConstantPoolIndex(Idx));
+ else
+ AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp)).addReg(
+ varEnd, RegState::Define).addConstantPoolIndex(Idx).addImm(0));
+ }
BB->addSuccessor(loopMBB);
// Generate the loop body:
// destPhi = PHI(destLoop, dst)
MachineBasicBlock *entryBB = BB;
BB = loopMBB;
- unsigned varLoop = MRI.createVirtualRegister(ByvalEmitter.getTRC());
- unsigned varPhi = MRI.createVirtualRegister(ByvalEmitter.getTRC());
- unsigned srcLoop = MRI.createVirtualRegister(ByvalEmitter.getTRC());
- unsigned srcPhi = MRI.createVirtualRegister(ByvalEmitter.getTRC());
- unsigned destLoop = MRI.createVirtualRegister(ByvalEmitter.getTRC());
- unsigned destPhi = MRI.createVirtualRegister(ByvalEmitter.getTRC());
+ unsigned varLoop = MRI.createVirtualRegister(TRC);
+ unsigned varPhi = MRI.createVirtualRegister(TRC);
+ unsigned srcLoop = MRI.createVirtualRegister(TRC);
+ unsigned srcPhi = MRI.createVirtualRegister(TRC);
+ unsigned destLoop = MRI.createVirtualRegister(TRC);
+ unsigned destPhi = MRI.createVirtualRegister(TRC);
BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi)
.addReg(varLoop).addMBB(loopMBB)
// [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
// [destLoop] = STR_POST(scratch, destPhi, UnitSiz)
- {
- std::pair<unsigned, unsigned> res =
- ByvalEmitter.emitUnitLoad(BB, BB->end(), dl, srcPhi, srcLoop);
- unsigned scratch = res.first;
- ByvalEmitter.emitUnitStore(BB, BB->end(), dl, destPhi, scratch, destLoop);
- }
+ unsigned scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
+ emitPostLd(BB, BB->end(), TII, dl, UnitSize, scratch, srcPhi, srcLoop,
+ IsThumb1, IsThumb2);
+ emitPostSt(BB, BB->end(), TII, dl, UnitSize, scratch, destPhi, destLoop,
+ IsThumb1, IsThumb2);
// Decrement loop variable by UnitSize.
- ByvalEmitter.emitSubImm(BB, BB->end(), dl, varPhi, varLoop);
- ByvalEmitter.emitBranchNE(BB, BB->end(), dl, loopMBB);
+ if (IsThumb1) {
+ MachineInstrBuilder MIB =
+ BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop);
+ MIB = AddDefaultT1CC(MIB);
+ MIB.addReg(varPhi).addImm(UnitSize);
+ AddDefaultPred(MIB);
+ } else {
+ MachineInstrBuilder MIB =
+ BuildMI(*BB, BB->end(), dl,
+ TII->get(IsThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
+ AddDefaultCC(AddDefaultPred(MIB.addReg(varPhi).addImm(UnitSize)));
+ MIB->getOperand(5).setReg(ARM::CPSR);
+ MIB->getOperand(5).setIsDef(true);
+ }
+ BuildMI(*BB, BB->end(), dl,
+ TII->get(IsThumb1 ? ARM::tBcc : IsThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
// loopMBB can loop back to loopMBB or fall through to exitMBB.
BB->addSuccessor(loopMBB);
unsigned srcIn = srcLoop;
unsigned destIn = destLoop;
for (unsigned i = 0; i < BytesLeft; i++) {
- std::pair<unsigned, unsigned> res =
- ByvalEmitter.emitByteLoad(BB, StartOfExit, dl, srcIn);
- unsigned scratch = res.first;
- srcIn = res.second;
- destIn = ByvalEmitter.emitByteStore(BB, StartOfExit, dl, destIn, scratch);
+ unsigned srcOut = MRI.createVirtualRegister(TRC);
+ unsigned destOut = MRI.createVirtualRegister(TRC);
+ unsigned scratch = MRI.createVirtualRegister(TRC);
+ emitPostLd(BB, StartOfExit, TII, dl, 1, scratch, srcIn, srcOut,
+ IsThumb1, IsThumb2);
+ emitPostSt(BB, StartOfExit, TII, dl, 1, scratch, destIn, destOut,
+ IsThumb1, IsThumb2);
+ srcIn = srcOut;
+ destIn = destOut;
}
MI->eraseFromParent(); // The instruction is gone now.
case 'r':
return RCPair(0U, &ARM::GPRRegClass);
case 'w':
+ if (VT == MVT::Other)
+ break;
if (VT == MVT::f32)
return RCPair(0U, &ARM::SPRRegClass);
if (VT.getSizeInBits() == 64)
return RCPair(0U, &ARM::QPRRegClass);
break;
case 'x':
+ if (VT == MVT::Other)
+ break;
if (VT == MVT::f32)
return RCPair(0U, &ARM::SPR_8RegClass);
if (VT.getSizeInBits() == 64)