X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FARM%2FARMBaseInstrInfo.cpp;h=1c6c210dae8d93ef7e0f47827c96883f398870cb;hb=5d0f7af3dc42d7bc843858317fba3bb91c44d68f;hp=ed8b9cd9a1c043fcb0bef101024f11b258c3bb1b;hpb=bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5;p=oota-llvm.git diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index ed8b9cd9a1c..1c6c210dae8 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -11,10 +11,11 @@ // //===----------------------------------------------------------------------===// -#include "ARMBaseInstrInfo.h" #include "ARM.h" +#include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" #include "ARMConstantPoolValue.h" +#include "ARMFeatures.h" #include "ARMHazardRecognizer.h" #include "ARMMachineFunctionInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" @@ -31,16 +32,19 @@ #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCExpr.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#define GET_INSTRINFO_CTOR -#include "ARMGenInstrInfo.inc" - using namespace llvm; +#define DEBUG_TYPE "arm-instrinfo" + +#define GET_INSTRINFO_CTOR_DTOR +#include "ARMGenInstrInfo.inc" + static cl::opt EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden, cl::desc("Enable ARM 2-addr to 3-addr conv")); @@ -113,8 +117,7 @@ ScheduleHazardRecognizer *ARMBaseInstrInfo:: CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const { if (Subtarget.isThumb2() || Subtarget.hasVFP2()) - return (ScheduleHazardRecognizer *) - new ARMHazardRecognizer(II, *this, getRegisterInfo(), Subtarget, DAG); + return (ScheduleHazardRecognizer *)new ARMHazardRecognizer(II, DAG); return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG); } @@ -125,14 +128,14 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, // FIXME: Thumb2 support. if (!EnableARM3Addr) - return NULL; + return nullptr; MachineInstr *MI = MBBI; MachineFunction &MF = *MI->getParent()->getParent(); uint64_t TSFlags = MI->getDesc().TSFlags; bool isPre = false; switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) { - default: return NULL; + default: return nullptr; case ARMII::IndexModePre: isPre = true; break; @@ -144,10 +147,10 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, // operation. unsigned MemOpc = getUnindexedOpcode(MI->getOpcode()); if (MemOpc == 0) - return NULL; + return nullptr; - MachineInstr *UpdateMI = NULL; - MachineInstr *MemMI = NULL; + MachineInstr *UpdateMI = nullptr; + MachineInstr *MemMI = nullptr; unsigned AddrMode = (TSFlags & ARMII::AddrModeMask); const MCInstrDesc &MCID = MI->getDesc(); unsigned NumOps = MCID.getNumOperands(); @@ -169,7 +172,7 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, if (ARM_AM::getSOImmVal(Amt) == -1) // Can't encode it in a so_imm operand. This transformation will // add more than 1 instruction. Abandon! - return NULL; + return nullptr; UpdateMI = BuildMI(MF, MI->getDebugLoc(), get(isSub ? ARM::SUBri : ARM::ADDri), WBReg) .addReg(BaseReg).addImm(Amt) @@ -273,98 +276,90 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl &Cond, bool AllowModify) const { - // If the block has no terminators, it just falls into the block after it. + TBB = nullptr; + FBB = nullptr; + MachineBasicBlock::iterator I = MBB.end(); if (I == MBB.begin()) - return false; + return false; // Empty blocks are easy. --I; - while (I->isDebugValue()) { - if (I == MBB.begin()) - return false; - --I; - } - if (!isUnpredicatedTerminator(I)) - return false; - // Get the last instruction in the block. - MachineInstr *LastInst = I; + // Walk backwards from the end of the basic block until the branch is + // analyzed or we give up. + while (isPredicated(I) || I->isTerminator() || I->isDebugValue()) { - // If there is only one terminator instruction, process it. - unsigned LastOpc = LastInst->getOpcode(); - if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { - if (isUncondBranchOpcode(LastOpc)) { - TBB = LastInst->getOperand(0).getMBB(); - return false; + // Flag to be raised on unanalyzeable instructions. This is useful in cases + // where we want to clean up on the end of the basic block before we bail + // out. + bool CantAnalyze = false; + + // Skip over DEBUG values and predicated nonterminators. + while (I->isDebugValue() || !I->isTerminator()) { + if (I == MBB.begin()) + return false; + --I; } - if (isCondBranchOpcode(LastOpc)) { - // Block ends with fall-through condbranch. - TBB = LastInst->getOperand(0).getMBB(); - Cond.push_back(LastInst->getOperand(1)); - Cond.push_back(LastInst->getOperand(2)); - return false; + + if (isIndirectBranchOpcode(I->getOpcode()) || + isJumpTableBranchOpcode(I->getOpcode())) { + // Indirect branches and jump tables can't be analyzed, but we still want + // to clean up any instructions at the tail of the basic block. + CantAnalyze = true; + } else if (isUncondBranchOpcode(I->getOpcode())) { + TBB = I->getOperand(0).getMBB(); + } else if (isCondBranchOpcode(I->getOpcode())) { + // Bail out if we encounter multiple conditional branches. + if (!Cond.empty()) + return true; + + assert(!FBB && "FBB should have been null."); + FBB = TBB; + TBB = I->getOperand(0).getMBB(); + Cond.push_back(I->getOperand(1)); + Cond.push_back(I->getOperand(2)); + } else if (I->isReturn()) { + // Returns can't be analyzed, but we should run cleanup. + CantAnalyze = !isPredicated(I); + } else { + // We encountered other unrecognized terminator. Bail out immediately. + return true; } - return true; // Can't handle indirect branch. - } - // Get the instruction before it if it is a terminator. - MachineInstr *SecondLastInst = I; - unsigned SecondLastOpc = SecondLastInst->getOpcode(); - - // If AllowModify is true and the block ends with two or more unconditional - // branches, delete all but the first unconditional branch. - if (AllowModify && isUncondBranchOpcode(LastOpc)) { - while (isUncondBranchOpcode(SecondLastOpc)) { - LastInst->eraseFromParent(); - LastInst = SecondLastInst; - LastOpc = LastInst->getOpcode(); - if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { - // Return now the only terminator is an unconditional branch. - TBB = LastInst->getOperand(0).getMBB(); - return false; - } else { - SecondLastInst = I; - SecondLastOpc = SecondLastInst->getOpcode(); + // Cleanup code - to be run for unpredicated unconditional branches and + // returns. + if (!isPredicated(I) && + (isUncondBranchOpcode(I->getOpcode()) || + isIndirectBranchOpcode(I->getOpcode()) || + isJumpTableBranchOpcode(I->getOpcode()) || + I->isReturn())) { + // Forget any previous condition branch information - it no longer applies. + Cond.clear(); + FBB = nullptr; + + // If we can modify the function, delete everything below this + // unconditional branch. + if (AllowModify) { + MachineBasicBlock::iterator DI = std::next(I); + while (DI != MBB.end()) { + MachineInstr *InstToDelete = DI; + ++DI; + InstToDelete->eraseFromParent(); + } } } - } - - // If there are three terminators, we don't know what sort of block this is. - if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I)) - return true; - // If the block ends with a B and a Bcc, handle it. - if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { - TBB = SecondLastInst->getOperand(0).getMBB(); - Cond.push_back(SecondLastInst->getOperand(1)); - Cond.push_back(SecondLastInst->getOperand(2)); - FBB = LastInst->getOperand(0).getMBB(); - return false; - } + if (CantAnalyze) + return true; - // If the block ends with two unconditional branches, handle it. The second - // one is not executed, so remove it. - if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { - TBB = SecondLastInst->getOperand(0).getMBB(); - I = LastInst; - if (AllowModify) - I->eraseFromParent(); - return false; - } + if (I == MBB.begin()) + return false; - // ...likewise if it ends with a branch table followed by an unconditional - // branch. The branch folder can create these, and we must get rid of them for - // correctness of Thumb constant islands. - if ((isJumpTableBranchOpcode(SecondLastOpc) || - isIndirectBranchOpcode(SecondLastOpc)) && - isUncondBranchOpcode(LastOpc)) { - I = LastInst; - if (AllowModify) - I->eraseFromParent(); - return true; + --I; } - // Otherwise, can't handle this. - return true; + // We made it past the terminators without bailing out - we must have + // analyzed this branch successfully. + return false; } @@ -413,7 +408,7 @@ ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, assert((Cond.size() == 2 || Cond.size() == 0) && "ARM branch conditions have two components!"); - if (FBB == 0) { + if (!FBB) { if (Cond.empty()) { // Unconditional branch? if (isThumb) BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).addImm(ARMCC::AL).addReg(0); @@ -529,13 +524,35 @@ bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const { if (!MI->isPredicable()) return false; - if ((MI->getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) { - ARMFunctionInfo *AFI = - MI->getParent()->getParent()->getInfo(); - return AFI->isThumb2Function(); + ARMFunctionInfo *AFI = + MI->getParent()->getParent()->getInfo(); + + if (AFI->isThumb2Function()) { + if (getSubtarget().restrictIT()) + return isV8EligibleForIT(MI); + } else { // non-Thumb + if ((MI->getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) + return false; + } + + return true; +} + +namespace llvm { +template <> bool IsCPSRDead(MachineInstr *MI) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || MO.isUndef() || MO.isUse()) + continue; + if (MO.getReg() != ARM::CPSR) + continue; + if (!MO.isDead()) + return false; } + // all definitions of CPSR are dead return true; } +} /// FIXME: Works around a gcc miscompilation with -fstrict-aliasing. LLVM_ATTRIBUTE_NOINLINE @@ -561,15 +578,10 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { // If this machine instr is an inline asm, measure it. if (MI->getOpcode() == ARM::INLINEASM) return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI); - if (MI->isLabel()) - return 0; unsigned Opc = MI->getOpcode(); switch (Opc) { - case TargetOpcode::IMPLICIT_DEF: - case TargetOpcode::KILL: - case TargetOpcode::PROLOG_LABEL: - case TargetOpcode::EH_LABEL: - case TargetOpcode::DBG_VALUE: + default: + // pseudo-instruction sizes are zero. return 0; case TargetOpcode::BUNDLE: return getInstBundleLength(MI); @@ -613,7 +625,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { MI->getOperand(NumOps - (MI->isPredicable() ? 3 : 2)); unsigned JTI = JTOP.getIndex(); const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); - assert(MJTI != 0); + assert(MJTI != nullptr); const std::vector &JT = MJTI->getJumpTables(); assert(JTI < JT.size()); // Thumb instructions are 2 byte aligned, but JT entries are 4 byte @@ -632,9 +644,6 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { ++NumEntries; return NumEntries * EntrySize + InstSize; } - default: - // Otherwise, pseudo-instruction sizes are zero. - return 0; } } @@ -654,16 +663,16 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, unsigned DestReg, unsigned SrcReg, bool KillSrc) const { bool GPRDest = ARM::GPRRegClass.contains(DestReg); - bool GPRSrc = ARM::GPRRegClass.contains(SrcReg); + bool GPRSrc = ARM::GPRRegClass.contains(SrcReg); if (GPRDest && GPRSrc) { AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)))); + .addReg(SrcReg, getKillRegState(KillSrc)))); return; } bool SPRDest = ARM::SPRRegClass.contains(DestReg); - bool SPRSrc = ARM::SPRRegClass.contains(SrcReg); + bool SPRSrc = ARM::SPRRegClass.contains(SrcReg); unsigned Opc = 0; if (SPRDest && SPRSrc) @@ -692,26 +701,47 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, int Spacing = 1; // Use VORRq when possible. - if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) - Opc = ARM::VORRq, BeginIdx = ARM::qsub_0, SubRegs = 2; - else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) - Opc = ARM::VORRq, BeginIdx = ARM::qsub_0, SubRegs = 4; + if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) { + Opc = ARM::VORRq; + BeginIdx = ARM::qsub_0; + SubRegs = 2; + } else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) { + Opc = ARM::VORRq; + BeginIdx = ARM::qsub_0; + SubRegs = 4; // Fall back to VMOVD. - else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) - Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 2; - else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) - Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 3; - else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) - Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4; - else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) - Opc = ARM::MOVr, BeginIdx = ARM::gsub_0, SubRegs = 2; - - else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) - Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 2, Spacing = 2; - else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) - Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 3, Spacing = 2; - else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) - Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4, Spacing = 2; + } else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) { + Opc = ARM::VMOVD; + BeginIdx = ARM::dsub_0; + SubRegs = 2; + } else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) { + Opc = ARM::VMOVD; + BeginIdx = ARM::dsub_0; + SubRegs = 3; + } else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) { + Opc = ARM::VMOVD; + BeginIdx = ARM::dsub_0; + SubRegs = 4; + } else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) { + Opc = Subtarget.isThumb2() ? ARM::tMOVr : ARM::MOVr; + BeginIdx = ARM::gsub_0; + SubRegs = 2; + } else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) { + Opc = ARM::VMOVD; + BeginIdx = ARM::dsub_0; + SubRegs = 2; + Spacing = 2; + } else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) { + Opc = ARM::VMOVD; + BeginIdx = ARM::dsub_0; + SubRegs = 3; + Spacing = 2; + } else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) { + Opc = ARM::VMOVD; + BeginIdx = ARM::dsub_0; + SubRegs = 4; + Spacing = 2; + } assert(Opc && "Impossible reg-to-reg copy"); @@ -720,26 +750,28 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, // Copy register tuples backward when the first Dest reg overlaps with SrcReg. if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) { - BeginIdx = BeginIdx + ((SubRegs-1)*Spacing); + BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing); Spacing = -Spacing; } #ifndef NDEBUG SmallSet DstRegs; #endif for (unsigned i = 0; i != SubRegs; ++i) { - unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i*Spacing); - unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i*Spacing); + unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing); + unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing); assert(Dst && Src && "Bad sub-register"); #ifndef NDEBUG assert(!DstRegs.count(Src) && "destructive vector copy"); DstRegs.insert(Dst); #endif - Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst) - .addReg(Src); + Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src); // VORR takes two source operands. if (Opc == ARM::VORRq) Mov.addReg(Src); Mov = AddDefaultPred(Mov); + // MOVr can set CC. + if (Opc == ARM::MOVr) + Mov = AddDefaultCC(Mov); } // Add implicit super-register defs and kills to the last instruction. Mov->addRegisterDefined(DestReg, TRI); @@ -747,10 +779,10 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, Mov->addRegisterKilled(SrcReg, TRI); } -static const -MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB, - unsigned Reg, unsigned SubIdx, unsigned State, - const TargetRegisterInfo *TRI) { +const MachineInstrBuilder & +ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg, + unsigned SubIdx, unsigned State, + const TargetRegisterInfo *TRI) const { if (!SubIdx) return MIB.addReg(Reg, State); @@ -795,12 +827,22 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) { - MachineInstrBuilder MIB = - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STMIA)) - .addFrameIndex(FI)) - .addMemOperand(MMO); - MIB = AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI); - AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI); + if (Subtarget.hasV5TEOps()) { + MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STRD)); + AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI); + AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI); + MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO); + + AddDefaultPred(MIB); + } else { + // Fallback to STM instruction, which has existed since the dawn of + // time. + MachineInstrBuilder MIB = + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STMIA)) + .addFrameIndex(FI).addMemOperand(MMO)); + AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI); + AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI); + } } else llvm_unreachable("Unknown reg class!"); break; @@ -948,7 +990,6 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); MachineFunction &MF = *MBB.getParent(); - ARMFunctionInfo *AFI = MF.getInfo(); MachineFrameInfo &MFI = *MF.getFrameInfo(); unsigned Align = MFI.getObjectAlignment(FI); MachineMemOperand *MMO = @@ -975,12 +1016,24 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) { - unsigned LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA : ARM::LDMIA; - MachineInstrBuilder MIB = - AddDefaultPred(BuildMI(MBB, I, DL, get(LdmOpc)) - .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); - MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI); - MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI); + MachineInstrBuilder MIB; + + if (Subtarget.hasV5TEOps()) { + MIB = BuildMI(MBB, I, DL, get(ARM::LDRD)); + AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI); + AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI); + MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO); + + AddDefaultPred(MIB); + } else { + // Fallback to LDM instruction, which has existed since the dawn of + // time. + MIB = AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDMIA)) + .addFrameIndex(FI).addMemOperand(MMO)); + MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI); + MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI); + } + if (TargetRegisterInfo::isPhysicalRegister(DestReg)) MIB.addReg(DestReg, RegState::ImplicitDefine); } else @@ -1187,16 +1240,6 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{ return true; } -MachineInstr* -ARMBaseInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, - int FrameIx, uint64_t Offset, - const MDNode *MDPtr, - DebugLoc DL) const { - MachineInstrBuilder MIB = BuildMI(MF, DL, get(ARM::DBG_VALUE)) - .addFrameIndex(FrameIx).addImm(0).addImm(Offset).addMetadata(MDPtr); - return &*MIB; -} - /// Create a copy of a const pool value. Update CPI to the new index and return /// the label UID. static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) { @@ -1210,7 +1253,8 @@ static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) { static_cast(MCPE.Val.MachineCPVal); unsigned PCLabelId = AFI->createPICLabelUId(); - ARMConstantPoolValue *NewCPV = 0; + ARMConstantPoolValue *NewCPV = nullptr; + // FIXME: The below assumes PIC relocation model and that the function // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR @@ -1293,10 +1337,11 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0, Opcode == ARM::t2LDRpci_pic || Opcode == ARM::tLDRpci || Opcode == ARM::tLDRpci_pic || - Opcode == ARM::MOV_ga_dyn || + Opcode == ARM::LDRLIT_ga_pcrel || + Opcode == ARM::LDRLIT_ga_pcrel_ldr || + Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr || - Opcode == ARM::t2MOV_ga_dyn || Opcode == ARM::t2MOV_ga_pcrel) { if (MI1->getOpcode() != Opcode) return false; @@ -1308,10 +1353,11 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0, if (MO0.getOffset() != MO1.getOffset()) return false; - if (Opcode == ARM::MOV_ga_dyn || + if (Opcode == ARM::LDRLIT_ga_pcrel || + Opcode == ARM::LDRLIT_ga_pcrel_ldr || + Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr || - Opcode == ARM::t2MOV_ga_dyn || Opcode == ARM::t2MOV_ga_pcrel) // Ignore the PC labels. return MO0.getGlobal() == MO1.getGlobal(); @@ -1399,9 +1445,11 @@ bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, case ARM::VLDRD: case ARM::VLDRS: case ARM::t2LDRi8: + case ARM::t2LDRBi8: case ARM::t2LDRDi8: case ARM::t2LDRSHi8: case ARM::t2LDRi12: + case ARM::t2LDRBi12: case ARM::t2LDRSHi12: break; } @@ -1418,8 +1466,10 @@ bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, case ARM::VLDRD: case ARM::VLDRS: case ARM::t2LDRi8: + case ARM::t2LDRBi8: case ARM::t2LDRSHi8: case ARM::t2LDRi12: + case ARM::t2LDRBi12: case ARM::t2LDRSHi12: break; } @@ -1466,7 +1516,16 @@ bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, if ((Offset2 - Offset1) / 8 > 64) return false; - if (Load1->getMachineOpcode() != Load2->getMachineOpcode()) + // Check if the machine opcodes are different. If they are different + // then we consider them to not be of the same base address, + // EXCEPT in the case of Thumb2 byte loads where one is LDRBi8 and the other LDRBi12. + // In this case, they are considered to be the same because they are different + // encoding forms of the same basic instruction. + if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) && + !((Load1->getMachineOpcode() == ARM::t2LDRBi8 && + Load2->getMachineOpcode() == ARM::t2LDRBi12) || + (Load1->getMachineOpcode() == ARM::t2LDRBi12 && + Load2->getMachineOpcode() == ARM::t2LDRBi8))) return false; // FIXME: overly conservative? // Four loads in a row should be sufficient. @@ -1489,7 +1548,7 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI, return false; // Terminators and labels can't be scheduled around. - if (MI->isTerminator() || MI->isLabel()) + if (MI->isTerminator() || MI->isPosition()) return true; // Treat the start of the IT block as a scheduling boundary, but schedule @@ -1605,10 +1664,10 @@ ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg); // MOVCC AL can't be inverted. Shouldn't happen. if (CC == ARMCC::AL || PredReg != ARM::CPSR) - return NULL; + return nullptr; MI = TargetInstrInfo::commuteInstruction(MI, NewMI); if (!MI) - return NULL; + return nullptr; // After swapping the MOVCC operands, also invert the condition. MI->getOperand(MI->findFirstPredOperandIdx()) .setImm(ARMCC::getOppositeCondition(CC)); @@ -1624,35 +1683,36 @@ static MachineInstr *canFoldIntoMOVCC(unsigned Reg, const MachineRegisterInfo &MRI, const TargetInstrInfo *TII) { if (!TargetRegisterInfo::isVirtualRegister(Reg)) - return 0; + return nullptr; if (!MRI.hasOneNonDBGUse(Reg)) - return 0; + return nullptr; MachineInstr *MI = MRI.getVRegDef(Reg); if (!MI) - return 0; + return nullptr; // MI is folded into the MOVCC by predicating it. if (!MI->isPredicable()) - return 0; + return nullptr; // Check if MI has any non-dead defs or physreg uses. This also detects // predicated instructions which will be reading CPSR. for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); // Reject frame index operands, PEI can't handle the predicated pseudos. if (MO.isFI() || MO.isCPI() || MO.isJTI()) - return 0; + return nullptr; if (!MO.isReg()) continue; // MI can't have any tied operands, that would conflict with predication. if (MO.isTied()) - return 0; + return nullptr; if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) - return 0; + return nullptr; if (MO.isDef() && !MO.isDead()) - return 0; + return nullptr; } bool DontMoveAcrossStores = true; - if (!MI->isSafeToMove(TII, /* AliasAnalysis = */ 0, DontMoveAcrossStores)) - return 0; + if (!MI->isSafeToMove(TII, /* AliasAnalysis = */ nullptr, + DontMoveAcrossStores)) + return nullptr; return MI; } @@ -1681,19 +1741,25 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI, bool PreferFalse) const { assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) && "Unknown select instruction"); - const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); + MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); MachineInstr *DefMI = canFoldIntoMOVCC(MI->getOperand(2).getReg(), MRI, this); bool Invert = !DefMI; if (!DefMI) DefMI = canFoldIntoMOVCC(MI->getOperand(1).getReg(), MRI, this); if (!DefMI) - return 0; + return nullptr; + + // Find new register class to use. + MachineOperand FalseReg = MI->getOperand(Invert ? 2 : 1); + unsigned DestReg = MI->getOperand(0).getReg(); + const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg()); + if (!MRI.constrainRegClass(DestReg, PreviousClass)) + return nullptr; // Create a new predicated version of DefMI. // Rfalse is the first use. MachineInstrBuilder NewMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), - DefMI->getDesc(), - MI->getOperand(0).getReg()); + DefMI->getDesc(), DestReg); // Copy all the DefMI operands, excluding its (null) predicate. const MCInstrDesc &DefDesc = DefMI->getDesc(); @@ -1716,7 +1782,6 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI, // register operand tied to the first def. // The tie makes the register allocator ensure the FalseReg is allocated the // same register as operand 0. - MachineOperand FalseReg = MI->getOperand(Invert ? 2 : 1); FalseReg.setImplicit(); NewMI.addOperand(FalseReg); NewMI->tieOperands(0, NewMI->getNumOperands() - 1); @@ -1776,6 +1841,14 @@ void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB, unsigned DestReg, unsigned BaseReg, int NumBytes, ARMCC::CondCodes Pred, unsigned PredReg, const ARMBaseInstrInfo &TII, unsigned MIFlags) { + if (NumBytes == 0 && DestReg != BaseReg) { + BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg) + .addReg(BaseReg, RegState::Kill) + .addImm((unsigned)Pred).addReg(PredReg).addReg(0) + .setMIFlags(MIFlags); + return; + } + bool isSub = NumBytes < 0; if (isSub) NumBytes = -NumBytes; @@ -1799,6 +1872,125 @@ void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB, } } +static bool isAnySubRegLive(unsigned Reg, const TargetRegisterInfo *TRI, + MachineInstr *MI) { + for (MCSubRegIterator Subreg(Reg, TRI, /* IncludeSelf */ true); + Subreg.isValid(); ++Subreg) + if (MI->getParent()->computeRegisterLiveness(TRI, *Subreg, MI) != + MachineBasicBlock::LQR_Dead) + return true; + return false; +} +bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget, + MachineFunction &MF, MachineInstr *MI, + unsigned NumBytes) { + // This optimisation potentially adds lots of load and store + // micro-operations, it's only really a great benefit to code-size. + if (!Subtarget.isMinSize()) + return false; + + // If only one register is pushed/popped, LLVM can use an LDR/STR + // instead. We can't modify those so make sure we're dealing with an + // instruction we understand. + bool IsPop = isPopOpcode(MI->getOpcode()); + bool IsPush = isPushOpcode(MI->getOpcode()); + if (!IsPush && !IsPop) + return false; + + bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD || + MI->getOpcode() == ARM::VLDMDIA_UPD; + bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH || + MI->getOpcode() == ARM::tPOP || + MI->getOpcode() == ARM::tPOP_RET; + + assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP && + MI->getOperand(1).getReg() == ARM::SP)) && + "trying to fold sp update into non-sp-updating push/pop"); + + // The VFP push & pop act on D-registers, so we can only fold an adjustment + // by a multiple of 8 bytes in correctly. Similarly rN is 4-bytes. Don't try + // if this is violated. + if (NumBytes % (IsVFPPushPop ? 8 : 4) != 0) + return false; + + // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+ + // pred) so the list starts at 4. Thumb1 starts after the predicate. + int RegListIdx = IsT1PushPop ? 2 : 4; + + // Calculate the space we'll need in terms of registers. + unsigned FirstReg = MI->getOperand(RegListIdx).getReg(); + unsigned RD0Reg, RegsNeeded; + if (IsVFPPushPop) { + RD0Reg = ARM::D0; + RegsNeeded = NumBytes / 8; + } else { + RD0Reg = ARM::R0; + RegsNeeded = NumBytes / 4; + } + + // We're going to have to strip all list operands off before + // re-adding them since the order matters, so save the existing ones + // for later. + SmallVector RegList; + for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) + RegList.push_back(MI->getOperand(i)); + + const TargetRegisterInfo *TRI = MF.getRegInfo().getTargetRegisterInfo(); + const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF); + + // Now try to find enough space in the reglist to allocate NumBytes. + for (unsigned CurReg = FirstReg - 1; CurReg >= RD0Reg && RegsNeeded; + --CurReg) { + if (!IsPop) { + // Pushing any register is completely harmless, mark the + // register involved as undef since we don't care about it in + // the slightest. + RegList.push_back(MachineOperand::CreateReg(CurReg, false, false, + false, false, true)); + --RegsNeeded; + continue; + } + + // However, we can only pop an extra register if it's not live. For + // registers live within the function we might clobber a return value + // register; the other way a register can be live here is if it's + // callee-saved. + // TODO: Currently, computeRegisterLiveness() does not report "live" if a + // sub reg is live. When computeRegisterLiveness() works for sub reg, it + // can replace isAnySubRegLive(). + if (isCalleeSavedRegister(CurReg, CSRegs) || + isAnySubRegLive(CurReg, TRI, MI)) { + // VFP pops don't allow holes in the register list, so any skip is fatal + // for our transformation. GPR pops do, so we should just keep looking. + if (IsVFPPushPop) + return false; + else + continue; + } + + // Mark the unimportant registers as in the POP. + RegList.push_back(MachineOperand::CreateReg(CurReg, true, false, false, + true)); + --RegsNeeded; + } + + if (RegsNeeded > 0) + return false; + + // Finally we know we can profitably perform the optimisation so go + // ahead: strip all existing registers off and add them back again + // in the right order. + for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) + MI->RemoveOperand(i); + + // Add the complete list back in. + MachineInstrBuilder MIB(MF, &*MI); + for (int i = RegList.size() - 1; i >= 0; --i) + MIB.addOperand(RegList[i]); + + return true; +} + bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned FrameReg, int &Offset, const ARMBaseInstrInfo &TII) { @@ -1992,7 +2184,7 @@ static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg, // Walk down one instruction which is potentially an 'and'. const MachineInstr &Copy = *MI; MachineBasicBlock::iterator AND( - llvm::next(MachineBasicBlock::iterator(MI))); + std::next(MachineBasicBlock::iterator(MI))); if (AND == MI->getParent()->end()) return false; MI = AND; return isSuitableForMask(MI, Copy.getOperand(0).getReg(), @@ -2068,9 +2260,10 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, // Masked compares sometimes use the same register as the corresponding 'and'. if (CmpMask != ~0) { if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(MI)) { - MI = 0; - for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg), - UE = MRI->use_end(); UI != UE; ++UI) { + MI = nullptr; + for (MachineRegisterInfo::use_instr_iterator + UI = MRI->use_instr_begin(SrcReg), UE = MRI->use_instr_end(); + UI != UE; ++UI) { if (UI->getParent() != CmpInstr->getParent()) continue; MachineInstr *PotentialAND = &*UI; if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) || @@ -2094,17 +2287,17 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, // One is MI, the other is a SUB instruction. // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1). // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue). - MachineInstr *Sub = NULL; + MachineInstr *Sub = nullptr; if (SrcReg2 != 0) // MI is not a candidate for CMPrr. - MI = NULL; + MI = nullptr; else if (MI->getParent() != CmpInstr->getParent() || CmpValue != 0) { // Conservatively refuse to convert an instruction which isn't in the same // BB as the comparison. // For CMPri, we need to check Sub, thus we can't return here. if (CmpInstr->getOpcode() == ARM::CMPri || CmpInstr->getOpcode() == ARM::t2CMPri) - MI = NULL; + MI = nullptr; else return false; } @@ -2205,8 +2398,32 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, isSafe = true; break; } - // Condition code is after the operand before CPSR. - ARMCC::CondCodes CC = (ARMCC::CondCodes)Instr.getOperand(IO-1).getImm(); + // Condition code is after the operand before CPSR except for VSELs. + ARMCC::CondCodes CC; + bool IsInstrVSel = true; + switch (Instr.getOpcode()) { + default: + IsInstrVSel = false; + CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm(); + break; + case ARM::VSELEQD: + case ARM::VSELEQS: + CC = ARMCC::EQ; + break; + case ARM::VSELGTD: + case ARM::VSELGTS: + CC = ARMCC::GT; + break; + case ARM::VSELGED: + case ARM::VSELGES: + CC = ARMCC::GE; + break; + case ARM::VSELVSS: + case ARM::VSELVSD: + CC = ARMCC::VS; + break; + } + if (Sub) { ARMCC::CondCodes NewCC = getSwappedCondition(CC); if (NewCC == ARMCC::AL) @@ -2217,11 +2434,14 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, // If it is safe to remove CmpInstr, the condition code of these // operands will be modified. if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 && - Sub->getOperand(2).getReg() == SrcReg) - OperandsToUpdate.push_back(std::make_pair(&((*I).getOperand(IO-1)), - NewCC)); - } - else + Sub->getOperand(2).getReg() == SrcReg) { + // VSel doesn't support condition code update. + if (IsInstrVSel) + return false; + OperandsToUpdate.push_back( + std::make_pair(&((*I).getOperand(IO - 1)), NewCC)); + } + } else switch (CC) { default: // CPSR can be used multiple times, we should continue. @@ -2753,7 +2973,7 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, break; } return UOps; - } else if (Subtarget.isCortexA8()) { + } else if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) { if (NumRegs < 4) return 2; // 4 registers would be issued: 2, 2. @@ -2790,7 +3010,7 @@ ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData, return ItinData->getOperandCycle(DefClass, DefIdx); int DefCycle; - if (Subtarget.isCortexA8()) { + if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) { // (regno / 2) + (regno % 2) + 1 DefCycle = RegNo / 2 + 1; if (RegNo % 2) @@ -2831,7 +3051,7 @@ ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData, return ItinData->getOperandCycle(DefClass, DefIdx); int DefCycle; - if (Subtarget.isCortexA8()) { + if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) { // 4 registers would be issued: 1, 2, 1. // 5 registers would be issued: 1, 2, 2. DefCycle = RegNo / 2; @@ -2865,7 +3085,7 @@ ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData, return ItinData->getOperandCycle(UseClass, UseIdx); int UseCycle; - if (Subtarget.isCortexA8()) { + if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) { // (regno / 2) + (regno % 2) + 1 UseCycle = RegNo / 2 + 1; if (RegNo % 2) @@ -2905,7 +3125,7 @@ ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData, return ItinData->getOperandCycle(UseClass, UseIdx); int UseCycle; - if (Subtarget.isCortexA8()) { + if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) { UseCycle = RegNo / 2; if (UseCycle < 2) UseCycle = 2; @@ -3042,8 +3262,7 @@ static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI, Dist = 0; MachineBasicBlock::const_iterator I = MI; ++I; - MachineBasicBlock::const_instr_iterator II = - llvm::prior(I.getInstrIterator()); + MachineBasicBlock::const_instr_iterator II = std::prev(I.getInstrIterator()); assert(II->isInsideBundle() && "Empty bundle?"); int Idx = -1; @@ -3082,7 +3301,7 @@ static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI, if (Idx == -1) { Dist = 0; - return 0; + return nullptr; } UseIdx = Idx; @@ -3096,7 +3315,7 @@ static int adjustDefLatency(const ARMSubtarget &Subtarget, const MachineInstr *DefMI, const MCInstrDesc *DefMCID, unsigned DefAlign) { int Adjust = 0; - if (Subtarget.isCortexA8() || Subtarget.isLikeA9()) { + if (Subtarget.isCortexA8() || Subtarget.isLikeA9() || Subtarget.isCortexA7()) { // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] // variants are one cycle cheaper. switch (DefMCID->getOpcode()) { @@ -3397,7 +3616,8 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, UseMCID, UseIdx, UseAlign); if (Latency > 1 && - (Subtarget.isCortexA8() || Subtarget.isLikeA9())) { + (Subtarget.isCortexA8() || Subtarget.isLikeA9() || + Subtarget.isCortexA7())) { // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] // variants are one cycle cheaper. switch (DefMCID.getOpcode()) { @@ -3490,6 +3710,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case ARM::VLD3d16Pseudo: case ARM::VLD3d32Pseudo: case ARM::VLD1d64TPseudo: + case ARM::VLD1d64TPseudoWB_fixed: case ARM::VLD3d8Pseudo_UPD: case ARM::VLD3d16Pseudo_UPD: case ARM::VLD3d32Pseudo_UPD: @@ -3506,6 +3727,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case ARM::VLD4d16Pseudo: case ARM::VLD4d32Pseudo: case ARM::VLD1d64QPseudo: + case ARM::VLD1d64QPseudoWB_fixed: case ARM::VLD4d8Pseudo_UPD: case ARM::VLD4d16Pseudo_UPD: case ARM::VLD4d32Pseudo_UPD: @@ -3577,6 +3799,24 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, return Latency; } +unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr *MI) const { + if (MI->isCopyLike() || MI->isInsertSubreg() || + MI->isRegSequence() || MI->isImplicitDef()) + return 0; + + if (MI->isBundle()) + return 0; + + const MCInstrDesc &MCID = MI->getDesc(); + + if (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR)) { + // When predicated, CPSR is an additional source operand for CPSR updating + // instructions, this apparently increases their latencies. + return 1; + } + return 0; +} + unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr *MI, unsigned *PredCost) const { @@ -3658,8 +3898,7 @@ hasHighOperandLatency(const InstrItineraryData *ItinData, return true; // Hoist VFP / NEON instructions with 4 or higher latency. - int Latency = computeOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx, - /*FindMin=*/false); + int Latency = computeOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx); if (Latency < 0) Latency = getInstrLatency(ItinData, DefMI); if (Latency <= 3) @@ -3734,9 +3973,9 @@ ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const { if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI)) return std::make_pair(ExeVFP, (1<getOpcode() == ARM::VMOVRS || MI->getOpcode() == ARM::VMOVSR || MI->getOpcode() == ARM::VMOVS)) @@ -4023,14 +4262,12 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { // VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops. // // FCONSTD can be used as a dependency-breaking instruction. - - unsigned ARMBaseInstrInfo:: getPartialRegUpdateClearance(const MachineInstr *MI, unsigned OpNum, const TargetRegisterInfo *TRI) const { - // Only Swift has partial register update problems. - if (!SwiftPartialUpdateClearance || !Subtarget.isSwift()) + if (!SwiftPartialUpdateClearance || + !(Subtarget.isSwift() || Subtarget.isCortexA15())) return 0; assert(TRI && "Need TRI instance"); @@ -4056,7 +4293,7 @@ getPartialRegUpdateClearance(const MachineInstr *MI, // Explicitly reads the dependency. case ARM::VLD1LNd32: - UseOp = 1; + UseOp = 3; break; default: return 0; @@ -4112,7 +4349,7 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI, // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines // the full D-register by loading the same value to both lanes. The // instruction is micro-coded with 2 uops, so don't do this until we can - // properly schedule micro-coded instuctions. The dispatcher stalls cause + // properly schedule micro-coded instructions. The dispatcher stalls cause // too big regressions. // Insert the dependency-breaking FCONSTD before MI. @@ -4122,6 +4359,43 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI, MI->addRegisterKilled(DReg, TRI, true); } +void ARMBaseInstrInfo::getUnconditionalBranch( + MCInst &Branch, const MCSymbolRefExpr *BranchTarget) const { + if (Subtarget.isThumb()) + Branch.setOpcode(ARM::tB); + else if (Subtarget.isThumb2()) + Branch.setOpcode(ARM::t2B); + else + Branch.setOpcode(ARM::Bcc); + + Branch.addOperand(MCOperand::CreateExpr(BranchTarget)); + Branch.addOperand(MCOperand::CreateImm(ARMCC::AL)); + Branch.addOperand(MCOperand::CreateReg(0)); +} + +void ARMBaseInstrInfo::getTrap(MCInst &MI) const { + if (Subtarget.isThumb()) + MI.setOpcode(ARM::tTRAP); + else if (Subtarget.useNaClTrap()) + MI.setOpcode(ARM::TRAPNaCl); + else + MI.setOpcode(ARM::TRAP); +} + bool ARMBaseInstrInfo::hasNOP() const { return (Subtarget.getFeatureBits() & ARM::HasV6T2Ops) != 0; } + +bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const { + if (MI->getNumOperands() < 4) + return true; + unsigned ShOpVal = MI->getOperand(3).getImm(); + unsigned ShImm = ARM_AM::getSORegOffset(ShOpVal); + // Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1. + if ((ShImm == 1 && ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsr) || + ((ShImm == 1 || ShImm == 2) && + ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsl)) + return true; + + return false; +}