From 9fe2009956fc40f3aea46fb3c38dcfb61c4aca46 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Thu, 20 Jan 2011 08:34:58 +0000 Subject: [PATCH] Sorry, several patches in one. TargetInstrInfo: Change produceSameValue() to take MachineRegisterInfo as an optional argument. When in SSA form, targets can use it to make more aggressive equality analysis. Machine LICM: 1. Eliminate isLoadFromConstantMemory, use MI.isInvariantLoad instead. 2. Fix a bug which prevent CSE of instructions which are not re-materializable. 3. Use improved form of produceSameValue. ARM: 1. Teach ARM produceSameValue to look pass some PIC labels. 2. Look for operands from different loads of different constant pool entries which have same values. 3. Re-implement PIC GA materialization using movw + movt. Combine the pair with a "add pc" or "ldr [pc]" to form pseudo instructions. This makes it possible to re-materialize the instruction, allow machine LICM to hoist the set of instructions out of the loop and make it possible to CSE them. It's a bit hacky, but it significantly improve code quality. 4. Some minor bug fixes as well. With the fixes, using movw + movt to materialize GAs significantly outperform the load from constantpool method. 186.crafty and 255.vortex improved > 20%, 254.gap and 176.gcc ~10%. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@123905 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Target/TargetInstrInfo.h | 10 +- lib/CodeGen/MachineLICM.cpp | 52 +--- lib/CodeGen/TargetInstrInfoImpl.cpp | 6 +- lib/Target/ARM/ARMBaseInstrInfo.cpp | 50 +++- lib/Target/ARM/ARMBaseInstrInfo.h | 3 +- lib/Target/ARM/ARMExpandPseudoInsts.cpp | 313 +++++++++++--------- lib/Target/ARM/ARMISelDAGToDAG.cpp | 4 +- lib/Target/ARM/ARMISelLowering.cpp | 7 +- lib/Target/ARM/ARMInstrInfo.td | 23 +- lib/Target/ARM/ARMInstrThumb2.td | 17 +- lib/Target/ARM/ARMRegisterInfo.td | 2 +- lib/Target/ARM/ARMSchedule.td | 2 + lib/Target/ARM/ARMScheduleA8.td | 6 + lib/Target/ARM/ARMScheduleA9.td | 10 + lib/Target/ARM/ARMScheduleV6.td | 6 + test/CodeGen/{Thumb => ARM}/machine-licm.ll | 32 +- 16 files changed, 320 insertions(+), 223 deletions(-) rename test/CodeGen/{Thumb => ARM}/machine-licm.ll (69%) diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h index b336ffd5a5d..cd22765f762 100644 --- a/include/llvm/Target/TargetInstrInfo.h +++ b/include/llvm/Target/TargetInstrInfo.h @@ -228,9 +228,12 @@ public: /// produceSameValue - Return true if two machine instructions would produce /// identical values. By default, this is only true when the two instructions - /// are deemed identical except for defs. + /// are deemed identical except for defs. If this function is called when the + /// IR is still in SSA form, the caller can pass the MachineRegisterInfo for + /// aggressive checks. virtual bool produceSameValue(const MachineInstr *MI0, - const MachineInstr *MI1) const = 0; + const MachineInstr *MI1, + const MachineRegisterInfo *MRI = 0) const = 0; /// AnalyzeBranch - Analyze the branching code at the end of MBB, returning /// true if it cannot be understood (e.g. it's a switch dispatch or isn't @@ -677,7 +680,8 @@ public: virtual MachineInstr *duplicate(MachineInstr *Orig, MachineFunction &MF) const; virtual bool produceSameValue(const MachineInstr *MI0, - const MachineInstr *MI1) const; + const MachineInstr *MI1, + const MachineRegisterInfo *MRI) const; virtual bool isSchedulingBoundary(const MachineInstr *MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const; diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index 7766e116d84..443fc2d97bd 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -208,10 +208,6 @@ namespace { /// specified instruction. void UpdateRegPressure(const MachineInstr *MI); - /// isLoadFromConstantMemory - Return true if the given instruction is a - /// load from constant memory. - bool isLoadFromConstantMemory(MachineInstr *MI); - /// ExtractHoistableLoad - Unfold a load from the given machineinstr if /// the load itself could be hoisted. Return the unfolded and hoistable /// load, or null if the load couldn't be unfolded or if it wouldn't @@ -773,25 +769,6 @@ static bool HasPHIUses(unsigned Reg, MachineRegisterInfo *MRI) { return false; } -/// isLoadFromConstantMemory - Return true if the given instruction is a -/// load from constant memory. Machine LICM will hoist these even if they are -/// not re-materializable. -bool MachineLICM::isLoadFromConstantMemory(MachineInstr *MI) { - if (!MI->getDesc().mayLoad()) return false; - if (!MI->hasOneMemOperand()) return false; - MachineMemOperand *MMO = *MI->memoperands_begin(); - if (MMO->isVolatile()) return false; - if (!MMO->getValue()) return false; - const PseudoSourceValue *PSV = dyn_cast(MMO->getValue()); - if (PSV) { - MachineFunction &MF = *MI->getParent()->getParent(); - return PSV->isConstant(MF.getFrameInfo()); - } else { - return AA->pointsToConstantMemory(AliasAnalysis::Location(MMO->getValue(), - MMO->getSize(), - MMO->getTBAAInfo())); - } -} /// HasHighOperandLatency - Compute operand latency between a def of 'Reg' /// and an use in the current loop, return true if the target considered @@ -995,7 +972,7 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { // High register pressure situation, only hoist if the instruction is going to // be remat'ed. if (!TII->isTriviallyReMaterializable(&MI, AA) && - !isLoadFromConstantMemory(&MI)) + !MI.isInvariantLoad(AA)) return false; } @@ -1021,7 +998,7 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { // If not, we may be able to unfold a load and hoist that. // First test whether the instruction is loading from an amenable // memory location. - if (!isLoadFromConstantMemory(MI)) + if (!MI->isInvariantLoad(AA)) return 0; // Next determine the register class for a temporary register. @@ -1072,20 +1049,15 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { void MachineLICM::InitCSEMap(MachineBasicBlock *BB) { for (MachineBasicBlock::iterator I = BB->begin(),E = BB->end(); I != E; ++I) { const MachineInstr *MI = &*I; - // FIXME: For now, only hoist re-materilizable instructions. LICM will - // increase register pressure. We want to make sure it doesn't increase - // spilling. - if (TII->isTriviallyReMaterializable(MI, AA)) { - unsigned Opcode = MI->getOpcode(); - DenseMap >::iterator - CI = CSEMap.find(Opcode); - if (CI != CSEMap.end()) - CI->second.push_back(MI); - else { - std::vector CSEMIs; - CSEMIs.push_back(MI); - CSEMap.insert(std::make_pair(Opcode, CSEMIs)); - } + unsigned Opcode = MI->getOpcode(); + DenseMap >::iterator + CI = CSEMap.find(Opcode); + if (CI != CSEMap.end()) + CI->second.push_back(MI); + else { + std::vector CSEMIs; + CSEMIs.push_back(MI); + CSEMap.insert(std::make_pair(Opcode, CSEMIs)); } } } @@ -1095,7 +1067,7 @@ MachineLICM::LookForDuplicate(const MachineInstr *MI, std::vector &PrevMIs) { for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) { const MachineInstr *PrevMI = PrevMIs[i]; - if (TII->produceSameValue(MI, PrevMI)) + if (TII->produceSameValue(MI, PrevMI, (PreRegAlloc ? MRI : 0))) return PrevMI; } return 0; diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp index 5fc7f323079..787c2712e75 100644 --- a/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -166,8 +166,10 @@ void TargetInstrInfoImpl::reMaterialize(MachineBasicBlock &MBB, MBB.insert(I, MI); } -bool TargetInstrInfoImpl::produceSameValue(const MachineInstr *MI0, - const MachineInstr *MI1) const { +bool +TargetInstrInfoImpl::produceSameValue(const MachineInstr *MI0, + const MachineInstr *MI1, + const MachineRegisterInfo *MRI) const { return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs); } diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 925569e9257..ef7458d270e 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -568,7 +568,6 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { return 4; case ARM::MOVi32imm: case ARM::t2MOVi32imm: - case ARM::MOV_pic_ga: return 8; case ARM::CONSTPOOL_ENTRY: // If this machine instr is a constant pool entry, its size is recorded as @@ -1053,12 +1052,16 @@ ARMBaseInstrInfo::duplicate(MachineInstr *Orig, MachineFunction &MF) const { } bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0, - const MachineInstr *MI1) const { + const MachineInstr *MI1, + const MachineRegisterInfo *MRI) const { int Opcode = MI0->getOpcode(); - if (Opcode == ARM::t2LDRpci || + if (Opcode == ARM::LDRi12 || + Opcode == ARM::t2LDRpci || Opcode == ARM::t2LDRpci_pic || Opcode == ARM::tLDRpci || - Opcode == ARM::tLDRpci_pic) { + Opcode == ARM::tLDRpci_pic || + Opcode == ARM::MOV_pic_ga_add_pc || + Opcode == ARM::t2MOV_pic_ga_add_pc) { if (MI1->getOpcode() != Opcode) return false; if (MI0->getNumOperands() != MI1->getNumOperands()) @@ -1066,9 +1069,17 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0, const MachineOperand &MO0 = MI0->getOperand(1); const MachineOperand &MO1 = MI1->getOperand(1); + if (Opcode == ARM::LDRi12 && (!MO0.isCPI() || !MO1.isCPI())) + return false; + if (MO0.getOffset() != MO1.getOffset()) return false; + if (Opcode == ARM::MOV_pic_ga_add_pc || + Opcode == ARM::t2MOV_pic_ga_add_pc) + // Ignore the PC labels. + return MO0.getGlobal() == MO1.getGlobal(); + const MachineFunction *MF = MI0->getParent()->getParent(); const MachineConstantPool *MCP = MF->getConstantPool(); int CPI0 = MO0.getIndex(); @@ -1080,6 +1091,37 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0, ARMConstantPoolValue *ACPV1 = static_cast(MCPE1.Val.MachineCPVal); return ACPV0->hasSameValue(ACPV1); + } else if (Opcode == ARM::PICLDR) { + if (MI1->getOpcode() != Opcode) + return false; + if (MI0->getNumOperands() != MI1->getNumOperands()) + return false; + + unsigned Addr0 = MI0->getOperand(1).getReg(); + unsigned Addr1 = MI1->getOperand(1).getReg(); + if (Addr0 != Addr1) { + if (!MRI || + !TargetRegisterInfo::isVirtualRegister(Addr0) || + !TargetRegisterInfo::isVirtualRegister(Addr1)) + return false; + + // This assumes SSA form. + MachineInstr *Def0 = MRI->getVRegDef(Addr0); + MachineInstr *Def1 = MRI->getVRegDef(Addr1); + // Check if the loaded value, e.g. a constantpool of a global address, are + // the same. + if (!produceSameValue(Def0, Def1, MRI)) + return false; + } + + for (unsigned i = 3, e = MI0->getNumOperands(); i != e; ++i) { + // %vreg12 = PICLDR %vreg11, 0, pred:14, pred:%noreg + const MachineOperand &MO0 = MI0->getOperand(i); + const MachineOperand &MO1 = MI1->getOperand(i); + if (!MO0.isIdenticalTo(MO1)) + return false; + } + return true; } return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs); diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 0ea8a9600bf..1fb88726d0d 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -298,7 +298,8 @@ public: MachineInstr *duplicate(MachineInstr *Orig, MachineFunction &MF) const; virtual bool produceSameValue(const MachineInstr *MI0, - const MachineInstr *MI1) const; + const MachineInstr *MI1, + const MachineRegisterInfo *MRI) const; /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to /// determine if two loads are loading from the same base address. It should diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 2dde617560d..29d4e1c9d8e 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -38,6 +38,7 @@ namespace { const ARMBaseInstrInfo *TII; const TargetRegisterInfo *TRI; const ARMSubtarget *STI; + ARMFunctionInfo *AFI; virtual bool runOnMachineFunction(MachineFunction &Fn); @@ -48,12 +49,16 @@ namespace { private: void TransferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI); + bool ExpandMI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI); bool ExpandMBB(MachineBasicBlock &MBB); void ExpandVLD(MachineBasicBlock::iterator &MBBI); void ExpandVST(MachineBasicBlock::iterator &MBBI); void ExpandLaneOp(MachineBasicBlock::iterator &MBBI); void ExpandVTBL(MachineBasicBlock::iterator &MBBI, unsigned Opc, bool IsExt, unsigned NumRegs); + void ExpandMOV32BitImm(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI); }; char ARMExpandPseudo::ID = 0; } @@ -612,21 +617,85 @@ void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI, MI.eraseFromParent(); } -bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { - bool Modified = false; +void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI) { + MachineInstr &MI = *MBBI; + unsigned Opcode = MI.getOpcode(); + unsigned PredReg = 0; + ARMCC::CondCodes Pred = llvm::getInstrPredicate(&MI, PredReg); + unsigned DstReg = MI.getOperand(0).getReg(); + bool DstIsDead = MI.getOperand(0).isDead(); + bool isCC = Opcode == ARM::MOVCCi32imm || Opcode == ARM::t2MOVCCi32imm; + const MachineOperand &MO = MI.getOperand(isCC ? 2 : 1); + MachineInstrBuilder LO16, HI16; + + if (!STI->hasV6T2Ops() && + (Opcode == ARM::MOVi32imm || Opcode == ARM::MOVCCi32imm)) { + // Expand into a movi + orr. + LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi), DstReg); + HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::ORRri)) + .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstReg); + + assert (MO.isImm() && "MOVi32imm w/ non-immediate source operand!"); + unsigned ImmVal = (unsigned)MO.getImm(); + unsigned SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal); + unsigned SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal); + LO16 = LO16.addImm(SOImmValV1); + HI16 = HI16.addImm(SOImmValV2); + (*LO16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + (*HI16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + LO16.addImm(Pred).addReg(PredReg).addReg(0); + HI16.addImm(Pred).addReg(PredReg).addReg(0); + TransferImpOps(MI, LO16, HI16); + MI.eraseFromParent(); + return; + } - MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); - while (MBBI != E) { - MachineInstr &MI = *MBBI; - MachineBasicBlock::iterator NMBBI = llvm::next(MBBI); + unsigned LO16Opc = 0; + unsigned HI16Opc = 0; + if (Opcode == ARM::t2MOVi32imm || Opcode == ARM::t2MOVCCi32imm) { + LO16Opc = ARM::t2MOVi16; + HI16Opc = ARM::t2MOVTi16; + } else { + LO16Opc = ARM::MOVi16; + HI16Opc = ARM::MOVTi16; + } - bool ModifiedOp = true; - unsigned Opcode = MI.getOpcode(); - switch (Opcode) { - default: - ModifiedOp = false; - break; + LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LO16Opc), DstReg); + HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc)) + .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstReg); + + if (MO.isImm()) { + unsigned Imm = MO.getImm(); + unsigned Lo16 = Imm & 0xffff; + unsigned Hi16 = (Imm >> 16) & 0xffff; + LO16 = LO16.addImm(Lo16); + HI16 = HI16.addImm(Hi16); + } else { + const GlobalValue *GV = MO.getGlobal(); + unsigned TF = MO.getTargetFlags(); + LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16); + HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16); + } + (*LO16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + (*HI16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + LO16.addImm(Pred).addReg(PredReg); + HI16.addImm(Pred).addReg(PredReg); + + TransferImpOps(MI, LO16, HI16); + MI.eraseFromParent(); +} + +bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI) { + MachineInstr &MI = *MBBI; + unsigned Opcode = MI.getOpcode(); + switch (Opcode) { + default: + return false; case ARM::Int_eh_sjlj_dispatchsetup: { MachineFunction &MF = *MI.getParent()->getParent(); const ARMBaseInstrInfo *AII = @@ -636,7 +705,6 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { // pointer) here since eh.sjlj.setjmp and eh.sjlj.longjmp don't do it // for us. Otherwise, expand to nothing. if (RI.hasBasePointer(MF)) { - ARMFunctionInfo *AFI = MF.getInfo(); int32_t NumBytes = AFI->getFramePtrSpillOffset(); unsigned FramePtr = RI.getFrameRegister(MF); assert(MF.getTarget().getFrameLowering()->hasFP(MF) && @@ -670,7 +738,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { } MI.eraseFromParent(); - break; + return true; } case ARM::MOVsrl_flag: @@ -678,26 +746,26 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { // These are just fancy MOVs insructions. AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVs), MI.getOperand(0).getReg()) - .addOperand(MI.getOperand(1)) - .addReg(0) - .addImm(ARM_AM::getSORegOpc((Opcode == ARM::MOVsrl_flag ? ARM_AM::lsr - : ARM_AM::asr), 1))) - .addReg(ARM::CPSR, RegState::Define); + .addOperand(MI.getOperand(1)) + .addReg(0) + .addImm(ARM_AM::getSORegOpc((Opcode == ARM::MOVsrl_flag ? ARM_AM::lsr + : ARM_AM::asr), 1))) + .addReg(ARM::CPSR, RegState::Define); MI.eraseFromParent(); - break; + return true; } case ARM::RRX: { // This encodes as "MOVs Rd, Rm, rrx MachineInstrBuilder MIB = AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVs), MI.getOperand(0).getReg()) - .addOperand(MI.getOperand(1)) - .addOperand(MI.getOperand(1)) - .addImm(ARM_AM::getSORegOpc(ARM_AM::rrx, 0))) + .addOperand(MI.getOperand(1)) + .addOperand(MI.getOperand(1)) + .addImm(ARM_AM::getSORegOpc(ARM_AM::rrx, 0))) .addReg(0); TransferImpOps(MI, MIB, MIB); MI.eraseFromParent(); - break; + return true; } case ARM::TPsoft: { MachineInstrBuilder MIB = @@ -708,7 +776,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { (*MIB).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); TransferImpOps(MI, MIB, MIB); MI.eraseFromParent(); - break; + return true; } case ARM::t2LDRHpci: case ARM::t2LDRBpci: @@ -733,13 +801,14 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { MachineInstrBuilder MIB = AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewLdOpc), DstReg) - .addReg(ARM::PC) - .addOperand(MI.getOperand(1))); + .addReg(ARM::PC) + .addOperand(MI.getOperand(1))); (*MIB).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); TransferImpOps(MI, MIB, MIB); MI.eraseFromParent(); - break; + return true; } + case ARM::tLDRpci_pic: case ARM::t2LDRpci_pic: { unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic) @@ -748,7 +817,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { bool DstIsDead = MI.getOperand(0).isDead(); MachineInstrBuilder MIB1 = BuildMI(MBB, MBBI, MI.getDebugLoc(), - TII->get(NewLdOpc), DstReg); + TII->get(NewLdOpc), DstReg); if (Opcode == ARM::t2LDRpci_pic) MIB1.addReg(ARM::PC); MIB1.addOperand(MI.getOperand(1)); AddDefaultPred(MIB1); @@ -760,103 +829,56 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { .addOperand(MI.getOperand(2)); TransferImpOps(MI, MIB1, MIB2); MI.eraseFromParent(); - break; + return true; } - case ARM::MOVi32imm: - case ARM::MOVCCi32imm: - case ARM::MOV_pic_ga: - case ARM::t2MOVi32imm: - case ARM::t2MOVCCi32imm: - case ARM::t2MOV_pic_ga: { - unsigned PredReg = 0; - ARMCC::CondCodes Pred = llvm::getInstrPredicate(&MI, PredReg); + case ARM::MOV_pic_ga_add_pc: + case ARM::MOV_pic_ga_ldr: + case ARM::t2MOV_pic_ga_add_pc: { + // Expand into movw + movw + add pc / ldr [pc] + unsigned LabelId = AFI->createPICLabelUId(); unsigned DstReg = MI.getOperand(0).getReg(); bool DstIsDead = MI.getOperand(0).isDead(); - bool isCC = Opcode == ARM::MOVCCi32imm || Opcode == ARM::t2MOVCCi32imm; - bool isPIC_GA = (Opcode == ARM::t2MOV_pic_ga || Opcode == ARM::MOV_pic_ga); - const MachineOperand &MO = MI.getOperand(isCC ? 2 : 1); - MachineInstrBuilder LO16, HI16; - - if (!STI->hasV6T2Ops() && - (Opcode == ARM::MOVi32imm || Opcode == ARM::MOVCCi32imm)) { - // Expand into a movi + orr. - LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi), DstReg); - HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::ORRri)) - .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(DstReg); - - assert (MO.isImm() && "MOVi32imm w/ non-immediate source operand!"); - unsigned ImmVal = (unsigned)MO.getImm(); - unsigned SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal); - unsigned SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal); - LO16 = LO16.addImm(SOImmValV1); - HI16 = HI16.addImm(SOImmValV2); - (*LO16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); - (*HI16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); - LO16.addImm(Pred).addReg(PredReg).addReg(0); - HI16.addImm(Pred).addReg(PredReg).addReg(0); - TransferImpOps(MI, LO16, HI16); - MI.eraseFromParent(); - break; - } - - unsigned LO16Opc = 0; - unsigned HI16Opc = 0; - if (Opcode == ARM::t2MOVi32imm || Opcode == ARM::t2MOVCCi32imm) { - LO16Opc = ARM::t2MOVi16; - HI16Opc = ARM::t2MOVTi16; - } else if (Opcode == ARM::MOV_pic_ga) { - LO16Opc = ARM::MOVi16_pic_ga; - HI16Opc = ARM::MOVTi16_pic_ga; - } else if (Opcode == ARM::t2MOV_pic_ga) { - LO16Opc = ARM::t2MOVi16_pic_ga; - HI16Opc = ARM::t2MOVTi16_pic_ga; - } else { - LO16Opc = ARM::MOVi16; - HI16Opc = ARM::MOVTi16; - } - - LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LO16Opc), DstReg); - HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc)) + const MachineOperand &MO1 = MI.getOperand(1); + const GlobalValue *GV = MO1.getGlobal(); + unsigned TF = MO1.getTargetFlags(); + bool isARM = Opcode != ARM::t2MOV_pic_ga_add_pc; + unsigned LO16Opc = isARM ? ARM::MOVi16_pic_ga : ARM::t2MOVi16_pic_ga; + unsigned HI16Opc = isARM ? ARM::MOVTi16_pic_ga : ARM::t2MOVTi16_pic_ga; + unsigned PICAddOpc = isARM + ? (Opcode == ARM::MOV_pic_ga_ldr ? ARM::PICLDR : ARM::PICADD) + : ARM::tPICADD; + MachineInstrBuilder MIB1 = BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(LO16Opc), DstReg) + .addGlobalAddress(GV, MO1.getOffset(), + TF | ARMII::MO_LO16_NONLAZY_PIC) + .addImm(LabelId); + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc), DstReg) + .addReg(DstReg) + .addGlobalAddress(GV, MO1.getOffset(), + TF | ARMII::MO_HI16_NONLAZY_PIC) + .addImm(LabelId); + MachineInstrBuilder MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(PICAddOpc)) .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(DstReg); - - if (MO.isImm()) { - unsigned Imm = MO.getImm(); - unsigned Lo16 = Imm & 0xffff; - unsigned Hi16 = (Imm >> 16) & 0xffff; - LO16 = LO16.addImm(Lo16); - HI16 = HI16.addImm(Hi16); - } else if (isPIC_GA) { - unsigned LabelId = MI.getOperand(2).getImm(); - const GlobalValue *GV = MO.getGlobal(); - unsigned TF = MO.getTargetFlags(); - LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), - TF | ARMII::MO_LO16_NONLAZY_PIC) - .addImm(LabelId); - HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), - TF | ARMII::MO_HI16_NONLAZY_PIC) - .addImm(LabelId); - } else { - const GlobalValue *GV = MO.getGlobal(); - unsigned TF = MO.getTargetFlags(); - LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16); - HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16); - } - - (*LO16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); - (*HI16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); - - if (!isPIC_GA) { - LO16.addImm(Pred).addReg(PredReg); - HI16.addImm(Pred).addReg(PredReg); + .addReg(DstReg).addImm(LabelId); + if (isARM) { + AddDefaultPred(MIB2); + if (Opcode == ARM::MOV_pic_ga_ldr) + (*MIB2).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); } - TransferImpOps(MI, LO16, HI16); + TransferImpOps(MI, MIB1, MIB2); MI.eraseFromParent(); - break; + return true; } + case ARM::MOVi32imm: + case ARM::MOVCCi32imm: + case ARM::t2MOVi32imm: + case ARM::t2MOVCCi32imm: + ExpandMOV32BitImm(MBB, MBBI); + return true; + case ARM::VMOVQQ: { unsigned DstReg = MI.getOperand(0).getReg(); bool DstIsDead = MI.getOperand(0).isDead(); @@ -869,18 +891,18 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { MachineInstrBuilder Even = AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::VMOVQ)) - .addReg(EvenDst, - RegState::Define | getDeadRegState(DstIsDead)) - .addReg(EvenSrc, getKillRegState(SrcIsKill))); + .addReg(EvenDst, + RegState::Define | getDeadRegState(DstIsDead)) + .addReg(EvenSrc, getKillRegState(SrcIsKill))); MachineInstrBuilder Odd = AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::VMOVQ)) - .addReg(OddDst, - RegState::Define | getDeadRegState(DstIsDead)) - .addReg(OddSrc, getKillRegState(SrcIsKill))); + .addReg(OddDst, + RegState::Define | getDeadRegState(DstIsDead)) + .addReg(OddSrc, getKillRegState(SrcIsKill))); TransferImpOps(MI, Even, Odd); MI.eraseFromParent(); - break; + return true; } case ARM::VLDMQIA: @@ -911,7 +933,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead)); TransferImpOps(MI, MIB, MIB); MI.eraseFromParent(); - break; + return true; } case ARM::VSTMQIA: @@ -943,7 +965,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { TransferImpOps(MI, MIB, MIB); MI.eraseFromParent(); - break; + return true; } case ARM::VDUPfqf: case ARM::VDUPfdf:{ @@ -954,7 +976,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { unsigned SrcReg = MI.getOperand(1).getReg(); unsigned Lane = getARMRegisterNumbering(SrcReg) & 1; unsigned DReg = TRI->getMatchingSuperReg(SrcReg, - Lane & 1 ? ARM::ssub_1 : ARM::ssub_0, &ARM::DPR_VFP2RegClass); + Lane & 1 ? ARM::ssub_1 : ARM::ssub_0, &ARM::DPR_VFP2RegClass); // The lane is [0,1] for the containing DReg superregister. // Copy the dst/src register operands. MIB.addOperand(MI.getOperand(OpIdx++)); @@ -968,7 +990,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { TransferImpOps(MI, MIB, MIB); MI.eraseFromParent(); - break; + return true; } case ARM::VLD1q8Pseudo: @@ -1044,7 +1066,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { case ARM::VLD4DUPd16Pseudo_UPD: case ARM::VLD4DUPd32Pseudo_UPD: ExpandVLD(MBBI); - break; + return true; case ARM::VST1q8Pseudo: case ARM::VST1q16Pseudo: @@ -1095,7 +1117,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { case ARM::VST4q16oddPseudo_UPD: case ARM::VST4q32oddPseudo_UPD: ExpandVST(MBBI); - break; + return true; case ARM::VLD1LNq8Pseudo: case ARM::VLD1LNq16Pseudo: @@ -1170,18 +1192,26 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { case ARM::VST4LNq16Pseudo_UPD: case ARM::VST4LNq32Pseudo_UPD: ExpandLaneOp(MBBI); - break; - - case ARM::VTBL2Pseudo: ExpandVTBL(MBBI, ARM::VTBL2, false, 2); break; - case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false, 3); break; - case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false, 4); break; - case ARM::VTBX2Pseudo: ExpandVTBL(MBBI, ARM::VTBX2, true, 2); break; - case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true, 3); break; - case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true, 4); break; - } + return true; + + case ARM::VTBL2Pseudo: ExpandVTBL(MBBI, ARM::VTBL2, false, 2); return true; + case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false, 3); return true; + case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false, 4); return true; + case ARM::VTBX2Pseudo: ExpandVTBL(MBBI, ARM::VTBX2, true, 2); return true; + case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true, 3); return true; + case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true, 4); return true; + } + + return false; +} - if (ModifiedOp) - Modified = true; +bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { + bool Modified = false; + + MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + while (MBBI != E) { + MachineBasicBlock::iterator NMBBI = llvm::next(MBBI); + Modified |= ExpandMI(MBB, MBBI); MBBI = NMBBI; } @@ -1192,6 +1222,7 @@ bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) { TII = static_cast(MF.getTarget().getInstrInfo()); TRI = MF.getTarget().getRegisterInfo(); STI = &MF.getTarget().getSubtarget(); + AFI = MF.getInfo(); bool Modified = false; for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E; diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 299174b6edd..7e1f046c6cb 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -880,8 +880,8 @@ bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N, if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) { Offset = N.getOperand(0); SDValue N1 = N.getOperand(1); - Label = CurDAG->getTargetConstant(cast(N1)->getZExtValue(), - MVT::i32); + Label = CurDAG->getTargetConstant(cast(N1)->getZExtValue(), + MVT::i32); return true; } diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index f4d16fc167f..a6e1fe79c19 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -2003,13 +2003,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, return DAG.getNode(ARMISD::Wrapper, dl, PtrVT, DAG.getTargetGlobalAddress(GV, dl, PtrVT)); - // FIXME: Not a constant pool! - unsigned PICLabelIndex = AFI->createPICLabelUId(); - SDValue PICLabel = DAG.getConstant(PICLabelIndex, MVT::i32); SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, - DAG.getTargetGlobalAddress(GV, dl, PtrVT), - PICLabel); - Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); + DAG.getTargetGlobalAddress(GV, dl, PtrVT)); if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result, MachinePointerInfo::getGOT(), false, false, 0); diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 8018fe748af..b5ac6f8d0c0 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -70,7 +70,7 @@ def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, // Node definitions. def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>; def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntBinOp>; -def ARMWrapperPIC : SDNode<"ARMISD::WrapperPIC", SDTIntBinOp>; +def ARMWrapperPIC : SDNode<"ARMISD::WrapperPIC", SDTIntUnaryOp>; def ARMcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_ARMCallSeqStart, [SDNPHasChain, SDNPOutGlue]>; @@ -3408,11 +3408,22 @@ def MOVi32imm : PseudoInst<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVix2, [(set GPR:$dst, (arm_i32imm:$src))]>, Requires<[IsARM]>; -let isReMaterializable = 1 in -def MOV_pic_ga : PseudoInst<(outs GPR:$dst), - (ins i32imm:$addr, pclabel:$id), IIC_iMOVix2, - [(set GPR:$dst, (ARMWrapperPIC tglobaladdr:$addr, imm:$id))]>, - Requires<[IsARM, UseMovt]>; +// Pseudo instruction that combines movw + movt + add pc. +// It also makes it possible to rematerialize the instructions. +// FIXME: Remove this when we can do generalized remat and when machine licm +// can properly the instructions. +let isReMaterializable = 1 in { +def MOV_pic_ga_add_pc : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr), + IIC_iMOVix2addpc, + [(set GPR:$dst, (ARMWrapperPIC tglobaladdr:$addr))]>, + Requires<[IsARM, UseMovt]>; + +let AddedComplexity = 10 in +def MOV_pic_ga_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr), + IIC_iMOVix2ld, + [(set GPR:$dst, (load (ARMWrapperPIC tglobaladdr:$addr)))]>, + Requires<[IsARM, UseMovt]>; +} // isReMaterializable // ConstantPool, GlobalAddress, and JumpTable def : ARMPat<(ARMWrapper tglobaladdr :$dst), (LEApcrel tglobaladdr :$dst)>, diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index e9c7513c966..4954a7ceb42 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -3245,12 +3245,15 @@ def t2MOVi32imm : PseudoInst<(outs rGPR:$dst), (ins i32imm:$src), IIC_iMOVix2, [(set rGPR:$dst, (i32 imm:$src))]>, Requires<[IsThumb, HasV6T2]>; -// Materialize GA with movw + movt. +// Pseudo instruction that combines movw + movt + add pc. +// It also makes it possible to rematerialize the instructions. +// FIXME: Remove this when we can do generalized remat and when machine licm +// can properly the instructions. let isReMaterializable = 1 in -def t2MOV_pic_ga : PseudoInst<(outs rGPR:$dst), - (ins i32imm:$addr, pclabel:$id), IIC_iMOVix2, - [(set rGPR:$dst, (ARMWrapperPIC tglobaladdr:$addr, imm:$id))]>, - Requires<[IsThumb2, UseMovt]>; +def t2MOV_pic_ga_add_pc : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr), + IIC_iMOVix2, + [(set rGPR:$dst, (ARMWrapperPIC tglobaladdr:$addr))]>, + Requires<[IsThumb2, UseMovt]>; // ConstantPool, GlobalAddress, and JumpTable def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2LEApcrel tglobaladdr :$dst)>, @@ -3266,9 +3269,9 @@ def : T2Pat<(ARMWrapperJT tjumptable:$dst, imm:$id), // be expanded into two instructions late to allow if-conversion and // scheduling. let canFoldAsLoad = 1, isReMaterializable = 1 in -def t2LDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp), +def t2LDRpci_pic : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr, pclabel:$cp), IIC_iLoadiALU, - [(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)), + [(set rGPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)), imm:$cp))]>, Requires<[IsThumb2]>; diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index 8e8587ab1d3..22d15b572dd 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -260,7 +260,7 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, // restricted GPR register class. Many Thumb2 instructions allow the full // register range for operands, but have undefined behaviours when PC -// or SP (R13 or R15) are used. The ARM ARM refers to these operands +// or SP (R13 or R15) are used. The ARM ISA refers to these operands // via the BadReg() pseudo-code description. def rGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR]> { diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td index 61dd3be8421..958c5c64701 100644 --- a/lib/Target/ARM/ARMSchedule.td +++ b/lib/Target/ARM/ARMSchedule.td @@ -38,6 +38,8 @@ def IIC_iMOVr : InstrItinClass; def IIC_iMOVsi : InstrItinClass; def IIC_iMOVsr : InstrItinClass; def IIC_iMOVix2 : InstrItinClass; +def IIC_iMOVix2addpc : InstrItinClass; +def IIC_iMOVix2ld : InstrItinClass; def IIC_iMVNi : InstrItinClass; def IIC_iMVNr : InstrItinClass; def IIC_iMVNsi : InstrItinClass; diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td index 33ba683828e..8d86c01dc74 100644 --- a/lib/Target/ARM/ARMScheduleA8.td +++ b/lib/Target/ARM/ARMScheduleA8.td @@ -71,6 +71,12 @@ def CortexA8Itineraries : ProcessorItineraries< InstrItinData], [1, 1, 1]>, InstrItinData, InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>, + InstrItinData, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [3]>, + InstrItinData, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LSPipe]>], [5]>, // // Move instructions, conditional InstrItinData], [2]>, diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td index a9632be61f2..82c6735f1b1 100644 --- a/lib/Target/ARM/ARMScheduleA9.td +++ b/lib/Target/ARM/ARMScheduleA9.td @@ -50,6 +50,16 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData, InstrStage<1, [A9_ALU0, A9_ALU1]>, InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>, + InstrItinData, + InstrStage<1, [A9_ALU0, A9_ALU1]>, + InstrStage<1, [A9_ALU0, A9_ALU1]>, + InstrStage<1, [A9_ALU0, A9_ALU1]>], [3]>, + InstrItinData, + InstrStage<1, [A9_ALU0, A9_ALU1]>, + InstrStage<1, [A9_ALU0, A9_ALU1]>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<1, [A9_AGU], 0>, + InstrStage<1, [A9_LSUnit]>], [5]>, // // MVN instructions InstrItinData, diff --git a/lib/Target/ARM/ARMScheduleV6.td b/lib/Target/ARM/ARMScheduleV6.td index 4c0e496942f..c1880a72fff 100644 --- a/lib/Target/ARM/ARMScheduleV6.td +++ b/lib/Target/ARM/ARMScheduleV6.td @@ -64,6 +64,12 @@ def ARMV6Itineraries : ProcessorItineraries< InstrItinData], [3, 2, 1]>, InstrItinData, InstrStage<1, [V6_Pipe]>], [2]>, + InstrItinData, + InstrStage<1, [V6_Pipe]>, + InstrStage<1, [V6_Pipe]>], [3]>, + InstrItinData, + InstrStage<1, [V6_Pipe]>, + InstrStage<1, [V6_Pipe]>], [5]>, // // Move instructions, conditional InstrItinData], [3]>, diff --git a/test/CodeGen/Thumb/machine-licm.ll b/test/CodeGen/ARM/machine-licm.ll similarity index 69% rename from test/CodeGen/Thumb/machine-licm.ll rename to test/CodeGen/ARM/machine-licm.ll index a87e82c21dd..16c15fd2136 100644 --- a/test/CodeGen/Thumb/machine-licm.ll +++ b/test/CodeGen/ARM/machine-licm.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -mtriple=thumb-apple-darwin -relocation-model=pic -disable-fp-elim | FileCheck %s +; RUN: llc < %s -mtriple=thumb-apple-darwin -relocation-model=pic -disable-fp-elim | FileCheck %s -check-prefix=THUMB +; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic -disable-fp-elim | FileCheck %s -check-prefix=ARM ; rdar://7353541 ; rdar://7354376 @@ -9,19 +10,30 @@ define void @t(i32* nocapture %vals, i32 %c) nounwind { entry: -; CHECK: t: +; ARM: t: +; ARM: ldr [[REGISTER_1:r[0-9]+]], LCPI0_0 +; ARM-NOT: ldr r{{[0-9]+}}, LCPI0_1 +; ARM: LPC0_0: +; ARM: ldr r{{[0-9]+}}, [pc, [[REGISTER_1]]] +; ARM: ldr r{{[0-9]+}}, [r{{[0-9]+}}] + +; THUMB: t: %0 = icmp eq i32 %c, 0 ; [#uses=1] br i1 %0, label %return, label %bb.nph bb.nph: ; preds = %entry -; CHECK: BB#1 -; CHECK: ldr.n r2, LCPI0_0 -; CHECK: add r2, pc -; CHECK: ldr r{{[0-9]+}}, [r2] -; CHECK: LBB0_2 -; CHECK: LCPI0_0: -; CHECK-NOT: LCPI0_1: -; CHECK: .section +; ARM: LCPI0_0: +; ARM-NOT: LCPI0_1: +; ARM: .section + +; THUMB: BB#1 +; THUMB: ldr.n r2, LCPI0_0 +; THUMB: add r2, pc +; THUMB: ldr r{{[0-9]+}}, [r2] +; THUMB: LBB0_2 +; THUMB: LCPI0_0: +; THUMB-NOT: LCPI0_1: +; THUMB: .section %.pre = load i32* @GV, align 4 ; [#uses=1] br label %bb -- 2.34.1