#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMConstantPoolValue.h"
-#include "ARMGenInstrInfo.inc"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
+#include "ARMGenInstrInfo.inc"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/GlobalValue.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/CommandLine.h"
EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
cl::desc("Enable ARM 2-addr to 3-addr conv"));
+static cl::opt<bool>
+OldARMIfCvt("old-arm-ifcvt", cl::Hidden,
+ cl::desc("Use old-style ARM if-conversion heuristics"));
+
ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
: TargetInstrInfoImpl(ARMInsts, array_lengthof(ARMInsts)),
Subtarget(STI) {
MachineInstr *MI = MBBI;
MachineFunction &MF = *MI->getParent()->getParent();
- unsigned TSFlags = MI->getDesc().TSFlags;
+ uint64_t TSFlags = MI->getDesc().TSFlags;
bool isPre = false;
switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
default: return NULL;
return NewMIs[0];
}
+bool
+ARMBaseInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ bool isKill = true;
+
+ // Add the callee-saved register as live-in unless it's LR and
+ // @llvm.returnaddress is called. If LR is returned for @llvm.returnaddress
+ // then it's already added to the function and entry block live-in sets.
+ if (Reg == ARM::LR) {
+ MachineFunction &MF = *MBB.getParent();
+ if (MF.getFrameInfo()->isReturnAddressTaken() &&
+ MF.getRegInfo().isLiveIn(Reg))
+ isKill = false;
+ }
+
+ if (isKill)
+ MBB.addLiveIn(Reg);
+
+ // Insert the spill to the stack frame. The register is killed at the spill
+ //
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ storeRegToStackSlot(MBB, MI, Reg, isKill,
+ CSI[i].getFrameIdx(), RC, TRI);
+ }
+ return true;
+}
+
// Branch analysis.
bool
ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
// Get the instruction before it if it is a terminator.
MachineInstr *SecondLastInst = I;
+ unsigned SecondLastOpc = SecondLastInst->getOpcode();
+
+ // If AllowModify is true and the block ends with two or more unconditional
+ // branches, delete all but the first unconditional branch.
+ if (AllowModify && isUncondBranchOpcode(LastOpc)) {
+ while (isUncondBranchOpcode(SecondLastOpc)) {
+ LastInst->eraseFromParent();
+ LastInst = SecondLastInst;
+ LastOpc = LastInst->getOpcode();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ // Return now the only terminator is an unconditional branch.
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else {
+ SecondLastInst = I;
+ SecondLastOpc = SecondLastInst->getOpcode();
+ }
+ }
+ }
// If there are three terminators, we don't know what sort of block this is.
if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
return true;
// If the block ends with a B and a Bcc, handle it.
- unsigned SecondLastOpc = SecondLastInst->getOpcode();
if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
TBB = SecondLastInst->getOperand(0).getMBB();
Cond.push_back(SecondLastInst->getOperand(1));
unsigned
ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) const {
- // FIXME this should probably have a DebugLoc argument
- DebugLoc dl;
-
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
int BOpc = !AFI->isThumbFunction()
? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
if (FBB == 0) {
if (Cond.empty()) // Unconditional branch?
- BuildMI(&MBB, dl, get(BOpc)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
else
- BuildMI(&MBB, dl, get(BccOpc)).addMBB(TBB)
+ BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
.addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
return 1;
}
// Two-way conditional branch.
- BuildMI(&MBB, dl, get(BccOpc)).addMBB(TBB)
+ BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
.addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
- BuildMI(&MBB, dl, get(BOpc)).addMBB(FBB);
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
return 2;
}
// Basic size info comes from the TSFlags field.
const TargetInstrDesc &TID = MI->getDesc();
- unsigned TSFlags = TID.TSFlags;
+ uint64_t TSFlags = TID.TSFlags;
unsigned Opc = MI->getOpcode();
switch ((TSFlags & ARMII::SizeMask) >> ARMII::SizeShift) {
llvm_unreachable("Unknown or unset size field for instr!");
case TargetOpcode::IMPLICIT_DEF:
case TargetOpcode::KILL:
- case TargetOpcode::DBG_LABEL:
+ case TargetOpcode::PROLOG_LABEL:
case TargetOpcode::EH_LABEL:
case TargetOpcode::DBG_VALUE:
return 0;
case ARMII::Size2Bytes: return 2; // Thumb1 instruction.
case ARMII::SizeSpecial: {
switch (Opc) {
+ case ARM::MOVi32imm:
+ case ARM::t2MOVi32imm:
+ return 8;
case ARM::CONSTPOOL_ENTRY:
// If this machine instr is a constant pool entry, its size is recorded as
// operand #2.
return MI->getOperand(2).getImm();
+ case ARM::Int_eh_sjlj_longjmp:
+ return 16;
+ case ARM::tInt_eh_sjlj_longjmp:
+ return 10;
case ARM::Int_eh_sjlj_setjmp:
case ARM::Int_eh_sjlj_setjmp_nofp:
- return 24;
+ return 20;
case ARM::tInt_eh_sjlj_setjmp:
case ARM::t2Int_eh_sjlj_setjmp:
case ARM::t2Int_eh_sjlj_setjmp_nofp:
- return 14;
+ return 12;
case ARM::BR_JTr:
case ARM::BR_JTm:
case ARM::BR_JTadd:
return 0; // Not reached
}
-/// Return true if the instruction is a register to register move and
-/// leave the source and dest operands in the passed parameters.
-///
-bool
-ARMBaseInstrInfo::isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned& SrcSubIdx, unsigned& DstSubIdx) const {
- switch (MI.getOpcode()) {
- default: break;
- case ARM::VMOVS:
- case ARM::VMOVD:
- case ARM::VMOVDneon:
- case ARM::VMOVQ:
- case ARM::VMOVQQ : {
- SrcReg = MI.getOperand(1).getReg();
- DstReg = MI.getOperand(0).getReg();
- SrcSubIdx = MI.getOperand(1).getSubReg();
- DstSubIdx = MI.getOperand(0).getSubReg();
- return true;
- }
- case ARM::MOVr:
- case ARM::tMOVr:
- case ARM::tMOVgpr2tgpr:
- case ARM::tMOVtgpr2gpr:
- case ARM::tMOVgpr2gpr:
- case ARM::t2MOVr: {
- assert(MI.getDesc().getNumOperands() >= 2 &&
- MI.getOperand(0).isReg() &&
- MI.getOperand(1).isReg() &&
- "Invalid ARM MOV instruction");
- SrcReg = MI.getOperand(1).getReg();
- DstReg = MI.getOperand(0).getReg();
- SrcSubIdx = MI.getOperand(1).getSubReg();
- DstSubIdx = MI.getOperand(0).getSubReg();
- return true;
- }
+void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ bool GPRDest = ARM::GPRRegClass.contains(DestReg);
+ bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);
+
+ if (GPRDest && GPRSrc) {
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))));
+ return;
}
- return false;
+ bool SPRDest = ARM::SPRRegClass.contains(DestReg);
+ bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);
+
+ unsigned Opc;
+ if (SPRDest && SPRSrc)
+ Opc = ARM::VMOVS;
+ else if (GPRDest && SPRSrc)
+ Opc = ARM::VMOVRS;
+ else if (SPRDest && GPRSrc)
+ Opc = ARM::VMOVSR;
+ else if (ARM::DPRRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVD;
+ else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVQ;
+ else if (ARM::QQPRRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVQQ;
+ else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVQQQQ;
+ else
+ llvm_unreachable("Impossible reg-to-reg copy");
+
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
+ MIB.addReg(SrcReg, getKillRegState(KillSrc));
+ if (Opc != ARM::VMOVQQ && Opc != ARM::VMOVQQQQ)
+ AddDefaultPred(MIB);
}
-unsigned
-ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
- int &FrameIndex) const {
- switch (MI->getOpcode()) {
- default: break;
- case ARM::LDR:
- case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame.
- if (MI->getOperand(1).isFI() &&
- MI->getOperand(2).isReg() &&
- MI->getOperand(3).isImm() &&
- MI->getOperand(2).getReg() == 0 &&
- MI->getOperand(3).getImm() == 0) {
- FrameIndex = MI->getOperand(1).getIndex();
- return MI->getOperand(0).getReg();
- }
+static const
+MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB,
+ unsigned Reg, unsigned SubIdx, unsigned State,
+ const TargetRegisterInfo *TRI) {
+ if (!SubIdx)
+ return MIB.addReg(Reg, State);
+
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
+ return MIB.addReg(Reg, State, SubIdx);
+}
+
+void ARMBaseInstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned Align = MFI.getObjectAlignment(FI);
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo(
+ PseudoSourceValue::getFixedStack(FI)),
+ MachineMemOperand::MOStore,
+ MFI.getObjectSize(FI),
+ Align);
+
+ // tGPR is used sometimes in ARM instructions that need to avoid using
+ // certain registers. Just treat it as GPR here. Likewise, rGPR.
+ if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass
+ || RC == ARM::rGPRRegisterClass)
+ RC = ARM::GPRRegisterClass;
+
+ switch (RC->getID()) {
+ case ARM::GPRRegClassID:
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STR))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO));
break;
- case ARM::t2LDRi12:
- case ARM::tRestore:
- if (MI->getOperand(1).isFI() &&
- MI->getOperand(2).isImm() &&
- MI->getOperand(2).getImm() == 0) {
- FrameIndex = MI->getOperand(1).getIndex();
- return MI->getOperand(0).getReg();
+ case ARM::SPRRegClassID:
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ break;
+ case ARM::DPRRegClassID:
+ case ARM::DPR_VFP2RegClassID:
+ case ARM::DPR_8RegClassID:
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ break;
+ case ARM::QPRRegClassID:
+ case ARM::QPR_VFP2RegClassID:
+ case ARM::QPR_8RegClassID:
+ if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64Pseudo))
+ .addFrameIndex(FI).addImm(16)
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addMemOperand(MMO));
+ } else {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQ))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI)
+ .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
+ .addMemOperand(MMO));
}
break;
- case ARM::VLDRD:
- case ARM::VLDRS:
- if (MI->getOperand(1).isFI() &&
- MI->getOperand(2).isImm() &&
- MI->getOperand(2).getImm() == 0) {
- FrameIndex = MI->getOperand(1).getIndex();
- return MI->getOperand(0).getReg();
+ case ARM::QQPRRegClassID:
+ case ARM::QQPR_VFP2RegClassID:
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ // FIXME: It's possible to only store part of the QQ register if the
+ // spilled def has a sub-register index.
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo))
+ .addFrameIndex(FI).addImm(16)
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addMemOperand(MMO));
+ } else {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMD))
+ .addFrameIndex(FI)
+ .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
+ AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
}
break;
+ case ARM::QQQQPRRegClassID: {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMD))
+ .addFrameIndex(FI)
+ .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
+ AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
+ break;
+ }
+ default:
+ llvm_unreachable("Unknown regclass!");
}
-
- return 0;
}
unsigned
break;
case ARM::t2STRi12:
case ARM::tSpill:
+ case ARM::VSTRD:
+ case ARM::VSTRS:
if (MI->getOperand(1).isFI() &&
MI->getOperand(2).isImm() &&
MI->getOperand(2).getImm() == 0) {
return MI->getOperand(0).getReg();
}
break;
- case ARM::VSTRD:
- case ARM::VSTRS:
+ case ARM::VST1q64Pseudo:
+ if (MI->getOperand(0).isFI() &&
+ MI->getOperand(2).getSubReg() == 0) {
+ FrameIndex = MI->getOperand(0).getIndex();
+ return MI->getOperand(2).getReg();
+ }
+ break;
+ case ARM::VSTMQ:
if (MI->getOperand(1).isFI() &&
MI->getOperand(2).isImm() &&
- MI->getOperand(2).getImm() == 0) {
+ MI->getOperand(2).getImm() == ARM_AM::getAM4ModeImm(ARM_AM::ia) &&
+ MI->getOperand(0).getSubReg() == 0) {
FrameIndex = MI->getOperand(1).getIndex();
return MI->getOperand(0).getReg();
}
return 0;
}
-bool
-ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const {
- DebugLoc DL;
- if (I != MBB.end()) DL = I->getDebugLoc();
-
- // tGPR is used sometimes in ARM instructions that need to avoid using
- // certain registers. Just treat it as GPR here.
- if (DestRC == ARM::tGPRRegisterClass)
- DestRC = ARM::GPRRegisterClass;
- if (SrcRC == ARM::tGPRRegisterClass)
- SrcRC = ARM::GPRRegisterClass;
-
- // Allow DPR / DPR_VFP2 / DPR_8 cross-class copies.
- if (DestRC == ARM::DPR_8RegisterClass)
- DestRC = ARM::DPR_VFP2RegisterClass;
- if (SrcRC == ARM::DPR_8RegisterClass)
- SrcRC = ARM::DPR_VFP2RegisterClass;
-
- // Allow QPR / QPR_VFP2 / QPR_8 cross-class copies.
- if (DestRC == ARM::QPR_VFP2RegisterClass ||
- DestRC == ARM::QPR_8RegisterClass)
- DestRC = ARM::QPRRegisterClass;
- if (SrcRC == ARM::QPR_VFP2RegisterClass ||
- SrcRC == ARM::QPR_8RegisterClass)
- SrcRC = ARM::QPRRegisterClass;
-
- // Allow QQPR / QQPR_VFP2 / QQPR_8 cross-class copies.
- if (DestRC == ARM::QQPR_VFP2RegisterClass ||
- DestRC == ARM::QQPR_8RegisterClass)
- DestRC = ARM::QQPRRegisterClass;
- if (SrcRC == ARM::QQPR_VFP2RegisterClass ||
- SrcRC == ARM::QQPR_8RegisterClass)
- SrcRC = ARM::QQPRRegisterClass;
-
- // Disallow copies of unequal sizes.
- if (DestRC != SrcRC && DestRC->getSize() != SrcRC->getSize())
- return false;
-
- if (DestRC == ARM::GPRRegisterClass) {
- if (SrcRC == ARM::SPRRegisterClass)
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVRS), DestReg)
- .addReg(SrcReg));
- else
- AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr),
- DestReg).addReg(SrcReg)));
- } else {
- unsigned Opc;
-
- if (DestRC == ARM::SPRRegisterClass)
- Opc = (SrcRC == ARM::GPRRegisterClass ? ARM::VMOVSR : ARM::VMOVS);
- else if (DestRC == ARM::DPRRegisterClass)
- Opc = ARM::VMOVD;
- else if (DestRC == ARM::DPR_VFP2RegisterClass ||
- SrcRC == ARM::DPR_VFP2RegisterClass)
- // Always use neon reg-reg move if source or dest is NEON-only regclass.
- Opc = ARM::VMOVDneon;
- else if (DestRC == ARM::QPRRegisterClass)
- Opc = ARM::VMOVQ;
- else if (DestRC == ARM::QQPRRegisterClass)
- Opc = ARM::VMOVQQ;
- else
- return false;
-
- AddDefaultPred(BuildMI(MBB, I, DL, get(Opc), DestReg).addReg(SrcReg));
- }
-
- return true;
-}
-
-void ARMBaseInstrInfo::
-storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned SrcReg, bool isKill, int FI,
- const TargetRegisterClass *RC) const {
- DebugLoc DL;
- if (I != MBB.end()) DL = I->getDebugLoc();
- MachineFunction &MF = *MBB.getParent();
- MachineFrameInfo &MFI = *MF.getFrameInfo();
- unsigned Align = MFI.getObjectAlignment(FI);
-
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
- MachineMemOperand::MOStore, 0,
- MFI.getObjectSize(FI),
- Align);
-
- // tGPR is used sometimes in ARM instructions that need to avoid using
- // certain registers. Just treat it as GPR here.
- if (RC == ARM::tGPRRegisterClass)
- RC = ARM::GPRRegisterClass;
-
- if (RC == ARM::GPRRegisterClass) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STR))
- .addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO));
- } else if (RC == ARM::SPRRegisterClass) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS))
- .addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
- } else if (RC == ARM::DPRRegisterClass ||
- RC == ARM::DPR_VFP2RegisterClass ||
- RC == ARM::DPR_8RegisterClass) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD))
- .addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
- } else if (RC == ARM::QPRRegisterClass ||
- RC == ARM::QPR_VFP2RegisterClass ||
- RC == ARM::QPR_8RegisterClass) {
- // FIXME: Neon instructions should support predicates
- if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q))
- .addFrameIndex(FI).addImm(128)
- .addMemOperand(MMO)
- .addReg(SrcReg, getKillRegState(isKill)));
- } else {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQ)).
- addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI)
- .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))
- .addMemOperand(MMO));
- }
- } else {
- assert((RC == ARM::QQPRRegisterClass ||
- RC == ARM::QQPR_VFP2RegisterClass ||
- RC == ARM::QQPR_8RegisterClass) && "Unknown regclass!");
- llvm_unreachable("Not yet implemented!");
- }
-}
-
void ARMBaseInstrInfo::
loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, int FI,
- const TargetRegisterClass *RC) const {
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FI);
-
MachineMemOperand *MMO =
- MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
- MachineMemOperand::MOLoad, 0,
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MachineMemOperand::MOLoad,
MFI.getObjectSize(FI),
Align);
// tGPR is used sometimes in ARM instructions that need to avoid using
// certain registers. Just treat it as GPR here.
- if (RC == ARM::tGPRRegisterClass)
+ if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass
+ || RC == ARM::rGPRRegisterClass)
RC = ARM::GPRRegisterClass;
- if (RC == ARM::GPRRegisterClass) {
+ switch (RC->getID()) {
+ case ARM::GPRRegClassID:
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDR), DestReg)
.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO));
- } else if (RC == ARM::SPRRegisterClass) {
+ break;
+ case ARM::SPRRegClassID:
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
- } else if (RC == ARM::DPRRegisterClass ||
- RC == ARM::DPR_VFP2RegisterClass ||
- RC == ARM::DPR_8RegisterClass) {
+ break;
+ case ARM::DPRRegClassID:
+ case ARM::DPR_VFP2RegClassID:
+ case ARM::DPR_8RegClassID:
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
- } else if (RC == ARM::QPRRegisterClass ||
- RC == ARM::QPR_VFP2RegisterClass ||
- RC == ARM::QPR_8RegisterClass) {
- if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q), DestReg)
- .addFrameIndex(FI).addImm(128)
+ break;
+ case ARM::QPRRegClassID:
+ case ARM::QPR_VFP2RegClassID:
+ case ARM::QPR_8RegClassID:
+ if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64Pseudo), DestReg)
+ .addFrameIndex(FI).addImm(16)
.addMemOperand(MMO));
} else {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQ), DestReg)
.addFrameIndex(FI)
- .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))
+ .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
.addMemOperand(MMO));
}
- } else {
- assert((RC == ARM::QQPRRegisterClass ||
- RC == ARM::QQPR_VFP2RegisterClass ||
- RC == ARM::QQPR_8RegisterClass) && "Unknown regclass!");
- llvm_unreachable("Not yet implemented!");
+ break;
+ case ARM::QQPRRegClassID:
+ case ARM::QQPR_VFP2RegClassID:
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
+ .addFrameIndex(FI).addImm(16)
+ .addMemOperand(MMO));
+ } else {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD))
+ .addFrameIndex(FI)
+ .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
+ AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
+ }
+ break;
+ case ARM::QQQQPRRegClassID: {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD))
+ .addFrameIndex(FI)
+ .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::Define, TRI);
+ AddDReg(MIB, DestReg, ARM::dsub_7, RegState::Define, TRI);
+ break;
+ }
+ default:
+ llvm_unreachable("Unknown regclass!");
}
}
-MachineInstr*
-ARMBaseInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
- int FrameIx, uint64_t Offset,
- const MDNode *MDPtr,
- DebugLoc DL) const {
- MachineInstrBuilder MIB = BuildMI(MF, DL, get(ARM::DBG_VALUE))
- .addFrameIndex(FrameIx).addImm(0).addImm(Offset).addMetadata(MDPtr);
- return &*MIB;
-}
-
-MachineInstr *ARMBaseInstrInfo::
-foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops, int FI) const {
- if (Ops.size() != 1) return NULL;
-
- unsigned OpNum = Ops[0];
- unsigned Opc = MI->getOpcode();
- MachineInstr *NewMI = NULL;
- if (Opc == ARM::MOVr || Opc == ARM::t2MOVr) {
- // If it is updating CPSR, then it cannot be folded.
- if (MI->getOperand(4).getReg() == ARM::CPSR && !MI->getOperand(4).isDead())
- return NULL;
- unsigned Pred = MI->getOperand(2).getImm();
- unsigned PredReg = MI->getOperand(3).getReg();
- if (OpNum == 0) { // move -> store
- unsigned SrcReg = MI->getOperand(1).getReg();
- unsigned SrcSubReg = MI->getOperand(1).getSubReg();
- bool isKill = MI->getOperand(1).isKill();
- bool isUndef = MI->getOperand(1).isUndef();
- if (Opc == ARM::MOVr)
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::STR))
- .addReg(SrcReg,
- getKillRegState(isKill) | getUndefRegState(isUndef),
- SrcSubReg)
- .addFrameIndex(FI).addReg(0).addImm(0).addImm(Pred).addReg(PredReg);
- else // ARM::t2MOVr
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::t2STRi12))
- .addReg(SrcReg,
- getKillRegState(isKill) | getUndefRegState(isUndef),
- SrcSubReg)
- .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg);
- } else { // move -> load
- unsigned DstReg = MI->getOperand(0).getReg();
- unsigned DstSubReg = MI->getOperand(0).getSubReg();
- bool isDead = MI->getOperand(0).isDead();
- bool isUndef = MI->getOperand(0).isUndef();
- if (Opc == ARM::MOVr)
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::LDR))
- .addReg(DstReg,
- RegState::Define |
- getDeadRegState(isDead) |
- getUndefRegState(isUndef), DstSubReg)
- .addFrameIndex(FI).addReg(0).addImm(0).addImm(Pred).addReg(PredReg);
- else // ARM::t2MOVr
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::t2LDRi12))
- .addReg(DstReg,
- RegState::Define |
- getDeadRegState(isDead) |
- getUndefRegState(isUndef), DstSubReg)
- .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg);
+unsigned
+ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case ARM::LDR:
+ case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame.
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isReg() &&
+ MI->getOperand(3).isImm() &&
+ MI->getOperand(2).getReg() == 0 &&
+ MI->getOperand(3).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
}
- } else if (Opc == ARM::tMOVgpr2gpr ||
- Opc == ARM::tMOVtgpr2gpr ||
- Opc == ARM::tMOVgpr2tgpr) {
- if (OpNum == 0) { // move -> store
- unsigned SrcReg = MI->getOperand(1).getReg();
- unsigned SrcSubReg = MI->getOperand(1).getSubReg();
- bool isKill = MI->getOperand(1).isKill();
- bool isUndef = MI->getOperand(1).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::t2STRi12))
- .addReg(SrcReg,
- getKillRegState(isKill) | getUndefRegState(isUndef),
- SrcSubReg)
- .addFrameIndex(FI).addImm(0).addImm(ARMCC::AL).addReg(0);
- } else { // move -> load
- unsigned DstReg = MI->getOperand(0).getReg();
- unsigned DstSubReg = MI->getOperand(0).getSubReg();
- bool isDead = MI->getOperand(0).isDead();
- bool isUndef = MI->getOperand(0).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::t2LDRi12))
- .addReg(DstReg,
- RegState::Define |
- getDeadRegState(isDead) |
- getUndefRegState(isUndef),
- DstSubReg)
- .addFrameIndex(FI).addImm(0).addImm(ARMCC::AL).addReg(0);
+ break;
+ case ARM::t2LDRi12:
+ case ARM::tRestore:
+ case ARM::VLDRD:
+ case ARM::VLDRS:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isImm() &&
+ MI->getOperand(2).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
}
- } else if (Opc == ARM::VMOVS) {
- unsigned Pred = MI->getOperand(2).getImm();
- unsigned PredReg = MI->getOperand(3).getReg();
- if (OpNum == 0) { // move -> store
- unsigned SrcReg = MI->getOperand(1).getReg();
- unsigned SrcSubReg = MI->getOperand(1).getSubReg();
- bool isKill = MI->getOperand(1).isKill();
- bool isUndef = MI->getOperand(1).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VSTRS))
- .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef),
- SrcSubReg)
- .addFrameIndex(FI)
- .addImm(0).addImm(Pred).addReg(PredReg);
- } else { // move -> load
- unsigned DstReg = MI->getOperand(0).getReg();
- unsigned DstSubReg = MI->getOperand(0).getSubReg();
- bool isDead = MI->getOperand(0).isDead();
- bool isUndef = MI->getOperand(0).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VLDRS))
- .addReg(DstReg,
- RegState::Define |
- getDeadRegState(isDead) |
- getUndefRegState(isUndef),
- DstSubReg)
- .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg);
+ break;
+ case ARM::VLD1q64Pseudo:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(0).getSubReg() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
}
- }
- else if (Opc == ARM::VMOVD) {
- unsigned Pred = MI->getOperand(2).getImm();
- unsigned PredReg = MI->getOperand(3).getReg();
- if (OpNum == 0) { // move -> store
- unsigned SrcReg = MI->getOperand(1).getReg();
- unsigned SrcSubReg = MI->getOperand(1).getSubReg();
- bool isKill = MI->getOperand(1).isKill();
- bool isUndef = MI->getOperand(1).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VSTRD))
- .addReg(SrcReg,
- getKillRegState(isKill) | getUndefRegState(isUndef),
- SrcSubReg)
- .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg);
- } else { // move -> load
- unsigned DstReg = MI->getOperand(0).getReg();
- unsigned DstSubReg = MI->getOperand(0).getSubReg();
- bool isDead = MI->getOperand(0).isDead();
- bool isUndef = MI->getOperand(0).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VLDRD))
- .addReg(DstReg,
- RegState::Define |
- getDeadRegState(isDead) |
- getUndefRegState(isUndef),
- DstSubReg)
- .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg);
+ break;
+ case ARM::VLDMQ:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isImm() &&
+ MI->getOperand(2).getImm() == ARM_AM::getAM4ModeImm(ARM_AM::ia) &&
+ MI->getOperand(0).getSubReg() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
}
+ break;
}
- return NewMI;
-}
-
-MachineInstr*
-ARMBaseInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr* LoadMI) const {
- // FIXME
return 0;
}
-bool
-ARMBaseInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops) const {
- if (Ops.size() != 1) return false;
-
- unsigned Opc = MI->getOpcode();
- if (Opc == ARM::MOVr || Opc == ARM::t2MOVr) {
- // If it is updating CPSR, then it cannot be folded.
- return MI->getOperand(4).getReg() != ARM::CPSR ||
- MI->getOperand(4).isDead();
- } else if (Opc == ARM::tMOVgpr2gpr ||
- Opc == ARM::tMOVtgpr2gpr ||
- Opc == ARM::tMOVgpr2tgpr) {
- return true;
- } else if (Opc == ARM::VMOVS || Opc == ARM::VMOVD) {
- return true;
- } else if (Opc == ARM::VMOVDneon || Opc == ARM::VMOVQ) {
- return false; // FIXME
- }
-
- return false;
+MachineInstr*
+ARMBaseInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx, uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc DL) const {
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(ARM::DBG_VALUE))
+ .addFrameIndex(FrameIx).addImm(0).addImm(Offset).addMetadata(MDPtr);
+ return &*MIB;
}
/// Create a copy of a const pool value. Update CPI to the new index and return
unsigned PCLabelId = AFI->createConstPoolEntryUId();
ARMConstantPoolValue *NewCPV = 0;
+ // FIXME: The below assumes PIC relocation model and that the function
+ // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
+ // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR
+ // instructions, so that's probably OK, but is PIC always correct when
+ // we get here?
if (ACPV->isGlobalValue())
NewCPV = new ARMConstantPoolValue(ACPV->getGV(), PCLabelId,
ARMCP::CPValue, 4);
else if (ACPV->isBlockAddress())
NewCPV = new ARMConstantPoolValue(ACPV->getBlockAddress(), PCLabelId,
ARMCP::CPBlockAddress, 4);
+ else if (ACPV->isLSDA())
+ NewCPV = new ARMConstantPoolValue(MF.getFunction(), PCLabelId,
+ ARMCP::CPLSDA, 4);
else
llvm_unreachable("Unexpected ARM constantpool value type!!");
CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment());
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SubIdx,
const MachineInstr *Orig,
- const TargetRegisterInfo *TRI) const {
- if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) {
- DestReg = TRI->getSubReg(DestReg, SubIdx);
- SubIdx = 0;
- }
-
+ const TargetRegisterInfo &TRI) const {
unsigned Opcode = Orig->getOpcode();
switch (Opcode) {
default: {
MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
- MI->getOperand(0).setReg(DestReg);
+ MI->substituteRegister(Orig->getOperand(0).getReg(), DestReg, SubIdx, TRI);
MBB.insert(I, MI);
break;
}
break;
}
}
-
- MachineInstr *NewMI = prior(I);
- NewMI->getOperand(0).setSubReg(SubIdx);
}
MachineInstr *
return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
}
+/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
+/// determine if two loads are loading from the same base address. It should
+/// only return true if the base pointers are the same and the only differences
+/// between the two addresses is the offset. It also returns the offsets by
+/// reference.
+bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
+ int64_t &Offset1,
+ int64_t &Offset2) const {
+ // Don't worry about Thumb: just ARM and Thumb2.
+ if (Subtarget.isThumb1Only()) return false;
+
+ if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
+ return false;
+
+ switch (Load1->getMachineOpcode()) {
+ default:
+ return false;
+ case ARM::LDR:
+ case ARM::LDRB:
+ case ARM::LDRD:
+ case ARM::LDRH:
+ case ARM::LDRSB:
+ case ARM::LDRSH:
+ case ARM::VLDRD:
+ case ARM::VLDRS:
+ case ARM::t2LDRi8:
+ case ARM::t2LDRDi8:
+ case ARM::t2LDRSHi8:
+ case ARM::t2LDRi12:
+ case ARM::t2LDRSHi12:
+ break;
+ }
+
+ switch (Load2->getMachineOpcode()) {
+ default:
+ return false;
+ case ARM::LDR:
+ case ARM::LDRB:
+ case ARM::LDRD:
+ case ARM::LDRH:
+ case ARM::LDRSB:
+ case ARM::LDRSH:
+ case ARM::VLDRD:
+ case ARM::VLDRS:
+ case ARM::t2LDRi8:
+ case ARM::t2LDRDi8:
+ case ARM::t2LDRSHi8:
+ case ARM::t2LDRi12:
+ case ARM::t2LDRSHi12:
+ break;
+ }
+
+ // Check if base addresses and chain operands match.
+ if (Load1->getOperand(0) != Load2->getOperand(0) ||
+ Load1->getOperand(4) != Load2->getOperand(4))
+ return false;
+
+ // Index should be Reg0.
+ if (Load1->getOperand(3) != Load2->getOperand(3))
+ return false;
+
+ // Determine the offsets.
+ if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
+ isa<ConstantSDNode>(Load2->getOperand(1))) {
+ Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
+ Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
+ return true;
+ }
+
+ return false;
+}
+
+/// shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to
+/// determine (in conjuction with areLoadsFromSameBasePtr) if two loads should
+/// be scheduled togther. On some targets if two loads are loading from
+/// addresses in the same cache line, it's better if they are scheduled
+/// together. This function takes two integers that represent the load offsets
+/// from the common base address. It returns true if it decides it's desirable
+/// to schedule the two loads together. "NumLoads" is the number of loads that
+/// have already been scheduled after Load1.
+bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
+ int64_t Offset1, int64_t Offset2,
+ unsigned NumLoads) const {
+ // Don't worry about Thumb: just ARM and Thumb2.
+ if (Subtarget.isThumb1Only()) return false;
+
+ assert(Offset2 > Offset1);
+
+ if ((Offset2 - Offset1) / 8 > 64)
+ return false;
+
+ if (Load1->getMachineOpcode() != Load2->getMachineOpcode())
+ return false; // FIXME: overly conservative?
+
+ // Four loads in a row should be sufficient.
+ if (NumLoads >= 3)
+ return false;
+
+ return true;
+}
+
+bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const {
+ // Debug info is never a scheduling boundary. It's necessary to be explicit
+ // due to the special treatment of IT instructions below, otherwise a
+ // dbg_value followed by an IT will result in the IT instruction being
+ // considered a scheduling hazard, which is wrong. It should be the actual
+ // instruction preceding the dbg_value instruction(s), just like it is
+ // when debug info is not present.
+ if (MI->isDebugValue())
+ return false;
+
+ // Terminators and labels can't be scheduled around.
+ if (MI->getDesc().isTerminator() || MI->isLabel())
+ return true;
+
+ // Treat the start of the IT block as a scheduling boundary, but schedule
+ // t2IT along with all instructions following it.
+ // FIXME: This is a big hammer. But the alternative is to add all potential
+ // true and anti dependencies to IT block instructions as implicit operands
+ // to the t2IT instruction. The added compile time and complexity does not
+ // seem worth it.
+ MachineBasicBlock::const_iterator I = MI;
+ // Make sure to skip any dbg_value instructions
+ while (++I != MBB->end() && I->isDebugValue())
+ ;
+ if (I != MBB->end() && I->getOpcode() == ARM::t2IT)
+ return true;
+
+ // Don't attempt to schedule around any instruction that defines
+ // a stack-oriented pointer, as it's unlikely to be profitable. This
+ // saves compile time, because it doesn't require every single
+ // stack slot reference to depend on the instruction that does the
+ // modification.
+ if (MI->definesRegister(ARM::SP))
+ return true;
+
+ return false;
+}
+
+bool ARMBaseInstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
+ unsigned NumInstrs,
+ float Probability,
+ float Confidence) const {
+ if (!NumInstrs)
+ return false;
+
+ // Use old-style heuristics
+ if (OldARMIfCvt) {
+ if (Subtarget.getCPUString() == "generic")
+ // Generic (and overly aggressive) if-conversion limits for testing.
+ return NumInstrs <= 10;
+ if (Subtarget.hasV7Ops())
+ return NumInstrs <= 3;
+ return NumInstrs <= 2;
+ }
+
+ // Attempt to estimate the relative costs of predication versus branching.
+ float UnpredCost = Probability * NumInstrs;
+ UnpredCost += 1.0; // The branch itself
+ UnpredCost += (1.0 - Confidence) * Subtarget.getMispredictionPenalty();
+
+ float PredCost = NumInstrs;
+
+ return PredCost < UnpredCost;
+
+}
+
+bool ARMBaseInstrInfo::
+isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumT,
+ MachineBasicBlock &FMBB, unsigned NumF,
+ float Probability, float Confidence) const {
+ // Use old-style if-conversion heuristics
+ if (OldARMIfCvt) {
+ return NumT && NumF && NumT <= 2 && NumF <= 2;
+ }
+
+ if (!NumT || !NumF)
+ return false;
+
+ // Attempt to estimate the relative costs of predication versus branching.
+ float UnpredCost = Probability * NumT + (1.0 - Probability) * NumF;
+ UnpredCost += 1.0; // The branch itself
+ UnpredCost += (1.0 - Confidence) * Subtarget.getMispredictionPenalty();
+
+ float PredCost = NumT + NumF;
+
+ return PredCost < UnpredCost;
+}
+
/// getInstrPredicate - If instruction is predicated, returns its predicate
/// condition, otherwise returns AL. It also returns the condition code
/// register by reference.
Offset = (isSub) ? -Offset : Offset;
return Offset == 0;
}
+
+bool ARMBaseInstrInfo::
+AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg, int &CmpMask,
+ int &CmpValue) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case ARM::CMPri:
+ case ARM::CMPzri:
+ case ARM::t2CMPri:
+ case ARM::t2CMPzri:
+ SrcReg = MI->getOperand(0).getReg();
+ CmpMask = ~0;
+ CmpValue = MI->getOperand(1).getImm();
+ return true;
+ case ARM::TSTri:
+ case ARM::t2TSTri:
+ SrcReg = MI->getOperand(0).getReg();
+ CmpMask = MI->getOperand(1).getImm();
+ CmpValue = 0;
+ return true;
+ }
+
+ return false;
+}
+
+/// isSuitableForMask - Identify a suitable 'and' instruction that
+/// operates on the given source register and applies the same mask
+/// as a 'tst' instruction. Provide a limited look-through for copies.
+/// When successful, MI will hold the found instruction.
+static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg,
+ int CmpMask, bool CommonUse) {
+ switch (MI->getOpcode()) {
+ case ARM::ANDri:
+ case ARM::t2ANDri:
+ if (CmpMask != MI->getOperand(2).getImm())
+ return false;
+ if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg())
+ return true;
+ break;
+ case ARM::COPY: {
+ // Walk down one instruction which is potentially an 'and'.
+ const MachineInstr &Copy = *MI;
+ MachineBasicBlock::iterator AND(
+ llvm::next(MachineBasicBlock::iterator(MI)));
+ if (AND == MI->getParent()->end()) return false;
+ MI = AND;
+ return isSuitableForMask(MI, Copy.getOperand(0).getReg(),
+ CmpMask, true);
+ }
+ }
+
+ return false;
+}
+
+/// OptimizeCompareInstr - Convert the instruction supplying the argument to the
+/// comparison into one that sets the zero bit in the flags register. Update the
+/// iterator *only* if a transformation took place.
+bool ARMBaseInstrInfo::
+OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
+ int CmpValue, const MachineRegisterInfo *MRI,
+ MachineBasicBlock::iterator &MII) const {
+ if (CmpValue != 0)
+ return false;
+
+ MachineRegisterInfo::def_iterator DI = MRI->def_begin(SrcReg);
+ if (llvm::next(DI) != MRI->def_end())
+ // Only support one definition.
+ return false;
+
+ MachineInstr *MI = &*DI;
+
+ // Masked compares sometimes use the same register as the corresponding 'and'.
+ if (CmpMask != ~0) {
+ if (!isSuitableForMask(MI, SrcReg, CmpMask, false)) {
+ MI = 0;
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg),
+ UE = MRI->use_end(); UI != UE; ++UI) {
+ if (UI->getParent() != CmpInstr->getParent()) continue;
+ MachineInstr *PotentialAND = &*UI;
+ if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true))
+ continue;
+ MI = PotentialAND;
+ break;
+ }
+ if (!MI) return false;
+ }
+ }
+
+ // Conservatively refuse to convert an instruction which isn't in the same BB
+ // as the comparison.
+ if (MI->getParent() != CmpInstr->getParent())
+ return false;
+
+ // Check that CPSR isn't set between the comparison instruction and the one we
+ // want to change.
+ MachineBasicBlock::const_iterator I = CmpInstr, E = MI,
+ B = MI->getParent()->begin();
+
+ // Early exit if CmpInstr is at the beginning of the BB.
+ if (I == B) return false;
+
+ --I;
+ for (; I != E; --I) {
+ const MachineInstr &Instr = *I;
+
+ for (unsigned IO = 0, EO = Instr.getNumOperands(); IO != EO; ++IO) {
+ const MachineOperand &MO = Instr.getOperand(IO);
+ if (!MO.isReg() || !MO.isDef()) continue;
+
+ // This instruction modifies CPSR before the one we want to change. We
+ // can't do this transformation.
+ if (MO.getReg() == ARM::CPSR)
+ return false;
+ }
+
+ if (I == B)
+ // The 'and' is below the comparison instruction.
+ return false;
+ }
+
+ // Set the "zero" bit in CPSR.
+ switch (MI->getOpcode()) {
+ default: break;
+ case ARM::ADDri:
+ case ARM::ANDri:
+ case ARM::t2ANDri:
+ case ARM::SUBri:
+ case ARM::t2ADDri:
+ case ARM::t2SUBri:
+ MI->RemoveOperand(5);
+ MachineInstrBuilder(MI)
+ .addReg(ARM::CPSR, RegState::Define | RegState::Implicit);
+ MII = llvm::next(MachineBasicBlock::iterator(CmpInstr));
+ CmpInstr->eraseFromParent();
+ return true;
+ }
+
+ return false;
+}
+
+unsigned
+ARMBaseInstrInfo::getNumMicroOps(const MachineInstr *MI,
+ const InstrItineraryData *ItinData) const {
+ if (!ItinData || ItinData->isEmpty())
+ return 1;
+
+ const TargetInstrDesc &Desc = MI->getDesc();
+ unsigned Class = Desc.getSchedClass();
+ unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
+ if (UOps)
+ return UOps;
+
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unexpected multi-uops instruction!");
+ break;
+ case ARM::VLDMQ:
+ case ARM::VSTMQ:
+ return 2;
+
+ // The number of uOps for load / store multiple are determined by the number
+ // registers.
+ // On Cortex-A8, each pair of register loads / stores can be scheduled on the
+ // same cycle. The scheduling for the first load / store must be done
+ // separately by assuming the the address is not 64-bit aligned.
+ // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
+ // is not 64-bit aligned, then AGU would take an extra cycle.
+ // For VFP / NEON load / store multiple, the formula is
+ // (#reg / 2) + (#reg % 2) + 1.
+ case ARM::VLDMD:
+ case ARM::VLDMS:
+ case ARM::VLDMD_UPD:
+ case ARM::VLDMS_UPD:
+ case ARM::VSTMD:
+ case ARM::VSTMS:
+ case ARM::VSTMD_UPD:
+ case ARM::VSTMS_UPD: {
+ unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands();
+ return (NumRegs / 2) + (NumRegs % 2) + 1;
+ }
+ case ARM::LDM_RET:
+ case ARM::LDM:
+ case ARM::LDM_UPD:
+ case ARM::STM:
+ case ARM::STM_UPD:
+ case ARM::tLDM:
+ case ARM::tLDM_UPD:
+ case ARM::tSTM_UPD:
+ case ARM::tPOP_RET:
+ case ARM::tPOP:
+ case ARM::tPUSH:
+ case ARM::t2LDM_RET:
+ case ARM::t2LDM:
+ case ARM::t2LDM_UPD:
+ case ARM::t2STM:
+ case ARM::t2STM_UPD: {
+ unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1;
+ if (Subtarget.isCortexA8()) {
+ // 4 registers would be issued: 1, 2, 1.
+ // 5 registers would be issued: 1, 2, 2.
+ return 1 + (NumRegs / 2);
+ } else if (Subtarget.isCortexA9()) {
+ UOps = (NumRegs / 2);
+ // If there are odd number of registers or if it's not 64-bit aligned,
+ // then it takes an extra AGU (Address Generation Unit) cycle.
+ if ((NumRegs % 2) ||
+ !MI->hasOneMemOperand() ||
+ (*MI->memoperands_begin())->getAlignment() < 8)
+ ++UOps;
+ return UOps;
+ } else {
+ // Assume the worst.
+ return NumRegs;
+ }
+ }
+ }
+}
+
+int
+ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
+ const TargetInstrDesc &DefTID,
+ unsigned DefClass,
+ unsigned DefIdx, unsigned DefAlign) const {
+ int RegNo = (int)(DefIdx+1) - DefTID.getNumOperands() + 1;
+ if (RegNo <= 0)
+ // Def is the address writeback.
+ return ItinData->getOperandCycle(DefClass, DefIdx);
+
+ int DefCycle;
+ if (Subtarget.isCortexA8()) {
+ // (regno / 2) + (regno % 2) + 1
+ DefCycle = RegNo / 2 + 1;
+ if (RegNo % 2)
+ ++DefCycle;
+ } else if (Subtarget.isCortexA9()) {
+ DefCycle = RegNo;
+ bool isSLoad = false;
+ switch (DefTID.getOpcode()) {
+ default: break;
+ case ARM::VLDMS:
+ case ARM::VLDMS_UPD:
+ isSLoad = true;
+ break;
+ }
+ // If there are odd number of 'S' registers or if it's not 64-bit aligned,
+ // then it takes an extra cycle.
+ if ((isSLoad && (RegNo % 2)) || DefAlign < 8)
+ ++DefCycle;
+ } else {
+ // Assume the worst.
+ DefCycle = RegNo + 2;
+ }
+
+ return DefCycle;
+}
+
+int
+ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
+ const TargetInstrDesc &DefTID,
+ unsigned DefClass,
+ unsigned DefIdx, unsigned DefAlign) const {
+ int RegNo = (int)(DefIdx+1) - DefTID.getNumOperands() + 1;
+ if (RegNo <= 0)
+ // Def is the address writeback.
+ return ItinData->getOperandCycle(DefClass, DefIdx);
+
+ int DefCycle;
+ if (Subtarget.isCortexA8()) {
+ // 4 registers would be issued: 1, 2, 1.
+ // 5 registers would be issued: 1, 2, 2.
+ DefCycle = RegNo / 2;
+ if (DefCycle < 1)
+ DefCycle = 1;
+ // Result latency is issue cycle + 2: E2.
+ DefCycle += 2;
+ } else if (Subtarget.isCortexA9()) {
+ DefCycle = (RegNo / 2);
+ // If there are odd number of registers or if it's not 64-bit aligned,
+ // then it takes an extra AGU (Address Generation Unit) cycle.
+ if ((RegNo % 2) || DefAlign < 8)
+ ++DefCycle;
+ // Result latency is AGU cycles + 2.
+ DefCycle += 2;
+ } else {
+ // Assume the worst.
+ DefCycle = RegNo + 2;
+ }
+
+ return DefCycle;
+}
+
+int
+ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
+ const TargetInstrDesc &UseTID,
+ unsigned UseClass,
+ unsigned UseIdx, unsigned UseAlign) const {
+ int RegNo = (int)(UseIdx+1) - UseTID.getNumOperands() + 1;
+ if (RegNo <= 0)
+ return ItinData->getOperandCycle(UseClass, UseIdx);
+
+ int UseCycle;
+ if (Subtarget.isCortexA8()) {
+ // (regno / 2) + (regno % 2) + 1
+ UseCycle = RegNo / 2 + 1;
+ if (RegNo % 2)
+ ++UseCycle;
+ } else if (Subtarget.isCortexA9()) {
+ UseCycle = RegNo;
+ bool isSStore = false;
+ switch (UseTID.getOpcode()) {
+ default: break;
+ case ARM::VSTMS:
+ case ARM::VSTMS_UPD:
+ isSStore = true;
+ break;
+ }
+ // If there are odd number of 'S' registers or if it's not 64-bit aligned,
+ // then it takes an extra cycle.
+ if ((isSStore && (RegNo % 2)) || UseAlign < 8)
+ ++UseCycle;
+ } else {
+ // Assume the worst.
+ UseCycle = RegNo + 2;
+ }
+
+ return UseCycle;
+}
+
+int
+ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
+ const TargetInstrDesc &UseTID,
+ unsigned UseClass,
+ unsigned UseIdx, unsigned UseAlign) const {
+ int RegNo = (int)(UseIdx+1) - UseTID.getNumOperands() + 1;
+ if (RegNo <= 0)
+ return ItinData->getOperandCycle(UseClass, UseIdx);
+
+ int UseCycle;
+ if (Subtarget.isCortexA8()) {
+ UseCycle = RegNo / 2;
+ if (UseCycle < 2)
+ UseCycle = 2;
+ // Read in E3.
+ UseCycle += 2;
+ } else if (Subtarget.isCortexA9()) {
+ UseCycle = (RegNo / 2);
+ // If there are odd number of registers or if it's not 64-bit aligned,
+ // then it takes an extra AGU (Address Generation Unit) cycle.
+ if ((RegNo % 2) || UseAlign < 8)
+ ++UseCycle;
+ } else {
+ // Assume the worst.
+ UseCycle = 1;
+ }
+ return UseCycle;
+}
+
+int
+ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ const TargetInstrDesc &DefTID,
+ unsigned DefIdx, unsigned DefAlign,
+ const TargetInstrDesc &UseTID,
+ unsigned UseIdx, unsigned UseAlign) const {
+ unsigned DefClass = DefTID.getSchedClass();
+ unsigned UseClass = UseTID.getSchedClass();
+
+ if (DefIdx < DefTID.getNumDefs() && UseIdx < UseTID.getNumOperands())
+ return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
+
+ // This may be a def / use of a variable_ops instruction, the operand
+ // latency might be determinable dynamically. Let the target try to
+ // figure it out.
+ bool LdmBypass = false;
+ int DefCycle = -1;
+ switch (DefTID.getOpcode()) {
+ default:
+ DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
+ break;
+ case ARM::VLDMD:
+ case ARM::VLDMS:
+ case ARM::VLDMD_UPD:
+ case ARM::VLDMS_UPD: {
+ DefCycle = getVLDMDefCycle(ItinData, DefTID, DefClass, DefIdx, DefAlign);
+ break;
+ }
+ case ARM::LDM_RET:
+ case ARM::LDM:
+ case ARM::LDM_UPD:
+ case ARM::tLDM:
+ case ARM::tLDM_UPD:
+ case ARM::tPUSH:
+ case ARM::t2LDM_RET:
+ case ARM::t2LDM:
+ case ARM::t2LDM_UPD: {
+ LdmBypass = 1;
+ DefCycle = getLDMDefCycle(ItinData, DefTID, DefClass, DefIdx, DefAlign);
+ break;
+ }
+ }
+
+ if (DefCycle == -1)
+ // We can't seem to determine the result latency of the def, assume it's 2.
+ DefCycle = 2;
+
+ int UseCycle = -1;
+ switch (UseTID.getOpcode()) {
+ default:
+ UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
+ break;
+ case ARM::VSTMD:
+ case ARM::VSTMS:
+ case ARM::VSTMD_UPD:
+ case ARM::VSTMS_UPD: {
+ UseCycle = getVSTMUseCycle(ItinData, UseTID, UseClass, UseIdx, UseAlign);
+ break;
+ }
+ case ARM::STM:
+ case ARM::STM_UPD:
+ case ARM::tSTM_UPD:
+ case ARM::tPOP_RET:
+ case ARM::tPOP:
+ case ARM::t2STM:
+ case ARM::t2STM_UPD: {
+ UseCycle = getSTMUseCycle(ItinData, UseTID, UseClass, UseIdx, UseAlign);
+ break;
+ }
+ }
+
+ if (UseCycle == -1)
+ // Assume it's read in the first stage.
+ UseCycle = 1;
+
+ UseCycle = DefCycle - UseCycle + 1;
+ if (UseCycle > 0) {
+ if (LdmBypass) {
+ // It's a variable_ops instruction so we can't use DefIdx here. Just use
+ // first def operand.
+ if (ItinData->hasPipelineForwarding(DefClass, DefTID.getNumOperands()-1,
+ UseClass, UseIdx))
+ --UseCycle;
+ } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
+ UseClass, UseIdx))
+ --UseCycle;
+ }
+
+ return UseCycle;
+}
+
+int
+ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const {
+ if (DefMI->isCopyLike() || DefMI->isInsertSubreg() ||
+ DefMI->isRegSequence() || DefMI->isImplicitDef())
+ return 1;
+
+ const TargetInstrDesc &DefTID = DefMI->getDesc();
+ if (!ItinData || ItinData->isEmpty())
+ return DefTID.mayLoad() ? 3 : 1;
+
+ const TargetInstrDesc &UseTID = UseMI->getDesc();
+ unsigned DefAlign = DefMI->hasOneMemOperand()
+ ? (*DefMI->memoperands_begin())->getAlignment() : 0;
+ unsigned UseAlign = UseMI->hasOneMemOperand()
+ ? (*UseMI->memoperands_begin())->getAlignment() : 0;
+ return getOperandLatency(ItinData, DefTID, DefIdx, DefAlign,
+ UseTID, UseIdx, UseAlign);
+}
+
+int
+ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ SDNode *DefNode, unsigned DefIdx,
+ SDNode *UseNode, unsigned UseIdx) const {
+ if (!DefNode->isMachineOpcode())
+ return 1;
+
+ const TargetInstrDesc &DefTID = get(DefNode->getMachineOpcode());
+ if (!ItinData || ItinData->isEmpty())
+ return DefTID.mayLoad() ? 3 : 1;
+
+ if (!UseNode->isMachineOpcode())
+ return ItinData->getOperandCycle(DefTID.getSchedClass(), DefIdx);
+
+ const TargetInstrDesc &UseTID = get(UseNode->getMachineOpcode());
+ const MachineSDNode *DefMN = dyn_cast<MachineSDNode>(DefNode);
+ unsigned DefAlign = !DefMN->memoperands_empty()
+ ? (*DefMN->memoperands_begin())->getAlignment() : 0;
+ const MachineSDNode *UseMN = dyn_cast<MachineSDNode>(UseNode);
+ unsigned UseAlign = !UseMN->memoperands_empty()
+ ? (*UseMN->memoperands_begin())->getAlignment() : 0;
+ return getOperandLatency(ItinData, DefTID, DefIdx, DefAlign,
+ UseTID, UseIdx, UseAlign);
+}
+
+bool ARMBaseInstrInfo::
+hasHighOperandLatency(const InstrItineraryData *ItinData,
+ const MachineRegisterInfo *MRI,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const {
+ unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask;
+ unsigned UDomain = UseMI->getDesc().TSFlags & ARMII::DomainMask;
+ if (Subtarget.isCortexA8() &&
+ (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
+ // CortexA8 VFP instructions are not pipelined.
+ return true;
+
+ // Hoist VFP / NEON instructions with 4 or higher latency.
+ int Latency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx);
+ if (Latency <= 3)
+ return false;
+ return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
+ UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
+}