EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
cl::desc("Enable ARM 2-addr to 3-addr conv"));
+static cl::opt<bool>
+WidenVMOVS("widen-vmovs", cl::Hidden,
+ cl::desc("Widen ARM vmovs to vmovd when possible")); // Gates the COPY widening in expandPostRAPseudo(); no cl::init is given here, so presumably off by default - confirm.
+
/// ARM_MLxEntry - Record information about MLA / MLS instructions.
struct ARM_MLxEntry {
unsigned MLxOpc; // MLA / MLS opcode
? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
int BccOpc = !AFI->isThumbFunction()
? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);
+ bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function();
// Shouldn't be a fall through.
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
"ARM branch conditions have two components!");
if (FBB == 0) {
- if (Cond.empty()) // Unconditional branch?
- BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
- else
+ if (Cond.empty()) { // Unconditional branch?
+ if (isThumb)
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).addImm(ARMCC::AL).addReg(0);
+ else
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
+ } else
BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
.addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
return 1;
// Two-way conditional branch.
BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
.addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
- BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
+ if (isThumb)
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).addImm(ARMCC::AL).addReg(0);
+ else
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
return 2;
}
bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);
unsigned Opc = 0;
- if (SPRDest && SPRSrc) {
+ if (SPRDest && SPRSrc)
Opc = ARM::VMOVS;
-
- // An even S-S copy may be feeding a NEON v2f32 instruction being used for
- // f32 operations. In that case, it is better to copy the full D-regs with
- // a VMOVD since that can be converted to a NEON-domain move by
- // NEONMoveFix.cpp. Check that MI is the original COPY instruction, and
- // that it really defines the whole D-register.
- if ((DestReg - ARM::S0) % 2 == 0 && (SrcReg - ARM::S0) % 2 == 0 &&
- I != MBB.end() && I->isCopy() &&
- I->getOperand(0).getReg() == DestReg &&
- I->getOperand(1).getReg() == SrcReg) {
- // I is pointing to the ortiginal COPY instruction.
- // Find the parent D-registers.
- const TargetRegisterInfo *TRI = &getRegisterInfo();
- unsigned SrcD = TRI->getMatchingSuperReg(SrcReg, ARM::ssub_0,
- &ARM::DPRRegClass);
- unsigned DestD = TRI->getMatchingSuperReg(DestReg, ARM::ssub_0,
- &ARM::DPRRegClass);
- // Be careful to not clobber an INSERT_SUBREG that reads and redefines a
- // D-register. There must be an <imp-def> of destD, and no <imp-use>.
- if (I->definesRegister(DestD, TRI) && !I->readsRegister(DestD, TRI)) {
- Opc = ARM::VMOVD;
- SrcReg = SrcD;
- DestReg = DestD;
- if (KillSrc)
- KillSrc = I->killsRegister(SrcReg, TRI);
- }
- }
- } else if (GPRDest && SPRSrc)
+ else if (GPRDest && SPRSrc)
Opc = ARM::VMOVRS;
else if (SPRDest && GPRSrc)
Opc = ARM::VMOVSR;
Opc = ARM::VMOVD;
else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
Opc = ARM::VORRq;
- else if (ARM::QQPRRegClass.contains(DestReg, SrcReg))
- Opc = ARM::VMOVQQ;
if (Opc) {
MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
MIB.addReg(SrcReg, getKillRegState(KillSrc));
if (Opc == ARM::VORRq)
MIB.addReg(SrcReg, getKillRegState(KillSrc));
- if (Opc != ARM::VMOVQQ)
- AddDefaultPred(MIB);
+ AddDefaultPred(MIB);
return;
}
- // Expand the MOVQQQQ pseudo instruction in place.
- if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
+ // Generate instructions for VMOVQQ and VMOVQQQQ pseudos in place.
+ if (ARM::QQPRRegClass.contains(DestReg, SrcReg) ||
+ ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
const TargetRegisterInfo *TRI = &getRegisterInfo();
assert(ARM::qsub_0 + 3 == ARM::qsub_3 && "Expected contiguous enum.");
- for (unsigned i = ARM::qsub_0, e = ARM::qsub_3 + 1; i != e; ++i) {
+ unsigned EndSubReg = ARM::QQPRRegClass.contains(DestReg, SrcReg) ?
+ ARM::qsub_1 : ARM::qsub_3;
+ for (unsigned i = ARM::qsub_0, e = EndSubReg + 1; i != e; ++i) {
unsigned Dst = TRI->getSubReg(DestReg, i);
unsigned Src = TRI->getSubReg(SrcReg, i);
MachineInstrBuilder Mov =
.addReg(Dst, RegState::Define)
.addReg(Src, getKillRegState(KillSrc))
.addReg(Src, getKillRegState(KillSrc)));
- if (i == ARM::qsub_3) {
+ if (i == EndSubReg) {
Mov->addRegisterDefined(DestReg, TRI);
if (KillSrc)
Mov->addRegisterKilled(SrcReg, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
- AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
+ MIB.addReg(DestReg, RegState::Define | RegState::Implicit);
}
} else
llvm_unreachable("Unknown reg class!");
MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::Define, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::Define, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::Define, TRI);
- AddDReg(MIB, DestReg, ARM::dsub_7, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::Define, TRI);
+ MIB.addReg(DestReg, RegState::Define | RegState::Implicit);
} else
llvm_unreachable("Unknown reg class!");
break;
return MI->getDesc().mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex);
}
+bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{ // Returns true iff MI was rewritten in place from an S-reg COPY to a VMOVD.
+ // This hook gets to expand COPY instructions before they become
+ // copyPhysReg() calls. Look for VMOVS instructions that can legally be
+ // widened to VMOVD. We prefer the VMOVD when possible because it may be
+ // changed into a VORR that can go down the NEON pipeline.
+ if (!WidenVMOVS || !MI->isCopy()) // widening is opt-in via -widen-vmovs and applies to COPY only
+ return false;
+
+ // Look for a copy between even S-registers. That is where we keep floats
+ // when using NEON v2f32 instructions for f32 arithmetic.
+ unsigned DstRegS = MI->getOperand(0).getReg();
+ unsigned SrcRegS = MI->getOperand(1).getReg();
+ if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS)) // both ends must be S-registers
+ return false;
+
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ unsigned DstRegD = TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0,
+ &ARM::DPRRegClass);
+ unsigned SrcRegD = TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0,
+ &ARM::DPRRegClass);
+ if (!DstRegD || !SrcRegD) // bail if either S-reg is not the ssub_0 lane of some D-register
+ return false;
+
+ // We want to widen this into a DstRegD = VMOVD SrcRegD copy. This is only
+ // legal if the COPY already defines the full DstRegD, and it isn't a
+ // sub-register insertion.
+ if (!MI->definesRegister(DstRegD, TRI) || MI->readsRegister(DstRegD, TRI))
+ return false;
+
+ // A dead copy shouldn't show up here, but reject it just in case.
+ if (MI->getOperand(0).isDead())
+ return false;
+
+ // All clear, widen the COPY.
+ DEBUG(dbgs() << "widening: " << *MI);
+
+ // Get rid of the old <imp-def> of DstRegD. Leave it if it defines a Q-reg
+ // or some other super-register.
+ int ImpDefIdx = MI->findRegisterDefOperandIdx(DstRegD);
+ if (ImpDefIdx != -1) // -1 means no operand defining DstRegD was found
+ MI->RemoveOperand(ImpDefIdx);
+
+ // Change the opcode and operands.
+ MI->setDesc(get(ARM::VMOVD));
+ MI->getOperand(0).setReg(DstRegD);
+ MI->getOperand(1).setReg(SrcRegD);
+ AddDefaultPred(MachineInstrBuilder(MI)); // append the default predicate operands for the new VMOVD
+
+ // We are now reading SrcRegD instead of SrcRegS. This may upset the
+ // register scavenger and machine verifier, so we need to indicate that we
+ // are reading an undefined value from SrcRegD, but a proper value from
+ // SrcRegS.
+ MI->getOperand(1).setIsUndef();
+ MachineInstrBuilder(MI).addReg(SrcRegS, RegState::Implicit);
+
+ // SrcRegD may actually contain an unrelated value in the ssub_1
+ // sub-register. Don't kill it. Only kill the ssub_0 sub-register.
+ if (MI->getOperand(1).isKill()) {
+ MI->getOperand(1).setIsKill(false);
+ MI->addRegisterKilled(SrcRegS, TRI, true); // NOTE(review): 'true' presumably means "add a kill operand if none exists" - confirm against MachineInstr API
+ }
+
+ DEBUG(dbgs() << "replaced by: " << *MI);
+ return true;
+}
+
MachineInstr*
ARMBaseInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
int FrameIx, uint64_t Offset,
// instructions, so that's probably OK, but is PIC always correct when
// we get here?
if (ACPV->isGlobalValue())
- NewCPV = new ARMConstantPoolValue(ACPV->getGV(), PCLabelId,
- ARMCP::CPValue, 4);
+ NewCPV = ARMConstantPoolConstant::
+ Create(cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId,
+ ARMCP::CPValue, 4);
else if (ACPV->isExtSymbol())
- NewCPV = new ARMConstantPoolValue(MF.getFunction()->getContext(),
- ACPV->getSymbol(), PCLabelId, 4);
+ NewCPV = ARMConstantPoolSymbol::
+ Create(MF.getFunction()->getContext(),
+ cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4);
else if (ACPV->isBlockAddress())
- NewCPV = new ARMConstantPoolValue(ACPV->getBlockAddress(), PCLabelId,
- ARMCP::CPBlockAddress, 4);
+ NewCPV = ARMConstantPoolConstant::
+ Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId,
+ ARMCP::CPBlockAddress, 4);
else if (ACPV->isLSDA())
- NewCPV = new ARMConstantPoolValue(MF.getFunction(), PCLabelId,
- ARMCP::CPLSDA, 4);
+ NewCPV = ARMConstantPoolConstant::Create(MF.getFunction(), PCLabelId,
+ ARMCP::CPLSDA, 4);
+ else if (ACPV->isMachineBasicBlock())
+ NewCPV = ARMConstantPoolMBB::
+ Create(MF.getFunction()->getContext(),
+ cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4);
else
llvm_unreachable("Unexpected ARM constantpool value type!!");
CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment());
// Attempt to estimate the relative costs of predication versus branching.
unsigned TUnpredCost = Probability.getNumerator() * TCycles;
TUnpredCost /= Probability.getDenominator();
-
+
uint32_t Comp = Probability.getDenominator() - Probability.getNumerator();
unsigned FUnpredCost = Comp * FCycles;
FUnpredCost /= Probability.getDenominator();
}
+/// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
+/// instruction is encoded with an 'S' bit is determined by the optional CPSR
+/// def operand.
+///
+/// This will go away once we can teach tblgen how to set the optional CPSR def
+/// operand itself.
+struct AddSubFlagsOpcodePair {
+ unsigned PseudoOpc; // flag-setting pseudo opcode; the lookup key in convertAddSubFlagsOpcode()
+ unsigned MachineOpc; // real opcode that convertAddSubFlagsOpcode() returns for PseudoOpc
+};
+
+static AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = { // {pseudo, real} pairs; scanned linearly by convertAddSubFlagsOpcode()
+ {ARM::ADDSri, ARM::ADDri}, // ADDS pseudos -> ADD opcodes
+ {ARM::ADDSrr, ARM::ADDrr},
+ {ARM::ADDSrsi, ARM::ADDrsi},
+ {ARM::ADDSrsr, ARM::ADDrsr},
+
+ {ARM::SUBSri, ARM::SUBri}, // SUBS pseudos -> SUB opcodes
+ {ARM::SUBSrr, ARM::SUBrr},
+ {ARM::SUBSrsi, ARM::SUBrsi},
+ {ARM::SUBSrsr, ARM::SUBrsr},
+
+ {ARM::RSBSri, ARM::RSBri}, // RSBS pseudos -> RSB opcodes (no rr entry is listed)
+ {ARM::RSBSrsi, ARM::RSBrsi},
+ {ARM::RSBSrsr, ARM::RSBrsr},
+
+ {ARM::t2ADDSri, ARM::t2ADDri}, // t2-prefixed (Thumb2) counterparts start here
+ {ARM::t2ADDSrr, ARM::t2ADDrr},
+ {ARM::t2ADDSrs, ARM::t2ADDrs},
+
+ {ARM::t2SUBSri, ARM::t2SUBri},
+ {ARM::t2SUBSrr, ARM::t2SUBrr},
+ {ARM::t2SUBSrs, ARM::t2SUBrs},
+
+ {ARM::t2RSBSri, ARM::t2RSBri},
+ {ARM::t2RSBSrs, ARM::t2RSBrs},
+};
+
+unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) { // Returns the real opcode paired with pseudo OldOpc, or 0 if OldOpc is not in AddSubFlagsOpcodeMap.
+ static const int NPairs =
+ sizeof(AddSubFlagsOpcodeMap) / sizeof(AddSubFlagsOpcodePair); // number of table entries
+ for (AddSubFlagsOpcodePair *OpcPair = &AddSubFlagsOpcodeMap[0],
+ *End = &AddSubFlagsOpcodeMap[NPairs]; OpcPair != End; ++OpcPair) { // linear scan; End is one past the last entry
+ if (OldOpc == OpcPair->PseudoOpc) {
+ return OpcPair->MachineOpc;
+ }
+ }
+ return 0; // OldOpc is not one of the listed flag-setting pseudos
+}
+
void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI, DebugLoc dl,
unsigned DestReg, unsigned BaseReg, int NumBytes,
case ARM::STMIB_UPD:
case ARM::tLDMIA:
case ARM::tLDMIA_UPD:
- case ARM::tSTMIA:
case ARM::tSTMIA_UPD:
case ARM::tPOP_RET:
case ARM::tPOP:
case ARM::STMDA_UPD:
case ARM::STMDB_UPD:
case ARM::STMIB_UPD:
- case ARM::tSTMIA:
case ARM::tSTMIA_UPD:
case ARM::tPOP_RET:
case ARM::tPOP:
return false;
}
+bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr *MI,
+ StringRef &ErrInfo) const { // Returns false and sets ErrInfo when MI still uses a flag-setting add/sub pseudo opcode; true otherwise.
+ if (convertAddSubFlagsOpcode(MI->getOpcode())) { // nonzero => opcode is one of the pseudos in AddSubFlagsOpcodeMap
+ ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG";
+ return false;
+ }
+ return true;
+}
+
bool
ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
unsigned &AddSubOpc,
HasLane = Entry.HasLane;
return true;
}
+
+//===----------------------------------------------------------------------===//
+// Execution domains.
+//===----------------------------------------------------------------------===//
+//
+// Some instructions go down the NEON pipeline, some go down the VFP pipeline,
+// and some can go down both. The vmov instructions go down the VFP pipeline,
+// but they can be changed to vorr equivalents that are executed by the NEON
+// pipeline.
+//
+// We use the following execution domain numbering:
+//
+enum ARMExeDomain { // Values double as bit positions in the mask built by getExecutionDomain().
+ ExeGeneric = 0, // returned when none of the domain flags tested in getExecutionDomain() match
+ ExeVFP = 1, // VFP pipeline
+ ExeNEON = 2 // NEON pipeline
+};
+//
+// Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h
+//
+std::pair<uint16_t, uint16_t>
+ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const { // Returns (current domain, bitmask of domains MI may be switched to); a mask of 0 means the domain is fixed.
+ // VMOVD is a VFP instruction, but can be changed to NEON if it isn't
+ // predicated.
+ if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI))
+ return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON)); // currently VFP; may stay VFP or become NEON
+
+ // No other instructions can be swizzled, so just determine their domain.
+ unsigned Domain = MI->getDesc().TSFlags & ARMII::DomainMask;
+
+ if (Domain & ARMII::DomainNEON)
+ return std::make_pair(ExeNEON, 0);
+
+ // Certain instructions can go either way on Cortex-A8.
+ // Treat them as NEON instructions.
+ if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8())
+ return std::make_pair(ExeNEON, 0);
+
+ if (Domain & ARMII::DomainVFP)
+ return std::make_pair(ExeVFP, 0);
+
+ return std::make_pair(ExeGeneric, 0); // none of the NEON/NEONA8/VFP domain checks above matched
+}
+
+void
+ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { // Rewrites a VMOVD into a VORRd when Domain == ExeNEON; any other Domain leaves MI untouched.
+ // We only know how to change VMOVD into VORR.
+ assert(MI->getOpcode() == ARM::VMOVD && "Can only swizzle VMOVD");
+ if (Domain != ExeNEON) // nothing to rewrite for any other requested domain
+ return;
+
+ // Zap the predicate operands.
+ assert(!isPredicated(MI) && "Cannot predicate a VORRd");
+ MI->RemoveOperand(3); // remove the higher index first so index 2 still names the intended operand
+ MI->RemoveOperand(2);
+
+ // Change to a VORRd which requires two identical use operands.
+ MI->setDesc(get(ARM::VORRd));
+
+ // Add the extra source operand and new predicates.
+ // This will go before any implicit ops.
+ AddDefaultPred(MachineInstrBuilder(MI).addOperand(MI->getOperand(1))); // duplicates the source operand (operand 1), then re-appends default predicates
+}