struct AArch64LoadStoreOpt : public MachineFunctionPass {
static char ID;
- AArch64LoadStoreOpt() : MachineFunctionPass(ID), IsStrictAlign(false) {
+ AArch64LoadStoreOpt() : MachineFunctionPass(ID) {
initializeAArch64LoadStoreOptPass(*PassRegistry::getPassRegistry());
}
const AArch64InstrInfo *TII;
const TargetRegisterInfo *TRI;
- bool IsStrictAlign;
// Scan the instructions looking for a load/store that can be combined
// with the current instruction into a load/store pair.
@@ … @@
  // Find and merge foldable ldr/str instructions.
bool tryToMergeLdStInst(MachineBasicBlock::iterator &MBBI);
- bool optimizeBlock(MachineBasicBlock &MBB);
+ // Check if converting two narrow loads into a single wider load with
+ // bitfield extracts could be enabled.
+ bool enableNarrowLdMerge(MachineFunction &Fn);
+
+ bool optimizeBlock(MachineBasicBlock &MBB, bool enableNarrowLdOpt);
bool runOnMachineFunction(MachineFunction &Fn) override;
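For context, the "narrow load merge" being gated here rewrites two adjacent
halfword loads into one 32-bit load plus bitfield extracts. A minimal
little-endian sketch, with registers and offsets chosen purely for
illustration:

    ldrh  w0, [x2]            ; load the lower halfword
    ldrh  w1, [x2, #2]        ; load the adjacent upper halfword
    ; ...becomes...
    ldr   w0, [x2]            ; one 32-bit load covering both halfwords
    ubfx  w1, w0, #16, #16    ; extract the upper halfword first
    and   w0, w0, #0xffff     ; then mask w0 down to the lower halfword

Note that the ubfx must precede the and, since both read the merged value
left in w0.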
@@ … @@
  return false;
}
-bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
+bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
+ bool enableNarrowLdOpt) {
bool Modified = false;
  // Three transformations to do here:
  // 1) Find halfword loads that can be merged into a single 32-bit word load
  //    with bitfield extract instructions.
@@ … @@
for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
- !IsStrictAlign && MBBI != E;) {
+ enableNarrowLdOpt && MBBI != E;) {
MachineInstr *MI = MBBI;
switch (MI->getOpcode()) {
default:
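Only the narrow-load loop above is gated on enableNarrowLdOpt; the other
two transformations optimizeBlock performs run unconditionally. Sketches of
those rewrites, again with illustrative registers:

    ; 2) merge two loads into a single load-pair instruction:
    ldr   x0, [x2]
    ldr   x1, [x2, #8]
    ; ...becomes...
    ldp   x0, x1, [x2]

    ; 3) fold a base-register update into a post-indexed access:
    ldr   x0, [x2]
    add   x2, x2, #4
    ; ...becomes...
    ldr   x0, [x2], #4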
@@ … @@
  return Modified;
}
+bool AArch64LoadStoreOpt::enableNarrowLdMerge(MachineFunction &Fn) {
+ const AArch64Subtarget *SubTarget =
+ &static_cast<const AArch64Subtarget &>(Fn.getSubtarget());
+ bool ProfitableArch = SubTarget->isCortexA57();
+ // FIXME: The benefit from converting narrow loads into a wider load could be
+ // microarchitectural as it assumes that a single load with two bitfield
+ // extracts is cheaper than two narrow loads. Currently, this conversion is
+ // enabled only in cortex-a57 on which performance benefits were verified.
+ return ProfitableArch && !SubTarget->requiresStrictAlign();
+}
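The requiresStrictAlign() half of this predicate is about correctness as
much as profitability: the merged 32-bit load inherits only the 2-byte
alignment guaranteed by the original halfword accesses, and a
strict-alignment target cannot assume unaligned accesses are legal. A
hypothetical instance:

    ; x2 is only 2-byte aligned here:
    ldrh  w0, [x2]      ; 2-byte access, always sufficiently aligned
    ldr   w0, [x2]      ; 4-byte access at a 2-byte-aligned address may
                        ; take an alignment fault under strict alignment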
+
bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
TII = static_cast<const AArch64InstrInfo *>(Fn.getSubtarget().getInstrInfo());
TRI = Fn.getSubtarget().getRegisterInfo();
- IsStrictAlign = (static_cast<const AArch64Subtarget &>(Fn.getSubtarget()))
- .requiresStrictAlign();
bool Modified = false;
+ bool enableNarrowLdOpt = enableNarrowLdMerge(Fn);
for (auto &MBB : Fn)
- Modified |= optimizeBlock(MBB);
+ Modified |= optimizeBlock(MBB, enableNarrowLdOpt);
return Modified;
}