"aarch64-unscaled-mem-op", cl::Hidden,
cl::desc("Allow AArch64 unscaled load/store combining"), cl::init(true));
+// Declared up front so the AArch64LoadStoreOpt constructor can register the
+// pass with the PassRegistry. NOTE(review): the definition is presumably
+// produced by the INITIALIZE_PASS expansion in this file -- confirm.
+namespace llvm {
+void initializeAArch64LoadStoreOptPass(PassRegistry &Registry);
+} // end namespace llvm
+
+// Human-readable pass name, shared by getPassName() and INITIALIZE_PASS so the
+// two always report the same string.
+#define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass"
+
namespace {
typedef struct LdStPairFlags {
struct AArch64LoadStoreOpt : public MachineFunctionPass {
static char ID;
- AArch64LoadStoreOpt() : MachineFunctionPass(ID) {}
+ AArch64LoadStoreOpt() : MachineFunctionPass(ID) {
+ initializeAArch64LoadStoreOptPass(*PassRegistry::getPassRegistry());
+ }
const AArch64InstrInfo *TII;
const TargetRegisterInfo *TRI;
MachineBasicBlock::iterator
mergePairedInsns(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator Paired,
- LdStPairFlags const &Flags);
+ const LdStPairFlags &Flags);
// Scan the instruction list to find a base register update that can
// be combined with the current instruction (a load or store) using
bool runOnMachineFunction(MachineFunction &Fn) override;
const char *getPassName() const override {
- return "AArch64 load / store optimization pass";
+ return AARCH64_LOAD_STORE_OPT_NAME;
}
-
-private:
- int getMemSize(MachineInstr *MemMI);
};
char AArch64LoadStoreOpt::ID = 0;
} // namespace
-static bool isUnscaledLdst(unsigned Opc) {
+INITIALIZE_PASS(AArch64LoadStoreOpt, "aarch64-ldst-opt",
+ AARCH64_LOAD_STORE_OPT_NAME, false, false)
+
+/// Returns true if \p Opc is one of the STUR*/LDUR* opcodes listed below (the
+/// "unscaled" load/store forms this pass handles), and false for anything else.
+static bool isUnscaledLdSt(unsigned Opc) {
switch (Opc) {
default:
return false;
case AArch64::STURSi:
- return true;
case AArch64::STURDi:
- return true;
case AArch64::STURQi:
- return true;
case AArch64::STURWi:
- return true;
case AArch64::STURXi:
- return true;
case AArch64::LDURSi:
- return true;
case AArch64::LDURDi:
- return true;
case AArch64::LDURQi:
- return true;
case AArch64::LDURWi:
- return true;
case AArch64::LDURXi:
- return true;
case AArch64::LDURSWi:
return true;
}
}
+/// Convenience overload: classifies \p MI by its opcode. Takes a pointer to
+/// const because only the opcode is read, matching the const-qualified
+/// getLdSt*Op() helpers and trackRegDefsUses() in this file.
+static bool isUnscaledLdSt(const MachineInstr *MI) {
+  return isUnscaledLdSt(MI->getOpcode());
+}
+
// Size in bytes of the data moved by an unscaled load or store
-int AArch64LoadStoreOpt::getMemSize(MachineInstr *MemMI) {
- switch (MemMI->getOpcode()) {
+static int getMemSize(MachineInstr *MI) {
+ switch (MI->getOpcode()) {
default:
llvm_unreachable("Opcode has unknown size!");
case AArch64::STRSui:
}
}
+/// Returns operand 0 of \p MI. For the loads and stores paired by this pass
+/// the callers read it as the register being transferred.
+/// NOTE(review): this file also calls it on base-register update
+/// instructions, where operand 0 is compared against the base register.
+static const MachineOperand &getLdStRegOp(const MachineInstr *MI) {
+  const unsigned RegOpIdx = 0;
+  return MI->getOperand(RegOpIdx);
+}
+
+/// Returns operand 1 of \p MI, which callers in this file read as the base
+/// address register of a load or store.
+static const MachineOperand &getLdStBaseOp(const MachineInstr *MI) {
+  const unsigned BaseOpIdx = 1;
+  return MI->getOperand(BaseOpIdx);
+}
+
+/// Returns operand 2 of \p MI, which callers in this file read as the offset
+/// operand. Callers check isImm() before getImm() where the operand may be an
+/// address relocation rather than an immediate.
+static const MachineOperand &getLdStOffsetOp(const MachineInstr *MI) {
+  const unsigned OffsetOpIdx = 2;
+  return MI->getOperand(OffsetOpIdx);
+}
+
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator Paired,
int SExtIdx = Flags.getSExtIdx();
unsigned Opc =
SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode());
- bool IsUnscaled = isUnscaledLdst(Opc);
+ bool IsUnscaled = isUnscaledLdSt(Opc);
int OffsetStride =
IsUnscaled && EnableAArch64UnscaledMemOp ? getMemSize(I) : 1;
MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
// Also based on MergeForward is from where we copy the base register operand
// so we get the flags compatible with the input code.
- MachineOperand &BaseRegOp =
- MergeForward ? Paired->getOperand(1) : I->getOperand(1);
+ const MachineOperand &BaseRegOp =
+ MergeForward ? getLdStBaseOp(Paired) : getLdStBaseOp(I);
// Which register is Rt and which is Rt2 depends on the offset order.
MachineInstr *RtMI, *Rt2MI;
- if (I->getOperand(2).getImm() ==
- Paired->getOperand(2).getImm() + OffsetStride) {
+ if (getLdStOffsetOp(I).getImm() ==
+ getLdStOffsetOp(Paired).getImm() + OffsetStride) {
RtMI = Paired;
Rt2MI = I;
// Here we swapped the assumption made for SExtIdx.
Rt2MI = Paired;
}
// Handle Unscaled
- int OffsetImm = RtMI->getOperand(2).getImm();
+ int OffsetImm = getLdStOffsetOp(RtMI).getImm();
if (IsUnscaled && EnableAArch64UnscaledMemOp)
OffsetImm /= OffsetStride;
// Construct the new instruction.
MachineInstrBuilder MIB = BuildMI(*I->getParent(), InsertionPoint,
I->getDebugLoc(), TII->get(NewOpc))
- .addOperand(RtMI->getOperand(0))
- .addOperand(Rt2MI->getOperand(0))
+ .addOperand(getLdStRegOp(RtMI))
+ .addOperand(getLdStRegOp(Rt2MI))
.addOperand(BaseRegOp)
.addImm(OffsetImm);
(void)MIB;
/// trackRegDefsUses - Remember what registers the specified instruction uses
/// and modifies.
-static void trackRegDefsUses(MachineInstr *MI, BitVector &ModifiedRegs,
+static void trackRegDefsUses(const MachineInstr *MI, BitVector &ModifiedRegs,
BitVector &UsedRegs,
const TargetRegisterInfo *TRI) {
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI->operands()) {
if (MO.isRegMask())
ModifiedRegs.setBitsNotInMask(MO.getRegMask());
}
+/// Returns true if \p Offset fits the signed 7-bit range [-64, 63] accepted by
+/// the paired load/store instructions. When \p IsUnscaled is set, \p Offset is
+/// a byte offset and is first divided by \p OffsetStride (integer division) to
+/// obtain the element offset that is range-checked.
static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) {
- if (!IsUnscaled && (Offset > 63 || Offset < -64))
- return false;
- if (IsUnscaled) {
- // Convert the byte-offset used by unscaled into an "element" offset used
- // by the scaled pair load/store instructions.
- int ElemOffset = Offset / OffsetStride;
- if (ElemOffset > 63 || ElemOffset < -64)
- return false;
- }
- return true;
+ // Convert the byte-offset used by unscaled into an "element" offset used
+ // by the scaled pair load/store instructions.
+ if (IsUnscaled)
+ Offset /= OffsetStride;
+
+ return Offset <= 63 && Offset >= -64;
}
// Do alignment, specialized to power of 2 and for signed ints,
unsigned Opc = FirstMI->getOpcode();
bool MayLoad = FirstMI->mayLoad();
- bool IsUnscaled = isUnscaledLdst(Opc);
- unsigned Reg = FirstMI->getOperand(0).getReg();
- unsigned BaseReg = FirstMI->getOperand(1).getReg();
- int Offset = FirstMI->getOperand(2).getImm();
+ bool IsUnscaled = isUnscaledLdSt(FirstMI);
+ unsigned Reg = getLdStRegOp(FirstMI).getReg();
+ unsigned BaseReg = getLdStBaseOp(FirstMI).getReg();
+ int Offset = getLdStOffsetOp(FirstMI).getImm();
// Early exit if the first instruction modifies the base register.
// e.g., ldr x0, [x0]
- // Early exit if the offset if not possible to match. (6 bits of positive
- // range, plus allow an extra one in case we find a later insn that matches
- // with Offset-1
if (FirstMI->modifiesRegister(BaseReg, TRI))
return E;
+
+ // Early exit if the offset is not possible to match. (6 bits of positive
+ // range, plus allow an extra one in case we find a later insn that matches
+ // with Offset-1)
int OffsetStride =
IsUnscaled && EnableAArch64UnscaledMemOp ? getMemSize(FirstMI) : 1;
if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride))
if (!CanMergeOpc) {
bool IsValidLdStrOpc;
unsigned NonSExtOpc = getMatchingNonSExtOpcode(Opc, &IsValidLdStrOpc);
- if (!IsValidLdStrOpc)
- continue;
+ assert(IsValidLdStrOpc &&
+ "Given Opc should be a Load or Store with an immediate");
// Opc will be the first instruction in the pair.
Flags.setSExtIdx(NonSExtOpc == (unsigned)Opc ? 1 : 0);
CanMergeOpc = NonSExtOpc == getMatchingNonSExtOpcode(MI->getOpcode());
}
- if (CanMergeOpc && MI->getOperand(2).isImm()) {
+ if (CanMergeOpc && getLdStOffsetOp(MI).isImm()) {
+ assert(MI->mayLoadOrStore() && "Expected memory operation.");
// If we've found another instruction with the same opcode, check to see
// if the base and offset are compatible with our starting instruction.
// These instructions all have scaled immediate operands, so we just
// Pairwise instructions have a 7-bit signed offset field. Single insns
// have a 12-bit unsigned offset field. To be a valid combine, the
// final offset must be in range.
- unsigned MIBaseReg = MI->getOperand(1).getReg();
- int MIOffset = MI->getOperand(2).getImm();
+ unsigned MIBaseReg = getLdStBaseOp(MI).getReg();
+ int MIOffset = getLdStOffsetOp(MI).getImm();
if (BaseReg == MIBaseReg && ((Offset == MIOffset + OffsetStride) ||
(Offset + OffsetStride == MIOffset))) {
int MinOffset = Offset < MIOffset ? Offset : MIOffset;
return E;
// If the resultant immediate offset of merging these instructions
// is out of range for a pairwise instruction, bail and keep looking.
- bool MIIsUnscaled = isUnscaledLdst(MI->getOpcode());
+ bool MIIsUnscaled = isUnscaledLdSt(MI);
if (!inBoundsForPair(MIIsUnscaled, MinOffset, OffsetStride)) {
trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
- if (MI->mayLoadOrStore())
- MemInsns.push_back(MI);
+ MemInsns.push_back(MI);
continue;
}
// If the alignment requirements of the paired (scaled) instruction
if (IsUnscaled && EnableAArch64UnscaledMemOp &&
(alignTo(MinOffset, OffsetStride) != MinOffset)) {
trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
- if (MI->mayLoadOrStore())
- MemInsns.push_back(MI);
+ MemInsns.push_back(MI);
continue;
}
// If the destination register of the loads is the same register, bail
// and keep looking. A load-pair instruction with both destination
// registers the same is UNPREDICTABLE and will result in an exception.
- if (MayLoad && Reg == MI->getOperand(0).getReg()) {
+ if (MayLoad && Reg == getLdStRegOp(MI).getReg()) {
trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
- if (MI->mayLoadOrStore())
- MemInsns.push_back(MI);
+ MemInsns.push_back(MI);
continue;
}
// the two instructions and none of the instructions between the second
// and first alias with the second, we can combine the second into the
// first.
- if (!ModifiedRegs[MI->getOperand(0).getReg()] &&
- !(MI->mayLoad() && UsedRegs[MI->getOperand(0).getReg()]) &&
+ if (!ModifiedRegs[getLdStRegOp(MI).getReg()] &&
+ !(MI->mayLoad() && UsedRegs[getLdStRegOp(MI).getReg()]) &&
!mayAlias(MI, MemInsns, TII)) {
Flags.setMergeForward(false);
return MBBI;
// between the two instructions and none of the instructions between the
// first and the second alias with the first, we can combine the first
// into the second.
- if (!ModifiedRegs[FirstMI->getOperand(0).getReg()] &&
- !(FirstMI->mayLoad() &&
- UsedRegs[FirstMI->getOperand(0).getReg()]) &&
+ if (!ModifiedRegs[getLdStRegOp(FirstMI).getReg()] &&
+ !(FirstMI->mayLoad() && UsedRegs[getLdStRegOp(FirstMI).getReg()]) &&
!mayAlias(FirstMI, MemInsns, TII)) {
Flags.setMergeForward(true);
return MBBI;
unsigned NewOpc = getPreIndexedOpcode(I->getOpcode());
MachineInstrBuilder MIB =
BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
- .addOperand(Update->getOperand(0))
- .addOperand(I->getOperand(0))
- .addOperand(I->getOperand(1))
+ .addOperand(getLdStRegOp(Update))
+ .addOperand(getLdStRegOp(I))
+ .addOperand(getLdStBaseOp(I))
.addImm(Value);
(void)MIB;
unsigned NewOpc = getPostIndexedOpcode(I->getOpcode());
MachineInstrBuilder MIB =
BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
- .addOperand(Update->getOperand(0))
- .addOperand(I->getOperand(0))
- .addOperand(I->getOperand(1))
+ .addOperand(getLdStRegOp(Update))
+ .addOperand(getLdStRegOp(I))
+ .addOperand(getLdStBaseOp(I))
.addImm(Value);
(void)MIB;
break;
// If the instruction has the base register as source and dest and the
// immediate will fit in a signed 9-bit integer, then we have a match.
- if (MI->getOperand(0).getReg() == BaseReg &&
- MI->getOperand(1).getReg() == BaseReg &&
- MI->getOperand(2).getImm() <= 255 &&
- MI->getOperand(2).getImm() >= -256) {
+ if (getLdStRegOp(MI).getReg() == BaseReg &&
+ getLdStBaseOp(MI).getReg() == BaseReg &&
+ getLdStOffsetOp(MI).getImm() <= 255 &&
+ getLdStOffsetOp(MI).getImm() >= -256) {
// If we have a non-zero Offset, we check that it matches the amount
// we're adding to the register.
if (!Offset || Offset == MI->getOperand(2).getImm())
MachineBasicBlock::iterator MBBI = I;
const MachineFunction &MF = *MemMI->getParent()->getParent();
- unsigned DestReg = MemMI->getOperand(0).getReg();
- unsigned BaseReg = MemMI->getOperand(1).getReg();
- int Offset = MemMI->getOperand(2).getImm() *
+ unsigned DestReg = getLdStRegOp(MemMI).getReg();
+ unsigned BaseReg = getLdStBaseOp(MemMI).getReg();
+ int Offset = getLdStOffsetOp(MemMI).getImm() *
TII->getRegClass(MemMI->getDesc(), 0, TRI, MF)->getSize();
// If the base register overlaps the destination register, we can't
MachineBasicBlock::iterator MBBI = I;
const MachineFunction &MF = *MemMI->getParent()->getParent();
- unsigned DestReg = MemMI->getOperand(0).getReg();
- unsigned BaseReg = MemMI->getOperand(1).getReg();
- int Offset = MemMI->getOperand(2).getImm();
+ unsigned DestReg = getLdStRegOp(MemMI).getReg();
+ unsigned BaseReg = getLdStBaseOp(MemMI).getReg();
+ int Offset = getLdStOffsetOp(MemMI).getImm();
unsigned RegSize = TII->getRegClass(MemMI->getDesc(), 0, TRI, MF)->getSize();
// If the load/store is the first instruction in the block, there's obviously
break;
}
// Make sure this is a reg+imm (as opposed to an address reloc).
- if (!MI->getOperand(2).isImm()) {
+ if (!getLdStOffsetOp(MI).isImm()) {
++MBBI;
break;
}
MachineBasicBlock::iterator Paired =
findMatchingInsn(MBBI, Flags, ScanLimit);
if (Paired != E) {
+ ++NumPairCreated;
+ if (isUnscaledLdSt(MI))
+ ++NumUnscaledPairCreated;
+
// Merge the loads into a pair. Keeping the iterator straight is a
// pain, so we let the merge routine tell us what the next instruction
// is after it's done mucking about.
MBBI = mergePairedInsns(MBBI, Paired, Flags);
-
Modified = true;
- ++NumPairCreated;
- if (isUnscaledLdst(MI->getOpcode()))
- ++NumUnscaledPairCreated;
break;
}
++MBBI;
case AArch64::LDURWi:
case AArch64::LDURXi: {
// Make sure this is a reg+imm (as opposed to an address reloc).
- if (!MI->getOperand(2).isImm()) {
+ if (!getLdStOffsetOp(MI).isImm()) {
++MBBI;
break;
}
}
// Don't know how to handle pre/post-index versions, so move to the next
// instruction.
- if (isUnscaledLdst(Opc)) {
+ if (isUnscaledLdSt(Opc)) {
++MBBI;
break;
}
// FIXME: Do we need/want a pre-alloc pass like ARM has to try to keep
// loads and stores near one another?
-/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
-/// optimization pass.
+/// createAArch64LoadStoreOptimizationPass - returns a newly allocated
+/// instance of the AArch64 load / store optimization pass.
FunctionPass *llvm::createAArch64LoadStoreOptimizationPass() {
return new AArch64LoadStoreOpt();
}