X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FTarget%2FAArch64%2FAArch64InstrInfo.cpp;h=f398117de953b2f2d9a55673cfbaa0ca7c11ecab;hp=1aa511dc548c34f0ff1c6d50b5e65c51cfc46eeb;hb=5666fc71f0e2ed2c0400d8bca079a1dd3f33fe53;hpb=847547086d87bca88f9a0fb17723a4ae7e1ad772 diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp index 1aa511dc548..f398117de95 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -14,7 +14,6 @@ #include "AArch64InstrInfo.h" #include "AArch64Subtarget.h" #include "MCTargetDesc/AArch64AddressingModes.h" -#include "AArch64MachineCombinerPattern.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" @@ -31,7 +30,7 @@ using namespace llvm; AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI) : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP), - RI(this, &STI), Subtarget(STI) {} + RI(STI.getTargetTriple()), Subtarget(STI) {} /// GetInstSize - Return the number of bytes of code the specified /// instruction may be. This returns the maximum number of bytes. @@ -96,15 +95,10 @@ bool AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, SmallVectorImpl &Cond, bool AllowModify) const { // If the block has no terminators, it just falls into the block after it. - MachineBasicBlock::iterator I = MBB.end(); - if (I == MBB.begin()) + MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); + if (I == MBB.end()) return false; - --I; - while (I->isDebugValue()) { - if (I == MBB.begin()) - return false; - --I; - } + if (!isUnpredicatedTerminator(I)) return false; @@ -224,15 +218,10 @@ bool AArch64InstrInfo::ReverseBranchCondition( } unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { - MachineBasicBlock::iterator I = MBB.end(); - if (I == MBB.begin()) + MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); + if (I == MBB.end()) return 0; - --I; - while (I->isDebugValue()) { - if (I == MBB.begin()) - return 0; - --I; - } + if (!isUncondBranchOpcode(I->getOpcode()) && !isCondBranchOpcode(I->getOpcode())) return 0; @@ -255,14 +244,15 @@ unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { void AArch64InstrInfo::instantiateCondBranch( MachineBasicBlock &MBB, DebugLoc DL, MachineBasicBlock *TBB, - const SmallVectorImpl &Cond) const { + ArrayRef Cond) const { if (Cond[0].getImm() != -1) { // Regular Bcc BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB); } else { // Folded compare-and-branch + // Note that we use addOperand instead of addReg to keep the flags. const MachineInstrBuilder MIB = - BuildMI(&MBB, DL, get(Cond[1].getImm())).addReg(Cond[2].getReg()); + BuildMI(&MBB, DL, get(Cond[1].getImm())).addOperand(Cond[2]); if (Cond.size() > 3) MIB.addImm(Cond[3].getImm()); MIB.addMBB(TBB); @@ -271,7 +261,7 @@ void AArch64InstrInfo::instantiateCondBranch( unsigned AArch64InstrInfo::InsertBranch( MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl &Cond, DebugLoc DL) const { + ArrayRef Cond, DebugLoc DL) const { // Shouldn't be a fall through. assert(TBB && "InsertBranch must not be told to insert a fallthrough"); @@ -368,7 +358,7 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg, } bool AArch64InstrInfo::canInsertSelect( - const MachineBasicBlock &MBB, const SmallVectorImpl &Cond, + const MachineBasicBlock &MBB, ArrayRef Cond, unsigned TrueReg, unsigned FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const { // Check register classes. @@ -411,7 +401,7 @@ bool AArch64InstrInfo::canInsertSelect( void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DstReg, - const SmallVectorImpl &Cond, + ArrayRef Cond, unsigned TrueReg, unsigned FalseReg) const { MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); @@ -542,6 +532,14 @@ void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB, CC); } +/// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx. +static bool canBeExpandedToORR(const MachineInstr *MI, unsigned BitSize) { + uint64_t Imm = MI->getOperand(1).getImm(); + uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize); + uint64_t Encoding; + return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding); +} + // FIXME: this implementation should be micro-architecture dependent, so a // micro-architecture target hook should be introduced here in future. bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr *MI) const { @@ -582,6 +580,12 @@ bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr *MI) const { case AArch64::ORRWrr: case AArch64::ORRXrr: return true; + // If MOVi32imm or MOVi64imm can be expanded into ORRWri or + // ORRXri, it is as cheap as MOV + case AArch64::MOVi32imm: + return canBeExpandedToORR(MI, 32); + case AArch64::MOVi64imm: + return canBeExpandedToORR(MI, 64); } llvm_unreachable("Unknown opcode to check as cheap as a move!"); @@ -607,6 +611,40 @@ bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI, } } +bool +AArch64InstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa, + MachineInstr *MIb, + AliasAnalysis *AA) const { + const TargetRegisterInfo *TRI = &getRegisterInfo(); + unsigned BaseRegA = 0, BaseRegB = 0; + int OffsetA = 0, OffsetB = 0; + int WidthA = 0, WidthB = 0; + + assert(MIa && MIa->mayLoadOrStore() && "MIa must be a load or store."); + assert(MIb && MIb->mayLoadOrStore() && "MIb must be a load or store."); + + if (MIa->hasUnmodeledSideEffects() || MIb->hasUnmodeledSideEffects() || + MIa->hasOrderedMemoryRef() || MIb->hasOrderedMemoryRef()) + return false; + + // Retrieve the base register, offset from the base register and width. Width + // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If + // base registers are identical, and the offset of a lower memory access + + // the width doesn't overlap the offset of a higher memory access, + // then the memory accesses are different. + if (getMemOpBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) && + getMemOpBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) { + if (BaseRegA == BaseRegB) { + int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB; + int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA; + int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB; + if (LowOffset + LowWidth <= HighOffset) + return true; + } + } + return false; +} + /// analyzeCompare - For a comparison instruction, return the source registers /// in SrcReg and SrcReg2, and the value it compares against in CmpValue. /// Return true if the comparison instruction can be analyzed. @@ -670,9 +708,8 @@ static bool UpdateOperandRegClass(MachineInstr *Instr) { assert(MBB && "Can't get MachineBasicBlock here"); MachineFunction *MF = MBB->getParent(); assert(MF && "Can't get MachineFunction here"); - const TargetMachine *TM = &MF->getTarget(); - const TargetInstrInfo *TII = TM->getSubtargetImpl()->getInstrInfo(); - const TargetRegisterInfo *TRI = TM->getSubtargetImpl()->getRegisterInfo(); + const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); MachineRegisterInfo *MRI = &MF->getRegInfo(); for (unsigned OpIdx = 0, EndIdx = Instr->getNumOperands(); OpIdx < EndIdx; @@ -704,34 +741,87 @@ static bool UpdateOperandRegClass(MachineInstr *Instr) { return true; } -/// convertFlagSettingOpcode - return opcode that does not -/// set flags when possible. The caller is responsible to do -/// the actual substitution and legality checking. -static unsigned convertFlagSettingOpcode(MachineInstr *MI) { - unsigned NewOpc; - switch (MI->getOpcode()) { - default: - return false; - case AArch64::ADDSWrr: NewOpc = AArch64::ADDWrr; break; - case AArch64::ADDSWri: NewOpc = AArch64::ADDWri; break; - case AArch64::ADDSWrs: NewOpc = AArch64::ADDWrs; break; - case AArch64::ADDSWrx: NewOpc = AArch64::ADDWrx; break; - case AArch64::ADDSXrr: NewOpc = AArch64::ADDXrr; break; - case AArch64::ADDSXri: NewOpc = AArch64::ADDXri; break; - case AArch64::ADDSXrs: NewOpc = AArch64::ADDXrs; break; - case AArch64::ADDSXrx: NewOpc = AArch64::ADDXrx; break; - case AArch64::SUBSWrr: NewOpc = AArch64::SUBWrr; break; - case AArch64::SUBSWri: NewOpc = AArch64::SUBWri; break; - case AArch64::SUBSWrs: NewOpc = AArch64::SUBWrs; break; - case AArch64::SUBSWrx: NewOpc = AArch64::SUBWrx; break; - case AArch64::SUBSXrr: NewOpc = AArch64::SUBXrr; break; - case AArch64::SUBSXri: NewOpc = AArch64::SUBXri; break; - case AArch64::SUBSXrs: NewOpc = AArch64::SUBXrs; break; - case AArch64::SUBSXrx: NewOpc = AArch64::SUBXrx; break; - } - return NewOpc; +/// \brief Return the opcode that does not set flags when possible - otherwise +/// return the original opcode. The caller is responsible to do the actual +/// substitution and legality checking. +static unsigned convertFlagSettingOpcode(const MachineInstr *MI) { + // Don't convert all compare instructions, because for some the zero register + // encoding becomes the sp register. + bool MIDefinesZeroReg = false; + if (MI->definesRegister(AArch64::WZR) || MI->definesRegister(AArch64::XZR)) + MIDefinesZeroReg = true; + + switch (MI->getOpcode()) { + default: + return MI->getOpcode(); + case AArch64::ADDSWrr: + return AArch64::ADDWrr; + case AArch64::ADDSWri: + return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri; + case AArch64::ADDSWrs: + return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs; + case AArch64::ADDSWrx: + return AArch64::ADDWrx; + case AArch64::ADDSXrr: + return AArch64::ADDXrr; + case AArch64::ADDSXri: + return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri; + case AArch64::ADDSXrs: + return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs; + case AArch64::ADDSXrx: + return AArch64::ADDXrx; + case AArch64::SUBSWrr: + return AArch64::SUBWrr; + case AArch64::SUBSWri: + return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri; + case AArch64::SUBSWrs: + return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs; + case AArch64::SUBSWrx: + return AArch64::SUBWrx; + case AArch64::SUBSXrr: + return AArch64::SUBXrr; + case AArch64::SUBSXri: + return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri; + case AArch64::SUBSXrs: + return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs; + case AArch64::SUBSXrx: + return AArch64::SUBXrx; + } } +/// True when condition code could be modified on the instruction +/// trace starting at from and ending at to. +static bool modifiesConditionCode(MachineInstr *From, MachineInstr *To, + const bool CheckOnlyCCWrites, + const TargetRegisterInfo *TRI) { + // We iterate backward starting \p To until we hit \p From + MachineBasicBlock::iterator I = To, E = From, B = To->getParent()->begin(); + + // Early exit if To is at the beginning of the BB. + if (I == B) + return true; + + // Check whether the definition of SrcReg is in the same basic block as + // Compare. If not, assume the condition code gets modified on some path. + if (To->getParent() != From->getParent()) + return true; + + // Check that NZCV isn't set on the trace. + for (--I; I != E; --I) { + const MachineInstr &Instr = *I; + + if (Instr.modifiesRegister(AArch64::NZCV, TRI) || + (!CheckOnlyCCWrites && Instr.readsRegister(AArch64::NZCV, TRI))) + // This instruction modifies or uses NZCV after the one we want to + // change. + return true; + if (I == B) + // We currently don't allow the instruction trace to cross basic + // block boundaries + return true; + } + return false; +} /// optimizeCompareInstr - Convert the instruction supplying the argument to the /// comparison into one that sets the zero bit in the flags register. bool AArch64InstrInfo::optimizeCompareInstr( @@ -741,6 +831,11 @@ bool AArch64InstrInfo::optimizeCompareInstr( // Replace SUBSWrr with SUBWrr if NZCV is not used. int Cmp_NZCV = CmpInstr->findRegisterDefOperandIdx(AArch64::NZCV, true); if (Cmp_NZCV != -1) { + if (CmpInstr->definesRegister(AArch64::WZR) || + CmpInstr->definesRegister(AArch64::XZR)) { + CmpInstr->eraseFromParent(); + return true; + } unsigned Opc = CmpInstr->getOpcode(); unsigned NewOpc = convertFlagSettingOpcode(CmpInstr); if (NewOpc == Opc) @@ -770,36 +865,10 @@ bool AArch64InstrInfo::optimizeCompareInstr( if (!MI) return false; - // We iterate backward, starting from the instruction before CmpInstr and - // stop when reaching the definition of the source register or done with the - // basic block, to check whether NZCV is used or modified in between. - MachineBasicBlock::iterator I = CmpInstr, E = MI, - B = CmpInstr->getParent()->begin(); - - // Early exit if CmpInstr is at the beginning of the BB. - if (I == B) - return false; - - // Check whether the definition of SrcReg is in the same basic block as - // Compare. If not, we can't optimize away the Compare. - if (MI->getParent() != CmpInstr->getParent()) - return false; - - // Check that NZCV isn't set between the comparison instruction and the one we - // want to change. + bool CheckOnlyCCWrites = false; const TargetRegisterInfo *TRI = &getRegisterInfo(); - for (--I; I != E; --I) { - const MachineInstr &Instr = *I; - - if (Instr.modifiesRegister(AArch64::NZCV, TRI) || - Instr.readsRegister(AArch64::NZCV, TRI)) - // This instruction modifies or uses NZCV after the one we want to - // change. We can't do this transformation. - return false; - if (I == B) - // The 'and' is below the comparison instruction. - return false; - } + if (modifiesConditionCode(MI, CmpInstr, CheckOnlyCCWrites, TRI)) + return false; unsigned NewOpc = MI->getOpcode(); switch (MI->getOpcode()) { @@ -1244,9 +1313,9 @@ void AArch64InstrInfo::suppressLdStPair(MachineInstr *MI) const { } bool -AArch64InstrInfo::getLdStBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg, - unsigned &Offset, - const TargetRegisterInfo *TRI) const { +AArch64InstrInfo::getMemOpBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg, + unsigned &Offset, + const TargetRegisterInfo *TRI) const { switch (LdSt->getOpcode()) { default: return false; @@ -1270,9 +1339,97 @@ AArch64InstrInfo::getLdStBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg, }; } +bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth( + MachineInstr *LdSt, unsigned &BaseReg, int &Offset, int &Width, + const TargetRegisterInfo *TRI) const { + // Handle only loads/stores with base register followed by immediate offset. + if (LdSt->getNumOperands() != 3) + return false; + if (!LdSt->getOperand(1).isReg() || !LdSt->getOperand(2).isImm()) + return false; + + // Offset is calculated as the immediate operand multiplied by the scaling factor. + // Unscaled instructions have scaling factor set to 1. + int Scale = 0; + switch (LdSt->getOpcode()) { + default: + return false; + case AArch64::LDURQi: + case AArch64::STURQi: + Width = 16; + Scale = 1; + break; + case AArch64::LDURXi: + case AArch64::LDURDi: + case AArch64::STURXi: + case AArch64::STURDi: + Width = 8; + Scale = 1; + break; + case AArch64::LDURWi: + case AArch64::LDURSi: + case AArch64::LDURSWi: + case AArch64::STURWi: + case AArch64::STURSi: + Width = 4; + Scale = 1; + break; + case AArch64::LDURHi: + case AArch64::LDURHHi: + case AArch64::LDURSHXi: + case AArch64::LDURSHWi: + case AArch64::STURHi: + case AArch64::STURHHi: + Width = 2; + Scale = 1; + break; + case AArch64::LDURBi: + case AArch64::LDURBBi: + case AArch64::LDURSBXi: + case AArch64::LDURSBWi: + case AArch64::STURBi: + case AArch64::STURBBi: + Width = 1; + Scale = 1; + break; + case AArch64::LDRQui: + case AArch64::STRQui: + Scale = Width = 16; + break; + case AArch64::LDRXui: + case AArch64::LDRDui: + case AArch64::STRXui: + case AArch64::STRDui: + Scale = Width = 8; + break; + case AArch64::LDRWui: + case AArch64::LDRSui: + case AArch64::STRWui: + case AArch64::STRSui: + Scale = Width = 4; + break; + case AArch64::LDRHui: + case AArch64::LDRHHui: + case AArch64::STRHui: + case AArch64::STRHHui: + Scale = Width = 2; + break; + case AArch64::LDRBui: + case AArch64::LDRBBui: + case AArch64::STRBui: + case AArch64::STRBBui: + Scale = Width = 1; + break; + }; + + BaseReg = LdSt->getOperand(1).getReg(); + Offset = LdSt->getOperand(2).getImm() * Scale; + return true; +} + /// Detect opportunities for ldp/stp formation. /// -/// Only called for LdSt for which getLdStBaseRegImmOfs returns true. +/// Only called for LdSt for which getMemOpBaseRegImmOfs returns true. bool AArch64InstrInfo::shouldClusterLoads(MachineInstr *FirstLdSt, MachineInstr *SecondLdSt, unsigned NumLoads) const { @@ -1281,7 +1438,7 @@ bool AArch64InstrInfo::shouldClusterLoads(MachineInstr *FirstLdSt, return false; if (FirstLdSt->getOpcode() != SecondLdSt->getOpcode()) return false; - // getLdStBaseRegImmOfs guarantees that oper 2 isImm. + // getMemOpBaseRegImmOfs guarantees that oper 2 isImm. unsigned Ofs1 = FirstLdSt->getOperand(2).getImm(); // Allow 6 bits of positive range. if (Ofs1 > 64) @@ -1293,35 +1450,54 @@ bool AArch64InstrInfo::shouldClusterLoads(MachineInstr *FirstLdSt, bool AArch64InstrInfo::shouldScheduleAdjacent(MachineInstr *First, MachineInstr *Second) const { - // Cyclone can fuse CMN, CMP followed by Bcc. - - // FIXME: B0 can also fuse: - // AND, BIC, ORN, ORR, or EOR (optional S) followed by Bcc or CBZ or CBNZ. - if (Second->getOpcode() != AArch64::Bcc) - return false; - switch (First->getOpcode()) { - default: - return false; - case AArch64::SUBSWri: - case AArch64::ADDSWri: - case AArch64::ANDSWri: - case AArch64::SUBSXri: - case AArch64::ADDSXri: - case AArch64::ANDSXri: - return true; + if (Subtarget.isCyclone()) { + // Cyclone can fuse CMN, CMP, TST followed by Bcc. + unsigned SecondOpcode = Second->getOpcode(); + if (SecondOpcode == AArch64::Bcc) { + switch (First->getOpcode()) { + default: + return false; + case AArch64::SUBSWri: + case AArch64::ADDSWri: + case AArch64::ANDSWri: + case AArch64::SUBSXri: + case AArch64::ADDSXri: + case AArch64::ANDSXri: + return true; + } + } + // Cyclone B0 also supports ALU operations followed by CBZ/CBNZ. + if (SecondOpcode == AArch64::CBNZW || SecondOpcode == AArch64::CBNZX || + SecondOpcode == AArch64::CBZW || SecondOpcode == AArch64::CBZX) { + switch (First->getOpcode()) { + default: + return false; + case AArch64::ADDWri: + case AArch64::ADDXri: + case AArch64::ANDWri: + case AArch64::ANDXri: + case AArch64::EORWri: + case AArch64::EORXri: + case AArch64::ORRWri: + case AArch64::ORRXri: + case AArch64::SUBWri: + case AArch64::SUBXri: + return true; + } + } } + return false; } -MachineInstr *AArch64InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, - int FrameIx, - uint64_t Offset, - const MDNode *MDPtr, - DebugLoc DL) const { +MachineInstr *AArch64InstrInfo::emitFrameIndexDebugValue( + MachineFunction &MF, int FrameIx, uint64_t Offset, const MDNode *Var, + const MDNode *Expr, DebugLoc DL) const { MachineInstrBuilder MIB = BuildMI(MF, DL, get(AArch64::DBG_VALUE)) .addFrameIndex(FrameIx) .addImm(0) .addImm(Offset) - .addMetadata(MDPtr); + .addMetadata(Var) + .addMetadata(Expr); return &*MIB; } @@ -1363,7 +1539,7 @@ void AArch64InstrInfo::copyPhysRegTuple( } for (; SubReg != End; SubReg += Incr) { - const MachineInstrBuilder &MIB = BuildMI(MBB, I, DL, get(Opcode)); + const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode)); AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI); AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI); AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI); @@ -1663,7 +1839,7 @@ void AArch64InstrInfo::storeRegToStackSlot( MachineFrameInfo &MFI = *MF.getFrameInfo(); unsigned Align = MFI.getObjectAlignment(FI); - MachinePointerInfo PtrInfo(PseudoSourceValue::getFixedStack(FI)); + MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI); MachineMemOperand *MMO = MF.getMachineMemOperand( PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align); unsigned Opc = 0; @@ -1741,7 +1917,7 @@ void AArch64InstrInfo::storeRegToStackSlot( } assert(Opc && "Unknown register class"); - const MachineInstrBuilder &MI = BuildMI(MBB, MBBI, DL, get(Opc)) + const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc)) .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI); @@ -1760,7 +1936,7 @@ void AArch64InstrInfo::loadRegFromStackSlot( MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); unsigned Align = MFI.getObjectAlignment(FI); - MachinePointerInfo PtrInfo(PseudoSourceValue::getFixedStack(FI)); + MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI); MachineMemOperand *MMO = MF.getMachineMemOperand( PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align); @@ -1839,7 +2015,7 @@ void AArch64InstrInfo::loadRegFromStackSlot( } assert(Opc && "Unknown register class"); - const MachineInstrBuilder &MI = BuildMI(MBB, MBBI, DL, get(Opc)) + const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc)) .addReg(DestReg, getDefRegState(true)) .addFrameIndex(FI); if (Offset) @@ -1905,10 +2081,9 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB, .setMIFlag(Flag); } -MachineInstr * -AArch64InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, - const SmallVectorImpl &Ops, - int FrameIndex) const { +MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( + MachineFunction &MF, MachineInstr *MI, ArrayRef Ops, + MachineBasicBlock::iterator InsertPt, int FrameIndex) const { // This is a bit of a hack. Consider this instruction: // // %vreg0 = COPY %SP; GPR64all:%vreg0 @@ -2076,11 +2251,19 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset, case AArch64::LDPDi: case AArch64::STPXi: case AArch64::STPDi: + case AArch64::LDNPXi: + case AArch64::LDNPDi: + case AArch64::STNPXi: + case AArch64::STNPDi: + ImmIdx = 3; IsSigned = true; Scale = 8; break; case AArch64::LDPQi: case AArch64::STPQi: + case AArch64::LDNPQi: + case AArch64::STNPQi: + ImmIdx = 3; IsSigned = true; Scale = 16; break; @@ -2088,6 +2271,11 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset, case AArch64::LDPSi: case AArch64::STPWi: case AArch64::STPSi: + case AArch64::LDNPWi: + case AArch64::LDNPSi: + case AArch64::STNPWi: + case AArch64::STNPSi: + ImmIdx = 3; IsSigned = true; Scale = 4; break; @@ -2203,10 +2391,10 @@ bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, void AArch64InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { NopInst.setOpcode(AArch64::HINT); - NopInst.addOperand(MCOperand::CreateImm(0)); + NopInst.addOperand(MCOperand::createImm(0)); } /// useMachineCombiner - return true when a target supports MachineCombiner -bool AArch64InstrInfo::useMachineCombiner(void) const { +bool AArch64InstrInfo::useMachineCombiner() const { // AArch64 supports the combiner return true; } @@ -2299,15 +2487,36 @@ static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO, return true; } -/// hasPattern - return true when there is potentially a faster code sequence -/// for an instruction chain ending in \p Root. All potential patterns are -/// listed -/// in the \p Pattern vector. Pattern should be sorted in priority order since -/// the pattern evaluator stops checking as soon as it finds a faster sequence. +// TODO: There are many more machine instruction opcodes to match: +// 1. Other data types (integer, vectors) +// 2. Other math / logic operations (xor, or) +// 3. Other forms of the same operation (intrinsics and other variants) +bool AArch64InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const { + switch (Inst.getOpcode()) { + case AArch64::FADDDrr: + case AArch64::FADDSrr: + case AArch64::FADDv2f32: + case AArch64::FADDv2f64: + case AArch64::FADDv4f32: + case AArch64::FMULDrr: + case AArch64::FMULSrr: + case AArch64::FMULX32: + case AArch64::FMULX64: + case AArch64::FMULXv2f32: + case AArch64::FMULXv2f64: + case AArch64::FMULXv4f32: + case AArch64::FMULv2f32: + case AArch64::FMULv2f64: + case AArch64::FMULv4f32: + return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath; + default: + return false; + } +} -bool AArch64InstrInfo::hasPattern( - MachineInstr &Root, - SmallVectorImpl &Pattern) const { +/// Find instructions that can be turned into madd. +static bool getMaddPatterns(MachineInstr &Root, + SmallVectorImpl &Patterns) { unsigned Opc = Root.getOpcode(); MachineBasicBlock &MBB = *Root.getParent(); bool Found = false; @@ -2335,76 +2544,76 @@ bool AArch64InstrInfo::hasPattern( "ADDWrr does not have register operands"); if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr, AArch64::WZR)) { - Pattern.push_back(MachineCombinerPattern::MC_MULADDW_OP1); + Patterns.push_back(MachineCombinerPattern::MULADDW_OP1); Found = true; } if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr, AArch64::WZR)) { - Pattern.push_back(MachineCombinerPattern::MC_MULADDW_OP2); + Patterns.push_back(MachineCombinerPattern::MULADDW_OP2); Found = true; } break; case AArch64::ADDXrr: if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr, AArch64::XZR)) { - Pattern.push_back(MachineCombinerPattern::MC_MULADDX_OP1); + Patterns.push_back(MachineCombinerPattern::MULADDX_OP1); Found = true; } if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr, AArch64::XZR)) { - Pattern.push_back(MachineCombinerPattern::MC_MULADDX_OP2); + Patterns.push_back(MachineCombinerPattern::MULADDX_OP2); Found = true; } break; case AArch64::SUBWrr: if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr, AArch64::WZR)) { - Pattern.push_back(MachineCombinerPattern::MC_MULSUBW_OP1); + Patterns.push_back(MachineCombinerPattern::MULSUBW_OP1); Found = true; } if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr, AArch64::WZR)) { - Pattern.push_back(MachineCombinerPattern::MC_MULSUBW_OP2); + Patterns.push_back(MachineCombinerPattern::MULSUBW_OP2); Found = true; } break; case AArch64::SUBXrr: if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr, AArch64::XZR)) { - Pattern.push_back(MachineCombinerPattern::MC_MULSUBX_OP1); + Patterns.push_back(MachineCombinerPattern::MULSUBX_OP1); Found = true; } if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr, AArch64::XZR)) { - Pattern.push_back(MachineCombinerPattern::MC_MULSUBX_OP2); + Patterns.push_back(MachineCombinerPattern::MULSUBX_OP2); Found = true; } break; case AArch64::ADDWri: if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr, AArch64::WZR)) { - Pattern.push_back(MachineCombinerPattern::MC_MULADDWI_OP1); + Patterns.push_back(MachineCombinerPattern::MULADDWI_OP1); Found = true; } break; case AArch64::ADDXri: if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr, AArch64::XZR)) { - Pattern.push_back(MachineCombinerPattern::MC_MULADDXI_OP1); + Patterns.push_back(MachineCombinerPattern::MULADDXI_OP1); Found = true; } break; case AArch64::SUBWri: if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr, AArch64::WZR)) { - Pattern.push_back(MachineCombinerPattern::MC_MULSUBWI_OP1); + Patterns.push_back(MachineCombinerPattern::MULSUBWI_OP1); Found = true; } break; case AArch64::SUBXri: if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr, AArch64::XZR)) { - Pattern.push_back(MachineCombinerPattern::MC_MULSUBXI_OP1); + Patterns.push_back(MachineCombinerPattern::MULSUBXI_OP1); Found = true; } break; @@ -2412,6 +2621,20 @@ bool AArch64InstrInfo::hasPattern( return Found; } +/// Return true when there is potentially a faster code sequence for an +/// instruction chain ending in \p Root. All potential patterns are listed in +/// the \p Pattern vector. Pattern should be sorted in priority order since the +/// pattern evaluator stops checking as soon as it finds a faster sequence. + +bool AArch64InstrInfo::getMachineCombinerPatterns( + MachineInstr &Root, + SmallVectorImpl &Patterns) const { + if (getMaddPatterns(Root, Patterns)) + return true; + + return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns); +} + /// genMadd - Generate madd instruction and combine mul and add. /// Example: /// MUL I=A,B,0 @@ -2507,33 +2730,35 @@ static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI, return MUL; } -/// genAlternativeCodeSequence - when hasPattern() finds a pattern +/// When getMachineCombinerPatterns() finds potential patterns, /// this function generates the instructions that could replace the /// original code sequence void AArch64InstrInfo::genAlternativeCodeSequence( - MachineInstr &Root, MachineCombinerPattern::MC_PATTERN Pattern, + MachineInstr &Root, MachineCombinerPattern Pattern, SmallVectorImpl &InsInstrs, SmallVectorImpl &DelInstrs, DenseMap &InstrIdxForVirtReg) const { MachineBasicBlock &MBB = *Root.getParent(); MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); MachineFunction &MF = *MBB.getParent(); - const TargetInstrInfo *TII = MF.getTarget().getSubtargetImpl()->getInstrInfo(); + const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); MachineInstr *MUL; const TargetRegisterClass *RC; unsigned Opc; switch (Pattern) { default: - // signal error. - break; - case MachineCombinerPattern::MC_MULADDW_OP1: - case MachineCombinerPattern::MC_MULADDX_OP1: + // Reassociate instructions. + TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs, + DelInstrs, InstrIdxForVirtReg); + return; + case MachineCombinerPattern::MULADDW_OP1: + case MachineCombinerPattern::MULADDX_OP1: // MUL I=A,B,0 // ADD R,I,C // ==> MADD R,A,B,C // --- Create(MADD); - if (Pattern == MachineCombinerPattern::MC_MULADDW_OP1) { + if (Pattern == MachineCombinerPattern::MULADDW_OP1) { Opc = AArch64::MADDWrrr; RC = &AArch64::GPR32RegClass; } else { @@ -2542,13 +2767,13 @@ void AArch64InstrInfo::genAlternativeCodeSequence( } MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); break; - case MachineCombinerPattern::MC_MULADDW_OP2: - case MachineCombinerPattern::MC_MULADDX_OP2: + case MachineCombinerPattern::MULADDW_OP2: + case MachineCombinerPattern::MULADDX_OP2: // MUL I=A,B,0 // ADD R,C,I // ==> MADD R,A,B,C // --- Create(MADD); - if (Pattern == MachineCombinerPattern::MC_MULADDW_OP2) { + if (Pattern == MachineCombinerPattern::MULADDW_OP2) { Opc = AArch64::MADDWrrr; RC = &AArch64::GPR32RegClass; } else { @@ -2557,8 +2782,8 @@ void AArch64InstrInfo::genAlternativeCodeSequence( } MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; - case MachineCombinerPattern::MC_MULADDWI_OP1: - case MachineCombinerPattern::MC_MULADDXI_OP1: { + case MachineCombinerPattern::MULADDWI_OP1: + case MachineCombinerPattern::MULADDXI_OP1: { // MUL I=A,B,0 // ADD R,I,Imm // ==> ORR V, ZR, Imm @@ -2566,7 +2791,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence( // --- Create(MADD); const TargetRegisterClass *OrrRC; unsigned BitSize, OrrOpc, ZeroReg; - if (Pattern == MachineCombinerPattern::MC_MULADDWI_OP1) { + if (Pattern == MachineCombinerPattern::MULADDWI_OP1) { OrrOpc = AArch64::ORRWri; OrrRC = &AArch64::GPR32spRegClass; BitSize = 32; @@ -2601,8 +2826,8 @@ void AArch64InstrInfo::genAlternativeCodeSequence( } break; } - case MachineCombinerPattern::MC_MULSUBW_OP1: - case MachineCombinerPattern::MC_MULSUBX_OP1: { + case MachineCombinerPattern::MULSUBW_OP1: + case MachineCombinerPattern::MULSUBX_OP1: { // MUL I=A,B,0 // SUB R,I, C // ==> SUB V, 0, C @@ -2610,7 +2835,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence( // --- Create(MADD); const TargetRegisterClass *SubRC; unsigned SubOpc, ZeroReg; - if (Pattern == MachineCombinerPattern::MC_MULSUBW_OP1) { + if (Pattern == MachineCombinerPattern::MULSUBW_OP1) { SubOpc = AArch64::SUBWrr; SubRC = &AArch64::GPR32spRegClass; ZeroReg = AArch64::WZR; @@ -2634,13 +2859,13 @@ void AArch64InstrInfo::genAlternativeCodeSequence( MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC); break; } - case MachineCombinerPattern::MC_MULSUBW_OP2: - case MachineCombinerPattern::MC_MULSUBX_OP2: + case MachineCombinerPattern::MULSUBW_OP2: + case MachineCombinerPattern::MULSUBX_OP2: // MUL I=A,B,0 // SUB R,C,I // ==> MSUB R,A,B,C (computes C - A*B) // --- Create(MSUB); - if (Pattern == MachineCombinerPattern::MC_MULSUBW_OP2) { + if (Pattern == MachineCombinerPattern::MULSUBW_OP2) { Opc = AArch64::MSUBWrrr; RC = &AArch64::GPR32RegClass; } else { @@ -2649,8 +2874,8 @@ void AArch64InstrInfo::genAlternativeCodeSequence( } MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; - case MachineCombinerPattern::MC_MULSUBWI_OP1: - case MachineCombinerPattern::MC_MULSUBXI_OP1: { + case MachineCombinerPattern::MULSUBWI_OP1: + case MachineCombinerPattern::MULSUBXI_OP1: { // MUL I=A,B,0 // SUB R,I, Imm // ==> ORR V, ZR, -Imm @@ -2658,7 +2883,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence( // --- Create(MADD); const TargetRegisterClass *OrrRC; unsigned BitSize, OrrOpc, ZeroReg; - if (Pattern == MachineCombinerPattern::MC_MULSUBWI_OP1) { + if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) { OrrOpc = AArch64::ORRWri; OrrRC = &AArch64::GPR32spRegClass; BitSize = 32; @@ -2667,7 +2892,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence( RC = &AArch64::GPR32RegClass; } else { OrrOpc = AArch64::ORRXri; - OrrRC = &AArch64::GPR64RegClass; + OrrRC = &AArch64::GPR64spRegClass; BitSize = 64; ZeroReg = AArch64::XZR; Opc = AArch64::MADDXrrr; @@ -2699,3 +2924,129 @@ void AArch64InstrInfo::genAlternativeCodeSequence( return; } + +/// \brief Replace csincr-branch sequence by simple conditional branch +/// +/// Examples: +/// 1. +/// csinc w9, wzr, wzr, +/// tbnz w9, #0, 0x44 +/// to +/// b. +/// +/// 2. +/// csinc w9, wzr, wzr, +/// tbz w9, #0, 0x44 +/// to +/// b. +/// +/// \param MI Conditional Branch +/// \return True when the simple conditional branch is generated +/// +bool AArch64InstrInfo::optimizeCondBranch(MachineInstr *MI) const { + bool IsNegativeBranch = false; + bool IsTestAndBranch = false; + unsigned TargetBBInMI = 0; + switch (MI->getOpcode()) { + default: + llvm_unreachable("Unknown branch instruction?"); + case AArch64::Bcc: + return false; + case AArch64::CBZW: + case AArch64::CBZX: + TargetBBInMI = 1; + break; + case AArch64::CBNZW: + case AArch64::CBNZX: + TargetBBInMI = 1; + IsNegativeBranch = true; + break; + case AArch64::TBZW: + case AArch64::TBZX: + TargetBBInMI = 2; + IsTestAndBranch = true; + break; + case AArch64::TBNZW: + case AArch64::TBNZX: + TargetBBInMI = 2; + IsNegativeBranch = true; + IsTestAndBranch = true; + break; + } + // So we increment a zero register and test for bits other + // than bit 0? Conservatively bail out in case the verifier + // missed this case. + if (IsTestAndBranch && MI->getOperand(1).getImm()) + return false; + + // Find Definition. + assert(MI->getParent() && "Incomplete machine instruciton\n"); + MachineBasicBlock *MBB = MI->getParent(); + MachineFunction *MF = MBB->getParent(); + MachineRegisterInfo *MRI = &MF->getRegInfo(); + unsigned VReg = MI->getOperand(0).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(VReg)) + return false; + + MachineInstr *DefMI = MRI->getVRegDef(VReg); + + // Look for CSINC + if (!(DefMI->getOpcode() == AArch64::CSINCWr && + DefMI->getOperand(1).getReg() == AArch64::WZR && + DefMI->getOperand(2).getReg() == AArch64::WZR) && + !(DefMI->getOpcode() == AArch64::CSINCXr && + DefMI->getOperand(1).getReg() == AArch64::XZR && + DefMI->getOperand(2).getReg() == AArch64::XZR)) + return false; + + if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1) + return false; + + AArch64CC::CondCode CC = + (AArch64CC::CondCode)DefMI->getOperand(3).getImm(); + bool CheckOnlyCCWrites = true; + // Convert only when the condition code is not modified between + // the CSINC and the branch. The CC may be used by other + // instructions in between. + if (modifiesConditionCode(DefMI, MI, CheckOnlyCCWrites, &getRegisterInfo())) + return false; + MachineBasicBlock &RefToMBB = *MBB; + MachineBasicBlock *TBB = MI->getOperand(TargetBBInMI).getMBB(); + DebugLoc DL = MI->getDebugLoc(); + if (IsNegativeBranch) + CC = AArch64CC::getInvertedCondCode(CC); + BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB); + MI->eraseFromParent(); + return true; +} + +std::pair +AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const { + const unsigned Mask = AArch64II::MO_FRAGMENT; + return std::make_pair(TF & Mask, TF & ~Mask); +} + +ArrayRef> +AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const { + using namespace AArch64II; + static const std::pair TargetFlags[] = { + {MO_PAGE, "aarch64-page"}, + {MO_PAGEOFF, "aarch64-pageoff"}, + {MO_G3, "aarch64-g3"}, + {MO_G2, "aarch64-g2"}, + {MO_G1, "aarch64-g1"}, + {MO_G0, "aarch64-g0"}, + {MO_HI12, "aarch64-hi12"}}; + return makeArrayRef(TargetFlags); +} + +ArrayRef> +AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const { + using namespace AArch64II; + static const std::pair TargetFlags[] = { + {MO_GOT, "aarch64-got"}, + {MO_NC, "aarch64-nc"}, + {MO_TLS, "aarch64-tls"}, + {MO_CONSTPOOL, "aarch64-constant-pool"}}; + return makeArrayRef(TargetFlags); +}