X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FTarget%2FAArch64%2FAArch64InstrInfo.cpp;h=c8592bd1c8834353d14bb726892d4e3b2b95ae50;hp=e9fecbf81d7fac8a57d621794fca5a6b53c18a69;hb=c6d2db4db1c35b70c3d955e8bf372cdf0cd2cefd;hpb=e4fa341dde3c9521b7f11bd53ecdcbeb3f8fcbda diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp index e9fecbf81d7..c8592bd1c88 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -607,6 +607,42 @@ bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI, } } +bool +AArch64InstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa, + MachineInstr *MIb, + AliasAnalysis *AA) const { + const TargetRegisterInfo *TRI = &getRegisterInfo(); + unsigned BaseRegA = 0, BaseRegB = 0; + int OffsetA = 0, OffsetB = 0; + int WidthA = 0, WidthB = 0; + + assert(MIa && (MIa->mayLoad() || MIa->mayStore()) && + "MIa must be a store or a load"); + assert(MIb && (MIb->mayLoad() || MIb->mayStore()) && + "MIb must be a store or a load"); + + if (MIa->hasUnmodeledSideEffects() || MIb->hasUnmodeledSideEffects() || + MIa->hasOrderedMemoryRef() || MIb->hasOrderedMemoryRef()) + return false; + + // Retrieve the base register, offset from the base register and width. Width + // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If + // base registers are identical, and the offset of a lower memory access + + // the width doesn't overlap the offset of a higher memory access, + // then the memory accesses are different. + if (getLdStBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) && + getLdStBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) { + if (BaseRegA == BaseRegB) { + int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB; + int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA; + int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB; + if (LowOffset + LowWidth <= HighOffset) + return true; + } + } + return false; +} + /// analyzeCompare - For a comparison instruction, return the source registers /// in SrcReg and SrcReg2, and the value it compares against in CmpValue. /// Return true if the comparison instruction can be analyzed. @@ -641,7 +677,8 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, SrcReg = MI->getOperand(1).getReg(); SrcReg2 = 0; CmpMask = ~0; - CmpValue = MI->getOperand(2).getImm(); + // FIXME: In order to convert CmpValue to 0 or 1 + CmpValue = (MI->getOperand(2).getImm() != 0); return true; case AArch64::ANDSWri: case AArch64::ANDSXri: @@ -650,9 +687,14 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, SrcReg = MI->getOperand(1).getReg(); SrcReg2 = 0; CmpMask = ~0; - CmpValue = AArch64_AM::decodeLogicalImmediate( - MI->getOperand(2).getImm(), - MI->getOpcode() == AArch64::ANDSWri ? 32 : 64); + // FIXME:The return val type of decodeLogicalImmediate is uint64_t, + // while the type of CmpValue is int. When converting uint64_t to int, + // the high 32 bits of uint64_t will be lost. + // In fact it causes a bug in spec2006-483.xalancbmk + // CmpValue is only used to compare with zero in OptimizeCompareInstr + CmpValue = (AArch64_AM::decodeLogicalImmediate( + MI->getOperand(2).getImm(), + MI->getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0); return true; } @@ -726,6 +768,39 @@ static unsigned convertFlagSettingOpcode(MachineInstr *MI) { return NewOpc; } +/// True when condition code could be modified on the instruction +/// trace starting at from and ending at to. +static bool modifiesConditionCode(MachineInstr *From, MachineInstr *To, + const bool CheckOnlyCCWrites, + const TargetRegisterInfo *TRI) { + // We iterate backward starting \p To until we hit \p From + MachineBasicBlock::iterator I = To, E = From, B = To->getParent()->begin(); + + // Early exit if To is at the beginning of the BB. + if (I == B) + return true; + + // Check whether the definition of SrcReg is in the same basic block as + // Compare. If not, assume the condition code gets modified on some path. + if (To->getParent() != From->getParent()) + return true; + + // Check that NZCV isn't set on the trace. + for (--I; I != E; --I) { + const MachineInstr &Instr = *I; + + if (Instr.modifiesRegister(AArch64::NZCV, TRI) || + (!CheckOnlyCCWrites && Instr.readsRegister(AArch64::NZCV, TRI))) + // This instruction modifies or uses NZCV after the one we want to + // change. + return true; + if (I == B) + // We currently don't allow the instruction trace to cross basic + // block boundaries + return true; + } + return false; +} /// optimizeCompareInstr - Convert the instruction supplying the argument to the /// comparison into one that sets the zero bit in the flags register. bool AArch64InstrInfo::optimizeCompareInstr( @@ -749,6 +824,9 @@ bool AArch64InstrInfo::optimizeCompareInstr( } // Continue only if we have a "ri" where immediate is zero. + // FIXME:CmpValue has already been converted to 0 or 1 in analyzeCompare + // function. + assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!"); if (CmpValue != 0 || SrcReg2 != 0) return false; @@ -761,36 +839,10 @@ bool AArch64InstrInfo::optimizeCompareInstr( if (!MI) return false; - // We iterate backward, starting from the instruction before CmpInstr and - // stop when reaching the definition of the source register or done with the - // basic block, to check whether NZCV is used or modified in between. - MachineBasicBlock::iterator I = CmpInstr, E = MI, - B = CmpInstr->getParent()->begin(); - - // Early exit if CmpInstr is at the beginning of the BB. - if (I == B) - return false; - - // Check whether the definition of SrcReg is in the same basic block as - // Compare. If not, we can't optimize away the Compare. - if (MI->getParent() != CmpInstr->getParent()) - return false; - - // Check that NZCV isn't set between the comparison instruction and the one we - // want to change. + bool CheckOnlyCCWrites = false; const TargetRegisterInfo *TRI = &getRegisterInfo(); - for (--I; I != E; --I) { - const MachineInstr &Instr = *I; - - if (Instr.modifiesRegister(AArch64::NZCV, TRI) || - Instr.readsRegister(AArch64::NZCV, TRI)) - // This instruction modifies or uses NZCV after the one we want to - // change. We can't do this transformation. - return false; - if (I == B) - // The 'and' is below the comparison instruction. - return false; - } + if (modifiesConditionCode(MI, CmpInstr, CheckOnlyCCWrites, TRI)) + return false; unsigned NewOpc = MI->getOpcode(); switch (MI->getOpcode()) { @@ -1261,6 +1313,102 @@ AArch64InstrInfo::getLdStBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg, }; } +bool AArch64InstrInfo::getLdStBaseRegImmOfsWidth( + MachineInstr *LdSt, unsigned &BaseReg, int &Offset, int &Width, + const TargetRegisterInfo *TRI) const { + // Handle only loads/stores with base register followed by immediate offset. + if (LdSt->getNumOperands() != 3) + return false; + if (!LdSt->getOperand(1).isReg() || !LdSt->getOperand(2).isImm()) + return false; + + // Offset is calculated as the immediate operand multiplied by the scaling factor. + // Unscaled instructions have scaling factor set to 1. + int Scale = 0; + switch (LdSt->getOpcode()) { + default: + return false; + case AArch64::LDURQi: + case AArch64::STURQi: + Width = 16; + Scale = 1; + break; + case AArch64::LDURXi: + case AArch64::LDURDi: + case AArch64::STURXi: + case AArch64::STURDi: + Width = 8; + Scale = 1; + break; + case AArch64::LDURWi: + case AArch64::LDURSi: + case AArch64::LDURSWi: + case AArch64::STURWi: + case AArch64::STURSi: + Width = 4; + Scale = 1; + break; + case AArch64::LDURHi: + case AArch64::LDURHHi: + case AArch64::LDURSHXi: + case AArch64::LDURSHWi: + case AArch64::STURHi: + case AArch64::STURHHi: + Width = 2; + Scale = 1; + break; + case AArch64::LDURBi: + case AArch64::LDURBBi: + case AArch64::LDURSBXi: + case AArch64::LDURSBWi: + case AArch64::STURBi: + case AArch64::STURBBi: + Width = 1; + Scale = 1; + break; + case AArch64::LDRXui: + case AArch64::STRXui: + Scale = Width = 8; + break; + case AArch64::LDRWui: + case AArch64::STRWui: + Scale = Width = 4; + break; + case AArch64::LDRBui: + case AArch64::STRBui: + Scale = Width = 1; + break; + case AArch64::LDRHui: + case AArch64::STRHui: + Scale = Width = 2; + break; + case AArch64::LDRSui: + case AArch64::STRSui: + Scale = Width = 4; + break; + case AArch64::LDRDui: + case AArch64::STRDui: + Scale = Width = 8; + break; + case AArch64::LDRQui: + case AArch64::STRQui: + Scale = Width = 16; + break; + case AArch64::LDRBBui: + case AArch64::STRBBui: + Scale = Width = 1; + break; + case AArch64::LDRHHui: + case AArch64::STRHHui: + Scale = Width = 2; + break; + }; + + BaseReg = LdSt->getOperand(1).getReg(); + Offset = LdSt->getOperand(2).getImm() * Scale; + return true; +} + /// Detect opportunities for ldp/stp formation. /// /// Only called for LdSt for which getLdStBaseRegImmOfs returns true. @@ -1303,16 +1451,15 @@ bool AArch64InstrInfo::shouldScheduleAdjacent(MachineInstr *First, } } -MachineInstr *AArch64InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, - int FrameIx, - uint64_t Offset, - const MDNode *MDPtr, - DebugLoc DL) const { +MachineInstr *AArch64InstrInfo::emitFrameIndexDebugValue( + MachineFunction &MF, int FrameIx, uint64_t Offset, const MDNode *Var, + const MDNode *Expr, DebugLoc DL) const { MachineInstrBuilder MIB = BuildMI(MF, DL, get(AArch64::DBG_VALUE)) .addFrameIndex(FrameIx) .addImm(0) .addImm(Offset) - .addMetadata(MDPtr); + .addMetadata(Var) + .addMetadata(Expr); return &*MIB; } @@ -2197,7 +2344,7 @@ void AArch64InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { NopInst.addOperand(MCOperand::CreateImm(0)); } /// useMachineCombiner - return true when a target supports MachineCombiner -bool AArch64InstrInfo::useMachineCombiner(void) const { +bool AArch64InstrInfo::useMachineCombiner() const { // AArch64 supports the combiner return true; } @@ -2409,7 +2556,7 @@ bool AArch64InstrInfo::hasPattern( /// ADD R,I,C /// ==> MADD R,A,B,C /// \param Root is the ADD instruction -/// \param [out] InsInstr is a vector of machine instructions and will +/// \param [out] InsInstrs is a vector of machine instructions and will /// contain the generated madd instruction /// \param IdxMulOpd is index of operand in Root that is the result of /// the MUL. In the example above IdxMulOpd is 1. @@ -2417,20 +2564,34 @@ bool AArch64InstrInfo::hasPattern( static MachineInstr *genMadd(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl &InsInstrs, - unsigned IdxMulOpd, unsigned MaddOpc) { + unsigned IdxMulOpd, unsigned MaddOpc, + const TargetRegisterClass *RC) { assert(IdxMulOpd == 1 || IdxMulOpd == 2); unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1; MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg()); - MachineOperand R = Root.getOperand(0); - MachineOperand A = MUL->getOperand(1); - MachineOperand B = MUL->getOperand(2); - MachineOperand C = Root.getOperand(IdxOtherOpd); - MachineInstrBuilder MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc)) - .addOperand(R) - .addOperand(A) - .addOperand(B) - .addOperand(C); + unsigned ResultReg = Root.getOperand(0).getReg(); + unsigned SrcReg0 = MUL->getOperand(1).getReg(); + bool Src0IsKill = MUL->getOperand(1).isKill(); + unsigned SrcReg1 = MUL->getOperand(2).getReg(); + bool Src1IsKill = MUL->getOperand(2).isKill(); + unsigned SrcReg2 = Root.getOperand(IdxOtherOpd).getReg(); + bool Src2IsKill = Root.getOperand(IdxOtherOpd).isKill(); + + if (TargetRegisterInfo::isVirtualRegister(ResultReg)) + MRI.constrainRegClass(ResultReg, RC); + if (TargetRegisterInfo::isVirtualRegister(SrcReg0)) + MRI.constrainRegClass(SrcReg0, RC); + if (TargetRegisterInfo::isVirtualRegister(SrcReg1)) + MRI.constrainRegClass(SrcReg1, RC); + if (TargetRegisterInfo::isVirtualRegister(SrcReg2)) + MRI.constrainRegClass(SrcReg2, RC); + + MachineInstrBuilder MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), + ResultReg) + .addReg(SrcReg0, getKillRegState(Src0IsKill)) + .addReg(SrcReg1, getKillRegState(Src1IsKill)) + .addReg(SrcReg2, getKillRegState(Src2IsKill)); // Insert the MADD InsInstrs.push_back(MIB); return MUL; @@ -2444,7 +2605,7 @@ static MachineInstr *genMadd(MachineFunction &MF, MachineRegisterInfo &MRI, /// ==> ORR V, ZR, Imm /// ==> MADD R,A,B,V /// \param Root is the ADD instruction -/// \param [out] InsInstr is a vector of machine instructions and will +/// \param [out] InsInstrs is a vector of machine instructions and will /// contain the generated madd instruction /// \param IdxMulOpd is index of operand in Root that is the result of /// the MUL. In the example above IdxMulOpd is 1. @@ -2455,22 +2616,35 @@ static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, - unsigned VR) { + unsigned VR, const TargetRegisterClass *RC) { assert(IdxMulOpd == 1 || IdxMulOpd == 2); MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg()); - MachineOperand R = Root.getOperand(0); - MachineOperand A = MUL->getOperand(1); - MachineOperand B = MUL->getOperand(2); - MachineInstrBuilder MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc)) - .addOperand(R) - .addOperand(A) - .addOperand(B) + unsigned ResultReg = Root.getOperand(0).getReg(); + unsigned SrcReg0 = MUL->getOperand(1).getReg(); + bool Src0IsKill = MUL->getOperand(1).isKill(); + unsigned SrcReg1 = MUL->getOperand(2).getReg(); + bool Src1IsKill = MUL->getOperand(2).isKill(); + + if (TargetRegisterInfo::isVirtualRegister(ResultReg)) + MRI.constrainRegClass(ResultReg, RC); + if (TargetRegisterInfo::isVirtualRegister(SrcReg0)) + MRI.constrainRegClass(SrcReg0, RC); + if (TargetRegisterInfo::isVirtualRegister(SrcReg1)) + MRI.constrainRegClass(SrcReg1, RC); + if (TargetRegisterInfo::isVirtualRegister(VR)) + MRI.constrainRegClass(VR, RC); + + MachineInstrBuilder MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), + ResultReg) + .addReg(SrcReg0, getKillRegState(Src0IsKill)) + .addReg(SrcReg1, getKillRegState(Src1IsKill)) .addReg(VR); // Insert the MADD InsInstrs.push_back(MIB); return MUL; } + /// genAlternativeCodeSequence - when hasPattern() finds a pattern /// this function generates the instructions that could replace the /// original code sequence @@ -2482,9 +2656,10 @@ void AArch64InstrInfo::genAlternativeCodeSequence( MachineBasicBlock &MBB = *Root.getParent(); MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); MachineFunction &MF = *MBB.getParent(); - const TargetInstrInfo *TII = MF.getTarget().getSubtargetImpl()->getInstrInfo(); + const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); MachineInstr *MUL; + const TargetRegisterClass *RC; unsigned Opc; switch (Pattern) { default: @@ -2496,9 +2671,14 @@ void AArch64InstrInfo::genAlternativeCodeSequence( // ADD R,I,C // ==> MADD R,A,B,C // --- Create(MADD); - Opc = Pattern == MachineCombinerPattern::MC_MULADDW_OP1 ? AArch64::MADDWrrr - : AArch64::MADDXrrr; - MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 1, Opc); + if (Pattern == MachineCombinerPattern::MC_MULADDW_OP1) { + Opc = AArch64::MADDWrrr; + RC = &AArch64::GPR32RegClass; + } else { + Opc = AArch64::MADDXrrr; + RC = &AArch64::GPR64RegClass; + } + MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); break; case MachineCombinerPattern::MC_MULADDW_OP2: case MachineCombinerPattern::MC_MULADDX_OP2: @@ -2506,54 +2686,59 @@ void AArch64InstrInfo::genAlternativeCodeSequence( // ADD R,C,I // ==> MADD R,A,B,C // --- Create(MADD); - Opc = Pattern == MachineCombinerPattern::MC_MULADDW_OP2 ? AArch64::MADDWrrr - : AArch64::MADDXrrr; - MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 2, Opc); + if (Pattern == MachineCombinerPattern::MC_MULADDW_OP2) { + Opc = AArch64::MADDWrrr; + RC = &AArch64::GPR32RegClass; + } else { + Opc = AArch64::MADDXrrr; + RC = &AArch64::GPR64RegClass; + } + MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; case MachineCombinerPattern::MC_MULADDWI_OP1: - case MachineCombinerPattern::MC_MULADDXI_OP1: + case MachineCombinerPattern::MC_MULADDXI_OP1: { // MUL I=A,B,0 // ADD R,I,Imm // ==> ORR V, ZR, Imm // ==> MADD R,A,B,V // --- Create(MADD); - { - const TargetRegisterClass *RC = - MRI.getRegClass(Root.getOperand(1).getReg()); - unsigned NewVR = MRI.createVirtualRegister(RC); - unsigned BitSize, OrrOpc, ZeroReg; - if (Pattern == MachineCombinerPattern::MC_MULADDWI_OP1) { - BitSize = 32; - OrrOpc = AArch64::ORRWri; - ZeroReg = AArch64::WZR; - Opc = AArch64::MADDWrrr; - } else { - OrrOpc = AArch64::ORRXri; - BitSize = 64; - ZeroReg = AArch64::XZR; - Opc = AArch64::MADDXrrr; - } - uint64_t Imm = Root.getOperand(2).getImm(); + const TargetRegisterClass *OrrRC; + unsigned BitSize, OrrOpc, ZeroReg; + if (Pattern == MachineCombinerPattern::MC_MULADDWI_OP1) { + OrrOpc = AArch64::ORRWri; + OrrRC = &AArch64::GPR32spRegClass; + BitSize = 32; + ZeroReg = AArch64::WZR; + Opc = AArch64::MADDWrrr; + RC = &AArch64::GPR32RegClass; + } else { + OrrOpc = AArch64::ORRXri; + OrrRC = &AArch64::GPR64spRegClass; + BitSize = 64; + ZeroReg = AArch64::XZR; + Opc = AArch64::MADDXrrr; + RC = &AArch64::GPR64RegClass; + } + unsigned NewVR = MRI.createVirtualRegister(OrrRC); + uint64_t Imm = Root.getOperand(2).getImm(); - if (Root.getOperand(3).isImm()) { - unsigned val = Root.getOperand(3).getImm(); - Imm = Imm << val; - } - uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize); - uint64_t Encoding; - - if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) { - MachineInstrBuilder MIB1 = - BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc)) - .addOperand(MachineOperand::CreateReg(NewVR, RegState::Define)) - .addReg(ZeroReg) - .addImm(Encoding); - InsInstrs.push_back(MIB1); - InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); - MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR); - } + if (Root.getOperand(3).isImm()) { + unsigned Val = Root.getOperand(3).getImm(); + Imm = Imm << Val; + } + uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize); + uint64_t Encoding; + if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) { + MachineInstrBuilder MIB1 = + BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR) + .addReg(ZeroReg) + .addImm(Encoding); + InsInstrs.push_back(MIB1); + InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); + MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC); } break; + } case MachineCombinerPattern::MC_MULSUBW_OP1: case MachineCombinerPattern::MC_MULSUBX_OP1: { // MUL I=A,B,0 @@ -2561,38 +2746,46 @@ void AArch64InstrInfo::genAlternativeCodeSequence( // ==> SUB V, 0, C // ==> MADD R,A,B,V // = -C + A*B // --- Create(MADD); - const TargetRegisterClass *RC = - MRI.getRegClass(Root.getOperand(1).getReg()); - unsigned NewVR = MRI.createVirtualRegister(RC); + const TargetRegisterClass *SubRC; unsigned SubOpc, ZeroReg; if (Pattern == MachineCombinerPattern::MC_MULSUBW_OP1) { SubOpc = AArch64::SUBWrr; + SubRC = &AArch64::GPR32spRegClass; ZeroReg = AArch64::WZR; Opc = AArch64::MADDWrrr; + RC = &AArch64::GPR32RegClass; } else { SubOpc = AArch64::SUBXrr; + SubRC = &AArch64::GPR64spRegClass; ZeroReg = AArch64::XZR; Opc = AArch64::MADDXrrr; + RC = &AArch64::GPR64RegClass; } + unsigned NewVR = MRI.createVirtualRegister(SubRC); // SUB NewVR, 0, C MachineInstrBuilder MIB1 = - BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc)) - .addOperand(MachineOperand::CreateReg(NewVR, RegState::Define)) + BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR) .addReg(ZeroReg) .addOperand(Root.getOperand(2)); InsInstrs.push_back(MIB1); InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); - MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR); - } break; + MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC); + break; + } case MachineCombinerPattern::MC_MULSUBW_OP2: case MachineCombinerPattern::MC_MULSUBX_OP2: // MUL I=A,B,0 // SUB R,C,I // ==> MSUB R,A,B,C (computes C - A*B) // --- Create(MSUB); - Opc = Pattern == MachineCombinerPattern::MC_MULSUBW_OP2 ? AArch64::MSUBWrrr - : AArch64::MSUBXrrr; - MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 2, Opc); + if (Pattern == MachineCombinerPattern::MC_MULSUBW_OP2) { + Opc = AArch64::MSUBWrrr; + RC = &AArch64::GPR32RegClass; + } else { + Opc = AArch64::MSUBXrrr; + RC = &AArch64::GPR64RegClass; + } + MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; case MachineCombinerPattern::MC_MULSUBWI_OP1: case MachineCombinerPattern::MC_MULSUBXI_OP1: { @@ -2601,43 +2794,141 @@ void AArch64InstrInfo::genAlternativeCodeSequence( // ==> ORR V, ZR, -Imm // ==> MADD R,A,B,V // = -Imm + A*B // --- Create(MADD); - const TargetRegisterClass *RC = - MRI.getRegClass(Root.getOperand(1).getReg()); - unsigned NewVR = MRI.createVirtualRegister(RC); + const TargetRegisterClass *OrrRC; unsigned BitSize, OrrOpc, ZeroReg; if (Pattern == MachineCombinerPattern::MC_MULSUBWI_OP1) { - BitSize = 32; OrrOpc = AArch64::ORRWri; + OrrRC = &AArch64::GPR32spRegClass; + BitSize = 32; ZeroReg = AArch64::WZR; Opc = AArch64::MADDWrrr; + RC = &AArch64::GPR32RegClass; } else { OrrOpc = AArch64::ORRXri; + OrrRC = &AArch64::GPR64RegClass; BitSize = 64; ZeroReg = AArch64::XZR; Opc = AArch64::MADDXrrr; + RC = &AArch64::GPR64RegClass; } + unsigned NewVR = MRI.createVirtualRegister(OrrRC); int Imm = Root.getOperand(2).getImm(); if (Root.getOperand(3).isImm()) { - unsigned val = Root.getOperand(3).getImm(); - Imm = Imm << val; + unsigned Val = Root.getOperand(3).getImm(); + Imm = Imm << Val; } uint64_t UImm = -Imm << (64 - BitSize) >> (64 - BitSize); uint64_t Encoding; if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) { MachineInstrBuilder MIB1 = - BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc)) - .addOperand(MachineOperand::CreateReg(NewVR, RegState::Define)) + BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR) .addReg(ZeroReg) .addImm(Encoding); InsInstrs.push_back(MIB1); InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); - MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR); + MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC); } - } break; + break; } + } // end switch (Pattern) // Record MUL and ADD/SUB for deletion DelInstrs.push_back(MUL); DelInstrs.push_back(&Root); return; } + +/// \brief Replace csincr-branch sequence by simple conditional branch +/// +/// Examples: +/// 1. +/// csinc w9, wzr, wzr, +/// tbnz w9, #0, 0x44 +/// to +/// b. +/// +/// 2. +/// csinc w9, wzr, wzr, +/// tbz w9, #0, 0x44 +/// to +/// b. +/// +/// \param MI Conditional Branch +/// \return True when the simple conditional branch is generated +/// +bool AArch64InstrInfo::optimizeCondBranch(MachineInstr *MI) const { + bool IsNegativeBranch = false; + bool IsTestAndBranch = false; + unsigned TargetBBInMI = 0; + switch (MI->getOpcode()) { + default: + llvm_unreachable("Unknown branch instruction?"); + case AArch64::Bcc: + return false; + case AArch64::CBZW: + case AArch64::CBZX: + TargetBBInMI = 1; + break; + case AArch64::CBNZW: + case AArch64::CBNZX: + TargetBBInMI = 1; + IsNegativeBranch = true; + break; + case AArch64::TBZW: + case AArch64::TBZX: + TargetBBInMI = 2; + IsTestAndBranch = true; + break; + case AArch64::TBNZW: + case AArch64::TBNZX: + TargetBBInMI = 2; + IsNegativeBranch = true; + IsTestAndBranch = true; + break; + } + // So we increment a zero register and test for bits other + // than bit 0? Conservatively bail out in case the verifier + // missed this case. + if (IsTestAndBranch && MI->getOperand(1).getImm()) + return false; + + // Find Definition. + assert(MI->getParent() && "Incomplete machine instruciton\n"); + MachineBasicBlock *MBB = MI->getParent(); + MachineFunction *MF = MBB->getParent(); + MachineRegisterInfo *MRI = &MF->getRegInfo(); + unsigned VReg = MI->getOperand(0).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(VReg)) + return false; + + MachineInstr *DefMI = MRI->getVRegDef(VReg); + + // Look for CSINC + if (!(DefMI->getOpcode() == AArch64::CSINCWr && + DefMI->getOperand(1).getReg() == AArch64::WZR && + DefMI->getOperand(2).getReg() == AArch64::WZR) && + !(DefMI->getOpcode() == AArch64::CSINCXr && + DefMI->getOperand(1).getReg() == AArch64::XZR && + DefMI->getOperand(2).getReg() == AArch64::XZR)) + return false; + + if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1) + return false; + + AArch64CC::CondCode CC = + (AArch64CC::CondCode)DefMI->getOperand(3).getImm(); + bool CheckOnlyCCWrites = true; + // Convert only when the condition code is not modified between + // the CSINC and the branch. The CC may be used by other + // instructions in between. + if (modifiesConditionCode(DefMI, MI, CheckOnlyCCWrites, &getRegisterInfo())) + return false; + MachineBasicBlock &RefToMBB = *MBB; + MachineBasicBlock *TBB = MI->getOperand(TargetBBInMI).getMBB(); + DebugLoc DL = MI->getDebugLoc(); + if (IsNegativeBranch) + CC = AArch64CC::getInvertedCondCode(CC); + BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB); + MI->eraseFromParent(); + return true; +}