-/// useMachineCombiner - Tell the caller whether this target supports the
-/// MachineCombiner. The AArch64 implementation unconditionally answers true.
-bool AArch64InstrInfo::useMachineCombiner() const { return true; }
-//
-// True when Opc is one of the flag-setting ADDS/SUBS opcodes listed below
-// (W and X variants, rr and ri forms); false for every other opcode.
-static bool isCombineInstrSettingFlag(unsigned Opc) {
-  switch (Opc) {
-  // Flag-setting additions.
-  case AArch64::ADDSWrr:
-  case AArch64::ADDSXrr:
-  case AArch64::ADDSWri:
-  case AArch64::ADDSXri:
-  // Flag-setting subtractions.
-  case AArch64::SUBSWrr:
-  case AArch64::SUBSXrr:
-  case AArch64::SUBSWri:
-  case AArch64::SUBSXri:
-    return true;
-  default:
-    return false;
-  }
-}
-//
-// 32b opcodes that can be combined with a MUL: the ADD/SUB W-register opcodes
-// listed below, including their flag-setting (ADDS/SUBS) counterparts.
-static bool isCombineInstrCandidate32(unsigned Opc) {
-  switch (Opc) {
-  // Register-register forms.
-  case AArch64::ADDWrr:
-  case AArch64::ADDSWrr:
-  case AArch64::SUBWrr:
-  case AArch64::SUBSWrr:
-  // Register-immediate forms.
-  case AArch64::ADDWri:
-  case AArch64::ADDSWri:
-  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
-  case AArch64::SUBWri:
-  case AArch64::SUBSWri:
-    return true;
-  default:
-    return false;
-  }
-}
-//
-// 64b opcodes that can be combined with a MUL: the ADD/SUB X-register opcodes
-// listed below, including their flag-setting (ADDS/SUBS) counterparts.
-static bool isCombineInstrCandidate64(unsigned Opc) {
-  switch (Opc) {
-  // Register-register forms.
-  case AArch64::ADDXrr:
-  case AArch64::ADDSXrr:
-  case AArch64::SUBXrr:
-  case AArch64::SUBSXrr:
-  // Register-immediate forms.
-  case AArch64::ADDXri:
-  case AArch64::ADDSXri:
-  // Note: MSUB Xd,Xn,Xm,Xi -> Xd = Xi - XnxXm, not Xd=XnxXm - Xi.
-  case AArch64::SUBXri:
-  case AArch64::SUBSXri:
-    return true;
-  default:
-    return false;
-  }
-}
-//
-// True when Opc is accepted by either the 32b or the 64b candidate list, i.e.
-// it is an opcode that can be combined with a MUL.
-static bool isCombineInstrCandidate(unsigned Opc) {
-  if (isCombineInstrCandidate32(Opc))
-    return true;
-  return isCombineInstrCandidate64(Opc);
-}
-
-/// canCombineWithMUL - Decide whether operand \p MO is produced by a multiply
-/// that may be folded into the instruction using it.
-/// \param MBB block the defining instruction has to live in
-/// \param MO operand whose definition is inspected
-/// \param MulOpc opcode the defining instruction must have
-/// \param ZeroReg register that must appear as operand 3 of the definition
-/// \returns true only when \p MO is a virtual register with a unique def in
-/// \p MBB whose opcode is \p MulOpc, whose operand 3 is \p ZeroReg and whose
-/// result has exactly one non-debug use.
-static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
-                              unsigned MulOpc, unsigned ZeroReg) {
-  // Only a virtual register can be traced back to a single definition.
-  if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
-    return false;
-
-  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
-  MachineInstr *MI = MRI.getUniqueVRegDef(MO.getReg());
-  if (!MI)
-    return false;
-  // The definition needs to be in the trace (otherwise, it won't have a
-  // depth).
-  if (MI->getParent() != &MBB)
-    return false;
-  if (static_cast<unsigned>(MI->getOpcode()) != MulOpc)
-    return false;
-
-  assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
-         MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
-         MI->getOperand(3).isReg() && "MAdd/MSub must have a least 4 regs");
-
-  // The third input must be the zero register, and the product must feed
-  // nothing but the instruction we are combining with.
-  const bool ThirdInputIsZero = MI->getOperand(3).getReg() == ZeroReg;
-  return ThirdInputIsZero && MRI.hasOneNonDBGUse(MI->getOperand(0).getReg());
-}
-
-/// hasPattern - return true when there is potentially a faster code sequence
-/// for an instruction chain ending in \p Root. All potential patterns are
-/// listed in the \p Pattern vector. Pattern should be sorted in priority
-/// order since the pattern evaluator stops checking as soon as it finds a
-/// faster sequence.
-/// \param Root the ADD/SUB instruction that ends the chain
-/// \param [out] Pattern receives one entry per operand of \p Root that is fed
-///        by a foldable multiply; operand 1 is always tried before operand 2
-/// \returns true when at least one entry was appended to \p Pattern
-bool AArch64InstrInfo::hasPattern(
-    MachineInstr &Root,
-    SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> &Pattern) const {
-  unsigned Opc = Root.getOpcode();
-  if (!isCombineInstrCandidate(Opc))
-    return false;
-
-  if (isCombineInstrSettingFlag(Opc)) {
-    // Ask for an NZCV def with the isDead argument set. When none is found
-    // NZCV is treated as live and we bail out.
-    if (Root.findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
-      return false;
-    // Match on the opcode handed back by convertFlagSettingOpcode(). When the
-    // opcode can't change bail out.
-    // CHECKME: do we miss any cases for opcode conversion?
-    const unsigned NewOpc = convertFlagSettingOpcode(&Root);
-    if (NewOpc == Opc)
-      return false;
-    Opc = NewOpc;
-  }
-
-  MachineBasicBlock &MBB = *Root.getParent();
-  bool Found = false;
-
-  // Record PatternId when operand OpIdx of Root comes from a multiply that
-  // canCombineWithMUL() accepts.
-  auto Match = [&](unsigned OpIdx, unsigned MulOpc, unsigned ZeroReg,
-                   MachineCombinerPattern::MC_PATTERN PatternId) {
-    if (!canCombineWithMUL(MBB, Root.getOperand(OpIdx), MulOpc, ZeroReg))
-      return;
-    Pattern.push_back(PatternId);
-    Found = true;
-  };
-  // W opcodes look for MADDWrrr with WZR, X opcodes for MADDXrrr with XZR.
-  auto MatchW = [&](unsigned OpIdx,
-                    MachineCombinerPattern::MC_PATTERN PatternId) {
-    Match(OpIdx, AArch64::MADDWrrr, AArch64::WZR, PatternId);
-  };
-  auto MatchX = [&](unsigned OpIdx,
-                    MachineCombinerPattern::MC_PATTERN PatternId) {
-    Match(OpIdx, AArch64::MADDXrrr, AArch64::XZR, PatternId);
-  };
-
-  switch (Opc) {
-  default:
-    break;
-  // Register-register forms: either source operand may be the product.
-  case AArch64::ADDWrr:
-    assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
-           "ADDWrr does not have register operands");
-    MatchW(1, MachineCombinerPattern::MC_MULADDW_OP1);
-    MatchW(2, MachineCombinerPattern::MC_MULADDW_OP2);
-    break;
-  case AArch64::ADDXrr:
-    MatchX(1, MachineCombinerPattern::MC_MULADDX_OP1);
-    MatchX(2, MachineCombinerPattern::MC_MULADDX_OP2);
-    break;
-  case AArch64::SUBWrr:
-    MatchW(1, MachineCombinerPattern::MC_MULSUBW_OP1);
-    MatchW(2, MachineCombinerPattern::MC_MULSUBW_OP2);
-    break;
-  case AArch64::SUBXrr:
-    MatchX(1, MachineCombinerPattern::MC_MULSUBX_OP1);
-    MatchX(2, MachineCombinerPattern::MC_MULSUBX_OP2);
-    break;
-  // Register-immediate forms: only operand 1 can be the product.
-  case AArch64::ADDWri:
-    MatchW(1, MachineCombinerPattern::MC_MULADDWI_OP1);
-    break;
-  case AArch64::ADDXri:
-    MatchX(1, MachineCombinerPattern::MC_MULADDXI_OP1);
-    break;
-  case AArch64::SUBWri:
-    MatchW(1, MachineCombinerPattern::MC_MULSUBWI_OP1);
-    break;
-  case AArch64::SUBXri:
-    MatchX(1, MachineCombinerPattern::MC_MULSUBXI_OP1);
-    break;
-  }
-  return Found;
-}
-
-/// genMadd - Build the multiply-accumulate instruction that replaces a
-/// mul + add pair.
-/// Example:
-///  MUL I=A,B,0
-///  ADD R,I,C
-///  ==> MADD R,A,B,C
-/// \param Root is the ADD instruction
-/// \param [out] InsInstrs is a vector of machine instructions; the generated
-///        madd instruction is appended to it
-/// \param IdxMulOpd is the index of the operand in Root that is the result of
-///        the MUL. In the example above IdxMulOpd is 1.
-/// \param MaddOpc is the opcode of the madd instruction
-/// \returns the MUL instruction that defines operand \p IdxMulOpd of \p Root
-static MachineInstr *genMadd(MachineFunction &MF, MachineRegisterInfo &MRI,
-                             const TargetInstrInfo *TII, MachineInstr &Root,
-                             SmallVectorImpl<MachineInstr *> &InsInstrs,
-                             unsigned IdxMulOpd, unsigned MaddOpc) {
-  assert(IdxMulOpd == 1 || IdxMulOpd == 2);
-
-  // The addend is the source operand of Root that is not the product.
-  unsigned IdxAddend = 1;
-  if (IdxMulOpd == 1)
-    IdxAddend = 2;
-
-  MachineInstr *MulDef =
-      MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
-
-  MachineInstrBuilder Builder =
-      BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc));
-  Builder.addOperand(Root.getOperand(0));         // R
-  Builder.addOperand(MulDef->getOperand(1));      // A
-  Builder.addOperand(MulDef->getOperand(2));      // B
-  Builder.addOperand(Root.getOperand(IdxAddend)); // C
-
-  // Hand the MADD over to the caller.
-  InsInstrs.push_back(Builder);
-  return MulDef;
-}
-
-/// genMaddR - Build the multiply-accumulate instruction that replaces a
-/// mul + add pair when the addend lives in an extra virtual register.
-/// Example - an ADD intermediate needs to be stored in a register:
-///  MUL I=A,B,0
-///  ADD R,I,Imm
-///  ==> ORR  V, ZR, Imm
-///  ==> MADD R,A,B,V
-/// \param Root is the ADD instruction
-/// \param [out] InsInstrs is a vector of machine instructions; the generated
-///        madd instruction is appended to it
-/// \param IdxMulOpd is the index of the operand in Root that is the result of
-///        the MUL. In the example above IdxMulOpd is 1.
-/// \param MaddOpc is the opcode of the madd instruction
-/// \param VR is a virtual register that holds the value of an ADD operand
-///        (V in the example above).
-/// \returns the MUL instruction that defines operand \p IdxMulOpd of \p Root
-static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
-                              const TargetInstrInfo *TII, MachineInstr &Root,
-                              SmallVectorImpl<MachineInstr *> &InsInstrs,
-                              unsigned IdxMulOpd, unsigned MaddOpc,
-                              unsigned VR) {
-  assert(IdxMulOpd == 1 || IdxMulOpd == 2);
-
-  MachineInstr *MulDef =
-      MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
-
-  MachineInstrBuilder Builder =
-      BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc));
-  Builder.addOperand(Root.getOperand(0));    // R
-  Builder.addOperand(MulDef->getOperand(1)); // A
-  Builder.addOperand(MulDef->getOperand(2)); // B
-  Builder.addReg(VR);                        // V
-
-  // Hand the MADD over to the caller.
-  InsInstrs.push_back(Builder);
-  return MulDef;
-}
-/// genAlternativeCodeSequence - when hasPattern() finds a pattern
-/// this function generates the instructions that could replace the
-/// original code sequence.
-/// \param Root the ADD/SUB instruction that ends the matched sequence
-/// \param Pattern the pattern to expand
-/// \param [out] InsInstrs receives the newly built instructions. It is left
-///        untouched when \p Pattern is not handled here or when the
-///        immediate of an immediate pattern is rejected by
-///        processLogicalImmediate().
-/// \param [out] DelInstrs receives the MUL and \p Root, but only when a
-///        replacement sequence was generated
-/// \param [out] InstrIdxForVirtReg gets an entry (NewVR, 0) for every virtual
-///        register created here; 0 is the position of its defining
-///        instruction in \p InsInstrs provided that vector is empty on entry
-///        (NOTE(review): confirm against the caller).
-void AArch64InstrInfo::genAlternativeCodeSequence(
-    MachineInstr &Root, MachineCombinerPattern::MC_PATTERN Pattern,
-    SmallVectorImpl<MachineInstr *> &InsInstrs,
-    SmallVectorImpl<MachineInstr *> &DelInstrs,
-    DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
-  MachineBasicBlock &MBB = *Root.getParent();
-  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
-  MachineFunction &MF = *MBB.getParent();
-  const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
-
-  // Stays null unless a replacement sequence is actually generated, so an
-  // indeterminate pointer can never be recorded for deletion.
-  MachineInstr *MUL = nullptr;
-  unsigned Opc;
-  switch (Pattern) {
-  default:
-    // Pattern not handled here: nothing is generated.
-    break;
-  case MachineCombinerPattern::MC_MULADDW_OP1:
-  case MachineCombinerPattern::MC_MULADDX_OP1:
-    // MUL I=A,B,0
-    // ADD R,I,C
-    // ==> MADD R,A,B,C
-    // --- Create(MADD);
-    Opc = Pattern == MachineCombinerPattern::MC_MULADDW_OP1 ? AArch64::MADDWrrr
-                                                            : AArch64::MADDXrrr;
-    MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 1, Opc);
-    break;
-  case MachineCombinerPattern::MC_MULADDW_OP2:
-  case MachineCombinerPattern::MC_MULADDX_OP2:
-    // MUL I=A,B,0
-    // ADD R,C,I
-    // ==> MADD R,A,B,C
-    // --- Create(MADD);
-    Opc = Pattern == MachineCombinerPattern::MC_MULADDW_OP2 ? AArch64::MADDWrrr
-                                                            : AArch64::MADDXrrr;
-    MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 2, Opc);
-    break;
-  case MachineCombinerPattern::MC_MULADDWI_OP1:
-  case MachineCombinerPattern::MC_MULADDXI_OP1:
-  case MachineCombinerPattern::MC_MULSUBWI_OP1:
-  case MachineCombinerPattern::MC_MULSUBXI_OP1: {
-    // MUL I=A,B,0
-    // ADD R,I,Imm             or   SUB R,I,Imm
-    // ==> ORR  V, ZR, Imm          ==> ORR  V, ZR, -Imm
-    // ==> MADD R,A,B,V             ==> MADD R,A,B,V // = -Imm + A*B
-    // --- Create(MADD);
-    const bool Is32Bit =
-        Pattern == MachineCombinerPattern::MC_MULADDWI_OP1 ||
-        Pattern == MachineCombinerPattern::MC_MULSUBWI_OP1;
-    const bool IsSub = Pattern == MachineCombinerPattern::MC_MULSUBWI_OP1 ||
-                       Pattern == MachineCombinerPattern::MC_MULSUBXI_OP1;
-    const unsigned BitSize = Is32Bit ? 32 : 64;
-    const unsigned OrrOpc = Is32Bit ? AArch64::ORRWri : AArch64::ORRXri;
-    const unsigned ZeroReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
-    Opc = Is32Bit ? AArch64::MADDWrrr : AArch64::MADDXrrr;
-
-    const TargetRegisterClass *RC =
-        MRI.getRegClass(Root.getOperand(1).getReg());
-    unsigned NewVR = MRI.createVirtualRegister(RC);
-
-    // All immediate arithmetic is done on uint64_t: negating or shifting a
-    // signed int here (in particular shifting a 32-bit int by 32) would be
-    // undefined behaviour.
-    uint64_t Imm = Root.getOperand(2).getImm();
-    if (Root.getOperand(3).isImm()) {
-      unsigned Shift = Root.getOperand(3).getImm();
-      Imm = Imm << Shift;
-    }
-    if (IsSub)
-      Imm = 0 - Imm; // two's-complement negation, well defined on unsigned
-    // Keep only the low BitSize bits of the value.
-    const uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
-    uint64_t Encoding;
-
-    // Only emit the sequence when processLogicalImmediate() accepts the
-    // value for the ORR; otherwise MUL stays null and nothing is recorded.
-    if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
-      MachineInstrBuilder MIB1 =
-          BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc))
-              .addOperand(MachineOperand::CreateReg(NewVR, RegState::Define))
-              .addReg(ZeroReg)
-              .addImm(Encoding);
-      InsInstrs.push_back(MIB1);
-      InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
-      MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR);
-    }
-    break;
-  }
-  case MachineCombinerPattern::MC_MULSUBW_OP1:
-  case MachineCombinerPattern::MC_MULSUBX_OP1: {
-    // MUL I=A,B,0
-    // SUB R,I, C
-    // ==> SUB  V, 0, C
-    // ==> MADD R,A,B,V // = -C + A*B
-    // --- Create(MADD);
-    const TargetRegisterClass *RC =
-        MRI.getRegClass(Root.getOperand(1).getReg());
-    unsigned NewVR = MRI.createVirtualRegister(RC);
-    unsigned SubOpc, ZeroReg;
-    if (Pattern == MachineCombinerPattern::MC_MULSUBW_OP1) {
-      SubOpc = AArch64::SUBWrr;
-      ZeroReg = AArch64::WZR;
-      Opc = AArch64::MADDWrrr;
-    } else {
-      SubOpc = AArch64::SUBXrr;
-      ZeroReg = AArch64::XZR;
-      Opc = AArch64::MADDXrrr;
-    }
-    // SUB NewVR, 0, C
-    MachineInstrBuilder MIB1 =
-        BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc))
-            .addOperand(MachineOperand::CreateReg(NewVR, RegState::Define))
-            .addReg(ZeroReg)
-            .addOperand(Root.getOperand(2));
-    InsInstrs.push_back(MIB1);
-    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
-    MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR);
-    break;
-  }
-  case MachineCombinerPattern::MC_MULSUBW_OP2:
-  case MachineCombinerPattern::MC_MULSUBX_OP2:
-    // MUL I=A,B,0
-    // SUB R,C,I
-    // ==> MSUB R,A,B,C (computes C - A*B)
-    // --- Create(MSUB);
-    Opc = Pattern == MachineCombinerPattern::MC_MULSUBW_OP2 ? AArch64::MSUBWrrr
-                                                            : AArch64::MSUBXrrr;
-    MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 2, Opc);
-    break;
-  }
-
-  // Nothing was generated: leave DelInstrs alone.
-  if (!MUL)
-    return;
-
-  // Record MUL and ADD/SUB for deletion
-  DelInstrs.push_back(MUL);
-  DelInstrs.push_back(&Root);
-}