{ X86::VMOVUPSYrr, X86::VMOVUPSYrm, 0 },
{ X86::VPERMILPDYri, X86::VPERMILPDYmi, 0 },
{ X86::VPERMILPSYri, X86::VPERMILPSYmi, 0 },
+ { X86::VRCPPSYr, X86::VRCPPSYm, 0 },
+ { X86::VRCPPSYr_Int, X86::VRCPPSYm_Int, 0 },
+ { X86::VRSQRTPSYr, X86::VRSQRTPSYm, 0 },
+ { X86::VSQRTPDYr, X86::VSQRTPDYm, 0 },
+ { X86::VSQRTPSYr, X86::VSQRTPSYm, 0 },
+ { X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrm, TB_NO_REVERSE },
+ { X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrm, TB_NO_REVERSE },
// AVX2 foldable instructions
{ X86::VPABSBrr256, X86::VPABSBrm256, 0 },
{ X86::VPSHUFDYri, X86::VPSHUFDYmi, 0 },
{ X86::VPSHUFHWYri, X86::VPSHUFHWYmi, 0 },
{ X86::VPSHUFLWYri, X86::VPSHUFLWYmi, 0 },
- { X86::VRCPPSYr, X86::VRCPPSYm, 0 },
- { X86::VRCPPSYr_Int, X86::VRCPPSYm_Int, 0 },
- { X86::VRSQRTPSYr, X86::VRSQRTPSYm, 0 },
- { X86::VSQRTPDYr, X86::VSQRTPDYm, 0 },
- { X86::VSQRTPSYr, X86::VSQRTPSYm, 0 },
- { X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrm, TB_NO_REVERSE },
- { X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrm, TB_NO_REVERSE },
// BMI/BMI2/LZCNT/POPCNT/TBM foldable instructions
{ X86::BEXTR32rr, X86::BEXTR32rm, 0 },
/// operand at the use. We fold the load instructions if load defines a virtual
/// register, the virtual register is used once in the same BB, and the
/// instructions in-between do not load or store, and have no side effects.
-MachineInstr* X86InstrInfo::
-optimizeLoadInstr(MachineInstr *MI, const MachineRegisterInfo *MRI,
- unsigned &FoldAsLoadDefReg,
- MachineInstr *&DefMI) const {
+MachineInstr *X86InstrInfo::optimizeLoadInstr(MachineInstr *MI,
+ const MachineRegisterInfo *MRI,
+ unsigned &FoldAsLoadDefReg,
+ MachineInstr *&DefMI) const {
if (FoldAsLoadDefReg == 0)
return nullptr;
// To be conservative, if there exists another load, clear the load candidate.
if (!DefMI->isSafeToMove(this, nullptr, SawStore))
return nullptr;
- // We try to commute MI if possible.
- unsigned IdxEnd = (MI->isCommutable()) ? 2 : 1;
- for (unsigned Idx = 0; Idx < IdxEnd; Idx++) {
- // Collect information about virtual register operands of MI.
- unsigned SrcOperandId = 0;
- bool FoundSrcOperand = false;
- for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg())
- continue;
- unsigned Reg = MO.getReg();
- if (Reg != FoldAsLoadDefReg)
- continue;
- // Do not fold if we have a subreg use or a def or multiple uses.
- if (MO.getSubReg() || MO.isDef() || FoundSrcOperand)
- return nullptr;
-
- SrcOperandId = i;
- FoundSrcOperand = true;
- }
- if (!FoundSrcOperand) return nullptr;
-
- // Check whether we can fold the def into SrcOperandId.
- SmallVector<unsigned, 8> Ops;
- Ops.push_back(SrcOperandId);
- MachineInstr *FoldMI = foldMemoryOperand(MI, Ops, DefMI);
- if (FoldMI) {
- FoldAsLoadDefReg = 0;
- return FoldMI;
- }
-
- if (Idx == 1) {
- // MI was changed but it didn't help, commute it back!
- commuteInstruction(MI, false);
+ // Collect information about virtual register operands of MI.
+ unsigned SrcOperandId = 0;
+ bool FoundSrcOperand = false;
+ for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg != FoldAsLoadDefReg)
+ continue;
+ // Do not fold if we have a subreg use or a def or multiple uses.
+ if (MO.getSubReg() || MO.isDef() || FoundSrcOperand)
return nullptr;
- }
- // Check whether we can commute MI and enable folding.
- if (MI->isCommutable()) {
- MachineInstr *NewMI = commuteInstruction(MI, false);
- // Unable to commute.
- if (!NewMI) return nullptr;
- if (NewMI != MI) {
- // New instruction. It doesn't need to be kept.
- NewMI->eraseFromParent();
- return nullptr;
- }
- }
+ SrcOperandId = i;
+ FoundSrcOperand = true;
+ }
+ if (!FoundSrcOperand)
+ return nullptr;
+
+ // Check whether we can fold the def into SrcOperandId.
+ SmallVector<unsigned, 8> Ops;
+ Ops.push_back(SrcOperandId);
+ MachineInstr *FoldMI = foldMemoryOperand(MI, Ops, DefMI);
+ if (FoldMI) {
+ FoldAsLoadDefReg = 0;
+ return FoldMI;
}
+
return nullptr;
}
X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr *MI, unsigned i,
const SmallVectorImpl<MachineOperand> &MOs,
- unsigned Size, unsigned Align) const {
+ unsigned Size, unsigned Align,
+ bool AllowCommute) const {
const DenseMap<unsigned,
std::pair<unsigned,unsigned> > *OpcodeTablePtr = nullptr;
bool isCallRegIndirect = Subtarget.callRegIndirect();
if (Opcode != X86::MOV64rm || RCSize != 8 || Size != 4)
return nullptr;
// If this is a 64-bit load, but the spill slot is 32, then we can do
- // a 32-bit load which is implicitly zero-extended. This likely is due
- // to liveintervalanalysis remat'ing a load from stack slot.
+ // a 32-bit load which is implicitly zero-extended. This likely is
+ // due to live interval analysis remat'ing a load from stack slot.
if (MI->getOperand(0).getSubReg() || MI->getOperand(1).getSubReg())
return nullptr;
Opcode = X86::MOV32rm;
// to a 32-bit one.
unsigned DstReg = NewMI->getOperand(0).getReg();
if (TargetRegisterInfo::isPhysicalRegister(DstReg))
- NewMI->getOperand(0).setReg(RI.getSubReg(DstReg,
- X86::sub_32bit));
+ NewMI->getOperand(0).setReg(RI.getSubReg(DstReg, X86::sub_32bit));
else
NewMI->getOperand(0).setSubReg(X86::sub_32bit);
}
}
}
+ // If the instruction and target operand are commutable, commute the
+ // instruction and try again.
+ if (AllowCommute) {
+ unsigned OriginalOpIdx = i, CommuteOpIdx1, CommuteOpIdx2;
+ if (findCommutedOpIndices(MI, CommuteOpIdx1, CommuteOpIdx2)) {
+ bool HasDef = MI->getDesc().getNumDefs();
+ unsigned Reg0 = HasDef ? MI->getOperand(0).getReg() : 0;
+ unsigned Reg1 = MI->getOperand(CommuteOpIdx1).getReg();
+ unsigned Reg2 = MI->getOperand(CommuteOpIdx2).getReg();
+ bool Tied0 =
+ 0 == MI->getDesc().getOperandConstraint(CommuteOpIdx1, MCOI::TIED_TO);
+ bool Tied1 =
+ 0 == MI->getDesc().getOperandConstraint(CommuteOpIdx2, MCOI::TIED_TO);
+
+ // If either of the commutable operands are tied to the destination
+ // then we can not commute + fold.
+ if ((HasDef && Reg0 == Reg1 && Tied0) ||
+ (HasDef && Reg0 == Reg2 && Tied1))
+ return nullptr;
+
+ if ((CommuteOpIdx1 == OriginalOpIdx) ||
+ (CommuteOpIdx2 == OriginalOpIdx)) {
+ MachineInstr *CommutedMI = commuteInstruction(MI, false);
+ if (!CommutedMI) {
+ // Unable to commute.
+ return nullptr;
+ }
+ if (CommutedMI != MI) {
+ // New instruction. We can't fold from this.
+ CommutedMI->eraseFromParent();
+ return nullptr;
+ }
+
+ // Attempt to fold with the commuted version of the instruction.
+ unsigned CommuteOp =
+ (CommuteOpIdx1 == OriginalOpIdx ? CommuteOpIdx2 : CommuteOpIdx1);
+ NewMI = foldMemoryOperandImpl(MF, MI, CommuteOp, MOs, Size, Align,
+ /*AllowCommute=*/false);
+ if (NewMI)
+ return NewMI;
+
+ // Folding failed again - undo the commute before returning.
+ MachineInstr *UncommutedMI = commuteInstruction(MI, false);
+ if (!UncommutedMI) {
+ // Unable to commute.
+ return nullptr;
+ }
+ if (UncommutedMI != MI) {
+ // New instruction. It doesn't need to be kept.
+ UncommutedMI->eraseFromParent();
+ return nullptr;
+ }
+
+ // Return here to prevent duplicate fuse failure report.
+ return nullptr;
+ }
+ }
+ }
+
// No fusion
if (PrintFailedFusing && !MI->isCopy())
dbgs() << "We failed to fuse operand " << i << " in " << *MI;
SmallVector<MachineOperand,4> MOs;
MOs.push_back(MachineOperand::CreateFI(FrameIndex));
- return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, Size, Alignment);
+ return foldMemoryOperandImpl(MF, MI, Ops[0], MOs,
+ Size, Alignment, /*AllowCommute=*/true);
}
static bool isPartialRegisterLoad(const MachineInstr &LoadMI,
break;
}
}
- return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, 0, Alignment);
+ return foldMemoryOperandImpl(MF, MI, Ops[0], MOs,
+ /*Size=*/0, Alignment, /*AllowCommute=*/true);
}
case X86::VSQRTSSm:
case X86::VSQRTSSm_Int:
case X86::VSQRTSSr:
- case X86::VSQRTPDZrm:
- case X86::VSQRTPDZrr:
- case X86::VSQRTPSZrm:
- case X86::VSQRTPSZrr:
+ case X86::VSQRTPDZm:
+ case X86::VSQRTPDZr:
+ case X86::VSQRTPSZm:
+ case X86::VSQRTPSZr:
case X86::VSQRTSDZm:
case X86::VSQRTSDZm_Int:
case X86::VSQRTSDZr: