unsigned int IsStore : 1;
unsigned int IsSwap : 1;
unsigned int MentionsPhysVR : 1;
- unsigned int HasImplicitSubreg : 1;
unsigned int IsSwappable : 1;
+ unsigned int MentionsPartialVR : 1;
unsigned int SpecialHandling : 3;
unsigned int WebRejected : 1;
unsigned int WillRemove : 1;
SH_INSERT,
SH_NOSWAP_LD,
SH_NOSWAP_ST,
- SH_SPLAT
+ SH_SPLAT,
+ SH_XXPERMDI,
+ SH_COPYWIDEN
};
struct PPCVSXSwapRemoval : public MachineFunctionPass {
// handling. Return true iff any changes are made.
bool removeSwaps();
+ // Insert a swap instruction from SrcReg to DstReg at the given
+ // InsertPoint.
+ void insertSwap(MachineInstr *MI, MachineBasicBlock::iterator InsertPoint,
+ unsigned DstReg, unsigned SrcReg);
+
// Update instructions requiring special handling.
void handleSpecialSwappables(int EntryIdx);
bool isRegInClass(unsigned Reg, const TargetRegisterClass *RC) {
if (TargetRegisterInfo::isVirtualRegister(Reg))
return RC->hasSubClassEq(MRI->getRegClass(Reg));
- if (RC->contains(Reg))
- return true;
- return false;
+ return RC->contains(Reg);
}
// Return true iff the given register is a full vector register.
isRegInClass(Reg, &PPC::VRRCRegClass));
}
+ // Return true iff the given register is a partial vector register.
+ bool isScalarVecReg(unsigned Reg) {
+ return (isRegInClass(Reg, &PPC::VSFRCRegClass) ||
+ isRegInClass(Reg, &PPC::VSSRCRegClass));
+ }
+
+ // Return true iff the given register mentions all or part of a
+ // vector register. Also sets Partial to true if the mention
+ // is for just the floating-point register overlap of the register.
+ bool isAnyVecReg(unsigned Reg, bool &Partial) {
+ if (isScalarVecReg(Reg))
+ Partial = true;
+ return isScalarVecReg(Reg) || isVecReg(Reg);
+ }
+
public:
// Main entry point for this pass.
bool runOnMachineFunction(MachineFunction &MF) override {
void PPCVSXSwapRemoval::initialize(MachineFunction &MFParm) {
MF = &MFParm;
MRI = &MF->getRegInfo();
- TII = static_cast<const PPCInstrInfo*>(MF->getSubtarget().getInstrInfo());
+ TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
// An initial vector size of 256 appears to work well in practice.
// Small/medium functions with vector content tend not to incur a
for (MachineBasicBlock &MBB : *MF) {
for (MachineInstr &MI : MBB) {
+ if (MI.isDebugValue())
+ continue;
+
bool RelevantInstr = false;
- bool ImplicitSubreg = false;
+ bool Partial = false;
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
- if (isVecReg(Reg)) {
+ if (isAnyVecReg(Reg, Partial)) {
RelevantInstr = true;
- if (MO.getSubReg() != 0)
- ImplicitSubreg = true;
break;
}
}
PPCVSXSwapEntry SwapEntry{};
int VecIdx = addSwapEntry(&MI, SwapEntry);
- if (ImplicitSubreg)
- SwapVector[VecIdx].HasImplicitSubreg = 1;
-
switch(MI.getOpcode()) {
default:
// Unless noted otherwise, an instruction is considered
// safe for the optimization. There are a large number of
// such true-SIMD instructions (all vector math, logical,
- // select, compare, etc.).
- SwapVector[VecIdx].IsSwappable = 1;
+ // select, compare, etc.). However, if the instruction
+ // mentions a partial vector register and does not have
+ // special handling defined, it is not swappable.
+ if (Partial)
+ SwapVector[VecIdx].MentionsPartialVR = 1;
+ else
+ SwapVector[VecIdx].IsSwappable = 1;
break;
- case PPC::XXPERMDI:
+ case PPC::XXPERMDI: {
// This is a swap if it is of the form XXPERMDI t, s, s, 2.
// Unfortunately, MachineCSE ignores COPY and SUBREG_TO_REG, so we
// can also see XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), 2,
// SUBREG_TO_REG to find the real source value for comparison.
// If the real source value is a physical register, then mark the
// XXPERMDI as mentioning a physical register.
- // Any other form of XXPERMDI is lane-sensitive and unsafe
- // for the optimization.
- if (MI.getOperand(3).getImm() == 2) {
+ int immed = MI.getOperand(3).getImm();
+ if (immed == 2) {
unsigned trueReg1 = lookThruCopyLike(MI.getOperand(1).getReg(),
VecIdx);
unsigned trueReg2 = lookThruCopyLike(MI.getOperand(2).getReg(),
VecIdx);
if (trueReg1 == trueReg2)
SwapVector[VecIdx].IsSwap = 1;
+ else {
+ // We can still handle these if the two registers are not
+ // identical, by adjusting the form of the XXPERMDI.
+ SwapVector[VecIdx].IsSwappable = 1;
+ SwapVector[VecIdx].SpecialHandling = SHValues::SH_XXPERMDI;
+ }
+ // This is a doubleword splat if it is of the form
+ // XXPERMDI t, s, s, 0 or XXPERMDI t, s, s, 3. As above we
+ // must look through chains of copy-likes to find the source
+ // register. We turn off the marking for mention of a physical
+ // register, because splatting it is safe; the optimization
+ // will not swap the value in the physical register. Whether
+ // or not the two input registers are identical, we can handle
+ // these by adjusting the form of the XXPERMDI.
+ } else if (immed == 0 || immed == 3) {
+
+ SwapVector[VecIdx].IsSwappable = 1;
+ SwapVector[VecIdx].SpecialHandling = SHValues::SH_XXPERMDI;
+
+ unsigned trueReg1 = lookThruCopyLike(MI.getOperand(1).getReg(),
+ VecIdx);
+ unsigned trueReg2 = lookThruCopyLike(MI.getOperand(2).getReg(),
+ VecIdx);
+ if (trueReg1 == trueReg2)
+ SwapVector[VecIdx].MentionsPhysVR = 0;
+
+ } else {
+ // We can still handle these by adjusting the form of the XXPERMDI.
+ SwapVector[VecIdx].IsSwappable = 1;
+ SwapVector[VecIdx].SpecialHandling = SHValues::SH_XXPERMDI;
}
break;
+ }
case PPC::LVX:
// Non-permuting loads are currently unsafe. We can use special
// handling for this in the future. By not marking these as
SwapVector[VecIdx].IsLoad = 1;
SwapVector[VecIdx].IsSwap = 1;
break;
+ case PPC::LXSDX:
+ case PPC::LXSSPX:
+ // A load of a floating-point value into the high-order half of
+ // a vector register is safe, provided that we introduce a swap
+ // following the load, which will be done by the SUBREG_TO_REG
+ // support. So just mark these as safe.
+ SwapVector[VecIdx].IsLoad = 1;
+ SwapVector[VecIdx].IsSwappable = 1;
+ break;
case PPC::STVX:
// Non-permuting stores are currently unsafe. We can use special
// handling for this in the future. By not marking these as
SwapVector[VecIdx].IsStore = 1;
SwapVector[VecIdx].IsSwap = 1;
break;
- case PPC::SUBREG_TO_REG:
+ case PPC::COPY:
// These are fine provided they are moving between full vector
- // register classes. For example, the VRs are a subset of the
- // VSRs, but each VR and each VSR is a full 128-bit register.
+ // register classes.
if (isVecReg(MI.getOperand(0).getReg()) &&
- isVecReg(MI.getOperand(2).getReg()))
+ isVecReg(MI.getOperand(1).getReg()))
+ SwapVector[VecIdx].IsSwappable = 1;
+ // If we have a copy from one scalar floating-point register
+ // to another, we can accept this even if it is a physical
+ // register. The only way this gets involved is if it feeds
+ // a SUBREG_TO_REG, which is handled by introducing a swap.
+ else if (isScalarVecReg(MI.getOperand(0).getReg()) &&
+ isScalarVecReg(MI.getOperand(1).getReg()))
SwapVector[VecIdx].IsSwappable = 1;
break;
- case PPC::COPY:
+ case PPC::SUBREG_TO_REG: {
// These are fine provided they are moving between full vector
- // register classes.
+ // register classes. If they are moving from a scalar
+ // floating-point class to a vector class, we can handle those
+ // as well, provided we introduce a swap. It is generally the
+ // case that we will introduce fewer swaps than we remove, but
+ // (FIXME) a cost model could be used. However, introduced
+ // swaps could potentially be CSEd, so this is not trivial.
if (isVecReg(MI.getOperand(0).getReg()) &&
- isVecReg(MI.getOperand(1).getReg()))
+ isVecReg(MI.getOperand(2).getReg()))
+ SwapVector[VecIdx].IsSwappable = 1;
+ else if (isVecReg(MI.getOperand(0).getReg()) &&
+ isScalarVecReg(MI.getOperand(2).getReg())) {
SwapVector[VecIdx].IsSwappable = 1;
+ SwapVector[VecIdx].SpecialHandling = SHValues::SH_COPYWIDEN;
+ }
break;
+ }
case PPC::VSPLTB:
case PPC::VSPLTH:
case PPC::VSPLTW:
case PPC::LVSL:
case PPC::LVSR:
case PPC::LVXL:
- case PPC::LXVDSX:
case PPC::STVEBX:
case PPC::STVEHX:
case PPC::STVEWX:
case PPC::STVXL:
+ // We can handle STXSDX and STXSSPX similarly to LXSDX and LXSSPX,
+ // by adding special handling for narrowing copies as well as
+ // widening ones. However, I've experimented with this, and in
+ // practice we currently do not appear to use STXSDX fed by
+ // a narrowing copy from a full vector register. Since I can't
+ // generate any useful test cases, I've left this alone for now.
case PPC::STXSDX:
+ case PPC::STXSSPX:
case PPC::VCIPHER:
case PPC::VCIPHERLAST:
case PPC::VMRGHB:
case PPC::VUPKLSW:
case PPC::XXMRGHW:
case PPC::XXMRGLW:
+ // XXSLDWI could be replaced by a general permute with one of three
+ // permute control vectors (for shift values 1, 2, 3). However,
+ // VPERM has a more restrictive register class.
+ case PPC::XXSLDWI:
case PPC::XXSPLTW:
break;
}
// such operations to the ultimate source register. If a
// physical register is encountered, we stop the search and
// flag the swap entry indicated by VecIdx (the original
-// XXPERMDI) as mentioning a physical register. Similarly
-// for implicit subregister mentions (which should never
-// happen).
+// XXPERMDI) as mentioning a physical register.
unsigned PPCVSXSwapRemoval::lookThruCopyLike(unsigned SrcReg,
unsigned VecIdx) {
MachineInstr *MI = MRI->getVRegDef(SrcReg);
if (!MI->isCopyLike())
return SrcReg;
- unsigned CopySrcReg, CopySrcSubreg;
- if (MI->isCopy()) {
+ unsigned CopySrcReg;
+ if (MI->isCopy())
CopySrcReg = MI->getOperand(1).getReg();
- CopySrcSubreg = MI->getOperand(1).getSubReg();
- } else {
+ else {
assert(MI->isSubregToReg() && "bad opcode for lookThruCopyLike");
CopySrcReg = MI->getOperand(2).getReg();
- CopySrcSubreg = MI->getOperand(2).getSubReg();
}
if (!TargetRegisterInfo::isVirtualRegister(CopySrcReg)) {
- SwapVector[VecIdx].MentionsPhysVR = 1;
- return CopySrcReg;
- }
-
- if (CopySrcSubreg != 0) {
- SwapVector[VecIdx].HasImplicitSubreg = 1;
+ if (!isScalarVecReg(CopySrcReg))
+ SwapVector[VecIdx].MentionsPhysVR = 1;
return CopySrcReg;
}
DEBUG(MI->dump());
// It's sufficient to walk vector uses and join them to their unique
- // definitions. In addition, check *all* vector register operands
- // for physical regs.
+ // definitions. In addition, check full vector register operands
+ // for physical regs. We exclude partial-vector register operands
+ // because we can handle them if copied to a full vector.
for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
- if (!isVecReg(Reg))
+ if (!isVecReg(Reg) && !isScalarVecReg(Reg))
continue;
if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
- SwapVector[EntryIdx].MentionsPhysVR = 1;
+ if (!(MI->isCopy() && isScalarVecReg(Reg)))
+ SwapVector[EntryIdx].MentionsPhysVR = 1;
continue;
}
for (unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {
int Repr = EC->getLeaderValue(SwapVector[EntryIdx].VSEId);
- // Reject webs containing mentions of physical registers or implicit
- // subregs, or containing operations that we don't know how to handle
- // in a lane-permuted region.
+ // If representative is already rejected, don't waste further time.
+ if (SwapVector[Repr].WebRejected)
+ continue;
+
+ // Reject webs containing mentions of physical or partial registers, or
+ // containing operations that we don't know how to handle in a lane-
+ // permuted region.
if (SwapVector[EntryIdx].MentionsPhysVR ||
- SwapVector[EntryIdx].HasImplicitSubreg ||
+ SwapVector[EntryIdx].MentionsPartialVR ||
!(SwapVector[EntryIdx].IsSwappable || SwapVector[EntryIdx].IsSwap)) {
SwapVector[Repr].WebRejected = 1;
DEBUG(dbgs() <<
- format("Web %d rejected for physreg, subreg, or not swap[pable]\n",
- Repr));
+ format("Web %d rejected for physreg, partial reg, or not "
+ "swap[pable]\n", Repr));
DEBUG(dbgs() << " in " << EntryIdx << ": ");
DEBUG(SwapVector[EntryIdx].VSEMI->dump());
DEBUG(dbgs() << "\n");
}
}
- // Reject webs than contain swapping stores that are fed by something
+ // Reject webs that contain swapping stores that are fed by something
// other than a swap instruction.
} else if (SwapVector[EntryIdx].IsStore && SwapVector[EntryIdx].IsSwap) {
MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
}
}
+// Create an xxswapd instruction and insert it prior to the given point.
+// MI is used to determine basic block and debug loc information.
+// FIXME: When inserting a swap, we should check whether SrcReg is
+// defined by another swap: SrcReg = XXPERMDI Reg, Reg, 2; If so,
+// then instead we should generate a copy from Reg to DstReg.
+void PPCVSXSwapRemoval::insertSwap(MachineInstr *MI,
+ MachineBasicBlock::iterator InsertPoint,
+ unsigned DstReg, unsigned SrcReg) {
+ BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(),
+ TII->get(PPC::XXPERMDI), DstReg)
+ .addReg(SrcReg)
+ .addReg(SrcReg)
+ .addImm(2);
+}
+
// The identified swap entry requires special handling to allow its
// containing computation to be optimized. Perform that handling
// here.
-// FIXME: This code is to be phased in with subsequent patches.
+// FIXME: Additional opportunities will be phased in with subsequent
+// patches.
void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) {
switch (SwapVector[EntryIdx].SpecialHandling) {
default:
- assert(false && "Unexpected special handling type");
- break;
+ llvm_unreachable("Unexpected special handling type");
// For splats based on an index into a vector, add N/2 modulo N
// to the index, where N is the number of vector elements.
switch (MI->getOpcode()) {
default:
- assert(false && "Unexpected splat opcode");
+ llvm_unreachable("Unexpected splat opcode");
case PPC::VSPLTB: NElts = 16; break;
case PPC::VSPLTH: NElts = 8; break;
case PPC::VSPLTW: NElts = 4; break;
break;
}
+ // For an XXPERMDI that isn't handled otherwise, we need to
+ // reverse the order of the operands. If the selector operand
+ // has a value of 0 or 3, we need to change it to 3 or 0,
+ // respectively. Otherwise we should leave it alone. (This
+ // is equivalent to reversing the two bits of the selector
+ // operand and complementing the result.)
+ case SHValues::SH_XXPERMDI: {
+ MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
+
+ DEBUG(dbgs() << "Changing XXPERMDI: ");
+ DEBUG(MI->dump());
+
+ unsigned Selector = MI->getOperand(3).getImm();
+ if (Selector == 0 || Selector == 3)
+ Selector = 3 - Selector;
+ MI->getOperand(3).setImm(Selector);
+
+ unsigned Reg1 = MI->getOperand(1).getReg();
+ unsigned Reg2 = MI->getOperand(2).getReg();
+ MI->getOperand(1).setReg(Reg2);
+ MI->getOperand(2).setReg(Reg1);
+
+ DEBUG(dbgs() << " Into: ");
+ DEBUG(MI->dump());
+ break;
+ }
+
+ // For a copy from a scalar floating-point register to a vector
+ // register, removing swaps will leave the copied value in the
+ // wrong lane. Insert a swap following the copy to fix this.
+ case SHValues::SH_COPYWIDEN: {
+ MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
+
+ DEBUG(dbgs() << "Changing SUBREG_TO_REG: ");
+ DEBUG(MI->dump());
+
+ unsigned DstReg = MI->getOperand(0).getReg();
+ const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
+ unsigned NewVReg = MRI->createVirtualRegister(DstRC);
+
+ MI->getOperand(0).setReg(NewVReg);
+ DEBUG(dbgs() << " Into: ");
+ DEBUG(MI->dump());
+
+ auto InsertPoint = ++MachineBasicBlock::iterator(MI);
+
+ // Note that an XXPERMDI requires a VSRC, so if the SUBREG_TO_REG
+ // is copying to a VRRC, we need to be careful to avoid a register
+ // assignment problem. In this case we must copy from VRRC to VSRC
+ // prior to the swap, and from VSRC to VRRC following the swap.
+ // Coalescing will usually remove all this mess.
+ if (DstRC == &PPC::VRRCRegClass) {
+ unsigned VSRCTmp1 = MRI->createVirtualRegister(&PPC::VSRCRegClass);
+ unsigned VSRCTmp2 = MRI->createVirtualRegister(&PPC::VSRCRegClass);
+
+ BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(),
+ TII->get(PPC::COPY), VSRCTmp1)
+ .addReg(NewVReg);
+ DEBUG(std::prev(InsertPoint)->dump());
+
+ insertSwap(MI, InsertPoint, VSRCTmp2, VSRCTmp1);
+ DEBUG(std::prev(InsertPoint)->dump());
+
+ BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(),
+ TII->get(PPC::COPY), DstReg)
+ .addReg(VSRCTmp2);
+ DEBUG(std::prev(InsertPoint)->dump());
+
+ } else {
+ insertSwap(MI, InsertPoint, DstReg, NewVReg);
+ DEBUG(std::prev(InsertPoint)->dump());
+ }
+ break;
+ }
}
}
DEBUG(dbgs() << "swap ");
if (SwapVector[EntryIdx].MentionsPhysVR)
DEBUG(dbgs() << "physreg ");
- if (SwapVector[EntryIdx].HasImplicitSubreg)
- DEBUG(dbgs() << "implsubreg ");
+ if (SwapVector[EntryIdx].MentionsPartialVR)
+ DEBUG(dbgs() << "partialreg ");
if (SwapVector[EntryIdx].IsSwappable) {
DEBUG(dbgs() << "swappable ");
case SH_SPLAT:
DEBUG(dbgs() << "special:splat ");
break;
+ case SH_XXPERMDI:
+ DEBUG(dbgs() << "special:xxpermdi ");
+ break;
+ case SH_COPYWIDEN:
+ DEBUG(dbgs() << "special:copywiden ");
+ break;
}
}
DEBUG(dbgs() << "\n");
}
-} // namespace
+} // end default namespace
INITIALIZE_PASS_BEGIN(PPCVSXSwapRemoval, DEBUG_TYPE,
"PowerPC VSX Swap Removal", false, false)