From 860c08cad5b7c1359123bb2b0e74df4b6e48a15c Mon Sep 17 00:00:00 2001
From: Hal Finkel
Date: Thu, 18 Apr 2013 22:15:08 +0000
Subject: [PATCH] Implement optimizeCompareInstr for PPC

Many PPC instructions have a so-called 'record form' which stores to a
specific condition register the result of comparing the result of the
instruction with zero (always as a signed comparison). For integer
operations on PPC64, this is always a 64-bit comparison.

This implementation is derived from the implementation in the ARM
backend; there are some differences because PPC condition registers are
allocatable virtual registers (although the record forms always use a
specific one), and we look for a matching subtraction instruction after
the compare (but before the first use) in addition to before it.

Floating-point compares are left alone: the floating-point record forms
set CR1 from the FPSCR exception status bits rather than from a
comparison of the result with zero.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@179802 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/PowerPC/PPCInstrInfo.cpp | 334 ++++++++++++++++++++++++++++
 lib/Target/PowerPC/PPCInstrInfo.h   |  15 ++
 test/CodeGen/PowerPC/optcmp.ll      | 101 +++++++++
 3 files changed, 450 insertions(+)
 create mode 100644 test/CodeGen/PowerPC/optcmp.ll

diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 68ea976c344..b78f071ef65 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -32,6 +32,7 @@
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/raw_ostream.h"
 
+#define GET_INSTRMAP_INFO
 #define GET_INSTRINFO_CTOR
 #include "PPCGenInstrInfo.inc"
 
@@ -1055,6 +1056,339 @@ bool PPCInstrInfo::isPredicable(MachineInstr *MI) const {
   }
 }
 
+/// getSwappedCmpPredicate - Return the predicate that holds for (b, a) exactly
+/// when Pred holds for (a, b). Equality and unordered predicates are symmetric
+/// and are returned unchanged.
+static PPC::Predicate getSwappedCmpPredicate(PPC::Predicate Pred) {
+  switch (Pred) {
+  case PPC::PRED_LT: return PPC::PRED_GT;
+  case PPC::PRED_GT: return PPC::PRED_LT;
+  case PPC::PRED_LE: return PPC::PRED_GE;
+  case PPC::PRED_GE: return PPC::PRED_LE;
+  default:           return Pred;
+  }
+}
+
+/// analyzeCompare - If MI is a compare this hook understands, report what it
+/// compares and return true; otherwise return false. For compare-immediate
+/// forms SrcReg2 is 0, Value holds the immediate and Mask is 0xFFFF; for
+/// register-register forms SrcReg and SrcReg2 are the two compared registers.
+bool PPCInstrInfo::analyzeCompare(const MachineInstr *MI,
+                                  unsigned &SrcReg, unsigned &SrcReg2,
+                                  int &Mask, int &Value) const {
+  unsigned Opc = MI->getOpcode();
+
+  switch (Opc) {
+  default: return false;
+  case PPC::CMPWI:
+  case PPC::CMPLWI:
+  case PPC::CMPDI:
+  case PPC::CMPLDI:
+    SrcReg = MI->getOperand(1).getReg();
+    SrcReg2 = 0;
+    Value = MI->getOperand(2).getImm();
+    Mask = 0xFFFF;
+    return true;
+  case PPC::CMPW:
+  case PPC::CMPLW:
+  case PPC::CMPD:
+  case PPC::CMPLD:
+  case PPC::FCMPUS:
+  case PPC::FCMPUD:
+    SrcReg = MI->getOperand(1).getReg();
+    SrcReg2 = MI->getOperand(2).getReg();
+    // There is no immediate here; clear the remaining out-parameters so they
+    // are never left indeterminate.
+    Value = 0;
+    Mask = 0;
+    return true;
+  }
+}
+
+/// optimizeCompareInstr - Try to delete CmpInstr by getting the same condition
+/// bits from a record-form instruction: either the instruction defining SrcReg
+/// (for a compare against zero) or a subf of the two compared registers. On
+/// success the compare is erased, its CR result is replaced by a copy from
+/// CR0, and true is returned; otherwise nothing is changed and false is
+/// returned.
+bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr,
+                                        unsigned SrcReg, unsigned SrcReg2,
+                                        int Mask, int Value,
+                                        const MachineRegisterInfo *MRI) const {
+  int OpC = CmpInstr->getOpcode();
+  unsigned CRReg = CmpInstr->getOperand(0).getReg();
+
+  // The floating-point record forms set CR1 from the FPSCR exception status
+  // bits, not from a comparison of the result with zero, so they can never
+  // replace a floating-point compare.
+  if (OpC == PPC::FCMPUS || OpC == PPC::FCMPUD)
+    return false;
+
+  // The fixed-point record forms always write CR0.
+  const unsigned CRRecReg = PPC::CR0;
+
+  // The record forms set the condition register based on a signed comparison
+  // with zero (so says the ISA manual). This is not as straightforward as it
+  // seems, however, because this is always a 64-bit comparison on PPC64, even
+  // for instructions that are 32-bit in nature (like slw for example).
+  // So, on PPC32, for unsigned comparisons, we can use the record forms only
+  // for equality checks (as those don't depend on the sign). On PPC64,
+  // we are restricted to equality for unsigned 64-bit comparisons and for
+  // signed 32-bit comparisons the applicability is more restricted.
+  bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
+  bool is32BitSignedCompare   = OpC == PPC::CMPWI  || OpC == PPC::CMPW;
+  bool is32BitUnsignedCompare = OpC == PPC::CMPLWI || OpC == PPC::CMPLW;
+  bool is64BitUnsignedCompare = OpC == PPC::CMPLDI || OpC == PPC::CMPLD;
+
+  // Get the unique definition of SrcReg.
+  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
+  if (!MI) return false;
+  int MIOpC = MI->getOpcode();
+
+  bool equalityOnly = false;
+  bool noSub = false;
+  if (isPPC64) {
+    if (is32BitSignedCompare) {
+      // We can perform this optimization only if MI is sign-extending.
+      if (MIOpC == PPC::SRAW  || MIOpC == PPC::SRAWo ||
+          MIOpC == PPC::SRAWI || MIOpC == PPC::SRAWIo ||
+          MIOpC == PPC::EXTSB || MIOpC == PPC::EXTSBo ||
+          MIOpC == PPC::EXTSH || MIOpC == PPC::EXTSHo ||
+          MIOpC == PPC::EXTSW || MIOpC == PPC::EXTSWo) {
+        noSub = true;
+      } else
+        return false;
+    } else if (is32BitUnsignedCompare) {
+      // We can perform this optimization, equality only, if MI is
+      // zero-extending.
+      if (MIOpC == PPC::CNTLZW || MIOpC == PPC::CNTLZWo ||
+          MIOpC == PPC::SLW    || MIOpC == PPC::SLWo ||
+          MIOpC == PPC::SRW    || MIOpC == PPC::SRWo) {
+        noSub = true;
+        equalityOnly = true;
+      } else
+        return false;
+    } else
+      equalityOnly = is64BitUnsignedCompare;
+  } else
+    equalityOnly = is32BitUnsignedCompare;
+
+  if (equalityOnly) {
+    // We need to check the uses of the condition register in order to reject
+    // non-equality comparisons.
+    for (MachineRegisterInfo::use_iterator I = MRI->use_begin(CRReg),
+         IE = MRI->use_end(); I != IE; ++I) {
+      MachineInstr *UseMI = &*I;
+      if (UseMI->getOpcode() == PPC::BCC) {
+        unsigned Pred = UseMI->getOperand(0).getImm();
+        if (Pred == PPC::PRED_EQ || Pred == PPC::PRED_NE)
+          continue;
+
+        return false;
+      } else if (UseMI->getOpcode() == PPC::ISEL ||
+                 UseMI->getOpcode() == PPC::ISEL8) {
+        unsigned SubIdx = UseMI->getOperand(3).getSubReg();
+        if (SubIdx == PPC::sub_eq)
+          continue;
+
+        return false;
+      } else
+        return false;
+    }
+  }
+
+  // I starts at the compare and is first advanced to its first user (or the
+  // end of the block); the search for a subtract then walks backward from
+  // there toward E, the definition of SrcReg.
+  MachineBasicBlock::iterator I = CmpInstr, E = MI,
+    B = CmpInstr->getParent()->begin();
+
+  // Scan forward to find the first use of the compare.
+  for (MachineBasicBlock::iterator EL = CmpInstr->getParent()->end();
+       I != EL; ++I) {
+    bool FoundUse = false;
+    for (MachineRegisterInfo::use_iterator J = MRI->use_begin(CRReg),
+         JE = MRI->use_end(); J != JE; ++J)
+      if (&*J == &*I) {
+        FoundUse = true;
+        break;
+      }
+
+    if (FoundUse)
+      break;
+  }
+
+  // Early exit if we're at the beginning of the BB.
+  if (I == B) return false;
+
+  // There are two possible candidates which can be changed to set CR0.
+  // One is MI, the other is a SUB instruction.
+  // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1).
+  MachineInstr *Sub = NULL;
+  if (SrcReg2 != 0)
+    // MI is not a candidate for CMPrr.
+    MI = NULL;
+  // FIXME: Conservatively refuse to convert an instruction which isn't in the
+  // same BB as the comparison. This is to allow the check below to avoid calls
+  // (and other explicit clobbers); instead we should really check for these
+  // more explicitly (in at least a few predecessors).
+  else if (MI->getParent() != CmpInstr->getParent() || Value != 0) {
+    // PPC does not have a record-form SUBri.
+    return false;
+  }
+
+  // Search for Sub.
+  const TargetRegisterInfo *TRI = &getRegisterInfo();
+  --I;
+  for (; I != E && !noSub; --I) {
+    const MachineInstr &Instr = *I;
+    unsigned IOpC = Instr.getOpcode();
+
+    if (&*I != CmpInstr && (
+        Instr.modifiesRegister(CRRecReg, TRI) ||
+        Instr.readsRegister(CRRecReg, TRI)))
+      // This instruction modifies or uses the record condition register after
+      // the one we want to change. While we could do this transformation, it
+      // would likely not be profitable. This transformation removes one
+      // instruction, and so even forcing RA to generate one move probably
+      // makes it unprofitable.
+      return false;
+
+    // Check whether CmpInstr can be made redundant by the current instruction.
+    // NOTE(review): the sign of a subtraction result matches the signed
+    // compare of its operands only if the subtraction does not overflow;
+    // confirm this is acceptable for non-equality users.
+    if ((OpC == PPC::CMPW || OpC == PPC::CMPLW ||
+         OpC == PPC::CMPD || OpC == PPC::CMPLD) &&
+        (IOpC == PPC::SUBF || IOpC == PPC::SUBF8) &&
+        ((Instr.getOperand(1).getReg() == SrcReg &&
+          Instr.getOperand(2).getReg() == SrcReg2) ||
+        (Instr.getOperand(1).getReg() == SrcReg2 &&
+         Instr.getOperand(2).getReg() == SrcReg))) {
+      Sub = &*I;
+      break;
+    }
+
+    if (I == B)
+      // We reached the top of the block without meeting E or a matching subf.
+      return false;
+  }
+
+  // Return false if no candidates exist.
+  if (!MI && !Sub)
+    return false;
+
+  // The single candidate is called MI.
+  if (!MI) MI = Sub;
+
+  int NewOpC = -1;
+  MIOpC = MI->getOpcode();
+  if (MIOpC == PPC::ANDIo || MIOpC == PPC::ANDIo8)
+    NewOpC = MIOpC;
+  else {
+    NewOpC = PPC::getRecordFormOpcode(MIOpC);
+    if (NewOpC == -1 && PPC::getNonRecordFormOpcode(MIOpC) != -1)
+      NewOpC = MIOpC;
+  }
+
+  // FIXME: On the non-embedded POWER architectures, only some of the record
+  // forms are fast, and we should use only the fast ones.
+
+  // Give up unless the candidate has a record form (or already is one). Even
+  // when it does, the condition tested by the users may still have to be
+  // swapped below.
+  if (NewOpC == -1)
+    return false;
+
+  SmallVector<std::pair<MachineOperand*, PPC::Predicate>, 4> PredsToUpdate;
+  SmallVector<std::pair<MachineOperand*, unsigned>, 4> SubRegsToUpdate;
+
+  // If the record form computes r2 - r1 where the compare was CMP(r1, r2), the
+  // users must test the swapped condition: r1 < r2 becomes "result > 0", and
+  // so on. Collect the user operands to rewrite here, and rewrite them only
+  // once it is known that CmpInstr will be removed.
+  bool ShouldSwap = false;
+  if (Sub) {
+    ShouldSwap = SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
+      Sub->getOperand(2).getReg() == SrcReg;
+
+    // The operands to subf are the opposite of sub (subf d, a, b computes
+    // b - a), so invert the order.
+    ShouldSwap = !ShouldSwap;
+  }
+
+  if (ShouldSwap)
+    for (MachineRegisterInfo::use_iterator I = MRI->use_begin(CRReg),
+         IE = MRI->use_end(); I != IE; ++I) {
+      MachineInstr *UseMI = &*I;
+      if (UseMI->getOpcode() == PPC::BCC) {
+        PPC::Predicate Pred = (PPC::Predicate) UseMI->getOperand(0).getImm();
+        // Swapping the operands exchanges LT and GT; it is not a logical
+        // negation (which would wrongly turn GT into LE).
+        PredsToUpdate.push_back(std::make_pair(&((*I).getOperand(0)),
+                                getSwappedCmpPredicate(Pred)));
+      } else if (UseMI->getOpcode() == PPC::ISEL ||
+                 UseMI->getOpcode() == PPC::ISEL8) {
+        // Likewise for isel: test the opposite ordering bit and leave the
+        // eq/un bits (and the selected values) alone.
+        unsigned NewSubReg = UseMI->getOperand(3).getSubReg();
+        if (NewSubReg == PPC::sub_lt)
+          NewSubReg = PPC::sub_gt;
+        else if (NewSubReg == PPC::sub_gt)
+          NewSubReg = PPC::sub_lt;
+
+        SubRegsToUpdate.push_back(std::make_pair(&((*I).getOperand(3)),
+                                                 NewSubReg));
+      } else // We need to abort on a user we don't understand.
+        return false;
+    }
+
+  // Copy the CR written by the record-form instruction into the virtual
+  // register that the compare used to define. If the instruction was not
+  // previously in record form, then set the kill flag on the CR.
+  CmpInstr->eraseFromParent();
+
+  MachineBasicBlock::iterator MII = MI;
+  BuildMI(*MI->getParent(), llvm::next(MII), MI->getDebugLoc(),
+          get(TargetOpcode::COPY), CRReg)
+    .addReg(CRRecReg, MIOpC != NewOpC ? RegState::Kill : 0);
+
+  if (MIOpC != NewOpC) {
+    // We need to be careful here: we're replacing one instruction with
+    // another, and we need to make sure that we get all of the right
+    // implicit uses and defs. On the other hand, the caller may be holding
+    // an iterator to this instruction, and so we can't delete it (this is
+    // specifically the case if this is the instruction directly after the
+    // compare).
+
+    const MCInstrDesc &NewDesc = get(NewOpC);
+    MI->setDesc(NewDesc);
+
+    if (NewDesc.ImplicitDefs)
+      for (const uint16_t *ImpDefs = NewDesc.getImplicitDefs();
+           *ImpDefs; ++ImpDefs)
+        if (!MI->definesRegister(*ImpDefs))
+          MI->addOperand(*MI->getParent()->getParent(),
+                         MachineOperand::CreateReg(*ImpDefs, true, true));
+    if (NewDesc.ImplicitUses)
+      for (const uint16_t *ImpUses = NewDesc.getImplicitUses();
+           *ImpUses; ++ImpUses)
+        if (!MI->readsRegister(*ImpUses))
+          MI->addOperand(*MI->getParent()->getParent(),
+                         MachineOperand::CreateReg(*ImpUses, false, true));
+  }
+
+  // Apply the swapped conditions collected above: branch predicates first,
+  // then the CR bit tested by each isel.
+  for (unsigned i = 0, e = PredsToUpdate.size(); i < e; i++)
+    PredsToUpdate[i].first->setImm(PredsToUpdate[i].second);
+
+  for (unsigned i = 0, e = SubRegsToUpdate.size(); i < e; i++)
+    SubRegsToUpdate[i].first->setSubReg(SubRegsToUpdate[i].second);
+
+  return true;
+}
+
 /// GetInstSize - Return the number of bytes of code the specified
 /// instruction may be.  This returns the maximum number of bytes.
 ///
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index b5fcc85ce48..34a1a73a180 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -205,6 +205,21 @@ public:
 
   virtual bool isPredicable(MachineInstr *MI) const;
 
+  // Comparison optimization.
+
+  /// analyzeCompare - Describe the operands of the compare MI; returns false
+  /// if MI is not a compare this target analyzes.
+  virtual bool analyzeCompare(const MachineInstr *MI,
+                              unsigned &SrcReg, unsigned &SrcReg2,
+                              int &Mask, int &Value) const;
+
+  /// optimizeCompareInstr - Try to remove CmpInstr by using the CR result of a
+  /// record-form instruction; returns true if the compare was removed.
+  virtual bool optimizeCompareInstr(MachineInstr *CmpInstr,
+                                    unsigned SrcReg, unsigned SrcReg2,
+                                    int Mask, int Value,
+                                    const MachineRegisterInfo *MRI) const;
+
   /// GetInstSize - Return the number of bytes of code the specified
   /// instruction may be.  This returns the maximum number of bytes.
   ///
diff --git a/test/CodeGen/PowerPC/optcmp.ll b/test/CodeGen/PowerPC/optcmp.ll
new file mode 100644
index 00000000000..6d1f90637b5
--- /dev/null
+++ b/test/CodeGen/PowerPC/optcmp.ll
@@ -0,0 +1,101 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define signext i32 @foo(i32 signext %a, i32 signext %b, i32* nocapture %c) #0 {
+entry:
+  %sub = sub nsw i32 %a, %b
+  store i32 %sub, i32* %c, align 4, !tbaa !0
+  %cmp = icmp sgt i32 %a, %b
+  %cond = select i1 %cmp, i32 %a, i32 %b
+  ret i32 %cond
+
+; CHECK: @foo
+; CHECK-NOT: subf.
+}
+
+define signext i32 @foo2(i32 signext %a, i32 signext %b, i32* nocapture %c) #0 {
+entry:
+  %shl = shl i32 %a, %b
+  store i32 %shl, i32* %c, align 4, !tbaa !0
+  %cmp = icmp sgt i32 %shl, 0
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+
+; CHECK: @foo2
+; CHECK-NOT: slw.
+}
+
+define i64 @fool(i64 %a, i64 %b, i64* nocapture %c) #0 {
+entry:
+  %sub = sub nsw i64 %a, %b
+  store i64 %sub, i64* %c, align 8, !tbaa !3
+  %cmp = icmp sgt i64 %a, %b
+  %cond = select i1 %cmp, i64 %a, i64 %b
+  ret i64 %cond
+
+; CHECK: @fool
+; CHECK: subf. [[REG:[0-9]+]], 4, 3
+; CHECK: isel 3, 3, 4, 1
+; CHECK: std [[REG]], 0(5)
+}
+
+define i64 @foolb(i64 %a, i64 %b, i64* nocapture %c) #0 {
+entry:
+  %sub = sub nsw i64 %a, %b
+  store i64 %sub, i64* %c, align 8, !tbaa !3
+  %cmp = icmp sle i64 %a, %b
+  %cond = select i1 %cmp, i64 %a, i64 %b
+  ret i64 %cond
+
+; CHECK: @foolb
+; CHECK: subf. [[REG:[0-9]+]], 4, 3
+; CHECK: isel 3, 4, 3, 1
+; CHECK: std [[REG]], 0(5)
+}
+
+define i64 @foo2l(i64 %a, i64 %b, i64* nocapture %c) #0 {
+entry:
+  %shl = shl i64 %a, %b
+  store i64 %shl, i64* %c, align 8, !tbaa !3
+  %cmp = icmp sgt i64 %shl, 0
+  %conv1 = zext i1 %cmp to i64
+  ret i64 %conv1
+
+; CHECK: @foo2l
+; CHECK: sld. 4, 3, 4
+; CHECK: std 4, 0(5)
+}
+
+define double @food(double %a, double %b, double* nocapture %c) #0 {
+entry:
+  %sub = fsub double %a, %b
+  store double %sub, double* %c, align 8, !tbaa !3
+  %cmp = fcmp ogt double %a, %b
+  %cond = select i1 %cmp, double %a, double %b
+  ret double %cond
+
+; CHECK: @food
+; CHECK-NOT: fsub.
+; CHECK: stfd 0, 0(5)
+}
+
+define float @foof(float %a, float %b, float* nocapture %c) #0 {
+entry:
+  %sub = fsub float %a, %b
+  store float %sub, float* %c, align 4, !tbaa !3
+  %cmp = fcmp ogt float %a, %b
+  %cond = select i1 %cmp, float %a, float %b
+  ret float %cond
+
+; CHECK: @foof
+; CHECK-NOT: fsubs.
+; CHECK: stfs 0, 0(5)
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !"long", metadata !1}
+!4 = metadata !{metadata !"any pointer", metadata !1}
+
-- 
2.34.1