#include "PPCHazardRecognizers.h"
#include "PPC.h"
#include "PPCInstrInfo.h"
+#include "PPCTargetMachine.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+bool PPCDispatchGroupSBHazardRecognizer::isLoadAfterStore(SUnit *SU) {
+ // FIXME: Move this.
+ if (isBCTRAfterSet(SU))
+ return true;
+
+ const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
+ if (!MCID)
+ return false;
+
+ if (!MCID->mayLoad())
+ return false;
+
+ // SU is a load; for any predecessors in this dispatch group, that are stores,
+ // and with which we have an ordering dependency, return true.
+ for (unsigned i = 0, ie = (unsigned) SU->Preds.size(); i != ie; ++i) {
+ const MCInstrDesc *PredMCID = DAG->getInstrDesc(SU->Preds[i].getSUnit());
+ if (!PredMCID || !PredMCID->mayStore())
+ continue;
+
+ if (!SU->Preds[i].isNormalMemory() && !SU->Preds[i].isBarrier())
+ continue;
+
+ for (unsigned j = 0, je = CurGroup.size(); j != je; ++j)
+ if (SU->Preds[i].getSUnit() == CurGroup[j])
+ return true;
+ }
+
+ return false;
+}
+
+bool PPCDispatchGroupSBHazardRecognizer::isBCTRAfterSet(SUnit *SU) {
+ const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
+ if (!MCID)
+ return false;
+
+ if (!MCID->isBranch())
+ return false;
+
+ // SU is a branch; for any predecessors in this dispatch group, with which we
+ // have a data dependence and set the counter register, return true.
+ for (unsigned i = 0, ie = (unsigned) SU->Preds.size(); i != ie; ++i) {
+ const MCInstrDesc *PredMCID = DAG->getInstrDesc(SU->Preds[i].getSUnit());
+ if (!PredMCID || PredMCID->getSchedClass() != PPC::Sched::IIC_SprMTSPR)
+ continue;
+
+ if (SU->Preds[i].isCtrl())
+ continue;
+
+ for (unsigned j = 0, je = CurGroup.size(); j != je; ++j)
+ if (SU->Preds[i].getSUnit() == CurGroup[j])
+ return true;
+ }
+
+ return false;
+}
+
+// FIXME: Remove this when we don't need this:
+namespace llvm { namespace PPC { extern int getNonRecordFormOpcode(uint16_t); } }
+
+// FIXME: A lot of code in PPCDispatchGroupSBHazardRecognizer is P7 specific.
+
+bool PPCDispatchGroupSBHazardRecognizer::mustComeFirst(const MCInstrDesc *MCID,
+ unsigned &NSlots) {
+ // FIXME: Indirectly, this information is contained in the itinerary, and
+ // we should derive it from there instead of separately specifying it
+ // here.
+ unsigned IIC = MCID->getSchedClass();
+ switch (IIC) {
+ default:
+ NSlots = 1;
+ break;
+ case PPC::Sched::IIC_IntDivW:
+ case PPC::Sched::IIC_IntDivD:
+ case PPC::Sched::IIC_LdStLoadUpd:
+ case PPC::Sched::IIC_LdStLDU:
+ case PPC::Sched::IIC_LdStLFDU:
+ case PPC::Sched::IIC_LdStLFDUX:
+ case PPC::Sched::IIC_LdStLHA:
+ case PPC::Sched::IIC_LdStLHAU:
+ case PPC::Sched::IIC_LdStLWA:
+ case PPC::Sched::IIC_LdStSTDU:
+ case PPC::Sched::IIC_LdStSTFDU:
+ NSlots = 2;
+ break;
+ case PPC::Sched::IIC_LdStLoadUpdX:
+ case PPC::Sched::IIC_LdStLDUX:
+ case PPC::Sched::IIC_LdStLHAUX:
+ case PPC::Sched::IIC_LdStLWARX:
+ case PPC::Sched::IIC_LdStLDARX:
+ case PPC::Sched::IIC_LdStSTDUX:
+ case PPC::Sched::IIC_LdStSTDCX:
+ case PPC::Sched::IIC_LdStSTWCX:
+ case PPC::Sched::IIC_BrMCRX: // mtcr
+ // FIXME: Add sync/isync (here and in the itinerary).
+ NSlots = 4;
+ break;
+ }
+
+ // FIXME: record-form instructions need a different itinerary class.
+ if (NSlots == 1 && PPC::getNonRecordFormOpcode(MCID->getOpcode()) != -1)
+ NSlots = 2;
+
+ switch (IIC) {
+ default:
+ // All multi-slot instructions must come first.
+ return NSlots > 1;
+ case PPC::Sched::IIC_SprMFCR:
+ case PPC::Sched::IIC_SprMFCRF:
+ case PPC::Sched::IIC_SprMTSPR:
+ return true;
+ }
+}
+
+ScheduleHazardRecognizer::HazardType
+PPCDispatchGroupSBHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
+ if (Stalls == 0 && isLoadAfterStore(SU))
+ return NoopHazard;
+
+ return ScoreboardHazardRecognizer::getHazardType(SU, Stalls);
+}
+
+bool PPCDispatchGroupSBHazardRecognizer::ShouldPreferAnother(SUnit *SU) {
+ const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
+ unsigned NSlots;
+ if (MCID && mustComeFirst(MCID, NSlots) && CurSlots)
+ return true;
+
+ return ScoreboardHazardRecognizer::ShouldPreferAnother(SU);
+}
+
+unsigned PPCDispatchGroupSBHazardRecognizer::PreEmitNoops(SUnit *SU) {
+ // We only need to fill out a maximum of 5 slots here: The 6th slot could
+ // only be a second branch, and otherwise the next instruction will start a
+ // new group.
+ if (isLoadAfterStore(SU) && CurSlots < 6) {
+ unsigned Directive =
+ DAG->TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
+ // If we're using a special group-terminating nop, then we need only one.
+ if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7)
+ return 1;
+
+ return 5 - CurSlots;
+ }
+
+ return ScoreboardHazardRecognizer::PreEmitNoops(SU);
+}
+
+void PPCDispatchGroupSBHazardRecognizer::EmitInstruction(SUnit *SU) {
+ const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
+ if (MCID) {
+ if (CurSlots == 5 || (MCID->isBranch() && CurBranches == 1)) {
+ CurGroup.clear();
+ CurSlots = CurBranches = 0;
+ } else {
+ DEBUG(dbgs() << "**** Adding to dispatch group: SU(" <<
+ SU->NodeNum << "): ");
+ DEBUG(DAG->dumpNode(SU));
+
+ unsigned NSlots;
+ bool MustBeFirst = mustComeFirst(MCID, NSlots);
+
+ // If this instruction must come first, but does not, then it starts a
+ // new group.
+ if (MustBeFirst && CurSlots) {
+ CurSlots = CurBranches = 0;
+ CurGroup.clear();
+ }
+
+ CurSlots += NSlots;
+ CurGroup.push_back(SU);
+
+ if (MCID->isBranch())
+ ++CurBranches;
+ }
+ }
+
+ return ScoreboardHazardRecognizer::EmitInstruction(SU);
+}
+
+void PPCDispatchGroupSBHazardRecognizer::AdvanceCycle() {
+ return ScoreboardHazardRecognizer::AdvanceCycle();
+}
+
+void PPCDispatchGroupSBHazardRecognizer::RecedeCycle() {
+ llvm_unreachable("Bottom-up scheduling not supported");
+}
+
+void PPCDispatchGroupSBHazardRecognizer::Reset() {
+ CurGroup.clear();
+ CurSlots = CurBranches = 0;
+ return ScoreboardHazardRecognizer::Reset();
+}
+
+void PPCDispatchGroupSBHazardRecognizer::EmitNoop() {
+ unsigned Directive =
+ DAG->TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
+ // If the group has now filled all of its slots, or if we're using a special
+ // group-terminating nop, the group is complete.
+ if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 ||
+ CurSlots == 6) {
+ CurGroup.clear();
+ CurSlots = CurBranches = 0;
+ } else {
+ CurGroup.push_back(0);
+ ++CurSlots;
+ }
+}
+
//===----------------------------------------------------------------------===//
// PowerPC 970 Hazard Recognizer
//
namespace llvm {
+/// PPCDispatchGroupSBHazardRecognizer - This class implements a scoreboard-based
+/// hazard recognizer for PPC ooo processors with dispatch-group hazards.
+class PPCDispatchGroupSBHazardRecognizer : public ScoreboardHazardRecognizer {
+ const ScheduleDAG *DAG;
+ SmallVector<SUnit *, 7> CurGroup;
+ unsigned CurSlots, CurBranches;
+
+ bool isLoadAfterStore(SUnit *SU);
+ bool isBCTRAfterSet(SUnit *SU);
+ bool mustComeFirst(const MCInstrDesc *MCID, unsigned &NSlots);
+public:
+ PPCDispatchGroupSBHazardRecognizer(const InstrItineraryData *ItinData,
+ const ScheduleDAG *DAG_) :
+ ScoreboardHazardRecognizer(ItinData, DAG_), DAG(DAG_),
+ CurSlots(0), CurBranches(0) {}
+
+ virtual HazardType getHazardType(SUnit *SU, int Stalls);
+ virtual bool ShouldPreferAnother(SUnit* SU);
+ virtual unsigned PreEmitNoops(SUnit *SU);
+ virtual void EmitInstruction(SUnit *SU);
+ virtual void AdvanceCycle();
+ virtual void RecedeCycle();
+ virtual void Reset();
+ virtual void EmitNoop();
+};
+
/// PPCHazardRecognizer970 - This class defines a finite state automata that
/// models the dispatch logic on the PowerPC 970 (aka G5) processor. This
/// promotes good dispatch group formation and implements noop insertion to
let Addr = 0;
}
+class DForm_4_fixedreg_zero<bits<6> opcode, bits<5> R, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin,
+ list<dag> pattern>
+ : DForm_4<opcode, OOL, IOL, asmstr, itin, pattern> {
+ let A = R;
+ let B = R;
+ let C = 0;
+}
+
class IForm_and_DForm_1<bits<6> opcode1, bit aa, bit lk, bits<6> opcode2,
dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
const ScheduleDAG *DAG) const {
unsigned Directive = TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
+ if (Directive == PPC::DIR_PWR7)
+ return new PPCDispatchGroupSBHazardRecognizer(II, DAG);
+
// Most subtargets use a PPC970 recognizer.
if (Directive != PPC::DIR_440 && Directive != PPC::DIR_A2 &&
Directive != PPC::DIR_E500mc && Directive != PPC::DIR_E5500) {
return new ScoreboardHazardRecognizer(II, DAG);
}
+
+int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI,
+ unsigned UseIdx) const {
+ int Latency = PPCGenInstrInfo::getOperandLatency(ItinData, DefMI, DefIdx,
+ UseMI, UseIdx);
+
+ const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
+ unsigned Reg = DefMO.getReg();
+
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ bool IsRegCR;
+ if (TRI->isVirtualRegister(Reg)) {
+ const MachineRegisterInfo *MRI =
+ &DefMI->getParent()->getParent()->getRegInfo();
+ IsRegCR = MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRRCRegClass) ||
+ MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRBITRCRegClass);
+ } else {
+ IsRegCR = PPC::CRRCRegClass.contains(Reg) ||
+ PPC::CRBITRCRegClass.contains(Reg);
+ }
+
+ if (UseMI->isBranch() && IsRegCR) {
+ if (Latency < 0)
+ Latency = getInstrLatency(ItinData, DefMI);
+
+ // On some cores, there is an additional delay between writing to a condition
+ // register, and using it from a branch.
+ unsigned Directive = TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
+ switch (Directive) {
+ default: break;
+ case PPC::DIR_7400:
+ case PPC::DIR_750:
+ case PPC::DIR_970:
+ case PPC::DIR_E5500:
+ case PPC::DIR_PWR4:
+ case PPC::DIR_PWR5:
+ case PPC::DIR_PWR5X:
+ case PPC::DIR_PWR6:
+ case PPC::DIR_PWR6X:
+ case PPC::DIR_PWR7:
+ Latency += 2;
+ break;
+ }
+ }
+
+ return Latency;
+}
+
// Detect 32 -> 64-bit extensions where we may reuse the low sub-register.
bool PPCInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
unsigned &SrcReg, unsigned &DstReg,
void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const {
+ // This function is used for scheduling, and the nop wanted here is the type
+ // that terminates dispatch groups on the POWER cores.
+ unsigned Directive = TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
+ unsigned Opcode;
+ switch (Directive) {
+ default: Opcode = PPC::NOP; break;
+ case PPC::DIR_PWR6: Opcode = PPC::NOP_GT_PWR6; break;
+ case PPC::DIR_PWR7: Opcode = PPC::NOP_GT_PWR7; break;
+ }
+
DebugLoc DL;
- BuildMI(MBB, MI, DL, get(PPC::NOP));
+ BuildMI(MBB, MI, DL, get(Opcode));
}
-
// Branch analysis.
// Note: If the condition register is set to CTR or CTR8 then this is a
// BDNZ (imm == 1) or BDZ (imm == 0) branch.
CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
const ScheduleDAG *DAG) const;
+ virtual
+ int getOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const;
+ virtual
+ int getOperandLatency(const InstrItineraryData *ItinData,
+ SDNode *DefNode, unsigned DefIdx,
+ SDNode *UseNode, unsigned UseIdx) const {
+ return PPCGenInstrInfo::getOperandLatency(ItinData, DefNode, DefIdx,
+ UseNode, UseIdx);
+ }
+
bool isCoalescableExtInstr(const MachineInstr &MI,
unsigned &SrcReg, unsigned &DstReg,
unsigned &SubIdx) const;
def XORIS : DForm_4<27, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
"xoris $dst, $src1, $src2", IIC_IntSimple,
[(set i32:$dst, (xor i32:$src1, imm16ShiftedZExt:$src2))]>;
+
def NOP : DForm_4_zero<24, (outs), (ins), "nop", IIC_IntSimple,
[]>;
+let isCodeGenOnly = 1 in {
+// The POWER6 and POWER7 have special group-terminating nops.
+def NOP_GT_PWR6 : DForm_4_fixedreg_zero<24, 1, (outs), (ins),
+ "ori 1, 1, 0", IIC_IntSimple, []>;
+def NOP_GT_PWR7 : DForm_4_fixedreg_zero<24, 2, (outs), (ins),
+ "ori 2, 2, 0", IIC_IntSimple, []>;
+}
+
let isCompare = 1, neverHasSideEffects = 1 in {
def CMPWI : DForm_5_ext<11, (outs crrc:$crD), (ins gprc:$rA, s16imm:$imm),
"cmpwi $crD, $rA, $imm", IIC_IntCompare>;
P7_DU3, P7_DU4], 0>,
InstrStage<1, [P7_FX1, P7_FX2]>],
[1, 1, 1]>,
+ // FIXME: Add record-form itinerary data.
InstrItinData<IIC_IntDivW , [InstrStage<1, [P7_DU1], 0>,
InstrStage<1, [P7_DU2], 0>,
InstrStage<36, [P7_FX1, P7_FX2]>],
InstrStage<1, [P7_DU4], 0>,
InstrStage<1, [P7_LS1, P7_LS2]>],
[1, 1, 1]>,
- InstrItinData<IIC_BrMCRX , [InstrStage<1, [P7_DU4], 0>,
+ InstrItinData<IIC_BrMCRX , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_DU3], 0>,
+ InstrStage<1, [P7_DU4], 0>,
InstrStage<1, [P7_CRU]>,
InstrStage<1, [P7_FX1, P7_FX2]>],
[3, 1]>, // mtcr
InstrItinData<IIC_SprMFCRF , [InstrStage<1, [P7_DU1], 0>,
InstrStage<1, [P7_CRU]>],
[3, 1]>,
+ InstrItinData<IIC_SprMTSPR , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_FX1]>],
+ [4, 1]>, // mtctr
InstrItinData<IIC_FPGeneral , [InstrStage<1, [P7_DU1, P7_DU2,
P7_DU3, P7_DU4], 0>,
InstrStage<1, [P7_VS1, P7_VS2]>],
--- /dev/null
+; RUN: llc < %s -mcpu=pwr7 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define void @foo(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c, float* nocapture %d) #0 {
+
+; CHECK-LABEL: @foo
+
+entry:
+ %0 = load float* %b, align 4
+ store float %0, float* %a, align 4
+ %1 = load float* %c, align 4
+ store float %1, float* %b, align 4
+ %2 = load float* %a, align 4
+ store float %2, float* %d, align 4
+ ret void
+
+; CHECK: lfs [[REG1:[0-9]+]], 0(4)
+; CHECK: stfs [[REG1]], 0(3)
+; CHECK: ori 2, 2, 0
+; CHECK: lfs [[REG2:[0-9]+]], 0(5)
+; CHECK: stfs [[REG2]], 0(4)
+; CHECK: ori 2, 2, 0
+; CHECK: lfs [[REG3:[0-9]+]], 0(3)
+; CHECK: stfs [[REG3]], 0(6)
+; CHECK: blr
+}
+
+attributes #0 = { nounwind }
+