From 1855b261dbd5e3771c005414d7709bfd77c087ae Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Tue, 2 Dec 2014 22:01:00 +0000 Subject: [PATCH 1/1] [PowerPC] Implement readcyclecounter for PPC32 We've long supported readcyclecounter on PPC64, but it is easier there (the read of the 64-bit time-base register can be accomplished via a single instruction). This now provides an implementation for PPC32 as well. On PPC32, the time-base register is still 64 bits, but can only be read 32 bits at a time via two separate SPRs. The ISA manual explains how to do this properly (it involves re-reading the upper bits and looping if the counter has wrapped while being read). This requires PPC to implement a custom integer splitting legalization for the READCYCLECOUNTER node, turning it into a target-specific SDAG node, which then gets turned into a pseudo-instruction, which is then expanded to the necessary sequence (which has three SPR reads, the comparison and the branch). Thanks to Paul Hargrove for pointing out to me that this was still unimplemented. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@223161 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 11 +++++ lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 5 ++ lib/Target/PowerPC/PPCISelLowering.cpp | 57 ++++++++++++++++++++++ lib/Target/PowerPC/PPCISelLowering.h | 4 ++ lib/Target/PowerPC/PPCInstrInfo.td | 6 +++ test/CodeGen/PowerPC/ppc32-cyclecounter.ll | 19 ++++++++ 6 files changed, 102 insertions(+) create mode 100644 test/CodeGen/PowerPC/ppc32-cyclecounter.ll diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index bd7dacf2bc6..ebf6b28259e 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -921,6 +921,17 @@ bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult) { // The target didn't want to custom lower it after all. return false; + // When called from DAGTypeLegalizer::ExpandIntegerResult, we might need to + // provide the same kind of custom splitting behavior. + if (Results.size() == N->getNumValues() + 1 && LegalizeResult) { + // We've legalized a return type by splitting it. If there is a chain, + // replace that too. + SetExpandedInteger(SDValue(N, 0), Results[0], Results[1]); + if (N->getNumValues() > 1) + ReplaceValueWith(SDValue(N, 1), Results[2]); + return true; + } + // Make everything that once used N's values now use those in Results instead. assert(Results.size() == N->getNumValues() && "Custom lowering returned the wrong number of results!"); diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 49ba58b3e4b..cb7d73a04c2 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -1026,6 +1026,11 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { N->getOperand(0), InFlag); } + case PPCISD::READ_TIME_BASE: { + return CurDAG->getMachineNode(PPC::ReadTB, dl, MVT::i32, MVT::i32, + MVT::Other, N->getOperand(0)); + } + case ISD::SDIV: { // FIXME: since this depends on the setting of the carry flag from the srawi // we should really be making notes about that for the scheduler. diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 7351d19120a..3668a87b0ec 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -602,6 +602,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM) if (Subtarget.has64BitSupport()) { setOperationAction(ISD::PREFETCH, MVT::Other, Legal); setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal); + } else { + setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom); } if (!isPPC64) { @@ -776,6 +778,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::MTCTR: return "PPCISD::MTCTR"; case PPCISD::BCTRL: return "PPCISD::BCTRL"; case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG"; + case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE"; case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP"; case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP"; case PPCISD::MFOCRF: return "PPCISD::MFOCRF"; @@ -6497,6 +6500,15 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, switch (N->getOpcode()) { default: llvm_unreachable("Do not know how to custom type legalize this operation!"); + case ISD::READCYCLECOUNTER: { + SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); + SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0)); + + Results.push_back(RTB); + Results.push_back(RTB.getValue(1)); + Results.push_back(RTB.getValue(2)); + break; + } case ISD::INTRINSIC_W_CHAIN: { if (cast(N->getOperand(1))->getZExtValue() != Intrinsic::ppc_is_decremented_ctr_nonzero) @@ -7149,6 +7161,51 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, TII->get(PPC::PHI), MI->getOperand(0).getReg()) .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB) .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); + } else if (MI->getOpcode() == PPC::ReadTB) { + // To read the 64-bit time-base register on a 32-bit target, we read the + // two halves. Should the counter have wrapped while it was being read, we + // need to try again. + // ... + // readLoop: + // mfspr Rx,TBU # load from TBU + // mfspr Ry,TB # load from TB + // mfspr Rz,TBU # load from TBU + // cmpw crX,Rx,Rz # check if ‘old’=’new’ + // bne readLoop # branch if they're not equal + // ... + + MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); + DebugLoc dl = MI->getDebugLoc(); + F->insert(It, readMBB); + F->insert(It, sinkMBB); + + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), BB, + std::next(MachineBasicBlock::iterator(MI)), BB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(BB); + + BB->addSuccessor(readMBB); + BB = readMBB; + + MachineRegisterInfo &RegInfo = F->getRegInfo(); + unsigned ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); + unsigned LoReg = MI->getOperand(0).getReg(); + unsigned HiReg = MI->getOperand(1).getReg(); + + BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269); + BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268); + BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269); + + unsigned CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); + + BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg) + .addReg(HiReg).addReg(ReadAgainReg); + BuildMI(BB, dl, TII->get(PPC::BCC)) + .addImm(PPC::PRED_NE).addReg(CmpReg).addMBB(readMBB); + + BB->addSuccessor(readMBB); + BB->addSuccessor(sinkMBB); } else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4); diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 4b4d25e3a3c..e9a71073f91 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -125,6 +125,10 @@ namespace llvm { /// implement truncation of i32 or i64 to i1. ANDIo_1_EQ_BIT, ANDIo_1_GT_BIT, + // READ_TIME_BASE - A read of the 64-bit time-base register on a 32-bit + // target (returns (Lo, Hi)). It takes a chain operand. + READ_TIME_BASE, + // EH_SJLJ_SETJMP - SjLj exception handling setjmp. EH_SJLJ_SETJMP, diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 5ffe93c0595..ff8592f3778 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -2072,6 +2072,12 @@ def MTSPR : XFXForm_1<31, 467, (outs), (ins i32imm:$SPR, gprc:$RT), def MFTB : XFXForm_1<31, 371, (outs gprc:$RT), (ins i32imm:$SPR), "mftb $RT, $SPR", IIC_SprMFTB>, Deprecated; +// A pseudo-instruction used to implement the read of the 64-bit cycle counter +// on a 32-bit target. +let hasSideEffects = 1, usesCustomInserter = 1 in +def ReadTB : Pseudo<(outs gprc:$lo, gprc:$hi), (ins), + "#ReadTB", []>; + let Uses = [CTR] in { def MFCTR : XFXForm_1_ext<31, 339, 9, (outs gprc:$rT), (ins), "mfctr $rT", IIC_SprMFSPR>, diff --git a/test/CodeGen/PowerPC/ppc32-cyclecounter.ll b/test/CodeGen/PowerPC/ppc32-cyclecounter.ll new file mode 100644 index 00000000000..75d24c38348 --- /dev/null +++ b/test/CodeGen/PowerPC/ppc32-cyclecounter.ll @@ -0,0 +1,19 @@ +target datalayout = "E-m:e-p:32:32-i64:64-n32" +target triple = "powerpc" +; RUN: llc < %s | FileCheck %s + +define i64 @test1() nounwind { +entry: + %r = call i64 @llvm.readcyclecounter() + ret i64 %r +} + +; CHECK: @test1 +; CHECK: mfspr 3, 269 +; CHECK: mfspr 4, 268 +; CHECK: mfspr [[REG:[0-9]+]], 269 +; CHECK: cmpw [[CR:[0-9]+]], 3, [[REG]] +; CHECK: bne [[CR]], .LBB + +declare i64 @llvm.readcyclecounter() + -- 2.34.1