From 27b7db549e4c5bff4579d209304de5628513edeb Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Sat, 8 Mar 2008 00:58:38 +0000 Subject: [PATCH] Implement x86 support for @llvm.prefetch. It corresponds to prefetcht{0|1|2} and prefetchnta instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@48042 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/SelectionDAGNodes.h | 5 ++++ lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 18 +++++++++++++++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 1 + lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 14 +++++++---- lib/Target/ARM/ARMISelLowering.cpp | 1 + lib/Target/Alpha/AlphaISelLowering.cpp | 1 + lib/Target/CellSPU/SPUISelLowering.cpp | 1 + lib/Target/IA64/IA64ISelLowering.cpp | 1 + lib/Target/Mips/MipsISelLowering.cpp | 1 + lib/Target/PowerPC/PPCISelLowering.cpp | 1 + lib/Target/Sparc/SparcISelDAGToDAG.cpp | 1 + lib/Target/TargetSelectionDAG.td | 23 +++++++++++++------ lib/Target/X86/X86ISelLowering.cpp | 3 +++ lib/Target/X86/X86InstrSSE.td | 15 +++++++----- test/CodeGen/X86/prefetch.ll | 15 ++++++++++++ 15 files changed, 84 insertions(+), 17 deletions(-) create mode 100644 test/CodeGen/X86/prefetch.ll diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h index 025e9ecea7c..0d0158ac3a9 100644 --- a/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/include/llvm/CodeGen/SelectionDAGNodes.h @@ -591,6 +591,11 @@ namespace ISD { // TRAP - Trapping instruction TRAP, + // PREFETCH - This corresponds to a prefetch intrinsic. It takes chains are + // their first operand. The other operands are the address to prefetch, + // read / write specifier, and locality specifier. + PREFETCH, + // OUTCHAIN = MEMBARRIER(INCHAIN, load-load, load-store, store-load, // store-store, device) // This corresponds to the memory.barrier intrinsic. diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index fd9cf154436..8e59b53d1cb 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1142,6 +1142,24 @@ SDOperand SelectionDAGLegalize::LegalizeOp(SDOperand Op) { } break; + case ISD::PREFETCH: + assert(Node->getNumOperands() == 4 && "Invalid Prefetch node!"); + switch (TLI.getOperationAction(ISD::PREFETCH, MVT::Other)) { + default: assert(0 && "This action is not supported yet!"); + case TargetLowering::Legal: + Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain. + Tmp2 = LegalizeOp(Node->getOperand(1)); // Legalize the address. + Tmp3 = LegalizeOp(Node->getOperand(2)); // Legalize the rw specifier. + Tmp4 = LegalizeOp(Node->getOperand(3)); // Legalize locality specifier. + Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3, Tmp4); + break; + case TargetLowering::Expand: + // It's a noop. + Result = LegalizeOp(Node->getOperand(0)); + break; + } + break; + case ISD::MEMBARRIER: { assert(Node->getNumOperands() == 6 && "Invalid MemBarrier node!"); switch (TLI.getOperationAction(ISD::MEMBARRIER, MVT::Other)) { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 1db78fe2a40..b3251b066a4 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3788,6 +3788,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { return "<>"; } + case ISD::PREFETCH: return "Prefetch"; case ISD::MEMBARRIER: return "MemBarrier"; case ISD::ATOMIC_LCS: return "AtomicLCS"; case ISD::ATOMIC_LAS: return "AtomicLAS"; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 016bb105846..1140e20a440 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -2996,10 +2996,6 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, MVT::Other, getRoot(), Tmp)); return 0; } - case Intrinsic::prefetch: - // FIXME: Currently discarding prefetches. - return 0; - case Intrinsic::var_annotation: // Discard annotate attributes return 0; @@ -3050,6 +3046,16 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { DAG.setRoot(DAG.getNode(ISD::TRAP, MVT::Other, getRoot())); return 0; } + case Intrinsic::prefetch: { + SDOperand Ops[4]; + Ops[0] = getRoot(); + Ops[1] = getValue(I.getOperand(1)); + Ops[2] = getValue(I.getOperand(2)); + Ops[3] = getValue(I.getOperand(3)); + DAG.setRoot(DAG.getNode(ISD::PREFETCH, MVT::Other, &Ops[0], 4)); + return 0; + } + case Intrinsic::memory_barrier: { SDOperand Ops[6]; Ops[0] = getRoot(); diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 92ab18d5775..2e7d985bf8e 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -211,6 +211,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand); setOperationAction(ISD::MEMBARRIER , MVT::Other, Expand); + setOperationAction(ISD::PREFETCH , MVT::Other, Expand); if (!Subtarget->hasV6Ops()) { setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp index 907415d73c3..368c736a7da 100644 --- a/lib/Target/Alpha/AlphaISelLowering.cpp +++ b/lib/Target/Alpha/AlphaISelLowering.cpp @@ -117,6 +117,7 @@ AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM) : TargetLowering(TM) setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); + setOperationAction(ISD::PREFETCH, MVT::Other, Expand); // We want to legalize GlobalAddress and ConstantPool and // ExternalSymbols nodes into the appropriate instructions to diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index 6d2a9f3d30b..6d0bd779e30 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -321,6 +321,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand); + setOperationAction(ISD::PREFETCH , MVT::Other, Expand); // Cell SPU has instructions for converting between i64 and fp. setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); diff --git a/lib/Target/IA64/IA64ISelLowering.cpp b/lib/Target/IA64/IA64ISelLowering.cpp index 85db947477d..e4d4867888e 100644 --- a/lib/Target/IA64/IA64ISelLowering.cpp +++ b/lib/Target/IA64/IA64ISelLowering.cpp @@ -110,6 +110,7 @@ IA64TargetLowering::IA64TargetLowering(TargetMachine &TM) setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); + setOperationAction(ISD::PREFETCH , MVT::Other, Expand); // Thread Local Storage setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index ed76621131b..5a0daf87646 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -85,6 +85,7 @@ MipsTargetLowering(MipsTargetMachine &TM): TargetLowering(TM) setOperationAction(ISD::MEMSET, MVT::Other, Expand); setOperationAction(ISD::MEMCPY, MVT::Other, Expand); setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); + setOperationAction(ISD::PREFETCH, MVT::Other, Expand); setOperationAction(ISD::CTPOP, MVT::i32, Expand); setOperationAction(ISD::CTTZ , MVT::i32, Expand); diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 06098de0635..15e3f2d0f2a 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -82,6 +82,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::MEMSET, MVT::Other, Expand); setOperationAction(ISD::MEMCPY, MVT::Other, Expand); setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); + setOperationAction(ISD::PREFETCH, MVT::Other, Expand); // PowerPC has no SREM/UREM instructions setOperationAction(ISD::SREM, MVT::i32, Expand); diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp index 95f4e086730..08af60779b2 100644 --- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp +++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp @@ -197,6 +197,7 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM) setOperationAction(ISD::MEMSET, MVT::Other, Expand); setOperationAction(ISD::MEMCPY, MVT::Other, Expand); setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); + setOperationAction(ISD::PREFETCH, MVT::Other, Expand); setOperationAction(ISD::FSIN , MVT::f64, Expand); setOperationAction(ISD::FCOS , MVT::f64, Expand); diff --git a/lib/Target/TargetSelectionDAG.td b/lib/Target/TargetSelectionDAG.td index 47d9ba48b7b..dc0fcb5443e 100644 --- a/lib/Target/TargetSelectionDAG.td +++ b/lib/Target/TargetSelectionDAG.td @@ -185,7 +185,11 @@ def SDTVecInsert : SDTypeProfile<1, 3, [ // vector insert SDTCisEltOfVec<2, 1>, SDTCisSameAs<0, 1>, SDTCisPtrTy<3> ]>; -def STDMemBarrier : SDTypeProfile<0, 5, [ +def STDPrefetch : SDTypeProfile<0, 3, [ // prefetch + SDTCisPtrTy<0>, SDTCisSameAs<1, 2>, SDTCisInt<1> +]>; + +def STDMemBarrier : SDTypeProfile<0, 5, [ // memory barier SDTCisSameAs<0,1>, SDTCisSameAs<0,2>, SDTCisSameAs<0,3>, SDTCisSameAs<0,4>, SDTCisInt<0> ]>; @@ -340,15 +344,20 @@ def br : SDNode<"ISD::BR" , SDTBr, [SDNPHasChain]>; def ret : SDNode<"ISD::RET" , SDTNone, [SDNPHasChain]>; def trap : SDNode<"ISD::TRAP" , SDTNone, [SDNPHasChain, SDNPSideEffect]>; -def membarrier : SDNode<"ISD::MEMBARRIER" , STDMemBarrier, + +def prefetch : SDNode<"ISD::PREFETCH" , STDPrefetch, + [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>; + +def membarrier : SDNode<"ISD::MEMBARRIER" , STDMemBarrier, [SDNPHasChain, SDNPSideEffect]>; + // Do not use atomic_* directly, use atomic_*_size (see below) -def atomic_lcs : SDNode<"ISD::ATOMIC_LCS", STDAtomic3, - [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; -def atomic_las : SDNode<"ISD::ATOMIC_LAS", STDAtomic2, - [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; +def atomic_lcs : SDNode<"ISD::ATOMIC_LCS" , STDAtomic3, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; +def atomic_las : SDNode<"ISD::ATOMIC_LAS" , STDAtomic2, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; def atomic_swap : SDNode<"ISD::ATOMIC_SWAP", STDAtomic2, - [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; + [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; // Do not use ld, st directly. Use load, extload, sextload, zextload, store, // and truncst (see below). diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 22ddd832954..7c70d043bf3 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -287,6 +287,9 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) setOperationAction(ISD::MEMSET , MVT::Other, Custom); setOperationAction(ISD::MEMCPY , MVT::Other, Custom); + if (!Subtarget->hasSSE1()) + setOperationAction(ISD::PREFETCH , MVT::Other, Expand); + if (!Subtarget->hasSSE2()) setOperationAction(ISD::MEMBARRIER , MVT::Other, Expand); diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 3f174c08cdc..55d92bdfa6a 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -939,12 +939,15 @@ def MOVMSKPDrr : PSI<0x50, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), "movmskpd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (int_x86_sse2_movmsk_pd VR128:$src))]>; -// Prefetching loads. -// TODO: no intrinsics for these? -def PREFETCHT0 : PSI<0x18, MRM1m, (outs), (ins i8mem:$src), "prefetcht0\t$src", []>; -def PREFETCHT1 : PSI<0x18, MRM2m, (outs), (ins i8mem:$src), "prefetcht1\t$src", []>; -def PREFETCHT2 : PSI<0x18, MRM3m, (outs), (ins i8mem:$src), "prefetcht2\t$src", []>; -def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src), "prefetchnta\t$src", []>; +// Prefetch intrinsic. +def PREFETCHT0 : PSI<0x18, MRM1m, (outs), (ins i8mem:$src), + "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3))]>; +def PREFETCHT1 : PSI<0x18, MRM2m, (outs), (ins i8mem:$src), + "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2))]>; +def PREFETCHT2 : PSI<0x18, MRM3m, (outs), (ins i8mem:$src), + "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1))]>; +def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src), + "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0))]>; // Non-temporal stores def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), diff --git a/test/CodeGen/X86/prefetch.ll b/test/CodeGen/X86/prefetch.ll new file mode 100644 index 00000000000..bcc6e3ad3dd --- /dev/null +++ b/test/CodeGen/X86/prefetch.ll @@ -0,0 +1,15 @@ +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse1 | grep prefetchnta +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse1 | grep prefetcht0 +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse1 | grep prefetcht1 +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse1 | grep prefetcht2 + +define void @t(i8* %ptr) nounwind { +entry: + tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 1 ) + tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 2 ) + tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3 ) + tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 0 ) + ret void +} + +declare void @llvm.prefetch(i8*, i32, i32) nounwind -- 2.34.1