From: Bruno Cardoso Lopes Date: Tue, 14 Jun 2011 04:58:37 +0000 (+0000) Subject: Add one more argument to the prefetch intrinsic to indicate whether it's a data X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=9a767330f555f21d6ef311d3a348d3a44f306d35;p=oota-llvm.git Add one more argument to the prefetch intrinsic to indicate whether it's a data or instruction cache access. Update the targets to match it and also teach autoupgrade. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@132976 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/docs/LangRef.html b/docs/LangRef.html index a024efb0993..74678c7be8e 100644 --- a/docs/LangRef.html +++ b/docs/LangRef.html @@ -6064,7 +6064,7 @@ LLVM.

Syntax:
-  declare void @llvm.prefetch(i8* <address>, i32 <rw>, i32 <locality>)
+  declare void @llvm.prefetch(i8* <address>, i32 <rw>, i32 <locality>, i32 <cache type>)
 
Overview:
@@ -6077,8 +6077,10 @@ LLVM.

address is the address to be prefetched, rw is the specifier determining if the fetch should be for a read (0) or write (1), and locality is a temporal locality specifier ranging from (0) - no - locality, to (3) - extremely local keep in cache. The rw - and locality arguments must be constant integers.

+ locality, to (3) - extremely local keep in cache. The cache type + specifies whether the prefetch is performed on the data (1) or instruction (0) + cache. The rw, locality and cache type arguments + must be constant integers.

Semantics:

This intrinsic does not modify the behavior of the program. In particular, diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h index e765cadf4dc..498614e7fd5 100644 --- a/include/llvm/CodeGen/ISDOpcodes.h +++ b/include/llvm/CodeGen/ISDOpcodes.h @@ -580,7 +580,8 @@ namespace ISD { // PREFETCH - This corresponds to a prefetch intrinsic. It takes chains are // their first operand. The other operands are the address to prefetch, - // read / write specifier, and locality specifier. + // read / write specifier, locality specifier and instruction / data cache + // specifier. PREFETCH, // OUTCHAIN = MEMBARRIER(INCHAIN, load-load, load-store, store-load, diff --git a/include/llvm/Intrinsics.td b/include/llvm/Intrinsics.td index d8f249a2c0b..c1fbce4e054 100644 --- a/include/llvm/Intrinsics.td +++ b/include/llvm/Intrinsics.td @@ -211,7 +211,8 @@ def int_stackrestore : Intrinsic<[], [llvm_ptr_ty]>, // however it does conveniently prevent the prefetch from being reordered // with respect to nearby accesses to the same memory. def int_prefetch : Intrinsic<[], - [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty], [IntrReadWriteArgMem, NoCapture<0>]>; def int_pcmarker : Intrinsic<[], [llvm_i32_ty]>; diff --git a/include/llvm/Target/TargetSelectionDAG.td b/include/llvm/Target/TargetSelectionDAG.td index 672117f1447..285b8b1abda 100644 --- a/include/llvm/Target/TargetSelectionDAG.td +++ b/include/llvm/Target/TargetSelectionDAG.td @@ -197,8 +197,8 @@ def SDTSubVecInsert : SDTypeProfile<1, 3, [ // subvector insert SDTCisSubVecOfVec<2, 1>, SDTCisSameAs<0,1>, SDTCisInt<3> ]>; -def SDTPrefetch : SDTypeProfile<0, 3, [ // prefetch - SDTCisPtrTy<0>, SDTCisSameAs<1, 2>, SDTCisInt<1> +def SDTPrefetch : SDTypeProfile<0, 4, [ // prefetch + SDTCisPtrTy<0>, SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>, SDTCisInt<1> ]>; def SDTMemBarrier : SDTypeProfile<0, 5, [ // memory barier diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 3aff0ad5195..8a08fd7e300 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4788,15 +4788,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return implVisitAluOverflow(I, ISD::SMULO); case Intrinsic::prefetch: { - SDValue Ops[4]; + SDValue Ops[5]; unsigned rw = cast(I.getArgOperand(1))->getZExtValue(); Ops[0] = getRoot(); Ops[1] = getValue(I.getArgOperand(0)); Ops[2] = getValue(I.getArgOperand(1)); Ops[3] = getValue(I.getArgOperand(2)); + Ops[4] = getValue(I.getArgOperand(3)); DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, dl, DAG.getVTList(MVT::Other), - &Ops[0], 4, + &Ops[0], 5, EVT::getIntegerVT(*Context, 8), MachinePointerInfo(I.getArgOperand(0)), 0, /* align */ diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 339c85886ae..b012b501f92 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -2281,12 +2281,13 @@ static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG, // ARMv7 with MP extension has PLDW. return Op.getOperand(0); - if (Subtarget->isThumb()) + unsigned isData = cast(Op.getOperand(4))->getZExtValue(); + if (Subtarget->isThumb()) { // Invert the bits. isRead = ~isRead & 1; - unsigned isData = Subtarget->isThumb() ? 0 : 1; + isData = ~isData & 1; + } - // Currently there is no intrinsic that matches pli. return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0), Op.getOperand(1), DAG.getConstant(isRead, MVT::i32), DAG.getConstant(isData, MVT::i32)); diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 3b1f8464a7a..fbddd126482 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -62,6 +62,9 @@ def SDT_ARMEH_SJLJ_DispatchSetup: SDTypeProfile<0, 1, [SDTCisInt<0>]>; def SDT_ARMMEMBARRIER : SDTypeProfile<0, 1, [SDTCisInt<0>]>; +def SDT_ARMPREFETCH : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisSameAs<1, 2>, + SDTCisInt<1>]>; + def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, @@ -130,7 +133,7 @@ def ARMMemBarrier : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIER, [SDNPHasChain]>; def ARMMemBarrierMCR : SDNode<"ARMISD::MEMBARRIER_MCR", SDT_ARMMEMBARRIER, [SDNPHasChain]>; -def ARMPreload : SDNode<"ARMISD::PRELOAD", SDTPrefetch, +def ARMPreload : SDNode<"ARMISD::PRELOAD", SDT_ARMPREFETCH, [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>; def ARMrbit : SDNode<"ARMISD::RBIT", SDTIntUnaryOp>; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index b64c03a9b59..a38e3721f35 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -2006,13 +2006,13 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), // Prefetch intrinsic. def PREFETCHT0 : PSI<0x18, MRM1m, (outs), (ins i8mem:$src), - "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3))]>; + "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))]>; def PREFETCHT1 : PSI<0x18, MRM2m, (outs), (ins i8mem:$src), - "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2))]>; + "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2), (i32 1))]>; def PREFETCHT2 : PSI<0x18, MRM3m, (outs), (ins i8mem:$src), - "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1))]>; + "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1), (i32 1))]>; def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src), - "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0))]>; + "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), (i32 1))]>; // Load, store, and memory fence def SFENCE : I<0xAE, MRM_F8, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>, diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp index f8f15caec91..9d4543ded48 100644 --- a/lib/VMCore/AutoUpgrade.cpp +++ b/lib/VMCore/AutoUpgrade.cpp @@ -284,6 +284,30 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { break; } + // This upgrades the llvm.prefetch intrinsic to accept one more parameter, + // which is a instruction / data cache identifier. The old version only + // implicitly accepted the data version. + if (Name.compare(5,8,"prefetch",8) == 0) { + // Don't do anything if it has the correct number of arguments already + if (FTy->getNumParams() == 4) + break; + + assert(FTy->getNumParams() == 3 && "old prefetch takes 3 args!"); + // We first need to change the name of the old (bad) intrinsic, because + // its type is incorrect, but we cannot overload that name. We + // arbitrarily unique it here allowing us to construct a correctly named + // and typed function below. + F->setName(""); + NewFn = cast(M->getOrInsertFunction(Name, + FTy->getReturnType(), + FTy->getParamType(0), + FTy->getParamType(1), + FTy->getParamType(2), + FTy->getParamType(2), + (Type*)0)); + return true; + } + break; case 'x': // This fixes the poorly named crc32 intrinsics @@ -1344,6 +1368,29 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { CI->eraseFromParent(); break; } + case Intrinsic::prefetch: { + IRBuilder<> Builder(C); + Builder.SetInsertPoint(CI->getParent(), CI); + const llvm::Type *I32Ty = llvm::Type::getInt32Ty(CI->getContext()); + + // Add the extra "data cache" argument + Value *Operands[4] = { CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), + llvm::ConstantInt::get(I32Ty, 1) }; + CallInst *NewCI = CallInst::Create(NewFn, Operands, Operands+4, + CI->getName(), CI); + NewCI->setTailCall(CI->isTailCall()); + NewCI->setCallingConv(CI->getCallingConv()); + // Handle any uses of the old CallInst. + if (!CI->use_empty()) + // Replace all uses of the old call with the new cast which has the + // correct type. + CI->replaceAllUsesWith(NewCI); + + // Clean up the old call now that it has been completely upgraded. + CI->eraseFromParent(); + break; + } } } diff --git a/test/Assembler/AutoUpgradeIntrinsics.ll b/test/Assembler/AutoUpgradeIntrinsics.ll index 417493f7168..20beb495667 100644 --- a/test/Assembler/AutoUpgradeIntrinsics.ll +++ b/test/Assembler/AutoUpgradeIntrinsics.ll @@ -109,3 +109,11 @@ define void @f(<4 x float> %A, i8* %B, <2 x double> %C, i32 %D) { call void @llvm.x86.sse2.movnt.i(i8* %B, i32 %D) ret void } + +declare void @llvm.prefetch(i8*, i32, i32) nounwind + +define void @p(i8* %ptr) { +; CHECK: llvm.prefetch(i8* %ptr, i32 0, i32 1, i32 1) + tail call void @llvm.prefetch(i8* %ptr, i32 0, i32 1) + ret void +} diff --git a/test/CodeGen/ARM/prefetch.ll b/test/CodeGen/ARM/prefetch.ll index 95f082aa938..03a6947699a 100644 --- a/test/CodeGen/ARM/prefetch.ll +++ b/test/CodeGen/ARM/prefetch.ll @@ -17,8 +17,8 @@ entry: ; THUMB2: t1: ; THUMB2-NOT: pldw [r0] ; THUMB2: pld [r0] - tail call void @llvm.prefetch( i8* %ptr, i32 1, i32 3 ) - tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3 ) + tail call void @llvm.prefetch( i8* %ptr, i32 1, i32 3, i32 1 ) + tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3, i32 1 ) ret void } @@ -30,7 +30,7 @@ entry: ; THUMB2: t2: ; THUMB2: pld [r0, #1023] %tmp = getelementptr i8* %ptr, i32 1023 - tail call void @llvm.prefetch( i8* %tmp, i32 0, i32 3 ) + tail call void @llvm.prefetch( i8* %tmp, i32 0, i32 3, i32 1 ) ret void } @@ -45,7 +45,7 @@ entry: %tmp1 = lshr i32 %offset, 2 %tmp2 = add i32 %base, %tmp1 %tmp3 = inttoptr i32 %tmp2 to i8* - tail call void @llvm.prefetch( i8* %tmp3, i32 0, i32 3 ) + tail call void @llvm.prefetch( i8* %tmp3, i32 0, i32 3, i32 1 ) ret void } @@ -59,8 +59,8 @@ entry: %tmp1 = shl i32 %offset, 2 %tmp2 = add i32 %base, %tmp1 %tmp3 = inttoptr i32 %tmp2 to i8* - tail call void @llvm.prefetch( i8* %tmp3, i32 0, i32 3 ) + tail call void @llvm.prefetch( i8* %tmp3, i32 0, i32 3, i32 1 ) ret void } -declare void @llvm.prefetch(i8*, i32, i32) nounwind +declare void @llvm.prefetch(i8*, i32, i32, i32) nounwind diff --git a/test/CodeGen/X86/prefetch.ll b/test/CodeGen/X86/prefetch.ll index 48d2673e488..ebe11a5e8e4 100644 --- a/test/CodeGen/X86/prefetch.ll +++ b/test/CodeGen/X86/prefetch.ll @@ -6,11 +6,11 @@ entry: ; CHECK: prefetcht1 ; CHECK: prefetcht0 ; CHECK: prefetchnta - tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 1 ) - tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 2 ) - tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3 ) - tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 0 ) + tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 1, i32 1 ) + tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 2, i32 1 ) + tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3, i32 1 ) + tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 0, i32 1 ) ret void } -declare void @llvm.prefetch(i8*, i32, i32) nounwind +declare void @llvm.prefetch(i8*, i32, i32, i32) nounwind