From d04764a8acabe0b3f0452b3099bc5964ba783884 Mon Sep 17 00:00:00 2001 From: Anders Carlsson Date: Fri, 14 Dec 2007 06:38:54 +0000 Subject: [PATCH] All MMX shift instructions took a <2 x i32> vector as the shift amount parameter. Change this to be <1 x i64> instead, which matches the assembler instruction. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@45027 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IntrinsicsX86.td | 16 +++--- lib/VMCore/AutoUpgrade.cpp | 68 +++++++++++++++++++++++++ test/Assembler/AutoUpgradeIntrinsics.ll | 29 +++++++++++ 3 files changed, 105 insertions(+), 8 deletions(-) diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td index 73d104b55c6..12e76339da4 100644 --- a/include/llvm/IntrinsicsX86.td +++ b/include/llvm/IntrinsicsX86.td @@ -767,30 +767,30 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // Shift left logical def int_x86_mmx_psll_w : GCCBuiltin<"__builtin_ia32_psllw">, Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty, - llvm_v2i32_ty], [IntrNoMem]>; + llvm_v1i64_ty], [IntrNoMem]>; def int_x86_mmx_psll_d : GCCBuiltin<"__builtin_ia32_pslld">, Intrinsic<[llvm_v2i32_ty, llvm_v2i32_ty, - llvm_v2i32_ty], [IntrNoMem]>; + llvm_v1i64_ty], [IntrNoMem]>; def int_x86_mmx_psll_q : GCCBuiltin<"__builtin_ia32_psllq">, Intrinsic<[llvm_v1i64_ty, llvm_v1i64_ty, - llvm_v2i32_ty], [IntrNoMem]>; + llvm_v1i64_ty], [IntrNoMem]>; def int_x86_mmx_psrl_w : GCCBuiltin<"__builtin_ia32_psrlw">, Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty, - llvm_v2i32_ty], [IntrNoMem]>; + llvm_v1i64_ty], [IntrNoMem]>; def int_x86_mmx_psrl_d : GCCBuiltin<"__builtin_ia32_psrld">, Intrinsic<[llvm_v2i32_ty, llvm_v2i32_ty, - llvm_v2i32_ty], [IntrNoMem]>; + llvm_v1i64_ty], [IntrNoMem]>; def int_x86_mmx_psrl_q : GCCBuiltin<"__builtin_ia32_psrlq">, Intrinsic<[llvm_v1i64_ty, llvm_v1i64_ty, - llvm_v2i32_ty], [IntrNoMem]>; + llvm_v1i64_ty], [IntrNoMem]>; def int_x86_mmx_psra_w : GCCBuiltin<"__builtin_ia32_psraw">, Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty, - llvm_v2i32_ty], [IntrNoMem]>; + llvm_v1i64_ty], [IntrNoMem]>; def int_x86_mmx_psra_d : GCCBuiltin<"__builtin_ia32_psrad">, Intrinsic<[llvm_v2i32_ty, llvm_v2i32_ty, - llvm_v2i32_ty], [IntrNoMem]>; + llvm_v1i64_ty], [IntrNoMem]>; } // Pack ops. diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp index 311941f0d7c..005b1f8a239 100644 --- a/lib/VMCore/AutoUpgrade.cpp +++ b/lib/VMCore/AutoUpgrade.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/AutoUpgrade.h" +#include "llvm/Constants.h" #include "llvm/Function.h" #include "llvm/Module.h" #include "llvm/Instructions.h" @@ -109,6 +110,39 @@ static Function* UpgradeIntrinsicFunction1(Function *F) { break; } + break; + case 'x': + // This fixes all MMX shift intrinsic instructions to take a + // v1i64 instead of a v2i32 as the second parameter. + if (Name.compare(5,10,"x86.mmx.ps",10) == 0 && + (Name.compare(13,4,"psll", 4) == 0 || + Name.compare(13,4,"psra", 4) == 0 || + Name.compare(13,4,"psrl", 4) == 0)) { + + const llvm::Type *VT = VectorType::get(IntegerType::get(64), 1); + + // We don't have to do anything if the parameter already has + // the correct type. + if (FTy->getParamType(1) == VT) + break; + + // We first need to change the name of the old (bad) intrinsic, because + // its type is incorrect, but we cannot overload that name. We + // arbitrarily unique it here allowing us to construct a correctly named + // and typed function below. + F->setName(""); + + assert(FTy->getNumParams() == 2 && "MMX shift intrinsics take 2 args!"); + + // Now construct the new intrinsic with the correct name and type. We + // leave the old function around in order to query its type, whatever it + // may be, and correctly convert up to the new type. + return cast(M->getOrInsertFunction(Name, + FTy->getReturnType(), + FTy->getParamType(0), + VT, + (Type *)0)); + } break; } @@ -141,6 +175,40 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { switch(NewFn->getIntrinsicID()) { default: assert(0 && "Unknown function for CallInst upgrade."); + case Intrinsic::x86_mmx_psll_d: + case Intrinsic::x86_mmx_psll_q: + case Intrinsic::x86_mmx_psll_w: + case Intrinsic::x86_mmx_psra_d: + case Intrinsic::x86_mmx_psra_w: + case Intrinsic::x86_mmx_psrl_d: + case Intrinsic::x86_mmx_psrl_q: + case Intrinsic::x86_mmx_psrl_w: { + SmallVector Operands; + + Operands.push_back(CI->getOperand(1)); + + // Cast the second parameter to the correct type. + BitCastInst *BC = new BitCastInst(CI->getOperand(2), + NewFn->getFunctionType()->getParamType(1), + "upgraded", CI); + Operands.push_back(BC); + + // Construct a new CallInst + CallInst *NewCI = new CallInst(NewFn, Operands.begin(), Operands.end(), + "upgraded."+CI->getName(), CI); + NewCI->setTailCall(CI->isTailCall()); + NewCI->setCallingConv(CI->getCallingConv()); + + // Handle any uses of the old CallInst. + if (!CI->use_empty()) + // Replace all uses of the old call with the new cast which has the + // correct type. + CI->replaceAllUsesWith(NewCI); + + // Clean up the old call now that it has been completely upgraded. + CI->eraseFromParent(); + break; + } case Intrinsic::ctlz: case Intrinsic::ctpop: case Intrinsic::cttz: diff --git a/test/Assembler/AutoUpgradeIntrinsics.ll b/test/Assembler/AutoUpgradeIntrinsics.ll index fad3d4e4e85..15f638ce06f 100644 --- a/test/Assembler/AutoUpgradeIntrinsics.ll +++ b/test/Assembler/AutoUpgradeIntrinsics.ll @@ -6,6 +6,8 @@ ; RUN: not grep {llvm\\.part\\.select\\.i\[0-9\]*\\.i\[0-9\]*} ; RUN: llvm-as < %s | llvm-dis | \ ; RUN: not grep {llvm\\.bswap\\.i\[0-9\]*\\.i\[0-9\]*} +; RUN: llvm-as < %s | llvm-dis | \ +; RUN: grep {llvm\\.x86\\.mmx\\.ps} | grep {2 x i32> | count 6 declare i32 @llvm.ctpop.i28(i28 %val) declare i32 @llvm.cttz.i29(i29 %val) @@ -50,3 +52,30 @@ define i32 @test_bswap(i32 %A, i16 %B) { ret i32 %d } +declare <4 x i16> @llvm.x86.mmx.psra.w(<4 x i16>, <2 x i32>) nounwind readnone +declare <4 x i16> @llvm.x86.mmx.psll.w(<4 x i16>, <2 x i32>) nounwind readnone +declare <4 x i16> @llvm.x86.mmx.psrl.w(<4 x i16>, <2 x i32>) nounwind readnone +define void @sh16(<4 x i16> %A, <2 x i32> %B) { + %r1 = call <4 x i16> @llvm.x86.mmx.psra.w( <4 x i16> %A, <2 x i32> %B ) ; <<4 x i16>> [#uses=0] + %r2 = call <4 x i16> @llvm.x86.mmx.psll.w( <4 x i16> %A, <2 x i32> %B ) ; <<4 x i16>> [#uses=0] + %r3 = call <4 x i16> @llvm.x86.mmx.psrl.w( <4 x i16> %A, <2 x i32> %B ) ; <<4 x i16>> [#uses=0] + ret void +} + +declare <2 x i32> @llvm.x86.mmx.psra.d(<2 x i32>, <2 x i32>) nounwind readnone +declare <2 x i32> @llvm.x86.mmx.psll.d(<2 x i32>, <2 x i32>) nounwind readnone +declare <2 x i32> @llvm.x86.mmx.psrl.d(<2 x i32>, <2 x i32>) nounwind readnone +define void @sh32(<2 x i32> %A, <2 x i32> %B) { + %r1 = call <2 x i32> @llvm.x86.mmx.psra.d( <2 x i32> %A, <2 x i32> %B ) ; <<2 x i32>> [#uses=0] + %r2 = call <2 x i32> @llvm.x86.mmx.psll.d( <2 x i32> %A, <2 x i32> %B ) ; <<2 x i32>> [#uses=0] + %r3 = call <2 x i32> @llvm.x86.mmx.psrl.d( <2 x i32> %A, <2 x i32> %B ) ; <<2 x i32>> [#uses=0] + ret void +} + +declare <1 x i64> @llvm.x86.mmx.psll.q(<1 x i64>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.psrl.q(<1 x i64>, <2 x i32>) nounwind readnone +define void @sh64(<1 x i64> %A, <2 x i32> %B) { + %r1 = call <1 x i64> @llvm.x86.mmx.psll.q( <1 x i64> %A, <2 x i32> %B ) ; <<1 x i64>> [#uses=0] + %r2 = call <1 x i64> @llvm.x86.mmx.psrl.q( <1 x i64> %A, <2 x i32> %B ) ; <<1 x i64>> [#uses=0] + ret void +} -- 2.34.1