From 5bc44c76030a56140e877ff25356b71f143bcaad Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 15 Aug 2014 17:30:25 +0000 Subject: [PATCH] R600/SI: Add intrinsic for ldexp git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@215734 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/IntrinsicsR600.td | 3 +++ lib/Target/R600/AMDGPUISelLowering.cpp | 5 +++++ lib/Target/R600/AMDGPUISelLowering.h | 1 + lib/Target/R600/AMDGPUInstrInfo.td | 6 ++++++ lib/Target/R600/SIInstructions.td | 4 ++-- test/CodeGen/R600/llvm.AMDGPU.ldexp.ll | 22 ++++++++++++++++++++++ 6 files changed, 39 insertions(+), 2 deletions(-) create mode 100644 test/CodeGen/R600/llvm.AMDGPU.ldexp.ll diff --git a/include/llvm/IR/IntrinsicsR600.td b/include/llvm/IR/IntrinsicsR600.td index ba69eaae089..2e711a99a43 100644 --- a/include/llvm/IR/IntrinsicsR600.td +++ b/include/llvm/IR/IntrinsicsR600.td @@ -69,4 +69,7 @@ def int_AMDGPU_rsq : GCCBuiltin<"__builtin_amdgpu_rsq">, def int_AMDGPU_rsq_clamped : GCCBuiltin<"__builtin_amdgpu_rsq_clamped">, Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; +def int_AMDGPU_ldexp : GCCBuiltin<"__builtin_amdgpu_ldexp">, + Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]>; + } // End TargetPrefix = "AMDGPU" diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 83cb3f5cf25..b30c9441524 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -853,6 +853,10 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::AMDGPU_rsq_clamped: return DAG.getNode(AMDGPUISD::RSQ_CLAMPED, DL, VT, Op.getOperand(1)); + case Intrinsic::AMDGPU_ldexp: + return DAG.getNode(AMDGPUISD::LDEXP, DL, VT, Op.getOperand(1), + Op.getOperand(2)); + case AMDGPUIntrinsic::AMDGPU_imax: return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1), Op.getOperand(2)); @@ -2168,6 +2172,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(RSQ) NODE_NAME_CASE(RSQ_LEGACY) NODE_NAME_CASE(RSQ_CLAMPED) + NODE_NAME_CASE(LDEXP) NODE_NAME_CASE(DOT4) NODE_NAME_CASE(BFE_U32) NODE_NAME_CASE(BFE_I32) diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index 5001116f5a0..2e78ba6a918 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -203,6 +203,7 @@ enum { RSQ, RSQ_LEGACY, RSQ_CLAMPED, + LDEXP, DOT4, BFE_U32, // Extract range of bits with zero extension to 32-bits. BFE_I32, // Extract range of bits with sign extension to 32-bits. diff --git a/lib/Target/R600/AMDGPUInstrInfo.td b/lib/Target/R600/AMDGPUInstrInfo.td index 820f1a80d75..06962eba6e2 100644 --- a/lib/Target/R600/AMDGPUInstrInfo.td +++ b/lib/Target/R600/AMDGPUInstrInfo.td @@ -23,6 +23,10 @@ def AMDGPUTrigPreOp : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2>] >; +def AMDGPULdExpOp : SDTypeProfile<1, 2, + [SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2>] +>; + def AMDGPUDivScaleOp : SDTypeProfile<2, 3, [SDTCisFP<0>, SDTCisInt<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisSameAs<0, 4>] >; @@ -52,6 +56,8 @@ def AMDGPUrsq_legacy : SDNode<"AMDGPUISD::RSQ_LEGACY", SDTFPUnaryOp>; // out = 1.0 / sqrt(a) result clamped to +/- max_float. def AMDGPUrsq_clamped : SDNode<"AMDGPUISD::RSQ_CLAMPED", SDTFPUnaryOp>; +def AMDGPUldexp : SDNode<"AMDGPUISD::LDEXP", AMDGPULdExpOp>; + // out = max(a, b) a and b are floats def AMDGPUfmax : SDNode<"AMDGPUISD::FMAX", SDTFPBinOp, [SDNPCommutative, SDNPAssociative] diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 7371365f103..bbbe18d9369 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1390,7 +1390,7 @@ defm V_SUBBREV_U32 : VOP2bInst <0x0000002a, "V_SUBBREV_U32", } // End isCommutable = 1, Defs = [VCC] defm V_LDEXP_F32 : VOP2Inst <0x0000002b, "V_LDEXP_F32", - VOP_F32_F32_F32 + VOP_F32_F32_I32, AMDGPUldexp >; ////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "V_CVT_PKACCUM_U8_F32", []>; ////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "V_CVT_PKNORM_I16_F32", []>; @@ -1509,7 +1509,7 @@ defm V_MAX_F64 : VOP3Inst <0x00000167, "V_MAX_F64", } // isCommutable = 1 defm V_LDEXP_F64 : VOP3Inst <0x00000168, "V_LDEXP_F64", - VOP_F32_F32_I32 + VOP_F64_F64_I32, AMDGPUldexp >; let isCommutable = 1 in { diff --git a/test/CodeGen/R600/llvm.AMDGPU.ldexp.ll b/test/CodeGen/R600/llvm.AMDGPU.ldexp.ll new file mode 100644 index 00000000000..d59426c6754 --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.ldexp.ll @@ -0,0 +1,22 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s + +declare float @llvm.AMDGPU.ldexp.f32(float, i32) nounwind readnone +declare double @llvm.AMDGPU.ldexp.f64(double, i32) nounwind readnone + +; SI-LABEL: @test_ldexp_f32: +; SI: V_LDEXP_F32 +; SI: S_ENDPGM +define void @test_ldexp_f32(float addrspace(1)* %out, float %a, i32 %b) nounwind { + %result = call float @llvm.AMDGPU.ldexp.f32(float %a, i32 %b) nounwind readnone + store float %result, float addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: @test_ldexp_f64: +; SI: V_LDEXP_F64 +; SI: S_ENDPGM +define void @test_ldexp_f64(double addrspace(1)* %out, double %a, i32 %b) nounwind { + %result = call double @llvm.AMDGPU.ldexp.f64(double %a, i32 %b) nounwind readnone + store double %result, double addrspace(1)* %out, align 8 + ret void +} -- 2.34.1