AMDGPU/SI: Add llvm.amdgcn.v.interp.p[12] intrinsics

author Tom Stellard <thomas.stellard@amd.com>

Tue, 15 Dec 2015 17:02:49 +0000 (17:02 +0000)

committer Tom Stellard <thomas.stellard@amd.com>

Tue, 15 Dec 2015 17:02:49 +0000 (17:02 +0000)
author Tom Stellard <thomas.stellard@amd.com>
Tue, 15 Dec 2015 17:02:49 +0000 (17:02 +0000)
committer Tom Stellard <thomas.stellard@amd.com>
Tue, 15 Dec 2015 17:02:49 +0000 (17:02 +0000)
diff --git a/include/llvm/IR/IntrinsicsAMDGPU.td b/include/llvm/IR/IntrinsicsAMDGPU.td

index a7c32f77b7c768c5a631b3373c94a0a0da310e80..98af638a15bc56ca33c1f484f23db5f90f9f5db5 100644 (file)
--- a/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -131,4 +131,19 @@ def int_amdgcn_dispatch_ptr :
    GCCBuiltin<"__builtin_amdgcn_dispatch_ptr">,
    Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>;
  
+// __builtin_amdgcn_interp_p1 <i>, <attr_chan>, <attr>, <m0>
+def int_amdgcn_interp_p1 :
+  GCCBuiltin<"__builtin_amdgcn_interp_p1">,
+  Intrinsic<[llvm_float_ty],
+            [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+            [IntrNoMem]>;  // This intrinsic reads from LDS, but the memory
+                           // values are constant, so it behaves like IntrNoMem.
+
+// __builtin_amdgcn_interp_p2 <p1>, <j>, <attr_chan>, <attr>, <m0>
+def int_amdgcn_interp_p2 :
+  GCCBuiltin<"__builtin_amdgcn_interp_p2">,
+  Intrinsic<[llvm_float_ty],
+            [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+            [IntrNoMem]>;  // See int_amdgcn_interp_p1 for why this is
+                           // IntrNoMem.
  }
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp

index e31552c55543f6e0907984f2d867af5ec93d5069..71864de6957621df92f0ab38330a450e54bfb6bf 100644 (file)
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1252,6 +1252,19 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
      return DAG.getNode(AMDGPUISD::INTERP_P2, DL, MVT::f32, P1, J,
                               Op.getOperand(1), Op.getOperand(2), Glue);
    }
+  case Intrinsic::amdgcn_interp_p1: {
+    SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(4));
+    SDValue Glue = M0.getValue(1);
+    return DAG.getNode(AMDGPUISD::INTERP_P1, DL, MVT::f32, Op.getOperand(1),
+                       Op.getOperand(2), Op.getOperand(3), Glue);
+  }
+  case Intrinsic::amdgcn_interp_p2: {
+    SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(5));
+    SDValue Glue = SDValue(M0.getNode(), 1);
+    return DAG.getNode(AMDGPUISD::INTERP_P2, DL, MVT::f32, Op.getOperand(1),
+                       Op.getOperand(2), Op.getOperand(3), Op.getOperand(4),
+                       Glue);
+  }
    default:
      return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    }
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll

new file mode 100644 (file)

index 0000000..a28e1b1
--- /dev/null
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll
@@ -0,0 +1,30 @@
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=GCN %s
+;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=GCN %s
+
+;GCN-LABEL: {{^}}v_interp:
+;GCN-NOT: s_wqm
+;GCN: s_mov_b32 m0, s{{[0-9]+}}
+;GCN: v_interp_p1_f32
+;GCN: v_interp_p2_f32
+define void @v_interp(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>) #0 {
+main_body:
+  %i = extractelement <2 x i32> %4, i32 0
+  %j = extractelement <2 x i32> %4, i32 1
+  %p0_0 = call float @llvm.amdgcn.interp.p1(i32 %i, i32 0, i32 0, i32 %3)
+  %p1_0 = call float @llvm.amdgcn.interp.p2(float %p0_0, i32 %j, i32 0, i32 0, i32 %3)
+  %p0_1 = call float @llvm.amdgcn.interp.p1(i32 %i, i32 1, i32 0, i32 %3)
+  %p1_1 = call float @llvm.amdgcn.interp.p2(float %p0_1, i32 %j, i32 1, i32 0, i32 %3)
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %p0_0, float %p0_0, float %p1_1, float %p1_1)
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare float @llvm.amdgcn.interp.p1(i32, i32, i32, i32) #1
+
+; Function Attrs: nounwind readnone
+declare float @llvm.amdgcn.interp.p2(float, i32, i32, i32, i32) #1
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+
+attributes #0 = { "ShaderType"="0" }
+attributes #1 = { nounwind readnone }
author	Tom Stellard <thomas.stellard@amd.com>
	Tue, 15 Dec 2015 17:02:49 +0000 (17:02 +0000)
committer	Tom Stellard <thomas.stellard@amd.com>
	Tue, 15 Dec 2015 17:02:49 +0000 (17:02 +0000)
include/llvm/IR/IntrinsicsAMDGPU.td		patch \| blob \| history
lib/Target/AMDGPU/SIISelLowering.cpp		patch \| blob \| history
test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll	[new file with mode: 0644]	patch \| blob