R600/SI: Fix div_scale intrinsic.

author Matt Arsenault <Matthew.Arsenault@amd.com>

Mon, 23 Jun 2014 18:28:28 +0000 (18:28 +0000)

committer Matt Arsenault <Matthew.Arsenault@amd.com>

Mon, 23 Jun 2014 18:28:28 +0000 (18:28 +0000)
author Matt Arsenault <Matthew.Arsenault@amd.com>
Mon, 23 Jun 2014 18:28:28 +0000 (18:28 +0000)
committer Matt Arsenault <Matthew.Arsenault@amd.com>
Mon, 23 Jun 2014 18:28:28 +0000 (18:28 +0000)
diff --git a/include/llvm/IR/IntrinsicsR600.td b/include/llvm/IR/IntrinsicsR600.td

index 89b3ef3636eedcaddc8c681e05677894e9b222f9..2566cc2561e7a803a9aa5b2e3cdea0829b8947c0 100644 (file)
--- a/include/llvm/IR/IntrinsicsR600.td
+++ b/include/llvm/IR/IntrinsicsR600.td
@@ -38,8 +38,13 @@ defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz <
  
  let TargetPrefix = "AMDGPU" in {
  def int_AMDGPU_div_scale : GCCBuiltin<"__builtin_amdgpu_div_scale">,
+  // 1st parameter: Numerator
+  // 2nd parameter: Denominator
+  // 3rd parameter: Constant to select select between first and
+  //                second. (0 = first, 1 = second).
    Intrinsic<[llvm_anyfloat_ty, llvm_i1_ty],
-            [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
+            [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty],
+            [IntrNoMem]>;
  
  def int_AMDGPU_div_fmas : GCCBuiltin<"__builtin_amdgpu_div_fmas">,
    Intrinsic<[llvm_anyfloat_ty],
diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp

index 85d0f9dd69127ff6cf5fc00db638dc9d3984d96b..37071bc3f1cdadea7979fc1894589f3939e047c0 100644 (file)
--- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
@@ -86,6 +86,7 @@ private:
    bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  
    SDNode *SelectADD_SUB_I64(SDNode *N);
+  SDNode *SelectDIV_SCALE(SDNode *N);
  
    // Include the pieces autogenerated from the target description.
  #include "AMDGPUGenDAGISel.inc"
@@ -454,6 +455,9 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
                                    PackedOffsetWidth);
  
    }
+  case AMDGPUISD::DIV_SCALE: {
+    return SelectDIV_SCALE(N);
+  }
    }
    return SelectCode(N);
  }
@@ -695,6 +699,30 @@ SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
    return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
  }
  
+SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
+  SDLoc SL(N);
+  EVT VT = N->getValueType(0);
+
+  assert(VT == MVT::f32 || VT == MVT::f64);
+
+  unsigned Opc
+    = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
+
+  const SDValue Zero = CurDAG->getTargetConstant(0, MVT::i32);
+
+  SDValue Ops[] = {
+    N->getOperand(0),
+    N->getOperand(1),
+    N->getOperand(2),
+    Zero,
+    Zero,
+    Zero,
+    Zero
+  };
+
+  return CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
+}
+
  void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
    const AMDGPUTargetLowering& Lowering =
      *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp

index 6ff1703c919345b2fc5bf00e7f2c9c4e1d5d41ec..ca8d0a1626bb0feb7ed3ec4b7fff8bce47bc3442 100644 (file)
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -771,9 +771,21 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
        return DAG.getNode(AMDGPUISD::CLAMP, DL, VT,
                           Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
  
-    case Intrinsic::AMDGPU_div_scale:
+    case Intrinsic::AMDGPU_div_scale: {
+      // 3rd parameter required to be a constant.
+      const ConstantSDNode *Param = dyn_cast<ConstantSDNode>(Op.getOperand(3));
+      if (!Param)
+        return DAG.getUNDEF(VT);
+
+      // Translate to the operands expected by the machine instruction. The
+      // first parameter must be the same as the first instruction.
+      SDValue Numerator = Op.getOperand(1);
+      SDValue Denominator = Op.getOperand(2);
+      SDValue Src0 = Param->isAllOnesValue() ? Numerator : Denominator;
+
        return DAG.getNode(AMDGPUISD::DIV_SCALE, DL, VT,
-                         Op.getOperand(1), Op.getOperand(2));
+                         Src0, Denominator, Numerator);
+    }
  
      case Intrinsic::AMDGPU_div_fmas:
        return DAG.getNode(AMDGPUISD::DIV_FMAS, DL, VT,
diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td

index c4994a250a8ebf116e8e162e54976915c4c46466..dd9a6dc527fae5ef5cc52d3d089a050e44d05d79 100644 (file)
--- a/lib/Target/R600/SIInstrInfo.td
+++ b/lib/Target/R600/SIInstrInfo.td
@@ -433,6 +433,22 @@ class VOP3_64 <bits<9> op, string opName, list<dag> pattern> : VOP3 <
    opName#" $dst, $src0_modifiers, $src1_modifiers, $src2_modifiers, $clamp, $omod", pattern
  >, VOP <opName>;
  
+
+class VOP3b_Helper <bits<9> op, RegisterClass vrc, RegisterClass arc,
+                    string opName, list<dag> pattern> : VOP3 <
+  op, (outs vrc:$dst0, SReg_64:$dst1),
+  (ins arc:$src0, arc:$src1, arc:$src2,
+   InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg),
+  opName#" $dst0, $dst1, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", pattern
+>, VOP <opName>;
+
+
+class VOP3b_64 <bits<9> op, string opName, list<dag> pattern> :
+  VOP3b_Helper <op, VReg_64, VSrc_64, opName, pattern>;
+
+class VOP3b_32 <bits<9> op, string opName, list<dag> pattern> :
+  VOP3b_Helper <op, VReg_32, VSrc_32, opName, pattern>;
+
  //===----------------------------------------------------------------------===//
  // Vector I/O classes
  //===----------------------------------------------------------------------===//
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td

index f888b8e6e05233939e4cf4708dfb05aebc5d8c9e..6a29dbe77dd5a6bc72ee85f101832e602cdcf675 100644 (file)
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -1455,8 +1455,10 @@ defm V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>;
  
  } // isCommutable = 1
  
-defm V_DIV_SCALE_F32 : VOP3_32 <0x0000016d, "V_DIV_SCALE_F32", []>;
-def V_DIV_SCALE_F64 : VOP3_64 <0x0000016e, "V_DIV_SCALE_F64", []>;
+def V_DIV_SCALE_F32 : VOP3b_32 <0x0000016d, "V_DIV_SCALE_F32", []>;
+
+// Double precision division pre-scale.
+def V_DIV_SCALE_F64 : VOP3b_64 <0x0000016e, "V_DIV_SCALE_F64", []>;
  
  defm V_DIV_FMAS_F32 : VOP3_32 <0x0000016f, "V_DIV_FMAS_F32",
    [(set f32:$dst, (AMDGPUdiv_fmas f32:$src0, f32:$src1, f32:$src2))]
diff --git a/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll b/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll

index 1bcbe2f859edf7d99bada496e5922fd019d33130..527c8da10a3c9fda5e6a627cc33a881d6d0ad9d9 100644 (file)
--- a/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll
@@ -1,23 +1,48 @@
-; XFAIL: *
  ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
  
-declare float @llvm.AMDGPU.div.scale.f32(float, float) nounwind readnone
-declare double @llvm.AMDGPU.div.scale.f64(double, double) nounwind readnone
+declare { float, i1 } @llvm.AMDGPU.div.scale.f32(float, float, i1) nounwind readnone
+declare { double, i1 } @llvm.AMDGPU.div.scale.f64(double, double, i1) nounwind readnone
  
-; SI-LABEL @test_div_scale_f32:
-define void @test_div_scale_f32(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr) nounwind {
+; SI-LABEL @test_div_scale_f32_1:
+; SI: V_DIV_SCALE_F32
+define void @test_div_scale_f32_1(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr) nounwind {
    %a = load float addrspace(1)* %aptr, align 4
    %b = load float addrspace(1)* %bptr, align 4
-  %result = call float @llvm.AMDGPU.div.scale.f32(float %a, float %b) nounwind readnone
-  store float %result, float addrspace(1)* %out, align 4
+  %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
+  %result0 = extractvalue { float, i1 } %result, 0
+  store float %result0, float addrspace(1)* %out, align 4
    ret void
  }
  
-; SI-LABEL @test_div_scale_f64:
-define void @test_div_scale_f64(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %bptr) nounwind {
+; SI-LABEL @test_div_scale_f32_2:
+; SI: V_DIV_SCALE_F32
+define void @test_div_scale_f32_2(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr) nounwind {
+  %a = load float addrspace(1)* %aptr, align 4
+  %b = load float addrspace(1)* %bptr, align 4
+  %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
+  %result0 = extractvalue { float, i1 } %result, 0
+  store float %result0, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL @test_div_scale_f64_1:
+; SI: V_DIV_SCALE_F64
+define void @test_div_scale_f64_1(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %bptr, double addrspace(1)* %cptr) nounwind {
+  %a = load double addrspace(1)* %aptr, align 8
+  %b = load double addrspace(1)* %bptr, align 8
+  %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
+  %result0 = extractvalue { double, i1 } %result, 0
+  store double %result0, double addrspace(1)* %out, align 8
+  ret void
+}
+
+; SI-LABEL @test_div_scale_f64_1:
+; SI: V_DIV_SCALE_F64
+define void @test_div_scale_f64_2(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %bptr, double addrspace(1)* %cptr) nounwind {
    %a = load double addrspace(1)* %aptr, align 8
    %b = load double addrspace(1)* %bptr, align 8
-  %result = call double @llvm.AMDGPU.div.scale.f64(double %a, double %b) nounwind readnone
-  store double %result, double addrspace(1)* %out, align 8
+  %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
+  %result0 = extractvalue { double, i1 } %result, 0
+  store double %result0, double addrspace(1)* %out, align 8
    ret void
  }
author	Matt Arsenault <Matthew.Arsenault@amd.com>
	Mon, 23 Jun 2014 18:28:28 +0000 (18:28 +0000)
committer	Matt Arsenault <Matthew.Arsenault@amd.com>
	Mon, 23 Jun 2014 18:28:28 +0000 (18:28 +0000)
include/llvm/IR/IntrinsicsR600.td		patch \| blob \| history
lib/Target/R600/AMDGPUISelDAGToDAG.cpp		patch \| blob \| history
lib/Target/R600/AMDGPUISelLowering.cpp		patch \| blob \| history
lib/Target/R600/SIInstrInfo.td		patch \| blob \| history
lib/Target/R600/SIInstructions.td		patch \| blob \| history
test/CodeGen/R600/llvm.AMDGPU.div_scale.ll		patch \| blob \| history