R600/SI: Add intrinsics for various math instructions.

author Matt Arsenault <Matthew.Arsenault@amd.com>

Thu, 19 Jun 2014 01:19:19 +0000 (01:19 +0000)

committer Matt Arsenault <Matthew.Arsenault@amd.com>

Thu, 19 Jun 2014 01:19:19 +0000 (01:19 +0000)
author Matt Arsenault <Matthew.Arsenault@amd.com>
Thu, 19 Jun 2014 01:19:19 +0000 (01:19 +0000)
committer Matt Arsenault <Matthew.Arsenault@amd.com>
Thu, 19 Jun 2014 01:19:19 +0000 (01:19 +0000)
diff --git a/include/llvm/IR/IntrinsicsR600.td b/include/llvm/IR/IntrinsicsR600.td

index ecb5668d8e9591d77e0a4551ecc66d3b3ed7dd83..09607d5834d6938c1c20ed5e6f0a2be82b55a6ee 100644 (file)
--- a/include/llvm/IR/IntrinsicsR600.td
+++ b/include/llvm/IR/IntrinsicsR600.td
@@ -33,4 +33,40 @@ defm int_r600_read_tgid : R600ReadPreloadRegisterIntrinsic_xyz <
                                         "__builtin_r600_read_tgid">;
  defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz <
                                         "__builtin_r600_read_tidig">;
+
  } // End TargetPrefix = "r600"
+
+let TargetPrefix = "AMDGPU" in {
+def int_AMDGPU_div_scale :
+  Intrinsic<[llvm_anyfloat_ty, llvm_i1_ty],
+            [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>,
+            GCCBuiltin<"__builtin_amdgpu_div_scale">;
+
+def int_AMDGPU_div_fmas :
+  Intrinsic<[llvm_anyfloat_ty],
+            [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
+            [IntrNoMem]>,
+            GCCBuiltin<"__builtin_amdgpu_div_fmas">;
+
+def int_AMDGPU_div_fixup :
+  Intrinsic<[llvm_anyfloat_ty],
+            [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>,
+            GCCBuiltin<"__builtin_amdgpu_div_fixup">;
+
+def int_AMDGPU_trig_preop :
+  Intrinsic<[llvm_anyfloat_ty],
+            [LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]>,
+            GCCBuiltin<"__builtin_amdgpu_trig_preop">;
+
+def int_AMDGPU_rcp :
+  Intrinsic<[llvm_anyfloat_ty],
+            [LLVMMatchType<0>], [IntrNoMem]>,
+            GCCBuiltin<"__builtin_amdgpu_rcp">;
+
+def int_AMDGPU_rsq :
+  Intrinsic<[llvm_anyfloat_ty],
+            [LLVMMatchType<0>], [IntrNoMem]>,
+            GCCBuiltin<"__builtin_amdgpu_rsq">;
+
+
+} // End TargetPrefix = "AMDGPU"
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp

index 34c2b2bf61d075ad57276576c73e6d2ef3fee385..1aa92fadbeeac6b48e81e39cfe10b5a56265a389 100644 (file)
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -842,6 +842,28 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
        return DAG.getNode(AMDGPUISD::CLAMP, DL, VT,
                           Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
  
+    case Intrinsic::AMDGPU_div_scale:
+      return DAG.getNode(AMDGPUISD::DIV_SCALE, DL, VT,
+                         Op.getOperand(1), Op.getOperand(2));
+
+    case Intrinsic::AMDGPU_div_fmas:
+      return DAG.getNode(AMDGPUISD::DIV_FMAS, DL, VT,
+                         Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+
+    case Intrinsic::AMDGPU_div_fixup:
+      return DAG.getNode(AMDGPUISD::DIV_FIXUP, DL, VT,
+                         Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+
+    case Intrinsic::AMDGPU_trig_preop:
+      return DAG.getNode(AMDGPUISD::TRIG_PREOP, DL, VT,
+                         Op.getOperand(1), Op.getOperand(2));
+
+    case Intrinsic::AMDGPU_rcp:
+      return DAG.getNode(AMDGPUISD::RCP, DL, VT, Op.getOperand(1));
+
+    case Intrinsic::AMDGPU_rsq:
+      return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
+
      case AMDGPUIntrinsic::AMDGPU_imax:
        return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1),
                                                    Op.getOperand(2));
@@ -2042,6 +2064,14 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
    NODE_NAME_CASE(FMIN)
    NODE_NAME_CASE(SMIN)
    NODE_NAME_CASE(UMIN)
+  NODE_NAME_CASE(URECIP)
+  NODE_NAME_CASE(DIV_SCALE)
+  NODE_NAME_CASE(DIV_FMAS)
+  NODE_NAME_CASE(DIV_FIXUP)
+  NODE_NAME_CASE(TRIG_PREOP)
+  NODE_NAME_CASE(RCP)
+  NODE_NAME_CASE(RSQ)
+  NODE_NAME_CASE(DOT4)
    NODE_NAME_CASE(BFE_U32)
    NODE_NAME_CASE(BFE_I32)
    NODE_NAME_CASE(BFI)
@@ -2051,8 +2081,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
    NODE_NAME_CASE(MUL_I24)
    NODE_NAME_CASE(MAD_U24)
    NODE_NAME_CASE(MAD_I24)
-  NODE_NAME_CASE(URECIP)
-  NODE_NAME_CASE(DOT4)
    NODE_NAME_CASE(EXPORT)
    NODE_NAME_CASE(CONST_ADDRESS)
    NODE_NAME_CASE(REGISTER_LOAD)
diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h

index b2bb2579dcb369e2b3cfbf5f9d85613f9e404d28..e2000a04ba4ec1c6bd4ae675b5bc4492a78f05e9 100644 (file)
--- a/lib/Target/R600/AMDGPUISelLowering.h
+++ b/lib/Target/R600/AMDGPUISelLowering.h
@@ -175,6 +175,9 @@ enum {
    DWORDADDR,
    FRACT,
    CLAMP,
+
+  // SIN_HW, COS_HW - f32 for SI, 1 ULP max error, valid from -100 pi to 100 pi.
+  // Denormals handled on some parts.
    COS_HW,
    SIN_HW,
    FMAX,
@@ -184,6 +187,15 @@ enum {
    SMIN,
    UMIN,
    URECIP,
+  DIV_SCALE,
+  DIV_FMAS,
+  DIV_FIXUP,
+  TRIG_PREOP, // 1 ULP max error for f64
+
+  // RCP, RSQ - For f32, 1 ULP max error, no denormal handling.
+  //            For f64, max error 2^29 ULP, handles denormals.
+  RCP,
+  RSQ,
    DOT4,
    BFE_U32, // Extract range of bits with zero extension to 32-bits.
    BFE_I32, // Extract range of bits with sign extension to 32-bits.
diff --git a/lib/Target/R600/AMDGPUInstrInfo.td b/lib/Target/R600/AMDGPUInstrInfo.td

index 942a9e8ff351e8735e9edd504b4ee14f067f7467..d0ee40a67872b6cbd66490993ce72ace84b6e802 100644 (file)
--- a/lib/Target/R600/AMDGPUInstrInfo.td
+++ b/lib/Target/R600/AMDGPUInstrInfo.td
@@ -19,6 +19,14 @@ def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [
    SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>
  ]>;
  
+def AMDGPUTrigPreOp : SDTypeProfile<1, 2,
+  [SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2>]
+>;
+
+def AMDGPUDivScaleOp : SDTypeProfile<2, 3,
+  [SDTCisFP<0>, SDTCisInt<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisSameAs<0, 4>]
+>;
+
  //===----------------------------------------------------------------------===//
  // AMDGPU DAG Nodes
  //
@@ -29,6 +37,12 @@ def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>;
  // out = a - floor(a)
  def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;
  
+// out = 1.0 / a
+def AMDGPUrcp : SDNode<"AMDGPUISD::RCP", SDTFPUnaryOp>;
+
+// out = 1.0 / sqrt(a)
+def AMDGPUrsq : SDNode<"AMDGPUISD::RSQ", SDTFPUnaryOp>;
+
  // out = max(a, b) a and b are floats
  def AMDGPUfmax : SDNode<"AMDGPUISD::FMAX", SDTFPBinOp,
    [SDNPCommutative, SDNPAssociative]
@@ -78,6 +92,21 @@ def AMDGPUcvt_f32_ubyte3 : SDNode<"AMDGPUISD::CVT_F32_UBYTE3",
  // e is rounding error
  def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>;
  
+// Special case divide preop and flags.
+def AMDGPUdiv_scale : SDNode<"AMDGPUISD::DIV_SCALE", AMDGPUDivScaleOp>;
+
+//  Special case divide FMA with scale and flags (src0 = Quotient,
+//  src1 = Denominator, src2 = Numerator).
+def AMDGPUdiv_fmas : SDNode<"AMDGPUISD::DIV_FMAS", SDTFPTernaryOp>;
+
+// Single or double precision division fixup.
+// Special case divide fixup and flags(src0 = Quotient, src1 =
+// Denominator, src2 = Numerator).
+def AMDGPUdiv_fixup : SDNode<"AMDGPUISD::DIV_FIXUP", SDTFPTernaryOp>;
+
+// Look Up 2.0 / pi src0 with segment select src1[4:0]
+def AMDGPUtrig_preop : SDNode<"AMDGPUISD::TRIG_PREOP", AMDGPUTrigPreOp>;
+
  def AMDGPUregister_load : SDNode<"AMDGPUISD::REGISTER_LOAD",
                            SDTypeProfile<1, 2, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
                            [SDNPHasChain, SDNPMayLoad]>;
diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td

index 8bfc11cd468c781681f7c887726db65225c4483e..14bfd8cc18d522d395be5aa26af0b84e6c4b15d9 100644 (file)
--- a/lib/Target/R600/AMDGPUInstructions.td
+++ b/lib/Target/R600/AMDGPUInstructions.td
@@ -519,6 +519,16 @@ multiclass Expand24UBitOps<Instruction MulInst, Instruction AddInst> {
    >;
  }
  
+class RcpPat<Instruction RcpInst, ValueType vt> : Pat <
+  (fdiv FP_ONE, vt:$src),
+  (RcpInst $src)
+>;
+
+class RsqPat<Instruction RsqInst, ValueType vt> : Pat <
+  (AMDGPUrcp (fsqrt vt:$src)),
+  (RsqInst $src)
+>;
+
  include "R600Instructions.td"
  include "R700Instructions.td"
  include "EvergreenInstructions.td"
diff --git a/lib/Target/R600/AMDGPUIntrinsics.td b/lib/Target/R600/AMDGPUIntrinsics.td

index 6dc7612d46fbc330d5f372247c31859cb164de0f..538b4cd8af28bfb1193d2737c0a2bd359c4c5499 100644 (file)
--- a/lib/Target/R600/AMDGPUIntrinsics.td
+++ b/lib/Target/R600/AMDGPUIntrinsics.td
@@ -30,8 +30,6 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in {
    def int_AMDGPU_lrp : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
    def int_AMDGPU_mul : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
    def int_AMDGPU_pow : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-  def int_AMDGPU_rcp : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
-  def int_AMDGPU_rsq : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
    def int_AMDGPU_seq : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
    def int_AMDGPU_sgt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
    def int_AMDGPU_sge : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td

index 58c704d8ec85166b0fbfc256c8c94e961918a8a1..47b7da0955d8e35ee940e457c3abeca4ae9bc372 100644 (file)
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -1083,7 +1083,7 @@ class RECIP_UINT_Common <bits<11> inst> : R600_1OP_Helper <
  }
  
  class RECIPSQRT_CLAMPED_Common <bits<11> inst> : R600_1OP_Helper <
-  inst, "RECIPSQRT_CLAMPED", int_AMDGPU_rsq
+  inst, "RECIPSQRT_CLAMPED", AMDGPUrsq
  > {
    let Itinerary = TransALU;
  }
diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp

index a17fed7e7ea7490fa61ca7cea00eef2a3789f0ce..173332674ff198005c65cf06414604b3af392f0b 100644 (file)
--- a/lib/Target/R600/SIInsertWaits.cpp
+++ b/lib/Target/R600/SIInsertWaits.cpp
@@ -341,6 +341,8 @@ Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
    return Result;
  }
  
+// FIXME: Insert waits listed in Table 4.2 "Required User-Inserted Wait States"
+// around other non-memory instructions.
  bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
    bool Changes = false;
  
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td

index 60eb8f978180e3a4a8d2d99a1c3a434ba81bd3a6..26024dc4f2df079072f671de362c97596d85aa87 100644 (file)
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -1116,22 +1116,23 @@ defm V_LOG_CLAMP_F32 : VOP1_32 <0x00000026, "V_LOG_CLAMP_F32", []>;
  defm V_LOG_F32 : VOP1_32 <0x00000027, "V_LOG_F32",
    [(set f32:$dst, (flog2 f32:$src0))]
  >;
+
  defm V_RCP_CLAMP_F32 : VOP1_32 <0x00000028, "V_RCP_CLAMP_F32", []>;
  defm V_RCP_LEGACY_F32 : VOP1_32 <0x00000029, "V_RCP_LEGACY_F32", []>;
  defm V_RCP_F32 : VOP1_32 <0x0000002a, "V_RCP_F32",
-  [(set f32:$dst, (fdiv FP_ONE, f32:$src0))]
+  [(set f32:$dst, (AMDGPUrcp f32:$src0))]
  >;
  defm V_RCP_IFLAG_F32 : VOP1_32 <0x0000002b, "V_RCP_IFLAG_F32", []>;
  defm V_RSQ_CLAMP_F32 : VOP1_32 <0x0000002c, "V_RSQ_CLAMP_F32", []>;
  defm V_RSQ_LEGACY_F32 : VOP1_32 <
    0x0000002d, "V_RSQ_LEGACY_F32",
-  [(set f32:$dst, (int_AMDGPU_rsq f32:$src0))]
+  [(set f32:$dst, (AMDGPUrsq f32:$src0))]
  >;
  defm V_RSQ_F32 : VOP1_32 <0x0000002e, "V_RSQ_F32",
    [(set f32:$dst, (fdiv FP_ONE, (fsqrt f32:$src0)))]
  >;
  defm V_RCP_F64 : VOP1_64 <0x0000002f, "V_RCP_F64",
-  [(set f64:$dst, (fdiv FP_ONE, f64:$src0))]
+  [(set f64:$dst, (AMDGPUrcp f64:$src0))]
  >;
  defm V_RCP_CLAMP_F64 : VOP1_64 <0x00000030, "V_RCP_CLAMP_F64", []>;
  defm V_RSQ_F64 : VOP1_64 <0x00000031, "V_RSQ_F64",
@@ -1417,8 +1418,12 @@ defm V_MULLIT_F32 : VOP3_32 <0x00000150, "V_MULLIT_F32", []>;
  //def V_SAD_U16 : VOP3_U16 <0x0000015c, "V_SAD_U16", []>;
  defm V_SAD_U32 : VOP3_32 <0x0000015d, "V_SAD_U32", []>;
  ////def V_CVT_PK_U8_F32 : VOP3_U8 <0x0000015e, "V_CVT_PK_U8_F32", []>;
-defm V_DIV_FIXUP_F32 : VOP3_32 <0x0000015f, "V_DIV_FIXUP_F32", []>;
-def V_DIV_FIXUP_F64 : VOP3_64 <0x00000160, "V_DIV_FIXUP_F64", []>;
+defm V_DIV_FIXUP_F32 : VOP3_32 <0x0000015f, "V_DIV_FIXUP_F32",
+  [(set f32:$dst, (AMDGPUdiv_fixup f32:$src0, f32:$src1, f32:$src2))]
+>;
+def V_DIV_FIXUP_F64 : VOP3_64 <0x00000160, "V_DIV_FIXUP_F64",
+  [(set f64:$dst, (AMDGPUdiv_fixup f64:$src0, f64:$src1, f64:$src2))]
+>;
  
  def V_LSHL_B64 : VOP3_64_32 <0x00000161, "V_LSHL_B64",
    [(set i64:$dst, (shl i64:$src0, i32:$src1))]
@@ -1452,12 +1457,19 @@ defm V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>;
  
  defm V_DIV_SCALE_F32 : VOP3_32 <0x0000016d, "V_DIV_SCALE_F32", []>;
  def V_DIV_SCALE_F64 : VOP3_64 <0x0000016e, "V_DIV_SCALE_F64", []>;
-defm V_DIV_FMAS_F32 : VOP3_32 <0x0000016f, "V_DIV_FMAS_F32", []>;
-def V_DIV_FMAS_F64 : VOP3_64 <0x00000170, "V_DIV_FMAS_F64", []>;
+
+defm V_DIV_FMAS_F32 : VOP3_32 <0x0000016f, "V_DIV_FMAS_F32",
+  [(set f32:$dst, (AMDGPUdiv_fmas f32:$src0, f32:$src1, f32:$src2))]
+>;
+def V_DIV_FMAS_F64 : VOP3_64 <0x00000170, "V_DIV_FMAS_F64",
+  [(set f64:$dst, (AMDGPUdiv_fmas f64:$src0, f64:$src1, f64:$src2))]
+>;
  //def V_MSAD_U8 : VOP3_U8 <0x00000171, "V_MSAD_U8", []>;
  //def V_QSAD_U8 : VOP3_U8 <0x00000172, "V_QSAD_U8", []>;
  //def V_MQSAD_U8 : VOP3_U8 <0x00000173, "V_MQSAD_U8", []>;
-def V_TRIG_PREOP_F64 : VOP3_64 <0x00000174, "V_TRIG_PREOP_F64", []>;
+def V_TRIG_PREOP_F64 : VOP3_64_32 <0x00000174, "V_TRIG_PREOP_F64",
+  [(set f64:$dst, (AMDGPUtrig_preop f64:$src0, i32:$src1))]
+>;
  
  //===----------------------------------------------------------------------===//
  // Pseudo Instructions
@@ -1748,6 +1760,15 @@ def : Pat <
    (S_BARRIER)
  >;
  
+//===----------------------------------------------------------------------===//
+// VOP1 Patterns
+//===----------------------------------------------------------------------===//
+
+def : RcpPat<V_RCP_F32_e32, f32>;
+def : RcpPat<V_RCP_F64_e32, f64>;
+def : RsqPat<V_RSQ_F32_e32, f32>;
+def : RsqPat<V_RSQ_F64_e32, f64>;
+
  //===----------------------------------------------------------------------===//
  // VOP2 Patterns
  //===----------------------------------------------------------------------===//
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp

index d4bdd75fa82aa3daf5ed54ab1b5638fe3c0f4f7b..ff745641581455299acb5c2eba6414ce9fd3611d 100644 (file)
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -922,6 +922,20 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
      break;
    }
  
+  case Intrinsic::AMDGPU_rcp: {
+    if (const ConstantFP *C = dyn_cast<ConstantFP>(II->getArgOperand(0))) {
+      const APFloat &ArgVal = C->getValueAPF();
+      APFloat Val(ArgVal.getSemantics(), 1.0);
+      APFloat::opStatus Status = Val.divide(ArgVal,
+                                            APFloat::rmNearestTiesToEven);
+      // Only do this if it was exact and therefore not dependent on the
+      // rounding mode.
+      if (Status == APFloat::opOK)
+        return ReplaceInstUsesWith(CI, ConstantFP::get(II->getContext(), Val));
+    }
+
+    break;
+  }
    case Intrinsic::stackrestore: {
      // If the save is right next to the restore, remove the restore.  This can
      // happen when variable allocas are DCE'd.
diff --git a/test/CodeGen/R600/big_alu.ll b/test/CodeGen/R600/big_alu.ll

index 6b683769fe062e1b635d88ea62d8d29286149c12..511e8ef62951feb86adb3460c8b1956750c215d8 100644 (file)
--- a/test/CodeGen/R600/big_alu.ll
+++ b/test/CodeGen/R600/big_alu.ll
@@ -101,7 +101,7 @@ IF137:                                            ; preds = %main_body
    %88 = insertelement <4 x float> %87, float %32, i32 2
    %89 = insertelement <4 x float> %88, float 0.000000e+00, i32 3
    %90 = call float @llvm.AMDGPU.dp4(<4 x float> %85, <4 x float> %89)
-  %91 = call float @llvm.AMDGPU.rsq(float %90)
+  %91 = call float @llvm.AMDGPU.rsq.f32(float %90)
    %92 = fmul float %30, %91
    %93 = fmul float %31, %91
    %94 = fmul float %32, %91
@@ -344,7 +344,7 @@ ENDIF136:                                         ; preds = %main_body, %ENDIF15
    %325 = insertelement <4 x float> %324, float %318, i32 2
    %326 = insertelement <4 x float> %325, float 0.000000e+00, i32 3
    %327 = call float @llvm.AMDGPU.dp4(<4 x float> %322, <4 x float> %326)
-  %328 = call float @llvm.AMDGPU.rsq(float %327)
+  %328 = call float @llvm.AMDGPU.rsq.f32(float %327)
    %329 = fmul float %314, %328
    %330 = fmul float %316, %328
    %331 = fmul float %318, %328
@@ -377,7 +377,7 @@ ENDIF136:                                         ; preds = %main_body, %ENDIF15
    %358 = insertelement <4 x float> %357, float %45, i32 2
    %359 = insertelement <4 x float> %358, float 0.000000e+00, i32 3
    %360 = call float @llvm.AMDGPU.dp4(<4 x float> %355, <4 x float> %359)
-  %361 = call float @llvm.AMDGPU.rsq(float %360)
+  %361 = call float @llvm.AMDGPU.rsq.f32(float %360)
    %362 = fmul float %45, %361
    %363 = call float @fabs(float %362)
    %364 = fmul float %176, 0x3FECCCCCC0000000
@@ -403,7 +403,7 @@ ENDIF136:                                         ; preds = %main_body, %ENDIF15
    %384 = insertelement <4 x float> %383, float %45, i32 2
    %385 = insertelement <4 x float> %384, float 0.000000e+00, i32 3
    %386 = call float @llvm.AMDGPU.dp4(<4 x float> %381, <4 x float> %385)
-  %387 = call float @llvm.AMDGPU.rsq(float %386)
+  %387 = call float @llvm.AMDGPU.rsq.f32(float %386)
    %388 = fmul float %45, %387
    %389 = call float @fabs(float %388)
    %390 = fmul float %176, 0x3FF51EB860000000
@@ -1041,7 +1041,7 @@ IF179:                                            ; preds = %ENDIF175
    %896 = insertelement <4 x float> %895, float %45, i32 2
    %897 = insertelement <4 x float> %896, float 0.000000e+00, i32 3
    %898 = call float @llvm.AMDGPU.dp4(<4 x float> %893, <4 x float> %897)
-  %899 = call float @llvm.AMDGPU.rsq(float %898)
+  %899 = call float @llvm.AMDGPU.rsq.f32(float %898)
    %900 = fmul float %45, %899
    %901 = call float @fabs(float %900)
    %902 = fmul float %176, 0x3FECCCCCC0000000
@@ -1150,7 +1150,7 @@ ENDIF178:                                         ; preds = %ENDIF175, %IF179
  declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
  
  ; Function Attrs: readnone
-declare float @llvm.AMDGPU.rsq(float) #1
+declare float @llvm.AMDGPU.rsq.f32(float) #1
  
  ; Function Attrs: readnone
  declare <4 x float> @llvm.AMDGPU.tex(<4 x float>, i32, i32, i32) #1
diff --git a/test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll b/test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll

new file mode 100644 (file)

index 0000000..c8c7357
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll
@@ -0,0 +1,27 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+declare float @llvm.AMDGPU.div.fixup.f32(float, float, float) nounwind readnone
+declare double @llvm.AMDGPU.div.fixup.f64(double, double, double) nounwind readnone
+
+; SI-LABEL: @test_div_fixup_f32:
+; SI-DAG: S_LOAD_DWORD [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: S_LOAD_DWORD [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
+; SI-DAG: V_MOV_B32_e32 [[VC:v[0-9]+]], [[SC]]
+; SI-DAG: S_LOAD_DWORD [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: V_MOV_B32_e32 [[VB:v[0-9]+]], [[SB]]
+; SI: V_DIV_FIXUP_F32 [[RESULT:v[0-9]+]], [[SA]], [[VB]], [[VC]]
+; SI: BUFFER_STORE_DWORD [[RESULT]],
+; SI: S_ENDPGM
+define void @test_div_fixup_f32(float addrspace(1)* %out, float %a, float %b, float %c) nounwind {
+  %result = call float @llvm.AMDGPU.div.fixup.f32(float %a, float %b, float %c) nounwind readnone
+  store float %result, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: @test_div_fixup_f64:
+; SI: V_DIV_FIXUP_F64
+define void @test_div_fixup_f64(double addrspace(1)* %out, double %a, double %b, double %c) nounwind {
+  %result = call double @llvm.AMDGPU.div.fixup.f64(double %a, double %b, double %c) nounwind readnone
+  store double %result, double addrspace(1)* %out, align 8
+  ret void
+}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll b/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll

new file mode 100644 (file)

index 0000000..4f1e827
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll
@@ -0,0 +1,27 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+declare float @llvm.AMDGPU.div.fmas.f32(float, float, float) nounwind readnone
+declare double @llvm.AMDGPU.div.fmas.f64(double, double, double) nounwind readnone
+
+; SI-LABEL: @test_div_fmas_f32:
+; SI-DAG: S_LOAD_DWORD [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: S_LOAD_DWORD [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
+; SI-DAG: V_MOV_B32_e32 [[VC:v[0-9]+]], [[SC]]
+; SI-DAG: S_LOAD_DWORD [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: V_MOV_B32_e32 [[VB:v[0-9]+]], [[SB]]
+; SI: V_DIV_FMAS_F32 [[RESULT:v[0-9]+]], [[SA]], [[VB]], [[VC]]
+; SI: BUFFER_STORE_DWORD [[RESULT]],
+; SI: S_ENDPGM
+define void @test_div_fmas_f32(float addrspace(1)* %out, float %a, float %b, float %c) nounwind {
+  %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c) nounwind readnone
+  store float %result, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: @test_div_fmas_f64:
+; SI: V_DIV_FMAS_F64
+define void @test_div_fmas_f64(double addrspace(1)* %out, double %a, double %b, double %c) nounwind {
+  %result = call double @llvm.AMDGPU.div.fmas.f64(double %a, double %b, double %c) nounwind readnone
+  store double %result, double addrspace(1)* %out, align 8
+  ret void
+}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll b/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll

new file mode 100644 (file)

index 0000000..1bcbe2f
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll
@@ -0,0 +1,23 @@
+; XFAIL: *
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+declare float @llvm.AMDGPU.div.scale.f32(float, float) nounwind readnone
+declare double @llvm.AMDGPU.div.scale.f64(double, double) nounwind readnone
+
+; SI-LABEL @test_div_scale_f32:
+define void @test_div_scale_f32(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr) nounwind {
+  %a = load float addrspace(1)* %aptr, align 4
+  %b = load float addrspace(1)* %bptr, align 4
+  %result = call float @llvm.AMDGPU.div.scale.f32(float %a, float %b) nounwind readnone
+  store float %result, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL @test_div_scale_f64:
+define void @test_div_scale_f64(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %bptr) nounwind {
+  %a = load double addrspace(1)* %aptr, align 8
+  %b = load double addrspace(1)* %bptr, align 8
+  %result = call double @llvm.AMDGPU.div.scale.f64(double %a, double %b) nounwind readnone
+  store double %result, double addrspace(1)* %out, align 8
+  ret void
+}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.rcp.ll b/test/CodeGen/R600/llvm.AMDGPU.rcp.ll

new file mode 100644 (file)

index 0000000..ca5260d
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.rcp.ll
@@ -0,0 +1,58 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare float @llvm.AMDGPU.rcp.f32(float) nounwind readnone
+declare double @llvm.AMDGPU.rcp.f64(double) nounwind readnone
+
+
+declare float @llvm.sqrt.f32(float) nounwind readnone
+declare double @llvm.sqrt.f64(double) nounwind readnone
+
+; FUNC-LABEL: @rcp_f32
+; SI: V_RCP_F32_e32
+define void @rcp_f32(float addrspace(1)* %out, float %src) nounwind {
+  %rcp = call float @llvm.AMDGPU.rcp.f32(float %src) nounwind readnone
+  store float %rcp, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @rcp_f64
+; SI: V_RCP_F64_e32
+define void @rcp_f64(double addrspace(1)* %out, double %src) nounwind {
+  %rcp = call double @llvm.AMDGPU.rcp.f64(double %src) nounwind readnone
+  store double %rcp, double addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @rcp_pat_f32
+; SI: V_RCP_F32_e32
+define void @rcp_pat_f32(float addrspace(1)* %out, float %src) nounwind {
+  %rcp = fdiv float 1.0, %src
+  store float %rcp, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @rcp_pat_f64
+; SI: V_RCP_F64_e32
+define void @rcp_pat_f64(double addrspace(1)* %out, double %src) nounwind {
+  %rcp = fdiv double 1.0, %src
+  store double %rcp, double addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @rsq_rcp_pat_f32
+; SI: V_RSQ_F32_e32
+define void @rsq_rcp_pat_f32(float addrspace(1)* %out, float %src) nounwind {
+  %sqrt = call float @llvm.sqrt.f32(float %src) nounwind readnone
+  %rcp = call float @llvm.AMDGPU.rcp.f32(float %sqrt) nounwind readnone
+  store float %rcp, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @rsq_rcp_pat_f64
+; SI: V_RSQ_F64_e32
+define void @rsq_rcp_pat_f64(double addrspace(1)* %out, double %src) nounwind {
+  %sqrt = call double @llvm.sqrt.f64(double %src) nounwind readnone
+  %rcp = call double @llvm.AMDGPU.rcp.f64(double %sqrt) nounwind readnone
+  store double %rcp, double addrspace(1)* %out, align 8
+  ret void
+}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll b/test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll

new file mode 100644 (file)

index 0000000..1c736d4
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll
@@ -0,0 +1,29 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+declare double @llvm.AMDGPU.trig.preop.f64(double, i32) nounwind readnone
+
+; SI-LABEL: @test_trig_preop_f64:
+; SI-DAG: BUFFER_LOAD_DWORD [[SEG:v[0-9]+]]
+; SI-DAG: BUFFER_LOAD_DWORDX2 [[SRC:v\[[0-9]+:[0-9]+\]]],
+; SI: V_TRIG_PREOP_F64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[SRC]], [[SEG]]
+; SI: BUFFER_STORE_DWORDX2 [[RESULT]],
+; SI: S_ENDPGM
+define void @test_trig_preop_f64(double addrspace(1)* %out, double addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
+  %a = load double addrspace(1)* %aptr, align 8
+  %b = load i32 addrspace(1)* %bptr, align 4
+  %result = call double @llvm.AMDGPU.trig.preop.f64(double %a, i32 %b) nounwind readnone
+  store double %result, double addrspace(1)* %out, align 8
+  ret void
+}
+
+; SI-LABEL: @test_trig_preop_f64_imm_segment:
+; SI: BUFFER_LOAD_DWORDX2 [[SRC:v\[[0-9]+:[0-9]+\]]],
+; SI: V_TRIG_PREOP_F64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[SRC]], 7
+; SI: BUFFER_STORE_DWORDX2 [[RESULT]],
+; SI: S_ENDPGM
+define void @test_trig_preop_f64_imm_segment(double addrspace(1)* %out, double addrspace(1)* %aptr) nounwind {
+  %a = load double addrspace(1)* %aptr, align 8
+  %result = call double @llvm.AMDGPU.trig.preop.f64(double %a, i32 7) nounwind readnone
+  store double %result, double addrspace(1)* %out, align 8
+  ret void
+}
diff --git a/test/CodeGen/R600/pv.ll b/test/CodeGen/R600/pv.ll

index f322bc71c6bddeefe1ad7bc397120fcc9316e790..55eb56d3fb1d8cca1a62b8212b73b1273501c479 100644 (file)
--- a/test/CodeGen/R600/pv.ll
+++ b/test/CodeGen/R600/pv.ll
@@ -103,7 +103,7 @@ main_body:
    %95 = insertelement <4 x float> %94, float 0.000000e+00, i32 3
    %96 = call float @llvm.AMDGPU.dp4(<4 x float> %91, <4 x float> %95)
    %97 = call float @fabs(float %96)
-  %98 = call float @llvm.AMDGPU.rsq(float %97)
+  %98 = call float @llvm.AMDGPU.rsq.f32(float %97)
    %99 = fmul float %4, %98
    %100 = fmul float %5, %98
    %101 = fmul float %6, %98
@@ -225,7 +225,7 @@ declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
  declare float @fabs(float) #2
  
  ; Function Attrs: readnone
-declare float @llvm.AMDGPU.rsq(float) #1
+declare float @llvm.AMDGPU.rsq.f32(float) #1
  
  ; Function Attrs: readnone
  declare float @llvm.AMDIL.clamp.(float, float, float) #1
diff --git a/test/CodeGen/R600/sgpr-copy.ll b/test/CodeGen/R600/sgpr-copy.ll

index c581d86b99bde40637b9cf22bf8907994dda9184..c7d5bf90644e558c10a62ada93c2f2b1c55a5609 100644 (file)
--- a/test/CodeGen/R600/sgpr-copy.ll
+++ b/test/CodeGen/R600/sgpr-copy.ll
@@ -70,7 +70,7 @@ main_body:
    %55 = fadd float %54, %53
    %56 = fmul float %45, %45
    %57 = fadd float %55, %56
-  %58 = call float @llvm.AMDGPU.rsq(float %57)
+  %58 = call float @llvm.AMDGPU.rsq.f32(float %57)
    %59 = fmul float %43, %58
    %60 = fmul float %44, %58
    %61 = fmul float %45, %58
@@ -212,7 +212,7 @@ declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
  declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
  
  ; Function Attrs: readnone
-declare float @llvm.AMDGPU.rsq(float) #3
+declare float @llvm.AMDGPU.rsq.f32(float) #3
  
  ; Function Attrs: readnone
  declare float @llvm.AMDIL.exp.(float) #3
diff --git a/test/CodeGen/R600/si-sgpr-spill.ll b/test/CodeGen/R600/si-sgpr-spill.ll

index b34a757d9b65fb92ebcc4e270a9bbe3de83020f4..53a096513bbc58f06c845a597cec7824938ee02f 100644 (file)
--- a/test/CodeGen/R600/si-sgpr-spill.ll
+++ b/test/CodeGen/R600/si-sgpr-spill.ll
@@ -203,7 +203,7 @@ main_body:
    %198 = fadd float %197, %196
    %199 = fmul float %97, %97
    %200 = fadd float %198, %199
-  %201 = call float @llvm.AMDGPU.rsq(float %200)
+  %201 = call float @llvm.AMDGPU.rsq.f32(float %200)
    %202 = fmul float %95, %201
    %203 = fmul float %96, %201
    %204 = fmul float %202, %29
@@ -384,7 +384,7 @@ IF67:                                             ; preds = %LOOP65
    %355 = fadd float %354, %353
    %356 = fmul float %352, %352
    %357 = fadd float %355, %356
-  %358 = call float @llvm.AMDGPU.rsq(float %357)
+  %358 = call float @llvm.AMDGPU.rsq.f32(float %357)
    %359 = fmul float %350, %358
    %360 = fmul float %351, %358
    %361 = fmul float %352, %358
@@ -512,7 +512,7 @@ IF67:                                             ; preds = %LOOP65
    %483 = fadd float %482, %481
    %484 = fmul float %109, %109
    %485 = fadd float %483, %484
-  %486 = call float @llvm.AMDGPU.rsq(float %485)
+  %486 = call float @llvm.AMDGPU.rsq.f32(float %485)
    %487 = fmul float %107, %486
    %488 = fmul float %108, %486
    %489 = fmul float %109, %486
@@ -541,7 +541,7 @@ IF67:                                             ; preds = %LOOP65
    %512 = fadd float %511, %510
    %513 = fmul float %97, %97
    %514 = fadd float %512, %513
-  %515 = call float @llvm.AMDGPU.rsq(float %514)
+  %515 = call float @llvm.AMDGPU.rsq.f32(float %514)
    %516 = fmul float %95, %515
    %517 = fmul float %96, %515
    %518 = fmul float %97, %515
@@ -658,7 +658,7 @@ declare i32 @llvm.SI.tid() #2
  declare float @ceil(float) #3
  
  ; Function Attrs: readnone
-declare float @llvm.AMDGPU.rsq(float) #2
+declare float @llvm.AMDGPU.rsq.f32(float) #2
  
  ; Function Attrs: nounwind readnone
  declare <4 x float> @llvm.SI.sampled.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32) #1
@@ -887,7 +887,7 @@ main_body:
    %212 = fadd float %211, %210
    %213 = fmul float %209, %209
    %214 = fadd float %212, %213
-  %215 = call float @llvm.AMDGPU.rsq(float %214)
+  %215 = call float @llvm.AMDGPU.rsq.f32(float %214)
    %216 = fmul float %205, %215
    %217 = fmul float %207, %215
    %218 = fmul float %209, %215
@@ -1123,7 +1123,7 @@ IF189:                                            ; preds = %LOOP
    %434 = fsub float -0.000000e+00, %433
    %435 = fadd float 0x3FF00068E0000000, %434
    %436 = call float @llvm.AMDIL.clamp.(float %435, float 0.000000e+00, float 1.000000e+00)
-  %437 = call float @llvm.AMDGPU.rsq(float %436)
+  %437 = call float @llvm.AMDGPU.rsq.f32(float %436)
    %438 = fmul float %437, %436
    %439 = fsub float -0.000000e+00, %436
    %440 = call float @llvm.AMDGPU.cndlt(float %439, float %438, float 0.000000e+00)
@@ -1147,7 +1147,7 @@ IF189:                                            ; preds = %LOOP
    %458 = fadd float %457, %456
    %459 = fmul float %455, %455
    %460 = fadd float %458, %459
-  %461 = call float @llvm.AMDGPU.rsq(float %460)
+  %461 = call float @llvm.AMDGPU.rsq.f32(float %460)
    %462 = fmul float %451, %461
    %463 = fmul float %453, %461
    %464 = fmul float %455, %461
@@ -1257,7 +1257,7 @@ ENDIF197:                                         ; preds = %IF189, %IF198
    %559 = fadd float %558, %557
    %560 = fmul float %556, %556
    %561 = fadd float %559, %560
-  %562 = call float @llvm.AMDGPU.rsq(float %561)
+  %562 = call float @llvm.AMDGPU.rsq.f32(float %561)
    %563 = fmul float %562, %561
    %564 = fsub float -0.000000e+00, %561
    %565 = call float @llvm.AMDGPU.cndlt(float %564, float %563, float 0.000000e+00)
diff --git a/test/Transforms/InstCombine/r600-intrinsics.ll b/test/Transforms/InstCombine/r600-intrinsics.ll

new file mode 100644 (file)

index 0000000..1db6b0d
--- /dev/null
+++ b/test/Transforms/InstCombine/r600-intrinsics.ll
@@ -0,0 +1,47 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+declare float @llvm.AMDGPU.rcp.f32(float) nounwind readnone
+declare double @llvm.AMDGPU.rcp.f64(double) nounwind readnone
+
+; CHECK-LABEL: @test_constant_fold_rcp_f32_1
+; CHECK-NEXT: ret float 1.000000e+00
+define float @test_constant_fold_rcp_f32_1() nounwind {
+  %val = call float @llvm.AMDGPU.rcp.f32(float 1.0) nounwind readnone
+  ret float %val
+}
+
+; CHECK-LABEL: @test_constant_fold_rcp_f64_1
+; CHECK-NEXT:  ret double 1.000000e+00
+define double @test_constant_fold_rcp_f64_1() nounwind {
+  %val = call double @llvm.AMDGPU.rcp.f64(double 1.0) nounwind readnone
+  ret double %val
+}
+
+; CHECK-LABEL: @test_constant_fold_rcp_f32_half
+; CHECK-NEXT: ret float 2.000000e+00
+define float @test_constant_fold_rcp_f32_half() nounwind {
+  %val = call float @llvm.AMDGPU.rcp.f32(float 0.5) nounwind readnone
+  ret float %val
+}
+
+; CHECK-LABEL: @test_constant_fold_rcp_f64_half
+; CHECK-NEXT:  ret double 2.000000e+00
+define double @test_constant_fold_rcp_f64_half() nounwind {
+  %val = call double @llvm.AMDGPU.rcp.f64(double 0.5) nounwind readnone
+  ret double %val
+}
+
+; CHECK-LABEL: @test_constant_fold_rcp_f32_43
+; CHECK-NEXT: call float @llvm.AMDGPU.rcp.f32(float 4.300000e+01)
+define float @test_constant_fold_rcp_f32_43() nounwind {
+ %val = call float @llvm.AMDGPU.rcp.f32(float 4.300000e+01) nounwind readnone
+ ret float %val
+}
+
+; CHECK-LABEL: @test_constant_fold_rcp_f64_43
+; CHECK-NEXT: call double @llvm.AMDGPU.rcp.f64(double 4.300000e+01)
+define double @test_constant_fold_rcp_f64_43() nounwind {
+  %val = call double @llvm.AMDGPU.rcp.f64(double 4.300000e+01) nounwind readnone
+  ret double %val
+}
+
author	Matt Arsenault <Matthew.Arsenault@amd.com>
	Thu, 19 Jun 2014 01:19:19 +0000 (01:19 +0000)
committer	Matt Arsenault <Matthew.Arsenault@amd.com>
	Thu, 19 Jun 2014 01:19:19 +0000 (01:19 +0000)
include/llvm/IR/IntrinsicsR600.td		patch \| blob \| history
lib/Target/R600/AMDGPUISelLowering.cpp		patch \| blob \| history
lib/Target/R600/AMDGPUISelLowering.h		patch \| blob \| history
lib/Target/R600/AMDGPUInstrInfo.td		patch \| blob \| history
lib/Target/R600/AMDGPUInstructions.td		patch \| blob \| history
lib/Target/R600/AMDGPUIntrinsics.td		patch \| blob \| history
lib/Target/R600/R600Instructions.td		patch \| blob \| history
lib/Target/R600/SIInsertWaits.cpp		patch \| blob \| history
lib/Target/R600/SIInstructions.td		patch \| blob \| history
lib/Transforms/InstCombine/InstCombineCalls.cpp		patch \| blob \| history
test/CodeGen/R600/big_alu.ll		patch \| blob \| history
test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/R600/llvm.AMDGPU.div_scale.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/R600/llvm.AMDGPU.rcp.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/R600/pv.ll		patch \| blob \| history
test/CodeGen/R600/sgpr-copy.ll		patch \| blob \| history
test/CodeGen/R600/si-sgpr-spill.ll		patch \| blob \| history
test/Transforms/InstCombine/r600-intrinsics.ll	[new file with mode: 0644]	patch \| blob