// "_eg"-suffixed instantiations of the shared *_Common instruction classes,
// followed by the selection patterns that are built on top of them.
// NOTE(review): the hex template argument is presumably the ALU opcode --
// confirm against the *_Common class definitions, which are not visible here.
def LOG_IEEE_eg : LOG_IEEE_Common<0x83>;
def RECIP_CLAMPED_eg : RECIP_CLAMPED_Common<0x84>;
def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>;
+defm : RsqPat<RECIPSQRT_IEEE_eg, f32>;
def SIN_eg : SIN_Common<0x8D>;
def COS_eg : COS_Common<0x8E>;
// POW_Common is instantiated with the LOG, EXP and MUL instructions.
def : POW_Common <LOG_IEEE_eg, EXP_IEEE_eg, MUL>;
// f32 fsqrt is selected as MUL(src, RECIPSQRT_CLAMPED_eg(src)).
def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_eg $src))>;
+defm : Expand24IBitOps<MULLO_INT_eg, ADD_INT>;
+
//===----------------------------------------------------------------------===//
// Memory read/write instructions
//===----------------------------------------------------------------------===//
let Predicates = [isEGorCayman] in {
+// Should be predicated on FeatureFP64
+// def FMA_64 : R600_3OP <
+// 0xA, "FMA_64",
+// [(set f64:$dst, (fma f64:$src0, f64:$src1, f64:$src2))]
+// >;
+
// BFE_UINT - bit_extract, an optimization for mask and shift
// Src0 = Input
// Src1 = Offset
VecALU
>;
// BFE_INT: three-operand instruction selected for AMDGPUbfe_i32 on i32
// operands. This change moves the first template argument from 0x4 to 0x5.
// NOTE(review): operand roles presumably mirror BFE_UINT (input, offset,
// width) -- confirm against the ISA documentation.
-def BFE_INT_eg : R600_3OP <0x4, "BFE_INT",
+def BFE_INT_eg : R600_3OP <0x5, "BFE_INT",
[(set i32:$dst, (AMDGPUbfe_i32 i32:$src0, i32:$src1, i32:$src2))],
VecALU
>;
// XXX: This pattern is broken, disabling for now. See comment in
// AMDGPUInstructions.td for more info.
// def : BFEPattern <BFE_UINT_eg>;
// BFI_INT now carries its own selection pattern: AMDGPUbfi on three i32
// operands. The two '-' lines are the previous form, which defined the
// instruction with an empty pattern list and instantiated BFIPatterns with a
// single argument.
+def BFI_INT_eg : R600_3OP <0x06, "BFI_INT",
+ [(set i32:$dst, (AMDGPUbfi i32:$src0, i32:$src1, i32:$src2))],
+ VecALU
+>;
-def BFI_INT_eg : R600_3OP <0x06, "BFI_INT", [], VecALU>;
-defm : BFIPatterns <BFI_INT_eg>;
// sext_inreg of an i32 from i1, i8 and i16 is selected as BFE_INT_eg with
// ZERO as the second operand and ONE_INT, 8 and 16 respectively as the third
// operand. The 8 and 16 values are supplied through MOV_IMM_I32.
// NOTE(review): this reads as "offset 0, width N" -- confirm the BFE_INT
// operand order.
+def : Pat<(i32 (sext_inreg i32:$src, i1)),
+ (BFE_INT_eg i32:$src, (i32 ZERO), (i32 ONE_INT))>;
+def : Pat<(i32 (sext_inreg i32:$src, i8)),
+ (BFE_INT_eg i32:$src, (i32 ZERO), (MOV_IMM_I32 8))>;
+def : Pat<(i32 (sext_inreg i32:$src, i16)),
+ (BFE_INT_eg i32:$src, (i32 ZERO), (MOV_IMM_I32 16))>;
+
// Instantiates the shared BFIPatterns multiclass for BFI_INT_eg, passing
// MOV_IMM_I32 as the second argument.
// NOTE(review): presumably the instruction used to load immediates inside
// those patterns -- confirm in the BFIPatterns definition.
+defm : BFIPatterns <BFI_INT_eg, MOV_IMM_I32>;
+
// BFM_INT: two-operand instruction selected for AMDGPUbfm on i32 operands.
+def BFM_INT_eg : R600_2OP <0xA0, "BFM_INT",
+ [(set i32:$dst, (AMDGPUbfm i32:$src0, i32:$src1))],
+ VecALU
+>;
// MULADD_UINT24: the instruction pattern is switched from matching
// (add (mul U24, U24), i32) directly to matching the AMDGPUmad_u24 node on
// three i32 operands. UMad24Pat is then instantiated for this instruction.
// NOTE(review): UMad24Pat presumably re-adds a match for the add-of-mul form
// -- confirm in its definition.
def MULADD_UINT24_eg : R600_3OP <0x10, "MULADD_UINT24",
- [(set i32:$dst, (add (mul U24:$src0, U24:$src1), i32:$src2))], VecALU
+ [(set i32:$dst, (AMDGPUmad_u24 i32:$src0, i32:$src1, i32:$src2))], VecALU
>;
+
+def : UMad24Pat<MULADD_UINT24_eg>;
+
// BIT_ALIGN_INT is defined with an empty pattern list; its selection comes
// from the ROTRPattern instantiated on the next line.
def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT", [], VecALU>;
def : ROTRPattern <BIT_ALIGN_INT_eg>;
// "_eg"-suffixed instantiations of shared *_Common classes; FMA_eg is added
// by this change.
def MULADD_eg : MULADD_Common<0x14>;
def MULADD_IEEE_eg : MULADD_IEEE_Common<0x18>;
+def FMA_eg : FMA_Common<0x7>;
def ASHR_eg : ASHR_Common<0x15>;
def LSHR_eg : LSHR_Common<0x16>;
def LSHL_eg : LSHL_Common<0x17>;
def MUL_LIT_eg : MUL_LIT_Common<0x1F>;
def LOG_CLAMPED_eg : LOG_CLAMPED_Common<0x82>;
// MUL_UINT24: the instruction pattern is switched from matching
// (mul U24, U24) directly to matching the AMDGPUmul_u24 node on two i32
// operands.
def MUL_UINT24_eg : R600_2OP <0xB5, "MUL_UINT24",
- [(set i32:$dst, (mul U24:$src0, U24:$src1))], VecALU
+ [(set i32:$dst, (AMDGPUmul_u24 i32:$src0, i32:$src1))], VecALU
>;
def DOT4_eg : DOT4_Common<0xBE>;
defm CUBE_eg : CUBE_Common<0xC0>;
// Bit-counting instructions added by this change, each tied to one node via
// R600_1OP_Helper: BCNT_INT -> ctpop, FFBH_UINT -> ctlz_zero_undef,
// FFBL_INT -> cttz_zero_undef.
+def BCNT_INT : R600_1OP_Helper <0xAA, "BCNT_INT", ctpop, VecALU>;
+
+def FFBH_UINT : R600_1OP_Helper <0xAB, "FFBH_UINT", ctlz_zero_undef, VecALU>;
+def FFBL_INT : R600_1OP_Helper <0xAC, "FFBL_INT", cttz_zero_undef, VecALU>;
+
// MOVA_INT has an empty pattern list and is defined with hasSideEffects = 1.
let hasSideEffects = 1 in {
def MOVA_INT_eg : R600_1OP <0xCC, "MOVA_INT", [], VecALU>;
}
// "_eg"-suffixed instantiation of UINT_TO_FLT_Common.
def UINT_TO_FLT_eg : UINT_TO_FLT_Common<0x9C>;
// GROUP_BARRIER: instruction with no outputs and no inputs, built from
// InstR600 plus the R600ALU_Word0 / R600ALU_Word1_OP2<0x54> encoding classes
// and flagged as an ALU instruction (ALUInst = 1). This change adds
// int_AMDGPU_barrier_global to the pattern list next to
// int_AMDGPU_barrier_local.
// NOTE(review): int_AMDGPU_barrier_global is also matched by a standalone Pat
// right after this definition -- confirm whether both forms are needed.
def GROUP_BARRIER : InstR600 <
- (outs), (ins), " GROUP_BARRIER", [(int_AMDGPU_barrier_local)], AnyALU>,
+ (outs), (ins), " GROUP_BARRIER", [(int_AMDGPU_barrier_local), (int_AMDGPU_barrier_global)], AnyALU>,
R600ALU_Word0,
R600ALU_Word1_OP2 <0x54> {
let ALUInst = 1;
}
// Selects the int_AMDGPU_barrier_global intrinsic as GROUP_BARRIER.
+def : Pat <
+ (int_AMDGPU_barrier_global),
+ (GROUP_BARRIER)
+>;
+
//===----------------------------------------------------------------------===//
// LDS Instructions
//===----------------------------------------------------------------------===//