">= AMDGPUSubtarget::SOUTHERN_ISLANDS">,
AssemblerPredicate<"FeatureGCN">;
def isSI : Predicate<"Subtarget->getGeneration() "
- "== AMDGPUSubtarget::SOUTHERN_ISLANDS">;
+ "== AMDGPUSubtarget::SOUTHERN_ISLANDS">,
+ AssemblerPredicate<"FeatureSouthernIslands">;
+
def has16BankLDS : Predicate<"Subtarget->getLDSBankCount() == 16">;
def has32BankLDS : Predicate<"Subtarget->getLDSBankCount() == 32">;
// We are using the SGPR_32 and not the SReg_32 register class for 32-bit
// SMRD instructions, because the SGPR_32 register class does not include M0
// and writing to M0 from an SMRD instruction will hang the GPU.
-defm S_LOAD_DWORD : SMRD_Helper <0x00, "s_load_dword", SReg_64, SGPR_32>;
-defm S_LOAD_DWORDX2 : SMRD_Helper <0x01, "s_load_dwordx2", SReg_64, SReg_64>;
-defm S_LOAD_DWORDX4 : SMRD_Helper <0x02, "s_load_dwordx4", SReg_64, SReg_128>;
-defm S_LOAD_DWORDX8 : SMRD_Helper <0x03, "s_load_dwordx8", SReg_64, SReg_256>;
-defm S_LOAD_DWORDX16 : SMRD_Helper <0x04, "s_load_dwordx16", SReg_64, SReg_512>;
+defm S_LOAD_DWORD : SMRD_Helper <smrd<0x00>, "s_load_dword", SReg_64, SGPR_32>;
+defm S_LOAD_DWORDX2 : SMRD_Helper <smrd<0x01>, "s_load_dwordx2", SReg_64, SReg_64>;
+defm S_LOAD_DWORDX4 : SMRD_Helper <smrd<0x02>, "s_load_dwordx4", SReg_64, SReg_128>;
+defm S_LOAD_DWORDX8 : SMRD_Helper <smrd<0x03>, "s_load_dwordx8", SReg_64, SReg_256>;
+defm S_LOAD_DWORDX16 : SMRD_Helper <smrd<0x04>, "s_load_dwordx16", SReg_64, SReg_512>;
defm S_BUFFER_LOAD_DWORD : SMRD_Helper <
- 0x08, "s_buffer_load_dword", SReg_128, SGPR_32
+ smrd<0x08>, "s_buffer_load_dword", SReg_128, SGPR_32
>;
defm S_BUFFER_LOAD_DWORDX2 : SMRD_Helper <
- 0x09, "s_buffer_load_dwordx2", SReg_128, SReg_64
+ smrd<0x09>, "s_buffer_load_dwordx2", SReg_128, SReg_64
>;
defm S_BUFFER_LOAD_DWORDX4 : SMRD_Helper <
- 0x0a, "s_buffer_load_dwordx4", SReg_128, SReg_128
+ smrd<0x0a>, "s_buffer_load_dwordx4", SReg_128, SReg_128
>;
defm S_BUFFER_LOAD_DWORDX8 : SMRD_Helper <
- 0x0b, "s_buffer_load_dwordx8", SReg_128, SReg_256
+ smrd<0x0b>, "s_buffer_load_dwordx8", SReg_128, SReg_256
>;
defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper <
- 0x0c, "s_buffer_load_dwordx16", SReg_128, SReg_512
+ smrd<0x0c>, "s_buffer_load_dwordx16", SReg_128, SReg_512
>;
} // mayLoad = 1
//def S_MEMTIME : SMRD_ <0x0000001e, "s_memtime", []>;
-//def S_DCACHE_INV : SMRD_ <0x0000001f, "s_dcache_inv", []>;
+
+defm S_DCACHE_INV : SMRD_Inval <smrd<0x1f, 0x20>, "s_dcache_inv",
+ int_amdgcn_s_dcache_inv>;
//===----------------------------------------------------------------------===//
// SOP1 Instructions
defm S_QUADMASK_B32 : SOP1_32 <sop1<0x2c, 0x28>, "s_quadmask_b32", []>;
defm S_QUADMASK_B64 : SOP1_64 <sop1<0x2d, 0x29>, "s_quadmask_b64", []>;
+
+let Uses = [M0] in {
defm S_MOVRELS_B32 : SOP1_32 <sop1<0x2e, 0x2a>, "s_movrels_b32", []>;
defm S_MOVRELS_B64 : SOP1_64 <sop1<0x2f, 0x2b>, "s_movrels_b64", []>;
defm S_MOVRELD_B32 : SOP1_32 <sop1<0x30, 0x2c>, "s_movreld_b32", []>;
defm S_MOVRELD_B64 : SOP1_64 <sop1<0x31, 0x2d>, "s_movreld_b64", []>;
+} // End Uses = [M0]
+
defm S_CBRANCH_JOIN : SOP1_1 <sop1<0x32, 0x2e>, "s_cbranch_join", []>;
defm S_MOV_REGRD_B32 : SOP1_32 <sop1<0x33, 0x2f>, "s_mov_regrd_b32", []>;
let Defs = [SCC] in {
def S_BARRIER : SOPP <0x0000000a, (ins), "s_barrier",
[(int_AMDGPU_barrier_local)]
> {
+ let SchedRW = [WriteBarrier];
let simm16 = 0;
- let isBarrier = 1;
- let hasCtrlDep = 1;
let mayLoad = 1;
let mayStore = 1;
+ let isConvergent = 1;
}
def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "s_waitcnt $simm16">;
//def BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_X2 <mubuf<0x5e>, "buffer_atomic_fcmpswap_x2", []>; // isn't on VI
//def BUFFER_ATOMIC_FMIN_X2 : MUBUF_X2 <mubuf<0x5f>, "buffer_atomic_fmin_x2", []>; // isn't on VI
//def BUFFER_ATOMIC_FMAX_X2 : MUBUF_X2 <mubuf<0x60>, "buffer_atomic_fmax_x2", []>; // isn't on VI
-//def BUFFER_WBINVL1_SC : MUBUF_WBINVL1 <mubuf<0x70>, "buffer_wbinvl1_sc", []>; // isn't on CI & VI
-//def BUFFER_WBINVL1_VOL : MUBUF_WBINVL1 <mubuf<0x70, 0x3f>, "buffer_wbinvl1_vol", []>; // isn't on SI
-//def BUFFER_WBINVL1 : MUBUF_WBINVL1 <mubuf<0x71, 0x3e>, "buffer_wbinvl1", []>;
+
+let SubtargetPredicate = isSI in {
+defm BUFFER_WBINVL1_SC : MUBUF_Invalidate <mubuf<0x70>, "buffer_wbinvl1_sc", int_amdgcn_buffer_wbinvl1_sc>; // isn't on CI & VI
+}
+
+defm BUFFER_WBINVL1 : MUBUF_Invalidate <mubuf<0x71, 0x3e>, "buffer_wbinvl1", int_amdgcn_buffer_wbinvl1>;
//===----------------------------------------------------------------------===//
// MTBUF Instructions
// VOP1 Instructions
//===----------------------------------------------------------------------===//
-let vdst = 0, src0 = 0 in {
-defm V_NOP : VOP1_m <vop1<0x0>, (outs), (ins), "v_nop", [], "v_nop">;
+let vdst = 0, src0 = 0, VOPAsmPrefer32Bit = 1 in {
+defm V_NOP : VOP1Inst <vop1<0x0>, "v_nop", VOP_NONE>;
}
let isMoveImm = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in {
defm V_FREXP_MANT_F32 : VOP1Inst <vop1<0x40, 0x34>, "v_frexp_mant_f32",
VOP_F32_F32
>;
-let vdst = 0, src0 = 0 in {
-defm V_CLREXCP : VOP1_m <vop1<0x41,0x35>, (outs), (ins), "v_clrexcp", [],
- "v_clrexcp"
->;
+let vdst = 0, src0 = 0, VOPAsmPrefer32Bit = 1 in {
+defm V_CLREXCP : VOP1Inst <vop1<0x41,0x35>, "v_clrexcp", VOP_NONE>;
}
+
+let Uses = [M0, EXEC] in {
defm V_MOVRELD_B32 : VOP1Inst <vop1<0x42, 0x36>, "v_movreld_b32", VOP_I32_I32>;
defm V_MOVRELS_B32 : VOP1Inst <vop1<0x43, 0x37>, "v_movrels_b32", VOP_I32_I32>;
defm V_MOVRELSD_B32 : VOP1Inst <vop1<0x44, 0x38>, "v_movrelsd_b32", VOP_I32_I32>;
+} // End Uses = [M0, EXEC]
// These instruction only exist on SI and CI
let SubtargetPredicate = isSICI in {
// VINTRP Instructions
//===----------------------------------------------------------------------===//
-let Uses = [M0] in {
+let Uses = [M0, EXEC] in {
// FIXME: Specify SchedRW for VINTRP insturctions.
[(set f32:$dst, (AMDGPUinterp_mov (i32 imm:$src0), (i32 imm:$attr_chan),
(i32 imm:$attr)))]>;
-} // End Uses = [M0]
+} // End Uses = [M0, EXEC]
//===----------------------------------------------------------------------===//
// VOP2 Instructions
//===----------------------------------------------------------------------===//
multiclass V_CNDMASK <vop2 op, string name> {
- defm _e32 : VOP2_m <
- op, VOP_CNDMASK.Outs, VOP_CNDMASK.Ins32, VOP_CNDMASK.Asm32, [],
- name, name>;
+ defm _e32 : VOP2_m <op, name, VOP_CNDMASK, [], name>;
defm _e64 : VOP3_m <
op, VOP_CNDMASK.Outs, VOP_CNDMASK.Ins64,
defm V_MADAK_F32 : VOP2MADK <vop2<0x21, 0x18>, "v_madak_f32">;
} // End isCommutable = 1
-let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC
+let isCommutable = 1 in {
// No patterns so that the scalar instructions are always selected.
// The scalar versions will be replaced with vector when needed later.
// V_ADD_I32, V_SUB_I32, and V_SUBREV_I32 where renamed to *_U32 in VI,
// but the VI instructions behave the same as the SI versions.
defm V_ADD_I32 : VOP2bInst <vop2<0x25, 0x19>, "v_add_i32",
- VOP_I32_I32_I32, add
+ VOP2b_I32_I1_I32_I32
>;
-defm V_SUB_I32 : VOP2bInst <vop2<0x26, 0x1a>, "v_sub_i32", VOP_I32_I32_I32>;
+defm V_SUB_I32 : VOP2bInst <vop2<0x26, 0x1a>, "v_sub_i32", VOP2b_I32_I1_I32_I32>;
defm V_SUBREV_I32 : VOP2bInst <vop2<0x27, 0x1b>, "v_subrev_i32",
- VOP_I32_I32_I32, null_frag, "v_sub_i32"
+ VOP2b_I32_I1_I32_I32, null_frag, "v_sub_i32"
>;
-let Uses = [VCC] in { // Carry-in comes from VCC
defm V_ADDC_U32 : VOP2bInst <vop2<0x28, 0x1c>, "v_addc_u32",
- VOP_I32_I32_I32_VCC
+ VOP2b_I32_I1_I32_I32_I1
>;
defm V_SUBB_U32 : VOP2bInst <vop2<0x29, 0x1d>, "v_subb_u32",
- VOP_I32_I32_I32_VCC
+ VOP2b_I32_I1_I32_I32_I1
>;
defm V_SUBBREV_U32 : VOP2bInst <vop2<0x2a, 0x1e>, "v_subbrev_u32",
- VOP_I32_I32_I32_VCC, null_frag, "v_subb_u32"
+ VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32"
>;
-} // End Uses = [VCC]
-} // End isCommutable = 1, Defs = [VCC]
+} // End isCommutable = 1
defm V_READLANE_B32 : VOP2SI_3VI_m <
vop3 <0x001, 0x289>,
} // isCommutable = 1, SchedRW = [WriteQuarterRate32]
let SchedRW = [WriteFloatFMA, WriteSALU] in {
-defm V_DIV_SCALE_F32 : VOP3b_32 <vop3<0x16d, 0x1e0>, "v_div_scale_f32", []>;
+defm V_DIV_SCALE_F32 : VOP3bInst <vop3<0x16d, 0x1e0>, "v_div_scale_f32",
+ VOP3b_F32_I1_F32_F32_F32
+>;
}
let SchedRW = [WriteDouble, WriteSALU] in {
// Double precision division pre-scale.
-defm V_DIV_SCALE_F64 : VOP3b_64 <vop3<0x16e, 0x1e1>, "v_div_scale_f64", []>;
+defm V_DIV_SCALE_F64 : VOP3bInst <vop3<0x16e, 0x1e1>, "v_div_scale_f64",
+ VOP3b_F64_I1_F64_F64_F64
+>;
} // let SchedRW = [WriteDouble]
-let isCommutable = 1, Uses = [VCC] in {
+let isCommutable = 1, Uses = [VCC, EXEC] in {
let SchedRW = [WriteFloatFMA] in {
// v_div_fmas_f32:
>;
} // End SchedRW = [WriteDouble]
-} // End isCommutable = 1
+} // End isCommutable = 1, Uses = [VCC, EXEC]
//def V_MSAD_U8 : VOP3_U8 <0x00000171, "v_msad_u8", []>;
//def V_QSAD_U8 : VOP3_U8 <0x00000172, "v_qsad_u8", []>;
(ins VSrc_64:$src0, VSrc_64:$src1, SSrc_64:$src2), "", []
>;
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC] in {
// 64-bit vector move instruction. This is mainly used by the SIFoldOperands
// pass to enable folding of inline immediates.
def V_MOV_B64_PSEUDO : InstSI <(outs VReg_64:$dst), (ins VSrc_64:$src0), "", []>;
} // end let hasSideEffects = 0, mayLoad = 0, mayStore = 0
-let hasSideEffects = 1 in {
+let hasSideEffects = 1, SALU = 1 in {
def SGPR_USE : InstSI <(outs),(ins), "", []>;
}
let Uses = [EXEC], Defs = [EXEC,VCC,M0] in {
-//defm SI_ : RegisterLoadStore <VGPR_32, FRAMEri, ADDRIndirect>;
-
-let UseNamedOperandTable = 1 in {
-
-def SI_RegisterLoad : InstSI <
- (outs VGPR_32:$dst, SReg_64:$temp),
- (ins FRAMEri32:$addr, i32imm:$chan),
- "", []
-> {
- let isRegisterLoad = 1;
- let mayLoad = 1;
-}
-
-class SIRegStore<dag outs> : InstSI <
- outs,
- (ins VGPR_32:$val, FRAMEri32:$addr, i32imm:$chan),
- "", []
-> {
- let isRegisterStore = 1;
- let mayStore = 1;
-}
-
-let usesCustomInserter = 1 in {
-def SI_RegisterStorePseudo : SIRegStore<(outs)>;
-} // End usesCustomInserter = 1
-def SI_RegisterStore : SIRegStore<(outs SReg_64:$temp)>;
-
-
-} // End UseNamedOperandTable = 1
-
-def SI_INDIRECT_SRC : InstSI <
+class SI_INDIRECT_SRC<RegisterClass rc> : InstSI <
(outs VGPR_32:$dst, SReg_64:$temp),
- (ins unknown:$src, VSrc_32:$idx, i32imm:$off),
+ (ins rc:$src, VSrc_32:$idx, i32imm:$off),
"si_indirect_src $dst, $temp, $src, $idx, $off",
[]
>;
let Constraints = "$src = $dst";
}
+// TODO: We can support indirect SGPR access.
+def SI_INDIRECT_SRC_V1 : SI_INDIRECT_SRC<VGPR_32>;
+def SI_INDIRECT_SRC_V2 : SI_INDIRECT_SRC<VReg_64>;
+def SI_INDIRECT_SRC_V4 : SI_INDIRECT_SRC<VReg_128>;
+def SI_INDIRECT_SRC_V8 : SI_INDIRECT_SRC<VReg_256>;
+def SI_INDIRECT_SRC_V16 : SI_INDIRECT_SRC<VReg_512>;
+
def SI_INDIRECT_DST_V1 : SI_INDIRECT_DST<VGPR_32>;
def SI_INDIRECT_DST_V2 : SI_INDIRECT_DST<VReg_64>;
def SI_INDIRECT_DST_V4 : SI_INDIRECT_DST<VReg_128>;
multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {
- let UseNamedOperandTable = 1 in {
+ let UseNamedOperandTable = 1, Uses = [EXEC] in {
def _SAVE : InstSI <
(outs),
- (ins sgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc,
- SReg_32:$scratch_offset),
+ (ins sgpr_class:$src, i32imm:$frame_idx),
"", []
- >;
+ > {
+ let mayStore = 1;
+ let mayLoad = 0;
+ }
def _RESTORE : InstSI <
(outs sgpr_class:$dst),
- (ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset),
+ (ins i32imm:$frame_idx),
"", []
- >;
+ > {
+ let mayStore = 0;
+ let mayLoad = 1;
+ }
} // End UseNamedOperandTable = 1
}
defm SI_SPILL_S512 : SI_SPILL_SGPR <SReg_512>;
multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> {
- let UseNamedOperandTable = 1, VGPRSpill = 1 in {
+ let UseNamedOperandTable = 1, VGPRSpill = 1, Uses = [EXEC] in {
def _SAVE : InstSI <
(outs),
(ins vgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc,
SReg_32:$scratch_offset),
"", []
- >;
+ > {
+ let mayStore = 1;
+ let mayLoad = 0;
+ }
def _RESTORE : InstSI <
(outs vgpr_class:$dst),
(ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset),
"", []
- >;
+ > {
+ let mayStore = 0;
+ let mayLoad = 1;
+ }
} // End UseNamedOperandTable = 1, VGPRSpill = 1
}
(outs SReg_64:$dst),
(ins),
"", [(set SReg_64:$dst, (i64 SIconstdata_ptr))]
->;
+> {
+ let SALU = 1;
+}
} // End Defs = [SCC]
(S_MOV_B32 0), sub1))
>;
+def : Pat <
+ (i32 (smax i32:$x, (i32 (ineg i32:$x)))),
+ (S_ABS_I32 $x)
+>;
+
//===----------------------------------------------------------------------===//
// SOP2 Patterns
//===----------------------------------------------------------------------===//
/********** Extraction, Insertion, Building and Casting **********/
/********** ============================================ **********/
+//def : Extract_Element<i64, v2i64, 0, sub0_sub1>;
+//def : Extract_Element<i64, v2i64, 1, sub2_sub3>;
+//def : Extract_Element<f64, v2f64, 0, sub0_sub1>;
+//def : Extract_Element<f64, v2f64, 1, sub2_sub3>;
+
foreach Index = 0-2 in {
def Extract_Element_v2i32_#Index : Extract_Element <
i32, v2i32, Index, !cast<SubRegIndex>(sub#Index)
def : BitConvert <v4f32, v4i32, VReg_128>;
def : BitConvert <v4i32, v4f32, VReg_128>;
+
+def : BitConvert <v2i64, v4i32, SReg_128>;
+def : BitConvert <v4i32, v2i64, SReg_128>;
+
+def : BitConvert <v2f64, v4i32, VReg_128>;
+def : BitConvert <v4i32, v2f64, VReg_128>;
+
+
+
+
def : BitConvert <v8f32, v8i32, SReg_256>;
def : BitConvert <v8i32, v8f32, SReg_256>;
def : BitConvert <v8i32, v32i8, SReg_256>;
// Prevent expanding both fneg and fabs.
-// FIXME: Should use S_OR_B32
def : Pat <
(fneg (fabs f32:$src)),
- (V_OR_B32_e32 $src, (V_MOV_B32_e32 0x80000000)) /* Set sign bit */
+ (S_OR_B32 $src, 0x80000000) /* Set sign bit */
>;
// FIXME: Should use S_OR_B32
// -1. For the non-rtn variants, the manual says it does
// DS[A] = (DS[A] >= D0) ? 0 : DS[A] + 1, and setting D0 to uint_max
// will always do the increment so I'm assuming it's the same.
-//
-// We also load this -1 with s_mov_b32 / s_mov_b64 even though this
-// needs to be a VGPR. The SGPR copy pass will fix this, and it's
-// easier since there is no v_mov_b64.
class DSAtomicIncRetPat<DS inst, ValueType vt,
Instruction LoadImm, PatFrag frag> : Pat <
(frag (DS1Addr1Offset i32:$ptr, i32:$offset), (vt 1)),
// 32-bit atomics.
def : DSAtomicIncRetPat<DS_INC_RTN_U32, i32,
- S_MOV_B32, si_atomic_load_add_local>;
+ V_MOV_B32_e32, si_atomic_load_add_local>;
def : DSAtomicIncRetPat<DS_DEC_RTN_U32, i32,
- S_MOV_B32, si_atomic_load_sub_local>;
+ V_MOV_B32_e32, si_atomic_load_sub_local>;
def : DSAtomicRetPat<DS_WRXCHG_RTN_B32, i32, si_atomic_swap_local>;
def : DSAtomicRetPat<DS_ADD_RTN_U32, i32, si_atomic_load_add_local>;
// 64-bit atomics.
def : DSAtomicIncRetPat<DS_INC_RTN_U64, i64,
- S_MOV_B64, si_atomic_load_add_local>;
+ V_MOV_B64_PSEUDO, si_atomic_load_add_local>;
def : DSAtomicIncRetPat<DS_DEC_RTN_U64, i64,
- S_MOV_B64, si_atomic_load_sub_local>;
+ V_MOV_B64_PSEUDO, si_atomic_load_sub_local>;
def : DSAtomicRetPat<DS_WRXCHG_RTN_B64, i64, si_atomic_swap_local>;
def : DSAtomicRetPat<DS_ADD_RTN_U64, i64, si_atomic_load_add_local>;
/********** Indirect adressing **********/
/********** ====================== **********/
-multiclass SI_INDIRECT_Pattern <ValueType vt, ValueType eltvt, SI_INDIRECT_DST IndDst> {
+multiclass SI_INDIRECT_Pattern <ValueType vt, ValueType eltvt, string VecSize> {
// 1. Extract with offset
def : Pat<
(eltvt (vector_extract vt:$vec, (add i32:$idx, imm:$off))),
- (SI_INDIRECT_SRC $vec, $idx, imm:$off)
+ (!cast<Instruction>("SI_INDIRECT_SRC_"#VecSize) $vec, $idx, imm:$off)
>;
// 2. Extract without offset
def : Pat<
(eltvt (vector_extract vt:$vec, i32:$idx)),
- (SI_INDIRECT_SRC $vec, $idx, 0)
+ (!cast<Instruction>("SI_INDIRECT_SRC_"#VecSize) $vec, $idx, 0)
>;
// 3. Insert with offset
def : Pat<
(vector_insert vt:$vec, eltvt:$val, (add i32:$idx, imm:$off)),
- (IndDst $vec, $idx, imm:$off, $val)
+ (!cast<Instruction>("SI_INDIRECT_DST_"#VecSize) $vec, $idx, imm:$off, $val)
>;
// 4. Insert without offset
def : Pat<
(vector_insert vt:$vec, eltvt:$val, i32:$idx),
- (IndDst $vec, $idx, 0, $val)
+ (!cast<Instruction>("SI_INDIRECT_DST_"#VecSize) $vec, $idx, 0, $val)
>;
}
-defm : SI_INDIRECT_Pattern <v2f32, f32, SI_INDIRECT_DST_V2>;
-defm : SI_INDIRECT_Pattern <v4f32, f32, SI_INDIRECT_DST_V4>;
-defm : SI_INDIRECT_Pattern <v8f32, f32, SI_INDIRECT_DST_V8>;
-defm : SI_INDIRECT_Pattern <v16f32, f32, SI_INDIRECT_DST_V16>;
+defm : SI_INDIRECT_Pattern <v2f32, f32, "V2">;
+defm : SI_INDIRECT_Pattern <v4f32, f32, "V4">;
+defm : SI_INDIRECT_Pattern <v8f32, f32, "V8">;
+defm : SI_INDIRECT_Pattern <v16f32, f32, "V16">;
-defm : SI_INDIRECT_Pattern <v2i32, i32, SI_INDIRECT_DST_V2>;
-defm : SI_INDIRECT_Pattern <v4i32, i32, SI_INDIRECT_DST_V4>;
-defm : SI_INDIRECT_Pattern <v8i32, i32, SI_INDIRECT_DST_V8>;
-defm : SI_INDIRECT_Pattern <v16i32, i32, SI_INDIRECT_DST_V16>;
+defm : SI_INDIRECT_Pattern <v2i32, i32, "V2">;
+defm : SI_INDIRECT_Pattern <v4i32, i32, "V4">;
+defm : SI_INDIRECT_Pattern <v8i32, i32, "V8">;
+defm : SI_INDIRECT_Pattern <v16i32, i32, "V16">;
//===----------------------------------------------------------------------===//
// Conversion Patterns
def : Pat <
(i1 (trunc i32:$a)),
- (V_CMP_EQ_I32_e64 (V_AND_B32_e64 (i32 1), $a), 1)
+ (V_CMP_EQ_I32_e64 (S_AND_B32 (i32 1), $a), 1)
>;
def : Pat <
(i1 (trunc i64:$a)),
- (V_CMP_EQ_I32_e64 (V_AND_B32_e64 (i32 1),
+ (V_CMP_EQ_I32_e64 (S_AND_B32 (i32 1),
(EXTRACT_SUBREG $a, sub0)), 1)
>;