(outs),
(ins i32imm:$en, i32imm:$tgt, i32imm:$compr, i32imm:$done, i32imm:$vm,
VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3),
- "EXP $en, $tgt, $compr, $done, $vm, $src0, $src1, $src2, $src3",
+ "exp $en, $tgt, $compr, $done, $vm, $src0, $src1, $src2, $src3",
[] > {
let EXP_CNT = 1;
multiclass EXP_m {
let isPseudo = 1 in {
- def "" : EXPCommon, SIMCInstr <"EXP", SISubtarget.NONE> ;
+ def "" : EXPCommon, SIMCInstr <"exp", SISubtarget.NONE> ;
}
- def _si : EXPCommon, SIMCInstr <"EXP", SISubtarget.SI>, EXPe;
+ def _si : EXPCommon, SIMCInstr <"exp", SISubtarget.SI>, EXPe;
}
//===----------------------------------------------------------------------===//
// We are using the SGPR_32 and not the SReg_32 register class for 32-bit
// SMRD instructions, because the SGPR_32 register class does not include M0
// and writing to M0 from an SMRD instruction will hang the GPU.
-defm S_LOAD_DWORD : SMRD_Helper <0x00, "S_LOAD_DWORD", SReg_64, SGPR_32>;
-defm S_LOAD_DWORDX2 : SMRD_Helper <0x01, "S_LOAD_DWORDX2", SReg_64, SReg_64>;
-defm S_LOAD_DWORDX4 : SMRD_Helper <0x02, "S_LOAD_DWORDX4", SReg_64, SReg_128>;
-defm S_LOAD_DWORDX8 : SMRD_Helper <0x03, "S_LOAD_DWORDX8", SReg_64, SReg_256>;
-defm S_LOAD_DWORDX16 : SMRD_Helper <0x04, "S_LOAD_DWORDX16", SReg_64, SReg_512>;
+defm S_LOAD_DWORD : SMRD_Helper <0x00, "s_load_dword", SReg_64, SGPR_32>;
+defm S_LOAD_DWORDX2 : SMRD_Helper <0x01, "s_load_dwordx2", SReg_64, SReg_64>;
+defm S_LOAD_DWORDX4 : SMRD_Helper <0x02, "s_load_dwordx4", SReg_64, SReg_128>;
+defm S_LOAD_DWORDX8 : SMRD_Helper <0x03, "s_load_dwordx8", SReg_64, SReg_256>;
+defm S_LOAD_DWORDX16 : SMRD_Helper <0x04, "s_load_dwordx16", SReg_64, SReg_512>;
defm S_BUFFER_LOAD_DWORD : SMRD_Helper <
- 0x08, "S_BUFFER_LOAD_DWORD", SReg_128, SGPR_32
+ 0x08, "s_buffer_load_dword", SReg_128, SGPR_32
>;
defm S_BUFFER_LOAD_DWORDX2 : SMRD_Helper <
- 0x09, "S_BUFFER_LOAD_DWORDX2", SReg_128, SReg_64
+ 0x09, "s_buffer_load_dwordx2", SReg_128, SReg_64
>;
defm S_BUFFER_LOAD_DWORDX4 : SMRD_Helper <
- 0x0a, "S_BUFFER_LOAD_DWORDX4", SReg_128, SReg_128
+ 0x0a, "s_buffer_load_dwordx4", SReg_128, SReg_128
>;
defm S_BUFFER_LOAD_DWORDX8 : SMRD_Helper <
- 0x0b, "S_BUFFER_LOAD_DWORDX8", SReg_128, SReg_256
+ 0x0b, "s_buffer_load_dwordx8", SReg_128, SReg_256
>;
defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper <
- 0x0c, "S_BUFFER_LOAD_DWORDX16", SReg_128, SReg_512
+ 0x0c, "s_buffer_load_dwordx16", SReg_128, SReg_512
>;
} // mayLoad = 1
-//def S_MEMTIME : SMRD_ <0x0000001e, "S_MEMTIME", []>;
-//def S_DCACHE_INV : SMRD_ <0x0000001f, "S_DCACHE_INV", []>;
+//def S_MEMTIME : SMRD_ <0x0000001e, "s_memtime", []>;
+//def S_DCACHE_INV : SMRD_ <0x0000001f, "s_dcache_inv", []>;
//===----------------------------------------------------------------------===//
// SOP1 Instructions
//===----------------------------------------------------------------------===//
let isMoveImm = 1 in {
-def S_MOV_B32 : SOP1_32 <0x00000003, "S_MOV_B32", []>;
-def S_MOV_B64 : SOP1_64 <0x00000004, "S_MOV_B64", []>;
-def S_CMOV_B32 : SOP1_32 <0x00000005, "S_CMOV_B32", []>;
-def S_CMOV_B64 : SOP1_64 <0x00000006, "S_CMOV_B64", []>;
+def S_MOV_B32 : SOP1_32 <0x00000003, "s_mov_b32", []>;
+def S_MOV_B64 : SOP1_64 <0x00000004, "s_mov_b64", []>;
+def S_CMOV_B32 : SOP1_32 <0x00000005, "s_cmov_b32", []>;
+def S_CMOV_B64 : SOP1_64 <0x00000006, "s_cmov_b64", []>;
} // End isMoveImm = 1
-def S_NOT_B32 : SOP1_32 <0x00000007, "S_NOT_B32",
+def S_NOT_B32 : SOP1_32 <0x00000007, "s_not_b32",
[(set i32:$dst, (not i32:$src0))]
>;
-def S_NOT_B64 : SOP1_64 <0x00000008, "S_NOT_B64",
+def S_NOT_B64 : SOP1_64 <0x00000008, "s_not_b64",
[(set i64:$dst, (not i64:$src0))]
>;
-def S_WQM_B32 : SOP1_32 <0x00000009, "S_WQM_B32", []>;
-def S_WQM_B64 : SOP1_64 <0x0000000a, "S_WQM_B64", []>;
-def S_BREV_B32 : SOP1_32 <0x0000000b, "S_BREV_B32",
+def S_WQM_B32 : SOP1_32 <0x00000009, "s_wqm_b32", []>;
+def S_WQM_B64 : SOP1_64 <0x0000000a, "s_wqm_b64", []>;
+def S_BREV_B32 : SOP1_32 <0x0000000b, "s_brev_b32",
[(set i32:$dst, (AMDGPUbrev i32:$src0))]
>;
-def S_BREV_B64 : SOP1_64 <0x0000000c, "S_BREV_B64", []>;
+def S_BREV_B64 : SOP1_64 <0x0000000c, "s_brev_b64", []>;
-////def S_BCNT0_I32_B32 : SOP1_BCNT0 <0x0000000d, "S_BCNT0_I32_B32", []>;
-////def S_BCNT0_I32_B64 : SOP1_BCNT0 <0x0000000e, "S_BCNT0_I32_B64", []>;
-def S_BCNT1_I32_B32 : SOP1_32 <0x0000000f, "S_BCNT1_I32_B32",
+////def S_BCNT0_I32_B32 : SOP1_BCNT0 <0x0000000d, "s_bcnt0_i32_b32", []>;
+////def S_BCNT0_I32_B64 : SOP1_BCNT0 <0x0000000e, "s_bcnt0_i32_b64", []>;
+def S_BCNT1_I32_B32 : SOP1_32 <0x0000000f, "s_bcnt1_i32_b32",
[(set i32:$dst, (ctpop i32:$src0))]
>;
-def S_BCNT1_I32_B64 : SOP1_32_64 <0x00000010, "S_BCNT1_I32_B64", []>;
+def S_BCNT1_I32_B64 : SOP1_32_64 <0x00000010, "s_bcnt1_i32_b64", []>;
-////def S_FF0_I32_B32 : SOP1_32 <0x00000011, "S_FF0_I32_B32", []>;
-////def S_FF0_I32_B64 : SOP1_FF0 <0x00000012, "S_FF0_I32_B64", []>;
-def S_FF1_I32_B32 : SOP1_32 <0x00000013, "S_FF1_I32_B32",
+////def S_FF0_I32_B32 : SOP1_32 <0x00000011, "s_ff0_i32_b32", []>;
+////def S_FF0_I32_B64 : SOP1_FF0 <0x00000012, "s_ff0_i32_b64", []>;
+def S_FF1_I32_B32 : SOP1_32 <0x00000013, "s_ff1_i32_b32",
[(set i32:$dst, (cttz_zero_undef i32:$src0))]
>;
-////def S_FF1_I32_B64 : SOP1_FF1 <0x00000014, "S_FF1_I32_B64", []>;
+////def S_FF1_I32_B64 : SOP1_FF1 <0x00000014, "s_ff1_i32_b64", []>;
-def S_FLBIT_I32_B32 : SOP1_32 <0x00000015, "S_FLBIT_I32_B32",
+def S_FLBIT_I32_B32 : SOP1_32 <0x00000015, "s_flbit_i32_b32",
[(set i32:$dst, (ctlz_zero_undef i32:$src0))]
>;
-//def S_FLBIT_I32_B64 : SOP1_32 <0x00000016, "S_FLBIT_I32_B64", []>;
-def S_FLBIT_I32 : SOP1_32 <0x00000017, "S_FLBIT_I32", []>;
-//def S_FLBIT_I32_I64 : SOP1_32 <0x00000018, "S_FLBIT_I32_I64", []>;
-def S_SEXT_I32_I8 : SOP1_32 <0x00000019, "S_SEXT_I32_I8",
+//def S_FLBIT_I32_B64 : SOP1_32 <0x00000016, "s_flbit_i32_b64", []>;
+def S_FLBIT_I32 : SOP1_32 <0x00000017, "s_flbit_i32", []>;
+//def S_FLBIT_I32_I64 : SOP1_32 <0x00000018, "s_flbit_i32_i64", []>;
+def S_SEXT_I32_I8 : SOP1_32 <0x00000019, "s_sext_i32_i8",
[(set i32:$dst, (sext_inreg i32:$src0, i8))]
>;
-def S_SEXT_I32_I16 : SOP1_32 <0x0000001a, "S_SEXT_I32_I16",
+def S_SEXT_I32_I16 : SOP1_32 <0x0000001a, "s_sext_i32_i16",
[(set i32:$dst, (sext_inreg i32:$src0, i16))]
>;
-////def S_BITSET0_B32 : SOP1_BITSET0 <0x0000001b, "S_BITSET0_B32", []>;
-////def S_BITSET0_B64 : SOP1_BITSET0 <0x0000001c, "S_BITSET0_B64", []>;
-////def S_BITSET1_B32 : SOP1_BITSET1 <0x0000001d, "S_BITSET1_B32", []>;
-////def S_BITSET1_B64 : SOP1_BITSET1 <0x0000001e, "S_BITSET1_B64", []>;
+////def S_BITSET0_B32 : SOP1_BITSET0 <0x0000001b, "s_bitset0_b32", []>;
+////def S_BITSET0_B64 : SOP1_BITSET0 <0x0000001c, "s_bitset0_b64", []>;
+////def S_BITSET1_B32 : SOP1_BITSET1 <0x0000001d, "s_bitset1_b32", []>;
+////def S_BITSET1_B64 : SOP1_BITSET1 <0x0000001e, "s_bitset1_b64", []>;
def S_GETPC_B64 : SOP1 <
- 0x0000001f, (outs SReg_64:$dst), (ins), "S_GETPC_B64 $dst", []
+ 0x0000001f, (outs SReg_64:$dst), (ins), "s_getpc_b64 $dst", []
> {
let SSRC0 = 0;
}
-def S_SETPC_B64 : SOP1_64 <0x00000020, "S_SETPC_B64", []>;
-def S_SWAPPC_B64 : SOP1_64 <0x00000021, "S_SWAPPC_B64", []>;
-def S_RFE_B64 : SOP1_64 <0x00000022, "S_RFE_B64", []>;
+def S_SETPC_B64 : SOP1_64 <0x00000020, "s_setpc_b64", []>;
+def S_SWAPPC_B64 : SOP1_64 <0x00000021, "s_swappc_b64", []>;
+def S_RFE_B64 : SOP1_64 <0x00000022, "s_rfe_b64", []>;
let hasSideEffects = 1, Uses = [EXEC], Defs = [EXEC] in {
-def S_AND_SAVEEXEC_B64 : SOP1_64 <0x00000024, "S_AND_SAVEEXEC_B64", []>;
-def S_OR_SAVEEXEC_B64 : SOP1_64 <0x00000025, "S_OR_SAVEEXEC_B64", []>;
-def S_XOR_SAVEEXEC_B64 : SOP1_64 <0x00000026, "S_XOR_SAVEEXEC_B64", []>;
-def S_ANDN2_SAVEEXEC_B64 : SOP1_64 <0x00000027, "S_ANDN2_SAVEEXEC_B64", []>;
-def S_ORN2_SAVEEXEC_B64 : SOP1_64 <0x00000028, "S_ORN2_SAVEEXEC_B64", []>;
-def S_NAND_SAVEEXEC_B64 : SOP1_64 <0x00000029, "S_NAND_SAVEEXEC_B64", []>;
-def S_NOR_SAVEEXEC_B64 : SOP1_64 <0x0000002a, "S_NOR_SAVEEXEC_B64", []>;
-def S_XNOR_SAVEEXEC_B64 : SOP1_64 <0x0000002b, "S_XNOR_SAVEEXEC_B64", []>;
+def S_AND_SAVEEXEC_B64 : SOP1_64 <0x00000024, "s_and_saveexec_b64", []>;
+def S_OR_SAVEEXEC_B64 : SOP1_64 <0x00000025, "s_or_saveexec_b64", []>;
+def S_XOR_SAVEEXEC_B64 : SOP1_64 <0x00000026, "s_xor_saveexec_b64", []>;
+def S_ANDN2_SAVEEXEC_B64 : SOP1_64 <0x00000027, "s_andn2_saveexec_b64", []>;
+def S_ORN2_SAVEEXEC_B64 : SOP1_64 <0x00000028, "s_orn2_saveexec_b64", []>;
+def S_NAND_SAVEEXEC_B64 : SOP1_64 <0x00000029, "s_nand_saveexec_b64", []>;
+def S_NOR_SAVEEXEC_B64 : SOP1_64 <0x0000002a, "s_nor_saveexec_b64", []>;
+def S_XNOR_SAVEEXEC_B64 : SOP1_64 <0x0000002b, "s_xnor_saveexec_b64", []>;
} // End hasSideEffects = 1
-def S_QUADMASK_B32 : SOP1_32 <0x0000002c, "S_QUADMASK_B32", []>;
-def S_QUADMASK_B64 : SOP1_64 <0x0000002d, "S_QUADMASK_B64", []>;
-def S_MOVRELS_B32 : SOP1_32 <0x0000002e, "S_MOVRELS_B32", []>;
-def S_MOVRELS_B64 : SOP1_64 <0x0000002f, "S_MOVRELS_B64", []>;
-def S_MOVRELD_B32 : SOP1_32 <0x00000030, "S_MOVRELD_B32", []>;
-def S_MOVRELD_B64 : SOP1_64 <0x00000031, "S_MOVRELD_B64", []>;
-//def S_CBRANCH_JOIN : SOP1_ <0x00000032, "S_CBRANCH_JOIN", []>;
-def S_MOV_REGRD_B32 : SOP1_32 <0x00000033, "S_MOV_REGRD_B32", []>;
-def S_ABS_I32 : SOP1_32 <0x00000034, "S_ABS_I32", []>;
-def S_MOV_FED_B32 : SOP1_32 <0x00000035, "S_MOV_FED_B32", []>;
+def S_QUADMASK_B32 : SOP1_32 <0x0000002c, "s_quadmask_b32", []>;
+def S_QUADMASK_B64 : SOP1_64 <0x0000002d, "s_quadmask_b64", []>;
+def S_MOVRELS_B32 : SOP1_32 <0x0000002e, "s_movrels_b32", []>;
+def S_MOVRELS_B64 : SOP1_64 <0x0000002f, "s_movrels_b64", []>;
+def S_MOVRELD_B32 : SOP1_32 <0x00000030, "s_movreld_b32", []>;
+def S_MOVRELD_B64 : SOP1_64 <0x00000031, "s_movreld_b64", []>;
+//def S_CBRANCH_JOIN : SOP1_ <0x00000032, "s_cbranch_join", []>;
+def S_MOV_REGRD_B32 : SOP1_32 <0x00000033, "s_mov_regrd_b32", []>;
+def S_ABS_I32 : SOP1_32 <0x00000034, "s_abs_i32", []>;
+def S_MOV_FED_B32 : SOP1_32 <0x00000035, "s_mov_fed_b32", []>;
//===----------------------------------------------------------------------===//
// SOP2 Instructions
let Defs = [SCC] in { // Carry out goes to SCC
let isCommutable = 1 in {
-def S_ADD_U32 : SOP2_32 <0x00000000, "S_ADD_U32", []>;
-def S_ADD_I32 : SOP2_32 <0x00000002, "S_ADD_I32",
+def S_ADD_U32 : SOP2_32 <0x00000000, "s_add_u32", []>;
+def S_ADD_I32 : SOP2_32 <0x00000002, "s_add_i32",
[(set i32:$dst, (add SSrc_32:$src0, SSrc_32:$src1))]
>;
} // End isCommutable = 1
-def S_SUB_U32 : SOP2_32 <0x00000001, "S_SUB_U32", []>;
-def S_SUB_I32 : SOP2_32 <0x00000003, "S_SUB_I32",
+def S_SUB_U32 : SOP2_32 <0x00000001, "s_sub_u32", []>;
+def S_SUB_I32 : SOP2_32 <0x00000003, "s_sub_i32",
[(set i32:$dst, (sub SSrc_32:$src0, SSrc_32:$src1))]
>;
let Uses = [SCC] in { // Carry in comes from SCC
let isCommutable = 1 in {
-def S_ADDC_U32 : SOP2_32 <0x00000004, "S_ADDC_U32",
+def S_ADDC_U32 : SOP2_32 <0x00000004, "s_addc_u32",
[(set i32:$dst, (adde (i32 SSrc_32:$src0), (i32 SSrc_32:$src1)))]>;
} // End isCommutable = 1
-def S_SUBB_U32 : SOP2_32 <0x00000005, "S_SUBB_U32",
+def S_SUBB_U32 : SOP2_32 <0x00000005, "s_subb_u32",
[(set i32:$dst, (sube (i32 SSrc_32:$src0), (i32 SSrc_32:$src1)))]>;
} // End Uses = [SCC]
} // End Defs = [SCC]
-def S_MIN_I32 : SOP2_32 <0x00000006, "S_MIN_I32",
+def S_MIN_I32 : SOP2_32 <0x00000006, "s_min_i32",
[(set i32:$dst, (AMDGPUsmin i32:$src0, i32:$src1))]
>;
-def S_MIN_U32 : SOP2_32 <0x00000007, "S_MIN_U32",
+def S_MIN_U32 : SOP2_32 <0x00000007, "s_min_u32",
[(set i32:$dst, (AMDGPUumin i32:$src0, i32:$src1))]
>;
-def S_MAX_I32 : SOP2_32 <0x00000008, "S_MAX_I32",
+def S_MAX_I32 : SOP2_32 <0x00000008, "s_max_i32",
[(set i32:$dst, (AMDGPUsmax i32:$src0, i32:$src1))]
>;
-def S_MAX_U32 : SOP2_32 <0x00000009, "S_MAX_U32",
+def S_MAX_U32 : SOP2_32 <0x00000009, "s_max_u32",
[(set i32:$dst, (AMDGPUumax i32:$src0, i32:$src1))]
>;
def S_CSELECT_B32 : SOP2 <
0x0000000a, (outs SReg_32:$dst),
- (ins SReg_32:$src0, SReg_32:$src1, SCCReg:$scc), "S_CSELECT_B32",
+ (ins SReg_32:$src0, SReg_32:$src1, SCCReg:$scc), "s_cselect_b32",
[]
>;
-def S_CSELECT_B64 : SOP2_64 <0x0000000b, "S_CSELECT_B64", []>;
+def S_CSELECT_B64 : SOP2_64 <0x0000000b, "s_cselect_b64", []>;
-def S_AND_B32 : SOP2_32 <0x0000000e, "S_AND_B32",
+def S_AND_B32 : SOP2_32 <0x0000000e, "s_and_b32",
[(set i32:$dst, (and i32:$src0, i32:$src1))]
>;
-def S_AND_B64 : SOP2_64 <0x0000000f, "S_AND_B64",
+def S_AND_B64 : SOP2_64 <0x0000000f, "s_and_b64",
[(set i64:$dst, (and i64:$src0, i64:$src1))]
>;
-def S_OR_B32 : SOP2_32 <0x00000010, "S_OR_B32",
+def S_OR_B32 : SOP2_32 <0x00000010, "s_or_b32",
[(set i32:$dst, (or i32:$src0, i32:$src1))]
>;
-def S_OR_B64 : SOP2_64 <0x00000011, "S_OR_B64",
+def S_OR_B64 : SOP2_64 <0x00000011, "s_or_b64",
[(set i64:$dst, (or i64:$src0, i64:$src1))]
>;
-def S_XOR_B32 : SOP2_32 <0x00000012, "S_XOR_B32",
+def S_XOR_B32 : SOP2_32 <0x00000012, "s_xor_b32",
[(set i32:$dst, (xor i32:$src0, i32:$src1))]
>;
-def S_XOR_B64 : SOP2_64 <0x00000013, "S_XOR_B64",
+def S_XOR_B64 : SOP2_64 <0x00000013, "s_xor_b64",
[(set i64:$dst, (xor i64:$src0, i64:$src1))]
>;
-def S_ANDN2_B32 : SOP2_32 <0x00000014, "S_ANDN2_B32", []>;
-def S_ANDN2_B64 : SOP2_64 <0x00000015, "S_ANDN2_B64", []>;
-def S_ORN2_B32 : SOP2_32 <0x00000016, "S_ORN2_B32", []>;
-def S_ORN2_B64 : SOP2_64 <0x00000017, "S_ORN2_B64", []>;
-def S_NAND_B32 : SOP2_32 <0x00000018, "S_NAND_B32", []>;
-def S_NAND_B64 : SOP2_64 <0x00000019, "S_NAND_B64", []>;
-def S_NOR_B32 : SOP2_32 <0x0000001a, "S_NOR_B32", []>;
-def S_NOR_B64 : SOP2_64 <0x0000001b, "S_NOR_B64", []>;
-def S_XNOR_B32 : SOP2_32 <0x0000001c, "S_XNOR_B32", []>;
-def S_XNOR_B64 : SOP2_64 <0x0000001d, "S_XNOR_B64", []>;
+def S_ANDN2_B32 : SOP2_32 <0x00000014, "s_andn2_b32", []>;
+def S_ANDN2_B64 : SOP2_64 <0x00000015, "s_andn2_b64", []>;
+def S_ORN2_B32 : SOP2_32 <0x00000016, "s_orn2_b32", []>;
+def S_ORN2_B64 : SOP2_64 <0x00000017, "s_orn2_b64", []>;
+def S_NAND_B32 : SOP2_32 <0x00000018, "s_nand_b32", []>;
+def S_NAND_B64 : SOP2_64 <0x00000019, "s_nand_b64", []>;
+def S_NOR_B32 : SOP2_32 <0x0000001a, "s_nor_b32", []>;
+def S_NOR_B64 : SOP2_64 <0x0000001b, "s_nor_b64", []>;
+def S_XNOR_B32 : SOP2_32 <0x0000001c, "s_xnor_b32", []>;
+def S_XNOR_B64 : SOP2_64 <0x0000001d, "s_xnor_b64", []>;
// Use added complexity so these patterns are preferred to the VALU patterns.
let AddedComplexity = 1 in {
-def S_LSHL_B32 : SOP2_32 <0x0000001e, "S_LSHL_B32",
+def S_LSHL_B32 : SOP2_32 <0x0000001e, "s_lshl_b32",
[(set i32:$dst, (shl i32:$src0, i32:$src1))]
>;
-def S_LSHL_B64 : SOP2_SHIFT_64 <0x0000001f, "S_LSHL_B64",
+def S_LSHL_B64 : SOP2_SHIFT_64 <0x0000001f, "s_lshl_b64",
[(set i64:$dst, (shl i64:$src0, i32:$src1))]
>;
-def S_LSHR_B32 : SOP2_32 <0x00000020, "S_LSHR_B32",
+def S_LSHR_B32 : SOP2_32 <0x00000020, "s_lshr_b32",
[(set i32:$dst, (srl i32:$src0, i32:$src1))]
>;
-def S_LSHR_B64 : SOP2_SHIFT_64 <0x00000021, "S_LSHR_B64",
+def S_LSHR_B64 : SOP2_SHIFT_64 <0x00000021, "s_lshr_b64",
[(set i64:$dst, (srl i64:$src0, i32:$src1))]
>;
-def S_ASHR_I32 : SOP2_32 <0x00000022, "S_ASHR_I32",
+def S_ASHR_I32 : SOP2_32 <0x00000022, "s_ashr_i32",
[(set i32:$dst, (sra i32:$src0, i32:$src1))]
>;
-def S_ASHR_I64 : SOP2_SHIFT_64 <0x00000023, "S_ASHR_I64",
+def S_ASHR_I64 : SOP2_SHIFT_64 <0x00000023, "s_ashr_i64",
[(set i64:$dst, (sra i64:$src0, i32:$src1))]
>;
-def S_BFM_B32 : SOP2_32 <0x00000024, "S_BFM_B32", []>;
-def S_BFM_B64 : SOP2_64 <0x00000025, "S_BFM_B64", []>;
-def S_MUL_I32 : SOP2_32 <0x00000026, "S_MUL_I32",
+def S_BFM_B32 : SOP2_32 <0x00000024, "s_bfm_b32", []>;
+def S_BFM_B64 : SOP2_64 <0x00000025, "s_bfm_b64", []>;
+def S_MUL_I32 : SOP2_32 <0x00000026, "s_mul_i32",
[(set i32:$dst, (mul i32:$src0, i32:$src1))]
>;
} // End AddedComplexity = 1
-def S_BFE_U32 : SOP2_32 <0x00000027, "S_BFE_U32", []>;
-def S_BFE_I32 : SOP2_32 <0x00000028, "S_BFE_I32", []>;
-def S_BFE_U64 : SOP2_64 <0x00000029, "S_BFE_U64", []>;
-def S_BFE_I64 : SOP2_64 <0x0000002a, "S_BFE_I64", []>;
-//def S_CBRANCH_G_FORK : SOP2_ <0x0000002b, "S_CBRANCH_G_FORK", []>;
-def S_ABSDIFF_I32 : SOP2_32 <0x0000002c, "S_ABSDIFF_I32", []>;
+def S_BFE_U32 : SOP2_32 <0x00000027, "s_bfe_u32", []>;
+def S_BFE_I32 : SOP2_32 <0x00000028, "s_bfe_i32", []>;
+def S_BFE_U64 : SOP2_64 <0x00000029, "s_bfe_u64", []>;
+def S_BFE_I64 : SOP2_64 <0x0000002a, "s_bfe_i64", []>;
+//def S_CBRANCH_G_FORK : SOP2_ <0x0000002b, "s_cbranch_g_fork", []>;
+def S_ABSDIFF_I32 : SOP2_32 <0x0000002c, "s_absdiff_i32", []>;
//===----------------------------------------------------------------------===//
// SOPC Instructions
//===----------------------------------------------------------------------===//
-def S_CMP_EQ_I32 : SOPC_32 <0x00000000, "S_CMP_EQ_I32">;
-def S_CMP_LG_I32 : SOPC_32 <0x00000001, "S_CMP_LG_I32">;
-def S_CMP_GT_I32 : SOPC_32 <0x00000002, "S_CMP_GT_I32">;
-def S_CMP_GE_I32 : SOPC_32 <0x00000003, "S_CMP_GE_I32">;
-def S_CMP_LT_I32 : SOPC_32 <0x00000004, "S_CMP_LT_I32">;
-def S_CMP_LE_I32 : SOPC_32 <0x00000005, "S_CMP_LE_I32">;
-def S_CMP_EQ_U32 : SOPC_32 <0x00000006, "S_CMP_EQ_U32">;
-def S_CMP_LG_U32 : SOPC_32 <0x00000007, "S_CMP_LG_U32">;
-def S_CMP_GT_U32 : SOPC_32 <0x00000008, "S_CMP_GT_U32">;
-def S_CMP_GE_U32 : SOPC_32 <0x00000009, "S_CMP_GE_U32">;
-def S_CMP_LT_U32 : SOPC_32 <0x0000000a, "S_CMP_LT_U32">;
-def S_CMP_LE_U32 : SOPC_32 <0x0000000b, "S_CMP_LE_U32">;
-////def S_BITCMP0_B32 : SOPC_BITCMP0 <0x0000000c, "S_BITCMP0_B32", []>;
-////def S_BITCMP1_B32 : SOPC_BITCMP1 <0x0000000d, "S_BITCMP1_B32", []>;
-////def S_BITCMP0_B64 : SOPC_BITCMP0 <0x0000000e, "S_BITCMP0_B64", []>;
-////def S_BITCMP1_B64 : SOPC_BITCMP1 <0x0000000f, "S_BITCMP1_B64", []>;
-//def S_SETVSKIP : SOPC_ <0x00000010, "S_SETVSKIP", []>;
+def S_CMP_EQ_I32 : SOPC_32 <0x00000000, "s_cmp_eq_i32">;
+def S_CMP_LG_I32 : SOPC_32 <0x00000001, "s_cmp_lg_i32">;
+def S_CMP_GT_I32 : SOPC_32 <0x00000002, "s_cmp_gt_i32">;
+def S_CMP_GE_I32 : SOPC_32 <0x00000003, "s_cmp_ge_i32">;
+def S_CMP_LT_I32 : SOPC_32 <0x00000004, "s_cmp_lt_i32">;
+def S_CMP_LE_I32 : SOPC_32 <0x00000005, "s_cmp_le_i32">;
+def S_CMP_EQ_U32 : SOPC_32 <0x00000006, "s_cmp_eq_u32">;
+def S_CMP_LG_U32 : SOPC_32 <0x00000007, "s_cmp_lg_u32">;
+def S_CMP_GT_U32 : SOPC_32 <0x00000008, "s_cmp_gt_u32">;
+def S_CMP_GE_U32 : SOPC_32 <0x00000009, "s_cmp_ge_u32">;
+def S_CMP_LT_U32 : SOPC_32 <0x0000000a, "s_cmp_lt_u32">;
+def S_CMP_LE_U32 : SOPC_32 <0x0000000b, "s_cmp_le_u32">;
+////def S_BITCMP0_B32 : SOPC_BITCMP0 <0x0000000c, "s_bitcmp0_b32", []>;
+////def S_BITCMP1_B32 : SOPC_BITCMP1 <0x0000000d, "s_bitcmp1_b32", []>;
+////def S_BITCMP0_B64 : SOPC_BITCMP0 <0x0000000e, "s_bitcmp0_b64", []>;
+////def S_BITCMP1_B64 : SOPC_BITCMP1 <0x0000000f, "s_bitcmp1_b64", []>;
+//def S_SETVSKIP : SOPC_ <0x00000010, "s_setvskip", []>;
//===----------------------------------------------------------------------===//
// SOPK Instructions
//===----------------------------------------------------------------------===//
-def S_MOVK_I32 : SOPK_32 <0x00000000, "S_MOVK_I32", []>;
-def S_CMOVK_I32 : SOPK_32 <0x00000002, "S_CMOVK_I32", []>;
+def S_MOVK_I32 : SOPK_32 <0x00000000, "s_movk_i32", []>;
+def S_CMOVK_I32 : SOPK_32 <0x00000002, "s_cmovk_i32", []>;
/*
This instruction is disabled for now until we can figure out how to teach
def S_CMPK_EQ_I32 : SOPK <
0x00000003, (outs SCCReg:$dst), (ins SReg_32:$src0, i32imm:$src1),
- "S_CMPK_EQ_I32",
+ "s_cmpk_eq_i32",
[(set i1:$dst, (setcc i32:$src0, imm:$src1, SETEQ))]
>;
*/
let isCompare = 1, Defs = [SCC] in {
-def S_CMPK_LG_I32 : SOPK_32 <0x00000004, "S_CMPK_LG_I32", []>;
-def S_CMPK_GT_I32 : SOPK_32 <0x00000005, "S_CMPK_GT_I32", []>;
-def S_CMPK_GE_I32 : SOPK_32 <0x00000006, "S_CMPK_GE_I32", []>;
-def S_CMPK_LT_I32 : SOPK_32 <0x00000007, "S_CMPK_LT_I32", []>;
-def S_CMPK_LE_I32 : SOPK_32 <0x00000008, "S_CMPK_LE_I32", []>;
-def S_CMPK_EQ_U32 : SOPK_32 <0x00000009, "S_CMPK_EQ_U32", []>;
-def S_CMPK_LG_U32 : SOPK_32 <0x0000000a, "S_CMPK_LG_U32", []>;
-def S_CMPK_GT_U32 : SOPK_32 <0x0000000b, "S_CMPK_GT_U32", []>;
-def S_CMPK_GE_U32 : SOPK_32 <0x0000000c, "S_CMPK_GE_U32", []>;
-def S_CMPK_LT_U32 : SOPK_32 <0x0000000d, "S_CMPK_LT_U32", []>;
-def S_CMPK_LE_U32 : SOPK_32 <0x0000000e, "S_CMPK_LE_U32", []>;
+def S_CMPK_LG_I32 : SOPK_32 <0x00000004, "s_cmpk_lg_i32", []>;
+def S_CMPK_GT_I32 : SOPK_32 <0x00000005, "s_cmpk_gt_i32", []>;
+def S_CMPK_GE_I32 : SOPK_32 <0x00000006, "s_cmpk_ge_i32", []>;
+def S_CMPK_LT_I32 : SOPK_32 <0x00000007, "s_cmpk_lt_i32", []>;
+def S_CMPK_LE_I32 : SOPK_32 <0x00000008, "s_cmpk_le_i32", []>;
+def S_CMPK_EQ_U32 : SOPK_32 <0x00000009, "s_cmpk_eq_u32", []>;
+def S_CMPK_LG_U32 : SOPK_32 <0x0000000a, "s_cmpk_lg_u32", []>;
+def S_CMPK_GT_U32 : SOPK_32 <0x0000000b, "s_cmpk_gt_u32", []>;
+def S_CMPK_GE_U32 : SOPK_32 <0x0000000c, "s_cmpk_ge_u32", []>;
+def S_CMPK_LT_U32 : SOPK_32 <0x0000000d, "s_cmpk_lt_u32", []>;
+def S_CMPK_LE_U32 : SOPK_32 <0x0000000e, "s_cmpk_le_u32", []>;
} // End isCompare = 1, Defs = [SCC]
let Defs = [SCC], isCommutable = 1 in {
- def S_ADDK_I32 : SOPK_32 <0x0000000f, "S_ADDK_I32", []>;
- def S_MULK_I32 : SOPK_32 <0x00000010, "S_MULK_I32", []>;
+ def S_ADDK_I32 : SOPK_32 <0x0000000f, "s_addk_i32", []>;
+ def S_MULK_I32 : SOPK_32 <0x00000010, "s_mulk_i32", []>;
}
-//def S_CBRANCH_I_FORK : SOPK_ <0x00000011, "S_CBRANCH_I_FORK", []>;
-def S_GETREG_B32 : SOPK_32 <0x00000012, "S_GETREG_B32", []>;
-def S_SETREG_B32 : SOPK_32 <0x00000013, "S_SETREG_B32", []>;
-def S_GETREG_REGRD_B32 : SOPK_32 <0x00000014, "S_GETREG_REGRD_B32", []>;
-//def S_SETREG_IMM32_B32 : SOPK_32 <0x00000015, "S_SETREG_IMM32_B32", []>;
-//def EXP : EXP_ <0x00000000, "EXP", []>;
+//def S_CBRANCH_I_FORK : SOPK_ <0x00000011, "s_cbranch_i_fork", []>;
+def S_GETREG_B32 : SOPK_32 <0x00000012, "s_getreg_b32", []>;
+def S_SETREG_B32 : SOPK_32 <0x00000013, "s_setreg_b32", []>;
+def S_GETREG_REGRD_B32 : SOPK_32 <0x00000014, "s_getreg_regrd_b32", []>;
+//def S_SETREG_IMM32_B32 : SOPK_32 <0x00000015, "s_setreg_imm32_b32", []>;
+//def EXP : EXP_ <0x00000000, "exp", []>;
//===----------------------------------------------------------------------===//
// SOPP Instructions
//===----------------------------------------------------------------------===//
-def S_NOP : SOPP <0x00000000, (ins i16imm:$simm16), "S_NOP $simm16", []>;
+def S_NOP : SOPP <0x00000000, (ins i16imm:$simm16), "s_nop $simm16", []>;
let isTerminator = 1 in {
-def S_ENDPGM : SOPP <0x00000001, (ins), "S_ENDPGM",
+def S_ENDPGM : SOPP <0x00000001, (ins), "s_endpgm",
[(IL_retflag)]> {
let simm16 = 0;
let isBarrier = 1;
let isBranch = 1 in {
def S_BRANCH : SOPP <
- 0x00000002, (ins sopp_brtarget:$simm16), "S_BRANCH $simm16",
+ 0x00000002, (ins sopp_brtarget:$simm16), "s_branch $simm16",
[(br bb:$simm16)]> {
let isBarrier = 1;
}
let DisableEncoding = "$scc" in {
def S_CBRANCH_SCC0 : SOPP <
0x00000004, (ins sopp_brtarget:$simm16, SCCReg:$scc),
- "S_CBRANCH_SCC0 $simm16", []
+ "s_cbranch_scc0 $simm16", []
>;
def S_CBRANCH_SCC1 : SOPP <
0x00000005, (ins sopp_brtarget:$simm16, SCCReg:$scc),
- "S_CBRANCH_SCC1 $simm16",
+ "s_cbranch_scc1 $simm16",
[]
>;
} // End DisableEncoding = "$scc"
def S_CBRANCH_VCCZ : SOPP <
0x00000006, (ins sopp_brtarget:$simm16, VCCReg:$vcc),
- "S_CBRANCH_VCCZ $simm16",
+ "s_cbranch_vccz $simm16",
[]
>;
def S_CBRANCH_VCCNZ : SOPP <
0x00000007, (ins sopp_brtarget:$simm16, VCCReg:$vcc),
- "S_CBRANCH_VCCNZ $simm16",
+ "s_cbranch_vccnz $simm16",
[]
>;
let DisableEncoding = "$exec" in {
def S_CBRANCH_EXECZ : SOPP <
0x00000008, (ins sopp_brtarget:$simm16, EXECReg:$exec),
- "S_CBRANCH_EXECZ $simm16",
+ "s_cbranch_execz $simm16",
[]
>;
def S_CBRANCH_EXECNZ : SOPP <
0x00000009, (ins sopp_brtarget:$simm16, EXECReg:$exec),
- "S_CBRANCH_EXECNZ $simm16",
+ "s_cbranch_execnz $simm16",
[]
>;
} // End DisableEncoding = "$exec"
} // End isTerminator = 1
let hasSideEffects = 1 in {
-def S_BARRIER : SOPP <0x0000000a, (ins), "S_BARRIER",
+def S_BARRIER : SOPP <0x0000000a, (ins), "s_barrier",
[(int_AMDGPU_barrier_local)]
> {
let simm16 = 0;
let mayStore = 1;
}
-def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "S_WAITCNT $simm16",
+def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "s_waitcnt $simm16",
[]
>;
-//def S_SETHALT : SOPP_ <0x0000000d, "S_SETHALT", []>;
-//def S_SLEEP : SOPP_ <0x0000000e, "S_SLEEP", []>;
-//def S_SETPRIO : SOPP_ <0x0000000f, "S_SETPRIO", []>;
+//def S_SETHALT : SOPP_ <0x0000000d, "s_sethalt", []>;
+//def S_SLEEP : SOPP_ <0x0000000e, "s_sleep", []>;
+//def S_SETPRIO : SOPP_ <0x0000000f, "s_setprio", []>;
let Uses = [EXEC] in {
- def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16, M0Reg:$m0), "S_SENDMSG $simm16",
+ def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16, M0Reg:$m0), "s_sendmsg $simm16",
[(int_SI_sendmsg imm:$simm16, M0Reg:$m0)]
> {
let DisableEncoding = "$m0";
}
} // End Uses = [EXEC]
-//def S_SENDMSGHALT : SOPP_ <0x00000011, "S_SENDMSGHALT", []>;
-//def S_TRAP : SOPP_ <0x00000012, "S_TRAP", []>;
-//def S_ICACHE_INV : SOPP_ <0x00000013, "S_ICACHE_INV", []>;
-//def S_INCPERFLEVEL : SOPP_ <0x00000014, "S_INCPERFLEVEL", []>;
-//def S_DECPERFLEVEL : SOPP_ <0x00000015, "S_DECPERFLEVEL", []>;
-//def S_TTRACEDATA : SOPP_ <0x00000016, "S_TTRACEDATA", []>;
+//def S_SENDMSGHALT : SOPP_ <0x00000011, "s_sendmsghalt", []>;
+//def S_TRAP : SOPP_ <0x00000012, "s_trap", []>;
+//def S_ICACHE_INV : SOPP_ <0x00000013, "s_icache_inv", []>;
+//def S_INCPERFLEVEL : SOPP_ <0x00000014, "s_incperflevel", []>;
+//def S_DECPERFLEVEL : SOPP_ <0x00000015, "s_decperflevel", []>;
+//def S_TTRACEDATA : SOPP_ <0x00000016, "s_ttracedata", []>;
} // End hasSideEffects
//===----------------------------------------------------------------------===//
let isCompare = 1 in {
-defm V_CMP_F_F32 : VOPC_F32 <vopc<0x0>, "V_CMP_F_F32">;
-defm V_CMP_LT_F32 : VOPC_F32 <vopc<0x1>, "V_CMP_LT_F32", COND_OLT>;
-defm V_CMP_EQ_F32 : VOPC_F32 <vopc<0x2>, "V_CMP_EQ_F32", COND_OEQ>;
-defm V_CMP_LE_F32 : VOPC_F32 <vopc<0x3>, "V_CMP_LE_F32", COND_OLE>;
-defm V_CMP_GT_F32 : VOPC_F32 <vopc<0x4>, "V_CMP_GT_F32", COND_OGT>;
-defm V_CMP_LG_F32 : VOPC_F32 <vopc<0x5>, "V_CMP_LG_F32">;
-defm V_CMP_GE_F32 : VOPC_F32 <vopc<0x6>, "V_CMP_GE_F32", COND_OGE>;
-defm V_CMP_O_F32 : VOPC_F32 <vopc<0x7>, "V_CMP_O_F32", COND_O>;
-defm V_CMP_U_F32 : VOPC_F32 <vopc<0x8>, "V_CMP_U_F32", COND_UO>;
-defm V_CMP_NGE_F32 : VOPC_F32 <vopc<0x9>, "V_CMP_NGE_F32">;
-defm V_CMP_NLG_F32 : VOPC_F32 <vopc<0xa>, "V_CMP_NLG_F32">;
-defm V_CMP_NGT_F32 : VOPC_F32 <vopc<0xb>, "V_CMP_NGT_F32">;
-defm V_CMP_NLE_F32 : VOPC_F32 <vopc<0xc>, "V_CMP_NLE_F32">;
-defm V_CMP_NEQ_F32 : VOPC_F32 <vopc<0xd>, "V_CMP_NEQ_F32", COND_UNE>;
-defm V_CMP_NLT_F32 : VOPC_F32 <vopc<0xe>, "V_CMP_NLT_F32">;
-defm V_CMP_TRU_F32 : VOPC_F32 <vopc<0xf>, "V_CMP_TRU_F32">;
+defm V_CMP_F_F32 : VOPC_F32 <vopc<0x0>, "v_cmp_f_f32">;
+defm V_CMP_LT_F32 : VOPC_F32 <vopc<0x1>, "v_cmp_lt_f32", COND_OLT>;
+defm V_CMP_EQ_F32 : VOPC_F32 <vopc<0x2>, "v_cmp_eq_f32", COND_OEQ>;
+defm V_CMP_LE_F32 : VOPC_F32 <vopc<0x3>, "v_cmp_le_f32", COND_OLE>;
+defm V_CMP_GT_F32 : VOPC_F32 <vopc<0x4>, "v_cmp_gt_f32", COND_OGT>;
+defm V_CMP_LG_F32 : VOPC_F32 <vopc<0x5>, "v_cmp_lg_f32">;
+defm V_CMP_GE_F32 : VOPC_F32 <vopc<0x6>, "v_cmp_ge_f32", COND_OGE>;
+defm V_CMP_O_F32 : VOPC_F32 <vopc<0x7>, "v_cmp_o_f32", COND_O>;
+defm V_CMP_U_F32 : VOPC_F32 <vopc<0x8>, "v_cmp_u_f32", COND_UO>;
+defm V_CMP_NGE_F32 : VOPC_F32 <vopc<0x9>, "v_cmp_nge_f32">;
+defm V_CMP_NLG_F32 : VOPC_F32 <vopc<0xa>, "v_cmp_nlg_f32">;
+defm V_CMP_NGT_F32 : VOPC_F32 <vopc<0xb>, "v_cmp_ngt_f32">;
+defm V_CMP_NLE_F32 : VOPC_F32 <vopc<0xc>, "v_cmp_nle_f32">;
+defm V_CMP_NEQ_F32 : VOPC_F32 <vopc<0xd>, "v_cmp_neq_f32", COND_UNE>;
+defm V_CMP_NLT_F32 : VOPC_F32 <vopc<0xe>, "v_cmp_nlt_f32">;
+defm V_CMP_TRU_F32 : VOPC_F32 <vopc<0xf>, "v_cmp_tru_f32">;
let hasSideEffects = 1 in {
-defm V_CMPX_F_F32 : VOPCX_F32 <vopc<0x10>, "V_CMPX_F_F32">;
-defm V_CMPX_LT_F32 : VOPCX_F32 <vopc<0x11>, "V_CMPX_LT_F32">;
-defm V_CMPX_EQ_F32 : VOPCX_F32 <vopc<0x12>, "V_CMPX_EQ_F32">;
-defm V_CMPX_LE_F32 : VOPCX_F32 <vopc<0x13>, "V_CMPX_LE_F32">;
-defm V_CMPX_GT_F32 : VOPCX_F32 <vopc<0x14>, "V_CMPX_GT_F32">;
-defm V_CMPX_LG_F32 : VOPCX_F32 <vopc<0x15>, "V_CMPX_LG_F32">;
-defm V_CMPX_GE_F32 : VOPCX_F32 <vopc<0x16>, "V_CMPX_GE_F32">;
-defm V_CMPX_O_F32 : VOPCX_F32 <vopc<0x17>, "V_CMPX_O_F32">;
-defm V_CMPX_U_F32 : VOPCX_F32 <vopc<0x18>, "V_CMPX_U_F32">;
-defm V_CMPX_NGE_F32 : VOPCX_F32 <vopc<0x19>, "V_CMPX_NGE_F32">;
-defm V_CMPX_NLG_F32 : VOPCX_F32 <vopc<0x1a>, "V_CMPX_NLG_F32">;
-defm V_CMPX_NGT_F32 : VOPCX_F32 <vopc<0x1b>, "V_CMPX_NGT_F32">;
-defm V_CMPX_NLE_F32 : VOPCX_F32 <vopc<0x1c>, "V_CMPX_NLE_F32">;
-defm V_CMPX_NEQ_F32 : VOPCX_F32 <vopc<0x1d>, "V_CMPX_NEQ_F32">;
-defm V_CMPX_NLT_F32 : VOPCX_F32 <vopc<0x1e>, "V_CMPX_NLT_F32">;
-defm V_CMPX_TRU_F32 : VOPCX_F32 <vopc<0x1f>, "V_CMPX_TRU_F32">;
+defm V_CMPX_F_F32 : VOPCX_F32 <vopc<0x10>, "v_cmpx_f_f32">;
+defm V_CMPX_LT_F32 : VOPCX_F32 <vopc<0x11>, "v_cmpx_lt_f32">;
+defm V_CMPX_EQ_F32 : VOPCX_F32 <vopc<0x12>, "v_cmpx_eq_f32">;
+defm V_CMPX_LE_F32 : VOPCX_F32 <vopc<0x13>, "v_cmpx_le_f32">;
+defm V_CMPX_GT_F32 : VOPCX_F32 <vopc<0x14>, "v_cmpx_gt_f32">;
+defm V_CMPX_LG_F32 : VOPCX_F32 <vopc<0x15>, "v_cmpx_lg_f32">;
+defm V_CMPX_GE_F32 : VOPCX_F32 <vopc<0x16>, "v_cmpx_ge_f32">;
+defm V_CMPX_O_F32 : VOPCX_F32 <vopc<0x17>, "v_cmpx_o_f32">;
+defm V_CMPX_U_F32 : VOPCX_F32 <vopc<0x18>, "v_cmpx_u_f32">;
+defm V_CMPX_NGE_F32 : VOPCX_F32 <vopc<0x19>, "v_cmpx_nge_f32">;
+defm V_CMPX_NLG_F32 : VOPCX_F32 <vopc<0x1a>, "v_cmpx_nlg_f32">;
+defm V_CMPX_NGT_F32 : VOPCX_F32 <vopc<0x1b>, "v_cmpx_ngt_f32">;
+defm V_CMPX_NLE_F32 : VOPCX_F32 <vopc<0x1c>, "v_cmpx_nle_f32">;
+defm V_CMPX_NEQ_F32 : VOPCX_F32 <vopc<0x1d>, "v_cmpx_neq_f32">;
+defm V_CMPX_NLT_F32 : VOPCX_F32 <vopc<0x1e>, "v_cmpx_nlt_f32">;
+defm V_CMPX_TRU_F32 : VOPCX_F32 <vopc<0x1f>, "v_cmpx_tru_f32">;
} // End hasSideEffects = 1
-defm V_CMP_F_F64 : VOPC_F64 <vopc<0x20>, "V_CMP_F_F64">;
-defm V_CMP_LT_F64 : VOPC_F64 <vopc<0x21>, "V_CMP_LT_F64", COND_OLT>;
-defm V_CMP_EQ_F64 : VOPC_F64 <vopc<0x22>, "V_CMP_EQ_F64", COND_OEQ>;
-defm V_CMP_LE_F64 : VOPC_F64 <vopc<0x23>, "V_CMP_LE_F64", COND_OLE>;
-defm V_CMP_GT_F64 : VOPC_F64 <vopc<0x24>, "V_CMP_GT_F64", COND_OGT>;
-defm V_CMP_LG_F64 : VOPC_F64 <vopc<0x25>, "V_CMP_LG_F64">;
-defm V_CMP_GE_F64 : VOPC_F64 <vopc<0x26>, "V_CMP_GE_F64", COND_OGE>;
-defm V_CMP_O_F64 : VOPC_F64 <vopc<0x27>, "V_CMP_O_F64", COND_O>;
-defm V_CMP_U_F64 : VOPC_F64 <vopc<0x28>, "V_CMP_U_F64", COND_UO>;
-defm V_CMP_NGE_F64 : VOPC_F64 <vopc<0x29>, "V_CMP_NGE_F64">;
-defm V_CMP_NLG_F64 : VOPC_F64 <vopc<0x2a>, "V_CMP_NLG_F64">;
-defm V_CMP_NGT_F64 : VOPC_F64 <vopc<0x2b>, "V_CMP_NGT_F64">;
-defm V_CMP_NLE_F64 : VOPC_F64 <vopc<0x2c>, "V_CMP_NLE_F64">;
-defm V_CMP_NEQ_F64 : VOPC_F64 <vopc<0x2d>, "V_CMP_NEQ_F64", COND_UNE>;
-defm V_CMP_NLT_F64 : VOPC_F64 <vopc<0x2e>, "V_CMP_NLT_F64">;
-defm V_CMP_TRU_F64 : VOPC_F64 <vopc<0x2f>, "V_CMP_TRU_F64">;
+defm V_CMP_F_F64 : VOPC_F64 <vopc<0x20>, "v_cmp_f_f64">;
+defm V_CMP_LT_F64 : VOPC_F64 <vopc<0x21>, "v_cmp_lt_f64", COND_OLT>;
+defm V_CMP_EQ_F64 : VOPC_F64 <vopc<0x22>, "v_cmp_eq_f64", COND_OEQ>;
+defm V_CMP_LE_F64 : VOPC_F64 <vopc<0x23>, "v_cmp_le_f64", COND_OLE>;
+defm V_CMP_GT_F64 : VOPC_F64 <vopc<0x24>, "v_cmp_gt_f64", COND_OGT>;
+defm V_CMP_LG_F64 : VOPC_F64 <vopc<0x25>, "v_cmp_lg_f64">;
+defm V_CMP_GE_F64 : VOPC_F64 <vopc<0x26>, "v_cmp_ge_f64", COND_OGE>;
+defm V_CMP_O_F64 : VOPC_F64 <vopc<0x27>, "v_cmp_o_f64", COND_O>;
+defm V_CMP_U_F64 : VOPC_F64 <vopc<0x28>, "v_cmp_u_f64", COND_UO>;
+defm V_CMP_NGE_F64 : VOPC_F64 <vopc<0x29>, "v_cmp_nge_f64">;
+defm V_CMP_NLG_F64 : VOPC_F64 <vopc<0x2a>, "v_cmp_nlg_f64">;
+defm V_CMP_NGT_F64 : VOPC_F64 <vopc<0x2b>, "v_cmp_ngt_f64">;
+defm V_CMP_NLE_F64 : VOPC_F64 <vopc<0x2c>, "v_cmp_nle_f64">;
+defm V_CMP_NEQ_F64 : VOPC_F64 <vopc<0x2d>, "v_cmp_neq_f64", COND_UNE>;
+defm V_CMP_NLT_F64 : VOPC_F64 <vopc<0x2e>, "v_cmp_nlt_f64">;
+defm V_CMP_TRU_F64 : VOPC_F64 <vopc<0x2f>, "v_cmp_tru_f64">;
let hasSideEffects = 1 in {
-defm V_CMPX_F_F64 : VOPCX_F64 <vopc<0x30>, "V_CMPX_F_F64">;
-defm V_CMPX_LT_F64 : VOPCX_F64 <vopc<0x31>, "V_CMPX_LT_F64">;
-defm V_CMPX_EQ_F64 : VOPCX_F64 <vopc<0x32>, "V_CMPX_EQ_F64">;
-defm V_CMPX_LE_F64 : VOPCX_F64 <vopc<0x33>, "V_CMPX_LE_F64">;
-defm V_CMPX_GT_F64 : VOPCX_F64 <vopc<0x34>, "V_CMPX_GT_F64">;
-defm V_CMPX_LG_F64 : VOPCX_F64 <vopc<0x35>, "V_CMPX_LG_F64">;
-defm V_CMPX_GE_F64 : VOPCX_F64 <vopc<0x36>, "V_CMPX_GE_F64">;
-defm V_CMPX_O_F64 : VOPCX_F64 <vopc<0x37>, "V_CMPX_O_F64">;
-defm V_CMPX_U_F64 : VOPCX_F64 <vopc<0x38>, "V_CMPX_U_F64">;
-defm V_CMPX_NGE_F64 : VOPCX_F64 <vopc<0x39>, "V_CMPX_NGE_F64">;
-defm V_CMPX_NLG_F64 : VOPCX_F64 <vopc<0x3a>, "V_CMPX_NLG_F64">;
-defm V_CMPX_NGT_F64 : VOPCX_F64 <vopc<0x3b>, "V_CMPX_NGT_F64">;
-defm V_CMPX_NLE_F64 : VOPCX_F64 <vopc<0x3c>, "V_CMPX_NLE_F64">;
-defm V_CMPX_NEQ_F64 : VOPCX_F64 <vopc<0x3d>, "V_CMPX_NEQ_F64">;
-defm V_CMPX_NLT_F64 : VOPCX_F64 <vopc<0x3e>, "V_CMPX_NLT_F64">;
-defm V_CMPX_TRU_F64 : VOPCX_F64 <vopc<0x3f>, "V_CMPX_TRU_F64">;
+defm V_CMPX_F_F64 : VOPCX_F64 <vopc<0x30>, "v_cmpx_f_f64">;
+defm V_CMPX_LT_F64 : VOPCX_F64 <vopc<0x31>, "v_cmpx_lt_f64">;
+defm V_CMPX_EQ_F64 : VOPCX_F64 <vopc<0x32>, "v_cmpx_eq_f64">;
+defm V_CMPX_LE_F64 : VOPCX_F64 <vopc<0x33>, "v_cmpx_le_f64">;
+defm V_CMPX_GT_F64 : VOPCX_F64 <vopc<0x34>, "v_cmpx_gt_f64">;
+defm V_CMPX_LG_F64 : VOPCX_F64 <vopc<0x35>, "v_cmpx_lg_f64">;
+defm V_CMPX_GE_F64 : VOPCX_F64 <vopc<0x36>, "v_cmpx_ge_f64">;
+defm V_CMPX_O_F64 : VOPCX_F64 <vopc<0x37>, "v_cmpx_o_f64">;
+defm V_CMPX_U_F64 : VOPCX_F64 <vopc<0x38>, "v_cmpx_u_f64">;
+defm V_CMPX_NGE_F64 : VOPCX_F64 <vopc<0x39>, "v_cmpx_nge_f64">;
+defm V_CMPX_NLG_F64 : VOPCX_F64 <vopc<0x3a>, "v_cmpx_nlg_f64">;
+defm V_CMPX_NGT_F64 : VOPCX_F64 <vopc<0x3b>, "v_cmpx_ngt_f64">;
+defm V_CMPX_NLE_F64 : VOPCX_F64 <vopc<0x3c>, "v_cmpx_nle_f64">;
+defm V_CMPX_NEQ_F64 : VOPCX_F64 <vopc<0x3d>, "v_cmpx_neq_f64">;
+defm V_CMPX_NLT_F64 : VOPCX_F64 <vopc<0x3e>, "v_cmpx_nlt_f64">;
+defm V_CMPX_TRU_F64 : VOPCX_F64 <vopc<0x3f>, "v_cmpx_tru_f64">;
} // End hasSideEffects = 1
-defm V_CMPS_F_F32 : VOPC_F32 <vopc<0x40>, "V_CMPS_F_F32">;
-defm V_CMPS_LT_F32 : VOPC_F32 <vopc<0x41>, "V_CMPS_LT_F32">;
-defm V_CMPS_EQ_F32 : VOPC_F32 <vopc<0x42>, "V_CMPS_EQ_F32">;
-defm V_CMPS_LE_F32 : VOPC_F32 <vopc<0x43>, "V_CMPS_LE_F32">;
-defm V_CMPS_GT_F32 : VOPC_F32 <vopc<0x44>, "V_CMPS_GT_F32">;
-defm V_CMPS_LG_F32 : VOPC_F32 <vopc<0x45>, "V_CMPS_LG_F32">;
-defm V_CMPS_GE_F32 : VOPC_F32 <vopc<0x46>, "V_CMPS_GE_F32">;
-defm V_CMPS_O_F32 : VOPC_F32 <vopc<0x47>, "V_CMPS_O_F32">;
-defm V_CMPS_U_F32 : VOPC_F32 <vopc<0x48>, "V_CMPS_U_F32">;
-defm V_CMPS_NGE_F32 : VOPC_F32 <vopc<0x49>, "V_CMPS_NGE_F32">;
-defm V_CMPS_NLG_F32 : VOPC_F32 <vopc<0x4a>, "V_CMPS_NLG_F32">;
-defm V_CMPS_NGT_F32 : VOPC_F32 <vopc<0x4b>, "V_CMPS_NGT_F32">;
-defm V_CMPS_NLE_F32 : VOPC_F32 <vopc<0x4c>, "V_CMPS_NLE_F32">;
-defm V_CMPS_NEQ_F32 : VOPC_F32 <vopc<0x4d>, "V_CMPS_NEQ_F32">;
-defm V_CMPS_NLT_F32 : VOPC_F32 <vopc<0x4e>, "V_CMPS_NLT_F32">;
-defm V_CMPS_TRU_F32 : VOPC_F32 <vopc<0x4f>, "V_CMPS_TRU_F32">;
+defm V_CMPS_F_F32 : VOPC_F32 <vopc<0x40>, "v_cmps_f_f32">;
+defm V_CMPS_LT_F32 : VOPC_F32 <vopc<0x41>, "v_cmps_lt_f32">;
+defm V_CMPS_EQ_F32 : VOPC_F32 <vopc<0x42>, "v_cmps_eq_f32">;
+defm V_CMPS_LE_F32 : VOPC_F32 <vopc<0x43>, "v_cmps_le_f32">;
+defm V_CMPS_GT_F32 : VOPC_F32 <vopc<0x44>, "v_cmps_gt_f32">;
+defm V_CMPS_LG_F32 : VOPC_F32 <vopc<0x45>, "v_cmps_lg_f32">;
+defm V_CMPS_GE_F32 : VOPC_F32 <vopc<0x46>, "v_cmps_ge_f32">;
+defm V_CMPS_O_F32 : VOPC_F32 <vopc<0x47>, "v_cmps_o_f32">;
+defm V_CMPS_U_F32 : VOPC_F32 <vopc<0x48>, "v_cmps_u_f32">;
+defm V_CMPS_NGE_F32 : VOPC_F32 <vopc<0x49>, "v_cmps_nge_f32">;
+defm V_CMPS_NLG_F32 : VOPC_F32 <vopc<0x4a>, "v_cmps_nlg_f32">;
+defm V_CMPS_NGT_F32 : VOPC_F32 <vopc<0x4b>, "v_cmps_ngt_f32">;
+defm V_CMPS_NLE_F32 : VOPC_F32 <vopc<0x4c>, "v_cmps_nle_f32">;
+defm V_CMPS_NEQ_F32 : VOPC_F32 <vopc<0x4d>, "v_cmps_neq_f32">;
+defm V_CMPS_NLT_F32 : VOPC_F32 <vopc<0x4e>, "v_cmps_nlt_f32">;
+defm V_CMPS_TRU_F32 : VOPC_F32 <vopc<0x4f>, "v_cmps_tru_f32">;
let hasSideEffects = 1 in {
-defm V_CMPSX_F_F32 : VOPCX_F32 <vopc<0x50>, "V_CMPSX_F_F32">;
-defm V_CMPSX_LT_F32 : VOPCX_F32 <vopc<0x51>, "V_CMPSX_LT_F32">;
-defm V_CMPSX_EQ_F32 : VOPCX_F32 <vopc<0x52>, "V_CMPSX_EQ_F32">;
-defm V_CMPSX_LE_F32 : VOPCX_F32 <vopc<0x53>, "V_CMPSX_LE_F32">;
-defm V_CMPSX_GT_F32 : VOPCX_F32 <vopc<0x54>, "V_CMPSX_GT_F32">;
-defm V_CMPSX_LG_F32 : VOPCX_F32 <vopc<0x55>, "V_CMPSX_LG_F32">;
-defm V_CMPSX_GE_F32 : VOPCX_F32 <vopc<0x56>, "V_CMPSX_GE_F32">;
-defm V_CMPSX_O_F32 : VOPCX_F32 <vopc<0x57>, "V_CMPSX_O_F32">;
-defm V_CMPSX_U_F32 : VOPCX_F32 <vopc<0x58>, "V_CMPSX_U_F32">;
-defm V_CMPSX_NGE_F32 : VOPCX_F32 <vopc<0x59>, "V_CMPSX_NGE_F32">;
-defm V_CMPSX_NLG_F32 : VOPCX_F32 <vopc<0x5a>, "V_CMPSX_NLG_F32">;
-defm V_CMPSX_NGT_F32 : VOPCX_F32 <vopc<0x5b>, "V_CMPSX_NGT_F32">;
-defm V_CMPSX_NLE_F32 : VOPCX_F32 <vopc<0x5c>, "V_CMPSX_NLE_F32">;
-defm V_CMPSX_NEQ_F32 : VOPCX_F32 <vopc<0x5d>, "V_CMPSX_NEQ_F32">;
-defm V_CMPSX_NLT_F32 : VOPCX_F32 <vopc<0x5e>, "V_CMPSX_NLT_F32">;
-defm V_CMPSX_TRU_F32 : VOPCX_F32 <vopc<0x5f>, "V_CMPSX_TRU_F32">;
+defm V_CMPSX_F_F32 : VOPCX_F32 <vopc<0x50>, "v_cmpsx_f_f32">;
+defm V_CMPSX_LT_F32 : VOPCX_F32 <vopc<0x51>, "v_cmpsx_lt_f32">;
+defm V_CMPSX_EQ_F32 : VOPCX_F32 <vopc<0x52>, "v_cmpsx_eq_f32">;
+defm V_CMPSX_LE_F32 : VOPCX_F32 <vopc<0x53>, "v_cmpsx_le_f32">;
+defm V_CMPSX_GT_F32 : VOPCX_F32 <vopc<0x54>, "v_cmpsx_gt_f32">;
+defm V_CMPSX_LG_F32 : VOPCX_F32 <vopc<0x55>, "v_cmpsx_lg_f32">;
+defm V_CMPSX_GE_F32 : VOPCX_F32 <vopc<0x56>, "v_cmpsx_ge_f32">;
+defm V_CMPSX_O_F32 : VOPCX_F32 <vopc<0x57>, "v_cmpsx_o_f32">;
+defm V_CMPSX_U_F32 : VOPCX_F32 <vopc<0x58>, "v_cmpsx_u_f32">;
+defm V_CMPSX_NGE_F32 : VOPCX_F32 <vopc<0x59>, "v_cmpsx_nge_f32">;
+defm V_CMPSX_NLG_F32 : VOPCX_F32 <vopc<0x5a>, "v_cmpsx_nlg_f32">;
+defm V_CMPSX_NGT_F32 : VOPCX_F32 <vopc<0x5b>, "v_cmpsx_ngt_f32">;
+defm V_CMPSX_NLE_F32 : VOPCX_F32 <vopc<0x5c>, "v_cmpsx_nle_f32">;
+defm V_CMPSX_NEQ_F32 : VOPCX_F32 <vopc<0x5d>, "v_cmpsx_neq_f32">;
+defm V_CMPSX_NLT_F32 : VOPCX_F32 <vopc<0x5e>, "v_cmpsx_nlt_f32">;
+defm V_CMPSX_TRU_F32 : VOPCX_F32 <vopc<0x5f>, "v_cmpsx_tru_f32">;
} // End hasSideEffects = 1
-defm V_CMPS_F_F64 : VOPC_F64 <vopc<0x60>, "V_CMPS_F_F64">;
-defm V_CMPS_LT_F64 : VOPC_F64 <vopc<0x61>, "V_CMPS_LT_F64">;
-defm V_CMPS_EQ_F64 : VOPC_F64 <vopc<0x62>, "V_CMPS_EQ_F64">;
-defm V_CMPS_LE_F64 : VOPC_F64 <vopc<0x63>, "V_CMPS_LE_F64">;
-defm V_CMPS_GT_F64 : VOPC_F64 <vopc<0x64>, "V_CMPS_GT_F64">;
-defm V_CMPS_LG_F64 : VOPC_F64 <vopc<0x65>, "V_CMPS_LG_F64">;
-defm V_CMPS_GE_F64 : VOPC_F64 <vopc<0x66>, "V_CMPS_GE_F64">;
-defm V_CMPS_O_F64 : VOPC_F64 <vopc<0x67>, "V_CMPS_O_F64">;
-defm V_CMPS_U_F64 : VOPC_F64 <vopc<0x68>, "V_CMPS_U_F64">;
-defm V_CMPS_NGE_F64 : VOPC_F64 <vopc<0x69>, "V_CMPS_NGE_F64">;
-defm V_CMPS_NLG_F64 : VOPC_F64 <vopc<0x6a>, "V_CMPS_NLG_F64">;
-defm V_CMPS_NGT_F64 : VOPC_F64 <vopc<0x6b>, "V_CMPS_NGT_F64">;
-defm V_CMPS_NLE_F64 : VOPC_F64 <vopc<0x6c>, "V_CMPS_NLE_F64">;
-defm V_CMPS_NEQ_F64 : VOPC_F64 <vopc<0x6d>, "V_CMPS_NEQ_F64">;
-defm V_CMPS_NLT_F64 : VOPC_F64 <vopc<0x6e>, "V_CMPS_NLT_F64">;
-defm V_CMPS_TRU_F64 : VOPC_F64 <vopc<0x6f>, "V_CMPS_TRU_F64">;
+defm V_CMPS_F_F64 : VOPC_F64 <vopc<0x60>, "v_cmps_f_f64">;
+defm V_CMPS_LT_F64 : VOPC_F64 <vopc<0x61>, "v_cmps_lt_f64">;
+defm V_CMPS_EQ_F64 : VOPC_F64 <vopc<0x62>, "v_cmps_eq_f64">;
+defm V_CMPS_LE_F64 : VOPC_F64 <vopc<0x63>, "v_cmps_le_f64">;
+defm V_CMPS_GT_F64 : VOPC_F64 <vopc<0x64>, "v_cmps_gt_f64">;
+defm V_CMPS_LG_F64 : VOPC_F64 <vopc<0x65>, "v_cmps_lg_f64">;
+defm V_CMPS_GE_F64 : VOPC_F64 <vopc<0x66>, "v_cmps_ge_f64">;
+defm V_CMPS_O_F64 : VOPC_F64 <vopc<0x67>, "v_cmps_o_f64">;
+defm V_CMPS_U_F64 : VOPC_F64 <vopc<0x68>, "v_cmps_u_f64">;
+defm V_CMPS_NGE_F64 : VOPC_F64 <vopc<0x69>, "v_cmps_nge_f64">;
+defm V_CMPS_NLG_F64 : VOPC_F64 <vopc<0x6a>, "v_cmps_nlg_f64">;
+defm V_CMPS_NGT_F64 : VOPC_F64 <vopc<0x6b>, "v_cmps_ngt_f64">;
+defm V_CMPS_NLE_F64 : VOPC_F64 <vopc<0x6c>, "v_cmps_nle_f64">;
+defm V_CMPS_NEQ_F64 : VOPC_F64 <vopc<0x6d>, "v_cmps_neq_f64">;
+defm V_CMPS_NLT_F64 : VOPC_F64 <vopc<0x6e>, "v_cmps_nlt_f64">;
+defm V_CMPS_TRU_F64 : VOPC_F64 <vopc<0x6f>, "v_cmps_tru_f64">;
let hasSideEffects = 1, Defs = [EXEC] in {
-defm V_CMPSX_F_F64 : VOPC_F64 <vopc<0x70>, "V_CMPSX_F_F64">;
-defm V_CMPSX_LT_F64 : VOPC_F64 <vopc<0x71>, "V_CMPSX_LT_F64">;
-defm V_CMPSX_EQ_F64 : VOPC_F64 <vopc<0x72>, "V_CMPSX_EQ_F64">;
-defm V_CMPSX_LE_F64 : VOPC_F64 <vopc<0x73>, "V_CMPSX_LE_F64">;
-defm V_CMPSX_GT_F64 : VOPC_F64 <vopc<0x74>, "V_CMPSX_GT_F64">;
-defm V_CMPSX_LG_F64 : VOPC_F64 <vopc<0x75>, "V_CMPSX_LG_F64">;
-defm V_CMPSX_GE_F64 : VOPC_F64 <vopc<0x76>, "V_CMPSX_GE_F64">;
-defm V_CMPSX_O_F64 : VOPC_F64 <vopc<0x77>, "V_CMPSX_O_F64">;
-defm V_CMPSX_U_F64 : VOPC_F64 <vopc<0x78>, "V_CMPSX_U_F64">;
-defm V_CMPSX_NGE_F64 : VOPC_F64 <vopc<0x79>, "V_CMPSX_NGE_F64">;
-defm V_CMPSX_NLG_F64 : VOPC_F64 <vopc<0x7a>, "V_CMPSX_NLG_F64">;
-defm V_CMPSX_NGT_F64 : VOPC_F64 <vopc<0x7b>, "V_CMPSX_NGT_F64">;
-defm V_CMPSX_NLE_F64 : VOPC_F64 <vopc<0x7c>, "V_CMPSX_NLE_F64">;
-defm V_CMPSX_NEQ_F64 : VOPC_F64 <vopc<0x7d>, "V_CMPSX_NEQ_F64">;
-defm V_CMPSX_NLT_F64 : VOPC_F64 <vopc<0x7e>, "V_CMPSX_NLT_F64">;
-defm V_CMPSX_TRU_F64 : VOPC_F64 <vopc<0x7f>, "V_CMPSX_TRU_F64">;
+defm V_CMPSX_F_F64 : VOPC_F64 <vopc<0x70>, "v_cmpsx_f_f64">;
+defm V_CMPSX_LT_F64 : VOPC_F64 <vopc<0x71>, "v_cmpsx_lt_f64">;
+defm V_CMPSX_EQ_F64 : VOPC_F64 <vopc<0x72>, "v_cmpsx_eq_f64">;
+defm V_CMPSX_LE_F64 : VOPC_F64 <vopc<0x73>, "v_cmpsx_le_f64">;
+defm V_CMPSX_GT_F64 : VOPC_F64 <vopc<0x74>, "v_cmpsx_gt_f64">;
+defm V_CMPSX_LG_F64 : VOPC_F64 <vopc<0x75>, "v_cmpsx_lg_f64">;
+defm V_CMPSX_GE_F64 : VOPC_F64 <vopc<0x76>, "v_cmpsx_ge_f64">;
+defm V_CMPSX_O_F64 : VOPC_F64 <vopc<0x77>, "v_cmpsx_o_f64">;
+defm V_CMPSX_U_F64 : VOPC_F64 <vopc<0x78>, "v_cmpsx_u_f64">;
+defm V_CMPSX_NGE_F64 : VOPC_F64 <vopc<0x79>, "v_cmpsx_nge_f64">;
+defm V_CMPSX_NLG_F64 : VOPC_F64 <vopc<0x7a>, "v_cmpsx_nlg_f64">;
+defm V_CMPSX_NGT_F64 : VOPC_F64 <vopc<0x7b>, "v_cmpsx_ngt_f64">;
+defm V_CMPSX_NLE_F64 : VOPC_F64 <vopc<0x7c>, "v_cmpsx_nle_f64">;
+defm V_CMPSX_NEQ_F64 : VOPC_F64 <vopc<0x7d>, "v_cmpsx_neq_f64">;
+defm V_CMPSX_NLT_F64 : VOPC_F64 <vopc<0x7e>, "v_cmpsx_nlt_f64">;
+defm V_CMPSX_TRU_F64 : VOPC_F64 <vopc<0x7f>, "v_cmpsx_tru_f64">;
} // End hasSideEffects = 1, Defs = [EXEC]
-defm V_CMP_F_I32 : VOPC_I32 <vopc<0x80>, "V_CMP_F_I32">;
-defm V_CMP_LT_I32 : VOPC_I32 <vopc<0x81>, "V_CMP_LT_I32", COND_SLT>;
-defm V_CMP_EQ_I32 : VOPC_I32 <vopc<0x82>, "V_CMP_EQ_I32", COND_EQ>;
-defm V_CMP_LE_I32 : VOPC_I32 <vopc<0x83>, "V_CMP_LE_I32", COND_SLE>;
-defm V_CMP_GT_I32 : VOPC_I32 <vopc<0x84>, "V_CMP_GT_I32", COND_SGT>;
-defm V_CMP_NE_I32 : VOPC_I32 <vopc<0x85>, "V_CMP_NE_I32", COND_NE>;
-defm V_CMP_GE_I32 : VOPC_I32 <vopc<0x86>, "V_CMP_GE_I32", COND_SGE>;
-defm V_CMP_T_I32 : VOPC_I32 <vopc<0x87>, "V_CMP_T_I32">;
+defm V_CMP_F_I32 : VOPC_I32 <vopc<0x80>, "v_cmp_f_i32">;
+defm V_CMP_LT_I32 : VOPC_I32 <vopc<0x81>, "v_cmp_lt_i32", COND_SLT>;
+defm V_CMP_EQ_I32 : VOPC_I32 <vopc<0x82>, "v_cmp_eq_i32", COND_EQ>;
+defm V_CMP_LE_I32 : VOPC_I32 <vopc<0x83>, "v_cmp_le_i32", COND_SLE>;
+defm V_CMP_GT_I32 : VOPC_I32 <vopc<0x84>, "v_cmp_gt_i32", COND_SGT>;
+defm V_CMP_NE_I32 : VOPC_I32 <vopc<0x85>, "v_cmp_ne_i32", COND_NE>;
+defm V_CMP_GE_I32 : VOPC_I32 <vopc<0x86>, "v_cmp_ge_i32", COND_SGE>;
+defm V_CMP_T_I32 : VOPC_I32 <vopc<0x87>, "v_cmp_t_i32">;
let hasSideEffects = 1 in {
-defm V_CMPX_F_I32 : VOPCX_I32 <vopc<0x90>, "V_CMPX_F_I32">;
-defm V_CMPX_LT_I32 : VOPCX_I32 <vopc<0x91>, "V_CMPX_LT_I32">;
-defm V_CMPX_EQ_I32 : VOPCX_I32 <vopc<0x92>, "V_CMPX_EQ_I32">;
-defm V_CMPX_LE_I32 : VOPCX_I32 <vopc<0x93>, "V_CMPX_LE_I32">;
-defm V_CMPX_GT_I32 : VOPCX_I32 <vopc<0x94>, "V_CMPX_GT_I32">;
-defm V_CMPX_NE_I32 : VOPCX_I32 <vopc<0x95>, "V_CMPX_NE_I32">;
-defm V_CMPX_GE_I32 : VOPCX_I32 <vopc<0x96>, "V_CMPX_GE_I32">;
-defm V_CMPX_T_I32 : VOPCX_I32 <vopc<0x97>, "V_CMPX_T_I32">;
+defm V_CMPX_F_I32 : VOPCX_I32 <vopc<0x90>, "v_cmpx_f_i32">;
+defm V_CMPX_LT_I32 : VOPCX_I32 <vopc<0x91>, "v_cmpx_lt_i32">;
+defm V_CMPX_EQ_I32 : VOPCX_I32 <vopc<0x92>, "v_cmpx_eq_i32">;
+defm V_CMPX_LE_I32 : VOPCX_I32 <vopc<0x93>, "v_cmpx_le_i32">;
+defm V_CMPX_GT_I32 : VOPCX_I32 <vopc<0x94>, "v_cmpx_gt_i32">;
+defm V_CMPX_NE_I32 : VOPCX_I32 <vopc<0x95>, "v_cmpx_ne_i32">;
+defm V_CMPX_GE_I32 : VOPCX_I32 <vopc<0x96>, "v_cmpx_ge_i32">;
+defm V_CMPX_T_I32 : VOPCX_I32 <vopc<0x97>, "v_cmpx_t_i32">;
} // End hasSideEffects = 1
-defm V_CMP_F_I64 : VOPC_I64 <vopc<0xa0>, "V_CMP_F_I64">;
-defm V_CMP_LT_I64 : VOPC_I64 <vopc<0xa1>, "V_CMP_LT_I64", COND_SLT>;
-defm V_CMP_EQ_I64 : VOPC_I64 <vopc<0xa2>, "V_CMP_EQ_I64", COND_EQ>;
-defm V_CMP_LE_I64 : VOPC_I64 <vopc<0xa3>, "V_CMP_LE_I64", COND_SLE>;
-defm V_CMP_GT_I64 : VOPC_I64 <vopc<0xa4>, "V_CMP_GT_I64", COND_SGT>;
-defm V_CMP_NE_I64 : VOPC_I64 <vopc<0xa5>, "V_CMP_NE_I64", COND_NE>;
-defm V_CMP_GE_I64 : VOPC_I64 <vopc<0xa6>, "V_CMP_GE_I64", COND_SGE>;
-defm V_CMP_T_I64 : VOPC_I64 <vopc<0xa7>, "V_CMP_T_I64">;
+defm V_CMP_F_I64 : VOPC_I64 <vopc<0xa0>, "v_cmp_f_i64">;
+defm V_CMP_LT_I64 : VOPC_I64 <vopc<0xa1>, "v_cmp_lt_i64", COND_SLT>;
+defm V_CMP_EQ_I64 : VOPC_I64 <vopc<0xa2>, "v_cmp_eq_i64", COND_EQ>;
+defm V_CMP_LE_I64 : VOPC_I64 <vopc<0xa3>, "v_cmp_le_i64", COND_SLE>;
+defm V_CMP_GT_I64 : VOPC_I64 <vopc<0xa4>, "v_cmp_gt_i64", COND_SGT>;
+defm V_CMP_NE_I64 : VOPC_I64 <vopc<0xa5>, "v_cmp_ne_i64", COND_NE>;
+defm V_CMP_GE_I64 : VOPC_I64 <vopc<0xa6>, "v_cmp_ge_i64", COND_SGE>;
+defm V_CMP_T_I64 : VOPC_I64 <vopc<0xa7>, "v_cmp_t_i64">;
let hasSideEffects = 1 in {
-defm V_CMPX_F_I64 : VOPCX_I64 <vopc<0xb0>, "V_CMPX_F_I64">;
-defm V_CMPX_LT_I64 : VOPCX_I64 <vopc<0xb1>, "V_CMPX_LT_I64">;
-defm V_CMPX_EQ_I64 : VOPCX_I64 <vopc<0xb2>, "V_CMPX_EQ_I64">;
-defm V_CMPX_LE_I64 : VOPCX_I64 <vopc<0xb3>, "V_CMPX_LE_I64">;
-defm V_CMPX_GT_I64 : VOPCX_I64 <vopc<0xb4>, "V_CMPX_GT_I64">;
-defm V_CMPX_NE_I64 : VOPCX_I64 <vopc<0xb5>, "V_CMPX_NE_I64">;
-defm V_CMPX_GE_I64 : VOPCX_I64 <vopc<0xb6>, "V_CMPX_GE_I64">;
-defm V_CMPX_T_I64 : VOPCX_I64 <vopc<0xb7>, "V_CMPX_T_I64">;
+defm V_CMPX_F_I64 : VOPCX_I64 <vopc<0xb0>, "v_cmpx_f_i64">;
+defm V_CMPX_LT_I64 : VOPCX_I64 <vopc<0xb1>, "v_cmpx_lt_i64">;
+defm V_CMPX_EQ_I64 : VOPCX_I64 <vopc<0xb2>, "v_cmpx_eq_i64">;
+defm V_CMPX_LE_I64 : VOPCX_I64 <vopc<0xb3>, "v_cmpx_le_i64">;
+defm V_CMPX_GT_I64 : VOPCX_I64 <vopc<0xb4>, "v_cmpx_gt_i64">;
+defm V_CMPX_NE_I64 : VOPCX_I64 <vopc<0xb5>, "v_cmpx_ne_i64">;
+defm V_CMPX_GE_I64 : VOPCX_I64 <vopc<0xb6>, "v_cmpx_ge_i64">;
+defm V_CMPX_T_I64 : VOPCX_I64 <vopc<0xb7>, "v_cmpx_t_i64">;
} // End hasSideEffects = 1
-defm V_CMP_F_U32 : VOPC_I32 <vopc<0xc0>, "V_CMP_F_U32">;
-defm V_CMP_LT_U32 : VOPC_I32 <vopc<0xc1>, "V_CMP_LT_U32", COND_ULT>;
-defm V_CMP_EQ_U32 : VOPC_I32 <vopc<0xc2>, "V_CMP_EQ_U32", COND_EQ>;
-defm V_CMP_LE_U32 : VOPC_I32 <vopc<0xc3>, "V_CMP_LE_U32", COND_ULE>;
-defm V_CMP_GT_U32 : VOPC_I32 <vopc<0xc4>, "V_CMP_GT_U32", COND_UGT>;
-defm V_CMP_NE_U32 : VOPC_I32 <vopc<0xc5>, "V_CMP_NE_U32", COND_NE>;
-defm V_CMP_GE_U32 : VOPC_I32 <vopc<0xc6>, "V_CMP_GE_U32", COND_UGE>;
-defm V_CMP_T_U32 : VOPC_I32 <vopc<0xc7>, "V_CMP_T_U32">;
+defm V_CMP_F_U32 : VOPC_I32 <vopc<0xc0>, "v_cmp_f_u32">;
+defm V_CMP_LT_U32 : VOPC_I32 <vopc<0xc1>, "v_cmp_lt_u32", COND_ULT>;
+defm V_CMP_EQ_U32 : VOPC_I32 <vopc<0xc2>, "v_cmp_eq_u32", COND_EQ>;
+defm V_CMP_LE_U32 : VOPC_I32 <vopc<0xc3>, "v_cmp_le_u32", COND_ULE>;
+defm V_CMP_GT_U32 : VOPC_I32 <vopc<0xc4>, "v_cmp_gt_u32", COND_UGT>;
+defm V_CMP_NE_U32 : VOPC_I32 <vopc<0xc5>, "v_cmp_ne_u32", COND_NE>;
+defm V_CMP_GE_U32 : VOPC_I32 <vopc<0xc6>, "v_cmp_ge_u32", COND_UGE>;
+defm V_CMP_T_U32 : VOPC_I32 <vopc<0xc7>, "v_cmp_t_u32">;
let hasSideEffects = 1 in {
-defm V_CMPX_F_U32 : VOPCX_I32 <vopc<0xd0>, "V_CMPX_F_U32">;
-defm V_CMPX_LT_U32 : VOPCX_I32 <vopc<0xd1>, "V_CMPX_LT_U32">;
-defm V_CMPX_EQ_U32 : VOPCX_I32 <vopc<0xd2>, "V_CMPX_EQ_U32">;
-defm V_CMPX_LE_U32 : VOPCX_I32 <vopc<0xd3>, "V_CMPX_LE_U32">;
-defm V_CMPX_GT_U32 : VOPCX_I32 <vopc<0xd4>, "V_CMPX_GT_U32">;
-defm V_CMPX_NE_U32 : VOPCX_I32 <vopc<0xd5>, "V_CMPX_NE_U32">;
-defm V_CMPX_GE_U32 : VOPCX_I32 <vopc<0xd6>, "V_CMPX_GE_U32">;
-defm V_CMPX_T_U32 : VOPCX_I32 <vopc<0xd7>, "V_CMPX_T_U32">;
+defm V_CMPX_F_U32 : VOPCX_I32 <vopc<0xd0>, "v_cmpx_f_u32">;
+defm V_CMPX_LT_U32 : VOPCX_I32 <vopc<0xd1>, "v_cmpx_lt_u32">;
+defm V_CMPX_EQ_U32 : VOPCX_I32 <vopc<0xd2>, "v_cmpx_eq_u32">;
+defm V_CMPX_LE_U32 : VOPCX_I32 <vopc<0xd3>, "v_cmpx_le_u32">;
+defm V_CMPX_GT_U32 : VOPCX_I32 <vopc<0xd4>, "v_cmpx_gt_u32">;
+defm V_CMPX_NE_U32 : VOPCX_I32 <vopc<0xd5>, "v_cmpx_ne_u32">;
+defm V_CMPX_GE_U32 : VOPCX_I32 <vopc<0xd6>, "v_cmpx_ge_u32">;
+defm V_CMPX_T_U32 : VOPCX_I32 <vopc<0xd7>, "v_cmpx_t_u32">;
} // End hasSideEffects = 1
-defm V_CMP_F_U64 : VOPC_I64 <vopc<0xe0>, "V_CMP_F_U64">;
-defm V_CMP_LT_U64 : VOPC_I64 <vopc<0xe1>, "V_CMP_LT_U64", COND_ULT>;
-defm V_CMP_EQ_U64 : VOPC_I64 <vopc<0xe2>, "V_CMP_EQ_U64", COND_EQ>;
-defm V_CMP_LE_U64 : VOPC_I64 <vopc<0xe3>, "V_CMP_LE_U64", COND_ULE>;
-defm V_CMP_GT_U64 : VOPC_I64 <vopc<0xe4>, "V_CMP_GT_U64", COND_UGT>;
-defm V_CMP_NE_U64 : VOPC_I64 <vopc<0xe5>, "V_CMP_NE_U64", COND_NE>;
-defm V_CMP_GE_U64 : VOPC_I64 <vopc<0xe6>, "V_CMP_GE_U64", COND_UGE>;
-defm V_CMP_T_U64 : VOPC_I64 <vopc<0xe7>, "V_CMP_T_U64">;
+defm V_CMP_F_U64 : VOPC_I64 <vopc<0xe0>, "v_cmp_f_u64">;
+defm V_CMP_LT_U64 : VOPC_I64 <vopc<0xe1>, "v_cmp_lt_u64", COND_ULT>;
+defm V_CMP_EQ_U64 : VOPC_I64 <vopc<0xe2>, "v_cmp_eq_u64", COND_EQ>;
+defm V_CMP_LE_U64 : VOPC_I64 <vopc<0xe3>, "v_cmp_le_u64", COND_ULE>;
+defm V_CMP_GT_U64 : VOPC_I64 <vopc<0xe4>, "v_cmp_gt_u64", COND_UGT>;
+defm V_CMP_NE_U64 : VOPC_I64 <vopc<0xe5>, "v_cmp_ne_u64", COND_NE>;
+defm V_CMP_GE_U64 : VOPC_I64 <vopc<0xe6>, "v_cmp_ge_u64", COND_UGE>;
+defm V_CMP_T_U64 : VOPC_I64 <vopc<0xe7>, "v_cmp_t_u64">;
let hasSideEffects = 1 in {
-defm V_CMPX_F_U64 : VOPCX_I64 <vopc<0xf0>, "V_CMPX_F_U64">;
-defm V_CMPX_LT_U64 : VOPCX_I64 <vopc<0xf1>, "V_CMPX_LT_U64">;
-defm V_CMPX_EQ_U64 : VOPCX_I64 <vopc<0xf2>, "V_CMPX_EQ_U64">;
-defm V_CMPX_LE_U64 : VOPCX_I64 <vopc<0xf3>, "V_CMPX_LE_U64">;
-defm V_CMPX_GT_U64 : VOPCX_I64 <vopc<0xf4>, "V_CMPX_GT_U64">;
-defm V_CMPX_NE_U64 : VOPCX_I64 <vopc<0xf5>, "V_CMPX_NE_U64">;
-defm V_CMPX_GE_U64 : VOPCX_I64 <vopc<0xf6>, "V_CMPX_GE_U64">;
-defm V_CMPX_T_U64 : VOPCX_I64 <vopc<0xf7>, "V_CMPX_T_U64">;
+defm V_CMPX_F_U64 : VOPCX_I64 <vopc<0xf0>, "v_cmpx_f_u64">;
+defm V_CMPX_LT_U64 : VOPCX_I64 <vopc<0xf1>, "v_cmpx_lt_u64">;
+defm V_CMPX_EQ_U64 : VOPCX_I64 <vopc<0xf2>, "v_cmpx_eq_u64">;
+defm V_CMPX_LE_U64 : VOPCX_I64 <vopc<0xf3>, "v_cmpx_le_u64">;
+defm V_CMPX_GT_U64 : VOPCX_I64 <vopc<0xf4>, "v_cmpx_gt_u64">;
+defm V_CMPX_NE_U64 : VOPCX_I64 <vopc<0xf5>, "v_cmpx_ne_u64">;
+defm V_CMPX_GE_U64 : VOPCX_I64 <vopc<0xf6>, "v_cmpx_ge_u64">;
+defm V_CMPX_T_U64 : VOPCX_I64 <vopc<0xf7>, "v_cmpx_t_u64">;
} // End hasSideEffects = 1
-defm V_CMP_CLASS_F32 : VOPC_F32 <vopc<0x88>, "V_CMP_CLASS_F32">;
+defm V_CMP_CLASS_F32 : VOPC_F32 <vopc<0x88>, "v_cmp_class_f32">;
let hasSideEffects = 1 in {
-defm V_CMPX_CLASS_F32 : VOPCX_F32 <vopc<0x98>, "V_CMPX_CLASS_F32">;
+defm V_CMPX_CLASS_F32 : VOPCX_F32 <vopc<0x98>, "v_cmpx_class_f32">;
} // End hasSideEffects = 1
-defm V_CMP_CLASS_F64 : VOPC_F64 <vopc<0xa8>, "V_CMP_CLASS_F64">;
+defm V_CMP_CLASS_F64 : VOPC_F64 <vopc<0xa8>, "v_cmp_class_f64">;
let hasSideEffects = 1 in {
-defm V_CMPX_CLASS_F64 : VOPCX_F64 <vopc<0xb8>, "V_CMPX_CLASS_F64">;
+defm V_CMPX_CLASS_F64 : VOPCX_F64 <vopc<0xb8>, "v_cmpx_class_f64">;
} // End hasSideEffects = 1
} // End isCompare = 1
//===----------------------------------------------------------------------===//
-def DS_ADD_U32 : DS_1A1D_NORET <0x0, "DS_ADD_U32", VReg_32>;
-def DS_SUB_U32 : DS_1A1D_NORET <0x1, "DS_SUB_U32", VReg_32>;
-def DS_RSUB_U32 : DS_1A1D_NORET <0x2, "DS_RSUB_U32", VReg_32>;
-def DS_INC_U32 : DS_1A1D_NORET <0x3, "DS_INC_U32", VReg_32>;
-def DS_DEC_U32 : DS_1A1D_NORET <0x4, "DS_DEC_U32", VReg_32>;
-def DS_MIN_I32 : DS_1A1D_NORET <0x5, "DS_MIN_I32", VReg_32>;
-def DS_MAX_I32 : DS_1A1D_NORET <0x6, "DS_MAX_I32", VReg_32>;
-def DS_MIN_U32 : DS_1A1D_NORET <0x7, "DS_MIN_U32", VReg_32>;
-def DS_MAX_U32 : DS_1A1D_NORET <0x8, "DS_MAX_U32", VReg_32>;
-def DS_AND_B32 : DS_1A1D_NORET <0x9, "DS_AND_B32", VReg_32>;
-def DS_OR_B32 : DS_1A1D_NORET <0xa, "DS_OR_B32", VReg_32>;
-def DS_XOR_B32 : DS_1A1D_NORET <0xb, "DS_XOR_B32", VReg_32>;
-def DS_MSKOR_B32 : DS_1A1D_NORET <0xc, "DS_MSKOR_B32", VReg_32>;
-def DS_CMPST_B32 : DS_1A2D_NORET <0x10, "DS_CMPST_B32", VReg_32>;
-def DS_CMPST_F32 : DS_1A2D_NORET <0x11, "DS_CMPST_F32", VReg_32>;
-def DS_MIN_F32 : DS_1A1D_NORET <0x12, "DS_MIN_F32", VReg_32>;
-def DS_MAX_F32 : DS_1A1D_NORET <0x13, "DS_MAX_F32", VReg_32>;
-
-def DS_ADD_RTN_U32 : DS_1A1D_RET <0x20, "DS_ADD_RTN_U32", VReg_32, "DS_ADD_U32">;
-def DS_SUB_RTN_U32 : DS_1A1D_RET <0x21, "DS_SUB_RTN_U32", VReg_32, "DS_SUB_U32">;
-def DS_RSUB_RTN_U32 : DS_1A1D_RET <0x22, "DS_RSUB_RTN_U32", VReg_32, "DS_RSUB_U32">;
-def DS_INC_RTN_U32 : DS_1A1D_RET <0x23, "DS_INC_RTN_U32", VReg_32, "DS_INC_U32">;
-def DS_DEC_RTN_U32 : DS_1A1D_RET <0x24, "DS_DEC_RTN_U32", VReg_32, "DS_DEC_U32">;
-def DS_MIN_RTN_I32 : DS_1A1D_RET <0x25, "DS_MIN_RTN_I32", VReg_32, "DS_MIN_I32">;
-def DS_MAX_RTN_I32 : DS_1A1D_RET <0x26, "DS_MAX_RTN_I32", VReg_32, "DS_MAX_I32">;
-def DS_MIN_RTN_U32 : DS_1A1D_RET <0x27, "DS_MIN_RTN_U32", VReg_32, "DS_MIN_U32">;
-def DS_MAX_RTN_U32 : DS_1A1D_RET <0x28, "DS_MAX_RTN_U32", VReg_32, "DS_MAX_U32">;
-def DS_AND_RTN_B32 : DS_1A1D_RET <0x29, "DS_AND_RTN_B32", VReg_32, "DS_AND_B32">;
-def DS_OR_RTN_B32 : DS_1A1D_RET <0x2a, "DS_OR_RTN_B32", VReg_32, "DS_OR_B32">;
-def DS_XOR_RTN_B32 : DS_1A1D_RET <0x2b, "DS_XOR_RTN_B32", VReg_32, "DS_XOR_B32">;
-def DS_MSKOR_RTN_B32 : DS_1A1D_RET <0x2c, "DS_MSKOR_RTN_B32", VReg_32, "DS_MSKOR_B32">;
-def DS_WRXCHG_RTN_B32 : DS_1A1D_RET <0x2d, "DS_WRXCHG_RTN_B32", VReg_32>;
-//def DS_WRXCHG2_RTN_B32 : DS_2A0D_RET <0x2e, "DS_WRXCHG2_RTN_B32", VReg_32, "DS_WRXCHG2_B32">;
-//def DS_WRXCHG2ST64_RTN_B32 : DS_2A0D_RET <0x2f, "DS_WRXCHG2_RTN_B32", VReg_32, "DS_WRXCHG2ST64_B32">;
-def DS_CMPST_RTN_B32 : DS_1A2D_RET <0x30, "DS_CMPST_RTN_B32", VReg_32, "DS_CMPST_B32">;
-def DS_CMPST_RTN_F32 : DS_1A2D_RET <0x31, "DS_CMPST_RTN_F32", VReg_32, "DS_CMPST_F32">;
-def DS_MIN_RTN_F32 : DS_1A1D_RET <0x32, "DS_MIN_RTN_F32", VReg_32, "DS_MIN_F32">;
-def DS_MAX_RTN_F32 : DS_1A1D_RET <0x33, "DS_MAX_RTN_F32", VReg_32, "DS_MAX_F32">;
+def DS_ADD_U32 : DS_1A1D_NORET <0x0, "ds_add_u32", VReg_32>;
+def DS_SUB_U32 : DS_1A1D_NORET <0x1, "ds_sub_u32", VReg_32>;
+def DS_RSUB_U32 : DS_1A1D_NORET <0x2, "ds_rsub_u32", VReg_32>;
+def DS_INC_U32 : DS_1A1D_NORET <0x3, "ds_inc_u32", VReg_32>;
+def DS_DEC_U32 : DS_1A1D_NORET <0x4, "ds_dec_u32", VReg_32>;
+def DS_MIN_I32 : DS_1A1D_NORET <0x5, "ds_min_i32", VReg_32>;
+def DS_MAX_I32 : DS_1A1D_NORET <0x6, "ds_max_i32", VReg_32>;
+def DS_MIN_U32 : DS_1A1D_NORET <0x7, "ds_min_u32", VReg_32>;
+def DS_MAX_U32 : DS_1A1D_NORET <0x8, "ds_max_u32", VReg_32>;
+def DS_AND_B32 : DS_1A1D_NORET <0x9, "ds_and_b32", VReg_32>;
+def DS_OR_B32 : DS_1A1D_NORET <0xa, "ds_or_b32", VReg_32>;
+def DS_XOR_B32 : DS_1A1D_NORET <0xb, "ds_xor_b32", VReg_32>;
+def DS_MSKOR_B32 : DS_1A1D_NORET <0xc, "ds_mskor_b32", VReg_32>;
+def DS_CMPST_B32 : DS_1A2D_NORET <0x10, "ds_cmpst_b32", VReg_32>;
+def DS_CMPST_F32 : DS_1A2D_NORET <0x11, "ds_cmpst_f32", VReg_32>;
+def DS_MIN_F32 : DS_1A1D_NORET <0x12, "ds_min_f32", VReg_32>;
+def DS_MAX_F32 : DS_1A1D_NORET <0x13, "ds_max_f32", VReg_32>;
+
+def DS_ADD_RTN_U32 : DS_1A1D_RET <0x20, "ds_add_rtn_u32", VReg_32, "ds_add_u32">;
+def DS_SUB_RTN_U32 : DS_1A1D_RET <0x21, "ds_sub_rtn_u32", VReg_32, "ds_sub_u32">;
+def DS_RSUB_RTN_U32 : DS_1A1D_RET <0x22, "ds_rsub_rtn_u32", VReg_32, "ds_rsub_u32">;
+def DS_INC_RTN_U32 : DS_1A1D_RET <0x23, "ds_inc_rtn_u32", VReg_32, "ds_inc_u32">;
+def DS_DEC_RTN_U32 : DS_1A1D_RET <0x24, "ds_dec_rtn_u32", VReg_32, "ds_dec_u32">;
+def DS_MIN_RTN_I32 : DS_1A1D_RET <0x25, "ds_min_rtn_i32", VReg_32, "ds_min_i32">;
+def DS_MAX_RTN_I32 : DS_1A1D_RET <0x26, "ds_max_rtn_i32", VReg_32, "ds_max_i32">;
+def DS_MIN_RTN_U32 : DS_1A1D_RET <0x27, "ds_min_rtn_u32", VReg_32, "ds_min_u32">;
+def DS_MAX_RTN_U32 : DS_1A1D_RET <0x28, "ds_max_rtn_u32", VReg_32, "ds_max_u32">;
+def DS_AND_RTN_B32 : DS_1A1D_RET <0x29, "ds_and_rtn_b32", VReg_32, "ds_and_b32">;
+def DS_OR_RTN_B32 : DS_1A1D_RET <0x2a, "ds_or_rtn_b32", VReg_32, "ds_or_b32">;
+def DS_XOR_RTN_B32 : DS_1A1D_RET <0x2b, "ds_xor_rtn_b32", VReg_32, "ds_xor_b32">;
+def DS_MSKOR_RTN_B32 : DS_1A1D_RET <0x2c, "ds_mskor_rtn_b32", VReg_32, "ds_mskor_b32">;
+def DS_WRXCHG_RTN_B32 : DS_1A1D_RET <0x2d, "ds_wrxchg_rtn_b32", VReg_32>;
+//def DS_WRXCHG2_RTN_B32 : DS_2A0D_RET <0x2e, "ds_wrxchg2_rtn_b32", VReg_32, "ds_wrxchg2_b32">;
+//def DS_WRXCHG2ST64_RTN_B32 : DS_2A0D_RET <0x2f, "ds_wrxchg2_rtn_b32", VReg_32, "ds_wrxchg2st64_b32">;
+def DS_CMPST_RTN_B32 : DS_1A2D_RET <0x30, "ds_cmpst_rtn_b32", VReg_32, "ds_cmpst_b32">;
+def DS_CMPST_RTN_F32 : DS_1A2D_RET <0x31, "ds_cmpst_rtn_f32", VReg_32, "ds_cmpst_f32">;
+def DS_MIN_RTN_F32 : DS_1A1D_RET <0x32, "ds_min_rtn_f32", VReg_32, "ds_min_f32">;
+def DS_MAX_RTN_F32 : DS_1A1D_RET <0x33, "ds_max_rtn_f32", VReg_32, "ds_max_f32">;
let SubtargetPredicate = isCI in {
-def DS_WRAP_RTN_F32 : DS_1A1D_RET <0x34, "DS_WRAP_RTN_F32", VReg_32, "DS_WRAP_F32">;
+def DS_WRAP_RTN_F32 : DS_1A1D_RET <0x34, "ds_wrap_rtn_f32", VReg_32, "ds_wrap_f32">;
} // End isCI
-def DS_ADD_U64 : DS_1A1D_NORET <0x40, "DS_ADD_U64", VReg_64>;
-def DS_SUB_U64 : DS_1A1D_NORET <0x41, "DS_SUB_U64", VReg_64>;
-def DS_RSUB_U64 : DS_1A1D_NORET <0x42, "DS_RSUB_U64", VReg_64>;
-def DS_INC_U64 : DS_1A1D_NORET <0x43, "DS_INC_U64", VReg_64>;
-def DS_DEC_U64 : DS_1A1D_NORET <0x44, "DS_DEC_U64", VReg_64>;
-def DS_MIN_I64 : DS_1A1D_NORET <0x45, "DS_MIN_I64", VReg_64>;
-def DS_MAX_I64 : DS_1A1D_NORET <0x46, "DS_MAX_I64", VReg_64>;
-def DS_MIN_U64 : DS_1A1D_NORET <0x47, "DS_MIN_U64", VReg_64>;
-def DS_MAX_U64 : DS_1A1D_NORET <0x48, "DS_MAX_U64", VReg_64>;
-def DS_AND_B64 : DS_1A1D_NORET <0x49, "DS_AND_B64", VReg_64>;
-def DS_OR_B64 : DS_1A1D_NORET <0x4a, "DS_OR_B64", VReg_64>;
-def DS_XOR_B64 : DS_1A1D_NORET <0x4b, "DS_XOR_B64", VReg_64>;
-def DS_MSKOR_B64 : DS_1A1D_NORET <0x4c, "DS_MSKOR_B64", VReg_64>;
-def DS_CMPST_B64 : DS_1A2D_NORET <0x50, "DS_CMPST_B64", VReg_64>;
-def DS_CMPST_F64 : DS_1A2D_NORET <0x51, "DS_CMPST_F64", VReg_64>;
-def DS_MIN_F64 : DS_1A1D_NORET <0x52, "DS_MIN_F64", VReg_64>;
-def DS_MAX_F64 : DS_1A1D_NORET <0x53, "DS_MAX_F64", VReg_64>;
-
-def DS_ADD_RTN_U64 : DS_1A1D_RET <0x60, "DS_ADD_RTN_U64", VReg_64, "DS_ADD_U64">;
-def DS_SUB_RTN_U64 : DS_1A1D_RET <0x61, "DS_SUB_RTN_U64", VReg_64, "DS_SUB_U64">;
-def DS_RSUB_RTN_U64 : DS_1A1D_RET <0x62, "DS_RSUB_RTN_U64", VReg_64, "DS_RSUB_U64">;
-def DS_INC_RTN_U64 : DS_1A1D_RET <0x63, "DS_INC_RTN_U64", VReg_64, "DS_INC_U64">;
-def DS_DEC_RTN_U64 : DS_1A1D_RET <0x64, "DS_DEC_RTN_U64", VReg_64, "DS_DEC_U64">;
-def DS_MIN_RTN_I64 : DS_1A1D_RET <0x65, "DS_MIN_RTN_I64", VReg_64, "DS_MIN_I64">;
-def DS_MAX_RTN_I64 : DS_1A1D_RET <0x66, "DS_MAX_RTN_I64", VReg_64, "DS_MAX_I64">;
-def DS_MIN_RTN_U64 : DS_1A1D_RET <0x67, "DS_MIN_RTN_U64", VReg_64, "DS_MIN_U64">;
-def DS_MAX_RTN_U64 : DS_1A1D_RET <0x68, "DS_MAX_RTN_U64", VReg_64, "DS_MAX_U64">;
-def DS_AND_RTN_B64 : DS_1A1D_RET <0x69, "DS_AND_RTN_B64", VReg_64, "DS_AND_B64">;
-def DS_OR_RTN_B64 : DS_1A1D_RET <0x6a, "DS_OR_RTN_B64", VReg_64, "DS_OR_B64">;
-def DS_XOR_RTN_B64 : DS_1A1D_RET <0x6b, "DS_XOR_RTN_B64", VReg_64, "DS_XOR_B64">;
-def DS_MSKOR_RTN_B64 : DS_1A1D_RET <0x6c, "DS_MSKOR_RTN_B64", VReg_64, "DS_MSKOR_B64">;
-def DS_WRXCHG_RTN_B64 : DS_1A1D_RET <0x6d, "DS_WRXCHG_RTN_B64", VReg_64, "DS_WRXCHG_B64">;
-//def DS_WRXCHG2_RTN_B64 : DS_2A0D_RET <0x6e, "DS_WRXCHG2_RTN_B64", VReg_64, "DS_WRXCHG2_B64">;
-//def DS_WRXCHG2ST64_RTN_B64 : DS_2A0D_RET <0x6f, "DS_WRXCHG2_RTN_B64", VReg_64, "DS_WRXCHG2ST64_B64">;
-def DS_CMPST_RTN_B64 : DS_1A2D_RET <0x70, "DS_CMPST_RTN_B64", VReg_64, "DS_CMPST_B64">;
-def DS_CMPST_RTN_F64 : DS_1A2D_RET <0x71, "DS_CMPST_RTN_F64", VReg_64, "DS_CMPST_F64">;
-def DS_MIN_RTN_F64 : DS_1A1D_RET <0x72, "DS_MIN_F64", VReg_64, "DS_MIN_F64">;
-def DS_MAX_RTN_F64 : DS_1A1D_RET <0x73, "DS_MAX_F64", VReg_64, "DS_MAX_F64">;
+def DS_ADD_U64 : DS_1A1D_NORET <0x40, "ds_add_u64", VReg_64>;
+def DS_SUB_U64 : DS_1A1D_NORET <0x41, "ds_sub_u64", VReg_64>;
+def DS_RSUB_U64 : DS_1A1D_NORET <0x42, "ds_rsub_u64", VReg_64>;
+def DS_INC_U64 : DS_1A1D_NORET <0x43, "ds_inc_u64", VReg_64>;
+def DS_DEC_U64 : DS_1A1D_NORET <0x44, "ds_dec_u64", VReg_64>;
+def DS_MIN_I64 : DS_1A1D_NORET <0x45, "ds_min_i64", VReg_64>;
+def DS_MAX_I64 : DS_1A1D_NORET <0x46, "ds_max_i64", VReg_64>;
+def DS_MIN_U64 : DS_1A1D_NORET <0x47, "ds_min_u64", VReg_64>;
+def DS_MAX_U64 : DS_1A1D_NORET <0x48, "ds_max_u64", VReg_64>;
+def DS_AND_B64 : DS_1A1D_NORET <0x49, "ds_and_b64", VReg_64>;
+def DS_OR_B64 : DS_1A1D_NORET <0x4a, "ds_or_b64", VReg_64>;
+def DS_XOR_B64 : DS_1A1D_NORET <0x4b, "ds_xor_b64", VReg_64>;
+def DS_MSKOR_B64 : DS_1A1D_NORET <0x4c, "ds_mskor_b64", VReg_64>;
+def DS_CMPST_B64 : DS_1A2D_NORET <0x50, "ds_cmpst_b64", VReg_64>;
+def DS_CMPST_F64 : DS_1A2D_NORET <0x51, "ds_cmpst_f64", VReg_64>;
+def DS_MIN_F64 : DS_1A1D_NORET <0x52, "ds_min_f64", VReg_64>;
+def DS_MAX_F64 : DS_1A1D_NORET <0x53, "ds_max_f64", VReg_64>;
+
+def DS_ADD_RTN_U64 : DS_1A1D_RET <0x60, "ds_add_rtn_u64", VReg_64, "ds_add_u64">;
+def DS_SUB_RTN_U64 : DS_1A1D_RET <0x61, "ds_sub_rtn_u64", VReg_64, "ds_sub_u64">;
+def DS_RSUB_RTN_U64 : DS_1A1D_RET <0x62, "ds_rsub_rtn_u64", VReg_64, "ds_rsub_u64">;
+def DS_INC_RTN_U64 : DS_1A1D_RET <0x63, "ds_inc_rtn_u64", VReg_64, "ds_inc_u64">;
+def DS_DEC_RTN_U64 : DS_1A1D_RET <0x64, "ds_dec_rtn_u64", VReg_64, "ds_dec_u64">;
+def DS_MIN_RTN_I64 : DS_1A1D_RET <0x65, "ds_min_rtn_i64", VReg_64, "ds_min_i64">;
+def DS_MAX_RTN_I64 : DS_1A1D_RET <0x66, "ds_max_rtn_i64", VReg_64, "ds_max_i64">;
+def DS_MIN_RTN_U64 : DS_1A1D_RET <0x67, "ds_min_rtn_u64", VReg_64, "ds_min_u64">;
+def DS_MAX_RTN_U64 : DS_1A1D_RET <0x68, "ds_max_rtn_u64", VReg_64, "ds_max_u64">;
+def DS_AND_RTN_B64 : DS_1A1D_RET <0x69, "ds_and_rtn_b64", VReg_64, "ds_and_b64">;
+def DS_OR_RTN_B64 : DS_1A1D_RET <0x6a, "ds_or_rtn_b64", VReg_64, "ds_or_b64">;
+def DS_XOR_RTN_B64 : DS_1A1D_RET <0x6b, "ds_xor_rtn_b64", VReg_64, "ds_xor_b64">;
+def DS_MSKOR_RTN_B64 : DS_1A1D_RET <0x6c, "ds_mskor_rtn_b64", VReg_64, "ds_mskor_b64">;
+def DS_WRXCHG_RTN_B64 : DS_1A1D_RET <0x6d, "ds_wrxchg_rtn_b64", VReg_64, "ds_wrxchg_b64">;
+//def DS_WRXCHG2_RTN_B64 : DS_2A0D_RET <0x6e, "ds_wrxchg2_rtn_b64", VReg_64, "ds_wrxchg2_b64">;
+//def DS_WRXCHG2ST64_RTN_B64 : DS_2A0D_RET <0x6f, "ds_wrxchg2_rtn_b64", VReg_64, "ds_wrxchg2st64_b64">;
+def DS_CMPST_RTN_B64 : DS_1A2D_RET <0x70, "ds_cmpst_rtn_b64", VReg_64, "ds_cmpst_b64">;
+def DS_CMPST_RTN_F64 : DS_1A2D_RET <0x71, "ds_cmpst_rtn_f64", VReg_64, "ds_cmpst_f64">;
+def DS_MIN_RTN_F64 : DS_1A1D_RET <0x72, "ds_min_f64", VReg_64, "ds_min_f64">;
+def DS_MAX_RTN_F64 : DS_1A1D_RET <0x73, "ds_max_f64", VReg_64, "ds_max_f64">;
//let SubtargetPredicate = isCI in {
// DS_CONDXCHG32_RTN_B64
// TODO: _SRC2_* forms
-def DS_WRITE_B32 : DS_Store_Helper <0x0000000d, "DS_WRITE_B32", VReg_32>;
-def DS_WRITE_B8 : DS_Store_Helper <0x00000001e, "DS_WRITE_B8", VReg_32>;
-def DS_WRITE_B16 : DS_Store_Helper <0x00000001f, "DS_WRITE_B16", VReg_32>;
-def DS_WRITE_B64 : DS_Store_Helper <0x00000004d, "DS_WRITE_B64", VReg_64>;
+def DS_WRITE_B32 : DS_Store_Helper <0x0000000d, "ds_write_b32", VReg_32>;
+def DS_WRITE_B8 : DS_Store_Helper <0x00000001e, "ds_write_b8", VReg_32>;
+def DS_WRITE_B16 : DS_Store_Helper <0x00000001f, "ds_write_b16", VReg_32>;
+def DS_WRITE_B64 : DS_Store_Helper <0x00000004d, "ds_write_b64", VReg_64>;
-def DS_READ_B32 : DS_Load_Helper <0x00000036, "DS_READ_B32", VReg_32>;
-def DS_READ_I8 : DS_Load_Helper <0x00000039, "DS_READ_I8", VReg_32>;
-def DS_READ_U8 : DS_Load_Helper <0x0000003a, "DS_READ_U8", VReg_32>;
-def DS_READ_I16 : DS_Load_Helper <0x0000003b, "DS_READ_I16", VReg_32>;
-def DS_READ_U16 : DS_Load_Helper <0x0000003c, "DS_READ_U16", VReg_32>;
-def DS_READ_B64 : DS_Load_Helper <0x00000076, "DS_READ_B64", VReg_64>;
+def DS_READ_B32 : DS_Load_Helper <0x00000036, "ds_read_b32", VReg_32>;
+def DS_READ_I8 : DS_Load_Helper <0x00000039, "ds_read_i8", VReg_32>;
+def DS_READ_U8 : DS_Load_Helper <0x0000003a, "ds_read_u8", VReg_32>;
+def DS_READ_I16 : DS_Load_Helper <0x0000003b, "ds_read_i16", VReg_32>;
+def DS_READ_U16 : DS_Load_Helper <0x0000003c, "ds_read_u16", VReg_32>;
+def DS_READ_B64 : DS_Load_Helper <0x00000076, "ds_read_b64", VReg_64>;
// 2 forms.
-def DS_WRITE2_B32 : DS_Store2_Helper <0x0000000E, "DS_WRITE2_B32", VReg_32>;
-def DS_WRITE2ST64_B32 : DS_Store2_Helper <0x0000000F, "DS_WRITE2ST64_B32", VReg_32>;
-def DS_WRITE2_B64 : DS_Store2_Helper <0x0000004E, "DS_WRITE2_B64", VReg_64>;
-def DS_WRITE2ST64_B64 : DS_Store2_Helper <0x0000004F, "DS_WRITE2ST64_B64", VReg_64>;
+def DS_WRITE2_B32 : DS_Store2_Helper <0x0000000E, "ds_write2_b32", VReg_32>;
+def DS_WRITE2ST64_B32 : DS_Store2_Helper <0x0000000F, "ds_write2st64_b32", VReg_32>;
+def DS_WRITE2_B64 : DS_Store2_Helper <0x0000004E, "ds_write2_b64", VReg_64>;
+def DS_WRITE2ST64_B64 : DS_Store2_Helper <0x0000004F, "ds_write2st64_b64", VReg_64>;
-def DS_READ2_B32 : DS_Load2_Helper <0x00000037, "DS_READ2_B32", VReg_64>;
-def DS_READ2ST64_B32 : DS_Load2_Helper <0x00000038, "DS_READ2ST64_B32", VReg_64>;
-def DS_READ2_B64 : DS_Load2_Helper <0x00000075, "DS_READ2_B64", VReg_128>;
-def DS_READ2ST64_B64 : DS_Load2_Helper <0x00000076, "DS_READ2ST64_B64", VReg_128>;
+def DS_READ2_B32 : DS_Load2_Helper <0x00000037, "ds_read2_b32", VReg_64>;
+def DS_READ2ST64_B32 : DS_Load2_Helper <0x00000038, "ds_read2st64_b32", VReg_64>;
+def DS_READ2_B64 : DS_Load2_Helper <0x00000075, "ds_read2_b64", VReg_128>;
+def DS_READ2ST64_B64 : DS_Load2_Helper <0x00000076, "ds_read2st64_b64", VReg_128>;
//===----------------------------------------------------------------------===//
// MUBUF Instructions
//===----------------------------------------------------------------------===//
-//def BUFFER_LOAD_FORMAT_X : MUBUF_ <0x00000000, "BUFFER_LOAD_FORMAT_X", []>;
-//def BUFFER_LOAD_FORMAT_XY : MUBUF_ <0x00000001, "BUFFER_LOAD_FORMAT_XY", []>;
-//def BUFFER_LOAD_FORMAT_XYZ : MUBUF_ <0x00000002, "BUFFER_LOAD_FORMAT_XYZ", []>;
-defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <0x00000003, "BUFFER_LOAD_FORMAT_XYZW", VReg_128>;
-//def BUFFER_STORE_FORMAT_X : MUBUF_ <0x00000004, "BUFFER_STORE_FORMAT_X", []>;
-//def BUFFER_STORE_FORMAT_XY : MUBUF_ <0x00000005, "BUFFER_STORE_FORMAT_XY", []>;
-//def BUFFER_STORE_FORMAT_XYZ : MUBUF_ <0x00000006, "BUFFER_STORE_FORMAT_XYZ", []>;
-//def BUFFER_STORE_FORMAT_XYZW : MUBUF_ <0x00000007, "BUFFER_STORE_FORMAT_XYZW", []>;
+//def BUFFER_LOAD_FORMAT_X : MUBUF_ <0x00000000, "buffer_load_format_x", []>;
+//def BUFFER_LOAD_FORMAT_XY : MUBUF_ <0x00000001, "buffer_load_format_xy", []>;
+//def BUFFER_LOAD_FORMAT_XYZ : MUBUF_ <0x00000002, "buffer_load_format_xyz", []>;
+defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <0x00000003, "buffer_load_format_xyzw", VReg_128>;
+//def BUFFER_STORE_FORMAT_X : MUBUF_ <0x00000004, "buffer_store_format_x", []>;
+//def BUFFER_STORE_FORMAT_XY : MUBUF_ <0x00000005, "buffer_store_format_xy", []>;
+//def BUFFER_STORE_FORMAT_XYZ : MUBUF_ <0x00000006, "buffer_store_format_xyz", []>;
+//def BUFFER_STORE_FORMAT_XYZW : MUBUF_ <0x00000007, "buffer_store_format_xyzw", []>;
defm BUFFER_LOAD_UBYTE : MUBUF_Load_Helper <
- 0x00000008, "BUFFER_LOAD_UBYTE", VReg_32, i32, az_extloadi8_global
+ 0x00000008, "buffer_load_ubyte", VReg_32, i32, az_extloadi8_global
>;
defm BUFFER_LOAD_SBYTE : MUBUF_Load_Helper <
- 0x00000009, "BUFFER_LOAD_SBYTE", VReg_32, i32, sextloadi8_global
+ 0x00000009, "buffer_load_sbyte", VReg_32, i32, sextloadi8_global
>;
defm BUFFER_LOAD_USHORT : MUBUF_Load_Helper <
- 0x0000000a, "BUFFER_LOAD_USHORT", VReg_32, i32, az_extloadi16_global
+ 0x0000000a, "buffer_load_ushort", VReg_32, i32, az_extloadi16_global
>;
defm BUFFER_LOAD_SSHORT : MUBUF_Load_Helper <
- 0x0000000b, "BUFFER_LOAD_SSHORT", VReg_32, i32, sextloadi16_global
+ 0x0000000b, "buffer_load_sshort", VReg_32, i32, sextloadi16_global
>;
defm BUFFER_LOAD_DWORD : MUBUF_Load_Helper <
- 0x0000000c, "BUFFER_LOAD_DWORD", VReg_32, i32, global_load
+ 0x0000000c, "buffer_load_dword", VReg_32, i32, global_load
>;
defm BUFFER_LOAD_DWORDX2 : MUBUF_Load_Helper <
- 0x0000000d, "BUFFER_LOAD_DWORDX2", VReg_64, v2i32, global_load
+ 0x0000000d, "buffer_load_dwordx2", VReg_64, v2i32, global_load
>;
defm BUFFER_LOAD_DWORDX4 : MUBUF_Load_Helper <
- 0x0000000e, "BUFFER_LOAD_DWORDX4", VReg_128, v4i32, global_load
+ 0x0000000e, "buffer_load_dwordx4", VReg_128, v4i32, global_load
>;
defm BUFFER_STORE_BYTE : MUBUF_Store_Helper <
- 0x00000018, "BUFFER_STORE_BYTE", VReg_32, i32, truncstorei8_global
+ 0x00000018, "buffer_store_byte", VReg_32, i32, truncstorei8_global
>;
defm BUFFER_STORE_SHORT : MUBUF_Store_Helper <
- 0x0000001a, "BUFFER_STORE_SHORT", VReg_32, i32, truncstorei16_global
+ 0x0000001a, "buffer_store_short", VReg_32, i32, truncstorei16_global
>;
defm BUFFER_STORE_DWORD : MUBUF_Store_Helper <
- 0x0000001c, "BUFFER_STORE_DWORD", VReg_32, i32, global_store
+ 0x0000001c, "buffer_store_dword", VReg_32, i32, global_store
>;
defm BUFFER_STORE_DWORDX2 : MUBUF_Store_Helper <
- 0x0000001d, "BUFFER_STORE_DWORDX2", VReg_64, v2i32, global_store
+ 0x0000001d, "buffer_store_dwordx2", VReg_64, v2i32, global_store
>;
defm BUFFER_STORE_DWORDX4 : MUBUF_Store_Helper <
- 0x0000001e, "BUFFER_STORE_DWORDX4", VReg_128, v4i32, global_store
+ 0x0000001e, "buffer_store_dwordx4", VReg_128, v4i32, global_store
>;
-//def BUFFER_ATOMIC_SWAP : MUBUF_ <0x00000030, "BUFFER_ATOMIC_SWAP", []>;
+//def BUFFER_ATOMIC_SWAP : MUBUF_ <0x00000030, "buffer_atomic_swap", []>;
defm BUFFER_ATOMIC_SWAP : MUBUF_Atomic <
- 0x00000030, "BUFFER_ATOMIC_SWAP", VReg_32, i32, atomic_swap_global
+ 0x00000030, "buffer_atomic_swap", VReg_32, i32, atomic_swap_global
>;
-//def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <0x00000031, "BUFFER_ATOMIC_CMPSWAP", []>;
+//def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <0x00000031, "buffer_atomic_cmpswap", []>;
defm BUFFER_ATOMIC_ADD : MUBUF_Atomic <
- 0x00000032, "BUFFER_ATOMIC_ADD", VReg_32, i32, atomic_add_global
+ 0x00000032, "buffer_atomic_add", VReg_32, i32, atomic_add_global
>;
defm BUFFER_ATOMIC_SUB : MUBUF_Atomic <
- 0x00000033, "BUFFER_ATOMIC_SUB", VReg_32, i32, atomic_sub_global
+ 0x00000033, "buffer_atomic_sub", VReg_32, i32, atomic_sub_global
>;
-//def BUFFER_ATOMIC_RSUB : MUBUF_ <0x00000034, "BUFFER_ATOMIC_RSUB", []>;
+//def BUFFER_ATOMIC_RSUB : MUBUF_ <0x00000034, "buffer_atomic_rsub", []>;
defm BUFFER_ATOMIC_SMIN : MUBUF_Atomic <
- 0x00000035, "BUFFER_ATOMIC_SMIN", VReg_32, i32, atomic_min_global
+ 0x00000035, "buffer_atomic_smin", VReg_32, i32, atomic_min_global
>;
defm BUFFER_ATOMIC_UMIN : MUBUF_Atomic <
- 0x00000036, "BUFFER_ATOMIC_UMIN", VReg_32, i32, atomic_umin_global
+ 0x00000036, "buffer_atomic_umin", VReg_32, i32, atomic_umin_global
>;
defm BUFFER_ATOMIC_SMAX : MUBUF_Atomic <
- 0x00000037, "BUFFER_ATOMIC_SMAX", VReg_32, i32, atomic_max_global
+ 0x00000037, "buffer_atomic_smax", VReg_32, i32, atomic_max_global
>;
defm BUFFER_ATOMIC_UMAX : MUBUF_Atomic <
- 0x00000038, "BUFFER_ATOMIC_UMAX", VReg_32, i32, atomic_umax_global
+ 0x00000038, "buffer_atomic_umax", VReg_32, i32, atomic_umax_global
>;
defm BUFFER_ATOMIC_AND : MUBUF_Atomic <
- 0x00000039, "BUFFER_ATOMIC_AND", VReg_32, i32, atomic_and_global
+ 0x00000039, "buffer_atomic_and", VReg_32, i32, atomic_and_global
>;
defm BUFFER_ATOMIC_OR : MUBUF_Atomic <
- 0x0000003a, "BUFFER_ATOMIC_OR", VReg_32, i32, atomic_or_global
+ 0x0000003a, "buffer_atomic_or", VReg_32, i32, atomic_or_global
>;
defm BUFFER_ATOMIC_XOR : MUBUF_Atomic <
- 0x0000003b, "BUFFER_ATOMIC_XOR", VReg_32, i32, atomic_xor_global
->;
-//def BUFFER_ATOMIC_INC : MUBUF_ <0x0000003c, "BUFFER_ATOMIC_INC", []>;
-//def BUFFER_ATOMIC_DEC : MUBUF_ <0x0000003d, "BUFFER_ATOMIC_DEC", []>;
-//def BUFFER_ATOMIC_FCMPSWAP : MUBUF_ <0x0000003e, "BUFFER_ATOMIC_FCMPSWAP", []>;
-//def BUFFER_ATOMIC_FMIN : MUBUF_ <0x0000003f, "BUFFER_ATOMIC_FMIN", []>;
-//def BUFFER_ATOMIC_FMAX : MUBUF_ <0x00000040, "BUFFER_ATOMIC_FMAX", []>;
-//def BUFFER_ATOMIC_SWAP_X2 : MUBUF_X2 <0x00000050, "BUFFER_ATOMIC_SWAP_X2", []>;
-//def BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_X2 <0x00000051, "BUFFER_ATOMIC_CMPSWAP_X2", []>;
-//def BUFFER_ATOMIC_ADD_X2 : MUBUF_X2 <0x00000052, "BUFFER_ATOMIC_ADD_X2", []>;
-//def BUFFER_ATOMIC_SUB_X2 : MUBUF_X2 <0x00000053, "BUFFER_ATOMIC_SUB_X2", []>;
-//def BUFFER_ATOMIC_RSUB_X2 : MUBUF_X2 <0x00000054, "BUFFER_ATOMIC_RSUB_X2", []>;
-//def BUFFER_ATOMIC_SMIN_X2 : MUBUF_X2 <0x00000055, "BUFFER_ATOMIC_SMIN_X2", []>;
-//def BUFFER_ATOMIC_UMIN_X2 : MUBUF_X2 <0x00000056, "BUFFER_ATOMIC_UMIN_X2", []>;
-//def BUFFER_ATOMIC_SMAX_X2 : MUBUF_X2 <0x00000057, "BUFFER_ATOMIC_SMAX_X2", []>;
-//def BUFFER_ATOMIC_UMAX_X2 : MUBUF_X2 <0x00000058, "BUFFER_ATOMIC_UMAX_X2", []>;
-//def BUFFER_ATOMIC_AND_X2 : MUBUF_X2 <0x00000059, "BUFFER_ATOMIC_AND_X2", []>;
-//def BUFFER_ATOMIC_OR_X2 : MUBUF_X2 <0x0000005a, "BUFFER_ATOMIC_OR_X2", []>;
-//def BUFFER_ATOMIC_XOR_X2 : MUBUF_X2 <0x0000005b, "BUFFER_ATOMIC_XOR_X2", []>;
-//def BUFFER_ATOMIC_INC_X2 : MUBUF_X2 <0x0000005c, "BUFFER_ATOMIC_INC_X2", []>;
-//def BUFFER_ATOMIC_DEC_X2 : MUBUF_X2 <0x0000005d, "BUFFER_ATOMIC_DEC_X2", []>;
-//def BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_X2 <0x0000005e, "BUFFER_ATOMIC_FCMPSWAP_X2", []>;
-//def BUFFER_ATOMIC_FMIN_X2 : MUBUF_X2 <0x0000005f, "BUFFER_ATOMIC_FMIN_X2", []>;
-//def BUFFER_ATOMIC_FMAX_X2 : MUBUF_X2 <0x00000060, "BUFFER_ATOMIC_FMAX_X2", []>;
-//def BUFFER_WBINVL1_SC : MUBUF_WBINVL1 <0x00000070, "BUFFER_WBINVL1_SC", []>;
-//def BUFFER_WBINVL1 : MUBUF_WBINVL1 <0x00000071, "BUFFER_WBINVL1", []>;
+ 0x0000003b, "buffer_atomic_xor", VReg_32, i32, atomic_xor_global
+>;
+//def BUFFER_ATOMIC_INC : MUBUF_ <0x0000003c, "buffer_atomic_inc", []>;
+//def BUFFER_ATOMIC_DEC : MUBUF_ <0x0000003d, "buffer_atomic_dec", []>;
+//def BUFFER_ATOMIC_FCMPSWAP : MUBUF_ <0x0000003e, "buffer_atomic_fcmpswap", []>;
+//def BUFFER_ATOMIC_FMIN : MUBUF_ <0x0000003f, "buffer_atomic_fmin", []>;
+//def BUFFER_ATOMIC_FMAX : MUBUF_ <0x00000040, "buffer_atomic_fmax", []>;
+//def BUFFER_ATOMIC_SWAP_X2 : MUBUF_X2 <0x00000050, "buffer_atomic_swap_x2", []>;
+//def BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_X2 <0x00000051, "buffer_atomic_cmpswap_x2", []>;
+//def BUFFER_ATOMIC_ADD_X2 : MUBUF_X2 <0x00000052, "buffer_atomic_add_x2", []>;
+//def BUFFER_ATOMIC_SUB_X2 : MUBUF_X2 <0x00000053, "buffer_atomic_sub_x2", []>;
+//def BUFFER_ATOMIC_RSUB_X2 : MUBUF_X2 <0x00000054, "buffer_atomic_rsub_x2", []>;
+//def BUFFER_ATOMIC_SMIN_X2 : MUBUF_X2 <0x00000055, "buffer_atomic_smin_x2", []>;
+//def BUFFER_ATOMIC_UMIN_X2 : MUBUF_X2 <0x00000056, "buffer_atomic_umin_x2", []>;
+//def BUFFER_ATOMIC_SMAX_X2 : MUBUF_X2 <0x00000057, "buffer_atomic_smax_x2", []>;
+//def BUFFER_ATOMIC_UMAX_X2 : MUBUF_X2 <0x00000058, "buffer_atomic_umax_x2", []>;
+//def BUFFER_ATOMIC_AND_X2 : MUBUF_X2 <0x00000059, "buffer_atomic_and_x2", []>;
+//def BUFFER_ATOMIC_OR_X2 : MUBUF_X2 <0x0000005a, "buffer_atomic_or_x2", []>;
+//def BUFFER_ATOMIC_XOR_X2 : MUBUF_X2 <0x0000005b, "buffer_atomic_xor_x2", []>;
+//def BUFFER_ATOMIC_INC_X2 : MUBUF_X2 <0x0000005c, "buffer_atomic_inc_x2", []>;
+//def BUFFER_ATOMIC_DEC_X2 : MUBUF_X2 <0x0000005d, "buffer_atomic_dec_x2", []>;
+//def BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_X2 <0x0000005e, "buffer_atomic_fcmpswap_x2", []>;
+//def BUFFER_ATOMIC_FMIN_X2 : MUBUF_X2 <0x0000005f, "buffer_atomic_fmin_x2", []>;
+//def BUFFER_ATOMIC_FMAX_X2 : MUBUF_X2 <0x00000060, "buffer_atomic_fmax_x2", []>;
+//def BUFFER_WBINVL1_SC : MUBUF_WBINVL1 <0x00000070, "buffer_wbinvl1_sc", []>;
+//def BUFFER_WBINVL1 : MUBUF_WBINVL1 <0x00000071, "buffer_wbinvl1", []>;
//===----------------------------------------------------------------------===//
// MTBUF Instructions
//===----------------------------------------------------------------------===//
-//def TBUFFER_LOAD_FORMAT_X : MTBUF_ <0x00000000, "TBUFFER_LOAD_FORMAT_X", []>;
-//def TBUFFER_LOAD_FORMAT_XY : MTBUF_ <0x00000001, "TBUFFER_LOAD_FORMAT_XY", []>;
-//def TBUFFER_LOAD_FORMAT_XYZ : MTBUF_ <0x00000002, "TBUFFER_LOAD_FORMAT_XYZ", []>;
-defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Helper <0x00000003, "TBUFFER_LOAD_FORMAT_XYZW", VReg_128>;
-defm TBUFFER_STORE_FORMAT_X : MTBUF_Store_Helper <0x00000004, "TBUFFER_STORE_FORMAT_X", VReg_32>;
-defm TBUFFER_STORE_FORMAT_XY : MTBUF_Store_Helper <0x00000005, "TBUFFER_STORE_FORMAT_XY", VReg_64>;
-defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Store_Helper <0x00000006, "TBUFFER_STORE_FORMAT_XYZ", VReg_128>;
-defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Store_Helper <0x00000007, "TBUFFER_STORE_FORMAT_XYZW", VReg_128>;
+//def TBUFFER_LOAD_FORMAT_X : MTBUF_ <0x00000000, "tbuffer_load_format_x", []>;
+//def TBUFFER_LOAD_FORMAT_XY : MTBUF_ <0x00000001, "tbuffer_load_format_xy", []>;
+//def TBUFFER_LOAD_FORMAT_XYZ : MTBUF_ <0x00000002, "tbuffer_load_format_xyz", []>;
+defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Helper <0x00000003, "tbuffer_load_format_xyzw", VReg_128>;
+defm TBUFFER_STORE_FORMAT_X : MTBUF_Store_Helper <0x00000004, "tbuffer_store_format_x", VReg_32>;
+defm TBUFFER_STORE_FORMAT_XY : MTBUF_Store_Helper <0x00000005, "tbuffer_store_format_xy", VReg_64>;
+defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Store_Helper <0x00000006, "tbuffer_store_format_xyz", VReg_128>;
+defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Store_Helper <0x00000007, "tbuffer_store_format_xyzw", VReg_128>;
//===----------------------------------------------------------------------===//
// MIMG Instructions
//===----------------------------------------------------------------------===//
-defm IMAGE_LOAD : MIMG_NoSampler <0x00000000, "IMAGE_LOAD">;
-defm IMAGE_LOAD_MIP : MIMG_NoSampler <0x00000001, "IMAGE_LOAD_MIP">;
-//def IMAGE_LOAD_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_PCK", 0x00000002>;
-//def IMAGE_LOAD_PCK_SGN : MIMG_NoPattern_ <"IMAGE_LOAD_PCK_SGN", 0x00000003>;
-//def IMAGE_LOAD_MIP_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_MIP_PCK", 0x00000004>;
-//def IMAGE_LOAD_MIP_PCK_SGN : MIMG_NoPattern_ <"IMAGE_LOAD_MIP_PCK_SGN", 0x00000005>;
-//def IMAGE_STORE : MIMG_NoPattern_ <"IMAGE_STORE", 0x00000008>;
-//def IMAGE_STORE_MIP : MIMG_NoPattern_ <"IMAGE_STORE_MIP", 0x00000009>;
-//def IMAGE_STORE_PCK : MIMG_NoPattern_ <"IMAGE_STORE_PCK", 0x0000000a>;
-//def IMAGE_STORE_MIP_PCK : MIMG_NoPattern_ <"IMAGE_STORE_MIP_PCK", 0x0000000b>;
-defm IMAGE_GET_RESINFO : MIMG_NoSampler <0x0000000e, "IMAGE_GET_RESINFO">;
-//def IMAGE_ATOMIC_SWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_SWAP", 0x0000000f>;
-//def IMAGE_ATOMIC_CMPSWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_CMPSWAP", 0x00000010>;
-//def IMAGE_ATOMIC_ADD : MIMG_NoPattern_ <"IMAGE_ATOMIC_ADD", 0x00000011>;
-//def IMAGE_ATOMIC_SUB : MIMG_NoPattern_ <"IMAGE_ATOMIC_SUB", 0x00000012>;
-//def IMAGE_ATOMIC_RSUB : MIMG_NoPattern_ <"IMAGE_ATOMIC_RSUB", 0x00000013>;
-//def IMAGE_ATOMIC_SMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_SMIN", 0x00000014>;
-//def IMAGE_ATOMIC_UMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_UMIN", 0x00000015>;
-//def IMAGE_ATOMIC_SMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_SMAX", 0x00000016>;
-//def IMAGE_ATOMIC_UMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_UMAX", 0x00000017>;
-//def IMAGE_ATOMIC_AND : MIMG_NoPattern_ <"IMAGE_ATOMIC_AND", 0x00000018>;
-//def IMAGE_ATOMIC_OR : MIMG_NoPattern_ <"IMAGE_ATOMIC_OR", 0x00000019>;
-//def IMAGE_ATOMIC_XOR : MIMG_NoPattern_ <"IMAGE_ATOMIC_XOR", 0x0000001a>;
-//def IMAGE_ATOMIC_INC : MIMG_NoPattern_ <"IMAGE_ATOMIC_INC", 0x0000001b>;
-//def IMAGE_ATOMIC_DEC : MIMG_NoPattern_ <"IMAGE_ATOMIC_DEC", 0x0000001c>;
-//def IMAGE_ATOMIC_FCMPSWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_FCMPSWAP", 0x0000001d>;
-//def IMAGE_ATOMIC_FMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_FMIN", 0x0000001e>;
-//def IMAGE_ATOMIC_FMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_FMAX", 0x0000001f>;
-defm IMAGE_SAMPLE : MIMG_Sampler <0x00000020, "IMAGE_SAMPLE">;
-defm IMAGE_SAMPLE_CL : MIMG_Sampler <0x00000021, "IMAGE_SAMPLE_CL">;
-defm IMAGE_SAMPLE_D : MIMG_Sampler <0x00000022, "IMAGE_SAMPLE_D">;
-defm IMAGE_SAMPLE_D_CL : MIMG_Sampler <0x00000023, "IMAGE_SAMPLE_D_CL">;
-defm IMAGE_SAMPLE_L : MIMG_Sampler <0x00000024, "IMAGE_SAMPLE_L">;
-defm IMAGE_SAMPLE_B : MIMG_Sampler <0x00000025, "IMAGE_SAMPLE_B">;
-defm IMAGE_SAMPLE_B_CL : MIMG_Sampler <0x00000026, "IMAGE_SAMPLE_B_CL">;
-defm IMAGE_SAMPLE_LZ : MIMG_Sampler <0x00000027, "IMAGE_SAMPLE_LZ">;
-defm IMAGE_SAMPLE_C : MIMG_Sampler <0x00000028, "IMAGE_SAMPLE_C">;
-defm IMAGE_SAMPLE_C_CL : MIMG_Sampler <0x00000029, "IMAGE_SAMPLE_C_CL">;
-defm IMAGE_SAMPLE_C_D : MIMG_Sampler <0x0000002a, "IMAGE_SAMPLE_C_D">;
-defm IMAGE_SAMPLE_C_D_CL : MIMG_Sampler <0x0000002b, "IMAGE_SAMPLE_C_D_CL">;
-defm IMAGE_SAMPLE_C_L : MIMG_Sampler <0x0000002c, "IMAGE_SAMPLE_C_L">;
-defm IMAGE_SAMPLE_C_B : MIMG_Sampler <0x0000002d, "IMAGE_SAMPLE_C_B">;
-defm IMAGE_SAMPLE_C_B_CL : MIMG_Sampler <0x0000002e, "IMAGE_SAMPLE_C_B_CL">;
-defm IMAGE_SAMPLE_C_LZ : MIMG_Sampler <0x0000002f, "IMAGE_SAMPLE_C_LZ">;
-defm IMAGE_SAMPLE_O : MIMG_Sampler <0x00000030, "IMAGE_SAMPLE_O">;
-defm IMAGE_SAMPLE_CL_O : MIMG_Sampler <0x00000031, "IMAGE_SAMPLE_CL_O">;
-defm IMAGE_SAMPLE_D_O : MIMG_Sampler <0x00000032, "IMAGE_SAMPLE_D_O">;
-defm IMAGE_SAMPLE_D_CL_O : MIMG_Sampler <0x00000033, "IMAGE_SAMPLE_D_CL_O">;
-defm IMAGE_SAMPLE_L_O : MIMG_Sampler <0x00000034, "IMAGE_SAMPLE_L_O">;
-defm IMAGE_SAMPLE_B_O : MIMG_Sampler <0x00000035, "IMAGE_SAMPLE_B_O">;
-defm IMAGE_SAMPLE_B_CL_O : MIMG_Sampler <0x00000036, "IMAGE_SAMPLE_B_CL_O">;
-defm IMAGE_SAMPLE_LZ_O : MIMG_Sampler <0x00000037, "IMAGE_SAMPLE_LZ_O">;
-defm IMAGE_SAMPLE_C_O : MIMG_Sampler <0x00000038, "IMAGE_SAMPLE_C_O">;
-defm IMAGE_SAMPLE_C_CL_O : MIMG_Sampler <0x00000039, "IMAGE_SAMPLE_C_CL_O">;
-defm IMAGE_SAMPLE_C_D_O : MIMG_Sampler <0x0000003a, "IMAGE_SAMPLE_C_D_O">;
-defm IMAGE_SAMPLE_C_D_CL_O : MIMG_Sampler <0x0000003b, "IMAGE_SAMPLE_C_D_CL_O">;
-defm IMAGE_SAMPLE_C_L_O : MIMG_Sampler <0x0000003c, "IMAGE_SAMPLE_C_L_O">;
-defm IMAGE_SAMPLE_C_B_O : MIMG_Sampler <0x0000003d, "IMAGE_SAMPLE_C_B_O">;
-defm IMAGE_SAMPLE_C_B_CL_O : MIMG_Sampler <0x0000003e, "IMAGE_SAMPLE_C_B_CL_O">;
-defm IMAGE_SAMPLE_C_LZ_O : MIMG_Sampler <0x0000003f, "IMAGE_SAMPLE_C_LZ_O">;
-defm IMAGE_GATHER4 : MIMG_Gather <0x00000040, "IMAGE_GATHER4">;
-defm IMAGE_GATHER4_CL : MIMG_Gather <0x00000041, "IMAGE_GATHER4_CL">;
-defm IMAGE_GATHER4_L : MIMG_Gather <0x00000044, "IMAGE_GATHER4_L">;
-defm IMAGE_GATHER4_B : MIMG_Gather <0x00000045, "IMAGE_GATHER4_B">;
-defm IMAGE_GATHER4_B_CL : MIMG_Gather <0x00000046, "IMAGE_GATHER4_B_CL">;
-defm IMAGE_GATHER4_LZ : MIMG_Gather <0x00000047, "IMAGE_GATHER4_LZ">;
-defm IMAGE_GATHER4_C : MIMG_Gather <0x00000048, "IMAGE_GATHER4_C">;
-defm IMAGE_GATHER4_C_CL : MIMG_Gather <0x00000049, "IMAGE_GATHER4_C_CL">;
-defm IMAGE_GATHER4_C_L : MIMG_Gather <0x0000004c, "IMAGE_GATHER4_C_L">;
-defm IMAGE_GATHER4_C_B : MIMG_Gather <0x0000004d, "IMAGE_GATHER4_C_B">;
-defm IMAGE_GATHER4_C_B_CL : MIMG_Gather <0x0000004e, "IMAGE_GATHER4_C_B_CL">;
-defm IMAGE_GATHER4_C_LZ : MIMG_Gather <0x0000004f, "IMAGE_GATHER4_C_LZ">;
-defm IMAGE_GATHER4_O : MIMG_Gather <0x00000050, "IMAGE_GATHER4_O">;
-defm IMAGE_GATHER4_CL_O : MIMG_Gather <0x00000051, "IMAGE_GATHER4_CL_O">;
-defm IMAGE_GATHER4_L_O : MIMG_Gather <0x00000054, "IMAGE_GATHER4_L_O">;
-defm IMAGE_GATHER4_B_O : MIMG_Gather <0x00000055, "IMAGE_GATHER4_B_O">;
-defm IMAGE_GATHER4_B_CL_O : MIMG_Gather <0x00000056, "IMAGE_GATHER4_B_CL_O">;
-defm IMAGE_GATHER4_LZ_O : MIMG_Gather <0x00000057, "IMAGE_GATHER4_LZ_O">;
-defm IMAGE_GATHER4_C_O : MIMG_Gather <0x00000058, "IMAGE_GATHER4_C_O">;
-defm IMAGE_GATHER4_C_CL_O : MIMG_Gather <0x00000059, "IMAGE_GATHER4_C_CL_O">;
-defm IMAGE_GATHER4_C_L_O : MIMG_Gather <0x0000005c, "IMAGE_GATHER4_C_L_O">;
-defm IMAGE_GATHER4_C_B_O : MIMG_Gather <0x0000005d, "IMAGE_GATHER4_C_B_O">;
-defm IMAGE_GATHER4_C_B_CL_O : MIMG_Gather <0x0000005e, "IMAGE_GATHER4_C_B_CL_O">;
-defm IMAGE_GATHER4_C_LZ_O : MIMG_Gather <0x0000005f, "IMAGE_GATHER4_C_LZ_O">;
-defm IMAGE_GET_LOD : MIMG_Sampler <0x00000060, "IMAGE_GET_LOD">;
-defm IMAGE_SAMPLE_CD : MIMG_Sampler <0x00000068, "IMAGE_SAMPLE_CD">;
-defm IMAGE_SAMPLE_CD_CL : MIMG_Sampler <0x00000069, "IMAGE_SAMPLE_CD_CL">;
-defm IMAGE_SAMPLE_C_CD : MIMG_Sampler <0x0000006a, "IMAGE_SAMPLE_C_CD">;
-defm IMAGE_SAMPLE_C_CD_CL : MIMG_Sampler <0x0000006b, "IMAGE_SAMPLE_C_CD_CL">;
-defm IMAGE_SAMPLE_CD_O : MIMG_Sampler <0x0000006c, "IMAGE_SAMPLE_CD_O">;
-defm IMAGE_SAMPLE_CD_CL_O : MIMG_Sampler <0x0000006d, "IMAGE_SAMPLE_CD_CL_O">;
-defm IMAGE_SAMPLE_C_CD_O : MIMG_Sampler <0x0000006e, "IMAGE_SAMPLE_C_CD_O">;
-defm IMAGE_SAMPLE_C_CD_CL_O : MIMG_Sampler <0x0000006f, "IMAGE_SAMPLE_C_CD_CL_O">;
-//def IMAGE_RSRC256 : MIMG_NoPattern_RSRC256 <"IMAGE_RSRC256", 0x0000007e>;
-//def IMAGE_SAMPLER : MIMG_NoPattern_ <"IMAGE_SAMPLER", 0x0000007f>;
+defm IMAGE_LOAD : MIMG_NoSampler <0x00000000, "image_load">;
+defm IMAGE_LOAD_MIP : MIMG_NoSampler <0x00000001, "image_load_mip">;
+//def IMAGE_LOAD_PCK : MIMG_NoPattern_ <"image_load_pck", 0x00000002>;
+//def IMAGE_LOAD_PCK_SGN : MIMG_NoPattern_ <"image_load_pck_sgn", 0x00000003>;
+//def IMAGE_LOAD_MIP_PCK : MIMG_NoPattern_ <"image_load_mip_pck", 0x00000004>;
+//def IMAGE_LOAD_MIP_PCK_SGN : MIMG_NoPattern_ <"image_load_mip_pck_sgn", 0x00000005>;
+//def IMAGE_STORE : MIMG_NoPattern_ <"image_store", 0x00000008>;
+//def IMAGE_STORE_MIP : MIMG_NoPattern_ <"image_store_mip", 0x00000009>;
+//def IMAGE_STORE_PCK : MIMG_NoPattern_ <"image_store_pck", 0x0000000a>;
+//def IMAGE_STORE_MIP_PCK : MIMG_NoPattern_ <"image_store_mip_pck", 0x0000000b>;
+defm IMAGE_GET_RESINFO : MIMG_NoSampler <0x0000000e, "image_get_resinfo">;
+//def IMAGE_ATOMIC_SWAP : MIMG_NoPattern_ <"image_atomic_swap", 0x0000000f>;
+//def IMAGE_ATOMIC_CMPSWAP : MIMG_NoPattern_ <"image_atomic_cmpswap", 0x00000010>;
+//def IMAGE_ATOMIC_ADD : MIMG_NoPattern_ <"image_atomic_add", 0x00000011>;
+//def IMAGE_ATOMIC_SUB : MIMG_NoPattern_ <"image_atomic_sub", 0x00000012>;
+//def IMAGE_ATOMIC_RSUB : MIMG_NoPattern_ <"image_atomic_rsub", 0x00000013>;
+//def IMAGE_ATOMIC_SMIN : MIMG_NoPattern_ <"image_atomic_smin", 0x00000014>;
+//def IMAGE_ATOMIC_UMIN : MIMG_NoPattern_ <"image_atomic_umin", 0x00000015>;
+//def IMAGE_ATOMIC_SMAX : MIMG_NoPattern_ <"image_atomic_smax", 0x00000016>;
+//def IMAGE_ATOMIC_UMAX : MIMG_NoPattern_ <"image_atomic_umax", 0x00000017>;
+//def IMAGE_ATOMIC_AND : MIMG_NoPattern_ <"image_atomic_and", 0x00000018>;
+//def IMAGE_ATOMIC_OR : MIMG_NoPattern_ <"image_atomic_or", 0x00000019>;
+//def IMAGE_ATOMIC_XOR : MIMG_NoPattern_ <"image_atomic_xor", 0x0000001a>;
+//def IMAGE_ATOMIC_INC : MIMG_NoPattern_ <"image_atomic_inc", 0x0000001b>;
+//def IMAGE_ATOMIC_DEC : MIMG_NoPattern_ <"image_atomic_dec", 0x0000001c>;
+//def IMAGE_ATOMIC_FCMPSWAP : MIMG_NoPattern_ <"image_atomic_fcmpswap", 0x0000001d>;
+//def IMAGE_ATOMIC_FMIN : MIMG_NoPattern_ <"image_atomic_fmin", 0x0000001e>;
+//def IMAGE_ATOMIC_FMAX : MIMG_NoPattern_ <"image_atomic_fmax", 0x0000001f>;
+defm IMAGE_SAMPLE : MIMG_Sampler <0x00000020, "image_sample">;
+defm IMAGE_SAMPLE_CL : MIMG_Sampler <0x00000021, "image_sample_cl">;
+defm IMAGE_SAMPLE_D : MIMG_Sampler <0x00000022, "image_sample_d">;
+defm IMAGE_SAMPLE_D_CL : MIMG_Sampler <0x00000023, "image_sample_d_cl">;
+defm IMAGE_SAMPLE_L : MIMG_Sampler <0x00000024, "image_sample_l">;
+defm IMAGE_SAMPLE_B : MIMG_Sampler <0x00000025, "image_sample_b">;
+defm IMAGE_SAMPLE_B_CL : MIMG_Sampler <0x00000026, "image_sample_b_cl">;
+defm IMAGE_SAMPLE_LZ : MIMG_Sampler <0x00000027, "image_sample_lz">;
+defm IMAGE_SAMPLE_C : MIMG_Sampler <0x00000028, "image_sample_c">;
+defm IMAGE_SAMPLE_C_CL : MIMG_Sampler <0x00000029, "image_sample_c_cl">;
+defm IMAGE_SAMPLE_C_D : MIMG_Sampler <0x0000002a, "image_sample_c_d">;
+defm IMAGE_SAMPLE_C_D_CL : MIMG_Sampler <0x0000002b, "image_sample_c_d_cl">;
+defm IMAGE_SAMPLE_C_L : MIMG_Sampler <0x0000002c, "image_sample_c_l">;
+defm IMAGE_SAMPLE_C_B : MIMG_Sampler <0x0000002d, "image_sample_c_b">;
+defm IMAGE_SAMPLE_C_B_CL : MIMG_Sampler <0x0000002e, "image_sample_c_b_cl">;
+defm IMAGE_SAMPLE_C_LZ : MIMG_Sampler <0x0000002f, "image_sample_c_lz">;
+defm IMAGE_SAMPLE_O : MIMG_Sampler <0x00000030, "image_sample_o">;
+defm IMAGE_SAMPLE_CL_O : MIMG_Sampler <0x00000031, "image_sample_cl_o">;
+defm IMAGE_SAMPLE_D_O : MIMG_Sampler <0x00000032, "image_sample_d_o">;
+defm IMAGE_SAMPLE_D_CL_O : MIMG_Sampler <0x00000033, "image_sample_d_cl_o">;
+defm IMAGE_SAMPLE_L_O : MIMG_Sampler <0x00000034, "image_sample_l_o">;
+defm IMAGE_SAMPLE_B_O : MIMG_Sampler <0x00000035, "image_sample_b_o">;
+defm IMAGE_SAMPLE_B_CL_O : MIMG_Sampler <0x00000036, "image_sample_b_cl_o">;
+defm IMAGE_SAMPLE_LZ_O : MIMG_Sampler <0x00000037, "image_sample_lz_o">;
+defm IMAGE_SAMPLE_C_O : MIMG_Sampler <0x00000038, "image_sample_c_o">;
+defm IMAGE_SAMPLE_C_CL_O : MIMG_Sampler <0x00000039, "image_sample_c_cl_o">;
+defm IMAGE_SAMPLE_C_D_O : MIMG_Sampler <0x0000003a, "image_sample_c_d_o">;
+defm IMAGE_SAMPLE_C_D_CL_O : MIMG_Sampler <0x0000003b, "image_sample_c_d_cl_o">;
+defm IMAGE_SAMPLE_C_L_O : MIMG_Sampler <0x0000003c, "image_sample_c_l_o">;
+defm IMAGE_SAMPLE_C_B_O : MIMG_Sampler <0x0000003d, "image_sample_c_b_o">;
+defm IMAGE_SAMPLE_C_B_CL_O : MIMG_Sampler <0x0000003e, "image_sample_c_b_cl_o">;
+defm IMAGE_SAMPLE_C_LZ_O : MIMG_Sampler <0x0000003f, "image_sample_c_lz_o">;
+defm IMAGE_GATHER4 : MIMG_Gather <0x00000040, "image_gather4">;
+defm IMAGE_GATHER4_CL : MIMG_Gather <0x00000041, "image_gather4_cl">;
+defm IMAGE_GATHER4_L : MIMG_Gather <0x00000044, "image_gather4_l">;
+defm IMAGE_GATHER4_B : MIMG_Gather <0x00000045, "image_gather4_b">;
+defm IMAGE_GATHER4_B_CL : MIMG_Gather <0x00000046, "image_gather4_b_cl">;
+defm IMAGE_GATHER4_LZ : MIMG_Gather <0x00000047, "image_gather4_lz">;
+defm IMAGE_GATHER4_C : MIMG_Gather <0x00000048, "image_gather4_c">;
+defm IMAGE_GATHER4_C_CL : MIMG_Gather <0x00000049, "image_gather4_c_cl">;
+defm IMAGE_GATHER4_C_L : MIMG_Gather <0x0000004c, "image_gather4_c_l">;
+defm IMAGE_GATHER4_C_B : MIMG_Gather <0x0000004d, "image_gather4_c_b">;
+defm IMAGE_GATHER4_C_B_CL : MIMG_Gather <0x0000004e, "image_gather4_c_b_cl">;
+defm IMAGE_GATHER4_C_LZ : MIMG_Gather <0x0000004f, "image_gather4_c_lz">;
+defm IMAGE_GATHER4_O : MIMG_Gather <0x00000050, "image_gather4_o">;
+defm IMAGE_GATHER4_CL_O : MIMG_Gather <0x00000051, "image_gather4_cl_o">;
+defm IMAGE_GATHER4_L_O : MIMG_Gather <0x00000054, "image_gather4_l_o">;
+defm IMAGE_GATHER4_B_O : MIMG_Gather <0x00000055, "image_gather4_b_o">;
+defm IMAGE_GATHER4_B_CL_O : MIMG_Gather <0x00000056, "image_gather4_b_cl_o">;
+defm IMAGE_GATHER4_LZ_O : MIMG_Gather <0x00000057, "image_gather4_lz_o">;
+defm IMAGE_GATHER4_C_O : MIMG_Gather <0x00000058, "image_gather4_c_o">;
+defm IMAGE_GATHER4_C_CL_O : MIMG_Gather <0x00000059, "image_gather4_c_cl_o">;
+defm IMAGE_GATHER4_C_L_O : MIMG_Gather <0x0000005c, "image_gather4_c_l_o">;
+defm IMAGE_GATHER4_C_B_O : MIMG_Gather <0x0000005d, "image_gather4_c_b_o">;
+defm IMAGE_GATHER4_C_B_CL_O : MIMG_Gather <0x0000005e, "image_gather4_c_b_cl_o">;
+defm IMAGE_GATHER4_C_LZ_O : MIMG_Gather <0x0000005f, "image_gather4_c_lz_o">;
+defm IMAGE_GET_LOD : MIMG_Sampler <0x00000060, "image_get_lod">;
+defm IMAGE_SAMPLE_CD : MIMG_Sampler <0x00000068, "image_sample_cd">;
+defm IMAGE_SAMPLE_CD_CL : MIMG_Sampler <0x00000069, "image_sample_cd_cl">;
+defm IMAGE_SAMPLE_C_CD : MIMG_Sampler <0x0000006a, "image_sample_c_cd">;
+defm IMAGE_SAMPLE_C_CD_CL : MIMG_Sampler <0x0000006b, "image_sample_c_cd_cl">;
+defm IMAGE_SAMPLE_CD_O : MIMG_Sampler <0x0000006c, "image_sample_cd_o">;
+defm IMAGE_SAMPLE_CD_CL_O : MIMG_Sampler <0x0000006d, "image_sample_cd_cl_o">;
+defm IMAGE_SAMPLE_C_CD_O : MIMG_Sampler <0x0000006e, "image_sample_c_cd_o">;
+defm IMAGE_SAMPLE_C_CD_CL_O : MIMG_Sampler <0x0000006f, "image_sample_c_cd_cl_o">;
+//def IMAGE_RSRC256 : MIMG_NoPattern_RSRC256 <"image_rsrc256", 0x0000007e>;
+//def IMAGE_SAMPLER : MIMG_NoPattern_ <"image_sampler", 0x0000007f>;
//===----------------------------------------------------------------------===//
// Flat Instructions
//===----------------------------------------------------------------------===//
let Predicates = [HasFlatAddressSpace] in {
-def FLAT_LOAD_UBYTE : FLAT_Load_Helper <0x00000008, "FLAT_LOAD_UBYTE", VReg_32>;
-def FLAT_LOAD_SBYTE : FLAT_Load_Helper <0x00000009, "FLAT_LOAD_SBYTE", VReg_32>;
-def FLAT_LOAD_USHORT : FLAT_Load_Helper <0x0000000a, "FLAT_LOAD_USHORT", VReg_32>;
-def FLAT_LOAD_SSHORT : FLAT_Load_Helper <0x0000000b, "FLAT_LOAD_SSHORT", VReg_32>;
-def FLAT_LOAD_DWORD : FLAT_Load_Helper <0x0000000c, "FLAT_LOAD_DWORD", VReg_32>;
-def FLAT_LOAD_DWORDX2 : FLAT_Load_Helper <0x0000000d, "FLAT_LOAD_DWORDX2", VReg_64>;
-def FLAT_LOAD_DWORDX4 : FLAT_Load_Helper <0x0000000e, "FLAT_LOAD_DWORDX4", VReg_128>;
-def FLAT_LOAD_DWORDX3 : FLAT_Load_Helper <0x00000010, "FLAT_LOAD_DWORDX3", VReg_96>;
+def FLAT_LOAD_UBYTE : FLAT_Load_Helper <0x00000008, "flat_load_ubyte", VReg_32>;
+def FLAT_LOAD_SBYTE : FLAT_Load_Helper <0x00000009, "flat_load_sbyte", VReg_32>;
+def FLAT_LOAD_USHORT : FLAT_Load_Helper <0x0000000a, "flat_load_ushort", VReg_32>;
+def FLAT_LOAD_SSHORT : FLAT_Load_Helper <0x0000000b, "flat_load_sshort", VReg_32>;
+def FLAT_LOAD_DWORD : FLAT_Load_Helper <0x0000000c, "flat_load_dword", VReg_32>;
+def FLAT_LOAD_DWORDX2 : FLAT_Load_Helper <0x0000000d, "flat_load_dwordx2", VReg_64>;
+def FLAT_LOAD_DWORDX4 : FLAT_Load_Helper <0x0000000e, "flat_load_dwordx4", VReg_128>;
+def FLAT_LOAD_DWORDX3 : FLAT_Load_Helper <0x00000010, "flat_load_dwordx3", VReg_96>;
def FLAT_STORE_BYTE : FLAT_Store_Helper <
- 0x00000018, "FLAT_STORE_BYTE", VReg_32
+ 0x00000018, "flat_store_byte", VReg_32
>;
def FLAT_STORE_SHORT : FLAT_Store_Helper <
- 0x0000001a, "FLAT_STORE_SHORT", VReg_32
+ 0x0000001a, "flat_store_short", VReg_32
>;
def FLAT_STORE_DWORD : FLAT_Store_Helper <
- 0x0000001c, "FLAT_STORE_DWORD", VReg_32
+ 0x0000001c, "flat_store_dword", VReg_32
>;
def FLAT_STORE_DWORDX2 : FLAT_Store_Helper <
- 0x0000001d, "FLAT_STORE_DWORDX2", VReg_64
+ 0x0000001d, "flat_store_dwordx2", VReg_64
>;
def FLAT_STORE_DWORDX4 : FLAT_Store_Helper <
- 0x0000001e, "FLAT_STORE_DWORDX4", VReg_128
+ 0x0000001e, "flat_store_dwordx4", VReg_128
>;
def FLAT_STORE_DWORDX3 : FLAT_Store_Helper <
- 0x0000001e, "FLAT_STORE_DWORDX3", VReg_96
->;
-
-//def FLAT_ATOMIC_SWAP : FLAT_ <0x00000030, "FLAT_ATOMIC_SWAP", []>;
-//def FLAT_ATOMIC_CMPSWAP : FLAT_ <0x00000031, "FLAT_ATOMIC_CMPSWAP", []>;
-//def FLAT_ATOMIC_ADD : FLAT_ <0x00000032, "FLAT_ATOMIC_ADD", []>;
-//def FLAT_ATOMIC_SUB : FLAT_ <0x00000033, "FLAT_ATOMIC_SUB", []>;
-//def FLAT_ATOMIC_RSUB : FLAT_ <0x00000034, "FLAT_ATOMIC_RSUB", []>;
-//def FLAT_ATOMIC_SMIN : FLAT_ <0x00000035, "FLAT_ATOMIC_SMIN", []>;
-//def FLAT_ATOMIC_UMIN : FLAT_ <0x00000036, "FLAT_ATOMIC_UMIN", []>;
-//def FLAT_ATOMIC_SMAX : FLAT_ <0x00000037, "FLAT_ATOMIC_SMAX", []>;
-//def FLAT_ATOMIC_UMAX : FLAT_ <0x00000038, "FLAT_ATOMIC_UMAX", []>;
-//def FLAT_ATOMIC_AND : FLAT_ <0x00000039, "FLAT_ATOMIC_AND", []>;
-//def FLAT_ATOMIC_OR : FLAT_ <0x0000003a, "FLAT_ATOMIC_OR", []>;
-//def FLAT_ATOMIC_XOR : FLAT_ <0x0000003b, "FLAT_ATOMIC_XOR", []>;
-//def FLAT_ATOMIC_INC : FLAT_ <0x0000003c, "FLAT_ATOMIC_INC", []>;
-//def FLAT_ATOMIC_DEC : FLAT_ <0x0000003d, "FLAT_ATOMIC_DEC", []>;
-//def FLAT_ATOMIC_FCMPSWAP : FLAT_ <0x0000003e, "FLAT_ATOMIC_FCMPSWAP", []>;
-//def FLAT_ATOMIC_FMIN : FLAT_ <0x0000003f, "FLAT_ATOMIC_FMIN", []>;
-//def FLAT_ATOMIC_FMAX : FLAT_ <0x00000040, "FLAT_ATOMIC_FMAX", []>;
-//def FLAT_ATOMIC_SWAP_X2 : FLAT_X2 <0x00000050, "FLAT_ATOMIC_SWAP_X2", []>;
-//def FLAT_ATOMIC_CMPSWAP_X2 : FLAT_X2 <0x00000051, "FLAT_ATOMIC_CMPSWAP_X2", []>;
-//def FLAT_ATOMIC_ADD_X2 : FLAT_X2 <0x00000052, "FLAT_ATOMIC_ADD_X2", []>;
-//def FLAT_ATOMIC_SUB_X2 : FLAT_X2 <0x00000053, "FLAT_ATOMIC_SUB_X2", []>;
-//def FLAT_ATOMIC_RSUB_X2 : FLAT_X2 <0x00000054, "FLAT_ATOMIC_RSUB_X2", []>;
-//def FLAT_ATOMIC_SMIN_X2 : FLAT_X2 <0x00000055, "FLAT_ATOMIC_SMIN_X2", []>;
-//def FLAT_ATOMIC_UMIN_X2 : FLAT_X2 <0x00000056, "FLAT_ATOMIC_UMIN_X2", []>;
-//def FLAT_ATOMIC_SMAX_X2 : FLAT_X2 <0x00000057, "FLAT_ATOMIC_SMAX_X2", []>;
-//def FLAT_ATOMIC_UMAX_X2 : FLAT_X2 <0x00000058, "FLAT_ATOMIC_UMAX_X2", []>;
-//def FLAT_ATOMIC_AND_X2 : FLAT_X2 <0x00000059, "FLAT_ATOMIC_AND_X2", []>;
-//def FLAT_ATOMIC_OR_X2 : FLAT_X2 <0x0000005a, "FLAT_ATOMIC_OR_X2", []>;
-//def FLAT_ATOMIC_XOR_X2 : FLAT_X2 <0x0000005b, "FLAT_ATOMIC_XOR_X2", []>;
-//def FLAT_ATOMIC_INC_X2 : FLAT_X2 <0x0000005c, "FLAT_ATOMIC_INC_X2", []>;
-//def FLAT_ATOMIC_DEC_X2 : FLAT_X2 <0x0000005d, "FLAT_ATOMIC_DEC_X2", []>;
-//def FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_X2 <0x0000005e, "FLAT_ATOMIC_FCMPSWAP_X2", []>;
-//def FLAT_ATOMIC_FMIN_X2 : FLAT_X2 <0x0000005f, "FLAT_ATOMIC_FMIN_X2", []>;
-//def FLAT_ATOMIC_FMAX_X2 : FLAT_X2 <0x00000060, "FLAT_ATOMIC_FMAX_X2", []>;
+ 0x0000001e, "flat_store_dwordx3", VReg_96
+>;
+
+//def FLAT_ATOMIC_SWAP : FLAT_ <0x00000030, "flat_atomic_swap", []>;
+//def FLAT_ATOMIC_CMPSWAP : FLAT_ <0x00000031, "flat_atomic_cmpswap", []>;
+//def FLAT_ATOMIC_ADD : FLAT_ <0x00000032, "flat_atomic_add", []>;
+//def FLAT_ATOMIC_SUB : FLAT_ <0x00000033, "flat_atomic_sub", []>;
+//def FLAT_ATOMIC_RSUB : FLAT_ <0x00000034, "flat_atomic_rsub", []>;
+//def FLAT_ATOMIC_SMIN : FLAT_ <0x00000035, "flat_atomic_smin", []>;
+//def FLAT_ATOMIC_UMIN : FLAT_ <0x00000036, "flat_atomic_umin", []>;
+//def FLAT_ATOMIC_SMAX : FLAT_ <0x00000037, "flat_atomic_smax", []>;
+//def FLAT_ATOMIC_UMAX : FLAT_ <0x00000038, "flat_atomic_umax", []>;
+//def FLAT_ATOMIC_AND : FLAT_ <0x00000039, "flat_atomic_and", []>;
+//def FLAT_ATOMIC_OR : FLAT_ <0x0000003a, "flat_atomic_or", []>;
+//def FLAT_ATOMIC_XOR : FLAT_ <0x0000003b, "flat_atomic_xor", []>;
+//def FLAT_ATOMIC_INC : FLAT_ <0x0000003c, "flat_atomic_inc", []>;
+//def FLAT_ATOMIC_DEC : FLAT_ <0x0000003d, "flat_atomic_dec", []>;
+//def FLAT_ATOMIC_FCMPSWAP : FLAT_ <0x0000003e, "flat_atomic_fcmpswap", []>;
+//def FLAT_ATOMIC_FMIN : FLAT_ <0x0000003f, "flat_atomic_fmin", []>;
+//def FLAT_ATOMIC_FMAX : FLAT_ <0x00000040, "flat_atomic_fmax", []>;
+//def FLAT_ATOMIC_SWAP_X2 : FLAT_X2 <0x00000050, "flat_atomic_swap_x2", []>;
+//def FLAT_ATOMIC_CMPSWAP_X2 : FLAT_X2 <0x00000051, "flat_atomic_cmpswap_x2", []>;
+//def FLAT_ATOMIC_ADD_X2 : FLAT_X2 <0x00000052, "flat_atomic_add_x2", []>;
+//def FLAT_ATOMIC_SUB_X2 : FLAT_X2 <0x00000053, "flat_atomic_sub_x2", []>;
+//def FLAT_ATOMIC_RSUB_X2 : FLAT_X2 <0x00000054, "flat_atomic_rsub_x2", []>;
+//def FLAT_ATOMIC_SMIN_X2 : FLAT_X2 <0x00000055, "flat_atomic_smin_x2", []>;
+//def FLAT_ATOMIC_UMIN_X2 : FLAT_X2 <0x00000056, "flat_atomic_umin_x2", []>;
+//def FLAT_ATOMIC_SMAX_X2 : FLAT_X2 <0x00000057, "flat_atomic_smax_x2", []>;
+//def FLAT_ATOMIC_UMAX_X2 : FLAT_X2 <0x00000058, "flat_atomic_umax_x2", []>;
+//def FLAT_ATOMIC_AND_X2 : FLAT_X2 <0x00000059, "flat_atomic_and_x2", []>;
+//def FLAT_ATOMIC_OR_X2 : FLAT_X2 <0x0000005a, "flat_atomic_or_x2", []>;
+//def FLAT_ATOMIC_XOR_X2 : FLAT_X2 <0x0000005b, "flat_atomic_xor_x2", []>;
+//def FLAT_ATOMIC_INC_X2 : FLAT_X2 <0x0000005c, "flat_atomic_inc_x2", []>;
+//def FLAT_ATOMIC_DEC_X2 : FLAT_X2 <0x0000005d, "flat_atomic_dec_x2", []>;
+//def FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_X2 <0x0000005e, "flat_atomic_fcmpswap_x2", []>;
+//def FLAT_ATOMIC_FMIN_X2 : FLAT_X2 <0x0000005f, "flat_atomic_fmin_x2", []>;
+//def FLAT_ATOMIC_FMAX_X2 : FLAT_X2 <0x00000060, "flat_atomic_fmax_x2", []>;
} // End HasFlatAddressSpace predicate
//===----------------------------------------------------------------------===//
// VOP1 Instructions
//===----------------------------------------------------------------------===//
-//def V_NOP : VOP1_ <0x00000000, "V_NOP", []>;
+//def V_NOP : VOP1_ <0x00000000, "v_nop", []>;
let isMoveImm = 1 in {
-defm V_MOV_B32 : VOP1Inst <vop1<0x1>, "V_MOV_B32", VOP_I32_I32>;
+defm V_MOV_B32 : VOP1Inst <vop1<0x1>, "v_mov_b32", VOP_I32_I32>;
} // End isMoveImm = 1
let Uses = [EXEC] in {
0x00000002,
(outs SReg_32:$vdst),
(ins VReg_32:$src0),
- "V_READFIRSTLANE_B32 $vdst, $src0",
+ "v_readfirstlane_b32 $vdst, $src0",
[]
>;
}
-defm V_CVT_I32_F64 : VOP1Inst <vop1<0x3>, "V_CVT_I32_F64",
+defm V_CVT_I32_F64 : VOP1Inst <vop1<0x3>, "v_cvt_i32_f64",
VOP_I32_F64, fp_to_sint
>;
-defm V_CVT_F64_I32 : VOP1Inst <vop1<0x4>, "V_CVT_F64_I32",
+defm V_CVT_F64_I32 : VOP1Inst <vop1<0x4>, "v_cvt_f64_i32",
VOP_F64_I32, sint_to_fp
>;
-defm V_CVT_F32_I32 : VOP1Inst <vop1<0x5>, "V_CVT_F32_I32",
+defm V_CVT_F32_I32 : VOP1Inst <vop1<0x5>, "v_cvt_f32_i32",
VOP_F32_I32, sint_to_fp
>;
-defm V_CVT_F32_U32 : VOP1Inst <vop1<0x6>, "V_CVT_F32_U32",
+defm V_CVT_F32_U32 : VOP1Inst <vop1<0x6>, "v_cvt_f32_u32",
VOP_F32_I32, uint_to_fp
>;
-defm V_CVT_U32_F32 : VOP1Inst <vop1<0x7>, "V_CVT_U32_F32",
+defm V_CVT_U32_F32 : VOP1Inst <vop1<0x7>, "v_cvt_u32_f32",
VOP_I32_F32, fp_to_uint
>;
-defm V_CVT_I32_F32 : VOP1Inst <vop1<0x8>, "V_CVT_I32_F32",
+defm V_CVT_I32_F32 : VOP1Inst <vop1<0x8>, "v_cvt_i32_f32",
VOP_I32_F32, fp_to_sint
>;
-defm V_MOV_FED_B32 : VOP1Inst <vop1<0x9>, "V_MOV_FED_B32", VOP_I32_I32>;
-defm V_CVT_F16_F32 : VOP1Inst <vop1<0xa>, "V_CVT_F16_F32",
+defm V_MOV_FED_B32 : VOP1Inst <vop1<0x9>, "v_mov_fed_b32", VOP_I32_I32>;
+defm V_CVT_F16_F32 : VOP1Inst <vop1<0xa>, "v_cvt_f16_f32",
VOP_I32_F32, fp_to_f16
>;
-defm V_CVT_F32_F16 : VOP1Inst <vop1<0xb>, "V_CVT_F32_F16",
+defm V_CVT_F32_F16 : VOP1Inst <vop1<0xb>, "v_cvt_f32_f16",
VOP_F32_I32, f16_to_fp
>;
-//defm V_CVT_RPI_I32_F32 : VOP1_32 <0x0000000c, "V_CVT_RPI_I32_F32", []>;
-//defm V_CVT_FLR_I32_F32 : VOP1_32 <0x0000000d, "V_CVT_FLR_I32_F32", []>;
-//defm V_CVT_OFF_F32_I4 : VOP1_32 <0x0000000e, "V_CVT_OFF_F32_I4", []>;
-defm V_CVT_F32_F64 : VOP1Inst <vop1<0xf>, "V_CVT_F32_F64",
+//defm V_CVT_RPI_I32_F32 : VOP1_32 <0x0000000c, "v_cvt_rpi_i32_f32", []>;
+//defm V_CVT_FLR_I32_F32 : VOP1_32 <0x0000000d, "v_cvt_flr_i32_f32", []>;
+//defm V_CVT_OFF_F32_I4 : VOP1_32 <0x0000000e, "v_cvt_off_f32_i4", []>;
+defm V_CVT_F32_F64 : VOP1Inst <vop1<0xf>, "v_cvt_f32_f64",
VOP_F32_F64, fround
>;
-defm V_CVT_F64_F32 : VOP1Inst <vop1<0x10>, "V_CVT_F64_F32",
+defm V_CVT_F64_F32 : VOP1Inst <vop1<0x10>, "v_cvt_f64_f32",
VOP_F64_F32, fextend
>;
-defm V_CVT_F32_UBYTE0 : VOP1Inst <vop1<0x11>, "V_CVT_F32_UBYTE0",
+defm V_CVT_F32_UBYTE0 : VOP1Inst <vop1<0x11>, "v_cvt_f32_ubyte0",
VOP_F32_I32, AMDGPUcvt_f32_ubyte0
>;
-defm V_CVT_F32_UBYTE1 : VOP1Inst <vop1<0x12>, "V_CVT_F32_UBYTE1",
+defm V_CVT_F32_UBYTE1 : VOP1Inst <vop1<0x12>, "v_cvt_f32_ubyte1",
VOP_F32_I32, AMDGPUcvt_f32_ubyte1
>;
-defm V_CVT_F32_UBYTE2 : VOP1Inst <vop1<0x13>, "V_CVT_F32_UBYTE2",
+defm V_CVT_F32_UBYTE2 : VOP1Inst <vop1<0x13>, "v_cvt_f32_ubyte2",
VOP_F32_I32, AMDGPUcvt_f32_ubyte2
>;
-defm V_CVT_F32_UBYTE3 : VOP1Inst <vop1<0x14>, "V_CVT_F32_UBYTE3",
+defm V_CVT_F32_UBYTE3 : VOP1Inst <vop1<0x14>, "v_cvt_f32_ubyte3",
VOP_F32_I32, AMDGPUcvt_f32_ubyte3
>;
-defm V_CVT_U32_F64 : VOP1Inst <vop1<0x15>, "V_CVT_U32_F64",
+defm V_CVT_U32_F64 : VOP1Inst <vop1<0x15>, "v_cvt_u32_f64",
VOP_I32_F64, fp_to_uint
>;
-defm V_CVT_F64_U32 : VOP1Inst <vop1<0x16>, "V_CVT_F64_U32",
+defm V_CVT_F64_U32 : VOP1Inst <vop1<0x16>, "v_cvt_f64_u32",
VOP_F64_I32, uint_to_fp
>;
-defm V_FRACT_F32 : VOP1Inst <vop1<0x20>, "V_FRACT_F32",
+defm V_FRACT_F32 : VOP1Inst <vop1<0x20>, "v_fract_f32",
VOP_F32_F32, AMDGPUfract
>;
-defm V_TRUNC_F32 : VOP1Inst <vop1<0x21>, "V_TRUNC_F32",
+defm V_TRUNC_F32 : VOP1Inst <vop1<0x21>, "v_trunc_f32",
VOP_F32_F32, ftrunc
>;
-defm V_CEIL_F32 : VOP1Inst <vop1<0x22>, "V_CEIL_F32",
+defm V_CEIL_F32 : VOP1Inst <vop1<0x22>, "v_ceil_f32",
VOP_F32_F32, fceil
>;
-defm V_RNDNE_F32 : VOP1Inst <vop1<0x23>, "V_RNDNE_F32",
+defm V_RNDNE_F32 : VOP1Inst <vop1<0x23>, "v_rndne_f32",
VOP_F32_F32, frint
>;
-defm V_FLOOR_F32 : VOP1Inst <vop1<0x24>, "V_FLOOR_F32",
+defm V_FLOOR_F32 : VOP1Inst <vop1<0x24>, "v_floor_f32",
VOP_F32_F32, ffloor
>;
-defm V_EXP_F32 : VOP1Inst <vop1<0x25>, "V_EXP_F32",
+defm V_EXP_F32 : VOP1Inst <vop1<0x25>, "v_exp_f32",
VOP_F32_F32, fexp2
>;
-defm V_LOG_CLAMP_F32 : VOP1Inst <vop1<0x26>, "V_LOG_CLAMP_F32", VOP_F32_F32>;
-defm V_LOG_F32 : VOP1Inst <vop1<0x27>, "V_LOG_F32",
+defm V_LOG_CLAMP_F32 : VOP1Inst <vop1<0x26>, "v_log_clamp_f32", VOP_F32_F32>;
+defm V_LOG_F32 : VOP1Inst <vop1<0x27>, "v_log_f32",
VOP_F32_F32, flog2
>;
-defm V_RCP_CLAMP_F32 : VOP1Inst <vop1<0x28>, "V_RCP_CLAMP_F32", VOP_F32_F32>;
-defm V_RCP_LEGACY_F32 : VOP1Inst <vop1<0x29>, "V_RCP_LEGACY_F32", VOP_F32_F32>;
-defm V_RCP_F32 : VOP1Inst <vop1<0x2a>, "V_RCP_F32",
+defm V_RCP_CLAMP_F32 : VOP1Inst <vop1<0x28>, "v_rcp_clamp_f32", VOP_F32_F32>;
+defm V_RCP_LEGACY_F32 : VOP1Inst <vop1<0x29>, "v_rcp_legacy_f32", VOP_F32_F32>;
+defm V_RCP_F32 : VOP1Inst <vop1<0x2a>, "v_rcp_f32",
VOP_F32_F32, AMDGPUrcp
>;
-defm V_RCP_IFLAG_F32 : VOP1Inst <vop1<0x2b>, "V_RCP_IFLAG_F32", VOP_F32_F32>;
-defm V_RSQ_CLAMP_F32 : VOP1Inst <vop1<0x2c>, "V_RSQ_CLAMP_F32",
+defm V_RCP_IFLAG_F32 : VOP1Inst <vop1<0x2b>, "v_rcp_iflag_f32", VOP_F32_F32>;
+defm V_RSQ_CLAMP_F32 : VOP1Inst <vop1<0x2c>, "v_rsq_clamp_f32",
VOP_F32_F32, AMDGPUrsq_clamped
>;
-defm V_RSQ_LEGACY_F32 : VOP1Inst <vop1<0x2d>, "V_RSQ_LEGACY_F32",
+defm V_RSQ_LEGACY_F32 : VOP1Inst <vop1<0x2d>, "v_rsq_legacy_f32",
VOP_F32_F32, AMDGPUrsq_legacy
>;
-defm V_RSQ_F32 : VOP1Inst <vop1<0x2e>, "V_RSQ_F32",
+defm V_RSQ_F32 : VOP1Inst <vop1<0x2e>, "v_rsq_f32",
VOP_F32_F32, AMDGPUrsq
>;
-defm V_RCP_F64 : VOP1Inst <vop1<0x2f>, "V_RCP_F64",
+defm V_RCP_F64 : VOP1Inst <vop1<0x2f>, "v_rcp_f64",
VOP_F64_F64, AMDGPUrcp
>;
-defm V_RCP_CLAMP_F64 : VOP1Inst <vop1<0x30>, "V_RCP_CLAMP_F64", VOP_F64_F64>;
-defm V_RSQ_F64 : VOP1Inst <vop1<0x31>, "V_RSQ_F64",
+defm V_RCP_CLAMP_F64 : VOP1Inst <vop1<0x30>, "v_rcp_clamp_f64", VOP_F64_F64>;
+defm V_RSQ_F64 : VOP1Inst <vop1<0x31>, "v_rsq_f64",
VOP_F64_F64, AMDGPUrsq
>;
-defm V_RSQ_CLAMP_F64 : VOP1Inst <vop1<0x32>, "V_RSQ_CLAMP_F64",
+defm V_RSQ_CLAMP_F64 : VOP1Inst <vop1<0x32>, "v_rsq_clamp_f64",
VOP_F64_F64, AMDGPUrsq_clamped
>;
-defm V_SQRT_F32 : VOP1Inst <vop1<0x33>, "V_SQRT_F32",
+defm V_SQRT_F32 : VOP1Inst <vop1<0x33>, "v_sqrt_f32",
VOP_F32_F32, fsqrt
>;
-defm V_SQRT_F64 : VOP1Inst <vop1<0x34>, "V_SQRT_F64",
+defm V_SQRT_F64 : VOP1Inst <vop1<0x34>, "v_sqrt_f64",
VOP_F64_F64, fsqrt
>;
-defm V_SIN_F32 : VOP1Inst <vop1<0x35>, "V_SIN_F32",
+defm V_SIN_F32 : VOP1Inst <vop1<0x35>, "v_sin_f32",
VOP_F32_F32, AMDGPUsin
>;
-defm V_COS_F32 : VOP1Inst <vop1<0x36>, "V_COS_F32",
+defm V_COS_F32 : VOP1Inst <vop1<0x36>, "v_cos_f32",
VOP_F32_F32, AMDGPUcos
>;
-defm V_NOT_B32 : VOP1Inst <vop1<0x37>, "V_NOT_B32", VOP_I32_I32>;
-defm V_BFREV_B32 : VOP1Inst <vop1<0x38>, "V_BFREV_B32", VOP_I32_I32>;
-defm V_FFBH_U32 : VOP1Inst <vop1<0x39>, "V_FFBH_U32", VOP_I32_I32>;
-defm V_FFBL_B32 : VOP1Inst <vop1<0x3a>, "V_FFBL_B32", VOP_I32_I32>;
-defm V_FFBH_I32 : VOP1Inst <vop1<0x3b>, "V_FFBH_I32", VOP_I32_I32>;
-//defm V_FREXP_EXP_I32_F64 : VOPInst <0x0000003c, "V_FREXP_EXP_I32_F64", VOP_I32_F32>;
-defm V_FREXP_MANT_F64 : VOP1Inst <vop1<0x3d>, "V_FREXP_MANT_F64", VOP_F64_F64>;
-defm V_FRACT_F64 : VOP1Inst <vop1<0x3e>, "V_FRACT_F64", VOP_F64_F64>;
-//defm V_FREXP_EXP_I32_F32 : VOPInst <0x0000003f, "V_FREXP_EXP_I32_F32", VOP_I32_F32>;
-defm V_FREXP_MANT_F32 : VOP1Inst <vop1<0x40>, "V_FREXP_MANT_F32", VOP_F32_F32>;
-//def V_CLREXCP : VOP1_ <0x00000041, "V_CLREXCP", []>;
-defm V_MOVRELD_B32 : VOP1Inst <vop1<0x42>, "V_MOVRELD_B32", VOP_I32_I32>;
-defm V_MOVRELS_B32 : VOP1Inst <vop1<0x43>, "V_MOVRELS_B32", VOP_I32_I32>;
-defm V_MOVRELSD_B32 : VOP1Inst <vop1<0x44>, "V_MOVRELSD_B32", VOP_I32_I32>;
+defm V_NOT_B32 : VOP1Inst <vop1<0x37>, "v_not_b32", VOP_I32_I32>;
+defm V_BFREV_B32 : VOP1Inst <vop1<0x38>, "v_bfrev_b32", VOP_I32_I32>;
+defm V_FFBH_U32 : VOP1Inst <vop1<0x39>, "v_ffbh_u32", VOP_I32_I32>;
+defm V_FFBL_B32 : VOP1Inst <vop1<0x3a>, "v_ffbl_b32", VOP_I32_I32>;
+defm V_FFBH_I32 : VOP1Inst <vop1<0x3b>, "v_ffbh_i32", VOP_I32_I32>;
+//defm V_FREXP_EXP_I32_F64 : VOPInst <0x0000003c, "v_frexp_exp_i32_f64", VOP_I32_F32>;
+defm V_FREXP_MANT_F64 : VOP1Inst <vop1<0x3d>, "v_frexp_mant_f64", VOP_F64_F64>;
+defm V_FRACT_F64 : VOP1Inst <vop1<0x3e>, "v_fract_f64", VOP_F64_F64>;
+//defm V_FREXP_EXP_I32_F32 : VOPInst <0x0000003f, "v_frexp_exp_i32_f32", VOP_I32_F32>;
+defm V_FREXP_MANT_F32 : VOP1Inst <vop1<0x40>, "v_frexp_mant_f32", VOP_F32_F32>;
+//def V_CLREXCP : VOP1_ <0x00000041, "v_clrexcp", []>;
+defm V_MOVRELD_B32 : VOP1Inst <vop1<0x42>, "v_movreld_b32", VOP_I32_I32>;
+defm V_MOVRELS_B32 : VOP1Inst <vop1<0x43>, "v_movrels_b32", VOP_I32_I32>;
+defm V_MOVRELSD_B32 : VOP1Inst <vop1<0x44>, "v_movrelsd_b32", VOP_I32_I32>;
//===----------------------------------------------------------------------===//
0x00000000,
(outs VReg_32:$dst),
(ins VReg_32:$i, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
- "V_INTERP_P1_F32 $dst, $i, $attr_chan, $attr, [$m0]",
+ "v_interp_p1_f32 $dst, $i, $attr_chan, $attr, [$m0]",
[]> {
let DisableEncoding = "$m0";
}
0x00000001,
(outs VReg_32:$dst),
(ins VReg_32:$src0, VReg_32:$j, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
- "V_INTERP_P2_F32 $dst, [$src0], $j, $attr_chan, $attr, [$m0]",
+ "v_interp_p2_f32 $dst, [$src0], $j, $attr_chan, $attr, [$m0]",
[]> {
let Constraints = "$src0 = $dst";
0x00000002,
(outs VReg_32:$dst),
(ins InterpSlot:$src0, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
- "V_INTERP_MOV_F32 $dst, $src0, $attr_chan, $attr, [$m0]",
+ "v_interp_mov_f32 $dst, $src0, $attr_chan, $attr, [$m0]",
[]> {
let DisableEncoding = "$m0";
}
def V_CNDMASK_B32_e32 : VOP2 <0x00000000, (outs VReg_32:$dst),
(ins VSrc_32:$src0, VReg_32:$src1, VCCReg:$vcc),
- "V_CNDMASK_B32_e32 $dst, $src0, $src1, [$vcc]",
+ "v_cndmask_b32_e32 $dst, $src0, $src1, [$vcc]",
[]
>{
let DisableEncoding = "$vcc";
def V_CNDMASK_B32_e64 : VOP3 <0x00000100, (outs VReg_32:$dst),
(ins VSrc_32:$src0, VSrc_32:$src1, SSrc_64:$src2),
- "V_CNDMASK_B32_e64 $dst, $src0, $src1, $src2",
+ "v_cndmask_b32_e64 $dst, $src0, $src1, $src2",
[(set i32:$dst, (select i1:$src2, i32:$src1, i32:$src0))]
> {
let src0_modifiers = 0;
0x00000001,
(outs SReg_32:$vdst),
(ins VReg_32:$src0, SSrc_32:$vsrc1),
- "V_READLANE_B32 $vdst, $src0, $vsrc1",
+ "v_readlane_b32 $vdst, $src0, $vsrc1",
[]
>;
0x00000002,
(outs VReg_32:$vdst),
(ins SReg_32:$src0, SSrc_32:$vsrc1),
- "V_WRITELANE_B32 $vdst, $src0, $vsrc1",
+ "v_writelane_b32 $vdst, $src0, $vsrc1",
[]
>;
let isCommutable = 1 in {
-defm V_ADD_F32 : VOP2Inst <vop2<0x3>, "V_ADD_F32",
+defm V_ADD_F32 : VOP2Inst <vop2<0x3>, "v_add_f32",
VOP_F32_F32_F32, fadd
>;
-defm V_SUB_F32 : VOP2Inst <vop2<0x4>, "V_SUB_F32", VOP_F32_F32_F32, fsub>;
-defm V_SUBREV_F32 : VOP2Inst <vop2<0x5>, "V_SUBREV_F32",
- VOP_F32_F32_F32, null_frag, "V_SUB_F32"
+defm V_SUB_F32 : VOP2Inst <vop2<0x4>, "v_sub_f32", VOP_F32_F32_F32, fsub>;
+defm V_SUBREV_F32 : VOP2Inst <vop2<0x5>, "v_subrev_f32",
+ VOP_F32_F32_F32, null_frag, "v_sub_f32"
>;
} // End isCommutable = 1
-defm V_MAC_LEGACY_F32 : VOP2Inst <vop2<0x6>, "V_MAC_LEGACY_F32",
+defm V_MAC_LEGACY_F32 : VOP2Inst <vop2<0x6>, "v_mac_legacy_f32",
VOP_F32_F32_F32
>;
let isCommutable = 1 in {
-defm V_MUL_LEGACY_F32 : VOP2Inst <vop2<0x7>, "V_MUL_LEGACY_F32",
+defm V_MUL_LEGACY_F32 : VOP2Inst <vop2<0x7>, "v_mul_legacy_f32",
VOP_F32_F32_F32, int_AMDGPU_mul
>;
-defm V_MUL_F32 : VOP2Inst <vop2<0x8>, "V_MUL_F32",
+defm V_MUL_F32 : VOP2Inst <vop2<0x8>, "v_mul_f32",
VOP_F32_F32_F32, fmul
>;
-defm V_MUL_I32_I24 : VOP2Inst <vop2<0x9>, "V_MUL_I32_I24",
+defm V_MUL_I32_I24 : VOP2Inst <vop2<0x9>, "v_mul_i32_i24",
VOP_I32_I32_I32, AMDGPUmul_i24
>;
-//defm V_MUL_HI_I32_I24 : VOP2_32 <0x0000000a, "V_MUL_HI_I32_I24", []>;
-defm V_MUL_U32_U24 : VOP2Inst <vop2<0xb>, "V_MUL_U32_U24",
+//defm V_MUL_HI_I32_I24 : VOP2_32 <0x0000000a, "v_mul_hi_i32_i24", []>;
+defm V_MUL_U32_U24 : VOP2Inst <vop2<0xb>, "v_mul_u32_u24",
VOP_I32_I32_I32, AMDGPUmul_u24
>;
-//defm V_MUL_HI_U32_U24 : VOP2_32 <0x0000000c, "V_MUL_HI_U32_U24", []>;
+//defm V_MUL_HI_U32_U24 : VOP2_32 <0x0000000c, "v_mul_hi_u32_u24", []>;
-defm V_MIN_LEGACY_F32 : VOP2Inst <vop2<0xd>, "V_MIN_LEGACY_F32",
+defm V_MIN_LEGACY_F32 : VOP2Inst <vop2<0xd>, "v_min_legacy_f32",
VOP_F32_F32_F32, AMDGPUfmin
>;
-defm V_MAX_LEGACY_F32 : VOP2Inst <vop2<0xe>, "V_MAX_LEGACY_F32",
+defm V_MAX_LEGACY_F32 : VOP2Inst <vop2<0xe>, "v_max_legacy_f32",
VOP_F32_F32_F32, AMDGPUfmax
>;
-defm V_MIN_F32 : VOP2Inst <vop2<0xf>, "V_MIN_F32", VOP_F32_F32_F32, fminnum>;
-defm V_MAX_F32 : VOP2Inst <vop2<0x10>, "V_MAX_F32", VOP_F32_F32_F32, fmaxnum>;
-defm V_MIN_I32 : VOP2Inst <vop2<0x11>, "V_MIN_I32", VOP_I32_I32_I32, AMDGPUsmin>;
-defm V_MAX_I32 : VOP2Inst <vop2<0x12>, "V_MAX_I32", VOP_I32_I32_I32, AMDGPUsmax>;
-defm V_MIN_U32 : VOP2Inst <vop2<0x13>, "V_MIN_U32", VOP_I32_I32_I32, AMDGPUumin>;
-defm V_MAX_U32 : VOP2Inst <vop2<0x14>, "V_MAX_U32", VOP_I32_I32_I32, AMDGPUumax>;
+defm V_MIN_F32 : VOP2Inst <vop2<0xf>, "v_min_f32", VOP_F32_F32_F32, fminnum>;
+defm V_MAX_F32 : VOP2Inst <vop2<0x10>, "v_max_f32", VOP_F32_F32_F32, fmaxnum>;
+defm V_MIN_I32 : VOP2Inst <vop2<0x11>, "v_min_i32", VOP_I32_I32_I32, AMDGPUsmin>;
+defm V_MAX_I32 : VOP2Inst <vop2<0x12>, "v_max_i32", VOP_I32_I32_I32, AMDGPUsmax>;
+defm V_MIN_U32 : VOP2Inst <vop2<0x13>, "v_min_u32", VOP_I32_I32_I32, AMDGPUumin>;
+defm V_MAX_U32 : VOP2Inst <vop2<0x14>, "v_max_u32", VOP_I32_I32_I32, AMDGPUumax>;
-defm V_LSHR_B32 : VOP2Inst <vop2<0x15>, "V_LSHR_B32", VOP_I32_I32_I32, srl>;
+defm V_LSHR_B32 : VOP2Inst <vop2<0x15>, "v_lshr_b32", VOP_I32_I32_I32, srl>;
defm V_LSHRREV_B32 : VOP2Inst <
- vop2<0x16>, "V_LSHRREV_B32", VOP_I32_I32_I32, null_frag, "V_LSHR_B32"
+ vop2<0x16>, "v_lshrrev_b32", VOP_I32_I32_I32, null_frag, "v_lshr_b32"
>;
-defm V_ASHR_I32 : VOP2Inst <vop2<0x17>, "V_ASHR_I32",
+defm V_ASHR_I32 : VOP2Inst <vop2<0x17>, "v_ashr_i32",
VOP_I32_I32_I32, sra
>;
defm V_ASHRREV_I32 : VOP2Inst <
- vop2<0x18>, "V_ASHRREV_I32", VOP_I32_I32_I32, null_frag, "V_ASHR_I32"
+ vop2<0x18>, "v_ashrrev_i32", VOP_I32_I32_I32, null_frag, "v_ashr_i32"
>;
let hasPostISelHook = 1 in {
-defm V_LSHL_B32 : VOP2Inst <vop2<0x19>, "V_LSHL_B32", VOP_I32_I32_I32, shl>;
+defm V_LSHL_B32 : VOP2Inst <vop2<0x19>, "v_lshl_b32", VOP_I32_I32_I32, shl>;
}
defm V_LSHLREV_B32 : VOP2Inst <
- vop2<0x1a>, "V_LSHLREV_B32", VOP_I32_I32_I32, null_frag, "V_LSHL_B32"
+ vop2<0x1a>, "v_lshlrev_b32", VOP_I32_I32_I32, null_frag, "v_lshl_b32"
>;
-defm V_AND_B32 : VOP2Inst <vop2<0x1b>, "V_AND_B32",
+defm V_AND_B32 : VOP2Inst <vop2<0x1b>, "v_and_b32",
VOP_I32_I32_I32, and>;
-defm V_OR_B32 : VOP2Inst <vop2<0x1c>, "V_OR_B32",
+defm V_OR_B32 : VOP2Inst <vop2<0x1c>, "v_or_b32",
VOP_I32_I32_I32, or
>;
-defm V_XOR_B32 : VOP2Inst <vop2<0x1d>, "V_XOR_B32",
+defm V_XOR_B32 : VOP2Inst <vop2<0x1d>, "v_xor_b32",
VOP_I32_I32_I32, xor
>;
} // End isCommutable = 1
-defm V_BFM_B32 : VOP2Inst <vop2<0x1e>, "V_BFM_B32",
+defm V_BFM_B32 : VOP2Inst <vop2<0x1e>, "v_bfm_b32",
VOP_I32_I32_I32, AMDGPUbfm>;
-defm V_MAC_F32 : VOP2Inst <vop2<0x1f>, "V_MAC_F32", VOP_F32_F32_F32>;
-defm V_MADMK_F32 : VOP2Inst <vop2<0x20>, "V_MADMK_F32", VOP_F32_F32_F32>;
-defm V_MADAK_F32 : VOP2Inst <vop2<0x21>, "V_MADAK_F32", VOP_F32_F32_F32>;
-defm V_BCNT_U32_B32 : VOP2Inst <vop2<0x22>, "V_BCNT_U32_B32", VOP_I32_I32_I32>;
-defm V_MBCNT_LO_U32_B32 : VOP2Inst <vop2<0x23>, "V_MBCNT_LO_U32_B32",
+defm V_MAC_F32 : VOP2Inst <vop2<0x1f>, "v_mac_f32", VOP_F32_F32_F32>;
+defm V_MADMK_F32 : VOP2Inst <vop2<0x20>, "v_madmk_f32", VOP_F32_F32_F32>;
+defm V_MADAK_F32 : VOP2Inst <vop2<0x21>, "v_madak_f32", VOP_F32_F32_F32>;
+defm V_BCNT_U32_B32 : VOP2Inst <vop2<0x22>, "v_bcnt_u32_b32", VOP_I32_I32_I32>;
+defm V_MBCNT_LO_U32_B32 : VOP2Inst <vop2<0x23>, "v_mbcnt_lo_u32_b32",
VOP_I32_I32_I32
>;
-defm V_MBCNT_HI_U32_B32 : VOP2Inst <vop2<0x24>, "V_MBCNT_HI_U32_B32",
+defm V_MBCNT_HI_U32_B32 : VOP2Inst <vop2<0x24>, "v_mbcnt_hi_u32_b32",
VOP_I32_I32_I32
>;
let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC
// No patterns so that the scalar instructions are always selected.
// The scalar versions will be replaced with vector when needed later.
-defm V_ADD_I32 : VOP2bInst <vop2<0x25>, "V_ADD_I32",
+defm V_ADD_I32 : VOP2bInst <vop2<0x25>, "v_add_i32",
VOP_I32_I32_I32, add
>;
-defm V_SUB_I32 : VOP2bInst <vop2<0x26>, "V_SUB_I32",
+defm V_SUB_I32 : VOP2bInst <vop2<0x26>, "v_sub_i32",
VOP_I32_I32_I32, sub
>;
-defm V_SUBREV_I32 : VOP2bInst <vop2<0x27>, "V_SUBREV_I32",
- VOP_I32_I32_I32, null_frag, "V_SUB_I32"
+defm V_SUBREV_I32 : VOP2bInst <vop2<0x27>, "v_subrev_i32",
+ VOP_I32_I32_I32, null_frag, "v_sub_i32"
>;
let Uses = [VCC] in { // Carry-in comes from VCC
-defm V_ADDC_U32 : VOP2bInst <vop2<0x28>, "V_ADDC_U32",
+defm V_ADDC_U32 : VOP2bInst <vop2<0x28>, "v_addc_u32",
VOP_I32_I32_I32_VCC, adde
>;
-defm V_SUBB_U32 : VOP2bInst <vop2<0x29>, "V_SUBB_U32",
+defm V_SUBB_U32 : VOP2bInst <vop2<0x29>, "v_subb_u32",
VOP_I32_I32_I32_VCC, sube
>;
-defm V_SUBBREV_U32 : VOP2bInst <vop2<0x2a>, "V_SUBBREV_U32",
- VOP_I32_I32_I32_VCC, null_frag, "V_SUBB_U32"
+defm V_SUBBREV_U32 : VOP2bInst <vop2<0x2a>, "v_subbrev_u32",
+ VOP_I32_I32_I32_VCC, null_frag, "v_subb_u32"
>;
} // End Uses = [VCC]
} // End isCommutable = 1, Defs = [VCC]
-defm V_LDEXP_F32 : VOP2Inst <vop2<0x2b>, "V_LDEXP_F32",
+defm V_LDEXP_F32 : VOP2Inst <vop2<0x2b>, "v_ldexp_f32",
VOP_F32_F32_I32, AMDGPUldexp
>;
-////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "V_CVT_PKACCUM_U8_F32", []>;
-////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "V_CVT_PKNORM_I16_F32", []>;
-////def V_CVT_PKNORM_U16_F32 : VOP2_U16 <0x0000002e, "V_CVT_PKNORM_U16_F32", []>;
-defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <vop2<0x2f>, "V_CVT_PKRTZ_F16_F32",
+////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "v_cvt_pkaccum_u8_f32", []>;
+////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "v_cvt_pknorm_i16_f32", []>;
+////def V_CVT_PKNORM_U16_F32 : VOP2_U16 <0x0000002e, "v_cvt_pknorm_u16_f32", []>;
+defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <vop2<0x2f>, "v_cvt_pkrtz_f16_f32",
VOP_I32_F32_F32, int_SI_packf16
>;
-////def V_CVT_PK_U16_U32 : VOP2_U16 <0x00000030, "V_CVT_PK_U16_U32", []>;
-////def V_CVT_PK_I16_I32 : VOP2_I16 <0x00000031, "V_CVT_PK_I16_I32", []>;
+////def V_CVT_PK_U16_U32 : VOP2_U16 <0x00000030, "v_cvt_pk_u16_u32", []>;
+////def V_CVT_PK_I16_I32 : VOP2_I16 <0x00000031, "v_cvt_pk_i16_i32", []>;
//===----------------------------------------------------------------------===//
// VOP3 Instructions
//===----------------------------------------------------------------------===//
-defm V_MAD_LEGACY_F32 : VOP3Inst <vop3<0x140>, "V_MAD_LEGACY_F32",
+defm V_MAD_LEGACY_F32 : VOP3Inst <vop3<0x140>, "v_mad_legacy_f32",
VOP_F32_F32_F32_F32
>;
-defm V_MAD_F32 : VOP3Inst <vop3<0x141>, "V_MAD_F32",
+defm V_MAD_F32 : VOP3Inst <vop3<0x141>, "v_mad_f32",
VOP_F32_F32_F32_F32, fmad
>;
-defm V_MAD_I32_I24 : VOP3Inst <vop3<0x142>, "V_MAD_I32_I24",
+defm V_MAD_I32_I24 : VOP3Inst <vop3<0x142>, "v_mad_i32_i24",
VOP_I32_I32_I32_I32, AMDGPUmad_i24
>;
-defm V_MAD_U32_U24 : VOP3Inst <vop3<0x143>, "V_MAD_U32_U24",
+defm V_MAD_U32_U24 : VOP3Inst <vop3<0x143>, "v_mad_u32_u24",
VOP_I32_I32_I32_I32, AMDGPUmad_u24
>;
-defm V_CUBEID_F32 : VOP3Inst <vop3<0x144>, "V_CUBEID_F32",
+defm V_CUBEID_F32 : VOP3Inst <vop3<0x144>, "v_cubeid_f32",
VOP_F32_F32_F32_F32
>;
-defm V_CUBESC_F32 : VOP3Inst <vop3<0x145>, "V_CUBESC_F32",
+defm V_CUBESC_F32 : VOP3Inst <vop3<0x145>, "v_cubesc_f32",
VOP_F32_F32_F32_F32
>;
-defm V_CUBETC_F32 : VOP3Inst <vop3<0x146>, "V_CUBETC_F32",
+defm V_CUBETC_F32 : VOP3Inst <vop3<0x146>, "v_cubetc_f32",
VOP_F32_F32_F32_F32
>;
-defm V_CUBEMA_F32 : VOP3Inst <vop3<0x147>, "V_CUBEMA_F32",
+defm V_CUBEMA_F32 : VOP3Inst <vop3<0x147>, "v_cubema_f32",
VOP_F32_F32_F32_F32
>;
-defm V_BFE_U32 : VOP3Inst <vop3<0x148>, "V_BFE_U32",
+defm V_BFE_U32 : VOP3Inst <vop3<0x148>, "v_bfe_u32",
VOP_I32_I32_I32_I32, AMDGPUbfe_u32
>;
-defm V_BFE_I32 : VOP3Inst <vop3<0x149>, "V_BFE_I32",
+defm V_BFE_I32 : VOP3Inst <vop3<0x149>, "v_bfe_i32",
VOP_I32_I32_I32_I32, AMDGPUbfe_i32
>;
-defm V_BFI_B32 : VOP3Inst <vop3<0x14a>, "V_BFI_B32",
+defm V_BFI_B32 : VOP3Inst <vop3<0x14a>, "v_bfi_b32",
VOP_I32_I32_I32_I32, AMDGPUbfi
>;
-defm V_FMA_F32 : VOP3Inst <vop3<0x14b>, "V_FMA_F32",
+defm V_FMA_F32 : VOP3Inst <vop3<0x14b>, "v_fma_f32",
VOP_F32_F32_F32_F32, fma
>;
-defm V_FMA_F64 : VOP3Inst <vop3<0x14c>, "V_FMA_F64",
+defm V_FMA_F64 : VOP3Inst <vop3<0x14c>, "v_fma_f64",
VOP_F64_F64_F64_F64, fma
>;
-//def V_LERP_U8 : VOP3_U8 <0x0000014d, "V_LERP_U8", []>;
-defm V_ALIGNBIT_B32 : VOP3Inst <vop3<0x14e>, "V_ALIGNBIT_B32",
+//def V_LERP_U8 : VOP3_U8 <0x0000014d, "v_lerp_u8", []>;
+defm V_ALIGNBIT_B32 : VOP3Inst <vop3<0x14e>, "v_alignbit_b32",
VOP_I32_I32_I32_I32
>;
-defm V_ALIGNBYTE_B32 : VOP3Inst <vop3<0x14f>, "V_ALIGNBYTE_B32",
+defm V_ALIGNBYTE_B32 : VOP3Inst <vop3<0x14f>, "v_alignbyte_b32",
VOP_I32_I32_I32_I32
>;
-defm V_MULLIT_F32 : VOP3Inst <vop3<0x150>, "V_MULLIT_F32",
+defm V_MULLIT_F32 : VOP3Inst <vop3<0x150>, "v_mullit_f32",
VOP_F32_F32_F32_F32>;
-////def V_MIN3_F32 : VOP3_MIN3 <0x00000151, "V_MIN3_F32", []>;
-////def V_MIN3_I32 : VOP3_MIN3 <0x00000152, "V_MIN3_I32", []>;
-////def V_MIN3_U32 : VOP3_MIN3 <0x00000153, "V_MIN3_U32", []>;
-////def V_MAX3_F32 : VOP3_MAX3 <0x00000154, "V_MAX3_F32", []>;
-////def V_MAX3_I32 : VOP3_MAX3 <0x00000155, "V_MAX3_I32", []>;
-////def V_MAX3_U32 : VOP3_MAX3 <0x00000156, "V_MAX3_U32", []>;
-////def V_MED3_F32 : VOP3_MED3 <0x00000157, "V_MED3_F32", []>;
-////def V_MED3_I32 : VOP3_MED3 <0x00000158, "V_MED3_I32", []>;
-////def V_MED3_U32 : VOP3_MED3 <0x00000159, "V_MED3_U32", []>;
-//def V_SAD_U8 : VOP3_U8 <0x0000015a, "V_SAD_U8", []>;
-//def V_SAD_HI_U8 : VOP3_U8 <0x0000015b, "V_SAD_HI_U8", []>;
-//def V_SAD_U16 : VOP3_U16 <0x0000015c, "V_SAD_U16", []>;
-defm V_SAD_U32 : VOP3Inst <vop3<0x15d>, "V_SAD_U32",
+////def V_MIN3_F32 : VOP3_MIN3 <0x00000151, "v_min3_f32", []>;
+////def V_MIN3_I32 : VOP3_MIN3 <0x00000152, "v_min3_i32", []>;
+////def V_MIN3_U32 : VOP3_MIN3 <0x00000153, "v_min3_u32", []>;
+////def V_MAX3_F32 : VOP3_MAX3 <0x00000154, "v_max3_f32", []>;
+////def V_MAX3_I32 : VOP3_MAX3 <0x00000155, "v_max3_i32", []>;
+////def V_MAX3_U32 : VOP3_MAX3 <0x00000156, "v_max3_u32", []>;
+////def V_MED3_F32 : VOP3_MED3 <0x00000157, "v_med3_f32", []>;
+////def V_MED3_I32 : VOP3_MED3 <0x00000158, "v_med3_i32", []>;
+////def V_MED3_U32 : VOP3_MED3 <0x00000159, "v_med3_u32", []>;
+//def V_SAD_U8 : VOP3_U8 <0x0000015a, "v_sad_u8", []>;
+//def V_SAD_HI_U8 : VOP3_U8 <0x0000015b, "v_sad_hi_u8", []>;
+//def V_SAD_U16 : VOP3_U16 <0x0000015c, "v_sad_u16", []>;
+defm V_SAD_U32 : VOP3Inst <vop3<0x15d>, "v_sad_u32",
VOP_I32_I32_I32_I32
>;
-////def V_CVT_PK_U8_F32 : VOP3_U8 <0x0000015e, "V_CVT_PK_U8_F32", []>;
+////def V_CVT_PK_U8_F32 : VOP3_U8 <0x0000015e, "v_cvt_pk_u8_f32", []>;
defm V_DIV_FIXUP_F32 : VOP3Inst <
- vop3<0x15f>, "V_DIV_FIXUP_F32", VOP_F32_F32_F32_F32, AMDGPUdiv_fixup
+ vop3<0x15f>, "v_div_fixup_f32", VOP_F32_F32_F32_F32, AMDGPUdiv_fixup
>;
defm V_DIV_FIXUP_F64 : VOP3Inst <
- vop3<0x160>, "V_DIV_FIXUP_F64", VOP_F64_F64_F64_F64, AMDGPUdiv_fixup
+ vop3<0x160>, "v_div_fixup_f64", VOP_F64_F64_F64_F64, AMDGPUdiv_fixup
>;
-defm V_LSHL_B64 : VOP3Inst <vop3<0x161>, "V_LSHL_B64",
+defm V_LSHL_B64 : VOP3Inst <vop3<0x161>, "v_lshl_b64",
VOP_I64_I64_I32, shl
>;
-defm V_LSHR_B64 : VOP3Inst <vop3<0x162>, "V_LSHR_B64",
+defm V_LSHR_B64 : VOP3Inst <vop3<0x162>, "v_lshr_b64",
VOP_I64_I64_I32, srl
>;
-defm V_ASHR_I64 : VOP3Inst <vop3<0x163>, "V_ASHR_I64",
+defm V_ASHR_I64 : VOP3Inst <vop3<0x163>, "v_ashr_i64",
VOP_I64_I64_I32, sra
>;
let isCommutable = 1 in {
-defm V_ADD_F64 : VOP3Inst <vop3<0x164>, "V_ADD_F64",
+defm V_ADD_F64 : VOP3Inst <vop3<0x164>, "v_add_f64",
VOP_F64_F64_F64, fadd
>;
-defm V_MUL_F64 : VOP3Inst <vop3<0x165>, "V_MUL_F64",
+defm V_MUL_F64 : VOP3Inst <vop3<0x165>, "v_mul_f64",
VOP_F64_F64_F64, fmul
>;
-defm V_MIN_F64 : VOP3Inst <vop3<0x166>, "V_MIN_F64",
+defm V_MIN_F64 : VOP3Inst <vop3<0x166>, "v_min_f64",
VOP_F64_F64_F64, fminnum
>;
-defm V_MAX_F64 : VOP3Inst <vop3<0x167>, "V_MAX_F64",
+defm V_MAX_F64 : VOP3Inst <vop3<0x167>, "v_max_f64",
VOP_F64_F64_F64, fmaxnum
>;
} // isCommutable = 1
-defm V_LDEXP_F64 : VOP3Inst <vop3<0x168>, "V_LDEXP_F64",
+defm V_LDEXP_F64 : VOP3Inst <vop3<0x168>, "v_ldexp_f64",
VOP_F64_F64_I32, AMDGPUldexp
>;
let isCommutable = 1 in {
-defm V_MUL_LO_U32 : VOP3Inst <vop3<0x169>, "V_MUL_LO_U32",
+defm V_MUL_LO_U32 : VOP3Inst <vop3<0x169>, "v_mul_lo_u32",
VOP_I32_I32_I32
>;
-defm V_MUL_HI_U32 : VOP3Inst <vop3<0x16a>, "V_MUL_HI_U32",
+defm V_MUL_HI_U32 : VOP3Inst <vop3<0x16a>, "v_mul_hi_u32",
VOP_I32_I32_I32
>;
-defm V_MUL_LO_I32 : VOP3Inst <vop3<0x16b>, "V_MUL_LO_I32",
+defm V_MUL_LO_I32 : VOP3Inst <vop3<0x16b>, "v_mul_lo_i32",
VOP_I32_I32_I32
>;
-defm V_MUL_HI_I32 : VOP3Inst <vop3<0x16c>, "V_MUL_HI_I32",
+defm V_MUL_HI_I32 : VOP3Inst <vop3<0x16c>, "v_mul_hi_i32",
VOP_I32_I32_I32
>;
} // isCommutable = 1
-defm V_DIV_SCALE_F32 : VOP3b_32 <vop3<0x16d>, "V_DIV_SCALE_F32", []>;
+defm V_DIV_SCALE_F32 : VOP3b_32 <vop3<0x16d>, "v_div_scale_f32", []>;
// Double precision division pre-scale.
-defm V_DIV_SCALE_F64 : VOP3b_64 <vop3<0x16e>, "V_DIV_SCALE_F64", []>;
+defm V_DIV_SCALE_F64 : VOP3b_64 <vop3<0x16e>, "v_div_scale_f64", []>;
-defm V_DIV_FMAS_F32 : VOP3Inst <vop3<0x16f>, "V_DIV_FMAS_F32",
+defm V_DIV_FMAS_F32 : VOP3Inst <vop3<0x16f>, "v_div_fmas_f32",
VOP_F32_F32_F32_F32, AMDGPUdiv_fmas
>;
-defm V_DIV_FMAS_F64 : VOP3Inst <vop3<0x170>, "V_DIV_FMAS_F64",
+defm V_DIV_FMAS_F64 : VOP3Inst <vop3<0x170>, "v_div_fmas_f64",
VOP_F64_F64_F64_F64, AMDGPUdiv_fmas
>;
-//def V_MSAD_U8 : VOP3_U8 <0x00000171, "V_MSAD_U8", []>;
-//def V_QSAD_U8 : VOP3_U8 <0x00000172, "V_QSAD_U8", []>;
-//def V_MQSAD_U8 : VOP3_U8 <0x00000173, "V_MQSAD_U8", []>;
+//def V_MSAD_U8 : VOP3_U8 <0x00000171, "v_msad_u8", []>;
+//def V_QSAD_U8 : VOP3_U8 <0x00000172, "v_qsad_u8", []>;
+//def V_MQSAD_U8 : VOP3_U8 <0x00000173, "v_mqsad_u8", []>;
defm V_TRIG_PREOP_F64 : VOP3Inst <
- vop3<0x174>, "V_TRIG_PREOP_F64", VOP_F64_F64_I32, AMDGPUtrig_preop
+ vop3<0x174>, "v_trig_preop_f64", VOP_F64_F64_I32, AMDGPUtrig_preop
>;
//===----------------------------------------------------------------------===//
def SI_LOOP : InstSI <
(outs),
(ins SReg_64:$saved, brtarget:$target),
- "SI_LOOP $saved, $target",
+ "si_loop $saved, $target",
[(int_SI_loop i64:$saved, bb:$target)]
>;
def SI_BREAK : InstSI <
(outs SReg_64:$dst),
(ins SReg_64:$src),
- "SI_ELSE $dst, $src",
+ "si_else $dst, $src",
[(set i64:$dst, (int_SI_break i64:$src))]
>;
def SI_IF_BREAK : InstSI <
(outs SReg_64:$dst),
(ins SReg_64:$vcc, SReg_64:$src),
- "SI_IF_BREAK $dst, $vcc, $src",
+ "si_if_break $dst, $vcc, $src",
[(set i64:$dst, (int_SI_if_break i1:$vcc, i64:$src))]
>;
def SI_ELSE_BREAK : InstSI <
(outs SReg_64:$dst),
(ins SReg_64:$src0, SReg_64:$src1),
- "SI_ELSE_BREAK $dst, $src0, $src1",
+ "si_else_break $dst, $src0, $src1",
[(set i64:$dst, (int_SI_else_break i64:$src0, i64:$src1))]
>;
def SI_END_CF : InstSI <
(outs),
(ins SReg_64:$saved),
- "SI_END_CF $saved",
+ "si_end_cf $saved",
[(int_SI_end_cf i64:$saved)]
>;
def SI_KILL : InstSI <
(outs),
(ins VSrc_32:$src),
- "SI_KILL $src",
+ "si_kill $src",
[(int_AMDGPU_kill f32:$src)]
>;
def SI_INDIRECT_SRC : InstSI <
(outs VReg_32:$dst, SReg_64:$temp),
(ins unknown:$src, VSrc_32:$idx, i32imm:$off),
- "SI_INDIRECT_SRC $dst, $temp, $src, $idx, $off",
+ "si_indirect_src $dst, $temp, $src, $idx, $off",
[]
>;
class SI_INDIRECT_DST<RegisterClass rc> : InstSI <
(outs rc:$dst, SReg_64:$temp),
(ins unknown:$src, VSrc_32:$idx, i32imm:$off, VReg_32:$val),
- "SI_INDIRECT_DST $dst, $temp, $src, $idx, $off, $val",
+ "si_indirect_dst $dst, $temp, $src, $idx, $off, $val",
[]
> {
let Constraints = "$src = $dst";
def V_SUB_F64 : InstSI <
(outs VReg_64:$dst),
(ins VReg_64:$src0, VReg_64:$src1),
- "V_SUB_F64 $dst, $src0, $src1",
+ "v_sub_f64 $dst, $src0, $src1",
[(set f64:$dst, (fsub f64:$src0, f64:$src1))]
>;
def FCLAMP_SI : AMDGPUShaderInst <
(outs VReg_32:$dst),
(ins VSrc_32:$src0),
- "FCLAMP_SI $dst, $src0",
+ "fclamp_si $dst, $src0",
[]
> {
let usesCustomInserter = 1;
let SubtargetPredicate = isCI in {
// Sea island new arithmetic instructinos
-defm V_TRUNC_F64 : VOP1Inst <vop1<0x17>, "V_TRUNC_F64",
+defm V_TRUNC_F64 : VOP1Inst <vop1<0x17>, "v_trunc_f64",
VOP_F64_F64, ftrunc
>;
-defm V_CEIL_F64 : VOP1Inst <vop1<0x18>, "V_CEIL_F64",
+defm V_CEIL_F64 : VOP1Inst <vop1<0x18>, "v_ceil_f64",
VOP_F64_F64, fceil
>;
-defm V_FLOOR_F64 : VOP1Inst <vop1<0x1A>, "V_FLOOR_F64",
+defm V_FLOOR_F64 : VOP1Inst <vop1<0x1A>, "v_floor_f64",
VOP_F64_F64, ffloor
>;
-defm V_RNDNE_F64 : VOP1Inst <vop1<0x19>, "V_RNDNE_F64",
+defm V_RNDNE_F64 : VOP1Inst <vop1<0x19>, "v_rndne_f64",
VOP_F64_F64, frint
>;
-defm V_QSAD_PK_U16_U8 : VOP3Inst <vop3<0x173>, "V_QSAD_PK_U16_U8",
+defm V_QSAD_PK_U16_U8 : VOP3Inst <vop3<0x173>, "v_qsad_pk_u16_u8",
VOP_I32_I32_I32
>;
-defm V_MQSAD_U16_U8 : VOP3Inst <vop3<0x172>, "V_MQSAD_U16_U8",
+defm V_MQSAD_U16_U8 : VOP3Inst <vop3<0x172>, "v_mqsad_u16_u8",
VOP_I32_I32_I32
>;
-defm V_MQSAD_U32_U8 : VOP3Inst <vop3<0x175>, "V_MQSAD_U32_U8",
+defm V_MQSAD_U32_U8 : VOP3Inst <vop3<0x175>, "v_mqsad_u32_u8",
VOP_I32_I32_I32
>;
-defm V_MAD_U64_U32 : VOP3Inst <vop3<0x176>, "V_MAD_U64_U32",
+defm V_MAD_U64_U32 : VOP3Inst <vop3<0x176>, "v_mad_u64_u32",
VOP_I64_I32_I32_I64
>;
// XXX - Does this set VCC?
-defm V_MAD_I64_I32 : VOP3Inst <vop3<0x177>, "V_MAD_I64_I32",
+defm V_MAD_I64_I32 : VOP3Inst <vop3<0x177>, "v_mad_i64_i32",
VOP_I64_I32_I32_I64
>;
; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].Z, KC0[3].W
; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].W, KC0[4].X
; SI-CHECK: {{^}}v4i32_kernel_arg:
-; SI-CHECK: BUFFER_STORE_DWORDX4
+; SI-CHECK: buffer_store_dwordx4
define void @v4i32_kernel_arg(<4 x i32> addrspace(1)* %out, <4 x i32> %in) {
entry:
store <4 x i32> %in, <4 x i32> addrspace(1)* %out
; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].Z, KC0[3].W
; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].W, KC0[4].X
; SI-CHECK: {{^}}v4f32_kernel_arg:
-; SI-CHECK: BUFFER_STORE_DWORDX4
+; SI-CHECK: buffer_store_dwordx4
define void @v4f32_kernel_arg(<4 x float> addrspace(1)* %out, <4 x float> %in) {
entry:
store <4 x float> %in, <4 x float> addrspace(1)* %out
; instructions with B64, U64, and I64 take 64-bit operands.
; FUNC-LABEL: {{^}}local_address_load:
-; CHECK: V_MOV_B32_e{{32|64}} [[PTR:v[0-9]]]
-; CHECK: DS_READ_B32 v{{[0-9]+}}, [[PTR]]
+; CHECK: v_mov_b32_e{{32|64}} [[PTR:v[0-9]]]
+; CHECK: ds_read_b32 v{{[0-9]+}}, [[PTR]]
define void @local_address_load(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
entry:
%0 = load i32 addrspace(3)* %in
}
; FUNC-LABEL: {{^}}local_address_gep:
-; CHECK: S_ADD_I32 [[SPTR:s[0-9]]]
-; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
-; CHECK: DS_READ_B32 [[VPTR]]
+; CHECK: s_add_i32 [[SPTR:s[0-9]]]
+; CHECK: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
+; CHECK: ds_read_b32 [[VPTR]]
define void @local_address_gep(i32 addrspace(1)* %out, i32 addrspace(3)* %in, i32 %offset) {
entry:
%0 = getelementptr i32 addrspace(3)* %in, i32 %offset
}
; FUNC-LABEL: {{^}}local_address_gep_const_offset:
-; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], s{{[0-9]+}}
-; CHECK: DS_READ_B32 v{{[0-9]+}}, [[VPTR]], 0x4,
+; CHECK: v_mov_b32_e32 [[VPTR:v[0-9]+]], s{{[0-9]+}}
+; CHECK: ds_read_b32 v{{[0-9]+}}, [[VPTR]], 0x4,
define void @local_address_gep_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
entry:
%0 = getelementptr i32 addrspace(3)* %in, i32 1
; Offset too large, can't fold into 16-bit immediate offset.
; FUNC-LABEL: {{^}}local_address_gep_large_const_offset:
-; CHECK: S_ADD_I32 [[SPTR:s[0-9]]], s{{[0-9]+}}, 0x10004
-; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
-; CHECK: DS_READ_B32 [[VPTR]]
+; CHECK: s_add_i32 [[SPTR:s[0-9]]], s{{[0-9]+}}, 0x10004
+; CHECK: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
+; CHECK: ds_read_b32 [[VPTR]]
define void @local_address_gep_large_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
entry:
%0 = getelementptr i32 addrspace(3)* %in, i32 16385
}
; FUNC-LABEL: {{^}}null_32bit_lds_ptr:
-; CHECK: V_CMP_NE_I32
-; CHECK-NOT: V_CMP_NE_I32
+; CHECK: v_cmp_ne_i32
+; CHECK-NOT: v_cmp_ne_i32
; CHECK: V_CNDMASK_B32
define void @null_32bit_lds_ptr(i32 addrspace(1)* %out, i32 addrspace(3)* %lds) nounwind {
%cmp = icmp ne i32 addrspace(3)* %lds, null
}
; FUNC-LABEL: {{^}}mul_32bit_ptr:
-; CHECK: V_MUL_LO_I32
-; CHECK-NEXT: V_ADD_I32_e32
-; CHECK-NEXT: DS_READ_B32
+; CHECK: v_mul_lo_i32
+; CHECK-NEXT: v_add_i32_e32
+; CHECK-NEXT: ds_read_b32
define void @mul_32bit_ptr(float addrspace(1)* %out, [3 x float] addrspace(3)* %lds, i32 %tid) {
%ptr = getelementptr [3 x float] addrspace(3)* %lds, i32 %tid, i32 0
%val = load float addrspace(3)* %ptr
@g_lds = addrspace(3) global float zeroinitializer, align 4
; FUNC-LABEL: {{^}}infer_ptr_alignment_global_offset:
-; CHECK: V_MOV_B32_e32 [[REG:v[0-9]+]], 0
-; CHECK: DS_READ_B32 v{{[0-9]+}}, [[REG]]
+; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 0
+; CHECK: ds_read_b32 v{{[0-9]+}}, [[REG]]
define void @infer_ptr_alignment_global_offset(float addrspace(1)* %out, i32 %tid) {
%val = load float addrspace(3)* @g_lds
store float %val, float addrspace(1)* %out
@dst = addrspace(3) global [16384 x i32] zeroinitializer
; FUNC-LABEL: {{^}}global_ptr:
-; CHECK: DS_WRITE_B32
+; CHECK: ds_write_b32
define void @global_ptr() nounwind {
store i32 addrspace(3)* getelementptr ([16384 x i32] addrspace(3)* @dst, i32 0, i32 16), i32 addrspace(3)* addrspace(3)* @ptr
ret void
}
; FUNC-LABEL: {{^}}local_address_store:
-; CHECK: DS_WRITE_B32
+; CHECK: ds_write_b32
define void @local_address_store(i32 addrspace(3)* %out, i32 %val) {
store i32 %val, i32 addrspace(3)* %out
ret void
}
; FUNC-LABEL: {{^}}local_address_gep_store:
-; CHECK: S_ADD_I32 [[SADDR:s[0-9]+]],
-; CHECK: V_MOV_B32_e32 [[ADDR:v[0-9]+]], [[SADDR]]
-; CHECK: DS_WRITE_B32 [[ADDR]], v{{[0-9]+}},
+; CHECK: s_add_i32 [[SADDR:s[0-9]+]],
+; CHECK: v_mov_b32_e32 [[ADDR:v[0-9]+]], [[SADDR]]
+; CHECK: ds_write_b32 [[ADDR]], v{{[0-9]+}},
define void @local_address_gep_store(i32 addrspace(3)* %out, i32, i32 %val, i32 %offset) {
%gep = getelementptr i32 addrspace(3)* %out, i32 %offset
store i32 %val, i32 addrspace(3)* %gep, align 4
}
; FUNC-LABEL: {{^}}local_address_gep_const_offset_store:
-; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], s{{[0-9]+}}
-; CHECK: V_MOV_B32_e32 [[VAL:v[0-9]+]], s{{[0-9]+}}
-; CHECK: DS_WRITE_B32 [[VPTR]], [[VAL]], 0x4
+; CHECK: v_mov_b32_e32 [[VPTR:v[0-9]+]], s{{[0-9]+}}
+; CHECK: v_mov_b32_e32 [[VAL:v[0-9]+]], s{{[0-9]+}}
+; CHECK: ds_write_b32 [[VPTR]], [[VAL]], 0x4
define void @local_address_gep_const_offset_store(i32 addrspace(3)* %out, i32 %val) {
%gep = getelementptr i32 addrspace(3)* %out, i32 1
store i32 %val, i32 addrspace(3)* %gep, align 4
; Offset too large, can't fold into 16-bit immediate offset.
; FUNC-LABEL: {{^}}local_address_gep_large_const_offset_store:
-; CHECK: S_ADD_I32 [[SPTR:s[0-9]]], s{{[0-9]+}}, 0x10004
-; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
-; CHECK: DS_WRITE_B32 [[VPTR]], v{{[0-9]+}}, 0
+; CHECK: s_add_i32 [[SPTR:s[0-9]]], s{{[0-9]+}}, 0x10004
+; CHECK: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
+; CHECK: ds_write_b32 [[VPTR]], v{{[0-9]+}}, 0
define void @local_address_gep_large_const_offset_store(i32 addrspace(3)* %out, i32 %val) {
%gep = getelementptr i32 addrspace(3)* %out, i32 16385
store i32 %val, i32 addrspace(3)* %gep, align 4
; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
; SI-CHECK: {{^}}f64_kernel_arg:
-; SI-CHECK-DAG: S_LOAD_DWORDX2 s[{{[0-9]:[0-9]}}], s[0:1], 0x9
-; SI-CHECK-DAG: S_LOAD_DWORDX2 s[{{[0-9]:[0-9]}}], s[0:1], 0xb
-; SI-CHECK: BUFFER_STORE_DWORDX2
+; SI-CHECK-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0x9
+; SI-CHECK-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0xb
+; SI-CHECK: buffer_store_dwordx2
define void @f64_kernel_arg(double addrspace(1)* %out, double %in) {
entry:
store double %in, double addrspace(1)* %out
;FUNC-LABEL: {{^}}test1:
;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;SI-CHECK: V_ADD_I32_e32 [[REG:v[0-9]+]], {{v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_add_i32_e32 [[REG:v[0-9]+]], {{v[0-9]+, v[0-9]+}}
;SI-CHECK-NOT: [[REG]]
-;SI-CHECK: BUFFER_STORE_DWORD [[REG]],
+;SI-CHECK: buffer_store_dword [[REG]],
define void @test1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%b_ptr = getelementptr i32 addrspace(1)* %in, i32 1
%a = load i32 addrspace(1)* %in
;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;SI-CHECK: V_ADD_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_ADD_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;SI-CHECK: V_ADD_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_ADD_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_ADD_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_ADD_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
; EG-CHECK: ADD_INT
; EG-CHECK: ADD_INT
; EG-CHECK: ADD_INT
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
define void @test8(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b) {
entry:
%0 = add <8 x i32> %a, %b
; EG-CHECK: ADD_INT
; EG-CHECK: ADD_INT
; EG-CHECK: ADD_INT
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
define void @test16(<16 x i32> addrspace(1)* %out, <16 x i32> %a, <16 x i32> %b) {
entry:
%0 = add <16 x i32> %a, %b
}
; FUNC-LABEL: {{^}}add64:
-; SI-CHECK: S_ADD_U32
-; SI-CHECK: S_ADDC_U32
+; SI-CHECK: s_add_u32
+; SI-CHECK: s_addc_u32
define void @add64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
entry:
%0 = add i64 %a, %b
ret void
}
-; The V_ADDC_U32 and V_ADD_I32 instruction can't read SGPRs, because they
+; The v_addc_u32 and v_add_i32 instruction can't read SGPRs, because they
; use VCC. The test is designed so that %a will be stored in an SGPR and
; %0 will be stored in a VGPR, so the comiler will be forced to copy %a
; to a VGPR before doing the add.
; FUNC-LABEL: {{^}}add64_sgpr_vgpr:
-; SI-CHECK-NOT: V_ADDC_U32_e32 s
+; SI-CHECK-NOT: v_addc_u32_e32 s
define void @add64_sgpr_vgpr(i64 addrspace(1)* %out, i64 %a, i64 addrspace(1)* %in) {
entry:
%0 = load i64 addrspace(1)* %in
; Test i64 add inside a branch.
; FUNC-LABEL: {{^}}add64_in_branch:
-; SI-CHECK: S_ADD_U32
-; SI-CHECK: S_ADDC_U32
+; SI-CHECK: s_add_u32
+; SI-CHECK: s_addc_u32
define void @add64_in_branch(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b, i64 %c) {
entry:
%0 = icmp eq i64 %a, 0
declare i32 @llvm.r600.read.tidig.x() readnone
; SI-LABEL: {{^}}test_i64_vreg:
-; SI: V_ADD_I32
-; SI: V_ADDC_U32
+; SI: v_add_i32
+; SI: v_addc_u32
define void @test_i64_vreg(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %inA, i64 addrspace(1)* noalias %inB) {
%tid = call i32 @llvm.r600.read.tidig.x() readnone
%a_ptr = getelementptr i64 addrspace(1)* %inA, i32 %tid
; Check that the SGPR add operand is correctly moved to a VGPR.
; SI-LABEL: {{^}}sgpr_operand:
-; SI: V_ADD_I32
-; SI: V_ADDC_U32
+; SI: v_add_i32
+; SI: v_addc_u32
define void @sgpr_operand(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 addrspace(1)* noalias %in_bar, i64 %a) {
%foo = load i64 addrspace(1)* %in, align 8
%result = add i64 %foo, %a
; SGPR as other operand.
;
; SI-LABEL: {{^}}sgpr_operand_reversed:
-; SI: V_ADD_I32
-; SI: V_ADDC_U32
+; SI: v_add_i32
+; SI: v_addc_u32
define void @sgpr_operand_reversed(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 %a) {
%foo = load i64 addrspace(1)* %in, align 8
%result = add i64 %a, %foo
; SI-LABEL: {{^}}test_v2i64_sreg:
-; SI: S_ADD_U32
-; SI: S_ADDC_U32
-; SI: S_ADD_U32
-; SI: S_ADDC_U32
+; SI: s_add_u32
+; SI: s_addc_u32
+; SI: s_add_u32
+; SI: s_addc_u32
define void @test_v2i64_sreg(<2 x i64> addrspace(1)* noalias %out, <2 x i64> %a, <2 x i64> %b) {
%result = add <2 x i64> %a, %b
store <2 x i64> %result, <2 x i64> addrspace(1)* %out
}
; SI-LABEL: {{^}}test_v2i64_vreg:
-; SI: V_ADD_I32
-; SI: V_ADDC_U32
-; SI: V_ADD_I32
-; SI: V_ADDC_U32
+; SI: v_add_i32
+; SI: v_addc_u32
+; SI: v_add_i32
+; SI: v_addc_u32
define void @test_v2i64_vreg(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %inA, <2 x i64> addrspace(1)* noalias %inB) {
%tid = call i32 @llvm.r600.read.tidig.x() readnone
%a_ptr = getelementptr <2 x i64> addrspace(1)* %inA, i32 %tid
}
; SI-LABEL: {{^}}trunc_i64_add_to_i32:
-; SI: S_LOAD_DWORD s[[SREG0:[0-9]+]]
-; SI: S_LOAD_DWORD s[[SREG1:[0-9]+]]
-; SI: S_ADD_I32 [[SRESULT:s[0-9]+]], s[[SREG1]], s[[SREG0]]
-; SI-NOT: ADDC
-; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
-; SI: BUFFER_STORE_DWORD [[VRESULT]],
+; SI: s_load_dword s[[SREG0:[0-9]+]]
+; SI: s_load_dword s[[SREG1:[0-9]+]]
+; SI: s_add_i32 [[SRESULT:s[0-9]+]], s[[SREG1]], s[[SREG0]]
+; SI-NOT: addc
+; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
+; SI: buffer_store_dword [[VRESULT]],
define void @trunc_i64_add_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
%add = add i64 %b, %a
%trunc = trunc i64 %add to i32
; already in a VGPR after the first read.
; CHECK-LABEL: {{^}}do_as_ptr_calcs:
-; CHECK: S_LOAD_DWORD [[SREG1:s[0-9]+]],
-; CHECK: V_MOV_B32_e32 [[VREG1:v[0-9]+]], [[SREG1]]
-; CHECK-DAG: DS_READ_B32 v{{[0-9]+}}, [[VREG1]] offset:12
-; CHECK-DAG: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}} offset:20
+; CHECK: s_load_dword [[SREG1:s[0-9]+]],
+; CHECK: v_mov_b32_e32 [[VREG1:v[0-9]+]], [[SREG1]]
+; CHECK-DAG: ds_read_b32 v{{[0-9]+}}, [[VREG1]] offset:12
+; CHECK-DAG: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:20
define void @do_as_ptr_calcs(%struct.foo addrspace(3)* nocapture %ptr) nounwind {
entry:
%x = getelementptr inbounds %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 0
; EG: AND_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: AND_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; SI: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_and_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_and_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
; EG: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; SI: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_and_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_and_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_and_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_and_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
}
; FUNC-LABEL: {{^}}s_and_i32:
-; SI: S_AND_B32
+; SI: s_and_b32
define void @s_and_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
%and = and i32 %a, %b
store i32 %and, i32 addrspace(1)* %out, align 4
}
; FUNC-LABEL: {{^}}s_and_constant_i32:
-; SI: S_AND_B32 s{{[0-9]+}}, s{{[0-9]+}}, 0x12d687
+; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x12d687
define void @s_and_constant_i32(i32 addrspace(1)* %out, i32 %a) {
%and = and i32 %a, 1234567
store i32 %and, i32 addrspace(1)* %out, align 4
}
; FUNC-LABEL: {{^}}v_and_i32:
-; SI: V_AND_B32
+; SI: v_and_b32
define void @v_and_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) {
%a = load i32 addrspace(1)* %aptr, align 4
%b = load i32 addrspace(1)* %bptr, align 4
}
; FUNC-LABEL: {{^}}v_and_constant_i32:
-; SI: V_AND_B32
+; SI: v_and_b32
define void @v_and_constant_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
%a = load i32 addrspace(1)* %aptr, align 4
%and = and i32 %a, 1234567
}
; FUNC-LABEL: {{^}}s_and_i64:
-; SI: S_AND_B64
+; SI: s_and_b64
define void @s_and_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
%and = and i64 %a, %b
store i64 %and, i64 addrspace(1)* %out, align 8
; FIXME: Should use SGPRs
; FUNC-LABEL: {{^}}s_and_i1:
-; SI: V_AND_B32
+; SI: v_and_b32
define void @s_and_i1(i1 addrspace(1)* %out, i1 %a, i1 %b) {
%and = and i1 %a, %b
store i1 %and, i1 addrspace(1)* %out
}
; FUNC-LABEL: {{^}}s_and_constant_i64:
-; SI: S_AND_B64
+; SI: s_and_b64
define void @s_and_constant_i64(i64 addrspace(1)* %out, i64 %a) {
%and = and i64 %a, 281474976710655
store i64 %and, i64 addrspace(1)* %out, align 8
}
; FUNC-LABEL: {{^}}v_and_i64:
-; SI: V_AND_B32
-; SI: V_AND_B32
+; SI: v_and_b32
+; SI: v_and_b32
define void @v_and_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) {
%a = load i64 addrspace(1)* %aptr, align 8
%b = load i64 addrspace(1)* %bptr, align 8
}
; FUNC-LABEL: {{^}}v_and_i64_br:
-; SI: V_AND_B32
-; SI: V_AND_B32
+; SI: v_and_b32
+; SI: v_and_b32
define void @v_and_i64_br(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr, i32 %cond) {
entry:
%tmp0 = icmp eq i32 %cond, 0
}
; FUNC-LABEL: {{^}}v_and_constant_i64:
-; SI: V_AND_B32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
-; SI: V_AND_B32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; SI: v_and_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; SI: v_and_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
define void @v_and_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
%a = load i64 addrspace(1)* %aptr, align 8
%and = and i64 %a, 1234567
; FIXME: Replace and 0 with mov 0
; FUNC-LABEL: {{^}}v_and_inline_imm_i64:
-; SI: V_AND_B32_e32 {{v[0-9]+}}, 64, {{v[0-9]+}}
-; SI: V_AND_B32_e32 {{v[0-9]+}}, 0, {{v[0-9]+}}
+; SI: v_and_b32_e32 {{v[0-9]+}}, 64, {{v[0-9]+}}
+; SI: v_and_b32_e32 {{v[0-9]+}}, 0, {{v[0-9]+}}
define void @v_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
%a = load i64 addrspace(1)* %aptr, align 8
%and = and i64 %a, 64
}
; FUNC-LABEL: {{^}}s_and_inline_imm_i64:
-; SI: S_AND_B64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 64
+; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 64
define void @s_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
%and = and i64 %a, 64
store i64 %and, i64 addrspace(1)* %out, align 8
; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
; CHECK-LABEL: {{^}}anyext_i1_i32:
-; CHECK: V_CNDMASK_B32_e64
+; CHECK: v_cndmask_b32_e64
define void @anyext_i1_i32(i32 addrspace(1)* %out, i32 %cond) {
entry:
%0 = icmp eq i32 %cond, 0
; FIXME: We end up with zero argument for ADD, because
; SIRegisterInfo::eliminateFrameIndex() blindly replaces the frame index
; with the appropriate offset. We should fold this into the store.
-; SI-ALLOCA: V_ADD_I32_e32 [[PTRREG:v[0-9]+]], 0, v{{[0-9]+}}
-; SI-ALLOCA: BUFFER_STORE_DWORD {{v[0-9]+}}, [[PTRREG]], s[{{[0-9]+:[0-9]+}}]
+; SI-ALLOCA: v_add_i32_e32 [[PTRREG:v[0-9]+]], 0, v{{[0-9]+}}
+; SI-ALLOCA: buffer_store_dword {{v[0-9]+}}, [[PTRREG]], s[{{[0-9]+:[0-9]+}}]
;
; FIXME: The AMDGPUPromoteAlloca pass should be able to convert this
; alloca to a vector. It currently fails because it does not know how
; to interpret:
; getelementptr [4 x i32]* %alloca, i32 1, i32 %b
-; SI-PROMOTE: V_ADD_I32_e32 [[PTRREG:v[0-9]+]], 16
-; SI-PROMOTE: DS_WRITE_B32 [[PTRREG]]
+; SI-PROMOTE: v_add_i32_e32 [[PTRREG:v[0-9]+]], 16
+; SI-PROMOTE: ds_write_b32 [[PTRREG]]
define void @test_private_array_ptr_calc(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) {
%alloca = alloca [4 x i32], i32 4, align 16
%tid = call i32 @llvm.SI.tid() readnone
declare i32 @llvm.SI.tid() readnone
; SI-LABEL: {{^}}test_array_ptr_calc:
-; SI: V_MUL_LO_I32
-; SI: V_MUL_HI_I32
+; SI: v_mul_lo_i32
+; SI: v_mul_hi_i32
define void @test_array_ptr_calc(i32 addrspace(1)* noalias %out, [1025 x i32] addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) {
%tid = call i32 @llvm.SI.tid() readnone
%a_ptr = getelementptr [1025 x i32] addrspace(1)* %inA, i32 %tid, i32 0
; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=CI -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i32_offset:
-; SI: S_LOAD_DWORD [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI: S_LOAD_DWORD [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
-; SI-DAG: V_MOV_B32_e32 [[VCMP:v[0-9]+]], 7
-; SI-DAG: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[PTR]]
-; SI-DAG: V_MOV_B32_e32 [[VSWAP:v[0-9]+]], [[SWAP]]
-; SI: DS_CMPST_RTN_B32 [[RESULT:v[0-9]+]], [[VPTR]], [[VCMP]], [[VSWAP]] offset:16 [M0]
-; SI: S_ENDPGM
+; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI-DAG: v_mov_b32_e32 [[VCMP:v[0-9]+]], 7
+; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
+; SI-DAG: v_mov_b32_e32 [[VSWAP:v[0-9]+]], [[SWAP]]
+; SI: ds_cmpst_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[VCMP]], [[VSWAP]] offset:16 [M0]
+; SI: s_endpgm
define void @lds_atomic_cmpxchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %swap) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%pair = cmpxchg i32 addrspace(3)* %gep, i32 7, i32 %swap seq_cst monotonic
}
; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i64_offset:
-; SI: S_LOAD_DWORD [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI: S_LOAD_DWORDX2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
-; SI: S_MOV_B64 s{{\[}}[[LOSCMP:[0-9]+]]:[[HISCMP:[0-9]+]]{{\]}}, 7
-; SI-DAG: V_MOV_B32_e32 v[[LOVCMP:[0-9]+]], s[[LOSCMP]]
-; SI-DAG: V_MOV_B32_e32 v[[HIVCMP:[0-9]+]], s[[HISCMP]]
-; SI-DAG: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[PTR]]
-; SI-DAG: V_MOV_B32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]]
-; SI-DAG: V_MOV_B32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]]
-; SI: DS_CMPST_RTN_B64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}} offset:32 [M0]
-; SI: BUFFER_STORE_DWORDX2 [[RESULT]],
-; SI: S_ENDPGM
+; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
+; SI: s_mov_b64 s{{\[}}[[LOSCMP:[0-9]+]]:[[HISCMP:[0-9]+]]{{\]}}, 7
+; SI-DAG: v_mov_b32_e32 v[[LOVCMP:[0-9]+]], s[[LOSCMP]]
+; SI-DAG: v_mov_b32_e32 v[[HIVCMP:[0-9]+]], s[[HISCMP]]
+; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
+; SI-DAG: v_mov_b32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]]
+; SI-DAG: v_mov_b32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]]
+; SI: ds_cmpst_rtn_b64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}} offset:32 [M0]
+; SI: buffer_store_dwordx2 [[RESULT]],
+; SI: s_endpgm
define void @lds_atomic_cmpxchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr, i64 %swap) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%pair = cmpxchg i64 addrspace(3)* %gep, i64 7, i64 %swap seq_cst monotonic
}
; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i32_bad_si_offset
-; SI: DS_CMPST_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-; CI: DS_CMPST_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 [M0]
-; SI: S_ENDPGM
+; SI: ds_cmpst_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+; CI: ds_cmpst_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 [M0]
+; SI: s_endpgm
define void @lds_atomic_cmpxchg_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %swap, i32 %a, i32 %b) nounwind {
%sub = sub i32 %a, %b
%add = add i32 %sub, 4
}
; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_noret_i32_offset:
-; SI: S_LOAD_DWORD [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
-; SI: S_LOAD_DWORD [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xa
-; SI-DAG: V_MOV_B32_e32 [[VCMP:v[0-9]+]], 7
-; SI-DAG: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[PTR]]
-; SI-DAG: V_MOV_B32_e32 [[VSWAP:v[0-9]+]], [[SWAP]]
-; SI: DS_CMPST_B32 [[VPTR]], [[VCMP]], [[VSWAP]] offset:16 [M0]
-; SI: S_ENDPGM
+; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
+; SI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xa
+; SI-DAG: v_mov_b32_e32 [[VCMP:v[0-9]+]], 7
+; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
+; SI-DAG: v_mov_b32_e32 [[VSWAP:v[0-9]+]], [[SWAP]]
+; SI: ds_cmpst_b32 [[VPTR]], [[VCMP]], [[VSWAP]] offset:16 [M0]
+; SI: s_endpgm
define void @lds_atomic_cmpxchg_noret_i32_offset(i32 addrspace(3)* %ptr, i32 %swap) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%pair = cmpxchg i32 addrspace(3)* %gep, i32 7, i32 %swap seq_cst monotonic
}
; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_noret_i64_offset:
-; SI: S_LOAD_DWORD [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
-; SI: S_LOAD_DWORDX2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI: S_MOV_B64 s{{\[}}[[LOSCMP:[0-9]+]]:[[HISCMP:[0-9]+]]{{\]}}, 7
-; SI-DAG: V_MOV_B32_e32 v[[LOVCMP:[0-9]+]], s[[LOSCMP]]
-; SI-DAG: V_MOV_B32_e32 v[[HIVCMP:[0-9]+]], s[[HISCMP]]
-; SI-DAG: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[PTR]]
-; SI-DAG: V_MOV_B32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]]
-; SI-DAG: V_MOV_B32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]]
-; SI: DS_CMPST_B64 [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}} offset:32 [M0]
-; SI: S_ENDPGM
+; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
+; SI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI: s_mov_b64 s{{\[}}[[LOSCMP:[0-9]+]]:[[HISCMP:[0-9]+]]{{\]}}, 7
+; SI-DAG: v_mov_b32_e32 v[[LOVCMP:[0-9]+]], s[[LOSCMP]]
+; SI-DAG: v_mov_b32_e32 v[[HIVCMP:[0-9]+]], s[[HISCMP]]
+; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
+; SI-DAG: v_mov_b32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]]
+; SI-DAG: v_mov_b32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]]
+; SI: ds_cmpst_b64 [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}} offset:32 [M0]
+; SI: s_endpgm
define void @lds_atomic_cmpxchg_noret_i64_offset(i64 addrspace(3)* %ptr, i64 %swap) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%pair = cmpxchg i64 addrspace(3)* %gep, i64 7, i64 %swap seq_cst monotonic
; FUNC-LABEL: {{^}}atomic_add_local:
; R600: LDS_ADD *
-; SI: DS_ADD_U32
+; SI: ds_add_u32
define void @atomic_add_local(i32 addrspace(3)* %local) {
%unused = atomicrmw volatile add i32 addrspace(3)* %local, i32 5 seq_cst
ret void
; FUNC-LABEL: {{^}}atomic_add_local_const_offset:
; R600: LDS_ADD *
-; SI: DS_ADD_U32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
define void @atomic_add_local_const_offset(i32 addrspace(3)* %local) {
%gep = getelementptr i32 addrspace(3)* %local, i32 4
%val = atomicrmw volatile add i32 addrspace(3)* %gep, i32 5 seq_cst
; FUNC-LABEL: {{^}}atomic_add_ret_local:
; R600: LDS_ADD_RET *
-; SI: DS_ADD_RTN_U32
+; SI: ds_add_rtn_u32
define void @atomic_add_ret_local(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
%val = atomicrmw volatile add i32 addrspace(3)* %local, i32 5 seq_cst
store i32 %val, i32 addrspace(1)* %out
; FUNC-LABEL: {{^}}atomic_add_ret_local_const_offset:
; R600: LDS_ADD_RET *
-; SI: DS_ADD_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:20
+; SI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:20
define void @atomic_add_ret_local_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
%gep = getelementptr i32 addrspace(3)* %local, i32 5
%val = atomicrmw volatile add i32 addrspace(3)* %gep, i32 5 seq_cst
; FUNC-LABEL: {{^}}atomic_sub_local:
; R600: LDS_SUB *
-; SI: DS_SUB_U32
+; SI: ds_sub_u32
define void @atomic_sub_local(i32 addrspace(3)* %local) {
%unused = atomicrmw volatile sub i32 addrspace(3)* %local, i32 5 seq_cst
ret void
; FUNC-LABEL: {{^}}atomic_sub_local_const_offset:
; R600: LDS_SUB *
-; SI: DS_SUB_U32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: ds_sub_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
define void @atomic_sub_local_const_offset(i32 addrspace(3)* %local) {
%gep = getelementptr i32 addrspace(3)* %local, i32 4
%val = atomicrmw volatile sub i32 addrspace(3)* %gep, i32 5 seq_cst
; FUNC-LABEL: {{^}}atomic_sub_ret_local:
; R600: LDS_SUB_RET *
-; SI: DS_SUB_RTN_U32
+; SI: ds_sub_rtn_u32
define void @atomic_sub_ret_local(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
%val = atomicrmw volatile sub i32 addrspace(3)* %local, i32 5 seq_cst
store i32 %val, i32 addrspace(1)* %out
; FUNC-LABEL: {{^}}atomic_sub_ret_local_const_offset:
; R600: LDS_SUB_RET *
-; SI: DS_SUB_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:20
+; SI: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:20
define void @atomic_sub_ret_local_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
%gep = getelementptr i32 addrspace(3)* %local, i32 5
%val = atomicrmw volatile sub i32 addrspace(3)* %gep, i32 5 seq_cst
; R600-CHECK: {{^}}bfi_def:
; R600-CHECK: BFI_INT
; SI-CHECK: @bfi_def
-; SI-CHECK: V_BFI_B32
+; SI-CHECK: v_bfi_b32
define void @bfi_def(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
entry:
%0 = xor i32 %x, -1
; R600-CHECK: {{^}}bfi_sha256_ch:
; R600-CHECK: BFI_INT
; SI-CHECK: @bfi_sha256_ch
-; SI-CHECK: V_BFI_B32
+; SI-CHECK: v_bfi_b32
define void @bfi_sha256_ch(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
entry:
%0 = xor i32 %y, %z
; R600-CHECK: {{^}}bfi_sha256_ma:
; R600-CHECK: XOR_INT * [[DST:T[0-9]+\.[XYZW]]], KC0[2].Z, KC0[2].W
; R600-CHECK: BFI_INT * {{T[0-9]+\.[XYZW]}}, {{[[DST]]|PV\.[XYZW]}}, KC0[3].X, KC0[2].W
-; SI-CHECK: V_XOR_B32_e32 [[DST:v[0-9]+]], {{s[0-9]+, v[0-9]+}}
-; SI-CHECK: V_BFI_B32 {{v[0-9]+}}, [[DST]], {{s[0-9]+, v[0-9]+}}
+; SI-CHECK: v_xor_b32_e32 [[DST:v[0-9]+]], {{s[0-9]+, v[0-9]+}}
+; SI-CHECK: v_bfi_b32 {{v[0-9]+}}, [[DST]], {{s[0-9]+, v[0-9]+}}
define void @bfi_sha256_ma(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
entry:
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
; FUNC-LABEL: {{^}}v32i8_to_v8i32:
-; SI: S_ENDPGM
+; SI: s_endpgm
define void @v32i8_to_v8i32(<32 x i8> addrspace(2)* inreg) #0 {
entry:
%1 = load <32 x i8> addrspace(2)* %0
}
; FUNC-LABEL: {{^}}i8ptr_v16i8ptr:
-; SI: S_ENDPGM
+; SI: s_endpgm
define void @i8ptr_v16i8ptr(<16 x i8> addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
%0 = bitcast i8 addrspace(1)* %in to <16 x i8> addrspace(1)*
}
; FUNC-LABEL: {{^}}bitcast_v2i32_to_f64:
-; SI: S_ENDPGM
+; SI: s_endpgm
define void @bitcast_v2i32_to_f64(double addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%val = load <2 x i32> addrspace(1)* %in, align 8
%add = add <2 x i32> %val, <i32 4, i32 9>
}
; FUNC-LABEL: {{^}}bitcast_f64_to_v2i32:
-; SI: S_ENDPGM
+; SI: s_endpgm
define void @bitcast_f64_to_v2i32(<2 x i32> addrspace(1)* %out, double addrspace(1)* %in) {
%val = load double addrspace(1)* %in, align 8
%add = fadd double %val, 4.0
declare <4 x i64> @llvm.bswap.v4i64(<4 x i64>) nounwind readnone
; FUNC-LABEL: @test_bswap_i32
-; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]]
-; SI-DAG: V_ALIGNBIT_B32 [[TMP0:v[0-9]+]], [[VAL]], [[VAL]], 8
-; SI-DAG: V_ALIGNBIT_B32 [[TMP1:v[0-9]+]], [[VAL]], [[VAL]], 24
-; SI-DAG: S_MOV_B32 [[K:s[0-9]+]], 0xff00ff
-; SI: V_BFI_B32 [[RESULT:v[0-9]+]], [[K]], [[TMP1]], [[TMP0]]
-; SI: BUFFER_STORE_DWORD [[RESULT]]
-; SI: S_ENDPGM
+; SI: buffer_load_dword [[VAL:v[0-9]+]]
+; SI-DAG: v_alignbit_b32 [[TMP0:v[0-9]+]], [[VAL]], [[VAL]], 8
+; SI-DAG: v_alignbit_b32 [[TMP1:v[0-9]+]], [[VAL]], [[VAL]], 24
+; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0xff00ff
+; SI: v_bfi_b32 [[RESULT:v[0-9]+]], [[K]], [[TMP1]], [[TMP0]]
+; SI: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
define void @test_bswap_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%val = load i32 addrspace(1)* %in, align 4
%bswap = call i32 @llvm.bswap.i32(i32 %val) nounwind readnone
}
; FUNC-LABEL: @test_bswap_v2i32
-; SI-DAG: V_ALIGNBIT_B32
-; SI-DAG: V_ALIGNBIT_B32
-; SI-DAG: V_BFI_B32
-; SI-DAG: V_ALIGNBIT_B32
-; SI-DAG: V_ALIGNBIT_B32
-; SI-DAG: V_BFI_B32
-; SI: S_ENDPGM
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_bfi_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_bfi_b32
+; SI: s_endpgm
define void @test_bswap_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) nounwind {
%val = load <2 x i32> addrspace(1)* %in, align 8
%bswap = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %val) nounwind readnone
}
; FUNC-LABEL: @test_bswap_v4i32
-; SI-DAG: V_ALIGNBIT_B32
-; SI-DAG: V_ALIGNBIT_B32
-; SI-DAG: V_BFI_B32
-; SI-DAG: V_ALIGNBIT_B32
-; SI-DAG: V_ALIGNBIT_B32
-; SI-DAG: V_BFI_B32
-; SI-DAG: V_ALIGNBIT_B32
-; SI-DAG: V_ALIGNBIT_B32
-; SI-DAG: V_BFI_B32
-; SI-DAG: V_ALIGNBIT_B32
-; SI-DAG: V_ALIGNBIT_B32
-; SI-DAG: V_BFI_B32
-; SI: S_ENDPGM
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_bfi_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_bfi_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_bfi_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_bfi_b32
+; SI: s_endpgm
define void @test_bswap_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) nounwind {
%val = load <4 x i32> addrspace(1)* %in, align 16
%bswap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val) nounwind readnone
}
; FUNC-LABEL: @test_bswap_v8i32
-; SI-DAG: V_ALIGNBIT_B32
-; SI-DAG: V_ALIGNBIT_B32
-; SI-DAG: V_BFI_B32
-; SI-DAG: V_ALIGNBIT_B32
-; SI-DAG: V_ALIGNBIT_B32
-; SI-DAG: V_BFI_B32
-; SI-DAG: V_ALIGNBIT_B32
-; SI-DAG: V_ALIGNBIT_B32
-; SI-DAG: V_BFI_B32
-; SI-DAG: V_ALIGNBIT_B32
-; SI-DAG: V_ALIGNBIT_B32
-; SI-DAG: V_BFI_B32
-; SI-DAG: V_ALIGNBIT_B32
-; SI-DAG: V_ALIGNBIT_B32
-; SI-DAG: V_BFI_B32
-; SI-DAG: V_ALIGNBIT_B32
-; SI-DAG: V_ALIGNBIT_B32
-; SI-DAG: V_BFI_B32
-; SI-DAG: V_ALIGNBIT_B32
-; SI-DAG: V_ALIGNBIT_B32
-; SI-DAG: V_BFI_B32
-; SI-DAG: V_ALIGNBIT_B32
-; SI-DAG: V_ALIGNBIT_B32
-; SI-DAG: V_BFI_B32
-; SI: S_ENDPGM
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_bfi_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_bfi_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_bfi_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_bfi_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_bfi_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_bfi_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_bfi_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_bfi_b32
+; SI: s_endpgm
define void @test_bswap_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) nounwind {
%val = load <8 x i32> addrspace(1)* %in, align 32
%bswap = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %val) nounwind readnone
; R600-CHECK: MOV
; R600-CHECK-NOT: MOV
; SI-CHECK: {{^}}build_vector2:
-; SI-CHECK-DAG: V_MOV_B32_e32 v[[X:[0-9]]], 5
-; SI-CHECK-DAG: V_MOV_B32_e32 v[[Y:[0-9]]], 6
-; SI-CHECK: BUFFER_STORE_DWORDX2 v{{\[}}[[X]]:[[Y]]{{\]}}
+; SI-CHECK-DAG: v_mov_b32_e32 v[[X:[0-9]]], 5
+; SI-CHECK-DAG: v_mov_b32_e32 v[[Y:[0-9]]], 6
+; SI-CHECK: buffer_store_dwordx2 v{{\[}}[[X]]:[[Y]]{{\]}}
define void @build_vector2 (<2 x i32> addrspace(1)* %out) {
entry:
store <2 x i32> <i32 5, i32 6>, <2 x i32> addrspace(1)* %out
; R600-CHECK: MOV
; R600-CHECK-NOT: MOV
; SI-CHECK: {{^}}build_vector4:
-; SI-CHECK-DAG: V_MOV_B32_e32 v[[X:[0-9]]], 5
-; SI-CHECK-DAG: V_MOV_B32_e32 v[[Y:[0-9]]], 6
-; SI-CHECK-DAG: V_MOV_B32_e32 v[[Z:[0-9]]], 7
-; SI-CHECK-DAG: V_MOV_B32_e32 v[[W:[0-9]]], 8
-; SI-CHECK: BUFFER_STORE_DWORDX4 v{{\[}}[[X]]:[[W]]{{\]}}
+; SI-CHECK-DAG: v_mov_b32_e32 v[[X:[0-9]]], 5
+; SI-CHECK-DAG: v_mov_b32_e32 v[[Y:[0-9]]], 6
+; SI-CHECK-DAG: v_mov_b32_e32 v[[Z:[0-9]]], 7
+; SI-CHECK-DAG: v_mov_b32_e32 v[[W:[0-9]]], 8
+; SI-CHECK: buffer_store_dwordx4 v{{\[}}[[X]]:[[W]]{{\]}}
define void @build_vector4 (<4 x i32> addrspace(1)* %out) {
entry:
store <4 x i32> <i32 5, i32 6, i32 7, i32 8>, <4 x i32> addrspace(1)* %out
; OPT: mul nsw i32
; OPT-NEXT: sext
; SI-LLC-LABEL: {{^}}test:
-; SI-LLC: S_MUL_I32
-; SI-LLC-NOT: MUL
+; SI-LLC: s_mul_i32
+; SI-LLC-NOT: mul
define void @test(i8 addrspace(1)* nocapture readonly %in, i32 %a, i8 %b) {
entry:
%0 = mul nsw i32 %a, 3
declare float @llvm.fabs.f32(float) #1
; FUNC-LABEL: @commute_add_imm_fabs_f32
-; SI: BUFFER_LOAD_DWORD [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; SI: V_ADD_F32_e64 [[REG:v[0-9]+]], 2.0, |[[X]]|
-; SI-NEXT: BUFFER_STORE_DWORD [[REG]]
+; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI: v_add_f32_e64 [[REG:v[0-9]+]], 2.0, |[[X]]|
+; SI-NEXT: buffer_store_dword [[REG]]
define void @commute_add_imm_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
%gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
}
; FUNC-LABEL: @commute_mul_imm_fneg_fabs_f32
-; SI: BUFFER_LOAD_DWORD [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; SI: V_MUL_F32_e64 [[REG:v[0-9]+]], -4.0, |[[X]]|
-; SI-NEXT: BUFFER_STORE_DWORD [[REG]]
+; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI: v_mul_f32_e64 [[REG:v[0-9]+]], -4.0, |[[X]]|
+; SI-NEXT: buffer_store_dword [[REG]]
define void @commute_mul_imm_fneg_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
%gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
}
; FUNC-LABEL: @commute_mul_imm_fneg_f32
-; SI: BUFFER_LOAD_DWORD [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; SI: V_MUL_F32_e32 [[REG:v[0-9]+]], -4.0, [[X]]
-; SI-NEXT: BUFFER_STORE_DWORD [[REG]]
+; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI: v_mul_f32_e32 [[REG:v[0-9]+]], -4.0, [[X]]
+; SI-NEXT: buffer_store_dword [[REG]]
define void @commute_mul_imm_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
%gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
; instructions that access scratch memory. Bit 23, which is the add_tid_enable
; bit, is only set for scratch access, so we can check for the absence of this
; value if we want to ensure scratch memory is not being used.
-; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
-; SI-NOT: MOVREL
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v1i32(<2 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) nounwind {
%concat = shufflevector <1 x i32> %a, <1 x i32> %b, <2 x i32> <i32 0, i32 1>
store <2 x i32> %concat, <2 x i32> addrspace(1)* %out, align 8
}
; FUNC-LABEL: {{^}}test_concat_v2i32:
-; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
-; SI-NOT: MOVREL
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v2i32(<4 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
%concat = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
store <4 x i32> %concat, <4 x i32> addrspace(1)* %out, align 16
}
; FUNC-LABEL: {{^}}test_concat_v4i32:
-; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
-; SI-NOT: MOVREL
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v4i32(<8 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) nounwind {
%concat = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
store <8 x i32> %concat, <8 x i32> addrspace(1)* %out, align 32
}
; FUNC-LABEL: {{^}}test_concat_v8i32:
-; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
-; SI-NOT: MOVREL
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v8i32(<16 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b) nounwind {
%concat = shufflevector <8 x i32> %a, <8 x i32> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
store <16 x i32> %concat, <16 x i32> addrspace(1)* %out, align 64
}
; FUNC-LABEL: {{^}}test_concat_v16i32:
-; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
-; SI-NOT: MOVREL
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v16i32(<32 x i32> addrspace(1)* %out, <16 x i32> %a, <16 x i32> %b) nounwind {
%concat = shufflevector <16 x i32> %a, <16 x i32> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
store <32 x i32> %concat, <32 x i32> addrspace(1)* %out, align 128
}
; FUNC-LABEL: {{^}}test_concat_v1f32:
-; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
-; SI-NOT: MOVREL
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v1f32(<2 x float> addrspace(1)* %out, <1 x float> %a, <1 x float> %b) nounwind {
%concat = shufflevector <1 x float> %a, <1 x float> %b, <2 x i32> <i32 0, i32 1>
store <2 x float> %concat, <2 x float> addrspace(1)* %out, align 8
}
; FUNC-LABEL: {{^}}test_concat_v2f32:
-; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
-; SI-NOT: MOVREL
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v2f32(<4 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) nounwind {
%concat = shufflevector <2 x float> %a, <2 x float> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
store <4 x float> %concat, <4 x float> addrspace(1)* %out, align 16
}
; FUNC-LABEL: {{^}}test_concat_v4f32:
-; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
-; SI-NOT: MOVREL
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v4f32(<8 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) nounwind {
%concat = shufflevector <4 x float> %a, <4 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
store <8 x float> %concat, <8 x float> addrspace(1)* %out, align 32
}
; FUNC-LABEL: {{^}}test_concat_v8f32:
-; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
-; SI-NOT: MOVREL
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v8f32(<16 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) nounwind {
%concat = shufflevector <8 x float> %a, <8 x float> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
store <16 x float> %concat, <16 x float> addrspace(1)* %out, align 64
}
; FUNC-LABEL: {{^}}test_concat_v16f32:
-; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
-; SI-NOT: MOVREL
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v16f32(<32 x float> addrspace(1)* %out, <16 x float> %a, <16 x float> %b) nounwind {
%concat = shufflevector <16 x float> %a, <16 x float> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
store <32 x float> %concat, <32 x float> addrspace(1)* %out, align 128
}
; FUNC-LABEL: {{^}}test_concat_v1i64:
-; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
-; SI-NOT: MOVREL
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v1i64(<2 x double> addrspace(1)* %out, <1 x double> %a, <1 x double> %b) nounwind {
%concat = shufflevector <1 x double> %a, <1 x double> %b, <2 x i32> <i32 0, i32 1>
store <2 x double> %concat, <2 x double> addrspace(1)* %out, align 16
}
; FUNC-LABEL: {{^}}test_concat_v2i64:
-; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
-; SI-NOT: MOVREL
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v2i64(<4 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b) nounwind {
%concat = shufflevector <2 x double> %a, <2 x double> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
store <4 x double> %concat, <4 x double> addrspace(1)* %out, align 32
}
; FUNC-LABEL: {{^}}test_concat_v4i64:
-; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
-; SI-NOT: MOVREL
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v4i64(<8 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b) nounwind {
%concat = shufflevector <4 x double> %a, <4 x double> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
store <8 x double> %concat, <8 x double> addrspace(1)* %out, align 64
}
; FUNC-LABEL: {{^}}test_concat_v8i64:
-; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
-; SI-NOT: MOVREL
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v8i64(<16 x double> addrspace(1)* %out, <8 x double> %a, <8 x double> %b) nounwind {
%concat = shufflevector <8 x double> %a, <8 x double> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
store <16 x double> %concat, <16 x double> addrspace(1)* %out, align 128
}
; FUNC-LABEL: {{^}}test_concat_v16i64:
-; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
-; SI-NOT: MOVREL
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v16i64(<32 x double> addrspace(1)* %out, <16 x double> %a, <16 x double> %b) nounwind {
%concat = shufflevector <16 x double> %a, <16 x double> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
store <32 x double> %concat, <32 x double> addrspace(1)* %out, align 256
}
; FUNC-LABEL: {{^}}test_concat_v1f64:
-; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
-; SI-NOT: MOVREL
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v1f64(<2 x double> addrspace(1)* %out, <1 x double> %a, <1 x double> %b) nounwind {
%concat = shufflevector <1 x double> %a, <1 x double> %b, <2 x i32> <i32 0, i32 1>
store <2 x double> %concat, <2 x double> addrspace(1)* %out, align 16
}
; FUNC-LABEL: {{^}}test_concat_v2f64:
-; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
-; SI-NOT: MOVREL
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v2f64(<4 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b) nounwind {
%concat = shufflevector <2 x double> %a, <2 x double> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
store <4 x double> %concat, <4 x double> addrspace(1)* %out, align 32
}
; FUNC-LABEL: {{^}}test_concat_v4f64:
-; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
-; SI-NOT: MOVREL
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v4f64(<8 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b) nounwind {
%concat = shufflevector <4 x double> %a, <4 x double> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
store <8 x double> %concat, <8 x double> addrspace(1)* %out, align 64
}
; FUNC-LABEL: {{^}}test_concat_v8f64:
-; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
-; SI-NOT: MOVREL
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v8f64(<16 x double> addrspace(1)* %out, <8 x double> %a, <8 x double> %b) nounwind {
%concat = shufflevector <8 x double> %a, <8 x double> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
store <16 x double> %concat, <16 x double> addrspace(1)* %out, align 128
}
; FUNC-LABEL: {{^}}test_concat_v16f64:
-; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
-; SI-NOT: MOVREL
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v16f64(<32 x double> addrspace(1)* %out, <16 x double> %a, <16 x double> %b) nounwind {
%concat = shufflevector <16 x double> %a, <16 x double> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
store <32 x double> %concat, <32 x double> addrspace(1)* %out, align 256
}
; FUNC-LABEL: {{^}}test_concat_v1i1:
-; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
-; SI-NOT: MOVREL
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v1i1(<2 x i1> addrspace(1)* %out, <1 x i1> %a, <1 x i1> %b) nounwind {
%concat = shufflevector <1 x i1> %a, <1 x i1> %b, <2 x i32> <i32 0, i32 1>
store <2 x i1> %concat, <2 x i1> addrspace(1)* %out
}
; FUNC-LABEL: {{^}}test_concat_v2i1:
-; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
-; SI-NOT: MOVREL
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v2i1(<4 x i1> addrspace(1)* %out, <2 x i1> %a, <2 x i1> %b) nounwind {
%concat = shufflevector <2 x i1> %a, <2 x i1> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
store <4 x i1> %concat, <4 x i1> addrspace(1)* %out
}
; FUNC-LABEL: {{^}}test_concat_v4i1:
-; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
-; SI-NOT: MOVREL
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v4i1(<8 x i1> addrspace(1)* %out, <4 x i1> %a, <4 x i1> %b) nounwind {
%concat = shufflevector <4 x i1> %a, <4 x i1> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
store <8 x i1> %concat, <8 x i1> addrspace(1)* %out
}
; FUNC-LABEL: {{^}}test_concat_v8i1:
-; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
-; SI-NOT: MOVREL
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v8i1(<16 x i1> addrspace(1)* %out, <8 x i1> %a, <8 x i1> %b) nounwind {
%concat = shufflevector <8 x i1> %a, <8 x i1> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
store <16 x i1> %concat, <16 x i1> addrspace(1)* %out
}
; FUNC-LABEL: {{^}}test_concat_v16i1:
-; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
-; SI-NOT: MOVREL
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v16i1(<32 x i1> addrspace(1)* %out, <16 x i1> %a, <16 x i1> %b) nounwind {
%concat = shufflevector <16 x i1> %a, <16 x i1> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
store <32 x i1> %concat, <32 x i1> addrspace(1)* %out
}
; FUNC-LABEL: {{^}}test_concat_v32i1:
-; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
-; SI-NOT: MOVREL
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v32i1(<64 x i1> addrspace(1)* %out, <32 x i1> %a, <32 x i1> %b) nounwind {
%concat = shufflevector <32 x i1> %a, <32 x i1> %b, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
store <64 x i1> %concat, <64 x i1> addrspace(1)* %out
}
; FUNC-LABEL: {{^}}test_concat_v1i16:
-; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
-; SI-NOT: MOVREL
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v1i16(<2 x i16> addrspace(1)* %out, <1 x i16> %a, <1 x i16> %b) nounwind {
%concat = shufflevector <1 x i16> %a, <1 x i16> %b, <2 x i32> <i32 0, i32 1>
store <2 x i16> %concat, <2 x i16> addrspace(1)* %out, align 4
}
; FUNC-LABEL: {{^}}test_concat_v2i16:
-; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
-; SI-NOT: MOVREL
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v2i16(<4 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x i16> %b) nounwind {
%concat = shufflevector <2 x i16> %a, <2 x i16> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
store <4 x i16> %concat, <4 x i16> addrspace(1)* %out, align 8
}
; FUNC-LABEL: {{^}}test_concat_v4i16:
-; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
-; SI-NOT: MOVREL
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v4i16(<8 x i16> addrspace(1)* %out, <4 x i16> %a, <4 x i16> %b) nounwind {
%concat = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
store <8 x i16> %concat, <8 x i16> addrspace(1)* %out, align 16
}
; FUNC-LABEL: {{^}}test_concat_v8i16:
-; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
-; SI-NOT: MOVREL
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v8i16(<16 x i16> addrspace(1)* %out, <8 x i16> %a, <8 x i16> %b) nounwind {
%concat = shufflevector <8 x i16> %a, <8 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
store <16 x i16> %concat, <16 x i16> addrspace(1)* %out, align 32
}
; FUNC-LABEL: {{^}}test_concat_v16i16:
-; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
-; SI-NOT: MOVREL
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v16i16(<32 x i16> addrspace(1)* %out, <16 x i16> %a, <16 x i16> %b) nounwind {
%concat = shufflevector <16 x i16> %a, <16 x i16> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
store <32 x i16> %concat, <32 x i16> addrspace(1)* %out, align 64
; RUN: llc -march=r600 -mcpu=tahiti < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}test_copy_v4i8:
-; SI: BUFFER_LOAD_DWORD [[REG:v[0-9]+]]
-; SI: BUFFER_STORE_DWORD [[REG]]
-; SI: S_ENDPGM
+; SI: buffer_load_dword [[REG:v[0-9]+]]
+; SI: buffer_store_dword [[REG]]
+; SI: s_endpgm
define void @test_copy_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
%val = load <4 x i8> addrspace(1)* %in, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out, align 4
}
; FUNC-LABEL: {{^}}test_copy_v4i8_x2:
-; SI: BUFFER_LOAD_DWORD [[REG:v[0-9]+]]
-; SI: BUFFER_STORE_DWORD [[REG]]
-; SI: BUFFER_STORE_DWORD [[REG]]
-; SI: S_ENDPGM
+; SI: buffer_load_dword [[REG:v[0-9]+]]
+; SI: buffer_store_dword [[REG]]
+; SI: buffer_store_dword [[REG]]
+; SI: s_endpgm
define void @test_copy_v4i8_x2(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %in) nounwind {
%val = load <4 x i8> addrspace(1)* %in, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out0, align 4
}
; FUNC-LABEL: {{^}}test_copy_v4i8_x3:
-; SI: BUFFER_LOAD_DWORD [[REG:v[0-9]+]]
-; SI: BUFFER_STORE_DWORD [[REG]]
-; SI: BUFFER_STORE_DWORD [[REG]]
-; SI: BUFFER_STORE_DWORD [[REG]]
-; SI: S_ENDPGM
+; SI: buffer_load_dword [[REG:v[0-9]+]]
+; SI: buffer_store_dword [[REG]]
+; SI: buffer_store_dword [[REG]]
+; SI: buffer_store_dword [[REG]]
+; SI: s_endpgm
define void @test_copy_v4i8_x3(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %out2, <4 x i8> addrspace(1)* %in) nounwind {
%val = load <4 x i8> addrspace(1)* %in, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out0, align 4
}
; FUNC-LABEL: {{^}}test_copy_v4i8_x4:
-; SI: BUFFER_LOAD_DWORD [[REG:v[0-9]+]]
-; SI: BUFFER_STORE_DWORD [[REG]]
-; SI: BUFFER_STORE_DWORD [[REG]]
-; SI: BUFFER_STORE_DWORD [[REG]]
-; SI: BUFFER_STORE_DWORD [[REG]]
-; SI: S_ENDPGM
+; SI: buffer_load_dword [[REG:v[0-9]+]]
+; SI: buffer_store_dword [[REG]]
+; SI: buffer_store_dword [[REG]]
+; SI: buffer_store_dword [[REG]]
+; SI: buffer_store_dword [[REG]]
+; SI: s_endpgm
define void @test_copy_v4i8_x4(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %out2, <4 x i8> addrspace(1)* %out3, <4 x i8> addrspace(1)* %in) nounwind {
%val = load <4 x i8> addrspace(1)* %in, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out0, align 4
}
; FUNC-LABEL: {{^}}test_copy_v4i8_extra_use:
-; SI: BUFFER_LOAD_UBYTE
-; SI: BUFFER_LOAD_UBYTE
-; SI: BUFFER_LOAD_UBYTE
-; SI: BUFFER_LOAD_UBYTE
-; SI-DAG: V_ADD
-; SI-DAG: V_ADD
-; SI-DAG: V_ADD
-; SI-DAG: V_ADD
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI_DAG: BUFFER_STORE_BYTE
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI-DAG: v_add
+; SI-DAG: v_add
+; SI-DAG: v_add
+; SI-DAG: v_add
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_store_byte
+; SI_DAG: buffer_store_byte
; After scalarizing v4i8 loads is fixed.
-; XSI: BUFFER_LOAD_DWORD
+; XSI: buffer_load_dword
; XSI: V_BFE
; XSI: V_ADD
; XSI: V_ADD
; XSI: V_ADD
-; XSI: BUFFER_STORE_DWORD
-; XSI: BUFFER_STORE_DWORD
+; XSI: buffer_store_dword
+; XSI: buffer_store_dword
-; SI: S_ENDPGM
+; SI: s_endpgm
define void @test_copy_v4i8_extra_use(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %in) nounwind {
%val = load <4 x i8> addrspace(1)* %in, align 4
%add = add <4 x i8> %val, <i8 9, i8 9, i8 9, i8 9>
}
; FUNC-LABEL: {{^}}test_copy_v4i8_x2_extra_use:
-; SI: BUFFER_LOAD_UBYTE
-; SI: BUFFER_LOAD_UBYTE
-; SI: BUFFER_LOAD_UBYTE
-; SI: BUFFER_LOAD_UBYTE
-; SI-DAG: V_ADD
-; SI-DAG: V_ADD
-; SI-DAG: V_ADD
-; SI-DAG: V_ADD
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI_DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI_DAG: BUFFER_STORE_BYTE
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI-DAG: v_add
+; SI-DAG: v_add
+; SI-DAG: v_add
+; SI-DAG: v_add
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_store_byte
+; SI_DAG: buffer_store_byte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_store_byte
+; SI_DAG: buffer_store_byte
-; XSI: BUFFER_LOAD_DWORD
+; XSI: buffer_load_dword
; XSI: BFE
-; XSI: BUFFER_STORE_DWORD
+; XSI: buffer_store_dword
; XSI: V_ADD
-; XSI: BUFFER_STORE_DWORD
-; XSI-NEXT: BUFFER_STORE_DWORD
+; XSI: buffer_store_dword
+; XSI-NEXT: buffer_store_dword
-; SI: S_ENDPGM
+; SI: s_endpgm
define void @test_copy_v4i8_x2_extra_use(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %out2, <4 x i8> addrspace(1)* %in) nounwind {
%val = load <4 x i8> addrspace(1)* %in, align 4
%add = add <4 x i8> %val, <i8 9, i8 9, i8 9, i8 9>
}
; FUNC-LABEL: {{^}}test_copy_v3i8:
-; SI-NOT: BFE
-; SI-NOT: BFI
-; SI: S_ENDPGM
+; SI-NOT: bfe
+; SI-NOT: bfi
+; SI: s_endpgm
define void @test_copy_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)* %in) nounwind {
%val = load <3 x i8> addrspace(1)* %in, align 4
store <3 x i8> %val, <3 x i8> addrspace(1)* %out, align 4
}
; FUNC-LABEL: {{^}}test_copy_v4i8_volatile_load:
-; SI: BUFFER_LOAD_UBYTE
-; SI: BUFFER_LOAD_UBYTE
-; SI: BUFFER_LOAD_UBYTE
-; SI: BUFFER_LOAD_UBYTE
-; SI: S_ENDPGM
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: s_endpgm
define void @test_copy_v4i8_volatile_load(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
%val = load volatile <4 x i8> addrspace(1)* %in, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out, align 4
}
; FUNC-LABEL: {{^}}test_copy_v4i8_volatile_store:
-; SI: BUFFER_LOAD_UBYTE
-; SI: BUFFER_LOAD_UBYTE
-; SI: BUFFER_LOAD_UBYTE
-; SI: BUFFER_LOAD_UBYTE
-; SI: BUFFER_STORE_BYTE
-; SI: BUFFER_STORE_BYTE
-; SI: BUFFER_STORE_BYTE
-; SI: BUFFER_STORE_BYTE
-; SI: S_ENDPGM
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: s_endpgm
define void @test_copy_v4i8_volatile_store(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
%val = load <4 x i8> addrspace(1)* %in, align 4
store volatile <4 x i8> %val, <4 x i8> addrspace(1)* %out, align 4
declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) nounwind readnone
; FUNC-LABEL: {{^}}s_ctlz_zero_undef_i32:
-; SI: S_LOAD_DWORD [[VAL:s[0-9]+]],
-; SI: S_FLBIT_I32_B32 [[SRESULT:s[0-9]+]], [[VAL]]
-; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
-; SI: BUFFER_STORE_DWORD [[VRESULT]],
-; SI: S_ENDPGM
+; SI: s_load_dword [[VAL:s[0-9]+]],
+; SI: s_flbit_i32_b32 [[SRESULT:s[0-9]+]], [[VAL]]
+; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
+; SI: buffer_store_dword [[VRESULT]],
+; SI: s_endpgm
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
; EG: FFBH_UINT {{\*? *}}[[RESULT]]
define void @s_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
}
; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i32:
-; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
-; SI: V_FFBH_U32_e32 [[RESULT:v[0-9]+]], [[VAL]]
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; SI: buffer_load_dword [[VAL:v[0-9]+]],
+; SI: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
; EG: FFBH_UINT {{\*? *}}[[RESULT]]
define void @v_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
}
; FUNC-LABEL: {{^}}v_ctlz_zero_undef_v2i32:
-; SI: BUFFER_LOAD_DWORDX2
-; SI: V_FFBH_U32_e32
-; SI: V_FFBH_U32_e32
-; SI: BUFFER_STORE_DWORDX2
-; SI: S_ENDPGM
+; SI: buffer_load_dwordx2
+; SI: v_ffbh_u32_e32
+; SI: v_ffbh_u32_e32
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}}
; EG: FFBH_UINT {{\*? *}}[[RESULT]]
; EG: FFBH_UINT {{\*? *}}[[RESULT]]
}
; FUNC-LABEL: {{^}}v_ctlz_zero_undef_v4i32:
-; SI: BUFFER_LOAD_DWORDX4
-; SI: V_FFBH_U32_e32
-; SI: V_FFBH_U32_e32
-; SI: V_FFBH_U32_e32
-; SI: V_FFBH_U32_e32
-; SI: BUFFER_STORE_DWORDX4
-; SI: S_ENDPGM
+; SI: buffer_load_dwordx4
+; SI: v_ffbh_u32_e32
+; SI: v_ffbh_u32_e32
+; SI: v_ffbh_u32_e32
+; SI: v_ffbh_u32_e32
+; SI: buffer_store_dwordx4
+; SI: s_endpgm
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}}
; EG: FFBH_UINT {{\*? *}}[[RESULT]]
; EG: FFBH_UINT {{\*? *}}[[RESULT]]
declare <16 x i32> @llvm.ctpop.v16i32(<16 x i32>) nounwind readnone
; FUNC-LABEL: {{^}}s_ctpop_i32:
-; SI: S_LOAD_DWORD [[SVAL:s[0-9]+]],
-; SI: S_BCNT1_I32_B32 [[SRESULT:s[0-9]+]], [[SVAL]]
-; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
-; SI: BUFFER_STORE_DWORD [[VRESULT]],
-; SI: S_ENDPGM
+; SI: s_load_dword [[SVAL:s[0-9]+]],
+; SI: s_bcnt1_i32_b32 [[SRESULT:s[0-9]+]], [[SVAL]]
+; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
+; SI: buffer_store_dword [[VRESULT]],
+; SI: s_endpgm
; EG: BCNT_INT
define void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
; XXX - Why 0 in register?
; FUNC-LABEL: {{^}}v_ctpop_i32:
-; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
-; SI: V_MOV_B32_e32 [[VZERO:v[0-9]+]], 0
-; SI: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VZERO]]
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; SI: buffer_load_dword [[VAL:v[0-9]+]],
+; SI: v_mov_b32_e32 [[VZERO:v[0-9]+]], 0
+; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VZERO]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
; EG: BCNT_INT
define void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
}
; FUNC-LABEL: {{^}}v_ctpop_add_chain_i32:
-; SI: BUFFER_LOAD_DWORD [[VAL0:v[0-9]+]],
-; SI: BUFFER_LOAD_DWORD [[VAL1:v[0-9]+]],
-; SI: V_MOV_B32_e32 [[VZERO:v[0-9]+]], 0
-; SI: V_BCNT_U32_B32_e32 [[MIDRESULT:v[0-9]+]], [[VAL1]], [[VZERO]]
-; SI-NEXT: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; SI: buffer_load_dword [[VAL0:v[0-9]+]],
+; SI: buffer_load_dword [[VAL1:v[0-9]+]],
+; SI: v_mov_b32_e32 [[VZERO:v[0-9]+]], 0
+; SI: v_bcnt_u32_b32_e32 [[MIDRESULT:v[0-9]+]], [[VAL1]], [[VZERO]]
+; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
; EG: BCNT_INT
; EG: BCNT_INT
}
; FUNC-LABEL: {{^}}v_ctpop_add_sgpr_i32:
-; SI: BUFFER_LOAD_DWORD [[VAL0:v[0-9]+]],
-; SI-NEXT: S_WAITCNT
-; SI-NEXT: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL0]], s{{[0-9]+}}
-; SI-NEXT: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; SI: buffer_load_dword [[VAL0:v[0-9]+]],
+; SI-NEXT: s_waitcnt
+; SI-NEXT: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL0]], s{{[0-9]+}}
+; SI-NEXT: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
define void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1, i32 %sval) nounwind {
%val0 = load i32 addrspace(1)* %in0, align 4
%ctpop0 = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone
}
; FUNC-LABEL: {{^}}v_ctpop_v2i32:
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: S_ENDPGM
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: s_endpgm
; EG: BCNT_INT
; EG: BCNT_INT
}
; FUNC-LABEL: {{^}}v_ctpop_v4i32:
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: S_ENDPGM
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: s_endpgm
; EG: BCNT_INT
; EG: BCNT_INT
}
; FUNC-LABEL: {{^}}v_ctpop_v8i32:
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: S_ENDPGM
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: s_endpgm
; EG: BCNT_INT
; EG: BCNT_INT
}
; FUNC-LABEL: {{^}}v_ctpop_v16i32:
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: S_ENDPGM
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: s_endpgm
; EG: BCNT_INT
; EG: BCNT_INT
}
; FUNC-LABEL: {{^}}v_ctpop_i32_add_inline_constant:
-; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
-; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; SI: buffer_load_dword [[VAL:v[0-9]+]],
+; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
; EG: BCNT_INT
define void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
}
; FUNC-LABEL: {{^}}v_ctpop_i32_add_inline_constant_inv:
-; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
-; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; SI: buffer_load_dword [[VAL:v[0-9]+]],
+; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
; EG: BCNT_INT
define void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
}
; FUNC-LABEL: {{^}}v_ctpop_i32_add_literal:
-; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
-; SI: V_MOV_B32_e32 [[LIT:v[0-9]+]], 0x1869f
-; SI: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]]
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; SI: buffer_load_dword [[VAL:v[0-9]+]],
+; SI: v_mov_b32_e32 [[LIT:v[0-9]+]], 0x1869f
+; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
define void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
%val = load i32 addrspace(1)* %in, align 4
%ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
}
; FUNC-LABEL: {{^}}v_ctpop_i32_add_var:
-; SI-DAG: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
-; SI-DAG: S_LOAD_DWORD [[VAR:s[0-9]+]],
-; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]],
+; SI-DAG: s_load_dword [[VAR:s[0-9]+]],
+; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
; EG: BCNT_INT
define void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
}
; FUNC-LABEL: {{^}}v_ctpop_i32_add_var_inv:
-; SI-DAG: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
-; SI-DAG: S_LOAD_DWORD [[VAR:s[0-9]+]],
-; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]],
+; SI-DAG: s_load_dword [[VAR:s[0-9]+]],
+; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
; EG: BCNT_INT
define void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
}
; FUNC-LABEL: {{^}}v_ctpop_i32_add_vvar_inv:
-; SI-DAG: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], {{0$}}
-; SI-DAG: BUFFER_LOAD_DWORD [[VAR:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0 offset:0x10
-; SI: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], {{0$}}
+; SI-DAG: buffer_load_dword [[VAR:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0 offset:0x10
+; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
; EG: BCNT_INT
define void @v_ctpop_i32_add_vvar_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 addrspace(1)* noalias %constptr) nounwind {
; but there are some cases when the should be allowed.
; FUNC-LABEL: {{^}}ctpop_i32_in_br:
-; SI: S_LOAD_DWORD [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xd
-; SI: S_BCNT1_I32_B32 [[SRESULT:s[0-9]+]], [[VAL]]
-; SI: V_MOV_B32_e32 [[RESULT]], [[SRESULT]]
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xd
+; SI: s_bcnt1_i32_b32 [[SRESULT:s[0-9]+]], [[VAL]]
+; SI: v_mov_b32_e32 [[RESULT]], [[SRESULT]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
; EG: BCNT_INT
define void @ctpop_i32_in_br(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %ctpop_arg, i32 %cond) {
entry:
declare <16 x i64> @llvm.ctpop.v16i64(<16 x i64>) nounwind readnone
; FUNC-LABEL: {{^}}s_ctpop_i64:
-; SI: S_LOAD_DWORDX2 [[SVAL:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI: S_BCNT1_I32_B64 [[SRESULT:s[0-9]+]], [[SVAL]]
-; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
-; SI: BUFFER_STORE_DWORD [[VRESULT]],
-; SI: S_ENDPGM
+; SI: s_load_dwordx2 [[SVAL:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI: s_bcnt1_i32_b64 [[SRESULT:s[0-9]+]], [[SVAL]]
+; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
+; SI: buffer_store_dword [[VRESULT]],
+; SI: s_endpgm
define void @s_ctpop_i64(i32 addrspace(1)* noalias %out, i64 %val) nounwind {
%ctpop = call i64 @llvm.ctpop.i64(i64 %val) nounwind readnone
%truncctpop = trunc i64 %ctpop to i32
}
; FUNC-LABEL: {{^}}v_ctpop_i64:
-; SI: BUFFER_LOAD_DWORDX2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}},
-; SI: V_MOV_B32_e32 [[VZERO:v[0-9]+]], 0
-; SI: V_BCNT_U32_B32_e32 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], [[VZERO]]
-; SI-NEXT: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; SI: buffer_load_dwordx2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}},
+; SI: v_mov_b32_e32 [[VZERO:v[0-9]+]], 0
+; SI: v_bcnt_u32_b32_e32 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], [[VZERO]]
+; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
define void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
%val = load i64 addrspace(1)* %in, align 8
%ctpop = call i64 @llvm.ctpop.i64(i64 %val) nounwind readnone
}
; FUNC-LABEL: {{^}}s_ctpop_v2i64:
-; SI: S_BCNT1_I32_B64
-; SI: S_BCNT1_I32_B64
-; SI: S_ENDPGM
+; SI: s_bcnt1_i32_b64
+; SI: s_bcnt1_i32_b64
+; SI: s_endpgm
define void @s_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> %val) nounwind {
%ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %val) nounwind readnone
%truncctpop = trunc <2 x i64> %ctpop to <2 x i32>
}
; FUNC-LABEL: {{^}}s_ctpop_v4i64:
-; SI: S_BCNT1_I32_B64
-; SI: S_BCNT1_I32_B64
-; SI: S_BCNT1_I32_B64
-; SI: S_BCNT1_I32_B64
-; SI: S_ENDPGM
+; SI: s_bcnt1_i32_b64
+; SI: s_bcnt1_i32_b64
+; SI: s_bcnt1_i32_b64
+; SI: s_bcnt1_i32_b64
+; SI: s_endpgm
define void @s_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> %val) nounwind {
%ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %val) nounwind readnone
%truncctpop = trunc <4 x i64> %ctpop to <4 x i32>
}
; FUNC-LABEL: {{^}}v_ctpop_v2i64:
-; SI: V_BCNT_U32_B32
-; SI: V_BCNT_U32_B32
-; SI: V_BCNT_U32_B32
-; SI: V_BCNT_U32_B32
-; SI: S_ENDPGM
+; SI: v_bcnt_u32_b32
+; SI: v_bcnt_u32_b32
+; SI: v_bcnt_u32_b32
+; SI: v_bcnt_u32_b32
+; SI: s_endpgm
define void @v_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in) nounwind {
%val = load <2 x i64> addrspace(1)* %in, align 16
%ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %val) nounwind readnone
}
; FUNC-LABEL: {{^}}v_ctpop_v4i64:
-; SI: V_BCNT_U32_B32
-; SI: V_BCNT_U32_B32
-; SI: V_BCNT_U32_B32
-; SI: V_BCNT_U32_B32
-; SI: V_BCNT_U32_B32
-; SI: V_BCNT_U32_B32
-; SI: V_BCNT_U32_B32
-; SI: V_BCNT_U32_B32
-; SI: S_ENDPGM
+; SI: v_bcnt_u32_b32
+; SI: v_bcnt_u32_b32
+; SI: v_bcnt_u32_b32
+; SI: v_bcnt_u32_b32
+; SI: v_bcnt_u32_b32
+; SI: v_bcnt_u32_b32
+; SI: v_bcnt_u32_b32
+; SI: v_bcnt_u32_b32
+; SI: s_endpgm
define void @v_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> addrspace(1)* noalias %in) nounwind {
%val = load <4 x i64> addrspace(1)* %in, align 32
%ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %val) nounwind readnone
; but there are some cases when the should be allowed.
; FUNC-LABEL: {{^}}ctpop_i64_in_br:
-; SI: S_LOAD_DWORDX2 s{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0xd
-; SI: S_BCNT1_I32_B64 [[RESULT:s[0-9]+]], {{s\[}}[[LOVAL]]:[[HIVAL]]{{\]}}
-; SI: V_MOV_B32_e32 v[[VLO:[0-9]+]], [[RESULT]]
-; SI: V_MOV_B32_e32 v[[VHI:[0-9]+]], s[[HIVAL]]
-; SI: BUFFER_STORE_DWORDX2 {{v\[}}[[VLO]]:[[VHI]]{{\]}}
-; SI: S_ENDPGM
+; SI: s_load_dwordx2 s{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0xd
+; SI: s_bcnt1_i32_b64 [[RESULT:s[0-9]+]], {{s\[}}[[LOVAL]]:[[HIVAL]]{{\]}}
+; SI: v_mov_b32_e32 v[[VLO:[0-9]+]], [[RESULT]]
+; SI: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[HIVAL]]
+; SI: buffer_store_dwordx2 {{v\[}}[[VLO]]:[[VHI]]{{\]}}
+; SI: s_endpgm
define void @ctpop_i64_in_br(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %ctpop_arg, i32 %cond) {
entry:
%tmp0 = icmp eq i32 %cond, 0
declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1) nounwind readnone
; FUNC-LABEL: {{^}}s_cttz_zero_undef_i32:
-; SI: S_LOAD_DWORD [[VAL:s[0-9]+]],
-; SI: S_FF1_I32_B32 [[SRESULT:s[0-9]+]], [[VAL]]
-; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
-; SI: BUFFER_STORE_DWORD [[VRESULT]],
-; SI: S_ENDPGM
+; SI: s_load_dword [[VAL:s[0-9]+]],
+; SI: s_ff1_i32_b32 [[SRESULT:s[0-9]+]], [[VAL]]
+; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
+; SI: buffer_store_dword [[VRESULT]],
+; SI: s_endpgm
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
; EG: FFBL_INT {{\*? *}}[[RESULT]]
define void @s_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
}
; FUNC-LABEL: {{^}}v_cttz_zero_undef_i32:
-; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
-; SI: V_FFBL_B32_e32 [[RESULT:v[0-9]+]], [[VAL]]
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; SI: buffer_load_dword [[VAL:v[0-9]+]],
+; SI: v_ffbl_b32_e32 [[RESULT:v[0-9]+]], [[VAL]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
; EG: FFBL_INT {{\*? *}}[[RESULT]]
define void @v_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
}
; FUNC-LABEL: {{^}}v_cttz_zero_undef_v2i32:
-; SI: BUFFER_LOAD_DWORDX2
-; SI: V_FFBL_B32_e32
-; SI: V_FFBL_B32_e32
-; SI: BUFFER_STORE_DWORDX2
-; SI: S_ENDPGM
+; SI: buffer_load_dwordx2
+; SI: v_ffbl_b32_e32
+; SI: v_ffbl_b32_e32
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}}
; EG: FFBL_INT {{\*? *}}[[RESULT]]
; EG: FFBL_INT {{\*? *}}[[RESULT]]
}
; FUNC-LABEL: {{^}}v_cttz_zero_undef_v4i32:
-; SI: BUFFER_LOAD_DWORDX4
-; SI: V_FFBL_B32_e32
-; SI: V_FFBL_B32_e32
-; SI: V_FFBL_B32_e32
-; SI: V_FFBL_B32_e32
-; SI: BUFFER_STORE_DWORDX4
-; SI: S_ENDPGM
+; SI: buffer_load_dwordx4
+; SI: v_ffbl_b32_e32
+; SI: v_ffbl_b32_e32
+; SI: v_ffbl_b32_e32
+; SI: v_ffbl_b32_e32
+; SI: buffer_store_dwordx4
+; SI: s_endpgm
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}}
; EG: FFBL_INT {{\*? *}}[[RESULT]]
; EG: FFBL_INT {{\*? *}}[[RESULT]]
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; SI-LABEL: {{^}}load_i8_to_f32:
-; SI: BUFFER_LOAD_UBYTE [[LOADREG:v[0-9]+]],
-; SI-NOT: BFE
-; SI-NOT: LSHR
-; SI: V_CVT_F32_UBYTE0_e32 [[CONV:v[0-9]+]], [[LOADREG]]
-; SI: BUFFER_STORE_DWORD [[CONV]],
+; SI: buffer_load_ubyte [[LOADREG:v[0-9]+]],
+; SI-NOT: bfe
+; SI-NOT: lshr
+; SI: v_cvt_f32_ubyte0_e32 [[CONV:v[0-9]+]], [[LOADREG]]
+; SI: buffer_store_dword [[CONV]],
define void @load_i8_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* noalias %in) nounwind {
%load = load i8 addrspace(1)* %in, align 1
%cvt = uitofp i8 %load to float
}
; SI-LABEL: {{^}}load_v2i8_to_v2f32:
-; SI: BUFFER_LOAD_USHORT [[LOADREG:v[0-9]+]],
-; SI-NOT: BFE
-; SI-NOT: LSHR
-; SI-NOT: AND
-; SI-DAG: V_CVT_F32_UBYTE1_e32 v[[HIRESULT:[0-9]+]], [[LOADREG]]
-; SI-DAG: V_CVT_F32_UBYTE0_e32 v[[LORESULT:[0-9]+]], [[LOADREG]]
-; SI: BUFFER_STORE_DWORDX2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
+; SI: buffer_load_ushort [[LOADREG:v[0-9]+]],
+; SI-NOT: bfe
+; SI-NOT: lshr
+; SI-NOT: and
+; SI-DAG: v_cvt_f32_ubyte1_e32 v[[HIRESULT:[0-9]+]], [[LOADREG]]
+; SI-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LOADREG]]
+; SI: buffer_store_dwordx2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
define void @load_v2i8_to_v2f32(<2 x float> addrspace(1)* noalias %out, <2 x i8> addrspace(1)* noalias %in) nounwind {
%load = load <2 x i8> addrspace(1)* %in, align 1
%cvt = uitofp <2 x i8> %load to <2 x float>
}
; SI-LABEL: {{^}}load_v3i8_to_v3f32:
-; SI-NOT: BFE
-; SI-NOT: V_CVT_F32_UBYTE3_e32
-; SI-DAG: V_CVT_F32_UBYTE2_e32
-; SI-DAG: V_CVT_F32_UBYTE1_e32
-; SI-DAG: V_CVT_F32_UBYTE0_e32
-; SI: BUFFER_STORE_DWORDX2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
+; SI-NOT: bfe
+; SI-NOT: v_cvt_f32_ubyte3_e32
+; SI-DAG: v_cvt_f32_ubyte2_e32
+; SI-DAG: v_cvt_f32_ubyte1_e32
+; SI-DAG: v_cvt_f32_ubyte0_e32
+; SI: buffer_store_dwordx2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
define void @load_v3i8_to_v3f32(<3 x float> addrspace(1)* noalias %out, <3 x i8> addrspace(1)* noalias %in) nounwind {
%load = load <3 x i8> addrspace(1)* %in, align 1
%cvt = uitofp <3 x i8> %load to <3 x float>
}
; SI-LABEL: {{^}}load_v4i8_to_v4f32:
-; We can't use BUFFER_LOAD_DWORD here, because the load is byte aligned, and
-; BUFFER_LOAD_DWORD requires dword alignment.
-; SI: BUFFER_LOAD_USHORT
-; SI: BUFFER_LOAD_USHORT
-; SI: V_OR_B32_e32 [[LOADREG:v[0-9]+]]
-; SI-NOT: BFE
-; SI-NOT: LSHR
-; SI-DAG: V_CVT_F32_UBYTE3_e32 v[[HIRESULT:[0-9]+]], [[LOADREG]]
-; SI-DAG: V_CVT_F32_UBYTE2_e32 v{{[0-9]+}}, [[LOADREG]]
-; SI-DAG: V_CVT_F32_UBYTE1_e32 v{{[0-9]+}}, [[LOADREG]]
-; SI-DAG: V_CVT_F32_UBYTE0_e32 v[[LORESULT:[0-9]+]], [[LOADREG]]
-; SI: BUFFER_STORE_DWORDX4 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
+; We can't use buffer_load_dword here, because the load is byte aligned, and
+; buffer_load_dword requires dword alignment.
+; SI: buffer_load_ushort
+; SI: buffer_load_ushort
+; SI: v_or_b32_e32 [[LOADREG:v[0-9]+]]
+; SI-NOT: bfe
+; SI-NOT: lshr
+; SI-DAG: v_cvt_f32_ubyte3_e32 v[[HIRESULT:[0-9]+]], [[LOADREG]]
+; SI-DAG: v_cvt_f32_ubyte2_e32 v{{[0-9]+}}, [[LOADREG]]
+; SI-DAG: v_cvt_f32_ubyte1_e32 v{{[0-9]+}}, [[LOADREG]]
+; SI-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LOADREG]]
+; SI: buffer_store_dwordx4 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
define void @load_v4i8_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
%load = load <4 x i8> addrspace(1)* %in, align 1
%cvt = uitofp <4 x i8> %load to <4 x float>
ret void
}
-; XXX - This should really still be able to use the V_CVT_F32_UBYTE0
+; XXX - This should really still be able to use the v_cvt_f32_ubyte0
; for each component, but computeKnownBits doesn't handle vectors very
; well.
; SI-LABEL: {{^}}load_v4i8_to_v4f32_2_uses:
-; SI: BUFFER_LOAD_UBYTE
-; SI: BUFFER_LOAD_UBYTE
-; SI: BUFFER_LOAD_UBYTE
-; SI: BUFFER_LOAD_UBYTE
-; SI: V_CVT_F32_UBYTE0_e32
-; SI: V_CVT_F32_UBYTE0_e32
-; SI: V_CVT_F32_UBYTE0_e32
-; SI: V_CVT_F32_UBYTE0_e32
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: v_cvt_f32_ubyte0_e32
+; SI: v_cvt_f32_ubyte0_e32
+; SI: v_cvt_f32_ubyte0_e32
+; SI: v_cvt_f32_ubyte0_e32
; XXX - replace with this when v4i8 loads aren't scalarized anymore.
-; XSI: BUFFER_LOAD_DWORD
-; XSI: V_CVT_F32_U32_e32
-; XSI: V_CVT_F32_U32_e32
-; XSI: V_CVT_F32_U32_e32
-; XSI: V_CVT_F32_U32_e32
-; SI: S_ENDPGM
+; XSI: buffer_load_dword
+; XSI: v_cvt_f32_u32_e32
+; XSI: v_cvt_f32_u32_e32
+; XSI: v_cvt_f32_u32_e32
+; XSI: v_cvt_f32_u32_e32
+; SI: s_endpgm
define void @load_v4i8_to_v4f32_2_uses(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %out2, <4 x i8> addrspace(1)* noalias %in) nounwind {
%load = load <4 x i8> addrspace(1)* %in, align 4
%cvt = uitofp <4 x i8> %load to <4 x float>
; Make sure this doesn't crash.
; SI-LABEL: {{^}}load_v7i8_to_v7f32:
-; SI: S_ENDPGM
+; SI: s_endpgm
define void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias %out, <7 x i8> addrspace(1)* noalias %in) nounwind {
%load = load <7 x i8> addrspace(1)* %in, align 1
%cvt = uitofp <7 x i8> %load to <7 x float>
}
; SI-LABEL: {{^}}load_v8i8_to_v8f32:
-; SI: BUFFER_LOAD_DWORDX2 v{{\[}}[[LOLOAD:[0-9]+]]:[[HILOAD:[0-9]+]]{{\]}},
-; SI-NOT: BFE
-; SI-NOT: LSHR
-; SI-DAG: V_CVT_F32_UBYTE3_e32 v{{[0-9]+}}, v[[LOLOAD]]
-; SI-DAG: V_CVT_F32_UBYTE2_e32 v{{[0-9]+}}, v[[LOLOAD]]
-; SI-DAG: V_CVT_F32_UBYTE1_e32 v{{[0-9]+}}, v[[LOLOAD]]
-; SI-DAG: V_CVT_F32_UBYTE0_e32 v{{[0-9]+}}, v[[LOLOAD]]
-; SI-DAG: V_CVT_F32_UBYTE3_e32 v{{[0-9]+}}, v[[HILOAD]]
-; SI-DAG: V_CVT_F32_UBYTE2_e32 v{{[0-9]+}}, v[[HILOAD]]
-; SI-DAG: V_CVT_F32_UBYTE1_e32 v{{[0-9]+}}, v[[HILOAD]]
-; SI-DAG: V_CVT_F32_UBYTE0_e32 v{{[0-9]+}}, v[[HILOAD]]
-; SI-NOT: BFE
-; SI-NOT: LSHR
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
+; SI: buffer_load_dwordx2 v{{\[}}[[LOLOAD:[0-9]+]]:[[HILOAD:[0-9]+]]{{\]}},
+; SI-NOT: bfe
+; SI-NOT: lshr
+; SI-DAG: v_cvt_f32_ubyte3_e32 v{{[0-9]+}}, v[[LOLOAD]]
+; SI-DAG: v_cvt_f32_ubyte2_e32 v{{[0-9]+}}, v[[LOLOAD]]
+; SI-DAG: v_cvt_f32_ubyte1_e32 v{{[0-9]+}}, v[[LOLOAD]]
+; SI-DAG: v_cvt_f32_ubyte0_e32 v{{[0-9]+}}, v[[LOLOAD]]
+; SI-DAG: v_cvt_f32_ubyte3_e32 v{{[0-9]+}}, v[[HILOAD]]
+; SI-DAG: v_cvt_f32_ubyte2_e32 v{{[0-9]+}}, v[[HILOAD]]
+; SI-DAG: v_cvt_f32_ubyte1_e32 v{{[0-9]+}}, v[[HILOAD]]
+; SI-DAG: v_cvt_f32_ubyte0_e32 v{{[0-9]+}}, v[[HILOAD]]
+; SI-NOT: bfe
+; SI-NOT: lshr
+; SI: buffer_store_dword
+; SI: buffer_store_dword
+; SI: buffer_store_dword
+; SI: buffer_store_dword
+; SI: buffer_store_dword
+; SI: buffer_store_dword
+; SI: buffer_store_dword
+; SI: buffer_store_dword
define void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias %out, <8 x i8> addrspace(1)* noalias %in) nounwind {
%load = load <8 x i8> addrspace(1)* %in, align 1
%cvt = uitofp <8 x i8> %load to <8 x float>
}
; SI-LABEL: {{^}}i8_zext_inreg_i32_to_f32:
-; SI: BUFFER_LOAD_DWORD [[LOADREG:v[0-9]+]],
-; SI: V_ADD_I32_e32 [[ADD:v[0-9]+]], 2, [[LOADREG]]
-; SI-NEXT: V_CVT_F32_UBYTE0_e32 [[CONV:v[0-9]+]], [[ADD]]
-; SI: BUFFER_STORE_DWORD [[CONV]],
+; SI: buffer_load_dword [[LOADREG:v[0-9]+]],
+; SI: v_add_i32_e32 [[ADD:v[0-9]+]], 2, [[LOADREG]]
+; SI-NEXT: v_cvt_f32_ubyte0_e32 [[CONV:v[0-9]+]], [[ADD]]
+; SI: buffer_store_dword [[CONV]],
define void @i8_zext_inreg_i32_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
%load = load i32 addrspace(1)* %in, align 4
%add = add i32 %load, 2
; Function Attrs: nounwind
; CHECK-LABEL: {{^}}signed_ds_offset_addressing_loop:
; CHECK: BB0_1:
-; CHECK: V_ADD_I32_e32 [[VADDR:v[0-9]+]],
-; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR]]
-; SI-DAG: V_ADD_I32_e32 [[VADDR4:v[0-9]+]], 4, [[VADDR]]
-; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR4]]
-; SI-DAG: V_ADD_I32_e32 [[VADDR0x80:v[0-9]+]], 0x80, [[VADDR]]
-; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR0x80]]
-; SI-DAG: V_ADD_I32_e32 [[VADDR0x84:v[0-9]+]], 0x84, [[VADDR]]
-; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR0x84]]
-; SI-DAG: V_ADD_I32_e32 [[VADDR0x100:v[0-9]+]], 0x100, [[VADDR]]
-; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR0x100]]
+; CHECK: v_add_i32_e32 [[VADDR:v[0-9]+]],
+; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR]]
+; SI-DAG: v_add_i32_e32 [[VADDR4:v[0-9]+]], 4, [[VADDR]]
+; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR4]]
+; SI-DAG: v_add_i32_e32 [[VADDR0x80:v[0-9]+]], 0x80, [[VADDR]]
+; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x80]]
+; SI-DAG: v_add_i32_e32 [[VADDR0x84:v[0-9]+]], 0x84, [[VADDR]]
+; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x84]]
+; SI-DAG: v_add_i32_e32 [[VADDR0x100:v[0-9]+]], 0x100, [[VADDR]]
+; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x100]]
-; CI-DAG: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset0:0 offset1:1
-; CI-DAG: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset0:32 offset1:33
-; CI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR]] offset:256
-; CHECK: S_ENDPGM
+; CI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset0:0 offset1:1
+; CI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset0:32 offset1:33
+; CI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR]] offset:256
+; CHECK: s_endpgm
define void @signed_ds_offset_addressing_loop(float addrspace(1)* noalias nocapture %out, float addrspace(3)* noalias nocapture readonly %lptr, i32 %n) #2 {
entry:
%x.i = tail call i32 @llvm.r600.read.tidig.x() #0
@lds.f64 = addrspace(3) global [512 x double] zeroinitializer, align 8
; SI-LABEL: @simple_read2_f32
-; SI: DS_READ2_B32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:8
-; SI: S_WAITCNT lgkmcnt(0)
-; SI: V_ADD_F32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
-; SI: BUFFER_STORE_DWORD [[RESULT]]
-; SI: S_ENDPGM
+; SI: ds_read2_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:8
+; SI: s_waitcnt lgkmcnt(0)
+; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
+; SI: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
define void @simple_read2_f32(float addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
}
; SI-LABEL: @simple_read2_f32_max_offset
-; SI: DS_READ2_B32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:255
-; SI: S_WAITCNT lgkmcnt(0)
-; SI: V_ADD_F32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
-; SI: BUFFER_STORE_DWORD [[RESULT]]
-; SI: S_ENDPGM
+; SI: ds_read2_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:255
+; SI: s_waitcnt lgkmcnt(0)
+; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
+; SI: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
define void @simple_read2_f32_max_offset(float addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
}
; SI-LABEL: @simple_read2_f32_too_far
-; SI-NOT DS_READ2_B32
-; SI: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}}
-; SI: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}} offset:1028
-; SI: S_ENDPGM
+; SI-NOT ds_read2_b32
+; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}}
+; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:1028
+; SI: s_endpgm
define void @simple_read2_f32_too_far(float addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
}
; SI-LABEL: @simple_read2_f32_x2
-; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:0 offset1:8
-; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27
-; SI: S_ENDPGM
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:0 offset1:8
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27
+; SI: s_endpgm
define void @simple_read2_f32_x2(float addrspace(1)* %out) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 0
; Make sure there is an instruction between the two sets of reads.
; SI-LABEL: @simple_read2_f32_x2_barrier
-; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:0 offset1:8
-; SI: S_BARRIER
-; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27
-; SI: S_ENDPGM
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:0 offset1:8
+; SI: s_barrier
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27
+; SI: s_endpgm
define void @simple_read2_f32_x2_barrier(float addrspace(1)* %out) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 0
; element results in only folding the inner pair.
; SI-LABEL: @simple_read2_f32_x2_nonzero_base
-; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:2 offset1:8
-; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27
-; SI: S_ENDPGM
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:2 offset1:8
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27
+; SI: s_endpgm
define void @simple_read2_f32_x2_nonzero_base(float addrspace(1)* %out) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 2
; register. We can't safely merge this.
; SI-LABEL: @read2_ptr_is_subreg_arg_f32
-; SI-NOT: DS_READ2_B32
-; SI: DS_READ_B32
-; SI: DS_READ_B32
-; SI: S_ENDPGM
+; SI-NOT: ds_read2_b32
+; SI: ds_read_b32
+; SI: ds_read_b32
+; SI: s_endpgm
define void @read2_ptr_is_subreg_arg_f32(float addrspace(1)* %out, <2 x float addrspace(3)*> %lds.ptr) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%index.0 = insertelement <2 x i32> undef, i32 %x.i, i32 0
; subregisters.
; SI-LABEL: @read2_ptr_is_subreg_arg_offset_f32
-; SI-NOT: DS_READ2_B32
-; SI: DS_READ_B32
-; SI: DS_READ_B32
-; SI: S_ENDPGM
+; SI-NOT: ds_read2_b32
+; SI: ds_read_b32
+; SI: ds_read_b32
+; SI: s_endpgm
define void @read2_ptr_is_subreg_arg_offset_f32(float addrspace(1)* %out, <2 x float addrspace(3)*> %lds.ptr) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%index.0 = insertelement <2 x i32> undef, i32 %x.i, i32 0
}
; We should be able to merge in this case, but probably not worth the effort.
-; SI-NOT: DS_READ2_B32
-; SI: DS_READ_B32
-; SI: DS_READ_B32
-; SI: S_ENDPGM
+; SI-NOT: ds_read2_b32
+; SI: ds_read_b32
+; SI: ds_read_b32
+; SI: s_endpgm
define void @read2_ptr_is_subreg_f32(float addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%ptr.0 = insertelement <2 x [512 x float] addrspace(3)*> undef, [512 x float] addrspace(3)* @lds, i32 0
}
; SI-LABEL: @simple_read2_f32_volatile_0
-; SI-NOT DS_READ2_B32
-; SI: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}}
-; SI: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}} offset:32
-; SI: S_ENDPGM
+; SI-NOT ds_read2_b32
+; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}}
+; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:32
+; SI: s_endpgm
define void @simple_read2_f32_volatile_0(float addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
}
; SI-LABEL: @simple_read2_f32_volatile_1
-; SI-NOT DS_READ2_B32
-; SI: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}}
-; SI: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}} offset:32
-; SI: S_ENDPGM
+; SI-NOT ds_read2_b32
+; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}}
+; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:32
+; SI: s_endpgm
define void @simple_read2_f32_volatile_1(float addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
; XXX: This isn't really testing anything useful now. I think CI
; allows unaligned LDS accesses, which would be a problem here.
; SI-LABEL: @unaligned_read2_f32
-; SI-NOT: DS_READ2_B32
-; SI: S_ENDPGM
+; SI-NOT: ds_read2_b32
+; SI: s_endpgm
define void @unaligned_read2_f32(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%arrayidx0 = getelementptr inbounds float addrspace(3)* %lds, i32 %x.i
}
; SI-LABEL: @misaligned_2_simple_read2_f32
-; SI-NOT: DS_READ2_B32
-; SI: S_ENDPGM
+; SI-NOT: ds_read2_b32
+; SI: s_endpgm
define void @misaligned_2_simple_read2_f32(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%arrayidx0 = getelementptr inbounds float addrspace(3)* %lds, i32 %x.i
}
; SI-LABEL: @simple_read2_f64
-; SI: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 3, {{v[0-9]+}}
-; SI: DS_READ2_B64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, [[VPTR]] offset0:0 offset1:8
-; SI: V_ADD_F64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
-; SI: BUFFER_STORE_DWORDX2 [[RESULT]]
-; SI: S_ENDPGM
+; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, {{v[0-9]+}}
+; SI: ds_read2_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, [[VPTR]] offset0:0 offset1:8
+; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
+; SI: buffer_store_dwordx2 [[RESULT]]
+; SI: s_endpgm
define void @simple_read2_f64(double addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%arrayidx0 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
}
; SI-LABEL: @simple_read2_f64_max_offset
-; SI: DS_READ2_B64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:0 offset1:255
-; SI: S_ENDPGM
+; SI: ds_read2_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:0 offset1:255
+; SI: s_endpgm
define void @simple_read2_f64_max_offset(double addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%arrayidx0 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
}
; SI-LABEL: @simple_read2_f64_too_far
-; SI-NOT DS_READ2_B64
-; SI: DS_READ_B64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}
-; SI: DS_READ_B64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:2056
-; SI: S_ENDPGM
+; SI-NOT ds_read2_b64
+; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}
+; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:2056
+; SI: s_endpgm
define void @simple_read2_f64_too_far(double addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%arrayidx0 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
; Alignment only 4
; SI-LABEL: @misaligned_read2_f64
-; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:0 offset1:1
-; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:14 offset1:15
-; SI: S_ENDPGM
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:0 offset1:1
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:14 offset1:15
+; SI: s_endpgm
define void @misaligned_read2_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%arrayidx0 = getelementptr inbounds double addrspace(3)* %lds, i32 %x.i
@foo = addrspace(3) global [4 x i32] zeroinitializer, align 4
; SI-LABEL: @load_constant_adjacent_offsets
-; SI: V_MOV_B32_e32 [[ZERO:v[0-9]+]], 0{{$}}
-; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:1
+; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:1
define void @load_constant_adjacent_offsets(i32 addrspace(1)* %out) {
%val0 = load i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
%val1 = load i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 1), align 4
}
; SI-LABEL: @load_constant_disjoint_offsets
-; SI: V_MOV_B32_e32 [[ZERO:v[0-9]+]], 0{{$}}
-; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:2
+; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:2
define void @load_constant_disjoint_offsets(i32 addrspace(1)* %out) {
%val0 = load i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
%val1 = load i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 2), align 4
@bar = addrspace(3) global [4 x i64] zeroinitializer, align 4
; SI-LABEL: @load_misaligned64_constant_offsets
-; SI: V_MOV_B32_e32 [[ZERO:v[0-9]+]], 0{{$}}
-; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:1
-; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:2 offset1:3
+; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:1
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:2 offset1:3
define void @load_misaligned64_constant_offsets(i64 addrspace(1)* %out) {
%val0 = load i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4
%val1 = load i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4
@bar.large = addrspace(3) global [4096 x i64] zeroinitializer, align 4
; SI-LABEL: @load_misaligned64_constant_large_offsets
-; SI-DAG: V_MOV_B32_e32 [[BASE0:v[0-9]+]], 0x7ff8{{$}}
-; SI-DAG: V_MOV_B32_e32 [[BASE1:v[0-9]+]], 0x4000
-; SI-DAG: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE0]] offset0:0 offset1:1
-; SI-DAG: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE1]] offset0:0 offset1:1
-; SI: S_ENDPGM
+; SI-DAG: v_mov_b32_e32 [[BASE0:v[0-9]+]], 0x7ff8{{$}}
+; SI-DAG: v_mov_b32_e32 [[BASE1:v[0-9]+]], 0x4000
+; SI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE0]] offset0:0 offset1:1
+; SI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE1]] offset0:0 offset1:1
+; SI: s_endpgm
define void @load_misaligned64_constant_large_offsets(i64 addrspace(1)* %out) {
%val0 = load i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4
%val1 = load i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 4095), align 4
; SI-LABEL: @simple_read2st64_f32_0_1
-; SI: DS_READ2ST64_B32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:1
-; SI: S_WAITCNT lgkmcnt(0)
-; SI: V_ADD_F32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
-; SI: BUFFER_STORE_DWORD [[RESULT]]
-; SI: S_ENDPGM
+; SI: ds_read2st64_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:1
+; SI: s_waitcnt lgkmcnt(0)
+; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
+; SI: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
define void @simple_read2st64_f32_0_1(float addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
}
; SI-LABEL: @simple_read2st64_f32_1_2
-; SI: DS_READ2ST64_B32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:2
-; SI: S_WAITCNT lgkmcnt(0)
-; SI: V_ADD_F32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
-; SI: BUFFER_STORE_DWORD [[RESULT]]
-; SI: S_ENDPGM
+; SI: ds_read2st64_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:2
+; SI: s_waitcnt lgkmcnt(0)
+; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
+; SI: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
define void @simple_read2st64_f32_1_2(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%add.x.0 = add nsw i32 %x.i, 64
}
; SI-LABEL: @simple_read2st64_f32_max_offset
-; SI: DS_READ2ST64_B32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:255
-; SI: S_WAITCNT lgkmcnt(0)
-; SI: V_ADD_F32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
-; SI: BUFFER_STORE_DWORD [[RESULT]]
-; SI: S_ENDPGM
+; SI: ds_read2st64_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:255
+; SI: s_waitcnt lgkmcnt(0)
+; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
+; SI: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
define void @simple_read2st64_f32_max_offset(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%add.x.0 = add nsw i32 %x.i, 64
}
; SI-LABEL: @simple_read2st64_f32_over_max_offset
-; SI-NOT: DS_READ2ST64_B32
-; SI: DS_READ_B32 {{v[0-9]+}}, {{v[0-9]+}} offset:256
-; SI: V_ADD_I32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}}
-; SI: DS_READ_B32 {{v[0-9]+}}, [[BIGADD]]
-; SI: S_ENDPGM
+; SI-NOT: ds_read2st64_b32
+; SI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:256
+; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}}
+; SI: ds_read_b32 {{v[0-9]+}}, [[BIGADD]]
+; SI: s_endpgm
define void @simple_read2st64_f32_over_max_offset(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%add.x.0 = add nsw i32 %x.i, 64
}
; SI-LABEL: @odd_invalid_read2st64_f32_0
-; SI-NOT: DS_READ2ST64_B32
-; SI: S_ENDPGM
+; SI-NOT: ds_read2st64_b32
+; SI: s_endpgm
define void @odd_invalid_read2st64_f32_0(float addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
}
; SI-LABEL: @odd_invalid_read2st64_f32_1
-; SI-NOT: DS_READ2ST64_B32
-; SI: S_ENDPGM
+; SI-NOT: ds_read2st64_b32
+; SI: s_endpgm
define void @odd_invalid_read2st64_f32_1(float addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%add.x.0 = add nsw i32 %x.i, 64
}
; SI-LABEL: @simple_read2st64_f64_0_1
-; SI: DS_READ2ST64_B64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:1
-; SI: S_WAITCNT lgkmcnt(0)
-; SI: V_ADD_F64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
-; SI: BUFFER_STORE_DWORDX2 [[RESULT]]
-; SI: S_ENDPGM
+; SI: ds_read2st64_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:1
+; SI: s_waitcnt lgkmcnt(0)
+; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
+; SI: buffer_store_dwordx2 [[RESULT]]
+; SI: s_endpgm
define void @simple_read2st64_f64_0_1(double addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%arrayidx0 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
}
; SI-LABEL: @simple_read2st64_f64_1_2
-; SI: DS_READ2ST64_B64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:2
-; SI: S_WAITCNT lgkmcnt(0)
-; SI: V_ADD_F64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
-; SI: BUFFER_STORE_DWORDX2 [[RESULT]]
-; SI: S_ENDPGM
+; SI: ds_read2st64_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:2
+; SI: s_waitcnt lgkmcnt(0)
+; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
+; SI: buffer_store_dwordx2 [[RESULT]]
+; SI: s_endpgm
define void @simple_read2st64_f64_1_2(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%add.x.0 = add nsw i32 %x.i, 64
; Alignment only
; SI-LABEL: @misaligned_read2st64_f64
-; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:0 offset1:1
-; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:128 offset1:129
-; SI: S_ENDPGM
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:0 offset1:1
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:128 offset1:129
+; SI: s_endpgm
define void @misaligned_read2st64_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%arrayidx0 = getelementptr inbounds double addrspace(3)* %lds, i32 %x.i
; The maximum is not the usual 0xff because 0xff * 8 * 64 > 0xffff
; SI-LABEL: @simple_read2st64_f64_max_offset
-; SI: DS_READ2ST64_B64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:4 offset1:127
-; SI: S_WAITCNT lgkmcnt(0)
-; SI: V_ADD_F64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
-; SI: BUFFER_STORE_DWORDX2 [[RESULT]]
-; SI: S_ENDPGM
+; SI: ds_read2st64_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:4 offset1:127
+; SI: s_waitcnt lgkmcnt(0)
+; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
+; SI: buffer_store_dwordx2 [[RESULT]]
+; SI: s_endpgm
define void @simple_read2st64_f64_max_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%add.x.0 = add nsw i32 %x.i, 256
}
; SI-LABEL: @simple_read2st64_f64_over_max_offset
-; SI-NOT: DS_READ2ST64_B64
-; SI: DS_READ_B64 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset:512
-; SI: V_ADD_I32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}}
-; SI: DS_READ_B64 {{v\[[0-9]+:[0-9]+\]}}, [[BIGADD]]
-; SI: S_ENDPGM
+; SI-NOT: ds_read2st64_b64
+; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset:512
+; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}}
+; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, [[BIGADD]]
+; SI: s_endpgm
define void @simple_read2st64_f64_over_max_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%add.x.0 = add nsw i32 %x.i, 64
}
; SI-LABEL: @invalid_read2st64_f64_odd_offset
-; SI-NOT: DS_READ2ST64_B64
-; SI: S_ENDPGM
+; SI-NOT: ds_read2st64_b64
+; SI: s_endpgm
define void @invalid_read2st64_f64_odd_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%add.x.0 = add nsw i32 %x.i, 64
; stride in elements, not bytes, is a multiple of 64.
; SI-LABEL: @byte_size_only_divisible_64_read2_f64
-; SI-NOT: DS_READ2ST_B64
-; SI: DS_READ2_B64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:0 offset1:8
-; SI: S_ENDPGM
+; SI-NOT: ds_read2st_b64
+; SI: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:0 offset1:8
+; SI: s_endpgm
define void @byte_size_only_divisible_64_read2_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%arrayidx0 = getelementptr inbounds double addrspace(3)* %lds, i32 %x.i
; SI-LABEL: @simple_write2_one_val_f32
-; SI-DAG: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]]
-; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
-; SI: DS_WRITE2_B32 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:8 [M0]
-; SI: S_ENDPGM
+; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]]
+; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
+; SI: ds_write2_b32 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:8 [M0]
+; SI: s_endpgm
define void @simple_write2_one_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in.gep = getelementptr float addrspace(1)* %in, i32 %x.i
}
; SI-LABEL: @simple_write2_two_val_f32
-; SI-DAG: BUFFER_LOAD_DWORD [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; SI-DAG: BUFFER_LOAD_DWORD [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
-; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
-; SI: DS_WRITE2_B32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:8 [M0]
-; SI: S_ENDPGM
+; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
+; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
+; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:8 [M0]
+; SI: s_endpgm
define void @simple_write2_two_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in.gep.0 = getelementptr float addrspace(1)* %in, i32 %x.i
}
; SI-LABEL: @simple_write2_two_val_f32_volatile_0
-; SI-NOT: DS_WRITE2_B32
-; SI: DS_WRITE_B32 {{v[0-9]+}}, {{v[0-9]+}}
-; SI: DS_WRITE_B32 {{v[0-9]+}}, {{v[0-9]+}} offset:32
-; SI: S_ENDPGM
+; SI-NOT: ds_write2_b32
+; SI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}}
+; SI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:32
+; SI: s_endpgm
define void @simple_write2_two_val_f32_volatile_0(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in0.gep = getelementptr float addrspace(1)* %in0, i32 %x.i
}
; SI-LABEL: @simple_write2_two_val_f32_volatile_1
-; SI-NOT: DS_WRITE2_B32
-; SI: DS_WRITE_B32 {{v[0-9]+}}, {{v[0-9]+}}
-; SI: DS_WRITE_B32 {{v[0-9]+}}, {{v[0-9]+}} offset:32
-; SI: S_ENDPGM
+; SI-NOT: ds_write2_b32
+; SI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}}
+; SI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:32
+; SI: s_endpgm
define void @simple_write2_two_val_f32_volatile_1(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in0.gep = getelementptr float addrspace(1)* %in0, i32 %x.i
; 2 data subregisters from different super registers.
; SI-LABEL: @simple_write2_two_val_subreg2_mixed_f32
-; SI: BUFFER_LOAD_DWORDX2 v{{\[}}[[VAL0:[0-9]+]]:{{[0-9]+\]}}
-; SI: BUFFER_LOAD_DWORDX2 v{{\[[0-9]+}}:[[VAL1:[0-9]+]]{{\]}}
-; SI: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
-; SI: DS_WRITE2_B32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8 [M0]
-; SI: S_ENDPGM
+; SI: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:{{[0-9]+\]}}
+; SI: buffer_load_dwordx2 v{{\[[0-9]+}}:[[VAL1:[0-9]+]]{{\]}}
+; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
+; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8 [M0]
+; SI: s_endpgm
define void @simple_write2_two_val_subreg2_mixed_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in.gep.0 = getelementptr <2 x float> addrspace(1)* %in, i32 %x.i
}
; SI-LABEL: @simple_write2_two_val_subreg2_f32
-; SI-DAG: BUFFER_LOAD_DWORDX2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
-; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
-; SI: DS_WRITE2_B32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8 [M0]
-; SI: S_ENDPGM
+; SI-DAG: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
+; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
+; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8 [M0]
+; SI: s_endpgm
define void @simple_write2_two_val_subreg2_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in.gep = getelementptr <2 x float> addrspace(1)* %in, i32 %x.i
}
; SI-LABEL: @simple_write2_two_val_subreg4_f32
-; SI-DAG: BUFFER_LOAD_DWORDX4 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
-; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
-; SI: DS_WRITE2_B32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8 [M0]
-; SI: S_ENDPGM
+; SI-DAG: buffer_load_dwordx4 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
+; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
+; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8 [M0]
+; SI: s_endpgm
define void @simple_write2_two_val_subreg4_f32(float addrspace(1)* %C, <4 x float> addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in.gep = getelementptr <4 x float> addrspace(1)* %in, i32 %x.i
}
; SI-LABEL: @simple_write2_two_val_max_offset_f32
-; SI-DAG: BUFFER_LOAD_DWORD [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; SI-DAG: BUFFER_LOAD_DWORD [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
-; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
-; SI: DS_WRITE2_B32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:255 [M0]
-; SI: S_ENDPGM
+; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
+; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
+; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:255 [M0]
+; SI: s_endpgm
define void @simple_write2_two_val_max_offset_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in.gep.0 = getelementptr float addrspace(1)* %in, i32 %x.i
}
; SI-LABEL: @simple_write2_two_val_too_far_f32
-; SI: DS_WRITE_B32 v{{[0-9]+}}, v{{[0-9]+}}
-; SI: DS_WRITE_B32 v{{[0-9]+}}, v{{[0-9]+}} offset:1028
-; SI: S_ENDPGM
+; SI: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}}
+; SI: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:1028
+; SI: s_endpgm
define void @simple_write2_two_val_too_far_f32(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in0.gep = getelementptr float addrspace(1)* %in0, i32 %x.i
}
; SI-LABEL: @simple_write2_two_val_f32_x2
-; SI: DS_WRITE2_B32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL1:v[0-9]+]] offset0:0 offset1:8
-; SI-NEXT: DS_WRITE2_B32 [[BASEADDR]], [[VAL0]], [[VAL1]] offset0:11 offset1:27
-; SI: S_ENDPGM
+; SI: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL1:v[0-9]+]] offset0:0 offset1:8
+; SI-NEXT: ds_write2_b32 [[BASEADDR]], [[VAL0]], [[VAL1]] offset0:11 offset1:27
+; SI: s_endpgm
define void @simple_write2_two_val_f32_x2(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%in0.gep = getelementptr float addrspace(1)* %in0, i32 %tid.x
}
; SI-LABEL: @simple_write2_two_val_f32_x2_nonzero_base
-; SI: DS_WRITE2_B32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL1:v[0-9]+]] offset0:3 offset1:8
-; SI-NEXT: DS_WRITE2_B32 [[BASEADDR]], [[VAL0]], [[VAL1]] offset0:11 offset1:27
-; SI: S_ENDPGM
+; SI: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL1:v[0-9]+]] offset0:3 offset1:8
+; SI-NEXT: ds_write2_b32 [[BASEADDR]], [[VAL0]], [[VAL1]] offset0:11 offset1:27
+; SI: s_endpgm
define void @simple_write2_two_val_f32_x2_nonzero_base(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%in0.gep = getelementptr float addrspace(1)* %in0, i32 %tid.x
}
; SI-LABEL: @write2_ptr_subreg_arg_two_val_f32
-; SI-NOT: DS_WRITE2_B32
-; SI: DS_WRITE_B32
-; SI: DS_WRITE_B32
-; SI: S_ENDPGM
+; SI-NOT: ds_write2_b32
+; SI: ds_write_b32
+; SI: ds_write_b32
+; SI: s_endpgm
define void @write2_ptr_subreg_arg_two_val_f32(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1, <2 x float addrspace(3)*> %lds.ptr) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in0.gep = getelementptr float addrspace(1)* %in0, i32 %x.i
}
; SI-LABEL: @simple_write2_one_val_f64
-; SI: BUFFER_LOAD_DWORDX2 [[VAL:v\[[0-9]+:[0-9]+\]]],
-; SI: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
-; SI: DS_WRITE2_B64 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:8 [M0]
-; SI: S_ENDPGM
+; SI: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]],
+; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
+; SI: ds_write2_b64 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:8 [M0]
+; SI: s_endpgm
define void @simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in.gep = getelementptr double addrspace(1)* %in, i32 %x.i
}
; SI-LABEL: @misaligned_simple_write2_one_val_f64
-; SI-DAG: BUFFER_LOAD_DWORDX2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
-; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
-; SI: DS_WRITE2_B32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:1 [M0]
-; SI: DS_WRITE2_B32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:14 offset1:15 [M0]
-; SI: S_ENDPGM
+; SI-DAG: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
+; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
+; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:1 [M0]
+; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:14 offset1:15 [M0]
+; SI: s_endpgm
define void @misaligned_simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in.gep = getelementptr double addrspace(1)* %in, i32 %x.i
}
; SI-LABEL: @simple_write2_two_val_f64
-; SI-DAG: BUFFER_LOAD_DWORDX2 [[VAL0:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; SI-DAG: BUFFER_LOAD_DWORDX2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x8
-; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
-; SI: DS_WRITE2_B64 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:8 [M0]
-; SI: S_ENDPGM
+; SI-DAG: buffer_load_dwordx2 [[VAL0:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dwordx2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x8
+; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
+; SI: ds_write2_b64 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:8 [M0]
+; SI: s_endpgm
define void @simple_write2_two_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in.gep.0 = getelementptr double addrspace(1)* %in, i32 %x.i
@foo = addrspace(3) global [4 x i32] zeroinitializer, align 4
; SI-LABEL: @store_constant_adjacent_offsets
-; SI: V_MOV_B32_e32 [[ZERO:v[0-9]+]], 0{{$}}
-; SI: DS_WRITE2_B32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
+; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
+; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
define void @store_constant_adjacent_offsets() {
store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 1), align 4
}
; SI-LABEL: @store_constant_disjoint_offsets
-; SI-DAG: V_MOV_B32_e32 [[VAL:v[0-9]+]], 0x7b{{$}}
-; SI-DAG: V_MOV_B32_e32 [[ZERO:v[0-9]+]], 0{{$}}
-; SI: DS_WRITE2_B32 [[ZERO]], [[VAL]], [[VAL]] offset0:0 offset1:2
+; SI-DAG: v_mov_b32_e32 [[VAL:v[0-9]+]], 0x7b{{$}}
+; SI-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
+; SI: ds_write2_b32 [[ZERO]], [[VAL]], [[VAL]] offset0:0 offset1:2
define void @store_constant_disjoint_offsets() {
store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 2), align 4
@bar = addrspace(3) global [4 x i64] zeroinitializer, align 4
; SI-LABEL: @store_misaligned64_constant_offsets
-; SI: V_MOV_B32_e32 [[ZERO:v[0-9]+]], 0{{$}}
-; SI: DS_WRITE2_B32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
-; SI: DS_WRITE2_B32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3
+; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
+; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
+; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3
define void @store_misaligned64_constant_offsets() {
store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4
store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4
@bar.large = addrspace(3) global [4096 x i64] zeroinitializer, align 4
; SI-LABEL: @store_misaligned64_constant_large_offsets
-; SI-DAG: V_MOV_B32_e32 [[BASE0:v[0-9]+]], 0x7ff8{{$}}
-; SI-DAG: V_MOV_B32_e32 [[BASE1:v[0-9]+]], 0x4000{{$}}
-; SI-DAG: DS_WRITE2_B32 [[BASE0]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
-; SI-DAG: DS_WRITE2_B32 [[BASE1]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
-; SI: S_ENDPGM
+; SI-DAG: v_mov_b32_e32 [[BASE0:v[0-9]+]], 0x7ff8{{$}}
+; SI-DAG: v_mov_b32_e32 [[BASE1:v[0-9]+]], 0x4000{{$}}
+; SI-DAG: ds_write2_b32 [[BASE0]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
+; SI-DAG: ds_write2_b32 [[BASE1]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
+; SI: s_endpgm
define void @store_misaligned64_constant_large_offsets() {
store i64 123, i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4
store i64 123, i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 4095), align 4
; SI-LABEL: @simple_write2st64_one_val_f32_0_1
-; SI-DAG: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]]
-; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
-; SI: DS_WRITE2ST64_B32 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:1 [M0]
-; SI: S_ENDPGM
+; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]]
+; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
+; SI: ds_write2st64_b32 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:1 [M0]
+; SI: s_endpgm
define void @simple_write2st64_one_val_f32_0_1(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in.gep = getelementptr float addrspace(1)* %in, i32 %x.i
}
; SI-LABEL: @simple_write2st64_two_val_f32_2_5
-; SI-DAG: BUFFER_LOAD_DWORD [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; SI-DAG: BUFFER_LOAD_DWORD [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
-; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
-; SI: DS_WRITE2ST64_B32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:2 offset1:5 [M0]
-; SI: S_ENDPGM
+; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
+; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
+; SI: ds_write2st64_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:2 offset1:5 [M0]
+; SI: s_endpgm
define void @simple_write2st64_two_val_f32_2_5(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in.gep.0 = getelementptr float addrspace(1)* %in, i32 %x.i
}
; SI-LABEL: @simple_write2st64_two_val_max_offset_f32
-; SI-DAG: BUFFER_LOAD_DWORD [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; SI-DAG: BUFFER_LOAD_DWORD [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
-; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
-; SI: DS_WRITE2ST64_B32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:255 [M0]
-; SI: S_ENDPGM
+; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
+; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
+; SI: ds_write2st64_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:255 [M0]
+; SI: s_endpgm
define void @simple_write2st64_two_val_max_offset_f32(float addrspace(1)* %C, float addrspace(1)* %in, float addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in.gep.0 = getelementptr float addrspace(1)* %in, i32 %x.i
}
; SI-LABEL: @simple_write2st64_two_val_max_offset_f64
-; SI-DAG: BUFFER_LOAD_DWORDX2 [[VAL0:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; SI-DAG: BUFFER_LOAD_DWORDX2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x8
-; SI-DAG: V_ADD_I32_e32 [[VPTR:v[0-9]+]],
-; SI: DS_WRITE2ST64_B64 [[VPTR]], [[VAL0]], [[VAL1]] offset0:4 offset1:127 [M0]
-; SI: S_ENDPGM
+; SI-DAG: buffer_load_dwordx2 [[VAL0:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dwordx2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x8
+; SI-DAG: v_add_i32_e32 [[VPTR:v[0-9]+]],
+; SI: ds_write2st64_b64 [[VPTR]], [[VAL0]], [[VAL1]] offset0:4 offset1:127 [M0]
+; SI: s_endpgm
define void @simple_write2st64_two_val_max_offset_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in.gep.0 = getelementptr double addrspace(1)* %in, i32 %x.i
}
; SI-LABEL: @byte_size_only_divisible_64_write2st64_f64
-; SI-NOT: DS_WRITE2ST64_B64
-; SI: DS_WRITE2_B64 {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:0 offset1:8
-; SI: S_ENDPGM
+; SI-NOT: ds_write2st64_b64
+; SI: ds_write2_b64 {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:0 offset1:8
+; SI: s_endpgm
define void @byte_size_only_divisible_64_write2st64_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in.gep = getelementptr double addrspace(1)* %in, i32 %x.i
}
; FUNC-LABEL: {{^}}sextload_global_i8_to_i64:
-; SI: BUFFER_LOAD_SBYTE [[LOAD:v[0-9]+]],
-; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 31, [[LOAD]]
-; SI: BUFFER_STORE_DWORDX2
+; SI: buffer_load_sbyte [[LOAD:v[0-9]+]],
+; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]]
+; SI: buffer_store_dwordx2
define void @sextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
%a = load i8 addrspace(1)* %in, align 8
%ext = sext i8 %a to i64
}
; FUNC-LABEL: {{^}}sextload_global_i16_to_i64:
-; SI: BUFFER_LOAD_SSHORT [[LOAD:v[0-9]+]],
-; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 31, [[LOAD]]
-; SI: BUFFER_STORE_DWORDX2
+; SI: buffer_load_sshort [[LOAD:v[0-9]+]],
+; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]]
+; SI: buffer_store_dwordx2
define void @sextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
%a = load i16 addrspace(1)* %in, align 8
%ext = sext i16 %a to i64
}
; FUNC-LABEL: {{^}}sextload_global_i32_to_i64:
-; SI: BUFFER_LOAD_DWORD [[LOAD:v[0-9]+]],
-; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 31, [[LOAD]]
-; SI: BUFFER_STORE_DWORDX2
+; SI: buffer_load_dword [[LOAD:v[0-9]+]],
+; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]]
+; SI: buffer_store_dwordx2
define void @sextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%a = load i32 addrspace(1)* %in, align 8
%ext = sext i32 %a to i64
}
; FUNC-LABEL: {{^}}zextload_global_i8_to_i64:
-; SI-DAG: S_MOV_B32 [[ZERO:s[0-9]+]], 0{{$}}
-; SI-DAG: BUFFER_LOAD_UBYTE [[LOAD:v[0-9]+]],
-; SI: V_MOV_B32_e32 {{v[0-9]+}}, [[ZERO]]
-; SI: BUFFER_STORE_DWORDX2
+; SI-DAG: s_mov_b32 [[ZERO:s[0-9]+]], 0{{$}}
+; SI-DAG: buffer_load_ubyte [[LOAD:v[0-9]+]],
+; SI: v_mov_b32_e32 {{v[0-9]+}}, [[ZERO]]
+; SI: buffer_store_dwordx2
define void @zextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
%a = load i8 addrspace(1)* %in, align 8
%ext = zext i8 %a to i64
}
; FUNC-LABEL: {{^}}zextload_global_i16_to_i64:
-; SI-DAG: S_MOV_B32 [[ZERO:s[0-9]+]], 0{{$}}
-; SI-DAG: BUFFER_LOAD_USHORT [[LOAD:v[0-9]+]],
-; SI: V_MOV_B32_e32 {{v[0-9]+}}, [[ZERO]]
-; SI: BUFFER_STORE_DWORDX2
+; SI-DAG: s_mov_b32 [[ZERO:s[0-9]+]], 0{{$}}
+; SI-DAG: buffer_load_ushort [[LOAD:v[0-9]+]],
+; SI: v_mov_b32_e32 {{v[0-9]+}}, [[ZERO]]
+; SI: buffer_store_dwordx2
define void @zextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
%a = load i16 addrspace(1)* %in, align 8
%ext = zext i16 %a to i64
}
; FUNC-LABEL: {{^}}zextload_global_i32_to_i64:
-; SI-DAG: S_MOV_B32 [[ZERO:s[0-9]+]], 0{{$}}
-; SI-DAG: BUFFER_LOAD_DWORD [[LOAD:v[0-9]+]],
-; SI: V_MOV_B32_e32 {{v[0-9]+}}, [[ZERO]]
-; SI: BUFFER_STORE_DWORDX2
+; SI-DAG: s_mov_b32 [[ZERO:s[0-9]+]], 0{{$}}
+; SI-DAG: buffer_load_dword [[LOAD:v[0-9]+]],
+; SI: v_mov_b32_e32 {{v[0-9]+}}, [[ZERO]]
+; SI: buffer_store_dwordx2
define void @zextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%a = load i32 addrspace(1)* %in, align 8
%ext = zext i32 %a to i64
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}extract_vector_elt_v2i16:
-; SI: BUFFER_LOAD_USHORT
-; SI: BUFFER_LOAD_USHORT
-; SI: BUFFER_STORE_SHORT
-; SI: BUFFER_STORE_SHORT
+; SI: buffer_load_ushort
+; SI: buffer_load_ushort
+; SI: buffer_store_short
+; SI: buffer_store_short
define void @extract_vector_elt_v2i16(i16 addrspace(1)* %out, <2 x i16> %foo) nounwind {
%p0 = extractelement <2 x i16> %foo, i32 0
%p1 = extractelement <2 x i16> %foo, i32 1
}
; FUNC-LABEL: {{^}}extract_vector_elt_v4i16:
-; SI: BUFFER_LOAD_USHORT
-; SI: BUFFER_LOAD_USHORT
-; SI: BUFFER_STORE_SHORT
-; SI: BUFFER_STORE_SHORT
+; SI: buffer_load_ushort
+; SI: buffer_load_ushort
+; SI: buffer_store_short
+; SI: buffer_store_short
define void @extract_vector_elt_v4i16(i16 addrspace(1)* %out, <4 x i16> %foo) nounwind {
%p0 = extractelement <4 x i16> %foo, i32 0
%p1 = extractelement <4 x i16> %foo, i32 2
declare <4 x double> @llvm.fabs.v4f64(<4 x double>) readnone
; FUNC-LABEL: {{^}}v_fabs_f64:
-; SI: V_AND_B32
-; SI: S_ENDPGM
+; SI: v_and_b32
+; SI: s_endpgm
define void @v_fabs_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%tidext = sext i32 %tid to i64
}
; FUNC-LABEL: {{^}}fabs_f64:
-; SI: V_AND_B32
-; SI-NOT: V_AND_B32
-; SI: S_ENDPGM
+; SI: v_and_b32
+; SI-NOT: v_and_b32
+; SI: s_endpgm
define void @fabs_f64(double addrspace(1)* %out, double %in) {
%fabs = call double @llvm.fabs.f64(double %in)
store double %fabs, double addrspace(1)* %out
}
; FUNC-LABEL: {{^}}fabs_v2f64:
-; SI: V_AND_B32
-; SI: V_AND_B32
-; SI: S_ENDPGM
+; SI: v_and_b32
+; SI: v_and_b32
+; SI: s_endpgm
define void @fabs_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) {
%fabs = call <2 x double> @llvm.fabs.v2f64(<2 x double> %in)
store <2 x double> %fabs, <2 x double> addrspace(1)* %out
}
; FUNC-LABEL: {{^}}fabs_v4f64:
-; SI: V_AND_B32
-; SI: V_AND_B32
-; SI: V_AND_B32
-; SI: V_AND_B32
-; SI: S_ENDPGM
+; SI: v_and_b32
+; SI: v_and_b32
+; SI: v_and_b32
+; SI: v_and_b32
+; SI: s_endpgm
define void @fabs_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) {
%fabs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %in)
store <4 x double> %fabs, <4 x double> addrspace(1)* %out
}
; SI-LABEL: {{^}}fabs_fold_f64:
-; SI: S_LOAD_DWORDX2 [[ABS_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-NOT: AND
-; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\]}}, |[[ABS_VALUE]]|, {{v\[[0-9]+:[0-9]+\]}}
-; SI: S_ENDPGM
+; SI: s_load_dwordx2 [[ABS_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-NOT: and
+; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, |[[ABS_VALUE]]|, {{v\[[0-9]+:[0-9]+\]}}
+; SI: s_endpgm
define void @fabs_fold_f64(double addrspace(1)* %out, double %in0, double %in1) {
%fabs = call double @llvm.fabs.f64(double %in0)
%fmul = fmul double %fabs, %in1
}
; SI-LABEL: {{^}}fabs_fn_fold_f64:
-; SI: S_LOAD_DWORDX2 [[ABS_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-NOT: AND
-; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\]}}, |[[ABS_VALUE]]|, {{v\[[0-9]+:[0-9]+\]}}
-; SI: S_ENDPGM
+; SI: s_load_dwordx2 [[ABS_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-NOT: and
+; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, |[[ABS_VALUE]]|, {{v\[[0-9]+:[0-9]+\]}}
+; SI: s_endpgm
define void @fabs_fn_fold_f64(double addrspace(1)* %out, double %in0, double %in1) {
%fabs = call double @fabs(double %in0)
%fmul = fmul double %fabs, %in1
}
; FUNC-LABEL: {{^}}fabs_free_f64:
-; SI: V_AND_B32
-; SI: S_ENDPGM
+; SI: v_and_b32
+; SI: s_endpgm
define void @fabs_free_f64(double addrspace(1)* %out, i64 %in) {
%bc= bitcast i64 %in to double
%fabs = call double @llvm.fabs.f64(double %bc)
}
; FUNC-LABEL: {{^}}fabs_fn_free_f64:
-; SI: V_AND_B32
-; SI: S_ENDPGM
+; SI: v_and_b32
+; SI: s_endpgm
define void @fabs_fn_free_f64(double addrspace(1)* %out, i64 %in) {
%bc= bitcast i64 %in to double
%fabs = call double @fabs(double %bc)
; R600-NOT: AND
; R600: |PV.{{[XYZW]}}|
-; SI: V_AND_B32
+; SI: v_and_b32
define void @fabs_fn_free(float addrspace(1)* %out, i32 %in) {
%bc= bitcast i32 %in to float
; R600-NOT: AND
; R600: |PV.{{[XYZW]}}|
-; SI: V_AND_B32
+; SI: v_and_b32
define void @fabs_free(float addrspace(1)* %out, i32 %in) {
%bc= bitcast i32 %in to float
; FUNC-LABEL: {{^}}fabs_f32:
; R600: |{{(PV|T[0-9])\.[XYZW]}}|
-; SI: V_AND_B32
+; SI: v_and_b32
define void @fabs_f32(float addrspace(1)* %out, float %in) {
%fabs = call float @llvm.fabs.f32(float %in)
store float %fabs, float addrspace(1)* %out
; R600: |{{(PV|T[0-9])\.[XYZW]}}|
; R600: |{{(PV|T[0-9])\.[XYZW]}}|
-; SI: V_AND_B32
-; SI: V_AND_B32
+; SI: v_and_b32
+; SI: v_and_b32
define void @fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
%fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in)
store <2 x float> %fabs, <2 x float> addrspace(1)* %out
; R600: |{{(PV|T[0-9])\.[XYZW]}}|
; R600: |{{(PV|T[0-9])\.[XYZW]}}|
-; SI: V_AND_B32
-; SI: V_AND_B32
-; SI: V_AND_B32
-; SI: V_AND_B32
+; SI: v_and_b32
+; SI: v_and_b32
+; SI: v_and_b32
+; SI: v_and_b32
define void @fabs_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
%fabs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in)
store <4 x float> %fabs, <4 x float> addrspace(1)* %out
}
; SI-LABEL: {{^}}fabs_fn_fold:
-; SI: S_LOAD_DWORD [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
-; SI-NOT: AND
-; SI: V_MUL_F32_e64 v{{[0-9]+}}, |[[ABS_VALUE]]|, v{{[0-9]+}}
+; SI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
+; SI-NOT: and
+; SI: v_mul_f32_e64 v{{[0-9]+}}, |[[ABS_VALUE]]|, v{{[0-9]+}}
define void @fabs_fn_fold(float addrspace(1)* %out, float %in0, float %in1) {
%fabs = call float @fabs(float %in0)
%fmul = fmul float %fabs, %in1
}
; SI-LABEL: {{^}}fabs_fold:
-; SI: S_LOAD_DWORD [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
-; SI-NOT: AND
-; SI: V_MUL_F32_e64 v{{[0-9]+}}, |[[ABS_VALUE]]|, v{{[0-9]+}}
+; SI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
+; SI-NOT: and
+; SI: v_mul_f32_e64 v{{[0-9]+}}, |[[ABS_VALUE]]|, v{{[0-9]+}}
define void @fabs_fold(float addrspace(1)* %out, float %in0, float %in1) {
%fabs = call float @llvm.fabs.f32(float %in0)
%fmul = fmul float %fabs, %in1
; FUNC-LABEL: {{^}}fadd_f32:
; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].W
-; SI: V_ADD_F32
+; SI: v_add_f32
define void @fadd_f32(float addrspace(1)* %out, float %a, float %b) {
%add = fadd float %a, %b
store float %add, float addrspace(1)* %out, align 4
; FUNC-LABEL: {{^}}fadd_v2f32:
; R600-DAG: ADD {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[3].X, KC0[3].Z
; R600-DAG: ADD {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].W, KC0[3].Y
-; SI: V_ADD_F32
-; SI: V_ADD_F32
+; SI: v_add_f32
+; SI: v_add_f32
define void @fadd_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
%add = fadd <2 x float> %a, %b
store <2 x float> %add, <2 x float> addrspace(1)* %out, align 8
; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; SI: V_ADD_F32
-; SI: V_ADD_F32
-; SI: V_ADD_F32
-; SI: V_ADD_F32
+; SI: v_add_f32
+; SI: v_add_f32
+; SI: v_add_f32
+; SI: v_add_f32
define void @fadd_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
%a = load <4 x float> addrspace(1)* %in, align 16
; R600: ADD
; R600: ADD
; R600: ADD
-; SI: V_ADD_F32
-; SI: V_ADD_F32
-; SI: V_ADD_F32
-; SI: V_ADD_F32
-; SI: V_ADD_F32
-; SI: V_ADD_F32
-; SI: V_ADD_F32
-; SI: V_ADD_F32
+; SI: v_add_f32
+; SI: v_add_f32
+; SI: v_add_f32
+; SI: v_add_f32
+; SI: v_add_f32
+; SI: v_add_f32
+; SI: v_add_f32
+; SI: v_add_f32
define void @fadd_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) {
%add = fadd <8 x float> %a, %b
store <8 x float> %add, <8 x float> addrspace(1)* %out, align 32
; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s
; CHECK: {{^}}fadd_f64:
-; CHECK: V_ADD_F64 {{v[[0-9]+:[0-9]+]}}, {{v[[0-9]+:[0-9]+]}}, {{v[[0-9]+:[0-9]+]}}
+; CHECK: v_add_f64 {{v[[0-9]+:[0-9]+]}}, {{v[[0-9]+:[0-9]+]}}, {{v[[0-9]+:[0-9]+]}}
define void @fadd_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
declare <16 x float> @llvm.ceil.v16f32(<16 x float>) nounwind readnone
; FUNC-LABEL: {{^}}fceil_f32:
-; SI: V_CEIL_F32_e32
+; SI: v_ceil_f32_e32
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
; EG: CEIL {{\*? *}}[[RESULT]]
define void @fceil_f32(float addrspace(1)* %out, float %x) {
}
; FUNC-LABEL: {{^}}fceil_v2f32:
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}}
; EG: CEIL {{\*? *}}[[RESULT]]
; EG: CEIL {{\*? *}}[[RESULT]]
}
; FUNC-LABEL: {{^}}fceil_v3f32:
-; FIXME-SI: V_CEIL_F32_e32
-; FIXME-SI: V_CEIL_F32_e32
-; FIXME-SI: V_CEIL_F32_e32
+; FIXME-SI: v_ceil_f32_e32
+; FIXME-SI: v_ceil_f32_e32
+; FIXME-SI: v_ceil_f32_e32
; FIXME-EG: v3 is treated as v2 and v1, hence 2 stores
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT1:T[0-9]+]]{{\.[XYZW]}}
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT2:T[0-9]+]]{{\.[XYZW]}}
}
; FUNC-LABEL: {{^}}fceil_v4f32:
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}}
; EG: CEIL {{\*? *}}[[RESULT]]
; EG: CEIL {{\*? *}}[[RESULT]]
}
; FUNC-LABEL: {{^}}fceil_v8f32:
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT1:T[0-9]+]]{{\.[XYZW]}}
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT2:T[0-9]+]]{{\.[XYZW]}}
; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
}
; FUNC-LABEL: {{^}}fceil_v16f32:
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT1:T[0-9]+]]{{\.[XYZW]}}
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT2:T[0-9]+]]{{\.[XYZW]}}
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT3:T[0-9]+]]{{\.[XYZW]}}
declare <16 x double> @llvm.ceil.v16f64(<16 x double>) nounwind readnone
; FUNC-LABEL: {{^}}fceil_f64:
-; CI: V_CEIL_F64_e32
-; SI: S_BFE_U32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014
-; SI: S_ADD_I32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01
-; SI: S_LSHR_B64
-; SI: S_NOT_B64
-; SI: S_AND_B64
-; SI-DAG: S_AND_B32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
-; SI-DAG: CMP_LT_I32
-; SI: CNDMASK_B32
-; SI: CNDMASK_B32
-; SI: CMP_GT_I32
-; SI: CNDMASK_B32
-; SI: CNDMASK_B32
-; SI: CMP_GT_F64
-; SI: CNDMASK_B32
-; SI: CMP_NE_I32
-; SI: CNDMASK_B32
-; SI: CNDMASK_B32
-; SI: V_ADD_F64
+; CI: v_ceil_f64_e32
+; SI: s_bfe_u32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014
+; SI: s_add_i32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01
+; SI: s_lshr_b64
+; SI: s_not_b64
+; SI: s_and_b64
+; SI-DAG: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
+; SI-DAG: cmp_lt_i32
+; SI: cndmask_b32
+; SI: cndmask_b32
+; SI: cmp_gt_i32
+; SI: cndmask_b32
+; SI: cndmask_b32
+; SI: cmp_gt_f64
+; SI: cndmask_b32
+; SI: cmp_ne_i32
+; SI: cndmask_b32
+; SI: cndmask_b32
+; SI: v_add_f64
define void @fceil_f64(double addrspace(1)* %out, double %x) {
%y = call double @llvm.ceil.f64(double %x) nounwind readnone
store double %y, double addrspace(1)* %out
}
; FUNC-LABEL: {{^}}fceil_v2f64:
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
define void @fceil_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
%y = call <2 x double> @llvm.ceil.v2f64(<2 x double> %x) nounwind readnone
store <2 x double> %y, <2 x double> addrspace(1)* %out
}
; FIXME-FUNC-LABEL: {{^}}fceil_v3f64:
-; FIXME-CI: V_CEIL_F64_e32
-; FIXME-CI: V_CEIL_F64_e32
-; FIXME-CI: V_CEIL_F64_e32
+; FIXME-CI: v_ceil_f64_e32
+; FIXME-CI: v_ceil_f64_e32
+; FIXME-CI: v_ceil_f64_e32
; define void @fceil_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) {
; %y = call <3 x double> @llvm.ceil.v3f64(<3 x double> %x) nounwind readnone
; store <3 x double> %y, <3 x double> addrspace(1)* %out
; }
; FUNC-LABEL: {{^}}fceil_v4f64:
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
define void @fceil_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) {
%y = call <4 x double> @llvm.ceil.v4f64(<4 x double> %x) nounwind readnone
store <4 x double> %y, <4 x double> addrspace(1)* %out
}
; FUNC-LABEL: {{^}}fceil_v8f64:
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
define void @fceil_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) {
%y = call <8 x double> @llvm.ceil.v8f64(<8 x double> %x) nounwind readnone
store <8 x double> %y, <8 x double> addrspace(1)* %out
}
; FUNC-LABEL: {{^}}fceil_v16f64:
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
define void @fceil_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) {
%y = call <16 x double> @llvm.ceil.v16f64(<16 x double> %x) nounwind readnone
store <16 x double> %y, <16 x double> addrspace(1)* %out
; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s
; CHECK: {{^}}flt_f64:
-; CHECK: V_CMP_LT_F64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
+; CHECK: v_cmp_lt_f64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
define void @flt_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
}
; CHECK: {{^}}fle_f64:
-; CHECK: V_CMP_LE_F64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
+; CHECK: v_cmp_le_f64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
define void @fle_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
}
; CHECK: {{^}}fgt_f64:
-; CHECK: V_CMP_GT_F64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
+; CHECK: v_cmp_gt_f64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
define void @fgt_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
}
; CHECK: {{^}}fge_f64:
-; CHECK: V_CMP_GE_F64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
+; CHECK: v_cmp_ge_f64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
define void @fge_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
}
; CHECK: {{^}}fne_f64:
-; CHECK: V_CMP_NEQ_F64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
+; CHECK: v_cmp_neq_f64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
define void @fne_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
}
; CHECK: {{^}}feq_f64:
-; CHECK: V_CMP_EQ_F64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
+; CHECK: v_cmp_eq_f64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
define void @feq_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s
; CHECK: {{^}}fconst_f64:
-; CHECK-DAG: S_MOV_B32 {{s[0-9]+}}, 0x40140000
-; CHECK-DAG: S_MOV_B32 {{s[0-9]+}}, 0
+; CHECK-DAG: s_mov_b32 {{s[0-9]+}}, 0x40140000
+; CHECK-DAG: s_mov_b32 {{s[0-9]+}}, 0
define void @fconst_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
%r1 = load double addrspace(1)* %in
; Try to identify arg based on higher address.
; FUNC-LABEL: {{^}}test_copysign_f32:
-; SI: S_LOAD_DWORD [[SMAG:s[0-9]+]], {{.*}} 0xb
-; SI: S_LOAD_DWORD [[SSIGN:s[0-9]+]], {{.*}} 0xc
-; SI-DAG: V_MOV_B32_e32 [[VSIGN:v[0-9]+]], [[SSIGN]]
-; SI-DAG: V_MOV_B32_e32 [[VMAG:v[0-9]+]], [[SMAG]]
-; SI-DAG: S_MOV_B32 [[SCONST:s[0-9]+]], 0x7fffffff
-; SI: V_BFI_B32 [[RESULT:v[0-9]+]], [[SCONST]], [[VMAG]], [[VSIGN]]
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; SI: s_load_dword [[SMAG:s[0-9]+]], {{.*}} 0xb
+; SI: s_load_dword [[SSIGN:s[0-9]+]], {{.*}} 0xc
+; SI-DAG: v_mov_b32_e32 [[VSIGN:v[0-9]+]], [[SSIGN]]
+; SI-DAG: v_mov_b32_e32 [[VMAG:v[0-9]+]], [[SMAG]]
+; SI-DAG: s_mov_b32 [[SCONST:s[0-9]+]], 0x7fffffff
+; SI: v_bfi_b32 [[RESULT:v[0-9]+]], [[SCONST]], [[VMAG]], [[VSIGN]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
; EG: BFI_INT
define void @test_copysign_f32(float addrspace(1)* %out, float %mag, float %sign) nounwind {
}
; FUNC-LABEL: {{^}}test_copysign_v2f32:
-; SI: S_ENDPGM
+; SI: s_endpgm
; EG: BFI_INT
; EG: BFI_INT
}
; FUNC-LABEL: {{^}}test_copysign_v4f32:
-; SI: S_ENDPGM
+; SI: s_endpgm
; EG: BFI_INT
; EG: BFI_INT
declare <4 x double> @llvm.copysign.v4f64(<4 x double>, <4 x double>) nounwind readnone
; FUNC-LABEL: {{^}}test_copysign_f64:
-; SI-DAG: S_LOAD_DWORDX2 s{{\[}}[[SMAG_LO:[0-9]+]]:[[SMAG_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-DAG: S_LOAD_DWORDX2 s{{\[}}[[SSIGN_LO:[0-9]+]]:[[SSIGN_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
-; SI-DAG: V_MOV_B32_e32 v[[VSIGN_HI:[0-9]+]], s[[SSIGN_HI]]
-; SI-DAG: V_MOV_B32_e32 v[[VMAG_HI:[0-9]+]], s[[SMAG_HI]]
-; SI-DAG: S_MOV_B32 [[SCONST:s[0-9]+]], 0x7fffffff
-; SI: V_BFI_B32 v[[VRESULT_HI:[0-9]+]], [[SCONST]], v[[VMAG_HI]], v[[VSIGN_HI]]
-; SI: V_MOV_B32_e32 v[[VMAG_LO:[0-9]+]], s[[SMAG_LO]]
-; SI: BUFFER_STORE_DWORDX2 v{{\[}}[[VMAG_LO]]:[[VRESULT_HI]]{{\]}}
-; SI: S_ENDPGM
+; SI-DAG: s_load_dwordx2 s{{\[}}[[SMAG_LO:[0-9]+]]:[[SMAG_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dwordx2 s{{\[}}[[SSIGN_LO:[0-9]+]]:[[SSIGN_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
+; SI-DAG: v_mov_b32_e32 v[[VSIGN_HI:[0-9]+]], s[[SSIGN_HI]]
+; SI-DAG: v_mov_b32_e32 v[[VMAG_HI:[0-9]+]], s[[SMAG_HI]]
+; SI-DAG: s_mov_b32 [[SCONST:s[0-9]+]], 0x7fffffff
+; SI: v_bfi_b32 v[[VRESULT_HI:[0-9]+]], [[SCONST]], v[[VMAG_HI]], v[[VSIGN_HI]]
+; SI: v_mov_b32_e32 v[[VMAG_LO:[0-9]+]], s[[SMAG_LO]]
+; SI: buffer_store_dwordx2 v{{\[}}[[VMAG_LO]]:[[VRESULT_HI]]{{\]}}
+; SI: s_endpgm
define void @test_copysign_f64(double addrspace(1)* %out, double %mag, double %sign) nounwind {
%result = call double @llvm.copysign.f64(double %mag, double %sign)
store double %result, double addrspace(1)* %out, align 8
}
; FUNC-LABEL: {{^}}test_copysign_v2f64:
-; SI: S_ENDPGM
+; SI: s_endpgm
define void @test_copysign_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %mag, <2 x double> %sign) nounwind {
%result = call <2 x double> @llvm.copysign.v2f64(<2 x double> %mag, <2 x double> %sign)
store <2 x double> %result, <2 x double> addrspace(1)* %out, align 8
}
; FUNC-LABEL: {{^}}test_copysign_v4f64:
-; SI: S_ENDPGM
+; SI: s_endpgm
define void @test_copysign_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %mag, <4 x double> %sign) nounwind {
%result = call <4 x double> @llvm.copysign.v4f64(<4 x double> %mag, <4 x double> %sign)
store <4 x double> %result, <4 x double> addrspace(1)* %out, align 8
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS
-; SI-DAG: V_RCP_F32
-; SI-DAG: V_MUL_F32
+; SI-DAG: v_rcp_f32
+; SI-DAG: v_mul_f32
define void @fdiv_f32(float addrspace(1)* %out, float %a, float %b) {
entry:
%0 = fdiv float %a, %b
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS
-; SI-DAG: V_RCP_F32
-; SI-DAG: V_MUL_F32
-; SI-DAG: V_RCP_F32
-; SI-DAG: V_MUL_F32
+; SI-DAG: v_rcp_f32
+; SI-DAG: v_mul_f32
+; SI-DAG: v_rcp_f32
+; SI-DAG: v_mul_f32
define void @fdiv_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
entry:
%0 = fdiv <2 x float> %a, %b
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
-; SI-DAG: V_RCP_F32
-; SI-DAG: V_MUL_F32
-; SI-DAG: V_RCP_F32
-; SI-DAG: V_MUL_F32
-; SI-DAG: V_RCP_F32
-; SI-DAG: V_MUL_F32
-; SI-DAG: V_RCP_F32
-; SI-DAG: V_MUL_F32
+; SI-DAG: v_rcp_f32
+; SI-DAG: v_mul_f32
+; SI-DAG: v_rcp_f32
+; SI-DAG: v_mul_f32
+; SI-DAG: v_rcp_f32
+; SI-DAG: v_mul_f32
+; SI-DAG: v_rcp_f32
+; SI-DAG: v_mul_f32
define void @fdiv_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
%a = load <4 x float> addrspace(1) * %in
; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s
; CHECK: {{^}}fdiv_f64:
-; CHECK: V_RCP_F64_e32 {{v\[[0-9]+:[0-9]+\]}}
-; CHECK: V_MUL_F64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}
+; CHECK: v_rcp_f64_e32 {{v\[[0-9]+:[0-9]+\]}}
+; CHECK: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}
define void @fdiv_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
declare <16 x double> @llvm.floor.v16f64(<16 x double>) nounwind readnone
; FUNC-LABEL: {{^}}ffloor_f64:
-; CI: V_FLOOR_F64_e32
+; CI: v_floor_f64_e32
-; SI: S_BFE_U32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014
-; SI: S_ADD_I32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01
-; SI: S_LSHR_B64
-; SI: S_NOT_B64
-; SI: S_AND_B64
-; SI-DAG: S_AND_B32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
-; SI-DAG: CMP_LT_I32
-; SI: CNDMASK_B32
-; SI: CNDMASK_B32
-; SI: CMP_GT_I32
-; SI: CNDMASK_B32
-; SI: CNDMASK_B32
-; SI: CMP_LT_F64
-; SI: CNDMASK_B32
-; SI: CMP_NE_I32
-; SI: CNDMASK_B32
-; SI: CNDMASK_B32
-; SI: V_ADD_F64
+; SI: s_bfe_u32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014
+; SI: s_add_i32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01
+; SI: s_lshr_b64
+; SI: s_not_b64
+; SI: s_and_b64
+; SI-DAG: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
+; SI-DAG: cmp_lt_i32
+; SI: cndmask_b32
+; SI: cndmask_b32
+; SI: cmp_gt_i32
+; SI: cndmask_b32
+; SI: cndmask_b32
+; SI: cmp_lt_f64
+; SI: cndmask_b32
+; SI: cmp_ne_i32
+; SI: cndmask_b32
+; SI: cndmask_b32
+; SI: v_add_f64
define void @ffloor_f64(double addrspace(1)* %out, double %x) {
%y = call double @llvm.floor.f64(double %x) nounwind readnone
store double %y, double addrspace(1)* %out
}
; FUNC-LABEL: {{^}}ffloor_v2f64:
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
define void @ffloor_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
%y = call <2 x double> @llvm.floor.v2f64(<2 x double> %x) nounwind readnone
store <2 x double> %y, <2 x double> addrspace(1)* %out
}
; FIXME-FUNC-LABEL: {{^}}ffloor_v3f64:
-; FIXME-CI: V_FLOOR_F64_e32
-; FIXME-CI: V_FLOOR_F64_e32
-; FIXME-CI: V_FLOOR_F64_e32
+; FIXME-CI: v_floor_f64_e32
+; FIXME-CI: v_floor_f64_e32
+; FIXME-CI: v_floor_f64_e32
; define void @ffloor_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) {
; %y = call <3 x double> @llvm.floor.v3f64(<3 x double> %x) nounwind readnone
; store <3 x double> %y, <3 x double> addrspace(1)* %out
; }
; FUNC-LABEL: {{^}}ffloor_v4f64:
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
define void @ffloor_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) {
%y = call <4 x double> @llvm.floor.v4f64(<4 x double> %x) nounwind readnone
store <4 x double> %y, <4 x double> addrspace(1)* %out
}
; FUNC-LABEL: {{^}}ffloor_v8f64:
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
define void @ffloor_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) {
%y = call <8 x double> @llvm.floor.v8f64(<8 x double> %x) nounwind readnone
store <8 x double> %y, <8 x double> addrspace(1)* %out
}
; FUNC-LABEL: {{^}}ffloor_v16f64:
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
define void @ffloor_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) {
%y = call <16 x double> @llvm.floor.v16f64(<16 x double> %x) nounwind readnone
store <16 x double> %y, <16 x double> addrspace(1)* %out
; CHECK-LABEL: {{^}}branch_use_flat_i32:
-; CHECK: FLAT_STORE_DWORD {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, [M0, FLAT_SCRATCH]
-; CHECK: S_ENDPGM
+; CHECK: flat_store_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, [M0, FLAT_SCRATCH]
+; CHECK: s_endpgm
define void @branch_use_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* %gptr, i32 addrspace(3)* %lptr, i32 %x, i32 %c) #0 {
entry:
%cmp = icmp ne i32 %c, 0
; remove generic pointers.
; CHECK-LABEL: {{^}}store_flat_i32:
-; CHECK: V_MOV_B32_e32 v[[DATA:[0-9]+]], {{s[0-9]+}}
-; CHECK: V_MOV_B32_e32 v[[LO_VREG:[0-9]+]], {{s[0-9]+}}
-; CHECK: V_MOV_B32_e32 v[[HI_VREG:[0-9]+]], {{s[0-9]+}}
-; CHECK: FLAT_STORE_DWORD v[[DATA]], v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
+; CHECK: v_mov_b32_e32 v[[DATA:[0-9]+]], {{s[0-9]+}}
+; CHECK: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], {{s[0-9]+}}
+; CHECK: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], {{s[0-9]+}}
+; CHECK: flat_store_dword v[[DATA]], v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
define void @store_flat_i32(i32 addrspace(1)* %gptr, i32 %x) #0 {
%fptr = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
store i32 %x, i32 addrspace(4)* %fptr, align 4
}
; CHECK-LABEL: {{^}}store_flat_i64:
-; CHECK: FLAT_STORE_DWORDX2
+; CHECK: flat_store_dwordx2
define void @store_flat_i64(i64 addrspace(1)* %gptr, i64 %x) #0 {
%fptr = addrspacecast i64 addrspace(1)* %gptr to i64 addrspace(4)*
store i64 %x, i64 addrspace(4)* %fptr, align 8
}
; CHECK-LABEL: {{^}}store_flat_v4i32:
-; CHECK: FLAT_STORE_DWORDX4
+; CHECK: flat_store_dwordx4
define void @store_flat_v4i32(<4 x i32> addrspace(1)* %gptr, <4 x i32> %x) #0 {
%fptr = addrspacecast <4 x i32> addrspace(1)* %gptr to <4 x i32> addrspace(4)*
store <4 x i32> %x, <4 x i32> addrspace(4)* %fptr, align 16
}
; CHECK-LABEL: {{^}}store_flat_trunc_i16:
-; CHECK: FLAT_STORE_SHORT
+; CHECK: flat_store_short
define void @store_flat_trunc_i16(i16 addrspace(1)* %gptr, i32 %x) #0 {
%fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
%y = trunc i32 %x to i16
}
; CHECK-LABEL: {{^}}store_flat_trunc_i8:
-; CHECK: FLAT_STORE_BYTE
+; CHECK: flat_store_byte
define void @store_flat_trunc_i8(i8 addrspace(1)* %gptr, i32 %x) #0 {
%fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
%y = trunc i32 %x to i8
; CHECK-LABEL @load_flat_i32:
-; CHECK: FLAT_LOAD_DWORD
+; CHECK: flat_load_dword
define void @load_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %gptr) #0 {
%fptr = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
%fload = load i32 addrspace(4)* %fptr, align 4
}
; CHECK-LABEL @load_flat_i64:
-; CHECK: FLAT_LOAD_DWORDX2
+; CHECK: flat_load_dwordx2
define void @load_flat_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %gptr) #0 {
%fptr = addrspacecast i64 addrspace(1)* %gptr to i64 addrspace(4)*
%fload = load i64 addrspace(4)* %fptr, align 4
}
; CHECK-LABEL @load_flat_v4i32:
-; CHECK: FLAT_LOAD_DWORDX4
+; CHECK: flat_load_dwordx4
define void @load_flat_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %gptr) #0 {
%fptr = addrspacecast <4 x i32> addrspace(1)* %gptr to <4 x i32> addrspace(4)*
%fload = load <4 x i32> addrspace(4)* %fptr, align 4
}
; CHECK-LABEL @sextload_flat_i8:
-; CHECK: FLAT_LOAD_SBYTE
+; CHECK: flat_load_sbyte
define void @sextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %gptr) #0 {
%fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
%fload = load i8 addrspace(4)* %fptr, align 4
}
; CHECK-LABEL @zextload_flat_i8:
-; CHECK: FLAT_LOAD_UBYTE
+; CHECK: flat_load_ubyte
define void @zextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %gptr) #0 {
%fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
%fload = load i8 addrspace(4)* %fptr, align 4
}
; CHECK-LABEL @sextload_flat_i16:
-; CHECK: FLAT_LOAD_SSHORT
+; CHECK: flat_load_sshort
define void @sextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %gptr) #0 {
%fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
%fload = load i16 addrspace(4)* %fptr, align 4
}
; CHECK-LABEL @zextload_flat_i16:
-; CHECK: FLAT_LOAD_USHORT
+; CHECK: flat_load_ushort
define void @zextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %gptr) #0 {
%fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
%fload = load i16 addrspace(4)* %fptr, align 4
; Check for prologue initializing special SGPRs pointing to scratch.
; CHECK-LABEL: {{^}}store_flat_scratch:
-; CHECK: S_MOVK_I32 flat_scratch_lo, 0
-; CHECK-NO-PROMOTE: S_MOVK_I32 flat_scratch_hi, 40
-; CHECK-PROMOTE: S_MOVK_I32 flat_scratch_hi, 0
-; CHECK: FLAT_STORE_DWORD
-; CHECK: S_BARRIER
-; CHECK: FLAT_LOAD_DWORD
+; CHECK: s_movk_i32 flat_scratch_lo, 0
+; CHECK-NO-PROMOTE: s_movk_i32 flat_scratch_hi, 40
+; CHECK-PROMOTE: s_movk_i32 flat_scratch_hi, 0
+; CHECK: flat_store_dword
+; CHECK: s_barrier
+; CHECK: flat_load_dword
define void @store_flat_scratch(i32 addrspace(1)* noalias %out, i32) #0 {
%alloca = alloca i32, i32 9, align 4
%x = call i32 @llvm.r600.read.tidig.x() #3
; FUNC-LABEL: {{^}}fma_f64:
-; SI: V_FMA_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; SI: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define void @fma_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2, double addrspace(1)* %in3) {
%r0 = load double addrspace(1)* %in1
}
; FUNC-LABEL: {{^}}fma_v2f64:
-; SI: V_FMA_F64
-; SI: V_FMA_F64
+; SI: v_fma_f64
+; SI: v_fma_f64
define void @fma_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1,
<2 x double> addrspace(1)* %in2, <2 x double> addrspace(1)* %in3) {
%r0 = load <2 x double> addrspace(1)* %in1
}
; FUNC-LABEL: {{^}}fma_v4f64:
-; SI: V_FMA_F64
-; SI: V_FMA_F64
-; SI: V_FMA_F64
-; SI: V_FMA_F64
+; SI: v_fma_f64
+; SI: v_fma_f64
+; SI: v_fma_f64
+; SI: v_fma_f64
define void @fma_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in1,
<4 x double> addrspace(1)* %in2, <4 x double> addrspace(1)* %in3) {
%r0 = load <4 x double> addrspace(1)* %in1
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
; FUNC-LABEL: {{^}}fma_f32:
-; SI: V_FMA_F32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_fma_f32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}}
; EG: MEM_RAT_{{.*}} STORE_{{.*}} [[RES:T[0-9]\.[XYZW]]], {{T[0-9]\.[XYZW]}},
; EG: FMA {{\*? *}}[[RES]]
}
; FUNC-LABEL: {{^}}fma_v2f32:
-; SI: V_FMA_F32
-; SI: V_FMA_F32
+; SI: v_fma_f32
+; SI: v_fma_f32
; EG: MEM_RAT_{{.*}} STORE_{{.*}} [[RES:T[0-9]]].[[CHLO:[XYZW]]][[CHHI:[XYZW]]], {{T[0-9]\.[XYZW]}},
; EG-DAG: FMA {{\*? *}}[[RES]].[[CHLO]]
}
; FUNC-LABEL: {{^}}fma_v4f32:
-; SI: V_FMA_F32
-; SI: V_FMA_F32
-; SI: V_FMA_F32
-; SI: V_FMA_F32
+; SI: v_fma_f32
+; SI: v_fma_f32
+; SI: v_fma_f32
+; SI: v_fma_f32
; EG: MEM_RAT_{{.*}} STORE_{{.*}} [[RES:T[0-9]]].{{[XYZW][XYZW][XYZW][XYZW]}}, {{T[0-9]\.[XYZW]}},
; EG-DAG: FMA {{\*? *}}[[RES]].X
declare <16 x double> @llvm.maxnum.v16f64(<16 x double>, <16 x double>) #0
; FUNC-LABEL: @test_fmax_f64
-; SI: V_MAX_F64
+; SI: v_max_f64
define void @test_fmax_f64(double addrspace(1)* %out, double %a, double %b) nounwind {
%val = call double @llvm.maxnum.f64(double %a, double %b) #0
store double %val, double addrspace(1)* %out, align 8
}
; FUNC-LABEL: @test_fmax_v2f64
-; SI: V_MAX_F64
-; SI: V_MAX_F64
+; SI: v_max_f64
+; SI: v_max_f64
define void @test_fmax_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b) nounwind {
%val = call <2 x double> @llvm.maxnum.v2f64(<2 x double> %a, <2 x double> %b) #0
store <2 x double> %val, <2 x double> addrspace(1)* %out, align 16
}
; FUNC-LABEL: @test_fmax_v4f64
-; SI: V_MAX_F64
-; SI: V_MAX_F64
-; SI: V_MAX_F64
-; SI: V_MAX_F64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
define void @test_fmax_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b) nounwind {
%val = call <4 x double> @llvm.maxnum.v4f64(<4 x double> %a, <4 x double> %b) #0
store <4 x double> %val, <4 x double> addrspace(1)* %out, align 32
}
; FUNC-LABEL: @test_fmax_v8f64
-; SI: V_MAX_F64
-; SI: V_MAX_F64
-; SI: V_MAX_F64
-; SI: V_MAX_F64
-; SI: V_MAX_F64
-; SI: V_MAX_F64
-; SI: V_MAX_F64
-; SI: V_MAX_F64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
define void @test_fmax_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %a, <8 x double> %b) nounwind {
%val = call <8 x double> @llvm.maxnum.v8f64(<8 x double> %a, <8 x double> %b) #0
store <8 x double> %val, <8 x double> addrspace(1)* %out, align 64
}
; FUNC-LABEL: @test_fmax_v16f64
-; SI: V_MAX_F64
-; SI: V_MAX_F64
-; SI: V_MAX_F64
-; SI: V_MAX_F64
-; SI: V_MAX_F64
-; SI: V_MAX_F64
-; SI: V_MAX_F64
-; SI: V_MAX_F64
-; SI: V_MAX_F64
-; SI: V_MAX_F64
-; SI: V_MAX_F64
-; SI: V_MAX_F64
-; SI: V_MAX_F64
-; SI: V_MAX_F64
-; SI: V_MAX_F64
-; SI: V_MAX_F64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
define void @test_fmax_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %a, <16 x double> %b) nounwind {
%val = call <16 x double> @llvm.maxnum.v16f64(<16 x double> %a, <16 x double> %b) #0
store <16 x double> %val, <16 x double> addrspace(1)* %out, align 128
declare double @llvm.maxnum.f64(double, double)
; FUNC-LABEL: @test_fmax_f32
-; SI: V_MAX_F32_e32
+; SI: v_max_f32_e32
define void @test_fmax_f32(float addrspace(1)* %out, float %a, float %b) nounwind {
%val = call float @llvm.maxnum.f32(float %a, float %b) #0
store float %val, float addrspace(1)* %out, align 4
}
; FUNC-LABEL: @test_fmax_v2f32
-; SI: V_MAX_F32_e32
-; SI: V_MAX_F32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
define void @test_fmax_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) nounwind {
%val = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %a, <2 x float> %b) #0
store <2 x float> %val, <2 x float> addrspace(1)* %out, align 8
}
; FUNC-LABEL: @test_fmax_v4f32
-; SI: V_MAX_F32_e32
-; SI: V_MAX_F32_e32
-; SI: V_MAX_F32_e32
-; SI: V_MAX_F32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
define void @test_fmax_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) nounwind {
%val = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %a, <4 x float> %b) #0
store <4 x float> %val, <4 x float> addrspace(1)* %out, align 16
}
; FUNC-LABEL: @test_fmax_v8f32
-; SI: V_MAX_F32_e32
-; SI: V_MAX_F32_e32
-; SI: V_MAX_F32_e32
-; SI: V_MAX_F32_e32
-; SI: V_MAX_F32_e32
-; SI: V_MAX_F32_e32
-; SI: V_MAX_F32_e32
-; SI: V_MAX_F32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
define void @test_fmax_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) nounwind {
%val = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %a, <8 x float> %b) #0
store <8 x float> %val, <8 x float> addrspace(1)* %out, align 32
}
; FUNC-LABEL: @test_fmax_v16f32
-; SI: V_MAX_F32_e32
-; SI: V_MAX_F32_e32
-; SI: V_MAX_F32_e32
-; SI: V_MAX_F32_e32
-; SI: V_MAX_F32_e32
-; SI: V_MAX_F32_e32
-; SI: V_MAX_F32_e32
-; SI: V_MAX_F32_e32
-; SI: V_MAX_F32_e32
-; SI: V_MAX_F32_e32
-; SI: V_MAX_F32_e32
-; SI: V_MAX_F32_e32
-; SI: V_MAX_F32_e32
-; SI: V_MAX_F32_e32
-; SI: V_MAX_F32_e32
-; SI: V_MAX_F32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
define void @test_fmax_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, <16 x float> %b) nounwind {
%val = call <16 x float> @llvm.maxnum.v16f32(<16 x float> %a, <16 x float> %b) #0
store <16 x float> %val, <16 x float> addrspace(1)* %out, align 64
}
; FUNC-LABEL: @constant_fold_fmax_f32
-; SI-NOT: V_MAX_F32_e32
-; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 2.0
-; SI: BUFFER_STORE_DWORD [[REG]]
+; SI-NOT: v_max_f32_e32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 2.0
+; SI: buffer_store_dword [[REG]]
define void @constant_fold_fmax_f32(float addrspace(1)* %out) nounwind {
%val = call float @llvm.maxnum.f32(float 1.0, float 2.0) #0
store float %val, float addrspace(1)* %out, align 4
}
; FUNC-LABEL: @constant_fold_fmax_f32_nan_nan
-; SI-NOT: V_MAX_F32_e32
-; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 0x7fc00000
-; SI: BUFFER_STORE_DWORD [[REG]]
+; SI-NOT: v_max_f32_e32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000
+; SI: buffer_store_dword [[REG]]
define void @constant_fold_fmax_f32_nan_nan(float addrspace(1)* %out) nounwind {
%val = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000) #0
store float %val, float addrspace(1)* %out, align 4
}
; FUNC-LABEL: @constant_fold_fmax_f32_val_nan
-; SI-NOT: V_MAX_F32_e32
-; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 1.0
-; SI: BUFFER_STORE_DWORD [[REG]]
+; SI-NOT: v_max_f32_e32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
+; SI: buffer_store_dword [[REG]]
define void @constant_fold_fmax_f32_val_nan(float addrspace(1)* %out) nounwind {
%val = call float @llvm.maxnum.f32(float 1.0, float 0x7FF8000000000000) #0
store float %val, float addrspace(1)* %out, align 4
}
; FUNC-LABEL: @constant_fold_fmax_f32_nan_val
-; SI-NOT: V_MAX_F32_e32
-; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 1.0
-; SI: BUFFER_STORE_DWORD [[REG]]
+; SI-NOT: v_max_f32_e32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
+; SI: buffer_store_dword [[REG]]
define void @constant_fold_fmax_f32_nan_val(float addrspace(1)* %out) nounwind {
%val = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 1.0) #0
store float %val, float addrspace(1)* %out, align 4
}
; FUNC-LABEL: @constant_fold_fmax_f32_p0_p0
-; SI-NOT: V_MAX_F32_e32
-; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 0
-; SI: BUFFER_STORE_DWORD [[REG]]
+; SI-NOT: v_max_f32_e32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0
+; SI: buffer_store_dword [[REG]]
define void @constant_fold_fmax_f32_p0_p0(float addrspace(1)* %out) nounwind {
%val = call float @llvm.maxnum.f32(float 0.0, float 0.0) #0
store float %val, float addrspace(1)* %out, align 4
}
; FUNC-LABEL: @constant_fold_fmax_f32_p0_n0
-; SI-NOT: V_MAX_F32_e32
-; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 0
-; SI: BUFFER_STORE_DWORD [[REG]]
+; SI-NOT: v_max_f32_e32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0
+; SI: buffer_store_dword [[REG]]
define void @constant_fold_fmax_f32_p0_n0(float addrspace(1)* %out) nounwind {
%val = call float @llvm.maxnum.f32(float 0.0, float -0.0) #0
store float %val, float addrspace(1)* %out, align 4
}
; FUNC-LABEL: @constant_fold_fmax_f32_n0_p0
-; SI-NOT: V_MAX_F32_e32
-; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 0x80000000
-; SI: BUFFER_STORE_DWORD [[REG]]
+; SI-NOT: v_max_f32_e32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000
+; SI: buffer_store_dword [[REG]]
define void @constant_fold_fmax_f32_n0_p0(float addrspace(1)* %out) nounwind {
%val = call float @llvm.maxnum.f32(float -0.0, float 0.0) #0
store float %val, float addrspace(1)* %out, align 4
}
; FUNC-LABEL: @constant_fold_fmax_f32_n0_n0
-; SI-NOT: V_MAX_F32_e32
-; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 0x80000000
-; SI: BUFFER_STORE_DWORD [[REG]]
+; SI-NOT: v_max_f32_e32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000
+; SI: buffer_store_dword [[REG]]
define void @constant_fold_fmax_f32_n0_n0(float addrspace(1)* %out) nounwind {
%val = call float @llvm.maxnum.f32(float -0.0, float -0.0) #0
store float %val, float addrspace(1)* %out, align 4
}
; FUNC-LABEL: @fmax_var_immediate_f32
-; SI: V_MAX_F32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}
+; SI: v_max_f32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}
define void @fmax_var_immediate_f32(float addrspace(1)* %out, float %a) nounwind {
%val = call float @llvm.maxnum.f32(float %a, float 2.0) #0
store float %val, float addrspace(1)* %out, align 4
}
; FUNC-LABEL: @fmax_immediate_var_f32
-; SI: V_MAX_F32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}
+; SI: v_max_f32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}
define void @fmax_immediate_var_f32(float addrspace(1)* %out, float %a) nounwind {
%val = call float @llvm.maxnum.f32(float 2.0, float %a) #0
store float %val, float addrspace(1)* %out, align 4
}
; FUNC-LABEL: @fmax_var_literal_f32
-; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 0x42c60000
-; SI: V_MAX_F32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
+; SI: v_max_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
define void @fmax_var_literal_f32(float addrspace(1)* %out, float %a) nounwind {
%val = call float @llvm.maxnum.f32(float %a, float 99.0) #0
store float %val, float addrspace(1)* %out, align 4
}
; FUNC-LABEL: @fmax_literal_var_f32
-; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 0x42c60000
-; SI: V_MAX_F32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
+; SI: v_max_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
define void @fmax_literal_var_f32(float addrspace(1)* %out, float %a) nounwind {
%val = call float @llvm.maxnum.f32(float 99.0, float %a) #0
store float %val, float addrspace(1)* %out, align 4
declare <16 x double> @llvm.minnum.v16f64(<16 x double>, <16 x double>) #0
; FUNC-LABEL: @test_fmin_f64
-; SI: V_MIN_F64
+; SI: v_min_f64
define void @test_fmin_f64(double addrspace(1)* %out, double %a, double %b) nounwind {
%val = call double @llvm.minnum.f64(double %a, double %b) #0
store double %val, double addrspace(1)* %out, align 8
}
; FUNC-LABEL: @test_fmin_v2f64
-; SI: V_MIN_F64
-; SI: V_MIN_F64
+; SI: v_min_f64
+; SI: v_min_f64
define void @test_fmin_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b) nounwind {
%val = call <2 x double> @llvm.minnum.v2f64(<2 x double> %a, <2 x double> %b) #0
store <2 x double> %val, <2 x double> addrspace(1)* %out, align 16
}
; FUNC-LABEL: @test_fmin_v4f64
-; SI: V_MIN_F64
-; SI: V_MIN_F64
-; SI: V_MIN_F64
-; SI: V_MIN_F64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
define void @test_fmin_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b) nounwind {
%val = call <4 x double> @llvm.minnum.v4f64(<4 x double> %a, <4 x double> %b) #0
store <4 x double> %val, <4 x double> addrspace(1)* %out, align 32
}
; FUNC-LABEL: @test_fmin_v8f64
-; SI: V_MIN_F64
-; SI: V_MIN_F64
-; SI: V_MIN_F64
-; SI: V_MIN_F64
-; SI: V_MIN_F64
-; SI: V_MIN_F64
-; SI: V_MIN_F64
-; SI: V_MIN_F64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
define void @test_fmin_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %a, <8 x double> %b) nounwind {
%val = call <8 x double> @llvm.minnum.v8f64(<8 x double> %a, <8 x double> %b) #0
store <8 x double> %val, <8 x double> addrspace(1)* %out, align 64
}
; FUNC-LABEL: @test_fmin_v16f64
-; SI: V_MIN_F64
-; SI: V_MIN_F64
-; SI: V_MIN_F64
-; SI: V_MIN_F64
-; SI: V_MIN_F64
-; SI: V_MIN_F64
-; SI: V_MIN_F64
-; SI: V_MIN_F64
-; SI: V_MIN_F64
-; SI: V_MIN_F64
-; SI: V_MIN_F64
-; SI: V_MIN_F64
-; SI: V_MIN_F64
-; SI: V_MIN_F64
-; SI: V_MIN_F64
-; SI: V_MIN_F64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
define void @test_fmin_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %a, <16 x double> %b) nounwind {
%val = call <16 x double> @llvm.minnum.v16f64(<16 x double> %a, <16 x double> %b) #0
store <16 x double> %val, <16 x double> addrspace(1)* %out, align 128
declare <16 x float> @llvm.minnum.v16f32(<16 x float>, <16 x float>) #0
; FUNC-LABEL: @test_fmin_f32
-; SI: V_MIN_F32_e32
+; SI: v_min_f32_e32
define void @test_fmin_f32(float addrspace(1)* %out, float %a, float %b) nounwind {
%val = call float @llvm.minnum.f32(float %a, float %b) #0
store float %val, float addrspace(1)* %out, align 4
}
; FUNC-LABEL: @test_fmin_v2f32
-; SI: V_MIN_F32_e32
-; SI: V_MIN_F32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
define void @test_fmin_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) nounwind {
%val = call <2 x float> @llvm.minnum.v2f32(<2 x float> %a, <2 x float> %b) #0
store <2 x float> %val, <2 x float> addrspace(1)* %out, align 8
}
; FUNC-LABEL: @test_fmin_v4f32
-; SI: V_MIN_F32_e32
-; SI: V_MIN_F32_e32
-; SI: V_MIN_F32_e32
-; SI: V_MIN_F32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
define void @test_fmin_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) nounwind {
%val = call <4 x float> @llvm.minnum.v4f32(<4 x float> %a, <4 x float> %b) #0
store <4 x float> %val, <4 x float> addrspace(1)* %out, align 16
}
; FUNC-LABEL: @test_fmin_v8f32
-; SI: V_MIN_F32_e32
-; SI: V_MIN_F32_e32
-; SI: V_MIN_F32_e32
-; SI: V_MIN_F32_e32
-; SI: V_MIN_F32_e32
-; SI: V_MIN_F32_e32
-; SI: V_MIN_F32_e32
-; SI: V_MIN_F32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
define void @test_fmin_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) nounwind {
%val = call <8 x float> @llvm.minnum.v8f32(<8 x float> %a, <8 x float> %b) #0
store <8 x float> %val, <8 x float> addrspace(1)* %out, align 32
}
; FUNC-LABEL: @test_fmin_v16f32
-; SI: V_MIN_F32_e32
-; SI: V_MIN_F32_e32
-; SI: V_MIN_F32_e32
-; SI: V_MIN_F32_e32
-; SI: V_MIN_F32_e32
-; SI: V_MIN_F32_e32
-; SI: V_MIN_F32_e32
-; SI: V_MIN_F32_e32
-; SI: V_MIN_F32_e32
-; SI: V_MIN_F32_e32
-; SI: V_MIN_F32_e32
-; SI: V_MIN_F32_e32
-; SI: V_MIN_F32_e32
-; SI: V_MIN_F32_e32
-; SI: V_MIN_F32_e32
-; SI: V_MIN_F32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
define void @test_fmin_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, <16 x float> %b) nounwind {
%val = call <16 x float> @llvm.minnum.v16f32(<16 x float> %a, <16 x float> %b) #0
store <16 x float> %val, <16 x float> addrspace(1)* %out, align 64
}
; FUNC-LABEL: @constant_fold_fmin_f32
-; SI-NOT: V_MIN_F32_e32
-; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 1.0
-; SI: BUFFER_STORE_DWORD [[REG]]
+; SI-NOT: v_min_f32_e32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
+; SI: buffer_store_dword [[REG]]
define void @constant_fold_fmin_f32(float addrspace(1)* %out) nounwind {
%val = call float @llvm.minnum.f32(float 1.0, float 2.0) #0
store float %val, float addrspace(1)* %out, align 4
}
; FUNC-LABEL: @constant_fold_fmin_f32_nan_nan
-; SI-NOT: V_MIN_F32_e32
-; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 0x7fc00000
-; SI: BUFFER_STORE_DWORD [[REG]]
+; SI-NOT: v_min_f32_e32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000
+; SI: buffer_store_dword [[REG]]
define void @constant_fold_fmin_f32_nan_nan(float addrspace(1)* %out) nounwind {
%val = call float @llvm.minnum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000) #0
store float %val, float addrspace(1)* %out, align 4
}
; FUNC-LABEL: @constant_fold_fmin_f32_val_nan
-; SI-NOT: V_MIN_F32_e32
-; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 1.0
-; SI: BUFFER_STORE_DWORD [[REG]]
+; SI-NOT: v_min_f32_e32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
+; SI: buffer_store_dword [[REG]]
define void @constant_fold_fmin_f32_val_nan(float addrspace(1)* %out) nounwind {
%val = call float @llvm.minnum.f32(float 1.0, float 0x7FF8000000000000) #0
store float %val, float addrspace(1)* %out, align 4
}
; FUNC-LABEL: @constant_fold_fmin_f32_nan_val
-; SI-NOT: V_MIN_F32_e32
-; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 1.0
-; SI: BUFFER_STORE_DWORD [[REG]]
+; SI-NOT: v_min_f32_e32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
+; SI: buffer_store_dword [[REG]]
define void @constant_fold_fmin_f32_nan_val(float addrspace(1)* %out) nounwind {
%val = call float @llvm.minnum.f32(float 0x7FF8000000000000, float 1.0) #0
store float %val, float addrspace(1)* %out, align 4
}
; FUNC-LABEL: @constant_fold_fmin_f32_p0_p0
-; SI-NOT: V_MIN_F32_e32
-; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 0
-; SI: BUFFER_STORE_DWORD [[REG]]
+; SI-NOT: v_min_f32_e32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0
+; SI: buffer_store_dword [[REG]]
define void @constant_fold_fmin_f32_p0_p0(float addrspace(1)* %out) nounwind {
%val = call float @llvm.minnum.f32(float 0.0, float 0.0) #0
store float %val, float addrspace(1)* %out, align 4
}
; FUNC-LABEL: @constant_fold_fmin_f32_p0_n0
-; SI-NOT: V_MIN_F32_e32
-; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 0
-; SI: BUFFER_STORE_DWORD [[REG]]
+; SI-NOT: v_min_f32_e32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0
+; SI: buffer_store_dword [[REG]]
define void @constant_fold_fmin_f32_p0_n0(float addrspace(1)* %out) nounwind {
%val = call float @llvm.minnum.f32(float 0.0, float -0.0) #0
store float %val, float addrspace(1)* %out, align 4
}
; FUNC-LABEL: @constant_fold_fmin_f32_n0_p0
-; SI-NOT: V_MIN_F32_e32
-; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 0x80000000
-; SI: BUFFER_STORE_DWORD [[REG]]
+; SI-NOT: v_min_f32_e32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000
+; SI: buffer_store_dword [[REG]]
define void @constant_fold_fmin_f32_n0_p0(float addrspace(1)* %out) nounwind {
%val = call float @llvm.minnum.f32(float -0.0, float 0.0) #0
store float %val, float addrspace(1)* %out, align 4
}
; FUNC-LABEL: @constant_fold_fmin_f32_n0_n0
-; SI-NOT: V_MIN_F32_e32
-; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 0x80000000
-; SI: BUFFER_STORE_DWORD [[REG]]
+; SI-NOT: v_min_f32_e32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000
+; SI: buffer_store_dword [[REG]]
define void @constant_fold_fmin_f32_n0_n0(float addrspace(1)* %out) nounwind {
%val = call float @llvm.minnum.f32(float -0.0, float -0.0) #0
store float %val, float addrspace(1)* %out, align 4
}
; FUNC-LABEL: @fmin_var_immediate_f32
-; SI: V_MIN_F32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}
+; SI: v_min_f32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}
define void @fmin_var_immediate_f32(float addrspace(1)* %out, float %a) nounwind {
%val = call float @llvm.minnum.f32(float %a, float 2.0) #0
store float %val, float addrspace(1)* %out, align 4
}
; FUNC-LABEL: @fmin_immediate_var_f32
-; SI: V_MIN_F32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}
+; SI: v_min_f32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}
define void @fmin_immediate_var_f32(float addrspace(1)* %out, float %a) nounwind {
%val = call float @llvm.minnum.f32(float 2.0, float %a) #0
store float %val, float addrspace(1)* %out, align 4
}
; FUNC-LABEL: @fmin_var_literal_f32
-; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 0x42c60000
-; SI: V_MIN_F32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
+; SI: v_min_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
define void @fmin_var_literal_f32(float addrspace(1)* %out, float %a) nounwind {
%val = call float @llvm.minnum.f32(float %a, float 99.0) #0
store float %val, float addrspace(1)* %out, align 4
}
; FUNC-LABEL: @fmin_literal_var_f32
-; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 0x42c60000
-; SI: V_MIN_F32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
+; SI: v_min_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
define void @fmin_literal_var_f32(float addrspace(1)* %out, float %a) nounwind {
%val = call float @llvm.minnum.f32(float 99.0, float %a) #0
store float %val, float addrspace(1)* %out, align 4
; FUNC-LABEL: {{^}}fmul_f32:
; R600: MUL_IEEE {{\** *}}{{T[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].W
-; SI: V_MUL_F32
+; SI: v_mul_f32
define void @fmul_f32(float addrspace(1)* %out, float %a, float %b) {
entry:
%0 = fmul float %a, %b
; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}
; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}
-; SI: V_MUL_F32
-; SI: V_MUL_F32
+; SI: v_mul_f32
+; SI: v_mul_f32
define void @fmul_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
entry:
%0 = fmul <2 x float> %a, %b
; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; SI: V_MUL_F32
-; SI: V_MUL_F32
-; SI: V_MUL_F32
-; SI: V_MUL_F32
+; SI: v_mul_f32
+; SI: v_mul_f32
+; SI: v_mul_f32
+; SI: v_mul_f32
define void @fmul_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
%a = load <4 x float> addrspace(1) * %in
}
; FUNC-LABEL: {{^}}test_mul_2_k:
-; SI: V_MUL_F32
-; SI-NOT: V_MUL_F32
-; SI: S_ENDPGM
+; SI: v_mul_f32
+; SI-NOT: v_mul_f32
+; SI: s_endpgm
define void @test_mul_2_k(float addrspace(1)* %out, float %x) #0 {
%y = fmul float %x, 2.0
%z = fmul float %y, 3.0
}
; FUNC-LABEL: {{^}}test_mul_2_k_inv:
-; SI: V_MUL_F32
-; SI-NOT: V_MUL_F32
-; SI-NOT: V_MAD_F32
-; SI: S_ENDPGM
+; SI: v_mul_f32
+; SI-NOT: v_mul_f32
+; SI-NOT: v_mad_f32
+; SI: s_endpgm
define void @test_mul_2_k_inv(float addrspace(1)* %out, float %x) #0 {
%y = fmul float %x, 3.0
%z = fmul float %y, 2.0
; RUN: llc -march=r600 -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s
; FUNC-LABEL: {{^}}fmul_f64:
-; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define void @fmul_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
%r0 = load double addrspace(1)* %in1
}
; FUNC-LABEL: {{^}}fmul_v2f64:
-; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
-; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define void @fmul_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1,
<2 x double> addrspace(1)* %in2) {
%r0 = load <2 x double> addrspace(1)* %in1
}
; FUNC-LABEL: {{^}}fmul_v4f64:
-; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
-; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
-; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
-; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define void @fmul_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in1,
<4 x double> addrspace(1)* %in2) {
%r0 = load <4 x double> addrspace(1)* %in1
declare float @llvm.fabs.f32(float) nounwind readnone
; CHECK-LABEL: {{^}}fmuladd_f32:
-; CHECK: V_MAD_F32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}}
+; CHECK: v_mad_f32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}}
define void @fmuladd_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
float addrspace(1)* %in2, float addrspace(1)* %in3) {
}
; CHECK-LABEL: {{^}}fmuladd_f64:
-; CHECK: V_FMA_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; CHECK: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define void @fmuladd_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2, double addrspace(1)* %in3) {
}
; CHECK-LABEL: {{^}}fmuladd_2.0_a_b_f32
-; CHECK-DAG: BUFFER_LOAD_DWORD [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; CHECK-DAG: BUFFER_LOAD_DWORD [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
-; CHECK: V_MAD_F32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
-; CHECK: BUFFER_STORE_DWORD [[RESULT]]
+; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
+; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
+; CHECK: buffer_store_dword [[RESULT]]
define void @fmuladd_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
}
; CHECK-LABEL: {{^}}fmuladd_a_2.0_b_f32
-; CHECK-DAG: BUFFER_LOAD_DWORD [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; CHECK-DAG: BUFFER_LOAD_DWORD [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
-; CHECK: V_MAD_F32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
-; CHECK: BUFFER_STORE_DWORD [[RESULT]]
+; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
+; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
+; CHECK: buffer_store_dword [[RESULT]]
define void @fmuladd_a_2.0_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
}
; CHECK-LABEL: {{^}}fadd_a_a_b_f32:
-; CHECK-DAG: BUFFER_LOAD_DWORD [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; CHECK-DAG: BUFFER_LOAD_DWORD [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
-; CHECK: V_MAD_F32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
-; CHECK: BUFFER_STORE_DWORD [[RESULT]]
+; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
+; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
+; CHECK: buffer_store_dword [[RESULT]]
define void @fadd_a_a_b_f32(float addrspace(1)* %out,
float addrspace(1)* %in1,
float addrspace(1)* %in2) {
}
; CHECK-LABEL: {{^}}fadd_b_a_a_f32:
-; CHECK-DAG: BUFFER_LOAD_DWORD [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; CHECK-DAG: BUFFER_LOAD_DWORD [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
-; CHECK: V_MAD_F32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
-; CHECK: BUFFER_STORE_DWORD [[RESULT]]
+; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
+; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
+; CHECK: buffer_store_dword [[RESULT]]
define void @fadd_b_a_a_f32(float addrspace(1)* %out,
float addrspace(1)* %in1,
float addrspace(1)* %in2) {
}
; CHECK-LABEL: {{^}}fmuladd_neg_2.0_a_b_f32
-; CHECK-DAG: BUFFER_LOAD_DWORD [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; CHECK-DAG: BUFFER_LOAD_DWORD [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
-; CHECK: V_MAD_F32 [[RESULT:v[0-9]+]], [[R1]], -2.0, [[R2]]
-; CHECK: BUFFER_STORE_DWORD [[RESULT]]
+; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
+; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], [[R1]], -2.0, [[R2]]
+; CHECK: buffer_store_dword [[RESULT]]
define void @fmuladd_neg_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
; CHECK-LABEL: {{^}}fmuladd_neg_2.0_neg_a_b_f32
-; CHECK-DAG: BUFFER_LOAD_DWORD [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; CHECK-DAG: BUFFER_LOAD_DWORD [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
-; CHECK: V_MAD_F32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
-; CHECK: BUFFER_STORE_DWORD [[RESULT]]
+; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
+; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
+; CHECK: buffer_store_dword [[RESULT]]
define void @fmuladd_neg_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
; CHECK-LABEL: {{^}}fmuladd_2.0_neg_a_b_f32
-; CHECK-DAG: BUFFER_LOAD_DWORD [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; CHECK-DAG: BUFFER_LOAD_DWORD [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
-; CHECK: V_MAD_F32 [[RESULT:v[0-9]+]], [[R1]], -2.0, [[R2]]
-; CHECK: BUFFER_STORE_DWORD [[RESULT]]
+; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
+; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], [[R1]], -2.0, [[R2]]
+; CHECK: buffer_store_dword [[RESULT]]
define void @fmuladd_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
; CHECK-LABEL: {{^}}fmuladd_2.0_a_neg_b_f32
-; CHECK-DAG: BUFFER_LOAD_DWORD [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; CHECK-DAG: BUFFER_LOAD_DWORD [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
-; CHECK: V_MAD_F32 [[RESULT:v[0-9]+]], 2.0, [[R1]], -[[R2]]
-; CHECK: BUFFER_STORE_DWORD [[RESULT]]
+; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
+; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], -[[R2]]
+; CHECK: buffer_store_dword [[RESULT]]
define void @fmuladd_2.0_a_neg_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
; into 2 modifiers, although theoretically that should work.
; FUNC-LABEL: {{^}}fneg_fabs_fadd_f64:
-; SI: V_MOV_B32_e32 [[IMMREG:v[0-9]+]], 0x7fffffff
-; SI: V_AND_B32_e32 v[[FABS:[0-9]+]], {{s[0-9]+}}, [[IMMREG]]
-; SI: V_ADD_F64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, -v{{\[[0-9]+}}:[[FABS]]{{\]}}
+; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x7fffffff
+; SI: v_and_b32_e32 v[[FABS:[0-9]+]], {{s[0-9]+}}, [[IMMREG]]
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, -v{{\[[0-9]+}}:[[FABS]]{{\]}}
define void @fneg_fabs_fadd_f64(double addrspace(1)* %out, double %x, double %y) {
%fabs = call double @llvm.fabs.f64(double %x)
%fsub = fsub double -0.000000e+00, %fabs
}
; FUNC-LABEL: {{^}}fneg_fabs_fmul_f64:
-; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, -|{{v\[[0-9]+:[0-9]+\]}}|
+; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, -|{{v\[[0-9]+:[0-9]+\]}}|
define void @fneg_fabs_fmul_f64(double addrspace(1)* %out, double %x, double %y) {
%fabs = call double @llvm.fabs.f64(double %x)
%fsub = fsub double -0.000000e+00, %fabs
}
; FUNC-LABEL: {{^}}fneg_fabs_fn_free_f64:
-; SI: V_MOV_B32_e32 [[IMMREG:v[0-9]+]], 0x80000000
-; SI: V_OR_B32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
+; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
+; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
define void @fneg_fabs_fn_free_f64(double addrspace(1)* %out, i64 %in) {
%bc = bitcast i64 %in to double
%fabs = call double @fabs(double %bc)
}
; FUNC-LABEL: {{^}}fneg_fabs_f64:
-; SI: S_LOAD_DWORDX2
-; SI: S_LOAD_DWORDX2 s{{\[}}[[LO_X:[0-9]+]]:[[HI_X:[0-9]+]]{{\]}}
-; SI: V_MOV_B32_e32 [[IMMREG:v[0-9]+]], 0x80000000
-; SI-DAG: V_OR_B32_e32 v[[HI_V:[0-9]+]], s[[HI_X]], [[IMMREG]]
-; SI-DAG: V_MOV_B32_e32 v[[LO_V:[0-9]+]], s[[LO_X]]
-; SI: BUFFER_STORE_DWORDX2 v{{\[}}[[LO_V]]:[[HI_V]]{{\]}}
+; SI: s_load_dwordx2
+; SI: s_load_dwordx2 s{{\[}}[[LO_X:[0-9]+]]:[[HI_X:[0-9]+]]{{\]}}
+; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
+; SI-DAG: v_or_b32_e32 v[[HI_V:[0-9]+]], s[[HI_X]], [[IMMREG]]
+; SI-DAG: v_mov_b32_e32 v[[LO_V:[0-9]+]], s[[LO_X]]
+; SI: buffer_store_dwordx2 v{{\[}}[[LO_V]]:[[HI_V]]{{\]}}
define void @fneg_fabs_f64(double addrspace(1)* %out, double %in) {
%fabs = call double @llvm.fabs.f64(double %in)
%fsub = fsub double -0.000000e+00, %fabs
}
; FUNC-LABEL: {{^}}fneg_fabs_v2f64:
-; SI: V_MOV_B32_e32 [[IMMREG:v[0-9]+]], 0x80000000
+; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
; SI-NOT: 0x80000000
-; SI: V_OR_B32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
-; SI: V_OR_B32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
+; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
+; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
define void @fneg_fabs_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) {
%fabs = call <2 x double> @llvm.fabs.v2f64(<2 x double> %in)
%fsub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %fabs
}
; FUNC-LABEL: {{^}}fneg_fabs_v4f64:
-; SI: V_MOV_B32_e32 [[IMMREG:v[0-9]+]], 0x80000000
+; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
; SI-NOT: 0x80000000
-; SI: V_OR_B32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
-; SI: V_OR_B32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
-; SI: V_OR_B32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
-; SI: V_OR_B32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
+; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
+; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
+; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
+; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
define void @fneg_fabs_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) {
%fabs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %in)
%fsub = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %fabs
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}fneg_fabs_fadd_f32:
-; SI-NOT: AND
-; SI: V_SUB_F32_e64 {{v[0-9]+}}, {{s[0-9]+}}, |{{v[0-9]+}}|
+; SI-NOT: and
+; SI: v_sub_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, |{{v[0-9]+}}|
define void @fneg_fabs_fadd_f32(float addrspace(1)* %out, float %x, float %y) {
%fabs = call float @llvm.fabs.f32(float %x)
%fsub = fsub float -0.000000e+00, %fabs
}
; FUNC-LABEL: {{^}}fneg_fabs_fmul_f32:
-; SI-NOT: AND
-; SI: V_MUL_F32_e64 {{v[0-9]+}}, {{s[0-9]+}}, -|{{v[0-9]+}}|
-; SI-NOT: AND
+; SI-NOT: and
+; SI: v_mul_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, -|{{v[0-9]+}}|
+; SI-NOT: and
define void @fneg_fabs_fmul_f32(float addrspace(1)* %out, float %x, float %y) {
%fabs = call float @llvm.fabs.f32(float %x)
%fsub = fsub float -0.000000e+00, %fabs
; R600: |PV.{{[XYZW]}}|
; R600: -PV
-; SI: V_MOV_B32_e32 [[IMMREG:v[0-9]+]], 0x80000000
-; SI: V_OR_B32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
+; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
+; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
define void @fneg_fabs_free_f32(float addrspace(1)* %out, i32 %in) {
%bc = bitcast i32 %in to float
%fabs = call float @llvm.fabs.f32(float %bc)
; R600: |PV.{{[XYZW]}}|
; R600: -PV
-; SI: V_MOV_B32_e32 [[IMMREG:v[0-9]+]], 0x80000000
-; SI: V_OR_B32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
+; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
+; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
define void @fneg_fabs_fn_free_f32(float addrspace(1)* %out, i32 %in) {
%bc = bitcast i32 %in to float
%fabs = call float @fabs(float %bc)
}
; FUNC-LABEL: {{^}}fneg_fabs_f32:
-; SI: V_MOV_B32_e32 [[IMMREG:v[0-9]+]], 0x80000000
-; SI: V_OR_B32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
+; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
+; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
define void @fneg_fabs_f32(float addrspace(1)* %out, float %in) {
%fabs = call float @llvm.fabs.f32(float %in)
%fsub = fsub float -0.000000e+00, %fabs
}
; FUNC-LABEL: {{^}}v_fneg_fabs_f32:
-; SI: V_OR_B32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
+; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
define void @v_fneg_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%val = load float addrspace(1)* %in, align 4
%fabs = call float @llvm.fabs.f32(float %val)
; R600: -PV
; FIXME: SGPR should be used directly for first src operand.
-; SI: V_MOV_B32_e32 [[IMMREG:v[0-9]+]], 0x80000000
+; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
; SI-NOT: 0x80000000
-; SI: V_OR_B32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
-; SI: V_OR_B32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
+; SI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
+; SI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
define void @fneg_fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
%fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in)
%fsub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %fabs
; FIXME: SGPR should be used directly for first src operand.
; FUNC-LABEL: {{^}}fneg_fabs_v4f32:
-; SI: V_MOV_B32_e32 [[IMMREG:v[0-9]+]], 0x80000000
+; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
; SI-NOT: 0x80000000
-; SI: V_OR_B32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
-; SI: V_OR_B32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
-; SI: V_OR_B32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
-; SI: V_OR_B32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
+; SI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
+; SI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
+; SI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
+; SI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
define void @fneg_fabs_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
%fabs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in)
%fsub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %fabs
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}fneg_f64:
-; SI: V_XOR_B32
+; SI: v_xor_b32
define void @fneg_f64(double addrspace(1)* %out, double %in) {
%fneg = fsub double -0.000000e+00, %in
store double %fneg, double addrspace(1)* %out
}
; FUNC-LABEL: {{^}}fneg_v2f64:
-; SI: V_XOR_B32
-; SI: V_XOR_B32
+; SI: v_xor_b32
+; SI: v_xor_b32
define void @fneg_v2f64(<2 x double> addrspace(1)* nocapture %out, <2 x double> %in) {
%fneg = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %in
store <2 x double> %fneg, <2 x double> addrspace(1)* %out
; R600: -PV
; R600: -PV
-; SI: V_XOR_B32
-; SI: V_XOR_B32
-; SI: V_XOR_B32
-; SI: V_XOR_B32
+; SI: v_xor_b32
+; SI: v_xor_b32
+; SI: v_xor_b32
+; SI: v_xor_b32
define void @fneg_v4f64(<4 x double> addrspace(1)* nocapture %out, <4 x double> %in) {
%fneg = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %in
store <4 x double> %fneg, <4 x double> addrspace(1)* %out
; FUNC-LABEL: {{^}}fneg_free_f64:
; FIXME: Unnecessary copy to VGPRs
-; SI: V_ADD_F64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, -{{v\[[0-9]+:[0-9]+\]$}}
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, -{{v\[[0-9]+:[0-9]+\]$}}
define void @fneg_free_f64(double addrspace(1)* %out, i64 %in) {
%bc = bitcast i64 %in to double
%fsub = fsub double 0.0, %bc
}
; SI-LABEL: {{^}}fneg_fold_f64:
-; SI: S_LOAD_DWORDX2 [[NEG_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-NOT: XOR
-; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\]}}, -[[NEG_VALUE]], [[NEG_VALUE]]
+; SI: s_load_dwordx2 [[NEG_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-NOT: xor
+; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, -[[NEG_VALUE]], [[NEG_VALUE]]
define void @fneg_fold_f64(double addrspace(1)* %out, double %in) {
%fsub = fsub double -0.0, %in
%fmul = fmul double %fsub, %in
; FUNC-LABEL: {{^}}fneg_f32:
; R600: -PV
-; SI: V_XOR_B32
+; SI: v_xor_b32
define void @fneg_f32(float addrspace(1)* %out, float %in) {
%fneg = fsub float -0.000000e+00, %in
store float %fneg, float addrspace(1)* %out
; R600: -PV
; R600: -PV
-; SI: V_XOR_B32
-; SI: V_XOR_B32
+; SI: v_xor_b32
+; SI: v_xor_b32
define void @fneg_v2f32(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) {
%fneg = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %in
store <2 x float> %fneg, <2 x float> addrspace(1)* %out
; R600: -PV
; R600: -PV
-; SI: V_XOR_B32
-; SI: V_XOR_B32
-; SI: V_XOR_B32
-; SI: V_XOR_B32
+; SI: v_xor_b32
+; SI: v_xor_b32
+; SI: v_xor_b32
+; SI: v_xor_b32
define void @fneg_v4f32(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) {
%fneg = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %in
store <4 x float> %fneg, <4 x float> addrspace(1)* %out
; R600-NOT: XOR
; R600: -KC0[2].Z
-; XXX: We could use V_ADD_F32_e64 with the negate bit here instead.
-; SI: V_SUB_F32_e64 v{{[0-9]}}, 0.0, s{{[0-9]+$}}
+; XXX: We could use v_add_f32_e64 with the negate bit here instead.
+; SI: v_sub_f32_e64 v{{[0-9]}}, 0.0, s{{[0-9]+$}}
define void @fneg_free_f32(float addrspace(1)* %out, i32 %in) {
%bc = bitcast i32 %in to float
%fsub = fsub float 0.0, %bc
}
; FUNC-LABEL: {{^}}fneg_fold_f32:
-; SI: S_LOAD_DWORD [[NEG_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
-; SI-NOT: XOR
-; SI: V_MUL_F32_e64 v{{[0-9]+}}, -[[NEG_VALUE]], [[NEG_VALUE]]
+; SI: s_load_dword [[NEG_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
+; SI-NOT: xor
+; SI: v_mul_f32_e64 v{{[0-9]+}}, -[[NEG_VALUE]], [[NEG_VALUE]]
define void @fneg_fold_f32(float addrspace(1)* %out, float %in) {
%fsub = fsub float -0.0, %in
%fmul = fmul float %fsub, %in
declare double @llvm.convert.from.fp16.f64(i16) nounwind readnone
; SI-LABEL: {{^}}test_convert_fp16_to_fp32:
-; SI: BUFFER_LOAD_USHORT [[VAL:v[0-9]+]]
-; SI: V_CVT_F32_F16_e32 [[RESULT:v[0-9]+]], [[VAL]]
-; SI: BUFFER_STORE_DWORD [[RESULT]]
+; SI: buffer_load_ushort [[VAL:v[0-9]+]]
+; SI: v_cvt_f32_f16_e32 [[RESULT:v[0-9]+]], [[VAL]]
+; SI: buffer_store_dword [[RESULT]]
define void @test_convert_fp16_to_fp32(float addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
%val = load i16 addrspace(1)* %in, align 2
%cvt = call float @llvm.convert.from.fp16.f32(i16 %val) nounwind readnone
; SI-LABEL: {{^}}test_convert_fp16_to_fp64:
-; SI: BUFFER_LOAD_USHORT [[VAL:v[0-9]+]]
-; SI: V_CVT_F32_F16_e32 [[RESULT32:v[0-9]+]], [[VAL]]
-; SI: V_CVT_F64_F32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[RESULT32]]
-; SI: BUFFER_STORE_DWORDX2 [[RESULT]]
+; SI: buffer_load_ushort [[VAL:v[0-9]+]]
+; SI: v_cvt_f32_f16_e32 [[RESULT32:v[0-9]+]], [[VAL]]
+; SI: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[RESULT32]]
+; SI: buffer_store_dwordx2 [[RESULT]]
define void @test_convert_fp16_to_fp64(double addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
%val = load i16 addrspace(1)* %in, align 2
%cvt = call double @llvm.convert.from.fp16.f64(i16 %val) nounwind readnone
declare i16 @llvm.convert.to.fp16.f32(float) nounwind readnone
; SI-LABEL: {{^}}test_convert_fp32_to_fp16:
-; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]]
-; SI: V_CVT_F16_F32_e32 [[RESULT:v[0-9]+]], [[VAL]]
-; SI: BUFFER_STORE_SHORT [[RESULT]]
+; SI: buffer_load_dword [[VAL:v[0-9]+]]
+; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[VAL]]
+; SI: buffer_store_short [[RESULT]]
define void @test_convert_fp32_to_fp16(i16 addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
%val = load float addrspace(1)* %in, align 4
%cvt = call i16 @llvm.convert.to.fp16.f32(float %val) nounwind readnone
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
; FUNC-LABEL: @fp_to_sint_f64_i32
-; SI: V_CVT_I32_F64_e32
+; SI: v_cvt_i32_f64_e32
define void @fp_to_sint_f64_i32(i32 addrspace(1)* %out, double %in) {
%result = fptosi double %in to i32
store i32 %result, i32 addrspace(1)* %out
}
; FUNC-LABEL: @fp_to_sint_v2f64_v2i32
-; SI: V_CVT_I32_F64_e32
-; SI: V_CVT_I32_F64_e32
+; SI: v_cvt_i32_f64_e32
+; SI: v_cvt_i32_f64_e32
define void @fp_to_sint_v2f64_v2i32(<2 x i32> addrspace(1)* %out, <2 x double> %in) {
%result = fptosi <2 x double> %in to <2 x i32>
store <2 x i32> %result, <2 x i32> addrspace(1)* %out
}
; FUNC-LABEL: @fp_to_sint_v4f64_v4i32
-; SI: V_CVT_I32_F64_e32
-; SI: V_CVT_I32_F64_e32
-; SI: V_CVT_I32_F64_e32
-; SI: V_CVT_I32_F64_e32
+; SI: v_cvt_i32_f64_e32
+; SI: v_cvt_i32_f64_e32
+; SI: v_cvt_i32_f64_e32
+; SI: v_cvt_i32_f64_e32
define void @fp_to_sint_v4f64_v4i32(<4 x i32> addrspace(1)* %out, <4 x double> %in) {
%result = fptosi <4 x double> %in to <4 x i32>
store <4 x i32> %result, <4 x i32> addrspace(1)* %out
}
; FUNC-LABEL: @fp_to_sint_i64_f64
-; CI-DAG: BUFFER_LOAD_DWORDX2 [[VAL:v\[[0-9]+:[0-9]+\]]]
-; CI-DAG: V_TRUNC_F64_e32 [[TRUNC:v\[[0-9]+:[0-9]+\]]], [[VAL]]
-; CI-DAG: S_MOV_B32 s[[K0_LO:[0-9]+]], 0{{$}}
-; CI-DAG: S_MOV_B32 s[[K0_HI:[0-9]+]], 0x3df00000
+; CI-DAG: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
+; CI-DAG: v_trunc_f64_e32 [[TRUNC:v\[[0-9]+:[0-9]+\]]], [[VAL]]
+; CI-DAG: s_mov_b32 s[[K0_LO:[0-9]+]], 0{{$}}
+; CI-DAG: s_mov_b32 s[[K0_HI:[0-9]+]], 0x3df00000
-; CI-DAG: V_MUL_F64 [[MUL:v\[[0-9]+:[0-9]+\]]], [[VAL]], s{{\[}}[[K0_LO]]:[[K0_HI]]{{\]}}
-; CI-DAG: V_FLOOR_F64_e32 [[FLOOR:v\[[0-9]+:[0-9]+\]]], [[MUL]]
+; CI-DAG: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], [[VAL]], s{{\[}}[[K0_LO]]:[[K0_HI]]{{\]}}
+; CI-DAG: v_floor_f64_e32 [[FLOOR:v\[[0-9]+:[0-9]+\]]], [[MUL]]
-; CI-DAG: S_MOV_B32 s[[K1_HI:[0-9]+]], 0xc1f00000
+; CI-DAG: s_mov_b32 s[[K1_HI:[0-9]+]], 0xc1f00000
-; CI-DAG: V_FMA_F64 [[FMA:v\[[0-9]+:[0-9]+\]]], [[FLOOR]], s{{\[[0-9]+}}:[[K1_HI]]{{\]}}, [[TRUNC]]
-; CI-DAG: V_CVT_U32_F64_e32 v[[LO:[0-9]+]], [[FMA]]
-; CI-DAG: V_CVT_I32_F64_e32 v[[HI:[0-9]+]], [[FLOOR]]
-; CI: BUFFER_STORE_DWORDX2 v{{\[}}[[LO]]:[[HI]]{{\]}}
+; CI-DAG: v_fma_f64 [[FMA:v\[[0-9]+:[0-9]+\]]], [[FLOOR]], s{{\[[0-9]+}}:[[K1_HI]]{{\]}}, [[TRUNC]]
+; CI-DAG: v_cvt_u32_f64_e32 v[[LO:[0-9]+]], [[FMA]]
+; CI-DAG: v_cvt_i32_f64_e32 v[[HI:[0-9]+]], [[FLOOR]]
+; CI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @fp_to_sint_i64_f64(i64 addrspace(1)* %out, double addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep = getelementptr double addrspace(1)* %in, i32 %tid
; FUNC-LABEL: {{^}}fp_to_sint_i32:
; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
-; SI: V_CVT_I32_F32_e32
-; SI: S_ENDPGM
+; SI: v_cvt_i32_f32_e32
+; SI: s_endpgm
define void @fp_to_sint_i32 (i32 addrspace(1)* %out, float %in) {
%conv = fptosi float %in to i32
store i32 %conv, i32 addrspace(1)* %out
; FUNC-LABEL: {{^}}fp_to_sint_v2i32:
; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
-; SI: V_CVT_I32_F32_e32
-; SI: V_CVT_I32_F32_e32
+; SI: v_cvt_i32_f32_e32
+; SI: v_cvt_i32_f32_e32
define void @fp_to_sint_v2i32(<2 x i32> addrspace(1)* %out, <2 x float> %in) {
%result = fptosi <2 x float> %in to <2 x i32>
store <2 x i32> %result, <2 x i32> addrspace(1)* %out
; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW]}}
; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
-; SI: V_CVT_I32_F32_e32
-; SI: V_CVT_I32_F32_e32
-; SI: V_CVT_I32_F32_e32
-; SI: V_CVT_I32_F32_e32
+; SI: v_cvt_i32_f32_e32
+; SI: v_cvt_i32_f32_e32
+; SI: v_cvt_i32_f32_e32
+; SI: v_cvt_i32_f32_e32
define void @fp_to_sint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
%value = load <4 x float> addrspace(1) * %in
%result = fptosi <4 x float> %value to <4 x i32>
; EG-DAG: CNDE_INT
; Check that the compiler doesn't crash with a "cannot select" error
-; SI: S_ENDPGM
+; SI: s_endpgm
define void @fp_to_sint_i64 (i64 addrspace(1)* %out, float %in) {
entry:
%0 = fptosi float %in to i64
; EG-DAG: CNDE_INT
; EG-DAG: CNDE_INT
-; SI: S_ENDPGM
+; SI: s_endpgm
define void @fp_to_sint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) {
%conv = fptosi <2 x float> %x to <2 x i64>
store <2 x i64> %conv, <2 x i64> addrspace(1)* %out
; EG-DAG: CNDE_INT
; EG-DAG: CNDE_INT
-; SI: S_ENDPGM
+; SI: s_endpgm
define void @fp_to_sint_v4i64(<4 x i64> addrspace(1)* %out, <4 x float> %x) {
%conv = fptosi <4 x float> %x to <4 x i64>
store <4 x i64> %conv, <4 x i64> addrspace(1)* %out
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
; SI-LABEL: {{^}}fp_to_uint_i32_f64:
-; SI: V_CVT_U32_F64_e32
+; SI: v_cvt_u32_f64_e32
define void @fp_to_uint_i32_f64(i32 addrspace(1)* %out, double %in) {
%cast = fptoui double %in to i32
store i32 %cast, i32 addrspace(1)* %out, align 4
}
; SI-LABEL: @fp_to_uint_v2i32_v2f64
-; SI: V_CVT_U32_F64_e32
-; SI: V_CVT_U32_F64_e32
+; SI: v_cvt_u32_f64_e32
+; SI: v_cvt_u32_f64_e32
define void @fp_to_uint_v2i32_v2f64(<2 x i32> addrspace(1)* %out, <2 x double> %in) {
%cast = fptoui <2 x double> %in to <2 x i32>
store <2 x i32> %cast, <2 x i32> addrspace(1)* %out, align 8
}
; SI-LABEL: @fp_to_uint_v4i32_v4f64
-; SI: V_CVT_U32_F64_e32
-; SI: V_CVT_U32_F64_e32
-; SI: V_CVT_U32_F64_e32
-; SI: V_CVT_U32_F64_e32
+; SI: v_cvt_u32_f64_e32
+; SI: v_cvt_u32_f64_e32
+; SI: v_cvt_u32_f64_e32
+; SI: v_cvt_u32_f64_e32
define void @fp_to_uint_v4i32_v4f64(<4 x i32> addrspace(1)* %out, <4 x double> %in) {
%cast = fptoui <4 x double> %in to <4 x i32>
store <4 x i32> %cast, <4 x i32> addrspace(1)* %out, align 8
}
; FUNC-LABEL: @fp_to_uint_i64_f64
-; CI-DAG: BUFFER_LOAD_DWORDX2 [[VAL:v\[[0-9]+:[0-9]+\]]]
-; CI-DAG: V_TRUNC_F64_e32 [[TRUNC:v\[[0-9]+:[0-9]+\]]], [[VAL]]
-; CI-DAG: S_MOV_B32 s[[K0_LO:[0-9]+]], 0{{$}}
-; CI-DAG: S_MOV_B32 s[[K0_HI:[0-9]+]], 0x3df00000
+; CI-DAG: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
+; CI-DAG: v_trunc_f64_e32 [[TRUNC:v\[[0-9]+:[0-9]+\]]], [[VAL]]
+; CI-DAG: s_mov_b32 s[[K0_LO:[0-9]+]], 0{{$}}
+; CI-DAG: s_mov_b32 s[[K0_HI:[0-9]+]], 0x3df00000
-; CI-DAG: V_MUL_F64 [[MUL:v\[[0-9]+:[0-9]+\]]], [[VAL]], s{{\[}}[[K0_LO]]:[[K0_HI]]{{\]}}
-; CI-DAG: V_FLOOR_F64_e32 [[FLOOR:v\[[0-9]+:[0-9]+\]]], [[MUL]]
+; CI-DAG: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], [[VAL]], s{{\[}}[[K0_LO]]:[[K0_HI]]{{\]}}
+; CI-DAG: v_floor_f64_e32 [[FLOOR:v\[[0-9]+:[0-9]+\]]], [[MUL]]
-; CI-DAG: S_MOV_B32 s[[K1_HI:[0-9]+]], 0xc1f00000
+; CI-DAG: s_mov_b32 s[[K1_HI:[0-9]+]], 0xc1f00000
-; CI-DAG: V_FMA_F64 [[FMA:v\[[0-9]+:[0-9]+\]]], [[FLOOR]], s{{\[[0-9]+}}:[[K1_HI]]{{\]}}, [[TRUNC]]
-; CI-DAG: V_CVT_U32_F64_e32 v[[LO:[0-9]+]], [[FMA]]
-; CI-DAG: V_CVT_U32_F64_e32 v[[HI:[0-9]+]], [[FLOOR]]
-; CI: BUFFER_STORE_DWORDX2 v{{\[}}[[LO]]:[[HI]]{{\]}}
+; CI-DAG: v_fma_f64 [[FMA:v\[[0-9]+:[0-9]+\]]], [[FLOOR]], s{{\[[0-9]+}}:[[K1_HI]]{{\]}}, [[TRUNC]]
+; CI-DAG: v_cvt_u32_f64_e32 v[[LO:[0-9]+]], [[FMA]]
+; CI-DAG: v_cvt_u32_f64_e32 v[[HI:[0-9]+]], [[FLOOR]]
+; CI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @fp_to_uint_i64_f64(i64 addrspace(1)* %out, double addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep = getelementptr double addrspace(1)* %in, i32 %tid
; FUNC-LABEL: {{^}}fp_to_uint_i32:
; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
-; SI: V_CVT_U32_F32_e32
-; SI: S_ENDPGM
+; SI: v_cvt_u32_f32_e32
+; SI: s_endpgm
define void @fp_to_uint_i32 (i32 addrspace(1)* %out, float %in) {
%conv = fptoui float %in to i32
store i32 %conv, i32 addrspace(1)* %out
; FUNC-LABEL: {{^}}fp_to_uint_v2i32:
; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; SI: V_CVT_U32_F32_e32
-; SI: V_CVT_U32_F32_e32
+; SI: v_cvt_u32_f32_e32
+; SI: v_cvt_u32_f32_e32
define void @fp_to_uint_v2i32(<2 x i32> addrspace(1)* %out, <2 x float> %in) {
%result = fptoui <2 x float> %in to <2 x i32>
; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
-; SI: V_CVT_U32_F32_e32
-; SI: V_CVT_U32_F32_e32
-; SI: V_CVT_U32_F32_e32
-; SI: V_CVT_U32_F32_e32
+; SI: v_cvt_u32_f32_e32
+; SI: v_cvt_u32_f32_e32
+; SI: v_cvt_u32_f32_e32
+; SI: v_cvt_u32_f32_e32
define void @fp_to_uint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
%value = load <4 x float> addrspace(1) * %in
; EG-DAG: CNDE_INT
; EG-DAG: CNDE_INT
-; SI: S_ENDPGM
+; SI: s_endpgm
define void @fp_to_uint_i64(i64 addrspace(1)* %out, float %x) {
%conv = fptoui float %x to i64
store i64 %conv, i64 addrspace(1)* %out
; EG-DAG: CNDE_INT
; EG-DAG: CNDE_INT
-; SI: S_ENDPGM
+; SI: s_endpgm
define void @fp_to_uint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) {
%conv = fptoui <2 x float> %x to <2 x i64>
store <2 x i64> %conv, <2 x i64> addrspace(1)* %out
; EG-DAG: CNDE_INT
; EG-DAG: CNDE_INT
-; SI: S_ENDPGM
+; SI: s_endpgm
define void @fp_to_uint_v4i64(<4 x i64> addrspace(1)* %out, <4 x float> %x) {
%conv = fptoui <4 x float> %x to <4 x i64>
store <4 x i64> %conv, <4 x i64> addrspace(1)* %out
; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=CHECK
; CHECK: {{^}}fpext:
-; CHECK: V_CVT_F64_F32_e32
+; CHECK: v_cvt_f64_f32_e32
define void @fpext(double addrspace(1)* %out, float %in) {
%result = fpext float %in to double
store double %result, double addrspace(1)* %out
; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=CHECK
; CHECK: {{^}}fptrunc:
-; CHECK: V_CVT_F32_F64_e32
+; CHECK: v_cvt_f32_f64_e32
define void @fptrunc(float addrspace(1)* %out, double %in) {
%result = fptrunc double %in to float
store float %result, float addrspace(1)* %out
; RUN: llc -march=r600 -mcpu=SI -enable-misched < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}frem_f32:
-; SI-DAG: BUFFER_LOAD_DWORD [[X:v[0-9]+]], {{.*$}}
-; SI-DAG: BUFFER_LOAD_DWORD [[Y:v[0-9]+]], {{.*}} offset:0x10
-; SI-DAG: V_CMP
-; SI-DAG: V_MUL_F32
-; SI: V_RCP_F32_e32
-; SI: V_MUL_F32_e32
-; SI: V_MUL_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_MAD_F32
-; SI: S_ENDPGM
+; SI-DAG: buffer_load_dword [[X:v[0-9]+]], {{.*$}}
+; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{.*}} offset:0x10
+; SI-DAG: v_cmp
+; SI-DAG: v_mul_f32
+; SI: v_rcp_f32_e32
+; SI: v_mul_f32_e32
+; SI: v_mul_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_mad_f32
+; SI: s_endpgm
define void @frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
float addrspace(1)* %in2) #0 {
%gep2 = getelementptr float addrspace(1)* %in2, i32 4
}
; FUNC-LABEL: {{^}}unsafe_frem_f32:
-; SI: BUFFER_LOAD_DWORD [[Y:v[0-9]+]], {{.*}} offset:0x10
-; SI: BUFFER_LOAD_DWORD [[X:v[0-9]+]], {{.*}}
-; SI: V_RCP_F32_e32 [[INVY:v[0-9]+]], [[Y]]
-; SI: V_MUL_F32_e32 [[DIV:v[0-9]+]], [[INVY]], [[X]]
-; SI: V_TRUNC_F32_e32 [[TRUNC:v[0-9]+]], [[DIV]]
-; SI: V_MAD_F32 [[RESULT:v[0-9]+]], -[[TRUNC]], [[Y]], [[X]]
-; SI: BUFFER_STORE_DWORD [[RESULT]]
-; SI: S_ENDPGM
+; SI: buffer_load_dword [[Y:v[0-9]+]], {{.*}} offset:0x10
+; SI: buffer_load_dword [[X:v[0-9]+]], {{.*}}
+; SI: v_rcp_f32_e32 [[INVY:v[0-9]+]], [[Y]]
+; SI: v_mul_f32_e32 [[DIV:v[0-9]+]], [[INVY]], [[X]]
+; SI: v_trunc_f32_e32 [[TRUNC:v[0-9]+]], [[DIV]]
+; SI: v_mad_f32 [[RESULT:v[0-9]+]], -[[TRUNC]], [[Y]], [[X]]
+; SI: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
define void @unsafe_frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
float addrspace(1)* %in2) #1 {
%gep2 = getelementptr float addrspace(1)* %in2, i32 4
; correctly
; FUNC-LABEL: {{^}}frem_f64:
-; SI: S_ENDPGM
+; SI: s_endpgm
define void @frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) #0 {
%r0 = load double addrspace(1)* %in1, align 8
}
; FUNC-LABEL: {{^}}unsafe_frem_f64:
-; SI: V_RCP_F64_e32
-; SI: V_MUL_F64
-; SI: V_BFE_U32
-; SI: V_FMA_F64
-; SI: S_ENDPGM
+; SI: v_rcp_f64_e32
+; SI: v_mul_f64
+; SI: v_bfe_u32
+; SI: v_fma_f64
+; SI: s_endpgm
define void @unsafe_frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) #1 {
%r0 = load double addrspace(1)* %in1, align 8
; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s
; CHECK: {{^}}fsqrt_f32:
-; CHECK: V_SQRT_F32_e32 {{v[0-9]+, v[0-9]+}}
+; CHECK: v_sqrt_f32_e32 {{v[0-9]+, v[0-9]+}}
define void @fsqrt_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%r0 = load float addrspace(1)* %in
}
; CHECK: {{^}}fsqrt_f64:
-; CHECK: V_SQRT_F64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; CHECK: v_sqrt_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define void @fsqrt_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
%r0 = load double addrspace(1)* %in
; FUNC-LABEL: {{^}}v_fsub_f32:
-; SI: V_SUBREV_F32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
define void @v_fsub_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%b_ptr = getelementptr float addrspace(1)* %in, i32 1
%a = load float addrspace(1)* %in, align 4
; FUNC-LABEL: {{^}}s_fsub_f32:
; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, -KC0[2].W
-; SI: V_SUB_F32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; SI: v_sub_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
define void @s_fsub_f32(float addrspace(1)* %out, float %a, float %b) {
%sub = fsub float %a, %b
store float %sub, float addrspace(1)* %out, align 4
; R600-DAG: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, -KC0[3].Y
; FIXME: Should be using SGPR directly for first operand
-; SI: V_SUBREV_F32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
-; SI: V_SUBREV_F32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
define void @fsub_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
%sub = fsub <2 x float> %a, %b
store <2 x float> %sub, <2 x float> addrspace(1)* %out, align 8
; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
-; SI: V_SUBREV_F32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
-; SI: V_SUBREV_F32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
-; SI: V_SUBREV_F32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
-; SI: V_SUBREV_F32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
define void @v_fsub_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
%a = load <4 x float> addrspace(1)* %in, align 16
; FIXME: Should be using SGPR directly for first operand
; FUNC-LABEL: {{^}}s_fsub_v4f32:
-; SI: V_SUBREV_F32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
-; SI: V_SUBREV_F32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
-; SI: V_SUBREV_F32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
-; SI: V_SUBREV_F32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
-; SI: S_ENDPGM
+; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+; SI: s_endpgm
define void @s_fsub_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) {
%result = fsub <4 x float> %a, %b
store <4 x float> %result, <4 x float> addrspace(1)* %out, align 16
; RUN: llc -march=r600 -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; SI-LABEL: {{^}}fsub_f64:
-; SI: V_ADD_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
define void @fsub_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
%r0 = load double addrspace(1)* %in1
declare <16 x double> @llvm.trunc.v16f64(<16 x double>) nounwind readnone
; FUNC-LABEL: {{^}}v_ftrunc_f64:
-; CI: V_TRUNC_F64
-; SI: V_BFE_U32 {{v[0-9]+}}, {{v[0-9]+}}, 20, 11
-; SI: S_ENDPGM
+; CI: v_trunc_f64
+; SI: v_bfe_u32 {{v[0-9]+}}, {{v[0-9]+}}, 20, 11
+; SI: s_endpgm
define void @v_ftrunc_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
%x = load double addrspace(1)* %in, align 8
%y = call double @llvm.trunc.f64(double %x) nounwind readnone
}
; FUNC-LABEL: {{^}}ftrunc_f64:
-; CI: V_TRUNC_F64_e32
+; CI: v_trunc_f64_e32
-; SI: S_BFE_U32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014
-; SI: S_ADD_I32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01
-; SI: S_LSHR_B64
-; SI: S_NOT_B64
-; SI: S_AND_B64
-; SI: S_AND_B32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
-; SI: CMP_LT_I32
-; SI: CNDMASK_B32
-; SI: CNDMASK_B32
-; SI: CMP_GT_I32
-; SI: CNDMASK_B32
-; SI: CNDMASK_B32
-; SI: S_ENDPGM
+; SI: s_bfe_u32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014
+; SI: s_add_i32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01
+; SI: s_lshr_b64
+; SI: s_not_b64
+; SI: s_and_b64
+; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
+; SI: cmp_lt_i32
+; SI: cndmask_b32
+; SI: cndmask_b32
+; SI: cmp_gt_i32
+; SI: cndmask_b32
+; SI: cndmask_b32
+; SI: s_endpgm
define void @ftrunc_f64(double addrspace(1)* %out, double %x) {
%y = call double @llvm.trunc.f64(double %x) nounwind readnone
store double %y, double addrspace(1)* %out
}
; FUNC-LABEL: {{^}}ftrunc_v2f64:
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
define void @ftrunc_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
%y = call <2 x double> @llvm.trunc.v2f64(<2 x double> %x) nounwind readnone
store <2 x double> %y, <2 x double> addrspace(1)* %out
}
; FIXME-FUNC-LABEL: {{^}}ftrunc_v3f64:
-; FIXME-CI: V_TRUNC_F64_e32
-; FIXME-CI: V_TRUNC_F64_e32
-; FIXME-CI: V_TRUNC_F64_e32
+; FIXME-CI: v_trunc_f64_e32
+; FIXME-CI: v_trunc_f64_e32
+; FIXME-CI: v_trunc_f64_e32
; define void @ftrunc_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) {
; %y = call <3 x double> @llvm.trunc.v3f64(<3 x double> %x) nounwind readnone
; store <3 x double> %y, <3 x double> addrspace(1)* %out
; }
; FUNC-LABEL: {{^}}ftrunc_v4f64:
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
define void @ftrunc_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) {
%y = call <4 x double> @llvm.trunc.v4f64(<4 x double> %x) nounwind readnone
store <4 x double> %y, <4 x double> addrspace(1)* %out
}
; FUNC-LABEL: {{^}}ftrunc_v8f64:
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
define void @ftrunc_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) {
%y = call <8 x double> @llvm.trunc.v8f64(<8 x double> %x) nounwind readnone
store <8 x double> %y, <8 x double> addrspace(1)* %out
}
; FUNC-LABEL: {{^}}ftrunc_v16f64:
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
define void @ftrunc_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) {
%y = call <16 x double> @llvm.trunc.v16f64(<16 x double> %x) nounwind readnone
store <16 x double> %y, <16 x double> addrspace(1)* %out
; FUNC-LABEL: {{^}}ftrunc_f32:
; EG: TRUNC
-; SI: V_TRUNC_F32_e32
+; SI: v_trunc_f32_e32
define void @ftrunc_f32(float addrspace(1)* %out, float %x) {
%y = call float @llvm.trunc.f32(float %x) nounwind readnone
store float %y, float addrspace(1)* %out
; FUNC-LABEL: {{^}}ftrunc_v2f32:
; EG: TRUNC
; EG: TRUNC
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
define void @ftrunc_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %x) {
%y = call <2 x float> @llvm.trunc.v2f32(<2 x float> %x) nounwind readnone
store <2 x float> %y, <2 x float> addrspace(1)* %out
; FIXME-EG: TRUNC
; FIXME-EG: TRUNC
; FIXME-EG: TRUNC
-; FIXME-SI: V_TRUNC_F32_e32
-; FIXME-SI: V_TRUNC_F32_e32
-; FIXME-SI: V_TRUNC_F32_e32
+; FIXME-SI: v_trunc_f32_e32
+; FIXME-SI: v_trunc_f32_e32
+; FIXME-SI: v_trunc_f32_e32
; define void @ftrunc_v3f32(<3 x float> addrspace(1)* %out, <3 x float> %x) {
; %y = call <3 x float> @llvm.trunc.v3f32(<3 x float> %x) nounwind readnone
; store <3 x float> %y, <3 x float> addrspace(1)* %out
; EG: TRUNC
; EG: TRUNC
; EG: TRUNC
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
define void @ftrunc_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %x) {
%y = call <4 x float> @llvm.trunc.v4f32(<4 x float> %x) nounwind readnone
store <4 x float> %y, <4 x float> addrspace(1)* %out
; EG: TRUNC
; EG: TRUNC
; EG: TRUNC
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
define void @ftrunc_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %x) {
%y = call <8 x float> @llvm.trunc.v8f32(<8 x float> %x) nounwind readnone
store <8 x float> %y, <8 x float> addrspace(1)* %out
; EG: TRUNC
; EG: TRUNC
; EG: TRUNC
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
define void @ftrunc_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %x) {
%y = call <16 x float> @llvm.trunc.v16f32(<16 x float> %x) nounwind readnone
store <16 x float> %y, <16 x float> addrspace(1)* %out
define void @use_gep_address_space([1024 x i32] addrspace(3)* %array) nounwind {
; CHECK-LABEL: {{^}}use_gep_address_space:
-; CHECK: V_MOV_B32_e32 [[PTR:v[0-9]+]], s{{[0-9]+}}
-; CHECK: DS_WRITE_B32 [[PTR]], v{{[0-9]+}} offset:64
+; CHECK: v_mov_b32_e32 [[PTR:v[0-9]+]], s{{[0-9]+}}
+; CHECK: ds_write_b32 [[PTR]], v{{[0-9]+}} offset:64
%p = getelementptr [1024 x i32] addrspace(3)* %array, i16 0, i16 16
store i32 99, i32 addrspace(3)* %p
ret void
; CHECK-LABEL: {{^}}use_gep_address_space_large_offset:
; The LDS offset will be 65536 bytes, which is larger than the size of LDS on
; SI, which is why it is being OR'd with the base pointer.
-; SI: S_OR_B32
-; CI: S_ADD_I32
-; CHECK: DS_WRITE_B32
+; SI: s_or_b32
+; CI: s_add_i32
+; CHECK: ds_write_b32
%p = getelementptr [1024 x i32] addrspace(3)* %array, i16 0, i16 16384
store i32 99, i32 addrspace(3)* %p
ret void
define void @gep_as_vector_v4(<4 x [1024 x i32] addrspace(3)*> %array) nounwind {
; CHECK-LABEL: {{^}}gep_as_vector_v4:
-; CHECK: S_ADD_I32
-; CHECK: S_ADD_I32
-; CHECK: S_ADD_I32
-; CHECK: S_ADD_I32
+; CHECK: s_add_i32
+; CHECK: s_add_i32
+; CHECK: s_add_i32
+; CHECK: s_add_i32
%p = getelementptr <4 x [1024 x i32] addrspace(3)*> %array, <4 x i16> zeroinitializer, <4 x i16> <i16 16, i16 16, i16 16, i16 16>
%p0 = extractelement <4 x i32 addrspace(3)*> %p, i32 0
%p1 = extractelement <4 x i32 addrspace(3)*> %p, i32 1
define void @gep_as_vector_v2(<2 x [1024 x i32] addrspace(3)*> %array) nounwind {
; CHECK-LABEL: {{^}}gep_as_vector_v2:
-; CHECK: S_ADD_I32
-; CHECK: S_ADD_I32
+; CHECK: s_add_i32
+; CHECK: s_add_i32
%p = getelementptr <2 x [1024 x i32] addrspace(3)*> %array, <2 x i16> zeroinitializer, <2 x i16> <i16 16, i16 16>
%p0 = extractelement <2 x i32 addrspace(3)*> %p, i32 0
%p1 = extractelement <2 x i32 addrspace(3)*> %p, i32 1
; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
; FUNC-LABEL: {{^}}atomic_add_i32_offset:
-; SI: BUFFER_ATOMIC_ADD v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
+; SI: buffer_atomic_add v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
define void @atomic_add_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
}
; FUNC-LABEL: {{^}}atomic_add_i32_ret_offset:
-; SI: BUFFER_ATOMIC_ADD [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
-; SI: BUFFER_STORE_DWORD [[RET]]
+; SI: buffer_atomic_add [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
+; SI: buffer_store_dword [[RET]]
define void @atomic_add_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
}
; FUNC-LABEL: {{^}}atomic_add_i32_addr64_offset:
-; SI: BUFFER_ATOMIC_ADD v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
+; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
define void @atomic_add_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
}
; FUNC-LABEL: {{^}}atomic_add_i32_ret_addr64_offset:
-; SI: BUFFER_ATOMIC_ADD [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
-; SI: BUFFER_STORE_DWORD [[RET]]
+; SI: buffer_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
+; SI: buffer_store_dword [[RET]]
define void @atomic_add_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
}
; FUNC-LABEL: {{^}}atomic_add_i32:
-; SI: BUFFER_ATOMIC_ADD v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; SI: buffer_atomic_add v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
define void @atomic_add_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst
}
; FUNC-LABEL: {{^}}atomic_add_i32_ret:
-; SI: BUFFER_ATOMIC_ADD [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
-; SI: BUFFER_STORE_DWORD [[RET]]
+; SI: buffer_atomic_add [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; SI: buffer_store_dword [[RET]]
define void @atomic_add_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%0 = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst
}
; FUNC-LABEL: {{^}}atomic_add_i32_addr64:
-; SI: BUFFER_ATOMIC_ADD v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
define void @atomic_add_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
}
; FUNC-LABEL: {{^}}atomic_add_i32_ret_addr64:
-; SI: BUFFER_ATOMIC_ADD [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
-; SI: BUFFER_STORE_DWORD [[RET]]
+; SI: buffer_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
+; SI: buffer_store_dword [[RET]]
define void @atomic_add_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
}
; FUNC-LABEL: {{^}}atomic_and_i32_offset:
-; SI: BUFFER_ATOMIC_AND v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
+; SI: buffer_atomic_and v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
define void @atomic_and_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
}
; FUNC-LABEL: {{^}}atomic_and_i32_ret_offset:
-; SI: BUFFER_ATOMIC_AND [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
-; SI: BUFFER_STORE_DWORD [[RET]]
+; SI: buffer_atomic_and [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
+; SI: buffer_store_dword [[RET]]
define void @atomic_and_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
}
; FUNC-LABEL: {{^}}atomic_and_i32_addr64_offset:
-; SI: BUFFER_ATOMIC_AND v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
+; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
define void @atomic_and_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
}
; FUNC-LABEL: {{^}}atomic_and_i32_ret_addr64_offset:
-; SI: BUFFER_ATOMIC_AND [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
-; SI: BUFFER_STORE_DWORD [[RET]]
+; SI: buffer_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
+; SI: buffer_store_dword [[RET]]
define void @atomic_and_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
}
; FUNC-LABEL: {{^}}atomic_and_i32:
-; SI: BUFFER_ATOMIC_AND v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; SI: buffer_atomic_and v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
define void @atomic_and_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = atomicrmw volatile and i32 addrspace(1)* %out, i32 %in seq_cst
}
; FUNC-LABEL: {{^}}atomic_and_i32_ret:
-; SI: BUFFER_ATOMIC_AND [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
-; SI: BUFFER_STORE_DWORD [[RET]]
+; SI: buffer_atomic_and [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; SI: buffer_store_dword [[RET]]
define void @atomic_and_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%0 = atomicrmw volatile and i32 addrspace(1)* %out, i32 %in seq_cst
}
; FUNC-LABEL: {{^}}atomic_and_i32_addr64:
-; SI: BUFFER_ATOMIC_AND v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
define void @atomic_and_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
}
; FUNC-LABEL: {{^}}atomic_and_i32_ret_addr64:
-; SI: BUFFER_ATOMIC_AND [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
-; SI: BUFFER_STORE_DWORD [[RET]]
+; SI: buffer_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
+; SI: buffer_store_dword [[RET]]
define void @atomic_and_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
}
; FUNC-LABEL: {{^}}atomic_sub_i32_offset:
-; SI: BUFFER_ATOMIC_SUB v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
+; SI: buffer_atomic_sub v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
define void @atomic_sub_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
}
; FUNC-LABEL: {{^}}atomic_sub_i32_ret_offset:
-; SI: BUFFER_ATOMIC_SUB [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
-; SI: BUFFER_STORE_DWORD [[RET]]
+; SI: buffer_atomic_sub [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
+; SI: buffer_store_dword [[RET]]
define void @atomic_sub_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
}
; FUNC-LABEL: {{^}}atomic_sub_i32_addr64_offset:
-; SI: BUFFER_ATOMIC_SUB v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
+; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
define void @atomic_sub_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
}
; FUNC-LABEL: {{^}}atomic_sub_i32_ret_addr64_offset:
-; SI: BUFFER_ATOMIC_SUB [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
-; SI: BUFFER_STORE_DWORD [[RET]]
+; SI: buffer_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
+; SI: buffer_store_dword [[RET]]
define void @atomic_sub_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
}
; FUNC-LABEL: {{^}}atomic_sub_i32:
-; SI: BUFFER_ATOMIC_SUB v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; SI: buffer_atomic_sub v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
define void @atomic_sub_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = atomicrmw volatile sub i32 addrspace(1)* %out, i32 %in seq_cst
}
; FUNC-LABEL: {{^}}atomic_sub_i32_ret:
-; SI: BUFFER_ATOMIC_SUB [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
-; SI: BUFFER_STORE_DWORD [[RET]]
+; SI: buffer_atomic_sub [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; SI: buffer_store_dword [[RET]]
define void @atomic_sub_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%0 = atomicrmw volatile sub i32 addrspace(1)* %out, i32 %in seq_cst
}
; FUNC-LABEL: {{^}}atomic_sub_i32_addr64:
-; SI: BUFFER_ATOMIC_SUB v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
define void @atomic_sub_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
}
; FUNC-LABEL: {{^}}atomic_sub_i32_ret_addr64:
-; SI: BUFFER_ATOMIC_SUB [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
-; SI: BUFFER_STORE_DWORD [[RET]]
+; SI: buffer_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
+; SI: buffer_store_dword [[RET]]
define void @atomic_sub_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
}
; FUNC-LABEL: {{^}}atomic_max_i32_offset:
-; SI: BUFFER_ATOMIC_SMAX v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
+; SI: buffer_atomic_smax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
define void @atomic_max_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
}
; FUNC-LABEL: {{^}}atomic_max_i32_ret_offset:
-; SI: BUFFER_ATOMIC_SMAX [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
-; SI: BUFFER_STORE_DWORD [[RET]]
+; SI: buffer_atomic_smax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
+; SI: buffer_store_dword [[RET]]
define void @atomic_max_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
}
; FUNC-LABEL: {{^}}atomic_max_i32_addr64_offset:
-; SI: BUFFER_ATOMIC_SMAX v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
+; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
define void @atomic_max_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
}
; FUNC-LABEL: {{^}}atomic_max_i32_ret_addr64_offset:
-; SI: BUFFER_ATOMIC_SMAX [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
-; SI: BUFFER_STORE_DWORD [[RET]]
+; SI: buffer_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
+; SI: buffer_store_dword [[RET]]
define void @atomic_max_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
}
; FUNC-LABEL: {{^}}atomic_max_i32:
-; SI: BUFFER_ATOMIC_SMAX v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; SI: buffer_atomic_smax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
define void @atomic_max_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = atomicrmw volatile max i32 addrspace(1)* %out, i32 %in seq_cst
}
; FUNC-LABEL: {{^}}atomic_max_i32_ret:
-; SI: BUFFER_ATOMIC_SMAX [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
-; SI: BUFFER_STORE_DWORD [[RET]]
+; SI: buffer_atomic_smax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; SI: buffer_store_dword [[RET]]
define void @atomic_max_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%0 = atomicrmw volatile max i32 addrspace(1)* %out, i32 %in seq_cst
}
; FUNC-LABEL: {{^}}atomic_max_i32_addr64:
-; SI: BUFFER_ATOMIC_SMAX v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
define void @atomic_max_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
}
; FUNC-LABEL: {{^}}atomic_max_i32_ret_addr64:
-; SI: BUFFER_ATOMIC_SMAX [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
-; SI: BUFFER_STORE_DWORD [[RET]]
+; SI: buffer_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
+; SI: buffer_store_dword [[RET]]
define void @atomic_max_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
}
; FUNC-LABEL: {{^}}atomic_umax_i32_offset:
-; SI: BUFFER_ATOMIC_UMAX v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
+; SI: buffer_atomic_umax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
define void @atomic_umax_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
}
; FUNC-LABEL: {{^}}atomic_umax_i32_ret_offset:
-; SI: BUFFER_ATOMIC_UMAX [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
-; SI: BUFFER_STORE_DWORD [[RET]]
+; SI: buffer_atomic_umax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
+; SI: buffer_store_dword [[RET]]
define void @atomic_umax_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
}
; FUNC-LABEL: {{^}}atomic_umax_i32_addr64_offset:
-; SI: BUFFER_ATOMIC_UMAX v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
+; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
define void @atomic_umax_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
}
; FUNC-LABEL: {{^}}atomic_umax_i32_ret_addr64_offset:
-; SI: BUFFER_ATOMIC_UMAX [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
-; SI: BUFFER_STORE_DWORD [[RET]]
+; SI: buffer_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
+; SI: buffer_store_dword [[RET]]
define void @atomic_umax_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
}
; FUNC-LABEL: {{^}}atomic_umax_i32:
-; SI: BUFFER_ATOMIC_UMAX v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; SI: buffer_atomic_umax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
define void @atomic_umax_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = atomicrmw volatile umax i32 addrspace(1)* %out, i32 %in seq_cst
}
; FUNC-LABEL: {{^}}atomic_umax_i32_ret:
-; SI: BUFFER_ATOMIC_UMAX [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
-; SI: BUFFER_STORE_DWORD [[RET]]
+; SI: buffer_atomic_umax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; SI: buffer_store_dword [[RET]]
define void @atomic_umax_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%0 = atomicrmw volatile umax i32 addrspace(1)* %out, i32 %in seq_cst
}
; FUNC-LABEL: {{^}}atomic_umax_i32_addr64:
-; SI: BUFFER_ATOMIC_UMAX v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
define void @atomic_umax_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
}
; FUNC-LABEL: {{^}}atomic_umax_i32_ret_addr64:
-; SI: BUFFER_ATOMIC_UMAX [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
-; SI: BUFFER_STORE_DWORD [[RET]]
+; SI: buffer_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
+; SI: buffer_store_dword [[RET]]
define void @atomic_umax_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
}
; FUNC-LABEL: {{^}}atomic_min_i32_offset:
-; SI: BUFFER_ATOMIC_SMIN v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
+; SI: buffer_atomic_smin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
define void @atomic_min_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
}
; FUNC-LABEL: {{^}}atomic_min_i32_ret_offset:
-; SI: BUFFER_ATOMIC_SMIN [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
-; SI: BUFFER_STORE_DWORD [[RET]]
+; SI: buffer_atomic_smin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
+; SI: buffer_store_dword [[RET]]
define void @atomic_min_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
}
; FUNC-LABEL: {{^}}atomic_min_i32_addr64_offset:
-; SI: BUFFER_ATOMIC_SMIN v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
+; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
define void @atomic_min_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
}
; FUNC-LABEL: {{^}}atomic_min_i32_ret_addr64_offset:
-; SI: BUFFER_ATOMIC_SMIN [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
-; SI: BUFFER_STORE_DWORD [[RET]]
+; SI: buffer_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
+; SI: buffer_store_dword [[RET]]
define void @atomic_min_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
}
; FUNC-LABEL: {{^}}atomic_min_i32:
-; SI: BUFFER_ATOMIC_SMIN v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; SI: buffer_atomic_smin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
define void @atomic_min_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = atomicrmw volatile min i32 addrspace(1)* %out, i32 %in seq_cst
}
; FUNC-LABEL: {{^}}atomic_min_i32_ret:
-; SI: BUFFER_ATOMIC_SMIN [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
-; SI: BUFFER_STORE_DWORD [[RET]]
+; SI: buffer_atomic_smin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; SI: buffer_store_dword [[RET]]
define void @atomic_min_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%0 = atomicrmw volatile min i32 addrspace(1)* %out, i32 %in seq_cst
}
; FUNC-LABEL: {{^}}atomic_min_i32_addr64:
-; SI: BUFFER_ATOMIC_SMIN v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
define void @atomic_min_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
}
; FUNC-LABEL: {{^}}atomic_min_i32_ret_addr64:
-; SI: BUFFER_ATOMIC_SMIN [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
-; SI: BUFFER_STORE_DWORD [[RET]]
+; SI: buffer_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
+; SI: buffer_store_dword [[RET]]
define void @atomic_min_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
}
; FUNC-LABEL: {{^}}atomic_umin_i32_offset:
-; SI: BUFFER_ATOMIC_UMIN v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
+; SI: buffer_atomic_umin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
define void @atomic_umin_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
}
; FUNC-LABEL: {{^}}atomic_umin_i32_ret_offset:
-; SI: BUFFER_ATOMIC_UMIN [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
-; SI: BUFFER_STORE_DWORD [[RET]]
+; SI: buffer_atomic_umin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
+; SI: buffer_store_dword [[RET]]
define void @atomic_umin_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
}
; FUNC-LABEL: {{^}}atomic_umin_i32_addr64_offset:
-; SI: BUFFER_ATOMIC_UMIN v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
+; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
define void @atomic_umin_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
}
; FUNC-LABEL: {{^}}atomic_umin_i32_ret_addr64_offset:
-; SI: BUFFER_ATOMIC_UMIN [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
-; SI: BUFFER_STORE_DWORD [[RET]]
+; SI: buffer_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
+; SI: buffer_store_dword [[RET]]
define void @atomic_umin_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
}
; FUNC-LABEL: {{^}}atomic_umin_i32:
-; SI: BUFFER_ATOMIC_UMIN v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; SI: buffer_atomic_umin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
define void @atomic_umin_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = atomicrmw volatile umin i32 addrspace(1)* %out, i32 %in seq_cst
}
; FUNC-LABEL: {{^}}atomic_umin_i32_ret:
-; SI: BUFFER_ATOMIC_UMIN [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
-; SI: BUFFER_STORE_DWORD [[RET]]
+; SI: buffer_atomic_umin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; SI: buffer_store_dword [[RET]]
define void @atomic_umin_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%0 = atomicrmw volatile umin i32 addrspace(1)* %out, i32 %in seq_cst
}
; FUNC-LABEL: {{^}}atomic_umin_i32_addr64:
-; SI: BUFFER_ATOMIC_UMIN v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
define void @atomic_umin_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
}
; FUNC-LABEL: {{^}}atomic_umin_i32_ret_addr64:
-; SI: BUFFER_ATOMIC_UMIN [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
-; SI: BUFFER_STORE_DWORD [[RET]]
+; SI: buffer_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
+; SI: buffer_store_dword [[RET]]
define void @atomic_umin_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
}
; FUNC-LABEL: {{^}}atomic_or_i32_offset:
-; SI: BUFFER_ATOMIC_OR v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
+; SI: buffer_atomic_or v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
define void @atomic_or_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
}
; FUNC-LABEL: {{^}}atomic_or_i32_ret_offset:
-; SI: BUFFER_ATOMIC_OR [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
-; SI: BUFFER_STORE_DWORD [[RET]]
+; SI: buffer_atomic_or [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
+; SI: buffer_store_dword [[RET]]
define void @atomic_or_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
}
; FUNC-LABEL: {{^}}atomic_or_i32_addr64_offset:
-; SI: BUFFER_ATOMIC_OR v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
+; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
define void @atomic_or_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
}
; FUNC-LABEL: {{^}}atomic_or_i32_ret_addr64_offset:
-; SI: BUFFER_ATOMIC_OR [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
-; SI: BUFFER_STORE_DWORD [[RET]]
+; SI: buffer_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
+; SI: buffer_store_dword [[RET]]
define void @atomic_or_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
}
; FUNC-LABEL: {{^}}atomic_or_i32:
-; SI: BUFFER_ATOMIC_OR v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; SI: buffer_atomic_or v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
define void @atomic_or_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = atomicrmw volatile or i32 addrspace(1)* %out, i32 %in seq_cst
}
; FUNC-LABEL: {{^}}atomic_or_i32_ret:
-; SI: BUFFER_ATOMIC_OR [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
-; SI: BUFFER_STORE_DWORD [[RET]]
+; SI: buffer_atomic_or [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; SI: buffer_store_dword [[RET]]
define void @atomic_or_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%0 = atomicrmw volatile or i32 addrspace(1)* %out, i32 %in seq_cst
}
; FUNC-LABEL: {{^}}atomic_or_i32_addr64:
-; SI: BUFFER_ATOMIC_OR v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
define void @atomic_or_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
}
; FUNC-LABEL: {{^}}atomic_or_i32_ret_addr64:
-; SI: BUFFER_ATOMIC_OR [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
-; SI: BUFFER_STORE_DWORD [[RET]]
+; SI: buffer_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
+; SI: buffer_store_dword [[RET]]
define void @atomic_or_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
}
; FUNC-LABEL: {{^}}atomic_xchg_i32_offset:
-; SI: BUFFER_ATOMIC_SWAP v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
+; SI: buffer_atomic_swap v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
define void @atomic_xchg_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
}
; FUNC-LABEL: {{^}}atomic_xchg_i32_ret_offset:
-; SI: BUFFER_ATOMIC_SWAP [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
-; SI: BUFFER_STORE_DWORD [[RET]]
+; SI: buffer_atomic_swap [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
+; SI: buffer_store_dword [[RET]]
define void @atomic_xchg_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
}
; FUNC-LABEL: {{^}}atomic_xchg_i32_addr64_offset:
-; SI: BUFFER_ATOMIC_SWAP v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
+; SI: buffer_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
define void @atomic_xchg_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
}
; FUNC-LABEL: {{^}}atomic_xchg_i32_ret_addr64_offset:
-; SI: BUFFER_ATOMIC_SWAP [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
-; SI: BUFFER_STORE_DWORD [[RET]]
+; SI: buffer_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
+; SI: buffer_store_dword [[RET]]
define void @atomic_xchg_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
}
; FUNC-LABEL: {{^}}atomic_xchg_i32:
-; SI: BUFFER_ATOMIC_SWAP v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; SI: buffer_atomic_swap v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
define void @atomic_xchg_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst
}
; FUNC-LABEL: {{^}}atomic_xchg_i32_ret:
-; SI: BUFFER_ATOMIC_SWAP [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
-; SI: BUFFER_STORE_DWORD [[RET]]
+; SI: buffer_atomic_swap [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; SI: buffer_store_dword [[RET]]
define void @atomic_xchg_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%0 = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst
}
; FUNC-LABEL: {{^}}atomic_xchg_i32_addr64:
-; SI: BUFFER_ATOMIC_SWAP v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+; SI: buffer_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
define void @atomic_xchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
}
; FUNC-LABEL: {{^}}atomic_xchg_i32_ret_addr64:
-; SI: BUFFER_ATOMIC_SWAP [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
-; SI: BUFFER_STORE_DWORD [[RET]]
+; SI: buffer_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
+; SI: buffer_store_dword [[RET]]
define void @atomic_xchg_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
}
; FUNC-LABEL: {{^}}atomic_xor_i32_offset:
-; SI: BUFFER_ATOMIC_XOR v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
+; SI: buffer_atomic_xor v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
define void @atomic_xor_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
}
; FUNC-LABEL: {{^}}atomic_xor_i32_ret_offset:
-; SI: BUFFER_ATOMIC_XOR [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
-; SI: BUFFER_STORE_DWORD [[RET]]
+; SI: buffer_atomic_xor [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
+; SI: buffer_store_dword [[RET]]
define void @atomic_xor_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
}
; FUNC-LABEL: {{^}}atomic_xor_i32_addr64_offset:
-; SI: BUFFER_ATOMIC_XOR v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
+; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
define void @atomic_xor_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
}
; FUNC-LABEL: {{^}}atomic_xor_i32_ret_addr64_offset:
-; SI: BUFFER_ATOMIC_XOR [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
-; SI: BUFFER_STORE_DWORD [[RET]]
+; SI: buffer_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
+; SI: buffer_store_dword [[RET]]
define void @atomic_xor_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
}
; FUNC-LABEL: {{^}}atomic_xor_i32:
-; SI: BUFFER_ATOMIC_XOR v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; SI: buffer_atomic_xor v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
define void @atomic_xor_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = atomicrmw volatile xor i32 addrspace(1)* %out, i32 %in seq_cst
}
; FUNC-LABEL: {{^}}atomic_xor_i32_ret:
-; SI: BUFFER_ATOMIC_XOR [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
-; SI: BUFFER_STORE_DWORD [[RET]]
+; SI: buffer_atomic_xor [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; SI: buffer_store_dword [[RET]]
define void @atomic_xor_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%0 = atomicrmw volatile xor i32 addrspace(1)* %out, i32 %in seq_cst
}
; FUNC-LABEL: {{^}}atomic_xor_i32_addr64:
-; SI: BUFFER_ATOMIC_XOR v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
define void @atomic_xor_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
}
; FUNC-LABEL: {{^}}atomic_xor_i32_ret_addr64:
-; SI: BUFFER_ATOMIC_XOR [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
-; SI: BUFFER_STORE_DWORD [[RET]]
+; SI: buffer_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
+; SI: buffer_store_dword [[RET]]
define void @atomic_xor_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
; FUNC-LABEL: {{^}}test_i8:
; EG: CF_END
-; SI: BUFFER_STORE_BYTE
-; SI: S_ENDPGM
+; SI: buffer_store_byte
+; SI: s_endpgm
define void @test_i8( i32 %s, i8 addrspace(1)* %out) #3 {
%arrayidx = getelementptr inbounds [1 x i8] addrspace(2)* @a, i32 0, i32 %s
%1 = load i8 addrspace(2)* %arrayidx, align 1
; FUNC-LABEL: {{^}}test_i16:
; EG: CF_END
-; SI: BUFFER_STORE_SHORT
-; SI: S_ENDPGM
+; SI: buffer_store_short
+; SI: s_endpgm
define void @test_i16( i32 %s, i16 addrspace(1)* %out) #3 {
%arrayidx = getelementptr inbounds [1 x i16] addrspace(2)* @b, i32 0, i32 %s
%1 = load i16 addrspace(2)* %arrayidx, align 2
@float_gv = internal unnamed_addr addrspace(2) constant [5 x float] [float 0.0, float 1.0, float 2.0, float 3.0, float 4.0], align 4
; FUNC-LABEL: {{^}}float:
-; FIXME: We should be using S_LOAD_DWORD here.
-; SI: BUFFER_LOAD_DWORD
+; FIXME: We should be using s_load_dword here.
+; SI: buffer_load_dword
; EG-DAG: MOV {{\** *}}T2.X
; EG-DAG: MOV {{\** *}}T3.X
; FUNC-LABEL: {{^}}i32:
-; FIXME: We should be using S_LOAD_DWORD here.
-; SI: BUFFER_LOAD_DWORD
+; FIXME: We should be using s_load_dword here.
+; SI: buffer_load_dword
; EG-DAG: MOV {{\** *}}T2.X
; EG-DAG: MOV {{\** *}}T3.X
@struct_foo_gv = internal unnamed_addr addrspace(2) constant [1 x %struct.foo] [ %struct.foo { float 16.0, [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4] } ]
; FUNC-LABEL: {{^}}struct_foo_gv_load:
-; SI: S_LOAD_DWORD
+; SI: s_load_dword
define void @struct_foo_gv_load(i32 addrspace(1)* %out, i32 %index) {
%gep = getelementptr inbounds [1 x %struct.foo] addrspace(2)* @struct_foo_gv, i32 0, i32 0, i32 1, i32 %index
<1 x i32> <i32 4> ]
; FUNC-LABEL: {{^}}array_v1_gv_load:
-; FIXME: We should be using S_LOAD_DWORD here.
-; SI: BUFFER_LOAD_DWORD
+; FIXME: We should be using s_load_dword here.
+; SI: buffer_load_dword
define void @array_v1_gv_load(<1 x i32> addrspace(1)* %out, i32 %index) {
%gep = getelementptr inbounds [4 x <1 x i32>] addrspace(2)* @array_v1_gv, i32 0, i32 %index
%load = load <1 x i32> addrspace(2)* %gep, align 4
define void @test_load_store(half addrspace(1)* %in, half addrspace(1)* %out) {
; CHECK-LABEL: {{^}}test_load_store:
-; CHECK: BUFFER_LOAD_USHORT [[TMP:v[0-9]+]]
-; CHECK: BUFFER_STORE_SHORT [[TMP]]
+; CHECK: buffer_load_ushort [[TMP:v[0-9]+]]
+; CHECK: buffer_store_short [[TMP]]
%val = load half addrspace(1)* %in
store half %val, half addrspace(1) * %out
ret void
define void @test_bitcast_from_half(half addrspace(1)* %in, i16 addrspace(1)* %out) {
; CHECK-LABEL: {{^}}test_bitcast_from_half:
-; CHECK: BUFFER_LOAD_USHORT [[TMP:v[0-9]+]]
-; CHECK: BUFFER_STORE_SHORT [[TMP]]
+; CHECK: buffer_load_ushort [[TMP:v[0-9]+]]
+; CHECK: buffer_store_short [[TMP]]
%val = load half addrspace(1) * %in
%val_int = bitcast half %val to i16
store i16 %val_int, i16 addrspace(1)* %out
define void @test_bitcast_to_half(half addrspace(1)* %out, i16 addrspace(1)* %in) {
; CHECK-LABEL: {{^}}test_bitcast_to_half:
-; CHECK: BUFFER_LOAD_USHORT [[TMP:v[0-9]+]]
-; CHECK: BUFFER_STORE_SHORT [[TMP]]
+; CHECK: buffer_load_ushort [[TMP:v[0-9]+]]
+; CHECK: buffer_store_short [[TMP]]
%val = load i16 addrspace(1)* %in
%val_fp = bitcast i16 %val to half
store half %val_fp, half addrspace(1)* %out
define void @test_extend32(half addrspace(1)* %in, float addrspace(1)* %out) {
; CHECK-LABEL: {{^}}test_extend32:
-; CHECK: V_CVT_F32_F16_e32
+; CHECK: v_cvt_f32_f16_e32
%val16 = load half addrspace(1)* %in
%val32 = fpext half %val16 to float
define void @test_extend64(half addrspace(1)* %in, double addrspace(1)* %out) {
; CHECK-LABEL: {{^}}test_extend64:
-; CHECK: V_CVT_F32_F16_e32
-; CHECK: V_CVT_F64_F32_e32
+; CHECK: v_cvt_f32_f16_e32
+; CHECK: v_cvt_f64_f32_e32
%val16 = load half addrspace(1)* %in
%val64 = fpext half %val16 to double
define void @test_trunc32(float addrspace(1)* %in, half addrspace(1)* %out) {
; CHECK-LABEL: {{^}}test_trunc32:
-; CHECK: V_CVT_F16_F32_e32
+; CHECK: v_cvt_f16_f32_e32
%val32 = load float addrspace(1)* %in
%val16 = fptrunc float %val32 to half
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; SI-LABEL: {{^}}test_i64_eq:
-; SI: V_CMP_EQ_I64
+; SI: v_cmp_eq_i64
define void @test_i64_eq(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%cmp = icmp eq i64 %a, %b
%result = sext i1 %cmp to i32
}
; SI-LABEL: {{^}}test_i64_ne:
-; SI: V_CMP_NE_I64
+; SI: v_cmp_ne_i64
define void @test_i64_ne(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%cmp = icmp ne i64 %a, %b
%result = sext i1 %cmp to i32
}
; SI-LABEL: {{^}}test_i64_slt:
-; SI: V_CMP_LT_I64
+; SI: v_cmp_lt_i64
define void @test_i64_slt(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%cmp = icmp slt i64 %a, %b
%result = sext i1 %cmp to i32
}
; SI-LABEL: {{^}}test_i64_ult:
-; SI: V_CMP_LT_U64
+; SI: v_cmp_lt_u64
define void @test_i64_ult(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%cmp = icmp ult i64 %a, %b
%result = sext i1 %cmp to i32
}
; SI-LABEL: {{^}}test_i64_sle:
-; SI: V_CMP_LE_I64
+; SI: v_cmp_le_i64
define void @test_i64_sle(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%cmp = icmp sle i64 %a, %b
%result = sext i1 %cmp to i32
}
; SI-LABEL: {{^}}test_i64_ule:
-; SI: V_CMP_LE_U64
+; SI: v_cmp_le_u64
define void @test_i64_ule(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%cmp = icmp ule i64 %a, %b
%result = sext i1 %cmp to i32
}
; SI-LABEL: {{^}}test_i64_sgt:
-; SI: V_CMP_GT_I64
+; SI: v_cmp_gt_i64
define void @test_i64_sgt(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%cmp = icmp sgt i64 %a, %b
%result = sext i1 %cmp to i32
}
; SI-LABEL: {{^}}test_i64_ugt:
-; SI: V_CMP_GT_U64
+; SI: v_cmp_gt_u64
define void @test_i64_ugt(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%cmp = icmp ugt i64 %a, %b
%result = sext i1 %cmp to i32
}
; SI-LABEL: {{^}}test_i64_sge:
-; SI: V_CMP_GE_I64
+; SI: v_cmp_ge_i64
define void @test_i64_sge(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%cmp = icmp sge i64 %a, %b
%result = sext i1 %cmp to i32
}
; SI-LABEL: {{^}}test_i64_uge:
-; SI: V_CMP_GE_U64
+; SI: v_cmp_ge_u64
define void @test_i64_uge(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%cmp = icmp uge i64 %a, %b
%result = sext i1 %cmp to i32
; Use a 64-bit value with lo bits that can be represented as an inline constant
; CHECK-LABEL: {{^}}i64_imm_inline_lo:
-; CHECK: S_MOV_B32 [[LO:s[0-9]+]], 5
-; CHECK: V_MOV_B32_e32 v[[LO_VGPR:[0-9]+]], [[LO]]
-; CHECK: BUFFER_STORE_DWORDX2 v{{\[}}[[LO_VGPR]]:
+; CHECK: s_mov_b32 [[LO:s[0-9]+]], 5
+; CHECK: v_mov_b32_e32 v[[LO_VGPR:[0-9]+]], [[LO]]
+; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VGPR]]:
define void @i64_imm_inline_lo(i64 addrspace(1) *%out) {
entry:
store i64 1311768464867721221, i64 addrspace(1) *%out ; 0x1234567800000005
; Use a 64-bit value with hi bits that can be represented as an inline constant
; CHECK-LABEL: {{^}}i64_imm_inline_hi:
-; CHECK: S_MOV_B32 [[HI:s[0-9]+]], 5
-; CHECK: V_MOV_B32_e32 v[[HI_VGPR:[0-9]+]], [[HI]]
-; CHECK: BUFFER_STORE_DWORDX2 v{{\[[0-9]+:}}[[HI_VGPR]]
+; CHECK: s_mov_b32 [[HI:s[0-9]+]], 5
+; CHECK: v_mov_b32_e32 v[[HI_VGPR:[0-9]+]], [[HI]]
+; CHECK: buffer_store_dwordx2 v{{\[[0-9]+:}}[[HI_VGPR]]
define void @i64_imm_inline_hi(i64 addrspace(1) *%out) {
entry:
store i64 21780256376, i64 addrspace(1) *%out ; 0x0000000512345678
}
; CHECK-LABEL: {{^}}store_inline_imm_0.0_f32
-; CHECK: V_MOV_B32_e32 [[REG:v[0-9]+]], 0{{$}}
-; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
+; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
define void @store_inline_imm_0.0_f32(float addrspace(1)* %out) {
store float 0.0, float addrspace(1)* %out
ret void
}
; CHECK-LABEL: {{^}}store_inline_imm_0.5_f32
-; CHECK: V_MOV_B32_e32 [[REG:v[0-9]+]], 0.5{{$}}
-; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
+; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 0.5{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
define void @store_inline_imm_0.5_f32(float addrspace(1)* %out) {
store float 0.5, float addrspace(1)* %out
ret void
}
; CHECK-LABEL: {{^}}store_inline_imm_m_0.5_f32
-; CHECK: V_MOV_B32_e32 [[REG:v[0-9]+]], -0.5{{$}}
-; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
+; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], -0.5{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
define void @store_inline_imm_m_0.5_f32(float addrspace(1)* %out) {
store float -0.5, float addrspace(1)* %out
ret void
}
; CHECK-LABEL: {{^}}store_inline_imm_1.0_f32
-; CHECK: V_MOV_B32_e32 [[REG:v[0-9]+]], 1.0{{$}}
-; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
+; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
define void @store_inline_imm_1.0_f32(float addrspace(1)* %out) {
store float 1.0, float addrspace(1)* %out
ret void
}
; CHECK-LABEL: {{^}}store_inline_imm_m_1.0_f32
-; CHECK: V_MOV_B32_e32 [[REG:v[0-9]+]], -1.0{{$}}
-; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
+; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], -1.0{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
define void @store_inline_imm_m_1.0_f32(float addrspace(1)* %out) {
store float -1.0, float addrspace(1)* %out
ret void
}
; CHECK-LABEL: {{^}}store_inline_imm_2.0_f32
-; CHECK: V_MOV_B32_e32 [[REG:v[0-9]+]], 2.0{{$}}
-; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
+; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 2.0{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
define void @store_inline_imm_2.0_f32(float addrspace(1)* %out) {
store float 2.0, float addrspace(1)* %out
ret void
}
; CHECK-LABEL: {{^}}store_inline_imm_m_2.0_f32
-; CHECK: V_MOV_B32_e32 [[REG:v[0-9]+]], -2.0{{$}}
-; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
+; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], -2.0{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
define void @store_inline_imm_m_2.0_f32(float addrspace(1)* %out) {
store float -2.0, float addrspace(1)* %out
ret void
}
; CHECK-LABEL: {{^}}store_inline_imm_4.0_f32
-; CHECK: V_MOV_B32_e32 [[REG:v[0-9]+]], 4.0{{$}}
-; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
+; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 4.0{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
define void @store_inline_imm_4.0_f32(float addrspace(1)* %out) {
store float 4.0, float addrspace(1)* %out
ret void
}
; CHECK-LABEL: {{^}}store_inline_imm_m_4.0_f32
-; CHECK: V_MOV_B32_e32 [[REG:v[0-9]+]], -4.0{{$}}
-; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
+; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], -4.0{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
define void @store_inline_imm_m_4.0_f32(float addrspace(1)* %out) {
store float -4.0, float addrspace(1)* %out
ret void
}
; CHECK-LABEL: {{^}}store_literal_imm_f32:
-; CHECK: V_MOV_B32_e32 [[REG:v[0-9]+]], 0x45800000
-; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
+; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 0x45800000
+; CHECK-NEXT: buffer_store_dword [[REG]]
define void @store_literal_imm_f32(float addrspace(1)* %out) {
store float 4096.0, float addrspace(1)* %out
ret void
}
; CHECK-LABEL: {{^}}add_inline_imm_0.0_f32
-; CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]]
-; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], 0.0, [[VAL]]{{$}}
-; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
+; CHECK: s_load_dword [[VAL:s[0-9]+]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 0.0, [[VAL]]{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
define void @add_inline_imm_0.0_f32(float addrspace(1)* %out, float %x) {
%y = fadd float %x, 0.0
store float %y, float addrspace(1)* %out
}
; CHECK-LABEL: {{^}}add_inline_imm_0.5_f32
-; CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]]
-; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], 0.5, [[VAL]]{{$}}
-; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
+; CHECK: s_load_dword [[VAL:s[0-9]+]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 0.5, [[VAL]]{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
define void @add_inline_imm_0.5_f32(float addrspace(1)* %out, float %x) {
%y = fadd float %x, 0.5
store float %y, float addrspace(1)* %out
}
; CHECK-LABEL: {{^}}add_inline_imm_neg_0.5_f32
-; CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]]
-; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], -0.5, [[VAL]]{{$}}
-; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
+; CHECK: s_load_dword [[VAL:s[0-9]+]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -0.5, [[VAL]]{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
define void @add_inline_imm_neg_0.5_f32(float addrspace(1)* %out, float %x) {
%y = fadd float %x, -0.5
store float %y, float addrspace(1)* %out
}
; CHECK-LABEL: {{^}}add_inline_imm_1.0_f32
-; CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]]
-; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], 1.0, [[VAL]]{{$}}
-; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
+; CHECK: s_load_dword [[VAL:s[0-9]+]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 1.0, [[VAL]]{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
define void @add_inline_imm_1.0_f32(float addrspace(1)* %out, float %x) {
%y = fadd float %x, 1.0
store float %y, float addrspace(1)* %out
}
; CHECK-LABEL: {{^}}add_inline_imm_neg_1.0_f32
-; CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]]
-; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], -1.0, [[VAL]]{{$}}
-; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
+; CHECK: s_load_dword [[VAL:s[0-9]+]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -1.0, [[VAL]]{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
define void @add_inline_imm_neg_1.0_f32(float addrspace(1)* %out, float %x) {
%y = fadd float %x, -1.0
store float %y, float addrspace(1)* %out
}
; CHECK-LABEL: {{^}}add_inline_imm_2.0_f32
-; CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]]
-; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], 2.0, [[VAL]]{{$}}
-; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
+; CHECK: s_load_dword [[VAL:s[0-9]+]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 2.0, [[VAL]]{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
define void @add_inline_imm_2.0_f32(float addrspace(1)* %out, float %x) {
%y = fadd float %x, 2.0
store float %y, float addrspace(1)* %out
}
; CHECK-LABEL: {{^}}add_inline_imm_neg_2.0_f32
-; CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]]
-; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], -2.0, [[VAL]]{{$}}
-; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
+; CHECK: s_load_dword [[VAL:s[0-9]+]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -2.0, [[VAL]]{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
define void @add_inline_imm_neg_2.0_f32(float addrspace(1)* %out, float %x) {
%y = fadd float %x, -2.0
store float %y, float addrspace(1)* %out
}
; CHECK-LABEL: {{^}}add_inline_imm_4.0_f32
-; CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]]
-; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], 4.0, [[VAL]]{{$}}
-; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
+; CHECK: s_load_dword [[VAL:s[0-9]+]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 4.0, [[VAL]]{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
define void @add_inline_imm_4.0_f32(float addrspace(1)* %out, float %x) {
%y = fadd float %x, 4.0
store float %y, float addrspace(1)* %out
}
; CHECK-LABEL: {{^}}add_inline_imm_neg_4.0_f32
-; CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]]
-; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], -4.0, [[VAL]]{{$}}
-; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
+; CHECK: s_load_dword [[VAL:s[0-9]+]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -4.0, [[VAL]]{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
define void @add_inline_imm_neg_4.0_f32(float addrspace(1)* %out, float %x) {
%y = fadd float %x, -4.0
store float %y, float addrspace(1)* %out
}
; CHECK-LABEL: @commute_add_inline_imm_0.5_f32
-; CHECK: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]]
-; CHECK: V_ADD_F32_e32 [[REG:v[0-9]+]], 0.5, [[VAL]]
-; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
+; CHECK: buffer_load_dword [[VAL:v[0-9]+]]
+; CHECK: v_add_f32_e32 [[REG:v[0-9]+]], 0.5, [[VAL]]
+; CHECK-NEXT: buffer_store_dword [[REG]]
define void @commute_add_inline_imm_0.5_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%x = load float addrspace(1)* %in
%y = fadd float %x, 0.5
}
; CHECK-LABEL: @commute_add_literal_f32
-; CHECK: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]]
-; CHECK: V_ADD_F32_e32 [[REG:v[0-9]+]], 0x44800000, [[VAL]]
-; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
+; CHECK: buffer_load_dword [[VAL:v[0-9]+]]
+; CHECK: v_add_f32_e32 [[REG:v[0-9]+]], 0x44800000, [[VAL]]
+; CHECK-NEXT: buffer_store_dword [[REG]]
define void @commute_add_literal_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%x = load float addrspace(1)* %in
%y = fadd float %x, 1024.0
; indexing of vectors.
; CHECK: extract_w_offset
-; CHECK: S_MOV_B32 m0
-; CHECK-NEXT: V_MOVRELS_B32_e32
+; CHECK: s_mov_b32 m0
+; CHECK-NEXT: v_movrels_b32_e32
define void @extract_w_offset(float addrspace(1)* %out, i32 %in) {
entry:
%0 = add i32 %in, 1
}
; CHECK: extract_wo_offset
-; CHECK: S_MOV_B32 m0
-; CHECK-NEXT: V_MOVRELS_B32_e32
+; CHECK: s_mov_b32 m0
+; CHECK-NEXT: v_movrels_b32_e32
define void @extract_wo_offset(float addrspace(1)* %out, i32 %in) {
entry:
%0 = extractelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, i32 %in
}
; CHECK: insert_w_offset
-; CHECK: S_MOV_B32 m0
-; CHECK-NEXT: V_MOVRELD_B32_e32
+; CHECK: s_mov_b32 m0
+; CHECK-NEXT: v_movreld_b32_e32
define void @insert_w_offset(float addrspace(1)* %out, i32 %in) {
entry:
%0 = add i32 %in, 1
}
; CHECK: insert_wo_offset
-; CHECK: S_MOV_B32 m0
-; CHECK-NEXT: V_MOVRELD_B32_e32
+; CHECK: s_mov_b32 m0
+; CHECK-NEXT: v_movreld_b32_e32
define void @insert_wo_offset(float addrspace(1)* %out, i32 %in) {
entry:
%0 = insertelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, float 5.0, i32 %in
; SI-LABEL: {{^}}private_access_f64_alloca:
-; SI-ALLOCA: BUFFER_STORE_DWORDX2
-; SI-ALLOCA: BUFFER_LOAD_DWORDX2
+; SI-ALLOCA: buffer_store_dwordx2
+; SI-ALLOCA: buffer_load_dwordx2
-; SI-PROMOTE: DS_WRITE_B64
-; SI-PROMOTE: DS_READ_B64
+; SI-PROMOTE: ds_write_b64
+; SI-PROMOTE: ds_read_b64
define void @private_access_f64_alloca(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in, i32 %b) nounwind {
%val = load double addrspace(1)* %in, align 8
%array = alloca double, i32 16, align 8
; SI-LABEL: {{^}}private_access_v2f64_alloca:
-; SI-ALLOCA: BUFFER_STORE_DWORDX4
-; SI-ALLOCA: BUFFER_LOAD_DWORDX4
+; SI-ALLOCA: buffer_store_dwordx4
+; SI-ALLOCA: buffer_load_dwordx4
-; SI-PROMOTE: DS_WRITE_B32
-; SI-PROMOTE: DS_WRITE_B32
-; SI-PROMOTE: DS_WRITE_B32
-; SI-PROMOTE: DS_WRITE_B32
-; SI-PROMOTE: DS_READ_B32
-; SI-PROMOTE: DS_READ_B32
-; SI-PROMOTE: DS_READ_B32
-; SI-PROMOTE: DS_READ_B32
+; SI-PROMOTE: ds_write_b32
+; SI-PROMOTE: ds_write_b32
+; SI-PROMOTE: ds_write_b32
+; SI-PROMOTE: ds_write_b32
+; SI-PROMOTE: ds_read_b32
+; SI-PROMOTE: ds_read_b32
+; SI-PROMOTE: ds_read_b32
+; SI-PROMOTE: ds_read_b32
define void @private_access_v2f64_alloca(<2 x double> addrspace(1)* noalias %out, <2 x double> addrspace(1)* noalias %in, i32 %b) nounwind {
%val = load <2 x double> addrspace(1)* %in, align 16
%array = alloca <2 x double>, i32 16, align 16
; SI-LABEL: {{^}}private_access_i64_alloca:
-; SI-ALLOCA: BUFFER_STORE_DWORDX2
-; SI-ALLOCA: BUFFER_LOAD_DWORDX2
+; SI-ALLOCA: buffer_store_dwordx2
+; SI-ALLOCA: buffer_load_dwordx2
-; SI-PROMOTE: DS_WRITE_B64
-; SI-PROMOTE: DS_READ_B64
+; SI-PROMOTE: ds_write_b64
+; SI-PROMOTE: ds_read_b64
define void @private_access_i64_alloca(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i32 %b) nounwind {
%val = load i64 addrspace(1)* %in, align 8
%array = alloca i64, i32 16, align 8
; SI-LABEL: {{^}}private_access_v2i64_alloca:
-; SI-ALLOCA: BUFFER_STORE_DWORDX4
-; SI-ALLOCA: BUFFER_LOAD_DWORDX4
+; SI-ALLOCA: buffer_store_dwordx4
+; SI-ALLOCA: buffer_load_dwordx4
-; SI-PROMOTE: DS_WRITE_B32
-; SI-PROMOTE: DS_WRITE_B32
-; SI-PROMOTE: DS_WRITE_B32
-; SI-PROMOTE: DS_WRITE_B32
-; SI-PROMOTE: DS_READ_B32
-; SI-PROMOTE: DS_READ_B32
-; SI-PROMOTE: DS_READ_B32
-; SI-PROMOTE: DS_READ_B32
+; SI-PROMOTE: ds_write_b32
+; SI-PROMOTE: ds_write_b32
+; SI-PROMOTE: ds_write_b32
+; SI-PROMOTE: ds_write_b32
+; SI-PROMOTE: ds_read_b32
+; SI-PROMOTE: ds_read_b32
+; SI-PROMOTE: ds_read_b32
+; SI-PROMOTE: ds_read_b32
define void @private_access_v2i64_alloca(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in, i32 %b) nounwind {
%val = load <2 x i64> addrspace(1)* %in, align 16
%array = alloca <2 x i64>, i32 16, align 16
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; SI-LABEL: {{^}}infinite_loop:
-; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 0x3e7
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3e7
; SI: BB0_1:
-; SI: BUFFER_STORE_DWORD [[REG]]
-; SI: S_WAITCNT vmcnt(0) expcnt(0)
-; SI: S_BRANCH BB0_1
+; SI: buffer_store_dword [[REG]]
+; SI: s_waitcnt vmcnt(0) expcnt(0)
+; SI: s_branch BB0_1
define void @infinite_loop(i32 addrspace(1)* %out) {
entry:
br label %for.body
; not just directly into the vector component?
; SI-LABEL: {{^}}insertelement_v4f32_0:
-; S_LOAD_DWORDX4 s{{[}}[[LOW_REG:[0-9]+]]:
-; V_MOV_B32_e32
-; V_MOV_B32_e32 [[CONSTREG:v[0-9]+]], 5.000000e+00
-; V_MOV_B32_e32 v[[LOW_REG]], [[CONSTREG]]
-; BUFFER_STORE_DWORDX4 v{{[}}[[LOW_REG]]:
+; s_load_dwordx4 s{{[}}[[LOW_REG:[0-9]+]]:
+; v_mov_b32_e32
+; v_mov_b32_e32 [[CONSTREG:v[0-9]+]], 5.000000e+00
+; v_mov_b32_e32 v[[LOW_REG]], [[CONSTREG]]
+; buffer_store_dwordx4 v{{[}}[[LOW_REG]]:
define void @insertelement_v4f32_0(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
%vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 0
store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
}
; SI-LABEL: {{^}}dynamic_insertelement_v2f32:
-; SI: V_MOV_B32_e32 [[CONST:v[0-9]+]], 0x40a00000
-; SI: V_MOVRELD_B32_e32 v[[LOW_RESULT_REG:[0-9]+]], [[CONST]]
-; SI: BUFFER_STORE_DWORDX2 {{v\[}}[[LOW_RESULT_REG]]:
+; SI: v_mov_b32_e32 [[CONST:v[0-9]+]], 0x40a00000
+; SI: v_movreld_b32_e32 v[[LOW_RESULT_REG:[0-9]+]], [[CONST]]
+; SI: buffer_store_dwordx2 {{v\[}}[[LOW_RESULT_REG]]:
define void @dynamic_insertelement_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, i32 %b) nounwind {
%vecins = insertelement <2 x float> %a, float 5.000000e+00, i32 %b
store <2 x float> %vecins, <2 x float> addrspace(1)* %out, align 8
}
; SI-LABEL: {{^}}dynamic_insertelement_v4f32:
-; SI: V_MOV_B32_e32 [[CONST:v[0-9]+]], 0x40a00000
-; SI: V_MOVRELD_B32_e32 v[[LOW_RESULT_REG:[0-9]+]], [[CONST]]
-; SI: BUFFER_STORE_DWORDX4 {{v\[}}[[LOW_RESULT_REG]]:
+; SI: v_mov_b32_e32 [[CONST:v[0-9]+]], 0x40a00000
+; SI: v_movreld_b32_e32 v[[LOW_RESULT_REG:[0-9]+]], [[CONST]]
+; SI: buffer_store_dwordx4 {{v\[}}[[LOW_RESULT_REG]]:
define void @dynamic_insertelement_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, i32 %b) nounwind {
%vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 %b
store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
}
; SI-LABEL: {{^}}dynamic_insertelement_v8f32:
-; FIXMESI: BUFFER_STORE_DWORDX4
-; FIXMESI: BUFFER_STORE_DWORDX4
+; FIXMESI: buffer_store_dwordx4
+; FIXMESI: buffer_store_dwordx4
define void @dynamic_insertelement_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, i32 %b) nounwind {
%vecins = insertelement <8 x float> %a, float 5.000000e+00, i32 %b
store <8 x float> %vecins, <8 x float> addrspace(1)* %out, align 32
}
; SI-LABEL: {{^}}dynamic_insertelement_v16f32:
-; FIXMESI: BUFFER_STORE_DWORDX4
-; FIXMESI: BUFFER_STORE_DWORDX4
-; FIXMESI: BUFFER_STORE_DWORDX4
-; FIXMESI: BUFFER_STORE_DWORDX4
+; FIXMESI: buffer_store_dwordx4
+; FIXMESI: buffer_store_dwordx4
+; FIXMESI: buffer_store_dwordx4
+; FIXMESI: buffer_store_dwordx4
define void @dynamic_insertelement_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, i32 %b) nounwind {
%vecins = insertelement <16 x float> %a, float 5.000000e+00, i32 %b
store <16 x float> %vecins, <16 x float> addrspace(1)* %out, align 64
}
; SI-LABEL: {{^}}dynamic_insertelement_v2i32:
-; SI: BUFFER_STORE_DWORDX2
+; SI: buffer_store_dwordx2
define void @dynamic_insertelement_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, i32 %b) nounwind {
%vecins = insertelement <2 x i32> %a, i32 5, i32 %b
store <2 x i32> %vecins, <2 x i32> addrspace(1)* %out, align 8
}
; SI-LABEL: {{^}}dynamic_insertelement_v4i32:
-; SI: BUFFER_STORE_DWORDX4
+; SI: buffer_store_dwordx4
define void @dynamic_insertelement_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, i32 %b) nounwind {
%vecins = insertelement <4 x i32> %a, i32 5, i32 %b
store <4 x i32> %vecins, <4 x i32> addrspace(1)* %out, align 16
}
; SI-LABEL: {{^}}dynamic_insertelement_v8i32:
-; FIXMESI: BUFFER_STORE_DWORDX4
-; FIXMESI: BUFFER_STORE_DWORDX4
+; FIXMESI: buffer_store_dwordx4
+; FIXMESI: buffer_store_dwordx4
define void @dynamic_insertelement_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, i32 %b) nounwind {
%vecins = insertelement <8 x i32> %a, i32 5, i32 %b
store <8 x i32> %vecins, <8 x i32> addrspace(1)* %out, align 32
}
; SI-LABEL: {{^}}dynamic_insertelement_v16i32:
-; FIXMESI: BUFFER_STORE_DWORDX4
-; FIXMESI: BUFFER_STORE_DWORDX4
-; FIXMESI: BUFFER_STORE_DWORDX4
-; FIXMESI: BUFFER_STORE_DWORDX4
+; FIXMESI: buffer_store_dwordx4
+; FIXMESI: buffer_store_dwordx4
+; FIXMESI: buffer_store_dwordx4
+; FIXMESI: buffer_store_dwordx4
define void @dynamic_insertelement_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> %a, i32 %b) nounwind {
%vecins = insertelement <16 x i32> %a, i32 5, i32 %b
store <16 x i32> %vecins, <16 x i32> addrspace(1)* %out, align 64
; SI-LABEL: {{^}}dynamic_insertelement_v2i16:
-; FIXMESI: BUFFER_STORE_DWORDX2
+; FIXMESI: buffer_store_dwordx2
define void @dynamic_insertelement_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %a, i32 %b) nounwind {
%vecins = insertelement <2 x i16> %a, i16 5, i32 %b
store <2 x i16> %vecins, <2 x i16> addrspace(1)* %out, align 8
}
; SI-LABEL: {{^}}dynamic_insertelement_v4i16:
-; FIXMESI: BUFFER_STORE_DWORDX4
+; FIXMESI: buffer_store_dwordx4
define void @dynamic_insertelement_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %a, i32 %b) nounwind {
%vecins = insertelement <4 x i16> %a, i16 5, i32 %b
store <4 x i16> %vecins, <4 x i16> addrspace(1)* %out, align 16
}
; SI-LABEL: {{^}}dynamic_insertelement_v4i8:
-; FIXMESI: BUFFER_STORE_DWORD
+; FIXMESI: buffer_store_dword
define void @dynamic_insertelement_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a, i32 %b) nounwind {
%vecins = insertelement <4 x i8> %a, i8 5, i32 %b
store <4 x i8> %vecins, <4 x i8> addrspace(1)* %out, align 16
}
; SI-LABEL: {{^}}dynamic_insertelement_v8i8:
-; FIXMESI: BUFFER_STORE_DWORDX2
+; FIXMESI: buffer_store_dwordx2
define void @dynamic_insertelement_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> %a, i32 %b) nounwind {
%vecins = insertelement <8 x i8> %a, i8 5, i32 %b
store <8 x i8> %vecins, <8 x i8> addrspace(1)* %out, align 16
}
; SI-LABEL: {{^}}dynamic_insertelement_v16i8:
-; FIXMESI: BUFFER_STORE_DWORDX4
+; FIXMESI: buffer_store_dwordx4
define void @dynamic_insertelement_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8> %a, i32 %b) nounwind {
%vecins = insertelement <16 x i8> %a, i8 5, i32 %b
store <16 x i8> %vecins, <16 x i8> addrspace(1)* %out, align 16
}
; SI-LABEL: {{^}}dynamic_insertelement_v2f64:
-; SI: BUFFER_STORE_DWORDX2
-; SI: BUFFER_STORE_DWORDX2
-; SI: BUFFER_STORE_DWORDX2
-; SI: BUFFER_STORE_DWORDX2
-; SI: S_ENDPGM
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
define void @dynamic_insertelement_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, i32 %b) nounwind {
%vecins = insertelement <2 x double> %a, double 8.0, i32 %b
store <2 x double> %vecins, <2 x double> addrspace(1)* %out, align 16
}
; SI-LABEL: {{^}}dynamic_insertelement_v2i64:
-; SI: BUFFER_STORE_DWORDX2
-; SI: BUFFER_STORE_DWORDX2
-; SI: S_ENDPGM
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
define void @dynamic_insertelement_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> %a, i32 %b) nounwind {
%vecins = insertelement <2 x i64> %a, i64 5, i32 %b
store <2 x i64> %vecins, <2 x i64> addrspace(1)* %out, align 8
}
; SI-LABEL: {{^}}dynamic_insertelement_v4f64:
-; SI: BUFFER_STORE_DWORDX2
-; SI: BUFFER_STORE_DWORDX2
-; SI: BUFFER_STORE_DWORDX2
-; SI: BUFFER_STORE_DWORDX2
-; SI: S_ENDPGM
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
define void @dynamic_insertelement_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, i32 %b) nounwind {
%vecins = insertelement <4 x double> %a, double 8.0, i32 %b
store <4 x double> %vecins, <4 x double> addrspace(1)* %out, align 16
}
; SI-LABEL: {{^}}dynamic_insertelement_v8f64:
-; SI: BUFFER_STORE_DWORDX2
-; SI: BUFFER_STORE_DWORDX2
-; SI: BUFFER_STORE_DWORDX2
-; SI: BUFFER_STORE_DWORDX2
-; SI: BUFFER_STORE_DWORDX2
-; SI: BUFFER_STORE_DWORDX2
-; SI: BUFFER_STORE_DWORDX2
-; SI: BUFFER_STORE_DWORDX2
-; SI: S_ENDPGM
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
define void @dynamic_insertelement_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %a, i32 %b) nounwind {
%vecins = insertelement <8 x double> %a, double 8.0, i32 %b
store <8 x double> %vecins, <8 x double> addrspace(1)* %out, align 16
; EG-CHECK-LABEL: {{^}}i8_arg:
; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI-CHECK-LABEL: {{^}}i8_arg:
-; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: buffer_load_ubyte
define void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind {
entry:
; EG-CHECK-LABEL: {{^}}i8_zext_arg:
; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI-CHECK-LABEL: {{^}}i8_zext_arg:
-; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 0xb
+; SI-CHECK: s_load_dword s{{[0-9]}}, s[0:1], 0xb
define void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind {
entry:
; EG-CHECK-LABEL: {{^}}i8_sext_arg:
; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI-CHECK-LABEL: {{^}}i8_sext_arg:
-; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 0xb
+; SI-CHECK: s_load_dword s{{[0-9]}}, s[0:1], 0xb
define void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 signext %in) nounwind {
entry:
; EG-CHECK-LABEL: {{^}}i16_arg:
; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI-CHECK-LABEL: {{^}}i16_arg:
-; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: buffer_load_ushort
define void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind {
entry:
; EG-CHECK-LABEL: {{^}}i16_zext_arg:
; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI-CHECK-LABEL: {{^}}i16_zext_arg:
-; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 0xb
+; SI-CHECK: s_load_dword s{{[0-9]}}, s[0:1], 0xb
define void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind {
entry:
; EG-CHECK-LABEL: {{^}}i16_sext_arg:
; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI-CHECK-LABEL: {{^}}i16_sext_arg:
-; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 0xb
+; SI-CHECK: s_load_dword s{{[0-9]}}, s[0:1], 0xb
define void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 signext %in) nounwind {
entry:
; EG-CHECK-LABEL: {{^}}i32_arg:
; EG-CHECK: T{{[0-9]\.[XYZW]}}, KC0[2].Z
; SI-CHECK-LABEL: {{^}}i32_arg:
-; S_LOAD_DWORD s{{[0-9]}}, s[0:1], 0xb
+; s_load_dword s{{[0-9]}}, s[0:1], 0xb
define void @i32_arg(i32 addrspace(1)* nocapture %out, i32 %in) nounwind {
entry:
store i32 %in, i32 addrspace(1)* %out, align 4
; EG-CHECK-LABEL: {{^}}f32_arg:
; EG-CHECK: T{{[0-9]\.[XYZW]}}, KC0[2].Z
; SI-CHECK-LABEL: {{^}}f32_arg:
-; S_LOAD_DWORD s{{[0-9]}}, s[0:1], 0xb
+; s_load_dword s{{[0-9]}}, s[0:1], 0xb
define void @f32_arg(float addrspace(1)* nocapture %out, float %in) nounwind {
entry:
store float %in, float addrspace(1)* %out, align 4
; EG-CHECK: VTX_READ_8
; EG-CHECK: VTX_READ_8
; SI-CHECK-LABEL: {{^}}v2i8_arg:
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
define void @v2i8_arg(<2 x i8> addrspace(1)* %out, <2 x i8> %in) {
entry:
store <2 x i8> %in, <2 x i8> addrspace(1)* %out
; EG-CHECK: VTX_READ_16
; EG-CHECK: VTX_READ_16
; SI-CHECK-LABEL: {{^}}v2i16_arg:
-; SI-CHECK-DAG: BUFFER_LOAD_USHORT
-; SI-CHECK-DAG: BUFFER_LOAD_USHORT
+; SI-CHECK-DAG: buffer_load_ushort
+; SI-CHECK-DAG: buffer_load_ushort
define void @v2i16_arg(<2 x i16> addrspace(1)* %out, <2 x i16> %in) {
entry:
store <2 x i16> %in, <2 x i16> addrspace(1)* %out
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
; SI-CHECK-LABEL: {{^}}v2i32_arg:
-; SI-CHECK: S_LOAD_DWORDX2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
+; SI-CHECK: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
define void @v2i32_arg(<2 x i32> addrspace(1)* nocapture %out, <2 x i32> %in) nounwind {
entry:
store <2 x i32> %in, <2 x i32> addrspace(1)* %out, align 4
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
; SI-CHECK-LABEL: {{^}}v2f32_arg:
-; SI-CHECK: S_LOAD_DWORDX2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
+; SI-CHECK: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
define void @v2f32_arg(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) nounwind {
entry:
store <2 x float> %in, <2 x float> addrspace(1)* %out, align 4
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; SI-CHECK-LABEL: {{^}}v3i32_arg:
-; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
+; SI-CHECK: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
define void @v3i32_arg(<3 x i32> addrspace(1)* nocapture %out, <3 x i32> %in) nounwind {
entry:
store <3 x i32> %in, <3 x i32> addrspace(1)* %out, align 4
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; SI-CHECK-LABEL: {{^}}v3f32_arg:
-; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
+; SI-CHECK: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
define void @v3f32_arg(<3 x float> addrspace(1)* nocapture %out, <3 x float> %in) nounwind {
entry:
store <3 x float> %in, <3 x float> addrspace(1)* %out, align 4
; EG-CHECK: VTX_READ_8
; EG-CHECK: VTX_READ_8
; SI-CHECK-LABEL: {{^}}v4i8_arg:
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
define void @v4i8_arg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) {
entry:
store <4 x i8> %in, <4 x i8> addrspace(1)* %out
; EG-CHECK: VTX_READ_16
; EG-CHECK: VTX_READ_16
; SI-CHECK-LABEL: {{^}}v4i16_arg:
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
define void @v4i16_arg(<4 x i16> addrspace(1)* %out, <4 x i16> %in) {
entry:
store <4 x i16> %in, <4 x i16> addrspace(1)* %out
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
; SI-CHECK-LABEL: {{^}}v4i32_arg:
-; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
+; SI-CHECK: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
define void @v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> %in) nounwind {
entry:
store <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
; SI-CHECK-LABEL: {{^}}v4f32_arg:
-; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
+; SI-CHECK: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
define void @v4f32_arg(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) nounwind {
entry:
store <4 x float> %in, <4 x float> addrspace(1)* %out, align 4
; EG-CHECK: VTX_READ_8
; EG-CHECK: VTX_READ_8
; SI-CHECK-LABEL: {{^}}v8i8_arg:
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
define void @v8i8_arg(<8 x i8> addrspace(1)* %out, <8 x i8> %in) {
entry:
store <8 x i8> %in, <8 x i8> addrspace(1)* %out
; EG-CHECK: VTX_READ_16
; EG-CHECK: VTX_READ_16
; SI-CHECK-LABEL: {{^}}v8i16_arg:
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
define void @v8i16_arg(<8 x i16> addrspace(1)* %out, <8 x i16> %in) {
entry:
store <8 x i16> %in, <8 x i16> addrspace(1)* %out
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
; SI-CHECK-LABEL: {{^}}v8i32_arg:
-; SI-CHECK: S_LOAD_DWORDX8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11
+; SI-CHECK: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11
define void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8 x i32> %in) nounwind {
entry:
store <8 x i32> %in, <8 x i32> addrspace(1)* %out, align 4
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
; SI-CHECK-LABEL: {{^}}v8f32_arg:
-; SI-CHECK: S_LOAD_DWORDX8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11
+; SI-CHECK: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11
define void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, <8 x float> %in) nounwind {
entry:
store <8 x float> %in, <8 x float> addrspace(1)* %out, align 4
; EG-CHECK: VTX_READ_8
; EG-CHECK: VTX_READ_8
; SI-CHECK-LABEL: {{^}}v16i8_arg:
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
define void @v16i8_arg(<16 x i8> addrspace(1)* %out, <16 x i8> %in) {
entry:
store <16 x i8> %in, <16 x i8> addrspace(1)* %out
; EG-CHECK: VTX_READ_16
; EG-CHECK: VTX_READ_16
; SI-CHECK-LABEL: {{^}}v16i16_arg:
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
define void @v16i16_arg(<16 x i16> addrspace(1)* %out, <16 x i16> %in) {
entry:
store <16 x i16> %in, <16 x i16> addrspace(1)* %out
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
; SI-CHECK-LABEL: {{^}}v16i32_arg:
-; SI-CHECK: S_LOAD_DWORDX16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19
+; SI-CHECK: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19
define void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, <16 x i32> %in) nounwind {
entry:
store <16 x i32> %in, <16 x i32> addrspace(1)* %out, align 4
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
; SI-CHECK-LABEL: {{^}}v16f32_arg:
-; SI-CHECK: S_LOAD_DWORDX16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19
+; SI-CHECK: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19
define void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out, <16 x float> %in) nounwind {
entry:
store <16 x float> %in, <16 x float> addrspace(1)* %out, align 4
}
; FUNC-LABEL: {{^}}kernel_arg_i64:
-; SI: S_LOAD_DWORDX2
-; SI: S_LOAD_DWORDX2
-; SI: BUFFER_STORE_DWORDX2
+; SI: s_load_dwordx2
+; SI: s_load_dwordx2
+; SI: buffer_store_dwordx2
define void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwind {
store i64 %a, i64 addrspace(1)* %out, align 8
ret void
}
; XFUNC-LABEL: {{^}}kernel_arg_v1i64:
-; XSI: S_LOAD_DWORDX2
-; XSI: S_LOAD_DWORDX2
-; XSI: BUFFER_STORE_DWORDX2
+; XSI: s_load_dwordx2
+; XSI: s_load_dwordx2
+; XSI: buffer_store_dwordx2
; define void @kernel_arg_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a) nounwind {
; store <1 x i64> %a, <1 x i64> addrspace(1)* %out, align 8
; ret void
; RUN: llc -march=r600 -mcpu=SI < %s
-; CHECK: S_ENDPGM
+; CHECK: s_endpgm
@gv = external unnamed_addr addrspace(2) constant [239 x i32], align 4
declare i32 @llvm.AMDIL.abs.i32(i32) nounwind readnone
; FUNC-LABEL: {{^}}s_abs_i32:
-; SI: S_SUB_I32
-; SI: S_MAX_I32
-; SI: S_ENDPGM
+; SI: s_sub_i32
+; SI: s_max_i32
+; SI: s_endpgm
; EG: SUB_INT
; EG: MAX_INT
}
; FUNC-LABEL: {{^}}v_abs_i32:
-; SI: V_SUB_I32_e32
-; SI: V_MAX_I32_e32
-; SI: S_ENDPGM
+; SI: v_sub_i32_e32
+; SI: v_max_i32_e32
+; SI: s_endpgm
; EG: SUB_INT
; EG: MAX_INT
}
; FUNC-LABEL: {{^}}abs_i32_legacy_amdil:
-; SI: V_SUB_I32_e32
-; SI: V_MAX_I32_e32
-; SI: S_ENDPGM
+; SI: v_sub_i32_e32
+; SI: v_max_i32_e32
+; SI: s_endpgm
; EG: SUB_INT
; EG: MAX_INT
; FUNC-LABEL: {{^}}test_barrier_global:
; EG: GROUP_BARRIER
-; SI: S_BARRIER
+; SI: s_barrier
define void @test_barrier_global(i32 addrspace(1)* %out) {
entry:
; FUNC-LABEL: {{^}}test_barrier_local:
; EG: GROUP_BARRIER
-; SI: S_BARRIER
+; SI: s_barrier
define void @test_barrier_local(i32 addrspace(1)* %out) {
entry:
declare i32 @llvm.AMDGPU.bfe.i32(i32, i32, i32) nounwind readnone
; FUNC-LABEL: {{^}}bfe_i32_arg_arg_arg:
-; SI: V_BFE_I32
+; SI: v_bfe_i32
; EG: BFE_INT
; EG: encoding: [{{[x0-9a-f]+,[x0-9a-f]+,[x0-9a-f]+,[x0-9a-f]+,[x0-9a-f]+}},0xac
define void @bfe_i32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
}
; FUNC-LABEL: {{^}}bfe_i32_arg_arg_imm:
-; SI: V_BFE_I32
+; SI: v_bfe_i32
; EG: BFE_INT
define void @bfe_i32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 %src1, i32 123) nounwind readnone
}
; FUNC-LABEL: {{^}}bfe_i32_arg_imm_arg:
-; SI: V_BFE_I32
+; SI: v_bfe_i32
; EG: BFE_INT
define void @bfe_i32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 123, i32 %src2) nounwind readnone
}
; FUNC-LABEL: {{^}}bfe_i32_imm_arg_arg:
-; SI: V_BFE_I32
+; SI: v_bfe_i32
; EG: BFE_INT
define void @bfe_i32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 123, i32 %src1, i32 %src2) nounwind readnone
}
; FUNC-LABEL: {{^}}v_bfe_print_arg:
-; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 2, 8
+; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 2, 8
define void @v_bfe_print_arg(i32 addrspace(1)* %out, i32 addrspace(1)* %src0) nounwind {
%load = load i32 addrspace(1)* %src0, align 4
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 2, i32 8) nounwind readnone
}
; FUNC-LABEL: {{^}}bfe_i32_arg_0_width_reg_offset:
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 %src1, i32 0) nounwind readnone
}
; FUNC-LABEL: {{^}}bfe_i32_arg_0_width_imm_offset:
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 8, i32 0) nounwind readnone
}
; FUNC-LABEL: {{^}}bfe_i32_test_6:
-; SI: V_LSHLREV_B32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
-; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
-; SI: S_ENDPGM
+; SI: v_lshlrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
+; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
+; SI: s_endpgm
define void @bfe_i32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
}
; FUNC-LABEL: {{^}}bfe_i32_test_7:
-; SI-NOT: SHL
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; SI-NOT: shl
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
define void @bfe_i32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
; FIXME: The shifts should be 1 BFE
; FUNC-LABEL: {{^}}bfe_i32_test_8:
-; SI: BUFFER_LOAD_DWORD
-; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
-; SI: S_ENDPGM
+; SI: buffer_load_dword
+; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
+; SI: s_endpgm
define void @bfe_i32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
}
; FUNC-LABEL: {{^}}bfe_i32_test_9:
-; SI-NOT: BFE
-; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
define void @bfe_i32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 31, i32 1)
}
; FUNC-LABEL: {{^}}bfe_i32_test_10:
-; SI-NOT: BFE
-; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
define void @bfe_i32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 1, i32 31)
}
; FUNC-LABEL: {{^}}bfe_i32_test_11:
-; SI-NOT: BFE
-; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
define void @bfe_i32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 8, i32 24)
}
; FUNC-LABEL: {{^}}bfe_i32_test_12:
-; SI-NOT: BFE
-; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}}
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}}
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
define void @bfe_i32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 24, i32 8)
}
; FUNC-LABEL: {{^}}bfe_i32_test_13:
-; SI: V_ASHRREV_I32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; SI: v_ashrrev_i32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
define void @bfe_i32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%shl = ashr i32 %x, 31
}
; FUNC-LABEL: {{^}}bfe_i32_test_14:
-; SI-NOT: LSHR
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; SI-NOT: lshr
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
define void @bfe_i32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%shl = lshr i32 %x, 31
}
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_0:
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_0(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 0, i32 0, i32 0) nounwind readnone
}
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_1:
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_1(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 12334, i32 0, i32 0) nounwind readnone
}
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_2:
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_2(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 0, i32 0, i32 1) nounwind readnone
}
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_3:
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_3(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 1, i32 0, i32 1) nounwind readnone
}
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_4:
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_4(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 4294967295, i32 0, i32 1) nounwind readnone
}
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_5:
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_5(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 128, i32 7, i32 1) nounwind readnone
}
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_6:
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0xffffff80
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0xffffff80
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_6(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 128, i32 0, i32 8) nounwind readnone
}
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_7:
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_7(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 127, i32 0, i32 8) nounwind readnone
}
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_8:
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_8(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 127, i32 6, i32 8) nounwind readnone
}
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_9:
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_9(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 65536, i32 16, i32 8) nounwind readnone
}
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_10:
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_10(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 65535, i32 16, i32 16) nounwind readnone
}
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_11:
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -6
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -6
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_11(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 4, i32 4) nounwind readnone
}
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_12:
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_12(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 31, i32 1) nounwind readnone
}
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_13:
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_13(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 131070, i32 16, i32 16) nounwind readnone
}
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_14:
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 40
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 40
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_14(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 2, i32 30) nounwind readnone
}
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_15:
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 10
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 10
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_15(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 4, i32 28) nounwind readnone
}
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_16:
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_16(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 4294967295, i32 1, i32 7) nounwind readnone
}
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_17:
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_17(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 255, i32 1, i32 31) nounwind readnone
}
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_18:
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_18(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 255, i32 31, i32 1) nounwind readnone
; XXX - This should really be a single BFE, but the sext_inreg of the
; extended type i24 is never custom lowered.
; FUNC-LABEL: {{^}}bfe_sext_in_reg_i24:
-; SI: BUFFER_LOAD_DWORD [[LOAD:v[0-9]+]],
-; SI: V_LSHLREV_B32_e32 {{v[0-9]+}}, 8, {{v[0-9]+}}
-; SI: V_ASHRREV_I32_e32 {{v[0-9]+}}, 8, {{v[0-9]+}}
-; XSI: V_BFE_I32 [[BFE:v[0-9]+]], [[LOAD]], 0, 8
+; SI: buffer_load_dword [[LOAD:v[0-9]+]],
+; SI: v_lshlrev_b32_e32 {{v[0-9]+}}, 8, {{v[0-9]+}}
+; SI: v_ashrrev_i32_e32 {{v[0-9]+}}, 8, {{v[0-9]+}}
+; XSI: v_bfe_i32 [[BFE:v[0-9]+]], [[LOAD]], 0, 8
; XSI-NOT: SHL
; XSI-NOT: SHR
-; XSI: BUFFER_STORE_DWORD [[BFE]],
+; XSI: buffer_store_dword [[BFE]],
define void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 0, i32 24)
}
; FUNC-LABEL: @simplify_demanded_bfe_sdiv
-; SI: BUFFER_LOAD_DWORD [[LOAD:v[0-9]+]]
-; SI: V_BFE_I32 [[BFE:v[0-9]+]], [[LOAD]], 1, 16
-; SI: V_LSHRREV_B32_e32 [[TMP0:v[0-9]+]], 31, [[BFE]]
-; SI: V_ADD_I32_e32 [[TMP1:v[0-9]+]], [[TMP0]], [[BFE]]
-; SI: V_ASHRREV_I32_e32 [[TMP2:v[0-9]+]], 1, [[TMP1]]
-; SI: BUFFER_STORE_DWORD [[TMP2]]
+; SI: buffer_load_dword [[LOAD:v[0-9]+]]
+; SI: v_bfe_i32 [[BFE:v[0-9]+]], [[LOAD]], 1, 16
+; SI: v_lshrrev_b32_e32 [[TMP0:v[0-9]+]], 31, [[BFE]]
+; SI: v_add_i32_e32 [[TMP1:v[0-9]+]], [[TMP0]], [[BFE]]
+; SI: v_ashrrev_i32_e32 [[TMP2:v[0-9]+]], 1, [[TMP1]]
+; SI: buffer_store_dword [[TMP2]]
define void @simplify_demanded_bfe_sdiv(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%src = load i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %src, i32 1, i32 16) nounwind readnone
declare i32 @llvm.AMDGPU.bfe.u32(i32, i32, i32) nounwind readnone
; FUNC-LABEL: {{^}}bfe_u32_arg_arg_arg:
-; SI: V_BFE_U32
+; SI: v_bfe_u32
; EG: BFE_UINT
define void @bfe_u32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 %src1, i32 %src1) nounwind readnone
}
; FUNC-LABEL: {{^}}bfe_u32_arg_arg_imm:
-; SI: V_BFE_U32
+; SI: v_bfe_u32
; EG: BFE_UINT
define void @bfe_u32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 %src1, i32 123) nounwind readnone
}
; FUNC-LABEL: {{^}}bfe_u32_arg_imm_arg:
-; SI: V_BFE_U32
+; SI: v_bfe_u32
; EG: BFE_UINT
define void @bfe_u32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 123, i32 %src2) nounwind readnone
}
; FUNC-LABEL: {{^}}bfe_u32_imm_arg_arg:
-; SI: V_BFE_U32
+; SI: v_bfe_u32
; EG: BFE_UINT
define void @bfe_u32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 123, i32 %src1, i32 %src2) nounwind readnone
}
; FUNC-LABEL: {{^}}bfe_u32_arg_0_width_reg_offset:
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 %src1, i32 0) nounwind readnone
}
; FUNC-LABEL: {{^}}bfe_u32_arg_0_width_imm_offset:
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 8, i32 0) nounwind readnone
}
; FUNC-LABEL: {{^}}bfe_u32_zextload_i8:
-; SI: BUFFER_LOAD_UBYTE
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; SI: buffer_load_ubyte
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
define void @bfe_u32_zextload_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
%load = load i8 addrspace(1)* %in
%ext = zext i8 %load to i32
}
; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i8:
-; SI: BUFFER_LOAD_DWORD
-; SI: V_ADD_I32
-; SI-NEXT: V_AND_B32_e32
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; SI: buffer_load_dword
+; SI: v_add_i32
+; SI-NEXT: v_and_b32_e32
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%load = load i32 addrspace(1)* %in, align 4
%add = add i32 %load, 1
}
; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i16:
-; SI: BUFFER_LOAD_DWORD
-; SI: V_ADD_I32
-; SI-NEXT: V_AND_B32_e32
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; SI: buffer_load_dword
+; SI: v_add_i32
+; SI-NEXT: v_and_b32_e32
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i16(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%load = load i32 addrspace(1)* %in, align 4
%add = add i32 %load, 1
}
; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_1:
-; SI: BUFFER_LOAD_DWORD
-; SI: V_ADD_I32
-; SI: BFE
-; SI: S_ENDPGM
+; SI: buffer_load_dword
+; SI: v_add_i32
+; SI: bfe
+; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%load = load i32 addrspace(1)* %in, align 4
%add = add i32 %load, 1
}
; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_3:
-; SI: BUFFER_LOAD_DWORD
-; SI: V_ADD_I32
-; SI-NEXT: V_AND_B32_e32 {{v[0-9]+}}, 0xf8
-; SI-NEXT: BFE
-; SI: S_ENDPGM
+; SI: buffer_load_dword
+; SI: v_add_i32
+; SI-NEXT: v_and_b32_e32 {{v[0-9]+}}, 0xf8
+; SI-NEXT: bfe
+; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i8_offset_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%load = load i32 addrspace(1)* %in, align 4
%add = add i32 %load, 1
}
; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_7:
-; SI: BUFFER_LOAD_DWORD
-; SI: V_ADD_I32
-; SI-NEXT: V_AND_B32_e32 {{v[0-9]+}}, 0x80
-; SI-NEXT: BFE
-; SI: S_ENDPGM
+; SI: buffer_load_dword
+; SI: v_add_i32
+; SI-NEXT: v_and_b32_e32 {{v[0-9]+}}, 0x80
+; SI-NEXT: bfe
+; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i8_offset_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%load = load i32 addrspace(1)* %in, align 4
%add = add i32 %load, 1
}
; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i16_offset_8:
-; SI: BUFFER_LOAD_DWORD
-; SI: V_ADD_I32
-; SI-NEXT: BFE
-; SI: S_ENDPGM
+; SI: buffer_load_dword
+; SI: v_add_i32
+; SI-NEXT: bfe
+; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%load = load i32 addrspace(1)* %in, align 4
%add = add i32 %load, 1
}
; FUNC-LABEL: {{^}}bfe_u32_test_1:
-; SI: BUFFER_LOAD_DWORD
-; SI: V_AND_B32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
-; SI: S_ENDPGM
+; SI: buffer_load_dword
+; SI: v_and_b32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
+; SI: s_endpgm
; EG: AND_INT T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, 1,
define void @bfe_u32_test_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
}
; FUNC-LABEL: {{^}}bfe_u32_test_4:
-; SI-NOT: LSHL
-; SI-NOT: SHR
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; SI-NOT: lshl
+; SI-NOT: shr
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
define void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
}
; FUNC-LABEL: {{^}}bfe_u32_test_5:
-; SI: BUFFER_LOAD_DWORD
-; SI-NOT: LSHL
-; SI-NOT: SHR
-; SI: V_BFE_I32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1
-; SI: S_ENDPGM
+; SI: buffer_load_dword
+; SI-NOT: lshl
+; SI-NOT: shr
+; SI: v_bfe_i32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1
+; SI: s_endpgm
define void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
}
; FUNC-LABEL: {{^}}bfe_u32_test_6:
-; SI: V_LSHLREV_B32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
-; SI: V_LSHRREV_B32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
-; SI: S_ENDPGM
+; SI: v_lshlrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
+; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
+; SI: s_endpgm
define void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
}
; FUNC-LABEL: {{^}}bfe_u32_test_7:
-; SI: V_LSHLREV_B32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; SI: v_lshlrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
define void @bfe_u32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
}
; FUNC-LABEL: {{^}}bfe_u32_test_8:
-; SI-NOT: BFE
-; SI: V_AND_B32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: v_and_b32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
define void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
}
; FUNC-LABEL: {{^}}bfe_u32_test_9:
-; SI-NOT: BFE
-; SI: V_LSHRREV_B32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
define void @bfe_u32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 31, i32 1)
}
; FUNC-LABEL: {{^}}bfe_u32_test_10:
-; SI-NOT: BFE
-; SI: V_LSHRREV_B32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
define void @bfe_u32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 1, i32 31)
}
; FUNC-LABEL: {{^}}bfe_u32_test_11:
-; SI-NOT: BFE
-; SI: V_LSHRREV_B32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
define void @bfe_u32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 8, i32 24)
}
; FUNC-LABEL: {{^}}bfe_u32_test_12:
-; SI-NOT: BFE
-; SI: V_LSHRREV_B32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}}
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}}
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
define void @bfe_u32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 24, i32 8)
; FUNC-LABEL: {{^}}bfe_u32_test_13:
; V_ASHRREV_U32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
define void @bfe_u32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%shl = ashr i32 %x, 31
}
; FUNC-LABEL: {{^}}bfe_u32_test_14:
-; SI-NOT: LSHR
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; SI-NOT: lshr
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
define void @bfe_u32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%shl = lshr i32 %x, 31
}
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_0:
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_0(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 0, i32 0, i32 0) nounwind readnone
}
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_1:
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_1(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 12334, i32 0, i32 0) nounwind readnone
}
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_2:
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_2(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 0, i32 0, i32 1) nounwind readnone
}
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_3:
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_3(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 1, i32 0, i32 1) nounwind readnone
}
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_4:
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_4(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 4294967295, i32 0, i32 1) nounwind readnone
}
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_5:
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_5(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 128, i32 7, i32 1) nounwind readnone
}
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_6:
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x80
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x80
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_6(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 128, i32 0, i32 8) nounwind readnone
}
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_7:
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_7(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 127, i32 0, i32 8) nounwind readnone
}
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_8:
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_8(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 127, i32 6, i32 8) nounwind readnone
}
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_9:
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFEfppppppppppppp
define void @bfe_u32_constant_fold_test_9(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 65536, i32 16, i32 8) nounwind readnone
}
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_10:
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_10(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 65535, i32 16, i32 16) nounwind readnone
}
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_11:
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 10
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 10
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_11(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 4, i32 4) nounwind readnone
}
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_12:
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_12(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 31, i32 1) nounwind readnone
}
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_13:
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_13(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 131070, i32 16, i32 16) nounwind readnone
}
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_14:
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 40
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 40
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_14(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 2, i32 30) nounwind readnone
}
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_15:
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 10
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 10
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_15(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 4, i32 28) nounwind readnone
}
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_16:
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_16(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 4294967295, i32 1, i32 7) nounwind readnone
}
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_17:
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_17(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 255, i32 1, i32 31) nounwind readnone
}
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_18:
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 255, i32 31, i32 1) nounwind readnone
; XXX: The operand to v_bfe_u32 could also just directly be the load register.
; FUNC-LABEL: {{^}}simplify_bfe_u32_multi_use_arg:
-; SI: BUFFER_LOAD_DWORD [[ARG:v[0-9]+]]
-; SI: V_AND_B32_e32 [[AND:v[0-9]+]], 63, [[ARG]]
-; SI: V_BFE_U32 [[BFE:v[0-9]+]], [[AND]], 2, 2
-; SI-DAG: BUFFER_STORE_DWORD [[AND]]
-; SI-DAG: BUFFER_STORE_DWORD [[BFE]]
-; SI: S_ENDPGM
+; SI: buffer_load_dword [[ARG:v[0-9]+]]
+; SI: v_and_b32_e32 [[AND:v[0-9]+]], 63, [[ARG]]
+; SI: v_bfe_u32 [[BFE:v[0-9]+]], [[AND]], 2, 2
+; SI-DAG: buffer_store_dword [[AND]]
+; SI-DAG: buffer_store_dword [[BFE]]
+; SI: s_endpgm
define void @simplify_bfe_u32_multi_use_arg(i32 addrspace(1)* %out0,
i32 addrspace(1)* %out1,
i32 addrspace(1)* %in) nounwind {
declare i32 @llvm.AMDGPU.bfi(i32, i32, i32) nounwind readnone
; FUNC-LABEL: {{^}}bfi_arg_arg_arg:
-; SI: V_BFI_B32
+; SI: v_bfi_b32
; EG: BFI_INT
define void @bfi_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
%bfi = call i32 @llvm.AMDGPU.bfi(i32 %src0, i32 %src1, i32 %src1) nounwind readnone
}
; FUNC-LABEL: {{^}}bfi_arg_arg_imm:
-; SI: V_BFI_B32
+; SI: v_bfi_b32
; EG: BFI_INT
define void @bfi_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
%bfi = call i32 @llvm.AMDGPU.bfi(i32 %src0, i32 %src1, i32 123) nounwind readnone
}
; FUNC-LABEL: {{^}}bfi_arg_imm_arg:
-; SI: V_BFI_B32
+; SI: v_bfi_b32
; EG: BFI_INT
define void @bfi_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) nounwind {
%bfi = call i32 @llvm.AMDGPU.bfi(i32 %src0, i32 123, i32 %src2) nounwind readnone
}
; FUNC-LABEL: {{^}}bfi_imm_arg_arg:
-; SI: V_BFI_B32
+; SI: v_bfi_b32
; EG: BFI_INT
define void @bfi_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) nounwind {
%bfi = call i32 @llvm.AMDGPU.bfi(i32 123, i32 %src1, i32 %src2) nounwind readnone
declare i32 @llvm.AMDGPU.bfm(i32, i32) nounwind readnone
; FUNC-LABEL: {{^}}bfm_arg_arg:
-; SI: V_BFM
+; SI: v_bfm
; EG: BFM_INT
define void @bfm_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
%bfm = call i32 @llvm.AMDGPU.bfm(i32 %src0, i32 %src1) nounwind readnone
}
; FUNC-LABEL: {{^}}bfm_arg_imm:
-; SI: V_BFM
+; SI: v_bfm
; EG: BFM_INT
define void @bfm_arg_imm(i32 addrspace(1)* %out, i32 %src0) nounwind {
%bfm = call i32 @llvm.AMDGPU.bfm(i32 %src0, i32 123) nounwind readnone
}
; FUNC-LABEL: {{^}}bfm_imm_arg:
-; SI: V_BFM
+; SI: v_bfm
; EG: BFM_INT
define void @bfm_imm_arg(i32 addrspace(1)* %out, i32 %src1) nounwind {
%bfm = call i32 @llvm.AMDGPU.bfm(i32 123, i32 %src1) nounwind readnone
}
; FUNC-LABEL: {{^}}bfm_imm_imm:
-; SI: V_BFM
+; SI: v_bfm
; EG: BFM_INT
define void @bfm_imm_imm(i32 addrspace(1)* %out) nounwind {
%bfm = call i32 @llvm.AMDGPU.bfm(i32 123, i32 456) nounwind readnone
declare i32 @llvm.AMDGPU.brev(i32) nounwind readnone
; FUNC-LABEL: {{^}}s_brev_i32:
-; SI: S_LOAD_DWORD [[VAL:s[0-9]+]],
-; SI: S_BREV_B32 [[SRESULT:s[0-9]+]], [[VAL]]
-; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
-; SI: BUFFER_STORE_DWORD [[VRESULT]],
-; SI: S_ENDPGM
+; SI: s_load_dword [[VAL:s[0-9]+]],
+; SI: s_brev_b32 [[SRESULT:s[0-9]+]], [[VAL]]
+; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
+; SI: buffer_store_dword [[VRESULT]],
+; SI: s_endpgm
define void @s_brev_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
%ctlz = call i32 @llvm.AMDGPU.brev(i32 %val) nounwind readnone
store i32 %ctlz, i32 addrspace(1)* %out, align 4
}
; FUNC-LABEL: {{^}}v_brev_i32:
-; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
-; SI: V_BFREV_B32_e32 [[RESULT:v[0-9]+]], [[VAL]]
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; SI: buffer_load_dword [[VAL:v[0-9]+]],
+; SI: v_bfrev_b32_e32 [[RESULT:v[0-9]+]], [[VAL]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
define void @v_brev_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
%val = load i32 addrspace(1)* %valptr, align 4
%ctlz = call i32 @llvm.AMDGPU.brev(i32 %val) nounwind readnone
declare float @llvm.AMDIL.clamp.f32(float, float, float) nounwind readnone
; FUNC-LABEL: {{^}}clamp_0_1_f32:
-; SI: S_LOAD_DWORD [[ARG:s[0-9]+]],
-; SI: V_ADD_F32_e64 [[RESULT:v[0-9]+]], 0, [[ARG]] clamp{{$}}
-; SI: BUFFER_STORE_DWORD [[RESULT]]
-; SI: S_ENDPGM
+; SI: s_load_dword [[ARG:s[0-9]+]],
+; SI: v_add_f32_e64 [[RESULT:v[0-9]+]], 0, [[ARG]] clamp{{$}}
+; SI: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
; EG: MOV_SAT
define void @clamp_0_1_f32(float addrspace(1)* %out, float %src) nounwind {
}
; FUNC-LABEL: {{^}}clamp_0_1_amdil_legacy_f32:
-; SI: S_LOAD_DWORD [[ARG:s[0-9]+]],
-; SI: V_ADD_F32_e64 [[RESULT:v[0-9]+]], 0, [[ARG]] clamp{{$}}
-; SI: BUFFER_STORE_DWORD [[RESULT]]
+; SI: s_load_dword [[ARG:s[0-9]+]],
+; SI: v_add_f32_e64 [[RESULT:v[0-9]+]], 0, [[ARG]] clamp{{$}}
+; SI: buffer_store_dword [[RESULT]]
define void @clamp_0_1_amdil_legacy_f32(float addrspace(1)* %out, float %src) nounwind {
%clamp = call float @llvm.AMDIL.clamp.f32(float %src, float 0.0, float 1.0) nounwind readnone
store float %clamp, float addrspace(1)* %out, align 4
declare float @llvm.AMDGPU.cvt.f32.ubyte3(i32) nounwind readnone
; SI-LABEL: {{^}}test_unpack_byte0_to_float:
-; SI: V_CVT_F32_UBYTE0
+; SI: v_cvt_f32_ubyte0
define void @test_unpack_byte0_to_float(float addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%val = load i32 addrspace(1)* %in, align 4
%cvt = call float @llvm.AMDGPU.cvt.f32.ubyte0(i32 %val) nounwind readnone
}
; SI-LABEL: {{^}}test_unpack_byte1_to_float:
-; SI: V_CVT_F32_UBYTE1
+; SI: v_cvt_f32_ubyte1
define void @test_unpack_byte1_to_float(float addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%val = load i32 addrspace(1)* %in, align 4
%cvt = call float @llvm.AMDGPU.cvt.f32.ubyte1(i32 %val) nounwind readnone
}
; SI-LABEL: {{^}}test_unpack_byte2_to_float:
-; SI: V_CVT_F32_UBYTE2
+; SI: v_cvt_f32_ubyte2
define void @test_unpack_byte2_to_float(float addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%val = load i32 addrspace(1)* %in, align 4
%cvt = call float @llvm.AMDGPU.cvt.f32.ubyte2(i32 %val) nounwind readnone
}
; SI-LABEL: {{^}}test_unpack_byte3_to_float:
-; SI: V_CVT_F32_UBYTE3
+; SI: v_cvt_f32_ubyte3
define void @test_unpack_byte3_to_float(float addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%val = load i32 addrspace(1)* %in, align 4
%cvt = call float @llvm.AMDGPU.cvt.f32.ubyte3(i32 %val) nounwind readnone
declare double @llvm.AMDGPU.div.fixup.f64(double, double, double) nounwind readnone
; SI-LABEL: {{^}}test_div_fixup_f32:
-; SI-DAG: S_LOAD_DWORD [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-DAG: S_LOAD_DWORD [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
-; SI-DAG: S_LOAD_DWORD [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
-; SI-DAG: V_MOV_B32_e32 [[VC:v[0-9]+]], [[SC]]
-; SI-DAG: V_MOV_B32_e32 [[VB:v[0-9]+]], [[SB]]
-; SI: V_DIV_FIXUP_F32 [[RESULT:v[0-9]+]], [[SA]], [[VB]], [[VC]]
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dword [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
+; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI-DAG: v_mov_b32_e32 [[VC:v[0-9]+]], [[SC]]
+; SI-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
+; SI: v_div_fixup_f32 [[RESULT:v[0-9]+]], [[SA]], [[VB]], [[VC]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
define void @test_div_fixup_f32(float addrspace(1)* %out, float %a, float %b, float %c) nounwind {
%result = call float @llvm.AMDGPU.div.fixup.f32(float %a, float %b, float %c) nounwind readnone
store float %result, float addrspace(1)* %out, align 4
}
; SI-LABEL: {{^}}test_div_fixup_f64:
-; SI: V_DIV_FIXUP_F64
+; SI: v_div_fixup_f64
define void @test_div_fixup_f64(double addrspace(1)* %out, double %a, double %b, double %c) nounwind {
%result = call double @llvm.AMDGPU.div.fixup.f64(double %a, double %b, double %c) nounwind readnone
store double %result, double addrspace(1)* %out, align 8
declare double @llvm.AMDGPU.div.fmas.f64(double, double, double, i1) nounwind readnone
; SI-LABEL: {{^}}test_div_fmas_f32:
-; SI-DAG: S_LOAD_DWORD [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-DAG: S_LOAD_DWORD [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
-; SI-DAG: S_LOAD_DWORD [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
-; SI-DAG: V_MOV_B32_e32 [[VC:v[0-9]+]], [[SC]]
-; SI-DAG: V_MOV_B32_e32 [[VB:v[0-9]+]], [[SB]]
-; SI: V_DIV_FMAS_F32 [[RESULT:v[0-9]+]], [[SA]], [[VB]], [[VC]]
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dword [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
+; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI-DAG: v_mov_b32_e32 [[VC:v[0-9]+]], [[SC]]
+; SI-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
+; SI: v_div_fmas_f32 [[RESULT:v[0-9]+]], [[SA]], [[VB]], [[VC]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
define void @test_div_fmas_f32(float addrspace(1)* %out, float %a, float %b, float %c, i1 %d) nounwind {
%result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c, i1 %d) nounwind readnone
store float %result, float addrspace(1)* %out, align 4
}
; SI-LABEL: {{^}}test_div_fmas_f64:
-; SI: V_DIV_FMAS_F64
+; SI: v_div_fmas_f64
define void @test_div_fmas_f64(double addrspace(1)* %out, double %a, double %b, double %c, i1 %d) nounwind {
%result = call double @llvm.AMDGPU.div.fmas.f64(double %a, double %b, double %c, i1 %d) nounwind readnone
store double %result, double addrspace(1)* %out, align 8
declare { double, i1 } @llvm.AMDGPU.div.scale.f64(double, double, i1) nounwind readnone
; SI-LABEL @test_div_scale_f32_1:
-; SI-DAG: BUFFER_LOAD_DWORD [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
-; SI-DAG: BUFFER_LOAD_DWORD [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
-; SI: V_DIV_SCALE_F32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
-; SI: BUFFER_STORE_DWORD [[RESULT0]]
-; SI: S_ENDPGM
+; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
+; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
+; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
+; SI: buffer_store_dword [[RESULT0]]
+; SI: s_endpgm
define void @test_div_scale_f32_1(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
}
; SI-LABEL @test_div_scale_f32_2:
-; SI-DAG: BUFFER_LOAD_DWORD [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
-; SI-DAG: BUFFER_LOAD_DWORD [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
-; SI: V_DIV_SCALE_F32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
-; SI: BUFFER_STORE_DWORD [[RESULT0]]
-; SI: S_ENDPGM
+; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
+; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
+; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
+; SI: buffer_store_dword [[RESULT0]]
+; SI: s_endpgm
define void @test_div_scale_f32_2(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
}
; SI-LABEL @test_div_scale_f64_1:
-; SI-DAG: BUFFER_LOAD_DWORDX2 [[A:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
-; SI-DAG: BUFFER_LOAD_DWORDX2 [[B:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x8
-; SI: V_DIV_SCALE_F64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
-; SI: BUFFER_STORE_DWORDX2 [[RESULT0]]
-; SI: S_ENDPGM
+; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
+; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x8
+; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
+; SI: buffer_store_dwordx2 [[RESULT0]]
+; SI: s_endpgm
define void @test_div_scale_f64_1(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
}
; SI-LABEL @test_div_scale_f64_1:
-; SI-DAG: BUFFER_LOAD_DWORDX2 [[A:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
-; SI-DAG: BUFFER_LOAD_DWORDX2 [[B:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x8
-; SI: V_DIV_SCALE_F64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
-; SI: BUFFER_STORE_DWORDX2 [[RESULT0]]
-; SI: S_ENDPGM
+; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
+; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x8
+; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
+; SI: buffer_store_dwordx2 [[RESULT0]]
+; SI: s_endpgm
define void @test_div_scale_f64_2(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
}
; SI-LABEL @test_div_scale_f32_scalar_num_1:
-; SI-DAG: BUFFER_LOAD_DWORD [[B:v[0-9]+]]
-; SI-DAG: S_LOAD_DWORD [[A:s[0-9]+]]
-; SI: V_DIV_SCALE_F32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
-; SI: BUFFER_STORE_DWORD [[RESULT0]]
-; SI: S_ENDPGM
+; SI-DAG: buffer_load_dword [[B:v[0-9]+]]
+; SI-DAG: s_load_dword [[A:s[0-9]+]]
+; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
+; SI: buffer_store_dword [[RESULT0]]
+; SI: s_endpgm
define void @test_div_scale_f32_scalar_num_1(float addrspace(1)* %out, float addrspace(1)* %in, float %a) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep = getelementptr float addrspace(1)* %in, i32 %tid
}
; SI-LABEL @test_div_scale_f32_scalar_num_2:
-; SI-DAG: BUFFER_LOAD_DWORD [[B:v[0-9]+]]
-; SI-DAG: S_LOAD_DWORD [[A:s[0-9]+]]
-; SI: V_DIV_SCALE_F32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
-; SI: BUFFER_STORE_DWORD [[RESULT0]]
-; SI: S_ENDPGM
+; SI-DAG: buffer_load_dword [[B:v[0-9]+]]
+; SI-DAG: s_load_dword [[A:s[0-9]+]]
+; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
+; SI: buffer_store_dword [[RESULT0]]
+; SI: s_endpgm
define void @test_div_scale_f32_scalar_num_2(float addrspace(1)* %out, float addrspace(1)* %in, float %a) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep = getelementptr float addrspace(1)* %in, i32 %tid
}
; SI-LABEL @test_div_scale_f32_scalar_den_1:
-; SI-DAG: BUFFER_LOAD_DWORD [[A:v[0-9]+]]
-; SI-DAG: S_LOAD_DWORD [[B:s[0-9]+]]
-; SI: V_DIV_SCALE_F32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
-; SI: BUFFER_STORE_DWORD [[RESULT0]]
-; SI: S_ENDPGM
+; SI-DAG: buffer_load_dword [[A:v[0-9]+]]
+; SI-DAG: s_load_dword [[B:s[0-9]+]]
+; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
+; SI: buffer_store_dword [[RESULT0]]
+; SI: s_endpgm
define void @test_div_scale_f32_scalar_den_1(float addrspace(1)* %out, float addrspace(1)* %in, float %b) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep = getelementptr float addrspace(1)* %in, i32 %tid
}
; SI-LABEL @test_div_scale_f32_scalar_den_2:
-; SI-DAG: BUFFER_LOAD_DWORD [[A:v[0-9]+]]
-; SI-DAG: S_LOAD_DWORD [[B:s[0-9]+]]
-; SI: V_DIV_SCALE_F32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
-; SI: BUFFER_STORE_DWORD [[RESULT0]]
-; SI: S_ENDPGM
+; SI-DAG: buffer_load_dword [[A:v[0-9]+]]
+; SI-DAG: s_load_dword [[B:s[0-9]+]]
+; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
+; SI: buffer_store_dword [[RESULT0]]
+; SI: s_endpgm
define void @test_div_scale_f32_scalar_den_2(float addrspace(1)* %out, float addrspace(1)* %in, float %b) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep = getelementptr float addrspace(1)* %in, i32 %tid
}
; SI-LABEL @test_div_scale_f64_scalar_num_1:
-; SI-DAG: BUFFER_LOAD_DWORDX2 [[B:v\[[0-9]+:[0-9]+\]]]
-; SI-DAG: S_LOAD_DWORDX2 [[A:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
-; SI: V_DIV_SCALE_F64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
-; SI: BUFFER_STORE_DWORDX2 [[RESULT0]]
-; SI: S_ENDPGM
+; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]]
+; SI-DAG: s_load_dwordx2 [[A:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
+; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
+; SI: buffer_store_dwordx2 [[RESULT0]]
+; SI: s_endpgm
define void @test_div_scale_f64_scalar_num_1(double addrspace(1)* %out, double addrspace(1)* %in, double %a) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep = getelementptr double addrspace(1)* %in, i32 %tid
}
; SI-LABEL @test_div_scale_f64_scalar_num_2:
-; SI-DAG: S_LOAD_DWORDX2 [[A:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
-; SI-DAG: BUFFER_LOAD_DWORDX2 [[B:v\[[0-9]+:[0-9]+\]]]
-; SI: V_DIV_SCALE_F64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
-; SI: BUFFER_STORE_DWORDX2 [[RESULT0]]
-; SI: S_ENDPGM
+; SI-DAG: s_load_dwordx2 [[A:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
+; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]]
+; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
+; SI: buffer_store_dwordx2 [[RESULT0]]
+; SI: s_endpgm
define void @test_div_scale_f64_scalar_num_2(double addrspace(1)* %out, double addrspace(1)* %in, double %a) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep = getelementptr double addrspace(1)* %in, i32 %tid
}
; SI-LABEL @test_div_scale_f64_scalar_den_1:
-; SI-DAG: BUFFER_LOAD_DWORDX2 [[A:v\[[0-9]+:[0-9]+\]]]
-; SI-DAG: S_LOAD_DWORDX2 [[B:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
-; SI: V_DIV_SCALE_F64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
-; SI: BUFFER_STORE_DWORDX2 [[RESULT0]]
-; SI: S_ENDPGM
+; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
+; SI-DAG: s_load_dwordx2 [[B:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
+; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
+; SI: buffer_store_dwordx2 [[RESULT0]]
+; SI: s_endpgm
define void @test_div_scale_f64_scalar_den_1(double addrspace(1)* %out, double addrspace(1)* %in, double %b) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep = getelementptr double addrspace(1)* %in, i32 %tid
}
; SI-LABEL @test_div_scale_f64_scalar_den_2:
-; SI-DAG: BUFFER_LOAD_DWORDX2 [[A:v\[[0-9]+:[0-9]+\]]]
-; SI-DAG: S_LOAD_DWORDX2 [[B:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
-; SI: V_DIV_SCALE_F64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
-; SI: BUFFER_STORE_DWORDX2 [[RESULT0]]
-; SI: S_ENDPGM
+; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
+; SI-DAG: s_load_dwordx2 [[B:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
+; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
+; SI: buffer_store_dwordx2 [[RESULT0]]
+; SI: s_endpgm
define void @test_div_scale_f64_scalar_den_2(double addrspace(1)* %out, double addrspace(1)* %in, double %b) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep = getelementptr double addrspace(1)* %in, i32 %tid
}
; SI-LABEL @test_div_scale_f32_all_scalar_1:
-; SI-DAG: S_LOAD_DWORD [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-DAG: S_LOAD_DWORD [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xc
-; SI: V_MOV_B32_e32 [[VA:v[0-9]+]], [[A]]
-; SI: V_DIV_SCALE_F32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[VA]]
-; SI: BUFFER_STORE_DWORD [[RESULT0]]
-; SI: S_ENDPGM
+; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: v_mov_b32_e32 [[VA:v[0-9]+]], [[A]]
+; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[VA]]
+; SI: buffer_store_dword [[RESULT0]]
+; SI: s_endpgm
define void @test_div_scale_f32_all_scalar_1(float addrspace(1)* %out, float %a, float %b) nounwind {
%result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
%result0 = extractvalue { float, i1 } %result, 0
}
; SI-LABEL @test_div_scale_f32_all_scalar_2:
-; SI-DAG: S_LOAD_DWORD [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-DAG: S_LOAD_DWORD [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xc
-; SI: V_MOV_B32_e32 [[VB:v[0-9]+]], [[B]]
-; SI: V_DIV_SCALE_F32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[VB]], [[A]]
-; SI: BUFFER_STORE_DWORD [[RESULT0]]
-; SI: S_ENDPGM
+; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: v_mov_b32_e32 [[VB:v[0-9]+]], [[B]]
+; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[VB]], [[A]]
+; SI: buffer_store_dword [[RESULT0]]
+; SI: s_endpgm
define void @test_div_scale_f32_all_scalar_2(float addrspace(1)* %out, float %a, float %b) nounwind {
%result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
%result0 = extractvalue { float, i1 } %result, 0
}
; SI-LABEL @test_div_scale_f64_all_scalar_1:
-; SI-DAG: S_LOAD_DWORDX2 s{{\[}}[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]{{\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-DAG: S_LOAD_DWORDX2 [[B:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
-; SI-DAG: V_MOV_B32_e32 v[[VA_LO:[0-9]+]], s[[A_LO]]
-; SI-DAG: V_MOV_B32_e32 v[[VA_HI:[0-9]+]], s[[A_HI]]
-; SI: V_DIV_SCALE_F64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], v{{\[}}[[VA_LO]]:[[VA_HI]]{{\]}}
-; SI: BUFFER_STORE_DWORDX2 [[RESULT0]]
-; SI: S_ENDPGM
+; SI-DAG: s_load_dwordx2 s{{\[}}[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]{{\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dwordx2 [[B:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
+; SI-DAG: v_mov_b32_e32 v[[VA_LO:[0-9]+]], s[[A_LO]]
+; SI-DAG: v_mov_b32_e32 v[[VA_HI:[0-9]+]], s[[A_HI]]
+; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], v{{\[}}[[VA_LO]]:[[VA_HI]]{{\]}}
+; SI: buffer_store_dwordx2 [[RESULT0]]
+; SI: s_endpgm
define void @test_div_scale_f64_all_scalar_1(double addrspace(1)* %out, double %a, double %b) nounwind {
%result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
%result0 = extractvalue { double, i1 } %result, 0
}
; SI-LABEL @test_div_scale_f64_all_scalar_2:
-; SI-DAG: S_LOAD_DWORDX2 [[A:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-DAG: S_LOAD_DWORDX2 s{{\[}}[[B_LO:[0-9]+]]:[[B_HI:[0-9]+]]{{\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0xd
-; SI-DAG: V_MOV_B32_e32 v[[VB_LO:[0-9]+]], s[[B_LO]]
-; SI-DAG: V_MOV_B32_e32 v[[VB_HI:[0-9]+]], s[[B_HI]]
-; SI: V_DIV_SCALE_F64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], v{{\[}}[[VB_LO]]:[[VB_HI]]{{\]}}, [[A]]
-; SI: BUFFER_STORE_DWORDX2 [[RESULT0]]
-; SI: S_ENDPGM
+; SI-DAG: s_load_dwordx2 [[A:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dwordx2 s{{\[}}[[B_LO:[0-9]+]]:[[B_HI:[0-9]+]]{{\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0xd
+; SI-DAG: v_mov_b32_e32 v[[VB_LO:[0-9]+]], s[[B_LO]]
+; SI-DAG: v_mov_b32_e32 v[[VB_HI:[0-9]+]], s[[B_HI]]
+; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], v{{\[}}[[VB_LO]]:[[VB_HI]]{{\]}}, [[A]]
+; SI: buffer_store_dwordx2 [[RESULT0]]
+; SI: s_endpgm
define void @test_div_scale_f64_all_scalar_2(double addrspace(1)* %out, double %a, double %b) nounwind {
%result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
%result0 = extractvalue { double, i1 } %result, 0
declare float @llvm.AMDIL.fraction.f32(float) nounwind readnone
; FUNC-LABEL: {{^}}fract_f32:
-; SI: V_FRACT_F32
+; SI: v_fract_f32
; EG: FRACT
define void @fract_f32(float addrspace(1)* %out, float addrspace(1)* %src) nounwind {
%val = load float addrspace(1)* %src, align 4
}
; FUNC-LABEL: {{^}}fract_f32_legacy_amdil:
-; SI: V_FRACT_F32
+; SI: v_fract_f32
; EG: FRACT
define void @fract_f32_legacy_amdil(float addrspace(1)* %out, float addrspace(1)* %src) nounwind {
%val = load float addrspace(1)* %src, align 4
declare i32 @llvm.AMDGPU.imad24(i32, i32, i32) nounwind readnone
; FUNC-LABEL: {{^}}test_imad24:
-; SI: V_MAD_I32_I24
+; SI: v_mad_i32_i24
; CM: MULADD_INT24
; R600: MULLO_INT
; R600: ADD_INT
; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=SI %s
; SI-LABEL: {{^}}vector_imax:
-; SI: V_MAX_I32_e32
+; SI: v_max_i32_e32
define void @vector_imax(i32 %p0, i32 %p1, i32 addrspace(1)* %in) #0 {
main_body:
%load = load i32 addrspace(1)* %in, align 4
}
; SI-LABEL: {{^}}scalar_imax:
-; SI: S_MAX_I32
+; SI: s_max_i32
define void @scalar_imax(i32 %p0, i32 %p1) #0 {
entry:
%max = call i32 @llvm.AMDGPU.imax(i32 %p0, i32 %p1)
; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=SI %s
; SI-LABEL: {{^}}vector_imin:
-; SI: V_MIN_I32_e32
+; SI: v_min_i32_e32
define void @vector_imin(i32 %p0, i32 %p1, i32 addrspace(1)* %in) #0 {
main_body:
%load = load i32 addrspace(1)* %in, align 4
}
; SI-LABEL: {{^}}scalar_imin:
-; SI: S_MIN_I32
+; SI: s_min_i32
define void @scalar_imin(i32 %p0, i32 %p1) #0 {
entry:
%min = call i32 @llvm.AMDGPU.imin(i32 %p0, i32 %p1)
declare i32 @llvm.AMDGPU.imul24(i32, i32) nounwind readnone
; FUNC-LABEL: {{^}}test_imul24:
-; SI: V_MUL_I32_I24
+; SI: v_mul_i32_i24
; CM: MUL_INT24
; R600: MULLO_INT
define void @test_imul24(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
; RUN: llc -march=r600 -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; SI-LABEL: {{^}}kill_gs_const:
-; SI-NOT: V_CMPX_LE_F32
-; SI: S_MOV_B64 exec, 0
+; SI-NOT: v_cmpx_le_f32
+; SI: s_mov_b64 exec, 0
define void @kill_gs_const() #0 {
main_body:
declare double @llvm.AMDGPU.ldexp.f64(double, i32) nounwind readnone
; SI-LABEL: {{^}}test_ldexp_f32:
-; SI: V_LDEXP_F32
-; SI: S_ENDPGM
+; SI: v_ldexp_f32
+; SI: s_endpgm
define void @test_ldexp_f32(float addrspace(1)* %out, float %a, i32 %b) nounwind {
%result = call float @llvm.AMDGPU.ldexp.f32(float %a, i32 %b) nounwind readnone
store float %result, float addrspace(1)* %out, align 4
}
; SI-LABEL: {{^}}test_ldexp_f64:
-; SI: V_LDEXP_F64
-; SI: S_ENDPGM
+; SI: v_ldexp_f64
+; SI: s_endpgm
define void @test_ldexp_f64(double addrspace(1)* %out, double %a, i32 %b) nounwind {
%result = call double @llvm.AMDGPU.ldexp.f64(double %a, i32 %b) nounwind readnone
store double %result, double addrspace(1)* %out, align 8
declare float @llvm.AMDGPU.legacy.rsq(float) nounwind readnone
; FUNC-LABEL: {{^}}rsq_legacy_f32:
-; SI: V_RSQ_LEGACY_F32_e32
+; SI: v_rsq_legacy_f32_e32
; EG: RECIPSQRT_IEEE
define void @rsq_legacy_f32(float addrspace(1)* %out, float %src) nounwind {
%rsq = call float @llvm.AMDGPU.legacy.rsq(float %src) nounwind readnone
declare double @llvm.sqrt.f64(double) nounwind readnone
; FUNC-LABEL: {{^}}rcp_f64:
-; SI: V_RCP_F64_e32
+; SI: v_rcp_f64_e32
define void @rcp_f64(double addrspace(1)* %out, double %src) nounwind {
%rcp = call double @llvm.AMDGPU.rcp.f64(double %src) nounwind readnone
store double %rcp, double addrspace(1)* %out, align 8
}
; FUNC-LABEL: {{^}}rcp_pat_f64:
-; SI: V_RCP_F64_e32
+; SI: v_rcp_f64_e32
define void @rcp_pat_f64(double addrspace(1)* %out, double %src) nounwind {
%rcp = fdiv double 1.0, %src
store double %rcp, double addrspace(1)* %out, align 8
}
; FUNC-LABEL: {{^}}rsq_rcp_pat_f64:
-; SI-UNSAFE: V_RSQ_F64_e32
-; SI-SAFE-NOT: V_RSQ_F64_e32
+; SI-UNSAFE: v_rsq_f64_e32
+; SI-SAFE-NOT: v_rsq_f64_e32
define void @rsq_rcp_pat_f64(double addrspace(1)* %out, double %src) nounwind {
%sqrt = call double @llvm.sqrt.f64(double %src) nounwind readnone
%rcp = call double @llvm.AMDGPU.rcp.f64(double %sqrt) nounwind readnone
declare float @llvm.sqrt.f32(float) nounwind readnone
; FUNC-LABEL: {{^}}rcp_f32:
-; SI: V_RCP_F32_e32
+; SI: v_rcp_f32_e32
; EG: RECIP_IEEE
define void @rcp_f32(float addrspace(1)* %out, float %src) nounwind {
%rcp = call float @llvm.AMDGPU.rcp.f32(float %src) nounwind readnone
; FIXME: Evergreen only ever does unsafe fp math.
; FUNC-LABEL: {{^}}rcp_pat_f32:
-; SI-SAFE: V_RCP_F32_e32
-; XSI-SAFE-SPDENORM-NOT: V_RCP_F32_e32
+; SI-SAFE: v_rcp_f32_e32
+; XSI-SAFE-SPDENORM-NOT: v_rcp_f32_e32
; EG: RECIP_IEEE
}
; FUNC-LABEL: {{^}}rsq_rcp_pat_f32:
-; SI-UNSAFE: V_RSQ_F32_e32
-; SI-SAFE: V_SQRT_F32_e32
-; SI-SAFE: V_RCP_F32_e32
+; SI-UNSAFE: v_rsq_f32_e32
+; SI-SAFE: v_sqrt_f32_e32
+; SI-SAFE: v_rcp_f32_e32
; EG: RECIPSQRT_IEEE
define void @rsq_rcp_pat_f32(float addrspace(1)* %out, float %src) nounwind {
declare double @llvm.AMDGPU.rsq.clamped.f64(double) nounwind readnone
; FUNC-LABEL: {{^}}rsq_clamped_f64:
-; SI: V_RSQ_CLAMP_F64_e32
+; SI: v_rsq_clamp_f64_e32
define void @rsq_clamped_f64(double addrspace(1)* %out, double %src) nounwind {
%rsq_clamped = call double @llvm.AMDGPU.rsq.clamped.f64(double %src) nounwind readnone
store double %rsq_clamped, double addrspace(1)* %out, align 8
declare float @llvm.AMDGPU.rsq.clamped.f32(float) nounwind readnone
; FUNC-LABEL: {{^}}rsq_clamped_f32:
-; SI: V_RSQ_CLAMP_F32_e32
+; SI: v_rsq_clamp_f32_e32
; EG: RECIPSQRT_CLAMPED
define void @rsq_clamped_f32(float addrspace(1)* %out, float %src) nounwind {
%rsq_clamped = call float @llvm.AMDGPU.rsq.clamped.f32(float %src) nounwind readnone
declare float @llvm.AMDGPU.rsq.f32(float) nounwind readnone
; FUNC-LABEL: {{^}}rsq_f32:
-; SI: V_RSQ_F32_e32 {{v[0-9]+}}, {{s[0-9]+}}
+; SI: v_rsq_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
; EG: RECIPSQRT_IEEE
define void @rsq_f32(float addrspace(1)* %out, float %src) nounwind {
%rsq = call float @llvm.AMDGPU.rsq.f32(float %src) nounwind readnone
; TODO: Really these should be constant folded
; FUNC-LABEL: {{^}}rsq_f32_constant_4.0
-; SI: V_RSQ_F32_e32 {{v[0-9]+}}, 4.0
+; SI: v_rsq_f32_e32 {{v[0-9]+}}, 4.0
; EG: RECIPSQRT_IEEE
define void @rsq_f32_constant_4.0(float addrspace(1)* %out) nounwind {
%rsq = call float @llvm.AMDGPU.rsq.f32(float 4.0) nounwind readnone
}
; FUNC-LABEL: {{^}}rsq_f32_constant_100.0
-; SI: V_RSQ_F32_e32 {{v[0-9]+}}, 0x42c80000
+; SI: v_rsq_f32_e32 {{v[0-9]+}}, 0x42c80000
; EG: RECIPSQRT_IEEE
define void @rsq_f32_constant_100.0(float addrspace(1)* %out) nounwind {
%rsq = call float @llvm.AMDGPU.rsq.f32(float 100.0) nounwind readnone
declare double @llvm.AMDGPU.trig.preop.f64(double, i32) nounwind readnone
; SI-LABEL: {{^}}test_trig_preop_f64:
-; SI-DAG: BUFFER_LOAD_DWORD [[SEG:v[0-9]+]]
-; SI-DAG: BUFFER_LOAD_DWORDX2 [[SRC:v\[[0-9]+:[0-9]+\]]],
-; SI: V_TRIG_PREOP_F64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[SRC]], [[SEG]]
-; SI: BUFFER_STORE_DWORDX2 [[RESULT]],
-; SI: S_ENDPGM
+; SI-DAG: buffer_load_dword [[SEG:v[0-9]+]]
+; SI-DAG: buffer_load_dwordx2 [[SRC:v\[[0-9]+:[0-9]+\]]],
+; SI: v_trig_preop_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[SRC]], [[SEG]]
+; SI: buffer_store_dwordx2 [[RESULT]],
+; SI: s_endpgm
define void @test_trig_preop_f64(double addrspace(1)* %out, double addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
%a = load double addrspace(1)* %aptr, align 8
%b = load i32 addrspace(1)* %bptr, align 4
}
; SI-LABEL: {{^}}test_trig_preop_f64_imm_segment:
-; SI: BUFFER_LOAD_DWORDX2 [[SRC:v\[[0-9]+:[0-9]+\]]],
-; SI: V_TRIG_PREOP_F64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[SRC]], 7
-; SI: BUFFER_STORE_DWORDX2 [[RESULT]],
-; SI: S_ENDPGM
+; SI: buffer_load_dwordx2 [[SRC:v\[[0-9]+:[0-9]+\]]],
+; SI: v_trig_preop_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[SRC]], 7
+; SI: buffer_store_dwordx2 [[RESULT]],
+; SI: s_endpgm
define void @test_trig_preop_f64_imm_segment(double addrspace(1)* %out, double addrspace(1)* %aptr) nounwind {
%a = load double addrspace(1)* %aptr, align 8
%result = call double @llvm.AMDGPU.trig.preop.f64(double %a, i32 7) nounwind readnone
; R600-CHECK: {{^}}amdgpu_trunc:
; R600-CHECK: TRUNC T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI-CHECK: {{^}}amdgpu_trunc:
-; SI-CHECK: V_TRUNC_F32
+; SI-CHECK: v_trunc_f32
define void @amdgpu_trunc(float addrspace(1)* %out, float %x) {
entry:
declare i32 @llvm.AMDGPU.umad24(i32, i32, i32) nounwind readnone
; FUNC-LABEL: {{^}}test_umad24:
-; SI: V_MAD_U32_U24
+; SI: v_mad_u32_u24
; EG: MULADD_UINT24
; R600: MULLO_UINT
; R600: ADD_INT
; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=SI %s
; SI-LABEL: {{^}}vector_umax:
-; SI: V_MAX_U32_e32
+; SI: v_max_u32_e32
define void @vector_umax(i32 %p0, i32 %p1, i32 addrspace(1)* %in) #0 {
main_body:
%load = load i32 addrspace(1)* %in, align 4
}
; SI-LABEL: {{^}}scalar_umax:
-; SI: S_MAX_U32
+; SI: s_max_u32
define void @scalar_umax(i32 %p0, i32 %p1) #0 {
entry:
%max = call i32 @llvm.AMDGPU.umax(i32 %p0, i32 %p1)
}
; SI-LABEL: {{^}}trunc_zext_umax:
-; SI: BUFFER_LOAD_UBYTE [[VREG:v[0-9]+]],
-; SI: V_MAX_U32_e32 [[RESULT:v[0-9]+]], 0, [[VREG]]
-; SI-NOT: AND
-; SI: BUFFER_STORE_SHORT [[RESULT]],
+; SI: buffer_load_ubyte [[VREG:v[0-9]+]],
+; SI: v_max_u32_e32 [[RESULT:v[0-9]+]], 0, [[VREG]]
+; SI-NOT: and
+; SI: buffer_store_short [[RESULT]],
define void @trunc_zext_umax(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) nounwind {
%tmp5 = load i8 addrspace(1)* %src, align 1
%tmp2 = zext i8 %tmp5 to i32
; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=SI %s
; SI-LABEL: {{^}}vector_umin:
-; SI: V_MIN_U32_e32
+; SI: v_min_u32_e32
define void @vector_umin(i32 %p0, i32 %p1, i32 addrspace(1)* %in) #0 {
main_body:
%load = load i32 addrspace(1)* %in, align 4
}
; SI-LABEL: {{^}}scalar_umin:
-; SI: S_MIN_U32
+; SI: s_min_u32
define void @scalar_umin(i32 %p0, i32 %p1) #0 {
entry:
%min = call i32 @llvm.AMDGPU.umin(i32 %p0, i32 %p1)
}
; SI-LABEL: {{^}}trunc_zext_umin:
-; SI: BUFFER_LOAD_UBYTE [[VREG:v[0-9]+]],
-; SI: V_MIN_U32_e32 [[RESULT:v[0-9]+]], 0, [[VREG]]
-; SI-NOT: AND
-; SI: BUFFER_STORE_SHORT [[RESULT]],
+; SI: buffer_load_ubyte [[VREG:v[0-9]+]],
+; SI: v_min_u32_e32 [[RESULT:v[0-9]+]], 0, [[VREG]]
+; SI-NOT: and
+; SI: buffer_store_short [[RESULT]],
define void @trunc_zext_umin(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) nounwind {
%tmp5 = load i8 addrspace(1)* %src, align 1
%tmp2 = zext i8 %tmp5 to i32
declare i32 @llvm.AMDGPU.umul24(i32, i32) nounwind readnone
; FUNC-LABEL: {{^}}test_umul24:
-; SI: V_MUL_U32_U24
+; SI: v_mul_u32_u24
; R600: MUL_UINT24
; R600: MULLO_UINT
define void @test_umul24(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
-;CHECK: S_MOV_B32
-;CHECK-NEXT: V_INTERP_MOV_F32
+;CHECK: s_mov_b32
+;CHECK-NEXT: v_interp_mov_f32
define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg) "ShaderType"="0" {
main_body:
;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
;CHECK-LABEL: {{^}}gather4_v2:
-;CHECK: IMAGE_GATHER4 {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_gather4 {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_v2() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.v2i32(<2 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}gather4:
-;CHECK: IMAGE_GATHER4 {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_gather4 {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}gather4_cl:
-;CHECK: IMAGE_GATHER4_CL {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_gather4_cl {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_cl() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.cl.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}gather4_l:
-;CHECK: IMAGE_GATHER4_L {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_gather4_l {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_l() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.l.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}gather4_b:
-;CHECK: IMAGE_GATHER4_B {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_gather4_b {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_b() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.b.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}gather4_b_cl:
-;CHECK: IMAGE_GATHER4_B_CL {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_gather4_b_cl {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_b_cl() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.b.cl.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}gather4_b_cl_v8:
-;CHECK: IMAGE_GATHER4_B_CL {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_gather4_b_cl {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_b_cl_v8() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.b.cl.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}gather4_lz_v2:
-;CHECK: IMAGE_GATHER4_LZ {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_gather4_lz {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_lz_v2() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.lz.v2i32(<2 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}gather4_lz:
-;CHECK: IMAGE_GATHER4_LZ {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_gather4_lz {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_lz() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.lz.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
;CHECK-LABEL: {{^}}gather4_o:
-;CHECK: IMAGE_GATHER4_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_gather4_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_o() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}gather4_cl_o:
-;CHECK: IMAGE_GATHER4_CL_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_gather4_cl_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_cl_o() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.cl.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}gather4_cl_o_v8:
-;CHECK: IMAGE_GATHER4_CL_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_gather4_cl_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_cl_o_v8() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.cl.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}gather4_l_o:
-;CHECK: IMAGE_GATHER4_L_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_gather4_l_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_l_o() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.l.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}gather4_l_o_v8:
-;CHECK: IMAGE_GATHER4_L_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_gather4_l_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_l_o_v8() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.l.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}gather4_b_o:
-;CHECK: IMAGE_GATHER4_B_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_gather4_b_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_b_o() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.b.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}gather4_b_o_v8:
-;CHECK: IMAGE_GATHER4_B_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_gather4_b_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_b_o_v8() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.b.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}gather4_b_cl_o:
-;CHECK: IMAGE_GATHER4_B_CL_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_gather4_b_cl_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_b_cl_o() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.b.cl.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}gather4_lz_o:
-;CHECK: IMAGE_GATHER4_LZ_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_gather4_lz_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_lz_o() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.lz.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
;CHECK-LABEL: {{^}}gather4_c:
-;CHECK: IMAGE_GATHER4_C {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_gather4_c {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_c() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.c.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}gather4_c_cl:
-;CHECK: IMAGE_GATHER4_C_CL {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_gather4_c_cl {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_c_cl() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.c.cl.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}gather4_c_cl_v8:
-;CHECK: IMAGE_GATHER4_C_CL {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_gather4_c_cl {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_c_cl_v8() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.c.cl.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}gather4_c_l:
-;CHECK: IMAGE_GATHER4_C_L {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_gather4_c_l {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_c_l() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.c.l.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}gather4_c_l_v8:
-;CHECK: IMAGE_GATHER4_C_L {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_gather4_c_l {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_c_l_v8() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.c.l.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}gather4_c_b:
-;CHECK: IMAGE_GATHER4_C_B {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_gather4_c_b {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_c_b() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.c.b.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}gather4_c_b_v8:
-;CHECK: IMAGE_GATHER4_C_B {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_gather4_c_b {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_c_b_v8() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.c.b.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}gather4_c_b_cl:
-;CHECK: IMAGE_GATHER4_C_B_CL {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_gather4_c_b_cl {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_c_b_cl() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.c.b.cl.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}gather4_c_lz:
-;CHECK: IMAGE_GATHER4_C_LZ {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_gather4_c_lz {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_c_lz() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.c.lz.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
;CHECK-LABEL: {{^}}gather4_c_o:
-;CHECK: IMAGE_GATHER4_C_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_gather4_c_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_c_o() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.c.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}gather4_c_o_v8:
-;CHECK: IMAGE_GATHER4_C_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_gather4_c_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_c_o_v8() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.c.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}gather4_c_cl_o:
-;CHECK: IMAGE_GATHER4_C_CL_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_gather4_c_cl_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_c_cl_o() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.c.cl.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}gather4_c_l_o:
-;CHECK: IMAGE_GATHER4_C_L_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_gather4_c_l_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_c_l_o() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.c.l.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}gather4_c_b_o:
-;CHECK: IMAGE_GATHER4_C_B_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_gather4_c_b_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_c_b_o() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.c.b.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}gather4_c_b_cl_o:
-;CHECK: IMAGE_GATHER4_C_B_CL_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_gather4_c_b_cl_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_c_b_cl_o() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.c.b.cl.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}gather4_c_lz_o:
-;CHECK: IMAGE_GATHER4_C_LZ_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_gather4_c_lz_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_c_lz_o() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.c.lz.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}gather4_c_lz_o_v8:
-;CHECK: IMAGE_GATHER4_C_LZ_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_gather4_c_lz_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_c_lz_o_v8() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.c.lz.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
;CHECK-LABEL: {{^}}getlod:
-;CHECK: IMAGE_GET_LOD {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, -1, 0, 0, 0, 0, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_get_lod {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, -1, 0, 0, 0, 0, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @getlod() #0 {
main_body:
%r = call <4 x float> @llvm.SI.getlod.i32(i32 undef, <32 x i8> undef, <16 x i8> undef, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}getlod_v2:
-;CHECK: IMAGE_GET_LOD {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_get_lod {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @getlod_v2() #0 {
main_body:
%r = call <4 x float> @llvm.SI.getlod.v2i32(<2 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}getlod_v4:
-;CHECK: IMAGE_GET_LOD {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_get_lod {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @getlod_v4() #0 {
main_body:
%r = call <4 x float> @llvm.SI.getlod.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
;CHECK-LABEL: {{^}}image_load:
-;CHECK: IMAGE_LOAD {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_load {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @image_load() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.load.v4i32(<4 x i32> undef, <8 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}image_load_mip:
-;CHECK: IMAGE_LOAD_MIP {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_load_mip {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @image_load_mip() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.load.mip.v4i32(<4 x i32> undef, <8 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}getresinfo:
-;CHECK: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}
define void @getresinfo() #0 {
main_body:
%r = call <4 x float> @llvm.SI.getresinfo.i32(i32 undef, <8 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
;CHECK-LABEL: {{^}}sample:
-;CHECK: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}sample_cl:
-;CHECK: IMAGE_SAMPLE_CL {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_sample_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_cl() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}sample_d:
-;CHECK: IMAGE_SAMPLE_D {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_sample_d {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_d() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.d.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}sample_d_cl:
-;CHECK: IMAGE_SAMPLE_D_CL {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_sample_d_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_d_cl() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.d.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}sample_l:
-;CHECK: IMAGE_SAMPLE_L {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_sample_l {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_l() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}sample_b:
-;CHECK: IMAGE_SAMPLE_B {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_sample_b {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_b() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.b.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}sample_b_cl:
-;CHECK: IMAGE_SAMPLE_B_CL {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_sample_b_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_b_cl() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.b.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}sample_lz:
-;CHECK: IMAGE_SAMPLE_LZ {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_sample_lz {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_lz() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.lz.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}sample_cd:
-;CHECK: IMAGE_SAMPLE_CD {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_sample_cd {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_cd() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.cd.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}sample_cd_cl:
-;CHECK: IMAGE_SAMPLE_CD_CL {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_sample_cd_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_cd_cl() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.cd.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}sample_c:
-;CHECK: IMAGE_SAMPLE_C {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_c() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}sample_c_cl:
-;CHECK: IMAGE_SAMPLE_C_CL {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_sample_c_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_c_cl() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}sample_c_d:
-;CHECK: IMAGE_SAMPLE_C_D {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_sample_c_d {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_c_d() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.d.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}sample_c_d_cl:
-;CHECK: IMAGE_SAMPLE_C_D_CL {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_sample_c_d_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_c_d_cl() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.d.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}sample_c_l:
-;CHECK: IMAGE_SAMPLE_C_L {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_sample_c_l {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_c_l() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}sample_c_b:
-;CHECK: IMAGE_SAMPLE_C_B {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_sample_c_b {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_c_b() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.b.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}sample_c_b_cl:
-;CHECK: IMAGE_SAMPLE_C_B_CL {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_sample_c_b_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_c_b_cl() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.b.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}sample_c_lz:
-;CHECK: IMAGE_SAMPLE_C_LZ {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_sample_c_lz {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_c_lz() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.lz.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}sample_c_cd:
-;CHECK: IMAGE_SAMPLE_C_CD {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_sample_c_cd {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_c_cd() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.cd.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}sample_c_cd_cl:
-;CHECK: IMAGE_SAMPLE_C_CD_CL {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_sample_c_cd_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_c_cd_cl() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.cd.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
;CHECK-LABEL: {{^}}sample:
-;CHECK: IMAGE_SAMPLE_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_sample_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}sample_cl:
-;CHECK: IMAGE_SAMPLE_CL_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_sample_cl_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_cl() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}sample_d:
-;CHECK: IMAGE_SAMPLE_D_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_sample_d_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_d() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.d.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}sample_d_cl:
-;CHECK: IMAGE_SAMPLE_D_CL_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_sample_d_cl_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_d_cl() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.d.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}sample_l:
-;CHECK: IMAGE_SAMPLE_L_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_sample_l_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_l() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}sample_b:
-;CHECK: IMAGE_SAMPLE_B_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_sample_b_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_b() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.b.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}sample_b_cl:
-;CHECK: IMAGE_SAMPLE_B_CL_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_sample_b_cl_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_b_cl() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.b.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}sample_lz:
-;CHECK: IMAGE_SAMPLE_LZ_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_sample_lz_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_lz() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.lz.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}sample_cd:
-;CHECK: IMAGE_SAMPLE_CD_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_sample_cd_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_cd() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.cd.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}sample_cd_cl:
-;CHECK: IMAGE_SAMPLE_CD_CL_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_sample_cd_cl_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_cd_cl() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.cd.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}sample_c:
-;CHECK: IMAGE_SAMPLE_C_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_sample_c_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_c() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}sample_c_cl:
-;CHECK: IMAGE_SAMPLE_C_CL_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_sample_c_cl_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_c_cl() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}sample_c_d:
-;CHECK: IMAGE_SAMPLE_C_D_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_sample_c_d_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_c_d() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.d.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}sample_c_d_cl:
-;CHECK: IMAGE_SAMPLE_C_D_CL_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_sample_c_d_cl_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_c_d_cl() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.d.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}sample_c_l:
-;CHECK: IMAGE_SAMPLE_C_L_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_sample_c_l_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_c_l() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.l.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}sample_c_b:
-;CHECK: IMAGE_SAMPLE_C_B_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_sample_c_b_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_c_b() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.b.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}sample_c_b_cl:
-;CHECK: IMAGE_SAMPLE_C_B_CL_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_sample_c_b_cl_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_c_b_cl() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.b.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}sample_c_lz:
-;CHECK: IMAGE_SAMPLE_C_LZ_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_sample_c_lz_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_c_lz() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.lz.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}sample_c_cd:
-;CHECK: IMAGE_SAMPLE_C_CD_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_sample_c_cd_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_c_cd() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.cd.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
}
;CHECK-LABEL: {{^}}sample_c_cd_cl:
-;CHECK: IMAGE_SAMPLE_C_CD_CL_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK: image_sample_c_cd_cl_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_c_cd_cl() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.cd.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
-;CHECK-DAG: IMAGE_LOAD {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, -1
-;CHECK-DAG: IMAGE_LOAD_MIP {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, 0
-;CHECK-DAG: IMAGE_LOAD_MIP {{v[0-9]+}}, 2, 0, 0, 0
-;CHECK-DAG: IMAGE_LOAD_MIP {{v[0-9]+}}, 1, 0, 0, 0
-;CHECK-DAG: IMAGE_LOAD_MIP {{v[0-9]+}}, 4, 0, 0, 0
-;CHECK-DAG: IMAGE_LOAD_MIP {{v[0-9]+}}, 8, 0, 0, 0
-;CHECK-DAG: IMAGE_LOAD_MIP {{v\[[0-9]+:[0-9]+\]}}, 5, 0, 0, 0
-;CHECK-DAG: IMAGE_LOAD_MIP {{v\[[0-9]+:[0-9]+\]}}, 12, 0, 0, -1
-;CHECK-DAG: IMAGE_LOAD_MIP {{v\[[0-9]+:[0-9]+\]}}, 7, 0, 0, 0
-;CHECK-DAG: IMAGE_LOAD_MIP {{v[0-9]+}}, 8, 0, 0, -1
+;CHECK-DAG: image_load {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, -1
+;CHECK-DAG: image_load_mip {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, 0
+;CHECK-DAG: image_load_mip {{v[0-9]+}}, 2, 0, 0, 0
+;CHECK-DAG: image_load_mip {{v[0-9]+}}, 1, 0, 0, 0
+;CHECK-DAG: image_load_mip {{v[0-9]+}}, 4, 0, 0, 0
+;CHECK-DAG: image_load_mip {{v[0-9]+}}, 8, 0, 0, 0
+;CHECK-DAG: image_load_mip {{v\[[0-9]+:[0-9]+\]}}, 5, 0, 0, 0
+;CHECK-DAG: image_load_mip {{v\[[0-9]+:[0-9]+\]}}, 12, 0, 0, -1
+;CHECK-DAG: image_load_mip {{v\[[0-9]+:[0-9]+\]}}, 7, 0, 0, 0
+;CHECK-DAG: image_load_mip {{v[0-9]+}}, 8, 0, 0, -1
define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
%v1 = insertelement <4 x i32> undef, i32 %a1, i32 0
; Test that ccordinates are stored in vgprs and not sgprs
; CHECK: vgpr_coords
-; CHECK: IMAGE_LOAD_MIP {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}
+; CHECK: image_load_mip {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}
define void @vgpr_coords(float addrspace(2)* addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
%20 = getelementptr float addrspace(2)* addrspace(2)* %0, i32 0
; ESGS ring buffer
; CHECK-LABEL: {{^}}main:
-; CHECK: BUFFER_LOAD_DWORD
-; CHECK: BUFFER_LOAD_DWORD
-; CHECK: BUFFER_LOAD_DWORD
-; CHECK: BUFFER_LOAD_DWORD
+; CHECK: buffer_load_dword
+; CHECK: buffer_load_dword
+; CHECK: buffer_load_dword
+; CHECK: buffer_load_dword
define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [2 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, [17 x <16 x i8>] addrspace(2)* inreg, i32, i32, i32, i32) #0 {
main_body:
; RUN: llc -march=r600 -mcpu=verde -verify-machineinstrs < %s | FileCheck %s
-; CHECK-DAG: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, -1
-; CHECK-DAG: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, 0
-; CHECK-DAG: IMAGE_GET_RESINFO {{v[0-9]+}}, 2, 0, 0, 0
-; CHECK-DAG: IMAGE_GET_RESINFO {{v[0-9]+}}, 1, 0, 0, 0
-; CHECK-DAG: IMAGE_GET_RESINFO {{v[0-9]+}}, 4, 0, 0, 0
-; CHECK-DAG: IMAGE_GET_RESINFO {{v[0-9]+}}, 8, 0, 0, 0
-; CHECK-DAG: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 5, 0, 0, 0
-; CHECK-DAG: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 9, 0, 0, 0
-; CHECK-DAG: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 6, 0, 0, 0
-; CHECK-DAG: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 10, 0, 0, -1
-; CHECK-DAG: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 12, 0, 0, -1
-; CHECK-DAG: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 7, 0, 0, 0
-; CHECK-DAG: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 11, 0, 0, 0
-; CHECK-DAG: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 13, 0, 0, 0
-; CHECK-DAG: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 14, 0, 0, 0
-; CHECK-DAG: IMAGE_GET_RESINFO {{v[0-9]+}}, 8, 0, 0, -1
+; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, -1
+; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, 0
+; CHECK-DAG: image_get_resinfo {{v[0-9]+}}, 2, 0, 0, 0
+; CHECK-DAG: image_get_resinfo {{v[0-9]+}}, 1, 0, 0, 0
+; CHECK-DAG: image_get_resinfo {{v[0-9]+}}, 4, 0, 0, 0
+; CHECK-DAG: image_get_resinfo {{v[0-9]+}}, 8, 0, 0, 0
+; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 5, 0, 0, 0
+; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 9, 0, 0, 0
+; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 6, 0, 0, 0
+; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 10, 0, 0, -1
+; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 12, 0, 0, -1
+; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 7, 0, 0, 0
+; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 11, 0, 0, 0
+; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 13, 0, 0, 0
+; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 14, 0, 0, 0
+; CHECK-DAG: image_get_resinfo {{v[0-9]+}}, 8, 0, 0, -1
define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8,
i32 %a9, i32 %a10, i32 %a11, i32 %a12, i32 %a13, i32 %a14, i32 %a15, i32 %a16) {
;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
; CHECK-LABEL: {{^}}v1:
-; CHECK: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 13
+; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 13
define void @v1(i32 %a1) #0 {
entry:
%0 = insertelement <1 x i32> undef, i32 %a1, i32 0
}
; CHECK-LABEL: {{^}}v2:
-; CHECK: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 11
+; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 11
define void @v2(i32 %a1) #0 {
entry:
%0 = insertelement <1 x i32> undef, i32 %a1, i32 0
}
; CHECK-LABEL: {{^}}v3:
-; CHECK: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 14
+; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 14
define void @v3(i32 %a1) #0 {
entry:
%0 = insertelement <1 x i32> undef, i32 %a1, i32 0
}
; CHECK-LABEL: {{^}}v4:
-; CHECK: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 7
+; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 7
define void @v4(i32 %a1) #0 {
entry:
%0 = insertelement <1 x i32> undef, i32 %a1, i32 0
}
; CHECK-LABEL: {{^}}v5:
-; CHECK: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 10
+; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 10
define void @v5(i32 %a1) #0 {
entry:
%0 = insertelement <1 x i32> undef, i32 %a1, i32 0
}
; CHECK-LABEL: {{^}}v6:
-; CHECK: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 6
+; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 6
define void @v6(i32 %a1) #0 {
entry:
%0 = insertelement <1 x i32> undef, i32 %a1, i32 0
}
; CHECK-LABEL: {{^}}v7:
-; CHECK: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 9
+; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 9
define void @v7(i32 %a1) #0 {
entry:
%0 = insertelement <1 x i32> undef, i32 %a1, i32 0
;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
-;CHECK-DAG: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 15
-;CHECK-DAG: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 3
-;CHECK-DAG: IMAGE_SAMPLE {{v[0-9]+}}, 2
-;CHECK-DAG: IMAGE_SAMPLE {{v[0-9]+}}, 1
-;CHECK-DAG: IMAGE_SAMPLE {{v[0-9]+}}, 4
-;CHECK-DAG: IMAGE_SAMPLE {{v[0-9]+}}, 8
-;CHECK-DAG: IMAGE_SAMPLE_C {{v\[[0-9]+:[0-9]+\]}}, 5
-;CHECK-DAG: IMAGE_SAMPLE_C {{v\[[0-9]+:[0-9]+\]}}, 9
-;CHECK-DAG: IMAGE_SAMPLE_C {{v\[[0-9]+:[0-9]+\]}}, 6
-;CHECK-DAG: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 10
-;CHECK-DAG: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 12
-;CHECK-DAG: IMAGE_SAMPLE_C {{v\[[0-9]+:[0-9]+\]}}, 7
-;CHECK-DAG: IMAGE_SAMPLE_C {{v\[[0-9]+:[0-9]+\]}}, 11
-;CHECK-DAG: IMAGE_SAMPLE_C {{v\[[0-9]+:[0-9]+\]}}, 13
-;CHECK-DAG: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 14
-;CHECK-DAG: IMAGE_SAMPLE {{v[0-9]+}}, 8
+;CHECK-DAG: image_sample {{v\[[0-9]+:[0-9]+\]}}, 15
+;CHECK-DAG: image_sample {{v\[[0-9]+:[0-9]+\]}}, 3
+;CHECK-DAG: image_sample {{v[0-9]+}}, 2
+;CHECK-DAG: image_sample {{v[0-9]+}}, 1
+;CHECK-DAG: image_sample {{v[0-9]+}}, 4
+;CHECK-DAG: image_sample {{v[0-9]+}}, 8
+;CHECK-DAG: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 5
+;CHECK-DAG: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 9
+;CHECK-DAG: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 6
+;CHECK-DAG: image_sample {{v\[[0-9]+:[0-9]+\]}}, 10
+;CHECK-DAG: image_sample {{v\[[0-9]+:[0-9]+\]}}, 12
+;CHECK-DAG: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 7
+;CHECK-DAG: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 11
+;CHECK-DAG: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 13
+;CHECK-DAG: image_sample {{v\[[0-9]+:[0-9]+\]}}, 14
+;CHECK-DAG: image_sample {{v[0-9]+}}, 8
define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) #0 {
%v1 = insertelement <4 x i32> undef, i32 %a1, i32 0
}
; CHECK: {{^}}v1:
-; CHECK: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 15
+; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 15
define void @v1(i32 %a1) #0 {
entry:
%0 = insertelement <1 x i32> undef, i32 %a1, i32 0
;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
-;CHECK-DAG: IMAGE_SAMPLE_D {{v\[[0-9]+:[0-9]+\]}}, 15
-;CHECK-DAG: IMAGE_SAMPLE_D {{v\[[0-9]+:[0-9]+\]}}, 3
-;CHECK-DAG: IMAGE_SAMPLE_D {{v[0-9]+}}, 2
-;CHECK-DAG: IMAGE_SAMPLE_D {{v[0-9]+}}, 1
-;CHECK-DAG: IMAGE_SAMPLE_D {{v[0-9]+}}, 4
-;CHECK-DAG: IMAGE_SAMPLE_D {{v[0-9]+}}, 8
-;CHECK-DAG: IMAGE_SAMPLE_C_D {{v\[[0-9]+:[0-9]+\]}}, 5
-;CHECK-DAG: IMAGE_SAMPLE_C_D {{v\[[0-9]+:[0-9]+\]}}, 9
-;CHECK-DAG: IMAGE_SAMPLE_C_D {{v\[[0-9]+:[0-9]+\]}}, 6
-;CHECK-DAG: IMAGE_SAMPLE_D {{v\[[0-9]+:[0-9]+\]}}, 10
-;CHECK-DAG: IMAGE_SAMPLE_D {{v\[[0-9]+:[0-9]+\]}}, 12
-;CHECK-DAG: IMAGE_SAMPLE_C_D {{v\[[0-9]+:[0-9]+\]}}, 7
-;CHECK-DAG: IMAGE_SAMPLE_C_D {{v\[[0-9]+:[0-9]+\]}}, 11
-;CHECK-DAG: IMAGE_SAMPLE_C_D {{v\[[0-9]+:[0-9]+\]}}, 13
-;CHECK-DAG: IMAGE_SAMPLE_D {{v\[[0-9]+:[0-9]+\]}}, 14
-;CHECK-DAG: IMAGE_SAMPLE_D {{v[0-9]+}}, 8
+;CHECK-DAG: image_sample_d {{v\[[0-9]+:[0-9]+\]}}, 15
+;CHECK-DAG: image_sample_d {{v\[[0-9]+:[0-9]+\]}}, 3
+;CHECK-DAG: image_sample_d {{v[0-9]+}}, 2
+;CHECK-DAG: image_sample_d {{v[0-9]+}}, 1
+;CHECK-DAG: image_sample_d {{v[0-9]+}}, 4
+;CHECK-DAG: image_sample_d {{v[0-9]+}}, 8
+;CHECK-DAG: image_sample_c_d {{v\[[0-9]+:[0-9]+\]}}, 5
+;CHECK-DAG: image_sample_c_d {{v\[[0-9]+:[0-9]+\]}}, 9
+;CHECK-DAG: image_sample_c_d {{v\[[0-9]+:[0-9]+\]}}, 6
+;CHECK-DAG: image_sample_d {{v\[[0-9]+:[0-9]+\]}}, 10
+;CHECK-DAG: image_sample_d {{v\[[0-9]+:[0-9]+\]}}, 12
+;CHECK-DAG: image_sample_c_d {{v\[[0-9]+:[0-9]+\]}}, 7
+;CHECK-DAG: image_sample_c_d {{v\[[0-9]+:[0-9]+\]}}, 11
+;CHECK-DAG: image_sample_c_d {{v\[[0-9]+:[0-9]+\]}}, 13
+;CHECK-DAG: image_sample_d {{v\[[0-9]+:[0-9]+\]}}, 14
+;CHECK-DAG: image_sample_d {{v[0-9]+}}, 8
define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) #0 {
%v1 = insertelement <4 x i32> undef, i32 %a1, i32 0
;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
; CHECK-LABEL: {{^}}main:
-; CHECK: S_SENDMSG Gs(emit stream 0)
-; CHECK: S_SENDMSG Gs(cut stream 1)
-; CHECK: S_SENDMSG Gs(emit-cut stream 2)
-; CHECK: S_SENDMSG Gs_done(nop)
+; CHECK: s_sendmsg Gs(emit stream 0)
+; CHECK: s_sendmsg Gs(cut stream 1)
+; CHECK: s_sendmsg Gs(emit-cut stream 2)
+; CHECK: s_sendmsg Gs_done(nop)
define void @main() {
main_body:
;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
;CHECK-LABEL: {{^}}test1:
-;CHECK: TBUFFER_STORE_FORMAT_XYZW {{v\[[0-9]+:[0-9]+\]}}, 0x20, -1, 0, -1, 0, 14, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
+;CHECK: tbuffer_store_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, 0x20, -1, 0, -1, 0, 14, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
define void @test1(i32 %a1, i32 %vaddr) #0 {
%vdata = insertelement <4 x i32> undef, i32 %a1, i32 0
call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata,
}
;CHECK-LABEL: {{^}}test2:
-;CHECK: TBUFFER_STORE_FORMAT_XYZ {{v\[[0-9]+:[0-9]+\]}}, 0x18, -1, 0, -1, 0, 13, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
+;CHECK: tbuffer_store_format_xyz {{v\[[0-9]+:[0-9]+\]}}, 0x18, -1, 0, -1, 0, 13, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
define void @test2(i32 %a1, i32 %vaddr) #0 {
%vdata = insertelement <4 x i32> undef, i32 %a1, i32 0
call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata,
}
;CHECK-LABEL: {{^}}test3:
-;CHECK: TBUFFER_STORE_FORMAT_XY {{v\[[0-9]+:[0-9]+\]}}, 0x10, -1, 0, -1, 0, 11, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
+;CHECK: tbuffer_store_format_xy {{v\[[0-9]+:[0-9]+\]}}, 0x10, -1, 0, -1, 0, 11, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
define void @test3(i32 %a1, i32 %vaddr) #0 {
%vdata = insertelement <2 x i32> undef, i32 %a1, i32 0
call void @llvm.SI.tbuffer.store.v2i32(<16 x i8> undef, <2 x i32> %vdata,
}
;CHECK-LABEL: {{^}}test4:
-;CHECK: TBUFFER_STORE_FORMAT_X {{v[0-9]+}}, 0x8, -1, 0, -1, 0, 4, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
+;CHECK: tbuffer_store_format_x {{v[0-9]+}}, 0x8, -1, 0, -1, 0, 4, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
define void @test4(i32 %vdata, i32 %vaddr) #0 {
call void @llvm.SI.tbuffer.store.i32(<16 x i8> undef, i32 %vdata,
i32 1, i32 %vaddr, i32 0, i32 8, i32 4, i32 4, i32 1, i32 0, i32 1,
;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
-;CHECK: V_MBCNT_LO_U32_B32_e64
-;CHECK: V_MBCNT_HI_U32_B32_e32
+;CHECK: v_mbcnt_lo_u32_b32_e64
+;CHECK: v_mbcnt_hi_u32_b32_e32
define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg) "ShaderType"="0" {
main_body:
; RUN: llc -march=r600 -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; SI-LABEL: {{^}}kilp_gs_const:
-; SI: S_MOV_B64 exec, 0
+; SI: s_mov_b64 exec, 0
define void @kilp_gs_const() #0 {
main_body:
%0 = icmp ule i32 0, 3
declare float @llvm.AMDGPU.lrp(float, float, float) nounwind readnone
; FUNC-LABEL: {{^}}test_lrp:
-; SI: V_SUB_F32
-; SI: V_MAD_F32
+; SI: v_sub_f32
+; SI: v_mad_f32
define void @test_lrp(float addrspace(1)* %out, float %src0, float %src1, float %src2) nounwind {
%mad = call float @llvm.AMDGPU.lrp(float %src0, float %src1, float %src2) nounwind readnone
store float %mad, float addrspace(1)* %out, align 4
;EG: ADD *
;EG: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
;EG-NOT: COS
-;SI: V_COS_F32
-;SI-NOT: V_COS_F32
+;SI: v_cos_f32
+;SI-NOT: v_cos_f32
define void @test(float addrspace(1)* %out, float %x) #1 {
%cos = call float @llvm.cos.f32(float %x)
;EG: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
;EG: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
;EG-NOT: COS
-;SI: V_COS_F32
-;SI: V_COS_F32
-;SI: V_COS_F32
-;SI: V_COS_F32
-;SI-NOT: V_COS_F32
+;SI: v_cos_f32
+;SI: v_cos_f32
+;SI: v_cos_f32
+;SI: v_cos_f32
+;SI-NOT: v_cos_f32
define void @testv(<4 x float> addrspace(1)* %out, <4 x float> inreg %vx) #1 {
%cos = call <4 x float> @llvm.cos.v4f32(<4 x float> %vx)
;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}}
-;SI-CHECK: V_EXP_F32
+;SI-CHECK: v_exp_f32
define void @test(float addrspace(1)* %out, float %in) {
entry:
;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}}
;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}}
-;SI-CHECK: V_EXP_F32
-;SI-CHECK: V_EXP_F32
+;SI-CHECK: v_exp_f32
+;SI-CHECK: v_exp_f32
define void @testv2(<2 x float> addrspace(1)* %out, <2 x float> %in) {
entry:
;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}}
;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}}
;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}}
-;SI-CHECK: V_EXP_F32
-;SI-CHECK: V_EXP_F32
-;SI-CHECK: V_EXP_F32
-;SI-CHECK: V_EXP_F32
+;SI-CHECK: v_exp_f32
+;SI-CHECK: v_exp_f32
+;SI-CHECK: v_exp_f32
+;SI-CHECK: v_exp_f32
define void @testv4(<4 x float> addrspace(1)* %out, <4 x float> %in) {
entry:
%0 = call <4 x float> @llvm.exp2.v4f32(<4 x float> %in)
; R600-CHECK: {{^}}f32:
; R600-CHECK: FLOOR
; SI-CHECK: {{^}}f32:
-; SI-CHECK: V_FLOOR_F32_e32
+; SI-CHECK: v_floor_f32_e32
define void @f32(float addrspace(1)* %out, float %in) {
entry:
%0 = call float @llvm.floor.f32(float %in)
; R600-CHECK: FLOOR
; R600-CHECK: FLOOR
; SI-CHECK: {{^}}v2f32:
-; SI-CHECK: V_FLOOR_F32_e32
-; SI-CHECK: V_FLOOR_F32_e32
+; SI-CHECK: v_floor_f32_e32
+; SI-CHECK: v_floor_f32_e32
define void @v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
entry:
%0 = call <2 x float> @llvm.floor.v2f32(<2 x float> %in)
; R600-CHECK: FLOOR
; R600-CHECK: FLOOR
; SI-CHECK: {{^}}v4f32:
-; SI-CHECK: V_FLOOR_F32_e32
-; SI-CHECK: V_FLOOR_F32_e32
-; SI-CHECK: V_FLOOR_F32_e32
-; SI-CHECK: V_FLOOR_F32_e32
+; SI-CHECK: v_floor_f32_e32
+; SI-CHECK: v_floor_f32_e32
+; SI-CHECK: v_floor_f32_e32
+; SI-CHECK: v_floor_f32_e32
define void @v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
entry:
%0 = call <4 x float> @llvm.floor.v4f32(<4 x float> %in)
;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}}
-;SI-CHECK: V_LOG_F32
+;SI-CHECK: v_log_f32
define void @test(float addrspace(1)* %out, float %in) {
entry:
;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}}
;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}}
-;SI-CHECK: V_LOG_F32
-;SI-CHECK: V_LOG_F32
+;SI-CHECK: v_log_f32
+;SI-CHECK: v_log_f32
define void @testv2(<2 x float> addrspace(1)* %out, <2 x float> %in) {
entry:
;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}}
;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}}
;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}}
-;SI-CHECK: V_LOG_F32
-;SI-CHECK: V_LOG_F32
-;SI-CHECK: V_LOG_F32
-;SI-CHECK: V_LOG_F32
+;SI-CHECK: v_log_f32
+;SI-CHECK: v_log_f32
+;SI-CHECK: v_log_f32
+;SI-CHECK: v_log_f32
define void @testv4(<4 x float> addrspace(1)* %out, <4 x float> %in) {
entry:
%0 = call <4 x float> @llvm.log2.v4f32(<4 x float> %in)
; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align1:
-; SI: DS_READ_U8
-; SI: DS_WRITE_B8
-; SI: DS_READ_U8
-; SI: DS_WRITE_B8
-; SI: DS_READ_U8
-; SI: DS_WRITE_B8
-; SI: DS_READ_U8
-; SI: DS_WRITE_B8
-; SI: DS_READ_U8
-; SI: DS_WRITE_B8
-
-; SI: DS_READ_U8
-; SI: DS_WRITE_B8
-; SI: DS_READ_U8
-; SI: DS_WRITE_B8
-; SI: DS_READ_U8
-; SI: DS_WRITE_B8
-; SI: DS_READ_U8
-; SI: DS_WRITE_B8
-; SI: DS_READ_U8
-; SI: DS_WRITE_B8
-
-; SI: DS_READ_U8
-; SI: DS_WRITE_B8
-; SI: DS_READ_U8
-; SI: DS_WRITE_B8
-; SI: DS_READ_U8
-; SI: DS_WRITE_B8
-; SI: DS_READ_U8
-; SI: DS_WRITE_B8
-; SI: DS_READ_U8
-; SI: DS_READ_U8
-
-
-; SI: DS_READ_U8
-; SI: DS_READ_U8
-; SI: DS_READ_U8
-; SI: DS_READ_U8
-; SI: DS_READ_U8
-; SI: DS_READ_U8
-; SI: DS_READ_U8
-; SI: DS_READ_U8
-
-; SI: DS_READ_U8
-; SI: DS_READ_U8
-; SI: DS_READ_U8
-; SI: DS_READ_U8
-; SI: DS_READ_U8
-; SI: DS_READ_U8
-; SI: DS_READ_U8
-; SI: DS_READ_U8
-
-; SI: DS_WRITE_B8
-; SI: DS_WRITE_B8
-; SI: DS_WRITE_B8
-; SI: DS_WRITE_B8
-; SI: DS_WRITE_B8
-; SI: DS_WRITE_B8
-; SI: DS_WRITE_B8
-; SI: DS_WRITE_B8
-; SI: DS_WRITE_B8
-
-; SI: DS_WRITE_B8
-; SI: DS_WRITE_B8
-; SI: DS_WRITE_B8
-; SI: DS_WRITE_B8
-; SI: DS_WRITE_B8
-; SI: DS_WRITE_B8
-; SI: DS_WRITE_B8
-; SI: DS_WRITE_B8
-; SI: DS_WRITE_B8
-
-; SI: S_ENDPGM
+; SI: ds_read_u8
+; SI: ds_write_b8
+; SI: ds_read_u8
+; SI: ds_write_b8
+; SI: ds_read_u8
+; SI: ds_write_b8
+; SI: ds_read_u8
+; SI: ds_write_b8
+; SI: ds_read_u8
+; SI: ds_write_b8
+
+; SI: ds_read_u8
+; SI: ds_write_b8
+; SI: ds_read_u8
+; SI: ds_write_b8
+; SI: ds_read_u8
+; SI: ds_write_b8
+; SI: ds_read_u8
+; SI: ds_write_b8
+; SI: ds_read_u8
+; SI: ds_write_b8
+
+; SI: ds_read_u8
+; SI: ds_write_b8
+; SI: ds_read_u8
+; SI: ds_write_b8
+; SI: ds_read_u8
+; SI: ds_write_b8
+; SI: ds_read_u8
+; SI: ds_write_b8
+; SI: ds_read_u8
+; SI: ds_read_u8
+
+
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+
+; SI: s_endpgm
define void @test_small_memcpy_i64_lds_to_lds_align1(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
%bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
%bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
}
; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align2:
-; SI: DS_READ_U16
-; SI: DS_READ_U16
-; SI: DS_READ_U16
-; SI: DS_READ_U16
-; SI: DS_READ_U16
-; SI: DS_READ_U16
-; SI: DS_READ_U16
-; SI: DS_READ_U16
-
-; SI: DS_READ_U16
-; SI: DS_READ_U16
-; SI: DS_READ_U16
-; SI: DS_READ_U16
-; SI: DS_READ_U16
-; SI: DS_READ_U16
-; SI: DS_READ_U16
-; SI: DS_READ_U16
-
-; SI: DS_WRITE_B16
-; SI: DS_WRITE_B16
-; SI: DS_WRITE_B16
-; SI: DS_WRITE_B16
-; SI: DS_WRITE_B16
-; SI: DS_WRITE_B16
-; SI: DS_WRITE_B16
-; SI: DS_WRITE_B16
-
-; SI: DS_WRITE_B16
-; SI: DS_WRITE_B16
-; SI: DS_WRITE_B16
-; SI: DS_WRITE_B16
-; SI: DS_WRITE_B16
-; SI: DS_WRITE_B16
-; SI: DS_WRITE_B16
-; SI: DS_WRITE_B16
-
-; SI: S_ENDPGM
+; SI: ds_read_u16
+; SI: ds_read_u16
+; SI: ds_read_u16
+; SI: ds_read_u16
+; SI: ds_read_u16
+; SI: ds_read_u16
+; SI: ds_read_u16
+; SI: ds_read_u16
+
+; SI: ds_read_u16
+; SI: ds_read_u16
+; SI: ds_read_u16
+; SI: ds_read_u16
+; SI: ds_read_u16
+; SI: ds_read_u16
+; SI: ds_read_u16
+; SI: ds_read_u16
+
+; SI: ds_write_b16
+; SI: ds_write_b16
+; SI: ds_write_b16
+; SI: ds_write_b16
+; SI: ds_write_b16
+; SI: ds_write_b16
+; SI: ds_write_b16
+; SI: ds_write_b16
+
+; SI: ds_write_b16
+; SI: ds_write_b16
+; SI: ds_write_b16
+; SI: ds_write_b16
+; SI: ds_write_b16
+; SI: ds_write_b16
+; SI: ds_write_b16
+; SI: ds_write_b16
+
+; SI: s_endpgm
define void @test_small_memcpy_i64_lds_to_lds_align2(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
%bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
%bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
}
; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align4:
-; SI-DAG: DS_READ_B32
-; SI-DAG: DS_WRITE_B32
+; SI-DAG: ds_read_b32
+; SI-DAG: ds_write_b32
-; SI-DAG: DS_READ_B32
-; SI-DAG: DS_WRITE_B32
+; SI-DAG: ds_read_b32
+; SI-DAG: ds_write_b32
-; SI-DAG: DS_READ_B32
-; SI-DAG: DS_WRITE_B32
+; SI-DAG: ds_read_b32
+; SI-DAG: ds_write_b32
-; SI-DAG: DS_READ_B32
-; SI-DAG: DS_WRITE_B32
+; SI-DAG: ds_read_b32
+; SI-DAG: ds_write_b32
-; SI-DAG: DS_READ_B32
-; SI-DAG: DS_WRITE_B32
+; SI-DAG: ds_read_b32
+; SI-DAG: ds_write_b32
-; SI-DAG: DS_READ_B32
-; SI-DAG: DS_WRITE_B32
+; SI-DAG: ds_read_b32
+; SI-DAG: ds_write_b32
-; SI-DAG: DS_READ_B32
-; SI-DAG: DS_WRITE_B32
+; SI-DAG: ds_read_b32
+; SI-DAG: ds_write_b32
-; SI-DAG: DS_READ_B32
-; SI-DAG: DS_WRITE_B32
+; SI-DAG: ds_read_b32
+; SI-DAG: ds_write_b32
-; SI-DAG: DS_READ_B32
-; SI-DAG: DS_WRITE_B32
+; SI-DAG: ds_read_b32
+; SI-DAG: ds_write_b32
-; SI: S_ENDPGM
+; SI: s_endpgm
define void @test_small_memcpy_i64_lds_to_lds_align4(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
%bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
%bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
; FIXME: Use 64-bit ops
; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align8:
-; SI-DAG: DS_READ_B32
-; SI-DAG: DS_WRITE_B32
+; SI-DAG: ds_read_b32
+; SI-DAG: ds_write_b32
-; SI-DAG: DS_READ_B32
-; SI-DAG: DS_WRITE_B32
+; SI-DAG: ds_read_b32
+; SI-DAG: ds_write_b32
-; SI-DAG: DS_READ_B32
-; SI-DAG: DS_WRITE_B32
+; SI-DAG: ds_read_b32
+; SI-DAG: ds_write_b32
-; SI-DAG: DS_READ_B32
-; SI-DAG: DS_WRITE_B32
+; SI-DAG: ds_read_b32
+; SI-DAG: ds_write_b32
-; SI-DAG: DS_READ_B32
-; SI-DAG: DS_WRITE_B32
+; SI-DAG: ds_read_b32
+; SI-DAG: ds_write_b32
-; SI-DAG: DS_READ_B32
-; SI-DAG: DS_WRITE_B32
+; SI-DAG: ds_read_b32
+; SI-DAG: ds_write_b32
-; SI-DAG: DS_READ_B32
-; SI-DAG: DS_WRITE_B32
+; SI-DAG: ds_read_b32
+; SI-DAG: ds_write_b32
-; SI-DAG: DS_READ_B32
-; SI-DAG: DS_WRITE_B32
+; SI-DAG: ds_read_b32
+; SI-DAG: ds_write_b32
-; SI-DAG: DS_READ_B32
-; SI-DAG: DS_WRITE_B32
+; SI-DAG: ds_read_b32
+; SI-DAG: ds_write_b32
-; SI-DAG: S_ENDPGM
+; SI-DAG: s_endpgm
define void @test_small_memcpy_i64_lds_to_lds_align8(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
%bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
%bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
}
; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align1:
-; SI-DAG: BUFFER_LOAD_UBYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_LOAD_UBYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_LOAD_UBYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_LOAD_UBYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_LOAD_UBYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_LOAD_UBYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_LOAD_UBYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_LOAD_UBYTE
-; SI-DAG: BUFFER_STORE_BYTE
-
-; SI-DAG: BUFFER_LOAD_UBYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_LOAD_UBYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_LOAD_UBYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_LOAD_UBYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_LOAD_UBYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_LOAD_UBYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_LOAD_UBYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_LOAD_UBYTE
-; SI-DAG: BUFFER_STORE_BYTE
-
-; SI-DAG: BUFFER_LOAD_UBYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_LOAD_UBYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_LOAD_UBYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_LOAD_UBYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_LOAD_UBYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_LOAD_UBYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_LOAD_UBYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_LOAD_UBYTE
-; SI-DAG: BUFFER_STORE_BYTE
-
-; SI-DAG: BUFFER_LOAD_UBYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_LOAD_UBYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_LOAD_UBYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_LOAD_UBYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_LOAD_UBYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_LOAD_UBYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_LOAD_UBYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_LOAD_UBYTE
-; SI-DAG: BUFFER_STORE_BYTE
-
-; SI: S_ENDPGM
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+
+; SI: s_endpgm
define void @test_small_memcpy_i64_global_to_global_align1(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
%bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
%bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
}
; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align2:
-; SI-DAG: BUFFER_LOAD_USHORT
-; SI-DAG: BUFFER_LOAD_USHORT
-; SI-DAG: BUFFER_LOAD_USHORT
-; SI-DAG: BUFFER_LOAD_USHORT
-; SI-DAG: BUFFER_LOAD_USHORT
-; SI-DAG: BUFFER_LOAD_USHORT
-; SI-DAG: BUFFER_LOAD_USHORT
-; SI-DAG: BUFFER_LOAD_USHORT
-; SI-DAG: BUFFER_LOAD_USHORT
-; SI-DAG: BUFFER_LOAD_USHORT
-; SI-DAG: BUFFER_LOAD_USHORT
-; SI-DAG: BUFFER_LOAD_USHORT
-; SI-DAG: BUFFER_LOAD_USHORT
-; SI-DAG: BUFFER_LOAD_USHORT
-; SI-DAG: BUFFER_LOAD_USHORT
-; SI-DAG: BUFFER_LOAD_USHORT
-
-; SI-DAG: BUFFER_STORE_SHORT
-; SI-DAG: BUFFER_STORE_SHORT
-; SI-DAG: BUFFER_STORE_SHORT
-; SI-DAG: BUFFER_STORE_SHORT
-; SI-DAG: BUFFER_STORE_SHORT
-; SI-DAG: BUFFER_STORE_SHORT
-; SI-DAG: BUFFER_STORE_SHORT
-; SI-DAG: BUFFER_STORE_SHORT
-; SI-DAG: BUFFER_STORE_SHORT
-; SI-DAG: BUFFER_STORE_SHORT
-; SI-DAG: BUFFER_STORE_SHORT
-; SI-DAG: BUFFER_STORE_SHORT
-; SI-DAG: BUFFER_STORE_SHORT
-; SI-DAG: BUFFER_STORE_SHORT
-; SI-DAG: BUFFER_STORE_SHORT
-; SI-DAG: BUFFER_STORE_SHORT
-
-; SI: S_ENDPGM
+; SI-DAG: buffer_load_ushort
+; SI-DAG: buffer_load_ushort
+; SI-DAG: buffer_load_ushort
+; SI-DAG: buffer_load_ushort
+; SI-DAG: buffer_load_ushort
+; SI-DAG: buffer_load_ushort
+; SI-DAG: buffer_load_ushort
+; SI-DAG: buffer_load_ushort
+; SI-DAG: buffer_load_ushort
+; SI-DAG: buffer_load_ushort
+; SI-DAG: buffer_load_ushort
+; SI-DAG: buffer_load_ushort
+; SI-DAG: buffer_load_ushort
+; SI-DAG: buffer_load_ushort
+; SI-DAG: buffer_load_ushort
+; SI-DAG: buffer_load_ushort
+
+; SI-DAG: buffer_store_short
+; SI-DAG: buffer_store_short
+; SI-DAG: buffer_store_short
+; SI-DAG: buffer_store_short
+; SI-DAG: buffer_store_short
+; SI-DAG: buffer_store_short
+; SI-DAG: buffer_store_short
+; SI-DAG: buffer_store_short
+; SI-DAG: buffer_store_short
+; SI-DAG: buffer_store_short
+; SI-DAG: buffer_store_short
+; SI-DAG: buffer_store_short
+; SI-DAG: buffer_store_short
+; SI-DAG: buffer_store_short
+; SI-DAG: buffer_store_short
+; SI-DAG: buffer_store_short
+
+; SI: s_endpgm
define void @test_small_memcpy_i64_global_to_global_align2(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
%bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
%bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
}
; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align4:
-; SI: BUFFER_LOAD_DWORDX4
-; SI: BUFFER_LOAD_DWORDX4
-; SI: BUFFER_STORE_DWORDX4
-; SI: BUFFER_STORE_DWORDX4
-; SI: S_ENDPGM
+; SI: buffer_load_dwordx4
+; SI: buffer_load_dwordx4
+; SI: buffer_store_dwordx4
+; SI: buffer_store_dwordx4
+; SI: s_endpgm
define void @test_small_memcpy_i64_global_to_global_align4(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
%bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
%bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
}
; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align8:
-; SI: BUFFER_LOAD_DWORDX4
-; SI: BUFFER_LOAD_DWORDX4
-; SI: BUFFER_STORE_DWORDX4
-; SI: BUFFER_STORE_DWORDX4
-; SI: S_ENDPGM
+; SI: buffer_load_dwordx4
+; SI: buffer_load_dwordx4
+; SI: buffer_store_dwordx4
+; SI: buffer_store_dwordx4
+; SI: s_endpgm
define void @test_small_memcpy_i64_global_to_global_align8(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
%bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
%bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
}
; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align16:
-; SI: BUFFER_LOAD_DWORDX4
-; SI: BUFFER_LOAD_DWORDX4
-; SI: BUFFER_STORE_DWORDX4
-; SI: BUFFER_STORE_DWORDX4
-; SI: S_ENDPGM
+; SI: buffer_load_dwordx4
+; SI: buffer_load_dwordx4
+; SI: buffer_store_dwordx4
+; SI: buffer_store_dwordx4
+; SI: s_endpgm
define void @test_small_memcpy_i64_global_to_global_align16(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
%bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
%bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}rint_f64:
-; CI: V_RNDNE_F64_e32
+; CI: v_rndne_f64_e32
-; SI-DAG: V_ADD_F64
-; SI-DAG: V_ADD_F64
-; SI-DAG V_CMP_GT_F64_e64
-; SI: V_CNDMASK_B32
-; SI: V_CNDMASK_B32
-; SI: S_ENDPGM
+; SI-DAG: v_add_f64
+; SI-DAG: v_add_f64
+; SI-DAG v_cmp_gt_f64_e64
+; SI: v_cndmask_b32
+; SI: v_cndmask_b32
+; SI: s_endpgm
define void @rint_f64(double addrspace(1)* %out, double %in) {
entry:
%0 = call double @llvm.rint.f64(double %in)
}
; FUNC-LABEL: {{^}}rint_v2f64:
-; CI: V_RNDNE_F64_e32
-; CI: V_RNDNE_F64_e32
+; CI: v_rndne_f64_e32
+; CI: v_rndne_f64_e32
define void @rint_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) {
entry:
%0 = call <2 x double> @llvm.rint.v2f64(<2 x double> %in)
}
; FUNC-LABEL: {{^}}rint_v4f64:
-; CI: V_RNDNE_F64_e32
-; CI: V_RNDNE_F64_e32
-; CI: V_RNDNE_F64_e32
-; CI: V_RNDNE_F64_e32
+; CI: v_rndne_f64_e32
+; CI: v_rndne_f64_e32
+; CI: v_rndne_f64_e32
+; CI: v_rndne_f64_e32
define void @rint_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) {
entry:
%0 = call <4 x double> @llvm.rint.v4f64(<4 x double> %in)
; FUNC-LABEL: {{^}}rint_f32:
; R600: RNDNE
-; SI: V_RNDNE_F32_e32
+; SI: v_rndne_f32_e32
define void @rint_f32(float addrspace(1)* %out, float %in) {
entry:
%0 = call float @llvm.rint.f32(float %in) #0
; R600: RNDNE
; R600: RNDNE
-; SI: V_RNDNE_F32_e32
-; SI: V_RNDNE_F32_e32
+; SI: v_rndne_f32_e32
+; SI: v_rndne_f32_e32
define void @rint_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
entry:
%0 = call <2 x float> @llvm.rint.v2f32(<2 x float> %in) #0
; R600: RNDNE
; R600: RNDNE
-; SI: V_RNDNE_F32_e32
-; SI: V_RNDNE_F32_e32
-; SI: V_RNDNE_F32_e32
-; SI: V_RNDNE_F32_e32
+; SI: v_rndne_f32_e32
+; SI: v_rndne_f32_e32
+; SI: v_rndne_f32_e32
+; SI: v_rndne_f32_e32
define void @rint_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
entry:
%0 = call <4 x float> @llvm.rint.v4f32(<4 x float> %in) #0
; FUNC-LABEL: {{^}}legacy_amdil_round_nearest_f32:
; R600: RNDNE
-; SI: V_RNDNE_F32_e32
+; SI: v_rndne_f32_e32
define void @legacy_amdil_round_nearest_f32(float addrspace(1)* %out, float %in) {
entry:
%0 = call float @llvm.AMDIL.round.nearest.f32(float %in) #0
; EG: ADD *
; EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; EG-NOT: SIN
-; SI: V_MUL_F32
-; SI: V_FRACT_F32
-; SI: V_SIN_F32
-; SI-NOT: V_SIN_F32
+; SI: v_mul_f32
+; SI: v_fract_f32
+; SI: v_sin_f32
+; SI-NOT: v_sin_f32
define void @sin_f32(float addrspace(1)* %out, float %x) #1 {
%sin = call float @llvm.sin.f32(float %x)
}
; FUNC-LABEL: {{^}}sin_3x_f32:
-; SI-UNSAFE-NOT: V_ADD_F32
+; SI-UNSAFE-NOT: v_add_f32
; SI-UNSAFE: 0x3ef47644
-; SI-UNSAFE: V_MUL_F32
-; SI-SAFE: V_MUL_F32
-; SI-SAFE: V_MUL_F32
-; SI: V_FRACT_F32
-; SI: V_SIN_F32
-; SI-NOT: V_SIN_F32
+; SI-UNSAFE: v_mul_f32
+; SI-SAFE: v_mul_f32
+; SI-SAFE: v_mul_f32
+; SI: v_fract_f32
+; SI: v_sin_f32
+; SI-NOT: v_sin_f32
define void @sin_3x_f32(float addrspace(1)* %out, float %x) #1 {
%y = fmul float 3.0, %x
%sin = call float @llvm.sin.f32(float %y)
}
; FUNC-LABEL: {{^}}sin_2x_f32:
-; SI-UNSAFE-NOT: V_ADD_F32
+; SI-UNSAFE-NOT: v_add_f32
; SI-UNSAFE: 0x3ea2f983
-; SI-UNSAFE: V_MUL_F32
-; SI-SAFE: V_ADD_F32
-; SI-SAFE: V_MUL_F32
-; SI: V_FRACT_F32
-; SI: V_SIN_F32
-; SI-NOT: V_SIN_F32
+; SI-UNSAFE: v_mul_f32
+; SI-SAFE: v_add_f32
+; SI-SAFE: v_mul_f32
+; SI: v_fract_f32
+; SI: v_sin_f32
+; SI-NOT: v_sin_f32
define void @sin_2x_f32(float addrspace(1)* %out, float %x) #1 {
%y = fmul float 2.0, %x
%sin = call float @llvm.sin.f32(float %y)
; FUNC-LABEL: {{^}}test_2sin_f32:
; SI-UNSAFE: 0x3ea2f983
-; SI-UNSAFE: V_MUL_F32
-; SI-SAFE: V_ADD_F32
-; SI-SAFE: V_MUL_F32
-; SI: V_FRACT_F32
-; SI: V_SIN_F32
-; SI-NOT: V_SIN_F32
+; SI-UNSAFE: v_mul_f32
+; SI-SAFE: v_add_f32
+; SI-SAFE: v_mul_f32
+; SI: v_fract_f32
+; SI: v_sin_f32
+; SI-NOT: v_sin_f32
define void @test_2sin_f32(float addrspace(1)* %out, float %x) #1 {
%y = fmul float 2.0, %x
%sin = call float @llvm.sin.f32(float %y)
; EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; EG-NOT: SIN
-; SI: V_SIN_F32
-; SI: V_SIN_F32
-; SI: V_SIN_F32
-; SI: V_SIN_F32
-; SI-NOT: V_SIN_F32
+; SI: v_sin_f32
+; SI: v_sin_f32
+; SI: v_sin_f32
+; SI: v_sin_f32
+; SI-NOT: v_sin_f32
define void @sin_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %vx) #1 {
%sin = call <4 x float> @llvm.sin.v4f32( <4 x float> %vx)
; R600-CHECK: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[2].Z
; R600-CHECK: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[2].Z, PS
; SI-CHECK-LABEL: {{^}}sqrt_f32:
-; SI-CHECK: V_SQRT_F32_e32
+; SI-CHECK: v_sqrt_f32_e32
define void @sqrt_f32(float addrspace(1)* %out, float %in) {
entry:
%0 = call float @llvm.sqrt.f32(float %in)
; R600-CHECK-DAG: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[3].X
; R600-CHECK-DAG: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[3].X, PS
; SI-CHECK-LABEL: {{^}}sqrt_v2f32:
-; SI-CHECK: V_SQRT_F32_e32
-; SI-CHECK: V_SQRT_F32_e32
+; SI-CHECK: v_sqrt_f32_e32
+; SI-CHECK: v_sqrt_f32_e32
define void @sqrt_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
entry:
%0 = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %in)
; R600-CHECK-DAG: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[4].X
; R600-CHECK-DAG: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[4].X, PS
; SI-CHECK-LABEL: {{^}}sqrt_v4f32:
-; SI-CHECK: V_SQRT_F32_e32
-; SI-CHECK: V_SQRT_F32_e32
-; SI-CHECK: V_SQRT_F32_e32
-; SI-CHECK: V_SQRT_F32_e32
+; SI-CHECK: v_sqrt_f32_e32
+; SI-CHECK: v_sqrt_f32_e32
+; SI-CHECK: v_sqrt_f32_e32
+; SI-CHECK: v_sqrt_f32_e32
define void @sqrt_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
entry:
%0 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %in)
; SI-LABEL: {{^}}global_copy_i1_to_i1:
-; SI: BUFFER_LOAD_UBYTE
-; SI: V_AND_B32_e32 v{{[0-9]+}}, 1
-; SI: BUFFER_STORE_BYTE
-; SI: S_ENDPGM
+; SI: buffer_load_ubyte
+; SI: v_and_b32_e32 v{{[0-9]+}}, 1
+; SI: buffer_store_byte
+; SI: s_endpgm
define void @global_copy_i1_to_i1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
%load = load i1 addrspace(1)* %in
store i1 %load, i1 addrspace(1)* %out, align 1
; SI-LABEL: {{^}}global_sextload_i1_to_i32:
; XSI: BUFFER_LOAD_BYTE
-; SI: BUFFER_STORE_DWORD
-; SI: S_ENDPGM
+; SI: buffer_store_dword
+; SI: s_endpgm
define void @global_sextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
%load = load i1 addrspace(1)* %in
%ext = sext i1 %load to i32
}
; SI-LABEL: {{^}}global_zextload_i1_to_i32:
-; SI: BUFFER_LOAD_UBYTE
-; SI: BUFFER_STORE_DWORD
-; SI: S_ENDPGM
+; SI: buffer_load_ubyte
+; SI: buffer_store_dword
+; SI: s_endpgm
define void @global_zextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
%load = load i1 addrspace(1)* %in
%ext = zext i1 %load to i32
; SI-LABEL: {{^}}global_sextload_i1_to_i64:
; XSI: BUFFER_LOAD_BYTE
-; SI: BUFFER_STORE_DWORDX2
-; SI: S_ENDPGM
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
define void @global_sextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
%load = load i1 addrspace(1)* %in
%ext = sext i1 %load to i64
}
; SI-LABEL: {{^}}global_zextload_i1_to_i64:
-; SI: BUFFER_LOAD_UBYTE
-; SI: BUFFER_STORE_DWORDX2
-; SI: S_ENDPGM
+; SI: buffer_load_ubyte
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
define void @global_zextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
%load = load i1 addrspace(1)* %in
%ext = zext i1 %load to i64
}
; SI-LABEL: {{^}}i1_arg:
-; SI: BUFFER_LOAD_UBYTE
-; SI: V_AND_B32_e32
-; SI: BUFFER_STORE_BYTE
-; SI: S_ENDPGM
+; SI: buffer_load_ubyte
+; SI: v_and_b32_e32
+; SI: buffer_store_byte
+; SI: s_endpgm
define void @i1_arg(i1 addrspace(1)* %out, i1 %x) nounwind {
store i1 %x, i1 addrspace(1)* %out, align 1
ret void
}
; SI-LABEL: {{^}}i1_arg_zext_i32:
-; SI: BUFFER_LOAD_UBYTE
-; SI: BUFFER_STORE_DWORD
-; SI: S_ENDPGM
+; SI: buffer_load_ubyte
+; SI: buffer_store_dword
+; SI: s_endpgm
define void @i1_arg_zext_i32(i32 addrspace(1)* %out, i1 %x) nounwind {
%ext = zext i1 %x to i32
store i32 %ext, i32 addrspace(1)* %out, align 4
}
; SI-LABEL: {{^}}i1_arg_zext_i64:
-; SI: BUFFER_LOAD_UBYTE
-; SI: BUFFER_STORE_DWORDX2
-; SI: S_ENDPGM
+; SI: buffer_load_ubyte
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
define void @i1_arg_zext_i64(i64 addrspace(1)* %out, i1 %x) nounwind {
%ext = zext i1 %x to i64
store i64 %ext, i64 addrspace(1)* %out, align 8
; SI-LABEL: {{^}}i1_arg_sext_i32:
; XSI: BUFFER_LOAD_BYTE
-; SI: BUFFER_STORE_DWORD
-; SI: S_ENDPGM
+; SI: buffer_store_dword
+; SI: s_endpgm
define void @i1_arg_sext_i32(i32 addrspace(1)* %out, i1 %x) nounwind {
%ext = sext i1 %x to i32
store i32 %ext, i32addrspace(1)* %out, align 4
; SI-LABEL: {{^}}i1_arg_sext_i64:
; XSI: BUFFER_LOAD_BYTE
-; SI: BUFFER_STORE_DWORDX2
-; SI: S_ENDPGM
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
define void @i1_arg_sext_i64(i64 addrspace(1)* %out, i1 %x) nounwind {
%ext = sext i1 %x to i64
store i64 %ext, i64 addrspace(1)* %out, align 8
; FUNC-LABEL: {{^}}load_i8:
; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
-; SI-CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}},
+; SI-CHECK: buffer_load_ubyte v{{[0-9]+}},
define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
%1 = load i8 addrspace(1)* %in
%2 = zext i8 %1 to i32
; R600-CHECK: 24
; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
; R600-CHECK: 24
-; SI-CHECK: BUFFER_LOAD_SBYTE
+; SI-CHECK: buffer_load_sbyte
define void @load_i8_sext(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
%0 = load i8 addrspace(1)* %in
; FUNC-LABEL: {{^}}load_v2i8:
; R600-CHECK: VTX_READ_8
; R600-CHECK: VTX_READ_8
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
define void @load_v2i8(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
entry:
%0 = load <2 x i8> addrspace(1)* %in
; R600-CHECK-DAG: 24
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
; R600-CHECK-DAG: 24
-; SI-CHECK: BUFFER_LOAD_SBYTE
-; SI-CHECK: BUFFER_LOAD_SBYTE
+; SI-CHECK: buffer_load_sbyte
+; SI-CHECK: buffer_load_sbyte
define void @load_v2i8_sext(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
entry:
%0 = load <2 x i8> addrspace(1)* %in
; R600-CHECK: VTX_READ_8
; R600-CHECK: VTX_READ_8
; R600-CHECK: VTX_READ_8
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
define void @load_v4i8(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
entry:
%0 = load <4 x i8> addrspace(1)* %in
; R600-CHECK-DAG: 24
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_W_CHAN]]
; R600-CHECK-DAG: 24
-; SI-CHECK: BUFFER_LOAD_SBYTE
-; SI-CHECK: BUFFER_LOAD_SBYTE
-; SI-CHECK: BUFFER_LOAD_SBYTE
-; SI-CHECK: BUFFER_LOAD_SBYTE
+; SI-CHECK: buffer_load_sbyte
+; SI-CHECK: buffer_load_sbyte
+; SI-CHECK: buffer_load_sbyte
+; SI-CHECK: buffer_load_sbyte
define void @load_v4i8_sext(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
entry:
%0 = load <4 x i8> addrspace(1)* %in
; Load an i16 value from the global address space.
; FUNC-LABEL: {{^}}load_i16:
; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
-; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: buffer_load_ushort
define void @load_i16(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
entry:
%0 = load i16 addrspace(1)* %in
; R600-CHECK: 16
; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
; R600-CHECK: 16
-; SI-CHECK: BUFFER_LOAD_SSHORT
+; SI-CHECK: buffer_load_sshort
define void @load_i16_sext(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
entry:
%0 = load i16 addrspace(1)* %in
; FUNC-LABEL: {{^}}load_v2i16:
; R600-CHECK: VTX_READ_16
; R600-CHECK: VTX_READ_16
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
define void @load_v2i16(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
entry:
%0 = load <2 x i16> addrspace(1)* %in
; R600-CHECK-DAG: 16
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
; R600-CHECK-DAG: 16
-; SI-CHECK: BUFFER_LOAD_SSHORT
-; SI-CHECK: BUFFER_LOAD_SSHORT
+; SI-CHECK: buffer_load_sshort
+; SI-CHECK: buffer_load_sshort
define void @load_v2i16_sext(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
entry:
%0 = load <2 x i16> addrspace(1)* %in
; R600-CHECK: VTX_READ_16
; R600-CHECK: VTX_READ_16
; R600-CHECK: VTX_READ_16
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
define void @load_v4i16(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
entry:
%0 = load <4 x i16> addrspace(1)* %in
; R600-CHECK-DAG: 16
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_W_CHAN]]
; R600-CHECK-DAG: 16
-; SI-CHECK: BUFFER_LOAD_SSHORT
-; SI-CHECK: BUFFER_LOAD_SSHORT
-; SI-CHECK: BUFFER_LOAD_SSHORT
-; SI-CHECK: BUFFER_LOAD_SSHORT
+; SI-CHECK: buffer_load_sshort
+; SI-CHECK: buffer_load_sshort
+; SI-CHECK: buffer_load_sshort
+; SI-CHECK: buffer_load_sshort
define void @load_v4i16_sext(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
entry:
%0 = load <4 x i16> addrspace(1)* %in
; FUNC-LABEL: {{^}}load_i32:
; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
-; SI-CHECK: BUFFER_LOAD_DWORD v{{[0-9]+}}
+; SI-CHECK: buffer_load_dword v{{[0-9]+}}
define void @load_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
%0 = load i32 addrspace(1)* %in
; FUNC-LABEL: {{^}}load_f32:
; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
-; SI-CHECK: BUFFER_LOAD_DWORD v{{[0-9]+}}
+; SI-CHECK: buffer_load_dword v{{[0-9]+}}
define void @load_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
entry:
%0 = load float addrspace(1)* %in
; FUNC-LABEL: {{^}}load_v2f32:
; R600-CHECK: MEM_RAT
; R600-CHECK: VTX_READ_64
-; SI-CHECK: BUFFER_LOAD_DWORDX2
+; SI-CHECK: buffer_load_dwordx2
define void @load_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) {
entry:
%0 = load <2 x float> addrspace(1)* %in
; FUNC-LABEL: {{^}}load_i64:
; R600-CHECK: VTX_READ_64
-; SI-CHECK: BUFFER_LOAD_DWORDX2
+; SI-CHECK: buffer_load_dwordx2
define void @load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
entry:
%0 = load i64 addrspace(1)* %in
; R600-CHECK: MEM_RAT
; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, literal.x
; R600-CHECK: 31
-; SI-CHECK: BUFFER_LOAD_DWORD
+; SI-CHECK: buffer_load_dword
define void @load_i64_sext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
; R600-CHECK: VTX_READ_128
; R600-CHECK: VTX_READ_128
; XXX: We should be using DWORDX4 instructions on SI.
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
define void @load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) {
entry:
%0 = load <8 x i32> addrspace(1)* %in
; R600-CHECK: VTX_READ_128
; R600-CHECK: VTX_READ_128
; XXX: We should be using DWORDX4 instructions on SI.
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
define void @load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) {
entry:
%0 = load <16 x i32> addrspace(1)* %in
; R600-CHECK: 24
; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
; R600-CHECK: 24
-; SI-CHECK: BUFFER_LOAD_SBYTE v{{[0-9]+}},
+; SI-CHECK: buffer_load_sbyte v{{[0-9]+}},
define void @load_const_i8_sext(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
entry:
%0 = load i8 addrspace(2)* %in
; Load an aligned i8 value
; FUNC-LABEL: {{^}}load_const_i8_aligned:
; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
-; SI-CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}},
+; SI-CHECK: buffer_load_ubyte v{{[0-9]+}},
define void @load_const_i8_aligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
entry:
%0 = load i8 addrspace(2)* %in
; Load an un-aligned i8 value
; FUNC-LABEL: {{^}}load_const_i8_unaligned:
; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
-; SI-CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}},
+; SI-CHECK: buffer_load_ubyte v{{[0-9]+}},
define void @load_const_i8_unaligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
entry:
%0 = getelementptr i8 addrspace(2)* %in, i32 1
; R600-CHECK: 16
; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
; R600-CHECK: 16
-; SI-CHECK: BUFFER_LOAD_SSHORT
+; SI-CHECK: buffer_load_sshort
define void @load_const_i16_sext(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
entry:
%0 = load i16 addrspace(2)* %in
; Load an aligned i16 value
; FUNC-LABEL: {{^}}load_const_i16_aligned:
; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
-; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: buffer_load_ushort
define void @load_const_i16_aligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
entry:
%0 = load i16 addrspace(2)* %in
; Load an un-aligned i16 value
; FUNC-LABEL: {{^}}load_const_i16_unaligned:
; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
-; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: buffer_load_ushort
define void @load_const_i16_unaligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
entry:
%0 = getelementptr i16 addrspace(2)* %in, i32 1
; FUNC-LABEL: {{^}}load_const_addrspace_i32:
; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
-; SI-CHECK: S_LOAD_DWORD s{{[0-9]+}}
+; SI-CHECK: s_load_dword s{{[0-9]+}}
define void @load_const_addrspace_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
%0 = load i32 addrspace(2)* %in
; FUNC-LABEL: {{^}}load_const_addrspace_f32:
; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
-; SI-CHECK: S_LOAD_DWORD s{{[0-9]+}}
+; SI-CHECK: s_load_dword s{{[0-9]+}}
define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(2)* %in) {
%1 = load float addrspace(2)* %in
store float %1, float addrspace(1)* %out
; Load an i8 value from the local address space.
; FUNC-LABEL: {{^}}load_i8_local:
; R600-CHECK: LDS_UBYTE_READ_RET
-; SI-CHECK-NOT: S_WQM_B64
-; SI-CHECK: S_MOV_B32 m0
-; SI-CHECK: DS_READ_U8
+; SI-CHECK-NOT: s_wqm_b64
+; SI-CHECK: s_mov_b32 m0
+; SI-CHECK: ds_read_u8
define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
%1 = load i8 addrspace(3)* %in
%2 = zext i8 %1 to i32
; FUNC-LABEL: {{^}}load_i8_sext_local:
; R600-CHECK: LDS_UBYTE_READ_RET
; R600-CHECK: ASHR
-; SI-CHECK-NOT: S_WQM_B64
-; SI-CHECK: S_MOV_B32 m0
-; SI-CHECK: DS_READ_I8
+; SI-CHECK-NOT: s_wqm_b64
+; SI-CHECK: s_mov_b32 m0
+; SI-CHECK: ds_read_i8
define void @load_i8_sext_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
entry:
%0 = load i8 addrspace(3)* %in
; FUNC-LABEL: {{^}}load_v2i8_local:
; R600-CHECK: LDS_UBYTE_READ_RET
; R600-CHECK: LDS_UBYTE_READ_RET
-; SI-CHECK-NOT: S_WQM_B64
-; SI-CHECK: S_MOV_B32 m0
-; SI-CHECK: DS_READ_U8
-; SI-CHECK: DS_READ_U8
+; SI-CHECK-NOT: s_wqm_b64
+; SI-CHECK: s_mov_b32 m0
+; SI-CHECK: ds_read_u8
+; SI-CHECK: ds_read_u8
define void @load_v2i8_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
entry:
%0 = load <2 x i8> addrspace(3)* %in
; R600-CHECK-DAG: LDS_UBYTE_READ_RET
; R600-CHECK-DAG: ASHR
; R600-CHECK-DAG: ASHR
-; SI-CHECK-NOT: S_WQM_B64
-; SI-CHECK: S_MOV_B32 m0
-; SI-CHECK: DS_READ_I8
-; SI-CHECK: DS_READ_I8
+; SI-CHECK-NOT: s_wqm_b64
+; SI-CHECK: s_mov_b32 m0
+; SI-CHECK: ds_read_i8
+; SI-CHECK: ds_read_i8
define void @load_v2i8_sext_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
entry:
%0 = load <2 x i8> addrspace(3)* %in
; R600-CHECK: LDS_UBYTE_READ_RET
; R600-CHECK: LDS_UBYTE_READ_RET
; R600-CHECK: LDS_UBYTE_READ_RET
-; SI-CHECK-NOT: S_WQM_B64
-; SI-CHECK: S_MOV_B32 m0
-; SI-CHECK: DS_READ_U8
-; SI-CHECK: DS_READ_U8
-; SI-CHECK: DS_READ_U8
-; SI-CHECK: DS_READ_U8
+; SI-CHECK-NOT: s_wqm_b64
+; SI-CHECK: s_mov_b32 m0
+; SI-CHECK: ds_read_u8
+; SI-CHECK: ds_read_u8
+; SI-CHECK: ds_read_u8
+; SI-CHECK: ds_read_u8
define void @load_v4i8_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) {
entry:
%0 = load <4 x i8> addrspace(3)* %in
; R600-CHECK-DAG: ASHR
; R600-CHECK-DAG: ASHR
; R600-CHECK-DAG: ASHR
-; SI-CHECK-NOT: S_WQM_B64
-; SI-CHECK: S_MOV_B32 m0
-; SI-CHECK: DS_READ_I8
-; SI-CHECK: DS_READ_I8
-; SI-CHECK: DS_READ_I8
-; SI-CHECK: DS_READ_I8
+; SI-CHECK-NOT: s_wqm_b64
+; SI-CHECK: s_mov_b32 m0
+; SI-CHECK: ds_read_i8
+; SI-CHECK: ds_read_i8
+; SI-CHECK: ds_read_i8
+; SI-CHECK: ds_read_i8
define void @load_v4i8_sext_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) {
entry:
%0 = load <4 x i8> addrspace(3)* %in
; Load an i16 value from the local address space.
; FUNC-LABEL: {{^}}load_i16_local:
; R600-CHECK: LDS_USHORT_READ_RET
-; SI-CHECK-NOT: S_WQM_B64
-; SI-CHECK: S_MOV_B32 m0
-; SI-CHECK: DS_READ_U16
+; SI-CHECK-NOT: s_wqm_b64
+; SI-CHECK: s_mov_b32 m0
+; SI-CHECK: ds_read_u16
define void @load_i16_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
entry:
%0 = load i16 addrspace(3)* %in
; FUNC-LABEL: {{^}}load_i16_sext_local:
; R600-CHECK: LDS_USHORT_READ_RET
; R600-CHECK: ASHR
-; SI-CHECK-NOT: S_WQM_B64
-; SI-CHECK: S_MOV_B32 m0
-; SI-CHECK: DS_READ_I16
+; SI-CHECK-NOT: s_wqm_b64
+; SI-CHECK: s_mov_b32 m0
+; SI-CHECK: ds_read_i16
define void @load_i16_sext_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
entry:
%0 = load i16 addrspace(3)* %in
; FUNC-LABEL: {{^}}load_v2i16_local:
; R600-CHECK: LDS_USHORT_READ_RET
; R600-CHECK: LDS_USHORT_READ_RET
-; SI-CHECK-NOT: S_WQM_B64
-; SI-CHECK: S_MOV_B32 m0
-; SI-CHECK: DS_READ_U16
-; SI-CHECK: DS_READ_U16
+; SI-CHECK-NOT: s_wqm_b64
+; SI-CHECK: s_mov_b32 m0
+; SI-CHECK: ds_read_u16
+; SI-CHECK: ds_read_u16
define void @load_v2i16_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
entry:
%0 = load <2 x i16> addrspace(3)* %in
; R600-CHECK-DAG: LDS_USHORT_READ_RET
; R600-CHECK-DAG: ASHR
; R600-CHECK-DAG: ASHR
-; SI-CHECK-NOT: S_WQM_B64
-; SI-CHECK: S_MOV_B32 m0
-; SI-CHECK: DS_READ_I16
-; SI-CHECK: DS_READ_I16
+; SI-CHECK-NOT: s_wqm_b64
+; SI-CHECK: s_mov_b32 m0
+; SI-CHECK: ds_read_i16
+; SI-CHECK: ds_read_i16
define void @load_v2i16_sext_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
entry:
%0 = load <2 x i16> addrspace(3)* %in
; R600-CHECK: LDS_USHORT_READ_RET
; R600-CHECK: LDS_USHORT_READ_RET
; R600-CHECK: LDS_USHORT_READ_RET
-; SI-CHECK-NOT: S_WQM_B64
-; SI-CHECK: S_MOV_B32 m0
-; SI-CHECK: DS_READ_U16
-; SI-CHECK: DS_READ_U16
-; SI-CHECK: DS_READ_U16
-; SI-CHECK: DS_READ_U16
+; SI-CHECK-NOT: s_wqm_b64
+; SI-CHECK: s_mov_b32 m0
+; SI-CHECK: ds_read_u16
+; SI-CHECK: ds_read_u16
+; SI-CHECK: ds_read_u16
+; SI-CHECK: ds_read_u16
define void @load_v4i16_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
entry:
%0 = load <4 x i16> addrspace(3)* %in
; R600-CHECK-DAG: ASHR
; R600-CHECK-DAG: ASHR
; R600-CHECK-DAG: ASHR
-; SI-CHECK-NOT: S_WQM_B64
-; SI-CHECK: S_MOV_B32 m0
-; SI-CHECK: DS_READ_I16
-; SI-CHECK: DS_READ_I16
-; SI-CHECK: DS_READ_I16
-; SI-CHECK: DS_READ_I16
+; SI-CHECK-NOT: s_wqm_b64
+; SI-CHECK: s_mov_b32 m0
+; SI-CHECK: ds_read_i16
+; SI-CHECK: ds_read_i16
+; SI-CHECK: ds_read_i16
+; SI-CHECK: ds_read_i16
define void @load_v4i16_sext_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
entry:
%0 = load <4 x i16> addrspace(3)* %in
; load an i32 value from the local address space.
; FUNC-LABEL: {{^}}load_i32_local:
; R600-CHECK: LDS_READ_RET
-; SI-CHECK-NOT: S_WQM_B64
-; SI-CHECK: S_MOV_B32 m0
-; SI-CHECK: DS_READ_B32
+; SI-CHECK-NOT: s_wqm_b64
+; SI-CHECK: s_mov_b32 m0
+; SI-CHECK: ds_read_b32
define void @load_i32_local(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
entry:
%0 = load i32 addrspace(3)* %in
; load a f32 value from the local address space.
; FUNC-LABEL: {{^}}load_f32_local:
; R600-CHECK: LDS_READ_RET
-; SI-CHECK: S_MOV_B32 m0
-; SI-CHECK: DS_READ_B32
+; SI-CHECK: s_mov_b32 m0
+; SI-CHECK: ds_read_b32
define void @load_f32_local(float addrspace(1)* %out, float addrspace(3)* %in) {
entry:
%0 = load float addrspace(3)* %in
; FUNC-LABEL: {{^}}load_v2f32_local:
; R600-CHECK: LDS_READ_RET
; R600-CHECK: LDS_READ_RET
-; SI-CHECK: S_MOV_B32 m0
-; SI-CHECK: DS_READ_B64
+; SI-CHECK: s_mov_b32 m0
+; SI-CHECK: ds_read_b64
define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) {
entry:
%0 = load <2 x float> addrspace(3)* %in
; R600-CHECK: LDS_READ_RET
; R600-CHECK: LDS_READ_RET
; R600-CHECK: LDS_READ_RET
-; SI-CHECK-DAG: DS_READ_B32
-; SI-CHECK-DAG: DS_READ2_B32
+; SI-CHECK-DAG: ds_read_b32
+; SI-CHECK-DAG: ds_read2_b32
define void @load_i32_v2i32_local(<2 x i32> addrspace(1)* %out, i32 addrspace(3)* %in) {
%scalar = load i32 addrspace(3)* %in
%tmp0 = bitcast i32 addrspace(3)* %in to <2 x i32> addrspace(3)*
; On SI we need to make sure that the base offset is a register and not
; an immediate.
; FUNC-LABEL: {{^}}load_i32_local_const_ptr:
-; SI-CHECK: V_MOV_B32_e32 v[[ZERO:[0-9]+]], 0
-; SI-CHECK: DS_READ_B32 v0, v[[ZERO]] offset:4
+; SI-CHECK: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0
+; SI-CHECK: ds_read_b32 v0, v[[ZERO]] offset:4
; R600-CHECK: LDS_READ_RET
define void @load_i32_local_const_ptr(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
entry:
; EG-CHECK: {{^}}load_v2i32:
; EG-CHECK: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0
; SI-CHECK: {{^}}load_v2i32:
-; SI-CHECK: BUFFER_LOAD_DWORDX2 v[{{[0-9]+:[0-9]+}}]
+; SI-CHECK: buffer_load_dwordx2 v[{{[0-9]+:[0-9]+}}]
define void @load_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%a = load <2 x i32> addrspace(1) * %in
store <2 x i32> %a, <2 x i32> addrspace(1)* %out
; EG-CHECK: {{^}}load_v4i32:
; EG-CHECK: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0
; SI-CHECK: {{^}}load_v4i32:
-; SI-CHECK: BUFFER_LOAD_DWORDX4 v[{{[0-9]+:[0-9]+}}]
+; SI-CHECK: buffer_load_dwordx4 v[{{[0-9]+:[0-9]+}}]
define void @load_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%a = load <4 x i32> addrspace(1) * %in
store <4 x i32> %a, <4 x i32> addrspace(1)* %out
; load a f64 value from the global address space.
; CHECK-LABEL: {{^}}load_f64:
-; CHECK: BUFFER_LOAD_DWORDX2 v[{{[0-9]+:[0-9]+}}]
-; CHECK: BUFFER_STORE_DWORDX2 v[{{[0-9]+:[0-9]+}}]
+; CHECK: buffer_load_dwordx2 v[{{[0-9]+:[0-9]+}}]
+; CHECK: buffer_store_dwordx2 v[{{[0-9]+:[0-9]+}}]
define void @load_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
%1 = load double addrspace(1)* %in
store double %1, double addrspace(1)* %out
}
; CHECK-LABEL: {{^}}load_i64:
-; CHECK: BUFFER_LOAD_DWORDX2 v[{{[0-9]+:[0-9]+}}]
-; CHECK: BUFFER_STORE_DWORDX2 v[{{[0-9]+:[0-9]+}}]
+; CHECK: buffer_load_dwordx2 v[{{[0-9]+:[0-9]+}}]
+; CHECK: buffer_store_dwordx2 v[{{[0-9]+:[0-9]+}}]
define void @load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
%tmp = load i64 addrspace(1)* %in
store i64 %tmp, i64 addrspace(1)* %out, align 8
; Load a f64 value from the constant address space.
; CHECK-LABEL: {{^}}load_const_addrspace_f64:
-; CHECK: S_LOAD_DWORDX2 s[{{[0-9]+:[0-9]+}}]
-; CHECK: BUFFER_STORE_DWORDX2 v[{{[0-9]+:[0-9]+}}]
+; CHECK: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}]
+; CHECK: buffer_store_dwordx2 v[{{[0-9]+:[0-9]+}}]
define void @load_const_addrspace_f64(double addrspace(1)* %out, double addrspace(2)* %in) {
%1 = load double addrspace(2)* %in
store double %1, double addrspace(1)* %out
; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs< %s | FileCheck --check-prefix=CI --check-prefix=BOTH %s
; BOTH-LABEL: {{^}}local_i32_load
-; BOTH: DS_READ_B32 [[REG:v[0-9]+]], v{{[0-9]+}} offset:28 [M0]
-; BOTH: BUFFER_STORE_DWORD [[REG]],
+; BOTH: ds_read_b32 [[REG:v[0-9]+]], v{{[0-9]+}} offset:28 [M0]
+; BOTH: buffer_store_dword [[REG]],
define void @local_i32_load(i32 addrspace(1)* %out, i32 addrspace(3)* %in) nounwind {
%gep = getelementptr i32 addrspace(3)* %in, i32 7
%val = load i32 addrspace(3)* %gep, align 4
}
; BOTH-LABEL: {{^}}local_i32_load_0_offset
-; BOTH: DS_READ_B32 [[REG:v[0-9]+]], v{{[0-9]+}} [M0]
-; BOTH: BUFFER_STORE_DWORD [[REG]],
+; BOTH: ds_read_b32 [[REG:v[0-9]+]], v{{[0-9]+}} [M0]
+; BOTH: buffer_store_dword [[REG]],
define void @local_i32_load_0_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) nounwind {
%val = load i32 addrspace(3)* %in, align 4
store i32 %val, i32 addrspace(1)* %out, align 4
; BOTH-LABEL: {{^}}local_i8_load_i16_max_offset:
; BOTH-NOT: ADD
-; BOTH: DS_READ_U8 [[REG:v[0-9]+]], {{v[0-9]+}} offset:65535 [M0]
-; BOTH: BUFFER_STORE_BYTE [[REG]],
+; BOTH: ds_read_u8 [[REG:v[0-9]+]], {{v[0-9]+}} offset:65535 [M0]
+; BOTH: buffer_store_byte [[REG]],
define void @local_i8_load_i16_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %in) nounwind {
%gep = getelementptr i8 addrspace(3)* %in, i32 65535
%val = load i8 addrspace(3)* %gep, align 4
; BOTH-LABEL: {{^}}local_i8_load_over_i16_max_offset:
; The LDS offset will be 65536 bytes, which is larger than the size of LDS on
; SI, which is why it is being OR'd with the base pointer.
-; SI: S_OR_B32 [[ADDR:s[0-9]+]], s{{[0-9]+}}, 0x10000
-; CI: S_ADD_I32 [[ADDR:s[0-9]+]], s{{[0-9]+}}, 0x10000
-; BOTH: V_MOV_B32_e32 [[VREGADDR:v[0-9]+]], [[ADDR]]
-; BOTH: DS_READ_U8 [[REG:v[0-9]+]], [[VREGADDR]] [M0]
-; BOTH: BUFFER_STORE_BYTE [[REG]],
+; SI: s_or_b32 [[ADDR:s[0-9]+]], s{{[0-9]+}}, 0x10000
+; CI: s_add_i32 [[ADDR:s[0-9]+]], s{{[0-9]+}}, 0x10000
+; BOTH: v_mov_b32_e32 [[VREGADDR:v[0-9]+]], [[ADDR]]
+; BOTH: ds_read_u8 [[REG:v[0-9]+]], [[VREGADDR]] [M0]
+; BOTH: buffer_store_byte [[REG]],
define void @local_i8_load_over_i16_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %in) nounwind {
%gep = getelementptr i8 addrspace(3)* %in, i32 65536
%val = load i8 addrspace(3)* %gep, align 4
; BOTH-LABEL: {{^}}local_i64_load:
; BOTH-NOT: ADD
-; BOTH: DS_READ_B64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}} offset:56 [M0]
-; BOTH: BUFFER_STORE_DWORDX2 [[REG]],
+; BOTH: ds_read_b64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}} offset:56 [M0]
+; BOTH: buffer_store_dwordx2 [[REG]],
define void @local_i64_load(i64 addrspace(1)* %out, i64 addrspace(3)* %in) nounwind {
%gep = getelementptr i64 addrspace(3)* %in, i32 7
%val = load i64 addrspace(3)* %gep, align 8
}
; BOTH-LABEL: {{^}}local_i64_load_0_offset
-; BOTH: DS_READ_B64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}} [M0]
-; BOTH: BUFFER_STORE_DWORDX2 [[REG]],
+; BOTH: ds_read_b64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}} [M0]
+; BOTH: buffer_store_dwordx2 [[REG]],
define void @local_i64_load_0_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %in) nounwind {
%val = load i64 addrspace(3)* %in, align 8
store i64 %val, i64 addrspace(1)* %out, align 8
; BOTH-LABEL: {{^}}local_f64_load:
; BOTH-NOT: ADD
-; BOTH: DS_READ_B64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}} offset:56 [M0]
-; BOTH: BUFFER_STORE_DWORDX2 [[REG]],
+; BOTH: ds_read_b64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}} offset:56 [M0]
+; BOTH: buffer_store_dwordx2 [[REG]],
define void @local_f64_load(double addrspace(1)* %out, double addrspace(3)* %in) nounwind {
%gep = getelementptr double addrspace(3)* %in, i32 7
%val = load double addrspace(3)* %gep, align 8
}
; BOTH-LABEL: {{^}}local_f64_load_0_offset
-; BOTH: DS_READ_B64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}} [M0]
-; BOTH: BUFFER_STORE_DWORDX2 [[REG]],
+; BOTH: ds_read_b64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}} [M0]
+; BOTH: buffer_store_dwordx2 [[REG]],
define void @local_f64_load_0_offset(double addrspace(1)* %out, double addrspace(3)* %in) nounwind {
%val = load double addrspace(3)* %in, align 8
store double %val, double addrspace(1)* %out, align 8
; BOTH-LABEL: {{^}}local_i64_store:
; BOTH-NOT: ADD
-; BOTH: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:56 [M0]
+; BOTH: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:56 [M0]
define void @local_i64_store(i64 addrspace(3)* %out) nounwind {
%gep = getelementptr i64 addrspace(3)* %out, i32 7
store i64 5678, i64 addrspace(3)* %gep, align 8
; BOTH-LABEL: {{^}}local_i64_store_0_offset:
; BOTH-NOT: ADD
-; BOTH: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} [M0]
+; BOTH: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} [M0]
define void @local_i64_store_0_offset(i64 addrspace(3)* %out) nounwind {
store i64 1234, i64 addrspace(3)* %out, align 8
ret void
; BOTH-LABEL: {{^}}local_f64_store:
; BOTH-NOT: ADD
-; BOTH: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:56 [M0]
+; BOTH: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:56 [M0]
define void @local_f64_store(double addrspace(3)* %out) nounwind {
%gep = getelementptr double addrspace(3)* %out, i32 7
store double 16.0, double addrspace(3)* %gep, align 8
}
; BOTH-LABEL: {{^}}local_f64_store_0_offset
-; BOTH: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} [M0]
+; BOTH: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} [M0]
define void @local_f64_store_0_offset(double addrspace(3)* %out) nounwind {
store double 20.0, double addrspace(3)* %out, align 8
ret void
; BOTH-LABEL: {{^}}local_v2i64_store:
; BOTH-NOT: ADD
-; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:112 [M0]
-; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:120 [M0]
-; BOTH: S_ENDPGM
+; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:112 [M0]
+; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:120 [M0]
+; BOTH: s_endpgm
define void @local_v2i64_store(<2 x i64> addrspace(3)* %out) nounwind {
%gep = getelementptr <2 x i64> addrspace(3)* %out, i32 7
store <2 x i64> <i64 5678, i64 5678>, <2 x i64> addrspace(3)* %gep, align 16
; BOTH-LABEL: {{^}}local_v2i64_store_0_offset:
; BOTH-NOT: ADD
-; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} [M0]
-; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:8 [M0]
-; BOTH: S_ENDPGM
+; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} [M0]
+; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:8 [M0]
+; BOTH: s_endpgm
define void @local_v2i64_store_0_offset(<2 x i64> addrspace(3)* %out) nounwind {
store <2 x i64> <i64 1234, i64 1234>, <2 x i64> addrspace(3)* %out, align 16
ret void
; BOTH-LABEL: {{^}}local_v4i64_store:
; BOTH-NOT: ADD
-; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:224 [M0]
-; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:232 [M0]
-; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:240 [M0]
-; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:248 [M0]
-; BOTH: S_ENDPGM
+; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:224 [M0]
+; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:232 [M0]
+; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:240 [M0]
+; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:248 [M0]
+; BOTH: s_endpgm
define void @local_v4i64_store(<4 x i64> addrspace(3)* %out) nounwind {
%gep = getelementptr <4 x i64> addrspace(3)* %out, i32 7
store <4 x i64> <i64 5678, i64 5678, i64 5678, i64 5678>, <4 x i64> addrspace(3)* %gep, align 16
; BOTH-LABEL: {{^}}local_v4i64_store_0_offset:
; BOTH-NOT: ADD
-; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} [M0]
-; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:8 [M0]
-; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:16 [M0]
-; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:24 [M0]
-; BOTH: S_ENDPGM
+; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} [M0]
+; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:8 [M0]
+; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:16 [M0]
+; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:24 [M0]
+; BOTH: s_endpgm
define void @local_v4i64_store_0_offset(<4 x i64> addrspace(3)* %out) nounwind {
store <4 x i64> <i64 1234, i64 1234, i64 1234, i64 1234>, <4 x i64> addrspace(3)* %out, align 16
ret void
; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i32:
; EG: LDS_WRXCHG_RET *
-; SI: S_LOAD_DWORD [[SPTR:s[0-9]+]],
-; SI: V_MOV_B32_e32 [[DATA:v[0-9]+]], 4
-; SI: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
-; SI: DS_WRXCHG_RTN_B32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0]
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; SI: s_load_dword [[SPTR:s[0-9]+]],
+; SI: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
+; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
+; SI: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
define void @lds_atomic_xchg_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw xchg i32 addrspace(3)* %ptr, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i32_offset:
; EG: LDS_WRXCHG_RET *
-; SI: DS_WRXCHG_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: S_ENDPGM
+; SI: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
define void @lds_atomic_xchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw xchg i32 addrspace(3)* %gep, i32 4 seq_cst
; XXX - Is it really necessary to load 4 into VGPR?
; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32:
; EG: LDS_ADD_RET *
-; SI: S_LOAD_DWORD [[SPTR:s[0-9]+]],
-; SI: V_MOV_B32_e32 [[DATA:v[0-9]+]], 4
-; SI: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
-; SI: DS_ADD_RTN_U32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0]
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; SI: s_load_dword [[SPTR:s[0-9]+]],
+; SI: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
+; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
+; SI: ds_add_rtn_u32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
define void @lds_atomic_add_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw add i32 addrspace(3)* %ptr, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32_offset:
; EG: LDS_ADD_RET *
-; SI: DS_ADD_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: S_ENDPGM
+; SI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
define void @lds_atomic_add_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32_bad_si_offset:
; EG: LDS_ADD_RET *
-; SI: DS_ADD_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} [M0]
-; CI: DS_ADD_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: S_ENDPGM
+; SI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} [M0]
+; CI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
define void @lds_atomic_add_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
%sub = sub i32 %a, %b
%add = add i32 %sub, 4
; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i32:
; EG: LDS_ADD_RET *
-; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1
-; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
-; SI: DS_INC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] [M0]
-; SI: S_ENDPGM
+; SI: s_mov_b32 [[SNEGONE:s[0-9]+]], -1
+; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
+; SI: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] [M0]
+; SI: s_endpgm
define void @lds_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw add i32 addrspace(3)* %ptr, i32 1 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i32_offset:
; EG: LDS_ADD_RET *
-; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1
-; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
-; SI: DS_INC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] offset:16
-; SI: S_ENDPGM
+; SI: s_mov_b32 [[SNEGONE:s[0-9]+]], -1
+; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
+; SI: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] offset:16
+; SI: s_endpgm
define void @lds_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i32_bad_si_offset:
; EG: LDS_ADD_RET *
-; SI: DS_INC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} [M0]
-; CI: DS_INC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: S_ENDPGM
+; SI: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} [M0]
+; CI: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
define void @lds_atomic_inc_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
%sub = sub i32 %a, %b
%add = add i32 %sub, 4
; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i32:
; EG: LDS_SUB_RET *
-; SI: DS_SUB_RTN_U32
-; SI: S_ENDPGM
+; SI: ds_sub_rtn_u32
+; SI: s_endpgm
define void @lds_atomic_sub_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw sub i32 addrspace(3)* %ptr, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i32_offset:
; EG: LDS_SUB_RET *
-; SI: DS_SUB_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: S_ENDPGM
+; SI: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
define void @lds_atomic_sub_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw sub i32 addrspace(3)* %gep, i32 4 seq_cst
; FUNC-LABEL: {{^}}lds_atomic_dec_ret_i32:
; EG: LDS_SUB_RET *
-; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1
-; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
-; SI: DS_DEC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] [M0]
-; SI: S_ENDPGM
+; SI: s_mov_b32 [[SNEGONE:s[0-9]+]], -1
+; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
+; SI: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] [M0]
+; SI: s_endpgm
define void @lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw sub i32 addrspace(3)* %ptr, i32 1 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
; FUNC-LABEL: {{^}}lds_atomic_dec_ret_i32_offset:
; EG: LDS_SUB_RET *
-; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1
-; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
-; SI: DS_DEC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] offset:16
-; SI: S_ENDPGM
+; SI: s_mov_b32 [[SNEGONE:s[0-9]+]], -1
+; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
+; SI: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] offset:16
+; SI: s_endpgm
define void @lds_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw sub i32 addrspace(3)* %gep, i32 1 seq_cst
; FUNC-LABEL: {{^}}lds_atomic_and_ret_i32:
; EG: LDS_AND_RET *
-; SI: DS_AND_RTN_B32
-; SI: S_ENDPGM
+; SI: ds_and_rtn_b32
+; SI: s_endpgm
define void @lds_atomic_and_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw and i32 addrspace(3)* %ptr, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
; FUNC-LABEL: {{^}}lds_atomic_and_ret_i32_offset:
; EG: LDS_AND_RET *
-; SI: DS_AND_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: S_ENDPGM
+; SI: ds_and_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
define void @lds_atomic_and_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw and i32 addrspace(3)* %gep, i32 4 seq_cst
; FUNC-LABEL: {{^}}lds_atomic_or_ret_i32:
; EG: LDS_OR_RET *
-; SI: DS_OR_RTN_B32
-; SI: S_ENDPGM
+; SI: ds_or_rtn_b32
+; SI: s_endpgm
define void @lds_atomic_or_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw or i32 addrspace(3)* %ptr, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
; FUNC-LABEL: {{^}}lds_atomic_or_ret_i32_offset:
; EG: LDS_OR_RET *
-; SI: DS_OR_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: S_ENDPGM
+; SI: ds_or_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
define void @lds_atomic_or_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw or i32 addrspace(3)* %gep, i32 4 seq_cst
; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i32:
; EG: LDS_XOR_RET *
-; SI: DS_XOR_RTN_B32
-; SI: S_ENDPGM
+; SI: ds_xor_rtn_b32
+; SI: s_endpgm
define void @lds_atomic_xor_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw xor i32 addrspace(3)* %ptr, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i32_offset:
; EG: LDS_XOR_RET *
-; SI: DS_XOR_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: S_ENDPGM
+; SI: ds_xor_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
define void @lds_atomic_xor_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw xor i32 addrspace(3)* %gep, i32 4 seq_cst
; FUNC-LABEL: {{^}}lds_atomic_min_ret_i32:
; EG: LDS_MIN_INT_RET *
-; SI: DS_MIN_RTN_I32
-; SI: S_ENDPGM
+; SI: ds_min_rtn_i32
+; SI: s_endpgm
define void @lds_atomic_min_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw min i32 addrspace(3)* %ptr, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
; FUNC-LABEL: {{^}}lds_atomic_min_ret_i32_offset:
; EG: LDS_MIN_INT_RET *
-; SI: DS_MIN_RTN_I32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: S_ENDPGM
+; SI: ds_min_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
define void @lds_atomic_min_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw min i32 addrspace(3)* %gep, i32 4 seq_cst
; FUNC-LABEL: {{^}}lds_atomic_max_ret_i32:
; EG: LDS_MAX_INT_RET *
-; SI: DS_MAX_RTN_I32
-; SI: S_ENDPGM
+; SI: ds_max_rtn_i32
+; SI: s_endpgm
define void @lds_atomic_max_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw max i32 addrspace(3)* %ptr, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
; FUNC-LABEL: {{^}}lds_atomic_max_ret_i32_offset:
; EG: LDS_MAX_INT_RET *
-; SI: DS_MAX_RTN_I32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: S_ENDPGM
+; SI: ds_max_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
define void @lds_atomic_max_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw max i32 addrspace(3)* %gep, i32 4 seq_cst
; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i32:
; EG: LDS_MIN_UINT_RET *
-; SI: DS_MIN_RTN_U32
-; SI: S_ENDPGM
+; SI: ds_min_rtn_u32
+; SI: s_endpgm
define void @lds_atomic_umin_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw umin i32 addrspace(3)* %ptr, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i32_offset:
; EG: LDS_MIN_UINT_RET *
-; SI: DS_MIN_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: S_ENDPGM
+; SI: ds_min_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
define void @lds_atomic_umin_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw umin i32 addrspace(3)* %gep, i32 4 seq_cst
; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i32:
; EG: LDS_MAX_UINT_RET *
-; SI: DS_MAX_RTN_U32
-; SI: S_ENDPGM
+; SI: ds_max_rtn_u32
+; SI: s_endpgm
define void @lds_atomic_umax_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw umax i32 addrspace(3)* %ptr, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i32_offset:
; EG: LDS_MAX_UINT_RET *
-; SI: DS_MAX_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: S_ENDPGM
+; SI: ds_max_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
define void @lds_atomic_umax_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw umax i32 addrspace(3)* %gep, i32 4 seq_cst
}
; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i32:
-; SI: S_LOAD_DWORD [[SPTR:s[0-9]+]],
-; SI: V_MOV_B32_e32 [[DATA:v[0-9]+]], 4
-; SI: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
-; SI: DS_WRXCHG_RTN_B32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0]
-; SI: S_ENDPGM
+; SI: s_load_dword [[SPTR:s[0-9]+]],
+; SI: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
+; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
+; SI: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0]
+; SI: s_endpgm
define void @lds_atomic_xchg_noret_i32(i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw xchg i32 addrspace(3)* %ptr, i32 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i32_offset:
-; SI: DS_WRXCHG_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: S_ENDPGM
+; SI: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
define void @lds_atomic_xchg_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw xchg i32 addrspace(3)* %gep, i32 4 seq_cst
; XXX - Is it really necessary to load 4 into VGPR?
; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32:
-; SI: S_LOAD_DWORD [[SPTR:s[0-9]+]],
-; SI: V_MOV_B32_e32 [[DATA:v[0-9]+]], 4
-; SI: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
-; SI: DS_ADD_U32 [[VPTR]], [[DATA]] [M0]
-; SI: S_ENDPGM
+; SI: s_load_dword [[SPTR:s[0-9]+]],
+; SI: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
+; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
+; SI: ds_add_u32 [[VPTR]], [[DATA]] [M0]
+; SI: s_endpgm
define void @lds_atomic_add_noret_i32(i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw add i32 addrspace(3)* %ptr, i32 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32_offset:
-; SI: DS_ADD_U32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: S_ENDPGM
+; SI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
define void @lds_atomic_add_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
}
; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32_bad_si_offset
-; SI: DS_ADD_U32 v{{[0-9]+}}, v{{[0-9]+}} [M0]
-; CI: DS_ADD_U32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 [M0]
-; SI: S_ENDPGM
+; SI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} [M0]
+; CI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 [M0]
+; SI: s_endpgm
define void @lds_atomic_add_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
%sub = sub i32 %a, %b
%add = add i32 %sub, 4
}
; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i32:
-; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1
-; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
-; SI: DS_INC_U32 v{{[0-9]+}}, [[NEGONE]] [M0]
-; SI: S_ENDPGM
+; SI: s_mov_b32 [[SNEGONE:s[0-9]+]], -1
+; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
+; SI: ds_inc_u32 v{{[0-9]+}}, [[NEGONE]] [M0]
+; SI: s_endpgm
define void @lds_atomic_inc_noret_i32(i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw add i32 addrspace(3)* %ptr, i32 1 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i32_offset:
-; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1
-; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
-; SI: DS_INC_U32 v{{[0-9]+}}, [[NEGONE]] offset:16
-; SI: S_ENDPGM
+; SI: s_mov_b32 [[SNEGONE:s[0-9]+]], -1
+; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
+; SI: ds_inc_u32 v{{[0-9]+}}, [[NEGONE]] offset:16
+; SI: s_endpgm
define void @lds_atomic_inc_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
}
; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i32_bad_si_offset:
-; SI: DS_INC_U32 v{{[0-9]+}}, v{{[0-9]+}}
-; CI: DS_INC_U32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: S_ENDPGM
+; SI: ds_inc_u32 v{{[0-9]+}}, v{{[0-9]+}}
+; CI: ds_inc_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
define void @lds_atomic_inc_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
%sub = sub i32 %a, %b
%add = add i32 %sub, 4
}
; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i32:
-; SI: DS_SUB_U32
-; SI: S_ENDPGM
+; SI: ds_sub_u32
+; SI: s_endpgm
define void @lds_atomic_sub_noret_i32(i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw sub i32 addrspace(3)* %ptr, i32 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i32_offset:
-; SI: DS_SUB_U32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: S_ENDPGM
+; SI: ds_sub_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
define void @lds_atomic_sub_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw sub i32 addrspace(3)* %gep, i32 4 seq_cst
}
; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i32:
-; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1
-; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
-; SI: DS_DEC_U32 v{{[0-9]+}}, [[NEGONE]]
-; SI: S_ENDPGM
+; SI: s_mov_b32 [[SNEGONE:s[0-9]+]], -1
+; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
+; SI: ds_dec_u32 v{{[0-9]+}}, [[NEGONE]]
+; SI: s_endpgm
define void @lds_atomic_dec_noret_i32(i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw sub i32 addrspace(3)* %ptr, i32 1 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i32_offset:
-; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1
-; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
-; SI: DS_DEC_U32 v{{[0-9]+}}, [[NEGONE]] offset:16
-; SI: S_ENDPGM
+; SI: s_mov_b32 [[SNEGONE:s[0-9]+]], -1
+; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
+; SI: ds_dec_u32 v{{[0-9]+}}, [[NEGONE]] offset:16
+; SI: s_endpgm
define void @lds_atomic_dec_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw sub i32 addrspace(3)* %gep, i32 1 seq_cst
}
; FUNC-LABEL: {{^}}lds_atomic_and_noret_i32:
-; SI: DS_AND_B32
-; SI: S_ENDPGM
+; SI: ds_and_b32
+; SI: s_endpgm
define void @lds_atomic_and_noret_i32(i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw and i32 addrspace(3)* %ptr, i32 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_and_noret_i32_offset:
-; SI: DS_AND_B32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: S_ENDPGM
+; SI: ds_and_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
define void @lds_atomic_and_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw and i32 addrspace(3)* %gep, i32 4 seq_cst
}
; FUNC-LABEL: {{^}}lds_atomic_or_noret_i32:
-; SI: DS_OR_B32
-; SI: S_ENDPGM
+; SI: ds_or_b32
+; SI: s_endpgm
define void @lds_atomic_or_noret_i32(i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw or i32 addrspace(3)* %ptr, i32 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_or_noret_i32_offset:
-; SI: DS_OR_B32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: S_ENDPGM
+; SI: ds_or_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
define void @lds_atomic_or_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw or i32 addrspace(3)* %gep, i32 4 seq_cst
}
; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i32:
-; SI: DS_XOR_B32
-; SI: S_ENDPGM
+; SI: ds_xor_b32
+; SI: s_endpgm
define void @lds_atomic_xor_noret_i32(i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw xor i32 addrspace(3)* %ptr, i32 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i32_offset:
-; SI: DS_XOR_B32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: S_ENDPGM
+; SI: ds_xor_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
define void @lds_atomic_xor_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw xor i32 addrspace(3)* %gep, i32 4 seq_cst
; }
; FUNC-LABEL: {{^}}lds_atomic_min_noret_i32:
-; SI: DS_MIN_I32
-; SI: S_ENDPGM
+; SI: ds_min_i32
+; SI: s_endpgm
define void @lds_atomic_min_noret_i32(i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw min i32 addrspace(3)* %ptr, i32 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_min_noret_i32_offset:
-; SI: DS_MIN_I32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: S_ENDPGM
+; SI: ds_min_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
define void @lds_atomic_min_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw min i32 addrspace(3)* %gep, i32 4 seq_cst
}
; FUNC-LABEL: {{^}}lds_atomic_max_noret_i32:
-; SI: DS_MAX_I32
-; SI: S_ENDPGM
+; SI: ds_max_i32
+; SI: s_endpgm
define void @lds_atomic_max_noret_i32(i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw max i32 addrspace(3)* %ptr, i32 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_max_noret_i32_offset:
-; SI: DS_MAX_I32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: S_ENDPGM
+; SI: ds_max_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
define void @lds_atomic_max_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw max i32 addrspace(3)* %gep, i32 4 seq_cst
}
; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i32:
-; SI: DS_MIN_U32
-; SI: S_ENDPGM
+; SI: ds_min_u32
+; SI: s_endpgm
define void @lds_atomic_umin_noret_i32(i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw umin i32 addrspace(3)* %ptr, i32 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i32_offset:
-; SI: DS_MIN_U32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: S_ENDPGM
+; SI: ds_min_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
define void @lds_atomic_umin_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw umin i32 addrspace(3)* %gep, i32 4 seq_cst
}
; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i32:
-; SI: DS_MAX_U32
-; SI: S_ENDPGM
+; SI: ds_max_u32
+; SI: s_endpgm
define void @lds_atomic_umax_noret_i32(i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw umax i32 addrspace(3)* %ptr, i32 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i32_offset:
-; SI: DS_MAX_U32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: S_ENDPGM
+; SI: ds_max_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
define void @lds_atomic_umax_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw umax i32 addrspace(3)* %gep, i32 4 seq_cst
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI %s
; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i64:
-; SI: DS_WRXCHG_RTN_B64
-; SI: S_ENDPGM
+; SI: ds_wrxchg_rtn_b64
+; SI: s_endpgm
define void @lds_atomic_xchg_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw xchg i64 addrspace(3)* %ptr, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
}
; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i64_offset:
-; SI: DS_WRXCHG_RTN_B64 {{.*}} offset:32
-; SI: S_ENDPGM
+; SI: ds_wrxchg_rtn_b64 {{.*}} offset:32
+; SI: s_endpgm
define void @lds_atomic_xchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw xchg i64 addrspace(3)* %gep, i64 4 seq_cst
}
; FUNC-LABEL: {{^}}lds_atomic_add_ret_i64:
-; SI: DS_ADD_RTN_U64
-; SI: S_ENDPGM
+; SI: ds_add_rtn_u64
+; SI: s_endpgm
define void @lds_atomic_add_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw add i64 addrspace(3)* %ptr, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
}
; FUNC-LABEL: {{^}}lds_atomic_add_ret_i64_offset:
-; SI: S_LOAD_DWORD [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI: S_MOV_B64 s{{\[}}[[LOSDATA:[0-9]+]]:[[HISDATA:[0-9]+]]{{\]}}, 9
-; SI-DAG: V_MOV_B32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]]
-; SI-DAG: V_MOV_B32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]]
-; SI-DAG: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[PTR]]
-; SI: DS_ADD_RTN_U64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32 [M0]
-; SI: BUFFER_STORE_DWORDX2 [[RESULT]],
-; SI: S_ENDPGM
+; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI: s_mov_b64 s{{\[}}[[LOSDATA:[0-9]+]]:[[HISDATA:[0-9]+]]{{\]}}, 9
+; SI-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]]
+; SI-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]]
+; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
+; SI: ds_add_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32 [M0]
+; SI: buffer_store_dwordx2 [[RESULT]],
+; SI: s_endpgm
define void @lds_atomic_add_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i64 4
%result = atomicrmw add i64 addrspace(3)* %gep, i64 9 seq_cst
}
; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i64:
-; SI: S_MOV_B64 s{{\[}}[[LOSDATA:[0-9]+]]:[[HISDATA:[0-9]+]]{{\]}}, -1
-; SI-DAG: V_MOV_B32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]]
-; SI-DAG: V_MOV_B32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]]
-; SI: DS_INC_RTN_U64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
-; SI: BUFFER_STORE_DWORDX2 [[RESULT]],
-; SI: S_ENDPGM
+; SI: s_mov_b64 s{{\[}}[[LOSDATA:[0-9]+]]:[[HISDATA:[0-9]+]]{{\]}}, -1
+; SI-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]]
+; SI-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]]
+; SI: ds_inc_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
+; SI: buffer_store_dwordx2 [[RESULT]],
+; SI: s_endpgm
define void @lds_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw add i64 addrspace(3)* %ptr, i64 1 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
}
; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i64_offset:
-; SI: DS_INC_RTN_U64 {{.*}} offset:32
-; SI: S_ENDPGM
+; SI: ds_inc_rtn_u64 {{.*}} offset:32
+; SI: s_endpgm
define void @lds_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw add i64 addrspace(3)* %gep, i64 1 seq_cst
}
; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i64:
-; SI: DS_SUB_RTN_U64
-; SI: S_ENDPGM
+; SI: ds_sub_rtn_u64
+; SI: s_endpgm
define void @lds_atomic_sub_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw sub i64 addrspace(3)* %ptr, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
}
; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i64_offset:
-; SI: DS_SUB_RTN_U64 {{.*}} offset:32
-; SI: S_ENDPGM
+; SI: ds_sub_rtn_u64 {{.*}} offset:32
+; SI: s_endpgm
define void @lds_atomic_sub_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw sub i64 addrspace(3)* %gep, i64 4 seq_cst
}
; FUNC-LABEL: {{^}}lds_atomic_dec_ret_i64:
-; SI: S_MOV_B64 s{{\[}}[[LOSDATA:[0-9]+]]:[[HISDATA:[0-9]+]]{{\]}}, -1
-; SI-DAG: V_MOV_B32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]]
-; SI-DAG: V_MOV_B32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]]
-; SI: DS_DEC_RTN_U64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
-; SI: BUFFER_STORE_DWORDX2 [[RESULT]],
-; SI: S_ENDPGM
+; SI: s_mov_b64 s{{\[}}[[LOSDATA:[0-9]+]]:[[HISDATA:[0-9]+]]{{\]}}, -1
+; SI-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]]
+; SI-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]]
+; SI: ds_dec_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
+; SI: buffer_store_dwordx2 [[RESULT]],
+; SI: s_endpgm
define void @lds_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw sub i64 addrspace(3)* %ptr, i64 1 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
}
; FUNC-LABEL: {{^}}lds_atomic_dec_ret_i64_offset:
-; SI: DS_DEC_RTN_U64 {{.*}} offset:32
-; SI: S_ENDPGM
+; SI: ds_dec_rtn_u64 {{.*}} offset:32
+; SI: s_endpgm
define void @lds_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw sub i64 addrspace(3)* %gep, i64 1 seq_cst
}
; FUNC-LABEL: {{^}}lds_atomic_and_ret_i64:
-; SI: DS_AND_RTN_B64
-; SI: S_ENDPGM
+; SI: ds_and_rtn_b64
+; SI: s_endpgm
define void @lds_atomic_and_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw and i64 addrspace(3)* %ptr, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
}
; FUNC-LABEL: {{^}}lds_atomic_and_ret_i64_offset:
-; SI: DS_AND_RTN_B64 {{.*}} offset:32
-; SI: S_ENDPGM
+; SI: ds_and_rtn_b64 {{.*}} offset:32
+; SI: s_endpgm
define void @lds_atomic_and_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw and i64 addrspace(3)* %gep, i64 4 seq_cst
}
; FUNC-LABEL: {{^}}lds_atomic_or_ret_i64:
-; SI: DS_OR_RTN_B64
-; SI: S_ENDPGM
+; SI: ds_or_rtn_b64
+; SI: s_endpgm
define void @lds_atomic_or_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw or i64 addrspace(3)* %ptr, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
}
; FUNC-LABEL: {{^}}lds_atomic_or_ret_i64_offset:
-; SI: DS_OR_RTN_B64 {{.*}} offset:32
-; SI: S_ENDPGM
+; SI: ds_or_rtn_b64 {{.*}} offset:32
+; SI: s_endpgm
define void @lds_atomic_or_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw or i64 addrspace(3)* %gep, i64 4 seq_cst
}
; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i64:
-; SI: DS_XOR_RTN_B64
-; SI: S_ENDPGM
+; SI: ds_xor_rtn_b64
+; SI: s_endpgm
define void @lds_atomic_xor_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw xor i64 addrspace(3)* %ptr, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
}
; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i64_offset:
-; SI: DS_XOR_RTN_B64 {{.*}} offset:32
-; SI: S_ENDPGM
+; SI: ds_xor_rtn_b64 {{.*}} offset:32
+; SI: s_endpgm
define void @lds_atomic_xor_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw xor i64 addrspace(3)* %gep, i64 4 seq_cst
; }
; FUNC-LABEL: {{^}}lds_atomic_min_ret_i64:
-; SI: DS_MIN_RTN_I64
-; SI: S_ENDPGM
+; SI: ds_min_rtn_i64
+; SI: s_endpgm
define void @lds_atomic_min_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw min i64 addrspace(3)* %ptr, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
}
; FUNC-LABEL: {{^}}lds_atomic_min_ret_i64_offset:
-; SI: DS_MIN_RTN_I64 {{.*}} offset:32
-; SI: S_ENDPGM
+; SI: ds_min_rtn_i64 {{.*}} offset:32
+; SI: s_endpgm
define void @lds_atomic_min_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw min i64 addrspace(3)* %gep, i64 4 seq_cst
}
; FUNC-LABEL: {{^}}lds_atomic_max_ret_i64:
-; SI: DS_MAX_RTN_I64
-; SI: S_ENDPGM
+; SI: ds_max_rtn_i64
+; SI: s_endpgm
define void @lds_atomic_max_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw max i64 addrspace(3)* %ptr, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
}
; FUNC-LABEL: {{^}}lds_atomic_max_ret_i64_offset:
-; SI: DS_MAX_RTN_I64 {{.*}} offset:32
-; SI: S_ENDPGM
+; SI: ds_max_rtn_i64 {{.*}} offset:32
+; SI: s_endpgm
define void @lds_atomic_max_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw max i64 addrspace(3)* %gep, i64 4 seq_cst
}
; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i64:
-; SI: DS_MIN_RTN_U64
-; SI: S_ENDPGM
+; SI: ds_min_rtn_u64
+; SI: s_endpgm
define void @lds_atomic_umin_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw umin i64 addrspace(3)* %ptr, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
}
; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i64_offset:
-; SI: DS_MIN_RTN_U64 {{.*}} offset:32
-; SI: S_ENDPGM
+; SI: ds_min_rtn_u64 {{.*}} offset:32
+; SI: s_endpgm
define void @lds_atomic_umin_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw umin i64 addrspace(3)* %gep, i64 4 seq_cst
}
; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i64:
-; SI: DS_MAX_RTN_U64
-; SI: S_ENDPGM
+; SI: ds_max_rtn_u64
+; SI: s_endpgm
define void @lds_atomic_umax_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw umax i64 addrspace(3)* %ptr, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
}
; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i64_offset:
-; SI: DS_MAX_RTN_U64 {{.*}} offset:32
-; SI: S_ENDPGM
+; SI: ds_max_rtn_u64 {{.*}} offset:32
+; SI: s_endpgm
define void @lds_atomic_umax_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw umax i64 addrspace(3)* %gep, i64 4 seq_cst
}
; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i64:
-; SI: DS_WRXCHG_RTN_B64
-; SI: S_ENDPGM
+; SI: ds_wrxchg_rtn_b64
+; SI: s_endpgm
define void @lds_atomic_xchg_noret_i64(i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw xchg i64 addrspace(3)* %ptr, i64 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i64_offset:
-; SI: DS_WRXCHG_RTN_B64 {{.*}} offset:32
-; SI: S_ENDPGM
+; SI: ds_wrxchg_rtn_b64 {{.*}} offset:32
+; SI: s_endpgm
define void @lds_atomic_xchg_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw xchg i64 addrspace(3)* %gep, i64 4 seq_cst
}
; FUNC-LABEL: {{^}}lds_atomic_add_noret_i64:
-; SI: DS_ADD_U64
-; SI: S_ENDPGM
+; SI: ds_add_u64
+; SI: s_endpgm
define void @lds_atomic_add_noret_i64(i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw add i64 addrspace(3)* %ptr, i64 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_add_noret_i64_offset:
-; SI: S_LOAD_DWORD [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
-; SI: S_MOV_B64 s{{\[}}[[LOSDATA:[0-9]+]]:[[HISDATA:[0-9]+]]{{\]}}, 9
-; SI-DAG: V_MOV_B32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]]
-; SI-DAG: V_MOV_B32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]]
-; SI-DAG: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[PTR]]
-; SI: DS_ADD_U64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32 [M0]
-; SI: S_ENDPGM
+; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
+; SI: s_mov_b64 s{{\[}}[[LOSDATA:[0-9]+]]:[[HISDATA:[0-9]+]]{{\]}}, 9
+; SI-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]]
+; SI-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]]
+; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
+; SI: ds_add_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32 [M0]
+; SI: s_endpgm
define void @lds_atomic_add_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i64 4
%result = atomicrmw add i64 addrspace(3)* %gep, i64 9 seq_cst
}
; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i64:
-; SI: S_MOV_B64 s{{\[}}[[LOSDATA:[0-9]+]]:[[HISDATA:[0-9]+]]{{\]}}, -1
-; SI-DAG: V_MOV_B32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]]
-; SI-DAG: V_MOV_B32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]]
-; SI: DS_INC_U64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
-; SI: S_ENDPGM
+; SI: s_mov_b64 s{{\[}}[[LOSDATA:[0-9]+]]:[[HISDATA:[0-9]+]]{{\]}}, -1
+; SI-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]]
+; SI-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]]
+; SI: ds_inc_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
+; SI: s_endpgm
define void @lds_atomic_inc_noret_i64(i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw add i64 addrspace(3)* %ptr, i64 1 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i64_offset:
-; SI: DS_INC_U64 {{.*}} offset:32
-; SI: S_ENDPGM
+; SI: ds_inc_u64 {{.*}} offset:32
+; SI: s_endpgm
define void @lds_atomic_inc_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw add i64 addrspace(3)* %gep, i64 1 seq_cst
}
; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i64:
-; SI: DS_SUB_U64
-; SI: S_ENDPGM
+; SI: ds_sub_u64
+; SI: s_endpgm
define void @lds_atomic_sub_noret_i64(i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw sub i64 addrspace(3)* %ptr, i64 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i64_offset:
-; SI: DS_SUB_U64 {{.*}} offset:32
-; SI: S_ENDPGM
+; SI: ds_sub_u64 {{.*}} offset:32
+; SI: s_endpgm
define void @lds_atomic_sub_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw sub i64 addrspace(3)* %gep, i64 4 seq_cst
}
; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i64:
-; SI: S_MOV_B64 s{{\[}}[[LOSDATA:[0-9]+]]:[[HISDATA:[0-9]+]]{{\]}}, -1
-; SI-DAG: V_MOV_B32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]]
-; SI-DAG: V_MOV_B32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]]
-; SI: DS_DEC_U64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
-; SI: S_ENDPGM
+; SI: s_mov_b64 s{{\[}}[[LOSDATA:[0-9]+]]:[[HISDATA:[0-9]+]]{{\]}}, -1
+; SI-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]]
+; SI-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]]
+; SI: ds_dec_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
+; SI: s_endpgm
define void @lds_atomic_dec_noret_i64(i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw sub i64 addrspace(3)* %ptr, i64 1 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i64_offset:
-; SI: DS_DEC_U64 {{.*}} offset:32
-; SI: S_ENDPGM
+; SI: ds_dec_u64 {{.*}} offset:32
+; SI: s_endpgm
define void @lds_atomic_dec_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw sub i64 addrspace(3)* %gep, i64 1 seq_cst
}
; FUNC-LABEL: {{^}}lds_atomic_and_noret_i64:
-; SI: DS_AND_B64
-; SI: S_ENDPGM
+; SI: ds_and_b64
+; SI: s_endpgm
define void @lds_atomic_and_noret_i64(i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw and i64 addrspace(3)* %ptr, i64 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_and_noret_i64_offset:
-; SI: DS_AND_B64 {{.*}} offset:32
-; SI: S_ENDPGM
+; SI: ds_and_b64 {{.*}} offset:32
+; SI: s_endpgm
define void @lds_atomic_and_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw and i64 addrspace(3)* %gep, i64 4 seq_cst
}
; FUNC-LABEL: {{^}}lds_atomic_or_noret_i64:
-; SI: DS_OR_B64
-; SI: S_ENDPGM
+; SI: ds_or_b64
+; SI: s_endpgm
define void @lds_atomic_or_noret_i64(i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw or i64 addrspace(3)* %ptr, i64 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_or_noret_i64_offset:
-; SI: DS_OR_B64 {{.*}} offset:32
-; SI: S_ENDPGM
+; SI: ds_or_b64 {{.*}} offset:32
+; SI: s_endpgm
define void @lds_atomic_or_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw or i64 addrspace(3)* %gep, i64 4 seq_cst
}
; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i64:
-; SI: DS_XOR_B64
-; SI: S_ENDPGM
+; SI: ds_xor_b64
+; SI: s_endpgm
define void @lds_atomic_xor_noret_i64(i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw xor i64 addrspace(3)* %ptr, i64 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i64_offset:
-; SI: DS_XOR_B64 {{.*}} offset:32
-; SI: S_ENDPGM
+; SI: ds_xor_b64 {{.*}} offset:32
+; SI: s_endpgm
define void @lds_atomic_xor_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw xor i64 addrspace(3)* %gep, i64 4 seq_cst
; }
; FUNC-LABEL: {{^}}lds_atomic_min_noret_i64:
-; SI: DS_MIN_I64
-; SI: S_ENDPGM
+; SI: ds_min_i64
+; SI: s_endpgm
define void @lds_atomic_min_noret_i64(i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw min i64 addrspace(3)* %ptr, i64 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_min_noret_i64_offset:
-; SI: DS_MIN_I64 {{.*}} offset:32
-; SI: S_ENDPGM
+; SI: ds_min_i64 {{.*}} offset:32
+; SI: s_endpgm
define void @lds_atomic_min_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw min i64 addrspace(3)* %gep, i64 4 seq_cst
}
; FUNC-LABEL: {{^}}lds_atomic_max_noret_i64:
-; SI: DS_MAX_I64
-; SI: S_ENDPGM
+; SI: ds_max_i64
+; SI: s_endpgm
define void @lds_atomic_max_noret_i64(i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw max i64 addrspace(3)* %ptr, i64 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_max_noret_i64_offset:
-; SI: DS_MAX_I64 {{.*}} offset:32
-; SI: S_ENDPGM
+; SI: ds_max_i64 {{.*}} offset:32
+; SI: s_endpgm
define void @lds_atomic_max_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw max i64 addrspace(3)* %gep, i64 4 seq_cst
}
; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i64:
-; SI: DS_MIN_U64
-; SI: S_ENDPGM
+; SI: ds_min_u64
+; SI: s_endpgm
define void @lds_atomic_umin_noret_i64(i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw umin i64 addrspace(3)* %ptr, i64 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i64_offset:
-; SI: DS_MIN_U64 {{.*}} offset:32
-; SI: S_ENDPGM
+; SI: ds_min_u64 {{.*}} offset:32
+; SI: s_endpgm
define void @lds_atomic_umin_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw umin i64 addrspace(3)* %gep, i64 4 seq_cst
}
; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i64:
-; SI: DS_MAX_U64
-; SI: S_ENDPGM
+; SI: ds_max_u64
+; SI: s_endpgm
define void @lds_atomic_umax_noret_i64(i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw umax i64 addrspace(3)* %ptr, i64 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i64_offset:
-; SI: DS_MAX_U64 {{.*}} offset:32
-; SI: S_ENDPGM
+; SI: ds_max_u64 {{.*}} offset:32
+; SI: s_endpgm
define void @lds_atomic_umax_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw umax i64 addrspace(3)* %gep, i64 4 seq_cst
; this consistently on evergreen GPUs.
; EG-CHECK: LDS_WRITE
; EG-CHECK: LDS_WRITE
-; SI-CHECK: DS_WRITE_B32 {{v[0-9]*}}, v[[ADDRW:[0-9]*]]
-; SI-CHECK-NOT: DS_WRITE_B32 {{v[0-9]*}}, v[[ADDRW]]
+; SI-CHECK: ds_write_b32 {{v[0-9]*}}, v[[ADDRW:[0-9]*]]
+; SI-CHECK-NOT: ds_write_b32 {{v[0-9]*}}, v[[ADDRW]]
; GROUP_BARRIER must be the last instruction in a clause
; EG-CHECK: GROUP_BARRIER
; constant offsets.
; EG-CHECK: LDS_READ_RET {{[*]*}} OQAP, {{PV|T}}[[ADDRR:[0-9]*\.[XYZW]]]
; EG-CHECK-NOT: LDS_READ_RET {{[*]*}} OQAP, T[[ADDRR]]
-; SI: V_ADD_I32_e32 [[SIPTR:v[0-9]+]], 16, v{{[0-9]+}}
-; SI: DS_READ_B32 {{v[0-9]+}}, [[SIPTR]] [M0]
-; CI: DS_READ_B32 {{v[0-9]+}}, [[ADDRR:v[0-9]+]] offset:16 [M0]
-; CI: DS_READ_B32 {{v[0-9]+}}, [[ADDRR]] [M0]
+; SI: v_add_i32_e32 [[SIPTR:v[0-9]+]], 16, v{{[0-9]+}}
+; SI: ds_read_b32 {{v[0-9]+}}, [[SIPTR]] [M0]
+; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR:v[0-9]+]] offset:16 [M0]
+; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR]] [M0]
define void @local_memory_two_objects(i32 addrspace(1)* %out) {
entry:
; CI-NEXT: .long 32768
; EG: LDS_WRITE
-; SI-NOT: S_WQM_B64
-; SI: DS_WRITE_B32
+; SI-NOT: s_wqm_b64
+; SI: ds_write_b32
; GROUP_BARRIER must be the last instruction in a clause
; EG: GROUP_BARRIER
; EG-NEXT: ALU clause
-; SI: S_BARRIER
+; SI: s_barrier
; EG: LDS_READ_RET
-; SI: DS_READ_B32 {{v[0-9]+}},
+; SI: ds_read_b32 {{v[0-9]+}},
define void @local_memory(i32 addrspace(1)* %out) {
entry:
;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
-;CHECK: S_LSHL_B32 s{{[0-9]}}, s{{[0-9]}}, 1
+;CHECK: s_lshl_b32 s{{[0-9]}}, s{{[0-9]}}, 1
define void @test(i32 %p) {
%i = mul i32 %p, 2
;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
-;CHECK: S_LSHR_B32 s{{[0-9]}}, s{{[0-9]}}, 1
+;CHECK: s_lshr_b32 s{{[0-9]}}, s{{[0-9]}}, 1
define void @test(i32 %p) {
%i = udiv i32 %p, 2
declare float @llvm.fabs.f32(float) #0
; FUNC-LABEL: {{^}}mad_sub_f32:
-; SI: BUFFER_LOAD_DWORD [[REGA:v[0-9]+]]
-; SI: BUFFER_LOAD_DWORD [[REGB:v[0-9]+]]
-; SI: BUFFER_LOAD_DWORD [[REGC:v[0-9]+]]
-; SI: V_MAD_F32 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], -[[REGC]]
-; SI: BUFFER_STORE_DWORD [[RESULT]]
+; SI: buffer_load_dword [[REGA:v[0-9]+]]
+; SI: buffer_load_dword [[REGB:v[0-9]+]]
+; SI: buffer_load_dword [[REGC:v[0-9]+]]
+; SI: v_mad_f32 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], -[[REGC]]
+; SI: buffer_store_dword [[RESULT]]
define void @mad_sub_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 {
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
%tid.ext = sext i32 %tid to i64
}
; FUNC-LABEL: {{^}}mad_sub_inv_f32:
-; SI: BUFFER_LOAD_DWORD [[REGA:v[0-9]+]]
-; SI: BUFFER_LOAD_DWORD [[REGB:v[0-9]+]]
-; SI: BUFFER_LOAD_DWORD [[REGC:v[0-9]+]]
-; SI: V_MAD_F32 [[RESULT:v[0-9]+]], -[[REGA]], [[REGB]], [[REGC]]
-; SI: BUFFER_STORE_DWORD [[RESULT]]
+; SI: buffer_load_dword [[REGA:v[0-9]+]]
+; SI: buffer_load_dword [[REGB:v[0-9]+]]
+; SI: buffer_load_dword [[REGC:v[0-9]+]]
+; SI: v_mad_f32 [[RESULT:v[0-9]+]], -[[REGA]], [[REGB]], [[REGC]]
+; SI: buffer_store_dword [[RESULT]]
define void @mad_sub_inv_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 {
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
%tid.ext = sext i32 %tid to i64
}
; FUNC-LABEL: {{^}}mad_sub_f64:
-; SI: V_MUL_F64
-; SI: V_ADD_F64
+; SI: v_mul_f64
+; SI: v_add_f64
define void @mad_sub_f64(double addrspace(1)* noalias nocapture %out, double addrspace(1)* noalias nocapture readonly %ptr) #1 {
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
%tid.ext = sext i32 %tid to i64
}
; FUNC-LABEL: {{^}}mad_sub_fabs_f32:
-; SI: BUFFER_LOAD_DWORD [[REGA:v[0-9]+]]
-; SI: BUFFER_LOAD_DWORD [[REGB:v[0-9]+]]
-; SI: BUFFER_LOAD_DWORD [[REGC:v[0-9]+]]
-; SI: V_MAD_F32 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], -|[[REGC]]|
-; SI: BUFFER_STORE_DWORD [[RESULT]]
+; SI: buffer_load_dword [[REGA:v[0-9]+]]
+; SI: buffer_load_dword [[REGB:v[0-9]+]]
+; SI: buffer_load_dword [[REGC:v[0-9]+]]
+; SI: v_mad_f32 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], -|[[REGC]]|
+; SI: buffer_store_dword [[RESULT]]
define void @mad_sub_fabs_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 {
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
%tid.ext = sext i32 %tid to i64
}
; FUNC-LABEL: {{^}}mad_sub_fabs_inv_f32:
-; SI: BUFFER_LOAD_DWORD [[REGA:v[0-9]+]]
-; SI: BUFFER_LOAD_DWORD [[REGB:v[0-9]+]]
-; SI: BUFFER_LOAD_DWORD [[REGC:v[0-9]+]]
-; SI: V_MAD_F32 [[RESULT:v[0-9]+]], -[[REGA]], [[REGB]], |[[REGC]]|
-; SI: BUFFER_STORE_DWORD [[RESULT]]
+; SI: buffer_load_dword [[REGA:v[0-9]+]]
+; SI: buffer_load_dword [[REGB:v[0-9]+]]
+; SI: buffer_load_dword [[REGC:v[0-9]+]]
+; SI: v_mad_f32 [[RESULT:v[0-9]+]], -[[REGA]], [[REGB]], |[[REGC]]|
+; SI: buffer_store_dword [[RESULT]]
define void @mad_sub_fabs_inv_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 {
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
%tid.ext = sext i32 %tid to i64
}
; FUNC-LABEL: {{^}}neg_neg_mad_f32:
-; SI: V_MAD_F32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+; SI: v_mad_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
define void @neg_neg_mad_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 {
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
%tid.ext = sext i32 %tid to i64
}
; FUNC-LABEL: {{^}}mad_fabs_sub_f32:
-; SI: BUFFER_LOAD_DWORD [[REGA:v[0-9]+]]
-; SI: BUFFER_LOAD_DWORD [[REGB:v[0-9]+]]
-; SI: BUFFER_LOAD_DWORD [[REGC:v[0-9]+]]
-; SI: V_MAD_F32 [[RESULT:v[0-9]+]], [[REGA]], |[[REGB]]|, -[[REGC]]
-; SI: BUFFER_STORE_DWORD [[RESULT]]
+; SI: buffer_load_dword [[REGA:v[0-9]+]]
+; SI: buffer_load_dword [[REGB:v[0-9]+]]
+; SI: buffer_load_dword [[REGC:v[0-9]+]]
+; SI: v_mad_f32 [[RESULT:v[0-9]+]], [[REGA]], |[[REGB]]|, -[[REGC]]
+; SI: buffer_store_dword [[RESULT]]
define void @mad_fabs_sub_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 {
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
%tid.ext = sext i32 %tid to i64
}
; FUNC-LABEL: {{^}}fsub_c_fadd_a_a:
-; SI-DAG: BUFFER_LOAD_DWORD [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; SI-DAG: BUFFER_LOAD_DWORD [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
-; SI: V_MAD_F32 [[RESULT:v[0-9]+]], -2.0, [[R1]], [[R2]]
-; SI: BUFFER_STORE_DWORD [[RESULT]]
+; SI-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
+; SI: v_mad_f32 [[RESULT:v[0-9]+]], -2.0, [[R1]], [[R2]]
+; SI: buffer_store_dword [[RESULT]]
define void @fsub_c_fadd_a_a(float addrspace(1)* %out, float addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
}
; FUNC-LABEL: {{^}}fsub_fadd_a_a_c:
-; SI-DAG: BUFFER_LOAD_DWORD [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; SI-DAG: BUFFER_LOAD_DWORD [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
-; SI: V_MAD_F32 [[RESULT:v[0-9]+]], 2.0, [[R1]], -[[R2]]
-; SI: BUFFER_STORE_DWORD [[RESULT]]
+; SI-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
+; SI: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], -[[R2]]
+; SI: buffer_store_dword [[RESULT]]
define void @fsub_fadd_a_a_c(float addrspace(1)* %out, float addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
; Make sure we aren't masking the inputs.
; CM-NOT: AND
; CM: MULADD_INT24
-; SI-NOT: AND
-; SI: V_MAD_I32_I24
+; SI-NOT: and
+; SI: v_mad_i32_i24
define void @i32_mad24(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
entry:
%0 = shl i32 %a, 8
; FUNC-LABEL: {{^}}u32_mad24:
; EG: MULADD_UINT24
-; SI: V_MAD_U32_U24
+; SI: v_mad_u32_u24
define void @u32_mad24(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
entry:
; The result must be sign-extended
; EG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x
; EG: 16
-; SI: V_MAD_U32_U24 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
-; SI: V_BFE_I32 v{{[0-9]}}, [[MAD]], 0, 16
+; SI: v_mad_u32_u24 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
+; SI: v_bfe_i32 v{{[0-9]}}, [[MAD]], 0, 16
define void @i16_mad24(i32 addrspace(1)* %out, i16 %a, i16 %b, i16 %c) {
entry:
; The result must be sign-extended
; EG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x
; EG: 8
-; SI: V_MAD_U32_U24 [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
-; SI: V_BFE_I32 v{{[0-9]}}, [[MUL]], 0, 8
+; SI: v_mad_u32_u24 [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
+; SI: v_bfe_i32 v{{[0-9]}}, [[MUL]], 0, 8
define void @i8_mad24(i32 addrspace(1)* %out, i8 %a, i8 %b, i8 %c) {
entry:
; Check that the select instruction is not deleted.
; FUNC-LABEL: {{^}}i24_i32_i32_mad:
; EG: CNDE_INT
-; SI: V_CNDMASK
+; SI: v_cndmask
define void @i24_i32_i32_mad(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d) {
entry:
%0 = ashr i32 %a, 8
; resulting in losing the store to gptr
; FUNC-LABEL: {{^}}missing_store_reduced:
-; SI: DS_READ_B64
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_LOAD_DWORD
-; SI: BUFFER_STORE_DWORD
-; SI: S_ENDPGM
+; SI: ds_read_b64
+; SI: buffer_store_dword
+; SI: buffer_load_dword
+; SI: buffer_store_dword
+; SI: s_endpgm
define void @missing_store_reduced(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
%ptr0 = load i32 addrspace(2)* addrspace(3)* @ptr_load, align 8
%ptr2 = getelementptr inbounds i32 addrspace(2)* %ptr0, i64 2
; MUBUF load with an immediate byte offset that fits into 12-bits
; CHECK-LABEL: {{^}}mubuf_load0:
-; CHECK: BUFFER_LOAD_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0 offset:0x4 ; encoding: [0x04,0x00,0x30,0xe0
+; CHECK: buffer_load_dword v{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0 offset:0x4 ; encoding: [0x04,0x00,0x30,0xe0
define void @mubuf_load0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
%0 = getelementptr i32 addrspace(1)* %in, i64 1
; MUBUF load with the largest possible immediate offset
; CHECK-LABEL: {{^}}mubuf_load1:
-; CHECK: BUFFER_LOAD_UBYTE v{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0 offset:0xfff ; encoding: [0xff,0x0f,0x20,0xe0
+; CHECK: buffer_load_ubyte v{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0 offset:0xfff ; encoding: [0xff,0x0f,0x20,0xe0
define void @mubuf_load1(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
%0 = getelementptr i8 addrspace(1)* %in, i64 4095
; MUBUF load with an immediate byte offset that doesn't fit into 12-bits
; CHECK-LABEL: {{^}}mubuf_load2:
-; CHECK: BUFFER_LOAD_DWORD v{{[0-9]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 ; encoding: [0x00,0x80,0x30,0xe0
+; CHECK: buffer_load_dword v{{[0-9]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 ; encoding: [0x00,0x80,0x30,0xe0
define void @mubuf_load2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
%0 = getelementptr i32 addrspace(1)* %in, i64 1024
; MUBUF load with a 12-bit immediate offset and a register offset
; CHECK-LABEL: {{^}}mubuf_load3:
; CHECK-NOT: ADD
-; CHECK: BUFFER_LOAD_DWORD v{{[0-9]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:0x4 ; encoding: [0x04,0x80,0x30,0xe0
+; CHECK: buffer_load_dword v{{[0-9]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:0x4 ; encoding: [0x04,0x80,0x30,0xe0
define void @mubuf_load3(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i64 %offset) {
entry:
%0 = getelementptr i32 addrspace(1)* %in, i64 %offset
; MUBUF store with an immediate byte offset that fits into 12-bits
; CHECK-LABEL: {{^}}mubuf_store0:
-; CHECK: BUFFER_STORE_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0 offset:0x4 ; encoding: [0x04,0x00,0x70,0xe0
+; CHECK: buffer_store_dword v{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0 offset:0x4 ; encoding: [0x04,0x00,0x70,0xe0
define void @mubuf_store0(i32 addrspace(1)* %out) {
entry:
%0 = getelementptr i32 addrspace(1)* %out, i64 1
; MUBUF store with the largest possible immediate offset
; CHECK-LABEL: {{^}}mubuf_store1:
-; CHECK: BUFFER_STORE_BYTE v{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0 offset:0xfff ; encoding: [0xff,0x0f,0x60,0xe0
+; CHECK: buffer_store_byte v{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0 offset:0xfff ; encoding: [0xff,0x0f,0x60,0xe0
define void @mubuf_store1(i8 addrspace(1)* %out) {
entry:
; MUBUF store with an immediate byte offset that doesn't fit into 12-bits
; CHECK-LABEL: {{^}}mubuf_store2:
-; CHECK: BUFFER_STORE_DWORD v{{[0-9]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]:[0-9]}}], 0 addr64 ; encoding: [0x00,0x80,0x70,0xe0
+; CHECK: buffer_store_dword v{{[0-9]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]:[0-9]}}], 0 addr64 ; encoding: [0x00,0x80,0x70,0xe0
define void @mubuf_store2(i32 addrspace(1)* %out) {
entry:
%0 = getelementptr i32 addrspace(1)* %out, i64 1024
; MUBUF store with a 12-bit immediate offset and a register offset
; CHECK-LABEL: {{^}}mubuf_store3:
; CHECK-NOT: ADD
-; CHECK: BUFFER_STORE_DWORD v{{[0-9]}}, v[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0 addr64 offset:0x4 ; encoding: [0x04,0x80,0x70,0xe0
+; CHECK: buffer_store_dword v{{[0-9]}}, v[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0 addr64 offset:0x4 ; encoding: [0x04,0x80,0x70,0xe0
define void @mubuf_store3(i32 addrspace(1)* %out, i64 %offset) {
entry:
%0 = getelementptr i32 addrspace(1)* %out, i64 %offset
}
; CHECK-LABEL: {{^}}store_sgpr_ptr:
-; CHECK: BUFFER_STORE_DWORD v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0
+; CHECK: buffer_store_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0
define void @store_sgpr_ptr(i32 addrspace(1)* %out) #0 {
store i32 99, i32 addrspace(1)* %out, align 4
ret void
}
; CHECK-LABEL: {{^}}store_sgpr_ptr_offset:
-; CHECK: BUFFER_STORE_DWORD v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:0x28
+; CHECK: buffer_store_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:0x28
define void @store_sgpr_ptr_offset(i32 addrspace(1)* %out) #0 {
%out.gep = getelementptr i32 addrspace(1)* %out, i32 10
store i32 99, i32 addrspace(1)* %out.gep, align 4
}
; CHECK-LABEL: {{^}}store_sgpr_ptr_large_offset:
-; CHECK: BUFFER_STORE_DWORD v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64
+; CHECK: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64
define void @store_sgpr_ptr_large_offset(i32 addrspace(1)* %out) #0 {
%out.gep = getelementptr i32 addrspace(1)* %out, i32 32768
store i32 99, i32 addrspace(1)* %out.gep, align 4
}
; CHECK-LABEL: {{^}}store_vgpr_ptr:
-; CHECK: BUFFER_STORE_DWORD v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64
+; CHECK: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64
define void @store_vgpr_ptr(i32 addrspace(1)* %out) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() readnone
%out.gep = getelementptr i32 addrspace(1)* %out, i32 %tid
; EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_mul_lo_i32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_mul_lo_i32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @test_mul_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
; EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_mul_lo_i32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_mul_lo_i32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_mul_lo_i32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_mul_lo_i32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @v_mul_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
}
; FUNC-LABEL: {{^}}s_trunc_i64_mul_to_i32:
-; SI: S_LOAD_DWORD
-; SI: S_LOAD_DWORD
-; SI: S_MUL_I32
-; SI: BUFFER_STORE_DWORD
+; SI: s_load_dword
+; SI: s_load_dword
+; SI: s_mul_i32
+; SI: buffer_store_dword
define void @s_trunc_i64_mul_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
%mul = mul i64 %b, %a
%trunc = trunc i64 %mul to i32
}
; FUNC-LABEL: {{^}}v_trunc_i64_mul_to_i32:
-; SI: S_LOAD_DWORD
-; SI: S_LOAD_DWORD
-; SI: V_MUL_LO_I32
-; SI: BUFFER_STORE_DWORD
+; SI: s_load_dword
+; SI: s_load_dword
+; SI: v_mul_lo_i32
+; SI: buffer_store_dword
define void @v_trunc_i64_mul_to_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
%a = load i64 addrspace(1)* %aptr, align 8
%b = load i64 addrspace(1)* %bptr, align 8
; FUNC-LABEL: {{^}}mul64_sext_c:
; EG-DAG: MULLO_INT
; EG-DAG: MULHI_INT
-; SI-DAG: S_MUL_I32
-; SI-DAG: V_MUL_HI_I32
+; SI-DAG: s_mul_i32
+; SI-DAG: v_mul_hi_i32
define void @mul64_sext_c(i64 addrspace(1)* %out, i32 %in) {
entry:
%0 = sext i32 %in to i64
; FUNC-LABEL: {{^}}v_mul64_sext_c:
; EG-DAG: MULLO_INT
; EG-DAG: MULHI_INT
-; SI-DAG: V_MUL_LO_I32
-; SI-DAG: V_MUL_HI_I32
-; SI: S_ENDPGM
+; SI-DAG: v_mul_lo_i32
+; SI-DAG: v_mul_hi_i32
+; SI: s_endpgm
define void @v_mul64_sext_c(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
%val = load i32 addrspace(1)* %in, align 4
%ext = sext i32 %val to i64
}
; FUNC-LABEL: {{^}}v_mul64_sext_inline_imm:
-; SI-DAG: V_MUL_LO_I32 v{{[0-9]+}}, 9, v{{[0-9]+}}
-; SI-DAG: V_MUL_HI_I32 v{{[0-9]+}}, 9, v{{[0-9]+}}
-; SI: S_ENDPGM
+; SI-DAG: v_mul_lo_i32 v{{[0-9]+}}, 9, v{{[0-9]+}}
+; SI-DAG: v_mul_hi_i32 v{{[0-9]+}}, 9, v{{[0-9]+}}
+; SI: s_endpgm
define void @v_mul64_sext_inline_imm(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
%val = load i32 addrspace(1)* %in, align 4
%ext = sext i32 %val to i64
}
; FUNC-LABEL: {{^}}s_mul_i32:
-; SI: S_LOAD_DWORD [[SRC0:s[0-9]+]],
-; SI: S_LOAD_DWORD [[SRC1:s[0-9]+]],
-; SI: S_MUL_I32 [[SRESULT:s[0-9]+]], [[SRC0]], [[SRC1]]
-; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
-; SI: BUFFER_STORE_DWORD [[VRESULT]],
-; SI: S_ENDPGM
+; SI: s_load_dword [[SRC0:s[0-9]+]],
+; SI: s_load_dword [[SRC1:s[0-9]+]],
+; SI: s_mul_i32 [[SRESULT:s[0-9]+]], [[SRC0]], [[SRC1]]
+; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
+; SI: buffer_store_dword [[VRESULT]],
+; SI: s_endpgm
define void @s_mul_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%mul = mul i32 %a, %b
store i32 %mul, i32 addrspace(1)* %out, align 4
}
; FUNC-LABEL: {{^}}v_mul_i32:
-; SI: V_MUL_LO_I32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+; SI: v_mul_lo_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define void @v_mul_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%b_ptr = getelementptr i32 addrspace(1)* %in, i32 1
%a = load i32 addrspace(1)* %in
}
; FUNC-LABEL: {{^}}v_mul_i64:
-; SI: V_MUL_LO_I32
+; SI: v_mul_lo_i32
define void @v_mul_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) {
%a = load i64 addrspace(1)* %aptr, align 8
%b = load i64 addrspace(1)* %bptr, align 8
}
; FUNC-LABEL: {{^}}mul32_in_branch:
-; SI: S_MUL_I32
+; SI: s_mul_i32
define void @mul32_in_branch(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %a, i32 %b, i32 %c) {
entry:
%0 = icmp eq i32 %a, 0
}
; FUNC-LABEL: {{^}}mul64_in_branch:
-; SI-DAG: S_MUL_I32
-; SI-DAG: V_MUL_HI_U32
-; SI: S_ENDPGM
+; SI-DAG: s_mul_i32
+; SI-DAG: v_mul_hi_u32
+; SI: s_endpgm
define void @mul64_in_branch(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b, i64 %c) {
entry:
%0 = icmp eq i64 %a, 0
; Make sure we are not masking the inputs
; CM-NOT: AND
; CM: MUL_INT24
-; SI-NOT: AND
-; SI: V_MUL_I32_I24
+; SI-NOT: and
+; SI: v_mul_i32_i24
define void @i32_mul24(i32 addrspace(1)* %out, i32 %a, i32 %b) {
entry:
%0 = shl i32 %a, 8
; FUNC-LABEL: {{^}}u32_mul24:
; EG: MUL_UINT24 {{[* ]*}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, KC0[2].W
-; SI: V_MUL_U32_U24
+; SI: v_mul_u32_u24
define void @u32_mul24(i32 addrspace(1)* %out, i32 %a, i32 %b) {
entry:
; The result must be sign-extended
; EG: BFE_INT {{[* ]*}}T{{[0-9]}}.{{[XYZW]}}, PV.[[MUL_CHAN]], 0.0, literal.x
; EG: 16
-; SI: V_MUL_U32_U24_e{{(32|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
-; SI: V_BFE_I32 v{{[0-9]}}, [[MUL]], 0, 16
+; SI: v_mul_u32_u24_e{{(32|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
+; SI: v_bfe_i32 v{{[0-9]}}, [[MUL]], 0, 16
define void @i16_mul24(i32 addrspace(1)* %out, i16 %a, i16 %b) {
entry:
%0 = mul i16 %a, %b
; EG: MUL_UINT24 {{[* ]*}}T{{[0-9]}}.[[MUL_CHAN:[XYZW]]]
; The result must be sign-extended
; EG: BFE_INT {{[* ]*}}T{{[0-9]}}.{{[XYZW]}}, PV.[[MUL_CHAN]], 0.0, literal.x
-; SI: V_MUL_U32_U24_e{{(32|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
-; SI: V_BFE_I32 v{{[0-9]}}, [[MUL]], 0, 8
+; SI: v_mul_u32_u24_e{{(32|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
+; SI: v_bfe_i32 v{{[0-9]}}, [[MUL]], 0, 8
define void @i8_mul24(i32 addrspace(1)* %out, i8 %a, i8 %b) {
entry:
; FUNC_LABEL: {{^}}mul24_i64:
; EG; MUL_UINT24
; EG: MULHI
-; SI: V_MUL_U32_U24
+; SI: v_mul_u32_u24
; FIXME: SI support 24-bit mulhi
-; SI: V_MUL_HI_U32
+; SI: v_mul_hi_u32
define void @mul24_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
entry:
%0 = shl i64 %a, 40
;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
-;CHECK: V_MOV_B32_e32 v{{[0-9]+}}, 0xaaaaaaab
-;CHECK: V_MUL_HI_U32 v0, {{[sv][0-9]+}}, {{v[0-9]+}}
-;CHECK-NEXT: V_LSHRREV_B32_e32 v0, 1, v0
+;CHECK: v_mov_b32_e32 v{{[0-9]+}}, 0xaaaaaaab
+;CHECK: v_mul_hi_u32 v0, {{[sv][0-9]+}}, {{v[0-9]+}}
+;CHECK-NEXT: v_lshrrev_b32_e32 v0, 1, v0
define void @test(i32 %p) {
%i = udiv i32 %p, 3
; Make sure there isn't an extra space between the instruction name and first operands.
; SI-LABEL: {{^}}add_f32:
-; SI-DAG: S_LOAD_DWORD [[SREGA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-DAG: S_LOAD_DWORD [[SREGB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
-; SI: V_MOV_B32_e32 [[VREGB:v[0-9]+]], [[SREGB]]
-; SI: V_ADD_F32_e32 [[RESULT:v[0-9]+]], [[SREGA]], [[VREGB]]
-; SI: BUFFER_STORE_DWORD [[RESULT]],
+; SI-DAG: s_load_dword [[SREGA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dword [[SREGB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: v_mov_b32_e32 [[VREGB:v[0-9]+]], [[SREGB]]
+; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], [[SREGA]], [[VREGB]]
+; SI: buffer_store_dword [[RESULT]],
define void @add_f32(float addrspace(1)* %out, float %a, float %b) {
%result = fadd float %a, %b
store float %result, float addrspace(1)* %out
; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; SI-LABEL: {{^}}or_v2i32:
-; SI: V_OR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: V_OR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @or_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; SI-LABEL: {{^}}or_v4i32:
-; SI: V_OR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: V_OR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: V_OR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: V_OR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @or_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
}
; SI-LABEL: {{^}}scalar_or_i32:
-; SI: S_OR_B32
+; SI: s_or_b32
define void @scalar_or_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
%or = or i32 %a, %b
store i32 %or, i32 addrspace(1)* %out
}
; SI-LABEL: {{^}}vector_or_i32:
-; SI: V_OR_B32_e32 v{{[0-9]}}
+; SI: v_or_b32_e32 v{{[0-9]}}
define void @vector_or_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 %b) {
%loada = load i32 addrspace(1)* %a
%or = or i32 %loada, %b
}
; SI-LABEL: {{^}}scalar_or_literal_i32:
-; SI: S_OR_B32 s{{[0-9]+}}, s{{[0-9]+}}, 0x1869f
+; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x1869f
define void @scalar_or_literal_i32(i32 addrspace(1)* %out, i32 %a) {
%or = or i32 %a, 99999
store i32 %or, i32 addrspace(1)* %out, align 4
}
; SI-LABEL: {{^}}vector_or_literal_i32:
-; SI: V_OR_B32_e32 v{{[0-9]+}}, 0xffff, v{{[0-9]+}}
+; SI: v_or_b32_e32 v{{[0-9]+}}, 0xffff, v{{[0-9]+}}
define void @vector_or_literal_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) {
%loada = load i32 addrspace(1)* %a, align 4
%or = or i32 %loada, 65535
}
; SI-LABEL: {{^}}vector_or_inline_immediate_i32:
-; SI: V_OR_B32_e32 v{{[0-9]+}}, 4, v{{[0-9]+}}
+; SI: v_or_b32_e32 v{{[0-9]+}}, 4, v{{[0-9]+}}
define void @vector_or_inline_immediate_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) {
%loada = load i32 addrspace(1)* %a, align 4
%or = or i32 %loada, 4
; EG-DAG: OR_INT * T{{[0-9]\.[XYZW]}}, KC0[2].W, KC0[3].Y
; EG-DAG: OR_INT * T{{[0-9]\.[XYZW]}}, KC0[3].X, KC0[3].Z
; SI-LABEL: {{^}}scalar_or_i64:
-; SI: S_OR_B64
+; SI: s_or_b64
define void @scalar_or_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
%or = or i64 %a, %b
store i64 %or, i64 addrspace(1)* %out
}
; SI-LABEL: {{^}}vector_or_i64:
-; SI: V_OR_B32_e32 v{{[0-9]}}
-; SI: V_OR_B32_e32 v{{[0-9]}}
+; SI: v_or_b32_e32 v{{[0-9]}}
+; SI: v_or_b32_e32 v{{[0-9]}}
define void @vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
%loada = load i64 addrspace(1)* %a, align 8
%loadb = load i64 addrspace(1)* %a, align 8
}
; SI-LABEL: {{^}}scalar_vector_or_i64:
-; SI: V_OR_B32_e32 v{{[0-9]}}
-; SI: V_OR_B32_e32 v{{[0-9]}}
+; SI: v_or_b32_e32 v{{[0-9]}}
+; SI: v_or_b32_e32 v{{[0-9]}}
define void @scalar_vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 %b) {
%loada = load i64 addrspace(1)* %a
%or = or i64 %loada, %b
}
; SI-LABEL: {{^}}vector_or_i64_loadimm:
-; SI-DAG: S_MOV_B32 [[LO_S_IMM:s[0-9]+]], 0xdf77987f
-; SI-DAG: S_MOV_B32 [[HI_S_IMM:s[0-9]+]], 0x146f
-; SI-DAG: BUFFER_LOAD_DWORDX2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
-; SI-DAG: V_OR_B32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
-; SI-DAG: V_OR_B32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
-; SI: S_ENDPGM
+; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0xdf77987f
+; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 0x146f
+; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI: s_endpgm
define void @vector_or_i64_loadimm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
%loada = load i64 addrspace(1)* %a, align 8
%or = or i64 %loada, 22470723082367
; FIXME: The or 0 should really be removed.
; SI-LABEL: {{^}}vector_or_i64_imm:
-; SI: BUFFER_LOAD_DWORDX2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
-; SI: V_OR_B32_e32 {{v[0-9]+}}, 8, v[[LO_VREG]]
-; SI: V_OR_B32_e32 {{v[0-9]+}}, 0, {{.*}}
-; SI: S_ENDPGM
+; SI: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI: v_or_b32_e32 {{v[0-9]+}}, 8, v[[LO_VREG]]
+; SI: v_or_b32_e32 {{v[0-9]+}}, 0, {{.*}}
+; SI: s_endpgm
define void @vector_or_i64_imm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
%loada = load i64 addrspace(1)* %a, align 8
%or = or i64 %loada, 8
}
; SI-LABEL: {{^}}trunc_i64_or_to_i32:
-; SI: S_LOAD_DWORD s[[SREG0:[0-9]+]]
-; SI: S_LOAD_DWORD s[[SREG1:[0-9]+]]
-; SI: S_OR_B32 s[[SRESULT:[0-9]+]], s[[SREG1]], s[[SREG0]]
-; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], s[[SRESULT]]
-; SI: BUFFER_STORE_DWORD [[VRESULT]],
+; SI: s_load_dword s[[SREG0:[0-9]+]]
+; SI: s_load_dword s[[SREG1:[0-9]+]]
+; SI: s_or_b32 s[[SRESULT:[0-9]+]], s[[SREG1]], s[[SREG0]]
+; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], s[[SRESULT]]
+; SI: buffer_store_dword [[VRESULT]],
define void @trunc_i64_or_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
%add = or i64 %b, %a
%trunc = trunc i64 %add to i32
; EG-CHECK: OR_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PS}}
; SI-CHECK: {{^}}or_i1:
-; SI-CHECK: S_OR_B64 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]
+; SI-CHECK: s_or_b64 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]
define void @or_i1(float addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) {
%a = load float addrspace(1) * %in0
%b = load float addrspace(1) * %in1
; R600: LDS_READ
; R600: LDS_READ
-; SI-PROMOTE: DS_WRITE_B32
-; SI-PROMOTE: DS_WRITE_B32
-; SI-PROMOTE: DS_READ_B32
-; SI-PROMOTE: DS_READ_B32
+; SI-PROMOTE: ds_write_b32
+; SI-PROMOTE: ds_write_b32
+; SI-PROMOTE: ds_read_b32
+; SI-PROMOTE: ds_read_b32
-; SI-ALLOCA: BUFFER_STORE_DWORD v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x70,0xe0
-; SI-ALLOCA: BUFFER_STORE_DWORD v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x70,0xe0
+; SI-ALLOCA: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x70,0xe0
+; SI-ALLOCA: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x70,0xe0
define void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) {
entry:
%stack = alloca [5 x i32], align 4
; FUNC-LABEL: {{^}}multiple_structs:
; R600-NOT: MOVA_INT
-; SI-NOT: V_MOVREL
-; SI-NOT: V_MOVREL
+; SI-NOT: v_movrel
+; SI-NOT: v_movrel
%struct.point = type { i32, i32 }
define void @multiple_structs(i32 addrspace(1)* %out) {
; FUNC-LABEL: {{^}}direct_loop:
; R600-NOT: MOVA_INT
-; SI-NOT: V_MOVREL
+; SI-NOT: v_movrel
define void @direct_loop(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
; R600: MOVA_INT
-; SI-PROMOTE-DAG: BUFFER_STORE_SHORT v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x68,0xe0
-; SI-PROMOTE-DAG: BUFFER_STORE_SHORT v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:0x2 ; encoding: [0x02,0x10,0x68,0xe0
-; SI-PROMOTE: BUFFER_LOAD_SSHORT v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}}
+; SI-PROMOTE-DAG: buffer_store_short v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x68,0xe0
+; SI-PROMOTE-DAG: buffer_store_short v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:0x2 ; encoding: [0x02,0x10,0x68,0xe0
+; SI-PROMOTE: buffer_load_sshort v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}}
define void @short_array(i32 addrspace(1)* %out, i32 %index) {
entry:
%0 = alloca [2 x i16]
; R600: MOVA_INT
-; SI-DAG: BUFFER_STORE_BYTE v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x60,0xe0
-; SI-DAG: BUFFER_STORE_BYTE v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:0x1 ; encoding: [0x01,0x10,0x60,0xe0
+; SI-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x60,0xe0
+; SI-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:0x1 ; encoding: [0x01,0x10,0x60,0xe0
define void @char_array(i32 addrspace(1)* %out, i32 %index) {
entry:
%0 = alloca [2 x i8]
; Additional check in case the move ends up in the last slot
; R600-NOT: MOV * TO.X
-; SI-NOT: V_MOV_B32_e{{(32|64)}} v0
+; SI-NOT: v_mov_b32_e{{(32|64)}} v0
define void @work_item_info(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = alloca [2 x i32]
; R600_CHECK: MOV
; R600_CHECK: [[CHAN:[XYZW]]]+
; R600-NOT: [[CHAN]]+
-; SI: V_MOV_B32_e32 v3
+; SI: v_mov_b32_e32 v3
define void @no_overlap(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = alloca [3 x i8], align 1
; finds one, it should stop trying to promote.
; FUNC-LABEL: ptrtoint:
-; SI-NOT: DS_WRITE
-; SI: BUFFER_STORE_DWORD v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen
-; SI: BUFFER_LOAD_DWORD v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:0x5
+; SI-NOT: ds_write
+; SI: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen
+; SI: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:0x5
define void @ptrtoint(i32 addrspace(1)* %out, i32 %a, i32 %b) {
%alloca = alloca [16 x i32]
%tmp0 = getelementptr [16 x i32]* %alloca, i32 0, i32 %a
; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
; SI-LABEL: {{^}}no_reorder_v2f64_global_load_store:
-; SI: BUFFER_LOAD_DWORDX2
-; SI: BUFFER_LOAD_DWORDX2
-; SI: BUFFER_LOAD_DWORDX2
-; SI: BUFFER_LOAD_DWORDX2
-; SI: BUFFER_STORE_DWORDX2
-; SI: BUFFER_STORE_DWORDX2
-; SI: BUFFER_STORE_DWORDX2
-; SI: BUFFER_STORE_DWORDX2
-; SI: S_ENDPGM
+; SI: buffer_load_dwordx2
+; SI: buffer_load_dwordx2
+; SI: buffer_load_dwordx2
+; SI: buffer_load_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
define void @no_reorder_v2f64_global_load_store(<2 x double> addrspace(1)* nocapture %x, <2 x double> addrspace(1)* nocapture %y) nounwind {
%tmp1 = load <2 x double> addrspace(1)* %x, align 16
%tmp4 = load <2 x double> addrspace(1)* %y, align 16
}
; SI-LABEL: {{^}}no_reorder_scalarized_v2f64_local_load_store:
-; SI: DS_READ_B64
-; SI: DS_READ_B64
-; SI: DS_WRITE_B64
-; SI: DS_WRITE_B64
-; SI: S_ENDPGM
+; SI: ds_read_b64
+; SI: ds_read_b64
+; SI: ds_write_b64
+; SI: ds_write_b64
+; SI: s_endpgm
define void @no_reorder_scalarized_v2f64_local_load_store(<2 x double> addrspace(3)* nocapture %x, <2 x double> addrspace(3)* nocapture %y) nounwind {
%tmp1 = load <2 x double> addrspace(3)* %x, align 16
%tmp4 = load <2 x double> addrspace(3)* %y, align 16
}
; SI-LABEL: {{^}}no_reorder_split_v8i32_global_load_store:
-; SI: BUFFER_LOAD_DWORD
-; SI: BUFFER_LOAD_DWORD
-; SI: BUFFER_LOAD_DWORD
-; SI: BUFFER_LOAD_DWORD
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
-; SI: BUFFER_LOAD_DWORD
-; SI: BUFFER_LOAD_DWORD
-; SI: BUFFER_LOAD_DWORD
-; SI: BUFFER_LOAD_DWORD
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
-; SI: BUFFER_LOAD_DWORD
-; SI: BUFFER_LOAD_DWORD
-; SI: BUFFER_LOAD_DWORD
-; SI: BUFFER_LOAD_DWORD
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
-; SI: BUFFER_LOAD_DWORD
-; SI: BUFFER_LOAD_DWORD
-; SI: BUFFER_LOAD_DWORD
-; SI: BUFFER_LOAD_DWORD
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
+; SI: buffer_store_dword
+; SI: buffer_store_dword
+; SI: buffer_store_dword
+; SI: buffer_store_dword
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
+; SI: buffer_store_dword
+; SI: buffer_store_dword
+; SI: buffer_store_dword
+; SI: buffer_store_dword
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
+; SI: buffer_store_dword
+; SI: buffer_store_dword
+; SI: buffer_store_dword
+; SI: buffer_store_dword
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
-; SI: S_ENDPGM
+; SI: buffer_store_dword
+; SI: buffer_store_dword
+; SI: buffer_store_dword
+; SI: buffer_store_dword
+; SI: s_endpgm
define void @no_reorder_split_v8i32_global_load_store(<8 x i32> addrspace(1)* nocapture %x, <8 x i32> addrspace(1)* nocapture %y) nounwind {
%tmp1 = load <8 x i32> addrspace(1)* %x, align 32
%tmp4 = load <8 x i32> addrspace(1)* %y, align 32
}
; SI-LABEL: {{^}}no_reorder_extload_64:
-; SI: DS_READ_B64
-; SI: DS_READ_B64
-; SI: DS_WRITE_B64
-; SI-NOT: DS_READ
-; SI: DS_WRITE_B64
-; SI: S_ENDPGM
+; SI: ds_read_b64
+; SI: ds_read_b64
+; SI: ds_write_b64
+; SI-NOT: ds_read
+; SI: ds_write_b64
+; SI: s_endpgm
define void @no_reorder_extload_64(<2 x i32> addrspace(3)* nocapture %x, <2 x i32> addrspace(3)* nocapture %y) nounwind {
%tmp1 = load <2 x i32> addrspace(3)* %x, align 8
%tmp4 = load <2 x i32> addrspace(3)* %y, align 8
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}s_rotl_i64:
-; SI-DAG: S_LSHL_B64
-; SI-DAG: S_SUB_I32
-; SI-DAG: S_LSHR_B64
-; SI: S_OR_B64
-; SI: S_ENDPGM
+; SI-DAG: s_lshl_b64
+; SI-DAG: s_sub_i32
+; SI-DAG: s_lshr_b64
+; SI: s_or_b64
+; SI: s_endpgm
define void @s_rotl_i64(i64 addrspace(1)* %in, i64 %x, i64 %y) {
entry:
%0 = shl i64 %x, %y
}
; FUNC-LABEL: {{^}}v_rotl_i64:
-; SI-DAG: V_LSHL_B64
-; SI-DAG: V_SUB_I32
-; SI: V_LSHR_B64
-; SI: V_OR_B32
-; SI: V_OR_B32
-; SI: S_ENDPGM
+; SI-DAG: v_lshl_b64
+; SI-DAG: v_sub_i32
+; SI: v_lshr_b64
+; SI: v_or_b32
+; SI: v_or_b32
+; SI: s_endpgm
define void @v_rotl_i64(i64 addrspace(1)* %in, i64 addrspace(1)* %xptr, i64 addrspace(1)* %yptr) {
entry:
%x = load i64 addrspace(1)* %xptr, align 8
; R600-NEXT: 32
; R600: BIT_ALIGN_INT {{T[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].Z, PV.{{[XYZW]}}
-; SI: S_SUB_I32 [[SDST:s[0-9]+]], 32, {{[s][0-9]+}}
-; SI: V_MOV_B32_e32 [[VDST:v[0-9]+]], [[SDST]]
-; SI: V_ALIGNBIT_B32 {{v[0-9]+, [s][0-9]+, s[0-9]+}}, [[VDST]]
+; SI: s_sub_i32 [[SDST:s[0-9]+]], 32, {{[s][0-9]+}}
+; SI: v_mov_b32_e32 [[VDST:v[0-9]+]], [[SDST]]
+; SI: v_alignbit_b32 {{v[0-9]+, [s][0-9]+, s[0-9]+}}, [[VDST]]
define void @rotl_i32(i32 addrspace(1)* %in, i32 %x, i32 %y) {
entry:
%0 = shl i32 %x, %y
}
; FUNC-LABEL: {{^}}rotl_v2i32:
-; SI-DAG: S_SUB_I32
-; SI-DAG: S_SUB_I32
-; SI-DAG: V_ALIGNBIT_B32
-; SI-DAG: V_ALIGNBIT_B32
-; SI: S_ENDPGM
+; SI-DAG: s_sub_i32
+; SI-DAG: s_sub_i32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_alignbit_b32
+; SI: s_endpgm
define void @rotl_v2i32(<2 x i32> addrspace(1)* %in, <2 x i32> %x, <2 x i32> %y) {
entry:
%0 = shl <2 x i32> %x, %y
}
; FUNC-LABEL: {{^}}rotl_v4i32:
-; SI-DAG: S_SUB_I32
-; SI-DAG: V_ALIGNBIT_B32
-; SI-DAG: S_SUB_I32
-; SI-DAG: V_ALIGNBIT_B32
-; SI-DAG: S_SUB_I32
-; SI-DAG: V_ALIGNBIT_B32
-; SI-DAG: S_SUB_I32
-; SI-DAG: V_ALIGNBIT_B32
-; SI: S_ENDPGM
+; SI-DAG: s_sub_i32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: s_sub_i32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: s_sub_i32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: s_sub_i32
+; SI-DAG: v_alignbit_b32
+; SI: s_endpgm
define void @rotl_v4i32(<4 x i32> addrspace(1)* %in, <4 x i32> %x, <4 x i32> %y) {
entry:
%0 = shl <4 x i32> %x, %y
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}s_rotr_i64:
-; SI-DAG: S_SUB_I32
-; SI-DAG: S_LSHR_B64
-; SI-DAG: S_LSHL_B64
-; SI: S_OR_B64
+; SI-DAG: s_sub_i32
+; SI-DAG: s_lshr_b64
+; SI-DAG: s_lshl_b64
+; SI: s_or_b64
define void @s_rotr_i64(i64 addrspace(1)* %in, i64 %x, i64 %y) {
entry:
%tmp0 = sub i64 64, %y
}
; FUNC-LABEL: {{^}}v_rotr_i64:
-; SI-DAG: V_SUB_I32
-; SI-DAG: V_LSHR_B64
-; SI-DAG: V_LSHL_B64
-; SI: V_OR_B32
-; SI: V_OR_B32
+; SI-DAG: v_sub_i32
+; SI-DAG: v_lshr_b64
+; SI-DAG: v_lshl_b64
+; SI: v_or_b32
+; SI: v_or_b32
define void @v_rotr_i64(i64 addrspace(1)* %in, i64 addrspace(1)* %xptr, i64 addrspace(1)* %yptr) {
entry:
%x = load i64 addrspace(1)* %xptr, align 8
; FUNC-LABEL: {{^}}rotr_i32:
; R600: BIT_ALIGN_INT
-; SI: V_ALIGNBIT_B32
+; SI: v_alignbit_b32
define void @rotr_i32(i32 addrspace(1)* %in, i32 %x, i32 %y) {
entry:
%tmp0 = sub i32 32, %y
; R600: BIT_ALIGN_INT
; R600: BIT_ALIGN_INT
-; SI: V_ALIGNBIT_B32
-; SI: V_ALIGNBIT_B32
+; SI: v_alignbit_b32
+; SI: v_alignbit_b32
define void @rotr_v2i32(<2 x i32> addrspace(1)* %in, <2 x i32> %x, <2 x i32> %y) {
entry:
%tmp0 = sub <2 x i32> <i32 32, i32 32>, %y
; R600: BIT_ALIGN_INT
; R600: BIT_ALIGN_INT
-; SI: V_ALIGNBIT_B32
-; SI: V_ALIGNBIT_B32
-; SI: V_ALIGNBIT_B32
-; SI: V_ALIGNBIT_B32
+; SI: v_alignbit_b32
+; SI: v_alignbit_b32
+; SI: v_alignbit_b32
+; SI: v_alignbit_b32
define void @rotr_v4i32(<4 x i32> addrspace(1)* %in, <4 x i32> %x, <4 x i32> %y) {
entry:
%tmp0 = sub <4 x i32> <i32 32, i32 32, i32 32, i32 32>, %y
declare double @llvm.sqrt.f64(double) nounwind readnone
; SI-LABEL: {{^}}rsq_f32:
-; SI: V_RSQ_F32_e32
-; SI: S_ENDPGM
+; SI: v_rsq_f32_e32
+; SI: s_endpgm
define void @rsq_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
%val = load float addrspace(1)* %in, align 4
%sqrt = call float @llvm.sqrt.f32(float %val) nounwind readnone
}
; SI-LABEL: {{^}}rsq_f64:
-; SI-UNSAFE: V_RSQ_F64_e32
-; SI-SAFE: V_SQRT_F64_e32
-; SI: S_ENDPGM
+; SI-UNSAFE: v_rsq_f64_e32
+; SI-SAFE: v_sqrt_f64_e32
+; SI: s_endpgm
define void @rsq_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) nounwind {
%val = load double addrspace(1)* %in, align 4
%sqrt = call double @llvm.sqrt.f64(double %val) nounwind readnone
}
; SI-LABEL: {{^}}rsq_f32_sgpr:
-; SI: V_RSQ_F32_e32 {{v[0-9]+}}, {{s[0-9]+}}
-; SI: S_ENDPGM
+; SI: v_rsq_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
+; SI: s_endpgm
define void @rsq_f32_sgpr(float addrspace(1)* noalias %out, float %val) nounwind {
%sqrt = call float @llvm.sqrt.f32(float %val) nounwind readnone
%div = fdiv float 1.0, %sqrt
}
; FUNC-LABEL: {{^}}v_saddo_i64:
-; SI: V_ADD_I32
-; SI: V_ADDC_U32
+; SI: v_add_i32
+; SI: v_addc_u32
define void @v_saddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
%a = load i64 addrspace(1)* %aptr, align 4
%b = load i64 addrspace(1)* %bptr, align 4
; CHECK-LABEL: {{^}}mubuf:
-; Make sure we aren't using VGPRs for the source operand of S_MOV_B64
-; CHECK-NOT: S_MOV_B64 s[{{[0-9]+:[0-9]+}}], v
+; Make sure we aren't using VGPRs for the source operand of s_mov_b64
+; CHECK-NOT: s_mov_b64 s[{{[0-9]+:[0-9]+}}], v
; Make sure we aren't using VGPR's for the srsrc operand of BUFFER_LOAD_*
; instructions
-; CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64
-; CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64
+; CHECK: buffer_load_ubyte v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64
+; CHECK: buffer_load_ubyte v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64
define void @mubuf(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
%0 = call i32 @llvm.r600.read.tidig.x() #1
; Test moving an SMRD instruction to the VALU
; CHECK-LABEL: {{^}}smrd_valu:
-; CHECK: BUFFER_LOAD_DWORD [[OUT:v[0-9]+]]
-; CHECK: BUFFER_STORE_DWORD [[OUT]]
+; CHECK: buffer_load_dword [[OUT:v[0-9]+]]
+; CHECK: buffer_store_dword [[OUT]]
define void @smrd_valu(i32 addrspace(2)* addrspace(1)* %in, i32 %a, i32 addrspace(1)* %out) {
entry:
; Test moving ann SMRD with an immediate offset to the VALU
; CHECK-LABEL: {{^}}smrd_valu2:
-; CHECK: BUFFER_LOAD_DWORD
+; CHECK: buffer_load_dword
define void @smrd_valu2(i32 addrspace(1)* %out, [8 x i32] addrspace(2)* %in) {
entry:
%0 = call i32 @llvm.r600.read.tidig.x() nounwind readnone
}
; CHECK-LABEL: {{^}}s_load_imm_v8i32:
-; CHECK: BUFFER_LOAD_DWORDX4
-; CHECK: BUFFER_LOAD_DWORDX4
+; CHECK: buffer_load_dwordx4
+; CHECK: buffer_load_dwordx4
define void @s_load_imm_v8i32(<8 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) {
entry:
%tmp0 = tail call i32 @llvm.r600.read.tidig.x() #1
}
; CHECK-LABEL: {{^}}s_load_imm_v16i32:
-; CHECK: BUFFER_LOAD_DWORDX4
-; CHECK: BUFFER_LOAD_DWORDX4
-; CHECK: BUFFER_LOAD_DWORDX4
-; CHECK: BUFFER_LOAD_DWORDX4
+; CHECK: buffer_load_dwordx4
+; CHECK: buffer_load_dwordx4
+; CHECK: buffer_load_dwordx4
+; CHECK: buffer_load_dwordx4
define void @s_load_imm_v16i32(<16 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) {
entry:
%tmp0 = tail call i32 @llvm.r600.read.tidig.x() #1
; FUNC-LABEL: {{^}}scalar_to_vector_v2i32:
-; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
-; SI: V_LSHRREV_B32_e32 [[RESULT:v[0-9]+]], 16, [[VAL]]
-; SI: BUFFER_STORE_SHORT [[RESULT]]
-; SI: BUFFER_STORE_SHORT [[RESULT]]
-; SI: BUFFER_STORE_SHORT [[RESULT]]
-; SI: BUFFER_STORE_SHORT [[RESULT]]
-; SI: S_ENDPGM
+; SI: buffer_load_dword [[VAL:v[0-9]+]],
+; SI: v_lshrrev_b32_e32 [[RESULT:v[0-9]+]], 16, [[VAL]]
+; SI: buffer_store_short [[RESULT]]
+; SI: buffer_store_short [[RESULT]]
+; SI: buffer_store_short [[RESULT]]
+; SI: buffer_store_short [[RESULT]]
+; SI: s_endpgm
define void @scalar_to_vector_v2i32(<4 x i16> addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%tmp1 = load i32 addrspace(1)* %in, align 4
%bc = bitcast i32 %tmp1 to <2 x i16>
}
; FUNC-LABEL: {{^}}scalar_to_vector_v2f32:
-; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
-; SI: V_LSHRREV_B32_e32 [[RESULT:v[0-9]+]], 16, [[VAL]]
-; SI: BUFFER_STORE_SHORT [[RESULT]]
-; SI: BUFFER_STORE_SHORT [[RESULT]]
-; SI: BUFFER_STORE_SHORT [[RESULT]]
-; SI: BUFFER_STORE_SHORT [[RESULT]]
-; SI: S_ENDPGM
+; SI: buffer_load_dword [[VAL:v[0-9]+]],
+; SI: v_lshrrev_b32_e32 [[RESULT:v[0-9]+]], 16, [[VAL]]
+; SI: buffer_store_short [[RESULT]]
+; SI: buffer_store_short [[RESULT]]
+; SI: buffer_store_short [[RESULT]]
+; SI: buffer_store_short [[RESULT]]
+; SI: s_endpgm
define void @scalar_to_vector_v2f32(<4 x i16> addrspace(1)* %out, float addrspace(1)* %in) nounwind {
%tmp1 = load float addrspace(1)* %in, align 4
%bc = bitcast float %tmp1 to <2 x i16>
; ordering the loads so that the lower address loads come first.
; FUNC-LABEL: {{^}}cluster_global_arg_loads:
-; SI-DAG: BUFFER_LOAD_DWORD [[REG0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
-; SI-DAG: BUFFER_LOAD_DWORD [[REG1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:0x4
-; SI: BUFFER_STORE_DWORD [[REG0]]
-; SI: BUFFER_STORE_DWORD [[REG1]]
+; SI-DAG: buffer_load_dword [[REG0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; SI-DAG: buffer_load_dword [[REG1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:0x4
+; SI: buffer_store_dword [[REG0]]
+; SI: buffer_store_dword [[REG1]]
define void @cluster_global_arg_loads(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %ptr) #0 {
%load0 = load i32 addrspace(1)* %ptr, align 4
%gep = getelementptr i32 addrspace(1)* %ptr, i32 1
; Test for a crach in SIInstrInfo::areLoadsFromSameBasePtr() when checking
; an MUBUF load which does not have a vaddr operand.
; FUNC-LABEL: {{^}}same_base_ptr_crash:
-; SI: BUFFER_LOAD_DWORD
-; SI: BUFFER_LOAD_DWORD
+; SI: buffer_load_dword
+; SI: buffer_load_dword
define void @same_base_ptr_crash(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %offset) {
entry:
%out1 = getelementptr i32 addrspace(1)* %out, i32 %offset
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s
; FUNC-LABEL: {{^}}cluster_arg_loads:
-; SI: S_LOAD_DWORDX2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x9
-; SI-NEXT: S_LOAD_DWORDX2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-NEXT: S_LOAD_DWORD s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
-; SI-NEXT: S_LOAD_DWORD s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0xe
+; SI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x9
+; SI-NEXT: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-NEXT: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
+; SI-NEXT: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0xe
define void @cluster_arg_loads(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 %x, i32 %y) nounwind {
store i32 %x, i32 addrspace(1)* %out0, align 4
store i32 %y, i32 addrspace(1)* %out1, align 4
; working.
; FUNC-LABEL: {{^}}slow_sdiv_i32_3435:
-; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
-; SI: V_MOV_B32_e32 [[MAGIC:v[0-9]+]], 0x98a1930b
-; SI: V_MUL_HI_I32 [[TMP:v[0-9]+]], [[VAL]], [[MAGIC]]
-; SI: V_ADD_I32
-; SI: V_LSHRREV_B32
-; SI: V_ASHRREV_I32
-; SI: V_ADD_I32
-; SI: BUFFER_STORE_DWORD
-; SI: S_ENDPGM
+; SI: buffer_load_dword [[VAL:v[0-9]+]],
+; SI: v_mov_b32_e32 [[MAGIC:v[0-9]+]], 0x98a1930b
+; SI: v_mul_hi_i32 [[TMP:v[0-9]+]], [[VAL]], [[MAGIC]]
+; SI: v_add_i32
+; SI: v_lshrrev_b32
+; SI: v_ashrrev_i32
+; SI: v_add_i32
+; SI: buffer_store_dword
+; SI: s_endpgm
define void @slow_sdiv_i32_3435(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%num = load i32 addrspace(1) * %in
%result = sdiv i32 %num, 3435
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}sdiv24_i8:
-; SI: V_CVT_F32_I32
-; SI: V_CVT_F32_I32
-; SI: V_RCP_F32
-; SI: V_CVT_I32_F32
+; SI: v_cvt_f32_i32
+; SI: v_cvt_f32_i32
+; SI: v_rcp_f32
+; SI: v_cvt_i32_f32
; EG: INT_TO_FLT
; EG-DAG: INT_TO_FLT
}
; FUNC-LABEL: {{^}}sdiv24_i16:
-; SI: V_CVT_F32_I32
-; SI: V_CVT_F32_I32
-; SI: V_RCP_F32
-; SI: V_CVT_I32_F32
+; SI: v_cvt_f32_i32
+; SI: v_cvt_f32_i32
+; SI: v_rcp_f32
+; SI: v_cvt_i32_f32
; EG: INT_TO_FLT
; EG-DAG: INT_TO_FLT
}
; FUNC-LABEL: {{^}}sdiv24_i32:
-; SI: V_CVT_F32_I32
-; SI: V_CVT_F32_I32
-; SI: V_RCP_F32
-; SI: V_CVT_I32_F32
+; SI: v_cvt_f32_i32
+; SI: v_cvt_f32_i32
+; SI: v_rcp_f32
+; SI: v_cvt_i32_f32
; EG: INT_TO_FLT
; EG-DAG: INT_TO_FLT
}
; FUNC-LABEL: {{^}}sdiv25_i32:
-; SI-NOT: V_CVT_F32_I32
-; SI-NOT: V_RCP_F32
+; SI-NOT: v_cvt_f32_i32
+; SI-NOT: v_rcp_f32
; EG-NOT: INT_TO_FLT
; EG-NOT: RECIP_IEEE
}
; FUNC-LABEL: {{^}}test_no_sdiv24_i32_1:
-; SI-NOT: V_CVT_F32_I32
-; SI-NOT: V_RCP_F32
+; SI-NOT: v_cvt_f32_i32
+; SI-NOT: v_rcp_f32
; EG-NOT: INT_TO_FLT
; EG-NOT: RECIP_IEEE
}
; FUNC-LABEL: {{^}}test_no_sdiv24_i32_2:
-; SI-NOT: V_CVT_F32_I32
-; SI-NOT: V_RCP_F32
+; SI-NOT: v_cvt_f32_i32
+; SI-NOT: v_rcp_f32
; EG-NOT: INT_TO_FLT
; EG-NOT: RECIP_IEEE
}
; FUNC-LABEL: {{^}}srem24_i8:
-; SI: V_CVT_F32_I32
-; SI: V_CVT_F32_I32
-; SI: V_RCP_F32
-; SI: V_CVT_I32_F32
+; SI: v_cvt_f32_i32
+; SI: v_cvt_f32_i32
+; SI: v_rcp_f32
+; SI: v_cvt_i32_f32
; EG: INT_TO_FLT
; EG-DAG: INT_TO_FLT
}
; FUNC-LABEL: {{^}}srem24_i16:
-; SI: V_CVT_F32_I32
-; SI: V_CVT_F32_I32
-; SI: V_RCP_F32
-; SI: V_CVT_I32_F32
+; SI: v_cvt_f32_i32
+; SI: v_cvt_f32_i32
+; SI: v_rcp_f32
+; SI: v_cvt_i32_f32
; EG: INT_TO_FLT
; EG-DAG: INT_TO_FLT
}
; FUNC-LABEL: {{^}}srem24_i32:
-; SI: V_CVT_F32_I32
-; SI: V_CVT_F32_I32
-; SI: V_RCP_F32
-; SI: V_CVT_I32_F32
+; SI: v_cvt_f32_i32
+; SI: v_cvt_f32_i32
+; SI: v_rcp_f32
+; SI: v_cvt_i32_f32
; EG: INT_TO_FLT
; EG-DAG: INT_TO_FLT
}
; FUNC-LABEL: {{^}}srem25_i32:
-; SI-NOT: V_CVT_F32_I32
-; SI-NOT: V_RCP_F32
+; SI-NOT: v_cvt_f32_i32
+; SI-NOT: v_rcp_f32
; EG-NOT: INT_TO_FLT
; EG-NOT: RECIP_IEEE
}
; FUNC-LABEL: {{^}}test_no_srem24_i32_1:
-; SI-NOT: V_CVT_F32_I32
-; SI-NOT: V_RCP_F32
+; SI-NOT: v_cvt_f32_i32
+; SI-NOT: v_rcp_f32
; EG-NOT: INT_TO_FLT
; EG-NOT: RECIP_IEEE
}
; FUNC-LABEL: {{^}}test_no_srem24_i32_2:
-; SI-NOT: V_CVT_F32_I32
-; SI-NOT: V_RCP_F32
+; SI-NOT: v_cvt_f32_i32
+; SI-NOT: v_rcp_f32
; EG-NOT: INT_TO_FLT
; EG-NOT: RECIP_IEEE
; FIXME: This should go in existing select.ll test, except the current testcase there is broken on SI
; FUNC-LABEL: {{^}}select_i1:
-; SI: V_CNDMASK_B32
-; SI-NOT: V_CNDMASK_B32
+; SI: v_cndmask_b32
+; SI-NOT: v_cndmask_b32
define void @select_i1(i1 addrspace(1)* %out, i32 %cond, i1 %a, i1 %b) nounwind {
%cmp = icmp ugt i32 %cond, 5
%sel = select i1 %cmp, i1 %a, i1 %b
; FUNC-LABEL: {{^}}select_v4i8:
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
define void @select_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a, <4 x i8> %b, i8 %c) nounwind {
%cmp = icmp eq i8 %c, 0
%select = select i1 %cmp, <4 x i8> %a, <4 x i8> %b
}
; FUNC-LABEL: {{^}}select_v4i16:
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
define void @select_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %a, <4 x i16> %b, i32 %c) nounwind {
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, <4 x i16> %a, <4 x i16> %b
}
; FUNC-LABEL: {{^}}select_v2i32:
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: BUFFER_STORE_DWORDX2
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: buffer_store_dwordx2
define void @select_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b, i32 %c) nounwind {
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, <2 x i32> %a, <2 x i32> %b
}
; FUNC-LABEL: {{^}}select_v4i32:
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: BUFFER_STORE_DWORDX4
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: buffer_store_dwordx4
define void @select_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b, i32 %c) nounwind {
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, <4 x i32> %a, <4 x i32> %b
}
; FUNC-LABEL: {{^}}select_v8i32:
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
define void @select_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b, i32 %c) nounwind {
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, <8 x i32> %a, <8 x i32> %b
}
; FUNC-LABEL: {{^}}select_v2f32:
-; SI: BUFFER_STORE_DWORDX2
+; SI: buffer_store_dwordx2
define void @select_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b, i32 %c) nounwind {
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, <2 x float> %a, <2 x float> %b
}
; FUNC-LABEL: {{^}}select_v4f32:
-; SI: BUFFER_STORE_DWORDX4
+; SI: buffer_store_dwordx4
define void @select_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b, i32 %c) nounwind {
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, <4 x float> %a, <4 x float> %b
}
; FUNC-LABEL: {{^}}select_v8f32:
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
define void @select_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b, i32 %c) nounwind {
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, <8 x float> %a, <8 x float> %b
}
; FUNC-LABEL: {{^}}select_v2f64:
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
define void @select_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b, i32 %c) nounwind {
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, <2 x double> %a, <2 x double> %b
}
; FUNC-LABEL: {{^}}select_v4f64:
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
define void @select_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b, i32 %c) nounwind {
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, <4 x double> %a, <4 x double> %b
}
; FUNC-LABEL: {{^}}select_v8f64:
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
define void @select_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %a, <8 x double> %b, i32 %c) nounwind {
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, <8 x double> %a, <8 x double> %b
; CHECK-LABEL: {{^}}select0:
; i64 select should be split into two i32 selects, and we shouldn't need
; to use a shfit to extract the hi dword of the input.
-; CHECK-NOT: S_LSHR_B64
-; CHECK: V_CNDMASK
-; CHECK: V_CNDMASK
+; CHECK-NOT: s_lshr_b64
+; CHECK: v_cndmask
+; CHECK: v_cndmask
define void @select0(i64 addrspace(1)* %out, i32 %cond, i64 %in) {
entry:
%0 = icmp ugt i32 %cond, 5
}
; CHECK-LABEL: {{^}}select_trunc_i64:
-; CHECK: V_CNDMASK_B32
-; CHECK-NOT: V_CNDMASK_B32
+; CHECK: v_cndmask_b32
+; CHECK-NOT: v_cndmask_b32
define void @select_trunc_i64(i32 addrspace(1)* %out, i32 %cond, i64 %in) nounwind {
%cmp = icmp ugt i32 %cond, 5
%sel = select i1 %cmp, i64 0, i64 %in
}
; CHECK-LABEL: {{^}}select_trunc_i64_2:
-; CHECK: V_CNDMASK_B32
-; CHECK-NOT: V_CNDMASK_B32
+; CHECK: v_cndmask_b32
+; CHECK-NOT: v_cndmask_b32
define void @select_trunc_i64_2(i32 addrspace(1)* %out, i32 %cond, i64 %a, i64 %b) nounwind {
%cmp = icmp ugt i32 %cond, 5
%sel = select i1 %cmp, i64 %a, i64 %b
}
; CHECK-LABEL: {{^}}v_select_trunc_i64_2:
-; CHECK: V_CNDMASK_B32
-; CHECK-NOT: V_CNDMASK_B32
+; CHECK: v_cndmask_b32
+; CHECK-NOT: v_cndmask_b32
define void @v_select_trunc_i64_2(i32 addrspace(1)* %out, i32 %cond, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
%cmp = icmp ugt i32 %cond, 5
%a = load i64 addrspace(1)* %aptr, align 8
}
; FUNC-LABEL: {{^}}selectcc_bool:
-; SI: V_CMP_NE_I32
-; SI-NEXT: V_CNDMASK_B32_e64
-; SI-NOT: CMP
-; SI-NOT: CNDMASK
+; SI: v_cmp_ne_i32
+; SI-NEXT: v_cndmask_b32_e64
+; SI-NOT: cmp
+; SI-NOT: cndmask
define void @selectcc_bool(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%icmp0 = icmp ne i32 %a, %b
%ext = select i1 %icmp0, i32 -1, i32 0
; EG: OR_INT
; EG: CNDE_INT
; EG: CNDE_INT
-; SI: V_CMP_EQ_I64
-; SI: V_CNDMASK
-; SI: V_CNDMASK
+; SI: v_cmp_eq_i64
+; SI: v_cndmask
+; SI: v_cndmask
define void @selectcc_i64(i64 addrspace(1) * %out, i64 %lhs, i64 %rhs, i64 %true, i64 %false) {
entry:
%0 = icmp eq i64 %lhs, %rhs
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; SI-LABEL: {{^}}sext_bool_icmp_ne:
-; SI: V_CMP_NE_I32
-; SI-NEXT: V_CNDMASK_B32
-; SI-NOT: V_CMP_NE_I32
-; SI-NOT: V_CNDMASK_B32
-; SI: S_ENDPGM
+; SI: v_cmp_ne_i32
+; SI-NEXT: v_cndmask_b32
+; SI-NOT: v_cmp_ne_i32
+; SI-NOT: v_cndmask_b32
+; SI: s_endpgm
define void @sext_bool_icmp_ne(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%icmp0 = icmp ne i32 %a, %b
%ext = sext i1 %icmp0 to i32
; FUNC-LABEL: {{^}}f32_oeq:
; R600: SETE_DX10
-; SI: V_CMP_EQ_F32
+; SI: v_cmp_eq_f32
define void @f32_oeq(i32 addrspace(1)* %out, float %a, float %b) {
entry:
%0 = fcmp oeq float %a, %b
; FUNC-LABEL: {{^}}f32_ogt:
; R600: SETGT_DX10
-; SI: V_CMP_GT_F32
+; SI: v_cmp_gt_f32
define void @f32_ogt(i32 addrspace(1)* %out, float %a, float %b) {
entry:
%0 = fcmp ogt float %a, %b
; FUNC-LABEL: {{^}}f32_oge:
; R600: SETGE_DX10
-; SI: V_CMP_GE_F32
+; SI: v_cmp_ge_f32
define void @f32_oge(i32 addrspace(1)* %out, float %a, float %b) {
entry:
%0 = fcmp oge float %a, %b
; FUNC-LABEL: {{^}}f32_olt:
; R600: SETGT_DX10
-; SI: V_CMP_LT_F32
+; SI: v_cmp_lt_f32
define void @f32_olt(i32 addrspace(1)* %out, float %a, float %b) {
entry:
%0 = fcmp olt float %a, %b
; FUNC-LABEL: {{^}}f32_ole:
; R600: SETGE_DX10
-; SI: V_CMP_LE_F32
+; SI: v_cmp_le_f32
define void @f32_ole(i32 addrspace(1)* %out, float %a, float %b) {
entry:
%0 = fcmp ole float %a, %b
; R600-DAG: SETNE_DX10
; R600-DAG: AND_INT
; R600-DAG: SETNE_INT
-; SI: V_CMP_O_F32
-; SI: V_CMP_NEQ_F32
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_AND_B32_e32
+; SI: v_cmp_o_f32
+; SI: v_cmp_neq_f32
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_and_b32_e32
define void @f32_one(i32 addrspace(1)* %out, float %a, float %b) {
entry:
%0 = fcmp one float %a, %b
; R600-DAG: SETE_DX10
; R600-DAG: AND_INT
; R600-DAG: SETNE_INT
-; SI: V_CMP_O_F32
+; SI: v_cmp_o_f32
define void @f32_ord(i32 addrspace(1)* %out, float %a, float %b) {
entry:
%0 = fcmp ord float %a, %b
; R600-DAG: SETE_DX10
; R600-DAG: OR_INT
; R600-DAG: SETNE_INT
-; SI: V_CMP_U_F32
-; SI: V_CMP_EQ_F32
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_OR_B32_e32
+; SI: v_cmp_u_f32
+; SI: v_cmp_eq_f32
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_or_b32_e32
define void @f32_ueq(i32 addrspace(1)* %out, float %a, float %b) {
entry:
%0 = fcmp ueq float %a, %b
; FUNC-LABEL: {{^}}f32_ugt:
; R600: SETGE
; R600: SETE_DX10
-; SI: V_CMP_U_F32
-; SI: V_CMP_GT_F32
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_OR_B32_e32
+; SI: v_cmp_u_f32
+; SI: v_cmp_gt_f32
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_or_b32_e32
define void @f32_ugt(i32 addrspace(1)* %out, float %a, float %b) {
entry:
%0 = fcmp ugt float %a, %b
; FUNC-LABEL: {{^}}f32_uge:
; R600: SETGT
; R600: SETE_DX10
-; SI: V_CMP_U_F32
-; SI: V_CMP_GE_F32
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_OR_B32_e32
+; SI: v_cmp_u_f32
+; SI: v_cmp_ge_f32
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_or_b32_e32
define void @f32_uge(i32 addrspace(1)* %out, float %a, float %b) {
entry:
%0 = fcmp uge float %a, %b
; FUNC-LABEL: {{^}}f32_ult:
; R600: SETGE
; R600: SETE_DX10
-; SI: V_CMP_U_F32
-; SI: V_CMP_LT_F32
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_OR_B32_e32
+; SI: v_cmp_u_f32
+; SI: v_cmp_lt_f32
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_or_b32_e32
define void @f32_ult(i32 addrspace(1)* %out, float %a, float %b) {
entry:
%0 = fcmp ult float %a, %b
; FUNC-LABEL: {{^}}f32_ule:
; R600: SETGT
; R600: SETE_DX10
-; SI: V_CMP_U_F32
-; SI: V_CMP_LE_F32
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_OR_B32_e32
+; SI: v_cmp_u_f32
+; SI: v_cmp_le_f32
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_or_b32_e32
define void @f32_ule(i32 addrspace(1)* %out, float %a, float %b) {
entry:
%0 = fcmp ule float %a, %b
; FUNC-LABEL: {{^}}f32_une:
; R600: SETNE_DX10
-; SI: V_CMP_NEQ_F32
+; SI: v_cmp_neq_f32
define void @f32_une(i32 addrspace(1)* %out, float %a, float %b) {
entry:
%0 = fcmp une float %a, %b
; R600: SETNE_DX10
; R600: OR_INT
; R600: SETNE_INT
-; SI: V_CMP_U_F32
+; SI: v_cmp_u_f32
define void @f32_uno(i32 addrspace(1)* %out, float %a, float %b) {
entry:
%0 = fcmp uno float %a, %b
; FUNC-LABEL: {{^}}i32_eq:
; R600: SETE_INT
-; SI: V_CMP_EQ_I32
+; SI: v_cmp_eq_i32
define void @i32_eq(i32 addrspace(1)* %out, i32 %a, i32 %b) {
entry:
%0 = icmp eq i32 %a, %b
; FUNC-LABEL: {{^}}i32_ne:
; R600: SETNE_INT
-; SI: V_CMP_NE_I32
+; SI: v_cmp_ne_i32
define void @i32_ne(i32 addrspace(1)* %out, i32 %a, i32 %b) {
entry:
%0 = icmp ne i32 %a, %b
; FUNC-LABEL: {{^}}i32_ugt:
; R600: SETGT_UINT
-; SI: V_CMP_GT_U32
+; SI: v_cmp_gt_u32
define void @i32_ugt(i32 addrspace(1)* %out, i32 %a, i32 %b) {
entry:
%0 = icmp ugt i32 %a, %b
; FUNC-LABEL: {{^}}i32_uge:
; R600: SETGE_UINT
-; SI: V_CMP_GE_U32
+; SI: v_cmp_ge_u32
define void @i32_uge(i32 addrspace(1)* %out, i32 %a, i32 %b) {
entry:
%0 = icmp uge i32 %a, %b
; FUNC-LABEL: {{^}}i32_ult:
; R600: SETGT_UINT
-; SI: V_CMP_LT_U32
+; SI: v_cmp_lt_u32
define void @i32_ult(i32 addrspace(1)* %out, i32 %a, i32 %b) {
entry:
%0 = icmp ult i32 %a, %b
; FUNC-LABEL: {{^}}i32_ule:
; R600: SETGE_UINT
-; SI: V_CMP_LE_U32
+; SI: v_cmp_le_u32
define void @i32_ule(i32 addrspace(1)* %out, i32 %a, i32 %b) {
entry:
%0 = icmp ule i32 %a, %b
; FUNC-LABEL: {{^}}i32_sgt:
; R600: SETGT_INT
-; SI: V_CMP_GT_I32
+; SI: v_cmp_gt_i32
define void @i32_sgt(i32 addrspace(1)* %out, i32 %a, i32 %b) {
entry:
%0 = icmp sgt i32 %a, %b
; FUNC-LABEL: {{^}}i32_sge:
; R600: SETGE_INT
-; SI: V_CMP_GE_I32
+; SI: v_cmp_ge_i32
define void @i32_sge(i32 addrspace(1)* %out, i32 %a, i32 %b) {
entry:
%0 = icmp sge i32 %a, %b
; FUNC-LABEL: {{^}}i32_slt:
; R600: SETGT_INT
-; SI: V_CMP_LT_I32
+; SI: v_cmp_lt_i32
define void @i32_slt(i32 addrspace(1)* %out, i32 %a, i32 %b) {
entry:
%0 = icmp slt i32 %a, %b
; FUNC-LABEL: {{^}}i32_sle:
; R600: SETGE_INT
-; SI: V_CMP_LE_I32
+; SI: v_cmp_le_i32
define void @i32_sle(i32 addrspace(1)* %out, i32 %a, i32 %b) {
entry:
%0 = icmp sle i32 %a, %b
;;;==========================================================================;;;
; FUNC-LABEL: {{^}}f64_oeq:
-; SI: V_CMP_EQ_F64
+; SI: v_cmp_eq_f64
define void @f64_oeq(i32 addrspace(1)* %out, double %a, double %b) {
entry:
%0 = fcmp oeq double %a, %b
}
; FUNC-LABEL: {{^}}f64_ogt:
-; SI: V_CMP_GT_F64
+; SI: v_cmp_gt_f64
define void @f64_ogt(i32 addrspace(1)* %out, double %a, double %b) {
entry:
%0 = fcmp ogt double %a, %b
}
; FUNC-LABEL: {{^}}f64_oge:
-; SI: V_CMP_GE_F64
+; SI: v_cmp_ge_f64
define void @f64_oge(i32 addrspace(1)* %out, double %a, double %b) {
entry:
%0 = fcmp oge double %a, %b
}
; FUNC-LABEL: {{^}}f64_olt:
-; SI: V_CMP_LT_F64
+; SI: v_cmp_lt_f64
define void @f64_olt(i32 addrspace(1)* %out, double %a, double %b) {
entry:
%0 = fcmp olt double %a, %b
}
; FUNC-LABEL: {{^}}f64_ole:
-; SI: V_CMP_LE_F64
+; SI: v_cmp_le_f64
define void @f64_ole(i32 addrspace(1)* %out, double %a, double %b) {
entry:
%0 = fcmp ole double %a, %b
}
; FUNC-LABEL: {{^}}f64_one:
-; SI: V_CMP_O_F64
-; SI: V_CMP_NEQ_F64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_AND_B32_e32
+; SI: v_cmp_o_f64
+; SI: v_cmp_neq_f64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_and_b32_e32
define void @f64_one(i32 addrspace(1)* %out, double %a, double %b) {
entry:
%0 = fcmp one double %a, %b
}
; FUNC-LABEL: {{^}}f64_ord:
-; SI: V_CMP_O_F64
+; SI: v_cmp_o_f64
define void @f64_ord(i32 addrspace(1)* %out, double %a, double %b) {
entry:
%0 = fcmp ord double %a, %b
}
; FUNC-LABEL: {{^}}f64_ueq:
-; SI: V_CMP_U_F64
-; SI: V_CMP_EQ_F64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_OR_B32_e32
+; SI: v_cmp_u_f64
+; SI: v_cmp_eq_f64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_or_b32_e32
define void @f64_ueq(i32 addrspace(1)* %out, double %a, double %b) {
entry:
%0 = fcmp ueq double %a, %b
}
; FUNC-LABEL: {{^}}f64_ugt:
-; SI: V_CMP_U_F64
-; SI: V_CMP_GT_F64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_OR_B32_e32
+; SI: v_cmp_u_f64
+; SI: v_cmp_gt_f64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_or_b32_e32
define void @f64_ugt(i32 addrspace(1)* %out, double %a, double %b) {
entry:
%0 = fcmp ugt double %a, %b
}
; FUNC-LABEL: {{^}}f64_uge:
-; SI: V_CMP_U_F64
-; SI: V_CMP_GE_F64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_OR_B32_e32
+; SI: v_cmp_u_f64
+; SI: v_cmp_ge_f64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_or_b32_e32
define void @f64_uge(i32 addrspace(1)* %out, double %a, double %b) {
entry:
%0 = fcmp uge double %a, %b
}
; FUNC-LABEL: {{^}}f64_ult:
-; SI: V_CMP_U_F64
-; SI: V_CMP_LT_F64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_OR_B32_e32
+; SI: v_cmp_u_f64
+; SI: v_cmp_lt_f64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_or_b32_e32
define void @f64_ult(i32 addrspace(1)* %out, double %a, double %b) {
entry:
%0 = fcmp ult double %a, %b
}
; FUNC-LABEL: {{^}}f64_ule:
-; SI: V_CMP_U_F64
-; SI: V_CMP_LE_F64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_OR_B32_e32
+; SI: v_cmp_u_f64
+; SI: v_cmp_le_f64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_or_b32_e32
define void @f64_ule(i32 addrspace(1)* %out, double %a, double %b) {
entry:
%0 = fcmp ule double %a, %b
}
; FUNC-LABEL: {{^}}f64_une:
-; SI: V_CMP_NEQ_F64
+; SI: v_cmp_neq_f64
define void @f64_une(i32 addrspace(1)* %out, double %a, double %b) {
entry:
%0 = fcmp une double %a, %b
}
; FUNC-LABEL: {{^}}f64_uno:
-; SI: V_CMP_U_F64
+; SI: v_cmp_u_f64
define void @f64_uno(i32 addrspace(1)* %out, double %a, double %b) {
entry:
%0 = fcmp uno double %a, %b
;;;==========================================================================;;;
; FUNC-LABEL: {{^}}i64_eq:
-; SI: V_CMP_EQ_I64
+; SI: v_cmp_eq_i64
define void @i64_eq(i32 addrspace(1)* %out, i64 %a, i64 %b) {
entry:
%0 = icmp eq i64 %a, %b
}
; FUNC-LABEL: {{^}}i64_ne:
-; SI: V_CMP_NE_I64
+; SI: v_cmp_ne_i64
define void @i64_ne(i32 addrspace(1)* %out, i64 %a, i64 %b) {
entry:
%0 = icmp ne i64 %a, %b
}
; FUNC-LABEL: {{^}}i64_ugt:
-; SI: V_CMP_GT_U64
+; SI: v_cmp_gt_u64
define void @i64_ugt(i32 addrspace(1)* %out, i64 %a, i64 %b) {
entry:
%0 = icmp ugt i64 %a, %b
}
; FUNC-LABEL: {{^}}i64_uge:
-; SI: V_CMP_GE_U64
+; SI: v_cmp_ge_u64
define void @i64_uge(i32 addrspace(1)* %out, i64 %a, i64 %b) {
entry:
%0 = icmp uge i64 %a, %b
}
; FUNC-LABEL: {{^}}i64_ult:
-; SI: V_CMP_LT_U64
+; SI: v_cmp_lt_u64
define void @i64_ult(i32 addrspace(1)* %out, i64 %a, i64 %b) {
entry:
%0 = icmp ult i64 %a, %b
}
; FUNC-LABEL: {{^}}i64_ule:
-; SI: V_CMP_LE_U64
+; SI: v_cmp_le_u64
define void @i64_ule(i32 addrspace(1)* %out, i64 %a, i64 %b) {
entry:
%0 = icmp ule i64 %a, %b
}
; FUNC-LABEL: {{^}}i64_sgt:
-; SI: V_CMP_GT_I64
+; SI: v_cmp_gt_i64
define void @i64_sgt(i32 addrspace(1)* %out, i64 %a, i64 %b) {
entry:
%0 = icmp sgt i64 %a, %b
}
; FUNC-LABEL: {{^}}i64_sge:
-; SI: V_CMP_GE_I64
+; SI: v_cmp_ge_i64
define void @i64_sge(i32 addrspace(1)* %out, i64 %a, i64 %b) {
entry:
%0 = icmp sge i64 %a, %b
}
; FUNC-LABEL: {{^}}i64_slt:
-; SI: V_CMP_LT_I64
+; SI: v_cmp_lt_i64
define void @i64_slt(i32 addrspace(1)* %out, i64 %a, i64 %b) {
entry:
%0 = icmp slt i64 %a, %b
}
; FUNC-LABEL: {{^}}i64_sle:
-; SI: V_CMP_LE_I64
+; SI: v_cmp_le_i64
define void @i64_sle(i32 addrspace(1)* %out, i64 %a, i64 %b) {
entry:
%0 = icmp sle i64 %a, %b
; RUN: llc -march=r600 -mcpu=verde -verify-machineinstrs < %s | FileCheck %s
; CHECK-LABEL: {{^}}main:
-; CHECK: V_CMP_O_F32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[SREG:s[0-9]+]], [[SREG]]
-; CHECK-NEXT: V_CNDMASK_B32_e64 {{v[0-9]+}}, 0, 1.0, [[CMP]]
+; CHECK: v_cmp_o_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[SREG:s[0-9]+]], [[SREG]]
+; CHECK-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, 1.0, [[CMP]]
define void @main(float %p) {
main_body:
%c = fcmp oeq float %p, %p
; RUN: llc -march=r600 -mcpu=verde -verify-machineinstrs < %s | FileCheck %s
; CHECK-LABEL: {{^}}main:
-; CHECK: V_CMP_U_F32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[SREG:s[0-9]+]], [[SREG]]
-; CHECK-NEXT: V_CNDMASK_B32_e64 {{v[0-9]+}}, 0, 1.0, [[CMP]]
+; CHECK: v_cmp_u_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[SREG:s[0-9]+]], [[SREG]]
+; CHECK-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, 1.0, [[CMP]]
define void @main(float %p) {
main_body:
%c = fcmp une float %p, %p
; FUNC-LABEL: {{^}}sext_in_reg_i1_i32:
-; SI: S_LOAD_DWORD [[ARG:s[0-9]+]],
-; SI: S_BFE_I32 [[SEXTRACT:s[0-9]+]], [[ARG]], 0x10000
-; SI: V_MOV_B32_e32 [[EXTRACT:v[0-9]+]], [[SEXTRACT]]
-; SI: BUFFER_STORE_DWORD [[EXTRACT]],
+; SI: s_load_dword [[ARG:s[0-9]+]],
+; SI: s_bfe_i32 [[SEXTRACT:s[0-9]+]], [[ARG]], 0x10000
+; SI: v_mov_b32_e32 [[EXTRACT:v[0-9]+]], [[SEXTRACT]]
+; SI: buffer_store_dword [[EXTRACT]],
; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]], {{.*}}, 0.0, 1
}
; FUNC-LABEL: {{^}}sext_in_reg_i8_to_i32:
-; SI: S_ADD_I32 [[VAL:s[0-9]+]],
-; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]]
-; SI: V_MOV_B32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
-; SI: BUFFER_STORE_DWORD [[VEXTRACT]],
+; SI: s_add_i32 [[VAL:s[0-9]+]],
+; SI: s_sext_i32_i8 [[EXTRACT:s[0-9]+]], [[VAL]]
+; SI: v_mov_b32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
+; SI: buffer_store_dword [[VEXTRACT]],
; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: ADD_INT
}
; FUNC-LABEL: {{^}}sext_in_reg_i16_to_i32:
-; SI: S_ADD_I32 [[VAL:s[0-9]+]],
-; SI: S_SEXT_I32_I16 [[EXTRACT:s[0-9]+]], [[VAL]]
-; SI: V_MOV_B32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
-; SI: BUFFER_STORE_DWORD [[VEXTRACT]],
+; SI: s_add_i32 [[VAL:s[0-9]+]],
+; SI: s_sext_i32_i16 [[EXTRACT:s[0-9]+]], [[VAL]]
+; SI: v_mov_b32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
+; SI: buffer_store_dword [[VEXTRACT]],
; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: ADD_INT
}
; FUNC-LABEL: {{^}}sext_in_reg_i8_to_v1i32:
-; SI: S_ADD_I32 [[VAL:s[0-9]+]],
-; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]]
-; SI: V_MOV_B32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
-; SI: BUFFER_STORE_DWORD [[VEXTRACT]],
+; SI: s_add_i32 [[VAL:s[0-9]+]],
+; SI: s_sext_i32_i8 [[EXTRACT:s[0-9]+]], [[VAL]]
+; SI: v_mov_b32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
+; SI: buffer_store_dword [[VEXTRACT]],
; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: ADD_INT
}
; FUNC-LABEL: {{^}}sext_in_reg_i1_to_i64:
-; SI: S_MOV_B32 {{s[0-9]+}}, -1
-; SI: S_ADD_I32 [[VAL:s[0-9]+]],
-; SI: S_BFE_I32 s{{[0-9]+}}, s{{[0-9]+}}, 0x10000
-; SI: BUFFER_STORE_DWORDX2
+; SI: s_mov_b32 {{s[0-9]+}}, -1
+; SI: s_add_i32 [[VAL:s[0-9]+]],
+; SI: s_bfe_i32 s{{[0-9]+}}, s{{[0-9]+}}, 0x10000
+; SI: buffer_store_dwordx2
define void @sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%c = add i64 %a, %b
%shl = shl i64 %c, 63
}
; FUNC-LABEL: {{^}}sext_in_reg_i8_to_i64:
-; SI: S_MOV_B32 {{s[0-9]+}}, -1
-; SI: S_ADD_I32 [[VAL:s[0-9]+]],
-; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]]
-; SI: BUFFER_STORE_DWORDX2
+; SI: s_mov_b32 {{s[0-9]+}}, -1
+; SI: s_add_i32 [[VAL:s[0-9]+]],
+; SI: s_sext_i32_i8 [[EXTRACT:s[0-9]+]], [[VAL]]
+; SI: buffer_store_dwordx2
; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
}
; FUNC-LABEL: {{^}}sext_in_reg_i16_to_i64:
-; SI: S_MOV_B32 {{s[0-9]+}}, -1
-; SI: S_ADD_I32 [[VAL:s[0-9]+]],
-; SI: S_SEXT_I32_I16 [[EXTRACT:s[0-9]+]], [[VAL]]
-; SI: BUFFER_STORE_DWORDX2
+; SI: s_mov_b32 {{s[0-9]+}}, -1
+; SI: s_add_i32 [[VAL:s[0-9]+]],
+; SI: s_sext_i32_i16 [[EXTRACT:s[0-9]+]], [[VAL]]
+; SI: buffer_store_dwordx2
; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
}
; FUNC-LABEL: {{^}}sext_in_reg_i32_to_i64:
-; SI: S_LOAD_DWORD
-; SI: S_LOAD_DWORD
-; SI: S_ADD_I32 [[ADD:s[0-9]+]],
-; SI: S_ASHR_I32 s{{[0-9]+}}, [[ADD]], 31
-; SI: BUFFER_STORE_DWORDX2
+; SI: s_load_dword
+; SI: s_load_dword
+; SI: s_add_i32 [[ADD:s[0-9]+]],
+; SI: s_ashr_i32 s{{[0-9]+}}, [[ADD]], 31
+; SI: buffer_store_dwordx2
; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
; This is broken on Evergreen for some reason related to the <1 x i64> kernel arguments.
; XFUNC-LABEL: {{^}}sext_in_reg_i8_to_v1i64:
-; XSI: S_BFE_I32 [[EXTRACT:s[0-9]+]], {{s[0-9]+}}, 524288
-; XSI: S_ASHR_I32 {{v[0-9]+}}, [[EXTRACT]], 31
-; XSI: BUFFER_STORE_DWORD
+; XSI: s_bfe_i32 [[EXTRACT:s[0-9]+]], {{s[0-9]+}}, 524288
+; XSI: s_ashr_i32 {{v[0-9]+}}, [[EXTRACT]], 31
+; XSI: buffer_store_dword
; XEG: BFE_INT
; XEG: ASHR
; define void @sext_in_reg_i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a, <1 x i64> %b) nounwind {
; }
; FUNC-LABEL: {{^}}sext_in_reg_i1_in_i32_other_amount:
-; SI-NOT: BFE
-; SI: S_LSHL_B32 [[REG:s[0-9]+]], {{s[0-9]+}}, 6
-; SI: S_ASHR_I32 {{s[0-9]+}}, [[REG]], 7
+; SI-NOT: {{[^@]}}bfe
+; SI: s_lshl_b32 [[REG:s[0-9]+]], {{s[0-9]+}}, 6
+; SI: s_ashr_i32 {{s[0-9]+}}, [[REG]], 7
; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG-NOT: BFE
}
; FUNC-LABEL: {{^}}sext_in_reg_v2i1_in_v2i32_other_amount:
-; SI-DAG: S_LSHL_B32 [[REG0:s[0-9]+]], {{s[0-9]}}, 6
-; SI-DAG: S_ASHR_I32 {{s[0-9]+}}, [[REG0]], 7
-; SI-DAG: S_LSHL_B32 [[REG1:s[0-9]+]], {{s[0-9]}}, 6
-; SI-DAG: S_ASHR_I32 {{s[0-9]+}}, [[REG1]], 7
-; SI: S_ENDPGM
+; SI-DAG: s_lshl_b32 [[REG0:s[0-9]+]], {{s[0-9]}}, 6
+; SI-DAG: s_ashr_i32 {{s[0-9]+}}, [[REG0]], 7
+; SI-DAG: s_lshl_b32 [[REG1:s[0-9]+]], {{s[0-9]}}, 6
+; SI-DAG: s_ashr_i32 {{s[0-9]+}}, [[REG1]], 7
+; SI: s_endpgm
; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG-NOT: BFE
; FUNC-LABEL: {{^}}sext_in_reg_v2i1_to_v2i32:
-; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
-; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
-; SI: BUFFER_STORE_DWORDX2
+; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
+; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
+; SI: buffer_store_dwordx2
; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
}
; FUNC-LABEL: {{^}}sext_in_reg_v4i1_to_v4i32:
-; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
-; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
-; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
-; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
-; SI: BUFFER_STORE_DWORDX4
+; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
+; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
+; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
+; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
+; SI: buffer_store_dwordx4
; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW][XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
}
; FUNC-LABEL: {{^}}sext_in_reg_v2i8_to_v2i32:
-; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
-; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
-; SI: BUFFER_STORE_DWORDX2
+; SI: s_sext_i32_i8 {{s[0-9]+}}, {{s[0-9]+}}
+; SI: s_sext_i32_i8 {{s[0-9]+}}, {{s[0-9]+}}
+; SI: buffer_store_dwordx2
; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
}
; FUNC-LABEL: {{^}}sext_in_reg_v4i8_to_v4i32:
-; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
-; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
-; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
-; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
-; SI: BUFFER_STORE_DWORDX4
+; SI: s_sext_i32_i8 {{s[0-9]+}}, {{s[0-9]+}}
+; SI: s_sext_i32_i8 {{s[0-9]+}}, {{s[0-9]+}}
+; SI: s_sext_i32_i8 {{s[0-9]+}}, {{s[0-9]+}}
+; SI: s_sext_i32_i8 {{s[0-9]+}}, {{s[0-9]+}}
+; SI: buffer_store_dwordx4
; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW][XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
}
; FUNC-LABEL: {{^}}sext_in_reg_v2i16_to_v2i32:
-; SI: S_SEXT_I32_I16 {{s[0-9]+}}, {{s[0-9]+}}
-; SI: S_SEXT_I32_I16 {{s[0-9]+}}, {{s[0-9]+}}
-; SI: BUFFER_STORE_DWORDX2
+; SI: s_sext_i32_i16 {{s[0-9]+}}, {{s[0-9]+}}
+; SI: s_sext_i32_i16 {{s[0-9]+}}, {{s[0-9]+}}
+; SI: buffer_store_dwordx2
; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
}
; FUNC-LABEL: {{^}}vgpr_sext_in_reg_v4i8_to_v4i32:
-; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
-; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
-; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
-; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
+; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
+; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
+; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
+; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
define void @vgpr_sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
%loada = load <4 x i32> addrspace(1)* %a, align 16
%loadb = load <4 x i32> addrspace(1)* %b, align 16
}
; FUNC-LABEL: {{^}}vgpr_sext_in_reg_v4i16_to_v4i32:
-; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
-; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
+; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
+; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
define void @vgpr_sext_in_reg_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
%loada = load <4 x i32> addrspace(1)* %a, align 16
%loadb = load <4 x i32> addrspace(1)* %b, align 16
; when computeKnownBitsForTargetNode is implemented for imax.
; FUNC-LABEL: {{^}}sext_in_reg_to_illegal_type:
-; SI: BUFFER_LOAD_SBYTE
-; SI: V_MAX_I32
-; SI: V_BFE_I32
-; SI: BUFFER_STORE_SHORT
+; SI: buffer_load_sbyte
+; SI: v_max_i32
+; SI: v_bfe_i32
+; SI: buffer_store_short
define void @sext_in_reg_to_illegal_type(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) nounwind {
%tmp5 = load i8 addrspace(1)* %src, align 1
%tmp2 = sext i8 %tmp5 to i32
declare i32 @llvm.AMDGPU.bfe.i32(i32, i32, i32) nounwind readnone
; FUNC-LABEL: {{^}}bfe_0_width:
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
define void @bfe_0_width(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
%load = load i32 addrspace(1)* %ptr, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 8, i32 0) nounwind readnone
}
; FUNC-LABEL: {{^}}bfe_8_bfe_8:
-; SI: V_BFE_I32
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; SI: v_bfe_i32
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
define void @bfe_8_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
%load = load i32 addrspace(1)* %ptr, align 4
%bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 8) nounwind readnone
}
; FUNC-LABEL: {{^}}bfe_8_bfe_16:
-; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
-; SI: S_ENDPGM
+; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
+; SI: s_endpgm
define void @bfe_8_bfe_16(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
%load = load i32 addrspace(1)* %ptr, align 4
%bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 8) nounwind readnone
; This really should be folded into 1
; FUNC-LABEL: {{^}}bfe_16_bfe_8:
-; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
define void @bfe_16_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
%load = load i32 addrspace(1)* %ptr, align 4
%bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 16) nounwind readnone
; Make sure there isn't a redundant BFE
; FUNC-LABEL: {{^}}sext_in_reg_i8_to_i32_bfe:
-; SI: S_SEXT_I32_I8 s{{[0-9]+}}, s{{[0-9]+}}
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; SI: s_sext_i32_i8 s{{[0-9]+}}, s{{[0-9]+}}
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
define void @sext_in_reg_i8_to_i32_bfe(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%c = add i32 %a, %b ; add to prevent folding into extload
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %c, i32 0, i32 8) nounwind readnone
}
; FUNC-LABEL: {{^}}sextload_i8_to_i32_bfe:
-; SI: BUFFER_LOAD_SBYTE
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; SI: buffer_load_sbyte
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
define void @sextload_i8_to_i32_bfe(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) nounwind {
%load = load i8 addrspace(1)* %ptr, align 1
%sext = sext i8 %load to i32
; FUNC-LABEL: {{^}}sextload_i8_to_i32_bfe_0:
; SI: .text
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
define void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) nounwind {
%load = load i8 addrspace(1)* %ptr, align 1
%sext = sext i8 %load to i32
}
; FUNC-LABEL: {{^}}sext_in_reg_i1_bfe_offset_0:
-; SI-NOT: SHR
-; SI-NOT: SHL
-; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
-; SI: S_ENDPGM
+; SI-NOT: shr
+; SI-NOT: shl
+; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
+; SI: s_endpgm
define void @sext_in_reg_i1_bfe_offset_0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
}
; FUNC-LABEL: {{^}}sext_in_reg_i1_bfe_offset_1:
-; SI: BUFFER_LOAD_DWORD
-; SI-NOT: SHL
-; SI-NOT: SHR
-; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 1
-; SI: S_ENDPGM
+; SI: buffer_load_dword
+; SI-NOT: shl
+; SI-NOT: shr
+; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 1
+; SI: s_endpgm
define void @sext_in_reg_i1_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 30
}
; FUNC-LABEL: {{^}}sext_in_reg_i2_bfe_offset_1:
-; SI: BUFFER_LOAD_DWORD
-; SI: V_LSHLREV_B32_e32 v{{[0-9]+}}, 30, v{{[0-9]+}}
-; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 30, v{{[0-9]+}}
-; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 2
-; SI: S_ENDPGM
+; SI: buffer_load_dword
+; SI: v_lshlrev_b32_e32 v{{[0-9]+}}, 30, v{{[0-9]+}}
+; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 30, v{{[0-9]+}}
+; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 2
+; SI: s_endpgm
define void @sext_in_reg_i2_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 30
; threads will execute the same code paths, so we don't need to worry
; about instructions in different blocks overwriting each other.
; SI-LABEL: {{^}}sgpr_if_else_salu_br:
-; SI: S_ADD
-; SI: S_ADD
+; SI: s_add
+; SI: s_add
define void @sgpr_if_else_salu_br(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
entry:
; different threads will take different control flow paths.
; SI-LABEL: {{^}}sgpr_if_else_valu_br:
-; SI: S_ADD_I32 [[SGPR:s[0-9]+]]
-; SI-NOT: S_ADD_I32 [[SGPR]]
+; SI: s_add_i32 [[SGPR:s[0-9]+]]
+; SI-NOT: s_add_i32 [[SGPR]]
define void @sgpr_if_else_valu_br(i32 addrspace(1)* %out, float %a, i32 %b, i32 %c, i32 %d, i32 %e) {
entry:
; used in an REG_SEQUENCE that also needs to be handled.
; SI-LABEL: {{^}}test_dup_operands:
-; SI: V_ADD_I32_e32
+; SI: v_add_i32_e32
define void @test_dup_operands(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %in) {
%a = load <2 x i32> addrspace(1)* %in
%lo = extractelement <2 x i32> %a, i32 0
; This test checks that no VGPR to SGPR copies are created by the register
; allocator.
; CHECK-LABEL: {{^}}phi1:
-; CHECK: S_BUFFER_LOAD_DWORD [[DST:s[0-9]]], {{s\[[0-9]+:[0-9]+\]}}, 0
-; CHECK: V_MOV_B32_e32 v{{[0-9]}}, [[DST]]
+; CHECK: s_buffer_load_dword [[DST:s[0-9]]], {{s\[[0-9]+:[0-9]+\]}}, 0
+; CHECK: v_mov_b32_e32 v{{[0-9]}}, [[DST]]
define void @phi1(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
; an assertion failure.
; CHECK-LABEL: {{^}}sample_v3:
-; CHECK: IMAGE_SAMPLE
-; CHECK: IMAGE_SAMPLE
-; CHECK: EXP
-; CHECK: S_ENDPGM
+; CHECK: image_sample
+; CHECK: image_sample
+; CHECK: exp
+; CHECK: s_endpgm
define void @sample_v3([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
entry:
!2 = metadata !{metadata !"const", null, i32 1}
; CHECK-LABEL: {{^}}copy1:
-; CHECK: BUFFER_LOAD_DWORD
-; CHECK: V_ADD
-; CHECK: S_ENDPGM
+; CHECK: buffer_load_dword
+; CHECK: v_add
+; CHECK: s_endpgm
define void @copy1(float addrspace(1)* %out, float addrspace(1)* %in0) {
entry:
%0 = load float addrspace(1)* %in0
; This test is just checking that we don't crash / assertion fail.
; CHECK-LABEL: {{^}}copy2:
-; CHECK: S_ENDPGM
+; CHECK: s_endpgm
define void @copy2([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
entry:
;EG-CHECK: LSHL {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;SI-CHECK: {{^}}shl_v2i32:
-;SI-CHECK: V_LSHL_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_LSHL_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @shl_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
;EG-CHECK: LSHL {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;SI-CHECK: {{^}}shl_v4i32:
-;SI-CHECK: V_LSHL_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_LSHL_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_LSHL_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_LSHL_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
;EG-CHECK-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], .*}}, 0.0
;SI-CHECK: {{^}}shl_i64:
-;SI-CHECK: V_LSHL_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
define void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
%b_ptr = getelementptr i64 addrspace(1)* %in, i64 1
;EG-CHECK-DAG: CNDE_INT
;SI-CHECK: {{^}}shl_v2i64:
-;SI-CHECK: V_LSHL_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-;SI-CHECK: V_LSHL_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
define void @shl_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i64> addrspace(1)* %in, i64 1
;EG-CHECK-DAG: CNDE_INT
;SI-CHECK: {{^}}shl_v4i64:
-;SI-CHECK: V_LSHL_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-;SI-CHECK: V_LSHL_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-;SI-CHECK: V_LSHL_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-;SI-CHECK: V_LSHL_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
define void @shl_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i64> addrspace(1)* %in, i64 1
; Test with inline immediate
; FUNC-LABEL: {{^}}shl_2_add_9_i32:
-; SI: V_LSHLREV_B32_e32 [[REG:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: V_ADD_I32_e32 [[RESULT:v[0-9]+]], 36, [[REG]]
-; SI: BUFFER_STORE_DWORD [[RESULT]]
-; SI: S_ENDPGM
+; SI: v_lshlrev_b32_e32 [[REG:v[0-9]+]], 2, {{v[0-9]+}}
+; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], 36, [[REG]]
+; SI: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
define void @shl_2_add_9_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%ptr = getelementptr i32 addrspace(1)* %in, i32 %tid.x
}
; FUNC-LABEL: {{^}}shl_2_add_9_i32_2_add_uses:
-; SI-DAG: V_ADD_I32_e32 [[ADDREG:v[0-9]+]], 9, {{v[0-9]+}}
-; SI-DAG: V_LSHLREV_B32_e32 [[SHLREG:v[0-9]+]], 2, {{v[0-9]+}}
-; SI-DAG: BUFFER_STORE_DWORD [[ADDREG]]
-; SI-DAG: BUFFER_STORE_DWORD [[SHLREG]]
-; SI: S_ENDPGM
+; SI-DAG: v_add_i32_e32 [[ADDREG:v[0-9]+]], 9, {{v[0-9]+}}
+; SI-DAG: v_lshlrev_b32_e32 [[SHLREG:v[0-9]+]], 2, {{v[0-9]+}}
+; SI-DAG: buffer_store_dword [[ADDREG]]
+; SI-DAG: buffer_store_dword [[SHLREG]]
+; SI: s_endpgm
define void @shl_2_add_9_i32_2_add_uses(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %in) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%ptr = getelementptr i32 addrspace(1)* %in, i32 %tid.x
; Test with add literal constant
; FUNC-LABEL: {{^}}shl_2_add_999_i32:
-; SI: V_LSHLREV_B32_e32 [[REG:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: V_ADD_I32_e32 [[RESULT:v[0-9]+]], 0xf9c, [[REG]]
-; SI: BUFFER_STORE_DWORD [[RESULT]]
-; SI: S_ENDPGM
+; SI: v_lshlrev_b32_e32 [[REG:v[0-9]+]], 2, {{v[0-9]+}}
+; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], 0xf9c, [[REG]]
+; SI: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
define void @shl_2_add_999_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%ptr = getelementptr i32 addrspace(1)* %in, i32 %tid.x
}
; FUNC-LABEL: {{^}}test_add_shl_add_constant:
-; SI-DAG: S_LOAD_DWORD [[X:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-DAG: S_LOAD_DWORD [[Y:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
-; SI: S_LSHL_B32 [[SHL3:s[0-9]+]], [[X]], 3
-; SI: S_ADD_I32 [[TMP:s[0-9]+]], [[SHL3]], [[Y]]
-; SI: S_ADD_I32 [[RESULT:s[0-9]+]], [[TMP]], 0x3d8
-; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[RESULT]]
-; SI: BUFFER_STORE_DWORD [[VRESULT]]
+; SI-DAG: s_load_dword [[X:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dword [[Y:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: s_lshl_b32 [[SHL3:s[0-9]+]], [[X]], 3
+; SI: s_add_i32 [[TMP:s[0-9]+]], [[SHL3]], [[Y]]
+; SI: s_add_i32 [[RESULT:s[0-9]+]], [[TMP]], 0x3d8
+; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[RESULT]]
+; SI: buffer_store_dword [[VRESULT]]
define void @test_add_shl_add_constant(i32 addrspace(1)* %out, i32 %x, i32 %y) #0 {
%add.0 = add i32 %x, 123
%shl = shl i32 %add.0, 3
}
; FUNC-LABEL: {{^}}test_add_shl_add_constant_inv:
-; SI-DAG: S_LOAD_DWORD [[X:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-DAG: S_LOAD_DWORD [[Y:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
-; SI: S_LSHL_B32 [[SHL3:s[0-9]+]], [[X]], 3
-; SI: S_ADD_I32 [[TMP:s[0-9]+]], [[SHL3]], [[Y]]
-; SI: S_ADD_I32 [[RESULT:s[0-9]+]], [[TMP]], 0x3d8
-; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[RESULT]]
-; SI: BUFFER_STORE_DWORD [[VRESULT]]
+; SI-DAG: s_load_dword [[X:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dword [[Y:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: s_lshl_b32 [[SHL3:s[0-9]+]], [[X]], 3
+; SI: s_add_i32 [[TMP:s[0-9]+]], [[SHL3]], [[Y]]
+; SI: s_add_i32 [[RESULT:s[0-9]+]], [[TMP]], 0x3d8
+; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[RESULT]]
+; SI: buffer_store_dword [[VRESULT]]
define void @test_add_shl_add_constant_inv(i32 addrspace(1)* %out, i32 %x, i32 %y) #0 {
%add.0 = add i32 %x, 123
; Make sure the (add tid, 2) << 2 gets folded into the ds's offset as (tid << 2) + 8
; SI-LABEL: {{^}}load_shl_base_lds_0:
-; SI: V_LSHLREV_B32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: DS_READ_B32 {{v[0-9]+}}, [[PTR]] offset:8 [M0]
-; SI: S_ENDPGM
+; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; SI: ds_read_b32 {{v[0-9]+}}, [[PTR]] offset:8 [M0]
+; SI: s_endpgm
define void @load_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 2
; remaining add use goes through the normal shl + add constant fold.
; SI-LABEL: {{^}}load_shl_base_lds_1:
-; SI: V_LSHLREV_B32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: DS_READ_B32 [[RESULT:v[0-9]+]], [[PTR]] offset:8 [M0]
-; SI: V_ADD_I32_e32 [[ADDUSE:v[0-9]+]], 8, v{{[0-9]+}}
-; SI-DAG: BUFFER_STORE_DWORD [[RESULT]]
-; SI-DAG: BUFFER_STORE_DWORD [[ADDUSE]]
-; SI: S_ENDPGM
+; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; SI: ds_read_b32 [[RESULT:v[0-9]+]], [[PTR]] offset:8 [M0]
+; SI: v_add_i32_e32 [[ADDUSE:v[0-9]+]], 8, v{{[0-9]+}}
+; SI-DAG: buffer_store_dword [[RESULT]]
+; SI-DAG: buffer_store_dword [[ADDUSE]]
+; SI: s_endpgm
define void @load_shl_base_lds_1(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 2
@maxlds = addrspace(3) global [65536 x i8] zeroinitializer, align 4
; SI-LABEL: {{^}}load_shl_base_lds_max_offset
-; SI: DS_READ_U8 v{{[0-9]+}}, v{{[0-9]+}} offset:65535
-; SI: S_ENDPGM
+; SI: ds_read_u8 v{{[0-9]+}}, v{{[0-9]+}} offset:65535
+; SI: s_endpgm
define void @load_shl_base_lds_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %lds, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 65535
; pointer can be used with an offset into the second one.
; SI-LABEL: {{^}}load_shl_base_lds_2:
-; SI: V_LSHLREV_B32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
-; SI-NEXT: DS_READ2ST64_B32 {{v\[[0-9]+:[0-9]+\]}}, [[PTR]] offset0:1 offset1:9 [M0]
-; SI: S_ENDPGM
+; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; SI-NEXT: ds_read2st64_b32 {{v\[[0-9]+:[0-9]+\]}}, [[PTR]] offset0:1 offset1:9 [M0]
+; SI: s_endpgm
define void @load_shl_base_lds_2(float addrspace(1)* %out) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 64
}
; SI-LABEL: {{^}}store_shl_base_lds_0:
-; SI: V_LSHLREV_B32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: DS_WRITE_B32 [[PTR]], {{v[0-9]+}} offset:8 [M0]
-; SI: S_ENDPGM
+; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; SI: ds_write_b32 [[PTR]], {{v[0-9]+}} offset:8 [M0]
+; SI: s_endpgm
define void @store_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 2
; SI-LABEL: {{^}}atomic_cmpxchg_shl_base_lds_0:
-; SI: V_LSHLREV_B32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: DS_CMPST_RTN_B32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}}, {{v[0-9]+}} offset:8
-; SI: S_ENDPGM
+; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; SI: ds_cmpst_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}}, {{v[0-9]+}} offset:8
+; SI: s_endpgm
define void @atomic_cmpxchg_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use, i32 %swap) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 2
}
; SI-LABEL: {{^}}atomic_swap_shl_base_lds_0:
-; SI: V_LSHLREV_B32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: DS_WRXCHG_RTN_B32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
-; SI: S_ENDPGM
+; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; SI: ds_wrxchg_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
+; SI: s_endpgm
define void @atomic_swap_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 2
}
; SI-LABEL: {{^}}atomic_add_shl_base_lds_0:
-; SI: V_LSHLREV_B32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: DS_ADD_RTN_U32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
-; SI: S_ENDPGM
+; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; SI: ds_add_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
+; SI: s_endpgm
define void @atomic_add_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 2
}
; SI-LABEL: {{^}}atomic_sub_shl_base_lds_0:
-; SI: V_LSHLREV_B32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: DS_SUB_RTN_U32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
-; SI: S_ENDPGM
+; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; SI: ds_sub_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
+; SI: s_endpgm
define void @atomic_sub_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 2
}
; SI-LABEL: {{^}}atomic_and_shl_base_lds_0:
-; SI: V_LSHLREV_B32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: DS_AND_RTN_B32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
-; SI: S_ENDPGM
+; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; SI: ds_and_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
+; SI: s_endpgm
define void @atomic_and_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 2
}
; SI-LABEL: {{^}}atomic_or_shl_base_lds_0:
-; SI: V_LSHLREV_B32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: DS_OR_RTN_B32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
-; SI: S_ENDPGM
+; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; SI: ds_or_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
+; SI: s_endpgm
define void @atomic_or_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 2
}
; SI-LABEL: {{^}}atomic_xor_shl_base_lds_0:
-; SI: V_LSHLREV_B32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: DS_XOR_RTN_B32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
-; SI: S_ENDPGM
+; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; SI: ds_xor_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
+; SI: s_endpgm
define void @atomic_xor_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 2
; }
; SI-LABEL: {{^}}atomic_min_shl_base_lds_0:
-; SI: V_LSHLREV_B32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: DS_MIN_RTN_I32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
-; SI: S_ENDPGM
+; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; SI: ds_min_rtn_i32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
+; SI: s_endpgm
define void @atomic_min_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 2
}
; SI-LABEL: {{^}}atomic_max_shl_base_lds_0:
-; SI: V_LSHLREV_B32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: DS_MAX_RTN_I32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
-; SI: S_ENDPGM
+; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; SI: ds_max_rtn_i32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
+; SI: s_endpgm
define void @atomic_max_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 2
}
; SI-LABEL: {{^}}atomic_umin_shl_base_lds_0:
-; SI: V_LSHLREV_B32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: DS_MIN_RTN_U32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
-; SI: S_ENDPGM
+; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; SI: ds_min_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
+; SI: s_endpgm
define void @atomic_umin_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 2
}
; SI-LABEL: {{^}}atomic_umax_shl_base_lds_0:
-; SI: V_LSHLREV_B32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: DS_MAX_RTN_U32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
-; SI: S_ENDPGM
+; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; SI: ds_max_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
+; SI: s_endpgm
define void @atomic_umax_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 2
; the wrong register class is used for the REG_SEQUENCE instructions.
; CHECK: {{^}}main:
-; CHECK: IMAGE_SAMPLE_B v{{\[[0-9]:[0-9]\]}}, 15, 0, 0, 0, 0, 0, 0, 0, v{{\[[0-9]:[0-9]\]}}
+; CHECK: image_sample_b v{{\[[0-9]:[0-9]\]}}, 15, 0, 0, 0, 0, 0, 0, 0, v{{\[[0-9]:[0-9]\]}}
define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
; CHECK-LABEL: {{^}}main:
; Writing to M0 from an SMRD instruction will hang the GPU.
-; CHECK-NOT: S_BUFFER_LOAD_DWORD m0
-; CHECK: S_ENDPGM
+; CHECK-NOT: s_buffer_load_dword m0
+; CHECK: s_endpgm
@ddxy_lds = external addrspace(3) global [64 x i32]
define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
!0 = metadata !{metadata !"const", null, i32 1}
; CHECK-LABEL: {{^}}main1:
-; CHECK: S_ENDPGM
+; CHECK: s_endpgm
define void @main1([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
%21 = getelementptr [17 x <16 x i8>] addrspace(2)* %0, i64 0, i32 0
; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
; CHECK: {{^}}test_8_min_char:
-; CHECK: BUFFER_STORE_BYTE
-; CHECK: BUFFER_STORE_BYTE
-; CHECK: BUFFER_STORE_BYTE
-; CHECK: BUFFER_STORE_BYTE
-; CHECK: BUFFER_STORE_BYTE
-; CHECK: BUFFER_STORE_BYTE
-; CHECK: BUFFER_STORE_BYTE
-; CHECK: BUFFER_STORE_BYTE
+; CHECK: buffer_store_byte
+; CHECK: buffer_store_byte
+; CHECK: buffer_store_byte
+; CHECK: buffer_store_byte
+; CHECK: buffer_store_byte
+; CHECK: buffer_store_byte
+; CHECK: buffer_store_byte
+; CHECK: buffer_store_byte
; ModuleID = 'radeon'
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64"
target triple = "r600--"
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; SI-LABEL: {{^}}s_sext_i1_to_i32:
-; SI: V_CNDMASK_B32_e64
-; SI: S_ENDPGM
+; SI: v_cndmask_b32_e64
+; SI: s_endpgm
define void @s_sext_i1_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%cmp = icmp eq i32 %a, %b
%sext = sext i1 %cmp to i32
}
; SI-LABEL: {{^}}test_s_sext_i32_to_i64:
-; SI: S_ASHR_I32
-; SI: S_ENDPG
+; SI: s_ashr_i32
+; SI: s_endpg
define void @test_s_sext_i32_to_i64(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) nounwind {
entry:
%mul = mul i32 %a, %b
}
; SI-LABEL: {{^}}s_sext_i1_to_i64:
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: S_ENDPGM
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: s_endpgm
define void @s_sext_i1_to_i64(i64 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%cmp = icmp eq i32 %a, %b
%sext = sext i1 %cmp to i64
}
; SI-LABEL: {{^}}s_sext_i32_to_i64:
-; SI: S_ASHR_I32
-; SI: S_ENDPGM
+; SI: s_ashr_i32
+; SI: s_endpgm
define void @s_sext_i32_to_i64(i64 addrspace(1)* %out, i32 %a) nounwind {
%sext = sext i32 %a to i64
store i64 %sext, i64 addrspace(1)* %out, align 8
}
; SI-LABEL: {{^}}v_sext_i32_to_i64:
-; SI: V_ASHR
-; SI: S_ENDPGM
+; SI: v_ashr
+; SI: s_endpgm
define void @v_sext_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%val = load i32 addrspace(1)* %in, align 4
%sext = sext i32 %val to i64
}
; SI-LABEL: {{^}}s_sext_i16_to_i64:
-; SI: S_ENDPGM
+; SI: s_endpgm
define void @s_sext_i16_to_i64(i64 addrspace(1)* %out, i16 %a) nounwind {
%sext = sext i16 %a to i64
store i64 %sext, i64 addrspace(1)* %out, align 8
; FIXME: Fix truncating store for local memory
; SI-LABEL: {{^}}trunc_load_alloca_i64:
-; SI: V_MOVRELS_B32
-; SI-NOT: V_MOVRELS_B32
-; SI: S_ENDPGM
+; SI: v_movrels_b32
+; SI-NOT: v_movrels_b32
+; SI: s_endpgm
define void @trunc_load_alloca_i64(i64 addrspace(1)* %out, i32 %a, i32 %b) {
%idx = add i32 %a, %b
%alloca = alloca i64, i32 4
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
; SI-LABEL: {{^}}sint_to_fp_i32_to_f64
-; SI: V_CVT_F64_I32_e32
+; SI: v_cvt_f64_i32_e32
define void @sint_to_fp_i32_to_f64(double addrspace(1)* %out, i32 %in) {
%result = sitofp i32 %in to double
store double %result, double addrspace(1)* %out
}
; SI-LABEL: {{^}}sint_to_fp_i1_f64:
-; SI: V_CMP_EQ_I32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
+; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
; FIXME: We should the VGPR sources for V_CNDMASK are copied from SGPRs,
; we should be able to fold the SGPRs into the V_CNDMASK instructions.
-; SI: V_CNDMASK_B32_e64 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CMP]]
-; SI: V_CNDMASK_B32_e64 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CMP]]
-; SI: BUFFER_STORE_DWORDX2
-; SI: S_ENDPGM
+; SI: v_cndmask_b32_e64 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CMP]]
+; SI: v_cndmask_b32_e64 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CMP]]
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
define void @sint_to_fp_i1_f64(double addrspace(1)* %out, i32 %in) {
%cmp = icmp eq i32 %in, 0
%fp = sitofp i1 %cmp to double
}
; SI-LABEL: {{^}}sint_to_fp_i1_f64_load:
-; SI: V_CNDMASK_B32_e64 [[IRESULT:v[0-9]]], 0, -1
-; SI-NEXT: V_CVT_F64_I32_e32 [[RESULT:v\[[0-9]+:[0-9]\]]], [[IRESULT]]
-; SI: BUFFER_STORE_DWORDX2 [[RESULT]]
-; SI: S_ENDPGM
+; SI: v_cndmask_b32_e64 [[IRESULT:v[0-9]]], 0, -1
+; SI-NEXT: v_cvt_f64_i32_e32 [[RESULT:v\[[0-9]+:[0-9]\]]], [[IRESULT]]
+; SI: buffer_store_dwordx2 [[RESULT]]
+; SI: s_endpgm
define void @sint_to_fp_i1_f64_load(double addrspace(1)* %out, i1 %in) {
%fp = sitofp i1 %in to double
store double %fp, double addrspace(1)* %out, align 8
}
; SI-LABEL: @v_sint_to_fp_i64_to_f64
-; SI: BUFFER_LOAD_DWORDX2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
-; SI-DAG: V_CVT_F64_U32_e32 [[LO_CONV:v\[[0-9]+:[0-9]+\]]], v[[LO]]
-; SI-DAG: V_CVT_F64_I32_e32 [[HI_CONV:v\[[0-9]+:[0-9]+\]]], v[[HI]]
-; SI: V_LDEXP_F64 [[LDEXP:v\[[0-9]+:[0-9]+\]]], [[HI_CONV]], 32
-; SI: V_ADD_F64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[LDEXP]], [[LO_CONV]]
-; SI: BUFFER_STORE_DWORDX2 [[RESULT]]
+; SI: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
+; SI-DAG: v_cvt_f64_u32_e32 [[LO_CONV:v\[[0-9]+:[0-9]+\]]], v[[LO]]
+; SI-DAG: v_cvt_f64_i32_e32 [[HI_CONV:v\[[0-9]+:[0-9]+\]]], v[[HI]]
+; SI: v_ldexp_f64 [[LDEXP:v\[[0-9]+:[0-9]+\]]], [[HI_CONV]], 32
+; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[LDEXP]], [[LO_CONV]]
+; SI: buffer_store_dwordx2 [[RESULT]]
define void @v_sint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep = getelementptr i64 addrspace(1)* %in, i32 %tid
; FUNC-LABEL: {{^}}s_sint_to_fp_i32_to_f32:
; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-; SI: V_CVT_F32_I32_e32 {{v[0-9]+}}, {{s[0-9]+$}}
+; SI: v_cvt_f32_i32_e32 {{v[0-9]+}}, {{s[0-9]+$}}
define void @s_sint_to_fp_i32_to_f32(float addrspace(1)* %out, i32 %in) {
%result = sitofp i32 %in to float
store float %result, float addrspace(1)* %out
; R600-DAG: INT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].W
; R600-DAG: INT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[3].X
-; SI: V_CVT_F32_I32_e32
-; SI: V_CVT_F32_I32_e32
+; SI: v_cvt_f32_i32_e32
+; SI: v_cvt_f32_i32_e32
define void @sint_to_fp_v2i32(<2 x float> addrspace(1)* %out, <2 x i32> %in) {
%result = sitofp <2 x i32> %in to <2 x float>
store <2 x float> %result, <2 x float> addrspace(1)* %out
; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; SI: V_CVT_F32_I32_e32
-; SI: V_CVT_F32_I32_e32
-; SI: V_CVT_F32_I32_e32
-; SI: V_CVT_F32_I32_e32
+; SI: v_cvt_f32_i32_e32
+; SI: v_cvt_f32_i32_e32
+; SI: v_cvt_f32_i32_e32
+; SI: v_cvt_f32_i32_e32
define void @sint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%value = load <4 x i32> addrspace(1) * %in
%result = sitofp <4 x i32> %value to <4 x float>
}
; FUNC-LABEL: {{^}}sint_to_fp_i1_f32:
-; SI: V_CMP_EQ_I32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
-; SI-NEXT: V_CNDMASK_B32_e64 [[RESULT:v[0-9]+]], 0, 1.0, [[CMP]]
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1.0, [[CMP]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
define void @sint_to_fp_i1_f32(float addrspace(1)* %out, i32 %in) {
%cmp = icmp eq i32 %in, 0
%fp = uitofp i1 %cmp to float
}
; FUNC-LABEL: {{^}}sint_to_fp_i1_f32_load:
-; SI: V_CNDMASK_B32_e64 [[RESULT:v[0-9]+]], 0, -1.0
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1.0
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
define void @sint_to_fp_i1_f32_load(float addrspace(1)* %out, i1 %in) {
%fp = sitofp i1 %in to float
store float %fp, float addrspace(1)* %out, align 4
; SMRD load with an immediate offset.
; CHECK-LABEL: {{^}}smrd0:
-; CHECK: S_LOAD_DWORD s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x1 ; encoding: [0x01
+; CHECK: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x1 ; encoding: [0x01
define void @smrd0(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
entry:
%0 = getelementptr i32 addrspace(2)* %ptr, i64 1
; SMRD load with the largest possible immediate offset.
; CHECK-LABEL: {{^}}smrd1:
-; CHECK: S_LOAD_DWORD s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff
+; CHECK: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff
define void @smrd1(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
entry:
%0 = getelementptr i32 addrspace(2)* %ptr, i64 255
; SMRD load with an offset greater than the largest possible immediate.
; CHECK-LABEL: {{^}}smrd2:
-; CHECK: S_MOV_B32 s[[OFFSET:[0-9]]], 0x400
-; CHECK: S_LOAD_DWORD s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
-; CHECK: S_ENDPGM
+; CHECK: s_mov_b32 s[[OFFSET:[0-9]]], 0x400
+; CHECK: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
+; CHECK: s_endpgm
define void @smrd2(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
entry:
%0 = getelementptr i32 addrspace(2)* %ptr, i64 256
; SMRD load with a 64-bit offset
; CHECK-LABEL: {{^}}smrd3:
-; CHECK-DAG: S_MOV_B32 s[[SHI:[0-9]+]], 4
-; CHECK-DAG: S_MOV_B32 s[[SLO:[0-9]+]], 0 ;
+; CHECK-DAG: s_mov_b32 s[[SHI:[0-9]+]], 4
+; CHECK-DAG: s_mov_b32 s[[SLO:[0-9]+]], 0 ;
; FIXME: We don't need to copy these values to VGPRs
-; CHECK-DAG: V_MOV_B32_e32 v[[VLO:[0-9]+]], s[[SLO]]
-; CHECK-DAG: V_MOV_B32_e32 v[[VHI:[0-9]+]], s[[SHI]]
-; FIXME: We should be able to use S_LOAD_DWORD here
-; CHECK: BUFFER_LOAD_DWORD v{{[0-9]+}}, v{{\[}}[[VLO]]:[[VHI]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64
-; CHECK: S_ENDPGM
+; CHECK-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
+; CHECK-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
+; FIXME: We should be able to use s_load_dword here
+; CHECK: buffer_load_dword v{{[0-9]+}}, v{{\[}}[[VLO]]:[[VHI]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64
+; CHECK: s_endpgm
define void @smrd3(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
entry:
%0 = getelementptr i32 addrspace(2)* %ptr, i64 4294967296 ; 2 ^ 32
; SMRD load using the load.const intrinsic with an immediate offset
; CHECK-LABEL: {{^}}smrd_load_const0:
-; CHECK: S_BUFFER_LOAD_DWORD s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4 ; encoding: [0x04
+; CHECK: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4 ; encoding: [0x04
define void @smrd_load_const0(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
%20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
; SMRD load using the load.const intrinsic with the largest possible immediate
; offset.
; CHECK-LABEL: {{^}}smrd_load_const1:
-; CHECK: S_BUFFER_LOAD_DWORD s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff
+; CHECK: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff
define void @smrd_load_const1(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
%20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
; largets possible immediate.
; immediate offset.
; CHECK-LABEL: {{^}}smrd_load_const2:
-; CHECK: S_MOV_B32 s[[OFFSET:[0-9]]], 0x400
-; CHECK: S_BUFFER_LOAD_DWORD s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
+; CHECK: s_mov_b32 s[[OFFSET:[0-9]]], 0x400
+; CHECK: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
define void @smrd_load_const2(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
%20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
;EG-CHECK: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;SI-CHECK-LABEL: {{^}}ashr_v2i32:
-;SI-CHECK: V_ASHR_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_ASHR_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @ashr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
;EG-CHECK: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;SI-CHECK-LABEL: {{^}}ashr_v4i32:
-;SI-CHECK: V_ASHR_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_ASHR_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_ASHR_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_ASHR_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @ashr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
;EG-CHECK: ASHR
;SI-CHECK-LABEL: {{^}}ashr_i64:
-;SI-CHECK: S_ASHR_I64 s[{{[0-9]}}:{{[0-9]}}], s[{{[0-9]}}:{{[0-9]}}], 8
+;SI-CHECK: s_ashr_i64 s[{{[0-9]}}:{{[0-9]}}], s[{{[0-9]}}:{{[0-9]}}], 8
define void @ashr_i64(i64 addrspace(1)* %out, i32 %in) {
entry:
%0 = sext i32 %in to i64
;EG-CHECK-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}}
;SI-CHECK-LABEL: {{^}}ashr_i64_2:
-;SI-CHECK: V_ASHR_I64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
define void @ashr_i64_2(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
entry:
%b_ptr = getelementptr i64 addrspace(1)* %in, i64 1
;EG-CHECK-DAG: CNDE_INT
;SI-CHECK-LABEL: {{^}}ashr_v2i64:
-;SI-CHECK: V_ASHR_I64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-;SI-CHECK: V_ASHR_I64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
define void @ashr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i64> addrspace(1)* %in, i64 1
;EG-CHECK-DAG: CNDE_INT
;SI-CHECK-LABEL: {{^}}ashr_v4i64:
-;SI-CHECK: V_ASHR_I64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-;SI-CHECK: V_ASHR_I64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-;SI-CHECK: V_ASHR_I64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-;SI-CHECK: V_ASHR_I64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
define void @ashr_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i64> addrspace(1)* %in, i64 1
;EG-CHECK: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;SI-CHECK: {{^}}lshr_v2i32:
-;SI-CHECK: V_LSHR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_LSHR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @lshr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
;EG-CHECK: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;SI-CHECK: {{^}}lshr_v4i32:
-;SI-CHECK: V_LSHR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_LSHR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_LSHR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_LSHR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @lshr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
;EG-CHECK-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], .*}}, 0.0
;SI-CHECK: {{^}}lshr_i64:
-;SI-CHECK: V_LSHR_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
define void @lshr_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
%b_ptr = getelementptr i64 addrspace(1)* %in, i64 1
;EG-CHECK-DAG: CNDE_INT
;SI-CHECK: {{^}}lshr_v2i64:
-;SI-CHECK: V_LSHR_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-;SI-CHECK: V_LSHR_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
define void @lshr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i64> addrspace(1)* %in, i64 1
;EG-CHECK-DAG: CNDE_INT
;SI-CHECK: {{^}}lshr_v4i64:
-;SI-CHECK: V_LSHR_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-;SI-CHECK: V_LSHR_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-;SI-CHECK: V_LSHR_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-;SI-CHECK: V_LSHR_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
define void @lshr_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i64> addrspace(1)* %in, i64 1
}
; FUNC-LABEL: {{^}}s_ssubo_i64:
-; SI: S_SUB_U32
-; SI: S_SUBB_U32
+; SI: s_sub_u32
+; SI: s_subb_u32
define void @s_ssubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a, i64 %b) nounwind {
%ssub = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) nounwind
%val = extractvalue { i64, i1 } %ssub, 0
}
; FUNC-LABEL: {{^}}v_ssubo_i64:
-; SI: V_SUB_I32_e32
-; SI: V_SUBB_U32_e32
+; SI: v_sub_i32_e32
+; SI: v_subb_u32_e32
define void @v_ssubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
%a = load i64 addrspace(1)* %aptr, align 4
%b = load i64 addrspace(1)* %bptr, align 4
; should be done in a single store.
; SI-LABEL: {{^}}store_v3i32:
-; SI: BUFFER_STORE_DWORDX4
+; SI: buffer_store_dwordx4
define void @store_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> %a) nounwind {
store <3 x i32> %a, <3 x i32> addrspace(1)* %out, align 16
ret void
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI
; SI-LABEL: {{^}}global_store_v3i64:
-; SI: BUFFER_STORE_DWORDX4
-; SI: BUFFER_STORE_DWORDX4
+; SI: buffer_store_dwordx4
+; SI: buffer_store_dwordx4
define void @global_store_v3i64(<3 x i64> addrspace(1)* %out, <3 x i64> %x) {
store <3 x i64> %x, <3 x i64> addrspace(1)* %out, align 32
ret void
;===------------------------------------------------------------------------===;
; FUNC-LABEL: {{^}}store_i1:
; EG-CHECK: MEM_RAT MSKOR
-; SI-CHECK: BUFFER_STORE_BYTE
+; SI-CHECK: buffer_store_byte
define void @store_i1(i1 addrspace(1)* %out) {
entry:
store i1 true, i1 addrspace(1)* %out
; EG-CHECK: MOV * T[[RW_GPR]].Z, 0.0
; SI-CHECK-LABEL: {{^}}store_i8:
-; SI-CHECK: BUFFER_STORE_BYTE
+; SI-CHECK: buffer_store_byte
define void @store_i8(i8 addrspace(1)* %out, i8 %in) {
entry:
; EG-CHECK: MOV * T[[RW_GPR]].Z, 0.0
; SI-CHECK-LABEL: {{^}}store_i16:
-; SI-CHECK: BUFFER_STORE_SHORT
+; SI-CHECK: buffer_store_short
define void @store_i16(i16 addrspace(1)* %out, i16 %in) {
entry:
store i16 %in, i16 addrspace(1)* %out
; EG-CHECK: MEM_RAT MSKOR
; EG-CHECK-NOT: MEM_RAT MSKOR
; SI-CHECK-LABEL: {{^}}store_v2i8:
-; SI-CHECK: BUFFER_STORE_BYTE
-; SI-CHECK: BUFFER_STORE_BYTE
+; SI-CHECK: buffer_store_byte
+; SI-CHECK: buffer_store_byte
define void @store_v2i8(<2 x i8> addrspace(1)* %out, <2 x i32> %in) {
entry:
%0 = trunc <2 x i32> %in to <2 x i8>
; CM-CHECK-LABEL: {{^}}store_v2i16:
; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD
; SI-CHECK-LABEL: {{^}}store_v2i16:
-; SI-CHECK: BUFFER_STORE_SHORT
-; SI-CHECK: BUFFER_STORE_SHORT
+; SI-CHECK: buffer_store_short
+; SI-CHECK: buffer_store_short
define void @store_v2i16(<2 x i16> addrspace(1)* %out, <2 x i32> %in) {
entry:
%0 = trunc <2 x i32> %in to <2 x i16>
; CM-CHECK-LABEL: {{^}}store_v4i8:
; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD
; SI-CHECK-LABEL: {{^}}store_v4i8:
-; SI-CHECK: BUFFER_STORE_BYTE
-; SI-CHECK: BUFFER_STORE_BYTE
-; SI-CHECK: BUFFER_STORE_BYTE
-; SI-CHECK: BUFFER_STORE_BYTE
+; SI-CHECK: buffer_store_byte
+; SI-CHECK: buffer_store_byte
+; SI-CHECK: buffer_store_byte
+; SI-CHECK: buffer_store_byte
define void @store_v4i8(<4 x i8> addrspace(1)* %out, <4 x i32> %in) {
entry:
%0 = trunc <4 x i32> %in to <4 x i8>
; CM-CHECK-LABEL: {{^}}store_f32:
; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD T{{[0-9]+\.X, T[0-9]+\.X}}
; SI-CHECK-LABEL: {{^}}store_f32:
-; SI-CHECK: BUFFER_STORE_DWORD
+; SI-CHECK: buffer_store_dword
define void @store_f32(float addrspace(1)* %out, float %in) {
store float %in, float addrspace(1)* %out
; EG-CHECK: MEM_RAT MSKOR
; EG-CHECK-NOT: MEM_RAT MSKOR
; SI-CHECK-LABEL: {{^}}store_v4i16:
-; SI-CHECK: BUFFER_STORE_SHORT
-; SI-CHECK: BUFFER_STORE_SHORT
-; SI-CHECK: BUFFER_STORE_SHORT
-; SI-CHECK: BUFFER_STORE_SHORT
-; SI-CHECK-NOT: BUFFER_STORE_BYTE
+; SI-CHECK: buffer_store_short
+; SI-CHECK: buffer_store_short
+; SI-CHECK: buffer_store_short
+; SI-CHECK: buffer_store_short
+; SI-CHECK-NOT: buffer_store_byte
define void @store_v4i16(<4 x i16> addrspace(1)* %out, <4 x i32> %in) {
entry:
%0 = trunc <4 x i32> %in to <4 x i16>
; CM-CHECK-LABEL: {{^}}store_v2f32:
; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD
; SI-CHECK-LABEL: {{^}}store_v2f32:
-; SI-CHECK: BUFFER_STORE_DWORDX2
+; SI-CHECK: buffer_store_dwordx2
define void @store_v2f32(<2 x float> addrspace(1)* %out, float %a, float %b) {
entry:
; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD
; CM-CHECK-NOT: MEM_RAT_CACHELESS STORE_DWORD
; SI-CHECK-LABEL: {{^}}store_v4i32:
-; SI-CHECK: BUFFER_STORE_DWORDX4
+; SI-CHECK: buffer_store_dwordx4
define void @store_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %in) {
entry:
store <4 x i32> %in, <4 x i32> addrspace(1)* %out
; FUNC-LABEL: {{^}}store_i64_i8:
; EG-CHECK: MEM_RAT MSKOR
-; SI-CHECK: BUFFER_STORE_BYTE
+; SI-CHECK: buffer_store_byte
define void @store_i64_i8(i8 addrspace(1)* %out, i64 %in) {
entry:
%0 = trunc i64 %in to i8
; FUNC-LABEL: {{^}}store_i64_i16:
; EG-CHECK: MEM_RAT MSKOR
-; SI-CHECK: BUFFER_STORE_SHORT
+; SI-CHECK: buffer_store_short
define void @store_i64_i16(i16 addrspace(1)* %out, i64 %in) {
entry:
%0 = trunc i64 %in to i16
; FUNC-LABEL: {{^}}store_local_i1:
; EG-CHECK: LDS_BYTE_WRITE
-; SI-CHECK: DS_WRITE_B8
+; SI-CHECK: ds_write_b8
define void @store_local_i1(i1 addrspace(3)* %out) {
entry:
store i1 true, i1 addrspace(3)* %out
; EG-CHECK-LABEL: {{^}}store_local_i8:
; EG-CHECK: LDS_BYTE_WRITE
; SI-CHECK-LABEL: {{^}}store_local_i8:
-; SI-CHECK: DS_WRITE_B8
+; SI-CHECK: ds_write_b8
define void @store_local_i8(i8 addrspace(3)* %out, i8 %in) {
store i8 %in, i8 addrspace(3)* %out
ret void
; EG-CHECK-LABEL: {{^}}store_local_i16:
; EG-CHECK: LDS_SHORT_WRITE
; SI-CHECK-LABEL: {{^}}store_local_i16:
-; SI-CHECK: DS_WRITE_B16
+; SI-CHECK: ds_write_b16
define void @store_local_i16(i16 addrspace(3)* %out, i16 %in) {
store i16 %in, i16 addrspace(3)* %out
ret void
; CM-CHECK-LABEL: {{^}}store_local_v2i16:
; CM-CHECK: LDS_WRITE
; SI-CHECK-LABEL: {{^}}store_local_v2i16:
-; SI-CHECK: DS_WRITE_B16
-; SI-CHECK: DS_WRITE_B16
+; SI-CHECK: ds_write_b16
+; SI-CHECK: ds_write_b16
define void @store_local_v2i16(<2 x i16> addrspace(3)* %out, <2 x i16> %in) {
entry:
store <2 x i16> %in, <2 x i16> addrspace(3)* %out
; CM-CHECK-LABEL: {{^}}store_local_v4i8:
; CM-CHECK: LDS_WRITE
; SI-CHECK-LABEL: {{^}}store_local_v4i8:
-; SI-CHECK: DS_WRITE_B8
-; SI-CHECK: DS_WRITE_B8
-; SI-CHECK: DS_WRITE_B8
-; SI-CHECK: DS_WRITE_B8
+; SI-CHECK: ds_write_b8
+; SI-CHECK: ds_write_b8
+; SI-CHECK: ds_write_b8
+; SI-CHECK: ds_write_b8
define void @store_local_v4i8(<4 x i8> addrspace(3)* %out, <4 x i8> %in) {
entry:
store <4 x i8> %in, <4 x i8> addrspace(3)* %out
; CM-CHECK: LDS_WRITE
; CM-CHECK: LDS_WRITE
; SI-CHECK-LABEL: {{^}}store_local_v2i32:
-; SI-CHECK: DS_WRITE_B64
+; SI-CHECK: ds_write_b64
define void @store_local_v2i32(<2 x i32> addrspace(3)* %out, <2 x i32> %in) {
entry:
store <2 x i32> %in, <2 x i32> addrspace(3)* %out
; CM-CHECK: LDS_WRITE
; CM-CHECK: LDS_WRITE
; SI-CHECK-LABEL: {{^}}store_local_v4i32:
-; SI-CHECK: DS_WRITE_B32
-; SI-CHECK: DS_WRITE_B32
-; SI-CHECK: DS_WRITE_B32
-; SI-CHECK: DS_WRITE_B32
+; SI-CHECK: ds_write_b32
+; SI-CHECK: ds_write_b32
+; SI-CHECK: ds_write_b32
+; SI-CHECK: ds_write_b32
define void @store_local_v4i32(<4 x i32> addrspace(3)* %out, <4 x i32> %in) {
entry:
store <4 x i32> %in, <4 x i32> addrspace(3)* %out
; FUNC-LABEL: {{^}}store_local_i64_i8:
; EG-CHECK: LDS_BYTE_WRITE
-; SI-CHECK: DS_WRITE_B8
+; SI-CHECK: ds_write_b8
define void @store_local_i64_i8(i8 addrspace(3)* %out, i64 %in) {
entry:
%0 = trunc i64 %in to i8
; FUNC-LABEL: {{^}}store_local_i64_i16:
; EG-CHECK: LDS_SHORT_WRITE
-; SI-CHECK: DS_WRITE_B16
+; SI-CHECK: ds_write_b16
define void @store_local_i64_i16(i16 addrspace(3)* %out, i64 %in) {
entry:
%0 = trunc i64 %in to i16
; CM-CHECK-LABEL: {{^}}vecload2:
; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD
; SI-CHECK-LABEL: {{^}}vecload2:
-; SI-CHECK: BUFFER_STORE_DWORDX2
+; SI-CHECK: buffer_store_dwordx2
define void @vecload2(i32 addrspace(1)* nocapture %out, i32 addrspace(2)* nocapture %mem) #0 {
entry:
%0 = load i32 addrspace(2)* %mem, align 4
; CM-CHECK: STORE_DWORD
; CM-CHECK: STORE_DWORD
; CM-CHECK: STORE_DWORD
-; SI: BUFFER_STORE_DWORDX2
-; SI: BUFFER_STORE_DWORDX2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
define void @i128-const-store(i32 addrspace(1)* %out) {
entry:
store i32 1, i32 addrspace(1)* %out, align 4
;EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;SI: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
;EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;SI: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
}
; FUNC-LABEL: {{^}}s_sub_i64:
-; SI: S_SUB_U32
-; SI: S_SUBB_U32
+; SI: s_sub_u32
+; SI: s_subb_u32
; EG-DAG: SETGE_UINT
; EG-DAG: CNDE_INT
}
; FUNC-LABEL: {{^}}v_sub_i64:
-; SI: V_SUB_I32_e32
-; SI: V_SUBB_U32_e32
+; SI: v_sub_i32_e32
+; SI: v_subb_u32_e32
; EG-DAG: SETGE_UINT
; EG-DAG: CNDE_INT
; SI-LABEL: {{^}}global_truncstore_i32_to_i1:
-; SI: S_LOAD_DWORD [[LOAD:s[0-9]+]],
-; SI: S_AND_B32 [[SREG:s[0-9]+]], [[LOAD]], 1
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], [[SREG]]
-; SI: BUFFER_STORE_BYTE [[VREG]],
+; SI: s_load_dword [[LOAD:s[0-9]+]],
+; SI: s_and_b32 [[SREG:s[0-9]+]], [[LOAD]], 1
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], [[SREG]]
+; SI: buffer_store_byte [[VREG]],
define void @global_truncstore_i32_to_i1(i1 addrspace(1)* %out, i32 %val) nounwind {
%trunc = trunc i32 %val to i1
store i1 %trunc, i1 addrspace(1)* %out, align 1
}
; SI-LABEL: {{^}}global_truncstore_i64_to_i1:
-; SI: BUFFER_STORE_BYTE
+; SI: buffer_store_byte
define void @global_truncstore_i64_to_i1(i1 addrspace(1)* %out, i64 %val) nounwind {
%trunc = trunc i64 %val to i1
store i1 %trunc, i1 addrspace(1)* %out, align 1
}
; SI-LABEL: {{^}}global_truncstore_i16_to_i1:
-; SI: S_LOAD_DWORD [[LOAD:s[0-9]+]],
-; SI: S_AND_B32 [[SREG:s[0-9]+]], [[LOAD]], 1
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], [[SREG]]
-; SI: BUFFER_STORE_BYTE [[VREG]],
+; SI: s_load_dword [[LOAD:s[0-9]+]],
+; SI: s_and_b32 [[SREG:s[0-9]+]], [[LOAD]], 1
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], [[SREG]]
+; SI: buffer_store_byte [[VREG]],
define void @global_truncstore_i16_to_i1(i1 addrspace(1)* %out, i16 %val) nounwind {
%trunc = trunc i16 %val to i1
store i1 %trunc, i1 addrspace(1)* %out, align 1
define void @trunc_i64_to_i32_store(i32 addrspace(1)* %out, i64 %in) {
; SI-LABEL: {{^}}trunc_i64_to_i32_store:
-; SI: S_LOAD_DWORD [[SLOAD:s[0-9]+]], s[0:1], 0xb
-; SI: V_MOV_B32_e32 [[VLOAD:v[0-9]+]], [[SLOAD]]
-; SI: BUFFER_STORE_DWORD [[VLOAD]]
+; SI: s_load_dword [[SLOAD:s[0-9]+]], s[0:1], 0xb
+; SI: v_mov_b32_e32 [[VLOAD:v[0-9]+]], [[SLOAD]]
+; SI: buffer_store_dword [[VLOAD]]
; EG-LABEL: {{^}}trunc_i64_to_i32_store:
; EG: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
}
; SI-LABEL: {{^}}trunc_load_shl_i64:
-; SI-DAG: S_LOAD_DWORDX2
-; SI-DAG: S_LOAD_DWORD [[SREG:s[0-9]+]],
-; SI: S_LSHL_B32 [[SHL:s[0-9]+]], [[SREG]], 2
-; SI: V_MOV_B32_e32 [[VSHL:v[0-9]+]], [[SHL]]
-; SI: BUFFER_STORE_DWORD [[VSHL]],
+; SI-DAG: s_load_dwordx2
+; SI-DAG: s_load_dword [[SREG:s[0-9]+]],
+; SI: s_lshl_b32 [[SHL:s[0-9]+]], [[SREG]], 2
+; SI: v_mov_b32_e32 [[VSHL:v[0-9]+]], [[SHL]]
+; SI: buffer_store_dword [[VSHL]],
define void @trunc_load_shl_i64(i32 addrspace(1)* %out, i64 %a) {
%b = shl i64 %a, 2
%result = trunc i64 %b to i32
}
; SI-LABEL: {{^}}trunc_shl_i64:
-; SI: S_LOAD_DWORDX2 s{{\[}}[[LO_SREG:[0-9]+]]:{{[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
-; SI: S_LSHL_B64 s{{\[}}[[LO_SHL:[0-9]+]]:{{[0-9]+\]}}, s{{\[}}[[LO_SREG]]:{{[0-9]+\]}}, 2
-; SI: S_ADD_U32 s[[LO_SREG2:[0-9]+]], s[[LO_SHL]],
-; SI: S_ADDC_U32
-; SI: V_MOV_B32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG2]]
-; SI: BUFFER_STORE_DWORD v[[LO_VREG]],
+; SI: s_load_dwordx2 s{{\[}}[[LO_SREG:[0-9]+]]:{{[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
+; SI: s_lshl_b64 s{{\[}}[[LO_SHL:[0-9]+]]:{{[0-9]+\]}}, s{{\[}}[[LO_SREG]]:{{[0-9]+\]}}, 2
+; SI: s_add_u32 s[[LO_SREG2:[0-9]+]], s[[LO_SHL]],
+; SI: s_addc_u32
+; SI: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG2]]
+; SI: buffer_store_dword v[[LO_VREG]],
define void @trunc_shl_i64(i64 addrspace(1)* %out2, i32 addrspace(1)* %out, i64 %a) {
%aa = add i64 %a, 234 ; Prevent shrinking store.
%b = shl i64 %aa, 2
}
; SI-LABEL: {{^}}trunc_i32_to_i1:
-; SI: V_AND_B32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
-; SI: V_CMP_EQ_I32
+; SI: v_and_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
+; SI: v_cmp_eq_i32
define void @trunc_i32_to_i1(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) {
%a = load i32 addrspace(1)* %ptr, align 4
%trunc = trunc i32 %a to i1
}
; SI-LABEL: {{^}}sgpr_trunc_i32_to_i1:
-; SI: V_AND_B32_e64 v{{[0-9]+}}, 1, s{{[0-9]+}}
-; SI: V_CMP_EQ_I32
+; SI: v_and_b32_e64 v{{[0-9]+}}, 1, s{{[0-9]+}}
+; SI: v_cmp_eq_i32
define void @sgpr_trunc_i32_to_i1(i32 addrspace(1)* %out, i32 %a) {
%trunc = trunc i32 %a to i1
%result = select i1 %trunc, i32 1, i32 0
declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone
; FUNC-LABEL: {{^}}uaddo_i64_zext:
-; SI: ADD
-; SI: ADDC
-; SI: ADDC
+; SI: add
+; SI: addc
+; SI: addc
define void @uaddo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%uadd = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) nounwind
%val = extractvalue { i64, i1 } %uadd, 0
}
; FUNC-LABEL: {{^}}s_uaddo_i32:
-; SI: S_ADD_I32
+; SI: s_add_i32
define void @s_uaddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) nounwind {
%uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) nounwind
%val = extractvalue { i32, i1 } %uadd, 0
}
; FUNC-LABEL: {{^}}v_uaddo_i32:
-; SI: V_ADD_I32
+; SI: v_add_i32
define void @v_uaddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
%a = load i32 addrspace(1)* %aptr, align 4
%b = load i32 addrspace(1)* %bptr, align 4
}
; FUNC-LABEL: {{^}}s_uaddo_i64:
-; SI: S_ADD_U32
-; SI: S_ADDC_U32
+; SI: s_add_u32
+; SI: s_addc_u32
define void @s_uaddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a, i64 %b) nounwind {
%uadd = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) nounwind
%val = extractvalue { i64, i1 } %uadd, 0
}
; FUNC-LABEL: {{^}}v_uaddo_i64:
-; SI: V_ADD_I32
-; SI: V_ADDC_U32
+; SI: v_add_i32
+; SI: v_addc_u32
define void @v_uaddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
%a = load i64 addrspace(1)* %aptr, align 4
%b = load i64 addrspace(1)* %bptr, align 4
;EG-CHECK-LABEL: {{^}}test2:
;EG-CHECK: CF_END
;SI-CHECK-LABEL: {{^}}test2:
-;SI-CHECK: S_ENDPGM
+;SI-CHECK: s_endpgm
define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
;EG-CHECK-LABEL: {{^}}test4:
;EG-CHECK: CF_END
;SI-CHECK-LABEL: {{^}}test4:
-;SI-CHECK: S_ENDPGM
+;SI-CHECK: s_endpgm
define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
; EG-DAG: CNDE_INT
; EG-DAG: CNDE_INT
-; SI: V_RCP_IFLAG_F32_e32 [[RCP:v[0-9]+]]
-; SI-DAG: V_MUL_HI_U32 [[RCP_HI:v[0-9]+]], [[RCP]]
-; SI-DAG: V_MUL_LO_I32 [[RCP_LO:v[0-9]+]], [[RCP]]
-; SI-DAG: V_SUB_I32_e32 [[NEG_RCP_LO:v[0-9]+]], 0, [[RCP_LO]]
-; SI: V_CNDMASK_B32_e64
-; SI: V_MUL_HI_U32 [[E:v[0-9]+]], {{v[0-9]+}}, [[RCP]]
-; SI-DAG: V_ADD_I32_e32 [[RCP_A_E:v[0-9]+]], [[E]], [[RCP]]
-; SI-DAG: V_SUBREV_I32_e32 [[RCP_S_E:v[0-9]+]], [[E]], [[RCP]]
-; SI: V_CNDMASK_B32_e64
-; SI: V_MUL_HI_U32 [[Quotient:v[0-9]+]]
-; SI: V_MUL_LO_I32 [[Num_S_Remainder:v[0-9]+]]
-; SI-DAG: V_SUB_I32_e32 [[Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[Num_S_Remainder]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI: V_AND_B32_e32 [[Tmp1:v[0-9]+]]
-; SI-DAG: V_ADD_I32_e32 [[Quotient_A_One:v[0-9]+]], 1, [[Quotient]]
-; SI-DAG: V_SUBREV_I32_e32 [[Quotient_S_One:v[0-9]+]],
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_ADD_I32_e32 [[Remainder_A_Den:v[0-9]+]],
-; SI-DAG: V_SUBREV_I32_e32 [[Remainder_S_Den:v[0-9]+]],
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI: S_ENDPGM
+; SI: v_rcp_iflag_f32_e32 [[RCP:v[0-9]+]]
+; SI-DAG: v_mul_hi_u32 [[RCP_HI:v[0-9]+]], [[RCP]]
+; SI-DAG: v_mul_lo_i32 [[RCP_LO:v[0-9]+]], [[RCP]]
+; SI-DAG: v_sub_i32_e32 [[NEG_RCP_LO:v[0-9]+]], 0, [[RCP_LO]]
+; SI: v_cndmask_b32_e64
+; SI: v_mul_hi_u32 [[E:v[0-9]+]], {{v[0-9]+}}, [[RCP]]
+; SI-DAG: v_add_i32_e32 [[RCP_A_E:v[0-9]+]], [[E]], [[RCP]]
+; SI-DAG: v_subrev_i32_e32 [[RCP_S_E:v[0-9]+]], [[E]], [[RCP]]
+; SI: v_cndmask_b32_e64
+; SI: v_mul_hi_u32 [[Quotient:v[0-9]+]]
+; SI: v_mul_lo_i32 [[Num_S_Remainder:v[0-9]+]]
+; SI-DAG: v_sub_i32_e32 [[Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[Num_S_Remainder]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI: v_and_b32_e32 [[Tmp1:v[0-9]+]]
+; SI-DAG: v_add_i32_e32 [[Quotient_A_One:v[0-9]+]], 1, [[Quotient]]
+; SI-DAG: v_subrev_i32_e32 [[Quotient_S_One:v[0-9]+]],
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_add_i32_e32 [[Remainder_A_Den:v[0-9]+]],
+; SI-DAG: v_subrev_i32_e32 [[Remainder_S_Den:v[0-9]+]],
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI: s_endpgm
define void @test_udivrem(i32 addrspace(1)* %out, i32 %x, i32 %y) {
%result0 = udiv i32 %x, %y
store i32 %result0, i32 addrspace(1)* %out
; EG-DAG: CNDE_INT
; EG-DAG: CNDE_INT
-; SI-DAG: V_RCP_IFLAG_F32_e32 [[FIRST_RCP:v[0-9]+]]
-; SI-DAG: V_MUL_HI_U32 [[FIRST_RCP_HI:v[0-9]+]], [[FIRST_RCP]]
-; SI-DAG: V_MUL_LO_I32 [[FIRST_RCP_LO:v[0-9]+]], [[FIRST_RCP]]
-; SI-DAG: V_SUB_I32_e32 [[FIRST_NEG_RCP_LO:v[0-9]+]], 0, [[FIRST_RCP_LO]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_MUL_HI_U32 [[FIRST_E:v[0-9]+]], {{v[0-9]+}}, [[FIRST_RCP]]
-; SI-DAG: V_ADD_I32_e32 [[FIRST_RCP_A_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
-; SI-DAG: V_SUBREV_I32_e32 [[FIRST_RCP_S_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_MUL_HI_U32 [[FIRST_Quotient:v[0-9]+]]
-; SI-DAG: V_MUL_LO_I32 [[FIRST_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: V_SUB_I32_e32 [[FIRST_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[FIRST_Num_S_Remainder]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_AND_B32_e32 [[FIRST_Tmp1:v[0-9]+]]
-; SI-DAG: V_ADD_I32_e32 [[FIRST_Quotient_A_One:v[0-9]+]], {{.*}}, [[FIRST_Quotient]]
-; SI-DAG: V_SUBREV_I32_e32 [[FIRST_Quotient_S_One:v[0-9]+]],
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_ADD_I32_e32 [[FIRST_Remainder_A_Den:v[0-9]+]],
-; SI-DAG: V_SUBREV_I32_e32 [[FIRST_Remainder_S_Den:v[0-9]+]],
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_RCP_IFLAG_F32_e32 [[SECOND_RCP:v[0-9]+]]
-; SI-DAG: V_MUL_HI_U32 [[SECOND_RCP_HI:v[0-9]+]], [[SECOND_RCP]]
-; SI-DAG: V_MUL_LO_I32 [[SECOND_RCP_LO:v[0-9]+]], [[SECOND_RCP]]
-; SI-DAG: V_SUB_I32_e32 [[SECOND_NEG_RCP_LO:v[0-9]+]], 0, [[SECOND_RCP_LO]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_MUL_HI_U32 [[SECOND_E:v[0-9]+]], {{v[0-9]+}}, [[SECOND_RCP]]
-; SI-DAG: V_ADD_I32_e32 [[SECOND_RCP_A_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
-; SI-DAG: V_SUBREV_I32_e32 [[SECOND_RCP_S_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_MUL_HI_U32 [[SECOND_Quotient:v[0-9]+]]
-; SI-DAG: V_MUL_LO_I32 [[SECOND_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: V_SUB_I32_e32 [[SECOND_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[SECOND_Num_S_Remainder]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_AND_B32_e32 [[SECOND_Tmp1:v[0-9]+]]
-; SI-DAG: V_ADD_I32_e32 [[SECOND_Quotient_A_One:v[0-9]+]], {{.*}}, [[SECOND_Quotient]]
-; SI-DAG: V_SUBREV_I32_e32 [[SECOND_Quotient_S_One:v[0-9]+]],
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_ADD_I32_e32 [[SECOND_Remainder_A_Den:v[0-9]+]],
-; SI-DAG: V_SUBREV_I32_e32 [[SECOND_Remainder_S_Den:v[0-9]+]],
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI: S_ENDPGM
+; SI-DAG: v_rcp_iflag_f32_e32 [[FIRST_RCP:v[0-9]+]]
+; SI-DAG: v_mul_hi_u32 [[FIRST_RCP_HI:v[0-9]+]], [[FIRST_RCP]]
+; SI-DAG: v_mul_lo_i32 [[FIRST_RCP_LO:v[0-9]+]], [[FIRST_RCP]]
+; SI-DAG: v_sub_i32_e32 [[FIRST_NEG_RCP_LO:v[0-9]+]], 0, [[FIRST_RCP_LO]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_mul_hi_u32 [[FIRST_E:v[0-9]+]], {{v[0-9]+}}, [[FIRST_RCP]]
+; SI-DAG: v_add_i32_e32 [[FIRST_RCP_A_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
+; SI-DAG: v_subrev_i32_e32 [[FIRST_RCP_S_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_mul_hi_u32 [[FIRST_Quotient:v[0-9]+]]
+; SI-DAG: v_mul_lo_i32 [[FIRST_Num_S_Remainder:v[0-9]+]]
+; SI-DAG: v_sub_i32_e32 [[FIRST_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[FIRST_Num_S_Remainder]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_and_b32_e32 [[FIRST_Tmp1:v[0-9]+]]
+; SI-DAG: v_add_i32_e32 [[FIRST_Quotient_A_One:v[0-9]+]], {{.*}}, [[FIRST_Quotient]]
+; SI-DAG: v_subrev_i32_e32 [[FIRST_Quotient_S_One:v[0-9]+]],
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_add_i32_e32 [[FIRST_Remainder_A_Den:v[0-9]+]],
+; SI-DAG: v_subrev_i32_e32 [[FIRST_Remainder_S_Den:v[0-9]+]],
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_rcp_iflag_f32_e32 [[SECOND_RCP:v[0-9]+]]
+; SI-DAG: v_mul_hi_u32 [[SECOND_RCP_HI:v[0-9]+]], [[SECOND_RCP]]
+; SI-DAG: v_mul_lo_i32 [[SECOND_RCP_LO:v[0-9]+]], [[SECOND_RCP]]
+; SI-DAG: v_sub_i32_e32 [[SECOND_NEG_RCP_LO:v[0-9]+]], 0, [[SECOND_RCP_LO]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_mul_hi_u32 [[SECOND_E:v[0-9]+]], {{v[0-9]+}}, [[SECOND_RCP]]
+; SI-DAG: v_add_i32_e32 [[SECOND_RCP_A_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
+; SI-DAG: v_subrev_i32_e32 [[SECOND_RCP_S_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_mul_hi_u32 [[SECOND_Quotient:v[0-9]+]]
+; SI-DAG: v_mul_lo_i32 [[SECOND_Num_S_Remainder:v[0-9]+]]
+; SI-DAG: v_sub_i32_e32 [[SECOND_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[SECOND_Num_S_Remainder]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_and_b32_e32 [[SECOND_Tmp1:v[0-9]+]]
+; SI-DAG: v_add_i32_e32 [[SECOND_Quotient_A_One:v[0-9]+]], {{.*}}, [[SECOND_Quotient]]
+; SI-DAG: v_subrev_i32_e32 [[SECOND_Quotient_S_One:v[0-9]+]],
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_add_i32_e32 [[SECOND_Remainder_A_Den:v[0-9]+]],
+; SI-DAG: v_subrev_i32_e32 [[SECOND_Remainder_S_Den:v[0-9]+]],
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI: s_endpgm
define void @test_udivrem_v2(<2 x i32> addrspace(1)* %out, <2 x i32> %x, <2 x i32> %y) {
%result0 = udiv <2 x i32> %x, %y
store <2 x i32> %result0, <2 x i32> addrspace(1)* %out
; EG-DAG: CNDE_INT
; EG-DAG: CNDE_INT
-; SI-DAG: V_RCP_IFLAG_F32_e32 [[FIRST_RCP:v[0-9]+]]
-; SI-DAG: V_MUL_HI_U32 [[FIRST_RCP_HI:v[0-9]+]], [[FIRST_RCP]]
-; SI-DAG: V_MUL_LO_I32 [[FIRST_RCP_LO:v[0-9]+]], [[FIRST_RCP]]
-; SI-DAG: V_SUB_I32_e32 [[FIRST_NEG_RCP_LO:v[0-9]+]], 0, [[FIRST_RCP_LO]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_MUL_HI_U32 [[FIRST_E:v[0-9]+]], {{v[0-9]+}}, [[FIRST_RCP]]
-; SI-DAG: V_ADD_I32_e32 [[FIRST_RCP_A_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
-; SI-DAG: V_SUBREV_I32_e32 [[FIRST_RCP_S_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_MUL_HI_U32 [[FIRST_Quotient:v[0-9]+]]
-; SI-DAG: V_MUL_LO_I32 [[FIRST_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: V_SUB_I32_e32 [[FIRST_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[FIRST_Num_S_Remainder]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_AND_B32_e32 [[FIRST_Tmp1:v[0-9]+]]
-; SI-DAG: V_ADD_I32_e32 [[FIRST_Quotient_A_One:v[0-9]+]], {{.*}}, [[FIRST_Quotient]]
-; SI-DAG: V_SUBREV_I32_e32 [[FIRST_Quotient_S_One:v[0-9]+]],
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_ADD_I32_e32 [[FIRST_Remainder_A_Den:v[0-9]+]],
-; SI-DAG: V_SUBREV_I32_e32 [[FIRST_Remainder_S_Den:v[0-9]+]],
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_RCP_IFLAG_F32_e32 [[SECOND_RCP:v[0-9]+]]
-; SI-DAG: V_MUL_HI_U32 [[SECOND_RCP_HI:v[0-9]+]], [[SECOND_RCP]]
-; SI-DAG: V_MUL_LO_I32 [[SECOND_RCP_LO:v[0-9]+]], [[SECOND_RCP]]
-; SI-DAG: V_SUB_I32_e32 [[SECOND_NEG_RCP_LO:v[0-9]+]], 0, [[SECOND_RCP_LO]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_MUL_HI_U32 [[SECOND_E:v[0-9]+]], {{v[0-9]+}}, [[SECOND_RCP]]
-; SI-DAG: V_ADD_I32_e32 [[SECOND_RCP_A_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
-; SI-DAG: V_SUBREV_I32_e32 [[SECOND_RCP_S_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_MUL_HI_U32 [[SECOND_Quotient:v[0-9]+]]
-; SI-DAG: V_MUL_LO_I32 [[SECOND_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: V_SUB_I32_e32 [[SECOND_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[SECOND_Num_S_Remainder]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_AND_B32_e32 [[SECOND_Tmp1:v[0-9]+]]
-; SI-DAG: V_ADD_I32_e32 [[SECOND_Quotient_A_One:v[0-9]+]], {{.*}}, [[SECOND_Quotient]]
-; SI-DAG: V_SUBREV_I32_e32 [[SECOND_Quotient_S_One:v[0-9]+]],
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_ADD_I32_e32 [[SECOND_Remainder_A_Den:v[0-9]+]],
-; SI-DAG: V_SUBREV_I32_e32 [[SECOND_Remainder_S_Den:v[0-9]+]],
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_RCP_IFLAG_F32_e32 [[THIRD_RCP:v[0-9]+]]
-; SI-DAG: V_MUL_HI_U32 [[THIRD_RCP_HI:v[0-9]+]], [[THIRD_RCP]]
-; SI-DAG: V_MUL_LO_I32 [[THIRD_RCP_LO:v[0-9]+]], [[THIRD_RCP]]
-; SI-DAG: V_SUB_I32_e32 [[THIRD_NEG_RCP_LO:v[0-9]+]], 0, [[THIRD_RCP_LO]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_MUL_HI_U32 [[THIRD_E:v[0-9]+]], {{v[0-9]+}}, [[THIRD_RCP]]
-; SI-DAG: V_ADD_I32_e32 [[THIRD_RCP_A_E:v[0-9]+]], [[THIRD_E]], [[THIRD_RCP]]
-; SI-DAG: V_SUBREV_I32_e32 [[THIRD_RCP_S_E:v[0-9]+]], [[THIRD_E]], [[THIRD_RCP]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_MUL_HI_U32 [[THIRD_Quotient:v[0-9]+]]
-; SI-DAG: V_MUL_LO_I32 [[THIRD_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: V_SUB_I32_e32 [[THIRD_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[THIRD_Num_S_Remainder]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_AND_B32_e32 [[THIRD_Tmp1:v[0-9]+]]
-; SI-DAG: V_ADD_I32_e32 [[THIRD_Quotient_A_One:v[0-9]+]], {{.*}}, [[THIRD_Quotient]]
-; SI-DAG: V_SUBREV_I32_e32 [[THIRD_Quotient_S_One:v[0-9]+]],
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_ADD_I32_e32 [[THIRD_Remainder_A_Den:v[0-9]+]],
-; SI-DAG: V_SUBREV_I32_e32 [[THIRD_Remainder_S_Den:v[0-9]+]],
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_RCP_IFLAG_F32_e32 [[FOURTH_RCP:v[0-9]+]]
-; SI-DAG: V_MUL_HI_U32 [[FOURTH_RCP_HI:v[0-9]+]], [[FOURTH_RCP]]
-; SI-DAG: V_MUL_LO_I32 [[FOURTH_RCP_LO:v[0-9]+]], [[FOURTH_RCP]]
-; SI-DAG: V_SUB_I32_e32 [[FOURTH_NEG_RCP_LO:v[0-9]+]], 0, [[FOURTH_RCP_LO]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_MUL_HI_U32 [[FOURTH_E:v[0-9]+]], {{v[0-9]+}}, [[FOURTH_RCP]]
-; SI-DAG: V_ADD_I32_e32 [[FOURTH_RCP_A_E:v[0-9]+]], [[FOURTH_E]], [[FOURTH_RCP]]
-; SI-DAG: V_SUBREV_I32_e32 [[FOURTH_RCP_S_E:v[0-9]+]], [[FOURTH_E]], [[FOURTH_RCP]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_MUL_HI_U32 [[FOURTH_Quotient:v[0-9]+]]
-; SI-DAG: V_MUL_LO_I32 [[FOURTH_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: V_SUB_I32_e32 [[FOURTH_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[FOURTH_Num_S_Remainder]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_AND_B32_e32 [[FOURTH_Tmp1:v[0-9]+]]
-; SI-DAG: V_ADD_I32_e32 [[FOURTH_Quotient_A_One:v[0-9]+]], {{.*}}, [[FOURTH_Quotient]]
-; SI-DAG: V_SUBREV_I32_e32 [[FOURTH_Quotient_S_One:v[0-9]+]],
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_ADD_I32_e32 [[FOURTH_Remainder_A_Den:v[0-9]+]],
-; SI-DAG: V_SUBREV_I32_e32 [[FOURTH_Remainder_S_Den:v[0-9]+]],
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI: S_ENDPGM
+; SI-DAG: v_rcp_iflag_f32_e32 [[FIRST_RCP:v[0-9]+]]
+; SI-DAG: v_mul_hi_u32 [[FIRST_RCP_HI:v[0-9]+]], [[FIRST_RCP]]
+; SI-DAG: v_mul_lo_i32 [[FIRST_RCP_LO:v[0-9]+]], [[FIRST_RCP]]
+; SI-DAG: v_sub_i32_e32 [[FIRST_NEG_RCP_LO:v[0-9]+]], 0, [[FIRST_RCP_LO]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_mul_hi_u32 [[FIRST_E:v[0-9]+]], {{v[0-9]+}}, [[FIRST_RCP]]
+; SI-DAG: v_add_i32_e32 [[FIRST_RCP_A_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
+; SI-DAG: v_subrev_i32_e32 [[FIRST_RCP_S_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_mul_hi_u32 [[FIRST_Quotient:v[0-9]+]]
+; SI-DAG: v_mul_lo_i32 [[FIRST_Num_S_Remainder:v[0-9]+]]
+; SI-DAG: v_sub_i32_e32 [[FIRST_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[FIRST_Num_S_Remainder]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_and_b32_e32 [[FIRST_Tmp1:v[0-9]+]]
+; SI-DAG: v_add_i32_e32 [[FIRST_Quotient_A_One:v[0-9]+]], {{.*}}, [[FIRST_Quotient]]
+; SI-DAG: v_subrev_i32_e32 [[FIRST_Quotient_S_One:v[0-9]+]],
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_add_i32_e32 [[FIRST_Remainder_A_Den:v[0-9]+]],
+; SI-DAG: v_subrev_i32_e32 [[FIRST_Remainder_S_Den:v[0-9]+]],
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_rcp_iflag_f32_e32 [[SECOND_RCP:v[0-9]+]]
+; SI-DAG: v_mul_hi_u32 [[SECOND_RCP_HI:v[0-9]+]], [[SECOND_RCP]]
+; SI-DAG: v_mul_lo_i32 [[SECOND_RCP_LO:v[0-9]+]], [[SECOND_RCP]]
+; SI-DAG: v_sub_i32_e32 [[SECOND_NEG_RCP_LO:v[0-9]+]], 0, [[SECOND_RCP_LO]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_mul_hi_u32 [[SECOND_E:v[0-9]+]], {{v[0-9]+}}, [[SECOND_RCP]]
+; SI-DAG: v_add_i32_e32 [[SECOND_RCP_A_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
+; SI-DAG: v_subrev_i32_e32 [[SECOND_RCP_S_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_mul_hi_u32 [[SECOND_Quotient:v[0-9]+]]
+; SI-DAG: v_mul_lo_i32 [[SECOND_Num_S_Remainder:v[0-9]+]]
+; SI-DAG: v_sub_i32_e32 [[SECOND_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[SECOND_Num_S_Remainder]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_and_b32_e32 [[SECOND_Tmp1:v[0-9]+]]
+; SI-DAG: v_add_i32_e32 [[SECOND_Quotient_A_One:v[0-9]+]], {{.*}}, [[SECOND_Quotient]]
+; SI-DAG: v_subrev_i32_e32 [[SECOND_Quotient_S_One:v[0-9]+]],
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_add_i32_e32 [[SECOND_Remainder_A_Den:v[0-9]+]],
+; SI-DAG: v_subrev_i32_e32 [[SECOND_Remainder_S_Den:v[0-9]+]],
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_rcp_iflag_f32_e32 [[THIRD_RCP:v[0-9]+]]
+; SI-DAG: v_mul_hi_u32 [[THIRD_RCP_HI:v[0-9]+]], [[THIRD_RCP]]
+; SI-DAG: v_mul_lo_i32 [[THIRD_RCP_LO:v[0-9]+]], [[THIRD_RCP]]
+; SI-DAG: v_sub_i32_e32 [[THIRD_NEG_RCP_LO:v[0-9]+]], 0, [[THIRD_RCP_LO]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_mul_hi_u32 [[THIRD_E:v[0-9]+]], {{v[0-9]+}}, [[THIRD_RCP]]
+; SI-DAG: v_add_i32_e32 [[THIRD_RCP_A_E:v[0-9]+]], [[THIRD_E]], [[THIRD_RCP]]
+; SI-DAG: v_subrev_i32_e32 [[THIRD_RCP_S_E:v[0-9]+]], [[THIRD_E]], [[THIRD_RCP]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_mul_hi_u32 [[THIRD_Quotient:v[0-9]+]]
+; SI-DAG: v_mul_lo_i32 [[THIRD_Num_S_Remainder:v[0-9]+]]
+; SI-DAG: v_sub_i32_e32 [[THIRD_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[THIRD_Num_S_Remainder]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_and_b32_e32 [[THIRD_Tmp1:v[0-9]+]]
+; SI-DAG: v_add_i32_e32 [[THIRD_Quotient_A_One:v[0-9]+]], {{.*}}, [[THIRD_Quotient]]
+; SI-DAG: v_subrev_i32_e32 [[THIRD_Quotient_S_One:v[0-9]+]],
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_add_i32_e32 [[THIRD_Remainder_A_Den:v[0-9]+]],
+; SI-DAG: v_subrev_i32_e32 [[THIRD_Remainder_S_Den:v[0-9]+]],
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_rcp_iflag_f32_e32 [[FOURTH_RCP:v[0-9]+]]
+; SI-DAG: v_mul_hi_u32 [[FOURTH_RCP_HI:v[0-9]+]], [[FOURTH_RCP]]
+; SI-DAG: v_mul_lo_i32 [[FOURTH_RCP_LO:v[0-9]+]], [[FOURTH_RCP]]
+; SI-DAG: v_sub_i32_e32 [[FOURTH_NEG_RCP_LO:v[0-9]+]], 0, [[FOURTH_RCP_LO]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_mul_hi_u32 [[FOURTH_E:v[0-9]+]], {{v[0-9]+}}, [[FOURTH_RCP]]
+; SI-DAG: v_add_i32_e32 [[FOURTH_RCP_A_E:v[0-9]+]], [[FOURTH_E]], [[FOURTH_RCP]]
+; SI-DAG: v_subrev_i32_e32 [[FOURTH_RCP_S_E:v[0-9]+]], [[FOURTH_E]], [[FOURTH_RCP]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_mul_hi_u32 [[FOURTH_Quotient:v[0-9]+]]
+; SI-DAG: v_mul_lo_i32 [[FOURTH_Num_S_Remainder:v[0-9]+]]
+; SI-DAG: v_sub_i32_e32 [[FOURTH_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[FOURTH_Num_S_Remainder]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_and_b32_e32 [[FOURTH_Tmp1:v[0-9]+]]
+; SI-DAG: v_add_i32_e32 [[FOURTH_Quotient_A_One:v[0-9]+]], {{.*}}, [[FOURTH_Quotient]]
+; SI-DAG: v_subrev_i32_e32 [[FOURTH_Quotient_S_One:v[0-9]+]],
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_add_i32_e32 [[FOURTH_Remainder_A_Den:v[0-9]+]],
+; SI-DAG: v_subrev_i32_e32 [[FOURTH_Remainder_S_Den:v[0-9]+]],
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI: s_endpgm
define void @test_udivrem_v4(<4 x i32> addrspace(1)* %out, <4 x i32> %x, <4 x i32> %y) {
%result0 = udiv <4 x i32> %x, %y
store <4 x i32> %result0, <4 x i32> addrspace(1)* %out
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}udiv24_i8:
-; SI: V_CVT_F32_UBYTE
-; SI: V_CVT_F32_UBYTE
-; SI: V_RCP_F32
-; SI: V_CVT_U32_F32
+; SI: v_cvt_f32_ubyte
+; SI: v_cvt_f32_ubyte
+; SI: v_rcp_f32
+; SI: v_cvt_u32_f32
; EG: UINT_TO_FLT
; EG-DAG: UINT_TO_FLT
}
; FUNC-LABEL: {{^}}udiv24_i16:
-; SI: V_CVT_F32_U32
-; SI: V_CVT_F32_U32
-; SI: V_RCP_F32
-; SI: V_CVT_U32_F32
+; SI: v_cvt_f32_u32
+; SI: v_cvt_f32_u32
+; SI: v_rcp_f32
+; SI: v_cvt_u32_f32
; EG: UINT_TO_FLT
; EG-DAG: UINT_TO_FLT
}
; FUNC-LABEL: {{^}}udiv24_i32:
-; SI: V_CVT_F32_U32
-; SI-DAG: V_CVT_F32_U32
-; SI-DAG: V_RCP_F32
-; SI: V_CVT_U32_F32
+; SI: v_cvt_f32_u32
+; SI-DAG: v_cvt_f32_u32
+; SI-DAG: v_rcp_f32
+; SI: v_cvt_u32_f32
; EG: UINT_TO_FLT
; EG-DAG: UINT_TO_FLT
; FUNC-LABEL: {{^}}udiv25_i32:
; RCP_IFLAG is for URECIP in the full 32b alg
-; SI: V_RCP_IFLAG
-; SI-NOT: V_RCP_F32
+; SI: v_rcp_iflag
+; SI-NOT: v_rcp_f32
; EG-NOT: UINT_TO_FLT
; EG-NOT: RECIP_IEEE
; FUNC-LABEL: {{^}}test_no_udiv24_i32_1:
; RCP_IFLAG is for URECIP in the full 32b alg
-; SI: V_RCP_IFLAG
-; SI-NOT: V_RCP_F32
+; SI: v_rcp_iflag
+; SI-NOT: v_rcp_f32
; EG-NOT: UINT_TO_FLT
; EG-NOT: RECIP_IEEE
; FUNC-LABEL: {{^}}test_no_udiv24_i32_2:
; RCP_IFLAG is for URECIP in the full 32b alg
-; SI: V_RCP_IFLAG
-; SI-NOT: V_RCP_F32
+; SI: v_rcp_iflag
+; SI-NOT: v_rcp_f32
; EG-NOT: UINT_TO_FLT
; EG-NOT: RECIP_IEEE
}
; FUNC-LABEL: {{^}}urem24_i8:
-; SI: V_CVT_F32_UBYTE
-; SI: V_CVT_F32_UBYTE
-; SI: V_RCP_F32
-; SI: V_CVT_U32_F32
+; SI: v_cvt_f32_ubyte
+; SI: v_cvt_f32_ubyte
+; SI: v_rcp_f32
+; SI: v_cvt_u32_f32
; EG: UINT_TO_FLT
; EG-DAG: UINT_TO_FLT
}
; FUNC-LABEL: {{^}}urem24_i16:
-; SI: V_CVT_F32_U32
-; SI: V_CVT_F32_U32
-; SI: V_RCP_F32
-; SI: V_CVT_U32_F32
+; SI: v_cvt_f32_u32
+; SI: v_cvt_f32_u32
+; SI: v_rcp_f32
+; SI: v_cvt_u32_f32
; EG: UINT_TO_FLT
; EG-DAG: UINT_TO_FLT
}
; FUNC-LABEL: {{^}}urem24_i32:
-; SI: V_CVT_F32_U32
-; SI: V_CVT_F32_U32
-; SI: V_RCP_F32
-; SI: V_CVT_U32_F32
+; SI: v_cvt_f32_u32
+; SI: v_cvt_f32_u32
+; SI: v_rcp_f32
+; SI: v_cvt_u32_f32
; EG: UINT_TO_FLT
; EG-DAG: UINT_TO_FLT
; FUNC-LABEL: {{^}}urem25_i32:
; RCP_IFLAG is for URECIP in the full 32b alg
-; SI: V_RCP_IFLAG
-; SI-NOT: V_RCP_F32
+; SI: v_rcp_iflag
+; SI-NOT: v_rcp_f32
; EG-NOT: UINT_TO_FLT
; EG-NOT: RECIP_IEEE
; FUNC-LABEL: {{^}}test_no_urem24_i32_1:
; RCP_IFLAG is for URECIP in the full 32b alg
-; SI: V_RCP_IFLAG
-; SI-NOT: V_RCP_F32
+; SI: v_rcp_iflag
+; SI-NOT: v_rcp_f32
; EG-NOT: UINT_TO_FLT
; EG-NOT: RECIP_IEEE
; FUNC-LABEL: {{^}}test_no_urem24_i32_2:
; RCP_IFLAG is for URECIP in the full 32b alg
-; SI: V_RCP_IFLAG
-; SI-NOT: V_RCP_F32
+; SI: v_rcp_iflag
+; SI-NOT: v_rcp_f32
; EG-NOT: UINT_TO_FLT
; EG-NOT: RECIP_IEEE
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
-;SI: S_ENDPGM
+;SI: s_endpgm
define void @test_udiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
%result = udiv i64 %x, %y
store i64 %result, i64 addrspace(1)* %out
;EG: BFE_UINT
;EG: BFE_UINT
;EG: AND_INT {{.*}}, 1,
-;SI: S_ENDPGM
+;SI: s_endpgm
define void @test_urem(i64 addrspace(1)* %out, i64 %x, i64 %y) {
%result = urem i64 %x, %y
store i64 %result, i64 addrspace(1)* %out
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
; SI-LABEL: {{^}}uint_to_fp_f64_i32
-; SI: V_CVT_F64_U32_e32
-; SI: S_ENDPGM
+; SI: v_cvt_f64_u32_e32
+; SI: s_endpgm
define void @uint_to_fp_f64_i32(double addrspace(1)* %out, i32 %in) {
%cast = uitofp i32 %in to double
store double %cast, double addrspace(1)* %out, align 8
}
; SI-LABEL: {{^}}uint_to_fp_i1_f64:
-; SI: V_CMP_EQ_I32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
+; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
; FIXME: We should the VGPR sources for V_CNDMASK are copied from SGPRs,
; we should be able to fold the SGPRs into the V_CNDMASK instructions.
-; SI: V_CNDMASK_B32_e64 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CMP]]
-; SI: V_CNDMASK_B32_e64 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CMP]]
-; SI: BUFFER_STORE_DWORDX2
-; SI: S_ENDPGM
+; SI: v_cndmask_b32_e64 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CMP]]
+; SI: v_cndmask_b32_e64 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CMP]]
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
define void @uint_to_fp_i1_f64(double addrspace(1)* %out, i32 %in) {
%cmp = icmp eq i32 %in, 0
%fp = uitofp i1 %cmp to double
}
; SI-LABEL: {{^}}uint_to_fp_i1_f64_load:
-; SI: V_CNDMASK_B32_e64 [[IRESULT:v[0-9]]], 0, 1
-; SI-NEXT: V_CVT_F64_U32_e32 [[RESULT:v\[[0-9]+:[0-9]\]]], [[IRESULT]]
-; SI: BUFFER_STORE_DWORDX2 [[RESULT]]
-; SI: S_ENDPGM
+; SI: v_cndmask_b32_e64 [[IRESULT:v[0-9]]], 0, 1
+; SI-NEXT: v_cvt_f64_u32_e32 [[RESULT:v\[[0-9]+:[0-9]\]]], [[IRESULT]]
+; SI: buffer_store_dwordx2 [[RESULT]]
+; SI: s_endpgm
define void @uint_to_fp_i1_f64_load(double addrspace(1)* %out, i1 %in) {
%fp = uitofp i1 %in to double
store double %fp, double addrspace(1)* %out, align 8
}
; SI-LABEL: {{^}}v_uint_to_fp_i64_to_f64
-; SI: BUFFER_LOAD_DWORDX2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
-; SI-DAG: V_CVT_F64_U32_e32 [[LO_CONV:v\[[0-9]+:[0-9]+\]]], v[[LO]]
-; SI-DAG: V_CVT_F64_U32_e32 [[HI_CONV:v\[[0-9]+:[0-9]+\]]], v[[HI]]
-; SI: V_LDEXP_F64 [[LDEXP:v\[[0-9]+:[0-9]+\]]], [[HI_CONV]], 32
-; SI: V_ADD_F64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[LDEXP]], [[LO_CONV]]
-; SI: BUFFER_STORE_DWORDX2 [[RESULT]]
+; SI: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
+; SI-DAG: v_cvt_f64_u32_e32 [[LO_CONV:v\[[0-9]+:[0-9]+\]]], v[[LO]]
+; SI-DAG: v_cvt_f64_u32_e32 [[HI_CONV:v\[[0-9]+:[0-9]+\]]], v[[HI]]
+; SI: v_ldexp_f64 [[LDEXP:v\[[0-9]+:[0-9]+\]]], [[HI_CONV]], 32
+; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[LDEXP]], [[LO_CONV]]
+; SI: buffer_store_dwordx2 [[RESULT]]
define void @v_uint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep = getelementptr i64 addrspace(1)* %in, i32 %tid
; R600-DAG: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].W
; R600-DAG: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[3].X
-; SI: V_CVT_F32_U32_e32
-; SI: V_CVT_F32_U32_e32
-; SI: S_ENDPGM
+; SI: v_cvt_f32_u32_e32
+; SI: v_cvt_f32_u32_e32
+; SI: s_endpgm
define void @uint_to_fp_v2i32(<2 x float> addrspace(1)* %out, <2 x i32> %in) {
%result = uitofp <2 x i32> %in to <2 x float>
store <2 x float> %result, <2 x float> addrspace(1)* %out
; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; SI: V_CVT_F32_U32_e32
-; SI: V_CVT_F32_U32_e32
-; SI: V_CVT_F32_U32_e32
-; SI: V_CVT_F32_U32_e32
-; SI: S_ENDPGM
+; SI: v_cvt_f32_u32_e32
+; SI: v_cvt_f32_u32_e32
+; SI: v_cvt_f32_u32_e32
+; SI: v_cvt_f32_u32_e32
+; SI: s_endpgm
define void @uint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%value = load <4 x i32> addrspace(1) * %in
%result = uitofp <4 x i32> %value to <4 x float>
; R600: UINT_TO_FLT
; R600: UINT_TO_FLT
; R600: MULADD_IEEE
-; SI: V_CVT_F32_U32_e32
-; SI: V_CVT_F32_U32_e32
-; SI: V_MAD_F32
-; SI: S_ENDPGM
+; SI: v_cvt_f32_u32_e32
+; SI: v_cvt_f32_u32_e32
+; SI: v_mad_f32
+; SI: s_endpgm
define void @uint_to_fp_i64_f32(float addrspace(1)* %out, i64 %in) {
entry:
%0 = uitofp i64 %in to float
}
; FUNC-LABEL: {{^}}uint_to_fp_i1_f32:
-; SI: V_CMP_EQ_I32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
-; SI-NEXT: V_CNDMASK_B32_e64 [[RESULT:v[0-9]+]], 0, 1.0, [[CMP]]
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1.0, [[CMP]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
define void @uint_to_fp_i1_f32(float addrspace(1)* %out, i32 %in) {
%cmp = icmp eq i32 %in, 0
%fp = uitofp i1 %cmp to float
}
; FUNC-LABEL: {{^}}uint_to_fp_i1_f32_load:
-; SI: V_CNDMASK_B32_e64 [[RESULT:v[0-9]+]], 0, 1.0
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1.0
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
define void @uint_to_fp_i1_f32_load(float addrspace(1)* %out, i1 %in) {
%fp = uitofp i1 %in to float
store float %fp, float addrspace(1)* %out, align 4
; FIXME: This is probably wrong. This probably needs to expand to 8-bit reads and writes.
; SI-LABEL: {{^}}unaligned_load_store_i32:
-; SI: DS_READ_U16
-; SI: DS_READ_U16
-; SI: DS_WRITE_B32
-; SI: S_ENDPGM
+; SI: ds_read_u16
+; SI: ds_read_u16
+; SI: ds_write_b32
+; SI: s_endpgm
define void @unaligned_load_store_i32(i32 addrspace(3)* %p, i32 addrspace(3)* %r) nounwind {
%v = load i32 addrspace(3)* %p, align 1
store i32 %v, i32 addrspace(3)* %r, align 1
}
; SI-LABEL: {{^}}unaligned_load_store_v4i32:
-; SI: DS_READ_U16
-; SI: DS_READ_U16
-; SI: DS_READ_U16
-; SI: DS_READ_U16
-; SI: DS_READ_U16
-; SI: DS_READ_U16
-; SI: DS_READ_U16
-; SI: DS_READ_U16
-; SI: DS_WRITE_B32
-; SI: DS_WRITE_B32
-; SI: DS_WRITE_B32
-; SI: DS_WRITE_B32
-; SI: S_ENDPGM
+; SI: ds_read_u16
+; SI: ds_read_u16
+; SI: ds_read_u16
+; SI: ds_read_u16
+; SI: ds_read_u16
+; SI: ds_read_u16
+; SI: ds_read_u16
+; SI: ds_read_u16
+; SI: ds_write_b32
+; SI: ds_write_b32
+; SI: ds_write_b32
+; SI: ds_write_b32
+; SI: s_endpgm
define void @unaligned_load_store_v4i32(<4 x i32> addrspace(3)* %p, <4 x i32> addrspace(3)* %r) nounwind {
%v = load <4 x i32> addrspace(3)* %p, align 1
store <4 x i32> %v, <4 x i32> addrspace(3)* %r, align 1
}
; SI-LABEL: {{^}}load_lds_i64_align_4:
-; SI: DS_READ2_B32
-; SI: S_ENDPGM
+; SI: ds_read2_b32
+; SI: s_endpgm
define void @load_lds_i64_align_4(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
%val = load i64 addrspace(3)* %in, align 4
store i64 %val, i64 addrspace(1)* %out, align 8
}
; SI-LABEL: {{^}}load_lds_i64_align_4_with_offset
-; SI: DS_READ2_B32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset0:8 offset1:9
-; SI: S_ENDPGM
+; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset0:8 offset1:9
+; SI: s_endpgm
define void @load_lds_i64_align_4_with_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
%ptr = getelementptr i64 addrspace(3)* %in, i32 4
%val = load i64 addrspace(3)* %ptr, align 4
; SI-LABEL: {{^}}load_lds_i64_align_4_with_split_offset:
; The tests for the case where the lo offset is 8-bits, but the hi offset is 9-bits
-; SI: DS_READ2_B32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset0:0 offset1:1
-; SI: S_ENDPGM
+; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset0:0 offset1:1
+; SI: s_endpgm
define void @load_lds_i64_align_4_with_split_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
%ptr = bitcast i64 addrspace(3)* %in to i32 addrspace(3)*
%ptr255 = getelementptr i32 addrspace(3)* %ptr, i32 255
; }
; SI-LABEL: {{^}}store_lds_i64_align_4:
-; SI: DS_WRITE2_B32
-; SI: S_ENDPGM
+; SI: ds_write2_b32
+; SI: s_endpgm
define void @store_lds_i64_align_4(i64 addrspace(3)* %out, i64 %val) #0 {
store i64 %val, i64 addrspace(3)* %out, align 4
ret void
}
; SI-LABEL: {{^}}store_lds_i64_align_4_with_offset
-; SI: DS_WRITE2_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:8 offset1:9
-; SI: S_ENDPGM
+; SI: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:8 offset1:9
+; SI: s_endpgm
define void @store_lds_i64_align_4_with_offset(i64 addrspace(3)* %out) #0 {
%ptr = getelementptr i64 addrspace(3)* %out, i32 4
store i64 0, i64 addrspace(3)* %ptr, align 4
; SI-LABEL: {{^}}store_lds_i64_align_4_with_split_offset:
; The tests for the case where the lo offset is 8-bits, but the hi offset is 9-bits
-; SI: DS_WRITE2_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
-; SI: S_ENDPGM
+; SI: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
+; SI: s_endpgm
define void @store_lds_i64_align_4_with_split_offset(i64 addrspace(3)* %out) #0 {
%ptr = bitcast i64 addrspace(3)* %out to i32 addrspace(3)*
%ptr255 = getelementptr i32 addrspace(3)* %ptr, i32 255
; COMMON-LABEL: {{^}}branch_false:
; SI: .text
-; SI-NEXT: S_ENDPGM
+; SI-NEXT: s_endpgm
define void @branch_false(i8 addrspace(1)* nocapture %main, i32 %main_stride) #0 {
entry:
br i1 false, label %for.end, label %for.body.lr.ph
; COMMON-LABEL: {{^}}branch_undef:
; SI: .text
-; SI-NEXT: S_ENDPGM
+; SI-NEXT: s_endpgm
define void @branch_undef(i8 addrspace(1)* nocapture %main, i32 %main_stride) #0 {
entry:
br i1 undef, label %for.end, label %for.body.lr.ph
;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
-;CHECK: V_RCP_IFLAG_F32_e32
+;CHECK: v_rcp_iflag_f32_e32
define void @test(i32 %p, i32 %q) {
%i = udiv i32 %p, %q
;EG-CHECK: {{^}}test2:
;EG-CHECK: CF_END
;SI-CHECK: {{^}}test2:
-;SI-CHECK: S_ENDPGM
+;SI-CHECK: s_endpgm
define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
;EG-CHECK: {{^}}test4:
;EG-CHECK: CF_END
;SI-CHECK: {{^}}test4:
-;SI-CHECK: S_ENDPGM
+;SI-CHECK: s_endpgm
define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
; SI-LABEL: {{^}}test_sgpr_use_twice_binop:
-; SI: S_LOAD_DWORD [[SGPR:s[0-9]+]],
-; SI: V_ADD_F32_e64 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]]
-; SI: BUFFER_STORE_DWORD [[RESULT]]
+; SI: s_load_dword [[SGPR:s[0-9]+]],
+; SI: v_add_f32_e64 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]]
+; SI: buffer_store_dword [[RESULT]]
define void @test_sgpr_use_twice_binop(float addrspace(1)* %out, float %a) #0 {
%dbl = fadd float %a, %a
store float %dbl, float addrspace(1)* %out, align 4
}
; SI-LABEL: {{^}}test_sgpr_use_three_ternary_op:
-; SI: S_LOAD_DWORD [[SGPR:s[0-9]+]],
-; SI: V_FMA_F32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], [[SGPR]]
-; SI: BUFFER_STORE_DWORD [[RESULT]]
+; SI: s_load_dword [[SGPR:s[0-9]+]],
+; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], [[SGPR]]
+; SI: buffer_store_dword [[RESULT]]
define void @test_sgpr_use_three_ternary_op(float addrspace(1)* %out, float %a) #0 {
%fma = call float @llvm.fma.f32(float %a, float %a, float %a) #1
store float %fma, float addrspace(1)* %out, align 4
}
; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_b:
-; SI: S_LOAD_DWORD [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI: S_LOAD_DWORD [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
-; SI: V_MOV_B32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
-; SI: V_FMA_F32 [[RESULT:v[0-9]+]], [[SGPR0]], [[SGPR0]], [[VGPR1]]
-; SI: BUFFER_STORE_DWORD [[RESULT]]
+; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
+; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[SGPR0]], [[VGPR1]]
+; SI: buffer_store_dword [[RESULT]]
define void @test_sgpr_use_twice_ternary_op_a_a_b(float addrspace(1)* %out, float %a, float %b) #0 {
%fma = call float @llvm.fma.f32(float %a, float %a, float %b) #1
store float %fma, float addrspace(1)* %out, align 4
}
; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_b_a:
-; SI: S_LOAD_DWORD [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI: S_LOAD_DWORD [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
-; SI: V_MOV_B32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
-; SI: V_FMA_F32 [[RESULT:v[0-9]+]], [[SGPR0]], [[VGPR1]], [[SGPR0]]
-; SI: BUFFER_STORE_DWORD [[RESULT]]
+; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
+; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[VGPR1]], [[SGPR0]]
+; SI: buffer_store_dword [[RESULT]]
define void @test_sgpr_use_twice_ternary_op_a_b_a(float addrspace(1)* %out, float %a, float %b) #0 {
%fma = call float @llvm.fma.f32(float %a, float %b, float %a) #1
store float %fma, float addrspace(1)* %out, align 4
}
; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_b_a_a:
-; SI: S_LOAD_DWORD [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI: S_LOAD_DWORD [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
-; SI: V_MOV_B32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
-; SI: V_FMA_F32 [[RESULT:v[0-9]+]], [[VGPR1]], [[SGPR0]], [[SGPR0]]
-; SI: BUFFER_STORE_DWORD [[RESULT]]
+; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
+; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[VGPR1]], [[SGPR0]], [[SGPR0]]
+; SI: buffer_store_dword [[RESULT]]
define void @test_sgpr_use_twice_ternary_op_b_a_a(float addrspace(1)* %out, float %a, float %b) #0 {
%fma = call float @llvm.fma.f32(float %b, float %a, float %a) #1
store float %fma, float addrspace(1)* %out, align 4
}
; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_imm:
-; SI: S_LOAD_DWORD [[SGPR:s[0-9]+]]
-; SI: V_FMA_F32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], 2.0
-; SI: BUFFER_STORE_DWORD [[RESULT]]
+; SI: s_load_dword [[SGPR:s[0-9]+]]
+; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], 2.0
+; SI: buffer_store_dword [[RESULT]]
define void @test_sgpr_use_twice_ternary_op_a_a_imm(float addrspace(1)* %out, float %a) #0 {
%fma = call float @llvm.fma.f32(float %a, float %a, float 2.0) #1
store float %fma, float addrspace(1)* %out, align 4
}
; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_imm_a:
-; SI: S_LOAD_DWORD [[SGPR:s[0-9]+]]
-; SI: V_FMA_F32 [[RESULT:v[0-9]+]], [[SGPR]], 2.0, [[SGPR]]
-; SI: BUFFER_STORE_DWORD [[RESULT]]
+; SI: s_load_dword [[SGPR:s[0-9]+]]
+; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], 2.0, [[SGPR]]
+; SI: buffer_store_dword [[RESULT]]
define void @test_sgpr_use_twice_ternary_op_a_imm_a(float addrspace(1)* %out, float %a) #0 {
%fma = call float @llvm.fma.f32(float %a, float 2.0, float %a) #1
store float %fma, float addrspace(1)* %out, align 4
; Don't use fma since fma c, x, y is canonicalized to fma x, c, y
; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_imm_a_a:
-; SI: S_LOAD_DWORD [[SGPR:s[0-9]+]]
-; SI: V_MAD_I32_I24 [[RESULT:v[0-9]+]], 2, [[SGPR]], [[SGPR]]
-; SI: BUFFER_STORE_DWORD [[RESULT]]
+; SI: s_load_dword [[SGPR:s[0-9]+]]
+; SI: v_mad_i32_i24 [[RESULT:v[0-9]+]], 2, [[SGPR]], [[SGPR]]
+; SI: buffer_store_dword [[RESULT]]
define void @test_sgpr_use_twice_ternary_op_imm_a_a(i32 addrspace(1)* %out, i32 %a) #0 {
%fma = call i32 @llvm.AMDGPU.imad24(i32 2, i32 %a, i32 %a) #1
store i32 %fma, i32 addrspace(1)* %out, align 4
}
; FUNC-LABEL: {{^}}s_usubo_i32:
-; SI: S_SUB_I32
+; SI: s_sub_i32
define void @s_usubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) nounwind {
%usub = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b) nounwind
%val = extractvalue { i32, i1 } %usub, 0
}
; FUNC-LABEL: {{^}}v_usubo_i32:
-; SI: V_SUBREV_I32_e32
+; SI: v_subrev_i32_e32
define void @v_usubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
%a = load i32 addrspace(1)* %aptr, align 4
%b = load i32 addrspace(1)* %bptr, align 4
}
; FUNC-LABEL: {{^}}s_usubo_i64:
-; SI: S_SUB_U32
-; SI: S_SUBB_U32
+; SI: s_sub_u32
+; SI: s_subb_u32
define void @s_usubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a, i64 %b) nounwind {
%usub = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b) nounwind
%val = extractvalue { i64, i1 } %usub, 0
}
; FUNC-LABEL: {{^}}v_usubo_i64:
-; SI: V_SUB_I32
-; SI: V_SUBB_U32
+; SI: v_sub_i32
+; SI: v_subb_u32
define void @v_usubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
%a = load i64 addrspace(1)* %aptr, align 4
%b = load i64 addrspace(1)* %bptr, align 4
declare i32 @llvm.r600.read.tidig.x() #1
; SI-LABEL: {{^}}v_cnd_nan_nosgpr:
-; SI: V_CNDMASK_B32_e64 v{{[0-9]}}, v{{[0-9]}}, -1, s{{\[[0-9]+:[0-9]+\]}}
+; SI: v_cndmask_b32_e64 v{{[0-9]}}, v{{[0-9]}}, -1, s{{\[[0-9]+:[0-9]+\]}}
; SI-DAG: v{{[0-9]}}
; All nan values are converted to 0xffffffff
-; SI: S_ENDPGM
+; SI: s_endpgm
define void @v_cnd_nan_nosgpr(float addrspace(1)* %out, i32 %c, float addrspace(1)* %fptr) #0 {
%idx = call i32 @llvm.r600.read.tidig.x() #1
%f.gep = getelementptr float addrspace(1)* %fptr, i32 %idx
; never be moved.
; SI-LABEL: {{^}}v_cnd_nan:
-; SI: V_CNDMASK_B32_e64 v{{[0-9]}}, v{{[0-9]}}, -1, s{{\[[0-9]+:[0-9]+\]}}
+; SI: v_cndmask_b32_e64 v{{[0-9]}}, v{{[0-9]}}, -1, s{{\[[0-9]+:[0-9]+\]}}
; SI-DAG: v{{[0-9]}}
; All nan values are converted to 0xffffffff
-; SI: S_ENDPGM
+; SI: s_endpgm
define void @v_cnd_nan(float addrspace(1)* %out, i32 %c, float %f) #0 {
%setcc = icmp ne i32 %c, 0
%select = select i1 %setcc, float 0xFFFFFFFFE0000000, float %f
; Make sure the i1 values created by the cfg structurizer pass are
; moved using VALU instructions
-; SI-NOT: S_MOV_B64 s[{{[0-9]:[0-9]}}], -1
-; SI: V_MOV_B32_e32 v{{[0-9]}}, -1
+; SI-NOT: s_mov_b64 s[{{[0-9]:[0-9]}}], -1
+; SI: v_mov_b32_e32 v{{[0-9]}}, -1
define void @test_if(i32 %a, i32 %b, i32 addrspace(1)* %src, i32 addrspace(1)* %dst) {
entry:
switch i32 %a, label %default [
; Test that we correctly commute a sub instruction
; FUNC-LABEL: {{^}}sub_rev:
-; SI-NOT: V_SUB_I32_e32 v{{[0-9]+}}, s
-; SI: V_SUBREV_I32_e32 v{{[0-9]+}}, s
+; SI-NOT: v_sub_i32_e32 v{{[0-9]+}}, s
+; SI: v_subrev_i32_e32 v{{[0-9]+}}, s
; ModuleID = 'vop-shrink.ll'
; 32-bit op when we shrink it.
; FUNC-LABEL: {{^}}add_fold:
-; SI: V_ADD_F32_e32 v{{[0-9]+}}, 0x44800000
+; SI: v_add_f32_e32 v{{[0-9]+}}, 0x44800000
define void @add_fold(float addrspace(1)* %out) {
entry:
%tmp = call i32 @llvm.r600.read.tidig.x()
;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;SI-CHECK: {{^}}test_select_v2i32:
-;SI-CHECK: V_CNDMASK_B32_e64
-;SI-CHECK: V_CNDMASK_B32_e64
+;SI-CHECK: v_cndmask_b32_e64
+;SI-CHECK: v_cndmask_b32_e64
define void @test_select_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in0, <2 x i32> addrspace(1)* %in1) {
entry:
;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;SI-CHECK: {{^}}test_select_v2f32:
-;SI-CHECK: V_CNDMASK_B32_e64
-;SI-CHECK: V_CNDMASK_B32_e64
+;SI-CHECK: v_cndmask_b32_e64
+;SI-CHECK: v_cndmask_b32_e64
define void @test_select_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in0, <2 x float> addrspace(1)* %in1) {
entry:
;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;SI-CHECK: {{^}}test_select_v4i32:
-;SI-CHECK: V_CNDMASK_B32_e64
-;SI-CHECK: V_CNDMASK_B32_e64
-;SI-CHECK: V_CNDMASK_B32_e64
-;SI-CHECK: V_CNDMASK_B32_e64
+;SI-CHECK: v_cndmask_b32_e64
+;SI-CHECK: v_cndmask_b32_e64
+;SI-CHECK: v_cndmask_b32_e64
+;SI-CHECK: v_cndmask_b32_e64
define void @test_select_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in0, <4 x i32> addrspace(1)* %in1) {
entry:
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace %s
; CHECK-LABEL: {{^}}main:
-; CHECK: S_LOAD_DWORDX4
-; CHECK: S_LOAD_DWORDX4
-; CHECK: S_WAITCNT lgkmcnt(0){{$}}
-; CHECK: S_WAITCNT vmcnt(0){{$}}
-; CHECK: S_WAITCNT expcnt(0) lgkmcnt(0){{$}}
+; CHECK: s_load_dwordx4
+; CHECK: s_load_dwordx4
+; CHECK: s_waitcnt lgkmcnt(0){{$}}
+; CHECK: s_waitcnt vmcnt(0){{$}}
+; CHECK: s_waitcnt expcnt(0) lgkmcnt(0){{$}}
define void @main(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, <16 x i8> addrspace(2)* inreg %arg3, <16 x i8> addrspace(2)* inreg %arg4, i32 inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9, float addrspace(2)* inreg %constptr) #0 {
main_body:
%tmp = getelementptr <16 x i8> addrspace(2)* %arg3, i32 0
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[0].X
-; SI: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0
-; SI: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI: BUFFER_STORE_DWORD [[VVAL]]
+; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0
+; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; SI: buffer_store_dword [[VVAL]]
define void @ngroups_x (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.ngroups.x() #0
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[0].Y
-; SI: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x1
-; SI: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI: BUFFER_STORE_DWORD [[VVAL]]
+; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1
+; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; SI: buffer_store_dword [[VVAL]]
define void @ngroups_y (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.ngroups.y() #0
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[0].Z
-; SI: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x2
-; SI: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI: BUFFER_STORE_DWORD [[VVAL]]
+; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2
+; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; SI: buffer_store_dword [[VVAL]]
define void @ngroups_z (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.ngroups.z() #0
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[0].W
-; SI: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x3
-; SI: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI: BUFFER_STORE_DWORD [[VVAL]]
+; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x3
+; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; SI: buffer_store_dword [[VVAL]]
define void @global_size_x (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.global.size.x() #0
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[1].X
-; SI: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x4
-; SI: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI: BUFFER_STORE_DWORD [[VVAL]]
+; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
+; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; SI: buffer_store_dword [[VVAL]]
define void @global_size_y (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.global.size.y() #0
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[1].Y
-; SI: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x5
-; SI: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI: BUFFER_STORE_DWORD [[VVAL]]
+; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x5
+; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; SI: buffer_store_dword [[VVAL]]
define void @global_size_z (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.global.size.z() #0
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[1].Z
-; SI: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x6
-; SI: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI: BUFFER_STORE_DWORD [[VVAL]]
+; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6
+; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; SI: buffer_store_dword [[VVAL]]
define void @local_size_x (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.local.size.x() #0
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[1].W
-; SI: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x7
-; SI: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI: BUFFER_STORE_DWORD [[VVAL]]
+; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7
+; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; SI: buffer_store_dword [[VVAL]]
define void @local_size_y (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.local.size.y() #0
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[2].X
-; SI: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x8
-; SI: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI: BUFFER_STORE_DWORD [[VVAL]]
+; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
+; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; SI: buffer_store_dword [[VVAL]]
define void @local_size_z (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.local.size.z() #0
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[2].Z
-; SI: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0xb
-; SI: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI: BUFFER_STORE_DWORD [[VVAL]]
+; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xb
+; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; SI: buffer_store_dword [[VVAL]]
define void @get_work_dim (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.AMDGPU.read.workdim() #0
; kernel arguments, but this may change in the future.
; FUNC-LABEL: {{^}}tgid_x:
-; SI: V_MOV_B32_e32 [[VVAL:v[0-9]+]], s4
-; SI: BUFFER_STORE_DWORD [[VVAL]]
+; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], s4
+; SI: buffer_store_dword [[VVAL]]
define void @tgid_x (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tgid.x() #0
}
; FUNC-LABEL: {{^}}tgid_y:
-; SI: V_MOV_B32_e32 [[VVAL:v[0-9]+]], s5
-; SI: BUFFER_STORE_DWORD [[VVAL]]
+; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], s5
+; SI: buffer_store_dword [[VVAL]]
define void @tgid_y (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tgid.y() #0
}
; FUNC-LABEL: {{^}}tgid_z:
-; SI: V_MOV_B32_e32 [[VVAL:v[0-9]+]], s6
-; SI: BUFFER_STORE_DWORD [[VVAL]]
+; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], s6
+; SI: buffer_store_dword [[VVAL]]
define void @tgid_z (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tgid.z() #0
}
; FUNC-LABEL: {{^}}tidig_x:
-; SI: BUFFER_STORE_DWORD v0
+; SI: buffer_store_dword v0
define void @tidig_x (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tidig.x() #0
}
; FUNC-LABEL: {{^}}tidig_y:
-; SI: BUFFER_STORE_DWORD v1
+; SI: buffer_store_dword v1
define void @tidig_y (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tidig.y() #0
}
; FUNC-LABEL: {{^}}tidig_z:
-; SI: BUFFER_STORE_DWORD v2
+; SI: buffer_store_dword v2
define void @tidig_z (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tidig.z() #0
;EG-CHECK: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;SI-CHECK: {{^}}xor_v2i32:
-;SI-CHECK: V_XOR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_XOR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_xor_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_xor_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @xor_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in0, <2 x i32> addrspace(1)* %in1) {
;EG-CHECK: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;SI-CHECK: {{^}}xor_v4i32:
-;SI-CHECK: V_XOR_B32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_XOR_B32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_XOR_B32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_XOR_B32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_xor_b32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_xor_b32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_xor_b32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_xor_b32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
define void @xor_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in0, <4 x i32> addrspace(1)* %in1) {
%a = load <4 x i32> addrspace(1) * %in0
;EG-CHECK: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PS}}
;SI-CHECK: {{^}}xor_i1:
-;SI-CHECK: V_XOR_B32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+;SI-CHECK: v_xor_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define void @xor_i1(float addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) {
%a = load float addrspace(1) * %in0
}
; SI-CHECK-LABEL: {{^}}vector_xor_i32:
-; SI-CHECK: V_XOR_B32_e32
+; SI-CHECK: v_xor_b32_e32
define void @vector_xor_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) {
%a = load i32 addrspace(1)* %in0
%b = load i32 addrspace(1)* %in1
}
; SI-CHECK-LABEL: {{^}}scalar_xor_i32:
-; SI-CHECK: S_XOR_B32
+; SI-CHECK: s_xor_b32
define void @scalar_xor_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
%result = xor i32 %a, %b
store i32 %result, i32 addrspace(1)* %out
}
; SI-CHECK-LABEL: {{^}}scalar_not_i32:
-; SI-CHECK: S_NOT_B32
+; SI-CHECK: s_not_b32
define void @scalar_not_i32(i32 addrspace(1)* %out, i32 %a) {
%result = xor i32 %a, -1
store i32 %result, i32 addrspace(1)* %out
}
; SI-CHECK-LABEL: {{^}}vector_not_i32:
-; SI-CHECK: V_NOT_B32
+; SI-CHECK: v_not_b32
define void @vector_not_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) {
%a = load i32 addrspace(1)* %in0
%b = load i32 addrspace(1)* %in1
}
; SI-CHECK-LABEL: {{^}}vector_xor_i64:
-; SI-CHECK: V_XOR_B32_e32
-; SI-CHECK: V_XOR_B32_e32
-; SI-CHECK: S_ENDPGM
+; SI-CHECK: v_xor_b32_e32
+; SI-CHECK: v_xor_b32_e32
+; SI-CHECK: s_endpgm
define void @vector_xor_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in0, i64 addrspace(1)* %in1) {
%a = load i64 addrspace(1)* %in0
%b = load i64 addrspace(1)* %in1
}
; SI-CHECK-LABEL: {{^}}scalar_xor_i64:
-; SI-CHECK: S_XOR_B64
-; SI-CHECK: S_ENDPGM
+; SI-CHECK: s_xor_b64
+; SI-CHECK: s_endpgm
define void @scalar_xor_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
%result = xor i64 %a, %b
store i64 %result, i64 addrspace(1)* %out
}
; SI-CHECK-LABEL: {{^}}scalar_not_i64:
-; SI-CHECK: S_NOT_B64
+; SI-CHECK: s_not_b64
define void @scalar_not_i64(i64 addrspace(1)* %out, i64 %a) {
%result = xor i64 %a, -1
store i64 %result, i64 addrspace(1)* %out
}
; SI-CHECK-LABEL: {{^}}vector_not_i64:
-; SI-CHECK: V_NOT_B32
-; SI-CHECK: V_NOT_B32
+; SI-CHECK: v_not_b32
+; SI-CHECK: v_not_b32
define void @vector_not_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in0, i64 addrspace(1)* %in1) {
%a = load i64 addrspace(1)* %in0
%b = load i64 addrspace(1)* %in1
; use an SALU instruction for this.
; SI-CHECK-LABEL: {{^}}xor_cf:
-; SI-CHECK: S_XOR_B64
+; SI-CHECK: s_xor_b64
define void @xor_cf(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b) {
entry:
%0 = icmp eq i64 %a, 0
; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW
; SI-CHECK: {{^}}test:
-; SI-CHECK: S_MOV_B32 [[ZERO:s[0-9]]], 0{{$}}
-; SI-CHECK: V_MOV_B32_e32 v[[V_ZERO:[0-9]]], [[ZERO]]
-; SI-CHECK: BUFFER_STORE_DWORDX2 v[0:[[V_ZERO]]{{\]}}
+; SI-CHECK: s_mov_b32 [[ZERO:s[0-9]]], 0{{$}}
+; SI-CHECK: v_mov_b32_e32 v[[V_ZERO:[0-9]]], [[ZERO]]
+; SI-CHECK: buffer_store_dwordx2 v[0:[[V_ZERO]]{{\]}}
define void @test(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
entry:
%0 = mul i32 %a, %b
}
; SI-CHECK-LABEL: {{^}}testi1toi32:
-; SI-CHECK: V_CNDMASK_B32
+; SI-CHECK: v_cndmask_b32
define void @testi1toi32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
entry:
%0 = icmp eq i32 %a, %b
}
; SI-CHECK-LABEL: {{^}}zext_i1_to_i64:
-; SI-CHECK: V_CMP_EQ_I32
-; SI-CHECK: V_CNDMASK_B32
-; SI-CHECK: S_MOV_B32 s{{[0-9]+}}, 0
+; SI-CHECK: v_cmp_eq_i32
+; SI-CHECK: v_cndmask_b32
+; SI-CHECK: s_mov_b32 s{{[0-9]+}}, 0
define void @zext_i1_to_i64(i64 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%cmp = icmp eq i32 %a, %b
%ext = zext i1 %cmp to i64