0x0000001e, "BUFFER_STORE_DWORDX4", VReg_128, v4i32, global_store
>;
//def BUFFER_ATOMIC_SWAP : MUBUF_ <0x00000030, "BUFFER_ATOMIC_SWAP", []>;
+defm BUFFER_ATOMIC_SWAP : MUBUF_Atomic <
+ 0x00000030, "BUFFER_ATOMIC_SWAP", VReg_32, i32, atomic_swap_global
+>;
//def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <0x00000031, "BUFFER_ATOMIC_CMPSWAP", []>;
defm BUFFER_ATOMIC_ADD : MUBUF_Atomic <
0x00000032, "BUFFER_ATOMIC_ADD", VReg_32, i32, atomic_add_global
0x00000033, "BUFFER_ATOMIC_SUB", VReg_32, i32, atomic_sub_global
>;
//def BUFFER_ATOMIC_RSUB : MUBUF_ <0x00000034, "BUFFER_ATOMIC_RSUB", []>;
-//def BUFFER_ATOMIC_SMIN : MUBUF_ <0x00000035, "BUFFER_ATOMIC_SMIN", []>;
-//def BUFFER_ATOMIC_UMIN : MUBUF_ <0x00000036, "BUFFER_ATOMIC_UMIN", []>;
+defm BUFFER_ATOMIC_SMIN : MUBUF_Atomic <
+ 0x00000035, "BUFFER_ATOMIC_SMIN", VReg_32, i32, atomic_min_global
+>;
+defm BUFFER_ATOMIC_UMIN : MUBUF_Atomic <
+ 0x00000036, "BUFFER_ATOMIC_UMIN", VReg_32, i32, atomic_umin_global
+>;
defm BUFFER_ATOMIC_SMAX : MUBUF_Atomic <
0x00000037, "BUFFER_ATOMIC_SMAX", VReg_32, i32, atomic_max_global
>;
defm BUFFER_ATOMIC_AND : MUBUF_Atomic <
0x00000039, "BUFFER_ATOMIC_AND", VReg_32, i32, atomic_and_global
>;
-//def BUFFER_ATOMIC_OR : MUBUF_ <0x0000003a, "BUFFER_ATOMIC_OR", []>;
-//def BUFFER_ATOMIC_XOR : MUBUF_ <0x0000003b, "BUFFER_ATOMIC_XOR", []>;
+defm BUFFER_ATOMIC_OR : MUBUF_Atomic <
+ 0x0000003a, "BUFFER_ATOMIC_OR", VReg_32, i32, atomic_or_global
+>;
+defm BUFFER_ATOMIC_XOR : MUBUF_Atomic <
+ 0x0000003b, "BUFFER_ATOMIC_XOR", VReg_32, i32, atomic_xor_global
+>;
//def BUFFER_ATOMIC_INC : MUBUF_ <0x0000003c, "BUFFER_ATOMIC_INC", []>;
//def BUFFER_ATOMIC_DEC : MUBUF_ <0x0000003d, "BUFFER_ATOMIC_DEC", []>;
//def BUFFER_ATOMIC_FCMPSWAP : MUBUF_ <0x0000003e, "BUFFER_ATOMIC_FCMPSWAP", []>;
VOP_F32_F32_F32, AMDGPUfmax
>;
-defm V_MIN_F32 : VOP2Inst <vop2<0xf>, "V_MIN_F32", VOP_F32_F32_F32>;
-defm V_MAX_F32 : VOP2Inst <vop2<0x10>, "V_MAX_F32", VOP_F32_F32_F32>;
+defm V_MIN_F32 : VOP2Inst <vop2<0xf>, "V_MIN_F32", VOP_F32_F32_F32, fminnum>;
+defm V_MAX_F32 : VOP2Inst <vop2<0x10>, "V_MAX_F32", VOP_F32_F32_F32, fmaxnum>;
defm V_MIN_I32 : VOP2Inst <vop2<0x11>, "V_MIN_I32", VOP_I32_I32_I32, AMDGPUsmin>;
defm V_MAX_I32 : VOP2Inst <vop2<0x12>, "V_MAX_I32", VOP_I32_I32_I32, AMDGPUsmax>;
defm V_MIN_U32 : VOP2Inst <vop2<0x13>, "V_MIN_U32", VOP_I32_I32_I32, AMDGPUumin>;
defm V_MUL_F64 : VOP3Inst <vop3<0x165>, "V_MUL_F64",
VOP_F64_F64_F64, fmul
>;
+
defm V_MIN_F64 : VOP3Inst <vop3<0x166>, "V_MIN_F64",
- VOP_F64_F64_F64
+ VOP_F64_F64_F64, fminnum
>;
defm V_MAX_F64 : VOP3Inst <vop3<0x167>, "V_MAX_F64",
- VOP_F64_F64_F64
+ VOP_F64_F64_F64, fmaxnum
>;
} // isCommutable = 1
def : Pat <
(i64 (ctpop i64:$src)),
- (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
- (S_BCNT1_I32_B64 $src), sub0),
- (S_MOV_B32 0), sub1)
+ (i64 (REG_SEQUENCE SReg_64,
+ (S_BCNT1_I32_B64 $src), sub0,
+ (S_MOV_B32 0), sub1))
>;
//===----------------------------------------------------------------------===//
(V_CMP_EQ_I32_e64 (V_AND_B32_e64 (i32 1), $a), 1)
>;
+def : Pat <
+ (i32 (bswap i32:$a)),
+ (V_BFI_B32 (S_MOV_B32 0x00ff00ff),
+ (V_ALIGNBIT_B32 $a, $a, 24),
+ (V_ALIGNBIT_B32 $a, $a, 8))
+>;
+
//============================================================================//
// Miscellaneous Optimization Patterns
//============================================================================//