X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FTarget%2FR600%2FSIInstructions.td;h=36b52929668105edaea36178c144dee77d1690b5;hp=a2c3454920fb3efa5982ee0af261ef7a393a0e10;hb=2220408e1a357b127aa915bff67ba7350cafd5c0;hpb=ceb4f4907d34eaeeb12009f316b4260a66ba1c50 diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index a2c3454920f..36b52929668 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -896,19 +896,38 @@ defm BUFFER_STORE_DWORDX4 : MUBUF_Store_Helper < 0x0000001e, "BUFFER_STORE_DWORDX4", VReg_128, v4i32, global_store >; //def BUFFER_ATOMIC_SWAP : MUBUF_ <0x00000030, "BUFFER_ATOMIC_SWAP", []>; +defm BUFFER_ATOMIC_SWAP : MUBUF_Atomic < + 0x00000030, "BUFFER_ATOMIC_SWAP", VReg_32, i32, atomic_swap_global +>; //def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <0x00000031, "BUFFER_ATOMIC_CMPSWAP", []>; defm BUFFER_ATOMIC_ADD : MUBUF_Atomic < 0x00000032, "BUFFER_ATOMIC_ADD", VReg_32, i32, atomic_add_global >; -//def BUFFER_ATOMIC_SUB : MUBUF_ <0x00000033, "BUFFER_ATOMIC_SUB", []>; +defm BUFFER_ATOMIC_SUB : MUBUF_Atomic < + 0x00000033, "BUFFER_ATOMIC_SUB", VReg_32, i32, atomic_sub_global +>; //def BUFFER_ATOMIC_RSUB : MUBUF_ <0x00000034, "BUFFER_ATOMIC_RSUB", []>; -//def BUFFER_ATOMIC_SMIN : MUBUF_ <0x00000035, "BUFFER_ATOMIC_SMIN", []>; -//def BUFFER_ATOMIC_UMIN : MUBUF_ <0x00000036, "BUFFER_ATOMIC_UMIN", []>; -//def BUFFER_ATOMIC_SMAX : MUBUF_ <0x00000037, "BUFFER_ATOMIC_SMAX", []>; -//def BUFFER_ATOMIC_UMAX : MUBUF_ <0x00000038, "BUFFER_ATOMIC_UMAX", []>; -//def BUFFER_ATOMIC_AND : MUBUF_ <0x00000039, "BUFFER_ATOMIC_AND", []>; -//def BUFFER_ATOMIC_OR : MUBUF_ <0x0000003a, "BUFFER_ATOMIC_OR", []>; -//def BUFFER_ATOMIC_XOR : MUBUF_ <0x0000003b, "BUFFER_ATOMIC_XOR", []>; +defm BUFFER_ATOMIC_SMIN : MUBUF_Atomic < + 0x00000035, "BUFFER_ATOMIC_SMIN", VReg_32, i32, atomic_min_global +>; +defm BUFFER_ATOMIC_UMIN : MUBUF_Atomic < + 0x00000036, "BUFFER_ATOMIC_UMIN", VReg_32, i32, atomic_umin_global +>; +defm BUFFER_ATOMIC_SMAX : MUBUF_Atomic < + 0x00000037, "BUFFER_ATOMIC_SMAX", VReg_32, i32, atomic_max_global +>; +defm BUFFER_ATOMIC_UMAX : MUBUF_Atomic < + 0x00000038, "BUFFER_ATOMIC_UMAX", VReg_32, i32, atomic_umax_global +>; +defm BUFFER_ATOMIC_AND : MUBUF_Atomic < + 0x00000039, "BUFFER_ATOMIC_AND", VReg_32, i32, atomic_and_global +>; +defm BUFFER_ATOMIC_OR : MUBUF_Atomic < + 0x0000003a, "BUFFER_ATOMIC_OR", VReg_32, i32, atomic_or_global +>; +defm BUFFER_ATOMIC_XOR : MUBUF_Atomic < + 0x0000003b, "BUFFER_ATOMIC_XOR", VReg_32, i32, atomic_xor_global +>; //def BUFFER_ATOMIC_INC : MUBUF_ <0x0000003c, "BUFFER_ATOMIC_INC", []>; //def BUFFER_ATOMIC_DEC : MUBUF_ <0x0000003d, "BUFFER_ATOMIC_DEC", []>; //def BUFFER_ATOMIC_FCMPSWAP : MUBUF_ <0x0000003e, "BUFFER_ATOMIC_FCMPSWAP", []>; @@ -1388,8 +1407,8 @@ defm V_MAX_LEGACY_F32 : VOP2Inst , "V_MAX_LEGACY_F32", VOP_F32_F32_F32, AMDGPUfmax >; -defm V_MIN_F32 : VOP2Inst , "V_MIN_F32", VOP_F32_F32_F32>; -defm V_MAX_F32 : VOP2Inst , "V_MAX_F32", VOP_F32_F32_F32>; +defm V_MIN_F32 : VOP2Inst , "V_MIN_F32", VOP_F32_F32_F32, fminnum>; +defm V_MAX_F32 : VOP2Inst , "V_MAX_F32", VOP_F32_F32_F32, fmaxnum>; defm V_MIN_I32 : VOP2Inst , "V_MIN_I32", VOP_I32_I32_I32, AMDGPUsmin>; defm V_MAX_I32 : VOP2Inst , "V_MAX_I32", VOP_I32_I32_I32, AMDGPUsmax>; defm V_MIN_U32 : VOP2Inst , "V_MIN_U32", VOP_I32_I32_I32, AMDGPUumin>; @@ -1509,16 +1528,12 @@ defm V_CUBETC_F32 : VOP3Inst , "V_CUBETC_F32", defm V_CUBEMA_F32 : VOP3Inst , "V_CUBEMA_F32", VOP_F32_F32_F32_F32 >; - -let neverHasSideEffects = 1, mayLoad = 0, mayStore = 0 in { defm V_BFE_U32 : VOP3Inst , "V_BFE_U32", VOP_I32_I32_I32_I32, AMDGPUbfe_u32 >; defm V_BFE_I32 : VOP3Inst , "V_BFE_I32", VOP_I32_I32_I32_I32, AMDGPUbfe_i32 >; -} - defm V_BFI_B32 : VOP3Inst , "V_BFI_B32", VOP_I32_I32_I32_I32, AMDGPUbfi >; @@ -1578,11 +1593,12 @@ defm V_ADD_F64 : VOP3Inst , "V_ADD_F64", defm V_MUL_F64 : VOP3Inst , "V_MUL_F64", VOP_F64_F64_F64, fmul >; + defm V_MIN_F64 : VOP3Inst , "V_MIN_F64", - VOP_F64_F64_F64 + VOP_F64_F64_F64, fminnum >; defm V_MAX_F64 : VOP3Inst , "V_MAX_F64", - VOP_F64_F64_F64 + VOP_F64_F64_F64, fmaxnum >; } // isCommutable = 1 @@ -1794,12 +1810,6 @@ def SI_ADDR64_RSRC : InstSI < "", [] >; -def SI_BUFFER_RSRC : InstSI < - (outs SReg_128:$srsrc), - (ins SReg_32:$ptr_lo, SReg_32:$ptr_hi, SSrc_32:$data_lo, SSrc_32:$data_hi), - "", [] ->; - def V_SUB_F64 : InstSI < (outs VReg_64:$dst), (ins VReg_64:$src0, VReg_64:$src1), @@ -1947,9 +1957,9 @@ def : Pat < def : Pat < (i64 (ctpop i64:$src)), - (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)), - (S_BCNT1_I32_B64 $src), sub0), - (S_MOV_B32 0), sub1) + (i64 (REG_SEQUENCE SReg_64, + (S_BCNT1_I32_B64 $src), sub0, + (S_MOV_B32 0), sub1)) >; //===----------------------------------------------------------------------===// @@ -1993,11 +2003,6 @@ def : Pat < (V_BCNT_U32_B32_e64 $popcnt, $val) >; -def : Pat < - (addc i32:$src0, i32:$src1), - (V_ADD_I32_e64 $src0, $src1) ->; - /********** ======================= **********/ /********** Image sampling patterns **********/ /********** ======================= **********/ @@ -2701,10 +2706,9 @@ def : DSAtomicCmpXChg; multiclass MUBUFLoad_Pattern { def : Pat < - (vt (constant_ld (add i64:$ptr, i64:$offset))), - (Instr_ADDR64 (SI_ADDR64_RSRC $ptr), $offset, 0) + (vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i16:$offset))), + (Instr_ADDR64 $srsrc, $vaddr, $offset) >; - } defm : MUBUFLoad_Pattern ; @@ -3053,6 +3057,13 @@ def : Pat < (V_CMP_EQ_I32_e64 (V_AND_B32_e64 (i32 1), $a), 1) >; +def : Pat < + (i32 (bswap i32:$a)), + (V_BFI_B32 (S_MOV_B32 0x00ff00ff), + (V_ALIGNBIT_B32 $a, $a, 24), + (V_ALIGNBIT_B32 $a, $a, 8)) +>; + //============================================================================// // Miscellaneous Optimization Patterns //============================================================================//