X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FAMDGPU%2FSIInstructions.td;h=6f653c70aca06cb489bc456c21f0491bbb570ea6;hb=808f9643e6a596f7cdf5e02a080750fd687c6e6b;hp=f1a5546e3c13988d9808637b16a474d2de316ecb;hpb=3f7c35a966ec6a504e799389d23eaa8ae1f91358;p=oota-llvm.git diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td index f1a5546e3c1..6f653c70aca 100644 --- a/lib/Target/AMDGPU/SIInstructions.td +++ b/lib/Target/AMDGPU/SIInstructions.td @@ -127,7 +127,7 @@ let Defs = [SCC] in { defm S_BREV_B32 : SOP1_32 , "s_brev_b32", - [(set i32:$dst, (AMDGPUbrev i32:$src0))] + [(set i32:$dst, (bitreverse i32:$src0))] >; defm S_BREV_B64 : SOP1_64 , "s_brev_b64", []>; @@ -187,10 +187,14 @@ defm S_XNOR_SAVEEXEC_B64 : SOP1_64 , "s_xnor_saveexec_b64", []> defm S_QUADMASK_B32 : SOP1_32 , "s_quadmask_b32", []>; defm S_QUADMASK_B64 : SOP1_64 , "s_quadmask_b64", []>; + +let Uses = [M0] in { defm S_MOVRELS_B32 : SOP1_32 , "s_movrels_b32", []>; defm S_MOVRELS_B64 : SOP1_64 , "s_movrels_b64", []>; defm S_MOVRELD_B32 : SOP1_32 , "s_movreld_b32", []>; defm S_MOVRELD_B64 : SOP1_64 , "s_movreld_b64", []>; +} // End Uses = [M0] + defm S_CBRANCH_JOIN : SOP1_1 , "s_cbranch_join", []>; defm S_MOV_REGRD_B32 : SOP1_32 , "s_mov_regrd_b32", []>; let Defs = [SCC] in { @@ -949,13 +953,13 @@ defm BUFFER_LOAD_SSHORT : MUBUF_Load_Helper < mubuf<0x0b, 0x13>, "buffer_load_sshort", VGPR_32, i32, sextloadi16_global >; defm BUFFER_LOAD_DWORD : MUBUF_Load_Helper < - mubuf<0x0c, 0x14>, "buffer_load_dword", VGPR_32, i32, global_load + mubuf<0x0c, 0x14>, "buffer_load_dword", VGPR_32, i32, mubuf_load >; defm BUFFER_LOAD_DWORDX2 : MUBUF_Load_Helper < - mubuf<0x0d, 0x15>, "buffer_load_dwordx2", VReg_64, v2i32, global_load + mubuf<0x0d, 0x15>, "buffer_load_dwordx2", VReg_64, v2i32, mubuf_load >; defm BUFFER_LOAD_DWORDX4 : MUBUF_Load_Helper < - mubuf<0x0e, 0x17>, "buffer_load_dwordx4", VReg_128, v4i32, global_load + mubuf<0x0e, 0x17>, "buffer_load_dwordx4", 
VReg_128, v4i32, mubuf_load >; defm BUFFER_STORE_BYTE : MUBUF_Store_Helper < @@ -1335,9 +1339,12 @@ defm V_FREXP_MANT_F32 : VOP1Inst , "v_frexp_mant_f32", let vdst = 0, src0 = 0, VOPAsmPrefer32Bit = 1 in { defm V_CLREXCP : VOP1Inst , "v_clrexcp", VOP_NONE>; } + +let Uses = [M0, EXEC] in { defm V_MOVRELD_B32 : VOP1Inst , "v_movreld_b32", VOP_I32_I32>; defm V_MOVRELS_B32 : VOP1Inst , "v_movrels_b32", VOP_I32_I32>; defm V_MOVRELSD_B32 : VOP1Inst , "v_movrelsd_b32", VOP_I32_I32>; +} // End Uses = [M0, EXEC] // These instruction only exist on SI and CI let SubtargetPredicate = isSICI in { @@ -1424,9 +1431,7 @@ defm V_INTERP_MOV_F32 : VINTRP_m < //===----------------------------------------------------------------------===// multiclass V_CNDMASK { - defm _e32 : VOP2_m < - op, VOP_CNDMASK.Outs, VOP_CNDMASK.Ins32, VOP_CNDMASK.Asm32, [], - name, name>; + defm _e32 : VOP2_m ; defm _e64 : VOP3_m < op, VOP_CNDMASK.Outs, VOP_CNDMASK.Ins64, @@ -1585,10 +1590,10 @@ defm V_BCNT_U32_B32 : VOP2_VI3_Inst , "v_bcnt_u32_b32", VOP_I32_I32_I32 >; defm V_MBCNT_LO_U32_B32 : VOP2_VI3_Inst , "v_mbcnt_lo_u32_b32", - VOP_I32_I32_I32 + VOP_I32_I32_I32, int_amdgcn_mbcnt_lo >; defm V_MBCNT_HI_U32_B32 : VOP2_VI3_Inst , "v_mbcnt_hi_u32_b32", - VOP_I32_I32_I32 + VOP_I32_I32_I32, int_amdgcn_mbcnt_hi >; defm V_LDEXP_F32 : VOP2_VI3_Inst , "v_ldexp_f32", VOP_F32_F32_I32, AMDGPUldexp @@ -1937,36 +1942,6 @@ def SI_KILL : InstSI < let Uses = [EXEC], Defs = [EXEC,VCC,M0] in { -//defm SI_ : RegisterLoadStore ; - -let UseNamedOperandTable = 1 in { - -def SI_RegisterLoad : InstSI < - (outs VGPR_32:$dst, SReg_64:$temp), - (ins FRAMEri32:$addr, i32imm:$chan), - "", [] -> { - let isRegisterLoad = 1; - let mayLoad = 1; -} - -class SIRegStore : InstSI < - outs, - (ins VGPR_32:$val, FRAMEri32:$addr, i32imm:$chan), - "", [] -> { - let isRegisterStore = 1; - let mayStore = 1; -} - -let usesCustomInserter = 1 in { -def SI_RegisterStorePseudo : SIRegStore<(outs)>; -} // End usesCustomInserter = 1 -def SI_RegisterStore : 
SIRegStore<(outs SReg_64:$temp)>; - - -} // End UseNamedOperandTable = 1 - class SI_INDIRECT_SRC : InstSI < (outs VGPR_32:$dst, SReg_64:$temp), (ins rc:$src, VSrc_32:$idx, i32imm:$off), @@ -2003,8 +1978,7 @@ multiclass SI_SPILL_SGPR { let UseNamedOperandTable = 1, Uses = [EXEC] in { def _SAVE : InstSI < (outs), - (ins sgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc, - SReg_32:$scratch_offset), + (ins sgpr_class:$src, i32imm:$frame_idx), "", [] > { let mayStore = 1; @@ -2013,7 +1987,7 @@ multiclass SI_SPILL_SGPR { def _RESTORE : InstSI < (outs sgpr_class:$dst), - (ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset), + (ins i32imm:$frame_idx), "", [] > { let mayStore = 0; @@ -2065,8 +2039,8 @@ let Defs = [SCC] in { def SI_CONSTDATA_PTR : InstSI < (outs SReg_64:$dst), - (ins), - "", [(set SReg_64:$dst, (i64 SIconstdata_ptr))] + (ins const_ga:$ptr), + "", [(set SReg_64:$dst, (i64 (SIconstdata_ptr (tglobaladdr:$ptr))))] > { let SALU = 1; } @@ -2113,24 +2087,29 @@ multiclass SMRD_Pattern { // 1. IMM offset def : Pat < - (constant_load (SMRDImm i64:$sbase, i32:$offset)), + (smrd_load (SMRDImm i64:$sbase, i32:$offset)), (vt (!cast(Instr#"_IMM") $sbase, $offset)) >; // 2. SGPR offset def : Pat < - (constant_load (SMRDSgpr i64:$sbase, i32:$offset)), + (smrd_load (SMRDSgpr i64:$sbase, i32:$offset)), (vt (!cast(Instr#"_SGPR") $sbase, $offset)) >; def : Pat < - (constant_load (SMRDImm32 i64:$sbase, i32:$offset)), + (smrd_load (SMRDImm32 i64:$sbase, i32:$offset)), (vt (!cast(Instr#"_IMM_ci") $sbase, $offset)) > { let Predicates = [isCIOnly]; } } +// Global and constant loads can be selected to either MUBUF or SMRD +// instructions, but SMRD instructions are faster so we want the instruction +// selector to prefer those. 
+let AddedComplexity = 100 in { + defm : SMRD_Pattern <"S_LOAD_DWORD", i32>; defm : SMRD_Pattern <"S_LOAD_DWORDX2", v2i32>; defm : SMRD_Pattern <"S_LOAD_DWORDX4", v4i32>; @@ -2159,6 +2138,8 @@ def : Pat < } // End Predicates = [isCI] +} // End let AddedComplexity = 100 + //===----------------------------------------------------------------------===// // SOP1 Patterns //===----------------------------------------------------------------------===// @@ -2170,6 +2151,11 @@ def : Pat < (S_MOV_B32 0), sub1)) >; +def : Pat < + (i32 (smax i32:$x, (i32 (ineg i32:$x)))), + (S_ABS_I32 $x) +>; + //===----------------------------------------------------------------------===// // SOP2 Patterns //===----------------------------------------------------------------------===// @@ -2497,6 +2483,11 @@ def : Pat < /********** Extraction, Insertion, Building and Casting **********/ /********** ============================================ **********/ +//def : Extract_Element; +//def : Extract_Element; +//def : Extract_Element; +//def : Extract_Element; + foreach Index = 0-2 in { def Extract_Element_v2i32_#Index : Extract_Element < i32, v2i32, Index, !cast(sub#Index) @@ -2577,11 +2568,25 @@ def : BitConvert ; def : BitConvert ; def : BitConvert ; def : BitConvert ; +def : BitConvert ; def : BitConvert ; +def : BitConvert ; def : BitConvert ; def : BitConvert ; def : BitConvert ; + +def : BitConvert ; +def : BitConvert ; + +def : BitConvert ; +def : BitConvert ; +def : BitConvert ; +def : BitConvert ; + + + + def : BitConvert ; def : BitConvert ; def : BitConvert ; @@ -2610,10 +2615,9 @@ def : Pat < // Prevent expanding both fneg and fabs. -// FIXME: Should use S_OR_B32 def : Pat < (fneg (fabs f32:$src)), - (V_OR_B32_e32 $src, (V_MOV_B32_e32 0x80000000)) /* Set sign bit */ + (S_OR_B32 $src, 0x80000000) /* Set sign bit */ >; // FIXME: Should use S_OR_B32 @@ -3032,25 +3036,25 @@ multiclass SI_INDIRECT_Pattern { // 1.
Extract with offset def : Pat< - (eltvt (vector_extract vt:$vec, (add i32:$idx, imm:$off))), + (eltvt (extractelt vt:$vec, (add i32:$idx, imm:$off))), (!cast("SI_INDIRECT_SRC_"#VecSize) $vec, $idx, imm:$off) >; // 2. Extract without offset def : Pat< - (eltvt (vector_extract vt:$vec, i32:$idx)), + (eltvt (extractelt vt:$vec, i32:$idx)), (!cast("SI_INDIRECT_SRC_"#VecSize) $vec, $idx, 0) >; // 3. Insert with offset def : Pat< - (vector_insert vt:$vec, eltvt:$val, (add i32:$idx, imm:$off)), + (insertelt vt:$vec, eltvt:$val, (add i32:$idx, imm:$off)), (!cast("SI_INDIRECT_DST_"#VecSize) $vec, $idx, imm:$off, $val) >; // 4. Insert without offset def : Pat< - (vector_insert vt:$vec, eltvt:$val, i32:$idx), + (insertelt vt:$vec, eltvt:$val, i32:$idx), (!cast("SI_INDIRECT_DST_"#VecSize) $vec, $idx, 0, $val) >; } @@ -3176,12 +3180,12 @@ def : Pat < def : Pat < (i1 (trunc i32:$a)), - (V_CMP_EQ_I32_e64 (V_AND_B32_e64 (i32 1), $a), 1) + (V_CMP_EQ_I32_e64 (S_AND_B32 (i32 1), $a), 1) >; def : Pat < (i1 (trunc i64:$a)), - (V_CMP_EQ_I32_e64 (V_AND_B32_e64 (i32 1), + (V_CMP_EQ_I32_e64 (S_AND_B32 (i32 1), (EXTRACT_SUBREG $a, sub0)), 1) >;