X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FR600%2FAMDGPUInstructions.td;h=cd3560378e57c84b1896ab366b9708cf4984696e;hb=328080423746398da1c44e679df6f9010374296a;hp=83e1359950da5caf667962854f04143027dd93bb;hpb=399880527d99f60dfbf580bb921ff7f234db3222;p=oota-llvm.git diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td index 83e1359950d..cd3560378e5 100644 --- a/lib/Target/R600/AMDGPUInstructions.td +++ b/lib/Target/R600/AMDGPUInstructions.td @@ -34,47 +34,101 @@ class AMDGPUShaderInst pattern> } +def FP32Denormals : Predicate<"Subtarget.hasFP32Denormals()">; +def FP64Denormals : Predicate<"Subtarget.hasFP64Denormals()">; +def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">; + def InstFlag : OperandWithDefaultOps ; +def ADDRIndirect : ComplexPattern; -def COND_EQ : PatLeaf < +let OperandType = "OPERAND_IMMEDIATE" in { + +def u32imm : Operand { + let PrintMethod = "printU32ImmOperand"; +} + +def u16imm : Operand { + let PrintMethod = "printU16ImmOperand"; +} + +def u8imm : Operand { + let PrintMethod = "printU8ImmOperand"; +} + +} // End OperandType = "OPERAND_IMMEDIATE" + +//===--------------------------------------------------------------------===// +// Custom Operands +//===--------------------------------------------------------------------===// +def brtarget : Operand; + +//===----------------------------------------------------------------------===// +// PatLeafs for floating-point comparisons +//===----------------------------------------------------------------------===// + +def COND_OEQ : PatLeaf < (cond), - [{switch(N->get()){{default: return false; - case ISD::SETOEQ: case ISD::SETUEQ: - case ISD::SETEQ: return true;}}}] + [{return N->get() == ISD::SETOEQ || N->get() == ISD::SETEQ;}] >; -def COND_NE : PatLeaf < +def COND_OGT : PatLeaf < + (cond), + [{return N->get() == ISD::SETOGT || N->get() == ISD::SETGT;}] +>; + +def COND_OGE : PatLeaf < + (cond), + [{return N->get() == ISD::SETOGE || N->get() == ISD::SETGE;}] +>; + +def COND_OLT : PatLeaf < (cond), - [{switch(N->get()){{default: return false; - case ISD::SETONE: case ISD::SETUNE: - case ISD::SETNE: return true;}}}] + [{return N->get() == ISD::SETOLT || N->get() == ISD::SETLT;}] >; -def COND_GT : PatLeaf < + +def COND_OLE : PatLeaf < (cond), - [{switch(N->get()){{default: return false; - case ISD::SETOGT: case ISD::SETUGT: - case ISD::SETGT: return true;}}}] + [{return N->get() == ISD::SETOLE || N->get() == ISD::SETLE;}] >; -def COND_GE : PatLeaf < +def COND_UNE : PatLeaf < (cond), - [{switch(N->get()){{default: return false; - case ISD::SETOGE: case ISD::SETUGE: - case ISD::SETGE: return true;}}}] + [{return N->get() == ISD::SETUNE || N->get() == ISD::SETNE;}] >; -def COND_LT : PatLeaf < +def COND_O : PatLeaf <(cond), [{return N->get() == ISD::SETO;}]>; +def COND_UO : PatLeaf <(cond), [{return N->get() == ISD::SETUO;}]>; + +//===----------------------------------------------------------------------===// +// PatLeafs for unsigned comparisons +//===----------------------------------------------------------------------===// + +def COND_UGT : PatLeaf <(cond), [{return N->get() == ISD::SETUGT;}]>; +def COND_UGE : PatLeaf <(cond), [{return N->get() == ISD::SETUGE;}]>; +def COND_ULT : PatLeaf <(cond), [{return N->get() == ISD::SETULT;}]>; +def COND_ULE : PatLeaf <(cond), [{return N->get() == ISD::SETULE;}]>; + +//===----------------------------------------------------------------------===// +// PatLeafs for signed comparisons +//===----------------------------------------------------------------------===// + +def COND_SGT : PatLeaf <(cond), [{return N->get() == ISD::SETGT;}]>; +def COND_SGE : PatLeaf <(cond), [{return N->get() == ISD::SETGE;}]>; +def COND_SLT : PatLeaf <(cond), [{return N->get() == ISD::SETLT;}]>; +def COND_SLE : PatLeaf <(cond), [{return N->get() == ISD::SETLE;}]>; + +//===----------------------------------------------------------------------===// +// PatLeafs for integer equality +//===----------------------------------------------------------------------===// + +def COND_EQ : PatLeaf < (cond), - [{switch(N->get()){{default: return false; - case ISD::SETOLT: case ISD::SETULT: - case ISD::SETLT: return true;}}}] + [{return N->get() == ISD::SETEQ || N->get() == ISD::SETUEQ;}] >; -def COND_LE : PatLeaf < +def COND_NE : PatLeaf < (cond), - [{switch(N->get()){{default: return false; - case ISD::SETOLE: case ISD::SETULE: - case ISD::SETLE: return true;}}}] + [{return N->get() == ISD::SETNE || N->get() == ISD::SETUNE;}] >; def COND_NULL : PatLeaf < @@ -86,15 +140,197 @@ def COND_NULL : PatLeaf < // Load/Store Pattern Fragments //===----------------------------------------------------------------------===// -def zextloadi8_global : PatFrag<(ops node:$ptr), (zextloadi8 node:$ptr), [{ +class PrivateMemOp : PatFrag (N)->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS; +}]>; + +class PrivateLoad : PrivateMemOp < + (ops node:$ptr), (op node:$ptr) +>; + +class PrivateStore : PrivateMemOp < + (ops node:$value, node:$ptr), (op node:$value, node:$ptr) +>; + +def extloadi8_private : PrivateLoad ; +def sextloadi8_private : PrivateLoad ; +def extloadi16_private : PrivateLoad ; +def sextloadi16_private : PrivateLoad ; +def load_private : PrivateLoad ; + +def truncstorei8_private : PrivateStore ; +def truncstorei16_private : PrivateStore ; +def store_private : PrivateStore ; + +def global_store : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return isGlobalStore(dyn_cast(N)); +}]>; + +// Global address space loads +def global_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ return isGlobalLoad(dyn_cast(N)); }]>; +// Constant address space loads +def constant_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return isConstantLoad(dyn_cast(N), -1); +}]>; + +def az_extload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{ + LoadSDNode *L = cast(N); + return L->getExtensionType() == ISD::ZEXTLOAD || + L->getExtensionType() == ISD::EXTLOAD; +}]>; + +def az_extloadi8 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i8; +}]>; + +def az_extloadi8_global : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{ + return isGlobalLoad(dyn_cast(N)); +}]>; + +def sextloadi8_global : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{ + return isGlobalLoad(dyn_cast(N)); +}]>; + +def az_extloadi8_constant : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{ + return isConstantLoad(dyn_cast(N), -1); +}]>; + +def sextloadi8_constant : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{ + return isConstantLoad(dyn_cast(N), -1); +}]>; + +def az_extloadi8_local : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{ + return isLocalLoad(dyn_cast(N)); +}]>; + +def sextloadi8_local : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{ + return isLocalLoad(dyn_cast(N)); +}]>; + +def az_extloadi16 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i16; +}]>; + +def az_extloadi16_global : PatFrag<(ops node:$ptr), (az_extloadi16 node:$ptr), [{ + return isGlobalLoad(dyn_cast(N)); +}]>; + +def sextloadi16_global : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{ + return isGlobalLoad(dyn_cast(N)); +}]>; + +def az_extloadi16_constant : PatFrag<(ops node:$ptr), (az_extloadi16 node:$ptr), [{ + return isConstantLoad(dyn_cast(N), -1); +}]>; + +def sextloadi16_constant : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{ + return isConstantLoad(dyn_cast(N), -1); +}]>; + +def az_extloadi16_local : PatFrag<(ops node:$ptr), (az_extloadi16 node:$ptr), [{ + return isLocalLoad(dyn_cast(N)); +}]>; + +def sextloadi16_local : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{ + return isLocalLoad(dyn_cast(N)); +}]>; + +def az_extloadi32 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i32; +}]>; + +def az_extloadi32_global : PatFrag<(ops node:$ptr), + (az_extloadi32 node:$ptr), [{ + return isGlobalLoad(dyn_cast(N)); +}]>; + +def az_extloadi32_constant : PatFrag<(ops node:$ptr), + (az_extloadi32 node:$ptr), [{ + return isConstantLoad(dyn_cast(N), -1); +}]>; + +def truncstorei8_global : PatFrag<(ops node:$val, node:$ptr), + (truncstorei8 node:$val, node:$ptr), [{ + return isGlobalStore(dyn_cast(N)); +}]>; + +def truncstorei16_global : PatFrag<(ops node:$val, node:$ptr), + (truncstorei16 node:$val, node:$ptr), [{ + return isGlobalStore(dyn_cast(N)); +}]>; + +def local_store : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return isLocalStore(dyn_cast(N)); +}]>; + +def truncstorei8_local : PatFrag<(ops node:$val, node:$ptr), + (truncstorei8 node:$val, node:$ptr), [{ + return isLocalStore(dyn_cast(N)); +}]>; + +def truncstorei16_local : PatFrag<(ops node:$val, node:$ptr), + (truncstorei16 node:$val, node:$ptr), [{ + return isLocalStore(dyn_cast(N)); +}]>; + +def local_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return isLocalLoad(dyn_cast(N)); +}]>; + + +class local_binary_atomic_op : + PatFrag<(ops node:$ptr, node:$value), + (atomic_op node:$ptr, node:$value), [{ + return cast(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; +}]>; + + +def atomic_swap_local : local_binary_atomic_op; +def atomic_load_add_local : local_binary_atomic_op; +def atomic_load_sub_local : local_binary_atomic_op; +def atomic_load_and_local : local_binary_atomic_op; +def atomic_load_or_local : local_binary_atomic_op; +def atomic_load_xor_local : local_binary_atomic_op; +def atomic_load_nand_local : local_binary_atomic_op; +def atomic_load_min_local : local_binary_atomic_op; +def atomic_load_max_local : local_binary_atomic_op; +def atomic_load_umin_local : local_binary_atomic_op; +def atomic_load_umax_local : local_binary_atomic_op; + +def mskor_global : PatFrag<(ops node:$val, node:$ptr), + (AMDGPUstore_mskor node:$val, node:$ptr), [{ + return dyn_cast(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS; +}]>; + +def atomic_cmp_swap_32_local : + PatFrag<(ops node:$ptr, node:$cmp, node:$swap), + (atomic_cmp_swap node:$ptr, node:$cmp, node:$swap), [{ + AtomicSDNode *AN = cast(N); + return AN->getMemoryVT() == MVT::i32 && + AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; +}]>; + +def atomic_cmp_swap_64_local : + PatFrag<(ops node:$ptr, node:$cmp, node:$swap), + (atomic_cmp_swap node:$ptr, node:$cmp, node:$swap), [{ + AtomicSDNode *AN = cast(N); + return AN->getMemoryVT() == MVT::i64 && + AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; +}]>; + + class Constants { int TWO_PI = 0x40c90fdb; int PI = 0x40490fdb; int TWO_PI_INV = 0x3e22f983; -int FP_UINT_MAX_PLUS_1 = 0x4f800000; // 1 << 32 in floating point encoding +int FP_UINT_MAX_PLUS_1 = 0x4f800000; // 1 << 32 in floating point encoding +int FP32_NEG_ONE = 0xbf800000; +int FP32_ONE = 0x3f800000; } def CONST : Constants; @@ -116,7 +352,7 @@ class CLAMP : AMDGPUShaderInst < (outs rc:$dst), (ins rc:$src0), "CLAMP $dst, $src0", - [(set f32:$dst, (int_AMDIL_clamp f32:$src0, (f32 FP_ZERO), (f32 FP_ONE)))] + [(set f32:$dst, (AMDGPUclamp f32:$src0, (f32 FP_ZERO), (f32 FP_ONE)))] >; class FABS : AMDGPUShaderInst < @@ -137,6 +373,8 @@ class FNEG : AMDGPUShaderInst < multiclass RegisterLoadStore { +let UseNamedOperandTable = 1 in { + def RegisterLoad : AMDGPUShaderInst < (outs dstClass:$dst), (ins addrClass:$addr, i32imm:$chan), @@ -155,6 +393,7 @@ multiclass RegisterLoadStore /* --------------------- */ /* Extract element pattern */ -class Extract_Element : Pat< (sub_type (vector_extract vec_type:$src, sub_idx)), @@ -186,61 +425,6 @@ class Insert_Element ; -// Vector Build pattern -class Vector1_Build : Pat < - (vecType (build_vector elemType:$src)), - (vecType (COPY_TO_REGCLASS $src, rc)) ->; - -class Vector2_Build : Pat < - (vecType (build_vector elemType:$sub0, elemType:$sub1)), - (INSERT_SUBREG (INSERT_SUBREG - (vecType (IMPLICIT_DEF)), $sub0, sub0), $sub1, sub1) ->; - -class Vector4_Build : Pat < - (vecType (build_vector elemType:$x, elemType:$y, elemType:$z, elemType:$w)), - (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG - (vecType (IMPLICIT_DEF)), $x, sub0), $y, sub1), $z, sub2), $w, sub3) ->; - -class Vector8_Build : Pat < - (vecType (build_vector elemType:$sub0, elemType:$sub1, - elemType:$sub2, elemType:$sub3, - elemType:$sub4, elemType:$sub5, - elemType:$sub6, elemType:$sub7)), - (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG - (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG - (vecType (IMPLICIT_DEF)), $sub0, sub0), $sub1, sub1), - $sub2, sub2), $sub3, sub3), - $sub4, sub4), $sub5, sub5), - $sub6, sub6), $sub7, sub7) ->; - -class Vector16_Build : Pat < - (vecType (build_vector elemType:$sub0, elemType:$sub1, - elemType:$sub2, elemType:$sub3, - elemType:$sub4, elemType:$sub5, - elemType:$sub6, elemType:$sub7, - elemType:$sub8, elemType:$sub9, - elemType:$sub10, elemType:$sub11, - elemType:$sub12, elemType:$sub13, - elemType:$sub14, elemType:$sub15)), - (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG - (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG - (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG - (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG - (vecType (IMPLICIT_DEF)), $sub0, sub0), $sub1, sub1), - $sub2, sub2), $sub3, sub3), - $sub4, sub4), $sub5, sub5), - $sub6, sub6), $sub7, sub7), - $sub8, sub8), $sub9, sub9), - $sub10, sub10), $sub11, sub11), - $sub12, sub12), $sub13, sub13), - $sub14, sub14), $sub15, sub15) ->; - // XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer // can handle COPY instructions. // bitconvert pattern @@ -258,7 +442,7 @@ class DwordAddrPat : Pat < // BFI_INT patterns -multiclass BFIPatterns { +multiclass BFIPatterns { // Definition from ISA doc: // (y & x) | (z & ~x) @@ -274,9 +458,118 @@ multiclass BFIPatterns { (BFI_INT $x, $y, $z) >; + def : Pat < + (fcopysign f32:$src0, f32:$src1), + (BFI_INT (LoadImm32 0x7fffffff), $src0, $src1) + >; + + def : Pat < + (f64 (fcopysign f64:$src0, f64:$src1)), + (INSERT_SUBREG (INSERT_SUBREG (f64 (IMPLICIT_DEF)), + (i32 (EXTRACT_SUBREG $src0, sub0)), sub0), + (BFI_INT (LoadImm32 0x7fffffff), + (i32 (EXTRACT_SUBREG $src0, sub1)), + (i32 (EXTRACT_SUBREG $src1, sub1))), sub1) + >; +} + +// SHA-256 Ma patterns + +// ((x & z) | (y & (x | z))) -> BFI_INT (XOR x, y), z, y +class SHA256MaPattern : Pat < + (or (and i32:$x, i32:$z), (and i32:$y, (or i32:$x, i32:$z))), + (BFI_INT (XOR i32:$x, i32:$y), i32:$z, i32:$y) +>; + +// Bitfield extract patterns + +/* + +XXX: The BFE pattern is not working correctly because the XForm is not being +applied. + +def legalshift32 : ImmLeaf =0 && Imm < 32;}]>; +def bfemask : PatLeaf <(imm), [{return isMask_32(N->getZExtValue());}], + SDNodeXFormgetTargetConstant(CountTrailingOnes_32(N->getZExtValue()), MVT::i32);}]>>; + +class BFEPattern : Pat < + (and (srl i32:$x, legalshift32:$y), bfemask:$z), + (BFE $x, $y, $z) +>; + +*/ + +// rotr pattern +class ROTRPattern : Pat < + (rotr i32:$src0, i32:$src1), + (BIT_ALIGN $src0, $src0, $src1) +>; + +// 24-bit arithmetic patterns +def umul24 : PatFrag <(ops node:$x, node:$y), (mul node:$x, node:$y)>; + +/* +class UMUL24Pattern : Pat < + (mul U24:$x, U24:$y), + (UMUL24 $x, $y) +>; +*/ + +class IMad24Pat : Pat < + (add (AMDGPUmul_i24 i32:$src0, i32:$src1), i32:$src2), + (Inst $src0, $src1, $src2) +>; + +class UMad24Pat : Pat < + (add (AMDGPUmul_u24 i32:$src0, i32:$src1), i32:$src2), + (Inst $src0, $src1, $src2) +>; + +multiclass Expand24IBitOps { + def _expand_imad24 : Pat < + (AMDGPUmad_i24 i32:$src0, i32:$src1, i32:$src2), + (AddInst (MulInst $src0, $src1), $src2) + >; + + def _expand_imul24 : Pat < + (AMDGPUmul_i24 i32:$src0, i32:$src1), + (MulInst $src0, $src1) + >; +} + +multiclass Expand24UBitOps { + def _expand_umad24 : Pat < + (AMDGPUmad_u24 i32:$src0, i32:$src1, i32:$src2), + (AddInst (MulInst $src0, $src1), $src2) + >; + + def _expand_umul24 : Pat < + (AMDGPUmul_u24 i32:$src0, i32:$src1), + (MulInst $src0, $src1) + >; +} + +class RcpPat : Pat < + (fdiv FP_ONE, vt:$src), + (RcpInst $src) +>; + +multiclass RsqPat { + def : Pat < + (fdiv FP_ONE, (fsqrt vt:$src)), + (RsqInst $src) + >; + + def : Pat < + (AMDGPUrcp (fsqrt vt:$src)), + (RsqInst $src) + >; } include "R600Instructions.td" +include "R700Instructions.td" +include "EvergreenInstructions.td" +include "CaymanInstructions.td" include "SIInstrInfo.td"