X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FR600%2FAMDGPUInstructions.td;h=eeb7f3fcde51abcd0d95f7daa611e429232a948b;hb=0973b7ddb8f8267132147c8b24dae7b2dfa1fd02;hp=127b74a0edbb77eeb1c05b61f88df4384d74846b;hpb=f49da4338a71b7a6053ce8bfcb070a9f145b4d69;p=oota-llvm.git

diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td
index 127b74a0edb..eeb7f3fcde5 100644
--- a/lib/Target/R600/AMDGPUInstructions.td
+++ b/lib/Target/R600/AMDGPUInstructions.td
@@ -34,9 +34,15 @@ class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern>
 
 }
 
+def FP32Denormals : Predicate<"Subtarget.hasFP32Denormals()">;
+def FP64Denormals : Predicate<"Subtarget.hasFP64Denormals()">;
+def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;
+
 def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;
 def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;
 
+let OperandType = "OPERAND_IMMEDIATE" in {
+
 def u32imm : Operand<i32> {
   let PrintMethod = "printU32ImmOperand";
 }
@@ -49,6 +55,13 @@ def u8imm : Operand<i32> {
   let PrintMethod = "printU8ImmOperand";
 }
 
+} // End OperandType = "OPERAND_IMMEDIATE"
+
+//===--------------------------------------------------------------------===//
+// Custom Operands
+//===--------------------------------------------------------------------===//
+def brtarget : Operand<OtherVT>;
+
 //===----------------------------------------------------------------------===//
 // PatLeafs for floating-point comparisons
 //===----------------------------------------------------------------------===//
@@ -58,6 +71,11 @@ def COND_OEQ : PatLeaf <
   [{return N->get() == ISD::SETOEQ || N->get() == ISD::SETEQ;}]
 >;
 
+def COND_ONE : PatLeaf <
+  (cond),
+  [{return N->get() == ISD::SETONE || N->get() == ISD::SETNE;}]
+>;
+
 def COND_OGT : PatLeaf <
   (cond),
   [{return N->get() == ISD::SETOGT || N->get() == ISD::SETGT;}]
@@ -78,23 +96,28 @@ def COND_OLE : PatLeaf <
   [{return N->get() == ISD::SETOLE || N->get() == ISD::SETLE;}]
 >;
 
-def COND_UNE : PatLeaf <
-  (cond),
-  [{return N->get() == ISD::SETUNE || N->get() == ISD::SETNE;}]
->;
-
 def COND_O : PatLeaf <(cond), [{return N->get() == ISD::SETO;}]>;
 def COND_UO : PatLeaf <(cond), [{return N->get() == ISD::SETUO;}]>;
 
 //===----------------------------------------------------------------------===//
-// PatLeafs for unsigned comparisons
+// PatLeafs for unsigned / unordered comparisons
 //===----------------------------------------------------------------------===//
 
+def COND_UEQ : PatLeaf <(cond), [{return N->get() == ISD::SETUEQ;}]>;
+def COND_UNE : PatLeaf <(cond), [{return N->get() == ISD::SETUNE;}]>;
 def COND_UGT : PatLeaf <(cond), [{return N->get() == ISD::SETUGT;}]>;
 def COND_UGE : PatLeaf <(cond), [{return N->get() == ISD::SETUGE;}]>;
 def COND_ULT : PatLeaf <(cond), [{return N->get() == ISD::SETULT;}]>;
 def COND_ULE : PatLeaf <(cond), [{return N->get() == ISD::SETULE;}]>;
 
+// XXX - For some reason the R600 version prefers to use unordered
+// for setne?
+def COND_UNE_NE : PatLeaf <
+  (cond),
+  [{return N->get() == ISD::SETUNE || N->get() == ISD::SETNE;}]
+>;
+
 //===----------------------------------------------------------------------===//
 // PatLeafs for signed comparisons
 //===----------------------------------------------------------------------===//
@@ -120,13 +143,46 @@ def COND_NE : PatLeaf <
 
 def COND_NULL : PatLeaf <
   (cond),
-  [{return false;}]
+  [{(void)N; return false;}]
 >;
 
 //===----------------------------------------------------------------------===//
 // Load/Store Pattern Fragments
 //===----------------------------------------------------------------------===//
 
+class PrivateMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
+  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
+}]>;
+
+class PrivateLoad <SDPatternOperator op> : PrivateMemOp <
+  (ops node:$ptr), (op node:$ptr)
+>;
+
+class PrivateStore <SDPatternOperator op> : PrivateMemOp <
+  (ops node:$value, node:$ptr), (op node:$value, node:$ptr)
+>;
+
+def load_private : PrivateLoad <load>;
+
+def truncstorei8_private : PrivateStore <truncstorei8>;
+def truncstorei16_private : PrivateStore <truncstorei16>;
+def store_private : PrivateStore <store>;
+
+def global_store : PatFrag<(ops node:$val, node:$ptr),
+    (store node:$val, node:$ptr), [{
+  return isGlobalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+
+// Global address space loads
+def global_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  return isGlobalLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
+// Constant address space loads
+def constant_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
+}]>;
+
 def az_extload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{
   LoadSDNode *L = cast<LoadSDNode>(N);
   return L->getExtensionType() == ISD::ZEXTLOAD ||
@@ -145,6 +201,14 @@ def sextloadi8_global : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{
   return isGlobalLoad(dyn_cast<LoadSDNode>(N));
 }]>;
 
+def az_extloadi8_flat : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{
+  return isFlatLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
+def sextloadi8_flat : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{
+  return isFlatLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
 def az_extloadi8_constant : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{
   return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
 }]>;
@@ -161,6 +225,9 @@ def sextloadi8_local : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{
   return isLocalLoad(dyn_cast<LoadSDNode>(N));
 }]>;
 
+def extloadi8_private : PrivateLoad <az_extloadi8>;
+def sextloadi8_private : PrivateLoad <sextloadi8>;
+
 def az_extloadi16 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
   return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
 }]>;
@@ -173,6 +240,14 @@ def sextloadi16_global : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{
   return isGlobalLoad(dyn_cast<LoadSDNode>(N));
 }]>;
 
+def az_extloadi16_flat : PatFrag<(ops node:$ptr), (az_extloadi16 node:$ptr), [{
+  return isFlatLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
+def sextloadi16_flat : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{
+  return isFlatLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
 def az_extloadi16_constant : PatFrag<(ops node:$ptr), (az_extloadi16 node:$ptr), [{
   return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
 }]>;
@@ -189,6 +264,9 @@ def sextloadi16_local : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{
   return isLocalLoad(dyn_cast<LoadSDNode>(N));
 }]>;
 
+def extloadi16_private : PrivateLoad <az_extloadi16>;
+def sextloadi16_private : PrivateLoad <sextloadi16>;
+
 def az_extloadi32 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
   return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
 }]>;
@@ -198,6 +276,11 @@ def az_extloadi32_global : PatFrag<(ops node:$ptr),
   (az_extloadi32 node:$ptr), [{
   return isGlobalLoad(dyn_cast<LoadSDNode>(N));
 }]>;
 
+def az_extloadi32_flat : PatFrag<(ops node:$ptr),
+  (az_extloadi32 node:$ptr), [{
+  return isFlatLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
 def az_extloadi32_constant : PatFrag<(ops node:$ptr),
   (az_extloadi32 node:$ptr), [{
   return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
@@ -213,6 +296,16 @@ def truncstorei16_global : PatFrag<(ops node:$val, node:$ptr),
   return isGlobalStore(dyn_cast<StoreSDNode>(N));
 }]>;
 
+def truncstorei8_flat : PatFrag<(ops node:$val, node:$ptr),
+  (truncstorei8 node:$val, node:$ptr), [{
+  return isFlatStore(dyn_cast<StoreSDNode>(N));
+}]>;
+
+def truncstorei16_flat : PatFrag<(ops node:$val, node:$ptr),
+  (truncstorei16 node:$val, node:$ptr), [{
+  return isFlatStore(dyn_cast<StoreSDNode>(N));
+}]>;
+
 def local_store : PatFrag<(ops node:$val, node:$ptr),
                              (store node:$val, node:$ptr), [{
   return isLocalStore(dyn_cast<StoreSDNode>(N));
@@ -232,26 +325,101 @@ def local_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
   return isLocalLoad(dyn_cast<LoadSDNode>(N));
 }]>;
 
-def atomic_load_add_local : PatFrag<(ops node:$ptr, node:$value),
-                                    (atomic_load_add node:$ptr, node:$value), [{
-  return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+class Aligned8Bytes <dag ops, dag frag> : PatFrag <ops, frag, [{
+  return cast<MemSDNode>(N)->getAlignment() % 8 == 0;
 }]>;
 
-def atomic_load_sub_local : PatFrag<(ops node:$ptr, node:$value),
-                                    (atomic_load_sub node:$ptr, node:$value), [{
-  return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+def local_load_aligned8bytes : Aligned8Bytes <
+  (ops node:$ptr), (local_load node:$ptr)
+>;
+
+def local_store_aligned8bytes : Aligned8Bytes <
+  (ops node:$val, node:$ptr), (local_store node:$val, node:$ptr)
+>;
+
+class local_binary_atomic_op <SDNode atomic_op> :
+  PatFrag<(ops node:$ptr, node:$value),
+    (atomic_op node:$ptr, node:$value), [{
+  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
 }]>;
 
+
+def atomic_swap_local : local_binary_atomic_op<atomic_swap>;
+def atomic_load_add_local : local_binary_atomic_op<atomic_load_add>;
+def atomic_load_sub_local : local_binary_atomic_op<atomic_load_sub>;
+def atomic_load_and_local : local_binary_atomic_op<atomic_load_and>;
+def atomic_load_or_local : local_binary_atomic_op<atomic_load_or>;
+def atomic_load_xor_local : local_binary_atomic_op<atomic_load_xor>;
+def atomic_load_nand_local : local_binary_atomic_op<atomic_load_nand>;
+def atomic_load_min_local : local_binary_atomic_op<atomic_load_min>;
+def atomic_load_max_local : local_binary_atomic_op<atomic_load_max>;
+def atomic_load_umin_local : local_binary_atomic_op<atomic_load_umin>;
+def atomic_load_umax_local : local_binary_atomic_op<atomic_load_umax>;
+
 def mskor_global : PatFrag<(ops node:$val, node:$ptr),
                             (AMDGPUstore_mskor node:$val, node:$ptr), [{
-  return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
+  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
 }]>;
 
+
+def atomic_cmp_swap_32_local :
+  PatFrag<(ops node:$ptr, node:$cmp, node:$swap),
+          (atomic_cmp_swap node:$ptr, node:$cmp, node:$swap), [{
+  AtomicSDNode *AN = cast<AtomicSDNode>(N);
+  return AN->getMemoryVT() == MVT::i32 &&
+         AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+}]>;
+
+def atomic_cmp_swap_64_local :
+  PatFrag<(ops node:$ptr, node:$cmp, node:$swap),
+          (atomic_cmp_swap node:$ptr, node:$cmp, node:$swap), [{
+  AtomicSDNode *AN = cast<AtomicSDNode>(N);
+  return AN->getMemoryVT() == MVT::i64 &&
+         AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+}]>;
+
+def flat_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  return isFlatLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
+def flat_store : PatFrag<(ops node:$val, node:$ptr),
+                         (store node:$val, node:$ptr), [{
+  return isFlatStore(dyn_cast<StoreSDNode>(N));
+}]>;
+
+def mskor_flat : PatFrag<(ops node:$val, node:$ptr),
+                          (AMDGPUstore_mskor node:$val, node:$ptr), [{
+  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;
+}]>;
+
+class global_binary_atomic_op<SDNode atomic_op> : PatFrag<
+  (ops node:$ptr, node:$value),
+  (atomic_op node:$ptr, node:$value),
+  [{return cast<MemSDNode>(N)->getAddressSpace() ==
+          AMDGPUAS::GLOBAL_ADDRESS;}]
+>;
+
+def atomic_swap_global : global_binary_atomic_op<atomic_swap>;
+def atomic_add_global : global_binary_atomic_op<atomic_load_add>;
+def atomic_and_global : global_binary_atomic_op<atomic_load_and>;
+def atomic_max_global : global_binary_atomic_op<atomic_load_max>;
+def atomic_min_global : global_binary_atomic_op<atomic_load_min>;
+def atomic_or_global : global_binary_atomic_op<atomic_load_or>;
+def atomic_sub_global : global_binary_atomic_op<atomic_load_sub>;
+def atomic_umax_global : global_binary_atomic_op<atomic_load_umax>;
+def atomic_umin_global : global_binary_atomic_op<atomic_load_umin>;
+def atomic_xor_global : global_binary_atomic_op<atomic_load_xor>;
+
+//===----------------------------------------------------------------------===//
+// Misc Pattern Fragments
+//===----------------------------------------------------------------------===//
+
 class Constants {
 int TWO_PI = 0x40c90fdb;
 int PI = 0x40490fdb;
 int TWO_PI_INV = 0x3e22f983;
 int FP_UINT_MAX_PLUS_1 = 0x4f800000;    // 1 << 32 in floating point encoding
+int FP32_NEG_ONE = 0xbf800000;
+int FP32_ONE = 0x3f800000;
 }
 def CONST : Constants;
 
@@ -265,6 +433,11 @@ def FP_ONE : PatLeaf <
   [{return N->isExactlyValue(1.0);}]
 >;
 
+def FP_HALF : PatLeaf <
+  (fpimm),
+  [{return N->isExactlyValue(0.5);}]
+>;
+
 let isCodeGenOnly = 1, isPseudo = 1 in {
 
 let usesCustomInserter = 1 in {
@@ -273,7 +446,7 @@ class CLAMP <RegisterClass rc> : AMDGPUShaderInst <
   (outs rc:$dst),
   (ins rc:$src0),
   "CLAMP $dst, $src0",
-  [(set f32:$dst, (int_AMDIL_clamp f32:$src0, (f32 FP_ZERO), (f32 FP_ONE)))]
+  [(set f32:$dst, (AMDGPUclamp f32:$src0, (f32 FP_ZERO), (f32 FP_ONE)))]
 >;
 
 class FABS <RegisterClass rc> : AMDGPUShaderInst <
@@ -363,8 +536,9 @@ class DwordAddrPat<ValueType vt, RegisterClass rc> : Pat <
 
 // BFI_INT patterns
 
-multiclass BFIPatterns <Instruction BFI_INT> {
-
+multiclass BFIPatterns <Instruction BFI_INT,
+                        Instruction LoadImm32,
+                        RegisterClass RC64> {
   // Definition from ISA doc:
   // (y & x) | (z & ~x)
   def : Pat <
@@ -379,6 +553,19 @@ multiclass BFIPatterns <Instruction BFI_INT> {
     (BFI_INT $x, $y, $z)
   >;
 
+  def : Pat <
+    (fcopysign f32:$src0, f32:$src1),
+    (BFI_INT (LoadImm32 0x7fffffff), $src0, $src1)
+  >;
+
+  def : Pat <
+    (f64 (fcopysign f64:$src0, f64:$src1)),
+    (REG_SEQUENCE RC64,
+      (i32 (EXTRACT_SUBREG $src0, sub0)), sub0,
+      (BFI_INT (LoadImm32 0x7fffffff),
+               (i32 (EXTRACT_SUBREG $src0, sub1)),
+               (i32 (EXTRACT_SUBREG $src1, sub1))), sub1)
+  >;
 }
 
 // SHA-256 Ma patterns
@@ -391,22 +578,20 @@ class SHA256MaPattern <Instruction BFI_INT, Instruction XOR> : Pat <
 
 // Bitfield extract patterns
 
-/*
-
-XXX: The BFE pattern is not working correctly because the XForm is not being
-applied.
+def IMMZeroBasedBitfieldMask : PatLeaf <(imm), [{
+  return isMask_32(N->getZExtValue());
+}]>;
 
-def legalshift32 : ImmLeaf <i32, [{return Imm >=0 && Imm < 32;}]>;
-def bfemask : PatLeaf <(imm), [{return isMask_32(N->getZExtValue());}],
-  SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(CountTrailingOnes_32(N->getZExtValue()), MVT::i32);}]>>;
+def IMMPopCount : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(countPopulation(N->getZExtValue()),
+                                   MVT::i32);
+}]>;
 
-class BFEPattern <Instruction BFE> : Pat <
-  (and (srl i32:$x, legalshift32:$y), bfemask:$z),
-  (BFE $x, $y, $z)
+class BFEPattern <Instruction BFE, Instruction MOV> : Pat <
+  (i32 (and (i32 (srl i32:$src, i32:$rshift)), IMMZeroBasedBitfieldMask:$mask)),
+  (BFE $src, $rshift, (MOV (i32 (IMMPopCount $mask))))
 >;
 
-*/
-
 // rotr pattern
 class ROTRPattern <Instruction BIT_ALIGN> : Pat <
   (rotr i32:$src0, i32:$src1),
@@ -416,6 +601,20 @@
 // 24-bit arithmetic patterns
 def umul24 : PatFrag <(ops node:$x, node:$y), (mul node:$x, node:$y)>;
 
+// Special conversion patterns
+
+def cvt_rpi_i32_f32 : PatFrag <
+  (ops node:$src),
+  (fp_to_sint (ffloor (fadd $src, FP_HALF))),
+  [{ (void) N; return TM.Options.NoNaNsFPMath; }]
+>;
+
+def cvt_flr_i32_f32 : PatFrag <
+  (ops node:$src),
+  (fp_to_sint (ffloor $src)),
+  [{ (void)N; return TM.Options.NoNaNsFPMath; }]
+>;
+
 /*
 class UMUL24Pattern <Instruction UMUL24> : Pat <
   (mul U24:$x, U24:$y),
@@ -433,6 +632,39 @@ class UMad24Pat<Instruction Inst> : Pat <
   (Inst $src0, $src1, $src2)
 >;
 
+multiclass Expand24IBitOps <Instruction MulInst, Instruction AddInst> {
+  def _expand_imad24 : Pat <
+    (AMDGPUmad_i24 i32:$src0, i32:$src1, i32:$src2),
+    (AddInst (MulInst $src0, $src1), $src2)
+  >;
+
+  def _expand_imul24 : Pat <
+    (AMDGPUmul_i24 i32:$src0, i32:$src1),
+    (MulInst $src0, $src1)
+  >;
+}
+
+multiclass Expand24UBitOps <Instruction MulInst, Instruction AddInst> {
+  def _expand_umad24 : Pat <
+    (AMDGPUmad_u24 i32:$src0, i32:$src1, i32:$src2),
+    (AddInst (MulInst $src0, $src1), $src2)
+  >;
+
+  def _expand_umul24 : Pat <
+    (AMDGPUmul_u24 i32:$src0, i32:$src1),
+    (MulInst $src0, $src1)
+  >;
+}
+
+class RcpPat<Instruction RcpInst, ValueType vt> : Pat <
+  (fdiv FP_ONE, vt:$src),
+  (RcpInst $src)
+>;
+
+class RsqPat<Instruction RsqInst, ValueType vt> : Pat <
+  (AMDGPUrcp (fsqrt vt:$src)),
+  (RsqInst $src)
+>;
+
 include "R600Instructions.td"
 include "R700Instructions.td"
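
The classes and multiclasses in this patch are only pattern templates; each GPU generation instantiates them against its own instructions in the per-generation files included above and in the SI instruction definitions. A minimal TableGen sketch of such instantiations follows; the instruction and register-class names are assumptions in the style of EvergreenInstructions.td and SIInstructions.td from this era, not verified against this exact revision:

  // Evergreen: bitfield insert/extract via the EG ALU instructions
  // (BFI_INT_eg, BFE_UINT_eg, MOV_IMM_I32 and R600_Reg64 are assumed names).
  defm : BFIPatterns <BFI_INT_eg, MOV_IMM_I32, R600_Reg64>;
  def : BFEPattern <BFE_UINT_eg, MOV_IMM_I32>;

  // SI: the same generic patterns mapped onto VALU/SALU instructions
  // (V_BFI_B32, S_MOV_B32, SReg_64, V_RCP_F32_e32, V_RSQ_F32_e32 assumed).
  defm : BFIPatterns <V_BFI_B32, S_MOV_B32, SReg_64>;
  def : RcpPat <V_RCP_F32_e32, f32>;
  def : RsqPat <V_RSQ_F32_e32, f32>;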