X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FR600%2FAMDGPUInstructions.td;h=eeb7f3fcde51abcd0d95f7daa611e429232a948b;hb=515cc265c96317bb4275939a90a3d723f10e7a23;hp=1562c2a047534d2903413a15b83eb94376395fc8;hpb=2107be5bc7a11f0682abb3d56e0da6e68e32187e;p=oota-llvm.git

diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td
index 1562c2a0475..eeb7f3fcde5 100644
--- a/lib/Target/R600/AMDGPUInstructions.td
+++ b/lib/Target/R600/AMDGPUInstructions.td
@@ -71,6 +71,11 @@ def COND_OEQ : PatLeaf <
   [{return N->get() == ISD::SETOEQ || N->get() == ISD::SETEQ;}]
 >;
 
+def COND_ONE : PatLeaf <
+  (cond),
+  [{return N->get() == ISD::SETONE || N->get() == ISD::SETNE;}]
+>;
+
 def COND_OGT : PatLeaf <
   (cond),
   [{return N->get() == ISD::SETOGT || N->get() == ISD::SETGT;}]
@@ -91,23 +96,28 @@ def COND_OLE : PatLeaf <
   [{return N->get() == ISD::SETOLE || N->get() == ISD::SETLE;}]
 >;
 
-def COND_UNE : PatLeaf <
-  (cond),
-  [{return N->get() == ISD::SETUNE || N->get() == ISD::SETNE;}]
->;
 
 def COND_O : PatLeaf <(cond), [{return N->get() == ISD::SETO;}]>;
 def COND_UO : PatLeaf <(cond), [{return N->get() == ISD::SETUO;}]>;
 
 //===----------------------------------------------------------------------===//
-// PatLeafs for unsigned comparisons
+// PatLeafs for unsigned / unordered comparisons
//===----------------------------------------------------------------------===//
 
+def COND_UEQ : PatLeaf <(cond), [{return N->get() == ISD::SETUEQ;}]>;
+def COND_UNE : PatLeaf <(cond), [{return N->get() == ISD::SETUNE;}]>;
 def COND_UGT : PatLeaf <(cond), [{return N->get() == ISD::SETUGT;}]>;
 def COND_UGE : PatLeaf <(cond), [{return N->get() == ISD::SETUGE;}]>;
 def COND_ULT : PatLeaf <(cond), [{return N->get() == ISD::SETULT;}]>;
 def COND_ULE : PatLeaf <(cond), [{return N->get() == ISD::SETULE;}]>;
 
+// XXX - For some reason R600 version is preferring to use unordered
+// for setne?
+def COND_UNE_NE : PatLeaf <
+  (cond),
+  [{return N->get() == ISD::SETUNE || N->get() == ISD::SETNE;}]
+>;
+
 //===----------------------------------------------------------------------===//
 // PatLeafs for signed comparisons
 //===----------------------------------------------------------------------===//
@@ -152,10 +162,6 @@ class PrivateStore <SDPatternOperator op> : PrivateMemOp <
   (ops node:$value, node:$ptr),
   (op node:$value, node:$ptr)
 >;
-def extloadi8_private : PrivateLoad <az_extloadi8>;
-def sextloadi8_private : PrivateLoad <sextloadi8>;
-def extloadi16_private : PrivateLoad <az_extloadi16>;
-def sextloadi16_private : PrivateLoad <sextloadi16>;
 def load_private : PrivateLoad <load>;
 
 def truncstorei8_private : PrivateStore <truncstorei8>;
@@ -219,6 +225,9 @@ def sextloadi8_local : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{
   return isLocalLoad(dyn_cast<LoadSDNode>(N));
 }]>;
 
+def extloadi8_private : PrivateLoad <az_extloadi8>;
+def sextloadi8_private : PrivateLoad <sextloadi8>;
+
 def az_extloadi16 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
   return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
 }]>;
@@ -255,6 +264,9 @@ def sextloadi16_local : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{
   return isLocalLoad(dyn_cast<LoadSDNode>(N));
 }]>;
 
+def extloadi16_private : PrivateLoad <az_extloadi16>;
+def sextloadi16_private : PrivateLoad <sextloadi16>;
+
 def az_extloadi32 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
   return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
 }]>;
@@ -346,7 +358,7 @@ def atomic_load_umax_local : local_binary_atomic_op<atomic_load_umax>;
 
 def mskor_global : PatFrag<(ops node:$val, node:$ptr),
                             (AMDGPUstore_mskor node:$val, node:$ptr), [{
-  return dyn_cast<MemIntrinsicSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
+  return cast<MemIntrinsicSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
 }]>;
 
 
@@ -377,7 +389,7 @@ def flat_store : PatFrag<(ops node:$val, node:$ptr),
 
 def mskor_flat : PatFrag<(ops node:$val, node:$ptr),
                           (AMDGPUstore_mskor node:$val, node:$ptr), [{
-  return dyn_cast<MemIntrinsicSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;
+  return cast<MemIntrinsicSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;
 }]>;
 
 class global_binary_atomic_op<SDNode atomic_op> : PatFrag<
@@ -386,6 +398,7 @@ class global_binary_atomic_op<SDNode atomic_op> : PatFrag<
   [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]
 >;
 
+def atomic_swap_global : global_binary_atomic_op<atomic_swap>;
 def atomic_add_global : global_binary_atomic_op<atomic_add>;
 def atomic_and_global : global_binary_atomic_op<atomic_and>;
 def atomic_max_global : global_binary_atomic_op<atomic_max>;
@@ -400,11 +413,6 @@ def atomic_xor_global : global_binary_atomic_op<atomic_xor>;
 // Misc Pattern Fragments
 //===----------------------------------------------------------------------===//
 
-def fmad : PatFrag <
-  (ops node:$src0, node:$src1, node:$src2),
-  (fadd (fmul node:$src0, node:$src1), node:$src2)
->;
-
 class Constants {
   int TWO_PI = 0x40c90fdb;
   int PI = 0x40490fdb;
@@ -425,6 +433,11 @@ def FP_ONE : PatLeaf <
   [{return N->isExactlyValue(1.0);}]
 >;
 
+def FP_HALF : PatLeaf <
+  (fpimm),
+  [{return N->isExactlyValue(0.5);}]
+>;
+
 let isCodeGenOnly = 1, isPseudo = 1 in {
 
 let usesCustomInserter = 1 in {
@@ -523,8 +536,9 @@ class DwordAddrPat<ValueType vt, RegisterClass rc> : Pat <
 
 // BFI_INT patterns
 
-multiclass BFIPatterns <Instruction BFI_INT, Instruction LoadImm32> {
-
+multiclass BFIPatterns <Instruction BFI_INT,
+                        Instruction LoadImm32,
+                        RegisterClass RC64> {
   // Definition from ISA doc:
   //   (y & x) | (z & ~x)
   def : Pat <
@@ -546,8 +560,8 @@ multiclass BFIPatterns <Instruction BFI_INT, Instruction LoadImm32> {
   def : Pat <
     (f64 (fcopysign f64:$src0, f64:$src1)),
-    (INSERT_SUBREG (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
-                    (i32 (EXTRACT_SUBREG $src0, sub0)), sub0),
+    (REG_SEQUENCE RC64,
+      (i32 (EXTRACT_SUBREG $src0, sub0)), sub0,
       (BFI_INT (LoadImm32 0x7fffffff),
                (i32 (EXTRACT_SUBREG $src0, sub1)),
                (i32 (EXTRACT_SUBREG $src1, sub1))), sub1)
   >;
@@ -564,22 +578,20 @@ class SHA256MaPattern <Instruction BFI_INT, Instruction XOR> : Pat <
 
 // Bitfield extract patterns
 
-/*
-
-XXX: The BFE pattern is not working correctly because the XForm is not being
-applied.
+def IMMZeroBasedBitfieldMask : PatLeaf <(imm), [{
+  return isMask_32(N->getZExtValue());
+}]>;
 
-def legalshift32 : ImmLeaf <i32, [{return Imm >=0 && Imm < 32;}]>;
-def bfemask : PatLeaf <(imm), [{return isMask_32(N->getZExtValue());}],
-            SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(CountTrailingOnes_32(N->getZExtValue()), MVT::i32);}]>>;
+def IMMPopCount : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(countPopulation(N->getZExtValue()),
+                                   MVT::i32);
+}]>;
 
-class BFEPattern <Instruction BFE> : Pat <
-  (and (srl i32:$x, legalshift32:$y), bfemask:$z),
-  (BFE $x, $y, $z)
+class BFEPattern <Instruction BFE, Instruction MOV> : Pat <
+  (i32 (and (i32 (srl i32:$src, i32:$rshift)), IMMZeroBasedBitfieldMask:$mask)),
+  (BFE $src, $rshift, (MOV (i32 (IMMPopCount $mask))))
 >;
 
-*/
-
 // rotr pattern
 class ROTRPattern <Instruction BIT_ALIGN> : Pat <
   (rotr i32:$src0, i32:$src1),
@@ -589,6 +601,20 @@ class ROTRPattern <Instruction BIT_ALIGN> : Pat <
 // 24-bit arithmetic patterns
 def umul24 : PatFrag <(ops node:$x, node:$y), (mul node:$x, node:$y)>;
 
+// Special conversion patterns
+
+def cvt_rpi_i32_f32 : PatFrag <
+  (ops node:$src),
+  (fp_to_sint (ffloor (fadd $src, FP_HALF))),
+  [{ (void) N; return TM.Options.NoNaNsFPMath; }]
+>;
+
+def cvt_flr_i32_f32 : PatFrag <
+  (ops node:$src),
+  (fp_to_sint (ffloor $src)),
+  [{ (void)N; return TM.Options.NoNaNsFPMath; }]
+>;
+
 /*
 class UMUL24Pattern : Pat <
   (mul U24:$x, U24:$y),
@@ -635,17 +661,10 @@ class RcpPat<Instruction RcpInst, ValueType vt> : Pat <
   (RcpInst $src)
 >;
 
-multiclass RsqPat <Instruction RsqInst, ValueType vt> {
-  def : Pat <
-    (fdiv FP_ONE, (fsqrt vt:$src)),
-    (RsqInst $src)
-  >;
-
-  def : Pat <
-    (AMDGPUrcp (fsqrt vt:$src)),
-    (RsqInst $src)
-  >;
-}
+class RsqPat<Instruction RsqInst, ValueType vt> : Pat <
+  (AMDGPUrcp (fsqrt vt:$src)),
+  (RsqInst $src)
+>;
 
 include "R600Instructions.td"
 include "R700Instructions.td"
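
For context, a minimal sketch of how a backend .td file might instantiate the pattern classes touched by this diff. The instruction and register-class names below (V_BFE_U32, S_MOV_B32, V_BFI_B32, V_RSQ_F32_e32, SReg_64) are illustrative assumptions drawn from the SI backend, not part of this diff.

// Hypothetical instantiations, assuming SI instruction and register-class
// definitions are in scope.

// BFEPattern <BFE, MOV> matches (src >> rshift) & zero-based-mask and selects
// a bitfield-extract instruction, materializing popcount(mask) with MOV.
def : BFEPattern <V_BFE_U32, S_MOV_B32>;

// RsqPat<RsqInst, vt> now only matches (AMDGPUrcp (fsqrt x)).
def : RsqPat<V_RSQ_F32_e32, f32>;

// BFIPatterns gained a third parameter: the 64-bit register class used by the
// REG_SEQUENCE result of the f64 fcopysign pattern.
defm : BFIPatterns <V_BFI_B32, S_MOV_B32, SReg_64>;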