X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FX86%2FX86InstrSSE.td;h=bbdaed4116aeb52f62b16226c4052cd0dde3e381;hb=be1778fea76e1f63b08f1f838fca88a0c8d9a883;hp=694b91ea4298d51c779db15ee2404737a790d328;hpb=400073d5467b79534d8c63b0d996a55e4252ff4b;p=oota-llvm.git

diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 694b91ea429..bbdaed4116a 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -131,11 +131,13 @@ def alignedloadv2i64 : PatFrag<(ops node:$ptr),
 
 // Like 'load', but uses special alignment checks suitable for use in
 // memory operands in most SSE instructions, which are required to
-// be naturally aligned on some targets but not on others.
-// FIXME: Actually implement support for targets that don't require the
-// alignment. This probably wants a subtarget predicate.
+// be naturally aligned on some targets but not on others. If the subtarget
+// allows unaligned accesses, match any load, though this may require
+// setting a feature bit in the processor (on startup, for example).
+// Opteron 10h and later implement such a feature.
 def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{
-  return cast<LoadSDNode>(N)->getAlignment() >= 16;
+  return Subtarget->hasVectorUAMem()
+      || cast<LoadSDNode>(N)->getAlignment() >= 16;
 }]>;
 
 def memopfsf32 : PatFrag<(ops node:$ptr), (f32 (memop node:$ptr))>;
@@ -503,9 +505,10 @@ def Int_COMISSrm: PSI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
 // Alias instructions that map fld0 to pxor for sse.
 let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1,
     canFoldAsLoad = 1 in
-def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins),
-                 "pxor\t$dst, $dst", [(set FR32:$dst, fp32imm0)]>,
-               Requires<[HasSSE1]>, TB, OpSize;
+  // FIXME: Set encoding to pseudo!
+def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "",
+                 [(set FR32:$dst, fp32imm0)]>,
+               Requires<[HasSSE1]>, TB, OpSize;
 
 // Alias instruction to do FR32 reg-to-reg copy using movaps. Upper bits are
 // disregarded.
@@ -827,7 +830,7 @@ multiclass sse1_fp_unop_rm<bits<8> opc, string OpcodeStr,
   def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
               !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
               [(set FR32:$dst, (OpNode (load addr:$src)))]>, XS,
-            Requires<[HasSSE1, NoSSEBreakDep]>;
+            Requires<[HasSSE1, OptForSize]>;
 
   // Vector operation, reg.
   def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
@@ -1120,7 +1123,7 @@ def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
 def CVTSD2SSrm  : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
                     "cvtsd2ss\t{$src, $dst|$dst, $src}",
                     [(set FR32:$dst, (fround (loadf64 addr:$src)))]>, XD,
-                  Requires<[HasSSE2, NoSSEBreakDep]>;
+                  Requires<[HasSSE2, OptForSize]>;
 def CVTSI2SDrr  : SDI<0x2A, MRMSrcReg, (outs FR64:$dst), (ins GR32:$src),
                       "cvtsi2sd\t{$src, $dst|$dst, $src}",
                       [(set FR64:$dst, (sint_to_fp GR32:$src))]>;
@@ -1157,10 +1160,10 @@ def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
 def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
                    "cvtss2sd\t{$src, $dst|$dst, $src}",
                    [(set FR64:$dst, (extloadf32 addr:$src))]>, XS,
-                 Requires<[HasSSE2, NoSSEBreakDep]>;
+                 Requires<[HasSSE2, OptForSize]>;
 
 def : Pat<(extloadf32 addr:$src),
-          (CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[SSEBreakDep]>;
+          (CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[HasSSE2, OptForSpeed]>;
 
 // Match intrinsics which expect XMM operand(s).
 def Int_CVTSD2SIrr : SDI<0x2D, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
@@ -1267,8 +1270,8 @@ def Int_COMISDrm: PDI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
 // Alias instructions that map fld0 to pxor for sse.
 let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1,
     canFoldAsLoad = 1 in
-def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins),
-                 "pxor\t$dst, $dst", [(set FR64:$dst, fpimm0)]>,
+def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "",
+                 [(set FR64:$dst, fpimm0)]>,
                Requires<[HasSSE2]>, TB, OpSize;
 
 // Alias instruction to do FR64 reg-to-reg copy using movapd. Upper bits are
@@ -2327,8 +2330,8 @@ def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8 imm:$ss),
 // load of an all-ones value if folding it would be beneficial.
 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
     isCodeGenOnly = 1 in
-  def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins),
-                         "pcmpeqd\t$dst, $dst",
+  // FIXME: Change encoding to pseudo.
+  def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
                          [(set VR128:$dst, (v4i32 immAllOnesV))]>;
 
 // FR64 to 128-bit vector conversion.
@@ -3232,7 +3235,7 @@ multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd,
                     [(set VR128:$dst,
                           (V4F32Int (memopv4f32 addr:$src1),imm:$src2))]>,
                     TA, OpSize,
-                    Requires<[HasSSE41, NoSSEBreakDep]>;
+                    Requires<[HasSSE41]>;
 
   // Vector intrinsic operation, reg
   def PDr_Int : SS4AIi8<opcpd, MRMSrcReg,
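
Note on the memop hunk above: the new PatFrag predicate lets instruction selection fold a load directly into an SSE instruction's memory operand either when the subtarget reports that unaligned vector memory access is safe (hasVectorUAMem, the misaligned-SSE support on AMD 10h and later) or when the load is known to be 16-byte aligned. The following is a minimal standalone C++ sketch of that decision rule, not part of the patch; the function name and parameters are illustrative, not LLVM API:

  // Mirrors the 'memop' PatFrag predicate: a load may be used as an SSE
  // memory operand if the CPU tolerates unaligned vector accesses, or if
  // the load's known alignment is at least the 16 bytes SSE expects.
  static bool canFoldLoadAsSSEMemOperand(bool hasVectorUAMem,
                                         unsigned alignInBytes) {
    return hasVectorUAMem || alignInBytes >= 16;
  }

In the tree itself, the same check runs as the C++ fragment embedded in the PatFrag body, with Subtarget supplied by the X86 instruction selector.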