really kill off the last MRMInitReg inst, remove logic from encoder.

[oota-llvm.git] / lib / Target / X86 / X86InstrSSE.td
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td

index 694b91ea4298d51c779db15ee2404737a790d328..bbdaed4116aeb52f62b16226c4052cd0dde3e381 100644 (file)
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -131,11 +131,13 @@ def alignedloadv2i64 : PatFrag<(ops node:$ptr),
  
  // Like 'load', but uses special alignment checks suitable for use in
  // memory operands in most SSE instructions, which are required to
-// be naturally aligned on some targets but not on others.
-// FIXME: Actually implement support for targets that don't require the
-//        alignment. This probably wants a subtarget predicate.
+// be naturally aligned on some targets but not on others.  If the subtarget
+// allows unaligned accesses, match any load, though this may require
+// setting a feature bit in the processor (on startup, for example).
+// Opteron 10h and later implement such a feature.
  def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{
-  return cast<LoadSDNode>(N)->getAlignment() >= 16;
+  return    Subtarget->hasVectorUAMem()
+         || cast<LoadSDNode>(N)->getAlignment() >= 16;
  }]>;
  
  def memopfsf32 : PatFrag<(ops node:$ptr), (f32   (memop node:$ptr))>;
@@ -503,9 +505,10 @@ def Int_COMISSrm: PSI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
  // Alias instructions that map fld0 to pxor for sse.
  let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1,
      canFoldAsLoad = 1 in
-def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins),
-                 "pxor\t$dst, $dst", [(set FR32:$dst, fp32imm0)]>,
-               Requires<[HasSSE1]>, TB, OpSize;
+  // FIXME: Change encoding to pseudo!
+def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "",
+                 [(set FR32:$dst, fp32imm0)]>,
+                 Requires<[HasSSE1]>, TB, OpSize;
  
  // Alias instruction to do FR32 reg-to-reg copy using movaps. Upper bits are
  // disregarded.
@@ -827,7 +830,7 @@ multiclass sse1_fp_unop_rm<bits<8> opc, string OpcodeStr,
    def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
                  !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
                  [(set FR32:$dst, (OpNode (load addr:$src)))]>, XS,
-            Requires<[HasSSE1, NoSSEBreakDep]>;
+            Requires<[HasSSE1, OptForSize]>;
  
    // Vector operation, reg.
    def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
@@ -1023,10 +1026,10 @@ def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
  // Alias instructions that map zero vector to pxor / xorp* for sse.
  // We set canFoldAsLoad because this can be converted to a constant-pool
  // load of an all-zeros value if folding it would be beneficial.
+// FIXME: Change encoding to pseudo!
  let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
      isCodeGenOnly = 1 in
-def V_SET0 : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins),
-                 "xorps\t$dst, $dst",
+def V_SET0 : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
                   [(set VR128:$dst, (v4i32 immAllZerosV))]>;
  
  let Predicates = [HasSSE1] in {
@@ -1120,7 +1123,7 @@ def CVTSD2SSrr  : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
  def CVTSD2SSrm  : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
                        "cvtsd2ss\t{$src, $dst|$dst, $src}",
                        [(set FR32:$dst, (fround (loadf64 addr:$src)))]>, XD,
-                      Requires<[HasSSE2, NoSSEBreakDep]>;
+                  Requires<[HasSSE2, OptForSize]>;
  def CVTSI2SDrr  : SDI<0x2A, MRMSrcReg, (outs FR64:$dst), (ins GR32:$src),
                        "cvtsi2sd\t{$src, $dst|$dst, $src}",
                        [(set FR64:$dst, (sint_to_fp GR32:$src))]>;
@@ -1157,10 +1160,10 @@ def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
  def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
                     "cvtss2sd\t{$src, $dst|$dst, $src}",
                     [(set FR64:$dst, (extloadf32 addr:$src))]>, XS,
-                 Requires<[HasSSE2, NoSSEBreakDep]>;
+                 Requires<[HasSSE2, OptForSize]>;
  
  def : Pat<(extloadf32 addr:$src),
-          (CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[SSEBreakDep]>;
+          (CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[HasSSE2, OptForSpeed]>;
  
  // Match intrinsics which expect XMM operand(s).
  def Int_CVTSD2SIrr : SDI<0x2D, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
@@ -1267,8 +1270,8 @@ def Int_COMISDrm: PDI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
  // Alias instructions that map fld0 to pxor for sse.
  let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1,
      canFoldAsLoad = 1 in
-def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins),
-                 "pxor\t$dst, $dst", [(set FR64:$dst, fpimm0)]>,
+def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "",
+                 [(set FR64:$dst, fpimm0)]>,
                 Requires<[HasSSE2]>, TB, OpSize;
  
  // Alias instruction to do FR64 reg-to-reg copy using movapd. Upper bits are
@@ -2327,8 +2330,8 @@ def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8 imm:$ss),
  // load of an all-ones value if folding it would be beneficial.
  let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
      isCodeGenOnly = 1 in
-  def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins),
-                         "pcmpeqd\t$dst, $dst",
+  // FIXME: Change encoding to pseudo.
+  def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
                           [(set VR128:$dst, (v4i32 immAllOnesV))]>;
  
  // FR64 to 128-bit vector conversion.
@@ -3232,7 +3235,7 @@ multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd,
                      [(set VR128:$dst,
                            (V4F32Int (memopv4f32 addr:$src1),imm:$src2))]>,
                      TA, OpSize,
-                Requires<[HasSSE41, NoSSEBreakDep]>;
+                Requires<[HasSSE41]>;
  
    // Vector intrinsic operation, reg
    def PDr_Int : SS4AIi8<opcpd, MRMSrcReg,