completely disable folding of loads into scalar sse instructions and provide

author Chris Lattner <sabre@nondot.org>

Sat, 7 Oct 2006 21:55:32 +0000 (21:55 +0000)

committer Chris Lattner <sabre@nondot.org>

Sat, 7 Oct 2006 21:55:32 +0000 (21:55 +0000)
author Chris Lattner <sabre@nondot.org>
Sat, 7 Oct 2006 21:55:32 +0000 (21:55 +0000)
committer Chris Lattner <sabre@nondot.org>
Sat, 7 Oct 2006 21:55:32 +0000 (21:55 +0000)
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp

index 68d390cf5ca7e1c750def349fbfc0fecf69740b3..785af30e3a7872e01077118572c6cc28ecdf8afb 100644 (file)
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -147,6 +147,8 @@ namespace {
                      SDOperand &Index, SDOperand &Disp);
      bool SelectLEAAddr(SDOperand N, SDOperand &Base, SDOperand &Scale,
                         SDOperand &Index, SDOperand &Disp);
+    bool SelectScalarSSELoad(SDOperand N, SDOperand &Base, SDOperand &Scale,
+                             SDOperand &Index, SDOperand &Disp);
      bool TryFoldLoad(SDOperand P, SDOperand N,
                       SDOperand &Base, SDOperand &Scale,
                       SDOperand &Index, SDOperand &Disp);
@@ -724,6 +726,29 @@ bool X86DAGToDAGISel::SelectAddr(SDOperand N, SDOperand &Base, SDOperand &Scale,
    return true;
  }
  
+/// SelectScalarSSELoad - Match a scalar SSE load.  In particular, we want to
+/// match a load whose top elements are either undef or zeros.  The load flavor
+/// is derived from the type of N, which is either v4f32 or v2f64.
+bool X86DAGToDAGISel::SelectScalarSSELoad(SDOperand N, SDOperand &Base,
+                                          SDOperand &Scale,
+                                          SDOperand &Index, SDOperand &Disp) {
+#if 0  // Matching is compiled out, so this function currently always fails.
+  if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+    if (N.getOperand(0).getOpcode() == ISD::LOAD) {
+      SDOperand LoadAddr = N.getOperand(0).getOperand(0);
+      if (!SelectAddr(LoadAddr, Base, Scale, Index, Disp))
+        return false;
+      return true;
+    }
+  }
+  // TODO: Also handle explicitly required zeros in the top elements (a vector
+  // shuffle from the zero vector).  NOTE(review): verify LoadAddr's operand #.
+#endif
+  
+  return false;  // Nothing is matched while the block above is disabled.
+}
+
+
  /// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing
  /// mode it matches can be cost effectively emitted as an LEA instruction.
  bool X86DAGToDAGISel::SelectLEAAddr(SDOperand N, SDOperand &Base,
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td

index 84ccfeaedfc5a6c13a5c005e2a04c605e74a0fbb..55f4528761932d43eda5b452f78d3ebb562af9ba 100644 (file)
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -13,6 +13,7 @@
  //
  //===----------------------------------------------------------------------===//
  
+
  //===----------------------------------------------------------------------===//
  // SSE specific DAG Nodes.
  //===----------------------------------------------------------------------===//
@@ -31,6 +32,27 @@ def X86s2vec   : SDNode<"X86ISD::S2VEC",  SDTypeProfile<1, 1, []>, []>;
  def X86pextrw  : SDNode<"X86ISD::PEXTRW", SDTypeProfile<1, 2, []>, []>;
  def X86pinsrw  : SDNode<"X86ISD::PINSRW", SDTypeProfile<1, 3, []>, []>;
  
+//===----------------------------------------------------------------------===//
+// SSE Complex Patterns
+//===----------------------------------------------------------------------===//
+
+// These are 'extloads' from a scalar to the low element of a vector, zeroing
+// the top elements, used for the SSE 'ss' and 'sd' instruction forms.  Both
+// are matched by the C++ selector SelectScalarSSELoad and take 4 operands.
+def sse_load_f32 : ComplexPattern<v4f32, 4, "SelectScalarSSELoad", []>;
+def sse_load_f64 : ComplexPattern<v2f64, 4, "SelectScalarSSELoad", []>;
+
+def ssmem : Operand<v4f32> {       // Operand typed as v4f32.
+  let PrintMethod = "printf32mem";
+  let NumMIOperands = 4;           // One per MIOperandInfo entry below.
+  let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc, i32imm);
+}  // NOTE(review): presumably base, scale, index, displacement -- confirm.
+def sdmem : Operand<v2f64> {       // Operand typed as v2f64.
+  let PrintMethod = "printf64mem";
+  let NumMIOperands = 4;           // One per MIOperandInfo entry below.
+  let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc, i32imm);
+}  // NOTE(review): presumably base, scale, index, displacement -- confirm.
+
  //===----------------------------------------------------------------------===//
  // SSE pattern fragments
  //===----------------------------------------------------------------------===//
@@ -185,18 +207,18 @@ multiclass SS_IntUnary<bits<8> o, string OpcodeStr, Intrinsic IntId> {
    def r : SSI<o, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                !strconcat(OpcodeStr, " {$src, $dst|$dst, $src"),
                [(set VR128:$dst, (v4f32 (IntId VR128:$src)))]>;
-  def m : SSI<o, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
+  def m : SSI<o, MRMSrcMem, (ops VR128:$dst, ssmem:$src),
                !strconcat(OpcodeStr, " {$src, $dst|$dst, $src"),
-              [(set VR128:$dst, (v4f32 (IntId (load addr:$src))))]>;
+              [(set VR128:$dst, (v4f32 (IntId sse_load_f32:$src)))]>;
  }
  
  multiclass SD_IntUnary<bits<8> o, string OpcodeStr, Intrinsic IntId> {
    def r : SDI<o, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                !strconcat(OpcodeStr, " {$src, $dst|$dst, $src"),
                [(set VR128:$dst, (v2f64 (IntId VR128:$src)))]>;
-  def m : SDI<o, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
+  def m : SDI<o, MRMSrcMem, (ops VR128:$dst, sdmem:$src),
                !strconcat(OpcodeStr, " {$src, $dst|$dst, $src"),
-              [(set VR128:$dst, (v2f64 (IntId (load addr:$src))))]>;
+              [(set VR128:$dst, (v2f64 (IntId sse_load_f64:$src)))]>;
  }
  
  class PS_Intr<bits<8> o, string OpcodeStr, Intrinsic IntId>
@@ -315,10 +337,10 @@ multiclass scalar_sse12_fp_binop_rm<bits<8> opc, string OpcodeStr,
    // Scalar operation, reg+mem.
    def SSrm : SSI<opc, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
                   !strconcat(OpcodeStr, "ss {$src2, $dst|$dst, $src2"),
-                 [(set FR32:$dst, (OpNode FR32:$src1, (loadf32 addr:$src2)))]>;
+                 [(set FR32:$dst, (OpNode FR32:$src1, (load addr:$src2)))]>;
    def SDrm : SDI<opc, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
                   !strconcat(OpcodeStr, "sd {$src2, $dst|$dst, $src2"),
-                 [(set FR64:$dst, (OpNode FR64:$src1, (loadf64 addr:$src2)))]>;
+                 [(set FR64:$dst, (OpNode FR64:$src1, (load addr:$src2)))]>;
                   
    // Vector intrinsic operation, reg+reg.
    def SSrr_Int : SSI<opc, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
@@ -332,14 +354,14 @@ multiclass scalar_sse12_fp_binop_rm<bits<8> opc, string OpcodeStr,
      let isCommutable = Commutable;
    }
    // Vector intrinsic operation, reg+mem.
-  def SSrm_Int : SSI<opc, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f32mem:$src2),
+  def SSrm_Int : SSI<opc, MRMSrcMem, (ops VR128:$dst, VR128:$src1, ssmem:$src2),
                       !strconcat(OpcodeStr, "ss {$src2, $dst|$dst, $src2"),
                       [(set VR128:$dst, (F32Int VR128:$src1,
-                                               (load addr:$src2)))]>;
-  def SDrm_Int : SDI<opc, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2),
+                                               sse_load_f32:$src2))]>;
+  def SDrm_Int : SDI<opc, MRMSrcMem, (ops VR128:$dst, VR128:$src1, sdmem:$src2),
                       !strconcat(OpcodeStr, "sd {$src2, $dst|$dst, $src2"),
                       [(set VR128:$dst, (F64Int VR128:$src1,
-                                               (load addr:$src2)))]>;
+                                               sse_load_f64:$src2))]>;
  }
  }
  
@@ -373,17 +395,17 @@ class SS_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId>
          !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
          [(set VR128:$dst, (v4f32 (IntId VR128:$src1, VR128:$src2)))]>;
  class SS_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId>
-  : SSI<o, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f32mem:$src2),
+  : SSI<o, MRMSrcMem, (ops VR128:$dst, VR128:$src1, ssmem:$src2),
          !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
-        [(set VR128:$dst, (v4f32 (IntId VR128:$src1, (load addr:$src2))))]>;
+        [(set VR128:$dst, (v4f32 (IntId VR128:$src1, sse_load_f32:$src2)))]>;
  class SD_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId>
    : SDI<o, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
          !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
          [(set VR128:$dst, (v2f64 (IntId VR128:$src1, VR128:$src2)))]>;
  class SD_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId>
-  : SDI<o, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2),
+  : SDI<o, MRMSrcMem, (ops VR128:$dst, VR128:$src1, sdmem:$src2),
          !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
-        [(set VR128:$dst, (v2f64 (IntId VR128:$src1, (load addr:$src2))))]>;
+        [(set VR128:$dst, (v2f64 (IntId VR128:$src1, sse_load_f64:$src2)))]>;
  
  
  // Aliases to match intrinsics which expect XMM operand(s).
author	Chris Lattner <sabre@nondot.org>
	Sat, 7 Oct 2006 21:55:32 +0000 (21:55 +0000)
committer	Chris Lattner <sabre@nondot.org>
	Sat, 7 Oct 2006 21:55:32 +0000 (21:55 +0000)
lib/Target/X86/X86ISelDAGToDAG.cpp		patch \| blob \| history
lib/Target/X86/X86InstrSSE.td		patch \| blob \| history