Remove intrinsic-specific instructions for (V)SQRTPS/PD. Instead lower the intrinsics to the target-independent ISD::FSQRT node and reuse the existing fsqrt patterns.

author Craig Topper <craig.topper@gmail.com>

Sat, 29 Dec 2012 18:18:20 +0000 (18:18 +0000)

committer Craig Topper <craig.topper@gmail.com>

Sat, 29 Dec 2012 18:18:20 +0000 (18:18 +0000)
author Craig Topper <craig.topper@gmail.com>
Sat, 29 Dec 2012 18:18:20 +0000 (18:18 +0000)
committer Craig Topper <craig.topper@gmail.com>
Sat, 29 Dec 2012 18:18:20 +0000 (18:18 +0000)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 2b48d50f92396d2783689fbdaaa7c0524fd6f5a3..d279d2da527a72c9c81f3db00da0aa93ec607ad2 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -10527,6 +10527,12 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
      return DAG.getNode(X86ISD::VPERMV, dl, Op.getValueType(),
                         Op.getOperand(2), Op.getOperand(1));
  
+  case Intrinsic::x86_sse_sqrt_ps:
+  case Intrinsic::x86_sse2_sqrt_pd:
+  case Intrinsic::x86_avx_sqrt_ps_256:
+  case Intrinsic::x86_avx_sqrt_pd_256:
+    return DAG.getNode(ISD::FSQRT, dl, Op.getValueType(), Op.getOperand(1));
+
    // ptest and testp intrinsics. The intrinsic these come from are designed to
    // return an integer value, not just an instruction so lower it to the ptest
    // or testp pattern and a setcc for the result.
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp

index f3ec067bdb328ef99116c8f541328fac3facd69b..a7424096a8c024573c4ae496e2aa5155e006ff12 100644 (file)
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -467,9 +467,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
      { X86::RSQRTSSr,        X86::RSQRTSSm,            0 },
      { X86::RSQRTSSr_Int,    X86::RSQRTSSm_Int,        0 },
      { X86::SQRTPDr,         X86::SQRTPDm,             TB_ALIGN_16 },
-    { X86::SQRTPDr_Int,     X86::SQRTPDm_Int,         TB_ALIGN_16 },
      { X86::SQRTPSr,         X86::SQRTPSm,             TB_ALIGN_16 },
-    { X86::SQRTPSr_Int,     X86::SQRTPSm_Int,         TB_ALIGN_16 },
      { X86::SQRTSDr,         X86::SQRTSDm,             0 },
      { X86::SQRTSDr_Int,     X86::SQRTSDm_Int,         0 },
      { X86::SQRTSSr,         X86::SQRTSSm,             0 },
@@ -528,9 +526,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
      { X86::VRSQRTPSr,       X86::VRSQRTPSm,           0 },
      { X86::VRSQRTPSr_Int,   X86::VRSQRTPSm_Int,       0 },
      { X86::VSQRTPDr,        X86::VSQRTPDm,            0 },
-    { X86::VSQRTPDr_Int,    X86::VSQRTPDm_Int,        0 },
      { X86::VSQRTPSr,        X86::VSQRTPSm,            0 },
-    { X86::VSQRTPSr_Int,    X86::VSQRTPSm_Int,        0 },
      { X86::VUCOMISDrr,      X86::VUCOMISDrm,          0 },
      { X86::VUCOMISSrr,      X86::VUCOMISSrm,          0 },
      { X86::VBROADCASTSSrr,  X86::VBROADCASTSSrm,      TB_NO_REVERSE },
@@ -554,11 +550,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
      { X86::VRCPPSYr,        X86::VRCPPSYm,            0 },
      { X86::VRCPPSYr_Int,    X86::VRCPPSYm_Int,        0 },
      { X86::VRSQRTPSYr,      X86::VRSQRTPSYm,          0 },
-    { X86::VRSQRTPSYr_Int,  X86::VRSQRTPSYm_Int,      0 },
      { X86::VSQRTPDYr,       X86::VSQRTPDYm,           0 },
-    { X86::VSQRTPDYr_Int,   X86::VSQRTPDYm_Int,       0 },
      { X86::VSQRTPSYr,       X86::VSQRTPSYm,           0 },
-    { X86::VSQRTPSYr_Int,   X86::VSQRTPSYm_Int,       0 },
      { X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrm,     TB_NO_REVERSE },
      { X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrm,     TB_NO_REVERSE },
  
@@ -4670,13 +4663,9 @@ bool X86InstrInfo::isHighLatencyDef(int opc) const {
    case X86::DIVSSrr:
    case X86::DIVSSrr_Int:
    case X86::SQRTPDm:
-  case X86::SQRTPDm_Int:
    case X86::SQRTPDr:
-  case X86::SQRTPDr_Int:
    case X86::SQRTPSm:
-  case X86::SQRTPSm_Int:
    case X86::SQRTPSr:
-  case X86::SQRTPSr_Int:
    case X86::SQRTSDm:
    case X86::SQRTSDm_Int:
    case X86::SQRTSDr:
@@ -4695,13 +4684,9 @@ bool X86InstrInfo::isHighLatencyDef(int opc) const {
    case X86::VDIVSSrr:
    case X86::VDIVSSrr_Int:
    case X86::VSQRTPDm:
-  case X86::VSQRTPDm_Int:
    case X86::VSQRTPDr:
-  case X86::VSQRTPDr_Int:
    case X86::VSQRTPSm:
-  case X86::VSQRTPSm_Int:
    case X86::VSQRTPSr:
-  case X86::VSQRTPSr_Int:
    case X86::VSQRTSDm:
    case X86::VSQRTSDm_Int:
    case X86::VSQRTSDr:
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td

index 041b4ceec07e36d1bbdfc83a7a051e6ccd2fd050..120202fef880d171747a21e29de0469cb4c84572 100644 (file)
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -3116,14 +3116,6 @@ let Predicates = [HasAVX] in {
                  sse2_fp_unop_p<0x51, "vsqrt", fsqrt, SSE_SQRTP>,
                  sse1_fp_unop_p_y<0x51, "vsqrt", fsqrt, SSE_SQRTP>,
                  sse2_fp_unop_p_y<0x51, "vsqrt", fsqrt, SSE_SQRTP>,
-                sse1_fp_unop_p_int<0x51, "vsqrt", int_x86_sse_sqrt_ps,
-                                   SSE_SQRTP>,
-                sse2_fp_unop_p_int<0x51, "vsqrt", int_x86_sse2_sqrt_pd,
-                                    SSE_SQRTP>,
-                sse1_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_ps_256,
-                                    SSE_SQRTP>,
-                sse2_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_pd_256,
-                                    SSE_SQRTP>,
                  VEX;
  
    // Reciprocal approximations. Note that these typically require refinement
@@ -3202,11 +3194,9 @@ let Predicates = [HasAVX] in {
  defm SQRT  : sse1_fp_unop_s<0x51, "sqrt",  fsqrt, int_x86_sse_sqrt_ss,
                              SSE_SQRTS>,
               sse1_fp_unop_p<0x51, "sqrt",  fsqrt, SSE_SQRTS>,
-             sse1_fp_unop_p_int<0x51, "sqrt",  int_x86_sse_sqrt_ps, SSE_SQRTS>,
               sse2_fp_unop_s<0x51, "sqrt",  fsqrt, int_x86_sse2_sqrt_sd,
                              SSE_SQRTS>,
-             sse2_fp_unop_p<0x51, "sqrt",  fsqrt, SSE_SQRTS>,
-             sse2_fp_unop_p_int<0x51, "sqrt", int_x86_sse2_sqrt_pd, SSE_SQRTS>;
+             sse2_fp_unop_p<0x51, "sqrt",  fsqrt, SSE_SQRTS>;
  
  /// sse1_fp_unop_s_rw - SSE1 unops where vector form has a read-write operand.
  multiclass sse1_fp_unop_rw<bits<8> opc, string OpcodeStr, SDNode OpNode,
author	Craig Topper <craig.topper@gmail.com>
	Sat, 29 Dec 2012 18:18:20 +0000 (18:18 +0000)
committer	Craig Topper <craig.topper@gmail.com>
	Sat, 29 Dec 2012 18:18:20 +0000 (18:18 +0000)
lib/Target/X86/X86ISelLowering.cpp		patch | blob | history
lib/Target/X86/X86InstrInfo.cpp		patch | blob | history
lib/Target/X86/X86InstrSSE.td		patch | blob | history