Expand the x86 V_SET0* pseudos right after register allocation.

author Jakob Stoklund Olesen <stoklund@2pi.dk>

Thu, 29 Sep 2011 05:10:54 +0000 (05:10 +0000)

committer Jakob Stoklund Olesen <stoklund@2pi.dk>

Thu, 29 Sep 2011 05:10:54 +0000 (05:10 +0000)
author Jakob Stoklund Olesen <stoklund@2pi.dk>
Thu, 29 Sep 2011 05:10:54 +0000 (05:10 +0000)
committer Jakob Stoklund Olesen <stoklund@2pi.dk>
Thu, 29 Sep 2011 05:10:54 +0000 (05:10 +0000)
diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h

index 616e89ac5065c10ec5cc8f50c27170682f3d8ed7..07f614d61d936f0a927cf4678a95b91f27bcebaa 100644 (file)
--- a/include/llvm/Target/TargetInstrInfo.h
+++ b/include/llvm/Target/TargetInstrInfo.h
@@ -392,7 +392,7 @@ public:
    /// into real instructions. The target can edit MI in place, or it can insert
    /// new instructions and erase MI. The function should return true if
    /// anything was changed.
-  bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
+  virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
      return false;
    }
  
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp

index 4eb6c3076f2cd0f18d267c523a55e31b3ec8c54e..bad978d874711f07e3fd4957c905f936450161fb 100644 (file)
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -2391,6 +2391,37 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
    NewMIs.push_back(MIB);
  }
  
+/// Expand2AddrUndef - Expand a single-def pseudo instruction to a two-addr
+/// instruction with two undef reads of the register being defined.  This is
+/// used for mapping:
+///   %xmm4 = V_SET0
+/// to:
+///   %xmm4 = PXORrr %xmm4<undef>, %xmm4<undef>
+///
+static bool Expand2AddrUndef(MachineInstr *MI, const MCInstrDesc &Desc) {
+  assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction.");
+  unsigned Reg = MI->getOperand(0).getReg();
+  MI->setDesc(Desc);
+
+  // MachineInstr::addOperand() will insert explicit operands before any
+  // implicit operands.
+  MachineInstrBuilder(MI).addReg(Reg, RegState::Undef)
+                         .addReg(Reg, RegState::Undef);
+  // But we don't trust that.
+  assert(MI->getOperand(1).getReg() == Reg &&
+         MI->getOperand(2).getReg() == Reg && "Misplaced operand");
+  return true;
+}
+
+bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
+  bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
+  switch (MI->getOpcode()) {
+  case X86::V_SET0:
+    return Expand2AddrUndef(MI, get(HasAVX ? X86::VPXORrr : X86::PXORrr));
+  }
+  return false;
+}
+
  MachineInstr*
  X86InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
                                         int FrameIx, uint64_t Offset,
@@ -2679,13 +2710,8 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
      case X86::AVX_SET0PDY:
        Alignment = 32;
        break;
-    case X86::V_SET0PS:
-    case X86::V_SET0PD:
-    case X86::V_SET0PI:
+    case X86::V_SET0:
      case X86::V_SETALLONES:
-    case X86::AVX_SET0PS:
-    case X86::AVX_SET0PD:
-    case X86::AVX_SET0PI:
      case X86::AVX_SETALLONES:
        Alignment = 16;
        break;
@@ -2722,13 +2748,8 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
  
    SmallVector<MachineOperand,X86::AddrNumOperands> MOs;
    switch (LoadMI->getOpcode()) {
-  case X86::V_SET0PS:
-  case X86::V_SET0PD:
-  case X86::V_SET0PI:
+  case X86::V_SET0:
    case X86::V_SETALLONES:
-  case X86::AVX_SET0PS:
-  case X86::AVX_SET0PD:
-  case X86::AVX_SET0PI:
    case X86::AVX_SET0PSY:
    case X86::AVX_SET0PDY:
    case X86::AVX_SETALLONES:
@@ -2736,7 +2757,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
    case X86::FsFLD0SS:
    case X86::VFsFLD0SD:
    case X86::VFsFLD0SS: {
-    // Folding a V_SET0P? or V_SETALLONES as a load, to ease register pressure.
+    // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
      // Create a constant-pool entry and operands to load from it.
  
      // Medium and large mode can't fold loads this way.
@@ -3316,7 +3337,6 @@ static const unsigned ReplaceableInstrs[][3] = {
    { X86::ANDPSrr,    X86::ANDPDrr,   X86::PANDrr    },
    { X86::ORPSrm,     X86::ORPDrm,    X86::PORrm     },
    { X86::ORPSrr,     X86::ORPDrr,    X86::PORrr     },
-  { X86::V_SET0PS,   X86::V_SET0PD,  X86::V_SET0PI  },
    { X86::XORPSrm,    X86::XORPDrm,   X86::PXORrm    },
    { X86::XORPSrr,    X86::XORPDrr,   X86::PXORrr    },
    // AVX 128-bit support
@@ -3332,7 +3352,6 @@ static const unsigned ReplaceableInstrs[][3] = {
    { X86::VANDPSrr,   X86::VANDPDrr,   X86::VPANDrr    },
    { X86::VORPSrm,    X86::VORPDrm,    X86::VPORrm     },
    { X86::VORPSrr,    X86::VORPDrr,    X86::VPORrr     },
-  { X86::AVX_SET0PS, X86::AVX_SET0PD, X86::AVX_SET0PI },
    { X86::VXORPSrm,   X86::VXORPDrm,   X86::VPXORrm    },
    { X86::VXORPSrr,   X86::VXORPDrr,   X86::VPXORrr    },
    // AVX 256-bit support
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h

index 0f4022207eb1ac22d55545d1d5fbbf82d1504ef4..97009dbdbe501f2404a32315bf7391fd2f8bd78b 100644 (file)
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -247,6 +247,9 @@ public:
                                 MachineInstr::mmo_iterator MMOBegin,
                                 MachineInstr::mmo_iterator MMOEnd,
                                 SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+  virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
+
    virtual
    MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
                                           int FrameIx, uint64_t Offset,
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td

index 075be73f1a9f8df97066fd68c8df02a168dfacc6..4a51edfc5ad115b51e9ea78f21f619a0c54552bc 100644 (file)
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -260,26 +260,26 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1,
  // AVX & SSE - Zero/One Vectors
  //===----------------------------------------------------------------------===//
  
-// Alias instructions that map zero vector to pxor / xorp* for sse.
+// Alias instruction that maps zero vector to pxor / xorp* for sse.
+// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
+// swizzled by ExecutionDepsFix to pxor.
  // We set canFoldAsLoad because this can be converted to a constant-pool
  // load of an all-zeros value if folding it would be beneficial.
-// FIXME: Change encoding to pseudo! This is blocked right now by the x86
-// JIT implementation, it does not expand the instructions below like
-// X86MCInstLower does.
  let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
-    isCodeGenOnly = 1 in {
-def V_SET0PS : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
-                 [(set VR128:$dst, (v4f32 immAllZerosV))]>;
-def V_SET0PD : PDI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
-                 [(set VR128:$dst, (v2f64 immAllZerosV))]>;
-let ExeDomain = SSEPackedInt in
-def V_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "",
-                 [(set VR128:$dst, (v4i32 immAllZerosV))]>;
+    isPseudo = 1 in {
+def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "", []>;
  }
  
-// The same as done above but for AVX. The 128-bit versions are the
-// same, but re-encoded. The 256-bit does not support PI version, and
-// doesn't need it because on sandy bridge the register is set to zero
+def : Pat<(v4f32 immAllZerosV), (V_SET0)>;
+def : Pat<(v2f64 immAllZerosV), (V_SET0)>;
+def : Pat<(v4i32 immAllZerosV), (V_SET0)>;
+def : Pat<(v2i64 immAllZerosV), (V_SET0)>;
+def : Pat<(v8i16 immAllZerosV), (V_SET0)>;
+def : Pat<(v16i8 immAllZerosV), (V_SET0)>;
+
+
+// The same as done above but for AVX.  The 256-bit ISA does not support PI,
+// and doesn't need it because on sandy bridge the register is set to zero
  // at the rename stage without using any execution unit, so SET0PSY
  // and SET0PDY can be used for vector int instructions without penalty
  // FIXME: Change encoding to pseudo! This is blocked right now by the x86
@@ -287,32 +287,22 @@ def V_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "",
  // X86MCInstLower does.
  let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
      isCodeGenOnly = 1, Predicates = [HasAVX] in {
-def AVX_SET0PS  : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
-                   [(set VR128:$dst, (v4f32 immAllZerosV))]>, VEX_4V;
-def AVX_SET0PD  : PDI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
-                   [(set VR128:$dst, (v2f64 immAllZerosV))]>, VEX_4V;
  def AVX_SET0PSY : PSI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "",
                     [(set VR256:$dst, (v8f32 immAllZerosV))]>, VEX_4V;
  def AVX_SET0PDY : PDI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "",
                     [(set VR256:$dst, (v4f64 immAllZerosV))]>, VEX_4V;
-let ExeDomain = SSEPackedInt in
-def AVX_SET0PI  : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "",
-                   [(set VR128:$dst, (v4i32 immAllZerosV))]>;
  }
  
-def : Pat<(v2i64 immAllZerosV), (V_SET0PI)>;
-def : Pat<(v8i16 immAllZerosV), (V_SET0PI)>;
-def : Pat<(v16i8 immAllZerosV), (V_SET0PI)>;
  
  // AVX has no support for 256-bit integer instructions, but since the 128-bit
  // VPXOR instruction writes zero to its upper part, it's safe to build zeros.
-def : Pat<(v8i32 immAllZerosV), (SUBREG_TO_REG (i32 0), (AVX_SET0PI), sub_xmm)>;
+def : Pat<(v8i32 immAllZerosV), (SUBREG_TO_REG (i32 0), (V_SET0), sub_xmm)>;
  def : Pat<(bc_v8i32 (v8f32 immAllZerosV)),
-          (SUBREG_TO_REG (i32 0), (AVX_SET0PI), sub_xmm)>;
+          (SUBREG_TO_REG (i32 0), (V_SET0), sub_xmm)>;
  
-def : Pat<(v4i64 immAllZerosV), (SUBREG_TO_REG (i64 0), (AVX_SET0PI), sub_xmm)>;
+def : Pat<(v4i64 immAllZerosV), (SUBREG_TO_REG (i64 0), (V_SET0), sub_xmm)>;
  def : Pat<(bc_v4i64 (v8f32 immAllZerosV)),
-          (SUBREG_TO_REG (i64 0), (AVX_SET0PI), sub_xmm)>;
+          (SUBREG_TO_REG (i64 0), (V_SET0), sub_xmm)>;
  
  // We set canFoldAsLoad because this can be converted to a constant-pool
  // load of an all-ones value if folding it would be beneficial.
@@ -427,12 +417,12 @@ let Predicates = [HasSSE1] in {
    // Move scalar to XMM zero-extended, zeroing a VR128 then do a
    // MOVSS to the lower bits.
    def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
-            (MOVSSrr (v4f32 (V_SET0PS)), FR32:$src)>;
+            (MOVSSrr (v4f32 (V_SET0)), FR32:$src)>;
    def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
-            (MOVSSrr (v4f32 (V_SET0PS)),
+            (MOVSSrr (v4f32 (V_SET0)),
                       (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>;
    def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
-            (MOVSSrr (v4i32 (V_SET0PI)),
+            (MOVSSrr (v4i32 (V_SET0)),
                       (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>;
    }
  
@@ -483,7 +473,7 @@ let Predicates = [HasSSE2] in {
    // Move scalar to XMM zero-extended, zeroing a VR128 then do a
    // MOVSD to the lower bits.
    def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
-            (MOVSDrr (v2f64 (V_SET0PS)), FR64:$src)>;
+            (MOVSDrr (v2f64 (V_SET0)), FR64:$src)>;
    }
  
    let AddedComplexity = 20 in {
@@ -558,15 +548,15 @@ let Predicates = [HasAVX] in {
    // Move scalar to XMM zero-extended, zeroing a VR128 then do a
    // MOVS{S,D} to the lower bits.
    def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
-            (VMOVSSrr (v4f32 (AVX_SET0PS)), FR32:$src)>;
+            (VMOVSSrr (v4f32 (V_SET0)), FR32:$src)>;
    def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
-            (VMOVSSrr (v4f32 (AVX_SET0PS)),
+            (VMOVSSrr (v4f32 (V_SET0)),
                        (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>;
    def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
-            (VMOVSSrr (v4i32 (AVX_SET0PI)),
+            (VMOVSSrr (v4i32 (V_SET0)),
                        (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>;
    def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
-            (VMOVSDrr (v2f64 (AVX_SET0PS)), FR64:$src)>;
+            (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)>;
    }
  
    let AddedComplexity = 20 in {
@@ -604,12 +594,12 @@ let Predicates = [HasAVX] in {
    def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
                     (v4f32 (scalar_to_vector FR32:$src)), (i32 0)))),
              (SUBREG_TO_REG (i32 0),
-                           (v4f32 (VMOVSSrr (v4f32 (AVX_SET0PS)), FR32:$src)),
+                           (v4f32 (VMOVSSrr (v4f32 (V_SET0)), FR32:$src)),
                             sub_xmm)>;
    def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
                     (v2f64 (scalar_to_vector FR64:$src)), (i32 0)))),
              (SUBREG_TO_REG (i64 0),
-                           (v2f64 (VMOVSDrr (v2f64 (AVX_SET0PS)), FR64:$src)),
+                           (v2f64 (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)),
                             sub_xmm)>;
  
    // Extract and store.
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp

index 6cb5831f551a6cfebf9201d416ac5a11718178df..1d603cf6e1b425ebf05f13a8b564163debc8b57e 100644 (file)
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -372,15 +372,9 @@ ReSimplify:
    case X86::FsFLD0SD:      LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
    case X86::VFsFLD0SS:     LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break;
    case X86::VFsFLD0SD:     LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break;
-  case X86::V_SET0PS:      LowerUnaryToTwoAddr(OutMI, X86::XORPSrr); break;
-  case X86::V_SET0PD:      LowerUnaryToTwoAddr(OutMI, X86::XORPDrr); break;
-  case X86::V_SET0PI:      LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
    case X86::V_SETALLONES:  LowerUnaryToTwoAddr(OutMI, X86::PCMPEQDrr); break;
-  case X86::AVX_SET0PS:    LowerUnaryToTwoAddr(OutMI, X86::VXORPSrr); break;
    case X86::AVX_SET0PSY:   LowerUnaryToTwoAddr(OutMI, X86::VXORPSYrr); break;
-  case X86::AVX_SET0PD:    LowerUnaryToTwoAddr(OutMI, X86::VXORPDrr); break;
    case X86::AVX_SET0PDY:   LowerUnaryToTwoAddr(OutMI, X86::VXORPDYrr); break;
-  case X86::AVX_SET0PI:    LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break;
    case X86::AVX_SETALLONES:  LowerUnaryToTwoAddr(OutMI, X86::VPCMPEQDrr); break;
  
    case X86::MOV16r0:
author	Jakob Stoklund Olesen <stoklund@2pi.dk>
	Thu, 29 Sep 2011 05:10:54 +0000 (05:10 +0000)
committer	Jakob Stoklund Olesen <stoklund@2pi.dk>
	Thu, 29 Sep 2011 05:10:54 +0000 (05:10 +0000)
include/llvm/Target/TargetInstrInfo.h		patch \| blob \| history
lib/Target/X86/X86InstrInfo.cpp		patch \| blob \| history
lib/Target/X86/X86InstrInfo.h		patch \| blob \| history
lib/Target/X86/X86InstrSSE.td		patch \| blob \| history
lib/Target/X86/X86MCInstLower.cpp		patch \| blob \| history