[x86] PR24562: fix incorrect folding of PSHUFB nodes with a mask where all indices...

[oota-llvm.git] / lib / Target / X86 / X86ISelLowering.cpp
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 9954b6c63cec6c6511cc26946e07c6e7d9a4fd92..82f9fa7a5e845483db20a3123c7fbb748c885416 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -2701,10 +2701,6 @@ SDValue X86TargetLowering::LowerFormalArguments(
    }
  
    MachineModuleInfo &MMI = MF.getMMI();
-  const Function *WinEHParent = nullptr;
-  if (MMI.hasWinEHFuncInfo(Fn))
-    WinEHParent = MMI.getWinEHParent(Fn);
-  bool IsWinEHParent = WinEHParent && WinEHParent == Fn;
  
    // Figure out if XMM registers are in use.
    assert(!(Subtarget->useSoftFloat() &&
@@ -2861,7 +2857,7 @@ SDValue X86TargetLowering::LowerFormalArguments(
  
    FuncInfo->setArgumentStackSize(StackSize);
  
-  if (IsWinEHParent) {
+  if (MMI.hasWinEHFuncInfo(Fn)) {
      if (Is64Bit) {
        int UnwindHelpFI = MFI->CreateStackObject(8, 8, /*isSS=*/false);
        SDValue StackSlot = DAG.getFrameIndex(UnwindHelpFI, MVT::i64);
@@ -2951,20 +2947,6 @@ static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
    return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
  }
  
-/// Check if the fall through instruction after a call site is unreachable.
-/// FIXME: This will fail if there are interesting non-code generating IR
-/// instructions between the call and the unreachable (lifetime.end). In
-/// practice, this should be rare because optimizations like to delete non-call
-/// code before unreachable.
-static bool isCallFollowedByUnreachable(ImmutableCallSite CS) {
-  const Instruction *NextInst;
-  if (auto *II = dyn_cast<InvokeInst>(CS.getInstruction()))
-    NextInst = II->getNormalDest()->getFirstNonPHIOrDbg();
-  else
-    NextInst = CS.getInstruction()->getNextNode();
-  return isa<UnreachableInst>(NextInst);
-}
-
  SDValue
  X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                               SmallVectorImpl<SDValue> &InVals) const {
@@ -3470,15 +3452,6 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
      InFlag = Chain.getValue(1);
    }
  
-  if (Subtarget->isTargetWin64() && CLI.CS) {
-    // Look for a call followed by unreachable. On Win64, we need to ensure that
-    // the call does not accidentally fall through to something that looks like
-    // an epilogue. We do this by inserting a DEBUGTRAP, which lowers to int3,
-    // which is what MSVC emits after noreturn calls.
-    if (isCallFollowedByUnreachable(*CLI.CS))
-      Chain = DAG.getNode(ISD::DEBUGTRAP, dl, MVT::Other, Chain);
-  }
-
    // Handle result values, copying them out of physregs into vregs that we
    // return.
    return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
@@ -14206,6 +14179,33 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
                           DAG.getNode(ISD::SETCC, dl, OpVT, Op0, Op1, CC));
    }
  
+  // Lower using XOP integer comparisons.
+  if ((VT == MVT::v16i8 || VT == MVT::v8i16 ||
+       VT == MVT::v4i32 || VT == MVT::v2i64) && Subtarget->hasXOP()) {
+    // Translate compare code to XOP PCOM compare mode.
+    unsigned CmpMode = 0;
+    switch (SetCCOpcode) {
+    default: llvm_unreachable("Unexpected SETCC condition");
+    case ISD::SETULT:
+    case ISD::SETLT: CmpMode = 0x00; break;
+    case ISD::SETULE:
+    case ISD::SETLE: CmpMode = 0x01; break;
+    case ISD::SETUGT:
+    case ISD::SETGT: CmpMode = 0x02; break;
+    case ISD::SETUGE:
+    case ISD::SETGE: CmpMode = 0x03; break;
+    case ISD::SETEQ: CmpMode = 0x04; break;
+    case ISD::SETNE: CmpMode = 0x05; break;
+    }
+
+    // Are we comparing unsigned or signed integers?
+    unsigned Opc = ISD::isUnsignedIntSetCC(SetCCOpcode)
+      ? X86ISD::VPCOMU : X86ISD::VPCOM;
+
+    return DAG.getNode(Opc, dl, VT, Op0, Op1,
+                       DAG.getConstant(CmpMode, dl, MVT::i8));
+  }
+
    // We are handling one of the integer comparisons here.  Since SSE only has
    // GT and EQ comparisons for integer, swapping operands and multiple
    // operations may be required for some comparisons.
@@ -19878,6 +19878,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
    case X86ISD::VPMADDWD:           return "X86ISD::VPMADDWD";
    case X86ISD::VPSHA:              return "X86ISD::VPSHA";
    case X86ISD::VPSHL:              return "X86ISD::VPSHL";
+  case X86ISD::VPCOM:              return "X86ISD::VPCOM";
+  case X86ISD::VPCOMU:             return "X86ISD::VPCOMU";
    case X86ISD::FMADD:              return "X86ISD::FMADD";
    case X86ISD::FMSUB:              return "X86ISD::FMSUB";
    case X86ISD::FNMADD:             return "X86ISD::FNMADD";
@@ -21989,10 +21991,22 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
    MVT RootVT = Root.getSimpleValueType();
    SDLoc DL(Root);
  
-  // Just remove no-op shuffle masks.
    if (Mask.size() == 1) {
-    DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Input),
-                  /*AddTo*/ true);
+    int Index = Mask[0];
+    assert((Index >= 0 || Index == SM_SentinelUndef ||
+            Index == SM_SentinelZero) &&
+           "Invalid shuffle index found!");
+
+    // We may end up with an accumulated mask of size 1 as a result of
+    // widening of shuffle operands (see function canWidenShuffleElements).
+    // If the only shuffle index is equal to SM_SentinelZero then propagate
+    // a zero vector. Otherwise, the combine shuffle mask is a no-op shuffle
+    // mask, and therefore the entire chain of shuffles can be folded away.
+    if (Index == SM_SentinelZero)
+      DCI.CombineTo(Root.getNode(), getZeroVector(RootVT, Subtarget, DAG, DL));
+    else
+      DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Input),
+                    /*AddTo*/ true);
      return true;
    }
  
@@ -22936,15 +22950,15 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
                           InputVector.getNode()->getOperand(0));
  
      // The mmx is indirect: (i64 extract_elt (v1i64 bitcast (x86mmx ...))).
-    SDValue MMXSrcOp = MMXSrc.getOperand(0);
      if (MMXSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT && MMXSrc.hasOneUse() &&
-        MMXSrc.getValueType() == MVT::i64 && MMXSrcOp.hasOneUse() &&
-        MMXSrcOp.getOpcode() == ISD::BITCAST &&
-        MMXSrcOp.getValueType() == MVT::v1i64 &&
-        MMXSrcOp.getOperand(0).getValueType() == MVT::x86mmx)
-      return DAG.getNode(X86ISD::MMX_MOVD2W, SDLoc(InputVector),
-                         N->getValueType(0),
-                         MMXSrcOp.getOperand(0));
+        MMXSrc.getValueType() == MVT::i64) {
+      SDValue MMXSrcOp = MMXSrc.getOperand(0);
+      if (MMXSrcOp.hasOneUse() && MMXSrcOp.getOpcode() == ISD::BITCAST &&
+          MMXSrcOp.getValueType() == MVT::v1i64 &&
+          MMXSrcOp.getOperand(0).getValueType() == MVT::x86mmx)
+        return DAG.getNode(X86ISD::MMX_MOVD2W, SDLoc(InputVector),
+                           N->getValueType(0), MMXSrcOp.getOperand(0));
+    }
    }
  
    EVT VT = N->getValueType(0);