setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
}
- setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
- setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
- setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
- setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
if (Subtarget->is64Bit()) {
setExceptionPointerRegister(X86::RAX);
setExceptionSelectorRegister(X86::RDX);
}

static bool IsTailCallConvention(CallingConv::ID CC) {
return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
CC == CallingConv::HiPE);
}
+/// \brief Return true if the calling convention is a C calling convention.
+static bool IsCCallConvention(CallingConv::ID CC) {
+ return (CC == CallingConv::C || CC == CallingConv::X86_64_Win64 ||
+ CC == CallingConv::X86_64_SysV);
+}
+
bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
if (!CI->isTailCall() || getTargetMachine().Options.DisableTailCalls)
return false;
CallSite CS(CI);
CallingConv::ID CalleeCC = CS.getCallingConv();
- if (!IsTailCallConvention(CalleeCC) && CalleeCC != CallingConv::C)
+ if (!IsTailCallConvention(CalleeCC) && !IsCCallConvention(CalleeCC))
return false;
return true;
MachineFrameInfo *MFI = MF.getFrameInfo();
bool Is64Bit = Subtarget->is64Bit();
bool IsWindows = Subtarget->isTargetWindows();
- bool IsWin64 = Subtarget->isTargetWin64();
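+ // Derive Win64-ness from the calling convention rather than the target
+ // triple, so the explicit Win64/SysV conventions are honored on any OS.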
+ bool IsWin64 = Subtarget->isCallingConvWin64(CallConv);
assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
"Var args not supported with calling convention fastcc, ghc or hipe");
ArgLocs, *DAG.getContext());
// Allocate shadow area for Win64
- if (IsWin64) {
+ if (IsWin64)
CCInfo.AllocateStack(32, 8);
- }
CCInfo.AnalyzeFormalArguments(Ins, CC_X86);
X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
SelectionDAG &DAG = CLI.DAG;
- SDLoc &dl = CLI.DL;
- SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
- SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
- SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
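+ // Bind through SmallVectorImpl so the references are independent of the
+ // SmallVector's inline element count.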
+ SDLoc &dl = CLI.DL;
+ SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
+ SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
+ SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
SDValue Chain = CLI.Chain;
SDValue Callee = CLI.Callee;
CallingConv::ID CallConv = CLI.CallConv;
MachineFunction &MF = DAG.getMachineFunction();
bool Is64Bit = Subtarget->is64Bit();
- bool IsWin64 = Subtarget->isTargetWin64();
+ bool IsWin64 = Subtarget->isCallingConvWin64(CallConv);
bool IsWindows = Subtarget->isTargetWindows();
StructReturnType SR = callIsStructReturn(Outs);
bool IsSibcall = false;
ArgLocs, *DAG.getContext());
// Allocate shadow area for Win64
- if (IsWin64) {
+ if (IsWin64)
CCInfo.AllocateStack(32, 8);
- }
CCInfo.AnalyzeCallOperands(Outs, CC_X86);
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
SelectionDAG &DAG) const {
- if (!IsTailCallConvention(CalleeCC) &&
- CalleeCC != CallingConv::C)
+ if (!IsTailCallConvention(CalleeCC) && !IsCCallConvention(CalleeCC))
return false;
// If -tailcallopt is specified, make fastcc functions tail-callable.
const MachineFunction &MF = DAG.getMachineFunction();
- const Function *CallerF = DAG.getMachineFunction().getFunction();
+ const Function *CallerF = MF.getFunction();
// If the function return type is x86_fp80 and the callee return type is not,
// then the FP_EXTEND of the call result is not a nop. It's not safe to
// perform a tail call.
CallingConv::ID CallerCC = CallerF->getCallingConv();
bool CCMatch = CallerCC == CalleeCC;
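+ // Win64-ness is now a per-function property, so the caller and the callee
+ // may follow different ABIs; compute both.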
+ bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CalleeCC);
+ bool IsCallerWin64 = Subtarget->isCallingConvWin64(CallerCC);
if (getTargetMachine().Options.GuaranteedTailCallOpt) {
if (IsTailCallConvention(CalleeCC) && CCMatch)
// Optimizing for varargs on Win64 is unlikely to be safe without
// additional testing.
- if (Subtarget->isTargetWin64())
+ if (IsCalleeWin64 || IsCallerWin64)
return false;
SmallVector<CCValAssign, 16> ArgLocs;
getTargetMachine(), ArgLocs, *DAG.getContext());
// Allocate shadow area for Win64
- if (Subtarget->isTargetWin64()) {
+ if (IsCalleeWin64)
CCInfo.AllocateStack(32, 8);
- }
CCInfo.AnalyzeCallOperands(Outs, CC_X86);
if (CCInfo.getNextStackOffset()) {
LLVMContext *Context = DAG.getContext();
// Build some magic constants: the high words of the doubles 2^52
// (0x43300000) and 2^84 (0x45300000).
- const uint32_t CV0[] = { 0x43300000, 0x45300000, 0, 0 };
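+ // Making the table static keeps it in read-only data instead of
+ // materializing it on the stack on every call.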
+ static const uint32_t CV0[] = { 0x43300000, 0x45300000, 0, 0 };
Constant *C0 = ConstantDataVector::get(*Context, CV0);
SDValue CPIdx0 = DAG.getConstantPool(C0, getPointerTy(), 16);
Opnds.push_back(N->getOperand(1));
for (unsigned Slot = 0, e = Opnds.size(); Slot < e; ++Slot) {
- SmallVector<SDValue, 8>::const_iterator I = Opnds.begin() + Slot;
+ SmallVectorImpl<SDValue>::const_iterator I = Opnds.begin() + Slot;
// BFS traverse all OR'd operands.
if (I->getOpcode() == ISD::OR) {
Opnds.push_back(I->getOperand(0));
SDValue EQ = DAG.getNode(X86ISD::PCMPEQ, dl, MVT::v4i32, Op0, Op1);
// Create masks for only the low parts/high parts of the 64 bit integers.
- const int MaskHi[] = { 1, 1, 3, 3 };
- const int MaskLo[] = { 0, 0, 2, 2 };
+ static const int MaskHi[] = { 1, 1, 3, 3 };
+ static const int MaskLo[] = { 0, 0, 2, 2 };
SDValue EQHi = DAG.getVectorShuffle(MVT::v4i32, dl, EQ, EQ, MaskHi);
SDValue GTLo = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskLo);
SDValue GTHi = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskHi);
SDValue Result = DAG.getNode(Opc, dl, MVT::v4i32, Op0, Op1);
// Make sure the lower and upper halves are both all-ones.
- const int Mask[] = { 1, 0, 3, 2 };
+ static const int Mask[] = { 1, 0, 3, 2 };
SDValue Shuf = DAG.getVectorShuffle(MVT::v4i32, dl, Result, Result, Mask);
Result = DAG.getNode(ISD::AND, dl, MVT::v4i32, Result, Shuf);
"Should not custom lower when pmuldq is available!");
// Extract the odd parts.
- const int UnpackMask[] = { 1, -1, 3, -1 };
+ static const int UnpackMask[] = { 1, -1, 3, -1 };
SDValue Aodds = DAG.getVectorShuffle(VT, dl, A, A, UnpackMask);
SDValue Bodds = DAG.getVectorShuffle(VT, dl, B, B, UnpackMask);
// Merge the two vectors back together with a shuffle. This expands into 2
// shuffles.
- const int ShufMask[] = { 0, 4, 2, 6 };
+ static const int ShufMask[] = { 0, 4, 2, 6 };
return DAG.getVectorShuffle(VT, dl, Evens, Odds, ShufMask);
}
return false;
}
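+/// \brief Return true if an FMA operation is faster than a pair of fmul and
+/// fadd instructions; fmuladd intrinsics will then be expanded to FMA, and
+/// otherwise to fmul followed by fadd.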
+bool
+X86TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
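+ // Fused multiply-add is only available (and profitable) with FMA3 or FMA4.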
+ if (!(Subtarget->hasFMA() || Subtarget->hasFMA4()))
+ return false;
+
+ VT = VT.getScalarType();
+
+ if (!VT.isSimple())
+ return false;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ case MVT::f32:
+ case MVT::f64:
+ return true;
+ default:
+ break;
+ }
+
+ return false;
+}
+
bool X86TargetLowering::isNarrowingProfitable(EVT VT1, EVT VT2) const {
// i16 instructions are longer (0x66 prefix) and potentially slower.
return !(VT1 == MVT::i32 && VT2 == MVT::i16);
} else {
// __chkstk(MSVCRT): does not update stack pointer.
// Clobbers R10, R11 and EFLAGS.
- // FIXME: RAX(allocated size) might be reused and not killed.
BuildMI(*BB, MI, DL, TII->get(X86::W64ALLOCA))
.addExternalSymbol("__chkstk")
.addReg(X86::RAX, RegState::Implicit)
.addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
- // RAX has the offset to subtracted from RSP.
+ // RAX has the offset to be subtracted from RSP.
BuildMI(*BB, MI, DL, TII->get(X86::SUB64rr), X86::RSP)
.addReg(X86::RSP)
.addReg(X86::RAX);
return SDValue();
}
+/// \brief Returns a vector of 0s if the input node is a vector logical
+/// shift by a constant amount that is known to be bigger than or equal
+/// to the vector element size in bits.
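+/// For example, a v4i32 logical shift left or right by a constant splat of
+/// 32 or more always yields a vector of all zeros.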
+static SDValue performShiftToAllZeros(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
+
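+ // Only handle the vector types that are lowered to the SSE2 128-bit shift
+ // instructions, or to the AVX2 256-bit ones when int256 is available.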
+ if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16 &&
+ (!Subtarget->hasInt256() ||
+ (VT != MVT::v4i64 && VT != MVT::v8i32 && VT != MVT::v16i16)))
+ return SDValue();
+
+ SDValue Amt = N->getOperand(1);
+ SDLoc DL(N);
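+ // The fold only applies when every lane is shifted by the same constant,
+ // i.e. when the shift amount operand is a constant splat.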
+ if (isSplatVector(Amt.getNode())) {
+ SDValue SclrAmt = Amt->getOperand(0);
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(SclrAmt)) {
+ APInt ShiftAmt = C->getAPIntValue();
+ unsigned MaxAmount = VT.getVectorElementType().getSizeInBits();
+
+ // SSE2/AVX2 logical shifts always return a vector of 0s
+ // if the shift amount is bigger than or equal to
+ // the element size. The constant shift amount will be
+ // encoded as an 8-bit immediate.
+ if (ShiftAmt.trunc(8).uge(MaxAmount))
+ return getZeroVector(VT, Subtarget, DAG, DL);
+ }
+ }
+
+ return SDValue();
+}
+
/// PerformShiftCombine - Combine shifts.
static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
if (V.getNode()) return V;
}
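+ // An arithmetic shift right by >= the element size splats the sign bit
+ // rather than producing zeros, so only logical shifts can be folded here.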
+ if (N->getOpcode() != ISD::SRA) {
+ // Try to fold this logical shift into a zero vector.
+ SDValue V = performShiftToAllZeros(N, DAG, Subtarget);
+ if (V.getNode()) return V;
+ }
+
return SDValue();
}