Factor out EH landing pad code into a separate function, and constify

[oota-llvm.git] / lib / Target / X86 / X86ISelLowering.cpp
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index bd268eca8f59d01d5a26621fe08c57fe00bdf124..6e6d2f5ad0bb82de5d0c9821d1f5fa91b9eab346 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -794,9 +794,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
      }
  
      // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
-    // FIXME: This produces lots of inefficiencies in isel since
-    // we then need notice that most of our operands have been implicitly
-    // converted to v2i64.
      for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v2i64; i++) {
        MVT::SimpleValueType SVT = (MVT::SimpleValueType)i;
        EVT VT = SVT;
@@ -1071,30 +1068,45 @@ unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const {
  
  /// getOptimalMemOpType - Returns the target specific optimal type for load
  /// and store operations as a result of memset, memcpy, and memmove
-/// lowering. It returns MVT::iAny if SelectionDAG should be responsible for
-/// determining it.
+/// lowering. If DstAlign is zero that means the destination alignment can
+/// satisfy any constraint. Similarly if SrcAlign is zero it means there
+/// isn't a need to check it against the alignment requirement,
+/// probably because the source does not need to be loaded. If
+/// 'NonScalarIntSafe' is true, that means it's safe to return a
+/// non-scalar-integer type, e.g. empty string source, constant, or loaded
+/// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
+/// constant so it does not need to be loaded.
+/// It returns EVT::Other if SelectionDAG should be responsible for
+/// determining the type.
  EVT
  X86TargetLowering::getOptimalMemOpType(uint64_t Size,
                                         unsigned DstAlign, unsigned SrcAlign,
+                                       bool NonScalarIntSafe,
+                                       bool MemcpyStrSrc,
                                         SelectionDAG &DAG) const {
    // FIXME: This turns off use of xmm stores for memset/memcpy on targets like
    // linux.  This is because the stack realignment code can't handle certain
    // cases like PR2962.  This should be removed when PR2962 is fixed.
    const Function *F = DAG.getMachineFunction().getFunction();
-  if (!F->hasFnAttr(Attribute::NoImplicitFloat)) {
+  if (NonScalarIntSafe &&
+      !F->hasFnAttr(Attribute::NoImplicitFloat)) {
      if (Size >= 16 &&
          (Subtarget->isUnalignedMemAccessFast() ||
-         (DstAlign == 0 || DstAlign >= 16) &&
-         (SrcAlign == 0 || SrcAlign >= 16)) &&
+         ((DstAlign == 0 || DstAlign >= 16) &&
+          (SrcAlign == 0 || SrcAlign >= 16))) &&
          Subtarget->getStackAlignment() >= 16) {
        if (Subtarget->hasSSE2())
          return MVT::v4i32;
        if (Subtarget->hasSSE1())
          return MVT::v4f32;
-    } else if (Size >= 8 &&
+    } else if (!MemcpyStrSrc && Size >= 8 &&
+               !Subtarget->is64Bit() &&
                 Subtarget->getStackAlignment() >= 8 &&
-               Subtarget->hasSSE2())
+               Subtarget->hasSSE2()) {
+      // Do not use f64 to lower memcpy if the source is a string constant.
+      // It's better to use i32 to avoid the loads.
        return MVT::f64;
+    }
    }
    if (Subtarget->is64Bit() && Size >= 8)
      return MVT::i64;
@@ -1144,8 +1156,7 @@ SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
    if (!Subtarget->is64Bit())
      // This doesn't have DebugLoc associated with it, but is not really the
      // same as a Register.
-    return DAG.getNode(X86ISD::GlobalBaseReg, DebugLoc::getUnknownLoc(),
-                       getPointerTy());
+    return DAG.getNode(X86ISD::GlobalBaseReg, DebugLoc(), getPointerTy());
    return Table;
  }
  
@@ -1300,7 +1311,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
      // If this is x86-64, and we disabled SSE, we can't return FP values
      if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
          ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
-      llvm_report_error("SSE register return with SSE disabled");
+      report_fatal_error("SSE register return with SSE disabled");
      }
  
      // If this is a call to a function that returns an fp value on the floating
@@ -1430,7 +1441,8 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
                            DebugLoc dl) {
    SDValue SizeNode     = DAG.getConstant(Flags.getByValSize(), MVT::i32);
    return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
-                       /*AlwaysInline=*/true, NULL, 0, NULL, 0);
+                       /*isVolatile*/false, /*AlwaysInline=*/true,
+                       NULL, 0, NULL, 0);
  }
  
  /// IsTailCallConvention - Return true if the calling convention is one that
@@ -1926,8 +1938,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
      if (!isTailCall) {
        Chain = DAG.getCopyToReg(Chain, dl, X86::EBX,
                                 DAG.getNode(X86ISD::GlobalBaseReg,
-                                           DebugLoc::getUnknownLoc(),
-                                           getPointerTy()),
+                                           DebugLoc(), getPointerTy()),
                                 InFlag);
        InFlag = Chain.getValue(1);
      } else {
@@ -2394,16 +2405,15 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
  }
  
  FastISel *
-X86TargetLowering::createFastISel(MachineFunction &mf, MachineModuleInfo *mmo,
-                            DwarfWriter *dw,
+X86TargetLowering::createFastISel(MachineFunction &mf,
                              DenseMap<const Value *, unsigned> &vm,
                              DenseMap<const BasicBlock*, MachineBasicBlock*> &bm,
                              DenseMap<const AllocaInst *, int> &am
  #ifndef NDEBUG
-                          , SmallSet<Instruction*, 8> &cil
+                          , SmallSet<const Instruction *, 8> &cil
  #endif
                                    ) {
-  return X86::createFastISel(mf, mmo, dw, vm, bm, am
+  return X86::createFastISel(mf, vm, bm, am
  #ifndef NDEBUG
                               , cil
  #endif
@@ -3437,7 +3447,7 @@ unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp, int NumElems,
  /// FIXME: split into pslldqi, psrldqi, palignr variants.
  static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
                            bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
-  int NumElems = SVOp->getValueType(0).getVectorNumElements();
+  unsigned NumElems = SVOp->getValueType(0).getVectorNumElements();
  
    isLeft = true;
    unsigned NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems, true, DAG);
@@ -3449,11 +3459,12 @@ static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
    }
    bool SeenV1 = false;
    bool SeenV2 = false;
-  for (int i = NumZeros; i < NumElems; ++i) {
-    int Val = isLeft ? (i - NumZeros) : i;
-    int Idx = SVOp->getMaskElt(isLeft ? i : (i - NumZeros));
-    if (Idx < 0)
+  for (unsigned i = NumZeros; i < NumElems; ++i) {
+    unsigned Val = isLeft ? (i - NumZeros) : i;
+    int Idx_ = SVOp->getMaskElt(isLeft ? i : (i - NumZeros));
+    if (Idx_ < 0)
        continue;
+    unsigned Idx = (unsigned) Idx_;
      if (Idx < NumElems)
        SeenV1 = true;
      else {
@@ -5056,7 +5067,7 @@ X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
    if (OpFlag) {
      Result = DAG.getNode(ISD::ADD, DL, getPointerTy(),
                           DAG.getNode(X86ISD::GlobalBaseReg,
-                                     DebugLoc::getUnknownLoc(), getPointerTy()),
+                                     DebugLoc(), getPointerTy()),
                           Result);
    }
  
@@ -5089,7 +5100,7 @@ SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) {
    if (OpFlag) {
      Result = DAG.getNode(ISD::ADD, DL, getPointerTy(),
                           DAG.getNode(X86ISD::GlobalBaseReg,
-                                     DebugLoc::getUnknownLoc(), getPointerTy()),
+                                     DebugLoc(), getPointerTy()),
                           Result);
    }
  
@@ -5125,8 +5136,7 @@ X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) {
        !Subtarget->is64Bit()) {
      Result = DAG.getNode(ISD::ADD, DL, getPointerTy(),
                           DAG.getNode(X86ISD::GlobalBaseReg,
-                                     DebugLoc::getUnknownLoc(),
-                                     getPointerTy()),
+                                     DebugLoc(), getPointerTy()),
                           Result);
    }
  
@@ -5248,8 +5258,7 @@ LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
    DebugLoc dl = GA->getDebugLoc();  // ? function entry point might be better
    SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
                                       DAG.getNode(X86ISD::GlobalBaseReg,
-                                                 DebugLoc::getUnknownLoc(),
-                                                 PtrVT), InFlag);
+                                                 DebugLoc(), PtrVT), InFlag);
    InFlag = Chain.getValue(1);
  
    return GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX, X86II::MO_TLSGD);
@@ -5271,7 +5280,7 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
    DebugLoc dl = GA->getDebugLoc();
    // Get the Thread Pointer
    SDValue Base = DAG.getNode(X86ISD::SegmentBaseAddress,
-                             DebugLoc::getUnknownLoc(), PtrVT,
+                             DebugLoc(), PtrVT,
                               DAG.getRegister(is64Bit? X86::FS : X86::GS,
                                               MVT::i32));
  
@@ -6547,6 +6556,7 @@ X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
                                             SDValue Chain,
                                             SDValue Dst, SDValue Src,
                                             SDValue Size, unsigned Align,
+                                           bool isVolatile,
                                             const Value *DstSV,
                                             uint64_t DstSVOff) {
    ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
@@ -6675,7 +6685,7 @@ X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
                                        DAG.getConstant(Offset, AddrVT)),
                            Src,
                            DAG.getConstant(BytesLeft, SizeVT),
-                          Align, DstSV, DstSVOff + Offset);
+                          Align, isVolatile, DstSV, DstSVOff + Offset);
    }
  
    // TODO: Use a Tokenfactor, as in memcpy, instead of a single chain.
@@ -6686,7 +6696,7 @@ SDValue
  X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
                                        SDValue Chain, SDValue Dst, SDValue Src,
                                        SDValue Size, unsigned Align,
-                                      bool AlwaysInline,
+                                      bool isVolatile, bool AlwaysInline,
                                        const Value *DstSV, uint64_t DstSVOff,
                                        const Value *SrcSV, uint64_t SrcSVOff) {
    // This requires the copy size to be a constant, preferrably
@@ -6718,7 +6728,7 @@ X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
                              Count, InFlag);
    InFlag = Chain.getValue(1);
    Chain  = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI :
-                                                             X86::EDI,
+                                                              X86::EDI,
                              Dst, InFlag);
    InFlag = Chain.getValue(1);
    Chain  = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RSI :
@@ -6745,7 +6755,7 @@ X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
                                      DAG.getNode(ISD::ADD, dl, SrcVT, Src,
                                                  DAG.getConstant(Offset, SrcVT)),
                                      DAG.getConstant(BytesLeft, SizeVT),
-                                    Align, AlwaysInline,
+                                    Align, isVolatile, AlwaysInline,
                                      DstSV, DstSVOff + Offset,
                                      SrcSV, SrcSVOff + Offset));
    }
@@ -6813,7 +6823,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) {
    SDValue SrcPtr = Op.getOperand(1);
    SDValue SrcSV = Op.getOperand(2);
  
-  llvm_report_error("VAArgInst is not yet implemented for x86-64!");
+  report_fatal_error("VAArgInst is not yet implemented for x86-64!");
    return SDValue();
  }
  
@@ -6828,8 +6838,8 @@ SDValue X86TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) {
    DebugLoc dl = Op.getDebugLoc();
  
    return DAG.getMemcpy(Chain, dl, DstPtr, SrcPtr,
-                       DAG.getIntPtrConstant(24), 8, false,
-                       DstSV, 0, SrcSV, 0);
+                       DAG.getIntPtrConstant(24), 8, /*isVolatile*/false,
+                       false, DstSV, 0, SrcSV, 0);
  }
  
  SDValue
@@ -7230,7 +7240,7 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
              InRegCount += (TD->getTypeSizeInBits(*I) + 31) / 32;
  
          if (InRegCount > 2) {
-          llvm_report_error("Nest register in use - reduce number of inreg parameters!");
+          report_fatal_error("Nest register in use - reduce number of inreg parameters!");
          }
        }
        break;