Add a new codegen pass that normalizes dwarf exception handling

[oota-llvm.git] / lib / CodeGen / SelectionDAG / SelectionDAGBuild.cpp
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp

index fd15603d9901e4283af4ffe7ed9f0b75d68bd3cd..889d7f5dd93444fe43af80560f4c2354cff291c2 100644 (file)
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
@@ -45,7 +45,6 @@
  #include "llvm/Target/TargetInstrInfo.h"
  #include "llvm/Target/TargetIntrinsicInfo.h"
  #include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
  #include "llvm/Target/TargetOptions.h"
  #include "llvm/Support/Compiler.h"
  #include "llvm/Support/CommandLine.h"
@@ -129,12 +128,15 @@ static void ComputeValueVTs(const TargetLowering &TLI, const Type *Ty,
    // Given an array type, recursively traverse the elements.
    if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
      const Type *EltTy = ATy->getElementType();
-    uint64_t EltSize = TLI.getTargetData()->getTypePaddedSize(EltTy);
+    uint64_t EltSize = TLI.getTargetData()->getTypeAllocSize(EltTy);
      for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
        ComputeValueVTs(TLI, EltTy, ValueVTs, Offsets,
                        StartingOffset + i * EltSize);
      return;
    }
+  // Interpret void as zero return values.
+  if (Ty == Type::VoidTy)
+    return;
    // Base case: we can get an MVT for this LLVM IR type.
    ValueVTs.push_back(TLI.getValueType(Ty));
    if (Offsets)
@@ -292,7 +294,7 @@ void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf,
      if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
        if (ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) {
          const Type *Ty = AI->getAllocatedType();
-        uint64_t TySize = TLI.getTargetData()->getTypePaddedSize(Ty);
+        uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
          unsigned Align =
            std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty),
                     AI->getAlignment());
@@ -329,15 +331,12 @@ void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf,
            switch (F->getIntrinsicID()) {
            default: break;
            case Intrinsic::dbg_stoppoint: {
-            DwarfWriter *DW = DAG.getDwarfWriter();
              DbgStopPointInst *SPI = cast<DbgStopPointInst>(I);
  
-            if (DW && DW->ValidDebugInfo(SPI->getContext())) {
+            if (DIDescriptor::ValidDebugInfo(SPI->getContext(),
+                                             CodeGenOpt::Default)) {
                DICompileUnit CU(cast<GlobalVariable>(SPI->getContext()));
-              std::string Dir, FN;
-              unsigned SrcFile = DW->getOrCreateSourceID(CU.getDirectory(Dir),
-                                                         CU.getFilename(FN));
-              unsigned idx = MF->getOrCreateDebugLocID(SrcFile,
+              unsigned idx = MF->getOrCreateDebugLocID(CU.getGV(),
                                                         SPI->getLine(),
                                                         SPI->getColumn());
                DL = DebugLoc::get(idx);
@@ -346,20 +345,15 @@ void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf,
              break;
            }
            case Intrinsic::dbg_func_start: {
-            DwarfWriter *DW = DAG.getDwarfWriter();
-            if (DW) {
-              DbgFuncStartInst *FSI = cast<DbgFuncStartInst>(I);
-              Value *SP = FSI->getSubprogram();
-
-              if (DW->ValidDebugInfo(SP)) {
-                DISubprogram Subprogram(cast<GlobalVariable>(SP));
-                DICompileUnit CU(Subprogram.getCompileUnit());
-                std::string Dir, FN;
-                unsigned SrcFile = DW->getOrCreateSourceID(CU.getDirectory(Dir),
-                                                           CU.getFilename(FN));
-                unsigned Line = Subprogram.getLineNumber();
-                DL = DebugLoc::get(MF->getOrCreateDebugLocID(SrcFile, Line, 0));
-              }
+            DbgFuncStartInst *FSI = cast<DbgFuncStartInst>(I);
+            Value *SP = FSI->getSubprogram();
+
+            if (DIDescriptor::ValidDebugInfo(SP, CodeGenOpt::Default)) {
+              DISubprogram Subprogram(cast<GlobalVariable>(SP));
+              DICompileUnit CU(Subprogram.getCompileUnit());
+              unsigned Line = Subprogram.getLineNumber();
+              DL = DebugLoc::get(MF->getOrCreateDebugLocID(CU.getGV(),
+                                                           Line, 0));
              }
  
              break;
@@ -432,7 +426,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl,
  
    if (NumParts > 1) {
      // Assemble the value from multiple parts.
-    if (!ValueVT.isVector()) {
+    if (!ValueVT.isVector() && ValueVT.isInteger()) {
        unsigned PartBits = PartVT.getSizeInBits();
        unsigned ValueBits = ValueVT.getSizeInBits();
  
@@ -444,9 +438,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl,
          ValueVT : MVT::getIntegerVT(RoundBits);
        SDValue Lo, Hi;
  
-      MVT HalfVT = ValueVT.isInteger() ?
-        MVT::getIntegerVT(RoundBits/2) :
-        MVT::getFloatingPointVT(RoundBits/2);
+      MVT HalfVT = MVT::getIntegerVT(RoundBits/2);
  
        if (RoundParts > 2) {
          Lo = getCopyFromParts(DAG, dl, Parts, RoundParts/2, PartVT, HalfVT);
@@ -479,7 +471,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl,
          Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, TotalVT, Lo);
          Val = DAG.getNode(ISD::OR, dl, TotalVT, Lo, Hi);
        }
-    } else {
+    } else if (ValueVT.isVector()) {
        // Handle a multi-element vector.
        MVT IntermediateVT, RegisterVT;
        unsigned NumIntermediates;
@@ -516,6 +508,22 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl,
        Val = DAG.getNode(IntermediateVT.isVector() ?
                          ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, dl,
                          ValueVT, &Ops[0], NumIntermediates);
+    } else if (PartVT.isFloatingPoint()) {
+      // FP split into multiple FP parts (for ppcf128)
+      assert(ValueVT == MVT(MVT::ppcf128) && PartVT == MVT(MVT::f64) &&
+             "Unexpected split");
+      SDValue Lo, Hi;
+      Lo = DAG.getNode(ISD::BIT_CONVERT, dl, MVT(MVT::f64), Parts[0]);
+      Hi = DAG.getNode(ISD::BIT_CONVERT, dl, MVT(MVT::f64), Parts[1]);
+      if (TLI.isBigEndian())
+        std::swap(Lo, Hi);
+      Val = DAG.getNode(ISD::BUILD_PAIR, dl, ValueVT, Lo, Hi);
+    } else {
+      // FP split into integer parts (soft fp)
+      assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
+             !PartVT.isVector() && "Unexpected split");
+      MVT IntVT = MVT::getIntegerVT(ValueVT.getSizeInBits());
+      Val = getCopyFromParts(DAG, dl, Parts, NumParts, PartVT, IntVT);
      }
    }
  
@@ -867,8 +875,7 @@ SDValue SelectionDAGLowering::getValue(const Value *V) {
      if (ConstantFP *CFP = dyn_cast<ConstantFP>(C))
        return N = DAG.getConstantFP(*CFP, VT);
  
-    if (isa<UndefValue>(C) && !isa<VectorType>(V->getType()) &&
-        !V->getType()->isAggregateType())
+    if (isa<UndefValue>(C) && !V->getType()->isAggregateType())
        return N = DAG.getUNDEF(VT);
  
      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
@@ -922,14 +929,11 @@ SDValue SelectionDAGLowering::getValue(const Value *V) {
        for (unsigned i = 0; i != NumElements; ++i)
          Ops.push_back(getValue(CP->getOperand(i)));
      } else {
-      assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) &&
-             "Unknown vector constant!");
+      assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!");
        MVT EltVT = TLI.getValueType(VecTy->getElementType());
  
        SDValue Op;
-      if (isa<UndefValue>(C))
-        Op = DAG.getUNDEF(EltVT);
-      else if (EltVT.isFloatingPoint())
+      if (EltVT.isFloatingPoint())
          Op = DAG.getConstantFP(0, EltVT);
        else
          Op = DAG.getConstant(0, EltVT);
@@ -1017,6 +1021,17 @@ void SelectionDAGLowering::visitRet(ReturnInst &I) {
                            &NewValues[0], NewValues.size()));
  }
  
+/// CopyToExportRegsIfNeeded - If the given value has at least one use and
+/// FuncInfo.ValueMap holds a virtual register for it, emit nodes to copy
+/// the value into that virtual register; otherwise do nothing.
+void SelectionDAGLowering::CopyToExportRegsIfNeeded(Value *V) {
+  if (!V->use_empty()) {
+    DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
+    if (VMI != FuncInfo.ValueMap.end())
+      CopyValueToVirtualRegister(V, VMI->second);
+  }
+}
+
  /// ExportFromCurrentBlock - If this condition isn't known to be exported from
  /// the current basic block, add it to ValueMap now so that we'll get a
  /// CopyTo/FromReg.
@@ -1569,11 +1584,7 @@ void SelectionDAGLowering::visitInvoke(InvokeInst &I) {
  
    // If the value of the invoke is used outside of its defining block, make it
    // available as a virtual register.
-  if (!I.use_empty()) {
-    DenseMap<const Value*, unsigned>::iterator VMI = FuncInfo.ValueMap.find(&I);
-    if (VMI != FuncInfo.ValueMap.end())
-      CopyValueToVirtualRegister(&I, VMI->second);
-  }
+  CopyToExportRegsIfNeeded(&I);
  
    // Update successor info
    CurMBB->addSuccessor(Return);
@@ -1926,6 +1937,10 @@ bool SelectionDAGLowering::handleBitTestsSwitchCase(CaseRec& CR,
    // inserting any additional MBBs necessary to represent the switch.
    MachineFunction *CurMF = CurMBB->getParent();
  
+  // If target does not have legal shift left, do not emit bit tests at all.
+  if (!TLI.isOperationLegal(ISD::SHL, TLI.getPointerTy()))
+    return false;
+
    size_t numCmps = 0;
    for (CaseItr I = CR.Range.first, E = CR.Range.second;
         I!=E; ++I) {
@@ -2190,8 +2205,24 @@ void SelectionDAGLowering::visitBinary(User &I, unsigned OpCode) {
  void SelectionDAGLowering::visitShift(User &I, unsigned Opcode) {
    SDValue Op1 = getValue(I.getOperand(0));
    SDValue Op2 = getValue(I.getOperand(1));
-  if (!isa<VectorType>(I.getType())) {
-    if (TLI.getPointerTy().bitsLT(Op2.getValueType()))
+  if (!isa<VectorType>(I.getType()) &&
+      Op2.getValueType() != TLI.getShiftAmountTy()) {
+    // If the operand is smaller than the shift count type, promote it.
+    if (TLI.getShiftAmountTy().bitsGT(Op2.getValueType()))
+      Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(),
+                        TLI.getShiftAmountTy(), Op2);
+    // If the operand is larger than the shift count type but the shift
+    // count type has enough bits to represent any shift value, truncate
+    // it now. This is a common case and it exposes the truncate to
+    // optimization early.
+    else if (TLI.getShiftAmountTy().getSizeInBits() >=
+             Log2_32_Ceil(Op2.getValueType().getSizeInBits()))
+      Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
+                        TLI.getShiftAmountTy(), Op2);
+    // Otherwise we'll need to temporarily settle for some other
+    // convenient type; type legalization will make adjustments as
+    // needed.
+    else if (TLI.getPointerTy().bitsLT(Op2.getValueType()))
        Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
                          TLI.getPointerTy(), Op2);
      else if (TLI.getPointerTy().bitsGT(Op2.getValueType()))
@@ -2409,37 +2440,42 @@ void SelectionDAGLowering::visitExtractElement(User &I) {
  
  // Utility for visitShuffleVector - Returns true if the mask is mask starting
  // from SIndx and increasing to the element length (undefs are allowed).
-static bool SequentialMask(SDValue Mask, unsigned SIndx) {
-  unsigned MaskNumElts = Mask.getNumOperands();
-  for (unsigned i = 0; i != MaskNumElts; ++i) {
-    if (Mask.getOperand(i).getOpcode() != ISD::UNDEF) {
-      unsigned Idx = cast<ConstantSDNode>(Mask.getOperand(i))->getZExtValue();
-      if (Idx != i + SIndx)
-        return false;
-    }
-  }
+static bool SequentialMask(SmallVectorImpl<int> &Mask, unsigned SIndx) {
+  unsigned MaskNumElts = Mask.size();
+  for (unsigned i = 0; i != MaskNumElts; ++i)
+    if ((Mask[i] >= 0) && (Mask[i] != (int)(i + SIndx)))
+      return false;
    return true;
  }
  
  void SelectionDAGLowering::visitShuffleVector(User &I) {
+  SmallVector<int, 8> Mask;
    SDValue Src1 = getValue(I.getOperand(0));
    SDValue Src2 = getValue(I.getOperand(1));
-  SDValue Mask = getValue(I.getOperand(2));
  
+  // Convert the ConstantVector mask operand into an array of ints, with -1
+  // representing undef values.
+  SmallVector<Constant*, 8> MaskElts;
+  cast<Constant>(I.getOperand(2))->getVectorElements(MaskElts);
+  unsigned MaskNumElts = MaskElts.size();
+  for (unsigned i = 0; i != MaskNumElts; ++i) {
+    if (isa<UndefValue>(MaskElts[i]))
+      Mask.push_back(-1);
+    else
+      Mask.push_back(cast<ConstantInt>(MaskElts[i])->getSExtValue());
+  }
+  
    MVT VT = TLI.getValueType(I.getType());
    MVT SrcVT = Src1.getValueType();
-  int MaskNumElts = Mask.getNumOperands();
-  int SrcNumElts = SrcVT.getVectorNumElements();
+  unsigned SrcNumElts = SrcVT.getVectorNumElements();
  
    if (SrcNumElts == MaskNumElts) {
-    setValue(&I, DAG.getNode(ISD::VECTOR_SHUFFLE, getCurDebugLoc(),
-                             VT, Src1, Src2, Mask));
+    setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
+                                      &Mask[0]));
      return;
    }
  
    // Normalize the shuffle vector since mask and vector length don't match.
-  MVT MaskEltVT = Mask.getValueType().getVectorElementType();
-
    if (SrcNumElts < MaskNumElts && MaskNumElts % SrcNumElts == 0) {
      // Mask is longer than the source vectors and is a multiple of the source
      // vectors.  We can use concatenate vector to make the mask and vectors
@@ -2453,82 +2489,57 @@ void SelectionDAGLowering::visitShuffleVector(User &I) {
  
      // Pad both vectors with undefs to make them the same length as the mask.
      unsigned NumConcat = MaskNumElts / SrcNumElts;
+    bool Src1U = Src1.getOpcode() == ISD::UNDEF;
+    bool Src2U = Src2.getOpcode() == ISD::UNDEF;
      SDValue UndefVal = DAG.getUNDEF(SrcVT);
  
-    SDValue* MOps1 = new SDValue[NumConcat];
-    SDValue* MOps2 = new SDValue[NumConcat];
+    SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal);
+    SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal);
      MOps1[0] = Src1;
      MOps2[0] = Src2;
-    for (unsigned i = 1; i != NumConcat; ++i) {
-      MOps1[i] = UndefVal;
-      MOps2[i] = UndefVal;
-    }
-    Src1 = DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
-                       VT, MOps1, NumConcat);
-    Src2 = DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
-                       VT, MOps2, NumConcat);
-
-    delete [] MOps1;
-    delete [] MOps2;
+    
+    Src1 = Src1U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS, 
+                                                  getCurDebugLoc(), VT, 
+                                                  &MOps1[0], NumConcat);
+    Src2 = Src2U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
+                                                  getCurDebugLoc(), VT, 
+                                                  &MOps2[0], NumConcat);
  
      // Readjust mask for new input vector length.
-    SmallVector<SDValue, 8> MappedOps;
-    for (int i = 0; i != MaskNumElts; ++i) {
-      if (Mask.getOperand(i).getOpcode() == ISD::UNDEF) {
-        MappedOps.push_back(Mask.getOperand(i));
-      } else {
-        int Idx = cast<ConstantSDNode>(Mask.getOperand(i))->getZExtValue();
-        if (Idx < SrcNumElts)
-          MappedOps.push_back(DAG.getConstant(Idx, MaskEltVT));
-        else
-          MappedOps.push_back(DAG.getConstant(Idx + MaskNumElts - SrcNumElts,
-                                              MaskEltVT));
-      }
+    SmallVector<int, 8> MappedOps;
+    for (unsigned i = 0; i != MaskNumElts; ++i) {
+      int Idx = Mask[i];
+      if (Idx < (int)SrcNumElts)
+        MappedOps.push_back(Idx);
+      else
+        MappedOps.push_back(Idx + MaskNumElts - SrcNumElts);
      }
-    Mask = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
-                       Mask.getValueType(),
-                       &MappedOps[0], MappedOps.size());
-
-    setValue(&I, DAG.getNode(ISD::VECTOR_SHUFFLE, getCurDebugLoc(),
-                             VT, Src1, Src2, Mask));
+    setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, 
+                                      &MappedOps[0]));
      return;
    }
  
    if (SrcNumElts > MaskNumElts) {
-    // Resulting vector is shorter than the incoming vector.
-    if (SrcNumElts == MaskNumElts && SequentialMask(Mask,0)) {
-      // Shuffle extracts 1st vector.
-      setValue(&I, Src1);
-      return;
-    }
-
-    if (SrcNumElts == MaskNumElts && SequentialMask(Mask,MaskNumElts)) {
-      // Shuffle extracts 2nd vector.
-      setValue(&I, Src2);
-      return;
-    }
-
      // Analyze the access pattern of the vector to see if we can extract
      // two subvectors and do the shuffle. The analysis is done by calculating
      // the range of elements the mask access on both vectors.
      int MinRange[2] = { SrcNumElts+1, SrcNumElts+1};
      int MaxRange[2] = {-1, -1};
  
-    for (int i = 0; i != MaskNumElts; ++i) {
-      SDValue Arg = Mask.getOperand(i);
-      if (Arg.getOpcode() != ISD::UNDEF) {
-        assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
-        int Idx = cast<ConstantSDNode>(Arg)->getZExtValue();
-        int Input = 0;
-        if (Idx >= SrcNumElts) {
-          Input = 1;
-          Idx -= SrcNumElts;
-        }
-        if (Idx > MaxRange[Input])
-          MaxRange[Input] = Idx;
-        if (Idx < MinRange[Input])
-          MinRange[Input] = Idx;
+    for (unsigned i = 0; i != MaskNumElts; ++i) {
+      int Idx = Mask[i];
+      int Input = 0;
+      if (Idx < 0)
+        continue;
+      
+      if (Idx >= (int)SrcNumElts) {
+        Input = 1;
+        Idx -= SrcNumElts;
        }
+      if (Idx > MaxRange[Input])
+        MaxRange[Input] = Idx;
+      if (Idx < MinRange[Input])
+        MinRange[Input] = Idx;
      }
  
      // Check if the access is smaller than the vector size and can we find
@@ -2536,18 +2547,18 @@ void SelectionDAGLowering::visitShuffleVector(User &I) {
      int RangeUse[2] = { 2, 2 };  // 0 = Unused, 1 = Extract, 2 = Can not Extract.
      int StartIdx[2];  // StartIdx to extract from
      for (int Input=0; Input < 2; ++Input) {
-      if (MinRange[Input] == SrcNumElts+1 && MaxRange[Input] == -1) {
+      if (MinRange[Input] == (int)(SrcNumElts+1) && MaxRange[Input] == -1) {
          RangeUse[Input] = 0; // Unused
          StartIdx[Input] = 0;
-      } else if (MaxRange[Input] - MinRange[Input] < MaskNumElts) {
+      } else if (MaxRange[Input] - MinRange[Input] < (int)MaskNumElts) {
          // Fits within range but we should see if we can find a good
          // start index that is a multiple of the mask length.
-        if (MaxRange[Input] < MaskNumElts) {
+        if (MaxRange[Input] < (int)MaskNumElts) {
            RangeUse[Input] = 1; // Extract from beginning of the vector
            StartIdx[Input] = 0;
          } else {
            StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts;
-          if (MaxRange[Input] - StartIdx[Input] < MaskNumElts &&
+          if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts &&
                StartIdx[Input] + MaskNumElts < SrcNumElts)
              RangeUse[Input] = 1; // Extract from a multiple of the mask length.
          }
@@ -2570,26 +2581,18 @@ void SelectionDAGLowering::visitShuffleVector(User &I) {
          }
        }
        // Calculate new mask.
-      SmallVector<SDValue, 8> MappedOps;
-      for (int i = 0; i != MaskNumElts; ++i) {
-        SDValue Arg = Mask.getOperand(i);
-        if (Arg.getOpcode() == ISD::UNDEF) {
-          MappedOps.push_back(Arg);
-        } else {
-          int Idx = cast<ConstantSDNode>(Arg)->getZExtValue();
-          if (Idx < SrcNumElts)
-            MappedOps.push_back(DAG.getConstant(Idx - StartIdx[0], MaskEltVT));
-          else {
-            Idx = Idx - SrcNumElts - StartIdx[1] + MaskNumElts;
-            MappedOps.push_back(DAG.getConstant(Idx, MaskEltVT));
-          }
-        }
+      SmallVector<int, 8> MappedOps;
+      for (unsigned i = 0; i != MaskNumElts; ++i) {
+        int Idx = Mask[i];
+        if (Idx < 0)
+          MappedOps.push_back(Idx);
+        else if (Idx < (int)SrcNumElts)
+          MappedOps.push_back(Idx - StartIdx[0]);
+        else
+          MappedOps.push_back(Idx - SrcNumElts - StartIdx[1] + MaskNumElts);
        }
-      Mask = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
-                         Mask.getValueType(),
-                         &MappedOps[0], MappedOps.size());
-      setValue(&I, DAG.getNode(ISD::VECTOR_SHUFFLE, getCurDebugLoc(),
-                               VT, Src1, Src2, Mask));
+      setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
+                                        &MappedOps[0]));
        return;
      }
    }
@@ -2600,14 +2603,12 @@ void SelectionDAGLowering::visitShuffleVector(User &I) {
    MVT EltVT = VT.getVectorElementType();
    MVT PtrVT = TLI.getPointerTy();
    SmallVector<SDValue,8> Ops;
-  for (int i = 0; i != MaskNumElts; ++i) {
-    SDValue Arg = Mask.getOperand(i);
-    if (Arg.getOpcode() == ISD::UNDEF) {
+  for (unsigned i = 0; i != MaskNumElts; ++i) {
+    if (Mask[i] < 0) {
        Ops.push_back(DAG.getUNDEF(EltVT));
      } else {
-      assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
-      int Idx = cast<ConstantSDNode>(Arg)->getZExtValue();
-      if (Idx < SrcNumElts)
+      int Idx = Mask[i];
+      if (Idx < (int)SrcNumElts)
          Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
                                    EltVT, Src1, DAG.getConstant(Idx, PtrVT)));
        else
@@ -2713,7 +2714,7 @@ void SelectionDAGLowering::visitGetElementPtr(User &I) {
        if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
          if (CI->getZExtValue() == 0) continue;
          uint64_t Offs =
-            TD->getTypePaddedSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
+            TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
          SDValue OffsVal;
          unsigned PtrBits = TLI.getPointerTy().getSizeInBits();
          if (PtrBits < 64) {
@@ -2728,7 +2729,7 @@ void SelectionDAGLowering::visitGetElementPtr(User &I) {
        }
  
        // N = N + Idx * ElementSize;
-      uint64_t ElementSize = TD->getTypePaddedSize(Ty);
+      uint64_t ElementSize = TD->getTypeAllocSize(Ty);
        SDValue IdxN = getValue(Idx);
  
        // If the index is smaller or larger than intptr_t, truncate or extend
@@ -2769,7 +2770,7 @@ void SelectionDAGLowering::visitAlloca(AllocaInst &I) {
      return;   // getValue will auto-populate this.
  
    const Type *Ty = I.getAllocatedType();
-  uint64_t TySize = TLI.getTargetData()->getTypePaddedSize(Ty);
+  uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
    unsigned Align =
      std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty),
               I.getAlignment());
@@ -2809,10 +2810,9 @@ void SelectionDAGLowering::visitAlloca(AllocaInst &I) {
                            DAG.getIntPtrConstant(~(uint64_t)(StackAlign-1)));
  
    SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align) };
-  const MVT *VTs = DAG.getNodeValueTypes(AllocSize.getValueType(),
-                                                    MVT::Other);
+  SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
    SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurDebugLoc(),
-                            VTs, 2, Ops, 3);
+                            VTs, Ops, 3);
    setValue(&I, DSA);
    DAG.setRoot(DSA.getValue(1));
  
@@ -2949,7 +2949,7 @@ void SelectionDAGLowering::visitTargetIntrinsic(CallInst &I,
      Ops.push_back(Op);
    }
  
-  std::vector<MVT> VTs;
+  std::vector<MVT> VTArray;
    if (I.getType() != Type::VoidTy) {
      MVT VT = TLI.getValueType(I.getType());
      if (VT.isVector()) {
@@ -2961,36 +2961,32 @@ void SelectionDAGLowering::visitTargetIntrinsic(CallInst &I,
      }
  
      assert(TLI.isTypeLegal(VT) && "Intrinsic uses a non-legal type?");
-    VTs.push_back(VT);
+    VTArray.push_back(VT);
    }
    if (HasChain)
-    VTs.push_back(MVT::Other);
+    VTArray.push_back(MVT::Other);
  
-  const MVT *VTList = DAG.getNodeValueTypes(VTs);
+  SDVTList VTs = DAG.getVTList(&VTArray[0], VTArray.size());
  
    // Create the node.
    SDValue Result;
    if (IsTgtIntrinsic) {
      // This is target intrinsic that touches memory
      Result = DAG.getMemIntrinsicNode(Info.opc, getCurDebugLoc(),
-                                     VTList, VTs.size(),
-                                     &Ops[0], Ops.size(),
+                                     VTs, &Ops[0], Ops.size(),
                                       Info.memVT, Info.ptrVal, Info.offset,
                                       Info.align, Info.vol,
                                       Info.readMem, Info.writeMem);
    }
    else if (!HasChain)
      Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurDebugLoc(),
-                         VTList, VTs.size(),
-                         &Ops[0], Ops.size());
+                         VTs, &Ops[0], Ops.size());
    else if (I.getType() != Type::VoidTy)
      Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurDebugLoc(),
-                         VTList, VTs.size(),
-                         &Ops[0], Ops.size());
+                         VTs, &Ops[0], Ops.size());
    else
      Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurDebugLoc(),
-                         VTList, VTs.size(),
-                         &Ops[0], Ops.size());
+                         VTs, &Ops[0], Ops.size());
  
    if (HasChain) {
      SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1);
@@ -3137,11 +3133,8 @@ SelectionDAGLowering::implVisitAluOverflow(CallInst &I, ISD::NodeType Op) {
    SDValue Op1 = getValue(I.getOperand(1));
    SDValue Op2 = getValue(I.getOperand(2));
  
-  MVT ValueVTs[] = { Op1.getValueType(), MVT::i1 };
-  SDValue Ops[] = { Op1, Op2 };
-
-  SDValue Result = DAG.getNode(Op, getCurDebugLoc(),
-                               DAG.getVTList(&ValueVTs[0], 2), &Ops[0], 2);
+  SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1);
+  SDValue Result = DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2);
  
    setValue(&I, Result);
    return 0;
@@ -3911,34 +3904,32 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
      return 0;
    }
    case Intrinsic::dbg_stoppoint: {
-    DwarfWriter *DW = DAG.getDwarfWriter();
      DbgStopPointInst &SPI = cast<DbgStopPointInst>(I);
-    if (DW && DW->ValidDebugInfo(SPI.getContext())) {
+    if (DIDescriptor::ValidDebugInfo(SPI.getContext(), OptLevel)) {
        MachineFunction &MF = DAG.getMachineFunction();
-      if (Fast)
-        DAG.setRoot(DAG.getDbgStopPoint(getRoot(),
+      DICompileUnit CU(cast<GlobalVariable>(SPI.getContext()));
+      DebugLoc Loc = DebugLoc::get(MF.getOrCreateDebugLocID(CU.getGV(),
+                                              SPI.getLine(), SPI.getColumn()));
+      setCurDebugLoc(Loc);
+      
+      if (OptLevel == CodeGenOpt::None)
+        DAG.setRoot(DAG.getDbgStopPoint(Loc, getRoot(),
                                          SPI.getLine(),
                                          SPI.getColumn(),
                                          SPI.getContext()));
-      DICompileUnit CU(cast<GlobalVariable>(SPI.getContext()));
-      std::string Dir, FN;
-      unsigned SrcFile = DW->getOrCreateSourceID(CU.getDirectory(Dir),
-                                                 CU.getFilename(FN));
-      unsigned idx = MF.getOrCreateDebugLocID(SrcFile,
-                                              SPI.getLine(), SPI.getColumn());
-      setCurDebugLoc(DebugLoc::get(idx));
      }
      return 0;
    }
    case Intrinsic::dbg_region_start: {
      DwarfWriter *DW = DAG.getDwarfWriter();
      DbgRegionStartInst &RSI = cast<DbgRegionStartInst>(I);
-    if (DW && DW->ValidDebugInfo(RSI.getContext())) {
+
+    if (DIDescriptor::ValidDebugInfo(RSI.getContext(), OptLevel) &&
+        DW && DW->ShouldEmitDwarfDebug()) {
        unsigned LabelID =
          DW->RecordRegionStart(cast<GlobalVariable>(RSI.getContext()));
-      if (Fast)
-        DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(),
-                                 getRoot(), LabelID));
+      DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(),
+                               getRoot(), LabelID));
      }
  
      return 0;
@@ -3946,54 +3937,116 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
    case Intrinsic::dbg_region_end: {
      DwarfWriter *DW = DAG.getDwarfWriter();
      DbgRegionEndInst &REI = cast<DbgRegionEndInst>(I);
-    if (DW && DW->ValidDebugInfo(REI.getContext())) {
-      unsigned LabelID =
-        DW->RecordRegionEnd(cast<GlobalVariable>(REI.getContext()));
-      if (Fast)
+
+    if (DIDescriptor::ValidDebugInfo(REI.getContext(), OptLevel) &&
+        DW && DW->ShouldEmitDwarfDebug()) {
+      MachineFunction &MF = DAG.getMachineFunction();
+      DISubprogram Subprogram(cast<GlobalVariable>(REI.getContext()));
+
+      if (Subprogram.isNull() || Subprogram.describes(MF.getFunction())) {
+        unsigned LabelID =
+          DW->RecordRegionEnd(cast<GlobalVariable>(REI.getContext()));
          DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(),
                                   getRoot(), LabelID));
+      } else {
+        // This is end of inlined function. Debugging information for inlined
+        // function is not handled yet (only supported by FastISel).
+        if (OptLevel == CodeGenOpt::None) {
+          unsigned ID = DW->RecordInlinedFnEnd(Subprogram);
+          if (ID != 0)
+            // Returned ID is 0 if this is unbalanced "end of inlined
+            // scope". This could happen if optimizer eats dbg intrinsics or
+            // "beginning of inlined scope" is not recognized due to missing
+            // location info. In such cases, do ignore this region.end.
+            DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(), 
+                                     getRoot(), ID));
+        }
+      }
      }
  
      return 0;
    }
    case Intrinsic::dbg_func_start: {
      DwarfWriter *DW = DAG.getDwarfWriter();
-    if (!DW) return 0;
      DbgFuncStartInst &FSI = cast<DbgFuncStartInst>(I);
      Value *SP = FSI.getSubprogram();
-    if (SP && DW->ValidDebugInfo(SP)) {
+    if (!DIDescriptor::ValidDebugInfo(SP, OptLevel))
+      return 0;
+
+    MachineFunction &MF = DAG.getMachineFunction();
+    if (OptLevel == CodeGenOpt::None) {
+      // llvm.dbg.func.start implicitly defines a dbg_stoppoint which is what
+      // (most?) gdb expects.
+      DebugLoc PrevLoc = CurDebugLoc;
+      DISubprogram Subprogram(cast<GlobalVariable>(SP));
+      DICompileUnit CompileUnit = Subprogram.getCompileUnit();
+
+      if (!Subprogram.describes(MF.getFunction())) {
+        // This is a beginning of an inlined function.
+
+        // If llvm.dbg.func.start is seen in a new block before any
+        // llvm.dbg.stoppoint intrinsic then the location info is unknown.
+        // FIXME: Why is DebugLoc reset at the beginning of each block?
+        if (PrevLoc.isUnknown())
+          return 0;
+
+        // Record the source line.
+        unsigned Line = Subprogram.getLineNumber();
+        setCurDebugLoc(DebugLoc::get(
+                     MF.getOrCreateDebugLocID(CompileUnit.getGV(), Line, 0)));
+
+        if (DW && DW->ShouldEmitDwarfDebug()) {
+          DebugLocTuple PrevLocTpl = MF.getDebugLocTuple(PrevLoc);
+          unsigned LabelID = DW->RecordInlinedFnStart(Subprogram,
+                                          DICompileUnit(PrevLocTpl.CompileUnit),
+                                          PrevLocTpl.Line,
+                                          PrevLocTpl.Col);
+          DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(),
+                                   getRoot(), LabelID));
+        }
+      } else {
+        // Record the source line.
+        unsigned Line = Subprogram.getLineNumber();
+        MF.setDefaultDebugLoc(DebugLoc::get(
+                     MF.getOrCreateDebugLocID(CompileUnit.getGV(), Line, 0)));
+        if (DW && DW->ShouldEmitDwarfDebug()) {
+          // llvm.dbg.func_start also defines beginning of function scope.
+          DW->RecordRegionStart(cast<GlobalVariable>(FSI.getSubprogram()));
+        }
+      }
+    } else {
+      DISubprogram Subprogram(cast<GlobalVariable>(SP));
+
+      std::string SPName;
+      Subprogram.getLinkageName(SPName);
+      if (!SPName.empty()
+          && strcmp(SPName.c_str(), MF.getFunction()->getNameStart())) {
+        // This is beginning of inlined function. Debugging information for
+        // inlined function is not handled yet (only supported by FastISel).
+        return 0;
+      }
+
        // llvm.dbg.func.start implicitly defines a dbg_stoppoint which is
        // what (most?) gdb expects.
-      MachineFunction &MF = DAG.getMachineFunction();
-      DISubprogram Subprogram(cast<GlobalVariable>(SP));
        DICompileUnit CompileUnit = Subprogram.getCompileUnit();
-      std::string Dir, FN;
-      unsigned SrcFile = DW->getOrCreateSourceID(CompileUnit.getDirectory(Dir),
-                                                 CompileUnit.getFilename(FN));
  
        // Record the source line but does not create a label for the normal
        // function start. It will be emitted at asm emission time. However,
        // create a label if this is a beginning of inlined function.
        unsigned Line = Subprogram.getLineNumber();
-
-      if (Fast) {
-        unsigned LabelID = DW->RecordSourceLine(Line, 0, SrcFile);
-        if (DW->getRecordSourceLineCount() != 1)
-          DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(),
-                                   getRoot(), LabelID));
-      }
-
-      setCurDebugLoc(DebugLoc::get(MF.getOrCreateDebugLocID(SrcFile, Line, 0)));
+      setCurDebugLoc(DebugLoc::get(
+                     MF.getOrCreateDebugLocID(CompileUnit.getGV(), Line, 0)));
+      // FIXME -  Start new region because llvm.dbg.func_start also defines
+      // beginning of function scope.
      }
  
      return 0;
    }
    case Intrinsic::dbg_declare: {
-    if (Fast) {
-      DwarfWriter *DW = DAG.getDwarfWriter();
+    if (OptLevel == CodeGenOpt::None) {
        DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
        Value *Variable = DI.getVariable();
-      if (DW && DW->ValidDebugInfo(Variable))
+      if (DIDescriptor::ValidDebugInfo(Variable, OptLevel))
          DAG.setRoot(DAG.getNode(ISD::DECLARE, dl, MVT::Other, getRoot(),
                                  getValue(DI.getAddress()), getValue(Variable)));
      } else {
@@ -4002,12 +4055,8 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
      return 0;
    }
    case Intrinsic::eh_exception: {
-    if (!CurMBB->isLandingPad()) {
-      // FIXME: Mark exception register as live in.  Hack for PR1508.
-      unsigned Reg = TLI.getExceptionAddressRegister();
-      if (Reg) CurMBB->addLiveIn(Reg);
-    }
      // Insert the EXCEPTIONADDR instruction.
+    assert(CurMBB->isLandingPad() &&"Call to eh.exception not in landing pad!");
      SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
      SDValue Ops[1];
      Ops[0] = DAG.getRoot();
@@ -4195,8 +4244,8 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
    case Intrinsic::readcyclecounter: {
      SDValue Op = getRoot();
      SDValue Tmp = DAG.getNode(ISD::READCYCLECOUNTER, dl,
-                                DAG.getNodeValueTypes(MVT::i64, MVT::Other), 2,
-                                &Op, 1);
+                              DAG.getVTList(MVT::i64, MVT::Other),
+                              &Op, 1);
      setValue(&I, Tmp);
      DAG.setRoot(Tmp.getValue(1));
      return 0;
@@ -4240,7 +4289,7 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
    case Intrinsic::stacksave: {
      SDValue Op = getRoot();
      SDValue Tmp = DAG.getNode(ISD::STACKSAVE, dl,
-              DAG.getNodeValueTypes(TLI.getPointerTy(), MVT::Other), 2, &Op, 1);
+              DAG.getVTList(TLI.getPointerTy(), MVT::Other), &Op, 1);
      setValue(&I, Tmp);
      DAG.setRoot(Tmp.getValue(1));
      return 0;
@@ -4288,9 +4337,8 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
      Ops[5] = DAG.getSrcValue(F);
  
      SDValue Tmp = DAG.getNode(ISD::TRAMPOLINE, dl,
-                                DAG.getNodeValueTypes(TLI.getPointerTy(),
-                                                      MVT::Other), 2,
-                                Ops, 6);
+                              DAG.getVTList(TLI.getPointerTy(), MVT::Other),
+                              Ops, 6);
  
      setValue(&I, Tmp);
      DAG.setRoot(Tmp.getValue(1));
@@ -5026,6 +5074,10 @@ hasInlineAsmMemConstraint(std::vector<InlineAsm::ConstraintInfo> &CInfos,
        if (CType == TargetLowering::C_Memory)
          return true;
      }
+    
+    // Indirect operand accesses access memory.
+    if (CI.isIndirect)
+      return true;
    }
  
    return false;
@@ -5039,9 +5091,6 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) {
    /// ConstraintOperands - Information about all of the constraints.
    std::vector<SDISelAsmOperandInfo> ConstraintOperands;
  
-  SDValue Chain = getRoot();
-  SDValue Flag;
-
    std::set<unsigned> OutputRegs, InputRegs;
  
    // Do a prepass over the constraints, canonicalizing them, and building up the
@@ -5050,6 +5099,15 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) {
      ConstraintInfos = IA->ParseConstraints();
  
    bool hasMemory = hasInlineAsmMemConstraint(ConstraintInfos, TLI);
+  
+  SDValue Chain, Flag;
+  
+  // We won't need to flush pending loads if this asm doesn't touch
+  // memory and is nonvolatile.
+  if (hasMemory || IA->hasSideEffects())
+    Chain = getRoot();
+  else
+    Chain = DAG.getRoot();
  
    unsigned ArgNo = 0;   // ArgNo - The argument of the CallInst.
    unsigned ResNo = 0;   // ResNo - The result number of the next output.
@@ -5118,8 +5176,8 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) {
               Input.ConstraintVT.isInteger()) ||
              (OpInfo.ConstraintVT.getSizeInBits() !=
               Input.ConstraintVT.getSizeInBits())) {
-          cerr << "Unsupported asm: input constraint with a matching output "
-               << "constraint of incompatible type!\n";
+          cerr << "llvm: error: Unsupported asm: input constraint with a "
+               << "matching output constraint of incompatible type!\n";
            exit(1);
          }
          Input.ConstraintVT = OpInfo.ConstraintVT;
@@ -5151,7 +5209,7 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) {
          // Otherwise, create a stack slot and emit a store to it before the
          // asm.
          const Type *Ty = OpVal->getType();
-        uint64_t TySize = TLI.getTargetData()->getTypePaddedSize(Ty);
+        uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
          unsigned Align  = TLI.getTargetData()->getPrefTypeAlignment(Ty);
          MachineFunction &MF = DAG.getMachineFunction();
          int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align);
@@ -5223,7 +5281,7 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) {
        // Copy the output from the appropriate register.  Find a register that
        // we can use.
        if (OpInfo.AssignedRegs.Regs.empty()) {
-        cerr << "Couldn't allocate output reg for constraint '"
+        cerr << "llvm: error: Couldn't allocate output reg for constraint '"
               << OpInfo.ConstraintCode << "'!\n";
          exit(1);
        }
@@ -5277,6 +5335,8 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) {
          if ((OpFlag & 7) == 2 /*REGDEF*/
              || (OpFlag & 7) == 6 /* EARLYCLOBBER REGDEF */) {
            // Add (OpFlag&0xffff)>>3 registers to MatchedRegs.
+          assert(!OpInfo.isIndirect &&
+                 "Don't know how to handle tied indirect register inputs yet!");
            RegsForValue MatchedRegs;
            MatchedRegs.TLI = &TLI;
            MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType());
@@ -5317,7 +5377,7 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) {
          TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode[0],
                                           hasMemory, Ops, DAG);
          if (Ops.empty()) {
-          cerr << "Invalid operand for inline asm constraint '"
+          cerr << "llvm: error: Invalid operand for inline asm constraint '"
                 << OpInfo.ConstraintCode << "'!\n";
            exit(1);
          }
@@ -5349,7 +5409,7 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) {
  
        // Copy the input into the appropriate registers.
        if (OpInfo.AssignedRegs.Regs.empty()) {
-        cerr << "Couldn't allocate output reg for constraint '"
+        cerr << "llvm: error: Couldn't allocate output reg for constraint '"
               << OpInfo.ConstraintCode << "'!\n";
          exit(1);
        }
@@ -5377,7 +5437,7 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) {
    if (Flag.getNode()) AsmNodeOperands.push_back(Flag);
  
    Chain = DAG.getNode(ISD::INLINEASM, getCurDebugLoc(),
-                      DAG.getNodeValueTypes(MVT::Other, MVT::Flag), 2,
+                      DAG.getVTList(MVT::Other, MVT::Flag),
                        &AsmNodeOperands[0], AsmNodeOperands.size());
    Flag = Chain.getValue(1);
  
@@ -5412,6 +5472,9 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) {
      }
  
      setValue(CS.getInstruction(), Val);
+    // Don't need to use this as a chain in this case.
+    if (!IA->hasSideEffects() && !hasMemory && IndirectStoresToEmit.empty())
+      return;
    }
  
    std::vector<std::pair<SDValue, Value*> > StoresToEmit;
@@ -5424,6 +5487,7 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) {
      SDValue OutVal = OutRegs.getCopyFromRegs(DAG, getCurDebugLoc(),
                                               Chain, &Flag);
      StoresToEmit.push_back(std::make_pair(OutVal, Ptr));
+
    }
  
    // Emit the non-flagged stores from the physregs.
@@ -5448,7 +5512,7 @@ void SelectionDAGLowering::visitMalloc(MallocInst &I) {
    // i32-ness of the optimizer: we do not want to promote to i64 and then
    // multiply on 64-bit targets.
    // FIXME: Malloc inst should go away: PR715.
-  uint64_t ElementSize = TD->getTypePaddedSize(I.getType()->getElementType());
+  uint64_t ElementSize = TD->getTypeAllocSize(I.getType()->getElementType());
    if (ElementSize != 1)
      Src = DAG.getNode(ISD::MUL, getCurDebugLoc(), Src.getValueType(),
                        Src, DAG.getConstant(ElementSize, Src.getValueType()));
@@ -5562,7 +5626,7 @@ void TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG,
          const PointerType *Ty = cast<PointerType>(I->getType());
          const Type *ElementTy = Ty->getElementType();
          unsigned FrameAlign = getByValTypeAlignment(ElementTy);
-        unsigned FrameSize  = getTargetData()->getTypePaddedSize(ElementTy);
+        unsigned FrameSize  = getTargetData()->getTypeAllocSize(ElementTy);
          // For ByVal, alignment should be passed from FE.  BE will guess if
          // this info is not there but there are cases it cannot get right.
          if (F.getParamAlignment(j))
@@ -5695,7 +5759,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
          const PointerType *Ty = cast<PointerType>(Args[i].Ty);
          const Type *ElementTy = Ty->getElementType();
          unsigned FrameAlign = getByValTypeAlignment(ElementTy);
-        unsigned FrameSize  = getTargetData()->getTypePaddedSize(ElementTy);
+        unsigned FrameSize  = getTargetData()->getTypeAllocSize(ElementTy);
          // For ByVal, alignment should come from FE.  BE will guess if this
          // info is not there but there are cases it cannot get right.
          if (Args[i].Alignment)
@@ -5840,10 +5904,7 @@ LowerArguments(BasicBlock *LLVMBB) {
                                                  SDL->getCurDebugLoc()));
        // If this argument is live outside of the entry block, insert a copy from
        // whereever we got it to the vreg that other BB's will reference it as.
-      DenseMap<const Value*, unsigned>::iterator VMI=FuncInfo->ValueMap.find(AI);
-      if (VMI != FuncInfo->ValueMap.end()) {
-        SDL->CopyValueToVirtualRegister(AI, VMI->second);
-      }
+      SDL->CopyToExportRegsIfNeeded(AI);
      }
      a += NumValues;
    }