Implement inalloca codegen for x86 with the new inalloca design

[oota-llvm.git] / lib / CodeGen / SelectionDAG / SelectionDAGBuilder.cpp
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

index 30fde54e50e1648bbf958a789bd0148cdd60abb9..55e96a4dea2125f3a76b87e21c143fe808d719c2 100644 (file)
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -33,6 +33,7 @@
  #include "llvm/CodeGen/MachineModuleInfo.h"
  #include "llvm/CodeGen/MachineRegisterInfo.h"
  #include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/StackMaps.h"
  #include "llvm/DebugInfo.h"
  #include "llvm/IR/CallingConv.h"
  #include "llvm/IR/Constants.h"
@@ -49,7 +50,6 @@
  #include "llvm/Support/CommandLine.h"
  #include "llvm/Support/Debug.h"
  #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/IntegersSubsetMapping.h"
  #include "llvm/Support/MathExtras.h"
  #include "llvm/Support/raw_ostream.h"
  #include "llvm/Target/TargetFrameLowering.h"
@@ -851,12 +851,20 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
    SDValue Res = DAG.getTargetConstant(Flag, MVT::i32);
    Ops.push_back(Res);
  
+  unsigned SP = TLI.getStackPointerRegisterToSaveRestore();
    for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
      unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]);
      MVT RegisterVT = RegVTs[Value];
      for (unsigned i = 0; i != NumRegs; ++i) {
        assert(Reg < Regs.size() && "Mismatch in # registers expected");
-      Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT));
+      unsigned TheReg = Regs[Reg++];
+      Ops.push_back(DAG.getRegister(TheReg, RegisterVT));
+
+      // Notice if we clobbered the stack pointer.  Yes, inline asm can do this.
+      if (TheReg == SP && Code == InlineAsm::Kind_Clobber) {
+        MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+        MFI->setHasInlineAsmWithSPAdjust(true);
+      }
      }
    }
  }
@@ -884,6 +892,7 @@ void SelectionDAGBuilder::clear() {
    PendingExports.clear();
    CurInst = NULL;
    HasTailCall = false;
+  SDNodeOrder = LowestSDNodeOrder;
  }
  
  /// clearDanglingDebugInfo - Clear the dangling debug information
@@ -1064,8 +1073,10 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
      if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
        return DAG.getGlobalAddress(GV, getCurSDLoc(), VT);
  
-    if (isa<ConstantPointerNull>(C))
-      return DAG.getConstant(0, TLI->getPointerTy());
+    if (isa<ConstantPointerNull>(C)) {
+      unsigned AS = V->getType()->getPointerAddressSpace();
+      return DAG.getConstant(0, TLI->getPointerTy(AS));
+    }
  
      if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
        return DAG.getConstantFP(*CFP, VT);
@@ -1269,7 +1280,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
  
          for (unsigned i = 0; i < NumParts; ++i) {
            Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(),
-                                        /*isfixed=*/true, 0, 0));
+                                        VT, /*isfixed=*/true, 0, 0));
            OutVals.push_back(Parts[i]);
          }
        }
@@ -1382,7 +1393,9 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
                                                    MachineBasicBlock *TBB,
                                                    MachineBasicBlock *FBB,
                                                    MachineBasicBlock *CurBB,
-                                                  MachineBasicBlock *SwitchBB) {
+                                                  MachineBasicBlock *SwitchBB,
+                                                  uint32_t TWeight,
+                                                  uint32_t FWeight) {
    const BasicBlock *BB = CurBB->getBasicBlock();
  
    // If the leaf of the tree is a comparison, merge the condition into
@@ -1407,7 +1420,7 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
        }
  
        CaseBlock CB(Condition, BOp->getOperand(0),
-                   BOp->getOperand(1), NULL, TBB, FBB, CurBB);
+                   BOp->getOperand(1), NULL, TBB, FBB, CurBB, TWeight, FWeight);
        SwitchCases.push_back(CB);
        return;
      }
@@ -1415,17 +1428,26 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
  
    // Create a CaseBlock record representing this branch.
    CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()),
-               NULL, TBB, FBB, CurBB);
+               NULL, TBB, FBB, CurBB, TWeight, FWeight);
    SwitchCases.push_back(CB);
  }
  
+/// Scale down both weights to fit into uint32_t.
+static void ScaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
+  uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
+  uint32_t Scale = (NewMax / UINT32_MAX) + 1;
+  NewTrue = NewTrue / Scale;
+  NewFalse = NewFalse / Scale;
+}
+
  /// FindMergedConditions - If Cond is an expression like
  void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
                                                 MachineBasicBlock *TBB,
                                                 MachineBasicBlock *FBB,
                                                 MachineBasicBlock *CurBB,
                                                 MachineBasicBlock *SwitchBB,
-                                               unsigned Opc) {
+                                               unsigned Opc, uint32_t TWeight,
+                                               uint32_t FWeight) {
    // If this node is not part of the or/and tree, emit it as a branch.
    const Instruction *BOp = dyn_cast<Instruction>(Cond);
    if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
@@ -1433,7 +1455,8 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
        BOp->getParent() != CurBB->getBasicBlock() ||
        !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
        !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
-    EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB);
+    EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB,
+                                 TWeight, FWeight);
      return;
    }
  
@@ -1445,6 +1468,7 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
  
    if (Opc == Instruction::Or) {
      // Codegen X | Y as:
+    // BB1:
      //   jmp_if_X TBB
      //   jmp TmpBB
      // TmpBB:
@@ -1452,14 +1476,34 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
      //   jmp FBB
      //
  
+    // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
+    // The requirement is that
+    //   TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
+    //     = TrueProb for orignal BB.
+    // Assuming the orignal weights are A and B, one choice is to set BB1's
+    // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice
+    // assumes that
+    //   TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
+    // Another choice is to assume TrueProb for BB1 equals to TrueProb for
+    // TmpBB, but the math is more complicated.
+
+    uint64_t NewTrueWeight = TWeight;
+    uint64_t NewFalseWeight = (uint64_t)TWeight + 2 * (uint64_t)FWeight;
+    ScaleWeights(NewTrueWeight, NewFalseWeight);
      // Emit the LHS condition.
-    FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc);
+    FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc,
+                         NewTrueWeight, NewFalseWeight);
  
+    NewTrueWeight = TWeight;
+    NewFalseWeight = 2 * (uint64_t)FWeight;
+    ScaleWeights(NewTrueWeight, NewFalseWeight);
      // Emit the RHS condition into TmpBB.
-    FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc);
+    FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
+                         NewTrueWeight, NewFalseWeight);
    } else {
      assert(Opc == Instruction::And && "Unknown merge op!");
      // Codegen X & Y as:
+    // BB1:
      //   jmp_if_X TmpBB
      //   jmp FBB
      // TmpBB:
@@ -1468,11 +1512,28 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
      //
      //  This requires creation of TmpBB after CurBB.
  
+    // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
+    // The requirement is that
+    //   FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
+    //     = FalseProb for orignal BB.
+    // Assuming the orignal weights are A and B, one choice is to set BB1's
+    // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice
+    // assumes that
+    //   FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB.
+
+    uint64_t NewTrueWeight = 2 * (uint64_t)TWeight + (uint64_t)FWeight;
+    uint64_t NewFalseWeight = FWeight;
+    ScaleWeights(NewTrueWeight, NewFalseWeight);
      // Emit the LHS condition.
-    FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc);
+    FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc,
+                         NewTrueWeight, NewFalseWeight);
  
+    NewTrueWeight = 2 * (uint64_t)TWeight;
+    NewFalseWeight = FWeight;
+    ScaleWeights(NewTrueWeight, NewFalseWeight);
      // Emit the RHS condition into TmpBB.
-    FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc);
+    FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
+                         NewTrueWeight, NewFalseWeight);
    }
  }
  
@@ -1559,7 +1620,8 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
          (BOp->getOpcode() == Instruction::And ||
           BOp->getOpcode() == Instruction::Or)) {
        FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
-                           BOp->getOpcode());
+                           BOp->getOpcode(), getEdgeWeight(BrMBB, Succ0MBB),
+                           getEdgeWeight(BrMBB, Succ1MBB));
        // If the compares in later blocks need to use values not currently
        // exported from this block, export them now.  This block should always
        // be the first entry.
@@ -1618,8 +1680,7 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
      } else
        Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC);
    } else {
-    assert(CB.CC == ISD::SETCC_INVALID &&
-           "Condition is undefined for to-the-range belonging check.");
+    assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");
  
      const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
      const APInt& High  = cast<ConstantInt>(CB.CmpRHS)->getValue();
@@ -1627,9 +1688,9 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
      SDValue CmpOp = getValue(CB.CmpMHS);
      EVT VT = CmpOp.getValueType();
  
-    if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(false)) {
+    if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
        Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT),
-                          ISD::SETULE);
+                          ISD::SETLE);
      } else {
        SDValue SUB = DAG.getNode(ISD::SUB, dl,
                                  VT, CmpOp, DAG.getConstant(Low, VT));
@@ -2145,7 +2206,7 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
        CC = ISD::SETEQ;
        LHS = SV; RHS = I->High; MHS = NULL;
      } else {
-      CC = ISD::SETCC_INVALID;
+      CC = ISD::SETLE;
        LHS = I->Low; MHS = SV; RHS = I->High;
      }
  
@@ -2179,7 +2240,7 @@ static inline bool areJTsAllowed(const TargetLowering &TLI) {
  
  static APInt ComputeRange(const APInt &First, const APInt &Last) {
    uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1;
-  APInt LastExt = Last.zext(BitWidth), FirstExt = First.zext(BitWidth);
+  APInt LastExt = Last.sext(BitWidth), FirstExt = First.sext(BitWidth);
    return (LastExt - FirstExt + 1ULL);
  }
  
@@ -2246,7 +2307,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR,
      const APInt &Low = cast<ConstantInt>(I->Low)->getValue();
      const APInt &High = cast<ConstantInt>(I->High)->getValue();
  
-    if (Low.ule(TEI) && TEI.ule(High)) {
+    if (Low.sle(TEI) && TEI.sle(High)) {
        DestBBs.push_back(I->BB);
        if (TEI==High)
          ++I;
@@ -2350,7 +2411,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
      volatile double RDensity =
        (double)RSize.roundToDouble() /
                             (Last - RBegin + 1ULL).roundToDouble();
-    double Metric = Range.logBase2()*(LDensity+RDensity);
+    volatile double Metric = Range.logBase2()*(LDensity+RDensity);
      // Should always split in some non-trivial place
      DEBUG(dbgs() <<"=>Step\n"
                   << "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n'
@@ -2420,7 +2481,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
    // Create a CaseBlock record representing a conditional branch to
    // the LHS node if the value being switched on SV is less than C.
    // Otherwise, branch to LHS.
-  CaseBlock CB(ISD::SETULT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB);
+  CaseBlock CB(ISD::SETLT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB);
  
    if (CR.CaseBB == SwitchBB)
      visitSwitchCase(CB, SwitchBB);
@@ -2450,7 +2511,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
    MachineFunction *CurMF = FuncInfo.MF;
  
    // If target does not have legal shift left, do not emit bit tests at all.
-  if (!TLI->isOperationLegal(ISD::SHL, TLI->getPointerTy()))
+  if (!TLI->isOperationLegal(ISD::SHL, PTy))
      return false;
  
    size_t numCmps = 0;
@@ -2493,7 +2554,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
    // Optimize the case where all the case values fit in a
    // word without having to subtract minValue. In this case,
    // we can optimize away the subtraction.
-  if (maxValue.ult(IntPtrBits)) {
+  if (minValue.isNonNegative() && maxValue.slt(IntPtrBits)) {
      cmpRange = maxValue;
    } else {
      lowBound = minValue;
@@ -2568,12 +2629,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
  /// Clusterify - Transform simple list of Cases into list of CaseRange's
  size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
                                         const SwitchInst& SI) {
-
-  /// Use a shorter form of declaration, and also
-  /// show the we want to use CRSBuilder as Clusterifier.
-  typedef IntegersSubsetMapping<MachineBasicBlock> Clusterifier;
-
-  Clusterifier TheClusterifier;
+  size_t numCmps = 0;
  
    BranchProbabilityInfo *BPI = FuncInfo.BPI;
    // Start with "simple" cases
@@ -2582,27 +2638,40 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
      const BasicBlock *SuccBB = i.getCaseSuccessor();
      MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB];
  
-    TheClusterifier.add(i.getCaseValueEx(), SMBB,
-        BPI ? BPI->getEdgeWeight(SI.getParent(), i.getSuccessorIndex()) : 0);
-  }
-
-  TheClusterifier.optimize();
-
-  size_t numCmps = 0;
-  for (Clusterifier::RangeIterator i = TheClusterifier.begin(),
-       e = TheClusterifier.end(); i != e; ++i, ++numCmps) {
-    Clusterifier::Cluster &C = *i;
-    // Update edge weight for the cluster.
-    unsigned W = C.first.Weight;
-
-    // FIXME: Currently work with ConstantInt based numbers.
-    // Changing it to APInt based is a pretty heavy for this commit.
-    Cases.push_back(Case(C.first.getLow().toConstantInt(),
-                         C.first.getHigh().toConstantInt(), C.second, W));
+    uint32_t ExtraWeight =
+      BPI ? BPI->getEdgeWeight(SI.getParent(), i.getSuccessorIndex()) : 0;
+
+    Cases.push_back(Case(i.getCaseValue(), i.getCaseValue(),
+                         SMBB, ExtraWeight));
+  }
+  std::sort(Cases.begin(), Cases.end(), CaseCmp());
+
+  // Merge case into clusters
+  if (Cases.size() >= 2)
+    // Must recompute end() each iteration because it may be
+    // invalidated by erase if we hold on to it
+    for (CaseItr I = Cases.begin(), J = llvm::next(Cases.begin());
+         J != Cases.end(); ) {
+      const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue();
+      const APInt& currentValue = cast<ConstantInt>(I->High)->getValue();
+      MachineBasicBlock* nextBB = J->BB;
+      MachineBasicBlock* currentBB = I->BB;
+
+      // If the two neighboring cases go to the same destination, merge them
+      // into a single case.
+      if ((nextValue - currentValue == 1) && (currentBB == nextBB)) {
+        I->High = J->High;
+        I->ExtraWeight += J->ExtraWeight;
+        J = Cases.erase(J);
+      } else {
+        I = J++;
+      }
+    }
  
-    if (C.first.getLow() != C.first.getHigh())
-    // A range counts double, since it requires two compares.
-    ++numCmps;
+  for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) {
+    if (I->Low != I->High)
+      // A range counts double, since it requires two compares.
+      ++numCmps;
    }
  
    return numCmps;
@@ -2927,10 +2996,28 @@ void SelectionDAGBuilder::visitBitCast(const User &I) {
    if (DestVT != N.getValueType())
      setValue(&I, DAG.getNode(ISD::BITCAST, getCurSDLoc(),
                               DestVT, N)); // convert types.
+  else if(ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
+    setValue(&I, DAG.getConstant(C->getAPIntValue(), C->getValueType(0),
+                                 /*isTarget=*/false, /*isOpaque*/true));
    else
      setValue(&I, N);            // noop cast.
  }
  
+void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) {
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  const Value *SV = I.getOperand(0);
+  SDValue N = getValue(SV);
+  EVT DestVT = TM.getTargetLowering()->getValueType(I.getType());
+
+  unsigned SrcAS = SV->getType()->getPointerAddressSpace();
+  unsigned DestAS = I.getType()->getPointerAddressSpace();
+
+  if (!TLI.isNoopAddrSpaceCast(SrcAS, DestAS))
+    N = DAG.getAddrSpaceCast(getCurSDLoc(), DestVT, N, SrcAS, DestAS);
+
+  setValue(&I, N);
+}
+
  void SelectionDAGBuilder::visitInsertElement(const User &I) {
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    SDValue InVec = getValue(I.getOperand(0));
@@ -3223,10 +3310,12 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
  }
  
  void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
-  SDValue N = getValue(I.getOperand(0));
+  Value *Op0 = I.getOperand(0);
    // Note that the pointer operand may be a vector of pointers. Take the scalar
    // element which holds a pointer.
-  Type *Ty = I.getOperand(0)->getType()->getScalarType();
+  Type *Ty = Op0->getType()->getScalarType();
+  unsigned AS = Ty->getPointerAddressSpace();
+  SDValue N = getValue(Op0);
  
    for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end();
         OI != E; ++OI) {
@@ -3251,14 +3340,13 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
          uint64_t Offs =
              TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
          SDValue OffsVal;
-        EVT PTy = TLI->getPointerTy();
+        EVT PTy = TLI->getPointerTy(AS);
          unsigned PtrBits = PTy.getSizeInBits();
          if (PtrBits < 64)
-          OffsVal = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(),
-                                TLI->getPointerTy(),
+          OffsVal = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), PTy,
                                  DAG.getConstant(Offs, MVT::i64));
          else
-          OffsVal = DAG.getIntPtrConstant(Offs);
+          OffsVal = DAG.getConstant(Offs, PTy);
  
          N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N,
                          OffsVal);
@@ -3266,7 +3354,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
        }
  
        // N = N + Idx * ElementSize;
-      APInt ElementSize = APInt(TLI->getPointerTy().getSizeInBits(),
+      APInt ElementSize = APInt(TLI->getPointerSizeInBits(AS),
                                  TD->getTypeAllocSize(Ty));
        SDValue IdxN = getValue(Idx);
  
@@ -3347,7 +3435,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
  
    // Inform the Frame Information that we have just allocated a variable-sized
    // object.
-  FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject(Align ? Align : 1);
+  FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject(Align ? Align : 1, &I);
  }
  
  void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
@@ -3375,7 +3463,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
  
    SDValue Root;
    bool ConstantMemory = false;
-  if (I.isVolatile() || NumValues > MaxParallelChains)
+  if (isVolatile || NumValues > MaxParallelChains)
      // Serialize volatile loads with other side effects.
      Root = getRoot();
    else if (AA->pointsToConstantMemory(
@@ -3388,6 +3476,10 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
      Root = DAG.getRoot();
    }
  
+  const TargetLowering *TLI = TM.getTargetLowering();
+  if (isVolatile)
+    Root = TLI->prepareVolatileOrAtomicLoad(Root, getCurSDLoc(), DAG);
+
    SmallVector<SDValue, 4> Values(NumValues);
    SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains),
                                            NumValues));
@@ -3612,6 +3704,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
    if (I.getAlignment() < VT.getSizeInBits() / 8)
      report_fatal_error("Cannot generate unaligned atomic load");
  
+  InChain = TLI->prepareVolatileOrAtomicLoad(InChain, dl, DAG);
    SDValue L =
      DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain,
                    getValue(I.getPointerOperand()),
@@ -4265,7 +4358,7 @@ static SDValue expandExp2(SDLoc dl, SDValue Op, SelectionDAG &DAG,
  static SDValue expandPow(SDLoc dl, SDValue LHS, SDValue RHS,
                           SelectionDAG &DAG, const TargetLowering &TLI) {
    bool IsExp10 = false;
-  if (LHS.getValueType() == MVT::f32 && LHS.getValueType() == MVT::f32 &&
+  if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f32 &&
        LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
      if (ConstantFPSDNode *LHSC = dyn_cast<ConstantFPSDNode>(LHS)) {
        APFloat Ten(10.0f);
@@ -4777,14 +4870,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
      SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), sdl,
                                          TLI->getPointerTy());
      SDValue Offset = DAG.getNode(ISD::ADD, sdl,
-                                 TLI->getPointerTy(),
+                                 CfaArg.getValueType(),
                                   DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, sdl,
-                                             TLI->getPointerTy()),
+                                             CfaArg.getValueType()),
                                   CfaArg);
      SDValue FA = DAG.getNode(ISD::FRAMEADDR, sdl,
                               TLI->getPointerTy(),
                               DAG.getConstant(0, TLI->getPointerTy()));
-    setValue(&I, DAG.getNode(ISD::ADD, sdl, TLI->getPointerTy(),
+    setValue(&I, DAG.getNode(ISD::ADD, sdl, FA.getValueType(),
                               FA, Offset));
      return 0;
    }
@@ -5302,6 +5395,15 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
    case Intrinsic::donothing:
      // ignore
      return 0;
+  case Intrinsic::experimental_stackmap: {
+    visitStackmap(I);
+    return 0;
+  }
+  case Intrinsic::experimental_patchpoint_void:
+  case Intrinsic::experimental_patchpoint_i64: {
+    visitPatchpoint(I);
+    return 0;
+  }
    }
  }
  
@@ -5332,6 +5434,8 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
    int DemoteStackIdx = -100;
  
    if (!CanLowerReturn) {
+    assert(!CS.hasInAllocaArgument() &&
+           "sret demotion is incompatible with inalloca");
      uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(
                        FTy->getReturnType());
      unsigned Align  = TLI->getDataLayout()->getPrefTypeAlignment(
@@ -5366,15 +5470,8 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
      SDValue ArgNode = getValue(V);
      Entry.Node = ArgNode; Entry.Ty = V->getType();
  
-    unsigned attrInd = i - CS.arg_begin() + 1;
-    Entry.isSExt     = CS.paramHasAttr(attrInd, Attribute::SExt);
-    Entry.isZExt     = CS.paramHasAttr(attrInd, Attribute::ZExt);
-    Entry.isInReg    = CS.paramHasAttr(attrInd, Attribute::InReg);
-    Entry.isSRet     = CS.paramHasAttr(attrInd, Attribute::StructRet);
-    Entry.isNest     = CS.paramHasAttr(attrInd, Attribute::Nest);
-    Entry.isByVal    = CS.paramHasAttr(attrInd, Attribute::ByVal);
-    Entry.isReturned = CS.paramHasAttr(attrInd, Attribute::Returned);
-    Entry.Alignment  = CS.getParamAlignment(attrInd);
+    // Skip the first return-type Attribute to get to params.
+    Entry.setAttributes(&CS, i - CS.arg_begin() + 1);
      Args.push_back(Entry);
    }
  
@@ -5456,8 +5553,8 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
    }
  
    if (!Result.second.getNode()) {
-    // As a special case, a null chain means that a tail call has been emitted and
-    // the DAG root is already updated.
+    // As a special case, a null chain means that a tail call has been emitted
+    // and the DAG root is already updated.
      HasTailCall = true;
  
      // Since there's no actual continuation from this block, nothing can be
@@ -6714,6 +6811,285 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
                            DAG.getSrcValue(I.getArgOperand(1))));
  }
  
+/// \brief Lower an argument list according to the target calling convention.
+///
+/// \return A tuple of <return-value, token-chain>
+///
+/// This is a helper for lowering intrinsics that follow a target calling
+/// convention or require stack pointer adjustment. Only a subset of the
+/// intrinsic's operands need to participate in the calling convention.
+std::pair<SDValue, SDValue>
+SelectionDAGBuilder::LowerCallOperands(const CallInst &CI, unsigned ArgIdx,
+                                       unsigned NumArgs, SDValue Callee,
+                                       bool useVoidTy) {
+  TargetLowering::ArgListTy Args;
+  Args.reserve(NumArgs);
+
+  // Populate the argument list.
+  // Attributes for args start at offset 1, after the return attribute.
+  ImmutableCallSite CS(&CI);
+  for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs, AttrI = ArgIdx + 1;
+       ArgI != ArgE; ++ArgI) {
+    const Value *V = CI.getOperand(ArgI);
+
+    assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic.");
+
+    TargetLowering::ArgListEntry Entry;
+    Entry.Node = getValue(V);
+    Entry.Ty = V->getType();
+    Entry.setAttributes(&CS, AttrI);
+    Args.push_back(Entry);
+  }
+
+  Type *retTy = useVoidTy ? Type::getVoidTy(*DAG.getContext()) : CI.getType();
+  TargetLowering::CallLoweringInfo CLI(getRoot(), retTy, /*retSExt*/ false,
+    /*retZExt*/ false, /*isVarArg*/ false, /*isInReg*/ false, NumArgs,
+    CI.getCallingConv(), /*isTailCall*/ false, /*doesNotReturn*/ false,
+    /*isReturnValueUsed*/ CI.use_empty(), Callee, Args, DAG, getCurSDLoc());
+
+  const TargetLowering *TLI = TM.getTargetLowering();
+  return TLI->LowerCallTo(CLI);
+}
+
+/// \brief Add a stack map intrinsic call's live variable operands to a stackmap
+/// or patchpoint target node's operand list.
+///
+/// Constants are converted to TargetConstants purely as an optimization to
+/// avoid constant materialization and register allocation.
+///
+/// FrameIndex operands are converted to TargetFrameIndex so that ISEL does not
+/// generate addess computation nodes, and so ExpandISelPseudo can convert the
+/// TargetFrameIndex into a DirectMemRefOp StackMap location. This avoids
+/// address materialization and register allocation, but may also be required
+/// for correctness. If a StackMap (or PatchPoint) intrinsic directly uses an
+/// alloca in the entry block, then the runtime may assume that the alloca's
+/// StackMap location can be read immediately after compilation and that the
+/// location is valid at any point during execution (this is similar to the
+/// assumption made by the llvm.gcroot intrinsic). If the alloca's location were
+/// only available in a register, then the runtime would need to trap when
+/// execution reaches the StackMap in order to read the alloca's location.
+static void addStackMapLiveVars(const CallInst &CI, unsigned StartIdx,
+                                SmallVectorImpl<SDValue> &Ops,
+                                SelectionDAGBuilder &Builder) {
+  for (unsigned i = StartIdx, e = CI.getNumArgOperands(); i != e; ++i) {
+    SDValue OpVal = Builder.getValue(CI.getArgOperand(i));
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) {
+      Ops.push_back(
+        Builder.DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64));
+      Ops.push_back(
+        Builder.DAG.getTargetConstant(C->getSExtValue(), MVT::i64));
+    } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(OpVal)) {
+      const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo();
+      Ops.push_back(
+        Builder.DAG.getTargetFrameIndex(FI->getIndex(), TLI.getPointerTy()));
+    } else
+      Ops.push_back(OpVal);
+  }
+}
+
+/// \brief Lower llvm.experimental.stackmap directly to its target opcode.
+void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
+  // void @llvm.experimental.stackmap(i32 <id>, i32 <numShadowBytes>,
+  //                                  [live variables...])
+
+  assert(CI.getType()->isVoidTy() && "Stackmap cannot return a value.");
+
+  SDValue Callee = getValue(CI.getCalledValue());
+
+  // Lower into a call sequence with no args and no return value.
+  std::pair<SDValue, SDValue> Result = LowerCallOperands(CI, 0, 0, Callee);
+  // Set the root to the target-lowered call chain.
+  SDValue Chain = Result.second;
+  DAG.setRoot(Chain);
+
+  /// Get a call instruction from the call sequence chain.
+  /// Tail calls are not allowed.
+  SDNode *CallEnd = Chain.getNode();
+  assert(CallEnd->getOpcode() == ISD::CALLSEQ_END &&
+         "Expected a callseq node.");
+  SDNode *Call = CallEnd->getOperand(0).getNode();
+  bool hasGlue = Call->getGluedNode();
+
+  // Replace the target specific call node with the stackmap intrinsic.
+  SmallVector<SDValue, 8> Ops;
+
+  // Add the <id> and <numShadowBytes> constants.
+  for (unsigned i = 0; i < 2; ++i) {
+    SDValue tmp = getValue(CI.getOperand(i));
+    Ops.push_back(DAG.getTargetConstant(
+        cast<ConstantSDNode>(tmp)->getZExtValue(), MVT::i32));
+  }
+  // Push live variables for the stack map.
+  addStackMapLiveVars(CI, 2, Ops, *this);
+
+  // Push the chain (this is originally the first operand of the call, but
+  // becomes now the last or second to last operand).
+  Ops.push_back(*(Call->op_begin()));
+
+    // Push the glue flag (last operand).
+  if (hasGlue)
+    Ops.push_back(*(Call->op_end()-1));
+
+  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+
+  // Replace the target specific call node with a STACKMAP node.
+  MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::STACKMAP, getCurSDLoc(),
+                                         NodeTys, Ops);
+
+  // StackMap generates no value, so nothing goes in the NodeMap.
+
+  // Fixup the consumers of the intrinsic. The chain and glue may be used in the
+  // call sequence.
+  DAG.ReplaceAllUsesWith(Call, MN);
+
+  DAG.DeleteNode(Call);
+
+  // Inform the Frame Information that we have a stackmap in this function.
+  FuncInfo.MF->getFrameInfo()->setHasStackMap();
+}
+
+/// \brief Lower llvm.experimental.patchpoint directly to its target opcode.
+void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) {
+  // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>,
+  //                                                 i32 <numBytes>,
+  //                                                 i8* <target>,
+  //                                                 i32 <numArgs>,
+  //                                                 [Args...],
+  //                                                 [live variables...])
+
+  CallingConv::ID CC = CI.getCallingConv();
+  bool isAnyRegCC = CC == CallingConv::AnyReg;
+  bool hasDef = !CI.getType()->isVoidTy();
+  SDValue Callee = getValue(CI.getOperand(2)); // <target>
+
+  // Get the real number of arguments participating in the call <numArgs>
+  SDValue NArgVal = getValue(CI.getArgOperand(PatchPointOpers::NArgPos));
+  unsigned NumArgs = cast<ConstantSDNode>(NArgVal)->getZExtValue();
+
+  // Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs>
+  // Intrinsics include all meta-operands up to but not including CC.
+  unsigned NumMetaOpers = PatchPointOpers::CCPos;
+  assert(CI.getNumArgOperands() >= NumMetaOpers + NumArgs &&
+         "Not enough arguments provided to the patchpoint intrinsic");
+
+  // For AnyRegCC the arguments are lowered later on manually.
+  unsigned NumCallArgs = isAnyRegCC ? 0 : NumArgs;
+  std::pair<SDValue, SDValue> Result =
+    LowerCallOperands(CI, NumMetaOpers, NumCallArgs, Callee, isAnyRegCC);
+
+  // Set the root to the target-lowered call chain.
+  SDValue Chain = Result.second;
+  DAG.setRoot(Chain);
+
+  SDNode *CallEnd = Chain.getNode();
+  if (hasDef && (CallEnd->getOpcode() == ISD::CopyFromReg))
+    CallEnd = CallEnd->getOperand(0).getNode();
+
+  /// Get a call instruction from the call sequence chain.
+  /// Tail calls are not allowed.
+  assert(CallEnd->getOpcode() == ISD::CALLSEQ_END &&
+         "Expected a callseq node.");
+  SDNode *Call = CallEnd->getOperand(0).getNode();
+  bool hasGlue = Call->getGluedNode();
+
+  // Replace the target specific call node with the patchable intrinsic.
+  SmallVector<SDValue, 8> Ops;
+
+  // Add the <id> and <numBytes> constants.
+  SDValue IDVal = getValue(CI.getOperand(PatchPointOpers::IDPos));
+  Ops.push_back(DAG.getTargetConstant(
+                  cast<ConstantSDNode>(IDVal)->getZExtValue(), MVT::i64));
+  SDValue NBytesVal = getValue(CI.getOperand(PatchPointOpers::NBytesPos));
+  Ops.push_back(DAG.getTargetConstant(
+                  cast<ConstantSDNode>(NBytesVal)->getZExtValue(), MVT::i32));
+
+  // Assume that the Callee is a constant address.
+  // FIXME: handle function symbols in the future.
+  Ops.push_back(
+    DAG.getIntPtrConstant(cast<ConstantSDNode>(Callee)->getZExtValue(),
+                          /*isTarget=*/true));
+
+  // Adjust <numArgs> to account for any arguments that have been passed on the
+  // stack instead.
+  // Call Node: Chain, Target, {Args}, RegMask, [Glue]
+  unsigned NumCallRegArgs = Call->getNumOperands() - (hasGlue ? 4 : 3);
+  NumCallRegArgs = isAnyRegCC ? NumArgs : NumCallRegArgs;
+  Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, MVT::i32));
+
+  // Add the calling convention
+  Ops.push_back(DAG.getTargetConstant((unsigned)CC, MVT::i32));
+
+  // Add the arguments we omitted previously. The register allocator should
+  // place these in any free register.
+  if (isAnyRegCC)
+    for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i)
+      Ops.push_back(getValue(CI.getArgOperand(i)));
+
+  // Push the arguments from the call instruction up to the register mask.
+  SDNode::op_iterator e = hasGlue ? Call->op_end()-2 : Call->op_end()-1;
+  for (SDNode::op_iterator i = Call->op_begin()+2; i != e; ++i)
+    Ops.push_back(*i);
+
+  // Push live variables for the stack map.
+  addStackMapLiveVars(CI, NumMetaOpers + NumArgs, Ops, *this);
+
+  // Push the register mask info.
+  if (hasGlue)
+    Ops.push_back(*(Call->op_end()-2));
+  else
+    Ops.push_back(*(Call->op_end()-1));
+
+  // Push the chain (this is originally the first operand of the call, but
+  // becomes now the last or second to last operand).
+  Ops.push_back(*(Call->op_begin()));
+
+  // Push the glue flag (last operand).
+  if (hasGlue)
+    Ops.push_back(*(Call->op_end()-1));
+
+  SDVTList NodeTys;
+  if (isAnyRegCC && hasDef) {
+    // Create the return types based on the intrinsic definition
+    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+    SmallVector<EVT, 3> ValueVTs;
+    ComputeValueVTs(TLI, CI.getType(), ValueVTs);
+    assert(ValueVTs.size() == 1 && "Expected only one return value type.");
+
+    // There is always a chain and a glue type at the end
+    ValueVTs.push_back(MVT::Other);
+    ValueVTs.push_back(MVT::Glue);
+    NodeTys = DAG.getVTList(ValueVTs.data(), ValueVTs.size());
+  } else
+    NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+
+  // Replace the target specific call node with a PATCHPOINT node.
+  MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHPOINT,
+                                         getCurSDLoc(), NodeTys, Ops);
+
+  // Update the NodeMap.
+  if (hasDef) {
+    if (isAnyRegCC)
+      setValue(&CI, SDValue(MN, 0));
+    else
+      setValue(&CI, Result.first);
+  }
+
+  // Fixup the consumers of the intrinsic. The chain and glue may be used in the
+  // call sequence. Furthermore the location of the chain and glue can change
+  // when the AnyReg calling convention is used and the intrinsic returns a
+  // value.
+  if (isAnyRegCC && hasDef) {
+    SDValue From[] = {SDValue(Call, 0), SDValue(Call, 1)};
+    SDValue To[] = {SDValue(MN, 1), SDValue(MN, 2)};
+    DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
+  } else
+    DAG.ReplaceAllUsesWith(Call, MN);
+  DAG.DeleteNode(Call);
+
+  // Inform the Frame Information that we have a patchpoint in this function.
+  FuncInfo.MF->getFrameInfo()->setHasPatchPoint();
+}
+
  /// TargetLowering::LowerCallTo - This is the default LowerCallTo
  /// implementation, which just calls LowerCall.
  /// FIXME: When all targets are
@@ -6731,6 +7107,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
      for (unsigned i = 0; i != NumRegs; ++i) {
        ISD::InputArg MyFlags;
        MyFlags.VT = RegisterVT;
+      MyFlags.ArgVT = VT;
        MyFlags.Used = CLI.IsReturnValueUsed;
        if (CLI.RetSExt)
          MyFlags.Flags.setSExt();
@@ -6767,8 +7144,18 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
          Flags.setInReg();
        if (Args[i].isSRet)
          Flags.setSRet();
-      if (Args[i].isByVal) {
+      if (Args[i].isByVal)
+        Flags.setByVal();
+      if (Args[i].isInAlloca) {
+        Flags.setInAlloca();
+        // Set the byval flag for CCAssignFn callbacks that don't know about
+        // inalloca.  This way we can know how many bytes we should've allocated
+        // and how many bytes a callee cleanup function will pop.  If we port
+        // inalloca to more targets, we'll have to add custom inalloca handling
+        // in the various CC lowering callbacks.
          Flags.setByVal();
+      }
+      if (Args[i].isByVal || Args[i].isInAlloca) {
          PointerType *Ty = cast<PointerType>(Args[i].Ty);
          Type *ElementTy = Ty->getElementType();
          Flags.setByValSize(getDataLayout()->getTypeAllocSize(ElementTy));
@@ -6820,7 +7207,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
  
        for (unsigned j = 0; j != NumParts; ++j) {
          // if it isn't first piece, alignment must be 1
-        ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(),
+        ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), VT,
                                 i < CLI.NumFixedArgs,
                                 i, j*Parts[j].getValueType().getStoreSize());
          if (NumParts > 1 && j == 0)
@@ -6959,7 +7346,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
      ISD::ArgFlagsTy Flags;
      Flags.setSRet();
      MVT RegisterVT = TLI->getRegisterType(*DAG.getContext(), ValueVTs[0]);
-    ISD::InputArg RetArg(Flags, RegisterVT, true, 0, 0);
+    ISD::InputArg RetArg(Flags, RegisterVT, ValueVTs[0], true, 0, 0);
      Ins.push_back(RetArg);
    }
  
@@ -6970,6 +7357,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
      SmallVector<EVT, 4> ValueVTs;
      ComputeValueVTs(*TLI, I->getType(), ValueVTs);
      bool isArgValueUsed = !I->use_empty();
+    unsigned PartBase = 0;
      for (unsigned Value = 0, NumValues = ValueVTs.size();
           Value != NumValues; ++Value) {
        EVT VT = ValueVTs[Value];
@@ -6986,8 +7374,18 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
          Flags.setInReg();
        if (F.getAttributes().hasAttribute(Idx, Attribute::StructRet))
          Flags.setSRet();
-      if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) {
+      if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal))
+        Flags.setByVal();
+      if (F.getAttributes().hasAttribute(Idx, Attribute::InAlloca)) {
+        Flags.setInAlloca();
+        // Set the byval flag for CCAssignFn callbacks that don't know about
+        // inalloca.  This way we can know how many bytes we should've allocated
+        // and how many bytes a callee cleanup function will pop.  If we port
+        // inalloca to more targets, we'll have to add custom inalloca handling
+        // in the various CC lowering callbacks.
          Flags.setByVal();
+      }
+      if (Flags.isByVal() || Flags.isInAlloca()) {
          PointerType *Ty = cast<PointerType>(I->getType());
          Type *ElementTy = Ty->getElementType();
          Flags.setByValSize(TD->getTypeAllocSize(ElementTy));
@@ -7007,8 +7405,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
        MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
        unsigned NumRegs = TLI->getNumRegisters(*CurDAG->getContext(), VT);
        for (unsigned i = 0; i != NumRegs; ++i) {
-        ISD::InputArg MyFlags(Flags, RegisterVT, isArgValueUsed,
-                              Idx-1, i*RegisterVT.getStoreSize());
+        ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed,
+                              Idx-1, PartBase+i*RegisterVT.getStoreSize());
          if (NumRegs > 1 && i == 0)
            MyFlags.Flags.setSplit();
          // if it isn't first piece, alignment must be 1
@@ -7016,6 +7414,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
            MyFlags.Flags.setOrigAlign(1);
          Ins.push_back(MyFlags);
        }
+      PartBase += VT.getStoreSize();
      }
    }