#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/DebugInfo.h"
+#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
llvm_unreachable("Unknown mismatch!");
}
+static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V,
+ const Twine &ErrMsg) {
+ const Instruction *I = dyn_cast_or_null<Instruction>(V);
+ if (!I)
+ return Ctx.emitError(ErrMsg);
+
+ const char *AsmError = ", possible invalid constraint for vector type";
+ if (const CallInst *CI = dyn_cast<CallInst>(I))
+ if (isa<InlineAsm>(CI->getCalledValue()))
+ return Ctx.emitError(I, ErrMsg + AsmError);
+
+ return Ctx.emitError(I, ErrMsg);
+}
+
/// getCopyFromPartsVector - Create a value that contains the specified legal
/// parts combined into the value they represent. If the parts combine to a
/// type larger than ValueVT, then AssertOp can be used to specify whether the
// Handle cases such as i8 -> <1 x i1>
if (ValueVT.getVectorNumElements() != 1) {
- LLVMContext &Ctx = *DAG.getContext();
- Twine ErrMsg("non-trivial scalar-to-vector conversion");
- if (const Instruction *I = dyn_cast_or_null<Instruction>(V)) {
- if (const CallInst *CI = dyn_cast<CallInst>(I))
- if (isa<InlineAsm>(CI->getCalledValue()))
- ErrMsg = ErrMsg + ", possible invalid constraint for vector type";
- Ctx.emitError(I, ErrMsg);
- } else {
- Ctx.emitError(ErrMsg);
- }
+ diagnosePossiblyInvalidConstraint(*DAG.getContext(), V,
+ "non-trivial scalar-to-vector conversion");
return DAG.getUNDEF(ValueVT);
}
"Failed to tile the value with PartVT!");
if (NumParts == 1) {
- if (PartEVT != ValueVT) {
- LLVMContext &Ctx = *DAG.getContext();
- Twine ErrMsg("scalar-to-vector conversion failed");
- if (const Instruction *I = dyn_cast_or_null<Instruction>(V)) {
- if (const CallInst *CI = dyn_cast<CallInst>(I))
- if (isa<InlineAsm>(CI->getCalledValue()))
- ErrMsg = ErrMsg + ", possible invalid constraint for vector type";
- Ctx.emitError(I, ErrMsg);
- } else {
- Ctx.emitError(ErrMsg);
- }
- }
+ if (PartEVT != ValueVT)
+ diagnosePossiblyInvalidConstraint(*DAG.getContext(), V,
+ "scalar-to-vector conversion failed");
Parts[0] = Val;
return;
}
}
- /// areValueTypesLegal - Return true if types of all the values are legal.
- bool areValueTypesLegal(const TargetLowering &TLI) {
- for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
- MVT RegisterVT = RegVTs[Value];
- if (!TLI.isTypeLegal(RegisterVT))
- return false;
- }
- return true;
- }
-
/// append - Add the specified values to this one.
void append(const RegsForValue &RHS) {
ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end());
SDValue Res = DAG.getTargetConstant(Flag, MVT::i32);
Ops.push_back(Res);
+ unsigned SP = TLI.getStackPointerRegisterToSaveRestore();
for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]);
MVT RegisterVT = RegVTs[Value];
for (unsigned i = 0; i != NumRegs; ++i) {
assert(Reg < Regs.size() && "Mismatch in # registers expected");
- Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT));
+ unsigned TheReg = Regs[Reg++];
+ Ops.push_back(DAG.getRegister(TheReg, RegisterVT));
+
+ if (TheReg == SP && Code == InlineAsm::Kind_Clobber) {
+ // If we clobbered the stack pointer, MFI should know about it.
+ assert(DAG.getMachineFunction().getFrameInfo()->
+ hasInlineAsmWithSPAdjust());
+ }
}
}
}
AA = &aa;
GFI = gfi;
LibInfo = li;
- TD = DAG.getTarget().getDataLayout();
+ DL = DAG.getTarget().getDataLayout();
Context = DAG.getContext();
LPadToCallSiteMap.clear();
}
PendingExports.clear();
CurInst = NULL;
HasTailCall = false;
+ SDNodeOrder = LowestSDNodeOrder;
}
/// clearDanglingDebugInfo - Clear the dangling debug information
if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
return DAG.getGlobalAddress(GV, getCurSDLoc(), VT);
- if (isa<ConstantPointerNull>(C))
- return DAG.getConstant(0, TLI->getPointerTy());
+ if (isa<ConstantPointerNull>(C)) {
+ unsigned AS = V->getType()->getPointerAddressSpace();
+ return DAG.getConstant(0, TLI->getPointerTy(AS));
+ }
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
return DAG.getConstantFP(*CFP, VT);
for (unsigned i = 0; i < NumParts; ++i) {
Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(),
- /*isfixed=*/true, 0, 0));
+ VT, /*isfixed=*/true, 0, 0));
OutVals.push_back(Parts[i]);
}
}
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
MachineBasicBlock *CurBB,
- MachineBasicBlock *SwitchBB) {
+ MachineBasicBlock *SwitchBB,
+ uint32_t TWeight,
+ uint32_t FWeight) {
const BasicBlock *BB = CurBB->getBasicBlock();
// If the leaf of the tree is a comparison, merge the condition into
}
CaseBlock CB(Condition, BOp->getOperand(0),
- BOp->getOperand(1), NULL, TBB, FBB, CurBB);
+ BOp->getOperand(1), NULL, TBB, FBB, CurBB, TWeight, FWeight);
SwitchCases.push_back(CB);
return;
}
// Create a CaseBlock record representing this branch.
CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()),
- NULL, TBB, FBB, CurBB);
+ NULL, TBB, FBB, CurBB, TWeight, FWeight);
SwitchCases.push_back(CB);
}
+/// Scale down both weights to fit into uint32_t.
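+/// If both weights already fit in uint32_t, Scale is 1 and they are returned
+/// unchanged; e.g. weights {1ULL << 33, 1ULL << 31} are scaled by 3 down to
+/// {2863311530, 715827882}.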
+static void ScaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
+ uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
+ uint32_t Scale = (NewMax / UINT32_MAX) + 1;
+ NewTrue = NewTrue / Scale;
+ NewFalse = NewFalse / Scale;
+}
+
/// FindMergedConditions - If Cond is an expression like
void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
MachineBasicBlock *CurBB,
MachineBasicBlock *SwitchBB,
- unsigned Opc) {
+ unsigned Opc, uint32_t TWeight,
+ uint32_t FWeight) {
// If this node is not part of the or/and tree, emit it as a branch.
const Instruction *BOp = dyn_cast<Instruction>(Cond);
if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
BOp->getParent() != CurBB->getBasicBlock() ||
!InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
!InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
- EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB);
+ EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB,
+ TWeight, FWeight);
return;
}
if (Opc == Instruction::Or) {
// Codegen X | Y as:
+ // BB1:
// jmp_if_X TBB
// jmp TmpBB
// TmpBB:
// jmp FBB
//
+ // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
+ // The requirement is that
+ // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
+ //  = TrueProb for original BB.
+ //  Assuming the original weights are A and B, one choice is to set BB1's
+ // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice
+ // assumes that
+ // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
+ //  Another choice is to assume TrueProb for BB1 equals TrueProb for
+ // TmpBB, but the math is more complicated.
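+ //  As a sanity check of the first choice: with BB1 weights A:(A+2B) and
+ //  TmpBB weights A:2B,
+ //    TrueProb for BB1 + FalseProb for BB1 * TrueProb for TmpBB
+ //      = A/(2A+2B) + (A+2B)/(2A+2B) * A/(A+2B) = A/(A+B),
+ //  which is exactly the TrueProb for the original BB.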
+
+ uint64_t NewTrueWeight = TWeight;
+ uint64_t NewFalseWeight = (uint64_t)TWeight + 2 * (uint64_t)FWeight;
+ ScaleWeights(NewTrueWeight, NewFalseWeight);
// Emit the LHS condition.
- FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc);
+ FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc,
+ NewTrueWeight, NewFalseWeight);
+ NewTrueWeight = TWeight;
+ NewFalseWeight = 2 * (uint64_t)FWeight;
+ ScaleWeights(NewTrueWeight, NewFalseWeight);
// Emit the RHS condition into TmpBB.
- FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc);
+ FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
+ NewTrueWeight, NewFalseWeight);
} else {
assert(Opc == Instruction::And && "Unknown merge op!");
// Codegen X & Y as:
+ // BB1:
// jmp_if_X TmpBB
// jmp FBB
// TmpBB:
//
// This requires creation of TmpBB after CurBB.
+ // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
+ // The requirement is that
+ // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
+ //  = FalseProb for original BB.
+ //  Assuming the original weights are A and B, one choice is to set BB1's
+ // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice
+ // assumes that
+ // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB.
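+ //  As a sanity check: with BB1 weights (2A+B):B and TmpBB weights 2A:B,
+ //    FalseProb for BB1 + TrueProb for BB1 * FalseProb for TmpBB
+ //      = B/(2A+2B) + (2A+B)/(2A+2B) * B/(2A+B) = B/(A+B),
+ //  which is exactly the FalseProb for the original BB.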
+
+ uint64_t NewTrueWeight = 2 * (uint64_t)TWeight + (uint64_t)FWeight;
+ uint64_t NewFalseWeight = FWeight;
+ ScaleWeights(NewTrueWeight, NewFalseWeight);
// Emit the LHS condition.
- FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc);
+ FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc,
+ NewTrueWeight, NewFalseWeight);
+ NewTrueWeight = 2 * (uint64_t)TWeight;
+ NewFalseWeight = FWeight;
+ ScaleWeights(NewTrueWeight, NewFalseWeight);
// Emit the RHS condition into TmpBB.
- FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc);
+ FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
+ NewTrueWeight, NewFalseWeight);
}
}
(BOp->getOpcode() == Instruction::And ||
BOp->getOpcode() == Instruction::Or)) {
FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
- BOp->getOpcode());
+ BOp->getOpcode(), getEdgeWeight(BrMBB, Succ0MBB),
+ getEdgeWeight(BrMBB, Succ1MBB));
// If the compares in later blocks need to use values not currently
// exported from this block, export them now. This block should always
// be the first entry.
volatile double RDensity =
(double)RSize.roundToDouble() /
(Last - RBegin + 1ULL).roundToDouble();
- double Metric = Range.logBase2()*(LDensity+RDensity);
+ volatile double Metric = Range.logBase2()*(LDensity+RDensity);
// Should always split in some non-trivial place
DEBUG(dbgs() <<"=>Step\n"
<< "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n'
if (Cases.size() >= 2)
// Must recompute end() each iteration because it may be
// invalidated by erase if we hold on to it
- for (CaseItr I = Cases.begin(), J = llvm::next(Cases.begin());
+ for (CaseItr I = Cases.begin(), J = std::next(Cases.begin());
J != Cases.end(); ) {
const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue();
const APInt& currentValue = cast<ConstantInt>(I->High)->getValue();
if (DestVT != N.getValueType())
setValue(&I, DAG.getNode(ISD::BITCAST, getCurSDLoc(),
DestVT, N)); // convert types.
+ // Check if the original LLVM IR operand was a ConstantInt, because getValue()
+ // might fold any kind of constant expression to an integer constant and that
+ // is not what we are looking for. Only recognize a bitcast of a genuine
+ // constant integer as an opaque constant.
+ else if (ConstantInt *C = dyn_cast<ConstantInt>(I.getOperand(0)))
+ setValue(&I, DAG.getConstant(C->getValue(), DestVT, /*isTarget=*/false,
+ /*isOpaque=*/true));
else
setValue(&I, N); // noop cast.
}
+void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const Value *SV = I.getOperand(0);
+ SDValue N = getValue(SV);
+ EVT DestVT = TM.getTargetLowering()->getValueType(I.getType());
+
+ unsigned SrcAS = SV->getType()->getPointerAddressSpace();
+ unsigned DestAS = I.getType()->getPointerAddressSpace();
+
+ if (!TLI.isNoopAddrSpaceCast(SrcAS, DestAS))
+ N = DAG.getAddrSpaceCast(getCurSDLoc(), DestVT, N, SrcAS, DestAS);
+
+ setValue(&I, N);
+}
+
void SelectionDAGBuilder::visitInsertElement(const User &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue InVec = getValue(I.getOperand(0));
unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue();
if (Field) {
// N = N + Offset
- uint64_t Offset = TD->getStructLayout(StTy)->getElementOffset(Field);
+ uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field);
N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N,
DAG.getConstant(Offset, N.getValueType()));
}
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
if (CI->isZero()) continue;
uint64_t Offs =
- TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
+ DL->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
SDValue OffsVal;
EVT PTy = TLI->getPointerTy(AS);
unsigned PtrBits = PTy.getSizeInBits();
// N = N + Idx * ElementSize;
APInt ElementSize = APInt(TLI->getPointerSizeInBits(AS),
- TD->getTypeAllocSize(Ty));
+ DL->getTypeAllocSize(Ty));
SDValue IdxN = getValue(Idx);
// If the index is smaller or larger than intptr_t, truncate or extend
setValue(&I, DSA);
DAG.setRoot(DSA.getValue(1));
- // Inform the Frame Information that we have just allocated a variable-sized
- // object.
- FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject(Align ? Align : 1);
+ assert(FuncInfo.MF->getFrameInfo()->hasVarSizedObjects());
}
void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
SDValue Root;
bool ConstantMemory = false;
- if (I.isVolatile() || NumValues > MaxParallelChains)
+ if (isVolatile || NumValues > MaxParallelChains)
// Serialize volatile loads with other side effects.
Root = getRoot();
else if (AA->pointsToConstantMemory(
Root = DAG.getRoot();
}
+ const TargetLowering *TLI = TM.getTargetLowering();
+ if (isVolatile)
+ Root = TLI->prepareVolatileOrAtomicLoad(Root, getCurSDLoc(), DAG);
+
SmallVector<SDValue, 4> Values(NumValues);
SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains),
NumValues));
void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
SDLoc dl = getCurSDLoc();
- AtomicOrdering Order = I.getOrdering();
+ AtomicOrdering SuccessOrder = I.getSuccessOrdering();
+ AtomicOrdering FailureOrder = I.getFailureOrdering();
SynchronizationScope Scope = I.getSynchScope();
SDValue InChain = getRoot();
const TargetLowering *TLI = TM.getTargetLowering();
if (TLI->getInsertFencesForAtomic())
- InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl,
+ InChain = InsertFenceForAtomic(InChain, SuccessOrder, Scope, true, dl,
DAG, *TLI);
SDValue L =
getValue(I.getCompareOperand()),
getValue(I.getNewValOperand()),
MachinePointerInfo(I.getPointerOperand()), 0 /* Alignment */,
- TLI->getInsertFencesForAtomic() ? Monotonic : Order,
+ TLI->getInsertFencesForAtomic() ? Monotonic : SuccessOrder,
+ TLI->getInsertFencesForAtomic() ? Monotonic : FailureOrder,
Scope);
SDValue OutChain = L.getValue(1);
if (TLI->getInsertFencesForAtomic())
- OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
+ OutChain = InsertFenceForAtomic(OutChain, SuccessOrder, Scope, false, dl,
DAG, *TLI);
setValue(&I, L);
if (I.getAlignment() < VT.getSizeInBits() / 8)
report_fatal_error("Cannot generate unaligned atomic load");
+ InChain = TLI->prepareVolatileOrAtomicLoad(InChain, dl, DAG);
SDValue L =
DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain,
getValue(I.getPointerOperand()),
static SDValue expandPow(SDLoc dl, SDValue LHS, SDValue RHS,
SelectionDAG &DAG, const TargetLowering &TLI) {
bool IsExp10 = false;
- if (LHS.getValueType() == MVT::f32 && LHS.getValueType() == MVT::f32 &&
+ if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
if (ConstantFPSDNode *LHSC = dyn_cast<ConstantFPSDNode>(LHS)) {
APFloat Ten(10.0f);
return 0;
SmallVector<Value *, 4> Allocas;
- GetUnderlyingObjects(I.getArgOperand(1), Allocas, TD);
+ GetUnderlyingObjects(I.getArgOperand(1), Allocas, DL);
for (SmallVectorImpl<Value*>::iterator Object = Allocas.begin(),
E = Allocas.end(); Object != E; ++Object) {
(void)getControlRoot();
return 0;
}
+ case Intrinsic::clear_cache:
+ return TLI->getClearCacheBuiltinName();
case Intrinsic::donothing:
// ignore
return 0;
+ case Intrinsic::experimental_stackmap: {
+ visitStackmap(I);
+ return 0;
+ }
+ case Intrinsic::experimental_patchpoint_void:
+ case Intrinsic::experimental_patchpoint_i64: {
+ visitPatchpoint(I);
+ return 0;
+ }
}
}
int DemoteStackIdx = -100;
if (!CanLowerReturn) {
+ assert(!CS.hasInAllocaArgument() &&
+ "sret demotion is incompatible with inalloca");
uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(
FTy->getReturnType());
unsigned Align = TLI->getDataLayout()->getPrefTypeAlignment(
SDValue ArgNode = getValue(V);
Entry.Node = ArgNode; Entry.Ty = V->getType();
- unsigned attrInd = i - CS.arg_begin() + 1;
- Entry.isSExt = CS.paramHasAttr(attrInd, Attribute::SExt);
- Entry.isZExt = CS.paramHasAttr(attrInd, Attribute::ZExt);
- Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg);
- Entry.isSRet = CS.paramHasAttr(attrInd, Attribute::StructRet);
- Entry.isNest = CS.paramHasAttr(attrInd, Attribute::Nest);
- Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal);
- Entry.isReturned = CS.paramHasAttr(attrInd, Attribute::Returned);
- Entry.Alignment = CS.getParamAlignment(attrInd);
+ // Skip the first return-type Attribute to get to params.
+ Entry.setAttributes(&CS, i - CS.arg_begin() + 1);
Args.push_back(Entry);
}
}
if (!Result.second.getNode()) {
- // As a special case, a null chain means that a tail call has been emitted and
- // the DAG root is already updated.
+ // As a special case, a null chain means that a tail call has been emitted
+ // and the DAG root is already updated.
HasTailCall = true;
// Since there's no actual continuation from this block, nothing can be
/// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the
/// value is equal or not-equal to zero.
static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) {
- for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end();
- UI != E; ++UI) {
- if (const ICmpInst *IC = dyn_cast<ICmpInst>(*UI))
+ for (const User *U : V->users()) {
+ if (const ICmpInst *IC = dyn_cast<ICmpInst>(U))
if (IC->isEquality())
if (const Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
if (C->isNullValue())
if (const Constant *LoadCst =
ConstantFoldLoadFromConstPtr(const_cast<Constant *>(LoadInput),
- Builder.TD))
+ Builder.DL))
return Builder.getValue(LoadCst);
}
// bloat the code.
const TargetLowering *TLI = TM.getTargetLowering();
if (ActuallyDoIt && CSize->getZExtValue() > 4) {
+ unsigned DstAS = LHS->getType()->getPointerAddressSpace();
+ unsigned SrcAS = RHS->getType()->getPointerAddressSpace();
// TODO: Handle 5 byte compare as 4-byte + 1 byte.
// TODO: Handle 8 byte compare on x86-32 as two 32-bit loads.
- if (!TLI->isTypeLegal(LoadVT) ||!TLI->allowsUnalignedMemoryAccesses(LoadVT))
+ if (!TLI->isTypeLegal(LoadVT) ||
+ !TLI->allowsUnalignedMemoryAccesses(LoadVT, SrcAS) ||
+ !TLI->allowsUnalignedMemoryAccesses(LoadVT, DstAS))
ActuallyDoIt = false;
}
/// MVT::Other.
EVT getCallOperandValEVT(LLVMContext &Context,
const TargetLowering &TLI,
- const DataLayout *TD) const {
+ const DataLayout *DL) const {
if (CallOperandVal == 0) return MVT::Other;
if (isa<BasicBlock>(CallOperandVal))
// If OpTy is not a single value, it may be a struct/union that we
// can tile with integers.
if (!OpTy->isSingleValueType() && OpTy->isSized()) {
- unsigned BitSize = TD->getTypeSizeInBits(OpTy);
+ unsigned BitSize = DL->getTypeSizeInBits(OpTy);
switch (BitSize) {
default: break;
case 1:
// types are identical size, use a bitcast to convert (e.g. two differing
// vector types).
MVT RegVT = *PhysReg.second->vt_begin();
- if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) {
+ if (RegVT.getSizeInBits() == OpInfo.CallOperand.getValueSizeInBits()) {
OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL,
RegVT, OpInfo.CallOperand);
OpInfo.ConstraintVT = RegVT;
OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
}
- OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), *TLI, TD).
+ OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), *TLI, DL).
getSimpleVT();
}
void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) {
const TargetLowering *TLI = TM.getTargetLowering();
- const DataLayout &TD = *TLI->getDataLayout();
+ const DataLayout &DL = *TLI->getDataLayout();
SDValue V = DAG.getVAArg(TLI->getValueType(I.getType()), getCurSDLoc(),
getRoot(), getValue(I.getOperand(0)),
DAG.getSrcValue(I.getOperand(0)),
- TD.getABITypeAlignment(I.getType()));
+ DL.getABITypeAlignment(I.getType()));
setValue(&I, V);
DAG.setRoot(V.getValue(1));
}
DAG.getSrcValue(I.getArgOperand(1))));
}
+/// \brief Lower an argument list according to the target calling convention.
+///
+/// \return A tuple of <return-value, token-chain>
+///
+/// This is a helper for lowering intrinsics that follow a target calling
+/// convention or require stack pointer adjustment. Only a subset of the
+/// intrinsic's operands need to participate in the calling convention.
+std::pair<SDValue, SDValue>
+SelectionDAGBuilder::LowerCallOperands(const CallInst &CI, unsigned ArgIdx,
+ unsigned NumArgs, SDValue Callee,
+ bool useVoidTy) {
+ TargetLowering::ArgListTy Args;
+ Args.reserve(NumArgs);
+
+ // Populate the argument list.
+ // Attributes for args start at offset 1, after the return attribute.
+ ImmutableCallSite CS(&CI);
+ for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs, AttrI = ArgIdx + 1;
+ ArgI != ArgE; ++ArgI) {
+ const Value *V = CI.getOperand(ArgI);
+
+ assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic.");
+
+ TargetLowering::ArgListEntry Entry;
+ Entry.Node = getValue(V);
+ Entry.Ty = V->getType();
+ Entry.setAttributes(&CS, AttrI);
+ Args.push_back(Entry);
+ }
+
+ Type *retTy = useVoidTy ? Type::getVoidTy(*DAG.getContext()) : CI.getType();
+ TargetLowering::CallLoweringInfo CLI(getRoot(), retTy, /*retSExt*/ false,
+ /*retZExt*/ false, /*isVarArg*/ false, /*isInReg*/ false, NumArgs,
+ CI.getCallingConv(), /*isTailCall*/ false, /*doesNotReturn*/ false,
+ /*isReturnValueUsed*/ CI.use_empty(), Callee, Args, DAG, getCurSDLoc());
+
+ const TargetLowering *TLI = TM.getTargetLowering();
+ return TLI->LowerCallTo(CLI);
+}
+
+/// \brief Add a stack map intrinsic call's live variable operands to a stackmap
+/// or patchpoint target node's operand list.
+///
+/// Constants are converted to TargetConstants purely as an optimization to
+/// avoid constant materialization and register allocation.
+///
+/// FrameIndex operands are converted to TargetFrameIndex so that ISEL does not
+/// generate address computation nodes, and so ExpandISelPseudo can convert the
+/// TargetFrameIndex into a DirectMemRefOp StackMap location. This avoids
+/// address materialization and register allocation, but may also be required
+/// for correctness. If a StackMap (or PatchPoint) intrinsic directly uses an
+/// alloca in the entry block, then the runtime may assume that the alloca's
+/// StackMap location can be read immediately after compilation and that the
+/// location is valid at any point during execution (this is similar to the
+/// assumption made by the llvm.gcroot intrinsic). If the alloca's location were
+/// only available in a register, then the runtime would need to trap when
+/// execution reaches the StackMap in order to read the alloca's location.
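+///
+/// For example (illustrative only): a constant operand such as "i64 42"
+/// becomes the operand pair (StackMaps::ConstantOp, 42), an entry-block
+/// alloca's frame index becomes a TargetFrameIndex, and any other live value
+/// is passed through unchanged.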
+static void addStackMapLiveVars(const CallInst &CI, unsigned StartIdx,
+ SmallVectorImpl<SDValue> &Ops,
+ SelectionDAGBuilder &Builder) {
+ for (unsigned i = StartIdx, e = CI.getNumArgOperands(); i != e; ++i) {
+ SDValue OpVal = Builder.getValue(CI.getArgOperand(i));
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) {
+ Ops.push_back(
+ Builder.DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64));
+ Ops.push_back(
+ Builder.DAG.getTargetConstant(C->getSExtValue(), MVT::i64));
+ } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(OpVal)) {
+ const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo();
+ Ops.push_back(
+ Builder.DAG.getTargetFrameIndex(FI->getIndex(), TLI.getPointerTy()));
+ } else
+ Ops.push_back(OpVal);
+ }
+}
+
+/// \brief Lower llvm.experimental.stackmap directly to its target opcode.
+void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
+ // void @llvm.experimental.stackmap(i64 <id>, i32 <numShadowBytes>,
+ // [live variables...])
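+ //
+ // For example (illustrative IR, not from a test):
+ //   call void @llvm.experimental.stackmap(i64 42, i32 8, i32 %x)
+ // records the location of %x under ID 42 and reserves 8 bytes of nop shadow.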
+
+ assert(CI.getType()->isVoidTy() && "Stackmap cannot return a value.");
+
+ SDValue Chain, InFlag, Callee, NullPtr;
+ SmallVector<SDValue, 32> Ops;
+
+ SDLoc DL = getCurSDLoc();
+ Callee = getValue(CI.getCalledValue());
+ NullPtr = DAG.getIntPtrConstant(0, true);
+
+ // The stackmap intrinsic only records the live variables (the arguments
+ // passed to it) and emits NOPs (if requested). Unlike the patchpoint
+ // intrinsic, this won't be lowered to a function call. This means we don't
+ // have to worry about calling conventions and target specific lowering code.
+ // Instead we perform the call lowering right here.
+ //
+ // chain, flag = CALLSEQ_START(chain, 0)
+ // chain, flag = STACKMAP(id, nbytes, ..., chain, flag)
+ // chain, flag = CALLSEQ_END(chain, 0, 0, flag)
+ //
+ Chain = DAG.getCALLSEQ_START(getRoot(), NullPtr, DL);
+ InFlag = Chain.getValue(1);
+
+ // Add the <id> and <numBytes> constants.
+ SDValue IDVal = getValue(CI.getOperand(PatchPointOpers::IDPos));
+ Ops.push_back(DAG.getTargetConstant(
+ cast<ConstantSDNode>(IDVal)->getZExtValue(), MVT::i64));
+ SDValue NBytesVal = getValue(CI.getOperand(PatchPointOpers::NBytesPos));
+ Ops.push_back(DAG.getTargetConstant(
+ cast<ConstantSDNode>(NBytesVal)->getZExtValue(), MVT::i32));
+
+ // Push live variables for the stack map.
+ addStackMapLiveVars(CI, 2, Ops, *this);
+
+ // We are not pushing any register mask info here on the operands list,
+ // because the stackmap doesn't clobber anything.
+
+ // Push the chain and the glue flag.
+ Ops.push_back(Chain);
+ Ops.push_back(InFlag);
+
+ // Create the STACKMAP node.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDNode *SM = DAG.getMachineNode(TargetOpcode::STACKMAP, DL, NodeTys, Ops);
+ Chain = SDValue(SM, 0);
+ InFlag = Chain.getValue(1);
+
+ Chain = DAG.getCALLSEQ_END(Chain, NullPtr, NullPtr, InFlag, DL);
+
+ // Stackmaps don't generate values, so nothing goes into the NodeMap.
+
+ // Set the root to the target-lowered call chain.
+ DAG.setRoot(Chain);
+
+ // Inform the Frame Information that we have a stackmap in this function.
+ FuncInfo.MF->getFrameInfo()->setHasStackMap();
+}
+
+/// \brief Lower llvm.experimental.patchpoint directly to its target opcode.
+void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) {
+ // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>,
+ // i32 <numBytes>,
+ // i8* <target>,
+ // i32 <numArgs>,
+ // [Args...],
+ // [live variables...])
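+ //
+ // For example (illustrative IR, not from a test):
+ //   call void (i64, i32, i8*, i32, ...)*
+ //     @llvm.experimental.patchpoint.void(i64 3, i32 15, i8* %f, i32 2,
+ //                                        i64 %a, i64 %b)
+ // emits a patchable 15-byte region containing a call to %f(%a, %b).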
+
+ CallingConv::ID CC = CI.getCallingConv();
+ bool isAnyRegCC = CC == CallingConv::AnyReg;
+ bool hasDef = !CI.getType()->isVoidTy();
+ SDValue Callee = getValue(CI.getOperand(2)); // <target>
+
+ // Get the real number of arguments participating in the call <numArgs>
+ SDValue NArgVal = getValue(CI.getArgOperand(PatchPointOpers::NArgPos));
+ unsigned NumArgs = cast<ConstantSDNode>(NArgVal)->getZExtValue();
+
+ // Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs>
+ // Intrinsics include all meta-operands up to but not including CC.
+ unsigned NumMetaOpers = PatchPointOpers::CCPos;
+ assert(CI.getNumArgOperands() >= NumMetaOpers + NumArgs &&
+ "Not enough arguments provided to the patchpoint intrinsic");
+
+ // For AnyRegCC the arguments are lowered later on manually.
+ unsigned NumCallArgs = isAnyRegCC ? 0 : NumArgs;
+ std::pair<SDValue, SDValue> Result =
+ LowerCallOperands(CI, NumMetaOpers, NumCallArgs, Callee, isAnyRegCC);
+
+ // Set the root to the target-lowered call chain.
+ SDValue Chain = Result.second;
+ DAG.setRoot(Chain);
+
+ SDNode *CallEnd = Chain.getNode();
+ if (hasDef && (CallEnd->getOpcode() == ISD::CopyFromReg))
+ CallEnd = CallEnd->getOperand(0).getNode();
+
+ // Get a call instruction from the call sequence chain.
+ // Tail calls are not allowed.
+ assert(CallEnd->getOpcode() == ISD::CALLSEQ_END &&
+ "Expected a callseq node.");
+ SDNode *Call = CallEnd->getOperand(0).getNode();
+ bool hasGlue = Call->getGluedNode();
+
+ // Replace the target specific call node with the patchable intrinsic.
+ SmallVector<SDValue, 8> Ops;
+
+ // Add the <id> and <numBytes> constants.
+ SDValue IDVal = getValue(CI.getOperand(PatchPointOpers::IDPos));
+ Ops.push_back(DAG.getTargetConstant(
+ cast<ConstantSDNode>(IDVal)->getZExtValue(), MVT::i64));
+ SDValue NBytesVal = getValue(CI.getOperand(PatchPointOpers::NBytesPos));
+ Ops.push_back(DAG.getTargetConstant(
+ cast<ConstantSDNode>(NBytesVal)->getZExtValue(), MVT::i32));
+
+ // Assume that the Callee is a constant address.
+ // FIXME: handle function symbols in the future.
+ Ops.push_back(
+ DAG.getIntPtrConstant(cast<ConstantSDNode>(Callee)->getZExtValue(),
+ /*isTarget=*/true));
+
+ // Adjust <numArgs> to account for any arguments that have been passed on the
+ // stack instead.
+ // Call Node: Chain, Target, {Args}, RegMask, [Glue]
+ unsigned NumCallRegArgs = Call->getNumOperands() - (hasGlue ? 4 : 3);
+ NumCallRegArgs = isAnyRegCC ? NumArgs : NumCallRegArgs;
+ Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, MVT::i32));
+
+ // Add the calling convention
+ Ops.push_back(DAG.getTargetConstant((unsigned)CC, MVT::i32));
+
+ // Add the arguments we omitted previously. The register allocator should
+ // place these in any free register.
+ if (isAnyRegCC)
+ for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i)
+ Ops.push_back(getValue(CI.getArgOperand(i)));
+
+ // Push the arguments from the call instruction up to the register mask.
+ SDNode::op_iterator e = hasGlue ? Call->op_end()-2 : Call->op_end()-1;
+ for (SDNode::op_iterator i = Call->op_begin()+2; i != e; ++i)
+ Ops.push_back(*i);
+
+ // Push live variables for the stack map.
+ addStackMapLiveVars(CI, NumMetaOpers + NumArgs, Ops, *this);
+
+ // Push the register mask info.
+ if (hasGlue)
+ Ops.push_back(*(Call->op_end()-2));
+ else
+ Ops.push_back(*(Call->op_end()-1));
+
+ // Push the chain (this is originally the first operand of the call, but
+ // now becomes the last or second-to-last operand).
+ Ops.push_back(*(Call->op_begin()));
+
+ // Push the glue flag (last operand).
+ if (hasGlue)
+ Ops.push_back(*(Call->op_end()-1));
+
+ SDVTList NodeTys;
+ if (isAnyRegCC && hasDef) {
+ // Create the return types based on the intrinsic definition
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SmallVector<EVT, 3> ValueVTs;
+ ComputeValueVTs(TLI, CI.getType(), ValueVTs);
+ assert(ValueVTs.size() == 1 && "Expected only one return value type.");
+
+ // There is always a chain and a glue type at the end
+ ValueVTs.push_back(MVT::Other);
+ ValueVTs.push_back(MVT::Glue);
+ NodeTys = DAG.getVTList(ValueVTs.data(), ValueVTs.size());
+ } else
+ NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+
+ // Replace the target specific call node with a PATCHPOINT node.
+ MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHPOINT,
+ getCurSDLoc(), NodeTys, Ops);
+
+ // Update the NodeMap.
+ if (hasDef) {
+ if (isAnyRegCC)
+ setValue(&CI, SDValue(MN, 0));
+ else
+ setValue(&CI, Result.first);
+ }
+
+ // Fix up the consumers of the intrinsic. The chain and glue may be used in the
+ // call sequence. Furthermore the location of the chain and glue can change
+ // when the AnyReg calling convention is used and the intrinsic returns a
+ // value.
+ if (isAnyRegCC && hasDef) {
+ SDValue From[] = {SDValue(Call, 0), SDValue(Call, 1)};
+ SDValue To[] = {SDValue(MN, 1), SDValue(MN, 2)};
+ DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
+ } else
+ DAG.ReplaceAllUsesWith(Call, MN);
+ DAG.DeleteNode(Call);
+
+ // Inform the Frame Information that we have a patchpoint in this function.
+ FuncInfo.MF->getFrameInfo()->setHasPatchPoint();
+}
+
/// TargetLowering::LowerCallTo - This is the default LowerCallTo
/// implementation, which just calls LowerCall.
/// FIXME: When all targets are
for (unsigned i = 0; i != NumRegs; ++i) {
ISD::InputArg MyFlags;
MyFlags.VT = RegisterVT;
+ MyFlags.ArgVT = VT;
MyFlags.Used = CLI.IsReturnValueUsed;
if (CLI.RetSExt)
MyFlags.Flags.setSExt();
Flags.setInReg();
if (Args[i].isSRet)
Flags.setSRet();
- if (Args[i].isByVal) {
+ if (Args[i].isByVal)
Flags.setByVal();
+ if (Args[i].isInAlloca) {
+ Flags.setInAlloca();
+ // Set the byval flag for CCAssignFn callbacks that don't know about
+ // inalloca. This way we can know how many bytes we should've allocated
+ // and how many bytes a callee cleanup function will pop. If we port
+ // inalloca to more targets, we'll have to add custom inalloca handling
+ // in the various CC lowering callbacks.
+ Flags.setByVal();
+ }
+ if (Args[i].isByVal || Args[i].isInAlloca) {
PointerType *Ty = cast<PointerType>(Args[i].Ty);
Type *ElementTy = Ty->getElementType();
Flags.setByValSize(getDataLayout()->getTypeAllocSize(ElementTy));
for (unsigned j = 0; j != NumParts; ++j) {
// If it isn't the first piece, the alignment must be 1.
- ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(),
+ ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), VT,
i < CLI.NumFixedArgs,
i, j*Parts[j].getValueType().getStoreSize());
if (NumParts > 1 && j == 0)
return A->use_empty();
const BasicBlock *Entry = A->getParent()->begin();
- for (Value::const_use_iterator UI = A->use_begin(), E = A->use_end();
- UI != E; ++UI) {
- const User *U = *UI;
+ for (const User *U : A->users())
if (cast<Instruction>(U)->getParent() != Entry || isa<SwitchInst>(U))
return false; // Use not in entry block.
- }
+
return true;
}
SelectionDAG &DAG = SDB->DAG;
SDLoc dl = SDB->getCurSDLoc();
const TargetLowering *TLI = getTargetLowering();
- const DataLayout *TD = TLI->getDataLayout();
+ const DataLayout *DL = TLI->getDataLayout();
SmallVector<ISD::InputArg, 16> Ins;
if (!FuncInfo->CanLowerReturn) {
ISD::ArgFlagsTy Flags;
Flags.setSRet();
MVT RegisterVT = TLI->getRegisterType(*DAG.getContext(), ValueVTs[0]);
- ISD::InputArg RetArg(Flags, RegisterVT, true, 0, 0);
+ ISD::InputArg RetArg(Flags, RegisterVT, ValueVTs[0], true, 0, 0);
Ins.push_back(RetArg);
}
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(*TLI, I->getType(), ValueVTs);
bool isArgValueUsed = !I->use_empty();
+ unsigned PartBase = 0;
for (unsigned Value = 0, NumValues = ValueVTs.size();
Value != NumValues; ++Value) {
EVT VT = ValueVTs[Value];
Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
ISD::ArgFlagsTy Flags;
unsigned OriginalAlignment =
- TD->getABITypeAlignment(ArgTy);
+ DL->getABITypeAlignment(ArgTy);
if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt))
Flags.setZExt();
Flags.setInReg();
if (F.getAttributes().hasAttribute(Idx, Attribute::StructRet))
Flags.setSRet();
- if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) {
+ if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal))
+ Flags.setByVal();
+ if (F.getAttributes().hasAttribute(Idx, Attribute::InAlloca)) {
+ Flags.setInAlloca();
+ // Set the byval flag for CCAssignFn callbacks that don't know about
+ // inalloca. This way we can know how many bytes we should've allocated
+ // and how many bytes a callee cleanup function will pop. If we port
+ // inalloca to more targets, we'll have to add custom inalloca handling
+ // in the various CC lowering callbacks.
Flags.setByVal();
+ }
+ if (Flags.isByVal() || Flags.isInAlloca()) {
PointerType *Ty = cast<PointerType>(I->getType());
Type *ElementTy = Ty->getElementType();
- Flags.setByValSize(TD->getTypeAllocSize(ElementTy));
+ Flags.setByValSize(DL->getTypeAllocSize(ElementTy));
// For ByVal, alignment should be passed from FE. BE will guess if
// this info is not there but there are cases it cannot get right.
unsigned FrameAlign;
MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
unsigned NumRegs = TLI->getNumRegisters(*CurDAG->getContext(), VT);
for (unsigned i = 0; i != NumRegs; ++i) {
- ISD::InputArg MyFlags(Flags, RegisterVT, isArgValueUsed,
- Idx-1, i*RegisterVT.getStoreSize());
+ ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed,
+ Idx-1, PartBase+i*RegisterVT.getStoreSize());
if (NumRegs > 1 && i == 0)
MyFlags.Flags.setSplit();
// If it isn't the first piece, the alignment must be 1.
MyFlags.Flags.setOrigAlign(1);
Ins.push_back(MyFlags);
}
+ PartBase += VT.getStoreSize();
}
}