#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Constants.h"
#include "llvm/CallingConv.h"
#include "llvm/DebugInfo.h"
/// visitBitTestCase - this function produces one "bit test"
void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
MachineBasicBlock* NextMBB,
+ uint32_t BranchWeightToNext,
unsigned Reg,
BitTestCase &B,
MachineBasicBlock *SwitchBB) {
ISD::SETNE);
}
- addSuccessorWithWeight(SwitchBB, B.TargetBB);
- addSuccessorWithWeight(SwitchBB, NextMBB);
+ // The branch weight from SwitchBB to B.TargetBB is B.ExtraWeight.
+ addSuccessorWithWeight(SwitchBB, B.TargetBB, B.ExtraWeight);
+ // The branch weight from SwitchBB to NextMBB is BranchWeightToNext.
+ addSuccessorWithWeight(SwitchBB, NextMBB, BranchWeightToNext);
SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
MVT::Other, getControlRoot(),
if (++BBI != FuncInfo.MF->end())
NextBlock = BBI;
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
// If any two of the cases have the same destination, and if one value
// is the same as the other, but has one bit unset that the other has set,
// use bit manipulation to do two compares at once. For example:
ISD::SETEQ);
// Update successor info.
- addSuccessorWithWeight(SwitchBB, Small.BB);
- addSuccessorWithWeight(SwitchBB, Default);
+ // Both Small and Big will jump to Small.BB, so we sum up the weights.
+ addSuccessorWithWeight(SwitchBB, Small.BB,
+ Small.ExtraWeight + Big.ExtraWeight);
+ addSuccessorWithWeight(SwitchBB, Default,
+ // The default destination is the first successor in IR.
+ BPI ? BPI->getEdgeWeight(SwitchBB->getBasicBlock(), (unsigned)0) : 0);
// Insert the true branch.
SDValue BrCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other,
}
// Order cases by weight so the most likely case will be checked first.
- BranchProbabilityInfo *BPI = FuncInfo.BPI;
+ uint32_t UnhandledWeights = 0;
if (BPI) {
for (CaseItr I = CR.Range.first, IE = CR.Range.second; I != IE; ++I) {
- uint32_t IWeight = BPI->getEdgeWeight(SwitchBB->getBasicBlock(),
- I->BB->getBasicBlock());
+ uint32_t IWeight = I->ExtraWeight;
+ UnhandledWeights += IWeight;
for (CaseItr J = CR.Range.first; J < I; ++J) {
- uint32_t JWeight = BPI->getEdgeWeight(SwitchBB->getBasicBlock(),
- J->BB->getBasicBlock());
+ uint32_t JWeight = J->ExtraWeight;
if (IWeight > JWeight)
std::swap(*I, *J);
}
LHS = I->Low; MHS = SV; RHS = I->High;
}
- uint32_t ExtraWeight = I->ExtraWeight;
+ // The false weight should be the sum of all unhandled cases.
+ UnhandledWeights -= I->ExtraWeight;
CaseBlock CB(CC, LHS, RHS, MHS, /* truebb */ I->BB, /* falsebb */ FallThrough,
/* me */ CurBlock,
- /* trueweight */ ExtraWeight / 2, /* falseweight */ ExtraWeight / 2);
+ /* trueweight */ I->ExtraWeight,
+ /* falseweight */ UnhandledWeights);
// If emitting the first comparison, just call visitSwitchCase to emit the
// code into the current block. Otherwise, push the CaseBlock onto the
}
}
+ // Calculate weight for each unique destination in CR.
+ DenseMap<MachineBasicBlock*, uint32_t> DestWeights;
+ if (FuncInfo.BPI)
+ for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
+ DenseMap<MachineBasicBlock*, uint32_t>::iterator Itr =
+ DestWeights.find(I->BB);
+ if (Itr != DestWeights.end())
+ Itr->second += I->ExtraWeight;
+ else
+ DestWeights[I->BB] = I->ExtraWeight;
+ }
+
// Update successor info. Add one edge to each unique successor.
BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs());
for (std::vector<MachineBasicBlock*>::iterator I = DestBBs.begin(),
E = DestBBs.end(); I != E; ++I) {
if (!SuccsHandled[(*I)->getNumber()]) {
SuccsHandled[(*I)->getNumber()] = true;
- addSuccessorWithWeight(JumpTableBB, *I);
+ DenseMap<MachineBasicBlock*, uint32_t>::iterator Itr =
+ DestWeights.find(*I);
+ addSuccessorWithWeight(JumpTableBB, *I,
+ Itr != DestWeights.end() ? Itr->second : 0);
}
}
if (i == count) {
assert((count < 3) && "Too much destinations to test!");
- CasesBits.push_back(CaseBits(0, Dest, 0));
+ CasesBits.push_back(CaseBits(0, Dest, 0, 0/*Weight*/));
count++;
}
uint64_t lo = (lowValue - lowBound).getZExtValue();
uint64_t hi = (highValue - lowBound).getZExtValue();
+ CasesBits[i].ExtraWeight += I->ExtraWeight;
for (uint64_t j = lo; j <= hi; j++) {
CasesBits[i].Mask |= 1ULL << j;
CurMF->insert(BBI, CaseBB);
BTC.push_back(BitTestCase(CasesBits[i].Mask,
CaseBB,
- CasesBits[i].BB));
+ CasesBits[i].BB, CasesBits[i].ExtraWeight));
// Put SV in a virtual register to make it available from the new blocks.
ExportFromCurrentBlock(SV);
Clusterifier TheClusterifier;
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
// Start with "simple" cases
for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end();
i != e; ++i) {
const BasicBlock *SuccBB = i.getCaseSuccessor();
MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB];
- TheClusterifier.add(i.getCaseValueEx(), SMBB);
+ TheClusterifier.add(i.getCaseValueEx(), SMBB,
+ BPI ? BPI->getEdgeWeight(SI.getParent(), i.getSuccessorIndex()) : 0);
}
TheClusterifier.optimize();
- BranchProbabilityInfo *BPI = FuncInfo.BPI;
size_t numCmps = 0;
for (Clusterifier::RangeIterator i = TheClusterifier.begin(),
e = TheClusterifier.end(); i != e; ++i, ++numCmps) {
Clusterifier::Cluster &C = *i;
- unsigned W = 0;
- if (BPI) {
- W = BPI->getEdgeWeight(SI.getParent(), C.second->getBasicBlock());
- if (!W)
- W = 16;
- W *= C.first.Weight;
- BPI->setEdgeWeight(SI.getParent(), C.second->getBasicBlock(), W);
- }
+ // Update edge weight for the cluster.
+ unsigned W = C.first.Weight;
// FIXME: Currently works with ConstantInt based numbers.
// Changing it to APInt based is pretty heavy for this commit.
Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, DestVT,
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)),
- DAG.getConstant(Idx, MVT::i32));
+ DAG.getIntPtrConstant(Idx));
+ setValue(&I, Res);
+ return 0;
+ }
+ case Intrinsic::x86_avx_vextractf128_pd_256:
+ case Intrinsic::x86_avx_vextractf128_ps_256:
+ case Intrinsic::x86_avx_vextractf128_si_256:
+ case Intrinsic::x86_avx2_vextracti128: {
+ DebugLoc dl = getCurDebugLoc();
+ EVT DestVT = TLI.getValueType(I.getType());
+ uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(1))->getZExtValue() & 1) *
+ DestVT.getVectorNumElements();
+ Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT,
+ getValue(I.getArgOperand(0)),
+ DAG.getIntPtrConstant(Idx));
setValue(&I, Res);
return 0;
}
rw==1)); /* write */
return 0;
}
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end: {
+ // Stack coloring is not enabled in O0, discard region information.
+ if (TM.getOptLevel() == CodeGenOpt::None) {
+ if (Intrinsic == Intrinsic::lifetime_start)
+ setValue(&I, DAG.getUNDEF(TLI.getPointerTy()));
+ return 0;
+ }
+ SDValue Ops[2];
+ AllocaInst *LifetimeObject =dyn_cast_or_null<AllocaInst>(
+ GetUnderlyingObject(I.getArgOperand(1), TD));
+ // Could not find an Alloca.
+ if (!LifetimeObject)
+ return 0;
+ int FI = FuncInfo.StaticAllocaMap[LifetimeObject];
+ Ops[0] = getRoot();
+ Ops[1] = DAG.getFrameIndex(FI, TLI.getPointerTy(), true);
+ bool IsStart = (Intrinsic == Intrinsic::lifetime_start);
+ unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END);
+
+ Res = DAG.getNode(Opcode, dl, MVT::Other, Ops, 2);
+ DAG.setRoot(Res);
+ return 0;
+ }
case Intrinsic::invariant_start:
- case Intrinsic::lifetime_start:
// Discard region information.
setValue(&I, DAG.getUNDEF(TLI.getPointerTy()));
return 0;
case Intrinsic::invariant_end:
- case Intrinsic::lifetime_end:
// Discard region information.
return 0;
case Intrinsic::donothing:
const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc");
AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc));
- // Remember the HasSideEffect and AlignStack bits as operand 3.
+ // Remember the HasSideEffect, AlignStack and AsmDialect bits as operand 3.
unsigned ExtraInfo = 0;
if (IA->hasSideEffects())
ExtraInfo |= InlineAsm::Extra_HasSideEffects;
if (IA->isAlignStack())
ExtraInfo |= InlineAsm::Extra_IsAlignStack;
+ // Set the asm dialect.
+ ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect;
AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo,
TLI.getPointerTy()));