//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "codegenprepare"
#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
using namespace llvm;
using namespace llvm::PatternMatch;
+#define DEBUG_TYPE "codegenprepare"
+
STATISTIC(NumBlocksElim, "Number of blocks eliminated");
STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
return MadeChange;
}
+/// isExtractBitsCandidateUse - Return true if \p User is an instruction that
+/// could be combined with a shift instruction. Two forms are accepted:
+/// 1. A truncate instruction.
+/// 2. An 'and' instruction whose second operand is a constant mask of the
+///    low bits, i.e. imm & (imm+1) == 0.
+static bool isExtractBitsCandidateUse(Instruction *User) {
+  // Any truncate qualifies.
+  if (isa<TruncInst>(User))
+    return true;
+
+  // Otherwise only an 'and' with a constant second operand can qualify.
+  if (User->getOpcode() != Instruction::And)
+    return false;
+  if (!isa<ConstantInt>(User->getOperand(1)))
+    return false;
+
+  // The constant has to be a mask of the low bits: imm & (imm+1) == 0.
+  const APInt &Mask = cast<ConstantInt>(User->getOperand(1))->getValue();
+  return !(Mask & (Mask + 1)).getBoolValue();
+}
+
+/// SinkShiftAndTruncate - sink both the shift and the truncate instruction
+/// into the basic blocks that use the truncate's result.
+///
+/// \param ShiftI the right shift whose result feeds the truncate.
+/// \param User the truncate using \p ShiftI; it must be a TruncInst.
+/// \param CI the constant used as the shift amount of each sunk shift.
+/// \param InsertedShifts per-block record of shifts that were already sunk;
+///        it is owned by the caller and updated here.
+/// \param TLI target lowering information used to decide whether a user of
+///        the truncate is legal for the target.
+/// \returns true if a new shift/truncate pair was inserted.
+static bool
+SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
+                     DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts,
+                     const TargetLowering &TLI) {
+  BasicBlock *UserBB = User->getParent();
+  DenseMap<BasicBlock *, CastInst *> InsertedTruncs;
+  // User is required to be a truncate. Use cast<> rather than dyn_cast<> so
+  // that a violated precondition is diagnosed instead of silently producing a
+  // null pointer that is dereferenced below.
+  TruncInst *TruncI = cast<TruncInst>(User);
+  bool MadeChange = false;
+
+  for (Value::user_iterator TruncUI = TruncI->user_begin(),
+                            TruncE = TruncI->user_end();
+       TruncUI != TruncE;) {
+
+    Use &TruncTheUse = TruncUI.getUse();
+    Instruction *TruncUser = cast<Instruction>(*TruncUI);
+    // Preincrement use iterator so we don't invalidate it.
+
+    ++TruncUI;
+
+    // Skip users whose opcode has no corresponding ISD opcode.
+    int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode());
+    if (!ISDOpcode)
+      continue;
+
+    // If the use is actually a legal node, there will not be an implicit
+    // truncate.
+    if (TLI.isOperationLegalOrCustom(ISDOpcode,
+                                     EVT::getEVT(TruncUser->getType())))
+      continue;
+
+    // Don't bother for PHI nodes.
+    if (isa<PHINode>(TruncUser))
+      continue;
+
+    BasicBlock *TruncUserBB = TruncUser->getParent();
+
+    // Nothing to sink when the user already lives in the truncate's block.
+    if (UserBB == TruncUserBB)
+      continue;
+
+    BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB];
+    CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB];
+
+    // Only insert a shift/truncate pair into a block that has neither yet.
+    if (!InsertedShift && !InsertedTrunc) {
+      BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt();
+      // Sink the shift
+      if (ShiftI->getOpcode() == Instruction::AShr)
+        InsertedShift =
+            BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "", InsertPt);
+      else
+        InsertedShift =
+            BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "", InsertPt);
+
+      // Sink the trunc. The new shift now sits at the first insertion point,
+      // so step past it to place the truncate right after the shift.
+      BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
+      ++TruncInsertPt;
+
+      InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift,
+                                       TruncI->getType(), "", TruncInsertPt);
+
+      MadeChange = true;
+
+      // Redirect this use of the original truncate to the sunk copy.
+      TruncTheUse = InsertedTrunc;
+    }
+  }
+  return MadeChange;
+}
+
+/// OptimizeExtractBits - sink the shift *right* instruction into user blocks if
+/// the uses could potentially be combined with this shift instruction and
+/// generate BitExtract instruction. It will only be applied if the architecture
+/// supports BitExtract instruction. Here is an example:
+/// BB1:
+///   %x.extract.shift = lshr i64 %arg1, 32
+/// BB2:
+///   %x.extract.trunc = trunc i64 %x.extract.shift to i16
+/// ==>
+///
+/// BB2:
+///   %x.extract.shift.1 = lshr i64 %arg1, 32
+///   %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16
+///
+/// CodeGen will recognize the pattern in BB2 and generate BitExtract
+/// instruction.
+///
+/// \param ShiftI the AShr/LShr instruction to sink; it is erased when all of
+///        its uses have been replaced.
+/// \param CI the constant used as the shift amount of each sunk shift.
+/// \param TLI target lowering information used for type legality queries.
+/// \returns true if any changes are made.
+static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
+                                const TargetLowering &TLI) {
+  BasicBlock *DefBB = ShiftI->getParent();
+
+  /// Only insert instructions in each block once.
+  DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts;
+
+  bool ShiftIsLegal = TLI.isTypeLegal(TLI.getValueType(ShiftI->getType()));
+
+  bool MadeChange = false;
+  for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end();
+       UI != E;) {
+    Use &TheUse = UI.getUse();
+    Instruction *User = cast<Instruction>(*UI);
+    // Preincrement use iterator so we don't invalidate it.
+    ++UI;
+
+    // Don't bother for PHI nodes.
+    if (isa<PHINode>(User))
+      continue;
+
+    if (!isExtractBitsCandidateUse(User))
+      continue;
+
+    BasicBlock *UserBB = User->getParent();
+
+    if (UserBB == DefBB) {
+      // If the shift and truncate instruction are in the same BB. The use of
+      // the truncate(TruncUse) may still introduce another truncate if not
+      // legal. In this case, we would like to sink both shift and truncate
+      // instruction to the BB of TruncUse.
+      // for example:
+      // BB1:
+      // i64 shift.result = lshr i64 opnd, imm
+      // trunc.result = trunc shift.result to i16
+      //
+      // BB2:
+      //   ----> We will have an implicit truncate here if the architecture does
+      //   not have i16 compare.
+      // cmp i16 trunc.result, opnd2
+      //
+      if (isa<TruncInst>(User) && ShiftIsLegal
+          // If the type of the truncate is legal, no truncate will be
+          // introduced in other basic blocks.
+          && (!TLI.isTypeLegal(TLI.getValueType(User->getType()))))
+        // Accumulate rather than assign, so that a change recorded for an
+        // earlier user is not lost when this call reports no change.
+        MadeChange |=
+            SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI);
+
+      continue;
+    }
+    // If we have already inserted a shift into this block, use it.
+    BinaryOperator *&InsertedShift = InsertedShifts[UserBB];
+
+    if (!InsertedShift) {
+      BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
+
+      if (ShiftI->getOpcode() == Instruction::AShr)
+        InsertedShift =
+            BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "", InsertPt);
+      else
+        InsertedShift =
+            BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "", InsertPt);
+
+      MadeChange = true;
+    }
+
+    // Replace a use of the shift with a use of the new shift.
+    TheUse = InsertedShift;
+  }
+
+  // If we removed all uses, nuke the shift. Erasing an instruction modifies
+  // the function, so report it as a change as well.
+  if (ShiftI->use_empty()) {
+    ShiftI->eraseFromParent();
+    MadeChange = true;
+  }
+
+  return MadeChange;
+}
+
namespace {
class CodeGenPrepareFortifiedLibCalls : public SimplifyFortifiedLibCalls {
protected:
}
return true;
}
- // Lower all uses of llvm.safe.[us]{div|rem}...
- if (II &&
- (II->getIntrinsicID() == Intrinsic::safe_sdiv ||
- II->getIntrinsicID() == Intrinsic::safe_udiv ||
- II->getIntrinsicID() == Intrinsic::safe_srem ||
- II->getIntrinsicID() == Intrinsic::safe_urem)) {
- // Given
- // result_struct = type {iN, i1}
- // %R = call result_struct llvm.safe.sdiv.iN(iN %x, iN %y)
- // Expand it to actual IR, which produces result to the same variable %R.
- // First element of the result %R.1 is the result of division, second
- // element shows whether the division was correct or not.
- // If %y is 0, %R.1 is 0, %R.2 is 1. (1)
- // If %x is minSignedValue and %y is -1, %R.1 is %x, %R.2 is 1. (2)
- // In other cases %R.1 is (sdiv %x, %y), %R.2 is 0. (3)
- //
- // Similar applies to srem, udiv, and urem builtins, except that in unsigned
- // variants we don't check condition (2).
-
- bool IsSigned;
- BinaryOperator::BinaryOps Op;
- switch (II->getIntrinsicID()) {
- case Intrinsic::safe_sdiv:
- IsSigned = true;
- Op = Instruction::SDiv;
- break;
- case Intrinsic::safe_udiv:
- IsSigned = false;
- Op = Instruction::UDiv;
- break;
- case Intrinsic::safe_srem:
- IsSigned = true;
- Op = Instruction::SRem;
- break;
- case Intrinsic::safe_urem:
- IsSigned = false;
- Op = Instruction::URem;
- break;
- default:
- llvm_unreachable("Only Div/Rem intrinsics are handled here.");
- }
-
- Value *LHS = II->getOperand(0), *RHS = II->getOperand(1);
- bool DivWellDefined = TLI && TLI->isDivWellDefined();
-
- bool ResultNeeded[2] = {false, false};
- SmallVector<User*, 1> ResultsUsers[2];
- bool BadCase = false;
- for (User *U: II->users()) {
- ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(U);
- if (!EVI || EVI->getNumIndices() > 1 || EVI->getIndices()[0] > 1) {
- BadCase = true;
- break;
- }
- ResultNeeded[EVI->getIndices()[0]] = true;
- ResultsUsers[EVI->getIndices()[0]].push_back(U);
- }
- // Behave conservatively, if there is an unusual user of the results.
- if (BadCase)
- ResultNeeded[0] = ResultNeeded[1] = true;
-
- // Early exit if non of the results is ever used.
- if (!ResultNeeded[0] && !ResultNeeded[1]) {
- II->eraseFromParent();
- return true;
- }
-
- // Early exit if the second result (flag) isn't used and target
- // div-instruction computes exactly what we want to get as the first result
- // and never traps.
- if (ResultNeeded[0] && !ResultNeeded[1] && DivWellDefined) {
- BinaryOperator *Div = BinaryOperator::Create(Op, LHS, RHS);
- Div->insertAfter(II);
- for (User *U: ResultsUsers[0]) {
- Instruction *UserInst = dyn_cast<Instruction>(U);
- assert(UserInst && "Unexpected null-instruction");
- UserInst->replaceAllUsesWith(Div);
- UserInst->eraseFromParent();
- }
- II->eraseFromParent();
- CurInstIterator = Div;
- ModifiedDT = true;
- return true;
- }
-
- // Check if the flag is used to jump out to a 'trap' block
- // If it's the case, we want to use this block directly when we create
- // branches after comparing with 0 and comparing with -1 (signed case).
- // We can do it only iff we can track all the uses of the flag, i.e. the
- // only users are EXTRACTVALUE-insns, and their users are conditional
- // branches, targeting the same 'trap' basic block.
- BasicBlock *TrapBB = nullptr;
- bool DoRelinkTrap = true;
- for (User *FlagU: ResultsUsers[1]) {
- for (User *U: FlagU->users()) {
- BranchInst *TrapBranch = dyn_cast<BranchInst>(U);
- // If the user isn't a branch-insn, or it jumps to another BB, don't
- // try to use TrapBB in the lowering.
- if (!TrapBranch || (TrapBB && TrapBB != TrapBranch->getSuccessor(0))) {
- DoRelinkTrap = false;
- break;
- }
- TrapBB = TrapBranch->getSuccessor(0);
- }
- }
- if (!TrapBB)
- DoRelinkTrap = false;
- // We want to reuse TrapBB if possible, because in that case we can avoid
- // creating new basic blocks and thus overcomplicating the IR. However, if
- // DIV instruction isn't well defined, we still need those blocks to model
- // well-defined behaviour. Thus, we can't reuse TrapBB in this case.
- if (!DivWellDefined)
- DoRelinkTrap = false;
-
- Value *MinusOne = Constant::getAllOnesValue(LHS->getType());
- Value *Zero = Constant::getNullValue(LHS->getType());
-
- // Split the original BB and create other basic blocks that will be used
- // for checks.
- BasicBlock *StartBB = II->getParent();
- BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(II));
- BasicBlock *NextBB = StartBB->splitBasicBlock(SplitPt, "div.end");
-
- BasicBlock *DivByZeroBB;
- if (!DoRelinkTrap) {
- DivByZeroBB = BasicBlock::Create(II->getContext(), "div.divz",
- NextBB->getParent(), NextBB);
- BranchInst::Create(NextBB, DivByZeroBB);
- }
- BasicBlock *DivBB = BasicBlock::Create(II->getContext(), "div.div",
- NextBB->getParent(), NextBB);
- BranchInst::Create(NextBB, DivBB);
-
- // For signed variants, check the condition (2):
- // LHS == SignedMinValue, RHS == -1.
- Value *CmpMinusOne;
- Value *CmpMinValue;
- BasicBlock *ChkDivMinBB;
- BasicBlock *DivMinBB;
- Value *MinValue;
- if (IsSigned) {
- APInt SignedMinValue =
- APInt::getSignedMinValue(LHS->getType()->getPrimitiveSizeInBits());
- MinValue = Constant::getIntegerValue(LHS->getType(), SignedMinValue);
- ChkDivMinBB = BasicBlock::Create(II->getContext(), "div.chkdivmin",
- NextBB->getParent(), NextBB);
- BranchInst::Create(NextBB, ChkDivMinBB);
- if (!DoRelinkTrap) {
- DivMinBB = BasicBlock::Create(II->getContext(), "div.divmin",
- NextBB->getParent(), NextBB);
- BranchInst::Create(NextBB, DivMinBB);
- }
- CmpMinusOne = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
- RHS, MinusOne, "cmp.rhs.minus.one",
- ChkDivMinBB->getTerminator());
- CmpMinValue = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
- LHS, MinValue, "cmp.lhs.signed.min",
- ChkDivMinBB->getTerminator());
- BinaryOperator *CmpSignedOvf = BinaryOperator::Create(Instruction::And,
- CmpMinusOne,
- CmpMinValue);
- // Here we're interested in the case when both %x is TMin and %y is -1.
- // In this case the result will overflow.
- // If that's not the case, we can perform usual division. These blocks
- // will be inserted after DivByZero, so the division will be safe.
- CmpSignedOvf->insertBefore(ChkDivMinBB->getTerminator());
- BranchInst::Create(DoRelinkTrap ? TrapBB : DivMinBB, DivBB, CmpSignedOvf,
- ChkDivMinBB->getTerminator());
- ChkDivMinBB->getTerminator()->eraseFromParent();
- }
-
- // Check the condition (1):
- // RHS == 0.
- Value *CmpDivZero = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
- RHS, Zero, "cmp.rhs.zero",
- StartBB->getTerminator());
-
- // If RHS != 0, we want to check condition (2) in signed case, or proceed
- // to usual division in unsigned case.
- BranchInst::Create(DoRelinkTrap ? TrapBB : DivByZeroBB,
- IsSigned ? ChkDivMinBB : DivBB, CmpDivZero,
- StartBB->getTerminator());
- StartBB->getTerminator()->eraseFromParent();
-
- // At the moment we have all the control flow created. We just need to
- // insert DIV and PHI (if needed) to get the result value.
- Instruction *DivRes, *FlagRes;
- Instruction *InsPoint = nullptr;
- if (ResultNeeded[0]) {
- BinaryOperator *Div = BinaryOperator::Create(Op, LHS, RHS);
- if (DivWellDefined) {
- // The result value is the result of DIV operation placed right at the
- // original place of the intrinsic.
- Div->insertAfter(II);
- DivRes = Div;
- } else {
- // The result is a PHI-node.
- Div->insertBefore(DivBB->getTerminator());
- PHINode *DivResPN =
- PHINode::Create(LHS->getType(), IsSigned ? 3 : 2, "div.res.phi",
- NextBB->begin());
- DivResPN->addIncoming(Div, DivBB);
- DivResPN->addIncoming(Zero, DivByZeroBB);
- if (IsSigned)
- DivResPN->addIncoming(MinValue, DivMinBB);
- DivRes = DivResPN;
- InsPoint = DivResPN;
- }
- }
-
- // Prepare a value for the second result (flag) if it is needed.
- if (ResultNeeded[1] && !DoRelinkTrap) {
- Type *FlagTy = II->getType()->getStructElementType(1);
- PHINode *FlagResPN =
- PHINode::Create(FlagTy, IsSigned ? 3 : 2, "div.flag.phi",
- NextBB->begin());
- FlagResPN->addIncoming(Constant::getNullValue(FlagTy), DivBB);
- FlagResPN->addIncoming(Constant::getAllOnesValue(FlagTy), DivByZeroBB);
- if (IsSigned)
- FlagResPN->addIncoming(Constant::getAllOnesValue(FlagTy), DivMinBB);
- FlagRes = FlagResPN;
- if (!InsPoint)
- InsPoint = FlagRes;
- }
-
- // If possible, propagate the results to the user. Otherwise, create alloca,
- // and create a struct with the results on stack.
- if (!BadCase) {
- if (ResultNeeded[0]) {
- for (User *U: ResultsUsers[0]) {
- Instruction *UserInst = dyn_cast<Instruction>(U);
- assert(UserInst && "Unexpected null-instruction");
- UserInst->replaceAllUsesWith(DivRes);
- UserInst->eraseFromParent();
- }
- }
- if (ResultNeeded[1]) {
- for (User *FlagU: ResultsUsers[1]) {
- Instruction *FlagUInst = dyn_cast<Instruction>(FlagU);
- if (DoRelinkTrap) {
- // Replace
- // %flag = extractvalue %intrinsic.res, 1
- // br i1 %flag, label %trap.bb, label %other.bb
- // With
- // br label %other.bb
- // We've already created checks that are pointing to %trap.bb, there
- // is no need to have the same checks here.
- for (User *U: FlagUInst->users()) {
- BranchInst *TrapBranch = dyn_cast<BranchInst>(U);
- BasicBlock *CurBB = TrapBranch->getParent();
- BasicBlock *SuccessorBB = TrapBranch->getSuccessor(1);
- CurBB->getTerminator()->eraseFromParent();
- BranchInst::Create(SuccessorBB, CurBB);
- }
- } else {
- FlagUInst->replaceAllUsesWith(FlagRes);
- }
- dyn_cast<Instruction>(FlagUInst)->eraseFromParent();
- }
- }
- } else {
- // Create alloca, store our new values to it, and then load the final
- // result from it.
- Constant *Idx0 = ConstantInt::get(Type::getInt32Ty(II->getContext()), 0);
- Constant *Idx1 = ConstantInt::get(Type::getInt32Ty(II->getContext()), 1);
- Value *Idxs_DivRes[2] = {Idx0, Idx0};
- Value *Idxs_FlagRes[2] = {Idx0, Idx1};
- Value *NewRes = new llvm::AllocaInst(II->getType(), 0, "div.res.ptr", II);
- Instruction *ResDivAddr = GetElementPtrInst::Create(NewRes, Idxs_DivRes);
- Instruction *ResFlagAddr =
- GetElementPtrInst::Create(NewRes, Idxs_FlagRes);
- ResDivAddr->insertAfter(InsPoint);
- ResFlagAddr->insertAfter(ResDivAddr);
- StoreInst *StoreResDiv = new StoreInst(DivRes, ResDivAddr);
- StoreInst *StoreResFlag = new StoreInst(FlagRes, ResFlagAddr);
- StoreResDiv->insertAfter(ResFlagAddr);
- StoreResFlag->insertAfter(StoreResDiv);
- LoadInst *LoadRes = new LoadInst(NewRes, "div.res");
- LoadRes->insertAfter(StoreResFlag);
- II->replaceAllUsesWith(LoadRes);
- }
-
- II->eraseFromParent();
- CurInstIterator = StartBB->end();
- ModifiedDT = true;
- return true;
- }
if (II && TLI) {
SmallVector<Value*, 2> PtrOps;
static bool MightBeFoldableInst(Instruction *I) {
switch (I->getOpcode()) {
case Instruction::BitCast:
+ case Instruction::AddrSpaceCast:
// Don't touch identity bitcasts.
if (I->getType() == I->getOperand(0)->getType())
return false;
return MatchAddr(AddrInst->getOperand(0), Depth);
return false;
case Instruction::BitCast:
+ case Instruction::AddrSpaceCast:
// BitCast is always a noop, and we can handle it as long as it is
// int->int or pointer->pointer (we don't want int<->fp or something).
if ((AddrInst->getOperand(0)->getType()->isPointerTy() ||
Value *&SunkAddr = SunkAddrs[Addr];
if (SunkAddr) {
DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "
- << *MemoryInst);
+ << *MemoryInst << "\n");
if (SunkAddr->getType() != Addr->getType())
SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
} else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() &&
// By default, we use the GEP-based method when AA is used later. This
// prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
- << *MemoryInst);
+ << *MemoryInst << "\n");
Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType());
Value *ResultPtr = nullptr, *ResultIndex = nullptr;
}
} else {
DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
- << *MemoryInst);
+ << *MemoryInst << "\n");
Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType());
Value *Result = nullptr;
// the original IR value was tossed in favor of a constant back when
// the AddrMode was created we need to bail out gracefully if widths
// do not match instead of extending it.
- Instruction *I = dyn_cast<Instruction>(Result);
+ Instruction *I = dyn_cast_or_null<Instruction>(Result);
if (I && (Result != AddrMode.BaseReg))
I->eraseFromParent();
return false;
return false;
}
+ BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I);
+
+ if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
+ BinOp->getOpcode() == Instruction::LShr)) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
+ if (TLI && CI && TLI->hasExtractBitsInsn())
+ return OptimizeExtractBits(BinOp, CI, *TLI);
+
+ return false;
+ }
+
if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
if (GEPI->hasAllZeroIndices()) {
/// The GEP operand must be a pointer, so must its result -> BitCast
for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) {
Instruction *Insn = BI; ++BI;
DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn);
- if (!DVI) {
+ // Leave dbg.values that refer to an alloca alone. These
+ // intrinsics describe the address of a variable (= the alloca)
+ // being taken. They should not be moved next to the alloca
+ // (and to the beginning of the scope), but rather stay close to
+ // where said address is used.
+ if (!DVI || (DVI->getValue() && isa<AllocaInst>(DVI->getValue()))) {
PrevNonDbgInst = Insn;
continue;
}