X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FTransforms%2FInstCombine%2FInstCombineCasts.cpp;h=da835a19232200e321fb957d83b6a7a57993df77;hp=98fd05a8a309eecf6a7bb8e8799288c5071024e4;hb=e78257c891d8a6148703cb74655640d175e3f570;hpb=39b5f12dd68430c4794b1d24af0fd204c82bc12f diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index 98fd05a8a30..da835a19232 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -11,19 +11,21 @@ // //===----------------------------------------------------------------------===// -#include "InstCombine.h" +#include "InstCombineInternal.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/IR/DataLayout.h" -#include "llvm/Support/PatternMatch.h" -#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/Analysis/TargetLibraryInfo.h" using namespace llvm; using namespace PatternMatch; -/// DecomposeSimpleLinearExpr - Analyze 'Val', seeing if it is a simple linear -/// expression. If so, decompose it, returning some value X, such that Val is +#define DEBUG_TYPE "instcombine" + +/// Analyze 'Val', seeing if it is a simple linear expression. +/// If so, decompose it, returning some value X, such that Val is /// X*Scale+Offset. /// -static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale, +static Value *decomposeSimpleLinearExpr(Value *Val, unsigned &Scale, uint64_t &Offset) { if (ConstantInt *CI = dyn_cast(Val)) { Offset = CI->getZExtValue(); @@ -60,7 +62,7 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale, // where C1 is divisible by C2. unsigned SubScale; Value *SubVal = - DecomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset); + decomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset); Offset += RHS->getZExtValue(); Scale = SubScale; return SubVal; @@ -74,50 +76,53 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale, return Val; } -/// PromoteCastOfAllocation - If we find a cast of an allocation instruction, -/// try to eliminate the cast by moving the type information into the alloc. +/// If we find a cast of an allocation instruction, try to eliminate the cast by +/// moving the type information into the alloc. Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI) { - // This requires DataLayout to get the alloca alignment and size information. - if (!TD) return 0; - PointerType *PTy = cast(CI.getType()); BuilderTy AllocaBuilder(*Builder); - AllocaBuilder.SetInsertPoint(AI.getParent(), &AI); + AllocaBuilder.SetInsertPoint(&AI); // Get the type really allocated and the type casted to. Type *AllocElTy = AI.getAllocatedType(); Type *CastElTy = PTy->getElementType(); - if (!AllocElTy->isSized() || !CastElTy->isSized()) return 0; + if (!AllocElTy->isSized() || !CastElTy->isSized()) return nullptr; - unsigned AllocElTyAlign = TD->getABITypeAlignment(AllocElTy); - unsigned CastElTyAlign = TD->getABITypeAlignment(CastElTy); - if (CastElTyAlign < AllocElTyAlign) return 0; + unsigned AllocElTyAlign = DL.getABITypeAlignment(AllocElTy); + unsigned CastElTyAlign = DL.getABITypeAlignment(CastElTy); + if (CastElTyAlign < AllocElTyAlign) return nullptr; // If the allocation has multiple uses, only promote it if we are strictly // increasing the alignment of the resultant allocation. If we keep it the // same, we open the door to infinite loops of various kinds. - if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return 0; + if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return nullptr; - uint64_t AllocElTySize = TD->getTypeAllocSize(AllocElTy); - uint64_t CastElTySize = TD->getTypeAllocSize(CastElTy); - if (CastElTySize == 0 || AllocElTySize == 0) return 0; + uint64_t AllocElTySize = DL.getTypeAllocSize(AllocElTy); + uint64_t CastElTySize = DL.getTypeAllocSize(CastElTy); + if (CastElTySize == 0 || AllocElTySize == 0) return nullptr; + + // If the allocation has multiple uses, only promote it if we're not + // shrinking the amount of memory being allocated. + uint64_t AllocElTyStoreSize = DL.getTypeStoreSize(AllocElTy); + uint64_t CastElTyStoreSize = DL.getTypeStoreSize(CastElTy); + if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return nullptr; // See if we can satisfy the modulus by pulling a scale out of the array // size argument. unsigned ArraySizeScale; uint64_t ArrayOffset; Value *NumElements = // See if the array size is a decomposable linear expr. - DecomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset); + decomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset); // If we can now satisfy the modulus, by using a non-1 scale, we really can // do the xform. if ((AllocElTySize*ArraySizeScale) % CastElTySize != 0 || - (AllocElTySize*ArrayOffset ) % CastElTySize != 0) return 0; + (AllocElTySize*ArrayOffset ) % CastElTySize != 0) return nullptr; unsigned Scale = (AllocElTySize*ArraySizeScale)/CastElTySize; - Value *Amt = 0; + Value *Amt = nullptr; if (Scale == 1) { Amt = NumElements; } else { @@ -135,6 +140,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, AllocaInst *New = AllocaBuilder.CreateAlloca(CastElTy, Amt); New->setAlignment(AI.getAlignment()); New->takeName(&AI); + New->setUsedWithInAlloca(AI.isUsedWithInAlloca()); // If the allocation has multiple real uses, insert a cast and change all // things that used it to use the new cast. This will also hack on CI, but it @@ -148,22 +154,21 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, return ReplaceInstUsesWith(CI, New); } -/// EvaluateInDifferentType - Given an expression that -/// CanEvaluateTruncated or CanEvaluateSExtd returns true for, actually -/// insert the code to evaluate the expression. +/// Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns +/// true for, actually insert the code to evaluate the expression. Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned) { if (Constant *C = dyn_cast(V)) { C = ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/); - // If we got a constantexpr back, try to simplify it with TD info. + // If we got a constantexpr back, try to simplify it with DL info. if (ConstantExpr *CE = dyn_cast(C)) - C = ConstantFoldConstantExpression(CE, TD, TLI); + C = ConstantFoldConstantExpression(CE, DL, TLI); return C; } // Otherwise, it must be an instruction. Instruction *I = cast(V); - Instruction *Res = 0; + Instruction *Res = nullptr; unsigned Opc = I->getOpcode(); switch (Opc) { case Instruction::Add: @@ -206,7 +211,8 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, PHINode *OPN = cast(I); PHINode *NPN = PHINode::Create(Ty, OPN->getNumIncomingValues()); for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) { - Value *V =EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned); + Value *V = + EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned); NPN->addIncoming(V, OPN->getIncomingBlock(i)); } Res = NPN; @@ -225,25 +231,22 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, /// This function is a wrapper around CastInst::isEliminableCastPair. It /// simply extracts arguments and returns what that function returns. static Instruction::CastOps -isEliminableCastPair( - const CastInst *CI, ///< The first cast instruction - unsigned opcode, ///< The opcode of the second cast instruction - Type *DstTy, ///< The target type for the second cast instruction - DataLayout *TD ///< The target data for pointer size -) { - +isEliminableCastPair(const CastInst *CI, ///< First cast instruction + unsigned opcode, ///< Opcode for the second cast + Type *DstTy, ///< Target type for the second cast + const DataLayout &DL) { Type *SrcTy = CI->getOperand(0)->getType(); // A from above Type *MidTy = CI->getType(); // B from above // Get the opcodes of the two Cast instructions Instruction::CastOps firstOp = Instruction::CastOps(CI->getOpcode()); Instruction::CastOps secondOp = Instruction::CastOps(opcode); - Type *SrcIntPtrTy = TD && SrcTy->isPtrOrPtrVectorTy() ? - TD->getIntPtrType(SrcTy) : 0; - Type *MidIntPtrTy = TD && MidTy->isPtrOrPtrVectorTy() ? - TD->getIntPtrType(MidTy) : 0; - Type *DstIntPtrTy = TD && DstTy->isPtrOrPtrVectorTy() ? - TD->getIntPtrType(DstTy) : 0; + Type *SrcIntPtrTy = + SrcTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(SrcTy) : nullptr; + Type *MidIntPtrTy = + MidTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(MidTy) : nullptr; + Type *DstIntPtrTy = + DstTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(DstTy) : nullptr; unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy, DstTy, SrcIntPtrTy, MidIntPtrTy, DstIntPtrTy); @@ -257,9 +260,9 @@ isEliminableCastPair( return Instruction::CastOps(Res); } -/// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually -/// results in any code being generated and is interesting to optimize out. If -/// the cast can be eliminated by some other simple transformation, we prefer +/// Return true if the cast from "V to Ty" actually results in any code being +/// generated and is interesting to optimize out. +/// If the cast can be eliminated by some other simple transformation, we prefer /// to do the simplification first. bool InstCombiner::ShouldOptimizeCast(Instruction::CastOps opc, const Value *V, Type *Ty) { @@ -269,7 +272,7 @@ bool InstCombiner::ShouldOptimizeCast(Instruction::CastOps opc, const Value *V, // If this is another cast that can be eliminated, we prefer to have it // eliminated. if (const CastInst *CI = dyn_cast(V)) - if (isEliminableCastPair(CI, opc, Ty, TD)) + if (isEliminableCastPair(CI, opc, Ty, DL)) return false; // If this is a vector sext from a compare, then we don't want to break the @@ -289,7 +292,7 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) { // eliminate it now. if (CastInst *CSrc = dyn_cast(Src)) { // A->B->C cast if (Instruction::CastOps opc = - isEliminableCastPair(CSrc, CI.getOpcode(), CI.getType(), TD)) { + isEliminableCastPair(CSrc, CI.getOpcode(), CI.getType(), DL)) { // The first cast (CSrc) is eliminable so we need to fix up or replace // the second cast (CI). CSrc will then have a good chance of being dead. return CastInst::Create(opc, CSrc->getOperand(0), CI.getType()); @@ -305,19 +308,18 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) { if (isa(Src)) { // We don't do this if this would create a PHI node with an illegal type if // it is currently legal. - if (!Src->getType()->isIntegerTy() || - !CI.getType()->isIntegerTy() || + if (!Src->getType()->isIntegerTy() || !CI.getType()->isIntegerTy() || ShouldChangeType(CI.getType(), Src->getType())) if (Instruction *NV = FoldOpIntoPhi(CI)) return NV; } - return 0; + return nullptr; } -/// CanEvaluateTruncated - Return true if we can evaluate the specified -/// expression tree as type Ty instead of its larger type, and arrive with the -/// same value. This is used by code that tries to eliminate truncates. +/// Return true if we can evaluate the specified expression tree as type Ty +/// instead of its larger type, and arrive with the same value. +/// This is used by code that tries to eliminate truncates. /// /// Ty will always be a type smaller than V. We should return true if trunc(V) /// can be computed by computing V in the smaller type. If V is an instruction, @@ -326,7 +328,8 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) { /// /// This function works on both vectors and scalars. /// -static bool CanEvaluateTruncated(Value *V, Type *Ty) { +static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC, + Instruction *CxtI) { // We can always evaluate constants in another type. if (isa(V)) return true; @@ -355,8 +358,8 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty) { case Instruction::Or: case Instruction::Xor: // These operators can all arbitrarily be extended or truncated. - return CanEvaluateTruncated(I->getOperand(0), Ty) && - CanEvaluateTruncated(I->getOperand(1), Ty); + return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) && + canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI); case Instruction::UDiv: case Instruction::URem: { @@ -365,10 +368,10 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty) { uint32_t BitWidth = Ty->getScalarSizeInBits(); if (BitWidth < OrigBitWidth) { APInt Mask = APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth); - if (MaskedValueIsZero(I->getOperand(0), Mask) && - MaskedValueIsZero(I->getOperand(1), Mask)) { - return CanEvaluateTruncated(I->getOperand(0), Ty) && - CanEvaluateTruncated(I->getOperand(1), Ty); + if (IC.MaskedValueIsZero(I->getOperand(0), Mask, 0, CxtI) && + IC.MaskedValueIsZero(I->getOperand(1), Mask, 0, CxtI)) { + return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) && + canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI); } } break; @@ -379,7 +382,7 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty) { if (ConstantInt *CI = dyn_cast(I->getOperand(1))) { uint32_t BitWidth = Ty->getScalarSizeInBits(); if (CI->getLimitedValue(BitWidth) < BitWidth) - return CanEvaluateTruncated(I->getOperand(0), Ty); + return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI); } break; case Instruction::LShr: @@ -389,10 +392,10 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty) { if (ConstantInt *CI = dyn_cast(I->getOperand(1))) { uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits(); uint32_t BitWidth = Ty->getScalarSizeInBits(); - if (MaskedValueIsZero(I->getOperand(0), - APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) && + if (IC.MaskedValueIsZero(I->getOperand(0), + APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth), 0, CxtI) && CI->getLimitedValue(BitWidth) < BitWidth) { - return CanEvaluateTruncated(I->getOperand(0), Ty); + return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI); } } break; @@ -406,16 +409,16 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty) { return true; case Instruction::Select: { SelectInst *SI = cast(I); - return CanEvaluateTruncated(SI->getTrueValue(), Ty) && - CanEvaluateTruncated(SI->getFalseValue(), Ty); + return canEvaluateTruncated(SI->getTrueValue(), Ty, IC, CxtI) && + canEvaluateTruncated(SI->getFalseValue(), Ty, IC, CxtI); } case Instruction::PHI: { // We can change a phi if we can change all operands. Note that we never // get into trouble with cyclic PHIs here because we only consider // instructions with a single use. PHINode *PN = cast(I); - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - if (!CanEvaluateTruncated(PN->getIncomingValue(i), Ty)) + for (Value *IncValue : PN->incoming_values()) + if (!canEvaluateTruncated(IncValue, Ty, IC, CxtI)) return false; return true; } @@ -427,10 +430,63 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty) { return false; } +/// Given a vector that is bitcast to an integer, optionally logically +/// right-shifted, and truncated, convert it to an extractelement. +/// Example (big endian): +/// trunc (lshr (bitcast <4 x i32> %X to i128), 32) to i32 +/// ---> +/// extractelement <4 x i32> %X, 1 +static Instruction *foldVecTruncToExtElt(TruncInst &Trunc, InstCombiner &IC, + const DataLayout &DL) { + Value *TruncOp = Trunc.getOperand(0); + Type *DestType = Trunc.getType(); + if (!TruncOp->hasOneUse() || !isa(DestType)) + return nullptr; + + Value *VecInput = nullptr; + ConstantInt *ShiftVal = nullptr; + if (!match(TruncOp, m_CombineOr(m_BitCast(m_Value(VecInput)), + m_LShr(m_BitCast(m_Value(VecInput)), + m_ConstantInt(ShiftVal)))) || + !isa(VecInput->getType())) + return nullptr; + + VectorType *VecType = cast(VecInput->getType()); + unsigned VecWidth = VecType->getPrimitiveSizeInBits(); + unsigned DestWidth = DestType->getPrimitiveSizeInBits(); + unsigned ShiftAmount = ShiftVal ? ShiftVal->getZExtValue() : 0; + + if ((VecWidth % DestWidth != 0) || (ShiftAmount % DestWidth != 0)) + return nullptr; + + // If the element type of the vector doesn't match the result type, + // bitcast it to a vector type that we can extract from. + unsigned NumVecElts = VecWidth / DestWidth; + if (VecType->getElementType() != DestType) { + VecType = VectorType::get(DestType, NumVecElts); + VecInput = IC.Builder->CreateBitCast(VecInput, VecType, "bc"); + } + + unsigned Elt = ShiftAmount / DestWidth; + if (DL.isBigEndian()) + Elt = NumVecElts - 1 - Elt; + + return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt)); +} + Instruction *InstCombiner::visitTrunc(TruncInst &CI) { if (Instruction *Result = commonCastTransforms(CI)) return Result; + // Test if the trunc is the user of a select which is part of a + // minimum or maximum operation. If so, don't do any more simplification. + // Even simplifying demanded bits can break the canonical form of a + // min/max. + Value *LHS, *RHS; + if (SelectInst *SI = dyn_cast(CI.getOperand(0))) + if (matchSelectPattern(SI, LHS, RHS).Flavor != SPF_UNKNOWN) + return nullptr; + // See if we can simplify any instructions used by the input whose sole // purpose is to compute bits we don't care about. if (SimplifyDemandedInstructionBits(CI)) @@ -444,7 +500,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { // expression tree to something weird like i93 unless the source is also // strange. if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) && - CanEvaluateTruncated(Src, DestTy)) { + canEvaluateTruncated(Src, DestTy, *this, &CI)) { // If this cast is a truncate, evaluting in a different type always // eliminates the cast, so it is always a win. @@ -457,14 +513,14 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { // Canonicalize trunc x to i1 -> (icmp ne (and x, 1), 0), likewise for vector. if (DestTy->getScalarSizeInBits() == 1) { - Constant *One = ConstantInt::get(Src->getType(), 1); + Constant *One = ConstantInt::get(SrcTy, 1); Src = Builder->CreateAnd(Src, One); Value *Zero = Constant::getNullValue(Src->getType()); return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero); } // Transform trunc(lshr (zext A), Cst) to eliminate one type conversion. - Value *A = 0; ConstantInt *Cst = 0; + Value *A = nullptr; ConstantInt *Cst = nullptr; if (Src->hasOneUse() && match(Src, m_LShr(m_ZExt(m_Value(A)), m_ConstantInt(Cst)))) { // We have three types to worry about here, the type of A, the source of @@ -476,31 +532,54 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { // If the shift amount is larger than the size of A, then the result is // known to be zero because all the input bits got shifted out. if (Cst->getZExtValue() >= ASize) - return ReplaceInstUsesWith(CI, Constant::getNullValue(CI.getType())); + return ReplaceInstUsesWith(CI, Constant::getNullValue(DestTy)); // Since we're doing an lshr and a zero extend, and know that the shift // amount is smaller than ASize, it is always safe to do the shift in A's // type, then zero extend or truncate to the result. Value *Shift = Builder->CreateLShr(A, Cst->getZExtValue()); Shift->takeName(Src); - return CastInst::CreateIntegerCast(Shift, CI.getType(), false); + return CastInst::CreateIntegerCast(Shift, DestTy, false); + } + + // Transform trunc(lshr (sext A), Cst) to ashr A, Cst to eliminate type + // conversion. + // It works because bits coming from sign extension have the same value as + // the sign bit of the original value; performing ashr instead of lshr + // generates bits of the same value as the sign bit. + if (Src->hasOneUse() && + match(Src, m_LShr(m_SExt(m_Value(A)), m_ConstantInt(Cst))) && + cast(Src)->getOperand(0)->hasOneUse()) { + const unsigned ASize = A->getType()->getPrimitiveSizeInBits(); + // This optimization can be only performed when zero bits generated by + // the original lshr aren't pulled into the value after truncation, so we + // can only shift by values smaller than the size of destination type (in + // bits). + if (Cst->getValue().ult(ASize)) { + Value *Shift = Builder->CreateAShr(A, Cst->getZExtValue()); + Shift->takeName(Src); + return CastInst::CreateIntegerCast(Shift, CI.getType(), true); + } } // Transform "trunc (and X, cst)" -> "and (trunc X), cst" so long as the dest // type isn't non-native. - if (Src->hasOneUse() && isa(Src->getType()) && - ShouldChangeType(Src->getType(), CI.getType()) && + if (Src->hasOneUse() && isa(SrcTy) && + ShouldChangeType(SrcTy, DestTy) && match(Src, m_And(m_Value(A), m_ConstantInt(Cst)))) { - Value *NewTrunc = Builder->CreateTrunc(A, CI.getType(), A->getName()+".tr"); + Value *NewTrunc = Builder->CreateTrunc(A, DestTy, A->getName() + ".tr"); return BinaryOperator::CreateAnd(NewTrunc, - ConstantExpr::getTrunc(Cst, CI.getType())); + ConstantExpr::getTrunc(Cst, DestTy)); } - return 0; + if (Instruction *I = foldVecTruncToExtElt(CI, *this, DL)) + return I; + + return nullptr; } -/// transformZExtICmp - Transform (zext icmp) to bitwise / integer operations -/// in order to eliminate the icmp. +/// Transform (zext icmp) to bitwise / integer operations in order to eliminate +/// the icmp. Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, bool DoXform) { // If we are just checking for a icmp eq of a single bit and zext'ing it @@ -544,7 +623,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, // If Op1C some other power of two, convert: uint32_t BitWidth = Op1C->getType()->getBitWidth(); APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - ComputeMaskedBits(ICI->getOperand(0), KnownZero, KnownOne); + computeKnownBits(ICI->getOperand(0), KnownZero, KnownOne, 0, &CI); APInt KnownZeroMask(~KnownZero); if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1? @@ -592,8 +671,8 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, APInt KnownZeroLHS(BitWidth, 0), KnownOneLHS(BitWidth, 0); APInt KnownZeroRHS(BitWidth, 0), KnownOneRHS(BitWidth, 0); - ComputeMaskedBits(LHS, KnownZeroLHS, KnownOneLHS); - ComputeMaskedBits(RHS, KnownZeroRHS, KnownOneRHS); + computeKnownBits(LHS, KnownZeroLHS, KnownOneLHS, 0, &CI); + computeKnownBits(RHS, KnownZeroRHS, KnownOneRHS, 0, &CI); if (KnownZeroLHS == KnownZeroRHS && KnownOneLHS == KnownOneRHS) { APInt KnownBits = KnownZeroLHS | KnownOneLHS; @@ -621,11 +700,11 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, } } - return 0; + return nullptr; } -/// CanEvaluateZExtd - Determine if the specified value can be computed in the -/// specified wider type and produce the same low bits. If not, return false. +/// Determine if the specified value can be computed in the specified wider type +/// and produce the same low bits. If not, return false. /// /// If this function returns true, it can also return a non-zero number of bits /// (in BitsToClear) which indicates that the value it computes is correct for @@ -642,7 +721,8 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, /// clear the top bits anyway, doing this has no extra cost. /// /// This function works on both vectors and scalars. -static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) { +static bool canEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear, + InstCombiner &IC, Instruction *CxtI) { BitsToClear = 0; if (isa(V)) return true; @@ -671,9 +751,8 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) { case Instruction::Add: case Instruction::Sub: case Instruction::Mul: - case Instruction::Shl: - if (!CanEvaluateZExtd(I->getOperand(0), Ty, BitsToClear) || - !CanEvaluateZExtd(I->getOperand(1), Ty, Tmp)) + if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI) || + !canEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI)) return false; // These can all be promoted if neither operand has 'bits to clear'. if (BitsToClear == 0 && Tmp == 0) @@ -687,19 +766,31 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) { // We use MaskedValueIsZero here for generality, but the case we care // about the most is constant RHS. unsigned VSize = V->getType()->getScalarSizeInBits(); - if (MaskedValueIsZero(I->getOperand(1), - APInt::getHighBitsSet(VSize, BitsToClear))) + if (IC.MaskedValueIsZero(I->getOperand(1), + APInt::getHighBitsSet(VSize, BitsToClear), + 0, CxtI)) return true; } // Otherwise, we don't know how to analyze this BitsToClear case yet. return false; + case Instruction::Shl: + // We can promote shl(x, cst) if we can promote x. Since shl overwrites the + // upper bits we can reduce BitsToClear by the shift amount. + if (ConstantInt *Amt = dyn_cast(I->getOperand(1))) { + if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI)) + return false; + uint64_t ShiftAmt = Amt->getZExtValue(); + BitsToClear = ShiftAmt < BitsToClear ? BitsToClear - ShiftAmt : 0; + return true; + } + return false; case Instruction::LShr: // We can promote lshr(x, cst) if we can promote x. This requires the // ultimate 'and' to clear out the high zero bits we're clearing out though. if (ConstantInt *Amt = dyn_cast(I->getOperand(1))) { - if (!CanEvaluateZExtd(I->getOperand(0), Ty, BitsToClear)) + if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI)) return false; BitsToClear += Amt->getZExtValue(); if (BitsToClear > V->getType()->getScalarSizeInBits()) @@ -709,8 +800,8 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) { // Cannot promote variable LSHR. return false; case Instruction::Select: - if (!CanEvaluateZExtd(I->getOperand(1), Ty, Tmp) || - !CanEvaluateZExtd(I->getOperand(2), Ty, BitsToClear) || + if (!canEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI) || + !canEvaluateZExtd(I->getOperand(2), Ty, BitsToClear, IC, CxtI) || // TODO: If important, we could handle the case when the BitsToClear are // known zero in the disagreeing side. Tmp != BitsToClear) @@ -722,10 +813,10 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) { // get into trouble with cyclic PHIs here because we only consider // instructions with a single use. PHINode *PN = cast(I); - if (!CanEvaluateZExtd(PN->getIncomingValue(0), Ty, BitsToClear)) + if (!canEvaluateZExtd(PN->getIncomingValue(0), Ty, BitsToClear, IC, CxtI)) return false; for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) - if (!CanEvaluateZExtd(PN->getIncomingValue(i), Ty, Tmp) || + if (!canEvaluateZExtd(PN->getIncomingValue(i), Ty, Tmp, IC, CxtI) || // TODO: If important, we could handle the case when the BitsToClear // are known zero in the disagreeing input. Tmp != BitsToClear) @@ -741,8 +832,8 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) { Instruction *InstCombiner::visitZExt(ZExtInst &CI) { // If this zero extend is only used by a truncate, let the truncate be // eliminated before we try to optimize this zext. - if (CI.hasOneUse() && isa(CI.use_back())) - return 0; + if (CI.hasOneUse() && isa(CI.user_back())) + return nullptr; // If one of the common conversion will work, do it. if (Instruction *Result = commonCastTransforms(CI)) @@ -762,13 +853,13 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { // strange. unsigned BitsToClear; if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) && - CanEvaluateZExtd(Src, DestTy, BitsToClear)) { + canEvaluateZExtd(Src, DestTy, BitsToClear, *this, &CI)) { assert(BitsToClear < SrcTy->getScalarSizeInBits() && "Unreasonable BitsToClear"); // Okay, we can transform this! Insert the new expression now. DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type" - " to avoid zero extend: " << CI); + " to avoid zero extend: " << CI << '\n'); Value *Res = EvaluateInDifferentType(Src, DestTy, false); assert(Res->getType() == DestTy); @@ -777,8 +868,10 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { // If the high bits are already filled with zeros, just replace this // cast with the result. - if (MaskedValueIsZero(Res, APInt::getHighBitsSet(DestBitSize, - DestBitSize-SrcBitsKept))) + if (MaskedValueIsZero(Res, + APInt::getHighBitsSet(DestBitSize, + DestBitSize-SrcBitsKept), + 0, &CI)) return ReplaceInstUsesWith(CI, Res); // We need to emit an AND to clear the high bits. @@ -842,54 +935,47 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { } } - // zext(trunc(t) & C) -> (t & zext(C)). - if (SrcI && SrcI->getOpcode() == Instruction::And && SrcI->hasOneUse()) - if (ConstantInt *C = dyn_cast(SrcI->getOperand(1))) - if (TruncInst *TI = dyn_cast(SrcI->getOperand(0))) { - Value *TI0 = TI->getOperand(0); - if (TI0->getType() == CI.getType()) - return - BinaryOperator::CreateAnd(TI0, - ConstantExpr::getZExt(C, CI.getType())); - } - - // zext((trunc(t) & C) ^ C) -> ((t & zext(C)) ^ zext(C)). - if (SrcI && SrcI->getOpcode() == Instruction::Xor && SrcI->hasOneUse()) - if (ConstantInt *C = dyn_cast(SrcI->getOperand(1))) - if (BinaryOperator *And = dyn_cast(SrcI->getOperand(0))) - if (And->getOpcode() == Instruction::And && And->hasOneUse() && - And->getOperand(1) == C) - if (TruncInst *TI = dyn_cast(And->getOperand(0))) { - Value *TI0 = TI->getOperand(0); - if (TI0->getType() == CI.getType()) { - Constant *ZC = ConstantExpr::getZExt(C, CI.getType()); - Value *NewAnd = Builder->CreateAnd(TI0, ZC); - return BinaryOperator::CreateXor(NewAnd, ZC); - } - } + // zext(trunc(X) & C) -> (X & zext(C)). + Constant *C; + Value *X; + if (SrcI && + match(SrcI, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Constant(C)))) && + X->getType() == CI.getType()) + return BinaryOperator::CreateAnd(X, ConstantExpr::getZExt(C, CI.getType())); + + // zext((trunc(X) & C) ^ C) -> ((X & zext(C)) ^ zext(C)). + Value *And; + if (SrcI && match(SrcI, m_OneUse(m_Xor(m_Value(And), m_Constant(C)))) && + match(And, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Specific(C)))) && + X->getType() == CI.getType()) { + Constant *ZC = ConstantExpr::getZExt(C, CI.getType()); + return BinaryOperator::CreateXor(Builder->CreateAnd(X, ZC), ZC); + } // zext (xor i1 X, true) to i32 --> xor (zext i1 X to i32), 1 - Value *X; - if (SrcI && SrcI->hasOneUse() && SrcI->getType()->isIntegerTy(1) && - match(SrcI, m_Not(m_Value(X))) && - (!X->hasOneUse() || !isa(X))) { + if (SrcI && SrcI->hasOneUse() && + SrcI->getType()->getScalarType()->isIntegerTy(1) && + match(SrcI, m_Not(m_Value(X))) && (!X->hasOneUse() || !isa(X))) { Value *New = Builder->CreateZExt(X, CI.getType()); return BinaryOperator::CreateXor(New, ConstantInt::get(CI.getType(), 1)); } - return 0; + return nullptr; } -/// transformSExtICmp - Transform (sext icmp) to bitwise / integer operations -/// in order to eliminate the icmp. +/// Transform (sext icmp) to bitwise / integer operations to eliminate the icmp. Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) { Value *Op0 = ICI->getOperand(0), *Op1 = ICI->getOperand(1); ICmpInst::Predicate Pred = ICI->getPredicate(); - if (ConstantInt *Op1C = dyn_cast(Op1)) { + // Don't bother if Op1 isn't of vector or integer type. + if (!Op1->getType()->isIntOrIntVectorTy()) + return nullptr; + + if (Constant *Op1C = dyn_cast(Op1)) { // (x ashr x, 31 -> all ones if negative // (x >s -1) ? -1 : 0 -> not (ashr x, 31) -> all ones if positive - if ((Pred == ICmpInst::ICMP_SLT && Op1C->isZero()) || + if ((Pred == ICmpInst::ICMP_SLT && Op1C->isNullValue()) || (Pred == ICmpInst::ICMP_SGT && Op1C->isAllOnesValue())) { Value *Sh = ConstantInt::get(Op0->getType(), @@ -902,7 +988,9 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) { In = Builder->CreateNot(In, In->getName()+".not"); return ReplaceInstUsesWith(CI, In); } + } + if (ConstantInt *Op1C = dyn_cast(Op1)) { // If we know that only one bit of the LHS of the icmp can be set and we // have an equality comparison with zero or a power of 2, we can transform // the icmp and sext into bitwise/integer operations. @@ -910,7 +998,7 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) { ICI->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){ unsigned BitWidth = Op1C->getType()->getBitWidth(); APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - ComputeMaskedBits(Op0, KnownZero, KnownOne); + computeKnownBits(Op0, KnownZero, KnownOne, 0, &CI); APInt KnownZeroMask(~KnownZero); if (KnownZeroMask.isPowerOf2()) { @@ -959,31 +1047,17 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) { } } - // vector (x ashr x, 31 -> all ones if signed. - if (VectorType *VTy = dyn_cast(CI.getType())) { - if (Pred == ICmpInst::ICMP_SLT && match(Op1, m_Zero()) && - Op0->getType() == CI.getType()) { - Type *EltTy = VTy->getElementType(); - - // splat the shift constant to a constant vector. - Constant *VSh = ConstantInt::get(VTy, EltTy->getScalarSizeInBits()-1); - Value *In = Builder->CreateAShr(Op0, VSh, Op0->getName()+".lobit"); - return ReplaceInstUsesWith(CI, In); - } - } - - return 0; + return nullptr; } -/// CanEvaluateSExtd - Return true if we can take the specified value -/// and return it as type Ty without inserting any new casts and without -/// changing the value of the common low bits. This is used by code that tries -/// to promote integer operations to a wider types will allow us to eliminate -/// the extension. +/// Return true if we can take the specified value and return it as type Ty +/// without inserting any new casts and without changing the value of the common +/// low bits. This is used by code that tries to promote integer operations to +/// a wider types will allow us to eliminate the extension. /// /// This function works on both vectors and scalars. /// -static bool CanEvaluateSExtd(Value *V, Type *Ty) { +static bool canEvaluateSExtd(Value *V, Type *Ty) { assert(V->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() && "Can't sign extend type to a smaller type"); // If this is a constant, it can be trivially promoted. @@ -1013,23 +1087,23 @@ static bool CanEvaluateSExtd(Value *V, Type *Ty) { case Instruction::Sub: case Instruction::Mul: // These operators can all arbitrarily be extended if their inputs can. - return CanEvaluateSExtd(I->getOperand(0), Ty) && - CanEvaluateSExtd(I->getOperand(1), Ty); + return canEvaluateSExtd(I->getOperand(0), Ty) && + canEvaluateSExtd(I->getOperand(1), Ty); //case Instruction::Shl: TODO //case Instruction::LShr: TODO case Instruction::Select: - return CanEvaluateSExtd(I->getOperand(1), Ty) && - CanEvaluateSExtd(I->getOperand(2), Ty); + return canEvaluateSExtd(I->getOperand(1), Ty) && + canEvaluateSExtd(I->getOperand(2), Ty); case Instruction::PHI: { // We can change a phi if we can change all operands. Note that we never // get into trouble with cyclic PHIs here because we only consider // instructions with a single use. PHINode *PN = cast(I); - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - if (!CanEvaluateSExtd(PN->getIncomingValue(i), Ty)) return false; + for (Value *IncValue : PN->incoming_values()) + if (!canEvaluateSExtd(IncValue, Ty)) return false; return true; } default: @@ -1041,10 +1115,10 @@ static bool CanEvaluateSExtd(Value *V, Type *Ty) { } Instruction *InstCombiner::visitSExt(SExtInst &CI) { - // If this sign extend is only used by a truncate, let the truncate by - // eliminated before we try to optimize this zext. - if (CI.hasOneUse() && isa(CI.use_back())) - return 0; + // If this sign extend is only used by a truncate, let the truncate be + // eliminated before we try to optimize this sext. + if (CI.hasOneUse() && isa(CI.user_back())) + return nullptr; if (Instruction *I = commonCastTransforms(CI)) return I; @@ -1057,15 +1131,24 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { Value *Src = CI.getOperand(0); Type *SrcTy = Src->getType(), *DestTy = CI.getType(); + // If we know that the value being extended is positive, we can use a zext + // instead. + bool KnownZero, KnownOne; + ComputeSignBit(Src, KnownZero, KnownOne, 0, &CI); + if (KnownZero) { + Value *ZExt = Builder->CreateZExt(Src, DestTy); + return ReplaceInstUsesWith(CI, ZExt); + } + // Attempt to extend the entire input expression tree to the destination // type. Only do this if the dest type is a simple type, don't convert the // expression tree to something weird like i93 unless the source is also // strange. if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) && - CanEvaluateSExtd(Src, DestTy)) { + canEvaluateSExtd(Src, DestTy)) { // Okay, we can transform this! Insert the new expression now. DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type" - " to avoid sign extend: " << CI); + " to avoid sign extend: " << CI << '\n'); Value *Res = EvaluateInDifferentType(Src, DestTy, true); assert(Res->getType() == DestTy); @@ -1074,7 +1157,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { // If the high bits are already filled with sign bit, just replace this // cast with the result. - if (ComputeNumSignBits(Res) > DestBitSize - SrcBitSize) + if (ComputeNumSignBits(Res, 0, &CI) > DestBitSize - SrcBitSize) return ReplaceInstUsesWith(CI, Res); // We need to emit a shl + ashr to do the sign extend. @@ -1112,9 +1195,9 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { // into: // %a = shl i32 %i, 30 // %d = ashr i32 %a, 30 - Value *A = 0; + Value *A = nullptr; // TODO: Eventually this could be subsumed by EvaluateInDifferentType. - ConstantInt *BA = 0, *CA = 0; + ConstantInt *BA = nullptr, *CA = nullptr; if (match(Src, m_AShr(m_Shl(m_Trunc(m_Value(A)), m_ConstantInt(BA)), m_ConstantInt(CA))) && BA == CA && A->getType() == CI.getType()) { @@ -1126,27 +1209,27 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { return BinaryOperator::CreateAShr(A, ShAmtV); } - return 0; + return nullptr; } -/// FitsInFPType - Return a Constant* for the specified FP constant if it fits +/// Return a Constant* for the specified floating-point constant if it fits /// in the specified FP type without changing its value. -static Constant *FitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) { +static Constant *fitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) { bool losesInfo; APFloat F = CFP->getValueAPF(); (void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo); if (!losesInfo) return ConstantFP::get(CFP->getContext(), F); - return 0; + return nullptr; } -/// LookThroughFPExtensions - If this is an fp extension instruction, look +/// If this is a floating-point extension instruction, look /// through it until we get the source value. -static Value *LookThroughFPExtensions(Value *V) { +static Value *lookThroughFPExtensions(Value *V) { if (Instruction *I = dyn_cast(V)) if (I->getOpcode() == Instruction::FPExt) - return LookThroughFPExtensions(I->getOperand(0)); + return lookThroughFPExtensions(I->getOperand(0)); // If this value is a constant, return the constant in the smallest FP type // that can accurately represent it. This allows us to turn @@ -1155,14 +1238,14 @@ static Value *LookThroughFPExtensions(Value *V) { if (CFP->getType() == Type::getPPC_FP128Ty(V->getContext())) return V; // No constant folding of this. // See if the value can be truncated to half and then reextended. - if (Value *V = FitsInFPType(CFP, APFloat::IEEEhalf)) + if (Value *V = fitsInFPType(CFP, APFloat::IEEEhalf)) return V; // See if the value can be truncated to float and then reextended. - if (Value *V = FitsInFPType(CFP, APFloat::IEEEsingle)) + if (Value *V = fitsInFPType(CFP, APFloat::IEEEsingle)) return V; if (CFP->getType()->isDoubleTy()) return V; // Won't shrink. - if (Value *V = FitsInFPType(CFP, APFloat::IEEEdouble)) + if (Value *V = fitsInFPType(CFP, APFloat::IEEEdouble)) return V; // Don't try to shrink to various long double types. } @@ -1173,46 +1256,138 @@ static Value *LookThroughFPExtensions(Value *V) { Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { if (Instruction *I = commonCastTransforms(CI)) return I; - - // If we have fptrunc(fadd (fpextend x), (fpextend y)), where x and y are - // smaller than the destination type, we can eliminate the truncate by doing - // the add as the smaller type. This applies to fadd/fsub/fmul/fdiv as well - // as many builtins (sqrt, etc). + // If we have fptrunc(OpI (fpextend x), (fpextend y)), we would like to + // simplify this expression to avoid one or more of the trunc/extend + // operations if we can do so without changing the numerical results. + // + // The exact manner in which the widths of the operands interact to limit + // what we can and cannot do safely varies from operation to operation, and + // is explained below in the various case statements. BinaryOperator *OpI = dyn_cast(CI.getOperand(0)); if (OpI && OpI->hasOneUse()) { + Value *LHSOrig = lookThroughFPExtensions(OpI->getOperand(0)); + Value *RHSOrig = lookThroughFPExtensions(OpI->getOperand(1)); + unsigned OpWidth = OpI->getType()->getFPMantissaWidth(); + unsigned LHSWidth = LHSOrig->getType()->getFPMantissaWidth(); + unsigned RHSWidth = RHSOrig->getType()->getFPMantissaWidth(); + unsigned SrcWidth = std::max(LHSWidth, RHSWidth); + unsigned DstWidth = CI.getType()->getFPMantissaWidth(); switch (OpI->getOpcode()) { - default: break; - case Instruction::FAdd: - case Instruction::FSub: - case Instruction::FMul: - case Instruction::FDiv: - case Instruction::FRem: - Type *SrcTy = OpI->getType(); - Value *LHSTrunc = LookThroughFPExtensions(OpI->getOperand(0)); - Value *RHSTrunc = LookThroughFPExtensions(OpI->getOperand(1)); - if (LHSTrunc->getType() != SrcTy && - RHSTrunc->getType() != SrcTy) { - unsigned DstSize = CI.getType()->getScalarSizeInBits(); - // If the source types were both smaller than the destination type of - // the cast, do this xform. - if (LHSTrunc->getType()->getScalarSizeInBits() <= DstSize && - RHSTrunc->getType()->getScalarSizeInBits() <= DstSize) { - LHSTrunc = Builder->CreateFPExt(LHSTrunc, CI.getType()); - RHSTrunc = Builder->CreateFPExt(RHSTrunc, CI.getType()); - return BinaryOperator::Create(OpI->getOpcode(), LHSTrunc, RHSTrunc); + default: break; + case Instruction::FAdd: + case Instruction::FSub: + // For addition and subtraction, the infinitely precise result can + // essentially be arbitrarily wide; proving that double rounding + // will not occur because the result of OpI is exact (as we will for + // FMul, for example) is hopeless. However, we *can* nonetheless + // frequently know that double rounding cannot occur (or that it is + // innocuous) by taking advantage of the specific structure of + // infinitely-precise results that admit double rounding. + // + // Specifically, if OpWidth >= 2*DstWdith+1 and DstWidth is sufficient + // to represent both sources, we can guarantee that the double + // rounding is innocuous (See p50 of Figueroa's 2000 PhD thesis, + // "A Rigorous Framework for Fully Supporting the IEEE Standard ..." + // for proof of this fact). + // + // Note: Figueroa does not consider the case where DstFormat != + // SrcFormat. It's possible (likely even!) that this analysis + // could be tightened for those cases, but they are rare (the main + // case of interest here is (float)((double)float + float)). + if (OpWidth >= 2*DstWidth+1 && DstWidth >= SrcWidth) { + if (LHSOrig->getType() != CI.getType()) + LHSOrig = Builder->CreateFPExt(LHSOrig, CI.getType()); + if (RHSOrig->getType() != CI.getType()) + RHSOrig = Builder->CreateFPExt(RHSOrig, CI.getType()); + Instruction *RI = + BinaryOperator::Create(OpI->getOpcode(), LHSOrig, RHSOrig); + RI->copyFastMathFlags(OpI); + return RI; + } + break; + case Instruction::FMul: + // For multiplication, the infinitely precise result has at most + // LHSWidth + RHSWidth significant bits; if OpWidth is sufficient + // that such a value can be exactly represented, then no double + // rounding can possibly occur; we can safely perform the operation + // in the destination format if it can represent both sources. + if (OpWidth >= LHSWidth + RHSWidth && DstWidth >= SrcWidth) { + if (LHSOrig->getType() != CI.getType()) + LHSOrig = Builder->CreateFPExt(LHSOrig, CI.getType()); + if (RHSOrig->getType() != CI.getType()) + RHSOrig = Builder->CreateFPExt(RHSOrig, CI.getType()); + Instruction *RI = + BinaryOperator::CreateFMul(LHSOrig, RHSOrig); + RI->copyFastMathFlags(OpI); + return RI; + } + break; + case Instruction::FDiv: + // For division, we use again use the bound from Figueroa's + // dissertation. I am entirely certain that this bound can be + // tightened in the unbalanced operand case by an analysis based on + // the diophantine rational approximation bound, but the well-known + // condition used here is a good conservative first pass. + // TODO: Tighten bound via rigorous analysis of the unbalanced case. + if (OpWidth >= 2*DstWidth && DstWidth >= SrcWidth) { + if (LHSOrig->getType() != CI.getType()) + LHSOrig = Builder->CreateFPExt(LHSOrig, CI.getType()); + if (RHSOrig->getType() != CI.getType()) + RHSOrig = Builder->CreateFPExt(RHSOrig, CI.getType()); + Instruction *RI = + BinaryOperator::CreateFDiv(LHSOrig, RHSOrig); + RI->copyFastMathFlags(OpI); + return RI; + } + break; + case Instruction::FRem: + // Remainder is straightforward. Remainder is always exact, so the + // type of OpI doesn't enter into things at all. We simply evaluate + // in whichever source type is larger, then convert to the + // destination type. + if (SrcWidth == OpWidth) + break; + if (LHSWidth < SrcWidth) + LHSOrig = Builder->CreateFPExt(LHSOrig, RHSOrig->getType()); + else if (RHSWidth <= SrcWidth) + RHSOrig = Builder->CreateFPExt(RHSOrig, LHSOrig->getType()); + if (LHSOrig != OpI->getOperand(0) || RHSOrig != OpI->getOperand(1)) { + Value *ExactResult = Builder->CreateFRem(LHSOrig, RHSOrig); + if (Instruction *RI = dyn_cast(ExactResult)) + RI->copyFastMathFlags(OpI); + return CastInst::CreateFPCast(ExactResult, CI.getType()); } - } - break; } // (fptrunc (fneg x)) -> (fneg (fptrunc x)) if (BinaryOperator::isFNeg(OpI)) { Value *InnerTrunc = Builder->CreateFPTrunc(OpI->getOperand(1), CI.getType()); - return BinaryOperator::CreateFNeg(InnerTrunc); + Instruction *RI = BinaryOperator::CreateFNeg(InnerTrunc); + RI->copyFastMathFlags(OpI); + return RI; } } + // (fptrunc (select cond, R1, Cst)) --> + // (select cond, (fptrunc R1), (fptrunc Cst)) + // + // - but only if this isn't part of a min/max operation, else we'll + // ruin min/max canonical form which is to have the select and + // compare's operands be of the same type with no casts to look through. + Value *LHS, *RHS; + SelectInst *SI = dyn_cast(CI.getOperand(0)); + if (SI && + (isa(SI->getOperand(1)) || + isa(SI->getOperand(2))) && + matchSelectPattern(SI, LHS, RHS).Flavor == SPF_UNKNOWN) { + Value *LHSTrunc = Builder->CreateFPTrunc(SI->getOperand(1), + CI.getType()); + Value *RHSTrunc = Builder->CreateFPTrunc(SI->getOperand(2), + CI.getType()); + return SelectInst::Create(SI->getOperand(0), LHSTrunc, RHSTrunc); + } + IntrinsicInst *II = dyn_cast(CI.getOperand(0)); if (II) { switch (II->getIntrinsicID()) { @@ -1222,9 +1397,8 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { Value *InnerTrunc = Builder->CreateFPTrunc(II->getArgOperand(0), CI.getType()); Type *IntrinsicType[] = { CI.getType() }; - Function *Overload = - Intrinsic::getDeclaration(CI.getParent()->getParent()->getParent(), - II->getIntrinsicID(), IntrinsicType); + Function *Overload = Intrinsic::getDeclaration( + CI.getModule(), II->getIntrinsicID(), IntrinsicType); Value *Args[] = { InnerTrunc }; return CallInst::Create(Overload, Args, II->getName()); @@ -1232,80 +1406,75 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { } } - // Fold (fptrunc (sqrt (fpext x))) -> (sqrtf x) - CallInst *Call = dyn_cast(CI.getOperand(0)); - if (Call && Call->getCalledFunction() && TLI->has(LibFunc::sqrtf) && - Call->getCalledFunction()->getName() == TLI->getName(LibFunc::sqrt) && - Call->getNumArgOperands() == 1 && - Call->hasOneUse()) { - CastInst *Arg = dyn_cast(Call->getArgOperand(0)); - if (Arg && Arg->getOpcode() == Instruction::FPExt && - CI.getType()->isFloatTy() && - Call->getType()->isDoubleTy() && - Arg->getType()->isDoubleTy() && - Arg->getOperand(0)->getType()->isFloatTy()) { - Function *Callee = Call->getCalledFunction(); - Module *M = CI.getParent()->getParent()->getParent(); - Constant *SqrtfFunc = M->getOrInsertFunction("sqrtf", - Callee->getAttributes(), - Builder->getFloatTy(), - Builder->getFloatTy(), - NULL); - CallInst *ret = CallInst::Create(SqrtfFunc, Arg->getOperand(0), - "sqrtfcall"); - ret->setAttributes(Callee->getAttributes()); - - - // Remove the old Call. With -fmath-errno, it won't get marked readnone. - ReplaceInstUsesWith(*Call, UndefValue::get(Call->getType())); - EraseInstFromFunction(*Call); - return ret; - } - } - - return 0; + return nullptr; } Instruction *InstCombiner::visitFPExt(CastInst &CI) { return commonCastTransforms(CI); } +// fpto{s/u}i({u/s}itofp(X)) --> X or zext(X) or sext(X) or trunc(X) +// This is safe if the intermediate type has enough bits in its mantissa to +// accurately represent all values of X. For example, this won't work with +// i64 -> float -> i64. +Instruction *InstCombiner::FoldItoFPtoI(Instruction &FI) { + if (!isa(FI.getOperand(0)) && !isa(FI.getOperand(0))) + return nullptr; + Instruction *OpI = cast(FI.getOperand(0)); + + Value *SrcI = OpI->getOperand(0); + Type *FITy = FI.getType(); + Type *OpITy = OpI->getType(); + Type *SrcTy = SrcI->getType(); + bool IsInputSigned = isa(OpI); + bool IsOutputSigned = isa(FI); + + // We can safely assume the conversion won't overflow the output range, + // because (for example) (uint8_t)18293.f is undefined behavior. + + // Since we can assume the conversion won't overflow, our decision as to + // whether the input will fit in the float should depend on the minimum + // of the input range and output range. + + // This means this is also safe for a signed input and unsigned output, since + // a negative input would lead to undefined behavior. + int InputSize = (int)SrcTy->getScalarSizeInBits() - IsInputSigned; + int OutputSize = (int)FITy->getScalarSizeInBits() - IsOutputSigned; + int ActualSize = std::min(InputSize, OutputSize); + + if (ActualSize <= OpITy->getFPMantissaWidth()) { + if (FITy->getScalarSizeInBits() > SrcTy->getScalarSizeInBits()) { + if (IsInputSigned && IsOutputSigned) + return new SExtInst(SrcI, FITy); + return new ZExtInst(SrcI, FITy); + } + if (FITy->getScalarSizeInBits() < SrcTy->getScalarSizeInBits()) + return new TruncInst(SrcI, FITy); + if (SrcTy == FITy) + return ReplaceInstUsesWith(FI, SrcI); + return new BitCastInst(SrcI, FITy); + } + return nullptr; +} + Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) { Instruction *OpI = dyn_cast(FI.getOperand(0)); - if (OpI == 0) + if (!OpI) return commonCastTransforms(FI); - // fptoui(uitofp(X)) --> X - // fptoui(sitofp(X)) --> X - // This is safe if the intermediate type has enough bits in its mantissa to - // accurately represent all values of X. For example, do not do this with - // i64->float->i64. This is also safe for sitofp case, because any negative - // 'X' value would cause an undefined result for the fptoui. - if ((isa(OpI) || isa(OpI)) && - OpI->getOperand(0)->getType() == FI.getType() && - (int)FI.getType()->getScalarSizeInBits() < /*extra bit for sign */ - OpI->getType()->getFPMantissaWidth()) - return ReplaceInstUsesWith(FI, OpI->getOperand(0)); + if (Instruction *I = FoldItoFPtoI(FI)) + return I; return commonCastTransforms(FI); } Instruction *InstCombiner::visitFPToSI(FPToSIInst &FI) { Instruction *OpI = dyn_cast(FI.getOperand(0)); - if (OpI == 0) + if (!OpI) return commonCastTransforms(FI); - // fptosi(sitofp(X)) --> X - // fptosi(uitofp(X)) --> X - // This is safe if the intermediate type has enough bits in its mantissa to - // accurately represent all values of X. For example, do not do this with - // i64->float->i64. This is also safe for sitofp case, because any negative - // 'X' value would cause an undefined result for the fptoui. - if ((isa(OpI) || isa(OpI)) && - OpI->getOperand(0)->getType() == FI.getType() && - (int)FI.getType()->getScalarSizeInBits() <= - OpI->getType()->getFPMantissaWidth()) - return ReplaceInstUsesWith(FI, OpI->getOperand(0)); + if (Instruction *I = FoldItoFPtoI(FI)) + return I; return commonCastTransforms(FI); } @@ -1322,9 +1491,10 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) { // If the source integer type is not the intptr_t type for this target, do a // trunc or zext to the intptr_t type, then inttoptr of it. This allows the // cast to be exposed to other transforms. - if (TD && CI.getOperand(0)->getType()->getScalarSizeInBits() != - TD->getPointerSizeInBits()) { - Type *Ty = TD->getIntPtrType(CI.getContext()); + unsigned AS = CI.getAddressSpace(); + if (CI.getOperand(0)->getType()->getScalarSizeInBits() != + DL.getPointerSizeInBits(AS)) { + Type *Ty = DL.getIntPtrType(CI.getContext(), AS); if (CI.getType()->isVectorTy()) // Handle vectors of pointers. Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements()); @@ -1335,7 +1505,7 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) { if (Instruction *I = commonCastTransforms(CI)) return I; - return 0; + return nullptr; } /// @brief Implement the transforms for cast of pointer (bitcast/ptrtoint) @@ -1345,7 +1515,12 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { if (GetElementPtrInst *GEP = dyn_cast(Src)) { // If casting the result of a getelementptr instruction with no offset, turn // this into a cast of the original pointer! - if (GEP->hasAllZeroIndices()) { + if (GEP->hasAllZeroIndices() && + // If CI is an addrspacecast and GEP changes the poiner type, merging + // GEP into CI would undo canonicalizing addrspacecast with different + // pointer types, causing infinite loops. + (!isa(CI) || + GEP->getType() == GEP->getPointerOperand()->getType())) { // Changing the cast operand is usually not a good idea but it is safe // here because the pointer operand is being replaced with another // pointer operand so the opcode doesn't need to change. @@ -1353,34 +1528,6 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { CI.setOperand(0, GEP->getOperand(0)); return &CI; } - - // If the GEP has a single use, and the base pointer is a bitcast, and the - // GEP computes a constant offset, see if we can convert these three - // instructions into fewer. This typically happens with unions and other - // non-type-safe code. - APInt Offset(TD ? TD->getPointerSizeInBits() : 1, 0); - if (TD && GEP->hasOneUse() && isa(GEP->getOperand(0)) && - GEP->accumulateConstantOffset(*TD, Offset)) { - // Get the base pointer input of the bitcast, and the type it points to. - Value *OrigBase = cast(GEP->getOperand(0))->getOperand(0); - Type *GEPIdxTy = - cast(OrigBase->getType())->getElementType(); - SmallVector NewIndices; - if (FindElementAtOffset(GEPIdxTy, Offset.getSExtValue(), NewIndices)) { - // If we were able to index down into an element, create the GEP - // and bitcast the result. This eliminates one bitcast, potentially - // two. - Value *NGEP = cast(GEP)->isInBounds() ? - Builder->CreateInBoundsGEP(OrigBase, NewIndices) : - Builder->CreateGEP(OrigBase, NewIndices); - NGEP->takeName(GEP); - - if (isa(CI)) - return new BitCastInst(NGEP, CI.getType()); - assert(isa(CI)); - return new PtrToIntInst(NGEP, CI.getType()); - } - } } return commonCastTransforms(CI); @@ -1390,24 +1537,27 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) { // If the destination integer type is not the intptr_t type for this target, // do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast // to be exposed to other transforms. - if (TD && CI.getType()->getScalarSizeInBits() != TD->getPointerSizeInBits()) { - Type *Ty = TD->getIntPtrType(CI.getContext()); - if (CI.getType()->isVectorTy()) // Handle vectors of pointers. - Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements()); - Value *P = Builder->CreatePtrToInt(CI.getOperand(0), Ty); - return CastInst::CreateIntegerCast(P, CI.getType(), /*isSigned=*/false); - } + Type *Ty = CI.getType(); + unsigned AS = CI.getPointerAddressSpace(); - return commonPointerCastTransforms(CI); + if (Ty->getScalarSizeInBits() == DL.getPointerSizeInBits(AS)) + return commonPointerCastTransforms(CI); + + Type *PtrTy = DL.getIntPtrType(CI.getContext(), AS); + if (Ty->isVectorTy()) // Handle vectors of pointers. + PtrTy = VectorType::get(PtrTy, Ty->getVectorNumElements()); + + Value *P = Builder->CreatePtrToInt(CI.getOperand(0), PtrTy); + return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false); } -/// OptimizeVectorResize - This input value (which is known to have vector type) -/// is being zero extended or truncated to the specified vector type. Try to -/// replace it with a shuffle (and vector/vector bitcast) if possible. +/// This input value (which is known to have vector type) is being zero extended +/// or truncated to the specified vector type. +/// Try to replace it with a shuffle (and vector/vector bitcast) if possible. /// /// The source and destination vector types may have different element types. -static Instruction *OptimizeVectorResize(Value *InVal, VectorType *DestTy, +static Instruction *optimizeVectorResize(Value *InVal, VectorType *DestTy, InstCombiner &IC) { // We can only do this optimization if the output is a multiple of the input // element size, or the input is a multiple of the output element size. @@ -1421,7 +1571,7 @@ static Instruction *OptimizeVectorResize(Value *InVal, VectorType *DestTy, // there yet. if (SrcTy->getElementType()->getPrimitiveSizeInBits() != DestTy->getElementType()->getPrimitiveSizeInBits()) - return 0; + return nullptr; SrcTy = VectorType::get(DestTy->getElementType(), SrcTy->getNumElements()); InVal = IC.Builder->CreateBitCast(InVal, SrcTy); @@ -1467,17 +1617,22 @@ static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) { return Value / Ty->getPrimitiveSizeInBits(); } -/// CollectInsertionElements - V is a value which is inserted into a vector of -/// VecEltTy. Look through the value to see if we can decompose it into +/// V is a value which is inserted into a vector of VecEltTy. +/// Look through the value to see if we can decompose it into /// insertions into the vector. See the example in the comment for /// OptimizeIntegerToVectorInsertions for the pattern this handles. /// The type of V is always a non-zero multiple of VecEltTy's size. +/// Shift is the number of bits between the lsb of V and the lsb of +/// the vector. /// /// This returns false if the pattern can't be matched or true if it can, /// filling in Elements with the elements found here. -static bool CollectInsertionElements(Value *V, unsigned ElementIndex, - SmallVectorImpl &Elements, - Type *VecEltTy) { +static bool collectInsertionElements(Value *V, unsigned Shift, + SmallVectorImpl &Elements, + Type *VecEltTy, bool isBigEndian) { + assert(isMultipleOfTypeSize(Shift, VecEltTy) && + "Shift should be a multiple of the element type size"); + // Undef values never contribute useful bits to the result. if (isa(V)) return true; @@ -1489,8 +1644,12 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex, if (C->isNullValue()) return true; + unsigned ElementIndex = getTypeSizeIndex(Shift, VecEltTy); + if (isBigEndian) + ElementIndex = Elements.size() - ElementIndex - 1; + // Fail if multiple elements are inserted into this slot. - if (ElementIndex >= Elements.size() || Elements[ElementIndex] != 0) + if (Elements[ElementIndex]) return false; Elements[ElementIndex] = V; @@ -1505,8 +1664,8 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex, // If the constant is the size of a vector element, we just need to bitcast // it to the right type so it gets properly inserted. if (NumElts == 1) - return CollectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy), - ElementIndex, Elements, VecEltTy); + return collectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy), + Shift, Elements, VecEltTy, isBigEndian); // Okay, this is a constant that covers multiple elements. Slice it up into // pieces and insert each element-sized piece into the vector. @@ -1517,10 +1676,12 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex, Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize); for (unsigned i = 0; i != NumElts; ++i) { + unsigned ShiftI = Shift+i*ElementSize; Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(), - i*ElementSize)); + ShiftI)); Piece = ConstantExpr::getTrunc(Piece, ElementIntTy); - if (!CollectInsertionElements(Piece, ElementIndex+i, Elements, VecEltTy)) + if (!collectInsertionElements(Piece, ShiftI, Elements, VecEltTy, + isBigEndian)) return false; } return true; @@ -1529,41 +1690,40 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex, if (!V->hasOneUse()) return false; Instruction *I = dyn_cast(V); - if (I == 0) return false; + if (!I) return false; switch (I->getOpcode()) { default: return false; // Unhandled case. case Instruction::BitCast: - return CollectInsertionElements(I->getOperand(0), ElementIndex, - Elements, VecEltTy); + return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy, + isBigEndian); case Instruction::ZExt: if (!isMultipleOfTypeSize( I->getOperand(0)->getType()->getPrimitiveSizeInBits(), VecEltTy)) return false; - return CollectInsertionElements(I->getOperand(0), ElementIndex, - Elements, VecEltTy); + return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy, + isBigEndian); case Instruction::Or: - return CollectInsertionElements(I->getOperand(0), ElementIndex, - Elements, VecEltTy) && - CollectInsertionElements(I->getOperand(1), ElementIndex, - Elements, VecEltTy); + return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy, + isBigEndian) && + collectInsertionElements(I->getOperand(1), Shift, Elements, VecEltTy, + isBigEndian); case Instruction::Shl: { // Must be shifting by a constant that is a multiple of the element size. ConstantInt *CI = dyn_cast(I->getOperand(1)); - if (CI == 0) return false; - if (!isMultipleOfTypeSize(CI->getZExtValue(), VecEltTy)) return false; - unsigned IndexShift = getTypeSizeIndex(CI->getZExtValue(), VecEltTy); - - return CollectInsertionElements(I->getOperand(0), ElementIndex+IndexShift, - Elements, VecEltTy); + if (!CI) return false; + Shift += CI->getZExtValue(); + if (!isMultipleOfTypeSize(Shift, VecEltTy)) return false; + return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy, + isBigEndian); } } } -/// OptimizeIntegerToVectorInsertions - If the input is an 'or' instruction, we -/// may be doing shifts and ors to assemble the elements of the vector manually. +/// If the input is an 'or' instruction, we may be doing shifts and ors to +/// assemble the elements of the vector manually. /// Try to rip the code out and replace it with insertelements. This is to /// optimize code like this: /// @@ -1576,22 +1736,23 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex, /// %tmp43 = bitcast i64 %ins35 to <2 x float> /// /// Into two insertelements that do "buildvector{%inc, %inc5}". -static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI, +static Value *optimizeIntegerToVectorInsertions(BitCastInst &CI, InstCombiner &IC) { VectorType *DestVecTy = cast(CI.getType()); Value *IntInput = CI.getOperand(0); SmallVector Elements(DestVecTy->getNumElements()); - if (!CollectInsertionElements(IntInput, 0, Elements, - DestVecTy->getElementType())) - return 0; + if (!collectInsertionElements(IntInput, 0, Elements, + DestVecTy->getElementType(), + IC.getDataLayout().isBigEndian())) + return nullptr; // If we succeeded, we know that all of the element are specified by Elements // or are zero if Elements has a null entry. Recast this as a set of // insertions. Value *Result = Constant::getNullValue(CI.getType()); for (unsigned i = 0, e = Elements.size(); i != e; ++i) { - if (Elements[i] == 0) continue; // Unset element. + if (!Elements[i]) continue; // Unset element. Result = IC.Builder->CreateInsertElement(Result, Elements[i], IC.Builder->getInt32(i)); @@ -1600,57 +1761,29 @@ static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI, return Result; } - -/// OptimizeIntToFloatBitCast - See if we can optimize an integer->float/double -/// bitcast. The various long double bitcasts can't get in here. -static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){ - Value *Src = CI.getOperand(0); - Type *DestTy = CI.getType(); - - // If this is a bitcast from int to float, check to see if the int is an - // extraction from a vector. - Value *VecInput = 0; - // bitcast(trunc(bitcast(somevector))) - if (match(Src, m_Trunc(m_BitCast(m_Value(VecInput)))) && - isa(VecInput->getType())) { - VectorType *VecTy = cast(VecInput->getType()); - unsigned DestWidth = DestTy->getPrimitiveSizeInBits(); - - if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0) { - // If the element type of the vector doesn't match the result type, - // bitcast it to be a vector type we can extract from. - if (VecTy->getElementType() != DestTy) { - VecTy = VectorType::get(DestTy, - VecTy->getPrimitiveSizeInBits() / DestWidth); - VecInput = IC.Builder->CreateBitCast(VecInput, VecTy); - } - - return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(0)); - } - } - - // bitcast(trunc(lshr(bitcast(somevector), cst)) - ConstantInt *ShAmt = 0; - if (match(Src, m_Trunc(m_LShr(m_BitCast(m_Value(VecInput)), - m_ConstantInt(ShAmt)))) && - isa(VecInput->getType())) { - VectorType *VecTy = cast(VecInput->getType()); - unsigned DestWidth = DestTy->getPrimitiveSizeInBits(); - if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0 && - ShAmt->getZExtValue() % DestWidth == 0) { - // If the element type of the vector doesn't match the result type, - // bitcast it to be a vector type we can extract from. - if (VecTy->getElementType() != DestTy) { - VecTy = VectorType::get(DestTy, - VecTy->getPrimitiveSizeInBits() / DestWidth); - VecInput = IC.Builder->CreateBitCast(VecInput, VecTy); - } - - unsigned Elt = ShAmt->getZExtValue() / DestWidth; - return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt)); - } - } - return 0; +/// Canonicalize scalar bitcasts of extracted elements into a bitcast of the +/// vector followed by extract element. The backend tends to handle bitcasts of +/// vectors better than bitcasts of scalars because vector registers are +/// usually not type-specific like scalar integer or scalar floating-point. +static Instruction *canonicalizeBitCastExtElt(BitCastInst &BitCast, + InstCombiner &IC, + const DataLayout &DL) { + // TODO: Create and use a pattern matcher for ExtractElementInst. + auto *ExtElt = dyn_cast(BitCast.getOperand(0)); + if (!ExtElt || !ExtElt->hasOneUse()) + return nullptr; + + // The bitcast must be to a vectorizable type, otherwise we can't make a new + // type to extract from. + Type *DestType = BitCast.getType(); + if (!VectorType::isValidElementType(DestType)) + return nullptr; + + unsigned NumElts = ExtElt->getVectorOperandType()->getNumElements(); + auto *NewVecType = VectorType::get(DestType, NumElts); + auto *NewBC = IC.Builder->CreateBitCast(ExtElt->getVectorOperand(), + NewVecType, "bc"); + return ExtractElementInst::Create(NewBC, ExtElt->getIndexOperand()); } Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { @@ -1670,11 +1803,6 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { Type *DstElTy = DstPTy->getElementType(); Type *SrcElTy = SrcPTy->getElementType(); - // If the address spaces don't match, don't eliminate the bitcast, which is - // required for changing types. - if (SrcPTy->getAddressSpace() != DstPTy->getAddressSpace()) - return 0; - // If we are casting a alloca to a pointer to a type of the same // size, rewrite the allocation instruction to allocate the "right" type. // There is no need to modify malloc calls because it is their bitcast that @@ -1686,28 +1814,21 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { // If the source and destination are pointers, and this cast is equivalent // to a getelementptr X, 0, 0, 0... turn it into the appropriate gep. // This can enhance SROA and other transforms that want type-safe pointers. - Constant *ZeroUInt = - Constant::getNullValue(Type::getInt32Ty(CI.getContext())); unsigned NumZeros = 0; while (SrcElTy != DstElTy && isa(SrcElTy) && !SrcElTy->isPointerTy() && SrcElTy->getNumContainedTypes() /* not "{}" */) { - SrcElTy = cast(SrcElTy)->getTypeAtIndex(ZeroUInt); + SrcElTy = cast(SrcElTy)->getTypeAtIndex(0U); ++NumZeros; } // If we found a path from the src to dest, create the getelementptr now. if (SrcElTy == DstElTy) { - SmallVector Idxs(NumZeros+1, ZeroUInt); + SmallVector Idxs(NumZeros + 1, Builder->getInt32(0)); return GetElementPtrInst::CreateInBounds(Src, Idxs); } } - // Try to optimize int -> float bitcasts. - if ((DestTy->isFloatTy() || DestTy->isDoubleTy()) && isa(SrcTy)) - if (Instruction *I = OptimizeIntToFloatBitCast(CI, *this)) - return I; - if (VectorType *DestVTy = dyn_cast(DestTy)) { if (DestVTy->getNumElements() == 1 && !SrcTy->isVectorTy()) { Value *Elem = Builder->CreateBitCast(Src, DestVTy->getElementType()); @@ -1724,7 +1845,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { CastInst *SrcCast = cast(Src); if (BitCastInst *BCIn = dyn_cast(SrcCast->getOperand(0))) if (isa(BCIn->getOperand(0)->getType())) - if (Instruction *I = OptimizeVectorResize(BCIn->getOperand(0), + if (Instruction *I = optimizeVectorResize(BCIn->getOperand(0), cast(DestTy), *this)) return I; } @@ -1732,17 +1853,28 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { // If the input is an 'or' instruction, we may be doing shifts and ors to // assemble the elements of the vector manually. Try to rip the code out // and replace it with insertelements. - if (Value *V = OptimizeIntegerToVectorInsertions(CI, *this)) + if (Value *V = optimizeIntegerToVectorInsertions(CI, *this)) return ReplaceInstUsesWith(CI, V); } } if (VectorType *SrcVTy = dyn_cast(SrcTy)) { - if (SrcVTy->getNumElements() == 1 && !DestTy->isVectorTy()) { - Value *Elem = - Builder->CreateExtractElement(Src, - Constant::getNullValue(Type::getInt32Ty(CI.getContext()))); - return CastInst::Create(Instruction::BitCast, Elem, DestTy); + if (SrcVTy->getNumElements() == 1) { + // If our destination is not a vector, then make this a straight + // scalar-scalar cast. + if (!DestTy->isVectorTy()) { + Value *Elem = + Builder->CreateExtractElement(Src, + Constant::getNullValue(Type::getInt32Ty(CI.getContext()))); + return CastInst::Create(Instruction::BitCast, Elem, DestTy); + } + + // Otherwise, see if our source is an insert. If so, then use the scalar + // component directly. + if (InsertElementInst *IEI = + dyn_cast(CI.getOperand(0))) + return CastInst::Create(Instruction::BitCast, IEI->getOperand(1), + DestTy); } } @@ -1750,10 +1882,9 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { // Okay, we have (bitcast (shuffle ..)). Check to see if this is // a bitcast to a vector with the same # elts. if (SVI->hasOneUse() && DestTy->isVectorTy() && - cast(DestTy)->getNumElements() == - SVI->getType()->getNumElements() && + DestTy->getVectorNumElements() == SVI->getType()->getNumElements() && SVI->getType()->getNumElements() == - cast(SVI->getOperand(0)->getType())->getNumElements()) { + SVI->getOperand(0)->getType()->getVectorNumElements()) { BitCastInst *Tmp; // If either of the operands is a cast from CI.getType(), then // evaluating the shuffle in the casted destination's type will allow @@ -1771,7 +1902,33 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { } } + if (Instruction *I = canonicalizeBitCastExtElt(CI, *this, DL)) + return I; + if (SrcTy->isPointerTy()) return commonPointerCastTransforms(CI); return commonCastTransforms(CI); } + +Instruction *InstCombiner::visitAddrSpaceCast(AddrSpaceCastInst &CI) { + // If the destination pointer element type is not the same as the source's + // first do a bitcast to the destination type, and then the addrspacecast. + // This allows the cast to be exposed to other transforms. + Value *Src = CI.getOperand(0); + PointerType *SrcTy = cast(Src->getType()->getScalarType()); + PointerType *DestTy = cast(CI.getType()->getScalarType()); + + Type *DestElemTy = DestTy->getElementType(); + if (SrcTy->getElementType() != DestElemTy) { + Type *MidTy = PointerType::get(DestElemTy, SrcTy->getAddressSpace()); + if (VectorType *VT = dyn_cast(CI.getType())) { + // Handle vectors of pointers. + MidTy = VectorType::get(MidTy, VT->getNumElements()); + } + + Value *NewBitCast = Builder->CreateBitCast(Src, MidTy); + return new AddrSpaceCastInst(NewBitCast, CI.getType()); + } + + return commonPointerCastTransforms(CI); +}