X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTransforms%2FScalar%2FInstructionCombining.cpp;h=b80d4d630afc2d8144174494251d42a19a1a4779;hb=1997473cf72957d0e70322e2fe6fe2ab141c58a6;hp=78fc0de2cdf8667ac1e538b07a6868e9c586b675;hpb=ba41783dbb1b8da52fac9859bd9d4770de7a82bd;p=oota-llvm.git

diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp
index 78fc0de2cdf..b80d4d630af 100644
--- a/lib/Transforms/Scalar/InstructionCombining.cpp
+++ b/lib/Transforms/Scalar/InstructionCombining.cpp
@@ -76,6 +76,9 @@ namespace {
     TargetData *TD;
     bool MustPreserveLCSSA;
   public:
+    static char ID; // Pass identification, replacement for typeid
+    InstCombiner() : FunctionPass((intptr_t)&ID) {}
+
     /// AddToWorkList - Add the specified instruction to the worklist if it
     /// isn't already in it.
     void AddToWorkList(Instruction *I) {
@@ -193,9 +196,10 @@ namespace {
                                       BinaryOperator &I);
     Instruction *commonCastTransforms(CastInst &CI);
     Instruction *commonIntCastTransforms(CastInst &CI);
-    Instruction *visitTrunc(CastInst &CI);
-    Instruction *visitZExt(CastInst &CI);
-    Instruction *visitSExt(CastInst &CI);
+    Instruction *commonPointerCastTransforms(CastInst &CI);
+    Instruction *visitTrunc(TruncInst &CI);
+    Instruction *visitZExt(ZExtInst &CI);
+    Instruction *visitSExt(SExtInst &CI);
     Instruction *visitFPTrunc(CastInst &CI);
     Instruction *visitFPExt(CastInst &CI);
     Instruction *visitFPToUI(CastInst &CI);
@@ -204,7 +208,7 @@ namespace {
     Instruction *visitSIToFP(CastInst &CI);
     Instruction *visitPtrToInt(CastInst &CI);
     Instruction *visitIntToPtr(CastInst &CI);
-    Instruction *visitBitCast(CastInst &CI);
+    Instruction *visitBitCast(BitCastInst &CI);
     Instruction *FoldSelectOpOp(SelectInst &SI, Instruction *TI,
                                 Instruction *FI);
     Instruction *visitSelectInst(SelectInst &CI);
@@ -350,12 +354,14 @@ namespace {
                               bool isSub, Instruction &I);
     Instruction *InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
                                  bool isSigned, bool Inside, Instruction &IB);
-    Instruction *PromoteCastOfAllocation(CastInst &CI, AllocationInst &AI);
+    Instruction *PromoteCastOfAllocation(BitCastInst &CI, AllocationInst &AI);
     Instruction *MatchBSwap(BinaryOperator &I);
+    bool SimplifyStoreAtEndOfBlock(StoreInst &SI);

     Value *EvaluateInDifferentType(Value *V, const Type *Ty, bool isSigned);
   };

+  char InstCombiner::ID = 0;
   RegisterPass<InstCombiner> X("instcombine", "Combine redundant instructions");
 }
@@ -1489,7 +1495,73 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, uint64_t DemandedElts,
       UndefElts |= 1ULL << IdxNo;
       break;
     }
+  case Instruction::BitCast: {
+    // Packed->packed casts only.
+    const VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType());
+    if (!VTy) break;
+    unsigned InVWidth = VTy->getNumElements();
+    uint64_t InputDemandedElts = 0;
+    unsigned Ratio;
+
+    if (VWidth == InVWidth) {
+      // If we are converting from <4 x i32> -> <4 x f32>, we demand the same
+      // elements as are demanded of us.
+      Ratio = 1;
+      InputDemandedElts = DemandedElts;
+    } else if (VWidth > InVWidth) {
+      // Untested so far.
+      break;
+
+      // If there are more elements in the result than there are in the
+      // source, then an input element is live if any of the corresponding
+      // output elements are live.
+      Ratio = VWidth/InVWidth;
+      for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) {
+        if (DemandedElts & (1ULL << OutIdx))
+          InputDemandedElts |= 1ULL << (OutIdx/Ratio);
+      }
+    } else {
+      // Untested so far.
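+      // Bail out for now: the narrowing logic below is deliberately
+      // unreachable until it gets test coverage.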
+      break;
+
+      // If there are more elements in the source than there are in the
+      // result, then an input element is live if the corresponding output
+      // element is live.
+      Ratio = InVWidth/VWidth;
+      for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
+        if (DemandedElts & (1ULL << InIdx/Ratio))
+          InputDemandedElts |= 1ULL << InIdx;
+    }
+
+    // Simplify the source operand given which of its elements are demanded.
+    TmpV = SimplifyDemandedVectorElts(I->getOperand(0), InputDemandedElts,
+                                      UndefElts2, Depth+1);
+    if (TmpV) {
+      I->setOperand(0, TmpV);
+      MadeChange = true;
+    }
+    UndefElts = UndefElts2;
+    if (VWidth > InVWidth) {
+      assert(0 && "Unimp");
+      // If there are more elements in the result than there are in the
+      // source, then an output element is undef if the corresponding input
+      // element is undef.
+      for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
+        if (UndefElts2 & (1ULL << (OutIdx/Ratio)))
+          UndefElts |= 1ULL << OutIdx;
+    } else if (VWidth < InVWidth) {
+      assert(0 && "Unimp");
+      // If there are more elements in the source than there are in the
+      // result, then a result element is undef if all of the corresponding
+      // input elements are undef.
+      UndefElts = ~0ULL >> (64-VWidth);  // Start out all undef.
+      for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
+        if ((UndefElts2 & (1ULL << InIdx)) == 0)    // Not undef?
+          UndefElts &= ~(1ULL << (InIdx/Ratio));    // Clear undef bit.
+    }
+    break;
+  }
   case Instruction::And:
   case Instruction::Or:
   case Instruction::Xor:
@@ -2157,7 +2229,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
       // 0 - (X sdiv C)  -> (X sdiv -C)
       if (Op1I->getOpcode() == Instruction::SDiv)
         if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0))
-          if (CSI->isNullValue())
+          if (CSI->isZero())
            if (Constant *DivRHS = dyn_cast<Constant>(Op1I->getOperand(1)))
              return BinaryOperator::createSDiv(Op1I->getOperand(0),
                                                ConstantExpr::getNeg(DivRHS));
@@ -2201,7 +2273,7 @@ static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS) {
   switch (pred) {
     case ICmpInst::ICMP_SLT:
       // True if LHS s< RHS and RHS == 0
-      return RHS->isNullValue();
+      return RHS->isZero();
     case ICmpInst::ICMP_SLE:
       // True if LHS s<= RHS and RHS == -1
       return RHS->isAllOnesValue();
@@ -2236,7 +2308,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
           return BinaryOperator::createMul(SI->getOperand(0),
                                            ConstantExpr::getShl(CI, ShOp));

-      if (CI->isNullValue())
+      if (CI->isZero())
         return ReplaceInstUsesWith(I, Op1);  // X * 0  == 0
       if (CI->equalsInt(1))                  // X * 1  == X
         return ReplaceInstUsesWith(I, Op0);
@@ -4269,38 +4341,66 @@ static Value *EmitGEPOffset(User *GEP, Instruction &I, InstCombiner &IC) {
   Value *Result = Constant::getNullValue(IntPtrTy);

   // Build a mask for high order bits.
-  uint64_t PtrSizeMask = ~0ULL >> (64-TD.getPointerSize()*8);
+  unsigned IntPtrWidth = TD.getPointerSize()*8;
+  uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);

   for (unsigned i = 1, e = GEP->getNumOperands(); i != e; ++i, ++GTI) {
     Value *Op = GEP->getOperand(i);
     uint64_t Size = TD.getTypeSize(GTI.getIndexedType()) & PtrSizeMask;
-    Constant *Scale = ConstantInt::get(IntPtrTy, Size);
-    if (Constant *OpC = dyn_cast<Constant>(Op)) {
-      if (!OpC->isNullValue()) {
-        OpC = ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/);
-        Scale = ConstantExpr::getMul(OpC, Scale);
-        if (Constant *RC = dyn_cast<Constant>(Result))
-          Result = ConstantExpr::getAdd(RC, Scale);
-        else {
-          // Emit an add instruction.
+    if (ConstantInt *OpC = dyn_cast<ConstantInt>(Op)) {
+      if (OpC->isZero()) continue;
+
+      // Handle a struct index, which adds its field offset to the pointer.
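+      // (The offset comes from the target's struct layout rather than from
+      // scaling the index by an element size.)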
+      if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+        Size = TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
+
+        if (ConstantInt *RC = dyn_cast<ConstantInt>(Result))
+          Result = ConstantInt::get(RC->getValue() + APInt(IntPtrWidth, Size));
+        else
           Result = IC.InsertNewInstBefore(
-                       BinaryOperator::createAdd(Result, Scale,
-                                                 GEP->getName()+".offs"), I);
-        }
+                   BinaryOperator::createAdd(Result,
+                                             ConstantInt::get(IntPtrTy, Size),
+                                             GEP->getName()+".offs"), I);
+        continue;
       }
-    } else {
-      // Convert to correct type.
-      Op = IC.InsertNewInstBefore(CastInst::createSExtOrBitCast(Op, IntPtrTy,
-                                                Op->getName()+".c"), I);
-      if (Size != 1)
-        // We'll let instcombine(mul) convert this to a shl if possible.
+
+      Constant *Scale = ConstantInt::get(IntPtrTy, Size);
+      Constant *OC = ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/);
+      Scale = ConstantExpr::getMul(OC, Scale);
+      if (Constant *RC = dyn_cast<Constant>(Result))
+        Result = ConstantExpr::getAdd(RC, Scale);
+      else {
+        // Emit an add instruction.
+        Result = IC.InsertNewInstBefore(
+                    BinaryOperator::createAdd(Result, Scale,
+                                              GEP->getName()+".offs"), I);
+      }
+      continue;
+    }
+    // Convert to correct type.
+    if (Op->getType() != IntPtrTy) {
+      if (Constant *OpC = dyn_cast<Constant>(Op))
+        Op = ConstantExpr::getSExt(OpC, IntPtrTy);
+      else
+        Op = IC.InsertNewInstBefore(new SExtInst(Op, IntPtrTy,
+                                                 Op->getName()+".c"), I);
+    }
+    if (Size != 1) {
+      Constant *Scale = ConstantInt::get(IntPtrTy, Size);
+      if (Constant *OpC = dyn_cast<Constant>(Op))
+        Op = ConstantExpr::getMul(OpC, Scale);
+      else    // We'll let instcombine(mul) convert this to a shl if possible.
        Op = IC.InsertNewInstBefore(BinaryOperator::createMul(Op, Scale,
-                                                      GEP->getName()+".idx"), I);
+                                                  GEP->getName()+".idx"), I);
+    }

-      // Emit an add instruction.
+    // Emit an add instruction.
+    if (isa<Constant>(Op) && isa<Constant>(Result))
+      Result = ConstantExpr::getAdd(cast<Constant>(Op),
+                                    cast<Constant>(Result));
+    else
       Result = IC.InsertNewInstBefore(BinaryOperator::createAdd(Op, Result,
-                                                      GEP->getName()+".offs"), I);
-    }
+                                                  GEP->getName()+".offs"), I);
   }
   return Result;
 }
@@ -5457,8 +5557,8 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
       }
     }
   } else {  // Not a ICMP_EQ/ICMP_NE
-    // If the LHS is a cast from an integral value of the same size, then
-    // since we know the RHS is a constant, try to simlify.
+    // If the LHS is a cast from an integral value of the same size,
+    // then since we know the RHS is a constant, try to simplify.
     if (CastInst *Cast = dyn_cast<CastInst>(LHSI)) {
       Value *CastOp = Cast->getOperand(0);
       const Type *SrcTy = CastOp->getType();
@@ -6018,10 +6118,9 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,

 /// PromoteCastOfAllocation - If we find a cast of an allocation instruction,
 /// try to eliminate the cast by moving the type information into the alloc.
-Instruction *InstCombiner::PromoteCastOfAllocation(CastInst &CI,
+Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
                                                    AllocationInst &AI) {
-  const PointerType *PTy = dyn_cast<PointerType>(CI.getType());
-  if (!PTy) return 0;   // Not casting the allocation to a pointer type.
+  const PointerType *PTy = cast<PointerType>(CI.getType());

   // Remove any uses of AI that are dead.
   assert(!CI.use_empty() && "Dead instructions should be removed earlier!");
@@ -6165,7 +6264,7 @@ static bool CanEvaluateInDifferentType(Value *V, const IntegerType *Ty,
         MaskedValueIsZero(I->getOperand(0),
           APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) &&
         CI->getLimitedValue(BitWidth) < BitWidth) {
-      return CanEvaluateInDifferentType(I->getOperand(0), Ty, NumCastsRemoved);
+      return CanEvaluateInDifferentType(I->getOperand(0), Ty,NumCastsRemoved);
     }
     break;
@@ -6259,32 +6358,6 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
     }
   }

-  // If casting the result of a getelementptr instruction with no offset, turn
-  // this into a cast of the original pointer!
-  //
-  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Src)) {
-    bool AllZeroOperands = true;
-    for (unsigned i = 1, e = GEP->getNumOperands(); i != e; ++i)
-      if (!isa<Constant>(GEP->getOperand(i)) ||
-          !cast<Constant>(GEP->getOperand(i))->isNullValue()) {
-        AllZeroOperands = false;
-        break;
-      }
-    if (AllZeroOperands) {
-      // Changing the cast operand is usually not a good idea but it is safe
-      // here because the pointer operand is being replaced with another
-      // pointer operand so the opcode doesn't need to change.
-      CI.setOperand(0, GEP->getOperand(0));
-      return &CI;
-    }
-  }
-
-  // If we are casting a malloc or alloca to a pointer to a type of the same
-  // size, rewrite the allocation instruction to allocate the "right" type.
-  if (AllocationInst *AI = dyn_cast<AllocationInst>(Src))
-    if (Instruction *V = PromoteCastOfAllocation(CI, *AI))
-      return V;
-
   // If we are casting a select then fold the cast into the select
   if (SelectInst *SI = dyn_cast<SelectInst>(Src))
     if (Instruction *NV = FoldOpIntoSelect(CI, SI, this))
@@ -6298,6 +6371,100 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
   return 0;
 }

+/// @brief Implement the transforms for cast of pointer (bitcast/ptrtoint)
+Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
+  Value *Src = CI.getOperand(0);
+
+  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Src)) {
+    // If casting the result of a getelementptr instruction with no offset,
+    // turn this into a cast of the original pointer!
+    if (GEP->hasAllZeroIndices()) {
+      // Changing the cast operand is usually not a good idea but it is safe
+      // here because the pointer operand is being replaced with another
+      // pointer operand so the opcode doesn't need to change.
+      AddToWorkList(GEP);
+      CI.setOperand(0, GEP->getOperand(0));
+      return &CI;
+    }
+
+    // If the GEP has a single use, and the base pointer is a bitcast, and the
+    // GEP computes a constant offset, see if we can convert these three
+    // instructions into fewer.  This typically happens with unions and other
+    // non-type-safe code.
+    if (GEP->hasOneUse() && isa<BitCastInst>(GEP->getOperand(0))) {
+      if (GEP->hasAllConstantIndices()) {
+        // We are guaranteed to get a constant from EmitGEPOffset.
+        ConstantInt *OffsetV = cast<ConstantInt>(EmitGEPOffset(GEP, CI, *this));
+        int64_t Offset = OffsetV->getSExtValue();
+
+        // Get the base pointer input of the bitcast, and the type it points to.
+        Value *OrigBase = cast<BitCastInst>(GEP->getOperand(0))->getOperand(0);
+        const Type *GEPIdxTy =
+          cast<PointerType>(OrigBase->getType())->getElementType();
+        if (GEPIdxTy->isSized()) {
+          SmallVector<Value*, 8> NewIndices;
+
+          // Start with the index over the outer type.
+          const Type *IntPtrTy = TD->getIntPtrType();
+          int64_t TySize = TD->getTypeSize(GEPIdxTy);
+          int64_t FirstIdx = Offset/TySize;
+          Offset %= TySize;
+
+          // Handle silly modulus not returning values in the range [0..TySize).
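+          // C++ '%' can yield a negative remainder when Offset is negative,
+          // so normalize into [0, TySize) by borrowing from FirstIdx.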
+          if (Offset < 0) {
+            --FirstIdx;
+            Offset += TySize;
+            assert(Offset >= 0);
+          }
+
+          NewIndices.push_back(ConstantInt::get(IntPtrTy, FirstIdx));
+          assert((uint64_t)Offset < (uint64_t)TySize && "Out of range offset");
+
+          // Index into the types.  If we fail, set OrigBase to null.
+          while (Offset) {
+            if (const StructType *STy = dyn_cast<StructType>(GEPIdxTy)) {
+              const StructLayout *SL = TD->getStructLayout(STy);
+              unsigned Elt = SL->getElementContainingOffset(Offset);
+              NewIndices.push_back(ConstantInt::get(Type::Int32Ty, Elt));
+
+              Offset -= SL->getElementOffset(Elt);
+              GEPIdxTy = STy->getElementType(Elt);
+            } else if (isa<ArrayType>(GEPIdxTy) || isa<VectorType>(GEPIdxTy)) {
+              const SequentialType *STy = cast<SequentialType>(GEPIdxTy);
+              uint64_t EltSize = TD->getTypeSize(STy->getElementType());
+              NewIndices.push_back(ConstantInt::get(IntPtrTy, Offset/EltSize));
+              Offset %= EltSize;
+              GEPIdxTy = STy->getElementType();
+            } else {
+              // Otherwise, we can't index into this, bail out.
+              Offset = 0;
+              OrigBase = 0;
+            }
+          }
+          if (OrigBase) {
+            // If we were able to index down into an element, create the GEP
+            // and bitcast the result.  This eliminates one bitcast,
+            // potentially two.
+            Instruction *NGEP = new GetElementPtrInst(OrigBase, &NewIndices[0],
+                                                      NewIndices.size(), "");
+            InsertNewInstBefore(NGEP, CI);
+            NGEP->takeName(GEP);
+
+            if (isa<BitCastInst>(CI))
+              return new BitCastInst(NGEP, CI.getType());
+            assert(isa<PtrToIntInst>(CI));
+            return new PtrToIntInst(NGEP, CI.getType());
+          }
+        }
+      }
+    }
+  }
+
+  return commonCastTransforms(CI);
+}
+
+
+
 /// Only the TRUNC, ZEXT, SEXT, and BITCAST can have both operand and result
 /// as integer types.  This function implements the common transforms for all
 /// those cases.
@@ -6471,7 +6638,7 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
   return 0;
 }

-Instruction *InstCombiner::visitTrunc(CastInst &CI) {
+Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
   if (Instruction *Result = commonIntCastTransforms(CI))
     return Result;
@@ -6528,7 +6695,7 @@ Instruction *InstCombiner::visitTrunc(CastInst &CI) {
   return 0;
 }

-Instruction *InstCombiner::visitZExt(CastInst &CI) {
+Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
   // If one of the common conversions will work ..
   if (Instruction *Result = commonIntCastTransforms(CI))
     return Result;
@@ -6570,6 +6737,33 @@ Instruction *InstCombiner::visitZExt(CastInst &CI) {
     // cast to integer to avoid the comparison.
     if (ConstantInt *Op1C = dyn_cast<ConstantInt>(ICI->getOperand(1))) {
       const APInt &Op1CV = Op1C->getValue();
+
+      // zext (x <s  0) to i32 --> x>>u31      true if signbit set.
+      // zext (x >s -1) to i32 --> (x>>u31)^1  true if signbit clear.
+      if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV == 0) ||
+          (ICI->getPredicate() == ICmpInst::ICMP_SGT &&
+           Op1CV.isAllOnesValue())) {
+        Value *In = ICI->getOperand(0);
+        Value *Sh = ConstantInt::get(In->getType(),
+                                     In->getType()->getPrimitiveSizeInBits()-1);
+        In = InsertNewInstBefore(BinaryOperator::createLShr(In, Sh,
+                                                  In->getName()+".lobit"),
+                                 CI);
+        if (In->getType() != CI.getType())
+          In = CastInst::createIntegerCast(In, CI.getType(),
+                                           false/*ZExt*/, "tmp", &CI);
+
+        if (ICI->getPredicate() == ICmpInst::ICMP_SGT) {
+          Constant *One = ConstantInt::get(In->getType(), 1);
+          In = InsertNewInstBefore(BinaryOperator::createXor(In, One,
+                                                   In->getName()+".not"),
+                                   CI);
+        }
+
+        return ReplaceInstUsesWith(CI, In);
+      }
+
+
+
       // zext (X == 0) to i32 --> X^1      iff X has only the low bit set.
       // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
       // zext (X == 1) to i32 --> X        iff X has only the low bit set.
@@ -6626,13 +6820,44 @@ Instruction *InstCombiner::visitZExt(CastInst &CI) {
   return 0;
 }

-Instruction *InstCombiner::visitSExt(CastInst &CI) {
+Instruction *InstCombiner::visitSExt(SExtInst &CI) {
   if (Instruction *I = commonIntCastTransforms(CI))
     return I;

-  // (x <s 0) ? -1 : 0 -> ashr x, 31
-  // (x >u 2147483647) ? -1 : 0 -> ashr x, 31
-
+  Value *Src = CI.getOperand(0);
+
+  // sext (x <s 0) -> ashr x, 31   -> all ones if signed
+  // sext (x >s -1) -> ashr x, 31  -> all ones if not signed
+  if (ICmpInst *ICI = dyn_cast<ICmpInst>(Src)) {
+    // If we are just checking for a icmp eq of a single bit and sext'ing it
+    // to an integer, then shift the bit to the appropriate place and then
+    // cast to integer to avoid the comparison.
+    if (ConstantInt *Op1C = dyn_cast<ConstantInt>(ICI->getOperand(1))) {
+      const APInt &Op1CV = Op1C->getValue();
+
+      // sext (x <s  0) to i32 --> x>>s31       true if signbit set.
+      // sext (x >s -1) to i32 --> (x>>s31)^-1  true if signbit clear.
+      if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV == 0) ||
+          (ICI->getPredicate() == ICmpInst::ICMP_SGT &&
+           Op1CV.isAllOnesValue())) {
+        Value *In = ICI->getOperand(0);
+        Value *Sh = ConstantInt::get(In->getType(),
+                                     In->getType()->getPrimitiveSizeInBits()-1);
+        In = InsertNewInstBefore(BinaryOperator::createAShr(In, Sh,
                                                  In->getName()+".lobit"),
+                                 CI);
+        if (In->getType() != CI.getType())
+          In = CastInst::createIntegerCast(In, CI.getType(),
+                                           true/*SExt*/, "tmp", &CI);
+
+        if (ICI->getPredicate() == ICmpInst::ICMP_SGT)
+          In = InsertNewInstBefore(BinaryOperator::createNot(In,
+                                                 In->getName()+".not"), CI);
+
+        return ReplaceInstUsesWith(CI, In);
+      }
+    }
+  }
+
   return 0;
 }

@@ -6661,15 +6886,14 @@ Instruction *InstCombiner::visitSIToFP(CastInst &CI) {
 }

 Instruction *InstCombiner::visitPtrToInt(CastInst &CI) {
-  return commonCastTransforms(CI);
+  return commonPointerCastTransforms(CI);
 }

 Instruction *InstCombiner::visitIntToPtr(CastInst &CI) {
   return commonCastTransforms(CI);
 }

-Instruction *InstCombiner::visitBitCast(CastInst &CI) {
-
+Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
   // If the operands are integer typed then apply the integer transforms,
   // otherwise just apply the common ones.
   Value *Src = CI.getOperand(0);
@@ -6679,6 +6903,9 @@ Instruction *InstCombiner::visitBitCast(CastInst &CI) {
   if (SrcTy->isInteger() && DestTy->isInteger()) {
     if (Instruction *Result = commonIntCastTransforms(CI))
       return Result;
+  } else if (isa<PointerType>(SrcTy)) {
+    if (Instruction *I = commonPointerCastTransforms(CI))
+      return I;
   } else {
     if (Instruction *Result = commonCastTransforms(CI))
       return Result;
@@ -6690,28 +6917,33 @@ Instruction *InstCombiner::visitBitCast(CastInst &CI) {
   if (DestTy == Src->getType())
     return ReplaceInstUsesWith(CI, Src);

-  // If the source and destination are pointers, and this cast is equivalent to
-  // a getelementptr X, 0, 0, 0...  turn it into the appropriate getelementptr.
-  // This can enhance SROA and other transforms that want type-safe pointers.
   if (const PointerType *DstPTy = dyn_cast<PointerType>(DestTy)) {
-    if (const PointerType *SrcPTy = dyn_cast<PointerType>(SrcTy)) {
-      const Type *DstElTy = DstPTy->getElementType();
-      const Type *SrcElTy = SrcPTy->getElementType();
-
-      Constant *ZeroUInt = Constant::getNullValue(Type::Int32Ty);
-      unsigned NumZeros = 0;
-      while (SrcElTy != DstElTy &&
-             isa<CompositeType>(SrcElTy) && !isa<PointerType>(SrcElTy) &&
-             SrcElTy->getNumContainedTypes() /* not "{}" */) {
-        SrcElTy = cast<CompositeType>(SrcElTy)->getTypeAtIndex(ZeroUInt);
-        ++NumZeros;
-      }
+    const PointerType *SrcPTy = cast<PointerType>(SrcTy);
+    const Type *DstElTy = DstPTy->getElementType();
+    const Type *SrcElTy = SrcPTy->getElementType();
+
+    // If we are casting a malloc or alloca to a pointer to a type of the same
+    // size, rewrite the allocation instruction to allocate the "right" type.
+    if (AllocationInst *AI = dyn_cast<AllocationInst>(Src))
+      if (Instruction *V = PromoteCastOfAllocation(CI, *AI))
+        return V;
+
+    // If the source and destination are pointers, and this cast is equivalent
+    // to a getelementptr X, 0, 0, 0... turn it into the appropriate
+    // getelementptr.  This can enhance SROA and other transforms that want
+    // type-safe pointers.
+    Constant *ZeroUInt = Constant::getNullValue(Type::Int32Ty);
+    unsigned NumZeros = 0;
+    while (SrcElTy != DstElTy &&
+           isa<CompositeType>(SrcElTy) && !isa<PointerType>(SrcElTy) &&
+           SrcElTy->getNumContainedTypes() /* not "{}" */) {
+      SrcElTy = cast<CompositeType>(SrcElTy)->getTypeAtIndex(ZeroUInt);
+      ++NumZeros;
+    }

-      // If we found a path from the src to dest, create the getelementptr now.
-      if (SrcElTy == DstElTy) {
-        SmallVector<Value*, 8> Idxs(NumZeros+1, ZeroUInt);
-        return new GetElementPtrInst(Src, &Idxs[0], Idxs.size());
-      }
+    // If we found a path from the src to dest, create the getelementptr now.
+    if (SrcElTy == DstElTy) {
+      SmallVector<Value*, 8> Idxs(NumZeros+1, ZeroUInt);
+      return new GetElementPtrInst(Src, &Idxs[0], Idxs.size());
     }
   }

@@ -7178,10 +7410,7 @@ static unsigned GetKnownAlignment(Value *V, TargetData *TD) {
     if (isa<PointerType>(CI->getOperand(0)->getType()))
       return GetKnownAlignment(CI->getOperand(0), TD);
     return 0;
-  } else if (isa<GetElementPtrInst>(V) ||
-             (isa<ConstantExpr>(V) &&
-              cast<ConstantExpr>(V)->getOpcode()==Instruction::GetElementPtr)) {
-    User *GEPI = cast<User>(V);
+  } else if (User *GEPI = dyn_castGetElementPtr(V)) {
     unsigned BaseAlignment = GetKnownAlignment(GEPI->getOperand(0), TD);
     if (BaseAlignment == 0) return 0;

@@ -7360,7 +7589,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
         for (unsigned i = 0; i != 16; ++i) {
           if (isa<UndefValue>(Mask->getOperand(i)))
             continue;
-          unsigned Idx =cast<ConstantInt>(Mask->getOperand(i))->getZExtValue();
+          unsigned Idx=cast<ConstantInt>(Mask->getOperand(i))->getZExtValue();
           Idx &= 31;  // Match the hardware behavior.

           if (ExtractedElts[Idx] == 0) {
@@ -7979,7 +8208,7 @@ static Value *InsertCastToIntPtrTy(Value *V, const Type *DTy,
 Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
   Value *PtrOp = GEP.getOperand(0);

-  // Is it 'getelementptr %P, long 0'  or 'getelementptr %P'
+  // Is it 'getelementptr %P, i32 0'  or 'getelementptr %P'
   // If so, eliminate the noop.
   if (GEP.getNumOperands() == 1)
     return ReplaceInstUsesWith(GEP, PtrOp);
@@ -7994,22 +8223,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
   if (GEP.getNumOperands() == 2 && HasZeroPointerIndex)
     return ReplaceInstUsesWith(GEP, PtrOp);

-  // Keep track of whether all indices are zero constants integers.
-  bool AllZeroIndices = true;
-
   // Eliminate unneeded casts for indices.
   bool MadeChange = false;
   gep_type_iterator GTI = gep_type_begin(GEP);
   for (unsigned i = 1, e = GEP.getNumOperands(); i != e; ++i, ++GTI) {
-    // Track whether this GEP has all zero indices, if so, it doesn't move the
-    // input pointer, it just changes its type.
-    if (AllZeroIndices) {
-      if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP.getOperand(i)))
-        AllZeroIndices = CI->isNullValue();
-      else
-        AllZeroIndices = false;
-    }
     if (isa<SequentialType>(*GTI)) {
       if (CastInst *CI = dyn_cast<CastInst>(GEP.getOperand(i))) {
         if (CI->getOpcode() == Instruction::ZExt ||
@@ -8045,7 +8263,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
   // If this GEP instruction doesn't move the pointer, and if the input operand
   // is a bitcast of another pointer, just replace the GEP with a bitcast of the
   // real input to the dest type.
-  if (AllZeroIndices && isa<BitCastInst>(GEP.getOperand(0)))
+  if (GEP.hasAllZeroIndices() && isa<BitCastInst>(GEP.getOperand(0)))
     return new BitCastInst(cast<BitCastInst>(GEP.getOperand(0))->getOperand(0),
                            GEP.getType());

@@ -8309,13 +8527,6 @@ Instruction *InstCombiner::visitAllocationInst(AllocationInst &AI) {
 Instruction *InstCombiner::visitFreeInst(FreeInst &FI) {
   Value *Op = FI.getOperand(0);

-  // Change free <ty>* (cast <ty2>* X to <ty>*) into free <ty2>* X
-  if (CastInst *CI = dyn_cast<CastInst>(Op))
-    if (isa<PointerType>(CI->getOperand(0)->getType())) {
-      FI.setOperand(0, CI->getOperand(0));
-      return &FI;
-    }
-
   // free undef -> unreachable.
   if (isa<UndefValue>(Op)) {
     // Insert a new store to null because we cannot modify the CFG here.
     new StoreInst(ConstantInt::getTrue(),
                   UndefValue::get(PointerType::get(Type::Int1Ty)), &FI);
     return EraseInstFromFunction(FI);
   }
-
+
   // If we have 'free null' delete the instruction.  This can happen in stl code
   // when lots of inlining happens.
   if (isa<ConstantPointerNull>(Op))
     return EraseInstFromFunction(FI);
+
+  // Change free <ty>* (cast <ty2>* X to <ty>*) into free <ty2>* X
+  if (BitCastInst *CI = dyn_cast<BitCastInst>(Op)) {
+    FI.setOperand(0, CI->getOperand(0));
+    return &FI;
+  }
+
+  // Change free (gep X, 0,0,0,0) into free(X)
+  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
+    if (GEPI->hasAllZeroIndices()) {
+      AddToWorkList(GEPI);
+      FI.setOperand(0, GEPI->getOperand(0));
+      return &FI;
+    }
+  }
+
+  // Change free(malloc) into nothing, if the malloc has a single use.
+  if (MallocInst *MI = dyn_cast<MallocInst>(Op))
+    if (MI->hasOneUse()) {
+      EraseInstFromFunction(FI);
+      return EraseInstFromFunction(*MI);
+    }

   return 0;
 }
@@ -8430,8 +8663,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
   }

   if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op))
-    if (isa<ConstantPointerNull>(GEPI->getOperand(0)) ||
-        isa<UndefValue>(GEPI->getOperand(0))) {
+    if (isa<ConstantPointerNull>(GEPI->getOperand(0))) {
       // Insert a new store to null instruction before the load to indicate
       // that this code is not reachable.  We do this instead of inserting
       // an unreachable instruction directly because we cannot modify the
@@ -8679,66 +8911,118 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
   // ends with an unconditional branch, try to move it to the successor block.
   BBI = &SI; ++BBI;
   if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
-    if (BI->isUnconditional()) {
-      // Check to see if the successor block has exactly two incoming edges.  If
-      // so, see if the other predecessor contains a store to the same location.
-      // If so, insert a PHI node (if needed) and move the stores down.
-      BasicBlock *Dest = BI->getSuccessor(0);
-
-      pred_iterator PI = pred_begin(Dest);
-      BasicBlock *Other = 0;
-      if (*PI != BI->getParent())
-        Other = *PI;
-      ++PI;
-      if (PI != pred_end(Dest)) {
-        if (*PI != BI->getParent())
-          if (Other)
-            Other = 0;
-          else
-            Other = *PI;
-        if (++PI != pred_end(Dest))
-          Other = 0;
-      }
-      if (Other) {  // If only one other pred...
-        BBI = Other->getTerminator();
-        // Make sure this other block ends in an unconditional branch and that
-        // there is an instruction before the branch.
-        if (isa<BranchInst>(BBI) && cast<BranchInst>(BBI)->isUnconditional() &&
-            BBI != Other->begin()) {
-          --BBI;
-          StoreInst *OtherStore = dyn_cast<StoreInst>(BBI);
-
-          // If this instruction is a store to the same location.
-          if (OtherStore && OtherStore->getOperand(1) == SI.getOperand(1)) {
-            // Okay, we know we can perform this transformation.  Insert a PHI
-            // node now if we need it.
-            Value *MergedVal = OtherStore->getOperand(0);
-            if (MergedVal != SI.getOperand(0)) {
-              PHINode *PN = new PHINode(MergedVal->getType(), "storemerge");
-              PN->reserveOperandSpace(2);
-              PN->addIncoming(SI.getOperand(0), SI.getParent());
-              PN->addIncoming(OtherStore->getOperand(0), Other);
-              MergedVal = InsertNewInstBefore(PN, Dest->front());
-            }
-
-            // Advance to a place where it is safe to insert the new store and
-            // insert it.
-            BBI = Dest->begin();
-            while (isa<PHINode>(BBI)) ++BBI;
-            InsertNewInstBefore(new StoreInst(MergedVal, SI.getOperand(1),
-                                              OtherStore->isVolatile()), *BBI);
-
-            // Nuke the old stores.
-            EraseInstFromFunction(SI);
-            EraseInstFromFunction(*OtherStore);
-            ++NumCombined;
-            return 0;
-          }
-        }
-      }
-    }
+    if (BI->isUnconditional())
+      if (SimplifyStoreAtEndOfBlock(SI))
+        return 0;  // xform done!
+
+  return 0;
+}
+
+/// SimplifyStoreAtEndOfBlock - Turn things like:
+///   if () { *P = v1; } else { *P = v2 }
+/// into a phi node with a store in the successor.
+///
+/// Simplify things like:
+///   *P = v1; if () { *P = v2; }
+/// into a phi node with a store in the successor.
+///
+bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
+  BasicBlock *StoreBB = SI.getParent();
+
+  // Check to see if the successor block has exactly two incoming edges.  If
+  // so, see if the other predecessor contains a store to the same location.
+  // If so, insert a PHI node (if needed) and move the stores down.
+  BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);
+
+  // Determine whether Dest has exactly two predecessors and, if so, compute
+  // the other predecessor.
+  pred_iterator PI = pred_begin(DestBB);
+  BasicBlock *OtherBB = 0;
+  if (*PI != StoreBB)
+    OtherBB = *PI;
+  ++PI;
+  if (PI == pred_end(DestBB))
+    return false;
+
+  if (*PI != StoreBB) {
+    if (OtherBB)
+      return false;
+    OtherBB = *PI;
+  }
+  if (++PI != pred_end(DestBB))
+    return false;
+
+
+  // Verify that the other block ends in a branch and is not otherwise empty.
+  BasicBlock::iterator BBI = OtherBB->getTerminator();
+  BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
+  if (!OtherBr || BBI == OtherBB->begin())
+    return false;
+
+  // If the other block ends in an unconditional branch, check for the 'if then
+  // else' case.  Make sure there is an instruction before the branch.
+  StoreInst *OtherStore = 0;
+  if (OtherBr->isUnconditional()) {
+    // If this isn't a store, or isn't a store to the same location, bail out.
+    --BBI;
+    OtherStore = dyn_cast<StoreInst>(BBI);
+    if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1))
+      return false;
+  } else {
+    // Otherwise, the other block ended with a conditional branch. If one of
+    // the destinations is StoreBB, then we have the if/then case.
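+    // (OtherBB either jumps into StoreBB or around it into DestBB.)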
+    if (OtherBr->getSuccessor(0) != StoreBB &&
+        OtherBr->getSuccessor(1) != StoreBB)
+      return false;
+
+    // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
+    // if/then triangle.  See if there is a store to the same ptr as SI that
+    // lives in OtherBB.
+    for (;; --BBI) {
+      // Check to see if we find the matching store.
+      if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
+        if (OtherStore->getOperand(1) != SI.getOperand(1))
+          return false;
+        break;
+      }
+      // If we find something that may be using the stored value, or if we run
+      // out of instructions, we can't do the xform.
+      if (isa<LoadInst>(BBI) || BBI->mayWriteToMemory() ||
+          BBI == OtherBB->begin())
+        return false;
+    }
+
+    // In order to eliminate the store in OtherBB, we have to
+    // make sure nothing reads the stored value in StoreBB.
+    for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
+      // FIXME: This should really be AA driven.
+      if (isa<LoadInst>(I) || I->mayWriteToMemory())
+        return false;
+    }
+  }
+
+  // Insert a PHI node now if we need it.
+  Value *MergedVal = OtherStore->getOperand(0);
+  if (MergedVal != SI.getOperand(0)) {
+    PHINode *PN = new PHINode(MergedVal->getType(), "storemerge");
+    PN->reserveOperandSpace(2);
+    PN->addIncoming(SI.getOperand(0), SI.getParent());
+    PN->addIncoming(OtherStore->getOperand(0), OtherBB);
+    MergedVal = InsertNewInstBefore(PN, DestBB->front());
+  }
+
+  // Advance to a place where it is safe to insert the new store and
+  // insert it.
+  BBI = DestBB->begin();
+  while (isa<PHINode>(BBI)) ++BBI;
+  InsertNewInstBefore(new StoreInst(MergedVal, SI.getOperand(1),
+                                    OtherStore->isVolatile()), *BBI);
+
+  // Nuke the old stores.
+  EraseInstFromFunction(SI);
+  EraseInstFromFunction(*OtherStore);
+  ++NumCombined;
+  return true;
 }

@@ -8970,6 +9254,17 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {

     if (Value *Elt = FindScalarElement(EI.getOperand(0), IndexVal))
       return ReplaceInstUsesWith(EI, Elt);
+
+    // If this extractelement is directly using a bitcast from a vector of
+    // the same number of elements, see if we can find the source element from
+    // it.  In this case, we will end up needing to bitcast the scalars.
+    if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) {
+      if (const VectorType *VT =
+              dyn_cast<VectorType>(BCI->getOperand(0)->getType()))
+        if (VT->getNumElements() == VectorWidth)
+          if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal))
+            return new BitCastInst(Elt, EI.getType());
+    }
   }

   if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) {
@@ -9181,7 +9476,8 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
     if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) &&
         EI->getOperand(0)->getType() == IE.getType()) {
       unsigned NumVectorElts = IE.getType()->getNumElements();
-      unsigned ExtractedIdx=cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
+      unsigned ExtractedIdx =
+        cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
       unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();

       if (ExtractedIdx >= NumVectorElts) // Out of range extract.