Tighten known bits for ctpop based on zero input bits

[oota-llvm.git] / lib / Analysis / ValueTracking.cpp
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp

index 275e35992fb09072d74fd51c1f4056f7b1fe3d8b..d2a1f8737cda4d2805adb2b337ba24d1a3af6dc6 100644 (file)
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -43,7 +43,7 @@ const unsigned MaxDepth = 6;
  
  /// Enable an experimental feature to leverage information about dominating
  /// conditions to compute known bits.  The individual options below control how
-/// hard we search.  The defaults are choosen to be fairly aggressive.  If you
+/// hard we search.  The defaults are chosen to be fairly aggressive.  If you
  /// run into compile time problems when testing, scale them back and report
  /// your findings.
  static cl::opt<bool> EnableDomConditions("value-tracking-dom-conditions",
@@ -58,12 +58,12 @@ static cl::opt<unsigned> DomConditionsMaxDepth("dom-conditions-max-depth",
  /// conditions?
  static cl::opt<unsigned> DomConditionsMaxDomBlocks("dom-conditions-dom-blocks",
                                                     cl::Hidden,
-                                                   cl::init(20000));
+                                                   cl::init(20));
  
  // Controls the number of uses of the value searched for possible
  // dominating comparisons.
  static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
-                                              cl::Hidden, cl::init(2000));
+                                              cl::Hidden, cl::init(20));
  
  // If true, don't consider only compares whose only use is a branch.
  static cl::opt<bool> DomConditionsSingleCmpUse("dom-conditions-single-cmp-use",
@@ -185,6 +185,14 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth,
    return ::isKnownNonZero(V, DL, Depth, Query(AC, safeCxtI(V, CxtI), DT));
  }
  
+bool llvm::isKnownNonNegative(Value *V, const DataLayout &DL, unsigned Depth,
+                              AssumptionCache *AC, const Instruction *CxtI,
+                              const DominatorTree *DT) {
+  bool NonNegative, Negative;
+  ComputeSignBit(V, NonNegative, Negative, DL, Depth, AC, CxtI, DT);
+  return NonNegative;
+}
+
  static bool MaskedValueIsZero(Value *V, const APInt &Mask, const DataLayout &DL,
                                unsigned Depth, const Query &Q);
  
@@ -320,7 +328,7 @@ static void computeKnownBitsMul(Value *Op0, Value *Op1, bool NSW,
    }
  
    // If low bits are zero in either operand, output low known-0 bits.
-  // Also compute a conserative estimate for high known-0 bits.
+  // Also compute a conservative estimate for high known-0 bits.
    // More trickiness is possible, but this is sufficient for the
    // interesting case of alignment computation.
    KnownOne.clearAllBits();
@@ -447,7 +455,7 @@ static bool isValidAssumeForContext(Value *V, const Query &Q) {
        for (BasicBlock::const_iterator I =
               std::next(BasicBlock::const_iterator(Q.CxtI)),
                                        IE(Inv); I != IE; ++I)
-        if (!isSafeToSpeculativelyExecute(I) && !isAssumeLikeIntrinsic(I))
+        if (!isSafeToSpeculativelyExecute(&*I) && !isAssumeLikeIntrinsic(&*I))
            return false;
  
        return !isEphemeralValueOf(Inv, Q.CxtI);
@@ -464,14 +472,14 @@ static bool isValidAssumeForContext(Value *V, const Query &Q) {
      // of the block); the common case is that the assume will come first.
      for (BasicBlock::iterator I = std::next(BasicBlock::iterator(Inv)),
           IE = Inv->getParent()->end(); I != IE; ++I)
-      if (I == Q.CxtI)
+      if (&*I == Q.CxtI)
          return true;
  
      // The context must come first...
      for (BasicBlock::const_iterator I =
             std::next(BasicBlock::const_iterator(Q.CxtI)),
                                      IE(Inv); I != IE; ++I)
-      if (!isSafeToSpeculativelyExecute(I) && !isAssumeLikeIntrinsic(I))
+      if (!isSafeToSpeculativelyExecute(&*I) && !isAssumeLikeIntrinsic(&*I))
          return false;
  
      return !isEphemeralValueOf(Inv, Q.CxtI);
@@ -601,6 +609,11 @@ static void computeKnownBitsFromDominatingCondition(Value *V, APInt &KnownZero,
    if (!Q.DT || !Q.CxtI)
      return;
    Instruction *Cxt = const_cast<Instruction *>(Q.CxtI);
+  // The context instruction might be in a statically unreachable block.  If
+  // so, asking dominator queries may yield suprising results.  (e.g. the block
+  // may not have a dom tree node)
+  if (!Q.DT->isReachableFromEntry(Cxt->getParent()))
+    return;
  
    // Avoid useless work
    if (auto VI = dyn_cast<Instruction>(V))
@@ -647,7 +660,9 @@ static void computeKnownBitsFromDominatingCondition(Value *V, APInt &KnownZero,
      // instruction.  Finding a condition where one path dominates the context
      // isn't enough because both the true and false cases could merge before
      // the context instruction we're actually interested in.  Instead, we need
-    // to ensure that the taken *edge* dominates the context instruction.
+    // to ensure that the taken *edge* dominates the context instruction.  We
+    // know that the edge must be reachable since we started from a reachable
+    // block.
      BasicBlock *BB0 = BI->getSuccessor(0);
      BasicBlockEdge Edge(BI->getParent(), BB0);
      if (!Edge.isSingleEdge() || !Q.DT->dominates(Edge, Q.CxtI->getParent()))
@@ -1050,7 +1065,8 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
    }
    case Instruction::BitCast: {
      Type *SrcTy = I->getOperand(0)->getType();
-    if ((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
+    if ((SrcTy->isIntegerTy() || SrcTy->isPointerTy() ||
+         SrcTy->isFloatingPointTy()) &&
          // TODO: For now, not handling conversions like:
          // (bitcast i64 %x to <2 x i32>)
          !I->getType()->isVectorTy()) {
@@ -1343,6 +1359,12 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
        switch (II->getIntrinsicID()) {
        default: break;
+      case Intrinsic::bswap:
+        computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL,
+                         Depth + 1, Q);
+        KnownZero |= KnownZero2.byteSwap();
+        KnownOne |= KnownOne2.byteSwap();
+        break;
        case Intrinsic::ctlz:
        case Intrinsic::cttz: {
          unsigned LowBits = Log2_32(BitWidth)+1;
@@ -1353,8 +1375,24 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
          break;
        }
        case Intrinsic::ctpop: {
-        unsigned LowBits = Log2_32(BitWidth)+1;
-        KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
+        computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL,
+                         Depth + 1, Q);
+        // We can bound the space the count needs.  Also, bits known to be zero
+        // can't contribute to the population.
+        unsigned BitsPossiblySet = BitWidth - KnownZero2.countPopulation();
+        unsigned LeadingZeros =
+          APInt(BitWidth, BitsPossiblySet).countLeadingZeros();
+        assert(LeadingZeros >= 0 && LeadingZeros <= BitWidth);
+        KnownZero |= APInt::getHighBitsSet(BitWidth, LeadingZeros);
+        KnownOne &= ~KnownZero;
+        // TODO: we could bound KnownOne using the lower bound on the number
+        // of bits which might be set provided by popcnt KnownOne2.
+        break;
+      }
+      case Intrinsic::fabs: {
+        Type *Ty = II->getType();
+        APInt SignBit = APInt::getSignBit(Ty->getScalarSizeInBits());
+        KnownZero |= APInt::getSplat(Ty->getPrimitiveSizeInBits(), SignBit);
          break;
        }
        case Intrinsic::x86_sse42_crc32_64_64:
@@ -1394,6 +1432,46 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
    }
  }
  
+static unsigned getAlignment(const Value *V, const DataLayout &DL) {
+  unsigned Align = 0;
+  if (auto *GO = dyn_cast<GlobalObject>(V)) {
+    Align = GO->getAlignment();
+    if (Align == 0) {
+      if (auto *GVar = dyn_cast<GlobalVariable>(GO)) {
+        Type *ObjectType = GVar->getType()->getElementType();
+        if (ObjectType->isSized()) {
+          // If the object is defined in the current Module, we'll be giving
+          // it the preferred alignment. Otherwise, we have to assume that it
+          // may only have the minimum ABI alignment.
+          if (GVar->isStrongDefinitionForLinker())
+            Align = DL.getPreferredAlignment(GVar);
+          else
+            Align = DL.getABITypeAlignment(ObjectType);
+        }
+      }
+    }
+  } else if (const Argument *A = dyn_cast<Argument>(V)) {
+    Align = A->getType()->isPointerTy() ? A->getParamAlignment() : 0;
+
+    if (!Align && A->hasStructRetAttr()) {
+      // An sret parameter has at least the ABI alignment of the return type.
+      Type *EltTy = cast<PointerType>(A->getType())->getElementType();
+      if (EltTy->isSized())
+        Align = DL.getABITypeAlignment(EltTy);
+    }
+  } else if (const AllocaInst *AI = dyn_cast<AllocaInst>(V))
+    Align = AI->getAlignment();
+  else if (auto CS = ImmutableCallSite(V))
+    Align = CS.getAttributes().getParamAlignment(AttributeSet::ReturnIndex);
+  else if (const LoadInst *LI = dyn_cast<LoadInst>(V))
+    if (MDNode *MD = LI->getMetadata(LLVMContext::MD_align)) {
+      ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(0));
+      Align = CI->getLimitedValue();
+    }
+
+  return Align;
+}
+
  /// Determine which bits of V are known to be either zero or one and return
  /// them in the KnownZero/KnownOne bit sets.
  ///
@@ -1416,8 +1494,9 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
    unsigned BitWidth = KnownZero.getBitWidth();
  
    assert((V->getType()->isIntOrIntVectorTy() ||
+          V->getType()->isFPOrFPVectorTy() ||
            V->getType()->getScalarType()->isPointerTy()) &&
-         "Not integer or pointer type!");
+         "Not integer, floating point, or pointer type!");
    assert((DL.getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) &&
           (!V->getType()->isIntOrIntVectorTy() ||
            V->getType()->getScalarSizeInBits() == BitWidth) &&
@@ -1454,59 +1533,6 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
      return;
    }
  
-  // The address of an aligned GlobalValue has trailing zeros.
-  if (auto *GO = dyn_cast<GlobalObject>(V)) {
-    unsigned Align = GO->getAlignment();
-    if (Align == 0) {
-      if (auto *GVar = dyn_cast<GlobalVariable>(GO)) {
-        Type *ObjectType = GVar->getType()->getElementType();
-        if (ObjectType->isSized()) {
-          // If the object is defined in the current Module, we'll be giving
-          // it the preferred alignment. Otherwise, we have to assume that it
-          // may only have the minimum ABI alignment.
-          if (GVar->isStrongDefinitionForLinker())
-            Align = DL.getPreferredAlignment(GVar);
-          else
-            Align = DL.getABITypeAlignment(ObjectType);
-        }
-      }
-    }
-    if (Align > 0)
-      KnownZero = APInt::getLowBitsSet(BitWidth,
-                                       countTrailingZeros(Align));
-    else
-      KnownZero.clearAllBits();
-    KnownOne.clearAllBits();
-    return;
-  }
-
-  if (Argument *A = dyn_cast<Argument>(V)) {
-    unsigned Align = A->getType()->isPointerTy() ? A->getParamAlignment() : 0;
-
-    if (!Align && A->hasStructRetAttr()) {
-      // An sret parameter has at least the ABI alignment of the return type.
-      Type *EltTy = cast<PointerType>(A->getType())->getElementType();
-      if (EltTy->isSized())
-        Align = DL.getABITypeAlignment(EltTy);
-    }
-
-    if (Align)
-      KnownZero = APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align));
-    else
-      KnownZero.clearAllBits();
-    KnownOne.clearAllBits();
-
-    // Don't give up yet... there might be an assumption that provides more
-    // information...
-    computeKnownBitsFromAssume(V, KnownZero, KnownOne, DL, Depth, Q);
-
-    // Or a dominating condition for that matter
-    if (EnableDomConditions && Depth <= DomConditionsMaxDepth)
-      computeKnownBitsFromDominatingCondition(V, KnownZero, KnownOne, DL,
-                                              Depth, Q);
-    return;
-  }
-
    // Start out not knowing anything.
    KnownZero.clearAllBits(); KnownOne.clearAllBits();
  
@@ -1525,6 +1551,14 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
  
    if (Operator *I = dyn_cast<Operator>(V))
      computeKnownBitsFromOperator(I, KnownZero, KnownOne, DL, Depth, Q);
+
+  // Aligned pointers have trailing zeros - refine KnownZero set
+  if (V->getType()->isPointerTy()) {
+    unsigned Align = getAlignment(V, DL);
+    if (Align)
+      KnownZero |= APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align));
+  }
+
    // computeKnownBitsFromAssume and computeKnownBitsFromDominatingCondition
    // strictly refines KnownZero and KnownOne. Therefore, we run them after
    // computeKnownBitsFromOperator.
@@ -1812,6 +1846,23 @@ bool isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth,
      ComputeSignBit(X, XKnownNonNegative, XKnownNegative, DL, Depth, Q);
      if (XKnownNegative)
        return true;
+
+    // If the shifter operand is a constant, and all of the bits shifted
+    // out are known to be zero, and X is known non-zero then at least one
+    // non-zero bit must remain.
+    if (ConstantInt *Shift = dyn_cast<ConstantInt>(Y)) {
+      APInt KnownZero(BitWidth, 0);
+      APInt KnownOne(BitWidth, 0);
+      computeKnownBits(X, KnownZero, KnownOne, DL, Depth, Q);
+      
+      auto ShiftVal = Shift->getLimitedValue(BitWidth - 1);
+      // Is there a known one in the portion not shifted out?
+      if (KnownOne.countLeadingZeros() < BitWidth - ShiftVal)
+        return true;
+      // Are all the bits to be shifted out known zero?
+      if (KnownZero.countTrailingOnes() >= ShiftVal)
+        return isKnownNonZero(X, DL, Depth, Q);
+    }
    }
    // div exact can only produce a zero if the dividend is zero.
    else if (match(V, m_Exact(m_IDiv(m_Value(X), m_Value())))) {
@@ -1871,6 +1922,26 @@ bool isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth,
          isKnownNonZero(SI->getFalseValue(), DL, Depth, Q))
        return true;
    }
+  // PHI
+  else if (PHINode *PN = dyn_cast<PHINode>(V)) {
+    // Try and detect a recurrence that monotonically increases from a
+    // starting value, as these are common as induction variables.
+    if (PN->getNumIncomingValues() == 2) {
+      Value *Start = PN->getIncomingValue(0);
+      Value *Induction = PN->getIncomingValue(1);
+      if (isa<ConstantInt>(Induction) && !isa<ConstantInt>(Start))
+        std::swap(Start, Induction);
+      if (ConstantInt *C = dyn_cast<ConstantInt>(Start)) {
+        if (!C->isZero() && !C->isNegative()) {
+          ConstantInt *X;
+          if ((match(Induction, m_NSWAdd(m_Specific(PN), m_ConstantInt(X))) ||
+               match(Induction, m_NUWAdd(m_Specific(PN), m_ConstantInt(X)))) &&
+              !X->isNegative())
+            return true;
+        }
+      }
+    }
+  }
  
    if (!BitWidth) return false;
    APInt KnownZero(BitWidth, 0);
@@ -2545,7 +2616,7 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
        }
  
        // This insert value inserts something else than what we are looking for.
-      // See if the (aggregrate) value inserted into has the value we are
+      // See if the (aggregate) value inserted into has the value we are
        // looking for, then.
        if (*req_idx != *i)
          return FindInsertedValue(I->getAggregateOperand(), idx_range,
@@ -2560,7 +2631,7 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
    }
  
    if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(V)) {
-    // If we're extracting a value from an aggregrate that was extracted from
+    // If we're extracting a value from an aggregate that was extracted from
      // something else, we can extract from that something else directly instead.
      // However, we will need to chain I's indices with the requested indices.
  
@@ -2935,20 +3006,38 @@ static bool isDereferenceableFromAttribute(const Value *V, const DataLayout &DL,
    return isDereferenceableFromAttribute(V, Offset, Ty, DL, CtxI, DT, TLI);
  }
  
-/// Return true if Value is always a dereferenceable pointer.
-///
+static bool isAligned(const Value *Base, APInt Offset, unsigned Align,
+                      const DataLayout &DL) {
+  APInt BaseAlign(Offset.getBitWidth(), getAlignment(Base, DL));
+
+  if (!BaseAlign) {
+    Type *Ty = Base->getType()->getPointerElementType();
+    BaseAlign = DL.getABITypeAlignment(Ty);
+  }
+
+  APInt Alignment(Offset.getBitWidth(), Align);
+
+  assert(Alignment.isPowerOf2() && "must be a power of 2!");
+  return BaseAlign.uge(Alignment) && !(Offset & (Alignment-1));
+}
+
+static bool isAligned(const Value *Base, unsigned Align, const DataLayout &DL) {
+  APInt Offset(DL.getTypeStoreSizeInBits(Base->getType()), 0);
+  return isAligned(Base, Offset, Align, DL);
+}
+
  /// Test if V is always a pointer to allocated and suitably aligned memory for
  /// a simple load or store.
-static bool isDereferenceablePointer(const Value *V, const DataLayout &DL,
-                                     const Instruction *CtxI,
-                                     const DominatorTree *DT,
-                                     const TargetLibraryInfo *TLI,
-                                     SmallPtrSetImpl<const Value *> &Visited) {
+static bool isDereferenceableAndAlignedPointer(
+    const Value *V, unsigned Align, const DataLayout &DL,
+    const Instruction *CtxI, const DominatorTree *DT,
+    const TargetLibraryInfo *TLI, SmallPtrSetImpl<const Value *> &Visited) {
    // Note that it is not safe to speculate into a malloc'd region because
    // malloc may return null.
  
-  // These are obviously ok.
-  if (isa<AllocaInst>(V)) return true;
+  // These are obviously ok if aligned.
+  if (isa<AllocaInst>(V))
+    return isAligned(V, Align, DL);
  
    // It's not always safe to follow a bitcast, for example:
    //   bitcast i8* (alloca i8) to i32*
@@ -2963,21 +3052,22 @@ static bool isDereferenceablePointer(const Value *V, const DataLayout &DL,
      if (STy->isSized() && DTy->isSized() &&
          (DL.getTypeStoreSize(STy) >= DL.getTypeStoreSize(DTy)) &&
          (DL.getABITypeAlignment(STy) >= DL.getABITypeAlignment(DTy)))
-      return isDereferenceablePointer(BC->getOperand(0), DL, CtxI,
-                                      DT, TLI, Visited);
+      return isDereferenceableAndAlignedPointer(BC->getOperand(0), Align, DL,
+                                                CtxI, DT, TLI, Visited);
    }
  
    // Global variables which can't collapse to null are ok.
    if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
-    return !GV->hasExternalWeakLinkage();
+    if (!GV->hasExternalWeakLinkage())
+      return isAligned(V, Align, DL);
  
    // byval arguments are okay.
    if (const Argument *A = dyn_cast<Argument>(V))
      if (A->hasByValAttr())
-      return true;
-    
+      return isAligned(V, Align, DL);
+
    if (isDereferenceableFromAttribute(V, DL, CtxI, DT, TLI))
-    return true;
+    return isAligned(V, Align, DL);
  
    // For GEPs, determine if the indexing lands within the allocated object.
    if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
@@ -2985,61 +3075,79 @@ static bool isDereferenceablePointer(const Value *V, const DataLayout &DL,
      Type *Ty = VTy->getPointerElementType();
      const Value *Base = GEP->getPointerOperand();
  
-    // Conservatively require that the base pointer be fully dereferenceable.
+    // Conservatively require that the base pointer be fully dereferenceable
+    // and aligned.
      if (!Visited.insert(Base).second)
        return false;
-    if (!isDereferenceablePointer(Base, DL, CtxI,
-                                  DT, TLI, Visited))
+    if (!isDereferenceableAndAlignedPointer(Base, Align, DL, CtxI, DT, TLI,
+                                            Visited))
        return false;
-    
+
      APInt Offset(DL.getPointerTypeSizeInBits(VTy), 0);
      if (!GEP->accumulateConstantOffset(DL, Offset))
        return false;
-    
-    // Check if the load is within the bounds of the underlying object.
+
+    // Check if the load is within the bounds of the underlying object
+    // and offset is aligned.
      uint64_t LoadSize = DL.getTypeStoreSize(Ty);
      Type *BaseType = Base->getType()->getPointerElementType();
-    return (Offset + LoadSize).ule(DL.getTypeAllocSize(BaseType));
+    assert(isPowerOf2_32(Align) && "must be a power of 2!");
+    return (Offset + LoadSize).ule(DL.getTypeAllocSize(BaseType)) && 
+           !(Offset & APInt(Offset.getBitWidth(), Align-1));
    }
  
    // For gc.relocate, look through relocations
    if (const IntrinsicInst *I = dyn_cast<IntrinsicInst>(V))
      if (I->getIntrinsicID() == Intrinsic::experimental_gc_relocate) {
        GCRelocateOperands RelocateInst(I);
-      return isDereferenceablePointer(RelocateInst.getDerivedPtr(), DL, CtxI,
-                                      DT, TLI, Visited);
+      return isDereferenceableAndAlignedPointer(
+          RelocateInst.getDerivedPtr(), Align, DL, CtxI, DT, TLI, Visited);
      }
  
    if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(V))
-    return isDereferenceablePointer(ASC->getOperand(0), DL, CtxI,
-                                    DT, TLI, Visited);
+    return isDereferenceableAndAlignedPointer(ASC->getOperand(0), Align, DL,
+                                              CtxI, DT, TLI, Visited);
  
    // If we don't know, assume the worst.
    return false;
  }
  
-bool llvm::isDereferenceablePointer(const Value *V, const DataLayout &DL,
-                                    const Instruction *CtxI,
-                                    const DominatorTree *DT,
-                                    const TargetLibraryInfo *TLI) {
+bool llvm::isDereferenceableAndAlignedPointer(const Value *V, unsigned Align,
+                                              const DataLayout &DL,
+                                              const Instruction *CtxI,
+                                              const DominatorTree *DT,
+                                              const TargetLibraryInfo *TLI) {
    // When dereferenceability information is provided by a dereferenceable
    // attribute, we know exactly how many bytes are dereferenceable. If we can
    // determine the exact offset to the attributed variable, we can use that
    // information here.
    Type *VTy = V->getType();
    Type *Ty = VTy->getPointerElementType();
+
+  // Require ABI alignment for loads without alignment specification
+  if (Align == 0)
+    Align = DL.getABITypeAlignment(Ty);
+
    if (Ty->isSized()) {
      APInt Offset(DL.getTypeStoreSizeInBits(VTy), 0);
      const Value *BV = V->stripAndAccumulateInBoundsConstantOffsets(DL, Offset);
-    
+
      if (Offset.isNonNegative())
-      if (isDereferenceableFromAttribute(BV, Offset, Ty, DL,
-                                         CtxI, DT, TLI))
+      if (isDereferenceableFromAttribute(BV, Offset, Ty, DL, CtxI, DT, TLI) &&
+          isAligned(BV, Offset, Align, DL))
          return true;
    }
  
    SmallPtrSet<const Value *, 32> Visited;
-  return ::isDereferenceablePointer(V, DL, CtxI, DT, TLI, Visited);
+  return ::isDereferenceableAndAlignedPointer(V, Align, DL, CtxI, DT, TLI,
+                                              Visited);
+}
+
+bool llvm::isDereferenceablePointer(const Value *V, const DataLayout &DL,
+                                    const Instruction *CtxI,
+                                    const DominatorTree *DT,
+                                    const TargetLibraryInfo *TLI) {
+  return isDereferenceableAndAlignedPointer(V, 1, DL, CtxI, DT, TLI);
  }
  
  bool llvm::isSafeToSpeculativelyExecute(const Value *V,
@@ -3089,10 +3197,15 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
      const LoadInst *LI = cast<LoadInst>(Inst);
      if (!LI->isUnordered() ||
          // Speculative load may create a race that did not exist in the source.
-        LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread))
+        LI->getParent()->getParent()->hasFnAttribute(
+            Attribute::SanitizeThread) ||
+        // Speculative load may load data from dirty regions.
+        LI->getParent()->getParent()->hasFnAttribute(
+            Attribute::SanitizeAddress))
        return false;
      const DataLayout &DL = LI->getModule()->getDataLayout();
-    return isDereferenceablePointer(LI->getPointerOperand(), DL, CtxI, DT, TLI);
+    return isDereferenceableAndAlignedPointer(
+        LI->getPointerOperand(), LI->getAlignment(), DL, CtxI, DT, TLI);
    }
    case Instruction::Call: {
      if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
@@ -3155,6 +3268,7 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
    case Instruction::CatchEndPad:
    case Instruction::CatchRet:
    case Instruction::CleanupPad:
+  case Instruction::CleanupEndPad:
    case Instruction::CleanupRet:
    case Instruction::TerminatePad:
      return false; // Misc instructions which have effects
@@ -3167,6 +3281,8 @@ bool llvm::mayBeMemoryDependent(const Instruction &I) {
  
  /// Return true if we know that the specified value is never null.
  bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) {
+  assert(V->getType()->isPointerTy() && "V must be pointer type");
+
    // Alloca never returns null, malloc might.
    if (isa<AllocaInst>(V)) return true;
  
@@ -3174,9 +3290,12 @@ bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) {
    if (const Argument *A = dyn_cast<Argument>(V))
      return A->hasByValOrInAllocaAttr() || A->hasNonNullAttr();
  
-  // Global values are not null unless extern weak.
+  // A global variable in address space 0 is non null unless extern weak.
+  // Other address spaces may have null as a valid address for a global,
+  // so we can't assume anything.
    if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
-    return !GV->hasExternalWeakLinkage();
+    return !GV->hasExternalWeakLinkage() &&
+           GV->getType()->getAddressSpace() == 0;
  
    // A Load tagged w/nonnull metadata is never null. 
    if (const LoadInst *LI = dyn_cast<LoadInst>(V))
@@ -3196,6 +3315,8 @@ bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) {
  static bool isKnownNonNullFromDominatingCondition(const Value *V,
                                                    const Instruction *CtxI,
                                                    const DominatorTree *DT) {
+  assert(V->getType()->isPointerTy() && "V must be pointer type");
+
    unsigned NumUsesExplored = 0;
    for (auto U : V->users()) {
      // Avoid massive lists
@@ -3326,6 +3447,67 @@ OverflowResult llvm::computeOverflowForUnsignedAdd(Value *LHS, Value *RHS,
    return OverflowResult::MayOverflow;
  }
  
+static OverflowResult computeOverflowForSignedAdd(
+    Value *LHS, Value *RHS, AddOperator *Add, const DataLayout &DL,
+    AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) {
+  if (Add && Add->hasNoSignedWrap()) {
+    return OverflowResult::NeverOverflows;
+  }
+
+  bool LHSKnownNonNegative, LHSKnownNegative;
+  bool RHSKnownNonNegative, RHSKnownNegative;
+  ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, DL, /*Depth=*/0,
+                 AC, CxtI, DT);
+  ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, DL, /*Depth=*/0,
+                 AC, CxtI, DT);
+
+  if ((LHSKnownNonNegative && RHSKnownNegative) ||
+      (LHSKnownNegative && RHSKnownNonNegative)) {
+    // The sign bits are opposite: this CANNOT overflow.
+    return OverflowResult::NeverOverflows;
+  }
+
+  // The remaining code needs Add to be available. Early returns if not so.
+  if (!Add)
+    return OverflowResult::MayOverflow;
+
+  // If the sign of Add is the same as at least one of the operands, this add
+  // CANNOT overflow. This is particularly useful when the sum is
+  // @llvm.assume'ed non-negative rather than proved so from analyzing its
+  // operands.
+  bool LHSOrRHSKnownNonNegative =
+      (LHSKnownNonNegative || RHSKnownNonNegative);
+  bool LHSOrRHSKnownNegative = (LHSKnownNegative || RHSKnownNegative);
+  if (LHSOrRHSKnownNonNegative || LHSOrRHSKnownNegative) {
+    bool AddKnownNonNegative, AddKnownNegative;
+    ComputeSignBit(Add, AddKnownNonNegative, AddKnownNegative, DL,
+                   /*Depth=*/0, AC, CxtI, DT);
+    if ((AddKnownNonNegative && LHSOrRHSKnownNonNegative) ||
+        (AddKnownNegative && LHSOrRHSKnownNegative)) {
+      return OverflowResult::NeverOverflows;
+    }
+  }
+
+  return OverflowResult::MayOverflow;
+}
+
+OverflowResult llvm::computeOverflowForSignedAdd(AddOperator *Add,
+                                                 const DataLayout &DL,
+                                                 AssumptionCache *AC,
+                                                 const Instruction *CxtI,
+                                                 const DominatorTree *DT) {
+  return ::computeOverflowForSignedAdd(Add->getOperand(0), Add->getOperand(1),
+                                       Add, DL, AC, CxtI, DT);
+}
+
+OverflowResult llvm::computeOverflowForSignedAdd(Value *LHS, Value *RHS,
+                                                 const DataLayout &DL,
+                                                 AssumptionCache *AC,
+                                                 const Instruction *CxtI,
+                                                 const DominatorTree *DT) {
+  return ::computeOverflowForSignedAdd(LHS, RHS, nullptr, DL, AC, CxtI, DT);
+}
+
  bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) {
    // FIXME: This conservative implementation can be relaxed. E.g. most
    // atomic operations are guaranteed to terminate on most platforms
@@ -3467,16 +3649,17 @@ bool llvm::isKnownNotFullPoison(const Instruction *PoisonI) {
    SmallSet<const Value *, 16> YieldsPoison;
    YieldsPoison.insert(PoisonI);
  
-  for (const Instruction *I = PoisonI, *E = BB->end(); I != E;
-       I = I->getNextNode()) {
-    if (I != PoisonI) {
-      const Value *NotPoison = getGuaranteedNonFullPoisonOp(I);
+  for (BasicBlock::const_iterator I = PoisonI->getIterator(), E = BB->end();
+       I != E; ++I) {
+    if (&*I != PoisonI) {
+      const Value *NotPoison = getGuaranteedNonFullPoisonOp(&*I);
        if (NotPoison != nullptr && YieldsPoison.count(NotPoison)) return true;
-      if (!isGuaranteedToTransferExecutionToSuccessor(I)) return false;
+      if (!isGuaranteedToTransferExecutionToSuccessor(&*I))
+        return false;
      }
  
      // Mark poison that propagates from I through uses of I.
-    if (YieldsPoison.count(I)) {
+    if (YieldsPoison.count(&*I)) {
        for (const User *User : I->users()) {
          const Instruction *UserI = cast<Instruction>(User);
          if (UserI->getParent() == BB && propagatesFullPoison(UserI))
@@ -3487,40 +3670,116 @@ bool llvm::isKnownNotFullPoison(const Instruction *PoisonI) {
    return false;
  }
  
-static SelectPatternFlavor matchSelectPattern(ICmpInst::Predicate Pred,
+static bool isKnownNonNaN(Value *V, FastMathFlags FMF) {
+  if (FMF.noNaNs())
+    return true;
+
+  if (auto *C = dyn_cast<ConstantFP>(V))
+    return !C->isNaN();
+  return false;
+}
+
+static bool isKnownNonZero(Value *V) {
+  if (auto *C = dyn_cast<ConstantFP>(V))
+    return !C->isZero();
+  return false;
+}
+
+static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred,
+                                              FastMathFlags FMF,
                                                Value *CmpLHS, Value *CmpRHS,
                                                Value *TrueVal, Value *FalseVal,
                                                Value *&LHS, Value *&RHS) {
    LHS = CmpLHS;
    RHS = CmpRHS;
  
-  // (icmp X, Y) ? X : Y
-  if (TrueVal == CmpLHS && FalseVal == CmpRHS) {
-    switch (Pred) {
-    default: return SPF_UNKNOWN; // Equality.
-    case ICmpInst::ICMP_UGT:
-    case ICmpInst::ICMP_UGE: return SPF_UMAX;
-    case ICmpInst::ICMP_SGT:
-    case ICmpInst::ICMP_SGE: return SPF_SMAX;
-    case ICmpInst::ICMP_ULT:
-    case ICmpInst::ICMP_ULE: return SPF_UMIN;
-    case ICmpInst::ICMP_SLT:
-    case ICmpInst::ICMP_SLE: return SPF_SMIN;
+  // If the predicate is an "or-equal"  (FP) predicate, then signed zeroes may
+  // return inconsistent results between implementations.
+  //   (0.0 <= -0.0) ? 0.0 : -0.0 // Returns 0.0
+  //   minNum(0.0, -0.0)          // May return -0.0 or 0.0 (IEEE 754-2008 5.3.1)
+  // Therefore we behave conservatively and only proceed if at least one of the
+  // operands is known to not be zero, or if we don't care about signed zeroes.
+  switch (Pred) {
+  default: break;
+  case CmpInst::FCMP_OGE: case CmpInst::FCMP_OLE:
+  case CmpInst::FCMP_UGE: case CmpInst::FCMP_ULE:
+    if (!FMF.noSignedZeros() && !isKnownNonZero(CmpLHS) &&
+        !isKnownNonZero(CmpRHS))
+      return {SPF_UNKNOWN, SPNB_NA, false};
+  }
+
+  SelectPatternNaNBehavior NaNBehavior = SPNB_NA;
+  bool Ordered = false;
+
+  // When given one NaN and one non-NaN input:
+  //   - maxnum/minnum (C99 fmaxf()/fminf()) return the non-NaN input.
+  //   - A simple C99 (a < b ? a : b) construction will return 'b' (as the
+  //     ordered comparison fails), which could be NaN or non-NaN.
+  // so here we discover exactly what NaN behavior is required/accepted.
+  if (CmpInst::isFPPredicate(Pred)) {
+    bool LHSSafe = isKnownNonNaN(CmpLHS, FMF);
+    bool RHSSafe = isKnownNonNaN(CmpRHS, FMF);
+
+    if (LHSSafe && RHSSafe) {
+      // Both operands are known non-NaN.
+      NaNBehavior = SPNB_RETURNS_ANY;
+    } else if (CmpInst::isOrdered(Pred)) {
+      // An ordered comparison will return false when given a NaN, so it
+      // returns the RHS.
+      Ordered = true;
+      if (LHSSafe)
+        // LHS is non-NaN, so if RHS is NaN then NaN will be returned.
+        NaNBehavior = SPNB_RETURNS_NAN;
+      else if (RHSSafe)
+        NaNBehavior = SPNB_RETURNS_OTHER;
+      else
+        // Completely unsafe.
+        return {SPF_UNKNOWN, SPNB_NA, false};
+    } else {
+      Ordered = false;
+      // An unordered comparison will return true when given a NaN, so it
+      // returns the LHS.
+      if (LHSSafe)
+        // LHS is non-NaN, so if RHS is NaN then non-NaN will be returned.
+        NaNBehavior = SPNB_RETURNS_OTHER;
+      else if (RHSSafe)
+        NaNBehavior = SPNB_RETURNS_NAN;
+      else
+        // Completely unsafe.
+        return {SPF_UNKNOWN, SPNB_NA, false};
      }
    }
  
-  // (icmp X, Y) ? Y : X
    if (TrueVal == CmpRHS && FalseVal == CmpLHS) {
+    std::swap(CmpLHS, CmpRHS);
+    Pred = CmpInst::getSwappedPredicate(Pred);
+    if (NaNBehavior == SPNB_RETURNS_NAN)
+      NaNBehavior = SPNB_RETURNS_OTHER;
+    else if (NaNBehavior == SPNB_RETURNS_OTHER)
+      NaNBehavior = SPNB_RETURNS_NAN;
+    Ordered = !Ordered;
+  }
+
+  // ([if]cmp X, Y) ? X : Y
+  if (TrueVal == CmpLHS && FalseVal == CmpRHS) {
      switch (Pred) {
-    default: return SPF_UNKNOWN; // Equality.
+    default: return {SPF_UNKNOWN, SPNB_NA, false}; // Equality.
      case ICmpInst::ICMP_UGT:
-    case ICmpInst::ICMP_UGE: return SPF_UMIN;
+    case ICmpInst::ICMP_UGE: return {SPF_UMAX, SPNB_NA, false};
      case ICmpInst::ICMP_SGT:
-    case ICmpInst::ICMP_SGE: return SPF_SMIN;
+    case ICmpInst::ICMP_SGE: return {SPF_SMAX, SPNB_NA, false};
      case ICmpInst::ICMP_ULT:
-    case ICmpInst::ICMP_ULE: return SPF_UMAX;
+    case ICmpInst::ICMP_ULE: return {SPF_UMIN, SPNB_NA, false};
      case ICmpInst::ICMP_SLT:
-    case ICmpInst::ICMP_SLE: return SPF_SMAX;
+    case ICmpInst::ICMP_SLE: return {SPF_SMIN, SPNB_NA, false};
+    case FCmpInst::FCMP_UGT:
+    case FCmpInst::FCMP_UGE:
+    case FCmpInst::FCMP_OGT:
+    case FCmpInst::FCMP_OGE: return {SPF_FMAXNUM, NaNBehavior, Ordered};
+    case FCmpInst::FCMP_ULT:
+    case FCmpInst::FCMP_ULE:
+    case FCmpInst::FCMP_OLT:
+    case FCmpInst::FCMP_OLE: return {SPF_FMINNUM, NaNBehavior, Ordered};
      }
    }
  
@@ -3531,13 +3790,13 @@ static SelectPatternFlavor matchSelectPattern(ICmpInst::Predicate Pred,
        // ABS(X) ==> (X >s 0) ? X : -X and (X >s -1) ? X : -X
        // NABS(X) ==> (X >s 0) ? -X : X and (X >s -1) ? -X : X
        if (Pred == ICmpInst::ICMP_SGT && (C1->isZero() || C1->isMinusOne())) {
-        return (CmpLHS == TrueVal) ? SPF_ABS : SPF_NABS;
+        return {(CmpLHS == TrueVal) ? SPF_ABS : SPF_NABS, SPNB_NA, false};
        }
  
        // ABS(X) ==> (X <s 0) ? -X : X and (X <s 1) ? -X : X
        // NABS(X) ==> (X <s 0) ? X : -X and (X <s 1) ? X : -X
        if (Pred == ICmpInst::ICMP_SLT && (C1->isZero() || C1->isOne())) {
-        return (CmpLHS == FalseVal) ? SPF_ABS : SPF_NABS;
+        return {(CmpLHS == FalseVal) ? SPF_ABS : SPF_NABS, SPNB_NA, false};
        }
      }
      
@@ -3548,24 +3807,36 @@ static SelectPatternFlavor matchSelectPattern(ICmpInst::Predicate Pred,
             match(CmpLHS, m_Not(m_Specific(TrueVal))))) {
          LHS = TrueVal;
          RHS = FalseVal;
-        return SPF_SMIN;
+        return {SPF_SMIN, SPNB_NA, false};
        }
      }
    }
  
    // TODO: (X > 4) ? X : 5   -->  (X >= 5) ? X : 5  -->  MAX(X, 5)
  
-  return SPF_UNKNOWN;
+  return {SPF_UNKNOWN, SPNB_NA, false};
  }
  
-static Constant *lookThroughCast(ICmpInst *CmpI, Value *V1, Value *V2,
-                                 Instruction::CastOps *CastOp) {
+static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2,
+                              Instruction::CastOps *CastOp) {
    CastInst *CI = dyn_cast<CastInst>(V1);
    Constant *C = dyn_cast<Constant>(V2);
-  if (!CI || !C)
+  CastInst *CI2 = dyn_cast<CastInst>(V2);
+  if (!CI)
      return nullptr;
    *CastOp = CI->getOpcode();
  
+  if (CI2) {
+    // If V1 and V2 are both the same cast from the same type, we can look
+    // through V1.
+    if (CI2->getOpcode() == CI->getOpcode() &&
+        CI2->getSrcTy() == CI->getSrcTy())
+      return CI2->getOperand(0);
+    return nullptr;
+  } else if (!C) {
+    return nullptr;
+  }
+
    if (isa<SExtInst>(CI) && CmpI->isSigned()) {
      Constant *T = ConstantExpr::getTrunc(C, CI->getSrcTy());
      // This is only valid if the truncated value can be sign-extended
@@ -3580,39 +3851,60 @@ static Constant *lookThroughCast(ICmpInst *CmpI, Value *V1, Value *V2,
    if (isa<TruncInst>(CI))
      return ConstantExpr::getIntegerCast(C, CI->getSrcTy(), CmpI->isSigned());
  
+  if (isa<FPToUIInst>(CI))
+    return ConstantExpr::getUIToFP(C, CI->getSrcTy(), true);
+
+  if (isa<FPToSIInst>(CI))
+    return ConstantExpr::getSIToFP(C, CI->getSrcTy(), true);
+
+  if (isa<UIToFPInst>(CI))
+    return ConstantExpr::getFPToUI(C, CI->getSrcTy(), true);
+
+  if (isa<SIToFPInst>(CI))
+    return ConstantExpr::getFPToSI(C, CI->getSrcTy(), true);
+
+  if (isa<FPTruncInst>(CI))
+    return ConstantExpr::getFPExtend(C, CI->getSrcTy(), true);
+
+  if (isa<FPExtInst>(CI))
+    return ConstantExpr::getFPTrunc(C, CI->getSrcTy(), true);
+
    return nullptr;
  }
  
-SelectPatternFlavor llvm::matchSelectPattern(Value *V,
+SelectPatternResult llvm::matchSelectPattern(Value *V,
                                               Value *&LHS, Value *&RHS,
                                               Instruction::CastOps *CastOp) {
    SelectInst *SI = dyn_cast<SelectInst>(V);
-  if (!SI) return SPF_UNKNOWN;
+  if (!SI) return {SPF_UNKNOWN, SPNB_NA, false};
  
-  ICmpInst *CmpI = dyn_cast<ICmpInst>(SI->getCondition());
-  if (!CmpI) return SPF_UNKNOWN;
+  CmpInst *CmpI = dyn_cast<CmpInst>(SI->getCondition());
+  if (!CmpI) return {SPF_UNKNOWN, SPNB_NA, false};
  
-  ICmpInst::Predicate Pred = CmpI->getPredicate();
+  CmpInst::Predicate Pred = CmpI->getPredicate();
    Value *CmpLHS = CmpI->getOperand(0);
    Value *CmpRHS = CmpI->getOperand(1);
    Value *TrueVal = SI->getTrueValue();
    Value *FalseVal = SI->getFalseValue();
+  FastMathFlags FMF;
+  if (isa<FPMathOperator>(CmpI))
+    FMF = CmpI->getFastMathFlags();
  
    // Bail out early.
    if (CmpI->isEquality())
-    return SPF_UNKNOWN;
+    return {SPF_UNKNOWN, SPNB_NA, false};
  
    // Deal with type mismatches.
    if (CastOp && CmpLHS->getType() != TrueVal->getType()) {
-    if (Constant *C = lookThroughCast(CmpI, TrueVal, FalseVal, CastOp))
-      return ::matchSelectPattern(Pred, CmpLHS, CmpRHS,
+    if (Value *C = lookThroughCast(CmpI, TrueVal, FalseVal, CastOp))
+      return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS,
                                    cast<CastInst>(TrueVal)->getOperand(0), C,
                                    LHS, RHS);
-    if (Constant *C = lookThroughCast(CmpI, FalseVal, TrueVal, CastOp))
-      return ::matchSelectPattern(Pred, CmpLHS, CmpRHS,
+    if (Value *C = lookThroughCast(CmpI, FalseVal, TrueVal, CastOp))
+      return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS,
                                    C, cast<CastInst>(FalseVal)->getOperand(0),
                                    LHS, RHS);
    }
-  return ::matchSelectPattern(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal,
+  return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS, TrueVal, FalseVal,
                                LHS, RHS);
  }