1 //===- InstCombineCompares.cpp --------------------------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the visitICmp and visitFCmp functions.
12 //===----------------------------------------------------------------------===//
14 #include "InstCombineInternal.h"
15 #include "llvm/ADT/APSInt.h"
16 #include "llvm/ADT/SetVector.h"
17 #include "llvm/ADT/Statistic.h"
18 #include "llvm/Analysis/ConstantFolding.h"
19 #include "llvm/Analysis/InstructionSimplify.h"
20 #include "llvm/Analysis/MemoryBuiltins.h"
21 #include "llvm/IR/ConstantRange.h"
22 #include "llvm/IR/DataLayout.h"
23 #include "llvm/IR/GetElementPtrTypeIterator.h"
24 #include "llvm/IR/IntrinsicInst.h"
25 #include "llvm/IR/PatternMatch.h"
26 #include "llvm/Support/CommandLine.h"
27 #include "llvm/Support/Debug.h"
28 #include "llvm/Analysis/TargetLibraryInfo.h"
31 using namespace PatternMatch;
33 #define DEBUG_TYPE "instcombine"
35 // How many times is a select replaced by one of its operands?
36 STATISTIC(NumSel, "Number of select opts");
38 // Initialization Routines
40 static ConstantInt *getOne(Constant *C) {
41 return ConstantInt::get(cast<IntegerType>(C->getType()), 1);
44 static ConstantInt *ExtractElement(Constant *V, Constant *Idx) {
45 return cast<ConstantInt>(ConstantExpr::getExtractElement(V, Idx));
48 static bool HasAddOverflow(ConstantInt *Result,
49 ConstantInt *In1, ConstantInt *In2,
52 return Result->getValue().ult(In1->getValue());
54 if (In2->isNegative())
55 return Result->getValue().sgt(In1->getValue());
56 return Result->getValue().slt(In1->getValue());
59 /// AddWithOverflow - Compute Result = In1+In2, returning true if the result
60 /// overflowed for this type.
61 static bool AddWithOverflow(Constant *&Result, Constant *In1,
62 Constant *In2, bool IsSigned = false) {
63 Result = ConstantExpr::getAdd(In1, In2);
65 if (VectorType *VTy = dyn_cast<VectorType>(In1->getType())) {
66 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
67 Constant *Idx = ConstantInt::get(Type::getInt32Ty(In1->getContext()), i);
68 if (HasAddOverflow(ExtractElement(Result, Idx),
69 ExtractElement(In1, Idx),
70 ExtractElement(In2, Idx),
77 return HasAddOverflow(cast<ConstantInt>(Result),
78 cast<ConstantInt>(In1), cast<ConstantInt>(In2),
82 static bool HasSubOverflow(ConstantInt *Result,
83 ConstantInt *In1, ConstantInt *In2,
86 return Result->getValue().ugt(In1->getValue());
88 if (In2->isNegative())
89 return Result->getValue().slt(In1->getValue());
91 return Result->getValue().sgt(In1->getValue());
94 /// SubWithOverflow - Compute Result = In1-In2, returning true if the result
95 /// overflowed for this type.
96 static bool SubWithOverflow(Constant *&Result, Constant *In1,
97 Constant *In2, bool IsSigned = false) {
98 Result = ConstantExpr::getSub(In1, In2);
100 if (VectorType *VTy = dyn_cast<VectorType>(In1->getType())) {
101 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
102 Constant *Idx = ConstantInt::get(Type::getInt32Ty(In1->getContext()), i);
103 if (HasSubOverflow(ExtractElement(Result, Idx),
104 ExtractElement(In1, Idx),
105 ExtractElement(In2, Idx),
112 return HasSubOverflow(cast<ConstantInt>(Result),
113 cast<ConstantInt>(In1), cast<ConstantInt>(In2),
117 /// isSignBitCheck - Given an exploded icmp instruction, return true if the
118 /// comparison only checks the sign bit. If it only checks the sign bit, set
119 /// TrueIfSigned if the result of the comparison is true when the input value is
121 static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS,
122 bool &TrueIfSigned) {
124 case ICmpInst::ICMP_SLT: // True if LHS s< 0
126 return RHS->isZero();
127 case ICmpInst::ICMP_SLE: // True if LHS s<= RHS and RHS == -1
129 return RHS->isAllOnesValue();
130 case ICmpInst::ICMP_SGT: // True if LHS s> -1
131 TrueIfSigned = false;
132 return RHS->isAllOnesValue();
133 case ICmpInst::ICMP_UGT:
134 // True if LHS u> RHS and RHS == high-bit-mask - 1
136 return RHS->isMaxValue(true);
137 case ICmpInst::ICMP_UGE:
138 // True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc)
140 return RHS->getValue().isSignBit();
146 /// Returns true if the exploded icmp can be expressed as a signed comparison
147 /// to zero and updates the predicate accordingly.
148 /// The signedness of the comparison is preserved.
149 static bool isSignTest(ICmpInst::Predicate &pred, const ConstantInt *RHS) {
150 if (!ICmpInst::isSigned(pred))
154 return ICmpInst::isRelational(pred);
157 if (pred == ICmpInst::ICMP_SLT) {
158 pred = ICmpInst::ICMP_SLE;
161 } else if (RHS->isAllOnesValue()) {
162 if (pred == ICmpInst::ICMP_SGT) {
163 pred = ICmpInst::ICMP_SGE;
171 // isHighOnes - Return true if the constant is of the form 1+0+.
172 // This is the same as lowones(~X).
173 static bool isHighOnes(const ConstantInt *CI) {
174 return (~CI->getValue() + 1).isPowerOf2();
177 /// ComputeSignedMinMaxValuesFromKnownBits - Given a signed integer type and a
178 /// set of known zero and one bits, compute the maximum and minimum values that
179 /// could have the specified known zero and known one bits, returning them in
181 static void ComputeSignedMinMaxValuesFromKnownBits(const APInt& KnownZero,
182 const APInt& KnownOne,
183 APInt& Min, APInt& Max) {
184 assert(KnownZero.getBitWidth() == KnownOne.getBitWidth() &&
185 KnownZero.getBitWidth() == Min.getBitWidth() &&
186 KnownZero.getBitWidth() == Max.getBitWidth() &&
187 "KnownZero, KnownOne and Min, Max must have equal bitwidth.");
188 APInt UnknownBits = ~(KnownZero|KnownOne);
190 // The minimum value is when all unknown bits are zeros, EXCEPT for the sign
191 // bit if it is unknown.
193 Max = KnownOne|UnknownBits;
195 if (UnknownBits.isNegative()) { // Sign bit is unknown
196 Min.setBit(Min.getBitWidth()-1);
197 Max.clearBit(Max.getBitWidth()-1);
201 // ComputeUnsignedMinMaxValuesFromKnownBits - Given an unsigned integer type and
202 // a set of known zero and one bits, compute the maximum and minimum values that
203 // could have the specified known zero and known one bits, returning them in
205 static void ComputeUnsignedMinMaxValuesFromKnownBits(const APInt &KnownZero,
206 const APInt &KnownOne,
207 APInt &Min, APInt &Max) {
208 assert(KnownZero.getBitWidth() == KnownOne.getBitWidth() &&
209 KnownZero.getBitWidth() == Min.getBitWidth() &&
210 KnownZero.getBitWidth() == Max.getBitWidth() &&
211 "Ty, KnownZero, KnownOne and Min, Max must have equal bitwidth.");
212 APInt UnknownBits = ~(KnownZero|KnownOne);
214 // The minimum value is when the unknown bits are all zeros.
216 // The maximum value is when the unknown bits are all ones.
217 Max = KnownOne|UnknownBits;
220 /// FoldCmpLoadFromIndexedGlobal - Called we see this pattern:
221 /// cmp pred (load (gep GV, ...)), cmpcst
222 /// where GV is a global variable with a constant initializer. Try to simplify
223 /// this into some simple computation that does not need the load. For example
224 /// we can optimize "icmp eq (load (gep "foo", 0, i)), 0" into "icmp eq i, 3".
226 /// If AndCst is non-null, then the loaded value is masked with that constant
227 /// before doing the comparison. This handles cases like "A[i]&4 == 0".
228 Instruction *InstCombiner::
229 FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
230 CmpInst &ICI, ConstantInt *AndCst) {
231 Constant *Init = GV->getInitializer();
232 if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init))
235 uint64_t ArrayElementCount = Init->getType()->getArrayNumElements();
236 if (ArrayElementCount > 1024) return nullptr; // Don't blow up on huge arrays.
238 // There are many forms of this optimization we can handle, for now, just do
239 // the simple index into a single-dimensional array.
241 // Require: GEP GV, 0, i {{, constant indices}}
242 if (GEP->getNumOperands() < 3 ||
243 !isa<ConstantInt>(GEP->getOperand(1)) ||
244 !cast<ConstantInt>(GEP->getOperand(1))->isZero() ||
245 isa<Constant>(GEP->getOperand(2)))
248 // Check that indices after the variable are constants and in-range for the
249 // type they index. Collect the indices. This is typically for arrays of
251 SmallVector<unsigned, 4> LaterIndices;
253 Type *EltTy = Init->getType()->getArrayElementType();
254 for (unsigned i = 3, e = GEP->getNumOperands(); i != e; ++i) {
255 ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(i));
256 if (!Idx) return nullptr; // Variable index.
258 uint64_t IdxVal = Idx->getZExtValue();
259 if ((unsigned)IdxVal != IdxVal) return nullptr; // Too large array index.
261 if (StructType *STy = dyn_cast<StructType>(EltTy))
262 EltTy = STy->getElementType(IdxVal);
263 else if (ArrayType *ATy = dyn_cast<ArrayType>(EltTy)) {
264 if (IdxVal >= ATy->getNumElements()) return nullptr;
265 EltTy = ATy->getElementType();
267 return nullptr; // Unknown type.
270 LaterIndices.push_back(IdxVal);
273 enum { Overdefined = -3, Undefined = -2 };
275 // Variables for our state machines.
277 // FirstTrueElement/SecondTrueElement - Used to emit a comparison of the form
278 // "i == 47 | i == 87", where 47 is the first index the condition is true for,
279 // and 87 is the second (and last) index. FirstTrueElement is -2 when
280 // undefined, otherwise set to the first true element. SecondTrueElement is
281 // -2 when undefined, -3 when overdefined and >= 0 when that index is true.
282 int FirstTrueElement = Undefined, SecondTrueElement = Undefined;
284 // FirstFalseElement/SecondFalseElement - Used to emit a comparison of the
285 // form "i != 47 & i != 87". Same state transitions as for true elements.
286 int FirstFalseElement = Undefined, SecondFalseElement = Undefined;
288 /// TrueRangeEnd/FalseRangeEnd - In conjunction with First*Element, these
289 /// define a state machine that triggers for ranges of values that the index
290 /// is true or false for. This triggers on things like "abbbbc"[i] == 'b'.
291 /// This is -2 when undefined, -3 when overdefined, and otherwise the last
292 /// index in the range (inclusive). We use -2 for undefined here because we
293 /// use relative comparisons and don't want 0-1 to match -1.
294 int TrueRangeEnd = Undefined, FalseRangeEnd = Undefined;
296 // MagicBitvector - This is a magic bitvector where we set a bit if the
297 // comparison is true for element 'i'. If there are 64 elements or less in
298 // the array, this will fully represent all the comparison results.
299 uint64_t MagicBitvector = 0;
301 // Scan the array and see if one of our patterns matches.
302 Constant *CompareRHS = cast<Constant>(ICI.getOperand(1));
303 for (unsigned i = 0, e = ArrayElementCount; i != e; ++i) {
304 Constant *Elt = Init->getAggregateElement(i);
305 if (!Elt) return nullptr;
307 // If this is indexing an array of structures, get the structure element.
308 if (!LaterIndices.empty())
309 Elt = ConstantExpr::getExtractValue(Elt, LaterIndices);
311 // If the element is masked, handle it.
312 if (AndCst) Elt = ConstantExpr::getAnd(Elt, AndCst);
314 // Find out if the comparison would be true or false for the i'th element.
315 Constant *C = ConstantFoldCompareInstOperands(ICI.getPredicate(), Elt,
316 CompareRHS, DL, TLI);
317 // If the result is undef for this element, ignore it.
318 if (isa<UndefValue>(C)) {
319 // Extend range state machines to cover this element in case there is an
320 // undef in the middle of the range.
321 if (TrueRangeEnd == (int)i-1)
323 if (FalseRangeEnd == (int)i-1)
328 // If we can't compute the result for any of the elements, we have to give
329 // up evaluating the entire conditional.
330 if (!isa<ConstantInt>(C)) return nullptr;
332 // Otherwise, we know if the comparison is true or false for this element,
333 // update our state machines.
334 bool IsTrueForElt = !cast<ConstantInt>(C)->isZero();
336 // State machine for single/double/range index comparison.
338 // Update the TrueElement state machine.
339 if (FirstTrueElement == Undefined)
340 FirstTrueElement = TrueRangeEnd = i; // First true element.
342 // Update double-compare state machine.
343 if (SecondTrueElement == Undefined)
344 SecondTrueElement = i;
346 SecondTrueElement = Overdefined;
348 // Update range state machine.
349 if (TrueRangeEnd == (int)i-1)
352 TrueRangeEnd = Overdefined;
355 // Update the FalseElement state machine.
356 if (FirstFalseElement == Undefined)
357 FirstFalseElement = FalseRangeEnd = i; // First false element.
359 // Update double-compare state machine.
360 if (SecondFalseElement == Undefined)
361 SecondFalseElement = i;
363 SecondFalseElement = Overdefined;
365 // Update range state machine.
366 if (FalseRangeEnd == (int)i-1)
369 FalseRangeEnd = Overdefined;
373 // If this element is in range, update our magic bitvector.
374 if (i < 64 && IsTrueForElt)
375 MagicBitvector |= 1ULL << i;
377 // If all of our states become overdefined, bail out early. Since the
378 // predicate is expensive, only check it every 8 elements. This is only
379 // really useful for really huge arrays.
380 if ((i & 8) == 0 && i >= 64 && SecondTrueElement == Overdefined &&
381 SecondFalseElement == Overdefined && TrueRangeEnd == Overdefined &&
382 FalseRangeEnd == Overdefined)
386 // Now that we've scanned the entire array, emit our new comparison(s). We
387 // order the state machines in complexity of the generated code.
388 Value *Idx = GEP->getOperand(2);
390 // If the index is larger than the pointer size of the target, truncate the
391 // index down like the GEP would do implicitly. We don't have to do this for
392 // an inbounds GEP because the index can't be out of range.
393 if (!GEP->isInBounds()) {
394 Type *IntPtrTy = DL.getIntPtrType(GEP->getType());
395 unsigned PtrSize = IntPtrTy->getIntegerBitWidth();
396 if (Idx->getType()->getPrimitiveSizeInBits() > PtrSize)
397 Idx = Builder->CreateTrunc(Idx, IntPtrTy);
400 // If the comparison is only true for one or two elements, emit direct
402 if (SecondTrueElement != Overdefined) {
403 // None true -> false.
404 if (FirstTrueElement == Undefined)
405 return ReplaceInstUsesWith(ICI, Builder->getFalse());
407 Value *FirstTrueIdx = ConstantInt::get(Idx->getType(), FirstTrueElement);
409 // True for one element -> 'i == 47'.
410 if (SecondTrueElement == Undefined)
411 return new ICmpInst(ICmpInst::ICMP_EQ, Idx, FirstTrueIdx);
413 // True for two elements -> 'i == 47 | i == 72'.
414 Value *C1 = Builder->CreateICmpEQ(Idx, FirstTrueIdx);
415 Value *SecondTrueIdx = ConstantInt::get(Idx->getType(), SecondTrueElement);
416 Value *C2 = Builder->CreateICmpEQ(Idx, SecondTrueIdx);
417 return BinaryOperator::CreateOr(C1, C2);
420 // If the comparison is only false for one or two elements, emit direct
422 if (SecondFalseElement != Overdefined) {
423 // None false -> true.
424 if (FirstFalseElement == Undefined)
425 return ReplaceInstUsesWith(ICI, Builder->getTrue());
427 Value *FirstFalseIdx = ConstantInt::get(Idx->getType(), FirstFalseElement);
429 // False for one element -> 'i != 47'.
430 if (SecondFalseElement == Undefined)
431 return new ICmpInst(ICmpInst::ICMP_NE, Idx, FirstFalseIdx);
433 // False for two elements -> 'i != 47 & i != 72'.
434 Value *C1 = Builder->CreateICmpNE(Idx, FirstFalseIdx);
435 Value *SecondFalseIdx = ConstantInt::get(Idx->getType(),SecondFalseElement);
436 Value *C2 = Builder->CreateICmpNE(Idx, SecondFalseIdx);
437 return BinaryOperator::CreateAnd(C1, C2);
440 // If the comparison can be replaced with a range comparison for the elements
441 // where it is true, emit the range check.
442 if (TrueRangeEnd != Overdefined) {
443 assert(TrueRangeEnd != FirstTrueElement && "Should emit single compare");
445 // Generate (i-FirstTrue) <u (TrueRangeEnd-FirstTrue+1).
446 if (FirstTrueElement) {
447 Value *Offs = ConstantInt::get(Idx->getType(), -FirstTrueElement);
448 Idx = Builder->CreateAdd(Idx, Offs);
451 Value *End = ConstantInt::get(Idx->getType(),
452 TrueRangeEnd-FirstTrueElement+1);
453 return new ICmpInst(ICmpInst::ICMP_ULT, Idx, End);
456 // False range check.
457 if (FalseRangeEnd != Overdefined) {
458 assert(FalseRangeEnd != FirstFalseElement && "Should emit single compare");
459 // Generate (i-FirstFalse) >u (FalseRangeEnd-FirstFalse).
460 if (FirstFalseElement) {
461 Value *Offs = ConstantInt::get(Idx->getType(), -FirstFalseElement);
462 Idx = Builder->CreateAdd(Idx, Offs);
465 Value *End = ConstantInt::get(Idx->getType(),
466 FalseRangeEnd-FirstFalseElement);
467 return new ICmpInst(ICmpInst::ICMP_UGT, Idx, End);
470 // If a magic bitvector captures the entire comparison state
471 // of this load, replace it with computation that does:
472 // ((magic_cst >> i) & 1) != 0
476 // Look for an appropriate type:
477 // - The type of Idx if the magic fits
478 // - The smallest fitting legal type if we have a DataLayout
480 if (ArrayElementCount <= Idx->getType()->getIntegerBitWidth())
483 Ty = DL.getSmallestLegalIntType(Init->getContext(), ArrayElementCount);
486 Value *V = Builder->CreateIntCast(Idx, Ty, false);
487 V = Builder->CreateLShr(ConstantInt::get(Ty, MagicBitvector), V);
488 V = Builder->CreateAnd(ConstantInt::get(Ty, 1), V);
489 return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0));
496 /// EvaluateGEPOffsetExpression - Return a value that can be used to compare
497 /// the *offset* implied by a GEP to zero. For example, if we have &A[i], we
498 /// want to return 'i' for "icmp ne i, 0". Note that, in general, indices can
499 /// be complex, and scales are involved. The above expression would also be
500 /// legal to codegen as "icmp ne (i*4), 0" (assuming A is a pointer to i32).
501 /// This later form is less amenable to optimization though, and we are allowed
502 /// to generate the first by knowing that pointer arithmetic doesn't overflow.
504 /// If we can't emit an optimized form for this expression, this returns null.
506 static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC,
507 const DataLayout &DL) {
508 gep_type_iterator GTI = gep_type_begin(GEP);
510 // Check to see if this gep only has a single variable index. If so, and if
511 // any constant indices are a multiple of its scale, then we can compute this
512 // in terms of the scale of the variable index. For example, if the GEP
513 // implies an offset of "12 + i*4", then we can codegen this as "3 + i",
514 // because the expression will cross zero at the same point.
515 unsigned i, e = GEP->getNumOperands();
517 for (i = 1; i != e; ++i, ++GTI) {
518 if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) {
519 // Compute the aggregate offset of constant indices.
520 if (CI->isZero()) continue;
522 // Handle a struct index, which adds its field offset to the pointer.
523 if (StructType *STy = dyn_cast<StructType>(*GTI)) {
524 Offset += DL.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
526 uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType());
527 Offset += Size*CI->getSExtValue();
530 // Found our variable index.
535 // If there are no variable indices, we must have a constant offset, just
536 // evaluate it the general way.
537 if (i == e) return nullptr;
539 Value *VariableIdx = GEP->getOperand(i);
540 // Determine the scale factor of the variable element. For example, this is
541 // 4 if the variable index is into an array of i32.
542 uint64_t VariableScale = DL.getTypeAllocSize(GTI.getIndexedType());
544 // Verify that there are no other variable indices. If so, emit the hard way.
545 for (++i, ++GTI; i != e; ++i, ++GTI) {
546 ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i));
547 if (!CI) return nullptr;
549 // Compute the aggregate offset of constant indices.
550 if (CI->isZero()) continue;
552 // Handle a struct index, which adds its field offset to the pointer.
553 if (StructType *STy = dyn_cast<StructType>(*GTI)) {
554 Offset += DL.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
556 uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType());
557 Offset += Size*CI->getSExtValue();
561 // Okay, we know we have a single variable index, which must be a
562 // pointer/array/vector index. If there is no offset, life is simple, return
564 Type *IntPtrTy = DL.getIntPtrType(GEP->getOperand(0)->getType());
565 unsigned IntPtrWidth = IntPtrTy->getIntegerBitWidth();
567 // Cast to intptrty in case a truncation occurs. If an extension is needed,
568 // we don't need to bother extending: the extension won't affect where the
569 // computation crosses zero.
570 if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth) {
571 VariableIdx = IC.Builder->CreateTrunc(VariableIdx, IntPtrTy);
576 // Otherwise, there is an index. The computation we will do will be modulo
577 // the pointer size, so get it.
578 uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);
580 Offset &= PtrSizeMask;
581 VariableScale &= PtrSizeMask;
583 // To do this transformation, any constant index must be a multiple of the
584 // variable scale factor. For example, we can evaluate "12 + 4*i" as "3 + i",
585 // but we can't evaluate "10 + 3*i" in terms of i. Check that the offset is a
586 // multiple of the variable scale.
587 int64_t NewOffs = Offset / (int64_t)VariableScale;
588 if (Offset != NewOffs*(int64_t)VariableScale)
591 // Okay, we can do this evaluation. Start by converting the index to intptr.
592 if (VariableIdx->getType() != IntPtrTy)
593 VariableIdx = IC.Builder->CreateIntCast(VariableIdx, IntPtrTy,
595 Constant *OffsetVal = ConstantInt::get(IntPtrTy, NewOffs);
596 return IC.Builder->CreateAdd(VariableIdx, OffsetVal, "offset");
599 /// Returns true if we can rewrite Start as a GEP with pointer Base
600 /// and some integer offset. The nodes that need to be re-written
601 /// for this transformation will be added to Explored.
602 static bool canRewriteGEPAsOffset(Value *Start, Value *Base,
603 const DataLayout &DL,
604 SetVector<Value *> &Explored) {
605 SmallVector<Value *, 16> WorkList(1, Start);
606 Explored.insert(Base);
608 // The following traversal gives us an order which can be used
609 // when doing the final transformation. Since in the final
610 // transformation we create the PHI replacement instructions first,
611 // we don't have to get them in any particular order.
613 // However, for other instructions we will have to traverse the
614 // operands of an instruction first, which means that we have to
615 // do a post-order traversal.
616 while (!WorkList.empty()) {
617 SetVector<PHINode *> PHIs;
619 while (!WorkList.empty()) {
620 if (Explored.size() >= 100)
623 Value *V = WorkList.back();
625 if (Explored.count(V) != 0) {
630 if (!isa<IntToPtrInst>(V) && !isa<PtrToIntInst>(V) &&
631 !isa<GEPOperator>(V) && !isa<PHINode>(V))
632 // We've found some value that we can't explore which is different from
633 // the base. Therefore we can't do this transformation.
636 if (isa<IntToPtrInst>(V) || isa<PtrToIntInst>(V)) {
637 auto *CI = dyn_cast<CastInst>(V);
638 if (!CI->isNoopCast(DL))
641 if (Explored.count(CI->getOperand(0)) == 0)
642 WorkList.push_back(CI->getOperand(0));
645 if (auto *GEP = dyn_cast<GEPOperator>(V)) {
646 // We're limiting the GEP to having one index. This will preserve
647 // the original pointer type. We could handle more cases in the
649 if (GEP->getNumIndices() != 1 || !GEP->isInBounds())
652 if (Explored.count(GEP->getOperand(0)) == 0)
653 WorkList.push_back(GEP->getOperand(0));
656 if (WorkList.back() == V) {
658 // We've finished visiting this node, mark it as such.
662 if (auto *PN = dyn_cast<PHINode>(V)) {
668 // Explore the PHI nodes further.
669 for (auto *PN : PHIs)
670 for (Value *Op : PN->incoming_values())
671 if (Explored.count(Op) == 0)
672 WorkList.push_back(Op);
675 // Make sure that we can do this. Since we can't insert GEPs in a basic
676 // block before a PHI node, we can't easily do this transformation if
677 // we have PHI node users of transformed instructions.
678 for (Value *Val : Explored) {
679 for (Value *Use : Val->uses()) {
681 auto *PHI = dyn_cast<PHINode>(Use);
682 auto *Inst = dyn_cast<Instruction>(Val);
684 if (Inst == Base || Inst == PHI || !Inst || !PHI ||
685 Explored.count(PHI) == 0)
688 if (PHI->getParent() == Inst->getParent())
695 // Sets the appropriate insert point on Builder where we can add
696 // a replacement Instruction for V (if that is possible).
697 static void setInsertionPoint(IRBuilder<> &Builder, Value *V,
698 bool Before = true) {
699 if (auto *PHI = dyn_cast<PHINode>(V)) {
700 Builder.SetInsertPoint(&*PHI->getParent()->getFirstInsertionPt());
703 if (auto *I = dyn_cast<Instruction>(V)) {
705 I = &*std::next(I->getIterator());
706 Builder.SetInsertPoint(I);
709 if (auto *A = dyn_cast<Argument>(V)) {
710 // Set the insertion point in the entry block.
711 BasicBlock &Entry = A->getParent()->getEntryBlock();
712 Builder.SetInsertPoint(&*Entry.getFirstInsertionPt());
715 // Otherwise, this is a constant and we don't need to set a new
717 assert(isa<ConstantExpr>(V) && "Setting insertion point for unknown value!");
720 /// Returns a re-written value of Start as an indexed GEP using Base as a
722 static Value *rewriteGEPAsOffset(Value *Start, Value *Base,
723 const DataLayout &DL,
724 SetVector<Value *> &Explored) {
725 // Perform all the substitutions. This is a bit tricky because we can
726 // have cycles in our use-def chains.
727 // 1. Create the PHI nodes without any incoming values.
728 // 2. Create all the other values.
729 // 3. Add the edges for the PHI nodes.
730 // 4. Emit GEPs to get the original pointers.
731 // 5. Remove the original instructions.
732 Type *IndexType = IntegerType::get(
733 Base->getContext(), DL.getPointerTypeSizeInBits(Start->getType()));
735 DenseMap<Value *, Value *> NewInsts;
736 NewInsts[Base] = ConstantInt::getNullValue(IndexType);
738 // Create the new PHI nodes, without adding any incoming values.
739 for (Value *Val : Explored) {
742 // Create empty phi nodes. This avoids cyclic dependencies when creating
743 // the remaining instructions.
744 if (auto *PHI = dyn_cast<PHINode>(Val))
745 NewInsts[PHI] = PHINode::Create(IndexType, PHI->getNumIncomingValues(),
746 PHI->getName() + ".idx", PHI);
748 IRBuilder<> Builder(Base->getContext());
750 // Create all the other instructions.
751 for (Value *Val : Explored) {
753 if (NewInsts.find(Val) != NewInsts.end())
756 if (auto *CI = dyn_cast<CastInst>(Val)) {
757 NewInsts[CI] = NewInsts[CI->getOperand(0)];
760 if (auto *GEP = dyn_cast<GEPOperator>(Val)) {
761 Value *Index = NewInsts[GEP->getOperand(1)]
762 ? NewInsts[GEP->getOperand(1)]
763 : GEP->getOperand(1);
764 setInsertionPoint(Builder, GEP);
765 // Indices might need to be sign extended. GEPs will magically do
766 // this, but we need to do it ourselves here.
767 if (Index->getType()->getScalarSizeInBits() !=
768 NewInsts[GEP->getOperand(0)]->getType()->getScalarSizeInBits()) {
769 Index = Builder.CreateSExtOrTrunc(
770 Index, NewInsts[GEP->getOperand(0)]->getType(),
771 GEP->getOperand(0)->getName() + ".sext");
774 Builder.CreateAdd(NewInsts[GEP->getOperand(0)], Index,
775 GEP->getOperand(0)->getName() + ".add");
779 if (isa<PHINode>(Val))
782 llvm_unreachable("Unexpected instruction type");
785 // Add the incoming values to the PHI nodes.
786 for (Value *Val : Explored) {
789 // All the instructions have been created, we can now add edges to the
791 if (auto *PHI = dyn_cast<PHINode>(Val)) {
792 PHINode *NewPhi = static_cast<PHINode *>(NewInsts[PHI]);
793 for (unsigned I = 0, E = PHI->getNumIncomingValues(); I < E; ++I) {
794 Value *NewIncoming = PHI->getIncomingValue(I);
796 if (NewInsts.find(NewIncoming) != NewInsts.end())
797 NewIncoming = NewInsts[NewIncoming];
799 NewPhi->addIncoming(NewIncoming, PHI->getIncomingBlock(I));
804 // If required, create a inttoptr instruction.
805 Value *NewBase = Base;
806 setInsertionPoint(Builder, Base, false);
807 if (!Base->getType()->isPointerTy())
808 NewBase = Builder.CreateBitOrPointerCast(Base, Start->getType(),
809 Start->getName() + "to.ptr");
811 for (Value *Val : Explored) {
815 // Depending on the type, for external users we have to emit
816 // a GEP or a GEP + ptrtoint.
817 if (isa<Instruction>(NewInsts[Val]))
818 setInsertionPoint(Builder, NewInsts[Val], false);
820 setInsertionPoint(Builder, NewBase, false);
823 Builder.CreateInBoundsGEP(Start->getType()->getPointerElementType(),
824 NewBase, {NewInsts[Val]},
825 Val->getName() + ".ptr");
827 if (!Val->getType()->isPointerTy()) {
828 Value *Cast = Builder.CreatePointerCast(GEP, Val->getType(),
829 Val->getName() + ".conv");
832 Val->replaceAllUsesWith(GEP);
835 return NewInsts[Start];
838 /// Looks through GEPs, IntToPtrInsts and PtrToIntInsts in order to express
839 /// the input Value as a GEP constant indexed GEP. Returns a pair containing
840 /// the GEPs Pointer and Index.
841 static std::pair<Value *, Value *>
842 getAsConstantIndexedAddress(Value *V, const DataLayout &DL) {
844 IntegerType::get(V->getContext(),
845 DL.getPointerTypeSizeInBits(V->getType()));
847 Constant *Index = ConstantInt::getNullValue(IndexType);
849 if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
850 // We accept only inbouds GEPs here to exclude the possibility of
852 if (!GEP->isInBounds())
854 if (GEP->hasAllConstantIndices() && GEP->getNumIndices() == 1) {
855 V = GEP->getOperand(0);
856 Constant *GEPIndex = static_cast<Constant *>(GEP->getOperand(1));
857 Index = ConstantExpr::getAdd(
858 Index, ConstantExpr::getSExtOrBitCast(GEPIndex, IndexType));
863 if (auto *CI = dyn_cast<IntToPtrInst>(V)) {
864 if (!CI->isNoopCast(DL))
866 V = CI->getOperand(0);
869 if (auto *CI = dyn_cast<PtrToIntInst>(V)) {
870 if (!CI->isNoopCast(DL))
872 V = CI->getOperand(0);
880 // Converts (CMP GEPLHS, RHS) if this change would make RHS a constant.
881 // We can look through PHIs, GEPs and casts in order to determine a
882 // common base between GEPLHS and RHS.
883 static Instruction *transformToIndexedCompare(GEPOperator *GEPLHS, Value *RHS,
884 ICmpInst::Predicate Cond,
885 const DataLayout &DL) {
886 if (!GEPLHS->hasAllConstantIndices())
889 Value *PtrBase, *Index;
890 std::tie(PtrBase, Index) = getAsConstantIndexedAddress(GEPLHS, DL);
892 // The set of nodes that will take part in this transformation.
893 SetVector<Value *> Nodes;
895 if (!canRewriteGEPAsOffset(RHS, PtrBase, DL, Nodes))
898 // We know we can re-write this as
899 // ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2)
900 // Since we've only looked through inbouds GEPs we know that we
901 // can't have overflow on either side. We can therefore re-write
903 // OFFSET1 cmp OFFSET2
904 Value *NewRHS = rewriteGEPAsOffset(RHS, PtrBase, DL, Nodes);
906 // RewriteGEPAsOffset has replaced RHS and all of its uses with a re-written
907 // GEP having PtrBase as the pointer base, and has returned in NewRHS the
908 // offset. Since Index is the offset of LHS to the base pointer, we will now
909 // compare the offsets instead of comparing the pointers.
910 return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Index, NewRHS);
913 /// FoldGEPICmp - Fold comparisons between a GEP instruction and something
914 /// else. At this point we know that the GEP is on the LHS of the comparison.
915 Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
916 ICmpInst::Predicate Cond,
918 // Don't transform signed compares of GEPs into index compares. Even if the
919 // GEP is inbounds, the final add of the base pointer can have signed overflow
920 // and would change the result of the icmp.
921 // e.g. "&foo[0] <s &foo[1]" can't be folded to "true" because "foo" could be
922 // the maximum signed value for the pointer type.
923 if (ICmpInst::isSigned(Cond))
926 // Look through bitcasts and addrspacecasts. We do not however want to remove
928 if (!isa<GetElementPtrInst>(RHS))
929 RHS = RHS->stripPointerCasts();
931 Value *PtrBase = GEPLHS->getOperand(0);
932 if (PtrBase == RHS && GEPLHS->isInBounds()) {
933 // ((gep Ptr, OFFSET) cmp Ptr) ---> (OFFSET cmp 0).
934 // This transformation (ignoring the base and scales) is valid because we
935 // know pointers can't overflow since the gep is inbounds. See if we can
936 // output an optimized form.
937 Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, *this, DL);
939 // If not, synthesize the offset the hard way.
941 Offset = EmitGEPOffset(GEPLHS);
942 return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset,
943 Constant::getNullValue(Offset->getType()));
944 } else if (GEPOperator *GEPRHS = dyn_cast<GEPOperator>(RHS)) {
945 // If the base pointers are different, but the indices are the same, just
946 // compare the base pointer.
947 if (PtrBase != GEPRHS->getOperand(0)) {
948 bool IndicesTheSame = GEPLHS->getNumOperands()==GEPRHS->getNumOperands();
949 IndicesTheSame &= GEPLHS->getOperand(0)->getType() ==
950 GEPRHS->getOperand(0)->getType();
952 for (unsigned i = 1, e = GEPLHS->getNumOperands(); i != e; ++i)
953 if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) {
954 IndicesTheSame = false;
958 // If all indices are the same, just compare the base pointers.
960 return new ICmpInst(Cond, GEPLHS->getOperand(0), GEPRHS->getOperand(0));
962 // If we're comparing GEPs with two base pointers that only differ in type
963 // and both GEPs have only constant indices or just one use, then fold
964 // the compare with the adjusted indices.
965 if (GEPLHS->isInBounds() && GEPRHS->isInBounds() &&
966 (GEPLHS->hasAllConstantIndices() || GEPLHS->hasOneUse()) &&
967 (GEPRHS->hasAllConstantIndices() || GEPRHS->hasOneUse()) &&
968 PtrBase->stripPointerCasts() ==
969 GEPRHS->getOperand(0)->stripPointerCasts()) {
970 Value *LOffset = EmitGEPOffset(GEPLHS);
971 Value *ROffset = EmitGEPOffset(GEPRHS);
973 // If we looked through an addrspacecast between different sized address
974 // spaces, the LHS and RHS pointers are different sized
975 // integers. Truncate to the smaller one.
976 Type *LHSIndexTy = LOffset->getType();
977 Type *RHSIndexTy = ROffset->getType();
978 if (LHSIndexTy != RHSIndexTy) {
979 if (LHSIndexTy->getPrimitiveSizeInBits() <
980 RHSIndexTy->getPrimitiveSizeInBits()) {
981 ROffset = Builder->CreateTrunc(ROffset, LHSIndexTy);
983 LOffset = Builder->CreateTrunc(LOffset, RHSIndexTy);
986 Value *Cmp = Builder->CreateICmp(ICmpInst::getSignedPredicate(Cond),
988 return ReplaceInstUsesWith(I, Cmp);
991 // Otherwise, the base pointers are different and the indices are
992 // different. Try convert this to an indexed compare by looking through
994 return transformToIndexedCompare(GEPLHS, RHS, Cond, DL);
997 // If one of the GEPs has all zero indices, recurse.
998 if (GEPLHS->hasAllZeroIndices())
999 return FoldGEPICmp(GEPRHS, GEPLHS->getOperand(0),
1000 ICmpInst::getSwappedPredicate(Cond), I);
1002 // If the other GEP has all zero indices, recurse.
1003 if (GEPRHS->hasAllZeroIndices())
1004 return FoldGEPICmp(GEPLHS, GEPRHS->getOperand(0), Cond, I);
1006 bool GEPsInBounds = GEPLHS->isInBounds() && GEPRHS->isInBounds();
1007 if (GEPLHS->getNumOperands() == GEPRHS->getNumOperands()) {
1008 // If the GEPs only differ by one index, compare it.
1009 unsigned NumDifferences = 0; // Keep track of # differences.
1010 unsigned DiffOperand = 0; // The operand that differs.
1011 for (unsigned i = 1, e = GEPRHS->getNumOperands(); i != e; ++i)
1012 if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) {
1013 if (GEPLHS->getOperand(i)->getType()->getPrimitiveSizeInBits() !=
1014 GEPRHS->getOperand(i)->getType()->getPrimitiveSizeInBits()) {
1015 // Irreconcilable differences.
1019 if (NumDifferences++) break;
1024 if (NumDifferences == 0) // SAME GEP?
1025 return ReplaceInstUsesWith(I, // No comparison is needed here.
1026 Builder->getInt1(ICmpInst::isTrueWhenEqual(Cond)));
1028 else if (NumDifferences == 1 && GEPsInBounds) {
1029 Value *LHSV = GEPLHS->getOperand(DiffOperand);
1030 Value *RHSV = GEPRHS->getOperand(DiffOperand);
1031 // Make sure we do a signed comparison here.
1032 return new ICmpInst(ICmpInst::getSignedPredicate(Cond), LHSV, RHSV);
1036 // Only lower this if the icmp is the only user of the GEP or if we expect
1037 // the result to fold to a constant!
1038 if (GEPsInBounds && (isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) &&
1039 (isa<ConstantExpr>(GEPRHS) || GEPRHS->hasOneUse())) {
1040 // ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2) ---> (OFFSET1 cmp OFFSET2)
1041 Value *L = EmitGEPOffset(GEPLHS);
1042 Value *R = EmitGEPOffset(GEPRHS);
1043 return new ICmpInst(ICmpInst::getSignedPredicate(Cond), L, R);
1047 // Try convert this to an indexed compare by looking through PHIs/casts as a
1049 return transformToIndexedCompare(GEPLHS, RHS, Cond, DL);
1052 Instruction *InstCombiner::FoldAllocaCmp(ICmpInst &ICI, AllocaInst *Alloca,
1054 assert(ICI.isEquality() && "Cannot fold non-equality comparison.");
1056 // It would be tempting to fold away comparisons between allocas and any
1057 // pointer not based on that alloca (e.g. an argument). However, even
1058 // though such pointers cannot alias, they can still compare equal.
1060 // But LLVM doesn't specify where allocas get their memory, so if the alloca
1061 // doesn't escape we can argue that it's impossible to guess its value, and we
1062 // can therefore act as if any such guesses are wrong.
1064 // The code below checks that the alloca doesn't escape, and that it's only
1065 // used in a comparison once (the current instruction). The
1066 // single-comparison-use condition ensures that we're trivially folding all
1067 // comparisons against the alloca consistently, and avoids the risk of
1068 // erroneously folding a comparison of the pointer with itself.
1070 unsigned MaxIter = 32; // Break cycles and bound to constant-time.
1072 SmallVector<Use *, 32> Worklist;
1073 for (Use &U : Alloca->uses()) {
1074 if (Worklist.size() >= MaxIter)
1076 Worklist.push_back(&U);
1079 unsigned NumCmps = 0;
1080 while (!Worklist.empty()) {
1081 assert(Worklist.size() <= MaxIter);
1082 Use *U = Worklist.pop_back_val();
1083 Value *V = U->getUser();
1086 if (isa<BitCastInst>(V) || isa<GetElementPtrInst>(V) || isa<PHINode>(V) ||
1087 isa<SelectInst>(V)) {
1089 } else if (isa<LoadInst>(V)) {
1090 // Loading from the pointer doesn't escape it.
1092 } else if (auto *SI = dyn_cast<StoreInst>(V)) {
1093 // Storing *to* the pointer is fine, but storing the pointer escapes it.
1094 if (SI->getValueOperand() == U->get())
1097 } else if (isa<ICmpInst>(V)) {
1099 return nullptr; // Found more than one cmp.
1101 } else if (auto *Intrin = dyn_cast<IntrinsicInst>(V)) {
1102 switch (Intrin->getIntrinsicID()) {
1103 // These intrinsics don't escape or compare the pointer. Memset is safe
1104 // because we don't allow ptrtoint. Memcpy and memmove are safe because
1105 // we don't allow stores, so src cannot point to V.
1106 case Intrinsic::lifetime_start: case Intrinsic::lifetime_end:
1107 case Intrinsic::dbg_declare: case Intrinsic::dbg_value:
1108 case Intrinsic::memcpy: case Intrinsic::memmove: case Intrinsic::memset:
1116 for (Use &U : V->uses()) {
1117 if (Worklist.size() >= MaxIter)
1119 Worklist.push_back(&U);
1123 Type *CmpTy = CmpInst::makeCmpResultType(Other->getType());
1124 return ReplaceInstUsesWith(
1126 ConstantInt::get(CmpTy, !CmpInst::isTrueWhenEqual(ICI.getPredicate())));
1129 /// FoldICmpAddOpCst - Fold "icmp pred (X+CI), X".
1130 Instruction *InstCombiner::FoldICmpAddOpCst(Instruction &ICI,
1131 Value *X, ConstantInt *CI,
1132 ICmpInst::Predicate Pred) {
1133 // From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0,
1134 // so the values can never be equal. Similarly for all other "or equals"
1137 // (X+1) <u X --> X >u (MAXUINT-1) --> X == 255
1138 // (X+2) <u X --> X >u (MAXUINT-2) --> X > 253
1139 // (X+MAXUINT) <u X --> X >u (MAXUINT-MAXUINT) --> X != 0
1140 if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) {
1142 ConstantExpr::getSub(ConstantInt::getAllOnesValue(CI->getType()), CI);
1143 return new ICmpInst(ICmpInst::ICMP_UGT, X, R);
1146 // (X+1) >u X --> X <u (0-1) --> X != 255
1147 // (X+2) >u X --> X <u (0-2) --> X <u 254
1148 // (X+MAXUINT) >u X --> X <u (0-MAXUINT) --> X <u 1 --> X == 0
1149 if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE)
1150 return new ICmpInst(ICmpInst::ICMP_ULT, X, ConstantExpr::getNeg(CI));
1152 unsigned BitWidth = CI->getType()->getPrimitiveSizeInBits();
1153 ConstantInt *SMax = ConstantInt::get(X->getContext(),
1154 APInt::getSignedMaxValue(BitWidth));
1156 // (X+ 1) <s X --> X >s (MAXSINT-1) --> X == 127
1157 // (X+ 2) <s X --> X >s (MAXSINT-2) --> X >s 125
1158 // (X+MAXSINT) <s X --> X >s (MAXSINT-MAXSINT) --> X >s 0
1159 // (X+MINSINT) <s X --> X >s (MAXSINT-MINSINT) --> X >s -1
1160 // (X+ -2) <s X --> X >s (MAXSINT- -2) --> X >s 126
1161 // (X+ -1) <s X --> X >s (MAXSINT- -1) --> X != 127
1162 if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE)
1163 return new ICmpInst(ICmpInst::ICMP_SGT, X, ConstantExpr::getSub(SMax, CI));
1165 // (X+ 1) >s X --> X <s (MAXSINT-(1-1)) --> X != 127
1166 // (X+ 2) >s X --> X <s (MAXSINT-(2-1)) --> X <s 126
1167 // (X+MAXSINT) >s X --> X <s (MAXSINT-(MAXSINT-1)) --> X <s 1
1168 // (X+MINSINT) >s X --> X <s (MAXSINT-(MINSINT-1)) --> X <s -2
1169 // (X+ -2) >s X --> X <s (MAXSINT-(-2-1)) --> X <s -126
1170 // (X+ -1) >s X --> X <s (MAXSINT-(-1-1)) --> X == -128
1172 assert(Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE);
1173 Constant *C = Builder->getInt(CI->getValue()-1);
1174 return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantExpr::getSub(SMax, C));
1177 /// FoldICmpDivCst - Fold "icmp pred, ([su]div X, DivRHS), CmpRHS" where DivRHS
1178 /// and CmpRHS are both known to be integer constants.
1179 Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
1180 ConstantInt *DivRHS) {
1181 ConstantInt *CmpRHS = cast<ConstantInt>(ICI.getOperand(1));
1182 const APInt &CmpRHSV = CmpRHS->getValue();
1184 // FIXME: If the operand types don't match the type of the divide
1185 // then don't attempt this transform. The code below doesn't have the
1186 // logic to deal with a signed divide and an unsigned compare (and
1187 // vice versa). This is because (x /s C1) <s C2 produces different
1188 // results than (x /s C1) <u C2 or (x /u C1) <s C2 or even
1189 // (x /u C1) <u C2. Simply casting the operands and result won't
1190 // work. :( The if statement below tests that condition and bails
1192 bool DivIsSigned = DivI->getOpcode() == Instruction::SDiv;
1193 if (!ICI.isEquality() && DivIsSigned != ICI.isSigned())
1195 if (DivRHS->isZero())
1196 return nullptr; // The ProdOV computation fails on divide by zero.
1197 if (DivIsSigned && DivRHS->isAllOnesValue())
1198 return nullptr; // The overflow computation also screws up here
1199 if (DivRHS->isOne()) {
1200 // This eliminates some funny cases with INT_MIN.
1201 ICI.setOperand(0, DivI->getOperand(0)); // X/1 == X.
1205 // Compute Prod = CI * DivRHS. We are essentially solving an equation
1206 // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and
1207 // C2 (CI). By solving for X we can turn this into a range check
1208 // instead of computing a divide.
1209 Constant *Prod = ConstantExpr::getMul(CmpRHS, DivRHS);
1211 // Determine if the product overflows by seeing if the product is
1212 // not equal to the divide. Make sure we do the same kind of divide
1213 // as in the LHS instruction that we're folding.
1214 bool ProdOV = (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS) :
1215 ConstantExpr::getUDiv(Prod, DivRHS)) != CmpRHS;
1217 // Get the ICmp opcode
1218 ICmpInst::Predicate Pred = ICI.getPredicate();
1220 /// If the division is known to be exact, then there is no remainder from the
1221 /// divide, so the covered range size is unit, otherwise it is the divisor.
1222 ConstantInt *RangeSize = DivI->isExact() ? getOne(Prod) : DivRHS;
1224 // Figure out the interval that is being checked. For example, a comparison
1225 // like "X /u 5 == 0" is really checking that X is in the interval [0, 5).
1226 // Compute this interval based on the constants involved and the signedness of
1227 // the compare/divide. This computes a half-open interval, keeping track of
1228 // whether either value in the interval overflows. After analysis each
1229 // overflow variable is set to 0 if it's corresponding bound variable is valid
1230 // -1 if overflowed off the bottom end, or +1 if overflowed off the top end.
1231 int LoOverflow = 0, HiOverflow = 0;
1232 Constant *LoBound = nullptr, *HiBound = nullptr;
1234 if (!DivIsSigned) { // udiv
1235 // e.g. X/5 op 3 --> [15, 20)
1237 HiOverflow = LoOverflow = ProdOV;
1239 // If this is not an exact divide, then many values in the range collapse
1240 // to the same result value.
1241 HiOverflow = AddWithOverflow(HiBound, LoBound, RangeSize, false);
1243 } else if (DivRHS->getValue().isStrictlyPositive()) { // Divisor is > 0.
1244 if (CmpRHSV == 0) { // (X / pos) op 0
1245 // Can't overflow. e.g. X/2 op 0 --> [-1, 2)
1246 LoBound = ConstantExpr::getNeg(SubOne(RangeSize));
1247 HiBound = RangeSize;
1248 } else if (CmpRHSV.isStrictlyPositive()) { // (X / pos) op pos
1249 LoBound = Prod; // e.g. X/5 op 3 --> [15, 20)
1250 HiOverflow = LoOverflow = ProdOV;
1252 HiOverflow = AddWithOverflow(HiBound, Prod, RangeSize, true);
1253 } else { // (X / pos) op neg
1254 // e.g. X/5 op -3 --> [-15-4, -15+1) --> [-19, -14)
1255 HiBound = AddOne(Prod);
1256 LoOverflow = HiOverflow = ProdOV ? -1 : 0;
1258 ConstantInt *DivNeg =cast<ConstantInt>(ConstantExpr::getNeg(RangeSize));
1259 LoOverflow = AddWithOverflow(LoBound, HiBound, DivNeg, true) ? -1 : 0;
1262 } else if (DivRHS->isNegative()) { // Divisor is < 0.
1263 if (DivI->isExact())
1264 RangeSize = cast<ConstantInt>(ConstantExpr::getNeg(RangeSize));
1265 if (CmpRHSV == 0) { // (X / neg) op 0
1266 // e.g. X/-5 op 0 --> [-4, 5)
1267 LoBound = AddOne(RangeSize);
1268 HiBound = cast<ConstantInt>(ConstantExpr::getNeg(RangeSize));
1269 if (HiBound == DivRHS) { // -INTMIN = INTMIN
1270 HiOverflow = 1; // [INTMIN+1, overflow)
1271 HiBound = nullptr; // e.g. X/INTMIN = 0 --> X > INTMIN
1273 } else if (CmpRHSV.isStrictlyPositive()) { // (X / neg) op pos
1274 // e.g. X/-5 op 3 --> [-19, -14)
1275 HiBound = AddOne(Prod);
1276 HiOverflow = LoOverflow = ProdOV ? -1 : 0;
1278 LoOverflow = AddWithOverflow(LoBound, HiBound, RangeSize, true) ? -1:0;
1279 } else { // (X / neg) op neg
1280 LoBound = Prod; // e.g. X/-5 op -3 --> [15, 20)
1281 LoOverflow = HiOverflow = ProdOV;
1283 HiOverflow = SubWithOverflow(HiBound, Prod, RangeSize, true);
1286 // Dividing by a negative swaps the condition. LT <-> GT
1287 Pred = ICmpInst::getSwappedPredicate(Pred);
1290 Value *X = DivI->getOperand(0);
1292 default: llvm_unreachable("Unhandled icmp opcode!");
1293 case ICmpInst::ICMP_EQ:
1294 if (LoOverflow && HiOverflow)
1295 return ReplaceInstUsesWith(ICI, Builder->getFalse());
1297 return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
1298 ICmpInst::ICMP_UGE, X, LoBound);
1300 return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
1301 ICmpInst::ICMP_ULT, X, HiBound);
1302 return ReplaceInstUsesWith(ICI, InsertRangeTest(X, LoBound, HiBound,
1303 DivIsSigned, true));
1304 case ICmpInst::ICMP_NE:
1305 if (LoOverflow && HiOverflow)
1306 return ReplaceInstUsesWith(ICI, Builder->getTrue());
1308 return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
1309 ICmpInst::ICMP_ULT, X, LoBound);
1311 return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
1312 ICmpInst::ICMP_UGE, X, HiBound);
1313 return ReplaceInstUsesWith(ICI, InsertRangeTest(X, LoBound, HiBound,
1314 DivIsSigned, false));
1315 case ICmpInst::ICMP_ULT:
1316 case ICmpInst::ICMP_SLT:
1317 if (LoOverflow == +1) // Low bound is greater than input range.
1318 return ReplaceInstUsesWith(ICI, Builder->getTrue());
1319 if (LoOverflow == -1) // Low bound is less than input range.
1320 return ReplaceInstUsesWith(ICI, Builder->getFalse());
1321 return new ICmpInst(Pred, X, LoBound);
1322 case ICmpInst::ICMP_UGT:
1323 case ICmpInst::ICMP_SGT:
1324 if (HiOverflow == +1) // High bound greater than input range.
1325 return ReplaceInstUsesWith(ICI, Builder->getFalse());
1326 if (HiOverflow == -1) // High bound less than input range.
1327 return ReplaceInstUsesWith(ICI, Builder->getTrue());
1328 if (Pred == ICmpInst::ICMP_UGT)
1329 return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound);
1330 return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound);
1334 /// FoldICmpShrCst - Handle "icmp(([al]shr X, cst1), cst2)".
1335 Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr,
1336 ConstantInt *ShAmt) {
1337 const APInt &CmpRHSV = cast<ConstantInt>(ICI.getOperand(1))->getValue();
1339 // Check that the shift amount is in range. If not, don't perform
1340 // undefined shifts. When the shift is visited it will be
1342 uint32_t TypeBits = CmpRHSV.getBitWidth();
1343 uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
1344 if (ShAmtVal >= TypeBits || ShAmtVal == 0)
1347 if (!ICI.isEquality()) {
1348 // If we have an unsigned comparison and an ashr, we can't simplify this.
1349 // Similarly for signed comparisons with lshr.
1350 if (ICI.isSigned() != (Shr->getOpcode() == Instruction::AShr))
1353 // Otherwise, all lshr and most exact ashr's are equivalent to a udiv/sdiv
1354 // by a power of 2. Since we already have logic to simplify these,
1355 // transform to div and then simplify the resultant comparison.
1356 if (Shr->getOpcode() == Instruction::AShr &&
1357 (!Shr->isExact() || ShAmtVal == TypeBits - 1))
1360 // Revisit the shift (to delete it).
1364 ConstantInt::get(Shr->getType(), APInt::getOneBitSet(TypeBits, ShAmtVal));
1367 Shr->getOpcode() == Instruction::AShr ?
1368 Builder->CreateSDiv(Shr->getOperand(0), DivCst, "", Shr->isExact()) :
1369 Builder->CreateUDiv(Shr->getOperand(0), DivCst, "", Shr->isExact());
1371 ICI.setOperand(0, Tmp);
1373 // If the builder folded the binop, just return it.
1374 BinaryOperator *TheDiv = dyn_cast<BinaryOperator>(Tmp);
1378 // Otherwise, fold this div/compare.
1379 assert(TheDiv->getOpcode() == Instruction::SDiv ||
1380 TheDiv->getOpcode() == Instruction::UDiv);
1382 Instruction *Res = FoldICmpDivCst(ICI, TheDiv, cast<ConstantInt>(DivCst));
1383 assert(Res && "This div/cst should have folded!");
1387 // If we are comparing against bits always shifted out, the
1388 // comparison cannot succeed.
1389 APInt Comp = CmpRHSV << ShAmtVal;
1390 ConstantInt *ShiftedCmpRHS = Builder->getInt(Comp);
1391 if (Shr->getOpcode() == Instruction::LShr)
1392 Comp = Comp.lshr(ShAmtVal);
1394 Comp = Comp.ashr(ShAmtVal);
1396 if (Comp != CmpRHSV) { // Comparing against a bit that we know is zero.
1397 bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
1398 Constant *Cst = Builder->getInt1(IsICMP_NE);
1399 return ReplaceInstUsesWith(ICI, Cst);
1402 // Otherwise, check to see if the bits shifted out are known to be zero.
1403 // If so, we can compare against the unshifted value:
1404 // (X & 4) >> 1 == 2 --> (X & 4) == 4.
1405 if (Shr->hasOneUse() && Shr->isExact())
1406 return new ICmpInst(ICI.getPredicate(), Shr->getOperand(0), ShiftedCmpRHS);
1408 if (Shr->hasOneUse()) {
1409 // Otherwise strength reduce the shift into an and.
1410 APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
1411 Constant *Mask = Builder->getInt(Val);
1413 Value *And = Builder->CreateAnd(Shr->getOperand(0),
1414 Mask, Shr->getName()+".mask");
1415 return new ICmpInst(ICI.getPredicate(), And, ShiftedCmpRHS);
1420 /// FoldICmpCstShrCst - Handle "(icmp eq/ne (ashr/lshr const2, A), const1)" ->
1421 /// (icmp eq/ne A, Log2(const2/const1)) ->
1422 /// (icmp eq/ne A, Log2(const2) - Log2(const1)).
1423 Instruction *InstCombiner::FoldICmpCstShrCst(ICmpInst &I, Value *Op, Value *A,
1426 assert(I.isEquality() && "Cannot fold icmp gt/lt");
1428 auto getConstant = [&I, this](bool IsTrue) {
1429 if (I.getPredicate() == I.ICMP_NE)
1431 return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), IsTrue));
1434 auto getICmp = [&I](CmpInst::Predicate Pred, Value *LHS, Value *RHS) {
1435 if (I.getPredicate() == I.ICMP_NE)
1436 Pred = CmpInst::getInversePredicate(Pred);
1437 return new ICmpInst(Pred, LHS, RHS);
1440 APInt AP1 = CI1->getValue();
1441 APInt AP2 = CI2->getValue();
1443 // Don't bother doing any work for cases which InstSimplify handles.
1446 bool IsAShr = isa<AShrOperator>(Op);
1448 if (AP2.isAllOnesValue())
1450 if (AP2.isNegative() != AP1.isNegative())
1457 // 'A' must be large enough to shift out the highest set bit.
1458 return getICmp(I.ICMP_UGT, A,
1459 ConstantInt::get(A->getType(), AP2.logBase2()));
1462 return getICmp(I.ICMP_EQ, A, ConstantInt::getNullValue(A->getType()));
1465 if (IsAShr && AP1.isNegative())
1466 Shift = AP1.countLeadingOnes() - AP2.countLeadingOnes();
1468 Shift = AP1.countLeadingZeros() - AP2.countLeadingZeros();
1471 if (IsAShr && AP1 == AP2.ashr(Shift)) {
1472 // There are multiple solutions if we are comparing against -1 and the LHS
1473 // of the ashr is not a power of two.
1474 if (AP1.isAllOnesValue() && !AP2.isPowerOf2())
1475 return getICmp(I.ICMP_UGE, A, ConstantInt::get(A->getType(), Shift));
1476 return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift));
1477 } else if (AP1 == AP2.lshr(Shift)) {
1478 return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift));
1481 // Shifting const2 will never be equal to const1.
1482 return getConstant(false);
1485 /// FoldICmpCstShlCst - Handle "(icmp eq/ne (shl const2, A), const1)" ->
1486 /// (icmp eq/ne A, TrailingZeros(const1) - TrailingZeros(const2)).
1487 Instruction *InstCombiner::FoldICmpCstShlCst(ICmpInst &I, Value *Op, Value *A,
1490 assert(I.isEquality() && "Cannot fold icmp gt/lt");
1492 auto getConstant = [&I, this](bool IsTrue) {
1493 if (I.getPredicate() == I.ICMP_NE)
1495 return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), IsTrue));
1498 auto getICmp = [&I](CmpInst::Predicate Pred, Value *LHS, Value *RHS) {
1499 if (I.getPredicate() == I.ICMP_NE)
1500 Pred = CmpInst::getInversePredicate(Pred);
1501 return new ICmpInst(Pred, LHS, RHS);
1504 APInt AP1 = CI1->getValue();
1505 APInt AP2 = CI2->getValue();
1507 // Don't bother doing any work for cases which InstSimplify handles.
1511 unsigned AP2TrailingZeros = AP2.countTrailingZeros();
1513 if (!AP1 && AP2TrailingZeros != 0)
1514 return getICmp(I.ICMP_UGE, A,
1515 ConstantInt::get(A->getType(), AP2.getBitWidth() - AP2TrailingZeros));
1518 return getICmp(I.ICMP_EQ, A, ConstantInt::getNullValue(A->getType()));
1520 // Get the distance between the lowest bits that are set.
1521 int Shift = AP1.countTrailingZeros() - AP2TrailingZeros;
1523 if (Shift > 0 && AP2.shl(Shift) == AP1)
1524 return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift));
1526 // Shifting const2 will never be equal to const1.
1527 return getConstant(false);
1530 /// visitICmpInstWithInstAndIntCst - Handle "icmp (instr, intcst)".
1532 Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
1535 const APInt &RHSV = RHS->getValue();
1537 switch (LHSI->getOpcode()) {
1538 case Instruction::Trunc:
1539 if (RHS->isOne() && RHSV.getBitWidth() > 1) {
1540 // icmp slt trunc(signum(V)) 1 --> icmp slt V, 1
1542 if (ICI.getPredicate() == ICmpInst::ICMP_SLT &&
1543 match(LHSI->getOperand(0), m_Signum(m_Value(V))))
1544 return new ICmpInst(ICmpInst::ICMP_SLT, V,
1545 ConstantInt::get(V->getType(), 1));
1547 if (ICI.isEquality() && LHSI->hasOneUse()) {
1548 // Simplify icmp eq (trunc x to i8), 42 -> icmp eq x, 42|highbits if all
1549 // of the high bits truncated out of x are known.
1550 unsigned DstBits = LHSI->getType()->getPrimitiveSizeInBits(),
1551 SrcBits = LHSI->getOperand(0)->getType()->getPrimitiveSizeInBits();
1552 APInt KnownZero(SrcBits, 0), KnownOne(SrcBits, 0);
1553 computeKnownBits(LHSI->getOperand(0), KnownZero, KnownOne, 0, &ICI);
1555 // If all the high bits are known, we can do this xform.
1556 if ((KnownZero|KnownOne).countLeadingOnes() >= SrcBits-DstBits) {
1557 // Pull in the high bits from known-ones set.
1558 APInt NewRHS = RHS->getValue().zext(SrcBits);
1559 NewRHS |= KnownOne & APInt::getHighBitsSet(SrcBits, SrcBits-DstBits);
1560 return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
1561 Builder->getInt(NewRHS));
1566 case Instruction::Xor: // (icmp pred (xor X, XorCst), CI)
1567 if (ConstantInt *XorCst = dyn_cast<ConstantInt>(LHSI->getOperand(1))) {
1568 // If this is a comparison that tests the signbit (X < 0) or (x > -1),
1570 if ((ICI.getPredicate() == ICmpInst::ICMP_SLT && RHSV == 0) ||
1571 (ICI.getPredicate() == ICmpInst::ICMP_SGT && RHSV.isAllOnesValue())) {
1572 Value *CompareVal = LHSI->getOperand(0);
1574 // If the sign bit of the XorCst is not set, there is no change to
1575 // the operation, just stop using the Xor.
1576 if (!XorCst->isNegative()) {
1577 ICI.setOperand(0, CompareVal);
1582 // Was the old condition true if the operand is positive?
1583 bool isTrueIfPositive = ICI.getPredicate() == ICmpInst::ICMP_SGT;
1585 // If so, the new one isn't.
1586 isTrueIfPositive ^= true;
1588 if (isTrueIfPositive)
1589 return new ICmpInst(ICmpInst::ICMP_SGT, CompareVal,
1592 return new ICmpInst(ICmpInst::ICMP_SLT, CompareVal,
1596 if (LHSI->hasOneUse()) {
1597 // (icmp u/s (xor A SignBit), C) -> (icmp s/u A, (xor C SignBit))
1598 if (!ICI.isEquality() && XorCst->getValue().isSignBit()) {
1599 const APInt &SignBit = XorCst->getValue();
1600 ICmpInst::Predicate Pred = ICI.isSigned()
1601 ? ICI.getUnsignedPredicate()
1602 : ICI.getSignedPredicate();
1603 return new ICmpInst(Pred, LHSI->getOperand(0),
1604 Builder->getInt(RHSV ^ SignBit));
1607 // (icmp u/s (xor A ~SignBit), C) -> (icmp s/u (xor C ~SignBit), A)
1608 if (!ICI.isEquality() && XorCst->isMaxValue(true)) {
1609 const APInt &NotSignBit = XorCst->getValue();
1610 ICmpInst::Predicate Pred = ICI.isSigned()
1611 ? ICI.getUnsignedPredicate()
1612 : ICI.getSignedPredicate();
1613 Pred = ICI.getSwappedPredicate(Pred);
1614 return new ICmpInst(Pred, LHSI->getOperand(0),
1615 Builder->getInt(RHSV ^ NotSignBit));
1619 // (icmp ugt (xor X, C), ~C) -> (icmp ult X, C)
1620 // iff -C is a power of 2
1621 if (ICI.getPredicate() == ICmpInst::ICMP_UGT &&
1622 XorCst->getValue() == ~RHSV && (RHSV + 1).isPowerOf2())
1623 return new ICmpInst(ICmpInst::ICMP_ULT, LHSI->getOperand(0), XorCst);
1625 // (icmp ult (xor X, C), -C) -> (icmp uge X, C)
1626 // iff -C is a power of 2
1627 if (ICI.getPredicate() == ICmpInst::ICMP_ULT &&
1628 XorCst->getValue() == -RHSV && RHSV.isPowerOf2())
1629 return new ICmpInst(ICmpInst::ICMP_UGE, LHSI->getOperand(0), XorCst);
1632 case Instruction::And: // (icmp pred (and X, AndCst), RHS)
1633 if (LHSI->hasOneUse() && isa<ConstantInt>(LHSI->getOperand(1)) &&
1634 LHSI->getOperand(0)->hasOneUse()) {
1635 ConstantInt *AndCst = cast<ConstantInt>(LHSI->getOperand(1));
1637 // If the LHS is an AND of a truncating cast, we can widen the
1638 // and/compare to be the input width without changing the value
1639 // produced, eliminating a cast.
1640 if (TruncInst *Cast = dyn_cast<TruncInst>(LHSI->getOperand(0))) {
1641 // We can do this transformation if either the AND constant does not
1642 // have its sign bit set or if it is an equality comparison.
1643 // Extending a relational comparison when we're checking the sign
1644 // bit would not work.
1645 if (ICI.isEquality() ||
1646 (!AndCst->isNegative() && RHSV.isNonNegative())) {
1648 Builder->CreateAnd(Cast->getOperand(0),
1649 ConstantExpr::getZExt(AndCst, Cast->getSrcTy()));
1650 NewAnd->takeName(LHSI);
1651 return new ICmpInst(ICI.getPredicate(), NewAnd,
1652 ConstantExpr::getZExt(RHS, Cast->getSrcTy()));
1656 // If the LHS is an AND of a zext, and we have an equality compare, we can
1657 // shrink the and/compare to the smaller type, eliminating the cast.
1658 if (ZExtInst *Cast = dyn_cast<ZExtInst>(LHSI->getOperand(0))) {
1659 IntegerType *Ty = cast<IntegerType>(Cast->getSrcTy());
1660 // Make sure we don't compare the upper bits, SimplifyDemandedBits
1661 // should fold the icmp to true/false in that case.
1662 if (ICI.isEquality() && RHSV.getActiveBits() <= Ty->getBitWidth()) {
1664 Builder->CreateAnd(Cast->getOperand(0),
1665 ConstantExpr::getTrunc(AndCst, Ty));
1666 NewAnd->takeName(LHSI);
1667 return new ICmpInst(ICI.getPredicate(), NewAnd,
1668 ConstantExpr::getTrunc(RHS, Ty));
1672 // If this is: (X >> C1) & C2 != C3 (where any shift and any compare
1673 // could exist), turn it into (X & (C2 << C1)) != (C3 << C1). This
1674 // happens a LOT in code produced by the C front-end, for bitfield
1676 BinaryOperator *Shift = dyn_cast<BinaryOperator>(LHSI->getOperand(0));
1677 if (Shift && !Shift->isShift())
1681 ShAmt = Shift ? dyn_cast<ConstantInt>(Shift->getOperand(1)) : nullptr;
1683 // This seemingly simple opportunity to fold away a shift turns out to
1684 // be rather complicated. See PR17827
1685 // ( http://llvm.org/bugs/show_bug.cgi?id=17827 ) for details.
1687 bool CanFold = false;
1688 unsigned ShiftOpcode = Shift->getOpcode();
1689 if (ShiftOpcode == Instruction::AShr) {
1690 // There may be some constraints that make this possible,
1691 // but nothing simple has been discovered yet.
1693 } else if (ShiftOpcode == Instruction::Shl) {
1694 // For a left shift, we can fold if the comparison is not signed.
1695 // We can also fold a signed comparison if the mask value and
1696 // comparison value are not negative. These constraints may not be
1697 // obvious, but we can prove that they are correct using an SMT
1699 if (!ICI.isSigned() || (!AndCst->isNegative() && !RHS->isNegative()))
1701 } else if (ShiftOpcode == Instruction::LShr) {
1702 // For a logical right shift, we can fold if the comparison is not
1703 // signed. We can also fold a signed comparison if the shifted mask
1704 // value and the shifted comparison value are not negative.
1705 // These constraints may not be obvious, but we can prove that they
1706 // are correct using an SMT solver.
1707 if (!ICI.isSigned())
1710 ConstantInt *ShiftedAndCst =
1711 cast<ConstantInt>(ConstantExpr::getShl(AndCst, ShAmt));
1712 ConstantInt *ShiftedRHSCst =
1713 cast<ConstantInt>(ConstantExpr::getShl(RHS, ShAmt));
1715 if (!ShiftedAndCst->isNegative() && !ShiftedRHSCst->isNegative())
1722 if (ShiftOpcode == Instruction::Shl)
1723 NewCst = ConstantExpr::getLShr(RHS, ShAmt);
1725 NewCst = ConstantExpr::getShl(RHS, ShAmt);
1727 // Check to see if we are shifting out any of the bits being
1729 if (ConstantExpr::get(ShiftOpcode, NewCst, ShAmt) != RHS) {
1730 // If we shifted bits out, the fold is not going to work out.
1731 // As a special case, check to see if this means that the
1732 // result is always true or false now.
1733 if (ICI.getPredicate() == ICmpInst::ICMP_EQ)
1734 return ReplaceInstUsesWith(ICI, Builder->getFalse());
1735 if (ICI.getPredicate() == ICmpInst::ICMP_NE)
1736 return ReplaceInstUsesWith(ICI, Builder->getTrue());
1738 ICI.setOperand(1, NewCst);
1739 Constant *NewAndCst;
1740 if (ShiftOpcode == Instruction::Shl)
1741 NewAndCst = ConstantExpr::getLShr(AndCst, ShAmt);
1743 NewAndCst = ConstantExpr::getShl(AndCst, ShAmt);
1744 LHSI->setOperand(1, NewAndCst);
1745 LHSI->setOperand(0, Shift->getOperand(0));
1746 Worklist.Add(Shift); // Shift is dead.
1752 // Turn ((X >> Y) & C) == 0 into (X & (C << Y)) == 0. The later is
1753 // preferable because it allows the C<<Y expression to be hoisted out
1754 // of a loop if Y is invariant and X is not.
1755 if (Shift && Shift->hasOneUse() && RHSV == 0 &&
1756 ICI.isEquality() && !Shift->isArithmeticShift() &&
1757 !isa<Constant>(Shift->getOperand(0))) {
1760 if (Shift->getOpcode() == Instruction::LShr) {
1761 NS = Builder->CreateShl(AndCst, Shift->getOperand(1));
1763 // Insert a logical shift.
1764 NS = Builder->CreateLShr(AndCst, Shift->getOperand(1));
1767 // Compute X & (C << Y).
1769 Builder->CreateAnd(Shift->getOperand(0), NS, LHSI->getName());
1771 ICI.setOperand(0, NewAnd);
1775 // (icmp pred (and (or (lshr X, Y), X), 1), 0) -->
1776 // (icmp pred (and X, (or (shl 1, Y), 1), 0))
1778 // iff pred isn't signed
1780 Value *X, *Y, *LShr;
1781 if (!ICI.isSigned() && RHSV == 0) {
1782 if (match(LHSI->getOperand(1), m_One())) {
1783 Constant *One = cast<Constant>(LHSI->getOperand(1));
1784 Value *Or = LHSI->getOperand(0);
1785 if (match(Or, m_Or(m_Value(LShr), m_Value(X))) &&
1786 match(LShr, m_LShr(m_Specific(X), m_Value(Y)))) {
1787 unsigned UsesRemoved = 0;
1788 if (LHSI->hasOneUse())
1790 if (Or->hasOneUse())
1792 if (LShr->hasOneUse())
1794 Value *NewOr = nullptr;
1795 // Compute X & ((1 << Y) | 1)
1796 if (auto *C = dyn_cast<Constant>(Y)) {
1797 if (UsesRemoved >= 1)
1799 ConstantExpr::getOr(ConstantExpr::getNUWShl(One, C), One);
1801 if (UsesRemoved >= 3)
1802 NewOr = Builder->CreateOr(Builder->CreateShl(One, Y,
1805 One, Or->getName());
1808 Value *NewAnd = Builder->CreateAnd(X, NewOr, LHSI->getName());
1809 ICI.setOperand(0, NewAnd);
1817 // Replace ((X & AndCst) > RHSV) with ((X & AndCst) != 0), if any
1818 // bit set in (X & AndCst) will produce a result greater than RHSV.
1819 if (ICI.getPredicate() == ICmpInst::ICMP_UGT) {
1820 unsigned NTZ = AndCst->getValue().countTrailingZeros();
1821 if ((NTZ < AndCst->getBitWidth()) &&
1822 APInt::getOneBitSet(AndCst->getBitWidth(), NTZ).ugt(RHSV))
1823 return new ICmpInst(ICmpInst::ICMP_NE, LHSI,
1824 Constant::getNullValue(RHS->getType()));
1828 // Try to optimize things like "A[i]&42 == 0" to index computations.
1829 if (LoadInst *LI = dyn_cast<LoadInst>(LHSI->getOperand(0))) {
1830 if (GetElementPtrInst *GEP =
1831 dyn_cast<GetElementPtrInst>(LI->getOperand(0)))
1832 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
1833 if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
1834 !LI->isVolatile() && isa<ConstantInt>(LHSI->getOperand(1))) {
1835 ConstantInt *C = cast<ConstantInt>(LHSI->getOperand(1));
1836 if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV,ICI, C))
1841 // X & -C == -C -> X > u ~C
1842 // X & -C != -C -> X <= u ~C
1843 // iff C is a power of 2
1844 if (ICI.isEquality() && RHS == LHSI->getOperand(1) && (-RHSV).isPowerOf2())
1845 return new ICmpInst(
1846 ICI.getPredicate() == ICmpInst::ICMP_EQ ? ICmpInst::ICMP_UGT
1847 : ICmpInst::ICMP_ULE,
1848 LHSI->getOperand(0), SubOne(RHS));
1850 // (icmp eq (and %A, C), 0) -> (icmp sgt (trunc %A), -1)
1851 // iff C is a power of 2
1852 if (ICI.isEquality() && LHSI->hasOneUse() && match(RHS, m_Zero())) {
1853 if (auto *CI = dyn_cast<ConstantInt>(LHSI->getOperand(1))) {
1854 const APInt &AI = CI->getValue();
1855 int32_t ExactLogBase2 = AI.exactLogBase2();
1856 if (ExactLogBase2 != -1 && DL.isLegalInteger(ExactLogBase2 + 1)) {
1857 Type *NTy = IntegerType::get(ICI.getContext(), ExactLogBase2 + 1);
1858 Value *Trunc = Builder->CreateTrunc(LHSI->getOperand(0), NTy);
1859 return new ICmpInst(ICI.getPredicate() == ICmpInst::ICMP_EQ
1860 ? ICmpInst::ICMP_SGE
1861 : ICmpInst::ICMP_SLT,
1862 Trunc, Constant::getNullValue(NTy));
1868 case Instruction::Or: {
1870 // icmp slt signum(V) 1 --> icmp slt V, 1
1872 if (ICI.getPredicate() == ICmpInst::ICMP_SLT &&
1873 match(LHSI, m_Signum(m_Value(V))))
1874 return new ICmpInst(ICmpInst::ICMP_SLT, V,
1875 ConstantInt::get(V->getType(), 1));
1878 if (!ICI.isEquality() || !RHS->isNullValue() || !LHSI->hasOneUse())
1881 if (match(LHSI, m_Or(m_PtrToInt(m_Value(P)), m_PtrToInt(m_Value(Q))))) {
1882 // Simplify icmp eq (or (ptrtoint P), (ptrtoint Q)), 0
1883 // -> and (icmp eq P, null), (icmp eq Q, null).
1884 Value *ICIP = Builder->CreateICmp(ICI.getPredicate(), P,
1885 Constant::getNullValue(P->getType()));
1886 Value *ICIQ = Builder->CreateICmp(ICI.getPredicate(), Q,
1887 Constant::getNullValue(Q->getType()));
1889 if (ICI.getPredicate() == ICmpInst::ICMP_EQ)
1890 Op = BinaryOperator::CreateAnd(ICIP, ICIQ);
1892 Op = BinaryOperator::CreateOr(ICIP, ICIQ);
1898 case Instruction::Mul: { // (icmp pred (mul X, Val), CI)
1899 ConstantInt *Val = dyn_cast<ConstantInt>(LHSI->getOperand(1));
1902 // If this is a signed comparison to 0 and the mul is sign preserving,
1903 // use the mul LHS operand instead.
1904 ICmpInst::Predicate pred = ICI.getPredicate();
1905 if (isSignTest(pred, RHS) && !Val->isZero() &&
1906 cast<BinaryOperator>(LHSI)->hasNoSignedWrap())
1907 return new ICmpInst(Val->isNegative() ?
1908 ICmpInst::getSwappedPredicate(pred) : pred,
1909 LHSI->getOperand(0),
1910 Constant::getNullValue(RHS->getType()));
1915 case Instruction::Shl: { // (icmp pred (shl X, ShAmt), CI)
1916 uint32_t TypeBits = RHSV.getBitWidth();
1917 ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1));
1920 // (1 << X) pred P2 -> X pred Log2(P2)
1921 if (match(LHSI, m_Shl(m_One(), m_Value(X)))) {
1922 bool RHSVIsPowerOf2 = RHSV.isPowerOf2();
1923 ICmpInst::Predicate Pred = ICI.getPredicate();
1924 if (ICI.isUnsigned()) {
1925 if (!RHSVIsPowerOf2) {
1926 // (1 << X) < 30 -> X <= 4
1927 // (1 << X) <= 30 -> X <= 4
1928 // (1 << X) >= 30 -> X > 4
1929 // (1 << X) > 30 -> X > 4
1930 if (Pred == ICmpInst::ICMP_ULT)
1931 Pred = ICmpInst::ICMP_ULE;
1932 else if (Pred == ICmpInst::ICMP_UGE)
1933 Pred = ICmpInst::ICMP_UGT;
1935 unsigned RHSLog2 = RHSV.logBase2();
1937 // (1 << X) >= 2147483648 -> X >= 31 -> X == 31
1938 // (1 << X) < 2147483648 -> X < 31 -> X != 31
1939 if (RHSLog2 == TypeBits-1) {
1940 if (Pred == ICmpInst::ICMP_UGE)
1941 Pred = ICmpInst::ICMP_EQ;
1942 else if (Pred == ICmpInst::ICMP_ULT)
1943 Pred = ICmpInst::ICMP_NE;
1946 return new ICmpInst(Pred, X,
1947 ConstantInt::get(RHS->getType(), RHSLog2));
1948 } else if (ICI.isSigned()) {
1949 if (RHSV.isAllOnesValue()) {
1950 // (1 << X) <= -1 -> X == 31
1951 if (Pred == ICmpInst::ICMP_SLE)
1952 return new ICmpInst(ICmpInst::ICMP_EQ, X,
1953 ConstantInt::get(RHS->getType(), TypeBits-1));
1955 // (1 << X) > -1 -> X != 31
1956 if (Pred == ICmpInst::ICMP_SGT)
1957 return new ICmpInst(ICmpInst::ICMP_NE, X,
1958 ConstantInt::get(RHS->getType(), TypeBits-1));
1960 // (1 << X) < 0 -> X == 31
1961 // (1 << X) <= 0 -> X == 31
1962 if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE)
1963 return new ICmpInst(ICmpInst::ICMP_EQ, X,
1964 ConstantInt::get(RHS->getType(), TypeBits-1));
1966 // (1 << X) >= 0 -> X != 31
1967 // (1 << X) > 0 -> X != 31
1968 if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE)
1969 return new ICmpInst(ICmpInst::ICMP_NE, X,
1970 ConstantInt::get(RHS->getType(), TypeBits-1));
1972 } else if (ICI.isEquality()) {
1974 return new ICmpInst(
1975 Pred, X, ConstantInt::get(RHS->getType(), RHSV.logBase2()));
1981 // Check that the shift amount is in range. If not, don't perform
1982 // undefined shifts. When the shift is visited it will be
1984 if (ShAmt->uge(TypeBits))
1987 if (ICI.isEquality()) {
1988 // If we are comparing against bits always shifted out, the
1989 // comparison cannot succeed.
1991 ConstantExpr::getShl(ConstantExpr::getLShr(RHS, ShAmt),
1993 if (Comp != RHS) {// Comparing against a bit that we know is zero.
1994 bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
1995 Constant *Cst = Builder->getInt1(IsICMP_NE);
1996 return ReplaceInstUsesWith(ICI, Cst);
1999 // If the shift is NUW, then it is just shifting out zeros, no need for an
2001 if (cast<BinaryOperator>(LHSI)->hasNoUnsignedWrap())
2002 return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
2003 ConstantExpr::getLShr(RHS, ShAmt));
2005 // If the shift is NSW and we compare to 0, then it is just shifting out
2006 // sign bits, no need for an AND either.
2007 if (cast<BinaryOperator>(LHSI)->hasNoSignedWrap() && RHSV == 0)
2008 return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
2009 ConstantExpr::getLShr(RHS, ShAmt));
2011 if (LHSI->hasOneUse()) {
2012 // Otherwise strength reduce the shift into an and.
2013 uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
2014 Constant *Mask = Builder->getInt(APInt::getLowBitsSet(TypeBits,
2015 TypeBits - ShAmtVal));
2018 Builder->CreateAnd(LHSI->getOperand(0),Mask, LHSI->getName()+".mask");
2019 return new ICmpInst(ICI.getPredicate(), And,
2020 ConstantExpr::getLShr(RHS, ShAmt));
2024 // If this is a signed comparison to 0 and the shift is sign preserving,
2025 // use the shift LHS operand instead.
2026 ICmpInst::Predicate pred = ICI.getPredicate();
2027 if (isSignTest(pred, RHS) &&
2028 cast<BinaryOperator>(LHSI)->hasNoSignedWrap())
2029 return new ICmpInst(pred,
2030 LHSI->getOperand(0),
2031 Constant::getNullValue(RHS->getType()));
2033 // Otherwise, if this is a comparison of the sign bit, simplify to and/test.
2034 bool TrueIfSigned = false;
2035 if (LHSI->hasOneUse() &&
2036 isSignBitCheck(ICI.getPredicate(), RHS, TrueIfSigned)) {
2037 // (X << 31) <s 0 --> (X&1) != 0
2038 Constant *Mask = ConstantInt::get(LHSI->getOperand(0)->getType(),
2039 APInt::getOneBitSet(TypeBits,
2040 TypeBits-ShAmt->getZExtValue()-1));
2042 Builder->CreateAnd(LHSI->getOperand(0), Mask, LHSI->getName()+".mask");
2043 return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ,
2044 And, Constant::getNullValue(And->getType()));
2047 // Transform (icmp pred iM (shl iM %v, N), CI)
2048 // -> (icmp pred i(M-N) (trunc %v iM to i(M-N)), (trunc (CI>>N))
2049 // Transform the shl to a trunc if (trunc (CI>>N)) has no loss and M-N.
2050 // This enables to get rid of the shift in favor of a trunc which can be
2051 // free on the target. It has the additional benefit of comparing to a
2052 // smaller constant, which will be target friendly.
2053 unsigned Amt = ShAmt->getLimitedValue(TypeBits-1);
2054 if (LHSI->hasOneUse() &&
2055 Amt != 0 && RHSV.countTrailingZeros() >= Amt) {
2056 Type *NTy = IntegerType::get(ICI.getContext(), TypeBits - Amt);
2057 Constant *NCI = ConstantExpr::getTrunc(
2058 ConstantExpr::getAShr(RHS,
2059 ConstantInt::get(RHS->getType(), Amt)),
2061 return new ICmpInst(ICI.getPredicate(),
2062 Builder->CreateTrunc(LHSI->getOperand(0), NTy),
2069 case Instruction::LShr: // (icmp pred (shr X, ShAmt), CI)
2070 case Instruction::AShr: {
2071 // Handle equality comparisons of shift-by-constant.
2072 BinaryOperator *BO = cast<BinaryOperator>(LHSI);
2073 if (ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1))) {
2074 if (Instruction *Res = FoldICmpShrCst(ICI, BO, ShAmt))
2078 // Handle exact shr's.
2079 if (ICI.isEquality() && BO->isExact() && BO->hasOneUse()) {
2080 if (RHSV.isMinValue())
2081 return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), RHS);
2086 case Instruction::SDiv:
2087 case Instruction::UDiv:
2088 // Fold: icmp pred ([us]div X, C1), C2 -> range test
2089 // Fold this div into the comparison, producing a range check.
2090 // Determine, based on the divide type, what the range is being
2091 // checked. If there is an overflow on the low or high side, remember
2092 // it, otherwise compute the range [low, hi) bounding the new value.
2093 // See: InsertRangeTest above for the kinds of replacements possible.
2094 if (ConstantInt *DivRHS = dyn_cast<ConstantInt>(LHSI->getOperand(1)))
2095 if (Instruction *R = FoldICmpDivCst(ICI, cast<BinaryOperator>(LHSI),
2100 case Instruction::Sub: {
2101 ConstantInt *LHSC = dyn_cast<ConstantInt>(LHSI->getOperand(0));
2103 const APInt &LHSV = LHSC->getValue();
2105 // C1-X <u C2 -> (X|(C2-1)) == C1
2106 // iff C1 & (C2-1) == C2-1
2107 // C2 is a power of 2
2108 if (ICI.getPredicate() == ICmpInst::ICMP_ULT && LHSI->hasOneUse() &&
2109 RHSV.isPowerOf2() && (LHSV & (RHSV - 1)) == (RHSV - 1))
2110 return new ICmpInst(ICmpInst::ICMP_EQ,
2111 Builder->CreateOr(LHSI->getOperand(1), RHSV - 1),
2114 // C1-X >u C2 -> (X|C2) != C1
2115 // iff C1 & C2 == C2
2116 // C2+1 is a power of 2
2117 if (ICI.getPredicate() == ICmpInst::ICMP_UGT && LHSI->hasOneUse() &&
2118 (RHSV + 1).isPowerOf2() && (LHSV & RHSV) == RHSV)
2119 return new ICmpInst(ICmpInst::ICMP_NE,
2120 Builder->CreateOr(LHSI->getOperand(1), RHSV), LHSC);
2124 case Instruction::Add:
2125 // Fold: icmp pred (add X, C1), C2
2126 if (!ICI.isEquality()) {
2127 ConstantInt *LHSC = dyn_cast<ConstantInt>(LHSI->getOperand(1));
2129 const APInt &LHSV = LHSC->getValue();
2131 ConstantRange CR = ICI.makeConstantRange(ICI.getPredicate(), RHSV)
2134 if (ICI.isSigned()) {
2135 if (CR.getLower().isSignBit()) {
2136 return new ICmpInst(ICmpInst::ICMP_SLT, LHSI->getOperand(0),
2137 Builder->getInt(CR.getUpper()));
2138 } else if (CR.getUpper().isSignBit()) {
2139 return new ICmpInst(ICmpInst::ICMP_SGE, LHSI->getOperand(0),
2140 Builder->getInt(CR.getLower()));
2143 if (CR.getLower().isMinValue()) {
2144 return new ICmpInst(ICmpInst::ICMP_ULT, LHSI->getOperand(0),
2145 Builder->getInt(CR.getUpper()));
2146 } else if (CR.getUpper().isMinValue()) {
2147 return new ICmpInst(ICmpInst::ICMP_UGE, LHSI->getOperand(0),
2148 Builder->getInt(CR.getLower()));
2152 // X-C1 <u C2 -> (X & -C2) == C1
2153 // iff C1 & (C2-1) == 0
2154 // C2 is a power of 2
2155 if (ICI.getPredicate() == ICmpInst::ICMP_ULT && LHSI->hasOneUse() &&
2156 RHSV.isPowerOf2() && (LHSV & (RHSV - 1)) == 0)
2157 return new ICmpInst(ICmpInst::ICMP_EQ,
2158 Builder->CreateAnd(LHSI->getOperand(0), -RHSV),
2159 ConstantExpr::getNeg(LHSC));
2161 // X-C1 >u C2 -> (X & ~C2) != C1
2163 // C2+1 is a power of 2
2164 if (ICI.getPredicate() == ICmpInst::ICMP_UGT && LHSI->hasOneUse() &&
2165 (RHSV + 1).isPowerOf2() && (LHSV & RHSV) == 0)
2166 return new ICmpInst(ICmpInst::ICMP_NE,
2167 Builder->CreateAnd(LHSI->getOperand(0), ~RHSV),
2168 ConstantExpr::getNeg(LHSC));
2173 // Simplify icmp_eq and icmp_ne instructions with integer constant RHS.
2174 if (ICI.isEquality()) {
2175 bool isICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
2177 // If the first operand is (add|sub|and|or|xor|rem) with a constant, and
2178 // the second operand is a constant, simplify a bit.
2179 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(LHSI)) {
2180 switch (BO->getOpcode()) {
2181 case Instruction::SRem:
2182 // If we have a signed (X % (2^c)) == 0, turn it into an unsigned one.
2183 if (RHSV == 0 && isa<ConstantInt>(BO->getOperand(1)) &&BO->hasOneUse()){
2184 const APInt &V = cast<ConstantInt>(BO->getOperand(1))->getValue();
2185 if (V.sgt(1) && V.isPowerOf2()) {
2187 Builder->CreateURem(BO->getOperand(0), BO->getOperand(1),
2189 return new ICmpInst(ICI.getPredicate(), NewRem,
2190 Constant::getNullValue(BO->getType()));
2194 case Instruction::Add:
2195 // Replace ((add A, B) != C) with (A != C-B) if B & C are constants.
2196 if (ConstantInt *BOp1C = dyn_cast<ConstantInt>(BO->getOperand(1))) {
2197 if (BO->hasOneUse())
2198 return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
2199 ConstantExpr::getSub(RHS, BOp1C));
2200 } else if (RHSV == 0) {
2201 // Replace ((add A, B) != 0) with (A != -B) if A or B is
2202 // efficiently invertible, or if the add has just this one use.
2203 Value *BOp0 = BO->getOperand(0), *BOp1 = BO->getOperand(1);
2205 if (Value *NegVal = dyn_castNegVal(BOp1))
2206 return new ICmpInst(ICI.getPredicate(), BOp0, NegVal);
2207 if (Value *NegVal = dyn_castNegVal(BOp0))
2208 return new ICmpInst(ICI.getPredicate(), NegVal, BOp1);
2209 if (BO->hasOneUse()) {
2210 Value *Neg = Builder->CreateNeg(BOp1);
2212 return new ICmpInst(ICI.getPredicate(), BOp0, Neg);
2216 case Instruction::Xor:
2217 // For the xor case, we can xor two constants together, eliminating
2218 // the explicit xor.
2219 if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1))) {
2220 return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
2221 ConstantExpr::getXor(RHS, BOC));
2222 } else if (RHSV == 0) {
2223 // Replace ((xor A, B) != 0) with (A != B)
2224 return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
2228 case Instruction::Sub:
2229 // Replace ((sub A, B) != C) with (B != A-C) if A & C are constants.
2230 if (ConstantInt *BOp0C = dyn_cast<ConstantInt>(BO->getOperand(0))) {
2231 if (BO->hasOneUse())
2232 return new ICmpInst(ICI.getPredicate(), BO->getOperand(1),
2233 ConstantExpr::getSub(BOp0C, RHS));
2234 } else if (RHSV == 0) {
2235 // Replace ((sub A, B) != 0) with (A != B)
2236 return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
2240 case Instruction::Or:
2241 // If bits are being or'd in that are not present in the constant we
2242 // are comparing against, then the comparison could never succeed!
2243 if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) {
2244 Constant *NotCI = ConstantExpr::getNot(RHS);
2245 if (!ConstantExpr::getAnd(BOC, NotCI)->isNullValue())
2246 return ReplaceInstUsesWith(ICI, Builder->getInt1(isICMP_NE));
2250 case Instruction::And:
2251 if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) {
2252 // If bits are being compared against that are and'd out, then the
2253 // comparison can never succeed!
2254 if ((RHSV & ~BOC->getValue()) != 0)
2255 return ReplaceInstUsesWith(ICI, Builder->getInt1(isICMP_NE));
2257 // If we have ((X & C) == C), turn it into ((X & C) != 0).
2258 if (RHS == BOC && RHSV.isPowerOf2())
2259 return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ :
2260 ICmpInst::ICMP_NE, LHSI,
2261 Constant::getNullValue(RHS->getType()));
2263 // Don't perform the following transforms if the AND has multiple uses
2264 if (!BO->hasOneUse())
2267 // Replace (and X, (1 << size(X)-1) != 0) with x s< 0
2268 if (BOC->getValue().isSignBit()) {
2269 Value *X = BO->getOperand(0);
2270 Constant *Zero = Constant::getNullValue(X->getType());
2271 ICmpInst::Predicate pred = isICMP_NE ?
2272 ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE;
2273 return new ICmpInst(pred, X, Zero);
2276 // ((X & ~7) == 0) --> X < 8
2277 if (RHSV == 0 && isHighOnes(BOC)) {
2278 Value *X = BO->getOperand(0);
2279 Constant *NegX = ConstantExpr::getNeg(BOC);
2280 ICmpInst::Predicate pred = isICMP_NE ?
2281 ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;
2282 return new ICmpInst(pred, X, NegX);
2286 case Instruction::Mul:
2287 if (RHSV == 0 && BO->hasNoSignedWrap()) {
2288 if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) {
2289 // The trivial case (mul X, 0) is handled by InstSimplify
2290 // General case : (mul X, C) != 0 iff X != 0
2291 // (mul X, C) == 0 iff X == 0
2293 return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
2294 Constant::getNullValue(RHS->getType()));
2300 } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(LHSI)) {
2301 // Handle icmp {eq|ne} <intrinsic>, intcst.
2302 switch (II->getIntrinsicID()) {
2303 case Intrinsic::bswap:
2305 ICI.setOperand(0, II->getArgOperand(0));
2306 ICI.setOperand(1, Builder->getInt(RHSV.byteSwap()));
2308 case Intrinsic::ctlz:
2309 case Intrinsic::cttz:
2310 // ctz(A) == bitwidth(a) -> A == 0 and likewise for !=
2311 if (RHSV == RHS->getType()->getBitWidth()) {
2313 ICI.setOperand(0, II->getArgOperand(0));
2314 ICI.setOperand(1, ConstantInt::get(RHS->getType(), 0));
2318 case Intrinsic::ctpop:
2319 // popcount(A) == 0 -> A == 0 and likewise for !=
2320 if (RHS->isZero()) {
2322 ICI.setOperand(0, II->getArgOperand(0));
2323 ICI.setOperand(1, RHS);
2335 /// visitICmpInstWithCastAndCast - Handle icmp (cast x to y), (cast/cst).
2336 /// We only handle extending casts so far.
2338 Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
2339 const CastInst *LHSCI = cast<CastInst>(ICI.getOperand(0));
2340 Value *LHSCIOp = LHSCI->getOperand(0);
2341 Type *SrcTy = LHSCIOp->getType();
2342 Type *DestTy = LHSCI->getType();
2345 // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the
2346 // integer type is the same size as the pointer type.
2347 if (LHSCI->getOpcode() == Instruction::PtrToInt &&
2348 DL.getPointerTypeSizeInBits(SrcTy) == DestTy->getIntegerBitWidth()) {
2349 Value *RHSOp = nullptr;
2350 if (PtrToIntOperator *RHSC = dyn_cast<PtrToIntOperator>(ICI.getOperand(1))) {
2351 Value *RHSCIOp = RHSC->getOperand(0);
2352 if (RHSCIOp->getType()->getPointerAddressSpace() ==
2353 LHSCIOp->getType()->getPointerAddressSpace()) {
2354 RHSOp = RHSC->getOperand(0);
2355 // If the pointer types don't match, insert a bitcast.
2356 if (LHSCIOp->getType() != RHSOp->getType())
2357 RHSOp = Builder->CreateBitCast(RHSOp, LHSCIOp->getType());
2359 } else if (Constant *RHSC = dyn_cast<Constant>(ICI.getOperand(1)))
2360 RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy);
2363 return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSOp);
2366 // The code below only handles extension cast instructions, so far.
2368 if (LHSCI->getOpcode() != Instruction::ZExt &&
2369 LHSCI->getOpcode() != Instruction::SExt)
2372 bool isSignedExt = LHSCI->getOpcode() == Instruction::SExt;
2373 bool isSignedCmp = ICI.isSigned();
2375 if (CastInst *CI = dyn_cast<CastInst>(ICI.getOperand(1))) {
2376 // Not an extension from the same type?
2377 RHSCIOp = CI->getOperand(0);
2378 if (RHSCIOp->getType() != LHSCIOp->getType())
2381 // If the signedness of the two casts doesn't agree (i.e. one is a sext
2382 // and the other is a zext), then we can't handle this.
2383 if (CI->getOpcode() != LHSCI->getOpcode())
2386 // Deal with equality cases early.
2387 if (ICI.isEquality())
2388 return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSCIOp);
2390 // A signed comparison of sign extended values simplifies into a
2391 // signed comparison.
2392 if (isSignedCmp && isSignedExt)
2393 return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSCIOp);
2395 // The other three cases all fold into an unsigned comparison.
2396 return new ICmpInst(ICI.getUnsignedPredicate(), LHSCIOp, RHSCIOp);
2399 // If we aren't dealing with a constant on the RHS, exit early
2400 ConstantInt *CI = dyn_cast<ConstantInt>(ICI.getOperand(1));
2404 // Compute the constant that would happen if we truncated to SrcTy then
2405 // reextended to DestTy.
2406 Constant *Res1 = ConstantExpr::getTrunc(CI, SrcTy);
2407 Constant *Res2 = ConstantExpr::getCast(LHSCI->getOpcode(),
2410 // If the re-extended constant didn't change...
2412 // Deal with equality cases early.
2413 if (ICI.isEquality())
2414 return new ICmpInst(ICI.getPredicate(), LHSCIOp, Res1);
2416 // A signed comparison of sign extended values simplifies into a
2417 // signed comparison.
2418 if (isSignedExt && isSignedCmp)
2419 return new ICmpInst(ICI.getPredicate(), LHSCIOp, Res1);
2421 // The other three cases all fold into an unsigned comparison.
2422 return new ICmpInst(ICI.getUnsignedPredicate(), LHSCIOp, Res1);
2425 // The re-extended constant changed so the constant cannot be represented
2426 // in the shorter type. Consequently, we cannot emit a simple comparison.
2427 // All the cases that fold to true or false will have already been handled
2428 // by SimplifyICmpInst, so only deal with the tricky case.
2430 if (isSignedCmp || !isSignedExt)
2433 // Evaluate the comparison for LT (we invert for GT below). LE and GE cases
2434 // should have been folded away previously and not enter in here.
2436 // We're performing an unsigned comp with a sign extended value.
2437 // This is true if the input is >= 0. [aka >s -1]
2438 Constant *NegOne = Constant::getAllOnesValue(SrcTy);
2439 Value *Result = Builder->CreateICmpSGT(LHSCIOp, NegOne, ICI.getName());
2441 // Finally, return the value computed.
2442 if (ICI.getPredicate() == ICmpInst::ICMP_ULT)
2443 return ReplaceInstUsesWith(ICI, Result);
2445 assert(ICI.getPredicate() == ICmpInst::ICMP_UGT && "ICmp should be folded!");
2446 return BinaryOperator::CreateNot(Result);
2449 /// ProcessUGT_ADDCST_ADD - The caller has matched a pattern of the form:
2450 /// I = icmp ugt (add (add A, B), CI2), CI1
2451 /// If this is of the form:
2453 /// if (sum+128 >u 255)
2454 /// Then replace it with llvm.sadd.with.overflow.i8.
2456 static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
2457 ConstantInt *CI2, ConstantInt *CI1,
2459 // The transformation we're trying to do here is to transform this into an
2460 // llvm.sadd.with.overflow. To do this, we have to replace the original add
2461 // with a narrower add, and discard the add-with-constant that is part of the
2462 // range check (if we can't eliminate it, this isn't profitable).
2464 // In order to eliminate the add-with-constant, the compare can be its only
2466 Instruction *AddWithCst = cast<Instruction>(I.getOperand(0));
2467 if (!AddWithCst->hasOneUse()) return nullptr;
2469 // If CI2 is 2^7, 2^15, 2^31, then it might be an sadd.with.overflow.
2470 if (!CI2->getValue().isPowerOf2()) return nullptr;
2471 unsigned NewWidth = CI2->getValue().countTrailingZeros();
2472 if (NewWidth != 7 && NewWidth != 15 && NewWidth != 31) return nullptr;
2474 // The width of the new add formed is 1 more than the bias.
2477 // Check to see that CI1 is an all-ones value with NewWidth bits.
2478 if (CI1->getBitWidth() == NewWidth ||
2479 CI1->getValue() != APInt::getLowBitsSet(CI1->getBitWidth(), NewWidth))
2482 // This is only really a signed overflow check if the inputs have been
2483 // sign-extended; check for that condition. For example, if CI2 is 2^31 and
2484 // the operands of the add are 64 bits wide, we need at least 33 sign bits.
2485 unsigned NeededSignBits = CI1->getBitWidth() - NewWidth + 1;
2486 if (IC.ComputeNumSignBits(A, 0, &I) < NeededSignBits ||
2487 IC.ComputeNumSignBits(B, 0, &I) < NeededSignBits)
2490 // In order to replace the original add with a narrower
2491 // llvm.sadd.with.overflow, the only uses allowed are the add-with-constant
2492 // and truncates that discard the high bits of the add. Verify that this is
2494 Instruction *OrigAdd = cast<Instruction>(AddWithCst->getOperand(0));
2495 for (User *U : OrigAdd->users()) {
2496 if (U == AddWithCst) continue;
2498 // Only accept truncates for now. We would really like a nice recursive
2499 // predicate like SimplifyDemandedBits, but which goes downwards the use-def
2500 // chain to see which bits of a value are actually demanded. If the
2501 // original add had another add which was then immediately truncated, we
2502 // could still do the transformation.
2503 TruncInst *TI = dyn_cast<TruncInst>(U);
2504 if (!TI || TI->getType()->getPrimitiveSizeInBits() > NewWidth)
2508 // If the pattern matches, truncate the inputs to the narrower type and
2509 // use the sadd_with_overflow intrinsic to efficiently compute both the
2510 // result and the overflow bit.
2511 Type *NewType = IntegerType::get(OrigAdd->getContext(), NewWidth);
2512 Value *F = Intrinsic::getDeclaration(I.getModule(),
2513 Intrinsic::sadd_with_overflow, NewType);
2515 InstCombiner::BuilderTy *Builder = IC.Builder;
2517 // Put the new code above the original add, in case there are any uses of the
2518 // add between the add and the compare.
2519 Builder->SetInsertPoint(OrigAdd);
2521 Value *TruncA = Builder->CreateTrunc(A, NewType, A->getName()+".trunc");
2522 Value *TruncB = Builder->CreateTrunc(B, NewType, B->getName()+".trunc");
2523 CallInst *Call = Builder->CreateCall(F, {TruncA, TruncB}, "sadd");
2524 Value *Add = Builder->CreateExtractValue(Call, 0, "sadd.result");
2525 Value *ZExt = Builder->CreateZExt(Add, OrigAdd->getType());
2527 // The inner add was the result of the narrow add, zero extended to the
2528 // wider type. Replace it with the result computed by the intrinsic.
2529 IC.ReplaceInstUsesWith(*OrigAdd, ZExt);
2531 // The original icmp gets replaced with the overflow value.
2532 return ExtractValueInst::Create(Call, 1, "sadd.overflow");
2535 bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS,
2536 Value *RHS, Instruction &OrigI,
2537 Value *&Result, Constant *&Overflow) {
2538 if (OrigI.isCommutative() && isa<Constant>(LHS) && !isa<Constant>(RHS))
2539 std::swap(LHS, RHS);
2541 auto SetResult = [&](Value *OpResult, Constant *OverflowVal, bool ReuseName) {
2543 Overflow = OverflowVal;
2545 Result->takeName(&OrigI);
2549 // If the overflow check was an add followed by a compare, the insertion point
2550 // may be pointing to the compare. We want to insert the new instructions
2551 // before the add in case there are uses of the add between the add and the
2553 Builder->SetInsertPoint(&OrigI);
2557 llvm_unreachable("bad overflow check kind!");
2559 case OCF_UNSIGNED_ADD: {
2560 OverflowResult OR = computeOverflowForUnsignedAdd(LHS, RHS, &OrigI);
2561 if (OR == OverflowResult::NeverOverflows)
2562 return SetResult(Builder->CreateNUWAdd(LHS, RHS), Builder->getFalse(),
2565 if (OR == OverflowResult::AlwaysOverflows)
2566 return SetResult(Builder->CreateAdd(LHS, RHS), Builder->getTrue(), true);
2568 // FALL THROUGH uadd into sadd
2569 case OCF_SIGNED_ADD: {
2570 // X + 0 -> {X, false}
2571 if (match(RHS, m_Zero()))
2572 return SetResult(LHS, Builder->getFalse(), false);
2574 // We can strength reduce this signed add into a regular add if we can prove
2575 // that it will never overflow.
2576 if (OCF == OCF_SIGNED_ADD)
2577 if (WillNotOverflowSignedAdd(LHS, RHS, OrigI))
2578 return SetResult(Builder->CreateNSWAdd(LHS, RHS), Builder->getFalse(),
2583 case OCF_UNSIGNED_SUB:
2584 case OCF_SIGNED_SUB: {
2585 // X - 0 -> {X, false}
2586 if (match(RHS, m_Zero()))
2587 return SetResult(LHS, Builder->getFalse(), false);
2589 if (OCF == OCF_SIGNED_SUB) {
2590 if (WillNotOverflowSignedSub(LHS, RHS, OrigI))
2591 return SetResult(Builder->CreateNSWSub(LHS, RHS), Builder->getFalse(),
2594 if (WillNotOverflowUnsignedSub(LHS, RHS, OrigI))
2595 return SetResult(Builder->CreateNUWSub(LHS, RHS), Builder->getFalse(),
2601 case OCF_UNSIGNED_MUL: {
2602 OverflowResult OR = computeOverflowForUnsignedMul(LHS, RHS, &OrigI);
2603 if (OR == OverflowResult::NeverOverflows)
2604 return SetResult(Builder->CreateNUWMul(LHS, RHS), Builder->getFalse(),
2606 if (OR == OverflowResult::AlwaysOverflows)
2607 return SetResult(Builder->CreateMul(LHS, RHS), Builder->getTrue(), true);
2609 case OCF_SIGNED_MUL:
2610 // X * undef -> undef
2611 if (isa<UndefValue>(RHS))
2612 return SetResult(RHS, UndefValue::get(Builder->getInt1Ty()), false);
2614 // X * 0 -> {0, false}
2615 if (match(RHS, m_Zero()))
2616 return SetResult(RHS, Builder->getFalse(), false);
2618 // X * 1 -> {X, false}
2619 if (match(RHS, m_One()))
2620 return SetResult(LHS, Builder->getFalse(), false);
2622 if (OCF == OCF_SIGNED_MUL)
2623 if (WillNotOverflowSignedMul(LHS, RHS, OrigI))
2624 return SetResult(Builder->CreateNSWMul(LHS, RHS), Builder->getFalse(),
2632 /// \brief Recognize and process idiom involving test for multiplication
2635 /// The caller has matched a pattern of the form:
2636 /// I = cmp u (mul(zext A, zext B), V
2637 /// The function checks if this is a test for overflow and if so replaces
2638 /// multiplication with call to 'mul.with.overflow' intrinsic.
2640 /// \param I Compare instruction.
2641 /// \param MulVal Result of 'mult' instruction. It is one of the arguments of
2642 /// the compare instruction. Must be of integer type.
2643 /// \param OtherVal The other argument of compare instruction.
2644 /// \returns Instruction which must replace the compare instruction, NULL if no
2645 /// replacement required.
2646 static Instruction *ProcessUMulZExtIdiom(ICmpInst &I, Value *MulVal,
2647 Value *OtherVal, InstCombiner &IC) {
2648 // Don't bother doing this transformation for pointers, don't do it for
2650 if (!isa<IntegerType>(MulVal->getType()))
2653 assert(I.getOperand(0) == MulVal || I.getOperand(1) == MulVal);
2654 assert(I.getOperand(0) == OtherVal || I.getOperand(1) == OtherVal);
2655 auto *MulInstr = dyn_cast<Instruction>(MulVal);
2658 assert(MulInstr->getOpcode() == Instruction::Mul);
2660 auto *LHS = cast<ZExtOperator>(MulInstr->getOperand(0)),
2661 *RHS = cast<ZExtOperator>(MulInstr->getOperand(1));
2662 assert(LHS->getOpcode() == Instruction::ZExt);
2663 assert(RHS->getOpcode() == Instruction::ZExt);
2664 Value *A = LHS->getOperand(0), *B = RHS->getOperand(0);
2666 // Calculate type and width of the result produced by mul.with.overflow.
2667 Type *TyA = A->getType(), *TyB = B->getType();
2668 unsigned WidthA = TyA->getPrimitiveSizeInBits(),
2669 WidthB = TyB->getPrimitiveSizeInBits();
2672 if (WidthB > WidthA) {
2680 // In order to replace the original mul with a narrower mul.with.overflow,
2681 // all uses must ignore upper bits of the product. The number of used low
2682 // bits must be not greater than the width of mul.with.overflow.
2683 if (MulVal->hasNUsesOrMore(2))
2684 for (User *U : MulVal->users()) {
2687 if (TruncInst *TI = dyn_cast<TruncInst>(U)) {
2688 // Check if truncation ignores bits above MulWidth.
2689 unsigned TruncWidth = TI->getType()->getPrimitiveSizeInBits();
2690 if (TruncWidth > MulWidth)
2692 } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U)) {
2693 // Check if AND ignores bits above MulWidth.
2694 if (BO->getOpcode() != Instruction::And)
2696 if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
2697 const APInt &CVal = CI->getValue();
2698 if (CVal.getBitWidth() - CVal.countLeadingZeros() > MulWidth)
2702 // Other uses prohibit this transformation.
2707 // Recognize patterns
2708 switch (I.getPredicate()) {
2709 case ICmpInst::ICMP_EQ:
2710 case ICmpInst::ICMP_NE:
2711 // Recognize pattern:
2712 // mulval = mul(zext A, zext B)
2713 // cmp eq/neq mulval, zext trunc mulval
2714 if (ZExtInst *Zext = dyn_cast<ZExtInst>(OtherVal))
2715 if (Zext->hasOneUse()) {
2716 Value *ZextArg = Zext->getOperand(0);
2717 if (TruncInst *Trunc = dyn_cast<TruncInst>(ZextArg))
2718 if (Trunc->getType()->getPrimitiveSizeInBits() == MulWidth)
2722 // Recognize pattern:
2723 // mulval = mul(zext A, zext B)
2724 // cmp eq/neq mulval, and(mulval, mask), mask selects low MulWidth bits.
2727 if (match(OtherVal, m_And(m_Value(ValToMask), m_ConstantInt(CI)))) {
2728 if (ValToMask != MulVal)
2730 const APInt &CVal = CI->getValue() + 1;
2731 if (CVal.isPowerOf2()) {
2732 unsigned MaskWidth = CVal.logBase2();
2733 if (MaskWidth == MulWidth)
2734 break; // Recognized
2739 case ICmpInst::ICMP_UGT:
2740 // Recognize pattern:
2741 // mulval = mul(zext A, zext B)
2742 // cmp ugt mulval, max
2743 if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) {
2744 APInt MaxVal = APInt::getMaxValue(MulWidth);
2745 MaxVal = MaxVal.zext(CI->getBitWidth());
2746 if (MaxVal.eq(CI->getValue()))
2747 break; // Recognized
2751 case ICmpInst::ICMP_UGE:
2752 // Recognize pattern:
2753 // mulval = mul(zext A, zext B)
2754 // cmp uge mulval, max+1
2755 if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) {
2756 APInt MaxVal = APInt::getOneBitSet(CI->getBitWidth(), MulWidth);
2757 if (MaxVal.eq(CI->getValue()))
2758 break; // Recognized
2762 case ICmpInst::ICMP_ULE:
2763 // Recognize pattern:
2764 // mulval = mul(zext A, zext B)
2765 // cmp ule mulval, max
2766 if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) {
2767 APInt MaxVal = APInt::getMaxValue(MulWidth);
2768 MaxVal = MaxVal.zext(CI->getBitWidth());
2769 if (MaxVal.eq(CI->getValue()))
2770 break; // Recognized
2774 case ICmpInst::ICMP_ULT:
2775 // Recognize pattern:
2776 // mulval = mul(zext A, zext B)
2777 // cmp ule mulval, max + 1
2778 if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) {
2779 APInt MaxVal = APInt::getOneBitSet(CI->getBitWidth(), MulWidth);
2780 if (MaxVal.eq(CI->getValue()))
2781 break; // Recognized
2789 InstCombiner::BuilderTy *Builder = IC.Builder;
2790 Builder->SetInsertPoint(MulInstr);
2792 // Replace: mul(zext A, zext B) --> mul.with.overflow(A, B)
2793 Value *MulA = A, *MulB = B;
2794 if (WidthA < MulWidth)
2795 MulA = Builder->CreateZExt(A, MulType);
2796 if (WidthB < MulWidth)
2797 MulB = Builder->CreateZExt(B, MulType);
2798 Value *F = Intrinsic::getDeclaration(I.getModule(),
2799 Intrinsic::umul_with_overflow, MulType);
2800 CallInst *Call = Builder->CreateCall(F, {MulA, MulB}, "umul");
2801 IC.Worklist.Add(MulInstr);
2803 // If there are uses of mul result other than the comparison, we know that
2804 // they are truncation or binary AND. Change them to use result of
2805 // mul.with.overflow and adjust properly mask/size.
2806 if (MulVal->hasNUsesOrMore(2)) {
2807 Value *Mul = Builder->CreateExtractValue(Call, 0, "umul.value");
2808 for (User *U : MulVal->users()) {
2809 if (U == &I || U == OtherVal)
2811 if (TruncInst *TI = dyn_cast<TruncInst>(U)) {
2812 if (TI->getType()->getPrimitiveSizeInBits() == MulWidth)
2813 IC.ReplaceInstUsesWith(*TI, Mul);
2815 TI->setOperand(0, Mul);
2816 } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U)) {
2817 assert(BO->getOpcode() == Instruction::And);
2818 // Replace (mul & mask) --> zext (mul.with.overflow & short_mask)
2819 ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1));
2820 APInt ShortMask = CI->getValue().trunc(MulWidth);
2821 Value *ShortAnd = Builder->CreateAnd(Mul, ShortMask);
2823 cast<Instruction>(Builder->CreateZExt(ShortAnd, BO->getType()));
2824 IC.Worklist.Add(Zext);
2825 IC.ReplaceInstUsesWith(*BO, Zext);
2827 llvm_unreachable("Unexpected Binary operation");
2829 IC.Worklist.Add(cast<Instruction>(U));
2832 if (isa<Instruction>(OtherVal))
2833 IC.Worklist.Add(cast<Instruction>(OtherVal));
2835 // The original icmp gets replaced with the overflow value, maybe inverted
2836 // depending on predicate.
2837 bool Inverse = false;
2838 switch (I.getPredicate()) {
2839 case ICmpInst::ICMP_NE:
2841 case ICmpInst::ICMP_EQ:
2844 case ICmpInst::ICMP_UGT:
2845 case ICmpInst::ICMP_UGE:
2846 if (I.getOperand(0) == MulVal)
2850 case ICmpInst::ICMP_ULT:
2851 case ICmpInst::ICMP_ULE:
2852 if (I.getOperand(1) == MulVal)
2857 llvm_unreachable("Unexpected predicate");
2860 Value *Res = Builder->CreateExtractValue(Call, 1);
2861 return BinaryOperator::CreateNot(Res);
2864 return ExtractValueInst::Create(Call, 1);
2867 // DemandedBitsLHSMask - When performing a comparison against a constant,
2868 // it is possible that not all the bits in the LHS are demanded. This helper
2869 // method computes the mask that IS demanded.
2870 static APInt DemandedBitsLHSMask(ICmpInst &I,
2871 unsigned BitWidth, bool isSignCheck) {
2873 return APInt::getSignBit(BitWidth);
2875 ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(1));
2876 if (!CI) return APInt::getAllOnesValue(BitWidth);
2877 const APInt &RHS = CI->getValue();
2879 switch (I.getPredicate()) {
2880 // For a UGT comparison, we don't care about any bits that
2881 // correspond to the trailing ones of the comparand. The value of these
2882 // bits doesn't impact the outcome of the comparison, because any value
2883 // greater than the RHS must differ in a bit higher than these due to carry.
2884 case ICmpInst::ICMP_UGT: {
2885 unsigned trailingOnes = RHS.countTrailingOnes();
2886 APInt lowBitsSet = APInt::getLowBitsSet(BitWidth, trailingOnes);
2890 // Similarly, for a ULT comparison, we don't care about the trailing zeros.
2891 // Any value less than the RHS must differ in a higher bit because of carries.
2892 case ICmpInst::ICMP_ULT: {
2893 unsigned trailingZeros = RHS.countTrailingZeros();
2894 APInt lowBitsSet = APInt::getLowBitsSet(BitWidth, trailingZeros);
2899 return APInt::getAllOnesValue(BitWidth);
2903 /// \brief Check if the order of \p Op0 and \p Op1 as operand in an ICmpInst
2904 /// should be swapped.
2905 /// The decision is based on how many times these two operands are reused
2906 /// as subtract operands and their positions in those instructions.
2907 /// The rational is that several architectures use the same instruction for
2908 /// both subtract and cmp, thus it is better if the order of those operands
2910 /// \return true if Op0 and Op1 should be swapped.
2911 static bool swapMayExposeCSEOpportunities(const Value * Op0,
2912 const Value * Op1) {
2913 // Filter out pointer value as those cannot appears directly in subtract.
2914 // FIXME: we may want to go through inttoptrs or bitcasts.
2915 if (Op0->getType()->isPointerTy())
2917 // Count every uses of both Op0 and Op1 in a subtract.
2918 // Each time Op0 is the first operand, count -1: swapping is bad, the
2919 // subtract has already the same layout as the compare.
2920 // Each time Op0 is the second operand, count +1: swapping is good, the
2921 // subtract has a different layout as the compare.
2922 // At the end, if the benefit is greater than 0, Op0 should come second to
2923 // expose more CSE opportunities.
2924 int GlobalSwapBenefits = 0;
2925 for (const User *U : Op0->users()) {
2926 const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(U);
2927 if (!BinOp || BinOp->getOpcode() != Instruction::Sub)
2929 // If Op0 is the first argument, this is not beneficial to swap the
2931 int LocalSwapBenefits = -1;
2932 unsigned Op1Idx = 1;
2933 if (BinOp->getOperand(Op1Idx) == Op0) {
2935 LocalSwapBenefits = 1;
2937 if (BinOp->getOperand(Op1Idx) != Op1)
2939 GlobalSwapBenefits += LocalSwapBenefits;
2941 return GlobalSwapBenefits > 0;
2944 /// \brief Check that one use is in the same block as the definition and all
2945 /// other uses are in blocks dominated by a given block
2947 /// \param DI Definition
2949 /// \param DB Block that must dominate all uses of \p DI outside
2950 /// the parent block
2951 /// \return true when \p UI is the only use of \p DI in the parent block
2952 /// and all other uses of \p DI are in blocks dominated by \p DB.
2954 bool InstCombiner::dominatesAllUses(const Instruction *DI,
2955 const Instruction *UI,
2956 const BasicBlock *DB) const {
2957 assert(DI && UI && "Instruction not defined\n");
2958 // ignore incomplete definitions
2959 if (!DI->getParent())
2961 // DI and UI must be in the same block
2962 if (DI->getParent() != UI->getParent())
2964 // Protect from self-referencing blocks
2965 if (DI->getParent() == DB)
2967 // DominatorTree available?
2970 for (const User *U : DI->users()) {
2971 auto *Usr = cast<Instruction>(U);
2972 if (Usr != UI && !DT->dominates(DB, Usr->getParent()))
2979 /// true when the instruction sequence within a block is select-cmp-br.
2981 static bool isChainSelectCmpBranch(const SelectInst *SI) {
2982 const BasicBlock *BB = SI->getParent();
2985 auto *BI = dyn_cast_or_null<BranchInst>(BB->getTerminator());
2986 if (!BI || BI->getNumSuccessors() != 2)
2988 auto *IC = dyn_cast<ICmpInst>(BI->getCondition());
2989 if (!IC || (IC->getOperand(0) != SI && IC->getOperand(1) != SI))
2995 /// \brief True when a select result is replaced by one of its operands
2996 /// in select-icmp sequence. This will eventually result in the elimination
2999 /// \param SI Select instruction
3000 /// \param Icmp Compare instruction
3001 /// \param SIOpd Operand that replaces the select
3004 /// - The replacement is global and requires dominator information
3005 /// - The caller is responsible for the actual replacement
3010 /// %4 = select i1 %3, %C* %0, %C* null
3011 /// %5 = icmp eq %C* %4, null
3012 /// br i1 %5, label %9, label %7
3014 /// ; <label>:7 ; preds = %entry
3015 /// %8 = getelementptr inbounds %C* %4, i64 0, i32 0
3018 /// can be transformed to
3020 /// %5 = icmp eq %C* %0, null
3021 /// %6 = select i1 %3, i1 %5, i1 true
3022 /// br i1 %6, label %9, label %7
3024 /// ; <label>:7 ; preds = %entry
3025 /// %8 = getelementptr inbounds %C* %0, i64 0, i32 0 // replace by %0!
3027 /// Similar when the first operand of the select is a constant or/and
3028 /// the compare is for not equal rather than equal.
3030 /// NOTE: The function is only called when the select and compare constants
3031 /// are equal, the optimization can work only for EQ predicates. This is not a
3032 /// major restriction since a NE compare should be 'normalized' to an equal
3033 /// compare, which usually happens in the combiner and test case
3034 /// select-cmp-br.ll
3036 bool InstCombiner::replacedSelectWithOperand(SelectInst *SI,
3037 const ICmpInst *Icmp,
3038 const unsigned SIOpd) {
3039 assert((SIOpd == 1 || SIOpd == 2) && "Invalid select operand!");
3040 if (isChainSelectCmpBranch(SI) && Icmp->getPredicate() == ICmpInst::ICMP_EQ) {
3041 BasicBlock *Succ = SI->getParent()->getTerminator()->getSuccessor(1);
3042 // The check for the unique predecessor is not the best that can be
3043 // done. But it protects efficiently against cases like when SI's
3044 // home block has two successors, Succ and Succ1, and Succ1 predecessor
3045 // of Succ. Then SI can't be replaced by SIOpd because the use that gets
3046 // replaced can be reached on either path. So the uniqueness check
3047 // guarantees that the path all uses of SI (outside SI's parent) are on
3048 // is disjoint from all other paths out of SI. But that information
3049 // is more expensive to compute, and the trade-off here is in favor
3051 if (Succ->getUniquePredecessor() && dominatesAllUses(SI, Icmp, Succ)) {
3053 SI->replaceUsesOutsideBlock(SI->getOperand(SIOpd), SI->getParent());
3060 Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
3061 bool Changed = false;
3062 Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
3063 unsigned Op0Cplxity = getComplexity(Op0);
3064 unsigned Op1Cplxity = getComplexity(Op1);
3066 /// Orders the operands of the compare so that they are listed from most
3067 /// complex to least complex. This puts constants before unary operators,
3068 /// before binary operators.
3069 if (Op0Cplxity < Op1Cplxity ||
3070 (Op0Cplxity == Op1Cplxity &&
3071 swapMayExposeCSEOpportunities(Op0, Op1))) {
3073 std::swap(Op0, Op1);
3078 SimplifyICmpInst(I.getPredicate(), Op0, Op1, DL, TLI, DT, AC, &I))
3079 return ReplaceInstUsesWith(I, V);
3081 // comparing -val or val with non-zero is the same as just comparing val
3082 // ie, abs(val) != 0 -> val != 0
3083 if (I.getPredicate() == ICmpInst::ICMP_NE && match(Op1, m_Zero()))
3085 Value *Cond, *SelectTrue, *SelectFalse;
3086 if (match(Op0, m_Select(m_Value(Cond), m_Value(SelectTrue),
3087 m_Value(SelectFalse)))) {
3088 if (Value *V = dyn_castNegVal(SelectTrue)) {
3089 if (V == SelectFalse)
3090 return CmpInst::Create(Instruction::ICmp, I.getPredicate(), V, Op1);
3092 else if (Value *V = dyn_castNegVal(SelectFalse)) {
3093 if (V == SelectTrue)
3094 return CmpInst::Create(Instruction::ICmp, I.getPredicate(), V, Op1);
3099 Type *Ty = Op0->getType();
3101 // icmp's with boolean values can always be turned into bitwise operations
3102 if (Ty->isIntegerTy(1)) {
3103 switch (I.getPredicate()) {
3104 default: llvm_unreachable("Invalid icmp instruction!");
3105 case ICmpInst::ICMP_EQ: { // icmp eq i1 A, B -> ~(A^B)
3106 Value *Xor = Builder->CreateXor(Op0, Op1, I.getName()+"tmp");
3107 return BinaryOperator::CreateNot(Xor);
3109 case ICmpInst::ICMP_NE: // icmp eq i1 A, B -> A^B
3110 return BinaryOperator::CreateXor(Op0, Op1);
3112 case ICmpInst::ICMP_UGT:
3113 std::swap(Op0, Op1); // Change icmp ugt -> icmp ult
3115 case ICmpInst::ICMP_ULT:{ // icmp ult i1 A, B -> ~A & B
3116 Value *Not = Builder->CreateNot(Op0, I.getName()+"tmp");
3117 return BinaryOperator::CreateAnd(Not, Op1);
3119 case ICmpInst::ICMP_SGT:
3120 std::swap(Op0, Op1); // Change icmp sgt -> icmp slt
3122 case ICmpInst::ICMP_SLT: { // icmp slt i1 A, B -> A & ~B
3123 Value *Not = Builder->CreateNot(Op1, I.getName()+"tmp");
3124 return BinaryOperator::CreateAnd(Not, Op0);
3126 case ICmpInst::ICMP_UGE:
3127 std::swap(Op0, Op1); // Change icmp uge -> icmp ule
3129 case ICmpInst::ICMP_ULE: { // icmp ule i1 A, B -> ~A | B
3130 Value *Not = Builder->CreateNot(Op0, I.getName()+"tmp");
3131 return BinaryOperator::CreateOr(Not, Op1);
3133 case ICmpInst::ICMP_SGE:
3134 std::swap(Op0, Op1); // Change icmp sge -> icmp sle
3136 case ICmpInst::ICMP_SLE: { // icmp sle i1 A, B -> A | ~B
3137 Value *Not = Builder->CreateNot(Op1, I.getName()+"tmp");
3138 return BinaryOperator::CreateOr(Not, Op0);
3143 unsigned BitWidth = 0;
3144 if (Ty->isIntOrIntVectorTy())
3145 BitWidth = Ty->getScalarSizeInBits();
3146 else // Get pointer size.
3147 BitWidth = DL.getTypeSizeInBits(Ty->getScalarType());
3149 bool isSignBit = false;
3151 // See if we are doing a comparison with a constant.
3152 if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
3153 Value *A = nullptr, *B = nullptr;
3155 // Match the following pattern, which is a common idiom when writing
3156 // overflow-safe integer arithmetic function. The source performs an
3157 // addition in wider type, and explicitly checks for overflow using
3158 // comparisons against INT_MIN and INT_MAX. Simplify this by using the
3159 // sadd_with_overflow intrinsic.
3161 // TODO: This could probably be generalized to handle other overflow-safe
3162 // operations if we worked out the formulas to compute the appropriate
3166 // if (sum+128 >u 255) ... -> llvm.sadd.with.overflow.i8
3168 ConstantInt *CI2; // I = icmp ugt (add (add A, B), CI2), CI
3169 if (I.getPredicate() == ICmpInst::ICMP_UGT &&
3170 match(Op0, m_Add(m_Add(m_Value(A), m_Value(B)), m_ConstantInt(CI2))))
3171 if (Instruction *Res = ProcessUGT_ADDCST_ADD(I, A, B, CI2, CI, *this))
3175 // The following transforms are only 'worth it' if the only user of the
3176 // subtraction is the icmp.
3177 if (Op0->hasOneUse()) {
3178 // (icmp ne/eq (sub A B) 0) -> (icmp ne/eq A, B)
3179 if (I.isEquality() && CI->isZero() &&
3180 match(Op0, m_Sub(m_Value(A), m_Value(B))))
3181 return new ICmpInst(I.getPredicate(), A, B);
3183 // (icmp sgt (sub nsw A B), -1) -> (icmp sge A, B)
3184 if (I.getPredicate() == ICmpInst::ICMP_SGT && CI->isAllOnesValue() &&
3185 match(Op0, m_NSWSub(m_Value(A), m_Value(B))))
3186 return new ICmpInst(ICmpInst::ICMP_SGE, A, B);
3188 // (icmp sgt (sub nsw A B), 0) -> (icmp sgt A, B)
3189 if (I.getPredicate() == ICmpInst::ICMP_SGT && CI->isZero() &&
3190 match(Op0, m_NSWSub(m_Value(A), m_Value(B))))
3191 return new ICmpInst(ICmpInst::ICMP_SGT, A, B);
3193 // (icmp slt (sub nsw A B), 0) -> (icmp slt A, B)
3194 if (I.getPredicate() == ICmpInst::ICMP_SLT && CI->isZero() &&
3195 match(Op0, m_NSWSub(m_Value(A), m_Value(B))))
3196 return new ICmpInst(ICmpInst::ICMP_SLT, A, B);
3198 // (icmp slt (sub nsw A B), 1) -> (icmp sle A, B)
3199 if (I.getPredicate() == ICmpInst::ICMP_SLT && CI->isOne() &&
3200 match(Op0, m_NSWSub(m_Value(A), m_Value(B))))
3201 return new ICmpInst(ICmpInst::ICMP_SLE, A, B);
3204 // If we have an icmp le or icmp ge instruction, turn it into the
3205 // appropriate icmp lt or icmp gt instruction. This allows us to rely on
3206 // them being folded in the code below. The SimplifyICmpInst code has
3207 // already handled the edge cases for us, so we just assert on them.
3208 switch (I.getPredicate()) {
3210 case ICmpInst::ICMP_ULE:
3211 assert(!CI->isMaxValue(false)); // A <=u MAX -> TRUE
3212 return new ICmpInst(ICmpInst::ICMP_ULT, Op0,
3213 Builder->getInt(CI->getValue()+1));
3214 case ICmpInst::ICMP_SLE:
3215 assert(!CI->isMaxValue(true)); // A <=s MAX -> TRUE
3216 return new ICmpInst(ICmpInst::ICMP_SLT, Op0,
3217 Builder->getInt(CI->getValue()+1));
3218 case ICmpInst::ICMP_UGE:
3219 assert(!CI->isMinValue(false)); // A >=u MIN -> TRUE
3220 return new ICmpInst(ICmpInst::ICMP_UGT, Op0,
3221 Builder->getInt(CI->getValue()-1));
3222 case ICmpInst::ICMP_SGE:
3223 assert(!CI->isMinValue(true)); // A >=s MIN -> TRUE
3224 return new ICmpInst(ICmpInst::ICMP_SGT, Op0,
3225 Builder->getInt(CI->getValue()-1));
3228 if (I.isEquality()) {
3230 if (match(Op0, m_AShr(m_ConstantInt(CI2), m_Value(A))) ||
3231 match(Op0, m_LShr(m_ConstantInt(CI2), m_Value(A)))) {
3232 // (icmp eq/ne (ashr/lshr const2, A), const1)
3233 if (Instruction *Inst = FoldICmpCstShrCst(I, Op0, A, CI, CI2))
3236 if (match(Op0, m_Shl(m_ConstantInt(CI2), m_Value(A)))) {
3237 // (icmp eq/ne (shl const2, A), const1)
3238 if (Instruction *Inst = FoldICmpCstShlCst(I, Op0, A, CI, CI2))
3243 // If this comparison is a normal comparison, it demands all
3244 // bits, if it is a sign bit comparison, it only demands the sign bit.
3246 isSignBit = isSignBitCheck(I.getPredicate(), CI, UnusedBit);
3249 // See if we can fold the comparison based on range information we can get
3250 // by checking whether bits are known to be zero or one in the input.
3251 if (BitWidth != 0) {
3252 APInt Op0KnownZero(BitWidth, 0), Op0KnownOne(BitWidth, 0);
3253 APInt Op1KnownZero(BitWidth, 0), Op1KnownOne(BitWidth, 0);
3255 if (SimplifyDemandedBits(I.getOperandUse(0),
3256 DemandedBitsLHSMask(I, BitWidth, isSignBit),
3257 Op0KnownZero, Op0KnownOne, 0))
3259 if (SimplifyDemandedBits(I.getOperandUse(1),
3260 APInt::getAllOnesValue(BitWidth), Op1KnownZero,
3264 // Given the known and unknown bits, compute a range that the LHS could be
3265 // in. Compute the Min, Max and RHS values based on the known bits. For the
3266 // EQ and NE we use unsigned values.
3267 APInt Op0Min(BitWidth, 0), Op0Max(BitWidth, 0);
3268 APInt Op1Min(BitWidth, 0), Op1Max(BitWidth, 0);
3270 ComputeSignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne,
3272 ComputeSignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne,
3275 ComputeUnsignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne,
3277 ComputeUnsignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne,
3281 // If Min and Max are known to be the same, then SimplifyDemandedBits
3282 // figured out that the LHS is a constant. Just constant fold this now so
3283 // that code below can assume that Min != Max.
3284 if (!isa<Constant>(Op0) && Op0Min == Op0Max)
3285 return new ICmpInst(I.getPredicate(),
3286 ConstantInt::get(Op0->getType(), Op0Min), Op1);
3287 if (!isa<Constant>(Op1) && Op1Min == Op1Max)
3288 return new ICmpInst(I.getPredicate(), Op0,
3289 ConstantInt::get(Op1->getType(), Op1Min));
3291 // Based on the range information we know about the LHS, see if we can
3292 // simplify this comparison. For example, (x&4) < 8 is always true.
3293 switch (I.getPredicate()) {
3294 default: llvm_unreachable("Unknown icmp opcode!");
3295 case ICmpInst::ICMP_EQ: {
3296 if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max))
3297 return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
3299 // If all bits are known zero except for one, then we know at most one
3300 // bit is set. If the comparison is against zero, then this is a check
3301 // to see if *that* bit is set.
3302 APInt Op0KnownZeroInverted = ~Op0KnownZero;
3303 if (~Op1KnownZero == 0) {
3304 // If the LHS is an AND with the same constant, look through it.
3305 Value *LHS = nullptr;
3306 ConstantInt *LHSC = nullptr;
3307 if (!match(Op0, m_And(m_Value(LHS), m_ConstantInt(LHSC))) ||
3308 LHSC->getValue() != Op0KnownZeroInverted)
3311 // If the LHS is 1 << x, and we know the result is a power of 2 like 8,
3312 // then turn "((1 << x)&8) == 0" into "x != 3".
3313 // or turn "((1 << x)&7) == 0" into "x > 2".
3315 if (match(LHS, m_Shl(m_One(), m_Value(X)))) {
3316 APInt ValToCheck = Op0KnownZeroInverted;
3317 if (ValToCheck.isPowerOf2()) {
3318 unsigned CmpVal = ValToCheck.countTrailingZeros();
3319 return new ICmpInst(ICmpInst::ICMP_NE, X,
3320 ConstantInt::get(X->getType(), CmpVal));
3321 } else if ((++ValToCheck).isPowerOf2()) {
3322 unsigned CmpVal = ValToCheck.countTrailingZeros() - 1;
3323 return new ICmpInst(ICmpInst::ICMP_UGT, X,
3324 ConstantInt::get(X->getType(), CmpVal));
3328 // If the LHS is 8 >>u x, and we know the result is a power of 2 like 1,
3329 // then turn "((8 >>u x)&1) == 0" into "x != 3".
3331 if (Op0KnownZeroInverted == 1 &&
3332 match(LHS, m_LShr(m_Power2(CI), m_Value(X))))
3333 return new ICmpInst(ICmpInst::ICMP_NE, X,
3334 ConstantInt::get(X->getType(),
3335 CI->countTrailingZeros()));
3339 case ICmpInst::ICMP_NE: {
3340 if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max))
3341 return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
3343 // If all bits are known zero except for one, then we know at most one
3344 // bit is set. If the comparison is against zero, then this is a check
3345 // to see if *that* bit is set.
3346 APInt Op0KnownZeroInverted = ~Op0KnownZero;
3347 if (~Op1KnownZero == 0) {
3348 // If the LHS is an AND with the same constant, look through it.
3349 Value *LHS = nullptr;
3350 ConstantInt *LHSC = nullptr;
3351 if (!match(Op0, m_And(m_Value(LHS), m_ConstantInt(LHSC))) ||
3352 LHSC->getValue() != Op0KnownZeroInverted)
3355 // If the LHS is 1 << x, and we know the result is a power of 2 like 8,
3356 // then turn "((1 << x)&8) != 0" into "x == 3".
3357 // or turn "((1 << x)&7) != 0" into "x < 3".
3359 if (match(LHS, m_Shl(m_One(), m_Value(X)))) {
3360 APInt ValToCheck = Op0KnownZeroInverted;
3361 if (ValToCheck.isPowerOf2()) {
3362 unsigned CmpVal = ValToCheck.countTrailingZeros();
3363 return new ICmpInst(ICmpInst::ICMP_EQ, X,
3364 ConstantInt::get(X->getType(), CmpVal));
3365 } else if ((++ValToCheck).isPowerOf2()) {
3366 unsigned CmpVal = ValToCheck.countTrailingZeros();
3367 return new ICmpInst(ICmpInst::ICMP_ULT, X,
3368 ConstantInt::get(X->getType(), CmpVal));
3372 // If the LHS is 8 >>u x, and we know the result is a power of 2 like 1,
3373 // then turn "((8 >>u x)&1) != 0" into "x == 3".
3375 if (Op0KnownZeroInverted == 1 &&
3376 match(LHS, m_LShr(m_Power2(CI), m_Value(X))))
3377 return new ICmpInst(ICmpInst::ICMP_EQ, X,
3378 ConstantInt::get(X->getType(),
3379 CI->countTrailingZeros()));
3383 case ICmpInst::ICMP_ULT:
3384 if (Op0Max.ult(Op1Min)) // A <u B -> true if max(A) < min(B)
3385 return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
3386 if (Op0Min.uge(Op1Max)) // A <u B -> false if min(A) >= max(B)
3387 return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
3388 if (Op1Min == Op0Max) // A <u B -> A != B if max(A) == min(B)
3389 return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
3390 if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
3391 if (Op1Max == Op0Min+1) // A <u C -> A == C-1 if min(A)+1 == C
3392 return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
3393 Builder->getInt(CI->getValue()-1));
3395 // (x <u 2147483648) -> (x >s -1) -> true if sign bit clear
3396 if (CI->isMinValue(true))
3397 return new ICmpInst(ICmpInst::ICMP_SGT, Op0,
3398 Constant::getAllOnesValue(Op0->getType()));
3401 case ICmpInst::ICMP_UGT:
3402 if (Op0Min.ugt(Op1Max)) // A >u B -> true if min(A) > max(B)
3403 return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
3404 if (Op0Max.ule(Op1Min)) // A >u B -> false if max(A) <= max(B)
3405 return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
3407 if (Op1Max == Op0Min) // A >u B -> A != B if min(A) == max(B)
3408 return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
3409 if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
3410 if (Op1Min == Op0Max-1) // A >u C -> A == C+1 if max(a)-1 == C
3411 return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
3412 Builder->getInt(CI->getValue()+1));
3414 // (x >u 2147483647) -> (x <s 0) -> true if sign bit set
3415 if (CI->isMaxValue(true))
3416 return new ICmpInst(ICmpInst::ICMP_SLT, Op0,
3417 Constant::getNullValue(Op0->getType()));
3420 case ICmpInst::ICMP_SLT:
3421 if (Op0Max.slt(Op1Min)) // A <s B -> true if max(A) < min(C)
3422 return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
3423 if (Op0Min.sge(Op1Max)) // A <s B -> false if min(A) >= max(C)
3424 return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
3425 if (Op1Min == Op0Max) // A <s B -> A != B if max(A) == min(B)
3426 return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
3427 if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
3428 if (Op1Max == Op0Min+1) // A <s C -> A == C-1 if min(A)+1 == C
3429 return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
3430 Builder->getInt(CI->getValue()-1));
3433 case ICmpInst::ICMP_SGT:
3434 if (Op0Min.sgt(Op1Max)) // A >s B -> true if min(A) > max(B)
3435 return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
3436 if (Op0Max.sle(Op1Min)) // A >s B -> false if max(A) <= min(B)
3437 return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
3439 if (Op1Max == Op0Min) // A >s B -> A != B if min(A) == max(B)
3440 return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
3441 if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
3442 if (Op1Min == Op0Max-1) // A >s C -> A == C+1 if max(A)-1 == C
3443 return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
3444 Builder->getInt(CI->getValue()+1));
3447 case ICmpInst::ICMP_SGE:
3448 assert(!isa<ConstantInt>(Op1) && "ICMP_SGE with ConstantInt not folded!");
3449 if (Op0Min.sge(Op1Max)) // A >=s B -> true if min(A) >= max(B)
3450 return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
3451 if (Op0Max.slt(Op1Min)) // A >=s B -> false if max(A) < min(B)
3452 return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
3454 case ICmpInst::ICMP_SLE:
3455 assert(!isa<ConstantInt>(Op1) && "ICMP_SLE with ConstantInt not folded!");
3456 if (Op0Max.sle(Op1Min)) // A <=s B -> true if max(A) <= min(B)
3457 return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
3458 if (Op0Min.sgt(Op1Max)) // A <=s B -> false if min(A) > max(B)
3459 return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
3461 case ICmpInst::ICMP_UGE:
3462 assert(!isa<ConstantInt>(Op1) && "ICMP_UGE with ConstantInt not folded!");
3463 if (Op0Min.uge(Op1Max)) // A >=u B -> true if min(A) >= max(B)
3464 return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
3465 if (Op0Max.ult(Op1Min)) // A >=u B -> false if max(A) < min(B)
3466 return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
3468 case ICmpInst::ICMP_ULE:
3469 assert(!isa<ConstantInt>(Op1) && "ICMP_ULE with ConstantInt not folded!");
3470 if (Op0Max.ule(Op1Min)) // A <=u B -> true if max(A) <= min(B)
3471 return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
3472 if (Op0Min.ugt(Op1Max)) // A <=u B -> false if min(A) > max(B)
3473 return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
3477 // Turn a signed comparison into an unsigned one if both operands
3478 // are known to have the same sign.
3480 ((Op0KnownZero.isNegative() && Op1KnownZero.isNegative()) ||
3481 (Op0KnownOne.isNegative() && Op1KnownOne.isNegative())))
3482 return new ICmpInst(I.getUnsignedPredicate(), Op0, Op1);
3485 // Test if the ICmpInst instruction is used exclusively by a select as
3486 // part of a minimum or maximum operation. If so, refrain from doing
3487 // any other folding. This helps out other analyses which understand
3488 // non-obfuscated minimum and maximum idioms, such as ScalarEvolution
3489 // and CodeGen. And in this case, at least one of the comparison
3490 // operands has at least one user besides the compare (the select),
3491 // which would often largely negate the benefit of folding anyway.
3493 if (SelectInst *SI = dyn_cast<SelectInst>(*I.user_begin()))
3494 if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) ||
3495 (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1))
3498 // See if we are doing a comparison between a constant and an instruction that
3499 // can be folded into the comparison.
3500 if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
3501 // Since the RHS is a ConstantInt (CI), if the left hand side is an
3502 // instruction, see if that instruction also has constants so that the
3503 // instruction can be folded into the icmp
3504 if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
3505 if (Instruction *Res = visitICmpInstWithInstAndIntCst(I, LHSI, CI))
3509 // Handle icmp with constant (but not simple integer constant) RHS
3510 if (Constant *RHSC = dyn_cast<Constant>(Op1)) {
3511 if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
3512 switch (LHSI->getOpcode()) {
3513 case Instruction::GetElementPtr:
3514 // icmp pred GEP (P, int 0, int 0, int 0), null -> icmp pred P, null
3515 if (RHSC->isNullValue() &&
3516 cast<GetElementPtrInst>(LHSI)->hasAllZeroIndices())
3517 return new ICmpInst(I.getPredicate(), LHSI->getOperand(0),
3518 Constant::getNullValue(LHSI->getOperand(0)->getType()));
3520 case Instruction::PHI:
3521 // Only fold icmp into the PHI if the phi and icmp are in the same
3522 // block. If in the same block, we're encouraging jump threading. If
3523 // not, we are just pessimizing the code by making an i1 phi.
3524 if (LHSI->getParent() == I.getParent())
3525 if (Instruction *NV = FoldOpIntoPhi(I))
3528 case Instruction::Select: {
3529 // If either operand of the select is a constant, we can fold the
3530 // comparison into the select arms, which will cause one to be
3531 // constant folded and the select turned into a bitwise or.
3532 Value *Op1 = nullptr, *Op2 = nullptr;
3533 ConstantInt *CI = nullptr;
3534 if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1))) {
3535 Op1 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
3536 CI = dyn_cast<ConstantInt>(Op1);
3538 if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2))) {
3539 Op2 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
3540 CI = dyn_cast<ConstantInt>(Op2);
3543 // We only want to perform this transformation if it will not lead to
3544 // additional code. This is true if either both sides of the select
3545 // fold to a constant (in which case the icmp is replaced with a select
3546 // which will usually simplify) or this is the only user of the
3547 // select (in which case we are trading a select+icmp for a simpler
3548 // select+icmp) or all uses of the select can be replaced based on
3549 // dominance information ("Global cases").
3550 bool Transform = false;
3553 else if (Op1 || Op2) {
3555 if (LHSI->hasOneUse())
3558 else if (CI && !CI->isZero())
3559 // When Op1 is constant try replacing select with second operand.
3560 // Otherwise Op2 is constant and try replacing select with first
3562 Transform = replacedSelectWithOperand(cast<SelectInst>(LHSI), &I,
3567 Op1 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(1),
3570 Op2 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(2),
3572 return SelectInst::Create(LHSI->getOperand(0), Op1, Op2);
3576 case Instruction::IntToPtr:
3577 // icmp pred inttoptr(X), null -> icmp pred X, 0
3578 if (RHSC->isNullValue() &&
3579 DL.getIntPtrType(RHSC->getType()) == LHSI->getOperand(0)->getType())
3580 return new ICmpInst(I.getPredicate(), LHSI->getOperand(0),
3581 Constant::getNullValue(LHSI->getOperand(0)->getType()));
3584 case Instruction::Load:
3585 // Try to optimize things like "A[i] > 4" to index computations.
3586 if (GetElementPtrInst *GEP =
3587 dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) {
3588 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
3589 if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
3590 !cast<LoadInst>(LHSI)->isVolatile())
3591 if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV, I))
3598 // If we can optimize a 'icmp GEP, P' or 'icmp P, GEP', do so now.
3599 if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op0))
3600 if (Instruction *NI = FoldGEPICmp(GEP, Op1, I.getPredicate(), I))
3602 if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op1))
3603 if (Instruction *NI = FoldGEPICmp(GEP, Op0,
3604 ICmpInst::getSwappedPredicate(I.getPredicate()), I))
3607 // Try to optimize equality comparisons against alloca-based pointers.
3608 if (Op0->getType()->isPointerTy() && I.isEquality()) {
3609 assert(Op1->getType()->isPointerTy() && "Comparing pointer with non-pointer?");
3610 if (auto *Alloca = dyn_cast<AllocaInst>(GetUnderlyingObject(Op0, DL)))
3611 if (Instruction *New = FoldAllocaCmp(I, Alloca, Op1))
3613 if (auto *Alloca = dyn_cast<AllocaInst>(GetUnderlyingObject(Op1, DL)))
3614 if (Instruction *New = FoldAllocaCmp(I, Alloca, Op0))
3618 // Test to see if the operands of the icmp are casted versions of other
3619 // values. If the ptr->ptr cast can be stripped off both arguments, we do so
3621 if (BitCastInst *CI = dyn_cast<BitCastInst>(Op0)) {
3622 if (Op0->getType()->isPointerTy() &&
3623 (isa<Constant>(Op1) || isa<BitCastInst>(Op1))) {
3624 // We keep moving the cast from the left operand over to the right
3625 // operand, where it can often be eliminated completely.
3626 Op0 = CI->getOperand(0);
3628 // If operand #1 is a bitcast instruction, it must also be a ptr->ptr cast
3629 // so eliminate it as well.
3630 if (BitCastInst *CI2 = dyn_cast<BitCastInst>(Op1))
3631 Op1 = CI2->getOperand(0);
3633 // If Op1 is a constant, we can fold the cast into the constant.
3634 if (Op0->getType() != Op1->getType()) {
3635 if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
3636 Op1 = ConstantExpr::getBitCast(Op1C, Op0->getType());
3638 // Otherwise, cast the RHS right before the icmp
3639 Op1 = Builder->CreateBitCast(Op1, Op0->getType());
3642 return new ICmpInst(I.getPredicate(), Op0, Op1);
3646 if (isa<CastInst>(Op0)) {
3647 // Handle the special case of: icmp (cast bool to X), <cst>
3648 // This comes up when you have code like
3651 // For generality, we handle any zero-extension of any operand comparison
3652 // with a constant or another cast from the same type.
3653 if (isa<Constant>(Op1) || isa<CastInst>(Op1))
3654 if (Instruction *R = visitICmpInstWithCastAndCast(I))
3658 // Special logic for binary operators.
3659 BinaryOperator *BO0 = dyn_cast<BinaryOperator>(Op0);
3660 BinaryOperator *BO1 = dyn_cast<BinaryOperator>(Op1);
3662 CmpInst::Predicate Pred = I.getPredicate();
3663 bool NoOp0WrapProblem = false, NoOp1WrapProblem = false;
3664 if (BO0 && isa<OverflowingBinaryOperator>(BO0))
3665 NoOp0WrapProblem = ICmpInst::isEquality(Pred) ||
3666 (CmpInst::isUnsigned(Pred) && BO0->hasNoUnsignedWrap()) ||
3667 (CmpInst::isSigned(Pred) && BO0->hasNoSignedWrap());
3668 if (BO1 && isa<OverflowingBinaryOperator>(BO1))
3669 NoOp1WrapProblem = ICmpInst::isEquality(Pred) ||
3670 (CmpInst::isUnsigned(Pred) && BO1->hasNoUnsignedWrap()) ||
3671 (CmpInst::isSigned(Pred) && BO1->hasNoSignedWrap());
3673 // Analyze the case when either Op0 or Op1 is an add instruction.
3674 // Op0 = A + B (or A and B are null); Op1 = C + D (or C and D are null).
3675 Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr;
3676 if (BO0 && BO0->getOpcode() == Instruction::Add)
3677 A = BO0->getOperand(0), B = BO0->getOperand(1);
3678 if (BO1 && BO1->getOpcode() == Instruction::Add)
3679 C = BO1->getOperand(0), D = BO1->getOperand(1);
3681 // icmp (X+cst) < 0 --> X < -cst
3682 if (NoOp0WrapProblem && ICmpInst::isSigned(Pred) && match(Op1, m_Zero()))
3683 if (ConstantInt *RHSC = dyn_cast_or_null<ConstantInt>(B))
3684 if (!RHSC->isMinValue(/*isSigned=*/true))
3685 return new ICmpInst(Pred, A, ConstantExpr::getNeg(RHSC));
3687 // icmp (X+Y), X -> icmp Y, 0 for equalities or if there is no overflow.
3688 if ((A == Op1 || B == Op1) && NoOp0WrapProblem)
3689 return new ICmpInst(Pred, A == Op1 ? B : A,
3690 Constant::getNullValue(Op1->getType()));
3692 // icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow.
3693 if ((C == Op0 || D == Op0) && NoOp1WrapProblem)
3694 return new ICmpInst(Pred, Constant::getNullValue(Op0->getType()),
3697 // icmp (X+Y), (X+Z) -> icmp Y, Z for equalities or if there is no overflow.
3698 if (A && C && (A == C || A == D || B == C || B == D) &&
3699 NoOp0WrapProblem && NoOp1WrapProblem &&
3700 // Try not to increase register pressure.
3701 BO0->hasOneUse() && BO1->hasOneUse()) {
3702 // Determine Y and Z in the form icmp (X+Y), (X+Z).
3705 // C + B == C + D -> B == D
3708 } else if (A == D) {
3709 // D + B == C + D -> B == C
3712 } else if (B == C) {
3713 // A + C == C + D -> A == D
3718 // A + D == C + D -> A == C
3722 return new ICmpInst(Pred, Y, Z);
3725 // icmp slt (X + -1), Y -> icmp sle X, Y
3726 if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLT &&
3727 match(B, m_AllOnes()))
3728 return new ICmpInst(CmpInst::ICMP_SLE, A, Op1);
3730 // icmp sge (X + -1), Y -> icmp sgt X, Y
3731 if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGE &&
3732 match(B, m_AllOnes()))
3733 return new ICmpInst(CmpInst::ICMP_SGT, A, Op1);
3735 // icmp sle (X + 1), Y -> icmp slt X, Y
3736 if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLE &&
3738 return new ICmpInst(CmpInst::ICMP_SLT, A, Op1);
3740 // icmp sgt (X + 1), Y -> icmp sge X, Y
3741 if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGT &&
3743 return new ICmpInst(CmpInst::ICMP_SGE, A, Op1);
3745 // icmp sgt X, (Y + -1) -> icmp sge X, Y
3746 if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SGT &&
3747 match(D, m_AllOnes()))
3748 return new ICmpInst(CmpInst::ICMP_SGE, Op0, C);
3750 // icmp sle X, (Y + -1) -> icmp slt X, Y
3751 if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SLE &&
3752 match(D, m_AllOnes()))
3753 return new ICmpInst(CmpInst::ICMP_SLT, Op0, C);
3755 // icmp sge X, (Y + 1) -> icmp sgt X, Y
3756 if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SGE &&
3758 return new ICmpInst(CmpInst::ICMP_SGT, Op0, C);
3760 // icmp slt X, (Y + 1) -> icmp sle X, Y
3761 if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SLT &&
3763 return new ICmpInst(CmpInst::ICMP_SLE, Op0, C);
3765 // if C1 has greater magnitude than C2:
3766 // icmp (X + C1), (Y + C2) -> icmp (X + C3), Y
3767 // s.t. C3 = C1 - C2
3769 // if C2 has greater magnitude than C1:
3770 // icmp (X + C1), (Y + C2) -> icmp X, (Y + C3)
3771 // s.t. C3 = C2 - C1
3772 if (A && C && NoOp0WrapProblem && NoOp1WrapProblem &&
3773 (BO0->hasOneUse() || BO1->hasOneUse()) && !I.isUnsigned())
3774 if (ConstantInt *C1 = dyn_cast<ConstantInt>(B))
3775 if (ConstantInt *C2 = dyn_cast<ConstantInt>(D)) {
3776 const APInt &AP1 = C1->getValue();
3777 const APInt &AP2 = C2->getValue();
3778 if (AP1.isNegative() == AP2.isNegative()) {
3779 APInt AP1Abs = C1->getValue().abs();
3780 APInt AP2Abs = C2->getValue().abs();
3781 if (AP1Abs.uge(AP2Abs)) {
3782 ConstantInt *C3 = Builder->getInt(AP1 - AP2);
3783 Value *NewAdd = Builder->CreateNSWAdd(A, C3);
3784 return new ICmpInst(Pred, NewAdd, C);
3786 ConstantInt *C3 = Builder->getInt(AP2 - AP1);
3787 Value *NewAdd = Builder->CreateNSWAdd(C, C3);
3788 return new ICmpInst(Pred, A, NewAdd);
3794 // Analyze the case when either Op0 or Op1 is a sub instruction.
3795 // Op0 = A - B (or A and B are null); Op1 = C - D (or C and D are null).
3796 A = nullptr; B = nullptr; C = nullptr; D = nullptr;
3797 if (BO0 && BO0->getOpcode() == Instruction::Sub)
3798 A = BO0->getOperand(0), B = BO0->getOperand(1);
3799 if (BO1 && BO1->getOpcode() == Instruction::Sub)
3800 C = BO1->getOperand(0), D = BO1->getOperand(1);
3802 // icmp (X-Y), X -> icmp 0, Y for equalities or if there is no overflow.
3803 if (A == Op1 && NoOp0WrapProblem)
3804 return new ICmpInst(Pred, Constant::getNullValue(Op1->getType()), B);
3806 // icmp X, (X-Y) -> icmp Y, 0 for equalities or if there is no overflow.
3807 if (C == Op0 && NoOp1WrapProblem)
3808 return new ICmpInst(Pred, D, Constant::getNullValue(Op0->getType()));
3810 // icmp (Y-X), (Z-X) -> icmp Y, Z for equalities or if there is no overflow.
3811 if (B && D && B == D && NoOp0WrapProblem && NoOp1WrapProblem &&
3812 // Try not to increase register pressure.
3813 BO0->hasOneUse() && BO1->hasOneUse())
3814 return new ICmpInst(Pred, A, C);
3816 // icmp (X-Y), (X-Z) -> icmp Z, Y for equalities or if there is no overflow.
3817 if (A && C && A == C && NoOp0WrapProblem && NoOp1WrapProblem &&
3818 // Try not to increase register pressure.
3819 BO0->hasOneUse() && BO1->hasOneUse())
3820 return new ICmpInst(Pred, D, B);
3822 // icmp (0-X) < cst --> x > -cst
3823 if (NoOp0WrapProblem && ICmpInst::isSigned(Pred)) {
3825 if (match(BO0, m_Neg(m_Value(X))))
3826 if (ConstantInt *RHSC = dyn_cast<ConstantInt>(Op1))
3827 if (!RHSC->isMinValue(/*isSigned=*/true))
3828 return new ICmpInst(I.getSwappedPredicate(), X,
3829 ConstantExpr::getNeg(RHSC));
3832 BinaryOperator *SRem = nullptr;
3833 // icmp (srem X, Y), Y
3834 if (BO0 && BO0->getOpcode() == Instruction::SRem &&
3835 Op1 == BO0->getOperand(1))
3837 // icmp Y, (srem X, Y)
3838 else if (BO1 && BO1->getOpcode() == Instruction::SRem &&
3839 Op0 == BO1->getOperand(1))
3842 // We don't check hasOneUse to avoid increasing register pressure because
3843 // the value we use is the same value this instruction was already using.
3844 switch (SRem == BO0 ? ICmpInst::getSwappedPredicate(Pred) : Pred) {
3846 case ICmpInst::ICMP_EQ:
3847 return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
3848 case ICmpInst::ICMP_NE:
3849 return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
3850 case ICmpInst::ICMP_SGT:
3851 case ICmpInst::ICMP_SGE:
3852 return new ICmpInst(ICmpInst::ICMP_SGT, SRem->getOperand(1),
3853 Constant::getAllOnesValue(SRem->getType()));
3854 case ICmpInst::ICMP_SLT:
3855 case ICmpInst::ICMP_SLE:
3856 return new ICmpInst(ICmpInst::ICMP_SLT, SRem->getOperand(1),
3857 Constant::getNullValue(SRem->getType()));
3861 if (BO0 && BO1 && BO0->getOpcode() == BO1->getOpcode() &&
3862 BO0->hasOneUse() && BO1->hasOneUse() &&
3863 BO0->getOperand(1) == BO1->getOperand(1)) {
3864 switch (BO0->getOpcode()) {
3866 case Instruction::Add:
3867 case Instruction::Sub:
3868 case Instruction::Xor:
3869 if (I.isEquality()) // a+x icmp eq/ne b+x --> a icmp b
3870 return new ICmpInst(I.getPredicate(), BO0->getOperand(0),
3871 BO1->getOperand(0));
3872 // icmp u/s (a ^ signbit), (b ^ signbit) --> icmp s/u a, b
3873 if (ConstantInt *CI = dyn_cast<ConstantInt>(BO0->getOperand(1))) {
3874 if (CI->getValue().isSignBit()) {
3875 ICmpInst::Predicate Pred = I.isSigned()
3876 ? I.getUnsignedPredicate()
3877 : I.getSignedPredicate();
3878 return new ICmpInst(Pred, BO0->getOperand(0),
3879 BO1->getOperand(0));
3882 if (CI->isMaxValue(true)) {
3883 ICmpInst::Predicate Pred = I.isSigned()
3884 ? I.getUnsignedPredicate()
3885 : I.getSignedPredicate();
3886 Pred = I.getSwappedPredicate(Pred);
3887 return new ICmpInst(Pred, BO0->getOperand(0),
3888 BO1->getOperand(0));
3892 case Instruction::Mul:
3893 if (!I.isEquality())
3896 if (ConstantInt *CI = dyn_cast<ConstantInt>(BO0->getOperand(1))) {
3897 // a * Cst icmp eq/ne b * Cst --> a & Mask icmp b & Mask
3898 // Mask = -1 >> count-trailing-zeros(Cst).
3899 if (!CI->isZero() && !CI->isOne()) {
3900 const APInt &AP = CI->getValue();
3901 ConstantInt *Mask = ConstantInt::get(I.getContext(),
3902 APInt::getLowBitsSet(AP.getBitWidth(),
3904 AP.countTrailingZeros()));
3905 Value *And1 = Builder->CreateAnd(BO0->getOperand(0), Mask);
3906 Value *And2 = Builder->CreateAnd(BO1->getOperand(0), Mask);
3907 return new ICmpInst(I.getPredicate(), And1, And2);
3911 case Instruction::UDiv:
3912 case Instruction::LShr:
3916 case Instruction::SDiv:
3917 case Instruction::AShr:
3918 if (!BO0->isExact() || !BO1->isExact())
3920 return new ICmpInst(I.getPredicate(), BO0->getOperand(0),
3921 BO1->getOperand(0));
3922 case Instruction::Shl: {
3923 bool NUW = BO0->hasNoUnsignedWrap() && BO1->hasNoUnsignedWrap();
3924 bool NSW = BO0->hasNoSignedWrap() && BO1->hasNoSignedWrap();
3927 if (!NSW && I.isSigned())
3929 return new ICmpInst(I.getPredicate(), BO0->getOperand(0),
3930 BO1->getOperand(0));
3936 // Transform A & (L - 1) `ult` L --> L != 0
3937 auto LSubOne = m_Add(m_Specific(Op1), m_AllOnes());
3939 m_CombineOr(m_And(m_Value(), LSubOne), m_And(LSubOne, m_Value()));
3941 if (match(BO0, BitwiseAnd) && I.getPredicate() == ICmpInst::ICMP_ULT) {
3942 auto *Zero = Constant::getNullValue(BO0->getType());
3943 return new ICmpInst(ICmpInst::ICMP_NE, Op1, Zero);
3949 // Transform (A & ~B) == 0 --> (A & B) != 0
3950 // and (A & ~B) != 0 --> (A & B) == 0
3951 // if A is a power of 2.
3952 if (match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) &&
3953 match(Op1, m_Zero()) &&
3954 isKnownToBeAPowerOfTwo(A, DL, false, 0, AC, &I, DT) && I.isEquality())
3955 return new ICmpInst(I.getInversePredicate(),
3956 Builder->CreateAnd(A, B),
3959 // ~x < ~y --> y < x
3960 // ~x < cst --> ~cst < x
3961 if (match(Op0, m_Not(m_Value(A)))) {
3962 if (match(Op1, m_Not(m_Value(B))))
3963 return new ICmpInst(I.getPredicate(), B, A);
3964 if (ConstantInt *RHSC = dyn_cast<ConstantInt>(Op1))
3965 return new ICmpInst(I.getPredicate(), ConstantExpr::getNot(RHSC), A);
3968 Instruction *AddI = nullptr;
3969 if (match(&I, m_UAddWithOverflow(m_Value(A), m_Value(B),
3970 m_Instruction(AddI))) &&
3971 isa<IntegerType>(A->getType())) {
3974 if (OptimizeOverflowCheck(OCF_UNSIGNED_ADD, A, B, *AddI, Result,
3976 ReplaceInstUsesWith(*AddI, Result);
3977 return ReplaceInstUsesWith(I, Overflow);
3981 // (zext a) * (zext b) --> llvm.umul.with.overflow.
3982 if (match(Op0, m_Mul(m_ZExt(m_Value(A)), m_ZExt(m_Value(B))))) {
3983 if (Instruction *R = ProcessUMulZExtIdiom(I, Op0, Op1, *this))
3986 if (match(Op1, m_Mul(m_ZExt(m_Value(A)), m_ZExt(m_Value(B))))) {
3987 if (Instruction *R = ProcessUMulZExtIdiom(I, Op1, Op0, *this))
3992 if (I.isEquality()) {
3993 Value *A, *B, *C, *D;
3995 if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) {
3996 if (A == Op1 || B == Op1) { // (A^B) == A -> B == 0
3997 Value *OtherVal = A == Op1 ? B : A;
3998 return new ICmpInst(I.getPredicate(), OtherVal,
3999 Constant::getNullValue(A->getType()));
4002 if (match(Op1, m_Xor(m_Value(C), m_Value(D)))) {
4003 // A^c1 == C^c2 --> A == C^(c1^c2)
4004 ConstantInt *C1, *C2;
4005 if (match(B, m_ConstantInt(C1)) &&
4006 match(D, m_ConstantInt(C2)) && Op1->hasOneUse()) {
4007 Constant *NC = Builder->getInt(C1->getValue() ^ C2->getValue());
4008 Value *Xor = Builder->CreateXor(C, NC);
4009 return new ICmpInst(I.getPredicate(), A, Xor);
4012 // A^B == A^D -> B == D
4013 if (A == C) return new ICmpInst(I.getPredicate(), B, D);
4014 if (A == D) return new ICmpInst(I.getPredicate(), B, C);
4015 if (B == C) return new ICmpInst(I.getPredicate(), A, D);
4016 if (B == D) return new ICmpInst(I.getPredicate(), A, C);
4020 if (match(Op1, m_Xor(m_Value(A), m_Value(B))) &&
4021 (A == Op0 || B == Op0)) {
4022 // A == (A^B) -> B == 0
4023 Value *OtherVal = A == Op0 ? B : A;
4024 return new ICmpInst(I.getPredicate(), OtherVal,
4025 Constant::getNullValue(A->getType()));
4028 // (X&Z) == (Y&Z) -> (X^Y) & Z == 0
4029 if (match(Op0, m_OneUse(m_And(m_Value(A), m_Value(B)))) &&
4030 match(Op1, m_OneUse(m_And(m_Value(C), m_Value(D))))) {
4031 Value *X = nullptr, *Y = nullptr, *Z = nullptr;
4034 X = B; Y = D; Z = A;
4035 } else if (A == D) {
4036 X = B; Y = C; Z = A;
4037 } else if (B == C) {
4038 X = A; Y = D; Z = B;
4039 } else if (B == D) {
4040 X = A; Y = C; Z = B;
4043 if (X) { // Build (X^Y) & Z
4044 Op1 = Builder->CreateXor(X, Y);
4045 Op1 = Builder->CreateAnd(Op1, Z);
4046 I.setOperand(0, Op1);
4047 I.setOperand(1, Constant::getNullValue(Op1->getType()));
4052 // Transform (zext A) == (B & (1<<X)-1) --> A == (trunc B)
4053 // and (B & (1<<X)-1) == (zext A) --> A == (trunc B)
4055 if ((Op0->hasOneUse() &&
4056 match(Op0, m_ZExt(m_Value(A))) &&
4057 match(Op1, m_And(m_Value(B), m_ConstantInt(Cst1)))) ||
4058 (Op1->hasOneUse() &&
4059 match(Op0, m_And(m_Value(B), m_ConstantInt(Cst1))) &&
4060 match(Op1, m_ZExt(m_Value(A))))) {
4061 APInt Pow2 = Cst1->getValue() + 1;
4062 if (Pow2.isPowerOf2() && isa<IntegerType>(A->getType()) &&
4063 Pow2.logBase2() == cast<IntegerType>(A->getType())->getBitWidth())
4064 return new ICmpInst(I.getPredicate(), A,
4065 Builder->CreateTrunc(B, A->getType()));
4068 // (A >> C) == (B >> C) --> (A^B) u< (1 << C)
4069 // For lshr and ashr pairs.
4070 if ((match(Op0, m_OneUse(m_LShr(m_Value(A), m_ConstantInt(Cst1)))) &&
4071 match(Op1, m_OneUse(m_LShr(m_Value(B), m_Specific(Cst1))))) ||
4072 (match(Op0, m_OneUse(m_AShr(m_Value(A), m_ConstantInt(Cst1)))) &&
4073 match(Op1, m_OneUse(m_AShr(m_Value(B), m_Specific(Cst1)))))) {
4074 unsigned TypeBits = Cst1->getBitWidth();
4075 unsigned ShAmt = (unsigned)Cst1->getLimitedValue(TypeBits);
4076 if (ShAmt < TypeBits && ShAmt != 0) {
4077 ICmpInst::Predicate Pred = I.getPredicate() == ICmpInst::ICMP_NE
4078 ? ICmpInst::ICMP_UGE
4079 : ICmpInst::ICMP_ULT;
4080 Value *Xor = Builder->CreateXor(A, B, I.getName() + ".unshifted");
4081 APInt CmpVal = APInt::getOneBitSet(TypeBits, ShAmt);
4082 return new ICmpInst(Pred, Xor, Builder->getInt(CmpVal));
4086 // (A << C) == (B << C) --> ((A^B) & (~0U >> C)) == 0
4087 if (match(Op0, m_OneUse(m_Shl(m_Value(A), m_ConstantInt(Cst1)))) &&
4088 match(Op1, m_OneUse(m_Shl(m_Value(B), m_Specific(Cst1))))) {
4089 unsigned TypeBits = Cst1->getBitWidth();
4090 unsigned ShAmt = (unsigned)Cst1->getLimitedValue(TypeBits);
4091 if (ShAmt < TypeBits && ShAmt != 0) {
4092 Value *Xor = Builder->CreateXor(A, B, I.getName() + ".unshifted");
4093 APInt AndVal = APInt::getLowBitsSet(TypeBits, TypeBits - ShAmt);
4094 Value *And = Builder->CreateAnd(Xor, Builder->getInt(AndVal),
4095 I.getName() + ".mask");
4096 return new ICmpInst(I.getPredicate(), And,
4097 Constant::getNullValue(Cst1->getType()));
4101 // Transform "icmp eq (trunc (lshr(X, cst1)), cst" to
4102 // "icmp (and X, mask), cst"
4104 if (Op0->hasOneUse() &&
4105 match(Op0, m_Trunc(m_OneUse(m_LShr(m_Value(A),
4106 m_ConstantInt(ShAmt))))) &&
4107 match(Op1, m_ConstantInt(Cst1)) &&
4108 // Only do this when A has multiple uses. This is most important to do
4109 // when it exposes other optimizations.
4111 unsigned ASize =cast<IntegerType>(A->getType())->getPrimitiveSizeInBits();
4113 if (ShAmt < ASize) {
4115 APInt::getLowBitsSet(ASize, Op0->getType()->getPrimitiveSizeInBits());
4118 APInt CmpV = Cst1->getValue().zext(ASize);
4121 Value *Mask = Builder->CreateAnd(A, Builder->getInt(MaskV));
4122 return new ICmpInst(I.getPredicate(), Mask, Builder->getInt(CmpV));
4127 // The 'cmpxchg' instruction returns an aggregate containing the old value and
4128 // an i1 which indicates whether or not we successfully did the swap.
4130 // Replace comparisons between the old value and the expected value with the
4131 // indicator that 'cmpxchg' returns.
4133 // N.B. This transform is only valid when the 'cmpxchg' is not permitted to
4134 // spuriously fail. In those cases, the old value may equal the expected
4135 // value but it is possible for the swap to not occur.
4136 if (I.getPredicate() == ICmpInst::ICMP_EQ)
4137 if (auto *EVI = dyn_cast<ExtractValueInst>(Op0))
4138 if (auto *ACXI = dyn_cast<AtomicCmpXchgInst>(EVI->getAggregateOperand()))
4139 if (EVI->getIndices()[0] == 0 && ACXI->getCompareOperand() == Op1 &&
4141 return ExtractValueInst::Create(ACXI, 1);
4144 Value *X; ConstantInt *Cst;
4146 if (match(Op0, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op1 == X)
4147 return FoldICmpAddOpCst(I, X, Cst, I.getPredicate());
4150 if (match(Op1, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op0 == X)
4151 return FoldICmpAddOpCst(I, X, Cst, I.getSwappedPredicate());
4153 return Changed ? &I : nullptr;
4156 /// FoldFCmp_IntToFP_Cst - Fold fcmp ([us]itofp x, cst) if possible.
4157 Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
4160 if (!isa<ConstantFP>(RHSC)) return nullptr;
4161 const APFloat &RHS = cast<ConstantFP>(RHSC)->getValueAPF();
4163 // Get the width of the mantissa. We don't want to hack on conversions that
4164 // might lose information from the integer, e.g. "i64 -> float"
4165 int MantissaWidth = LHSI->getType()->getFPMantissaWidth();
4166 if (MantissaWidth == -1) return nullptr; // Unknown.
4168 IntegerType *IntTy = cast<IntegerType>(LHSI->getOperand(0)->getType());
4170 bool LHSUnsigned = isa<UIToFPInst>(LHSI);
4172 if (I.isEquality()) {
4173 FCmpInst::Predicate P = I.getPredicate();
4174 bool IsExact = false;
4175 APSInt RHSCvt(IntTy->getBitWidth(), LHSUnsigned);
4176 RHS.convertToInteger(RHSCvt, APFloat::rmNearestTiesToEven, &IsExact);
4178 // If the floating point constant isn't an integer value, we know if we will
4179 // ever compare equal / not equal to it.
4181 // TODO: Can never be -0.0 and other non-representable values
4182 APFloat RHSRoundInt(RHS);
4183 RHSRoundInt.roundToIntegral(APFloat::rmNearestTiesToEven);
4184 if (RHS.compare(RHSRoundInt) != APFloat::cmpEqual) {
4185 if (P == FCmpInst::FCMP_OEQ || P == FCmpInst::FCMP_UEQ)
4186 return ReplaceInstUsesWith(I, Builder->getFalse());
4188 assert(P == FCmpInst::FCMP_ONE || P == FCmpInst::FCMP_UNE);
4189 return ReplaceInstUsesWith(I, Builder->getTrue());
4193 // TODO: If the constant is exactly representable, is it always OK to do
4194 // equality compares as integer?
4197 // Check to see that the input is converted from an integer type that is small
4198 // enough that preserves all bits. TODO: check here for "known" sign bits.
4199 // This would allow us to handle (fptosi (x >>s 62) to float) if x is i64 f.e.
4200 unsigned InputSize = IntTy->getScalarSizeInBits();
4202 // Following test does NOT adjust InputSize downwards for signed inputs,
4203 // because the most negative value still requires all the mantissa bits
4204 // to distinguish it from one less than that value.
4205 if ((int)InputSize > MantissaWidth) {
4206 // Conversion would lose accuracy. Check if loss can impact comparison.
4207 int Exp = ilogb(RHS);
4208 if (Exp == APFloat::IEK_Inf) {
4209 int MaxExponent = ilogb(APFloat::getLargest(RHS.getSemantics()));
4210 if (MaxExponent < (int)InputSize - !LHSUnsigned)
4211 // Conversion could create infinity.
4214 // Note that if RHS is zero or NaN, then Exp is negative
4215 // and first condition is trivially false.
4216 if (MantissaWidth <= Exp && Exp <= (int)InputSize - !LHSUnsigned)
4217 // Conversion could affect comparison.
4222 // Otherwise, we can potentially simplify the comparison. We know that it
4223 // will always come through as an integer value and we know the constant is
4224 // not a NAN (it would have been previously simplified).
4225 assert(!RHS.isNaN() && "NaN comparison not already folded!");
4227 ICmpInst::Predicate Pred;
4228 switch (I.getPredicate()) {
4229 default: llvm_unreachable("Unexpected predicate!");
4230 case FCmpInst::FCMP_UEQ:
4231 case FCmpInst::FCMP_OEQ:
4232 Pred = ICmpInst::ICMP_EQ;
4234 case FCmpInst::FCMP_UGT:
4235 case FCmpInst::FCMP_OGT:
4236 Pred = LHSUnsigned ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_SGT;
4238 case FCmpInst::FCMP_UGE:
4239 case FCmpInst::FCMP_OGE:
4240 Pred = LHSUnsigned ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_SGE;
4242 case FCmpInst::FCMP_ULT:
4243 case FCmpInst::FCMP_OLT:
4244 Pred = LHSUnsigned ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_SLT;
4246 case FCmpInst::FCMP_ULE:
4247 case FCmpInst::FCMP_OLE:
4248 Pred = LHSUnsigned ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_SLE;
4250 case FCmpInst::FCMP_UNE:
4251 case FCmpInst::FCMP_ONE:
4252 Pred = ICmpInst::ICMP_NE;
4254 case FCmpInst::FCMP_ORD:
4255 return ReplaceInstUsesWith(I, Builder->getTrue());
4256 case FCmpInst::FCMP_UNO:
4257 return ReplaceInstUsesWith(I, Builder->getFalse());
4260 // Now we know that the APFloat is a normal number, zero or inf.
4262 // See if the FP constant is too large for the integer. For example,
4263 // comparing an i8 to 300.0.
4264 unsigned IntWidth = IntTy->getScalarSizeInBits();
4267 // If the RHS value is > SignedMax, fold the comparison. This handles +INF
4268 // and large values.
4269 APFloat SMax(RHS.getSemantics());
4270 SMax.convertFromAPInt(APInt::getSignedMaxValue(IntWidth), true,
4271 APFloat::rmNearestTiesToEven);
4272 if (SMax.compare(RHS) == APFloat::cmpLessThan) { // smax < 13123.0
4273 if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SLT ||
4274 Pred == ICmpInst::ICMP_SLE)
4275 return ReplaceInstUsesWith(I, Builder->getTrue());
4276 return ReplaceInstUsesWith(I, Builder->getFalse());
4279 // If the RHS value is > UnsignedMax, fold the comparison. This handles
4280 // +INF and large values.
4281 APFloat UMax(RHS.getSemantics());
4282 UMax.convertFromAPInt(APInt::getMaxValue(IntWidth), false,
4283 APFloat::rmNearestTiesToEven);
4284 if (UMax.compare(RHS) == APFloat::cmpLessThan) { // umax < 13123.0
4285 if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_ULT ||
4286 Pred == ICmpInst::ICMP_ULE)
4287 return ReplaceInstUsesWith(I, Builder->getTrue());
4288 return ReplaceInstUsesWith(I, Builder->getFalse());
4293 // See if the RHS value is < SignedMin.
4294 APFloat SMin(RHS.getSemantics());
4295 SMin.convertFromAPInt(APInt::getSignedMinValue(IntWidth), true,
4296 APFloat::rmNearestTiesToEven);
4297 if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // smin > 12312.0
4298 if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT ||
4299 Pred == ICmpInst::ICMP_SGE)
4300 return ReplaceInstUsesWith(I, Builder->getTrue());
4301 return ReplaceInstUsesWith(I, Builder->getFalse());
4304 // See if the RHS value is < UnsignedMin.
4305 APFloat SMin(RHS.getSemantics());
4306 SMin.convertFromAPInt(APInt::getMinValue(IntWidth), true,
4307 APFloat::rmNearestTiesToEven);
4308 if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // umin > 12312.0
4309 if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_UGT ||
4310 Pred == ICmpInst::ICMP_UGE)
4311 return ReplaceInstUsesWith(I, Builder->getTrue());
4312 return ReplaceInstUsesWith(I, Builder->getFalse());
4316 // Okay, now we know that the FP constant fits in the range [SMIN, SMAX] or
4317 // [0, UMAX], but it may still be fractional. See if it is fractional by
4318 // casting the FP value to the integer value and back, checking for equality.
4319 // Don't do this for zero, because -0.0 is not fractional.
4320 Constant *RHSInt = LHSUnsigned
4321 ? ConstantExpr::getFPToUI(RHSC, IntTy)
4322 : ConstantExpr::getFPToSI(RHSC, IntTy);
4323 if (!RHS.isZero()) {
4324 bool Equal = LHSUnsigned
4325 ? ConstantExpr::getUIToFP(RHSInt, RHSC->getType()) == RHSC
4326 : ConstantExpr::getSIToFP(RHSInt, RHSC->getType()) == RHSC;
4328 // If we had a comparison against a fractional value, we have to adjust
4329 // the compare predicate and sometimes the value. RHSC is rounded towards
4330 // zero at this point.
4332 default: llvm_unreachable("Unexpected integer comparison!");
4333 case ICmpInst::ICMP_NE: // (float)int != 4.4 --> true
4334 return ReplaceInstUsesWith(I, Builder->getTrue());
4335 case ICmpInst::ICMP_EQ: // (float)int == 4.4 --> false
4336 return ReplaceInstUsesWith(I, Builder->getFalse());
4337 case ICmpInst::ICMP_ULE:
4338 // (float)int <= 4.4 --> int <= 4
4339 // (float)int <= -4.4 --> false
4340 if (RHS.isNegative())
4341 return ReplaceInstUsesWith(I, Builder->getFalse());
4343 case ICmpInst::ICMP_SLE:
4344 // (float)int <= 4.4 --> int <= 4
4345 // (float)int <= -4.4 --> int < -4
4346 if (RHS.isNegative())
4347 Pred = ICmpInst::ICMP_SLT;
4349 case ICmpInst::ICMP_ULT:
4350 // (float)int < -4.4 --> false
4351 // (float)int < 4.4 --> int <= 4
4352 if (RHS.isNegative())
4353 return ReplaceInstUsesWith(I, Builder->getFalse());
4354 Pred = ICmpInst::ICMP_ULE;
4356 case ICmpInst::ICMP_SLT:
4357 // (float)int < -4.4 --> int < -4
4358 // (float)int < 4.4 --> int <= 4
4359 if (!RHS.isNegative())
4360 Pred = ICmpInst::ICMP_SLE;
4362 case ICmpInst::ICMP_UGT:
4363 // (float)int > 4.4 --> int > 4
4364 // (float)int > -4.4 --> true
4365 if (RHS.isNegative())
4366 return ReplaceInstUsesWith(I, Builder->getTrue());
4368 case ICmpInst::ICMP_SGT:
4369 // (float)int > 4.4 --> int > 4
4370 // (float)int > -4.4 --> int >= -4
4371 if (RHS.isNegative())
4372 Pred = ICmpInst::ICMP_SGE;
4374 case ICmpInst::ICMP_UGE:
4375 // (float)int >= -4.4 --> true
4376 // (float)int >= 4.4 --> int > 4
4377 if (RHS.isNegative())
4378 return ReplaceInstUsesWith(I, Builder->getTrue());
4379 Pred = ICmpInst::ICMP_UGT;
4381 case ICmpInst::ICMP_SGE:
4382 // (float)int >= -4.4 --> int >= -4
4383 // (float)int >= 4.4 --> int > 4
4384 if (!RHS.isNegative())
4385 Pred = ICmpInst::ICMP_SGT;
4391 // Lower this FP comparison into an appropriate integer version of the
4393 return new ICmpInst(Pred, LHSI->getOperand(0), RHSInt);
4396 Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
4397 bool Changed = false;
4399 /// Orders the operands of the compare so that they are listed from most
4400 /// complex to least complex. This puts constants before unary operators,
4401 /// before binary operators.
4402 if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) {
4407 Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
4409 if (Value *V = SimplifyFCmpInst(I.getPredicate(), Op0, Op1,
4410 I.getFastMathFlags(), DL, TLI, DT, AC, &I))
4411 return ReplaceInstUsesWith(I, V);
4413 // Simplify 'fcmp pred X, X'
4415 switch (I.getPredicate()) {
4416 default: llvm_unreachable("Unknown predicate!");
4417 case FCmpInst::FCMP_UNO: // True if unordered: isnan(X) | isnan(Y)
4418 case FCmpInst::FCMP_ULT: // True if unordered or less than
4419 case FCmpInst::FCMP_UGT: // True if unordered or greater than
4420 case FCmpInst::FCMP_UNE: // True if unordered or not equal
4421 // Canonicalize these to be 'fcmp uno %X, 0.0'.
4422 I.setPredicate(FCmpInst::FCMP_UNO);
4423 I.setOperand(1, Constant::getNullValue(Op0->getType()));
4426 case FCmpInst::FCMP_ORD: // True if ordered (no nans)
4427 case FCmpInst::FCMP_OEQ: // True if ordered and equal
4428 case FCmpInst::FCMP_OGE: // True if ordered and greater than or equal
4429 case FCmpInst::FCMP_OLE: // True if ordered and less than or equal
4430 // Canonicalize these to be 'fcmp ord %X, 0.0'.
4431 I.setPredicate(FCmpInst::FCMP_ORD);
4432 I.setOperand(1, Constant::getNullValue(Op0->getType()));
4437 // Test if the FCmpInst instruction is used exclusively by a select as
4438 // part of a minimum or maximum operation. If so, refrain from doing
4439 // any other folding. This helps out other analyses which understand
4440 // non-obfuscated minimum and maximum idioms, such as ScalarEvolution
4441 // and CodeGen. And in this case, at least one of the comparison
4442 // operands has at least one user besides the compare (the select),
4443 // which would often largely negate the benefit of folding anyway.
4445 if (SelectInst *SI = dyn_cast<SelectInst>(*I.user_begin()))
4446 if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) ||
4447 (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1))
4450 // Handle fcmp with constant RHS
4451 if (Constant *RHSC = dyn_cast<Constant>(Op1)) {
4452 if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
4453 switch (LHSI->getOpcode()) {
4454 case Instruction::FPExt: {
4455 // fcmp (fpext x), C -> fcmp x, (fptrunc C) if fptrunc is lossless
4456 FPExtInst *LHSExt = cast<FPExtInst>(LHSI);
4457 ConstantFP *RHSF = dyn_cast<ConstantFP>(RHSC);
4461 const fltSemantics *Sem;
4462 // FIXME: This shouldn't be here.
4463 if (LHSExt->getSrcTy()->isHalfTy())
4464 Sem = &APFloat::IEEEhalf;
4465 else if (LHSExt->getSrcTy()->isFloatTy())
4466 Sem = &APFloat::IEEEsingle;
4467 else if (LHSExt->getSrcTy()->isDoubleTy())
4468 Sem = &APFloat::IEEEdouble;
4469 else if (LHSExt->getSrcTy()->isFP128Ty())
4470 Sem = &APFloat::IEEEquad;
4471 else if (LHSExt->getSrcTy()->isX86_FP80Ty())
4472 Sem = &APFloat::x87DoubleExtended;
4473 else if (LHSExt->getSrcTy()->isPPC_FP128Ty())
4474 Sem = &APFloat::PPCDoubleDouble;
4479 APFloat F = RHSF->getValueAPF();
4480 F.convert(*Sem, APFloat::rmNearestTiesToEven, &Lossy);
4482 // Avoid lossy conversions and denormals. Zero is a special case
4483 // that's OK to convert.
4487 ((Fabs.compare(APFloat::getSmallestNormalized(*Sem)) !=
4488 APFloat::cmpLessThan) || Fabs.isZero()))
4490 return new FCmpInst(I.getPredicate(), LHSExt->getOperand(0),
4491 ConstantFP::get(RHSC->getContext(), F));
4494 case Instruction::PHI:
4495 // Only fold fcmp into the PHI if the phi and fcmp are in the same
4496 // block. If in the same block, we're encouraging jump threading. If
4497 // not, we are just pessimizing the code by making an i1 phi.
4498 if (LHSI->getParent() == I.getParent())
4499 if (Instruction *NV = FoldOpIntoPhi(I))
4502 case Instruction::SIToFP:
4503 case Instruction::UIToFP:
4504 if (Instruction *NV = FoldFCmp_IntToFP_Cst(I, LHSI, RHSC))
4507 case Instruction::FSub: {
4508 // fcmp pred (fneg x), C -> fcmp swap(pred) x, -C
4510 if (match(LHSI, m_FNeg(m_Value(Op))))
4511 return new FCmpInst(I.getSwappedPredicate(), Op,
4512 ConstantExpr::getFNeg(RHSC));
4515 case Instruction::Load:
4516 if (GetElementPtrInst *GEP =
4517 dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) {
4518 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
4519 if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
4520 !cast<LoadInst>(LHSI)->isVolatile())
4521 if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV, I))
4525 case Instruction::Call: {
4526 if (!RHSC->isNullValue())
4529 CallInst *CI = cast<CallInst>(LHSI);
4530 const Function *F = CI->getCalledFunction();
4534 // Various optimization for fabs compared with zero.
4536 if (F->getIntrinsicID() == Intrinsic::fabs ||
4537 (TLI->getLibFunc(F->getName(), Func) && TLI->has(Func) &&
4538 (Func == LibFunc::fabs || Func == LibFunc::fabsf ||
4539 Func == LibFunc::fabsl))) {
4540 switch (I.getPredicate()) {
4543 // fabs(x) < 0 --> false
4544 case FCmpInst::FCMP_OLT:
4545 return ReplaceInstUsesWith(I, Builder->getFalse());
4546 // fabs(x) > 0 --> x != 0
4547 case FCmpInst::FCMP_OGT:
4548 return new FCmpInst(FCmpInst::FCMP_ONE, CI->getArgOperand(0), RHSC);
4549 // fabs(x) <= 0 --> x == 0
4550 case FCmpInst::FCMP_OLE:
4551 return new FCmpInst(FCmpInst::FCMP_OEQ, CI->getArgOperand(0), RHSC);
4552 // fabs(x) >= 0 --> !isnan(x)
4553 case FCmpInst::FCMP_OGE:
4554 return new FCmpInst(FCmpInst::FCMP_ORD, CI->getArgOperand(0), RHSC);
4555 // fabs(x) == 0 --> x == 0
4556 // fabs(x) != 0 --> x != 0
4557 case FCmpInst::FCMP_OEQ:
4558 case FCmpInst::FCMP_UEQ:
4559 case FCmpInst::FCMP_ONE:
4560 case FCmpInst::FCMP_UNE:
4561 return new FCmpInst(I.getPredicate(), CI->getArgOperand(0), RHSC);
4568 // fcmp pred (fneg x), (fneg y) -> fcmp swap(pred) x, y
4570 if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_FNeg(m_Value(Y))))
4571 return new FCmpInst(I.getSwappedPredicate(), X, Y);
4573 // fcmp (fpext x), (fpext y) -> fcmp x, y
4574 if (FPExtInst *LHSExt = dyn_cast<FPExtInst>(Op0))
4575 if (FPExtInst *RHSExt = dyn_cast<FPExtInst>(Op1))
4576 if (LHSExt->getSrcTy() == RHSExt->getSrcTy())
4577 return new FCmpInst(I.getPredicate(), LHSExt->getOperand(0),
4578 RHSExt->getOperand(0));
4580 return Changed ? &I : nullptr;