Reapply r237520 with another fix for infinite looping

author James Molloy <james.molloy@arm.com>
Sun, 17 May 2015 08:27:27 +0000 (08:27 +0000)
committer James Molloy <james.molloy@arm.com>
Sun, 17 May 2015 08:27:27 +0000 (08:27 +0000)
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp

index 9c2bc34f678fd71f53b2a78b40ab0eba3cc24aad..48ab0eb2c1b9227abc39fd2b32c2c100d1e2ab43 100644 (file)
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -435,6 +435,15 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
    if (Instruction *Result = commonCastTransforms(CI))
      return Result;
  
+  // Test if the trunc is the user of a select which is part of a
+  // minimum or maximum operation. If so, don't do any more simplification.
+  // Even simplifying demanded bits can break the canonical form of a 
+  // min/max.
+  Value *LHS, *RHS;
+  if (SelectInst *SI = dyn_cast<SelectInst>(CI.getOperand(0)))
+    if (matchSelectPattern(SI, LHS, RHS) != SPF_UNKNOWN)
+      return nullptr;
+  
    // See if we can simplify any instructions used by the input whose sole
    // purpose is to compute bits we don't care about.
    if (SimplifyDemandedInstructionBits(CI))
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp

index 223bba03507cb221cae933205844c01274e2a79b..482768655bd0c8e9abc5b0b3d360f3d1460fb0d5 100644 (file)
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -3970,6 +3970,19 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
      }
    }
  
+  // Test if the FCmpInst instruction is used exclusively by a select as
+  // part of a minimum or maximum operation. If so, refrain from doing
+  // any other folding. This helps out other analyses which understand
+  // non-obfuscated minimum and maximum idioms, such as ScalarEvolution
+  // and CodeGen. And in this case, at least one of the comparison
+  // operands has at least one user besides the compare (the select),
+  // which would often largely negate the benefit of folding anyway.
+  if (I.hasOneUse())
+    if (SelectInst *SI = dyn_cast<SelectInst>(*I.user_begin()))
+      if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) ||
+          (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1))
+        return nullptr;
+
    // Handle fcmp with constant RHS
    if (Constant *RHSC = dyn_cast<Constant>(Op1)) {
      if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp

index b13d3edb9ad411ca550e5bdace296821bcd0fb51..d2fbcdd39915c8d0cba67be02aa9e66945becba7 100644 (file)
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -1154,18 +1154,30 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
        }
  
    // See if we can fold the select into one of our operands.
-  if (SI.getType()->isIntegerTy()) {
+  if (SI.getType()->isIntOrIntVectorTy()) {
      if (Instruction *FoldI = FoldSelectIntoOp(SI, TrueVal, FalseVal))
        return FoldI;
  
      Value *LHS, *RHS, *LHS2, *RHS2;
-    SelectPatternFlavor SPF = matchSelectPattern(&SI, LHS, RHS);
+    Instruction::CastOps CastOp;
+    SelectPatternFlavor SPF = matchSelectPattern(&SI, LHS, RHS, &CastOp);
  
-    // MAX(MAX(a, b), a) -> MAX(a, b)
-    // MIN(MIN(a, b), a) -> MIN(a, b)
-    // MAX(MIN(a, b), a) -> a
-    // MIN(MAX(a, b), a) -> a
      if (SPF) {
+      // Canonicalize so that type casts are outside select patterns.
+      if (LHS->getType()->getPrimitiveSizeInBits() !=
+          SI.getType()->getPrimitiveSizeInBits()) {
+        CmpInst::Predicate Pred = getICmpPredicateForMinMax(SPF);
+        Value *Cmp = Builder->CreateICmp(Pred, LHS, RHS);
+        Value *NewSI = Builder->CreateCast(CastOp,
+                                           Builder->CreateSelect(Cmp, LHS, RHS),
+                                           SI.getType());
+        return ReplaceInstUsesWith(SI, NewSI);
+      }
+
+      // MAX(MAX(a, b), a) -> MAX(a, b)
+      // MIN(MIN(a, b), a) -> MIN(a, b)
+      // MAX(MIN(a, b), a) -> a
+      // MIN(MAX(a, b), a) -> a
        if (SelectPatternFlavor SPF2 = matchSelectPattern(LHS, LHS2, RHS2))
          if (Instruction *R = FoldSPFofSPF(cast<Instruction>(LHS),SPF2,LHS2,RHS2,
                                            SI, SPF, RHS))
diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp

index 955776e0f85cf454259583e0e9829b70db89b1cd..80628b23f111dc8aed659d1a49a5acdbb69f365f 100644 (file)
--- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -13,6 +13,7 @@
  //===----------------------------------------------------------------------===//
  
  #include "InstCombineInternal.h"
+#include "llvm/Analysis/ValueTracking.h"
  #include "llvm/IR/IntrinsicInst.h"
  #include "llvm/IR/PatternMatch.h"
  
@@ -406,6 +407,12 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
      break;
    }
    case Instruction::Select:
+    // If this is a select as part of a min/max pattern, don't simplify any
+    // further in case we break the structure.
+    Value *LHS, *RHS;
+    if (matchSelectPattern(I, LHS, RHS) != SPF_UNKNOWN)
+      return nullptr;
+      
      if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask, RHSKnownZero,
                               RHSKnownOne, Depth + 1) ||
          SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, LHSKnownZero,
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp

index 53ab81dbef2dbed2a1a1ebaa47e8f2735dd6f47e..b0604534150e6bfb621b085db99969ea191d6e28 100644 (file)
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -714,6 +714,22 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
          return nullptr;
      }
  
+    // Test if a CmpInst instruction is used exclusively by a select as
+    // part of a minimum or maximum operation. If so, refrain from doing
+    // any other folding. This helps out other analyses which understand
+    // non-obfuscated minimum and maximum idioms, such as ScalarEvolution
+    // and CodeGen. And in this case, at least one of the comparison
+    // operands has at least one user besides the compare (the select),
+    // which would often largely negate the benefit of folding anyway.
+    if (auto *CI = dyn_cast<CmpInst>(SI->getCondition())) {
+      if (CI->hasOneUse()) {
+        Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
+        if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) ||
+            (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1))
+          return nullptr;
+      }
+    }
+
      Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, this);
      Value *SelectFalseVal = FoldOperationIntoSelectOperand(Op, FV, this);
  
@@ -723,7 +739,6 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
    return nullptr;
  }
  
-
  /// FoldOpIntoPhi - Given a binary operator, cast instruction, or select which
  /// has a PHI node as operand #0, see if we can fold the instruction into the
  /// PHI (which is only possible if all operands to the PHI are constants).
diff --git a/test/Transforms/InstCombine/minmax-fold.ll b/test/Transforms/InstCombine/minmax-fold.ll

new file mode 100644 (file)

index 0000000..5372511
--- /dev/null
+++ b/test/Transforms/InstCombine/minmax-fold.ll
@@ -0,0 +1,99 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+; CHECK-LABEL: @t1
+; CHECK-NEXT: icmp
+; CHECK-NEXT: select
+; CHECK-NEXT: sext
+define i64 @t1(i32 %a) {
+  ; This is the canonical form for a type-changing min/max.
+  %1 = icmp slt i32 %a, 5
+  %2 = select i1 %1, i32 %a, i32 5
+  %3 = sext i32 %2 to i64
+  ret i64 %3
+}
+
+; CHECK-LABEL: @t2
+; CHECK-NEXT: icmp
+; CHECK-NEXT: select
+; CHECK-NEXT: sext
+define i64 @t2(i32 %a) {
+  ; Check this is converted into canonical form, as above.
+  %1 = icmp slt i32 %a, 5
+  %2 = sext i32 %a to i64
+  %3 = select i1 %1, i64 %2, i64 5
+  ret i64 %3
+}
+
+; CHECK-LABEL: @t3
+; CHECK-NEXT: icmp
+; CHECK-NEXT: select
+; CHECK-NEXT: zext
+define i64 @t3(i32 %a) {
+  ; Same as @t2, with flipped operands and zext instead of sext.
+  %1 = icmp ult i32 %a, 5
+  %2 = zext i32 %a to i64
+  %3 = select i1 %1, i64 5, i64 %2
+  ret i64 %3
+}
+
+; CHECK-LABEL: @t4
+; CHECK-NEXT: icmp
+; CHECK-NEXT: select
+; CHECK-NEXT: trunc
+define i32 @t4(i64 %a) {
+  ; Same again, with trunc.
+  %1 = icmp slt i64 %a, 5
+  %2 = trunc i64 %a to i32
+  %3 = select i1 %1, i32 %2, i32 5
+  ret i32 %3
+}
+
+; CHECK-LABEL: @t5
+; CHECK-NEXT: icmp
+; CHECK-NEXT: zext
+; CHECK-NEXT: select
+define i64 @t5(i32 %a) {
+  ; Same as @t3, but with mismatched signedness between icmp and zext.
+  ; InstCombine should leave this alone.
+  %1 = icmp slt i32 %a, 5
+  %2 = zext i32 %a to i64
+  %3 = select i1 %1, i64 5, i64 %2
+  ret i64 %3
+}
+
+; CHECK-LABEL: @t6
+; CHECK-NEXT: icmp
+; CHECK-NEXT: select
+; CHECK-NEXT: sitofp
+define float @t6(i32 %a) {
+  %1 = icmp slt i32 %a, 0
+  %2 = select i1 %1, i32 %a, i32 0
+  %3 = sitofp i32 %2 to float
+  ret float %3
+}
+
+; CHECK-LABEL: @t7
+; CHECK-NEXT: icmp
+; CHECK-NEXT: select
+; CHECK-NEXT: trunc
+define i16 @t7(i32 %a) {
+  %1 = icmp slt i32 %a, -32768
+  %2 = trunc i32 %a to i16
+  %3 = select i1 %1, i16 %2, i16 -32768
+  ret i16 %3
+}
+
+; Just check for no infinite loop. InstSimplify liked to
+; "simplify" -32767 by removing all the sign bits,
+; which led to a canonicalization fight between different
+; parts of instcombine.
+define i32 @t8(i64 %a, i32 %b) {
+  %1 = icmp slt i64 %a, -32767
+  %2 = select i1 %1, i64 %a, i64 -32767
+  %3 = trunc i64 %2 to i32
+  %4 = icmp slt i32 %b, 42
+  %5 = select i1 %4, i32 42, i32 %3
+  %6 = icmp ne i32 %5, %b
+  %7 = zext i1 %6 to i32
+  ret i32 %7
+}
author	James Molloy <james.molloy@arm.com>
	Sun, 17 May 2015 08:27:27 +0000 (08:27 +0000)
committer	James Molloy <james.molloy@arm.com>
	Sun, 17 May 2015 08:27:27 +0000 (08:27 +0000)
lib/Transforms/InstCombine/InstCombineCasts.cpp		patch | blob | history
lib/Transforms/InstCombine/InstCombineCompares.cpp		patch | blob | history
lib/Transforms/InstCombine/InstCombineSelect.cpp		patch | blob | history
lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp		patch | blob | history
lib/Transforms/InstCombine/InstructionCombining.cpp		patch | blob | history
test/Transforms/InstCombine/minmax-fold.ll	[new file with mode: 0644]	patch | blob