[InstCombine][CodeGenPrep] Create llvm.uadd.with.overflow in CGP.

author Sanjoy Das <sanjoy@playingwithpointers.com>

Fri, 10 Apr 2015 21:07:09 +0000 (21:07 +0000)

committer Sanjoy Das <sanjoy@playingwithpointers.com>

Fri, 10 Apr 2015 21:07:09 +0000 (21:07 +0000)
author Sanjoy Das <sanjoy@playingwithpointers.com>
Fri, 10 Apr 2015 21:07:09 +0000 (21:07 +0000)
committer Sanjoy Das <sanjoy@playingwithpointers.com>
Fri, 10 Apr 2015 21:07:09 +0000 (21:07 +0000)
diff --git a/include/llvm/IR/PatternMatch.h b/include/llvm/IR/PatternMatch.h

index f94e10576893310c24fd967e59f0ba8cc9161b05..41154e6441a93b9c3c1b42e9959eb2f84bb4a538 100644 (file)
--- a/include/llvm/IR/PatternMatch.h
+++ b/include/llvm/IR/PatternMatch.h
@@ -295,6 +295,9 @@ template <typename Class> struct bind_ty {
  /// \brief Match a value, capturing it if we match.
  inline bind_ty<Value> m_Value(Value *&V) { return V; }
  
+/// \brief Match an instruction, capturing it if we match.
+inline bind_ty<Instruction> m_Instruction(Instruction *&I) { return I; }
+
  /// \brief Match a binary operator, capturing it if we match.
  inline bind_ty<BinaryOperator> m_BinOp(BinaryOperator *&I) { return I; }
  
@@ -1103,6 +1106,52 @@ m_UnordFMax(const LHS &L, const RHS &R) {
    return MaxMin_match<FCmpInst, LHS, RHS, ufmax_pred_ty>(L, R);
  }
  
+//===----------------------------------------------------------------------===//
+// Matchers for overflow check patterns: e.g. (a + b) u< a
+//
+
+template <typename LHS_t, typename RHS_t, typename Sum_t>
+struct UAddWithOverflow_match {
+  LHS_t L;
+  RHS_t R;
+  Sum_t S;
+
+  UAddWithOverflow_match(const LHS_t &L, const RHS_t &R, const Sum_t &S)
+      : L(L), R(R), S(S) {}
+
+  template <typename OpTy> bool match(OpTy *V) {
+    Value *ICmpLHS, *ICmpRHS;
+    ICmpInst::Predicate Pred;
+    if (!m_ICmp(Pred, m_Value(ICmpLHS), m_Value(ICmpRHS)).match(V))
+      return false;
+
+    Value *AddLHS, *AddRHS;
+    auto AddExpr = m_Add(m_Value(AddLHS), m_Value(AddRHS));
+
+    // (a + b) u< a, (a + b) u< b
+    if (Pred == ICmpInst::ICMP_ULT)
+      if (AddExpr.match(ICmpLHS) && (ICmpRHS == AddLHS || ICmpRHS == AddRHS))
+        return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpLHS);
+
+    // a >u (a + b), b >u (a + b)
+    if (Pred == ICmpInst::ICMP_UGT)
+      if (AddExpr.match(ICmpRHS) && (ICmpLHS == AddLHS || ICmpLHS == AddRHS))
+        return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpRHS);
+
+    return false;
+  }
+};
+
+/// \brief Match an icmp instruction checking for unsigned overflow on addition.
+///
+/// S is matched to the addition whose result is being checked for overflow, and
+/// L and R are matched to the LHS and RHS of S.
+template <typename LHS_t, typename RHS_t, typename Sum_t>
+UAddWithOverflow_match<LHS_t, RHS_t, Sum_t>
+m_UAddWithOverflow(const LHS_t &L, const RHS_t &R, const Sum_t &S) {
+  return UAddWithOverflow_match<LHS_t, RHS_t, Sum_t>(L, R, S);
+}
+
  /// \brief Match an 'unordered' floating point minimum function.
  /// Floating point has one special value 'NaN'. Therefore, there is no total
  /// order. However, if we can ignore the 'NaN' value (for example, because of a
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp

index 8b48186a9c4ce63075b04e8e54471f6318046b99..d101f52b6eca5cf1618473b2b7ae39a853296fc7 100644 (file)
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -747,13 +747,60 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
    return SinkCast(CI);
  }
  
-/// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce
+/// CombineUAddWithOverflow - try to combine CI into a call to the
+/// llvm.uadd.with.overflow intrinsic if possible.
+///
+/// Return true if any changes were made.
+static bool CombineUAddWithOverflow(CmpInst *CI) {
+  Value *A, *B;
+  Instruction *AddI;
+  if (!match(CI,
+             m_UAddWithOverflow(m_Value(A), m_Value(B), m_Instruction(AddI))))
+    return false;
+
+  Type *Ty = AddI->getType();
+  if (!isa<IntegerType>(Ty))
+    return false;
+
+  // We don't want to move around uses of condition values this late, so we we
+  // check if it is legal to create the call to the intrinsic in the basic
+  // block containing the icmp:
+
+  if (AddI->getParent() != CI->getParent() && !AddI->hasOneUse())
+    return false;
+
+#ifndef NDEBUG
+  // Someday m_UAddWithOverflow may get smarter, but this is a safe assumption
+  // for now:
+  if (AddI->hasOneUse())
+    assert(*AddI->user_begin() == CI && "expected!");
+#endif
+
+  Module *M = CI->getParent()->getParent()->getParent();
+  Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty);
+
+  auto *InsertPt = AddI->hasOneUse() ? CI : AddI;
+
+  auto *UAddWithOverflow =
+      CallInst::Create(F, {A, B}, "uadd.overflow", InsertPt);
+  auto *UAdd = ExtractValueInst::Create(UAddWithOverflow, 0, "uadd", InsertPt);
+  auto *Overflow =
+      ExtractValueInst::Create(UAddWithOverflow, 1, "overflow", InsertPt);
+
+  CI->replaceAllUsesWith(Overflow);
+  AddI->replaceAllUsesWith(UAdd);
+  CI->eraseFromParent();
+  AddI->eraseFromParent();
+  return true;
+}
+
+/// SinkCmpExpression - Sink the given CmpInst into user blocks to reduce
  /// the number of virtual registers that must be created and coalesced.  This is
  /// a clear win except on targets with multiple condition code registers
  ///  (PowerPC), where it might lose; some adjustment may be wanted there.
  ///
  /// Return true if any changes are made.
-static bool OptimizeCmpExpression(CmpInst *CI) {
+static bool SinkCmpExpression(CmpInst *CI) {
    BasicBlock *DefBB = CI->getParent();
  
    /// InsertedCmp - Only insert a cmp in each block once.
@@ -802,6 +849,16 @@ static bool OptimizeCmpExpression(CmpInst *CI) {
    return MadeChange;
  }
  
+static bool OptimizeCmpExpression(CmpInst *CI) {
+  if (SinkCmpExpression(CI))
+    return true;
+
+  if (CombineUAddWithOverflow(CI))
+    return true;
+
+  return false;
+}
+
  /// isExtractBitsCandidateUse - Check if the candidates could
  /// be combined with shift instruction, which includes:
  /// 1. Truncate instruction
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp

index 17d2634d66fa0f73cc2cba80ba116ba95435c571..e084d3d8b03ba28f36b4f66a8e6ebeb9b10fe0fd 100644 (file)
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -2109,35 +2109,6 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
    return ExtractValueInst::Create(Call, 1, "sadd.overflow");
  }
  
-static Instruction *ProcessUAddIdiom(Instruction &I, Value *OrigAddV,
-                                     InstCombiner &IC) {
-  // Don't bother doing this transformation for pointers, don't do it for
-  // vectors.
-  if (!isa<IntegerType>(OrigAddV->getType())) return nullptr;
-
-  // If the add is a constant expr, then we don't bother transforming it.
-  Instruction *OrigAdd = dyn_cast<Instruction>(OrigAddV);
-  if (!OrigAdd) return nullptr;
-
-  Value *LHS = OrigAdd->getOperand(0), *RHS = OrigAdd->getOperand(1);
-
-  // Put the new code above the original add, in case there are any uses of the
-  // add between the add and the compare.
-  InstCombiner::BuilderTy *Builder = IC.Builder;
-  Builder->SetInsertPoint(OrigAdd);
-
-  Module *M = I.getParent()->getParent()->getParent();
-  Type *Ty = LHS->getType();
-  Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty);
-  CallInst *Call = Builder->CreateCall2(F, LHS, RHS, "uadd");
-  Value *Add = Builder->CreateExtractValue(Call, 0);
-
-  IC.ReplaceInstUsesWith(*OrigAdd, Add);
-
-  // The original icmp gets replaced with the overflow value.
-  return ExtractValueInst::Create(Call, 1, "uadd.overflow");
-}
-
  bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS,
                                           Value *RHS, Instruction &OrigI,
                                           Value *&Result, Constant *&Overflow) {
@@ -3539,21 +3510,18 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
          return new ICmpInst(I.getPredicate(), ConstantExpr::getNot(RHSC), A);
      }
  
-    // (a+b) <u a  --> llvm.uadd.with.overflow.
-    // (a+b) <u b  --> llvm.uadd.with.overflow.
-    if (I.getPredicate() == ICmpInst::ICMP_ULT &&
-        match(Op0, m_Add(m_Value(A), m_Value(B))) &&
-        (Op1 == A || Op1 == B))
-      if (Instruction *R = ProcessUAddIdiom(I, Op0, *this))
-        return R;
-
-    // a >u (a+b)  --> llvm.uadd.with.overflow.
-    // b >u (a+b)  --> llvm.uadd.with.overflow.
-    if (I.getPredicate() == ICmpInst::ICMP_UGT &&
-        match(Op1, m_Add(m_Value(A), m_Value(B))) &&
-        (Op0 == A || Op0 == B))
-      if (Instruction *R = ProcessUAddIdiom(I, Op1, *this))
-        return R;
+    Instruction *AddI = nullptr;
+    if (match(&I, m_UAddWithOverflow(m_Value(A), m_Value(B),
+                                     m_Instruction(AddI))) &&
+        isa<IntegerType>(A->getType())) {
+      Value *Result;
+      Constant *Overflow;
+      if (OptimizeOverflowCheck(OCF_UNSIGNED_ADD, A, B, *AddI, Result,
+                                Overflow)) {
+        ReplaceInstUsesWith(*AddI, Result);
+        return ReplaceInstUsesWith(I, Overflow);
+      }
+    }
  
      // (zext a) * (zext b)  --> llvm.umul.with.overflow.
      if (match(Op0, m_Mul(m_ZExt(m_Value(A)), m_ZExt(m_Value(B))))) {
diff --git a/test/CodeGen/X86/add-of-carry.ll b/test/CodeGen/X86/add-of-carry.ll

index 9c24be4289ff6f359c151878b667731be58cd427..44b587af3aaad1e3a5aa3da24d467d21c9558980 100644 (file)
--- a/test/CodeGen/X86/add-of-carry.ll
+++ b/test/CodeGen/X86/add-of-carry.ll
@@ -4,43 +4,26 @@
  define i32 @test1(i32 %sum, i32 %x) nounwind readnone ssp {
  entry:
  ; CHECK-LABEL: test1:
-; CHECK: cmpl %ecx, %eax 
-; CHECK-NOT: addl
-; CHECK: adcl $0, %eax
-  %add4 = add i32 %x, %sum
-  %cmp = icmp ult i32 %add4, %x
-  %inc = zext i1 %cmp to i32
-  %z.0 = add i32 %add4, %inc
-  ret i32 %z.0
-}
-
-; Instcombine transforms test1 into test2:
-; CHECK-LABEL: test2:
  ; CHECK: movl
  ; CHECK-NEXT: addl
  ; CHECK-NEXT: adcl $0
  ; CHECK-NEXT: ret
-define i32 @test2(i32 %sum, i32 %x) nounwind readnone ssp {
-entry:
-  %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %sum)
-  %0 = extractvalue { i32, i1 } %uadd, 0
-  %cmp = extractvalue { i32, i1 } %uadd, 1
+  %add4 = add i32 %x, %sum
+  %cmp = icmp ult i32 %add4, %x
    %inc = zext i1 %cmp to i32
-  %z.0 = add i32 %0, %inc
+  %z.0 = add i32 %add4, %inc
    ret i32 %z.0
  }
  
  ; <rdar://problem/12579915>
-define i32 @test3(i32 %x, i32 %y, i32 %res) nounwind uwtable readnone ssp {
+define i32 @test2(i32 %x, i32 %y, i32 %res) nounwind uwtable readnone ssp {
  entry:
    %cmp = icmp ugt i32 %x, %y
    %dec = sext i1 %cmp to i32
    %dec.res = add nsw i32 %dec, %res
    ret i32 %dec.res
-; CHECK-LABEL: test3:
+; CHECK-LABEL: test2:
  ; CHECK: cmpl
  ; CHECK: sbbl
  ; CHECK: ret
  }
-
-declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone
diff --git a/test/Transforms/CodeGenPrepare/overflow-intrinsics.ll b/test/Transforms/CodeGenPrepare/overflow-intrinsics.ll

new file mode 100644 (file)

index 0000000..1e41319
--- /dev/null
+++ b/test/Transforms/CodeGenPrepare/overflow-intrinsics.ll
@@ -0,0 +1,74 @@
+; RUN: opt -codegenprepare -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+; CHECK-LABEL: @test1(
+; CHECK: llvm.uadd.with.overflow
+; CHECK: ret i64
+define i64 @test1(i64 %a, i64 %b) nounwind ssp {
+entry:
+  %add = add i64 %b, %a
+  %cmp = icmp ult i64 %add, %a
+  %Q = select i1 %cmp, i64 %b, i64 42
+  ret i64 %Q
+}
+
+; CHECK-LABEL: @test2(
+; CHECK: llvm.uadd.with.overflow
+; CHECK: ret i64
+define i64 @test2(i64 %a, i64 %b) nounwind ssp {
+entry:
+  %add = add i64 %b, %a
+  %cmp = icmp ult i64 %add, %b
+  %Q = select i1 %cmp, i64 %b, i64 42
+  ret i64 %Q
+}
+
+; CHECK-LABEL: @test3(
+; CHECK: llvm.uadd.with.overflow
+; CHECK: ret i64
+define i64 @test3(i64 %a, i64 %b) nounwind ssp {
+entry:
+  %add = add i64 %b, %a
+  %cmp = icmp ugt i64 %b, %add
+  %Q = select i1 %cmp, i64 %b, i64 42
+  ret i64 %Q
+}
+
+; CHECK-LABEL: @test4(
+; CHECK: llvm.uadd.with.overflow
+; CHECK: extractvalue
+; CHECK: extractvalue
+; CHECK: select
+define i64 @test4(i64 %a, i64 %b, i1 %c) nounwind ssp {
+entry:
+  %add = add i64 %b, %a
+  %cmp = icmp ugt i64 %b, %add
+  br i1 %c, label %next, label %exit
+
+ next:
+  %Q = select i1 %cmp, i64 %b, i64 42
+  ret i64 %Q
+
+ exit:
+  ret i64 0
+}
+
+; CHECK-LABEL: @test5(
+; CHECK-NOT: llvm.uadd.with.overflow
+; CHECK: next
+define i64 @test5(i64 %a, i64 %b, i64* %ptr, i1 %c) nounwind ssp {
+entry:
+  %add = add i64 %b, %a
+  store i64 %add, i64* %ptr
+  %cmp = icmp ugt i64 %b, %add
+  br i1 %c, label %next, label %exit
+
+ next:
+  %Q = select i1 %cmp, i64 %b, i64 42
+  ret i64 %Q
+
+ exit:
+  ret i64 0
+}
diff --git a/test/Transforms/InstCombine/overflow.ll b/test/Transforms/InstCombine/overflow.ll

index 3eddc80a704898d7c0065e4fe8078d2bfc13d3e6..4c0a8a2c0df7d7c49a305a09a8e874a710344154 100644 (file)
--- a/test/Transforms/InstCombine/overflow.ll
+++ b/test/Transforms/InstCombine/overflow.ll
@@ -97,39 +97,6 @@ if.end:                                           ; preds = %entry
  ; CHECK: ret i8
  }
  
-; CHECK-LABEL: @test5(
-; CHECK: llvm.uadd.with.overflow
-; CHECK: ret i64
-define i64 @test5(i64 %a, i64 %b) nounwind ssp {
-entry:
-  %add = add i64 %b, %a
-  %cmp = icmp ult i64 %add, %a
-  %Q = select i1 %cmp, i64 %b, i64 42
-  ret i64 %Q
-}
-
-; CHECK-LABEL: @test6(
-; CHECK: llvm.uadd.with.overflow
-; CHECK: ret i64
-define i64 @test6(i64 %a, i64 %b) nounwind ssp {
-entry:
-  %add = add i64 %b, %a
-  %cmp = icmp ult i64 %add, %b
-  %Q = select i1 %cmp, i64 %b, i64 42
-  ret i64 %Q
-}
-
-; CHECK-LABEL: @test7(
-; CHECK: llvm.uadd.with.overflow
-; CHECK: ret i64
-define i64 @test7(i64 %a, i64 %b) nounwind ssp {
-entry:
-  %add = add i64 %b, %a
-  %cmp = icmp ugt i64 %b, %add
-  %Q = select i1 %cmp, i64 %b, i64 42
-  ret i64 %Q
-}
-
  ; CHECK-LABEL: @test8(
  ; PR11438
  ; This is @test1, but the operands are not sign-extended.  Make sure
author	Sanjoy Das <sanjoy@playingwithpointers.com>
	Fri, 10 Apr 2015 21:07:09 +0000 (21:07 +0000)
committer	Sanjoy Das <sanjoy@playingwithpointers.com>
	Fri, 10 Apr 2015 21:07:09 +0000 (21:07 +0000)
include/llvm/IR/PatternMatch.h		patch \| blob \| history
lib/CodeGen/CodeGenPrepare.cpp		patch \| blob \| history
lib/Transforms/InstCombine/InstCombineCompares.cpp		patch \| blob \| history
test/CodeGen/X86/add-of-carry.ll		patch \| blob \| history
test/Transforms/CodeGenPrepare/overflow-intrinsics.ll	[new file with mode: 0644]	patch \| blob
test/Transforms/InstCombine/overflow.ll		patch \| blob \| history