From: Erik Eckstein
Date: Wed, 17 Dec 2014 07:29:19 +0000 (+0000)
Subject: Strength reduce intrinsics with overflow into regular arithmetic operations if possible.
X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=commitdiff_plain;h=96bd465d6ce31fbdaa09cbc1205797de1fd3a529

Strength reduce intrinsics with overflow into regular arithmetic operations if possible.

Some intrinsics, like s/uadd.with.overflow and umul.with.overflow, are already
strength reduced. This change adds the same strength reduction for the remaining
arithmetic intrinsics: s/usub.with.overflow and smul.with.overflow. It completes
the work on PR20194.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@224417 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h
index 755de87c53b..326bf8f726d 100644
--- a/lib/Transforms/InstCombine/InstCombine.h
+++ b/lib/Transforms/InstCombine/InstCombine.h
@@ -285,6 +285,7 @@ private:
   bool WillNotOverflowUnsignedAdd(Value *LHS, Value *RHS, Instruction *CxtI);
   bool WillNotOverflowSignedSub(Value *LHS, Value *RHS, Instruction *CxtI);
   bool WillNotOverflowUnsignedSub(Value *LHS, Value *RHS, Instruction *CxtI);
+  bool WillNotOverflowSignedMul(Value *LHS, Value *RHS, Instruction *CxtI);
   Value *EmitGEPOffset(User *GEP);
   Instruction *scalarizePHI(ExtractElementInst &EI, PHINode *PN);
   Value *EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask);
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 902b640daca..37ae797fbec 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1008,6 +1008,51 @@ bool InstCombiner::WillNotOverflowUnsignedSub(Value *LHS, Value *RHS,
   return false;
 }
 
+/// \brief Return true if we can prove that:
+///    (mul LHS, RHS) === (mul nsw LHS, RHS)
+bool InstCombiner::WillNotOverflowSignedMul(Value *LHS, Value *RHS,
+                                            Instruction *CxtI) {
+  if (IntegerType *IT = dyn_cast<IntegerType>(LHS->getType())) {
+
+    // Multiplying n * m significant bits yields a result of n + m significant
+    // bits. If the total number of significant bits does not exceed the
+    // result bit width (minus 1), there is no overflow.
+    // This means if we have enough leading sign bits in the operands
+    // we can guarantee that the result does not overflow.
+    // Ref: "Hacker's Delight" by Henry Warren
+    unsigned BitWidth = IT->getBitWidth();
+
+    // Note that underestimating the number of sign bits gives a more
+    // conservative answer.
+    unsigned SignBits = ComputeNumSignBits(LHS, 0, CxtI) +
+                        ComputeNumSignBits(RHS, 0, CxtI);
+
+    // First handle the easy case: if we have enough sign bits there's
+    // definitely no overflow.
+    if (SignBits > BitWidth + 1)
+      return true;
+
+    // There are two ambiguous cases where there can be no overflow:
+    //   SignBits == BitWidth + 1    and
+    //   SignBits == BitWidth
+    // The second case is difficult to check, therefore we only handle the
+    // first case.
+    if (SignBits == BitWidth + 1) {
+      // It overflows only when both arguments are negative and the true
+      // product is exactly the minimum negative number.
+      // E.g. mul i16 with 17 sign bits: 0xff00 * 0xff80 = 0x8000
+      // For simplicity we just check if at least one side is not negative.
+      bool LHSNonNegative, LHSNegative;
+      bool RHSNonNegative, RHSNegative;
+      ComputeSignBit(LHS, LHSNonNegative, LHSNegative, DL, 0, AT, CxtI, DT);
+      ComputeSignBit(RHS, RHSNonNegative, RHSNegative, DL, 0, AT, CxtI, DT);
+      if (LHSNonNegative || RHSNonNegative)
+        return true;
+    }
+  }
+  return false;
+}
+
 // Checks if any operand is negative and we can convert add to sub.
 // This function checks for following negative patterns
 //   ADD(XOR(OR(Z, NOT(C)), C)), 1) == NEG(AND(Z, C))
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 33af7ee0eff..b214b552df8 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -427,6 +427,15 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
         return CreateOverflowTuple(II, LHS, false, /*ReUseName*/false);
       }
     }
+    if (II->getIntrinsicID() == Intrinsic::ssub_with_overflow) {
+      if (WillNotOverflowSignedSub(LHS, RHS, II)) {
+        return CreateOverflowTuple(II, Builder->CreateNSWSub(LHS, RHS), false);
+      }
+    } else {
+      if (WillNotOverflowUnsignedSub(LHS, RHS, II)) {
+        return CreateOverflowTuple(II, Builder->CreateNUWSub(LHS, RHS), false);
+      }
+    }
     break;
   }
   case Intrinsic::umul_with_overflow: {
@@ -477,6 +486,12 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
                                  /*ReUseName*/false);
       }
     }
+    if (II->getIntrinsicID() == Intrinsic::smul_with_overflow) {
+      Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
+      if (WillNotOverflowSignedMul(LHS, RHS, II)) {
+        return CreateOverflowTuple(II, Builder->CreateNSWMul(LHS, RHS), false);
+      }
+    }
     break;
   case Intrinsic::minnum:
   case Intrinsic::maxnum: {
diff --git a/test/Transforms/InstCombine/intrinsics.ll b/test/Transforms/InstCombine/intrinsics.ll
index 9b58d9386f5..c745ea8a59b 100644
--- a/test/Transforms/InstCombine/intrinsics.ll
+++ b/test/Transforms/InstCombine/intrinsics.ll
@@ -1,10 +1,17 @@
 ; RUN: opt -instcombine -S < %s | FileCheck %s
 
 %overflow.result = type {i8, i1}
+%ov.result.32 = type { i32, i1 }
+
 
-declare %overflow.result @llvm.uadd.with.overflow.i8(i8, i8)
-declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32)
-declare %overflow.result @llvm.umul.with.overflow.i8(i8, i8)
+declare %overflow.result @llvm.uadd.with.overflow.i8(i8, i8) nounwind readnone
+declare %overflow.result @llvm.umul.with.overflow.i8(i8, i8) nounwind readnone
+declare %ov.result.32 @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone
+declare %ov.result.32 @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone
+declare %ov.result.32 @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone
+declare %ov.result.32 @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone
+declare %ov.result.32 @llvm.smul.with.overflow.i32(i32, i32) nounwind readnone
+declare %ov.result.32 @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone
 declare double @llvm.powi.f64(double, i32) nounwind readonly
 declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
 declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
@@ -91,17 +98,92 @@ define i8 @uaddtest7(i8 %A, i8 %B) {
 }
 
 ; PR20194
-define { i32, i1 } @saddtest1(i8 %a, i8 %b) {
+define %ov.result.32 @saddtest_nsw(i8 %a, i8 %b) {
   %A = sext i8 %a to i32
   %B = sext i8 %b to i32
-  %x = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %A, i32 %B)
-  ret { i32, i1 } %x
-; CHECK-LABEL: @saddtest1
+  %x = call %ov.result.32 @llvm.sadd.with.overflow.i32(i32 %A, i32 %B)
+  ret %ov.result.32 %x
+; CHECK-LABEL: @saddtest_nsw
 ; CHECK: %x = add nsw i32 %A, %B
-; CHECK-NEXT: %1 = insertvalue { i32, i1 } { i32 undef, i1 false }, i32 %x, 0
-; CHECK-NEXT: ret { i32, i1 } %1
+; CHECK-NEXT: %1 = insertvalue %ov.result.32 { i32 undef, i1 false }, i32 %x, 0
+; CHECK-NEXT: ret %ov.result.32 %1
 }
+define %ov.result.32 @uaddtest_nuw(i32 %a, i32 %b) {
+  %A = and i32 %a, 2147483647
+  %B = and i32 %b, 2147483647
+  %x = call %ov.result.32 @llvm.uadd.with.overflow.i32(i32 %A, i32 %B)
+  ret %ov.result.32 %x
+; CHECK-LABEL: @uaddtest_nuw
+; CHECK: %x = add nuw i32 %A, %B
+; CHECK-NEXT: %1 = insertvalue %ov.result.32 { i32 undef, i1 false }, i32 %x, 0
+; CHECK-NEXT: ret %ov.result.32 %1
+}
+
+define %ov.result.32 @ssubtest_nsw(i8 %a, i8 %b) {
+  %A = sext i8 %a to i32
+  %B = sext i8 %b to i32
+  %x = call %ov.result.32 @llvm.ssub.with.overflow.i32(i32 %A, i32 %B)
+  ret %ov.result.32 %x
+; CHECK-LABEL: @ssubtest_nsw
+; CHECK: %x = sub nsw i32 %A, %B
+; CHECK-NEXT: %1 = insertvalue %ov.result.32 { i32 undef, i1 false }, i32 %x, 0
+; CHECK-NEXT: ret %ov.result.32 %1
+}
+
+define %ov.result.32 @usubtest_nuw(i32 %a, i32 %b) {
+  %A = or i32 %a, 2147483648
+  %B = and i32 %b, 2147483647
+  %x = call %ov.result.32 @llvm.usub.with.overflow.i32(i32 %A, i32 %B)
+  ret %ov.result.32 %x
+; CHECK-LABEL: @usubtest_nuw
+; CHECK: %x = sub nuw i32 %A, %B
+; CHECK-NEXT: %1 = insertvalue %ov.result.32 { i32 undef, i1 false }, i32 %x, 0
+; CHECK-NEXT: ret %ov.result.32 %1
+}
+
+define %ov.result.32 @smultest1_nsw(i32 %a, i32 %b) {
+  %A = and i32 %a, 4095 ; 0xfff
+  %B = and i32 %b, 524287 ; 0x7ffff
+  %x = call %ov.result.32 @llvm.smul.with.overflow.i32(i32 %A, i32 %B)
+  ret %ov.result.32 %x
+; CHECK-LABEL: @smultest1_nsw
+; CHECK: %x = mul nsw i32 %A, %B
+; CHECK-NEXT: %1 = insertvalue %ov.result.32 { i32 undef, i1 false }, i32 %x, 0
+; CHECK-NEXT: ret %ov.result.32 %1
+}
+
+define %ov.result.32 @smultest2_nsw(i32 %a, i32 %b) {
+  %A = ashr i32 %a, 16
+  %B = ashr i32 %b, 16
+  %x = call %ov.result.32 @llvm.smul.with.overflow.i32(i32 %A, i32 %B)
+  ret %ov.result.32 %x
+; CHECK-LABEL: @smultest2_nsw
+; CHECK: %x = mul nsw i32 %A, %B
+; CHECK-NEXT: %1 = insertvalue %ov.result.32 { i32 undef, i1 false }, i32 %x, 0
+; CHECK-NEXT: ret %ov.result.32 %1
+}
+
+define %ov.result.32 @smultest3_sw(i32 %a, i32 %b) {
+  %A = ashr i32 %a, 16
+  %B = ashr i32 %b, 15
+  %x = call %ov.result.32 @llvm.smul.with.overflow.i32(i32 %A, i32 %B)
+  ret %ov.result.32 %x
+; CHECK-LABEL: @smultest3_sw
+; CHECK: %x = call %ov.result.32 @llvm.smul.with.overflow.i32(i32 %A, i32 %B)
+; CHECK-NEXT: ret %ov.result.32 %x
+}
+
+define %ov.result.32 @umultest_nuw(i32 %a, i32 %b) {
+  %A = and i32 %a, 65535 ; 0xffff
+  %B = and i32 %b, 65535 ; 0xffff
+  %x = call %ov.result.32 @llvm.umul.with.overflow.i32(i32 %A, i32 %B)
+  ret %ov.result.32 %x
+; CHECK-LABEL: @umultest_nuw
+; CHECK: %x = mul nuw i32 %A, %B
+; CHECK-NEXT: %1 = insertvalue %ov.result.32 { i32 undef, i1 false }, i32 %x, 0
+; CHECK-NEXT: ret %ov.result.32 %1
+}
 
 define i8 @umultest1(i8 %A, i1* %overflowPtr) {
   %x = call %overflow.result @llvm.umul.with.overflow.i8(i8 0, i8 %A)
@@ -125,9 +207,6 @@ define i8 @umultest2(i8 %A, i1* %overflowPtr) {
 ; CHECK-NEXT: ret i8 %A
 }
 
-%ov.result.32 = type { i32, i1 }
-declare %ov.result.32 @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone
-
 define i32 @umultest3(i32 %n) nounwind {
   %shr = lshr i32 %n, 2
   %mul = call %ov.result.32 @llvm.umul.with.overflow.i32(i32 %shr, i32 3)
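
For readers following the sign-bit argument in WillNotOverflowSignedMul, the
following is a minimal standalone sketch, not part of the patch, that replays
the same rule on plain 16-bit integers. countSignBits and
willNotOverflowSignedMulI16 are illustrative stand-ins for LLVM's
ComputeNumSignBits and the new WillNotOverflowSignedMul, with the bit width
fixed at 16 so the 0xff00 * 0xff80 corner case from the code comment can be
checked directly.

// sign_bit_mul_sketch.cpp -- standalone illustration of the
// "enough leading sign bits => no signed multiply overflow" rule.
#include <cassert>
#include <cstdint>

// Plain stand-in for LLVM's ComputeNumSignBits on i16: the number of leading
// bits that are copies of the sign bit, including the sign bit itself.
static unsigned countSignBits(int16_t V) {
  uint16_t U = static_cast<uint16_t>(V);
  unsigned SignBit = (U >> 15) & 1;
  unsigned N = 1;
  for (int Bit = 14; Bit >= 0 && ((U >> Bit) & 1) == SignBit; --Bit)
    ++N;
  return N;
}

// Mirrors the decision in WillNotOverflowSignedMul for BitWidth == 16:
// more than 17 sign bits can never overflow; exactly 17 sign bits can only
// overflow when both operands are negative (true product == 2^15 = 32768),
// so one provably non-negative operand is enough.
static bool willNotOverflowSignedMulI16(int16_t L, int16_t R) {
  const unsigned BitWidth = 16;
  unsigned SignBits = countSignBits(L) + countSignBits(R);
  if (SignBits > BitWidth + 1)
    return true;
  if (SignBits == BitWidth + 1)
    return L >= 0 || R >= 0;
  return false;
}

int main() {
  // The corner case from the comment: -256 (0xff00) * -128 (0xff80) = 32768,
  // which does not fit in i16. 8 + 9 = 17 sign bits and both operands are
  // negative, so the check stays conservative and returns false.
  assert(!willNotOverflowSignedMulI16(-256, -128));
  // 48 and 32 each have 10 leading sign bits; 20 > 17, so the product is
  // guaranteed to fit and a plain 'mul nsw' would be safe.
  assert(willNotOverflowSignedMulI16(48, 32));
  return 0;
}

The same arithmetic explains the i32 tests above: in smultest1_nsw the masks
0xfff and 0x7ffff leave 20 + 13 = 33 sign bits with both operands non-negative,
so the call becomes mul nsw, while in smultest3_sw the ashr by 16 and 15 also
leaves exactly 33 sign bits but with possibly negative operands, so that call
is left untouched.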