Strength reduce intrinsics with overflow into regular arithmetic operations if possible.

[oota-llvm.git] / lib / Transforms / InstCombine / InstCombineCalls.cpp
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp

index 8a0d91dfe512cd16b9cfe4f8f38bd8e879681f02..b214b552df8a7b55d460960d60cbecdf94b83ee7 100644 (file)
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -427,6 +427,15 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
          return CreateOverflowTuple(II, LHS, false, /*ReUseName*/false);
        }
      }
+    if (II->getIntrinsicID() == Intrinsic::ssub_with_overflow) {
+      if (WillNotOverflowSignedSub(LHS, RHS, II)) {
+        return CreateOverflowTuple(II, Builder->CreateNSWSub(LHS, RHS), false);
+      }
+    } else {
+      if (WillNotOverflowUnsignedSub(LHS, RHS, II)) {
+        return CreateOverflowTuple(II, Builder->CreateNUWSub(LHS, RHS), false);
+      }
+    }
      break;
    }
    case Intrinsic::umul_with_overflow: {
@@ -477,6 +486,12 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
                                      /*ReUseName*/false);
        }
      }
+    if (II->getIntrinsicID() == Intrinsic::smul_with_overflow) {
+      Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
+      if (WillNotOverflowSignedMul(LHS, RHS, II)) {
+        return CreateOverflowTuple(II, Builder->CreateNSWMul(LHS, RHS), false);
+      }
+    }
      break;
    case Intrinsic::minnum:
    case Intrinsic::maxnum: {
@@ -733,7 +748,22 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
      // TODO: eventually we should lower this intrinsic to IR
      if (auto CIWidth = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
        if (auto CIStart = dyn_cast<ConstantInt>(II->getArgOperand(3))) {
-        if (CIWidth->equalsInt(64) && CIStart->isZero()) {
+        unsigned Index = CIStart->getZExtValue();
+        // From AMD documentation: "a value of zero in the field length is
+        // defined as length of 64".
+        unsigned Length = CIWidth->equalsInt(0) ? 64 : CIWidth->getZExtValue();
+
+        // From AMD documentation: "If the sum of the bit index + length field
+        // is greater than 64, the results are undefined".
+
+        // Note that both field index and field length are 8-bit quantities.
+        // Since variables 'Index' and 'Length' are unsigned values
+        // obtained from zero-extending field index and field length
+        // respectively, their sum should never wrap around.
+        if ((Index + Length) > 64)
+          return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
+
+        if (Length == 64 && Index == 0) {
            Value *Vec = II->getArgOperand(1);
            Value *Undef = UndefValue::get(Vec->getType());
            const uint32_t Mask[] = { 0, 2 };