Reapply r155136 after fixing PR12599.

author Jakob Stoklund Olesen <stoklund@2pi.dk>

Mon, 23 Apr 2012 17:39:52 +0000 (17:39 +0000)

committer Jakob Stoklund Olesen <stoklund@2pi.dk>

Mon, 23 Apr 2012 17:39:52 +0000 (17:39 +0000)
author Jakob Stoklund Olesen <stoklund@2pi.dk>
Mon, 23 Apr 2012 17:39:52 +0000 (17:39 +0000)
committer Jakob Stoklund Olesen <stoklund@2pi.dk>
Mon, 23 Apr 2012 17:39:52 +0000 (17:39 +0000)
diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp

index b31049e59f18f745632f1a1368458b65ab6e35a2..4c14509cb6009e28ba58d581974215ae75a2168f 100644 (file)
--- a/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -529,6 +529,19 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
      ShiftOp = 0;
    
    if (ShiftOp && isa<ConstantInt>(ShiftOp->getOperand(1))) {
+
+    // This is a constant shift of a constant shift. Be careful about hiding
+    // shl instructions behind bit masks. They are used to represent multiplies
+    // by a constant, and it is important that simple arithmetic expressions
+    // are still recognizable by scalar evolution.
+    //
+    // The transforms applied to shl are very similar to the transforms applied
+    // to mul by constant. We can be more aggressive about optimizing right
+    // shifts.
+    //
+    // Combinations of right and left shifts will still be optimized in
+    // DAGCombine where scalar evolution no longer applies.
+
      ConstantInt *ShiftAmt1C = cast<ConstantInt>(ShiftOp->getOperand(1));
      uint32_t ShiftAmt1 = ShiftAmt1C->getLimitedValue(TypeBits);
      uint32_t ShiftAmt2 = Op1->getLimitedValue(TypeBits);
@@ -554,13 +567,6 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
      }
      
      if (ShiftAmt1 == ShiftAmt2) {
-      // If we have ((X >>? C) << C), turn this into X & (-1 << C).
-      if (I.getOpcode() == Instruction::Shl &&
-          ShiftOp->getOpcode() != Instruction::Shl) {
-        APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt1));
-        return BinaryOperator::CreateAnd(X,
-                                         ConstantInt::get(I.getContext(),Mask));
-      }
        // If we have ((X << C) >>u C), turn this into X & (-1 >>u C).
        if (I.getOpcode() == Instruction::LShr &&
            ShiftOp->getOpcode() == Instruction::Shl) {
@@ -570,28 +576,23 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
        }
      } else if (ShiftAmt1 < ShiftAmt2) {
        uint32_t ShiftDiff = ShiftAmt2-ShiftAmt1;
-      
-      // (X >>? C1) << C2 --> X << (C2-C1) & (-1 << C2)
+
+      // (X >>?,exact C1) << C2 --> X << (C2-C1)
+      // The inexact version is deferred to DAGCombine so we don't hide shl
+      // behind a bit mask.
        if (I.getOpcode() == Instruction::Shl &&
-          ShiftOp->getOpcode() != Instruction::Shl) {
+          ShiftOp->getOpcode() != Instruction::Shl &&
+          ShiftOp->isExact()) {
          assert(ShiftOp->getOpcode() == Instruction::LShr ||
                 ShiftOp->getOpcode() == Instruction::AShr);
          ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
-        if (ShiftOp->isExact()) {
-          // (X >>?,exact C1) << C2 --> X << (C2-C1)
-          BinaryOperator *NewShl = BinaryOperator::Create(Instruction::Shl,
-                                                          X, ShiftDiffCst);
-          NewShl->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
-          NewShl->setHasNoSignedWrap(I.hasNoSignedWrap());
-          return NewShl;
-        }
-        Value *Shift = Builder->CreateShl(X, ShiftDiffCst);
-        
-        APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2));
-        return BinaryOperator::CreateAnd(Shift,
-                                         ConstantInt::get(I.getContext(),Mask));
+        BinaryOperator *NewShl = BinaryOperator::Create(Instruction::Shl,
+                                                        X, ShiftDiffCst);
+        NewShl->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
+        NewShl->setHasNoSignedWrap(I.hasNoSignedWrap());
+        return NewShl;
        }
-      
+
        // (X << C1) >>u C2  --> X >>u (C2-C1) & (-1 >> C2)
        if (I.getOpcode() == Instruction::LShr &&
            ShiftOp->getOpcode() == Instruction::Shl) {
@@ -627,24 +628,19 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
        assert(ShiftAmt2 < ShiftAmt1);
        uint32_t ShiftDiff = ShiftAmt1-ShiftAmt2;
  
-      // (X >>? C1) << C2 --> X >>? (C1-C2) & (-1 << C2)
+      // (X >>?exact C1) << C2 --> X >>?exact (C1-C2)
+      // The inexact version is deferred to DAGCombine so we don't hide shl
+      // behind a bit mask.
        if (I.getOpcode() == Instruction::Shl &&
-          ShiftOp->getOpcode() != Instruction::Shl) {
+          ShiftOp->getOpcode() != Instruction::Shl &&
+          ShiftOp->isExact()) {
          ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
-        if (ShiftOp->isExact()) {
-          // (X >>?exact C1) << C2 --> X >>?exact (C1-C2)
-          BinaryOperator *NewShr = BinaryOperator::Create(ShiftOp->getOpcode(),
-                                                          X, ShiftDiffCst);
-          NewShr->setIsExact(true);
-          return NewShr;
-        }
-        Value *Shift = Builder->CreateBinOp(ShiftOp->getOpcode(),
-                                            X, ShiftDiffCst);
-        APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2));
-        return BinaryOperator::CreateAnd(Shift,
-                                         ConstantInt::get(I.getContext(),Mask));
+        BinaryOperator *NewShr = BinaryOperator::Create(ShiftOp->getOpcode(),
+                                                        X, ShiftDiffCst);
+        NewShr->setIsExact(true);
+        return NewShr;
        }
-      
+
        // (X << C1) >>u C2  --> X << (C1-C2) & (-1 >> C2)
        if (I.getOpcode() == Instruction::LShr &&
            ShiftOp->getOpcode() == Instruction::Shl) {
diff --git a/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll b/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll

index 441d5f9b0b6445fc7c2f4ad13c943c3a2bda0927..eb28994756952b6be8c9e07afd447210fa89ebc9 100644 (file)
--- a/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll
+++ b/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll
@@ -5,8 +5,8 @@
  define i32 @main(i32 %argc) nounwind ssp {
  entry:
    %tmp3151 = trunc i32 %argc to i8
-; CHECK: %tmp3162 = shl i8 %tmp3151, 5
-; CHECK: and i8 %tmp3162, 64
+; CHECK: %tmp3163 = shl i8 %tmp3162, 6
+; CHECK: and i8 %tmp3163, 64
  ; CHECK-NOT: shl
  ; CHECK-NOT: shr
    %tmp3161 = or i8 %tmp3151, -17
@@ -38,8 +38,8 @@ bb:
    %tmp10 = lshr i8 %tmp8, 7
    %tmp11 = shl i8 %tmp10, 5
  
-; CHECK: %0 = lshr i8 %tmp8, 2
-; CHECK: %tmp11 = and i8 %0, 32
+; CHECK: %tmp10 = lshr i8 %tmp8, 7
+; CHECK: %tmp11 = shl nuw nsw i8 %tmp10, 5
  
    %tmp12 = xor i8 %tmp11, %tmp9
    ret i8 %tmp12
diff --git a/test/Transforms/InstCombine/apint-shift.ll b/test/Transforms/InstCombine/apint-shift.ll

index 0ea73a058c051e5853fdb5c55b499c551bd1fdb7..73f630ebfec62d3bea5aa9989924ed55f42e6684 100644 (file)
--- a/test/Transforms/InstCombine/apint-shift.ll
+++ b/test/Transforms/InstCombine/apint-shift.ll
@@ -47,13 +47,21 @@ define i32 @test5a(i32 %A) {
  }
  
  ; CHECK: @test6
-; CHECK-NOT: sh
+; CHECK: mul i55 %A, 6
  define i55 @test6(i55 %A) {
         %B = shl i55 %A, 1              ; <i55> [#uses=1]
         %C = mul i55 %B, 3              ; <i55> [#uses=1]
         ret i55 %C
  }
  
+; CHECK: @test6a
+; CHECK: mul i55 %A, 6
+define i55 @test6a(i55 %A) {
+       %B = mul i55 %A, 3              ; <i55> [#uses=1]
+       %C = shl i55 %B, 1              ; <i55> [#uses=1]
+       ret i55 %C
+}
+
  ; CHECK: @test7
  ; CHECK-NOT: sh
  define i29 @test7(i8 %X) {
@@ -87,7 +95,8 @@ define i19 @test10(i19 %A) {
  }
  
  ; CHECK: @test11
-; CHECK-NOT: sh
+; Don't hide the shl from scalar evolution. DAGCombine will get it.
+; CHECK: shl
  define i23 @test11(i23 %A) {
         %a = mul i23 %A, 3              ; <i23> [#uses=1]
         %B = lshr i23 %a, 11            ; <i23> [#uses=1]
@@ -104,7 +113,8 @@ define i47 @test12(i47 %A) {
  }
  
  ; CHECK: @test13
-; CHECK-NOT: sh
+; Don't hide the shl from scalar evolution. DAGCombine will get it.
+; CHECK: shl
  define i18 @test13(i18 %A) {
         %a = mul i18 %A, 3              ; <i18> [#uses=1]
         %B = ashr i18 %a, 8             ; <i18> [#uses=1]
diff --git a/test/Transforms/InstCombine/cast.ll b/test/Transforms/InstCombine/cast.ll

index 19d5a0ae772d1d360db721554a68560050254a1e..7cf63e8ce1f1cb55f0d968d65cabb5a62ed03b36 100644 (file)
--- a/test/Transforms/InstCombine/cast.ll
+++ b/test/Transforms/InstCombine/cast.ll
@@ -457,10 +457,12 @@ define i64 @test50(i64 %A) {
    %E = sext i32 %D to i64
    ret i64 %E
  ; CHECK: @test50
-; CHECK-NEXT: shl i64 %A, 30
+; lshr+shl will be handled by DAGCombine.
+; CHECK-NEXT: lshr i64 %A, 2
+; CHECK-NEXT: shl i64 %a, 32
  ; CHECK-NEXT: add i64 {{.*}}, -4294967296
-; CHECK-NEXT: %sext = ashr i64 {{.*}}, 32
-; CHECK-NEXT: ret i64 %sext
+; CHECK-NEXT: %E = ashr exact i64 {{.*}}, 32
+; CHECK-NEXT: ret i64 %E
  }
  
  define i64 @test51(i64 %A, i1 %cond) {
diff --git a/test/Transforms/InstCombine/shift.ll b/test/Transforms/InstCombine/shift.ll

index 52310e34e09d9580f5775fd452b25605334b6719..25e708b7f51d9bccabf4e24ca561c1ec6cc92cbe 100644 (file)
--- a/test/Transforms/InstCombine/shift.ll
+++ b/test/Transforms/InstCombine/shift.ll
@@ -65,8 +65,17 @@ define i32 @test6(i32 %A) {
  ; CHECK: @test6
  ; CHECK-NEXT: mul i32 %A, 6
  ; CHECK-NEXT: ret i32
-        %B = shl i32 %A, 1      ;; convert to an mul instruction 
-        %C = mul i32 %B, 3             
+        %B = shl i32 %A, 1      ;; convert to an mul instruction
+        %C = mul i32 %B, 3
+        ret i32 %C
+}
+
+define i32 @test6a(i32 %A) {
+; CHECK: @test6a
+; CHECK-NEXT: mul i32 %A, 6
+; CHECK-NEXT: ret i32
+        %B = mul i32 %A, 3
+        %C = shl i32 %B, 1      ;; convert to an mul instruction
          ret i32 %C
  }
  
@@ -97,7 +106,9 @@ define i8 @test9(i8 %A) {
          ret i8 %C
  }
  
+;; This transformation is deferred to DAGCombine:
  ;; (A >> 7) << 7 === A & 128
+;; The shl may be valuable to scalar evolution.
  define i8 @test10(i8 %A) {
  ; CHECK: @test10
  ; CHECK-NEXT: and i8 %A, -128
@@ -107,11 +118,21 @@ define i8 @test10(i8 %A) {
          ret i8 %C
  }
  
+;; Allow the simplification when the lshr shift is exact.
+define i8 @test10a(i8 %A) {
+; CHECK: @test10a
+; CHECK-NEXT: ret i8 %A
+        %B = lshr exact i8 %A, 7
+        %C = shl i8 %B, 7
+        ret i8 %C
+}
+
+;; This transformation is deferred to DAGCombine:
  ;; (A >> 3) << 4 === (A & 0x1F) << 1
+;; The shl may be valuable to scalar evolution.
  define i8 @test11(i8 %A) {
  ; CHECK: @test11
-; CHECK-NEXT: mul i8 %A, 6
-; CHECK-NEXT: and i8
+; CHECK: shl i8
  ; CHECK-NEXT: ret i8
          %a = mul i8 %A, 3               ; <i8> [#uses=1]
          %B = lshr i8 %a, 3              ; <i8> [#uses=1]
@@ -119,6 +140,18 @@ define i8 @test11(i8 %A) {
          ret i8 %C
  }
  
+;; Allow the simplification in InstCombine when the lshr shift is exact.
+define i8 @test11a(i8 %A) {
+; CHECK: @test11a
+; CHECK-NEXT: mul i8 %A, 6
+; CHECK-NEXT: ret i8
+        %a = mul i8 %A, 3
+        %B = lshr exact i8 %a, 3
+        %C = shl i8 %B, 4
+        ret i8 %C
+}
+
+;; This is deferred to DAGCombine unless %B is single-use.
  ;; (A >> 8) << 8 === A & -256
  define i32 @test12(i32 %A) {
  ; CHECK: @test12
@@ -129,11 +162,12 @@ define i32 @test12(i32 %A) {
          ret i32 %C
  }
  
+;; This transformation is deferred to DAGCombine:
  ;; (A >> 3) << 4 === (A & -8) * 2
+;; The shl may be valuable to scalar evolution.
  define i8 @test13(i8 %A) {
  ; CHECK: @test13
-; CHECK-NEXT: mul i8 %A, 6
-; CHECK-NEXT: and i8
+; CHECK: shl i8
  ; CHECK-NEXT: ret i8
          %a = mul i8 %A, 3               ; <i8> [#uses=1]
          %B = ashr i8 %a, 3              ; <i8> [#uses=1]
@@ -141,6 +175,16 @@ define i8 @test13(i8 %A) {
          ret i8 %C
  }
  
+define i8 @test13a(i8 %A) {
+; CHECK: @test13a
+; CHECK-NEXT: mul i8 %A, 6
+; CHECK-NEXT: ret i8
+        %a = mul i8 %A, 3
+        %B = ashr exact i8 %a, 3
+        %C = shl i8 %B, 4
+        ret i8 %C
+}
+
  ;; D = ((B | 1234) << 4) === ((B << 4)|(1234 << 4)
  define i32 @test14(i32 %A) {
  ; CHECK: @test14
@@ -477,10 +521,11 @@ entry:
    %tmp49 = lshr i8 %tmp48, 5
    %tmp50 = mul i8 %tmp49, 64
    %tmp51 = xor i8 %tmp50, %tmp5
-; CHECK: and i8 %0, 16
    %tmp52 = and i8 %tmp51, -128
    %tmp53 = lshr i8 %tmp52, 7
+; CHECK: lshr i8 %tmp51, 7
    %tmp54 = mul i8 %tmp53, 16
+; CHECK: shl nuw nsw i8 %tmp53, 4
    %tmp55 = xor i8 %tmp54, %tmp51
  ; CHECK: ret i8 %tmp551
    ret i8 %tmp55
diff --git a/test/Transforms/PhaseOrdering/basic.ll b/test/Transforms/PhaseOrdering/basic.ll

index 2d52ae50df4721ec609a5cc35c7755c67c8ca653..88ebca0a9c3d1b6066d7b6002e349b728efd048d 100644 (file)
--- a/test/Transforms/PhaseOrdering/basic.ll
+++ b/test/Transforms/PhaseOrdering/basic.ll
@@ -22,3 +22,30 @@ define void @test1() nounwind ssp {
  ; CHECK: @test1
  ; CHECK-NEXT: ret void
  }
+
+; This function exposes a phase ordering problem when InstCombine is
+; turning %add into a bitmask, making it difficult to spot a 0 return value.
+;
+; It it also important that %add is expressed as a multiple of %div so scalar
+; evolution can recognize it.
+define i32 @test2(i32 %a, i32* %p) nounwind uwtable ssp {
+entry:
+  %div = udiv i32 %a, 4
+  %arrayidx = getelementptr inbounds i32* %p, i64 0
+  store i32 %div, i32* %arrayidx, align 4
+  %add = add i32 %div, %div
+  %arrayidx1 = getelementptr inbounds i32* %p, i64 1
+  store i32 %add, i32* %arrayidx1, align 4
+  %arrayidx2 = getelementptr inbounds i32* %p, i64 1
+  %0 = load i32* %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds i32* %p, i64 0
+  %1 = load i32* %arrayidx3, align 4
+  %mul = mul i32 2, %1
+  %sub = sub i32 %0, %mul
+  ret i32 %sub
+
+; CHECK: @test2
+; CHECK: %div = lshr i32 %a, 2
+; CHECK: %add = shl nuw nsw i32 %div, 1
+; CHECK: ret i32 0
+}
diff --git a/test/Transforms/PhaseOrdering/scev.ll b/test/Transforms/PhaseOrdering/scev.ll

new file mode 100644 (file)

index 0000000..c731280
--- /dev/null
+++ b/test/Transforms/PhaseOrdering/scev.ll
@@ -0,0 +1,64 @@
+; RUN: opt -O3 -S -analyze -scalar-evolution %s | FileCheck %s
+;
+; This file contains phase ordering tests for scalar evolution.
+; Test that the standard passes don't obfuscate the IR so scalar evolution can't
+; recognize expressions.
+
+; CHECK: test1
+; The loop body contains two increments by %div.
+; Make sure that 2*%div is recognizable, and not expressed as a bit mask of %d.
+; CHECK: -->  {%p,+,(2 * (%d /u 4) * sizeof(i32))}
+define void @test1(i64 %d, i32* %p) nounwind uwtable ssp {
+entry:
+  %div = udiv i64 %d, 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %p.addr.0 = phi i32* [ %p, %entry ], [ %add.ptr1, %for.inc ]
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %cmp = icmp ne i32 %i.0, 64
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  store i32 0, i32* %p.addr.0, align 4
+  %add.ptr = getelementptr inbounds i32* %p.addr.0, i64 %div
+  store i32 1, i32* %add.ptr, align 4
+  %add.ptr1 = getelementptr inbounds i32* %add.ptr, i64 %div
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %inc = add i32 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+; CHECK: test1a
+; Same thing as test1, but it is even more tempting to fold 2 * (%d /u 2)
+; CHECK: -->  {%p,+,(2 * (%d /u 2) * sizeof(i32))}
+define void @test1a(i64 %d, i32* %p) nounwind uwtable ssp {
+entry:
+  %div = udiv i64 %d, 2
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %p.addr.0 = phi i32* [ %p, %entry ], [ %add.ptr1, %for.inc ]
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %cmp = icmp ne i32 %i.0, 64
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  store i32 0, i32* %p.addr.0, align 4
+  %add.ptr = getelementptr inbounds i32* %p.addr.0, i64 %div
+  store i32 1, i32* %add.ptr, align 4
+  %add.ptr1 = getelementptr inbounds i32* %add.ptr, i64 %div
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %inc = add i32 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
author	Jakob Stoklund Olesen <stoklund@2pi.dk>
	Mon, 23 Apr 2012 17:39:52 +0000 (17:39 +0000)
committer	Jakob Stoklund Olesen <stoklund@2pi.dk>
	Mon, 23 Apr 2012 17:39:52 +0000 (17:39 +0000)
lib/Transforms/InstCombine/InstCombineShifts.cpp		patch \| blob \| history
test/Transforms/InstCombine/2010-11-01-lshr-mask.ll		patch \| blob \| history
test/Transforms/InstCombine/apint-shift.ll		patch \| blob \| history
test/Transforms/InstCombine/cast.ll		patch \| blob \| history
test/Transforms/InstCombine/shift.ll		patch \| blob \| history
test/Transforms/PhaseOrdering/basic.ll		patch \| blob \| history
test/Transforms/PhaseOrdering/scev.ll	[new file with mode: 0644]	patch \| blob